├── .eslintignore ├── .eslintrc.js ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── ci-on-push.yml │ ├── create-release-after-tag.yml │ ├── distribute-release-after-publish.yml │ └── test.yml ├── .gitignore ├── .gitmodules ├── .prettierrc ├── .stork-project-root ├── Cargo.lock ├── Cargo.toml ├── changelog.md ├── codecov.yml ├── deploy.md ├── jest.config.js ├── js ├── __mocks__ │ ├── dom.ts │ ├── entity.ts │ ├── resultToListItem.ts │ └── stork-search.ts ├── config.test.ts ├── config.ts ├── dom.ts ├── entity.test.ts ├── entity.ts ├── entityDom.test.ts ├── entityDom.ts ├── entityManager.test.ts ├── entityManager.ts ├── loaders │ └── indexLoader.ts ├── main.test.ts ├── main.ts ├── pencil.ts ├── resultToListItem.test.ts ├── resultToListItem.ts ├── searchData.ts ├── storkError.ts ├── test-environment.js ├── testHelpers │ └── toEqualDisregardingWhitespace.js ├── util.ts ├── validators │ └── indexParamValidator.ts ├── wasmManager.test.ts ├── wasmManager.ts ├── wasmQueue.test.ts └── wasmQueue.ts ├── justfile ├── license.txt ├── local-dev ├── index.html ├── test-configs │ ├── 3b1b.toml │ ├── beepboop.toml │ ├── bowdoin-orient.toml │ ├── federalist-zero.toml │ └── federalist.toml └── test-indexes │ └── .gitkeep ├── package.json ├── readme.md ├── scripts ├── compare_stats.py ├── generate_stats.py └── upload_build_artifacts.py ├── stork-boundary ├── Cargo.lock ├── Cargo.toml └── src │ └── lib.rs ├── stork-cli ├── Cargo.toml └── src │ ├── clap.rs │ ├── display_timings.rs │ ├── errors.rs │ ├── io.rs │ ├── main.rs │ ├── pretty_print_search_results.rs │ └── test_server │ ├── index.html │ └── mod.rs ├── stork-lib ├── Cargo.toml ├── benches │ ├── basic.rs │ └── federalist.toml └── src │ ├── config │ ├── errors.rs │ ├── file.rs │ ├── frontmatter.rs │ ├── input.rs │ ├── mod.rs │ ├── output.rs │ ├── srt.rs │ └── stemming.rs │ ├── index_v2 │ ├── mod.rs │ ├── scores.rs │ └── search.rs │ ├── index_v3 │ ├── build │ │ ├── 
annotated_words_from_string.rs │ │ ├── errors.rs │ │ ├── fill_containers.rs │ │ ├── fill_intermediate_entries │ │ │ ├── data_source_readers │ │ │ │ ├── filepath_data_source_reader.rs │ │ │ │ ├── mod.rs │ │ │ │ └── url_data_source_reader.rs │ │ │ ├── frontmatter.rs │ │ │ ├── mod.rs │ │ │ └── word_list_generators │ │ │ │ ├── html_word_list_generator.rs │ │ │ │ ├── markdown_word_list_generator.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── plaintext_word_list_generator.rs │ │ │ │ └── srt_word_list_generator.rs │ │ ├── fill_stems.rs │ │ ├── intermediate_entry.rs │ │ ├── mod.rs │ │ └── nudger.rs │ ├── mod.rs │ ├── read.rs │ ├── scores.rs │ ├── search │ │ ├── entry_and_intermediate_excerpts.rs │ │ ├── intermediate_excerpt.rs │ │ └── mod.rs │ └── write.rs │ ├── input.rs │ ├── lib.rs │ ├── output.rs │ └── stopwords.rs ├── stork-wasm ├── Cargo.toml └── src │ └── lib.rs ├── test-assets ├── federalist-min-0.5.3.st ├── federalist-min-0.6.0.st ├── federalist-min-0.7.0.st └── federalist.toml ├── themes ├── basic.css ├── dark.css ├── edible-dark.css ├── edible.css └── flat.css ├── tsconfig.json ├── webpack.common.js ├── webpack.dev.js ├── webpack.prod.js └── yarn.lock /.eslintignore: -------------------------------------------------------------------------------- 1 | pkg 2 | node_modules 3 | dist -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | env: { 3 | browser: true, 4 | es6: true 5 | }, 6 | extends: ["plugin:prettier/recommended"], 7 | globals: { 8 | Atomics: "readonly", 9 | SharedArrayBuffer: "readonly" 10 | }, 11 | parserOptions: { 12 | ecmaVersion: 2018, 13 | sourceType: "module" 14 | }, 15 | plugins: ["prettier"], 16 | rules: { 17 | "prettier/prettier": [ 18 | "error", 19 | {}, 20 | { 21 | usePrettierrc: true 22 | } 23 | ] 24 | }, 25 | overrides: [ 26 | { 27 | files: ["*.ts"], 28 | parser: "@typescript-eslint/parser", 29 | plugins: 
["@typescript-eslint"], 30 | extends: [ 31 | "eslint:recommended", 32 | "plugin:@typescript-eslint/recommended", 33 | "prettier/@typescript-eslint" 34 | ] 35 | } 36 | ] 37 | }; 38 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: jameslittle230 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Make Stork better! 4 | title: "" 5 | labels: bug 6 | --- 7 | 8 | 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "" 5 | labels: feature 6 | --- 7 | 8 | 20 | -------------------------------------------------------------------------------- /.github/workflows/ci-on-push.yml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request: 3 | types: [opened, reopened, edited, synchronize] 4 | 5 | name: "Run CI on Pushes" 6 | 7 | jobs: 8 | rust_tests: 9 | name: "Run Rust Tests" 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - uses: actions/cache@v2 15 | id: cache 16 | with: 17 | path: ~/.cargo/bin/ 18 | key: ${{ runner.os }}-cargo-rust_tests-${{ hashFiles('**/Cargo.lock') }} 19 | 20 | - name: Set up wasm-pack 21 | if: steps.cache.outputs.cache-hit != 'true' 22 | uses: actions-rs/install@v0.1 23 | with: 24 | crate: wasm-pack 25 | 26 | - name: Set up just 27 | if: steps.cache.outputs.cache-hit != 'true' 28 | uses: actions-rs/install@v0.1 29 | with: 30 | crate: just 31 | 32 | - run: just _rust-test 33 | 34 | js_tests: 35 | 
name: "Run JS Tests" 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v2 39 | 40 | - uses: actions/cache@v2 41 | id: cache 42 | with: 43 | path: ~/.cargo/bin/ 44 | key: ${{ runner.os }}-cargo-js_tests-${{ hashFiles('**/Cargo.lock') }} 45 | 46 | - name: Set up wasm-pack 47 | if: steps.cache.outputs.cache-hit != 'true' 48 | uses: actions-rs/install@v0.1 49 | with: 50 | crate: wasm-pack 51 | 52 | - name: Set up just 53 | if: steps.cache.outputs.cache-hit != 'true' 54 | uses: actions-rs/install@v0.1 55 | with: 56 | crate: just 57 | 58 | - run: just _js-test 59 | 60 | - run: | 61 | curl -Os https://uploader.codecov.io/latest/linux/codecov 62 | chmod +x codecov 63 | ./codecov -t ${CODECOV_TOKEN} 64 | 65 | clippy_check: 66 | name: "Run Clippy Check" 67 | runs-on: ubuntu-latest 68 | steps: 69 | - uses: actions/checkout@v1 70 | - uses: actions-rs/toolchain@v1 71 | with: 72 | components: clippy 73 | toolchain: stable 74 | override: true 75 | 76 | - uses: actions-rs/clippy-check@v1 77 | with: 78 | token: ${{ secrets.GITHUB_TOKEN }} 79 | args: --all-targets --all-features -- -D warnings 80 | 81 | cargo_fmt: 82 | name: "Run Cargo Format Check" 83 | runs-on: ubuntu-latest 84 | steps: 85 | - uses: actions/checkout@v1 86 | - run: cargo fmt -- --check 87 | 88 | benchmark: 89 | name: Perform Relative Benchmark 90 | runs-on: ubuntu-latest 91 | steps: 92 | - uses: actions/checkout@v2 93 | with: 94 | token: ${{ secrets.GH_API_PAT }} 95 | submodules: recursive 96 | fetch-depth: 50 # this is to make sure we obtain the target base commit 97 | 98 | - uses: actions/cache@v2 99 | id: cache 100 | with: 101 | path: ~/.cargo/bin/ 102 | key: ${{ runner.os }}-cargo-benchmark-${{ hashFiles('**/Cargo.lock') }} 103 | 104 | - name: Set up wasm-pack 105 | if: steps.cache.outputs.cache-hit != 'true' 106 | uses: actions-rs/install@v0.1 107 | with: 108 | crate: wasm-pack 109 | 110 | - name: Set up just 111 | if: steps.cache.outputs.cache-hit != 'true' 112 | uses: 
actions-rs/install@v0.1 113 | with: 114 | crate: just 115 | 116 | - name: Set up criterion 117 | if: steps.cache.outputs.cache-hit != 'true' 118 | uses: actions-rs/install@v0.1 119 | with: 120 | crate: cargo-criterion 121 | 122 | - name: Setup benchmarks 123 | run: | 124 | echo "BASE_SHA=$(echo ${{ github.event.pull_request.base.sha }} | cut -c1-8)" >> $GITHUB_ENV 125 | echo "HEAD_SHA=$(echo ${{ github.event.pull_request.head.sha }} | cut -c1-8)" >> $GITHUB_ENV 126 | echo "BASELINE_JSON=$(mktemp)" >> $GITHUB_ENV 127 | echo "CONTENDER_JSON=$(mktemp)" >> $GITHUB_ENV 128 | echo "PR_COMMENT=$(mktemp)" >> $GITHUB_ENV 129 | 130 | - name: Run benchmarks 131 | run: | 132 | SETUP_REPO="just build-js solo-build-federalist-index" 133 | RUN_BENCHMARK="just solo-generate-stats" 134 | 135 | git checkout ${{ github.event.pull_request.base.sha }} 136 | git checkout ${{ github.event.pull_request.head.sha }} -- justfile 137 | $SETUP_REPO 138 | $RUN_BENCHMARK > ${{ env.BASELINE_JSON }} 139 | 140 | git stash 141 | git checkout ${{ github.event.pull_request.head.sha }} 142 | $SETUP_REPO 143 | $RUN_BENCHMARK > ${{ env.CONTENDER_JSON }} 144 | 145 | python3 scripts/compare_stats.py ${{ env.BASELINE_JSON }} ${{ env.CONTENDER_JSON }} > ${{ env.PR_COMMENT }} 146 | 147 | - name: Debug tempfile contents 148 | run: | 149 | cat ${{ env.BASELINE_JSON }} 150 | cat ${{ env.CONTENDER_JSON }} 151 | cat ${{ env.PR_COMMENT }} 152 | 153 | - name: Set up comment body 154 | id: get-comment-body 155 | run: | 156 | body="$(cat ${{ env.PR_COMMENT }})" 157 | body="${body//'%'/'%25'}" 158 | body="${body//$'\n'/'%0A'}" 159 | body="${body//$'\r'/'%0D'}" 160 | echo "::set-output name=body::$body" 161 | 162 | - name: Find Comment 163 | uses: peter-evans/find-comment@v1 164 | id: fc 165 | with: 166 | issue-number: ${{ github.event.pull_request.number }} 167 | comment-author: 'github-actions[bot]' 168 | body-includes: Benchmarks 169 | 170 | - name: Create or update comment 171 | uses: 
peter-evans/create-or-update-comment@v1 172 | with: 173 | comment-id: ${{ steps.fc.outputs.comment-id }} 174 | issue-number: ${{ github.event.pull_request.number }} 175 | body: | 176 | ### Benchmarks 177 | ${{ steps.get-comment-body.outputs.body }} 178 | 179 | Baseline: **${{ github.event.pull_request.base.sha }}**; Comparison: ${{ github.event.pull_request.head.sha }} 180 | edit-mode: replace 181 | -------------------------------------------------------------------------------- /.github/workflows/distribute-release-after-publish.yml: -------------------------------------------------------------------------------- 1 | on: 2 | release: 3 | types: [published] 4 | 5 | name: "Distribute Release after Publishing Tag" 6 | jobs: 7 | update_homebrew: 8 | name: "Update Homebrew Repository" 9 | runs-on: "macos-latest" 10 | needs: ["build_macos_10_15"] 11 | steps: 12 | - run: ls -R 13 | 14 | cargo_publish: 15 | name: "Run Cargo Publish" 16 | runs-on: "ubuntu-latest" 17 | needs: ["build_ubuntu_20"] 18 | steps: 19 | - uses: actions/checkout@v2 20 | - run: cargo publish 21 | 22 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: workflow_dispatch 2 | name: "Test action to understand file & directory handling" 3 | 4 | jobs: 5 | create_file: 6 | name: "Create file" 7 | runs-on: ubuntu-latest 8 | steps: 9 | - run: echo hello-world > local-name.txt 10 | - run: echo hello-world > local-name-no-ext 11 | - run: echo hello-world > same-name.txt 12 | - run: echo hello-world > same-name-no-ext 13 | 14 | - uses: actions/upload-artifact@v1 15 | with: 16 | name: artifact-name 17 | path: local-name.txt 18 | - uses: actions/upload-artifact@v1 19 | with: 20 | name: artifact-name-ext.txt 21 | path: local-name.txt 22 | - uses: actions/upload-artifact@v1 23 | with: 24 | name: artifact-name-2 25 | path: local-name-no-ext 26 | - uses: 
actions/upload-artifact@v1 27 | with: 28 | name: artifact-name-ext-2.txt 29 | path: local-name-no-ext 30 | - uses: actions/upload-artifact@v1 31 | with: 32 | name: same-name.txt 33 | path: same-name.txt 34 | - uses: actions/upload-artifact@v1 35 | with: 36 | name: same-name-no-ext 37 | path: same-name-no-ext 38 | download_artifacts: 39 | name: "Download artifacts" 40 | runs-on: ubuntu-latest 41 | needs: ["create_file"] 42 | steps: 43 | - uses: actions/download-artifact@v2 44 | - run: ls -lR 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build Artifacts 2 | target 3 | **/*.rs.bk 4 | node_modules 5 | dist 6 | pkg 7 | wasm-pack.log 8 | coverage 9 | 10 | # Test Files 11 | scripts/*_exe.sh 12 | /local-dev/test-corpora/federalist/*.txt 13 | /local-dev/test-corpora/3b1b/*.srt 14 | /local-dev/test-indexes/*.st 15 | /local-dev/dist 16 | yarn-error.log 17 | 18 | # Development Files 19 | .DS_Store 20 | .vscode 21 | # Vim Swap 22 | [._]*.s[a-v][a-z] 23 | !*.svg # comment out if you don't need vector files 24 | [._]*.sw[a-p] 25 | [._]s[a-rt-v][a-z] 26 | [._]ss[a-gi-z] 27 | [._]sw[a-p] 28 | 29 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "local-dev/test-corpora/federalist"] 2 | path = local-dev/test-corpora/federalist 3 | url = https://github.com/stork-search/federalist 4 | [submodule "local-dev/test-corpora/3b1b"] 5 | path = local-dev/test-corpora/3b1b 6 | url = https://github.com/stork-search/3b1b 7 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "arrowParens": "avoid", 3 | "bracketSpacing": true, 4 | "endOfLine": "auto", 5 | "htmlWhitespaceSensitivity": 
"css", 6 | "insertPragma": false, 7 | "jsxBracketSameLine": false, 8 | "jsxSingleQuote": false, 9 | "printWidth": 80, 10 | "proseWrap": "preserve", 11 | "quoteProps": "as-needed", 12 | "requirePragma": false, 13 | "semi": true, 14 | "singleQuote": false, 15 | "tabWidth": 2, 16 | "trailingComma": "none", 17 | "useTabs": false, 18 | "vueIndentScriptAndStyle": false 19 | } 20 | -------------------------------------------------------------------------------- /.stork-project-root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jameslittle230/stork/efa98dad15b52bd6da9c9e87d612f0913431a95e/.stork-project-root -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["stork-lib", "stork-cli", "stork-wasm"] 3 | 4 | [profile.release] 5 | lto = true 6 | opt-level = 's' 7 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | require_ci_to_pass: true 3 | notify: 4 | after_n_builds: 2 5 | 6 | comment: 7 | require_changes: true 8 | after_n_builds: 2 9 | 10 | coverage: 11 | status: 12 | patch: false 13 | project: 14 | default: 15 | target: auto 16 | threshold: 5% -------------------------------------------------------------------------------- /deploy.md: -------------------------------------------------------------------------------- 1 | # Deploying Stork 2 | 3 | ## Prepare 4 | 5 | - [ ] Start the AWS benchmark and build instances 6 | - [ ] Run `just generate-stats` on my computer to make sure it works 7 | - [ ] Run benchmarks from benchmarking machine 8 | - [ ] Log into benchmarking machine 9 | - [ ] Update to master 10 | - [ ] Run `just generate-stats` 11 | - [ ] Ensure values are acceptable, compared to existing version. 
If not, abort the release and debug. 12 | - [ ] If so, copy benchmark values into notes (you'll need them later!) 13 | - [ ] Create a release PR: 14 | - [ ] Add date to changelog 15 | - [ ] Bump versions 16 | - [ ] In package.json 17 | - [ ] In Cargo.toml for the lib, wasm, and cli crates 18 | - [ ] The dependency on lib in the wasm and cli crates 19 | - [ ] Commit to master and push 20 | - [ ] Create a new PR on the site 21 | - [ ] Add benchmark values 22 | - [ ] Update all CDN references to the updated version number 23 | - [ ] Add documentation, if applicable 24 | - [ ] Let the Netlify preview build in the background 25 | 26 | ## Release 27 | 28 | - [ ] On your computer, check out the latest master 29 | - [ ] Run `$ git tag -a vX.Y.Z -m "Release version X.Y.Z"` 30 | - [ ] Run `$ git push origin vX.Y.Z` 31 | - [ ] Wait for the release to be built. Github Actions will deploy the release automatically. 32 | 33 | ## Aftercare 34 | 35 | - [ ] Check that the demo on the site's Netlify preview works. If not, abort the release and debug. 36 | - [ ] Add the changelog to the Github release, and publish it 37 | - [ ] Create an Amazon Linux binary. 38 | - [ ] Run the shell script from the amazon-linux build machine 39 | - [ ] Upload the Amazon Linux binary to the CDN 40 | - [ ] Upload the Amazon Linux binary to the Github release. 
41 | - [ ] Merge the PR you made on [the documentation site](https://github.com/stork-search/site) 42 | - [ ] Update Homebrew 43 | - [ ] Generate a new brewfile based on the Github-generated tarball: 44 | - [ ] `$ rm /opt/homebrew/Library/Taps/homebrew/homebrew-core/Formula/stork.rb` on my computer 45 | - [ ] `$ brew create https://github.com/jameslittle230/stork/archive/vX.Y.Z.tar.gz` 46 | - [ ] Manually update the URL and SHA in the [Homebrew formula file](https://github.com/jameslittle230/homebrew-stork-tap/blob/master/Formula/stork.rb) 47 | - [ ] Run `$ cargo publish` 48 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | preset: "ts-jest", 3 | transform: { 4 | "^.+\\.jsx?$": "/node_modules/ts-jest/dist/index.js" 5 | }, 6 | testEnvironment: "/js/test-environment.js", 7 | modulePathIgnorePatterns: ["/target/*"], 8 | collectCoverage: true, 9 | collectCoverageFrom: ["js/**/*.ts"], 10 | roots: ["js"] 11 | }; 12 | -------------------------------------------------------------------------------- /js/__mocks__/dom.ts: -------------------------------------------------------------------------------- 1 | export interface MockHtmlElement { 2 | name: string; 3 | addEventListener: jest.MockedFunction; 4 | removeEventListener: jest.MockedFunction; 5 | insertAdjacentElement: jest.MockedFunction; 6 | remove: jest.MockedFunction; 7 | scrollIntoView: jest.MockedFunction; 8 | appendChild: jest.MockedFunction; 9 | classList: { 10 | entries: Array; 11 | remove: jest.MockedFunction; 12 | add: jest.MockedFunction; 13 | }; 14 | style: { 15 | width: string; 16 | }; 17 | innerHTML: string; 18 | value: string; 19 | children: Array; 20 | } 21 | 22 | export const createMockHtmlElement = function (): MockHtmlElement { 23 | return { 24 | name: "", 25 | addEventListener: jest.fn(), 26 | removeEventListener: jest.fn(), 27 | 
insertAdjacentElement: jest.fn(), 28 | remove: jest.fn(), 29 | appendChild: jest.fn(), 30 | scrollIntoView: jest.fn(), 31 | classList: { 32 | entries: [], 33 | remove: jest.fn(), 34 | add: jest.fn() 35 | }, 36 | style: { 37 | width: "" 38 | }, 39 | innerHTML: "innerHTML", 40 | value: "value", 41 | children: [] 42 | }; 43 | }; 44 | 45 | export const create = jest.fn( 46 | (name: string, attributes: Record>) => { 47 | const output = createMockHtmlElement(); 48 | output.name = name; 49 | attributes.classNames.forEach(className => { 50 | output.classList.entries.push(className); 51 | }); 52 | return output; 53 | } 54 | ); 55 | 56 | export const add = jest.fn( 57 | (child: MockHtmlElement, _where: string, parent: MockHtmlElement) => { 58 | parent.children.push(child); 59 | } 60 | ); 61 | 62 | export const clear = jest.fn((element: MockHtmlElement) => { 63 | element.children = []; 64 | }); 65 | 66 | export const setText = jest.fn((element: MockHtmlElement, text: string) => { 67 | element.innerHTML = text; 68 | }); 69 | 70 | export const existsBeyondContainerBounds = jest.fn( 71 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 72 | (_elem: HTMLElement, _container: HTMLElement) => true 73 | ); 74 | -------------------------------------------------------------------------------- /js/__mocks__/entity.ts: -------------------------------------------------------------------------------- 1 | export class Entity { 2 | readonly name: string; 3 | readonly url: string; 4 | readonly config: Record; 5 | constructor(name: string, url: string, config: Record) { 6 | this.name = name; 7 | this.url = url; 8 | this.config = config; 9 | } 10 | 11 | registerIndex = jest.fn().mockResolvedValue({}); 12 | attachToDom = jest.fn(); 13 | } 14 | -------------------------------------------------------------------------------- /js/__mocks__/resultToListItem.ts: -------------------------------------------------------------------------------- 1 | import { MockHtmlElement, 
createMockHtmlElement } from "./dom"; 2 | 3 | export function resultToListItem(): MockHtmlElement { 4 | return createMockHtmlElement(); 5 | } 6 | -------------------------------------------------------------------------------- /js/__mocks__/stork-search.ts: -------------------------------------------------------------------------------- 1 | export const wasm_search = jest.fn; 2 | 3 | export const init_spy = jest.fn().mockImplementation((input: string) => { 4 | return new Promise((res, rej) => { 5 | console.log(4, "mock stork search", input); 6 | if (input.includes("stork-search.net") || input.includes("example.com")) { 7 | res("stork-search.net"); 8 | return; 9 | } else { 10 | rej(); 11 | } 12 | }); 13 | }); 14 | 15 | export default init_spy; 16 | -------------------------------------------------------------------------------- /js/config.test.ts: -------------------------------------------------------------------------------- 1 | import { calculateOverriddenConfig } from "./config"; 2 | 3 | test("correctly overrides default config", () => { 4 | const expected = { 5 | showProgress: true, 6 | printIndexInfo: false, 7 | showScores: true 8 | }; 9 | 10 | const generated = calculateOverriddenConfig({ 11 | showScores: true 12 | }); 13 | 14 | expect(generated).toMatchObject(expected); 15 | }); 16 | -------------------------------------------------------------------------------- /js/config.ts: -------------------------------------------------------------------------------- 1 | import StorkError from "./storkError"; 2 | import { difference, plural } from "./util"; 3 | 4 | export interface Configuration { 5 | showProgress: boolean; 6 | printIndexInfo: boolean; 7 | showScores: boolean; 8 | showCloseButton: boolean; 9 | minimumQueryLength: number; 10 | forceOverwrite: boolean; 11 | resultNoun: { singular: string; plural: string }; 12 | onQueryUpdate?: (query: string, results: unknown) => unknown; 13 | onResultSelected?: (query: string, result: unknown) => unknown; 14 | 
onResultsHidden?: () => unknown; 15 | onInputCleared?: () => unknown; 16 | transformResultUrl: (url: string) => string; 17 | } 18 | 19 | export const defaultConfig: Readonly = { 20 | showProgress: true, 21 | printIndexInfo: false, 22 | showScores: false, 23 | showCloseButton: true, 24 | minimumQueryLength: 3, 25 | forceOverwrite: false, 26 | resultNoun: { singular: "file", plural: "files" }, 27 | onQueryUpdate: undefined, 28 | onResultSelected: undefined, 29 | onResultsHidden: undefined, 30 | onInputCleared: undefined, 31 | transformResultUrl: url => url, 32 | }; 33 | 34 | export function calculateOverriddenConfig( 35 | overrides: Partial 36 | ): Configuration | StorkError { 37 | const configKeyDiff = difference( 38 | Object.keys(overrides), 39 | Object.keys(defaultConfig) 40 | ); 41 | 42 | if (configKeyDiff.length > 0) { 43 | const keys = plural(configKeyDiff.length, "key", "keys"); 44 | const invalidKeys = JSON.stringify(configKeyDiff); 45 | return new StorkError(`Invalid ${keys} in config object: ${invalidKeys}`); 46 | } 47 | 48 | const output: Configuration = Object.assign({}, defaultConfig); 49 | 50 | for (const key of Object.keys(defaultConfig) as Array) { 51 | const overrideVal = overrides[key]; 52 | if (overrideVal !== undefined) { 53 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment 54 | // @ts-ignore 55 | output[key] = overrideVal; 56 | } 57 | } 58 | 59 | return output; 60 | } 61 | -------------------------------------------------------------------------------- /js/dom.ts: -------------------------------------------------------------------------------- 1 | export function create( 2 | name: string, 3 | attributes: Record> 4 | ): HTMLElement { 5 | const elem = document.createElement(name); 6 | if (attributes.classNames) { 7 | elem.setAttribute("class", attributes.classNames.join(" ")); 8 | } 9 | return elem; 10 | } 11 | 12 | export function add( 13 | elem: HTMLElement, 14 | location: InsertPosition, 15 | reference: HTMLElement 16 | ): void { 
17 | reference.insertAdjacentElement(location, elem); 18 | } 19 | 20 | export function clear(elem: HTMLElement | null): void { 21 | while (elem && elem.firstChild) { 22 | elem.removeChild(elem.firstChild); 23 | } 24 | } 25 | 26 | export function setText(elem: HTMLElement | null, text: string): void { 27 | const textNode = document.createTextNode(text); 28 | if (elem && elem.firstChild) { 29 | elem.replaceChild(textNode, elem.firstChild); 30 | } else if (elem) { 31 | elem.appendChild(textNode); 32 | } 33 | } 34 | 35 | export function existsBeyondContainerBounds( 36 | elem: HTMLElement, 37 | container: HTMLElement 38 | ): boolean { 39 | const elemBoundingBox = elem.getBoundingClientRect(); 40 | const containerBoundingBox = container.getBoundingClientRect(); 41 | 42 | return ( 43 | elemBoundingBox.bottom > containerBoundingBox.bottom || 44 | elemBoundingBox.top < containerBoundingBox.top 45 | ); 46 | } 47 | -------------------------------------------------------------------------------- /js/entity.test.ts: -------------------------------------------------------------------------------- 1 | import { Entity } from "./entity"; 2 | import { defaultConfig } from "./config"; 3 | jest.mock("./wasmQueue"); 4 | jest.mock("./entityDom"); 5 | // eslint-disable-next-line @typescript-eslint/no-empty-function 6 | jest.mock("stork-search", () => {}, { virtual: true }); 7 | 8 | test("Can successfully generate an entity", () => { 9 | const entity = new Entity("test", "https://google.com", defaultConfig); 10 | expect(entity).toBeTruthy(); 11 | }); 12 | 13 | test("Injest search data maps url values", () => { 14 | const entity = new Entity("test", "https://google.com", defaultConfig); 15 | entity.injestSearchData({ 16 | results: [ 17 | { 18 | entry: { fields: {}, title: "bleh", url: "https://google.com" }, 19 | excerpts: [ 20 | { 21 | fields: {}, 22 | internal_annotations: [{ a: "#suffix" }], 23 | highlight_ranges: [], 24 | score: 0, 25 | text: "blah" 26 | } 27 | ], 28 | score: 0, 29 | 
title_highlight_ranges: [] 30 | } 31 | ], 32 | total_hit_count: 0, 33 | url_prefix: "" 34 | }); 35 | 36 | expect(entity.results[0].entry.url).toEqual("https://google.com#suffix"); 37 | }); 38 | 39 | test("Changing an entity's state calls render", () => { 40 | const entity = new Entity("test", "https://google.com", defaultConfig); 41 | entity.attachToDom(); 42 | entity.state = "loading"; 43 | expect(entity.domManager?.render as jest.Mock).toHaveBeenCalled(); 44 | }); 45 | 46 | test("Set download progress should render only if the entity's config shows the progress", () => { 47 | const entities = [false, true].map(showProgress => { 48 | const entity = new Entity("test", "https://google.com", { 49 | ...defaultConfig, 50 | showProgress 51 | }); 52 | entity.attachToDom(); 53 | entity.setDownloadProgress(20); 54 | return entity; 55 | }); 56 | 57 | const [e1_render_calls, e2_render_calls] = entities.map( 58 | e => (e.domManager?.render as jest.Mock).mock.calls.length 59 | ); 60 | 61 | // Entity 2's domManager has one more render call than entity 1's. 
62 | expect(e2_render_calls - e1_render_calls).toEqual(1); 63 | }); 64 | 65 | test("Errored download calls render with an error", () => { 66 | const entity = new Entity("test", "https://google.com", defaultConfig); 67 | entity.attachToDom(); 68 | entity.setDownloadError(); 69 | 70 | const lastCall = (entity.domManager?.render as jest.Mock).mock.calls[1][0]; 71 | console.log(lastCall); 72 | expect(lastCall.state).toEqual("error"); 73 | expect(lastCall.message.toLowerCase()).toContain("error"); 74 | }); 75 |
--------------------------------------------------------------------------------
/js/entity.ts:
--------------------------------------------------------------------------------
import { Configuration } from "./config";
import { Result, SearchData, resolveSearch } from "./searchData";
import { EntityDom, RenderState } from "./entityDom";
import { wasm_register_index } from "stork-search";
import StorkError from "./storkError";

export type EntityState = "initialized" | "loading" | "ready" | "error";

/**
 * One registered search index: its configuration, download/registration
 * state, the most recent search results, and (optionally) the DOM manager
 * that draws them.
 *
 * Assigning to `state` re-renders through the attached DOM manager, if any.
 */
export class Entity {
  readonly name: string;
  readonly url: string;
  readonly config: Configuration;

  private _state: EntityState = "initialized";

  downloadProgress = 0;

  index: Uint8Array;
  results: Array<Result> = [];
  totalResultCount = 0;

  domManager: EntityDom | null;
  eventListenerFunctions: Record<string, (e: Event) => void> = {};
  highlightedResult = 0;
  resultsVisible = false;
  hoverSelectEnabled = true;

  constructor(name: string, url: string, config: Configuration) {
    this.name = name;
    this.url = url;
    this.config = config;
  }

  public get state(): EntityState {
    return this._state;
  }

  /** Stores the new state and immediately re-renders. */
  public set state(value: EntityState) {
    this._state = value;
    this.render();
  }

  /**
   * Picks the status line for the current state, or null when there is no
   * DOM manager to read the query from (or nothing to say).
   */
  private getCurrentMessage(): string | null {
    if (!this.domManager) return null;
    const query = this.domManager.getQuery();
    if (this.state === "error") {
      return "Error! Check the browser console.";
    } else if (this.state !== "ready") {
      return "Loading...";
    } else if (query?.length < this.config.minimumQueryLength) {
      return "Filtering...";
    } else if (this.results) {
      if (this.totalResultCount === 0) {
        return `No ${this.config.resultNoun.plural} found.`;
      } else if (this.totalResultCount === 1) {
        return `1 ${this.config.resultNoun.singular} found.`;
      } else {
        return `${this.totalResultCount} ${this.config.resultNoun.plural} found.`;
      }
    }

    return null;
  }

  /** Snapshot of everything the DOM manager needs for one render pass. */
  private generateRenderConfig(): RenderState {
    return {
      results: this.results,
      resultsVisible: true,
      showScores: this.config.showScores,
      message: this.getCurrentMessage(),
      showProgress: this.config.showProgress,
      progress: this.downloadProgress,
      state: this.state
    };
  }

  /** Re-renders through the DOM manager; a no-op until attachToDom() ran. */
  private render() {
    if (!this.domManager) return;
    this.domManager.render(this.generateRenderConfig());
  }

  /**
   * Hands the downloaded index bytes to the WASM module under this entity's
   * name and marks the entity ready on success.
   *
   * @param data Raw index bytes
   * @returns a promise that resolves with the parsed registration info, or
   * rejects with a StorkError when that info carries an `error` field.
   */
  registerIndex(data: Uint8Array): Promise<void> {
    return new Promise((resolve, reject) => {
      const indexInfo = JSON.parse(wasm_register_index(this.name, data));
      if (indexInfo.error) {
        reject(new StorkError(indexInfo.error));
      } else {
        if (this.config.printIndexInfo) {
          console.log(indexInfo);
        }

        this.state = "ready";
        resolve(indexInfo);
      }
    });
  }

  /** Creates the DOM manager for this entity and draws the initial state. */
  attachToDom(): void {
    this.domManager = new EntityDom(this.name, this);
    this.render();
  }

  /**
   * Stores a fresh set of search results, rewrites each result's URL, and
   * re-renders.
   *
   * Note that `data.results` is adopted as-is, so the URL rewrite is visible
   * on the objects the caller passed in.
   */
  injestSearchData(data: SearchData): void {
    this.results = data.results;
    this.totalResultCount = data.total_hit_count;
    this.highlightedResult = 0;

    // Mutate the result URL, like we do when there's a url prefix or suffix
    const urlPrefix = data.url_prefix || "";
    this.results.forEach(r => {
      // The suffix is the "a" annotation of the first excerpt that carries
      // internal annotations at all (only when it is a string).
      const firstInternalAnnotations = r.excerpts
        .map(e => e.internal_annotations)
        .find(ia => !!ia);
      const anchor = firstInternalAnnotations?.[0]?.["a"];
      const urlSuffix = typeof anchor === "string" ? anchor : "";

      r.entry.url = this.config.transformResultUrl(`${urlPrefix}${r.entry.url}${urlSuffix}`);
    });

    this.render();
  }

  /**
   * Builds the result list handed to `config.onQueryUpdate`: the same
   * results without highlight ranges and internal annotations.
   *
   * Works on copies so that `this.results` (which later renders still read)
   * keeps its highlight data. `entry` objects are shared, not cloned.
   */
  private getSanitizedResults(): Array<Result> {
    return this.results.map(result => {
      const sanitized: Result = {
        ...result,
        excerpts: result.excerpts.map(excerpt => {
          const excerptCopy = { ...excerpt };
          delete excerptCopy.highlight_ranges;
          delete excerptCopy.internal_annotations;
          return excerptCopy;
        })
      };
      delete sanitized.title_highlight_ranges;
      return sanitized;
    });
  }

  /**
   * Records download progress. Setting the state already triggers one
   * render; a second one follows when progress display is enabled so the
   * updated percentage is drawn.
   */
  setDownloadProgress = (percentage: number): void => {
    this.state = "loading";
    this.downloadProgress = percentage;
    if (this.config.showProgress) {
      this.render();
    }
  };

  /** Flags the entity as failed (and re-renders via the state setter). */
  setDownloadError(): void {
    this.state = "error";
  }

  /**
   * Runs a search for `query` and renders the outcome.
   *
   * Not-ready entities and too-short queries only re-render (the latter
   * after clearing results). Errors thrown while searching are logged to
   * the console rather than propagated.
   */
  performSearch(query: string): void {
    if (this.state !== "ready") {
      this.render();
      return;
    }

    if (query.length < this.config.minimumQueryLength) {
      this.results = [];
      this.render();
      return;
    }

    try {
      const data = resolveSearch(this.name, query);
      if (!data) return;

      this.injestSearchData(data);

      if (this.config.onQueryUpdate) {
        this.config.onQueryUpdate(query, this.getSanitizedResults());
      }
    } catch (error) {
      console.error(error);
    }
  }
}

--------------------------------------------------------------------------------
/js/entityDom.test.ts:
--------------------------------------------------------------------------------
1 | import { Entity } from "./entity"; 2 | import { defaultConfig } from "./config"; 3 | import { EntityDom } from "./entityDom"; 4 | import { JSDOM } from "jsdom"; 5 | import { add } from "./dom"; 6 | 7 | import { createMockHtmlElement, MockHtmlElement } from "./__mocks__/dom"; 8 | 9 | jest.mock("./resultToListItem"); 10 | jest.mock("./wasmQueue"); 11 | jest.mock("./dom"); 12 | 13 | // @TODO: Mock resultToListItem() 14 | 15 | // eslint-disable-next-line @typescript-eslint/no-empty-function 16 | jest.mock("stork-search", () => {}, { virtual: true }); 17 | 18 | const mockInputElement = createMockHtmlElement(); 19 | mockInputElement.value = "input"; 20 | 21 | const mockOutputElement = createMockHtmlElement(); 22 | mockOutputElement.value = "input"; 23 | 24 | const dom = new JSDOM(); 25 | global.document = dom.window.document; 26 | 27 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment 28 | // @ts-ignore 29 | global.window = dom.window; 30 | global.document.querySelector = jest 31 | .fn() 32 | .mockImplementation((query: string) => { 33 | switch (query) { 34 | case `input[data-stork="test"]`: 35 | return mockInputElement; 36 | 37 | case `div[data-stork="test-output"]`: 38 | return mockOutputElement; 39 | } 40 | }); 41 | 42 | describe("entitydom", () => { 43 | let entity: Entity; 44 | let entityDom: EntityDom; 45 | 46 | beforeEach(() => { 47 | entity = new Entity("test", "https://google.com", defaultConfig); 48 | entity.attachToDom(); 49 | entityDom = entity.domManager; 50 | }); 51 | 52 | test("entityDom successfully constructed", () => { 53 | expect(entityDom).not.toBeNull(); 54 | }); 55 | 56 | test("calling render with one result + one excerpt", () => { 57 | (add as jest.MockedFunction).mockClear(); 58 |
mockInputElement.value = "query"; 59 | 60 | entityDom.render({ 61 | results: [ 62 | { 63 | entry: { 64 | fields: {}, 65 | title: "result title", 66 | url: "https://jameslittle.me" 67 | }, 68 | excerpts: [ 69 | { 70 | fields: {}, 71 | highlight_ranges: [], 72 | internal_annotations: [], 73 | score: 10, 74 | text: "excerpt text" 75 | } 76 | ], 77 | score: 10, 78 | title_highlight_ranges: [] 79 | } 80 | ], 81 | resultsVisible: true, 82 | showProgress: true, 83 | showScores: true, 84 | progress: 0.5, 85 | message: "sup", 86 | state: "ready" 87 | }); 88 | 89 | // message, results list, list item, attribution, close button 90 | expect(add).toHaveBeenCalledTimes(5); 91 | 92 | const outputChildrenClassLists = mockOutputElement.children.map( 93 | e => e.classList.entries 94 | ); 95 | 96 | expect(outputChildrenClassLists.filter(a => a.length != 1).length).toBe(0); 97 | expect(outputChildrenClassLists.map(a => a[0])).toEqual([ 98 | "stork-message", 99 | "stork-results", 100 | "stork-attribution" 101 | ]); 102 | expect(mockOutputElement.children[1].children.length).toBe(1); 103 | expect(mockOutputElement.classList.add).toHaveBeenCalledWith( 104 | "stork-output-visible" 105 | ); 106 | }); 107 | 108 | test("calling render with one result + one excerpt", () => { 109 | (add as jest.MockedFunction).mockClear(); 110 | mockOutputElement.classList.add.mockClear(); 111 | mockInputElement.value = ""; 112 | 113 | entityDom.render({ 114 | results: [], 115 | resultsVisible: true, 116 | showProgress: true, 117 | showScores: true, 118 | progress: 0.5, 119 | message: "sup", 120 | state: "ready" 121 | }); 122 | 123 | // message, results list, list item, attribution, close button 124 | expect(add).toHaveBeenCalledTimes(0); 125 | 126 | expect(mockOutputElement.children.length).toEqual(0); 127 | expect(mockOutputElement.classList.add).not.toHaveBeenCalled(); 128 | }); 129 | 130 | test("calling changeHighlightedResult", () => { 131 | (add as jest.MockedFunction).mockClear(); 132 | 133 | // Just 
force entity.results.length to be 2 134 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment 135 | // @ts-ignore 136 | entity.results = ["a", "b"]; 137 | entityDom.render({ 138 | resultsVisible: true, 139 | showProgress: true, 140 | showScores: true, 141 | progress: 0.5, 142 | state: "ready", 143 | message: "sup", 144 | results: [ 145 | { 146 | entry: { 147 | fields: {}, 148 | title: "result title", 149 | url: "https://jameslittle.me" 150 | }, 151 | excerpts: [ 152 | { 153 | fields: {}, 154 | highlight_ranges: [], 155 | internal_annotations: [], 156 | score: 10, 157 | text: "excerpt text" 158 | } 159 | ], 160 | score: 10, 161 | title_highlight_ranges: [] 162 | }, 163 | { 164 | entry: { 165 | fields: {}, 166 | title: "result title", 167 | url: "https://jameslittle.me" 168 | }, 169 | excerpts: [ 170 | { 171 | fields: {}, 172 | highlight_ranges: [], 173 | internal_annotations: [], 174 | score: 10, 175 | text: "excerpt text" 176 | } 177 | ], 178 | score: 10, 179 | title_highlight_ranges: [] 180 | } 181 | ] 182 | }); 183 | 184 | // console.log(entityDom.elements.list.children.length); 185 | const highlightTarget = 1; 186 | entityDom.changeHighlightedResult({ 187 | to: highlightTarget, 188 | shouldScrollTo: true 189 | }); 190 | 191 | expect.assertions(entityDom.elements.list.children.length * 2); 192 | ((entityDom.elements.list as unknown) as MockHtmlElement).children.forEach( 193 | (listItem: MockHtmlElement, idx: number) => { 194 | const mockAddFunction = (listItem.classList 195 | .add as unknown) as jest.MockedFunction; 196 | 197 | const mockRemoveFunction = (listItem.classList 198 | .remove as unknown) as jest.MockedFunction; 199 | 200 | if (idx == highlightTarget) { 201 | expect(mockRemoveFunction).not.toHaveBeenCalled(); 202 | expect(mockAddFunction).toHaveBeenCalledTimes(1); 203 | } else { 204 | expect(mockRemoveFunction).toHaveBeenCalled(); 205 | expect(mockAddFunction).not.toHaveBeenCalled(); 206 | } 207 | } 208 | ); 209 | }); 210 | }); 211 | 
--------------------------------------------------------------------------------
/js/entityManager.test.ts:
--------------------------------------------------------------------------------
import { attachToDom, register } from "./entityManager";

jest.mock("./loaders/indexLoader", () => {
  return {
    loadIndexFromUrl: jest.fn().mockImplementation((_url, { load }) => {
      load();
    })
  };
});

jest.mock("./wasmManager", () => ({
  runAfterWasmLoaded: jest.fn().mockImplementation(fn => {
    fn();
  })
}));

jest.mock("./entity");

describe("entityManager", () => {
  test("can't insert two indexes with the same name", () => {
    expect.assertions(1);
    register("index-name", "", {}).then(() =>
      register("index-name", "", {}).catch(e => expect(e).toBeTruthy())
    );
  });

  test("attachToDom fails with missing index", () => {
    expect(() => attachToDom("doesnt-exist")).toThrow();
  });
});

--------------------------------------------------------------------------------
/js/entityManager.ts:
--------------------------------------------------------------------------------
import { Entity } from "./entity";
import { Configuration, calculateOverriddenConfig } from "./config";
import { loadIndexFromUrl } from "./loaders/indexLoader";
import { runAfterWasmLoaded } from "./wasmManager";
import StorkError from "./storkError";

// Every registered entity, keyed by index name.
const entities: Record<string, Entity> = {};

/**
 * Creates an Entity for `name`, downloads its index from `url`, and
 * registers the bytes with the WASM module once that module has loaded.
 *
 * @param name Index name; must be unused unless `forceOverwrite` is set
 * @param url Location of the index file
 * @param partialConfig User overrides merged over the default configuration
 *
 * @returns a promise that settles with the outcome of index registration.
 * It rejects with a StorkError when the configuration is invalid, the name
 * is already taken, the download fails, or the WASM fails to load.
 */
const register = (
  name: string,
  url: string,
  partialConfig: Partial<Configuration>
): Promise<void> => {
  return new Promise((res, rej) => {
    const fullConfig = calculateOverriddenConfig(partialConfig);
    if (fullConfig instanceof StorkError) {
      rej(fullConfig);
      return;
    }

    if (entities[name] && !fullConfig.forceOverwrite) {
      rej(
        new StorkError(
          `You're registering an index named \`${name}\`, but that already exists. If this is expected, set forceOverwrite to true in your Javascript config to allow overwriting indexes.`
        )
      );
      // Stop here: without this the existing entity was replaced and its
      // index re-downloaded even though the caller had been told "no".
      return;
    }

    const entity = new Entity(name, url, fullConfig);
    entities[name] = entity;

    loadIndexFromUrl(url, {
      progress: percentage => {
        entity.setDownloadProgress(percentage);
      },

      load: response => {
        runAfterWasmLoaded(
          () => {
            entity.registerIndex(new Uint8Array(response)).then(res).catch(rej);
          },
          () => {
            entity.state = "error";
            // Settle the promise too; otherwise callers wait forever.
            rej(
              new StorkError(
                `Could not register index \`${name}\` because the WASM failed to load.`
              )
            );
          }
        );
      },

      error: () => {
        entity.setDownloadError();
        rej(new StorkError(`Could not download the index at ${url}.`));
      }
    });
  });
};

/**
 * Attaches the named entity to the page.
 *
 * @throws Error when no entity has been registered under `name`.
 */
const attachToDom = (name: string): void => {
  if (!entities[name]) {
    throw new Error(`Index ${name} has not been registered!`);
  }

  entities[name].attachToDom();
};

/** True only when an entity exists under `name` and its state is "ready". */
const entityIsReady = (name: string): boolean => {
  return entities[name]?.state === "ready";
};

/** Debug snapshot: a shallow copy of the registry plus its size. */
const debug = (): Record<string, unknown> => ({
  entities: { ...entities },
  // `entities` is a plain object, so it has no `.length` of its own.
  entitiesCount: Object.keys(entities).length
});

export { register, attachToDom, entityIsReady, debug };

--------------------------------------------------------------------------------
/js/loaders/indexLoader.ts:
--------------------------------------------------------------------------------
interface IndexLoaderCallbacks {
  load: (response: ArrayBufferLike) => void;
  progress: (percentage: number) => void;
  error: () => void;
}

/**
 * Fraction of the transfer completed so far. Reports 0 when the total size
 * is unknown, instead of dividing by a zero `total`.
 */
function completedFraction(e: ProgressEvent): number {
  return e.lengthComputable && e.total > 0 ? e.loaded / e.total : 0;
}

/**
 * Downloads `url` as an ArrayBuffer with XMLHttpRequest and reports the
 * outcome through `callbacks`:
 * - `progress` receives the completed fraction on every progress event,
 * - `load` receives the response body for a 2xx status,
 * - `error` fires on a network error or a non-2xx status.
 */
export function loadIndexFromUrl(
  url: string,
  callbacks: IndexLoaderCallbacks
): void {
  const r = new XMLHttpRequest();

  r.addEventListener("load", e => {
    const { status, response } = e.target as XMLHttpRequest;

    // This shouldn't happen on the `load` event, but handle it safely if it does
    if (status === 0) {
      callbacks.progress(completedFraction(e));
      return;
    }

    if (status < 200 || status > 299) {
      callbacks.error();
      return;
    }

    callbacks.load(response);
  });

  r.addEventListener("error", () => {
    callbacks.error();
  });

  r.addEventListener("progress", e => {
    callbacks.progress(completedFraction(e));
  });

  r.responseType = "arraybuffer";
  r.open("GET", url);
  r.send();
}

--------------------------------------------------------------------------------
/js/main.test.ts:
--------------------------------------------------------------------------------
import { register, initialize } from "./main";
jest.mock("stork-search");

// eslint-disable-next-line @typescript-eslint/no-explicit-any
let init: any;

import("stork-search").then(module => {
  init = module.default;
});

jest.mock("./entityManager", () => ({
  register: jest.fn().mockResolvedValue(undefined),
  attachToDom: jest.fn(),
  entityIsReady: jest.fn().mockReturnValue(true)
}));
jest.mock("./entity");

describe("main tests", () => {
  beforeEach(() => {
    jest.resetModules();
    // const m_init = init as jest.Mock;
    init.mockClear();
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    // return import("./main").then(module => {
    //   initialize = module.initialize;
    //   register = module.register;
    // });
  });

  it("should only initialize WASM once", async () => {
    // m_init.mockClear();

    // Initialize with example URL
    initialize("https://example.com/stork.wasm");

    // Call register, which if called alone would call init with
    // the default URL, but should not call init again because
    // init has already been called
    register("something", "./something.st");
    expect(init).toHaveBeenCalledTimes(1);
    expect(init).toHaveBeenLastCalledWith("https://example.com/stork.wasm");
  });

  // it("should initialize WASM once with default URL when register is called", async () => {
  //   const m_init = init as jest.Mock;
  //   // m_init.mockClear();

  //   await register("something", "./something.st");
  //   expect(init).toHaveBeenCalledTimes(1);
  //   expect(m_init.mock.calls[0][0]).toMatch(/stork-search\.net\/.*\.wasm/);
  // });
});

--------------------------------------------------------------------------------
/js/main.ts:
--------------------------------------------------------------------------------
import { Configuration } from "./config";
import {
  register as registerEntity,
  attachToDom,
  entityIsReady,
  debug as entityDebug
} from "./entityManager";
import { loadWasm, debug as wasmDebug } from "./wasmManager";
import { resolveSearch, SearchData } from "./searchData";
import StorkError from "./storkError";
import { validateIndexParams } from "./validators/indexParamValidator";
import { wasm_stork_version } from "stork-search";

/**
 * Starts (or joins) the WASM load.
 *
 * @param wasmOverrideUrl Optional URL to load the WASM from
 * @returns a promise that resolves with no value once the WASM has loaded.
 */
function initialize(wasmOverrideUrl: string | null = null): Promise<void> {
  return loadWasm(wasmOverrideUrl).then(() => {
    return;
  });
}

/**
 * Validates the arguments, then registers and downloads the index.
 *
 * @returns a promise that rejects with the validation error, or otherwise
 * settles like the entity manager's `register`.
 */
function downloadIndex(name: string, url: string, config = {}): Promise<void> {
  return new Promise((res, rej) => {
    const validationError = validateIndexParams(name, url);
    if (validationError) {
      rej(validationError);
      return;
    }

    registerEntity(name, url, config).then(res).catch(rej);
  });
}

/**
 * Attaches a registered index to the DOM.
 *
 * @throws StorkError carrying the message of whatever `attachToDom` threw.
 */
function attach(name: string): void {
  try {
    attachToDom(name);
  } catch (e) {
    throw new StorkError(e.message);
  }
}

/**
 * Convenience entry point: initialize the WASM with the default URL,
 * download the index, and attach it to the DOM.
 */
function register(
  name: string,
  url: string,
  config: Partial<Configuration> = {}
): Promise<void> {
  const initPromise = initialize();
  const downloadPromise = downloadIndex(name, url, config);
  attach(name);

  // This silly `then` call turns a [(void), (void)] into a (void), which is
  // only necessary to make Typescript happy.
  // You begin to wonder if you write Typescript code, or if Typescript code writes you.
  return Promise.all([initPromise, downloadPromise]).then();
}

/**
 * Runs a search against a ready index and returns the raw search data.
 *
 * @throws StorkError when an argument is missing or the index is not ready.
 */
function search(name: string, query: string): SearchData {
  if (!name || !query) {
    throw new StorkError(
      "Make sure to call stork.search() with two arguments: the index name and the search query."
    );
  }

  if (!entityIsReady(name)) {
    throw new StorkError(
      "Couldn't find index. Make sure the stork.downloadIndex() promise has resolved before calling stork.search()."
    );
  }

  return resolveSearch(name, query);
}

/** Merges the WASM and entity debug snapshots with version information. */
function debug(): Record<string, unknown> {
  return {
    ...wasmDebug(),
    ...entityDebug(),
    jsStorkVersion: process.env.VERSION,
    // NOTE(review): this stores the imported binding itself; if
    // `wasm_stork_version` is a function it probably needs to be called —
    // confirm against the stork-search package.
    wasmStorkVersion: wasm_stork_version
  };
}

export { initialize, downloadIndex, attach, search, register, debug };

--------------------------------------------------------------------------------
/js/pencil.ts:
--------------------------------------------------------------------------------
1 | // It's like Handlebars, but smaller.
2 | 3 | import { HighlightRange } from "./searchData"; 4 | 5 | export function highlight( 6 | text: string, 7 | highlight_ranges: Array 8 | ): string { 9 | function insert(str: string, index: number, value: string) { 10 | return str.substr(0, index) + value + str.substr(index); 11 | } 12 | 13 | let charactersAlreadyAdded = 0; 14 | 15 | for (const range of highlight_ranges) { 16 | const beginningInsertion = ``; 17 | const endInsertion = ``; 18 | 19 | text = insert( 20 | text, 21 | range.beginning + charactersAlreadyAdded, 22 | beginningInsertion 23 | ); 24 | charactersAlreadyAdded += beginningInsertion.length; 25 | 26 | text = insert(text, range.end + charactersAlreadyAdded, endInsertion); 27 | charactersAlreadyAdded += endInsertion.length; 28 | } 29 | 30 | return text; 31 | } 32 | -------------------------------------------------------------------------------- /js/resultToListItem.test.ts: -------------------------------------------------------------------------------- 1 | import { resultToListItem } from "./resultToListItem"; 2 | import { Result } from "./searchData"; 3 | import toEqualDisregardingWhitespace from "./testHelpers/toEqualDisregardingWhitespace"; 4 | 5 | expect.extend({ 6 | toEqualDisregardingWhitespace: toEqualDisregardingWhitespace 7 | }); 8 | 9 | test("resultToListItem happy path", () => { 10 | const result: Result = { 11 | entry: { 12 | fields: {}, 13 | title: "Result Title", 14 | url: "https://jameslittle.me" 15 | }, 16 | excerpts: [ 17 | { fields: {}, score: 12, text: "This is the text of the excerpt." } 18 | ], 19 | score: 12, 20 | title_highlight_ranges: [] 21 | }; 22 | 23 | const node = resultToListItem(result, { selected: false, showScores: false }); 24 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment 25 | // @ts-ignore 26 | expect((node as Element).innerHTML).toEqualDisregardingWhitespace( 27 | ` 28 |

Result Title

29 |
30 |
31 |

32 | ...This is the text of the excerpt.... 33 |

34 |
35 |
36 |
` 37 | ); 38 | }); 39 | 40 | test("resultToListItem with no excerpts doesn't have container", () => { 41 | const result: Result = { 42 | entry: { 43 | fields: {}, 44 | title: "Result Title", 45 | url: "https://jameslittle.me" 46 | }, 47 | excerpts: [], 48 | score: 12, 49 | title_highlight_ranges: [] 50 | }; 51 | 52 | const node = resultToListItem(result, { selected: false, showScores: false }); 53 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment 54 | // @ts-ignore 55 | expect((node as Element).innerHTML).toEqualDisregardingWhitespace( 56 | ` 57 |

Result Title

58 |
` 59 | ); 60 | }); 61 | -------------------------------------------------------------------------------- /js/resultToListItem.ts: -------------------------------------------------------------------------------- 1 | import { highlight } from "./pencil"; 2 | import { Result } from "./searchData"; 3 | 4 | export interface ListItemDisplayOptions { 5 | selected: boolean; 6 | showScores: boolean; 7 | } 8 | 9 | export function resultToListItem( 10 | result: Result, 11 | options: ListItemDisplayOptions 12 | ): ChildNode { 13 | const template = document.createElement("template"); 14 | template.innerHTML = ` 15 |
  • 16 | 17 |
    18 |

    ${highlight( 19 | result.entry.title, 20 | result.title_highlight_ranges || [] 21 | )}

    22 | ${options.showScores ? `${result.score}` : ""} 23 |
    24 | ${ 25 | result.excerpts.length > 0 26 | ? '
    ' 27 | : "" 28 | } 29 | ${result.excerpts 30 | .map( 31 | e => `

    32 | ...${highlight(e.text, e.highlight_ranges || [])}... 33 |

    34 | ${options.showScores ? `${e.score}` : ""} 35 |
    ` 36 | ) 37 | .join("")} 38 | ${result.excerpts.length > 0 ? "
    " : ""} 39 |
    40 |
  • `; 41 | return template.content.firstElementChild as ChildNode; 42 | } 43 | -------------------------------------------------------------------------------- /js/searchData.ts: -------------------------------------------------------------------------------- 1 | import { wasm_search } from "stork-search"; 2 | import StorkError from "./storkError"; 3 | 4 | export interface HighlightRange { 5 | beginning: number; 6 | end: number; 7 | } 8 | 9 | export interface Entry { 10 | fields: Record; 11 | title: string; 12 | url: string; 13 | } 14 | 15 | export interface Excerpt { 16 | fields: Record; 17 | internal_annotations?: Array>; 18 | highlight_ranges?: Array; 19 | score: number; 20 | text: string; 21 | } 22 | 23 | export interface Result { 24 | entry: Entry; 25 | excerpts: Array; 26 | score: number; 27 | title_highlight_ranges?: Array; 28 | } 29 | 30 | export interface SearchData { 31 | results: Array; 32 | total_hit_count: number; 33 | url_prefix: string; 34 | } 35 | 36 | export function resolveSearch(name: string, query: string): SearchData { 37 | let searchOutput = null; 38 | let data = null; 39 | 40 | try { 41 | searchOutput = wasm_search(name, query); 42 | // If wasm_search returns an error, it will return a JSON blob. Look for 43 | // data.error to see if this is the case. 44 | data = JSON.parse(searchOutput); 45 | } catch (e) { 46 | // Data has come back improperly, even beyond an error in Rust-land. 47 | // analytics.log(e) 48 | throw new StorkError( 49 | "Could not parse data from wasm_search. If you see this, please file a bug: https://jil.im/storkbug " + 50 | searchOutput 51 | ); 52 | } 53 | 54 | if (!data) { 55 | throw new StorkError("Data was an empty object"); 56 | } 57 | 58 | if (data.error) { 59 | throw new StorkError(`Could not perform search: the WASM binary failed to return search results. 60 | You might not be serving your search index properly. 
61 | If you think this is an error, please file a bug: https://jil.im/storkbug 62 | 63 | The WASM binary came back with: 64 | ${data.error}`); 65 | } 66 | 67 | return data; 68 | } 69 | -------------------------------------------------------------------------------- /js/storkError.ts: -------------------------------------------------------------------------------- 1 | class StorkError extends Error { 2 | constructor(message: string) { 3 | super(message); 4 | this.name = "StorkError"; 5 | } 6 | } 7 | 8 | export default StorkError; 9 | -------------------------------------------------------------------------------- /js/test-environment.js: -------------------------------------------------------------------------------- 1 | // This helper defines the TextEncoder field for jsdom. 2 | // https://stackoverflow.com/a/57713960/3841018 3 | const Environment = require("jest-environment-jsdom"); 4 | module.exports = class CustomTestEnvironment extends Environment { 5 | async setup() { 6 | await super.setup(); 7 | if (typeof this.global.TextEncoder === "undefined") { 8 | const { TextEncoder, TextDecoder } = require("util"); 9 | this.global.TextEncoder = TextEncoder; 10 | this.global.TextDecoder = TextDecoder; 11 | } 12 | } 13 | }; 14 | -------------------------------------------------------------------------------- /js/testHelpers/toEqualDisregardingWhitespace.js: -------------------------------------------------------------------------------- 1 | import { matcherHint, printReceived, printExpected } from "jest-matcher-utils"; 2 | import diff from "jest-diff"; 3 | 4 | const toEqualDisregardingWhitespace = (received, expected) => { 5 | const compressWhitespace = str => str.replace(/\s+/g, ``); 6 | 7 | const [received_compressed, expected_compressed] = [received, expected].map( 8 | compressWhitespace 9 | ); 10 | 11 | const pass = received_compressed == expected_compressed; 12 | 13 | const message = pass 14 | ? 
() => 15 | `${matcherHint(`.not.${name}`)}\n\n` + 16 | `Uncompressed expected value:\n` + 17 | ` ${printExpected(expected)}\n` + 18 | `Expected value with compressed whitespace to not equal:\n` + 19 | ` ${printExpected(expected_compressed)}\n` + 20 | `Uncompressed received value:\n` + 21 | ` ${printReceived(received)}\n` + 22 | `Received value with compressed whitespace:\n` + 23 | ` ${printReceived(received_compressed)}` 24 | : () => { 25 | const diffString = diff(expected_compressed, received_compressed, { 26 | expand: this.expand 27 | }); 28 | return ( 29 | `${matcherHint(`.${name}`)}\n\n` + 30 | `Uncompressed expected value:\n` + 31 | ` ${printExpected(expected)}\n` + 32 | `Expected value with compressed whitespace to equal:\n` + 33 | ` ${printExpected(expected_compressed)}\n` + 34 | `Uncompressed received value:\n` + 35 | ` ${printReceived(received)}\n` + 36 | `Received value with compressed whitespace:\n` + 37 | ` ${printReceived(received_compressed)}${ 38 | diffString ? `\n\nDifference:\n\n${diffString}` : `` 39 | }` 40 | ); 41 | }; 42 | return { 43 | actual: received, 44 | expected, 45 | message, 46 | name, 47 | pass 48 | }; 49 | }; 50 | 51 | export default toEqualDisregardingWhitespace; 52 | -------------------------------------------------------------------------------- /js/util.ts: -------------------------------------------------------------------------------- 1 | export function htmlToElement(html: string): ChildNode | null { 2 | const template = document.createElement("template"); 3 | html = html.trim(); // Never return a text node of whitespace as the result 4 | template.innerHTML = html; 5 | return template.content.firstChild; 6 | } 7 | 8 | export function difference(arr1: Array, arr2: Array): Array { 9 | const set1 = new Set(arr1); 10 | const set2 = new Set(arr2); 11 | const diff = new Set(Array.from(set1).filter(x => !set2.has(x))); 12 | return Array.from(diff); 13 | } 14 | 15 | export const plural = ( 16 | count: number, 17 | singular: string, 18 
| plural: string 19 | ): string => (count == 1 ? singular : plural); 20 | -------------------------------------------------------------------------------- /js/validators/indexParamValidator.ts: -------------------------------------------------------------------------------- 1 | import StorkError from "../storkError"; 2 | 3 | export const validateIndexParams = ( 4 | name: string, 5 | url: string 6 | ): StorkError | null => { 7 | if (typeof name !== "string") { 8 | return new StorkError("Index registration name must be a string."); 9 | } 10 | 11 | if (typeof url !== "string") { 12 | return new StorkError("URL must be a string."); 13 | } 14 | 15 | return null; 16 | }; 17 | -------------------------------------------------------------------------------- /js/wasmManager.test.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/no-explicit-any */ 2 | let loadWasm: any, runAfterWasmLoaded: any; 3 | 4 | jest.mock("stork-search", () => ({ 5 | default: jest.fn().mockImplementation( 6 | url => 7 | new Promise(resolve => { 8 | resolve(url); 9 | }) 10 | ) 11 | })); 12 | 13 | describe("wasmManager", () => { 14 | beforeEach(() => { 15 | // eslint-disable-next-line @typescript-eslint/no-var-requires 16 | jest.resetModules(); 17 | return import("./wasmManager").then(module => { 18 | loadWasm = module.loadWasm; 19 | runAfterWasmLoaded = module.runAfterWasmLoaded; 20 | }); 21 | }); 22 | test("should load from the default URL", async () => { 23 | const wasm = await loadWasm(); 24 | expect(wasm).toEqual("https://files.stork-search.net/stork.wasm"); 25 | }); 26 | 27 | test("Should load from a non-standard URL", async () => { 28 | const wasm = await loadWasm("https://example.com/stork.wasm"); 29 | expect(wasm).toEqual("https://example.com/stork.wasm"); 30 | }); 31 | 32 | test("Should run a function immediately if the wasm is loaded", async () => { 33 | loadWasm(); 34 | const spy = jest.fn(); 35 | await 
runAfterWasmLoaded(spy); 36 | expect(spy).toHaveBeenCalled(); 37 | }); 38 | 39 | test("Should run a function only once the wasm is loaded", async () => { 40 | const spy = jest.fn(); 41 | runAfterWasmLoaded(spy); 42 | expect(spy).not.toHaveBeenCalled(); 43 | await loadWasm(); 44 | expect(spy).toHaveBeenCalled(); 45 | }); 46 | }); 47 | -------------------------------------------------------------------------------- /js/wasmManager.ts: -------------------------------------------------------------------------------- 1 | import init from "stork-search"; 2 | import StorkError from "./storkError"; 3 | 4 | const version = process.env.VERSION; 5 | const DEFAULT_WASM_URL = version 6 | ? `https://files.stork-search.net/releases/v${version}/stork.wasm` 7 | : `https://files.stork-search.net/stork.wasm`; 8 | 9 | let wasmSourceUrl: string | null = null; // only for debug output 10 | let wasmLoadPromise: Promise | null = null; 11 | 12 | let queue: { (): void }[] = []; 13 | let errorQueue: { (): void }[] = []; 14 | 15 | const loadWasm = ( 16 | overrideUrl: string | null = null 17 | ): Promise => { 18 | // If there's a WASM load in flight or complete, don't try to call init again 19 | if (wasmLoadPromise) { 20 | return wasmLoadPromise; 21 | } 22 | 23 | const url = overrideUrl || DEFAULT_WASM_URL; 24 | wasmSourceUrl = url; 25 | 26 | const p = init(url) 27 | .then(() => { 28 | flush(); 29 | return url; 30 | }) 31 | .catch(() => { 32 | errorFlush(); 33 | throw new StorkError(`Error while loading WASM at ${url}`); 34 | }); 35 | 36 | wasmLoadPromise = p; 37 | return p; 38 | }; 39 | 40 | /** 41 | * Caller should use this to queue up a function to be run only when the 42 | * WASM is loaded. If the WASM is already loaded when this method is called, 43 | * the function will run immediately. 44 | * 45 | * @param fn Function to be run once WASM is loaded 46 | * 47 | * @returns a promise if loadWasm has been called, or undefined if loadWasm 48 | * has not been called. 
If loadWasm has been called, the promise will resolve 49 | * when the WASM has been loaded and when the function has been run. 50 | */ 51 | const runAfterWasmLoaded = ( 52 | fn: () => void, 53 | err: () => void 54 | ): Promise | null => { 55 | if (!wasmLoadPromise) { 56 | queue.push(fn); 57 | errorQueue.push(err); 58 | return null; 59 | } else { 60 | // We have a wasmLoadPromise, but we don't know if it's resolved. 61 | // Let's wait for it to resolve, then run the function. 62 | wasmLoadPromise.then(() => fn()).catch(() => err()); 63 | return wasmLoadPromise; 64 | } 65 | }; 66 | 67 | const flush = () => { 68 | queue.forEach(fn => { 69 | fn(); 70 | }); 71 | queue = []; 72 | }; 73 | 74 | const errorFlush = () => { 75 | errorQueue.forEach(fn => { 76 | fn(); 77 | }); 78 | errorQueue = []; 79 | }; 80 | 81 | const debug = (): Record => ({ 82 | wasmSourceUrl, 83 | wasmLoadPromise, 84 | queueLength: queue.length 85 | }); 86 | 87 | export { runAfterWasmLoaded, loadWasm, debug }; 88 | -------------------------------------------------------------------------------- /js/wasmQueue.test.ts: -------------------------------------------------------------------------------- 1 | import WasmQueue from "./wasmQueue"; 2 | 3 | // eslint-disable-next-line @typescript-eslint/no-empty-function 4 | jest.mock("stork-search", undefined, { virtual: true }); 5 | 6 | test("WasmQueue %s %s", async () => { 7 | const successFxns = [jest.fn(), jest.fn()]; 8 | const queue = new WasmQueue() 9 | .runAfterWasmLoaded(successFxns[0]) 10 | .runAfterWasmLoaded(successFxns[1]); 11 | 12 | queue.flush(); 13 | 14 | const computed = successFxns.map( 15 | // Did the function get called exactly once? 
export default class WasmQueue {
  // Callbacks waiting for the first flush.
  queue: { (): void }[] = [];
  // Number of times flush() has been called; non-zero means "WASM is loaded".
  flushes = 0;

  /**
   * Registers a callback that must only run once the WASM is available.
   * Before the first flush the callback is stored; after it, the callback is
   * invoked synchronously.
   *
   * @param fn Function to be run once WASM is loaded
   * @returns this queue, so calls can be chained
   */
  runAfterWasmLoaded(fn: { (): void; (): void }): WasmQueue {
    const wasmIsLoaded = this.flushes > 0;
    if (!wasmIsLoaded) {
      this.queue.push(fn);
      return this;
    }

    fn();
    return this;
  }

  /**
   * Called by the WASM loader to announce that the WASM has been loaded.
   * Runs every stored callback in registration order and empties the queue.
   */
  public flush(): void {
    this.flushes += 1;

    // Walk the array that was current when flush started, re-reading its
    // length on every step.
    const pending = this.queue;
    for (let i = 0; i < pending.length; i += 1) {
      const run = pending[i];
      run();
    }

    this.queue = [];
  }
}
submodule init 43 | git submodule update 44 | 45 | solo-build-federalist-index: 46 | cargo run -q --all-features -- build --input local-dev/test-configs/federalist.toml --output local-dev/test-indexes/federalist.st 47 | 48 | build-federalist-index: build-indexer-dev fetch-test-corpora solo-build-federalist-index 49 | 50 | build-all-indexes: build-federalist-index 51 | -cargo run -q --all-features -- build --input local-dev/test-configs/3b1b.toml --output local-dev/test-indexes/3b1b.st 52 | -cargo run -q --all-features -- build --input local-dev/test-configs/beepboop.toml --output local-dev/test-indexes/beepboop.st 53 | -cargo run -q --all-features -- build --input local-dev/test-configs/bowdoin-orient.toml --output local-dev/test-indexes/bowdoin-orient.st 54 | -cargo run -q --all-features -- build --input local-dev/test-configs/federalist-zero.toml --output local-dev/test-indexes/federalist-zero.st 55 | 56 | 57 | 58 | 59 | 60 | ############################################# 61 | # Production build settings 62 | 63 | build-indexer: 64 | cargo build --release --all-features 65 | 66 | build-wasm: 67 | cd stork-wasm && wasm-pack build --target web --out-name stork -- --no-default-features --features="v3" 68 | wc -c < ./stork-wasm/pkg/stork_bg.wasm 69 | 70 | build-wasm-all-features: 71 | cd stork-wasm && wasm-pack build --target web --out-name stork -- --features="v2, v3" 72 | wc -c < ./stork-wasm/pkg/stork_bg.wasm 73 | 74 | solo-build-js: 75 | yarn webpack --config webpack.prod.js 76 | 77 | build-js: build-wasm _yarn solo-build-js 78 | 79 | 80 | 81 | 82 | 83 | ############################################# 84 | # Development build settings 85 | # for the local dev site 86 | 87 | build-indexer-dev: 88 | cargo build --all-features 89 | 90 | build-wasm-dev: 91 | cd stork-wasm && wasm-pack build --target web --out-name stork --dev -- --no-default-features --features="v3" 92 | 93 | solo-build-js-dev: 94 | yarn webpack --config webpack.dev.js 95 | 96 | build-js-dev: 
build-wasm-dev _yarn solo-build-js-dev 97 | 98 | solo-build-dev-site: 99 | rm -rf local-dev/dist 100 | mkdir local-dev/dist 101 | cp themes/*.css local-dev/dist/ 102 | cp local-dev/index.html local-dev/dist/ 103 | cp dist/* local-dev/dist/ 104 | cp local-dev/test-indexes/*.st local-dev/dist/ 105 | @echo "You should run \`just serve-dev-site\` in another tab!" 106 | 107 | build-dev-site: build-js-dev build-all-indexes solo-build-dev-site 108 | 109 | build-dev-site-prod: build-js build-all-indexes solo-build-dev-site 110 | 111 | serve-dev-site: 112 | @echo "Open http://127.0.0.1:8025" 113 | python3 -m http.server --directory ./local-dev/dist 8025 114 | 115 | 116 | 117 | 118 | 119 | ############################################# 120 | # CI 121 | 122 | upload ref="": 123 | python3 scripts/upload_build_artifacts.py 124 | 125 | bench bench_name="": 126 | cargo criterion --package stork-lib --plotting-backend=disabled --message-format=json {{bench_name}} 127 | 128 | solo-generate-stats: 129 | python3 scripts/generate_stats.py 130 | 131 | generate-stats: build-js solo-build-federalist-index solo-generate-stats 132 | -------------------------------------------------------------------------------- /local-dev/test-configs/3b1b.toml: -------------------------------------------------------------------------------- 1 | [input] 2 | base_directory = "local-dev/test-corpora/3b1b" 3 | url_prefix = "https://www.youtube.com/watch?v=" 4 | title_boost = "Ridiculous" 5 | 6 | [[input.files]] 7 | path = "ch1.srt" 8 | url = "WUvTyaaNkzM" 9 | title = "The Essence of Calculus, Chapter 1" 10 | 11 | [[input.files]] 12 | path = "ch2.srt" 13 | url = "9vKqVkMQHKk" 14 | title = "The paradox of the derivative [2]" 15 | 16 | [[input.files]] 17 | path = "ch3.srt" 18 | url = "S0_qX4VJhMQ" 19 | title = "Derivative formulas through geometry [3]" 20 | 21 | [[input.files]] 22 | path = "ch4.srt" 23 | url = "YG15m2VwSjA" 24 | title = "Visualizing the chain rule and product rule [4]" 25 | 26 | [[input.files]] 
27 | path = "ch5.srt" 28 | url = "m2MIpDrF7Es" 29 | title = "What's so special about Euler's number e? [5]" 30 | 31 | [[input.files]] 32 | path = "ch6.srt" 33 | url = "qb40J4N1fa4" 34 | title = "Implicit differentiation, what's going on here? [6]" 35 | 36 | [[input.files]] 37 | path = "ch7.srt" 38 | url = "kfF40MiS7zA" 39 | title = "Limits, L'Hopital's rule, and epsilon delta definitions [7]" 40 | 41 | [[input.files]] 42 | path = "ch8.srt" 43 | url = "rfG8ce4nNh0" 44 | title = "Integration and the fundamental theorem of calculus [8]" 45 | 46 | [[input.files]] 47 | path = "ch9.srt" 48 | url = "FnJqaIESC2s" 49 | title = "What does area have to do with slope? [9]" 50 | 51 | [[input.files]] 52 | path = "ch10.srt" 53 | url = "BLkz5LGWihw" 54 | title = "Higher order derivatives [10]" 55 | 56 | [[input.files]] 57 | path = "ch11.srt" 58 | url = "3d6DsjIBzJ4" 59 | title = "Taylor series [11]" 60 | 61 | [[input.files]] 62 | path = "ch12.srt" 63 | url = "CfW845LNObM" 64 | title = "What they won't teach you in calculus [12]" 65 | -------------------------------------------------------------------------------- /local-dev/test-configs/beepboop.toml: -------------------------------------------------------------------------------- 1 | [[input.files]] 2 | title = "Human Brain" 3 | url = "https://beepb00p.xyz/pkm-setup.html" 4 | filetype = "HTML" 5 | 6 | [output] 7 | save_nearest_html_id = true 8 | -------------------------------------------------------------------------------- /local-dev/test-configs/bowdoin-orient.toml: -------------------------------------------------------------------------------- 1 | [input] 2 | html_selector = "main > article.post > .single__content" 3 | 4 | [[input.files]] 5 | url = "https://bowdoinorient.com/2021/02/19/from-zoom-hangouts-to-soup-cans-bowdoins-crew-team-gets-creative-during-social-distancing/" 6 | title = "From Zoom hangouts to soup cans: Bowdoin’s crew team gets creative during social distancing" 7 | 8 | [[input.files]] 9 | url = 
"https://bowdoinorient.com/2021/02/19/pond-hockey-until-watson-opens-womens-hockey-stays-focused-on-virtual-team-bonding/" 10 | title = "Pond hockey: until Watson opens women’s hockey stays focused on virtual team bonding" 11 | 12 | [[input.files]] 13 | url = "https://bowdoinorient.com/2021/02/19/embracing-technology-and-social-media-bowdoin-athletics-masters-new-tools-for-recruiting/" 14 | title = "Embracing technology and social media, Bowdoin athletics masters new tools for recruiting" 15 | 16 | [[input.files]] 17 | url = "https://bowdoinorient.com/2021/02/19/bowdoin-health-services-manages-in-person-visits-and-testing-looks-to-vaccinate-students/" 18 | title = "Bowdoin Health Services manages in-person visits and testing, looks to vaccinate students" 19 | 20 | [[input.files]] 21 | url = "https://bowdoinorient.com/2021/02/18/students-reflect-on-their-decisions-to-take-personal-leaves-of-absence/" 22 | title = "Students reflect on their decisions to take personal leaves of absence" 23 | 24 | [[input.files]] 25 | url = "https://bowdoinorient.com/2021/02/18/college-reports-125-conduct-violations-during-fall-semester-launches-covid-conduct-dashboard/" 26 | title = "College reports 125 conduct violations during fall semester; launches COVID conduct dashboard" 27 | 28 | [[input.files]] 29 | url = "https://bowdoinorient.com/url-does-not-exist" 30 | title = "This URL does not exist." 
31 | -------------------------------------------------------------------------------- /local-dev/test-configs/federalist-zero.toml: -------------------------------------------------------------------------------- 1 | [input] 2 | base_directory = "local-dev/test-corpora/federalist" 3 | url_prefix = "https://www.gutenberg.org/files/1404/1404-h/1404-h.htm#link2H_4_" 4 | files = [ 5 | { path = "federalist-1.txt", url = "0001", title = "General Introduction" }, 6 | { path = "federalist-2.txt", url = "0002", title = "Concerning Dangers from Foreign Force and Influence" }, 7 | { path = "federalist-3.txt", url = "0003", title = "Concerning Dangers from Foreign Force and Influence pt 2" }, 8 | { path = "federalist-4.txt", url = "0004", title = "Concerning Dangers from Foreign Force and Influence pt 3" }, 9 | { path = "federalist-5.txt", url = "0005", title = "Concerning Dangers from Foreign Force and Influence pt 4" }, 10 | { path = "federalist-6.txt", url = "0006", title = "Concerning Dangers from Dissensions Between the States" }, 11 | { path = "federalist-7.txt", url = "0007", title = "Concerning Dangers from Dissensions Between the States pt 2" }, 12 | { path = "federalist-8.txt", url = "0008", title = "The Consequences of Hostilities Between the States" }, 13 | { path = "federalist-9.txt", url = "0009", title = "The Union as a Safeguard Against Domestic Faction and Insurrection" }, 14 | { path = "federalist-10.txt", url = "0010", title = "The Union as a Safeguard Against Domestic Faction and Insurrection pt 2" }, 15 | { path = "federalist-11.txt", url = "0011", title = "The Utility of the Union in Respect to Commercial Relations and a Navy" }, 16 | { path = "federalist-12.txt", url = "0012", title = "The Utility of the Union In Respect to Revenue" }, 17 | { path = "federalist-13.txt", url = "0013", title = "Advantage of the Union in Respect to Economy in Government" }, 18 | { path = "federalist-14.txt", url = "0014", title = "Objections to the Proposed Constitution 
From Extent of Territory Answered" }, 19 | { path = "federalist-15.txt", url = "0015", title = "The Insufficiency of the Present Confederation to Preserve the Union" }, 20 | { path = "federalist-16.txt", url = "0016", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 2" }, 21 | { path = "federalist-17.txt", url = "0017", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 3" }, 22 | { path = "federalist-18.txt", url = "0018", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 4" }, 23 | { path = "federalist-19.txt", url = "0019", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 5" }, 24 | { path = "federalist-20.txt", url = "0020", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 6" } 25 | ] 26 | 27 | [output] 28 | excerpts_per_result = 0 29 | -------------------------------------------------------------------------------- /local-dev/test-configs/federalist.toml: -------------------------------------------------------------------------------- 1 | # The main Federalist index configuration file. This produces the Federalist 2 | # search index seen on the home page of stork-search.net. 
3 | [input] 4 | base_directory = "local-dev/test-corpora/federalist" 5 | url_prefix = "https://www.gutenberg.org/files/1404/1404-h/1404-h.htm#link2H_4_" 6 | files = [ 7 | { path = "federalist-1.txt", url = "0001", title = "General Introduction" }, 8 | { path = "federalist-2.txt", url = "0002", title = "Concerning Dangers from Foreign Force and Influence" }, 9 | { path = "federalist-3.txt", url = "0003", title = "Concerning Dangers from Foreign Force and Influence pt 2" }, 10 | { path = "federalist-4.txt", url = "0004", title = "Concerning Dangers from Foreign Force and Influence pt 3" }, 11 | { path = "federalist-5.txt", url = "0005", title = "Concerning Dangers from Foreign Force and Influence pt 4" }, 12 | { path = "federalist-6.txt", url = "0006", title = "Concerning Dangers from Dissensions Between the States" }, 13 | { path = "federalist-7.txt", url = "0007", title = "Concerning Dangers from Dissensions Between the States pt 2" }, 14 | { path = "federalist-8.txt", url = "0008", title = "The Consequences of Hostilities Between the States" }, 15 | { path = "federalist-9.txt", url = "0009", title = "The Union as a Safeguard Against Domestic Faction and Insurrection" }, 16 | { path = "federalist-10.txt", url = "0010", title = "The Union as a Safeguard Against Domestic Faction and Insurrection pt 2" }, 17 | { path = "federalist-11.txt", url = "0011", title = "The Utility of the Union in Respect to Commercial Relations and a Navy" }, 18 | { path = "federalist-12.txt", url = "0012", title = "The Utility of the Union In Respect to Revenue" }, 19 | { path = "federalist-13.txt", url = "0013", title = "Advantage of the Union in Respect to Economy in Government" }, 20 | { path = "federalist-14.txt", url = "0014", title = "Objections to the Proposed Constitution From Extent of Territory Answered" }, 21 | { path = "federalist-15.txt", url = "0015", title = "The Insufficiency of the Present Confederation to Preserve the Union" }, 22 | { path = "federalist-16.txt", url = 
"0016", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 2" }, 23 | { path = "federalist-17.txt", url = "0017", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 3" }, 24 | { path = "federalist-18.txt", url = "0018", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 4" }, 25 | { path = "federalist-19.txt", url = "0019", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 5" }, 26 | { path = "federalist-20.txt", url = "0020", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 6" } 27 | ] 28 | 29 | [output] 30 | debug = false 31 | -------------------------------------------------------------------------------- /local-dev/test-indexes/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jameslittle230/stork/efa98dad15b52bd6da9c9e87d612f0913431a95e/local-dev/test-indexes/.gitkeep -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "stork", 3 | "version": "1.6.0", 4 | "description": "Impossibly fast web search, made for static sites.", 5 | "main": "index.js", 6 | "repository": { 7 | "type": "git", 8 | "url": "git+https://github.com/jameslittle230/stork.git" 9 | }, 10 | "keywords": [ 11 | "search", 12 | "wasm", 13 | "rust" 14 | ], 15 | "author": "James Little (https://jameslittle.me)", 16 | "license": "Apache-2.0", 17 | "bugs": { 18 | "url": "https://github.com/jameslittle230/stork/issues" 19 | }, 20 | "homepage": "https://stork-search.net", 21 | "devDependencies": { 22 | "@open-wc/webpack-import-meta-loader": "^0.4.1", 23 | "@types/jest": "^25.2.3", 24 | "@types/jsdom": "^16.2.3", 25 | "@typescript-eslint/eslint-plugin": "5.23.0", 26 | "@typescript-eslint/parser": "5.23.0", 27 | "@wasm-tool/wasm-pack-plugin": 
def generate_stats_dict(d1, d2):
    """Compare two flat ``{name: number}`` stat dictionaries.

    Args:
        d1: Baseline measurements, keyed by stat name.
        d2: Contender measurements, keyed by stat name.

    Returns:
        A dict with one entry per key found in either input. Each entry has
        ``"baseline"`` and ``"contender"`` (rounded to 4 places, or 0 when the
        key is missing on that side) and ``"multiplier"`` (contender divided
        by baseline, rounded to 2 places). The multiplier is 1 whenever no
        ratio can be computed: the key is missing on one side, or the
        baseline value is 0.
    """
    out = {}

    for key in set(d1) | set(d2):
        if key in d1 and key in d2:
            baseline = d1[key]
            contender = d2[key]
            out[key] = {
                "baseline": round(baseline, 4),
                "contender": round(contender, 4),
                # A zero baseline has no meaningful ratio; report the same
                # neutral multiplier used when the baseline is missing
                # instead of raising ZeroDivisionError.
                "multiplier": round(contender / baseline, 2) if baseline else 1
            }
        elif key in d2:
            out[key] = {
                "baseline": 0,
                "contender": round(d2[key], 4),
                "multiplier": 1
            }
        else:
            out[key] = {
                "baseline": round(d1[key], 4),
                "contender": 0,
                "multiplier": 1
            }

    return out
output += f""" 48 | 49 | 50 | """.replace(" ", "").replace("\n", "") 51 | 52 | output += "
    BenchmarkBaselineContenderComparison
    {key}{stats[key]["baseline"]}{stats[key]["contender"]}{stats[key]["multiplier"]}× {icon}
    " 53 | 54 | print(output) 55 | 56 | -------------------------------------------------------------------------------- /scripts/generate_stats.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import subprocess 5 | 6 | # REQUIREMENTS: 7 | # Run `just build-js` and `just solo-build-federalist-index` 8 | # before running this script. 9 | 10 | # Step 1: get file sizes for various distributed files 11 | files = [ 12 | './dist/stork.js', 13 | './dist/stork.wasm', 14 | './local-dev/test-indexes/federalist.st' 15 | ] 16 | 17 | sizes = dict([(file.split('/')[-1], float(os.path.getsize(file))/1000) 18 | for file in files]) 19 | 20 | # Step 2: Run benchmarks and get mean runtime for each 21 | benchmarks = [ 22 | "build/federalist", 23 | "search/federalist/liberty" 24 | ] 25 | 26 | for bench_name in benchmarks: 27 | print(f"Running benchmark for {bench_name}", file=sys.stderr) 28 | run_bench_cmd = subprocess.run( 29 | ["just", "bench", bench_name], 30 | stdout=subprocess.PIPE, 31 | text=True 32 | ) 33 | 34 | success_line = next((line for line in run_bench_cmd.stdout.splitlines() if "benchmark-complete" in line)) 35 | 36 | success_line_dict = json.loads(success_line) 37 | 38 | bench_time_ms = round(float(success_line_dict['mean']['estimate']) / 1_000_000, 4) 39 | 40 | sizes.update({ 41 | bench_name: bench_time_ms 42 | }) 43 | 44 | # Step 3: Print out results 45 | print(json.dumps(sizes, indent=2)) 46 | -------------------------------------------------------------------------------- /scripts/upload_build_artifacts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import boto3 4 | import time 5 | 6 | # REQUIREMENTS: 7 | # This script should _only_ be run as part of the create-release-after-tag 8 | # Github action. 
# NOTE: this script requires build artifacts to be in certain locations
# (see comments below) that only the create-release-after-tag Github action
# can properly set up.


def uploadFile(localPath, remotePath, extraArgs=None):
    """Upload one local file to the files.stork-search.net S3 bucket.

    Args:
        localPath: Path of the file on disk.
        remotePath: Object key to write inside the bucket.
        extraArgs: Optional dict forwarded as boto3's ``ExtraArgs``
            (e.g. ``{'ContentType': ...}``). Defaults to no extra arguments.
    """
    print(f"Called uploadFile: {localPath} → {remotePath}")

    # A fresh dict per call; a `{}` default would be shared between calls.
    if extraArgs is None:
        extraArgs = {}

    s3 = boto3.resource('s3')
    s3.Bucket("files.stork-search.net").upload_file(localPath,
                                                    remotePath, ExtraArgs=extraArgs)


def invalidate():
    """Invalidate every path (`/*`) of the CloudFront distribution."""
    cloudfront = boto3.client("cloudfront")
    cloudfront.create_invalidation(
        DistributionId="E3PBNOZP9XRSWN",
        InvalidationBatch={
            'Paths': {
                'Quantity': 1,
                'Items': [
                    '/*',
                ]
            },
            # The current timestamp makes each invalidation request distinct.
            'CallerReference': f"{time.time()}"
        }
    )


if __name__ == "__main__":

    opj = os.path.join

    # Environment values are always strings, so an identity comparison with
    # `False` can never match. Treat a missing, empty or explicitly false
    # value as "not running inside Github Actions".
    if os.environ.get("GITHUB_ACTIONS", "").strip().lower() in ("", "false", "0"):
        print("WARNING: Environment variable `GITHUB_ACTIONS` not present.\nYou likely are misusing this script -- This script should _only_ be run\nas part of the create-release-after-tag Github action. Exiting.")
        sys.exit(1)

    if not os.path.exists(opj(os.getcwd(), ".stork-project-root")):
        print(
            f"Current working directory {os.getcwd()} doesn't look to be the Stork project root.\nRun this as `just upload` or run it from the Stork root directory. Exiting.")
        sys.exit(1)

    projroot = os.getcwd()

    # Script takes one command line argument. Read it defensively so that a
    # missing argument reaches the usage message below instead of raising
    # IndexError.
    raw_ref = sys.argv[1] if len(sys.argv) > 1 else ""

    # Strip off the "refs/tags/" part. Taking the last element of the split
    # means a ref passed without "refs/tags/" still works.
    ref = raw_ref.split("refs/tags/")[-1]

    if not ref:
        print("No argument passed to this script. You must pass an argument which will become the directory to which files are uploaded on the CDN.")
        print("Usage: python3 scripts/upload_build_artifacts.py \"v1.2.5\"")
        sys.exit(1)

    if "AWS_ACCESS_KEY_ID" not in os.environ or "AWS_SECRET_ACCESS_KEY" not in os.environ:
        print("Error: Environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` must be set in order to upload to AWS S3.")
        sys.exit(1)

    # Script expects that these files will all be present in
    # the ./web-artifacts directory in the project root.
    web_artifacts = [
        {"filename": "stork.js", "contentType": "text/javascript"},
        {"filename": "stork.wasm", "contentType": "application/wasm"},
        {"filename": "stork.js.map", "contentType": "binary/octet-stream"},
        {"filename": "basic.css", "contentType": "text/css"},
        {"filename": "dark.css", "contentType": "text/css"},
        {"filename": "flat.css", "contentType": "text/css"},
        {"filename": "edible.css", "contentType": "text/css"},
        {"filename": "edible-dark.css", "contentType": "text/css"},
    ]

    # Script expects that for each file below, a corresponding file
    # will exist at ./{binary}/stork in the project root.
    binaries = [
        "stork-macos-10-15",
        "stork-ubuntu-20-04",
    ]

    # Script expects that these files will exist in the project root.
    other_files = [
        "federalist.st",
    ]

    print(f"Uploading {len(web_artifacts) + len(binaries) + len(other_files)} files to files.stork-search.net/releases/{ref} ...")

    # Every file is uploaded twice: once under the release's own directory
    # and once under "latest".
    for file in web_artifacts:

        for destination_path in [
            opj("releases", ref, file["filename"]),
            opj("releases", "latest", file["filename"]),
        ]:
            source_path = opj(projroot, "web-artifacts", file["filename"])

            uploadFile(source_path, destination_path, {
                'ContentType': file["contentType"]})

    for binary in binaries:
        for destination_path in [
            opj("releases", ref, binary),
            opj("releases", "latest", binary),
        ]:
            source_path = opj(projroot, binary, "stork")
            uploadFile(source_path, destination_path)

    for file in other_files:
        for destination_path in [
            opj("releases", ref, file),
            opj("releases", "latest", file),
        ]:
            source_path = opj(projroot, file)
            uploadFile(source_path, destination_path)

    invalidate()
    print("Cache invalidated.")
3 | version = 3 4 | 5 | [[package]] 6 | name = "ansi_term" 7 | version = "0.12.1" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" 10 | dependencies = [ 11 | "winapi", 12 | ] 13 | 14 | [[package]] 15 | name = "bytes" 16 | version = "1.1.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" 19 | 20 | [[package]] 21 | name = "ctor" 22 | version = "0.1.21" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "ccc0a48a9b826acdf4028595adc9db92caea352f7af011a3034acd172a52a0aa" 25 | dependencies = [ 26 | "quote", 27 | "syn", 28 | ] 29 | 30 | [[package]] 31 | name = "diff" 32 | version = "0.1.12" 33 | source = "registry+https://github.com/rust-lang/crates.io-index" 34 | checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" 35 | 36 | [[package]] 37 | name = "hex-literal" 38 | version = "0.3.4" 39 | source = "registry+https://github.com/rust-lang/crates.io-index" 40 | checksum = "7ebdb29d2ea9ed0083cd8cece49bbd968021bd99b0849edb4a9a7ee0fdf6a4e0" 41 | 42 | [[package]] 43 | name = "output_vt100" 44 | version = "0.1.2" 45 | source = "registry+https://github.com/rust-lang/crates.io-index" 46 | checksum = "53cdc5b785b7a58c5aad8216b3dfa114df64b0b06ae6e1501cef91df2fbdf8f9" 47 | dependencies = [ 48 | "winapi", 49 | ] 50 | 51 | [[package]] 52 | name = "pretty_assertions" 53 | version = "1.0.0" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = "ec0cfe1b2403f172ba0f234e500906ee0a3e493fb81092dac23ebefe129301cc" 56 | dependencies = [ 57 | "ansi_term", 58 | "ctor", 59 | "diff", 60 | "output_vt100", 61 | ] 62 | 63 | [[package]] 64 | name = "proc-macro2" 65 | version = "1.0.32" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = 
"ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43" 68 | dependencies = [ 69 | "unicode-xid", 70 | ] 71 | 72 | [[package]] 73 | name = "quote" 74 | version = "1.0.10" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" 77 | dependencies = [ 78 | "proc-macro2", 79 | ] 80 | 81 | [[package]] 82 | name = "serde" 83 | version = "1.0.130" 84 | source = "registry+https://github.com/rust-lang/crates.io-index" 85 | checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" 86 | dependencies = [ 87 | "serde_derive", 88 | ] 89 | 90 | [[package]] 91 | name = "serde_derive" 92 | version = "1.0.130" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" 95 | dependencies = [ 96 | "proc-macro2", 97 | "quote", 98 | "syn", 99 | ] 100 | 101 | [[package]] 102 | name = "stork-boundary" 103 | version = "0.1.0" 104 | dependencies = [ 105 | "bytes", 106 | "hex-literal", 107 | "pretty_assertions", 108 | "serde", 109 | "stork-shared", 110 | "thiserror", 111 | ] 112 | 113 | [[package]] 114 | name = "stork-shared" 115 | version = "0.1.0" 116 | 117 | [[package]] 118 | name = "syn" 119 | version = "1.0.82" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59" 122 | dependencies = [ 123 | "proc-macro2", 124 | "quote", 125 | "unicode-xid", 126 | ] 127 | 128 | [[package]] 129 | name = "thiserror" 130 | version = "1.0.30" 131 | source = "registry+https://github.com/rust-lang/crates.io-index" 132 | checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" 133 | dependencies = [ 134 | "thiserror-impl", 135 | ] 136 | 137 | [[package]] 138 | name = "thiserror-impl" 139 | version = "1.0.30" 140 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 141 | checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" 142 | dependencies = [ 143 | "proc-macro2", 144 | "quote", 145 | "syn", 146 | ] 147 | 148 | [[package]] 149 | name = "unicode-xid" 150 | version = "0.2.2" 151 | source = "registry+https://github.com/rust-lang/crates.io-index" 152 | checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" 153 | 154 | [[package]] 155 | name = "winapi" 156 | version = "0.3.9" 157 | source = "registry+https://github.com/rust-lang/crates.io-index" 158 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 159 | dependencies = [ 160 | "winapi-i686-pc-windows-gnu", 161 | "winapi-x86_64-pc-windows-gnu", 162 | ] 163 | 164 | [[package]] 165 | name = "winapi-i686-pc-windows-gnu" 166 | version = "0.4.0" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 169 | 170 | [[package]] 171 | name = "winapi-x86_64-pc-windows-gnu" 172 | version = "0.4.0" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 175 | -------------------------------------------------------------------------------- /stork-boundary/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "stork-boundary" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | bytes = "1.1.0" 8 | serde = { version = "1.0.130", features = ["derive"] } 9 | stork-shared = { path = "../stork-shared" } 10 | thiserror = "1.0.29" 11 | 12 | [dev-dependencies] 13 | hex-literal = "0.3.3" 14 | pretty_assertions = "1.0.0" 15 | -------------------------------------------------------------------------------- /stork-boundary/src/lib.rs: 
-------------------------------------------------------------------------------- 1 | mod input; 2 | mod output; 3 | 4 | pub use input::{IndexVersioningError, VersionedIndex}; 5 | pub use output::{ 6 | Entry, Excerpt, HighlightRange, IndexMetadata, InternalWordAnnotation, Output, Result, 7 | }; 8 | -------------------------------------------------------------------------------- /stork-cli/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "stork-search" 3 | version = "1.6.0" 4 | authors = ["James Little "] 5 | edition = "2021" 6 | documentation = "https://stork-search.net/docs" 7 | homepage = "https://stork-search.net" 8 | description = "Impossibly fast web search, made for static sites." 9 | repository = "https://github.com/jameslittle230/stork" 10 | license = "Apache-2.0" 11 | keywords = ["wasm", "webassembly", "search", "javascript"] 12 | categories = ["wasm"] 13 | 14 | [features] 15 | default = ["v1-compat", "search-v2", "search-v3", "build-v3-web-scraping"] 16 | test-server = ["hyper", "tokio"] 17 | v1-compat = [] 18 | search-v2 = ["stork-lib/search-v2"] 19 | search-v3 = ["stork-lib/search-v3"] 20 | build-v3 = ["search-v3", "stork-lib/build-v3"] 21 | build-v3-web-scraping = ["build-v3", "stork-lib/build-v3-web-scraping"] 22 | 23 | [dependencies] 24 | atty = "0.2.14" 25 | bytes = "1.1.0" 26 | clap = { version = "2.33.3", features = ["color"] } 27 | colored = "2.0.0" 28 | hyper = { version = "0.14.17", optional = true, features = ["server"] } 29 | num-format = "0.4.0" 30 | serde = "1.0.130" 31 | serde_json = "1.0.68" 32 | stork-lib = { path = "../stork-lib", version = "1.6.0", default-features = false } 33 | textwrap = { version = "0.14.2", features = ["terminal_size"] } 34 | thiserror = "1.0.29" 35 | tokio = { version = "1.18.4", optional = true, features = ["signal"] } 36 | 37 | [dev-dependencies] 38 | pretty_assertions = "1.0.0" 39 | 40 | [[bin]] 41 | name = "stork" 42 | path = "src/main.rs" 43 
| -------------------------------------------------------------------------------- /stork-cli/src/display_timings.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt, time::Duration}; 2 | 3 | pub struct TimingStatistic { 4 | pub duration: Duration, 5 | pub description: String, 6 | } 7 | 8 | impl fmt::Display for TimingStatistic { 9 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 10 | write!( 11 | f, 12 | "{:.3?}s {}", 13 | self.duration.as_secs_f32(), 14 | self.description 15 | ) 16 | } 17 | } 18 | 19 | #[macro_export] 20 | macro_rules! display_timings { 21 | ($( $t: expr),*) => { 22 | vec![ 23 | $( 24 | $crate::display_timings::TimingStatistic { 25 | duration: $t.0, 26 | description: $t.1.to_string() 27 | }, 28 | )* 29 | ] 30 | .iter() 31 | .map(|ts| format!("{}", ts.to_string())) 32 | .collect::>() 33 | .join("\n") 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /stork-cli/src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::{io, num::ParseIntError}; 2 | use stork_lib::{BuildError, ConfigReadError, IndexParseError, SearchError}; 3 | use thiserror::Error; 4 | 5 | #[allow(dead_code)] 6 | #[derive(Debug, Error)] 7 | pub enum StorkCommandLineError { 8 | #[error("Couldn't read the configuration file: {0}")] 9 | ConfigReadError(#[from] ConfigReadError), 10 | 11 | #[error("Couldn't read file `{0}`. Got error `{1}`")] 12 | FileReadError(String, io::Error), 13 | 14 | #[error("Couldn't create file `{0}`. Got error `{1}`")] 15 | FileCreateError(String, io::Error), 16 | 17 | #[error("Couldn't write to output stream. Got error `{0}`")] 18 | WriteError(io::Error), 19 | 20 | #[error("Stork doesn't support interactive stdin! 
Pipe in a stream instead.")] 21 | InteractiveStdinNotAllowed, 22 | 23 | #[error("{0}")] 24 | IndexParseError(#[from] IndexParseError), 25 | 26 | #[error("{0}")] 27 | BuildError(#[from] BuildError), 28 | 29 | #[error("Invalid port `{0}`. Got error `{1}`")] 30 | InvalidPort(String, ParseIntError), 31 | 32 | #[error("Couldn't start web server.")] 33 | ServerError, 34 | 35 | #[error("{0}")] 36 | SearchError(#[from] SearchError), 37 | 38 | #[error("Couldn't display search results as JSON. Got error `{0}`")] 39 | SearchResultJsonSerializationError(#[from] serde_json::Error), 40 | 41 | #[error("{0}")] 42 | InvalidCommandLineArguments(&'static str), 43 | 44 | #[error("{0}")] 45 | NotCompiledWithFeature(&'static str), 46 | } 47 | -------------------------------------------------------------------------------- /stork-cli/src/io.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fs::File, 3 | io::{stdout, BufWriter, Read, Write}, 4 | }; 5 | 6 | use bytes::Bytes; 7 | 8 | use crate::errors::StorkCommandLineError; 9 | 10 | pub fn read_stdin_bytes() -> Option { 11 | use atty::Stream; 12 | use std::io; 13 | 14 | if atty::isnt(Stream::Stdin) { 15 | let mut stdin_buffer = Vec::::new(); 16 | let _read_result = io::stdin().read_to_end(&mut stdin_buffer); 17 | return Some(Bytes::from(stdin_buffer)); 18 | } 19 | 20 | None 21 | } 22 | 23 | pub fn read_bytes_from_path(path: &str) -> Result { 24 | if path == "-" { 25 | return match read_stdin_bytes() { 26 | Some(stdin) => Ok(stdin), 27 | None => Err(StorkCommandLineError::InteractiveStdinNotAllowed), 28 | }; 29 | } 30 | 31 | // TODO: Handle path == "" case 32 | let pathbuf = std::path::PathBuf::from(path); 33 | std::fs::read(pathbuf) 34 | .map(Bytes::from) 35 | .map_err(|e| StorkCommandLineError::FileReadError(path.to_string(), e)) 36 | } 37 | 38 | pub fn read_stdin() -> Option { 39 | read_stdin_bytes().and_then(|bytes| String::from_utf8(Vec::from(bytes.as_ref())).ok()) 40 | } 41 | 42 | 
pub fn read_from_path(path: &str) -> Result { 43 | match (path, read_stdin()) { 44 | ("-", Some(stdin)) => Ok(stdin), 45 | ("-", None) => Err(StorkCommandLineError::InteractiveStdinNotAllowed), 46 | // handle ("", Some) or ("", None), perhaps 47 | _ => { 48 | let pathbuf = std::path::PathBuf::from(path); 49 | std::fs::read_to_string(pathbuf) 50 | .map_err(|e| StorkCommandLineError::FileReadError(path.to_string(), e)) 51 | } 52 | } 53 | } 54 | 55 | pub fn write_bytes(path: &str, bytes: &Bytes) -> Result { 56 | let mut writer: Box = if path == "-" { 57 | Box::new(stdout()) 58 | } else { 59 | let file = File::create(path) 60 | .map_err(|e| StorkCommandLineError::FileCreateError(path.to_string(), e))?; 61 | Box::new(BufWriter::new(file)) 62 | }; 63 | 64 | writer 65 | .write(bytes.as_ref()) 66 | .map_err(StorkCommandLineError::WriteError) 67 | } 68 | -------------------------------------------------------------------------------- /stork-cli/src/pretty_print_search_results.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::min; 2 | 3 | use colored::Colorize; 4 | use stork_lib::{HighlightRange, Output}; 5 | use textwrap::termwidth; 6 | 7 | fn highlight_string(string: &str, ranges: &Vec) -> String { 8 | let mut highlighted = String::new(); 9 | 10 | let mut last_end = 0; 11 | for range in ranges { 12 | highlighted.push_str(&string[last_end..range.beginning]); 13 | highlighted.push_str(&string[range.beginning..range.end].yellow()); 14 | last_end = range.end; 15 | } 16 | highlighted.push_str(&string[last_end..]); 17 | highlighted 18 | } 19 | 20 | pub fn pretty_print_search_results(results: &Output) -> String { 21 | let mut output = String::new(); 22 | 23 | let textwrap_options = textwrap::Options::new(min(120, termwidth())) 24 | .initial_indent(" - ") 25 | .subsequent_indent(" "); 26 | 27 | results.results.iter().for_each(|result| { 28 | output.push_str(&format!( 29 | "{}\n<{}{}>", 30 | result.entry.title.bold().green(), 31 
| results.url_prefix, 32 | result.entry.url 33 | )); 34 | result.excerpts.iter().for_each(|excerpt| { 35 | output.push_str(&format!( 36 | "\n{}", 37 | textwrap::fill( 38 | &highlight_string(&excerpt.text, &excerpt.highlight_ranges), 39 | &textwrap_options 40 | ) 41 | )); 42 | }); 43 | output.push_str("\n\n"); 44 | }); 45 | 46 | output.push_str(&format!( 47 | "{} total results available", 48 | results.total_hit_count 49 | )); 50 | 51 | output 52 | } 53 | 54 | #[cfg(test)] 55 | mod tests { 56 | use std::collections::HashMap; 57 | 58 | use super::*; 59 | use pretty_assertions::assert_eq; 60 | 61 | #[test] 62 | fn display_pretty_search_results_given_output() { 63 | let results = Output { 64 | results: vec![stork_lib::Result { 65 | entry: stork_lib::Entry { 66 | title: "Some Document Title".to_string(), 67 | url: "https://example.com".to_string(), 68 | fields: HashMap::new(), 69 | }, 70 | score: 25, 71 | excerpts: vec![stork_lib::Excerpt { 72 | text: "This is the excerpt of the text".to_string(), 73 | highlight_ranges: vec![stork_lib::HighlightRange { 74 | beginning: 0, 75 | end: 1, 76 | }], 77 | internal_annotations: vec![stork_lib::InternalWordAnnotation::UrlSuffix( 78 | "#25".to_string(), 79 | )], 80 | fields: HashMap::new(), 81 | score: 12, 82 | }], 83 | title_highlight_ranges: vec![stork_lib::HighlightRange { 84 | beginning: 0, 85 | end: 5, 86 | }], 87 | }], 88 | total_hit_count: 21, 89 | url_prefix: String::new(), 90 | }; 91 | 92 | assert_eq!( 93 | pretty_print_search_results(&results), 94 | format!( 95 | "{}{}{}{}", 96 | "Some Document Title".bold().green(), 97 | "\n\n - ", 98 | "T".yellow(), 99 | "his is the excerpt of the text\n\n21 total results available".normal() 100 | ) 101 | ); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /stork-cli/src/test_server/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Stork Search 6 | 10 | 14 | 42 | 43 | 44 |

    Stork Test Page

    45 |

    46 | Found a bug? 47 | Report it! → 50 |

    51 |
    52 | 53 |
    54 |
    55 | 56 | 57 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /stork-cli/src/test_server/mod.rs: -------------------------------------------------------------------------------- 1 | use bytes::Bytes; 2 | use hyper::server::Server; 3 | use hyper::service::{make_service_fn, service_fn}; 4 | use hyper::{Body, Request, Response, StatusCode}; 5 | use std::convert::Infallible; 6 | use tokio::runtime::Runtime; 7 | 8 | pub fn serve(index: &Bytes, port: u16) -> Result<(), Box> { 9 | let rt = Runtime::new()?; 10 | let index_bytes = index.clone(); 11 | 12 | rt.block_on(async { 13 | // For every connection, we must make a `Service` to handle all 14 | // incoming HTTP requests on said connection. 15 | let make_svc = make_service_fn(|_conn| { 16 | // This is the `Service` that will handle the connection. 17 | // `service_fn` is a helper to convert a function that 18 | // returns a Response into a `Service`. 19 | let bytes = index_bytes.clone(); 20 | async move { 21 | Ok::<_, Infallible>(service_fn(move |request: Request| { 22 | let bytes_2 = bytes.clone(); 23 | async move { 24 | Ok::<_, Infallible>(match request.uri().to_string().as_str() { 25 | "/" => { 26 | let index_html = format!(include_str!("index.html"), env!("CARGO_PKG_VERSION")); 27 | Response::new(Body::from(index_html)) 28 | } 29 | 30 | "/test.st" => Response::new(Body::from(bytes_2)), 31 | 32 | _ => Response::builder() 33 | .status(StatusCode::NOT_FOUND) 34 | .body(Body::from("404: Not found.")) 35 | .unwrap(), 36 | }) 37 | } 38 | })) 39 | } 40 | }); 41 | 42 | let addr = ([127, 0, 0, 1], port).into(); 43 | let server = Server::bind(&addr).serve(make_svc); 44 | let graceful = server.with_graceful_shutdown(shutdown_signal()); 45 | 46 | println!("Open in your web browser to visit the test page.\nPress ctrl-C to stop the server."); 47 | 48 | if let Err(e) = graceful.await { 49 | eprintln!("server error: {e}"); 50 | } 51 | Ok(()) 52 | }) 53 | } 54 | 55 | #[cfg(feature = 
"test-server")] 56 | async fn shutdown_signal() { 57 | // Wait for the CTRL+C signal 58 | tokio::signal::ctrl_c() 59 | .await 60 | .expect("failed to install CTRL+C signal handler"); 61 | } 62 | -------------------------------------------------------------------------------- /stork-lib/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "stork-lib" 3 | version = "1.6.0" 4 | authors = ["James Little "] 5 | edition = "2021" 6 | documentation = "https://stork-search.net/docs" 7 | homepage = "https://stork-search.net" 8 | description = "Impossibly fast web search, made for static sites." 9 | repository = "https://github.com/jameslittle230/stork" 10 | license = "Apache-2.0" 11 | keywords = ["wasm", "webassembly", "search", "javascript"] 12 | categories = ["wasm"] 13 | exclude = ['/dist/', 'node_modules/', '/test/', '/test-assets/'] 14 | 15 | [features] 16 | default = ["build-v3-web-scraping"] 17 | search-v2 = ["bincode"] 18 | search-v3 = ["rmp-serde"] 19 | build-v3 = [ 20 | "search-v3", 21 | "num-format", 22 | "pulldown-cmark", 23 | "mime", 24 | "srtparse", 25 | "kuchiki", 26 | "frontmatter", 27 | "indicatif", 28 | ] 29 | build-v3-web-scraping = ["build-v3", "reqwest"] 30 | 31 | [dependencies] 32 | bytes = "1.1.0" 33 | thiserror = "1.0.29" 34 | rust-stemmers = "1.2.0" 35 | colored = { version = "2.0.0", optional = true } # colored console output 36 | console_error_panic_hook = { version = "0.1.6", optional = true } 37 | num-format = { version = "0.4.0", optional = true } 38 | serde = { version = "1.0", features = ["derive"] } 39 | smart-default = "0.6.0" 40 | serde_json = "1.0.72" 41 | toml = "0.5.8" 42 | frontmatter = { version = "0.4.0", optional = true } 43 | indicatif = { version = "0.16.2", optional = true } 44 | kuchiki = { version = "0.8.1", optional = true } 45 | pulldown-cmark = { version = "0.9.1", optional = true } 46 | mime = { version = "0.3.16", optional = true } 47 | reqwest = { version = 
"0.11", features = ["blocking", "json"], optional = true } 48 | rmp-serde = { version = "0.15.5", optional = true } 49 | srtparse = { version = "0.2.0", optional = true } 50 | unicode-segmentation = "1.8.0" 51 | bincode = { version = "1.3.3", optional = true } 52 | lazy_static = "1.4.0" 53 | 54 | [dev-dependencies] 55 | criterion = "0.3" 56 | hex-literal = "0.3.4" 57 | pretty_assertions = "1.0.0" 58 | 59 | [[bench]] 60 | name = "basic" 61 | harness = false 62 | -------------------------------------------------------------------------------- /stork-lib/benches/basic.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use std::{convert::TryFrom, path::PathBuf, process::exit, time::Duration}; 3 | use stork_lib::Config; 4 | 5 | fn config_from_path(path: &str) -> Config { 6 | if !std::path::Path::join(&std::env::current_dir().unwrap(), ".stork-project-root").exists() { 7 | println!("To successfully run this benchmark, the working directory must be the Stork project root.\nIt looks like the working directory is {:?}.\nRunning `just bench` will do this automatically.", std::env::current_dir()); 8 | exit(50); 9 | } 10 | 11 | let path = PathBuf::from(path); 12 | let contents = std::fs::read_to_string(path).unwrap(); 13 | return Config::try_from(contents.as_str()).unwrap(); 14 | } 15 | 16 | fn build_federalist(c: &mut Criterion) { 17 | let config = config_from_path("./stork-lib/benches/federalist.toml"); 18 | 19 | let mut group = c.benchmark_group("build"); 20 | group.measurement_time(Duration::from_secs(12)); 21 | 22 | group.bench_function("federalist", |b| { 23 | b.iter(|| stork_lib::build_index(&config).unwrap()) 24 | }); 25 | } 26 | 27 | fn search_federalist_for_liberty(c: &mut Criterion) { 28 | let config = config_from_path("./stork-lib/benches/federalist.toml"); 29 | let bytes = stork_lib::build_index(&config).unwrap().bytes; 30 | let _ = 
stork_lib::register_index("liberty", bytes); 31 | 32 | let mut group = c.benchmark_group("search/federalist"); 33 | group.measurement_time(Duration::from_secs(10)); 34 | 35 | let queries = vec![ 36 | "liberty", 37 | // "lib", 38 | // "liber old world", 39 | // "some long query that won't return results but let's see how it does", 40 | ]; 41 | 42 | for query in &queries { 43 | group.bench_function(query.to_owned(), |b| { 44 | b.iter(|| stork_lib::search_from_cache("liberty", query.to_owned())) 45 | }); 46 | } 47 | } 48 | 49 | criterion_group!(benches, build_federalist, search_federalist_for_liberty); 50 | criterion_main!(benches); 51 | -------------------------------------------------------------------------------- /stork-lib/benches/federalist.toml: -------------------------------------------------------------------------------- 1 | # To be used with `cargo criterion` 2 | [input] 3 | base_directory = "./local-dev/test-corpora/federalist" 4 | url_prefix = "https://www.gutenberg.org/files/1404/1404-h/1404-h.htm#link2H_4_" 5 | files = [ 6 | { path = "federalist-1.txt", url = "0001", title = "General Introduction" }, 7 | { path = "federalist-2.txt", url = "0002", title = "Concerning Dangers from Foreign Force and Influence" }, 8 | { path = "federalist-3.txt", url = "0003", title = "Concerning Dangers from Foreign Force and Influence pt 2" }, 9 | { path = "federalist-4.txt", url = "0004", title = "Concerning Dangers from Foreign Force and Influence pt 3" }, 10 | { path = "federalist-5.txt", url = "0005", title = "Concerning Dangers from Foreign Force and Influence pt 4" }, 11 | { path = "federalist-6.txt", url = "0006", title = "Concerning Dangers from Dissensions Between the States" }, 12 | { path = "federalist-7.txt", url = "0007", title = "Concerning Dangers from Dissensions Between the States pt 2" }, 13 | { path = "federalist-8.txt", url = "0008", title = "The Consequences of Hostilities Between the States" }, 14 | { path = "federalist-9.txt", url = "0009", title = 
"The Union as a Safeguard Against Domestic Faction and Insurrection" }, 15 | { path = "federalist-10.txt", url = "0010", title = "The Union as a Safeguard Against Domestic Faction and Insurrection pt 2" }, 16 | { path = "federalist-11.txt", url = "0011", title = "The Utility of the Union in Respect to Commercial Relations and a Navy" }, 17 | { path = "federalist-12.txt", url = "0012", title = "The Utility of the Union In Respect to Revenue" }, 18 | { path = "federalist-13.txt", url = "0013", title = "Advantage of the Union in Respect to Economy in Government" }, 19 | { path = "federalist-14.txt", url = "0014", title = "Objections to the Proposed Constitution From Extent of Territory Answered" }, 20 | { path = "federalist-15.txt", url = "0015", title = "The Insufficiency of the Present Confederation to Preserve the Union" }, 21 | { path = "federalist-16.txt", url = "0016", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 2" }, 22 | { path = "federalist-17.txt", url = "0017", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 3" }, 23 | { path = "federalist-18.txt", url = "0018", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 4" }, 24 | { path = "federalist-19.txt", url = "0019", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 5" }, 25 | { path = "federalist-20.txt", url = "0020", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 6" } 26 | ] 27 | 28 | [output] 29 | debug = false 30 | -------------------------------------------------------------------------------- /stork-lib/src/config/errors.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | #[derive(Error, Debug)] 4 | pub enum ConfigReadError { 5 | #[error("Recieved empty configuration string")] 6 | EmptyString, 7 | 8 | #[error("Cannot parse config as TOML. 
Stork recieved error: `{0}`")] 9 | UnparseableTomlInput(#[from] toml::de::Error), 10 | 11 | #[error("Cannot parse config as JSON. Stork recieved error: `{0}`")] 12 | UnparseableJsonInput(#[from] serde_json::Error), 13 | } 14 | 15 | impl PartialEq for ConfigReadError { 16 | fn eq(&self, other: &Self) -> bool { 17 | match (self, other) { 18 | (Self::UnparseableTomlInput(l0), Self::UnparseableTomlInput(r0)) => l0 == r0, 19 | 20 | // default case also catches UnparseableJsonInput, which would otherwise look like 21 | // the TomlInput case above, except serde_json::Error doesn't impl PartialEq. 22 | _ => core::mem::discriminant(self) == core::mem::discriminant(other), 23 | } 24 | } 25 | } 26 | 27 | #[cfg(test)] 28 | mod tests { 29 | use super::*; 30 | use pretty_assertions::assert_eq; 31 | 32 | #[test] 33 | fn from_toml_error() { 34 | let expected = "Cannot parse config as TOML. Stork recieved error: `expected an equals, found an identifier at line 1 column 6`"; 35 | let computed = toml::from_str::<()>("this is bad toml") 36 | .map_err(ConfigReadError::from) 37 | .unwrap_err() 38 | .to_string(); 39 | assert_eq!(expected, computed); 40 | } 41 | 42 | #[test] 43 | fn partial_eq_json() { 44 | let json_error_one = serde_json::from_str::<()>("this is not json").unwrap_err(); 45 | let json_error_two = serde_json::from_str::<()>("{[}").unwrap_err(); 46 | 47 | let config_read_error_one = ConfigReadError::UnparseableJsonInput(json_error_one); 48 | let config_read_error_two = ConfigReadError::UnparseableJsonInput(json_error_two); 49 | 50 | assert_eq!(config_read_error_one, config_read_error_two); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /stork-lib/src/config/frontmatter.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::module_name_repetitions)] 2 | 3 | use serde::{Deserialize, Serialize}; 4 | use smart_default::SmartDefault; 5 | 6 | #[derive(Serialize, Deserialize, 
Debug, Clone, SmartDefault, PartialEq)] 7 | pub enum FrontmatterConfig { 8 | Ignore, 9 | #[default] 10 | Omit, 11 | Parse, 12 | } 13 | -------------------------------------------------------------------------------- /stork-lib/src/config/input.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::module_name_repetitions)] 2 | 3 | use serde::{Deserialize, Serialize}; 4 | use smart_default::SmartDefault; 5 | 6 | use super::{File, FrontmatterConfig, SRTConfig, StemmingConfig}; 7 | 8 | /** Named levels for the `title_boost` setting of `InputConfig`; `Moderate` is the default. NOTE(review): the numeric weight behind each level is applied elsewhere and is not visible here. */ #[derive(Serialize, Deserialize, Clone, Debug, SmartDefault, PartialEq)] 9 | #[serde(deny_unknown_fields)] 10 | pub enum TitleBoost { 11 | Minimal, 12 | #[default] 13 | Moderate, 14 | Large, 15 | Ridiculous, 16 | } 17 | 18 | /** Input-side configuration. Unknown keys are rejected (`deny_unknown_fields`) and any key left out takes its default (`serde(default)`): `break_on_file_error` = false, `minimum_indexed_substring_length` = 3, `minimum_index_ideographic_substring_length` = 1, `exclude_html_selector` = None. `UNUSED_surrounding_word_count` is read from the key `surrounding_word_count`. */ #[derive(Serialize, Deserialize, Debug, Clone, SmartDefault, PartialEq)] 19 | #[serde(deny_unknown_fields, default)] 20 | #[allow(non_snake_case)] 21 | pub struct InputConfig { 22 | #[serde(rename = "surrounding_word_count")] 23 | pub UNUSED_surrounding_word_count: Option, 24 | pub base_directory: String, 25 | pub url_prefix: String, 26 | pub title_boost: TitleBoost, 27 | pub stemming: StemmingConfig, 28 | pub html_selector: Option, 29 | 30 | #[default(None)] 31 | pub exclude_html_selector: Option, 32 | pub frontmatter_handling: FrontmatterConfig, 33 | pub files: Vec, 34 | 35 | #[default = false] 36 | pub break_on_file_error: bool, 37 | pub srt_config: SRTConfig, 38 | 39 | #[default = 3] 40 | pub minimum_indexed_substring_length: u8, 41 | 42 | #[default = 1] 43 | pub minimum_index_ideographic_substring_length: u8, 44 | } 45 | -------------------------------------------------------------------------------- /stork-lib/src/config/output.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use smart_default::SmartDefault; 3 | 4 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, SmartDefault)] 5 | #[serde(deny_unknown_fields, default)] 
6 | #[allow(non_snake_case)] 7 | pub struct OutputConfig { 8 | #[serde(rename = "filename")] 9 | #[default(None)] 10 | pub UNUSED_filename: Option, 11 | 12 | #[default = false] 13 | pub debug: bool, 14 | 15 | #[default = false] 16 | pub save_nearest_html_id: bool, 17 | 18 | #[default = 8] 19 | pub excerpt_buffer: u8, 20 | 21 | #[default = 5] 22 | pub excerpts_per_result: u8, 23 | 24 | #[default = 10] 25 | pub displayed_results_count: u8, 26 | } 27 | -------------------------------------------------------------------------------- /stork-lib/src/config/srt.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use smart_default::SmartDefault; 3 | 4 | #[derive(Serialize, Deserialize, Debug, Clone, SmartDefault, PartialEq)] 5 | pub struct SRTConfig { 6 | #[default = true] 7 | pub timestamp_linking: bool, 8 | 9 | #[default = "&t={ts}"] 10 | pub timestamp_template_string: String, 11 | pub timestamp_format: SRTTimestampFormat, 12 | } 13 | 14 | #[derive(Serialize, Deserialize, Debug, Clone, SmartDefault, PartialEq)] 15 | #[serde(rename_all = "snake_case")] 16 | pub enum SRTTimestampFormat { 17 | #[default] 18 | NumberOfSeconds, 19 | MinutesAndSeconds, 20 | } 21 | -------------------------------------------------------------------------------- /stork-lib/src/config/stemming.rs: -------------------------------------------------------------------------------- 1 | use rust_stemmers::Algorithm; 2 | use serde::{Deserialize, Serialize}; 3 | use std::convert::{From, TryFrom}; 4 | use std::fmt::Write; 5 | use toml::Value; 6 | 7 | #[derive(Serialize, Debug, Clone, PartialEq)] 8 | #[serde(into = "String")] 9 | #[serde(try_from = "String")] 10 | pub enum StemmingConfig { 11 | None, 12 | Language(Algorithm), 13 | } 14 | 15 | impl Default for StemmingConfig { 16 | fn default() -> Self { 17 | StemmingConfig::Language(Algorithm::English) 18 | } 19 | } 20 | 21 | impl TryFrom<&String> for StemmingConfig { 22 | type 
Error = toml::de::Error; 23 | fn try_from(value: &String) -> Result { 24 | #[derive(Deserialize, Debug)] 25 | struct TempAlgStructure { 26 | lang: Algorithm, 27 | } 28 | 29 | if value == "none" || value == "None" { 30 | return Ok(StemmingConfig::None); 31 | } 32 | 33 | toml::from_str(format!("lang = \"{value}\"").as_str()) 34 | .map(|t: TempAlgStructure| StemmingConfig::Language(t.lang)) 35 | } 36 | } 37 | 38 | impl<'de> serde::Deserialize<'de> for StemmingConfig { 39 | fn deserialize(deserializer: D) -> Result 40 | where 41 | D: serde::Deserializer<'de>, 42 | { 43 | use serde::de::Error; 44 | 45 | if let Ok(Value::String(string)) = Deserialize::deserialize(deserializer) { 46 | StemmingConfig::try_from(&string).map_err(|_e| { 47 | serde::de::Error::custom(format!("Unexpected value `{}`, expected `none` or a language supported by https://snowballstem.org/, e.g. `Dutch`", string.clone())) 48 | }) 49 | } else { 50 | Err(Error::custom( 51 | "Unexpected stemming config value; could not parse as string. 
(Maybe you need quotes?)", 52 | )) 53 | } 54 | } 55 | } 56 | 57 | impl From for String { 58 | fn from(stemming_config: StemmingConfig) -> Self { 59 | let mut output = String::new(); 60 | let _result = match stemming_config { 61 | StemmingConfig::Language(l) => write!(&mut output, "{l:?}"), 62 | StemmingConfig::None => write!(&mut output, "none"), 63 | }; 64 | output 65 | } 66 | } 67 | 68 | #[cfg(test)] 69 | mod tests { 70 | use super::*; 71 | use std::convert::TryFrom; 72 | #[test] 73 | fn test_none_lowercase() { 74 | assert_eq!( 75 | StemmingConfig::try_from(&"none".to_string()).unwrap(), 76 | StemmingConfig::None 77 | ); 78 | } 79 | 80 | #[test] 81 | fn test_none_capital() { 82 | assert_eq!( 83 | StemmingConfig::try_from(&"None".to_string()).unwrap(), 84 | StemmingConfig::None 85 | ); 86 | } 87 | 88 | #[test] 89 | fn test_dutch() { 90 | assert_eq!( 91 | StemmingConfig::try_from(&"Dutch".to_string()).unwrap(), 92 | StemmingConfig::Language(Algorithm::Dutch) 93 | ); 94 | } 95 | 96 | #[test] 97 | fn test_error() { 98 | assert!(StemmingConfig::try_from(&"Blorp".to_string()).is_err()); 99 | } 100 | 101 | #[test] 102 | fn test_dutch_tostring() { 103 | assert_eq!( 104 | // StemmingConfig::try_from(&"Dutch".to_string()).unwrap(), 105 | // StemmingConfig::Language(Algorithm::Dutch) 106 | String::from(StemmingConfig::Language(Algorithm::Dutch)), 107 | "Dutch".to_string() 108 | ); 109 | } 110 | 111 | #[test] 112 | fn test_none_tostring() { 113 | assert_eq!( 114 | // StemmingConfig::try_from(&"Dutch".to_string()).unwrap(), 115 | // StemmingConfig::Language(Algorithm::Dutch) 116 | String::from(StemmingConfig::None), 117 | "none".to_string() 118 | ); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /stork-lib/src/index_v2/mod.rs: -------------------------------------------------------------------------------- 1 | mod scores; 2 | mod search; 3 | 4 | use bytes::{Buf, Bytes}; 5 | use scores::_MATCHED_WORD_SCORE; 6 | use 
serde::{Deserialize, Serialize}; 7 | use std::collections::HashMap; 8 | use std::convert::{TryFrom, TryInto}; 9 | 10 | type EntryIndex = usize; 11 | type AliasTarget = String; 12 | type Score = u8; 13 | type Fields = Option>; 14 | 15 | pub use search::search; 16 | 17 | #[derive(Serialize, Deserialize, Clone, Debug)] 18 | struct Entry { 19 | contents: String, 20 | title: String, 21 | url: String, 22 | fields: Fields, 23 | } 24 | 25 | #[derive(Serialize, Deserialize, Clone, Debug)] 26 | struct SearchResult { 27 | excerpts: Vec, 28 | score: Score, 29 | } 30 | 31 | impl SearchResult { 32 | fn _new() -> SearchResult { 33 | SearchResult { 34 | excerpts: vec![], 35 | score: _MATCHED_WORD_SCORE, 36 | } 37 | } 38 | } 39 | 40 | #[derive(Serialize, Deserialize, Clone, Debug)] 41 | struct Excerpt { 42 | word_index: usize, 43 | } 44 | 45 | /** 46 | * A Container holds: 47 | * 48 | * - a `HashMap` of `EntryIndexes` to `SearchResults` 49 | * - a `HashMap` of `AliasTargets` to scores 50 | * 51 | * Each valid query should return a single Container. It is possible to derive 52 | * all search results for a given query from a single container. 
53 | */ 54 | #[derive(Serialize, Deserialize, Clone, Debug)] 55 | struct Container { 56 | results: HashMap, 57 | aliases: HashMap, 58 | } 59 | 60 | impl Container { 61 | pub fn _new() -> Container { 62 | Container { 63 | results: HashMap::new(), 64 | aliases: HashMap::new(), 65 | } 66 | } 67 | } 68 | 69 | #[derive(Serialize, Deserialize, Clone, Debug)] 70 | pub struct Index { 71 | entries: Vec, 72 | queries: HashMap, 73 | } 74 | 75 | #[cfg(test)] 76 | impl Index { 77 | pub fn from_file(file: &[u8]) -> Index { 78 | let (version_size_bytes, rest) = file.split_at(std::mem::size_of::()); 79 | let version_size = u64::from_be_bytes(version_size_bytes.try_into().unwrap()); 80 | let (_version_bytes, rest) = rest.split_at(version_size.try_into().unwrap()); 81 | 82 | let (entries_size_bytes, rest) = rest.split_at(std::mem::size_of::()); 83 | let entries_size = u64::from_be_bytes(entries_size_bytes.try_into().unwrap()); 84 | let (entries_bytes, rest) = rest.split_at(entries_size.try_into().unwrap()); 85 | let entries = bincode::deserialize(entries_bytes).unwrap(); 86 | 87 | let (queries_size_bytes, rest) = rest.split_at(std::mem::size_of::()); 88 | let queries_size = u64::from_be_bytes(queries_size_bytes.try_into().unwrap()); 89 | let (queries_bytes, _rest) = rest.split_at(queries_size.try_into().unwrap()); 90 | let queries = bincode::deserialize(queries_bytes).unwrap(); 91 | 92 | Index { entries, queries } 93 | } 94 | } 95 | 96 | impl TryFrom for Index { 97 | type Error = &'static str; 98 | 99 | fn try_from(value: Bytes) -> Result { 100 | let mut value = value; 101 | 102 | let entries = { 103 | let size = value.get_u64(); 104 | let bytes = value.split_to(size.try_into().unwrap()); 105 | bincode::deserialize(bytes.as_ref()).unwrap() 106 | }; 107 | 108 | let queries = { 109 | let size = value.get_u64(); 110 | let bytes = value.split_to(size.try_into().unwrap()); 111 | bincode::deserialize(bytes.as_ref()).unwrap() 112 | }; 113 | 114 | Ok(Index { entries, queries }) 115 | } 116 
| } 117 | 118 | #[cfg(test)] 119 | mod tests { 120 | use super::*; 121 | use std::fs; 122 | use std::io::{BufReader, Read}; 123 | 124 | #[test] 125 | fn can_parse_0_5_3_index() { 126 | let file = fs::File::open("../test-assets/federalist-min-0.5.3.st").unwrap(); 127 | let mut buf_reader = BufReader::new(file); 128 | let mut index_bytes: Vec = Vec::new(); 129 | let _bytes_read = buf_reader.read_to_end(&mut index_bytes); 130 | let index = Index::from_file(index_bytes.as_slice()); 131 | assert_eq!(1, index.entries.len()); 132 | assert_eq!(2477, index.queries.len()); 133 | } 134 | 135 | #[test] 136 | fn can_parse_0_6_0_index() { 137 | let file = fs::File::open("../test-assets/federalist-min-0.6.0.st").unwrap(); 138 | let mut buf_reader = BufReader::new(file); 139 | let mut index_bytes: Vec = Vec::new(); 140 | let _bytes_read = buf_reader.read_to_end(&mut index_bytes); 141 | let index = Index::from_file(index_bytes.as_slice()); 142 | assert_eq!(1, index.entries.len()); 143 | assert_eq!(2477, index.queries.len()); 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /stork-lib/src/index_v2/scores.rs: -------------------------------------------------------------------------------- 1 | pub const _MATCHED_WORD_SCORE: u8 = 128; 2 | pub const _PREFIX_SCORE: u8 = 127; 3 | pub const _STEM_SCORE: u8 = 64; 4 | pub const STOPWORD_SCORE: u8 = 16; 5 | -------------------------------------------------------------------------------- /stork-lib/src/index_v3/build/annotated_words_from_string.rs: -------------------------------------------------------------------------------- 1 | use crate::{index_v3::AnnotatedWord, InternalWordAnnotation}; 2 | 3 | pub(super) trait AnnotatedWordable { 4 | fn make_annotated_words(&self) -> Vec; 5 | fn make_annotated_words_with_annotations(&self, closure: F) -> Vec 6 | where 7 | F: Fn(&str, &mut Vec); 8 | } 9 | 10 | impl AnnotatedWordable for str { 11 | fn make_annotated_words(&self) -> Vec { 12 | 
self.make_annotated_words_with_annotations(|_, _| {}) 13 | } 14 | 15 | fn make_annotated_words_with_annotations)>( 16 | &self, 17 | closure: F, 18 | ) -> Vec { 19 | self.split(|c: char| c.is_ascii_whitespace() || c == '-') 20 | .map(str::trim) 21 | .filter(|s| !s.is_empty()) 22 | .map(|w| { 23 | let mut internal_annotations: Vec = Vec::new(); 24 | closure(w, &mut internal_annotations); 25 | AnnotatedWord { 26 | word: w.to_string(), 27 | internal_annotations, 28 | ..AnnotatedWord::default() 29 | } 30 | }) 31 | .collect() 32 | } 33 | } 34 | 35 | #[cfg(test)] 36 | mod tests { 37 | 38 | use crate::InternalWordAnnotation; 39 | 40 | use super::AnnotatedWordable; 41 | 42 | #[test] 43 | fn annotated_words_split_on_hyphens() { 44 | let expected: usize = 3; 45 | let computed = "Hastings-on-hudson".make_annotated_words().len(); 46 | assert_eq!(expected, computed); 47 | } 48 | 49 | #[test] 50 | fn annotated_words_split_on_whitespace() { 51 | let expected: usize = 3; 52 | let computed = "Hastings on hudson".make_annotated_words().len(); 53 | assert_eq!(expected, computed); 54 | } 55 | 56 | #[test] 57 | fn annotated_words_split_on_multiple_whitespace() { 58 | let expected: usize = 3; 59 | let computed = "Hastings on \n \t hudson" 60 | .make_annotated_words() 61 | .len(); 62 | assert_eq!(expected, computed); 63 | } 64 | 65 | #[test] 66 | fn annotated_words_can_correctly_annotate() { 67 | let computed = "Hastings on \n \t hudson".make_annotated_words_with_annotations( 68 | |word, vec| vec.push(InternalWordAnnotation::UrlSuffix(word.to_string())), 69 | ); 70 | 71 | assert_eq!(3, computed.len()); 72 | assert_eq!(1, computed[0].internal_annotations.len()); 73 | assert_eq!( 74 | InternalWordAnnotation::UrlSuffix("Hastings".to_string()), 75 | computed[0].internal_annotations[0] 76 | ); 77 | assert_eq!( 78 | InternalWordAnnotation::UrlSuffix("hudson".to_string()), 79 | computed[2].internal_annotations[0] 80 | ); 81 | } 82 | } 83 | 
--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/errors.rs:
--------------------------------------------------------------------------------
use std::{fmt, path::PathBuf};
use thiserror::Error;

use crate::config::File;

/// Reasons a single document could not be turned into a word list.
#[derive(Debug, Error, Clone, PartialEq)]
pub enum WordListGenerationError {
    #[error("SRT file could not be parsed.")]
    InvalidSRT,

    #[error("The file `{0}` could not be found.")]
    FileNotFound(PathBuf),

    #[error("Could not determine the file's filetype. Please give this file a file extension Stork knows about, or disambiguate the file's filetype within your config.")]
    CannotDetermineFiletype,

    #[error("The selector `{0}` is not present in the HTML document.")]
    SelectorNotPresent(String),

    #[error("The web page could not be fetched")]
    WebPageNotFetched,

    #[error("When fetched, the web page returned a {0} status code.")]
    WebPageErrorfulStatusCode(u16),

    #[error("Content-Type is not present or invalid")]
    UnknownContentType,

    #[error("After parsing the document, there were no words found in the word list.")]
    EmptyWordList,

    #[error("Stork was not built with the `web-scraping` feature enabled.")]
    FeatureNotAvailable,
}

/// Formats `count` followed by `singular` when `count == 1`, else `plural`.
fn pluralize_with_count(count: usize, singular: &str, plural: &str) -> String {
    format!("{count} {}", if count == 1 { singular } else { plural })
}

/// Reasons the index as a whole could not be built.
#[derive(Debug, Error)]
pub enum IndexGenerationError {
    #[error("No files specified in config file")]
    NoFilesSpecified,

    #[error("All files failed to be indexed.\n{}", DocumentError::display_list(.0))]
    AllDocumentErrors(Vec<DocumentError>),

    #[error(
        "{} found while indexing files. If you want to fail silently and still build an index, remove `break_on_file_error` from your config.\n{}",
        pluralize_with_count(.0.len(), "error", "errors"),
        DocumentError::display_list(.0)
    )]
    PartialDocumentErrors(Vec<DocumentError>),
}

impl PartialEq for IndexGenerationError {
    /// Two errors are equal when they are the same variant; the contained
    /// document errors are deliberately not compared.
    fn eq(&self, other: &Self) -> bool {
        core::mem::discriminant(self) == core::mem::discriminant(other)
    }
}

/// Associates a `WordListGenerationError` with a `File`.
#[derive(Debug, Clone, PartialEq)]
pub struct DocumentError {
    pub file: File,
    pub word_list_generation_error: WordListGenerationError,
}

impl std::fmt::Display for DocumentError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "In file `{}`: {}",
            self.file, self.word_list_generation_error,
        )
    }
}

impl DocumentError {
    /// Renders a warning header followed by one line per document error.
    #[must_use]
    pub fn display_list(vec: &[DocumentError]) -> String {
        format!(
            "Warning: Stork couldn't include {} in the index because of the following errors:\n",
            pluralize_with_count(vec.len(), "file", "files"),
        ) + &vec
            .iter()
            .map(ToString::to_string)
            .collect::<Vec<String>>()
            .join("\n")
    }
}

#[cfg(test)]
mod tests {
    use crate::config::DataSource;

    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn test_documenterrors_display() {
        let computed = DocumentError {
            file: File {
                title: "My Test File".to_string(),
                explicit_source: Some(DataSource::Contents("ignored".to_string())),
                ..File::default()
            },
            word_list_generation_error: WordListGenerationError::FileNotFound(PathBuf::from(
                "/test",
            )),
        }
        .to_string();

        let expected = "In file `My Test File`: The file `/test` could not be found.";
        assert_eq!(computed, expected);
    }

    #[test]
    fn test_documenterror_list_display() {
        let computed = DocumentError::display_list(&[
            DocumentError {
                file: File {
                    title: "My Test File".to_string(),
                    explicit_source: Some(DataSource::Contents("ignored".to_string())),
                    ..File::default()
                },
                word_list_generation_error: WordListGenerationError::FileNotFound(PathBuf::from(
                    "/test",
                )),
            },
            DocumentError {
                file: File {
                    title: "My Test File 2".to_string(),
                    explicit_source: Some(DataSource::Contents("ignored 2".to_string())),
                    ..File::default()
                },
                word_list_generation_error: WordListGenerationError::FileNotFound(PathBuf::from(
                    "/test2",
                )),
            },
        ]);

        let expected = "Warning: Stork couldn't include 2 files in the index because of the following errors:\nIn file `My Test File`: The file `/test` could not be found.\nIn file `My Test File 2`: The file `/test2` could not be found.";
        assert_eq!(computed, expected);
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/fill_intermediate_entries/data_source_readers/filepath_data_source_reader.rs:
--------------------------------------------------------------------------------
use crate::config::Filetype;

use super::{ReadResult, ReaderConfig, WordListGenerationError};
use std::{
    fs::File,
    io::{BufReader, Read},
    path::Path,
};

/// Reads the file at `path`, resolved against the configured base directory.
///
/// The filetype set explicitly on the file config wins; otherwise it is
/// guessed from the file extension.
///
/// # Errors
/// Returns `FileNotFound` (carrying the joined path) when the file cannot be
/// opened.
pub(crate) fn read(
    path: &str,
    config: &ReaderConfig,
) -> Result<ReadResult, WordListGenerationError> {
    let base_directory_path = Path::new(&config.global.base_directory);
    let full_pathname = base_directory_path.join(path);

    let file = File::open(&full_pathname)
        .map_err(|_| WordListGenerationError::FileNotFound(full_pathname.clone()))?;
    let mut buf_reader = BufReader::new(file);
    let mut buffer = String::new();
    // NOTE(review): a failed read (I/O error or invalid UTF-8) is ignored
    // here, leaving `buffer` with whatever was read so far. No existing error
    // variant describes that case, so the behaviour is kept as-is.
    let _bytes_read = buf_reader.read_to_string(&mut buffer);

    let filetype_from_extension = get_filetype_from_path(&full_pathname);

    Ok(ReadResult {
        buffer,
        filetype: config.file.filetype.clone().or(filetype_from_extension),
        frontmatter_fields: None,
    })
}

/// Maps a file extension (case-insensitively) to a known `Filetype`, or
/// `None` when the path has no extension or the extension is unrecognized.
fn get_filetype_from_path(path: &Path) -> Option<Filetype> {
    let ext_str = path.extension()?.to_str()?;
    match ext_str.to_ascii_lowercase().as_str() {
        "html" | "htm" => Some(Filetype::HTML),
        "srt" => Some(Filetype::SRTSubtitle),
        "txt" => Some(Filetype::PlainText),
        "markdown" | "mdown" | "md" => Some(Filetype::Markdown),
        _ => None,
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/fill_intermediate_entries/data_source_readers/mod.rs:
--------------------------------------------------------------------------------
use crate::config::{DataSource, Filetype};

use super::{ReadResult, ReaderConfig, WordListGenerationError};

pub mod filepath_data_source_reader;
pub mod url_data_source_reader;

/// Reads the document described by `reader_config` from whichever data source
/// it names, then extracts frontmatter from the result.
///
/// Inline contents with no explicit filetype are treated as plain text.
pub fn read_from_data_source(
    reader_config: &ReaderConfig,
) -> Result<ReadResult, WordListGenerationError> {
    match &reader_config.file.source() {
        DataSource::Contents(contents) => Ok(ReadResult {
            buffer: contents.clone(),
            filetype: reader_config
                .file
                .filetype
                .clone()
                .or(Some(Filetype::PlainText)),
            frontmatter_fields: None,
        }),

        // NOTE(review): the early `return` means URL results skip the
        // frontmatter extraction below. Presumably intentional; confirm.
        DataSource::URL(url) => return url_data_source_reader::read(url, reader_config),
        DataSource::FilePath(path) => filepath_data_source_reader::read(path, reader_config),
    }
    .map(|read_result| read_result.extract_frontmatter(reader_config))
}

#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use pretty_assertions::assert_eq;

    use crate::{
        config::{DataSource, File, Filetype, InputConfig, OutputConfig},
        index_v3::build::fill_intermediate_entries::{ReadResult, ReaderConfig},
    };

    use super::read_from_data_source;

    #[test]
    fn read_from_data_source_extracts_frontmatter() {
        let read_result = read_from_data_source(&ReaderConfig {
            global: InputConfig {
                frontmatter_handling: crate::config::FrontmatterConfig::Parse,
                ..InputConfig::default()
            },
            file: File {
                title: "Input File".to_string(),
                explicit_source: Some(DataSource::Contents(
                    r#"---
key: value
---

# Header

this _is_ the text"#
                        .to_string(),
                )),
                filetype: Some(Filetype::Markdown),
                ..File::default()
            },
            output: OutputConfig::default(),
        })
        .unwrap();

        assert_eq!(
            read_result,
            ReadResult {
                buffer: "# Header\n\nthis _is_ the text".to_string(),
                filetype: Some(Filetype::Markdown),
                frontmatter_fields: Some(HashMap::from([("key".to_string(), "value".to_string())]))
            }
        );
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/fill_intermediate_entries/data_source_readers/url_data_source_reader.rs:
--------------------------------------------------------------------------------
use super::{ReadResult, ReaderConfig, WordListGenerationError};

/// Fallback used when the `build-v3-web-scraping` feature is disabled:
/// always fails with `FeatureNotAvailable`.
#[cfg(not(feature = "build-v3-web-scraping"))]
pub(crate) fn read(
    _url: &str,
    _config: &ReaderConfig,
) -> Result<ReadResult, WordListGenerationError> {
    Err(WordListGenerationError::FeatureNotAvailable)
}

/// Fetches `url` with a blocking HTTP GET and returns its body.
///
/// The filetype set explicitly on the file config wins; otherwise it is
/// derived from the response's `Content-Type` header.
///
/// # Errors
/// - `WebPageNotFetched` when the request itself fails.
/// - `WebPageErrorfulStatusCode` when the response carries an error status.
/// - `UnknownContentType` when the `Content-Type` header is missing or
///   cannot be parsed as a MIME type.
#[cfg(feature = "build-v3-web-scraping")]
pub(crate) fn read(
    url: &str,
    config: &ReaderConfig,
) -> Result<ReadResult, WordListGenerationError> {
    use crate::config::Filetype;
    use mime::Mime;
    use std::io::Read;

    /// Only `text/plain` and `text/html` map to a known filetype.
    fn filetype_from_mime(mime: &Mime) -> Option<Filetype> {
        match (mime.type_(), mime.subtype()) {
            (mime::TEXT, mime::PLAIN) => Some(Filetype::PlainText),
            (mime::TEXT, mime::HTML) => Some(Filetype::HTML),
            _ => None,
        }
    }

    let mut resp =
        reqwest::blocking::get(url).map_err(|_| WordListGenerationError::WebPageNotFetched)?;

    let _status =
        resp.error_for_status_ref()
            .map_err(|error| match error.status().map(|s| s.as_u16()) {
                Some(status_code) => {
                    WordListGenerationError::WebPageErrorfulStatusCode(status_code)
                }
                None => WordListGenerationError::WebPageNotFetched,
            })?;

    let mime_type: Mime = resp
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .ok_or(WordListGenerationError::UnknownContentType)?
        .to_str()
        .map_err(|_| WordListGenerationError::UnknownContentType)?
        .parse()
        .map_err(|_| WordListGenerationError::UnknownContentType)?;

    let mut buffer = String::new();
    // NOTE(review): a failed body read is ignored, leaving `buffer` with
    // whatever was read so far.
    let _bytes_read = resp.read_to_string(&mut buffer);

    Ok(ReadResult {
        buffer,
        filetype: config
            .file
            .filetype
            .clone()
            .or_else(|| filetype_from_mime(&mime_type)),
        frontmatter_fields: None,
    })
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/fill_intermediate_entries/frontmatter.rs:
--------------------------------------------------------------------------------
#![allow(clippy::module_name_repetitions)]

use frontmatter::{parse_and_find_content, Yaml};
use std::collections::HashMap;

use crate::{config::FrontmatterConfig, Fields};

/// Applies the configured frontmatter handling to `buffer`.
///
/// Returns the extracted fields and the text to index:
/// - `Ignore`: no fields, the buffer unchanged.
/// - `Omit`: no fields, the (trimmed) text after the frontmatter.
/// - `Parse`: the frontmatter's key/value pairs and the trimmed text.
///
/// When the frontmatter cannot be parsed (or, for `Parse`, is not a YAML
/// hash), the result is the same as `Ignore`.
pub fn parse_frontmatter(handling: &FrontmatterConfig, buffer: &str) -> (Fields, String) {
    let default_output = (HashMap::new(), buffer.to_string());
    match handling {
        FrontmatterConfig::Ignore => default_output,
        FrontmatterConfig::Omit => {
            if let Ok((_yaml, text)) = parse_and_find_content(buffer) {
                (HashMap::new(), text.trim().to_string())
            } else {
                default_output
            }
        }
        FrontmatterConfig::Parse => {
            if let Ok((Some(Yaml::Hash(map)), text)) = parse_and_find_content(buffer) {
                let fields = map
                    .into_iter()
                    .map(|(k, v)| {
                        (
                            // Non-string keys become the empty string.
                            k.into_string().unwrap_or_default(),
                            // Values are kept when they are strings or
                            // integers; anything else is recorded as "error".
                            v.clone().into_string().unwrap_or_else(|| {
                                v.into_i64().map_or("error".to_string(), |i| i.to_string())
                            }),
                        )
                    })
                    .collect();
                return (fields, text.trim().to_string());
            }

            default_output
        }
    }
}

#[cfg(test)]
#[allow(clippy::unnecessary_mut_passed)]
mod tests {
    use crate::Fields;

    use super::*;
    #[test]
    fn omit_option() {
        let expected: (Fields, String) = (HashMap::new(), "this is not".to_string());
        let output = parse_frontmatter(
            &FrontmatterConfig::Omit,
            &mut r#"---
this: "is frontmatter"
"that takes": "multiple lines"
"and has": 22
"different formats": +INF
---

this is not
"#
            .to_string(),
        );

        let computed = (output.0, output.1);
        assert_eq!(expected, computed);
    }

    #[test]
    fn parse_option() {
        let expected: (Fields, String) = (
            [
                ("this".to_string(), "is frontmatter".to_string()),
                ("that takes".to_string(), "multiple lines".to_string()),
                ("and has".to_string(), "22".to_string()),
                ("different formats".to_string(), "error".to_string()),
            ]
            .iter()
            .cloned()
            .collect(),
            "this is not".to_string(),
        );
        let output = parse_frontmatter(
            &FrontmatterConfig::Parse,
            &mut r#"---
this: "is frontmatter"
"that takes": "multiple lines"
"and has": 22
"different formats": +INF
---

this is not
"#
            .to_string(),
        );

        let computed = (output.0, output.1);
        assert_eq!(expected, computed);
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/fill_intermediate_entries/word_list_generators/markdown_word_list_generator.rs:
--------------------------------------------------------------------------------
use super::{html_word_list_generator, ReadResult, ReaderConfig, WordListGenerationError};
use crate::index_v3::AnnotatedWordList;
use pulldown_cmark::{html, Parser};

/// Builds a word list from Markdown by rendering it to HTML and delegating to
/// the HTML word list generator.
pub fn generate(
    config: &ReaderConfig,
    read_result: &ReadResult,
) -> Result<AnnotatedWordList, WordListGenerationError> {
    let parser = Parser::new(&read_result.buffer);
    let mut html_output = String::new();
    html::push_html(&mut html_output, parser);

    // NOTE(review): this literal looks like it originally wrapped the rendered
    // HTML in container markup that was stripped from this copy of the file.
    // Restore it from the upstream source before relying on this function.
    let html_string = format!("
    {html_output}
    ");

    let html_read_result = ReadResult {
        buffer: html_string,
        filetype: read_result.filetype.clone(),
        frontmatter_fields: None,
    };
    html_word_list_generator::generate(config, &html_read_result)
}

#[cfg(test)]
mod tests {

    use pretty_assertions::assert_eq;

    use crate::{
        config::{File, Filetype, InputConfig, OutputConfig},
        index_v3::build::fill_intermediate_entries::{ReadResult, ReaderConfig},
    };

    use super::generate;

    fn assert_markdown_content(word_list: &str, markdown_content: &str) {
        let computed: String = generate(
            &ReaderConfig {
                global: InputConfig {
                    frontmatter_handling: crate::config::FrontmatterConfig::Omit,
                    ..InputConfig::default()
                },
                file: File::default(),
                output: OutputConfig::default(),
            },
            &ReadResult {
                buffer: markdown_content.to_string(),
                filetype: Some(Filetype::Markdown),
                frontmatter_fields: None,
            },
        )
        .unwrap()
        .word_list
        .iter()
        .map(|aw| aw.word.clone().trim().to_string())
        .collect::<Vec<String>>()
        .join(" ");

        assert_eq!(word_list, computed);
    }

    #[test]
    fn test_markdown() {
        assert_markdown_content(
            "This is a title Stork should recognize this text This content should be indexed. This is another paragraph with inline text formatting . This is a link. Goodbye!",
            r#"
# This is a title

Stork should recognize this text

- This content should be indexed.
- This is another paragraph with **_inline text_ formatting**.
- [This is a link.](https://example.com)

Goodbye!"#
        );
    }

    mod issue_290 {

        use super::assert_markdown_content;

        #[test]
        #[rustfmt::skip]
        fn space_after_numeric_list() {
            assert_markdown_content(
                "something below, there is a space immediately after the 1. above",
                r#"1. 

something below, there is a space immediately after the 1. above "#,
            );
        }

        #[test]
        #[rustfmt::skip]
        fn space_after_bullet_list() {
            assert_markdown_content(
                "something below, there is a space immediately after the star above",
                r#"* 

something below, there is a space immediately after the star above "#,
            );
        }
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/fill_intermediate_entries/word_list_generators/mod.rs:
--------------------------------------------------------------------------------
use crate::{config::Filetype, index_v3::AnnotatedWordList};

use super::{ReadResult, ReaderConfig, WordListGenerationError};

pub mod html_word_list_generator;
pub mod markdown_word_list_generator;
pub mod plaintext_word_list_generator;
pub mod srt_word_list_generator;

/// Dispatches to the word list generator matching the read result's filetype.
///
/// # Errors
/// Returns `CannotDetermineFiletype` when no filetype is known, or whatever
/// error the selected generator produces.
pub(super) fn create_word_list(
    config: &ReaderConfig,
    read_result: &ReadResult,
) -> Result<AnnotatedWordList, WordListGenerationError> {
    match read_result.filetype {
        Some(Filetype::PlainText) => plaintext_word_list_generator::generate(config, read_result),
        Some(Filetype::SRTSubtitle) => srt_word_list_generator::generate(config, read_result),
        Some(Filetype::HTML) => html_word_list_generator::generate(config, read_result),
        Some(Filetype::Markdown) => markdown_word_list_generator::generate(config, read_result),
        None => Err(WordListGenerationError::CannotDetermineFiletype),
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/fill_intermediate_entries/word_list_generators/plaintext_word_list_generator.rs:
--------------------------------------------------------------------------------
use crate::index_v3::{build::annotated_words_from_string::AnnotatedWordable, AnnotatedWordList};

use super::{ReadResult, ReaderConfig, WordListGenerationError};

/// Builds a word list by splitting the buffer into unannotated words.
///
/// Never fails; the `Result` return type matches the other generators.
#[allow(clippy::unnecessary_wraps)]
pub fn generate(
    _config: &ReaderConfig,
    read_result: &ReadResult,
) -> Result<AnnotatedWordList, WordListGenerationError> {
    Ok(AnnotatedWordList {
        word_list: read_result.buffer.make_annotated_words(),
    })
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/fill_intermediate_entries/word_list_generators/srt_word_list_generator.rs:
--------------------------------------------------------------------------------
use crate::config::SRTConfig;
use crate::config::SRTTimestampFormat;
use crate::index_v3::build::annotated_words_from_string::AnnotatedWordable;
use crate::index_v3::build::fill_intermediate_entries::ReaderConfig;
use crate::index_v3::AnnotatedWord;
use crate::index_v3::AnnotatedWordList;
use crate::InternalWordAnnotation;

use super::ReadResult;
use super::WordListGenerationError;

/// Builds a word list from an SRT subtitle file.
///
/// Every word receives a `UrlSuffix` annotation derived from the start time
/// of the subtitle it appears in.
///
/// # Errors
/// Returns `InvalidSRT` when the buffer cannot be parsed as SRT.
pub fn generate(
    config: &ReaderConfig,
    read_result: &ReadResult,
) -> Result<AnnotatedWordList, WordListGenerationError> {
    let subs = srtparse::from_str(&read_result.buffer)
        .map_err(|_e| WordListGenerationError::InvalidSRT)?;
    let mut word_list: Vec<AnnotatedWord> = Vec::new();

    for sub in subs {
        // The suffix depends only on the subtitle, so build it once per
        // subtitle rather than once per word.
        let url_suffix = build_srt_url_time_suffix(&sub.start_time, &config.global.srt_config);

        let annotated_words_for_this_sub =
            sub.text
                .make_annotated_words_with_annotations(|_word, internal_annotations| {
                    internal_annotations
                        .push(InternalWordAnnotation::UrlSuffix(url_suffix.clone()));
                });

        word_list.extend(annotated_words_for_this_sub);
    }

    Ok(AnnotatedWordList { word_list })
}

/// Formats `time` according to the configured timestamp format and
/// substitutes it for every `{}` in the configured template string.
///
/// - `NumberOfSeconds`: total whole seconds, e.g. `972`.
/// - `MinutesAndSeconds`: total minutes (hours folded in) and seconds, e.g.
///   `76m12s`; the minutes part is omitted only when the time is under one
///   minute, e.g. `12s`.
fn build_srt_url_time_suffix(time: &srtparse::Time, srt_config: &SRTConfig) -> String {
    let time_string = match srt_config.timestamp_format {
        SRTTimestampFormat::NumberOfSeconds => {
            ((time.hours) * 3600 + (time.minutes) * 60 + (time.seconds)).to_string()
        }
        SRTTimestampFormat::MinutesAndSeconds => {
            // Decide on the *total* minutes: checking `time.minutes` alone
            // would drop the hours for times such as 1:00:12.
            let total_minutes = time.hours * 60 + time.minutes;
            if total_minutes > 0 {
                format!("{}m{}s", total_minutes, time.seconds)
            } else {
                format!("{}s", time.seconds)
            }
        }
    };

    srt_config
        .timestamp_template_string
        .replace("{}", &time_string)
}

#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use crate::{
        config::SRTConfig,
        index_v3::build::fill_intermediate_entries::word_list_generators::srt_word_list_generator::build_srt_url_time_suffix,
    };

    #[test]
    fn it_formats_time_suffix_correctly() {
        let time = srtparse::Time {
            hours: 0,
            minutes: 16,
            seconds: 12,
            milliseconds: 0,
        };
        let config = SRTConfig {
            timestamp_linking: true,
            timestamp_template_string: "{}".to_string(),
            timestamp_format: crate::config::SRTTimestampFormat::NumberOfSeconds,
        };

        let computed = build_srt_url_time_suffix(&time, &config);
        let expected = "972";
        assert_eq!(computed, expected);
    }

    #[test]
    fn it_formats_time_suffix_correctly_minutes_and_seconds() {
        let time = srtparse::Time {
            hours: 0,
            minutes: 16,
            seconds: 12,
            milliseconds: 0,
        };
        let config = SRTConfig {
            timestamp_linking: true,
            timestamp_template_string: "{}".to_string(),
            timestamp_format: crate::config::SRTTimestampFormat::MinutesAndSeconds,
        };

        let computed = build_srt_url_time_suffix(&time, &config);
        let expected = "16m12s";
        assert_eq!(computed, expected);
    }

    #[test]
    fn it_formats_time_suffix_correctly_minutes_and_seconds_for_time_over_one_hour() {
        let time = srtparse::Time {
            hours: 1,
            minutes: 16,
            seconds: 12,
            milliseconds: 0,
        };
        let config = SRTConfig {
            timestamp_linking: true,
            timestamp_template_string: "{}".to_string(),
            timestamp_format: crate::config::SRTTimestampFormat::MinutesAndSeconds,
        };

        let computed = build_srt_url_time_suffix(&time, &config);
        let expected = "76m12s";
        assert_eq!(computed, expected);
    }

    #[test]
    fn it_formats_time_suffix_correctly_minutes_and_seconds_on_the_exact_hour() {
        let time = srtparse::Time {
            hours: 1,
            minutes: 0,
            seconds: 12,
            milliseconds: 0,
        };
        let config = SRTConfig {
            timestamp_linking: true,
            timestamp_template_string: "{}".to_string(),
            timestamp_format: crate::config::SRTTimestampFormat::MinutesAndSeconds,
        };

        let computed = build_srt_url_time_suffix(&time, &config);
        let expected = "60m12s";
        assert_eq!(computed, expected);
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/fill_stems.rs:
--------------------------------------------------------------------------------
use super::{remove_surrounding_punctuation, NormalizedEntry};
use rust_stemmers::Stemmer;
use std::collections::BTreeMap;

/// Records, for every entry that has a stemming algorithm, which normalized
/// words share each stem.
///
/// Words are lowercased and passed through `remove_surrounding_punctuation`
/// before stemming. Each word is listed at most once per stem, in first-seen
/// order. Entries without a stemming algorithm are skipped.
pub fn fill_stems(
    intermediate_entries: &[NormalizedEntry],
    stems: &mut BTreeMap<String, Vec<String>>,
) {
    for entry in intermediate_entries {
        let contents = &entry.annotated_word_list;

        if let Some(stem_algorithm) = entry.stem_algorithm {
            // One stemmer per entry is enough; it does not depend on the word.
            let stemmer = Stemmer::create(stem_algorithm);

            for annotated_word in &contents.word_list {
                let normalized_word =
                    remove_surrounding_punctuation(&annotated_word.word.to_lowercase());
                let stem = stemmer.stem(&normalized_word).to_string();
                let stem_vector = stems.entry(stem).or_default();
                if !stem_vector.contains(&normalized_word) {
                    stem_vector.push(normalized_word);
                }
            }
        }
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/intermediate_entry.rs:
--------------------------------------------------------------------------------
use crate::{
    index_v3::{AnnotatedWordList, Entry},
    Fields,
};
use rust_stemmers::Algorithm;

/// A document after reading and word-list generation, before it is turned
/// into an index `Entry`.
pub struct NormalizedEntry {
    pub(super) annotated_word_list: AnnotatedWordList,
    pub(super) stem_algorithm: Option<Algorithm>,
    pub(super) title: String,
    pub(super) url: String,
    pub(super) fields: Fields,
}

impl From<&NormalizedEntry> for Entry {
    /// Copies title, URL and fields, and uses the word list's full text as
    /// the entry contents. The stemming algorithm is not carried over.
    fn from(ie: &NormalizedEntry) -> Self {
        Entry {
            contents: ie.annotated_word_list.get_full_text(),
            title: ie.title.clone(),
            url: ie.url.clone(),
            fields: ie.fields.clone(),
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::index_v3::AnnotatedWordList;

    use super::Entry;
    use super::NormalizedEntry;
    use std::collections::HashMap;

    #[test]
    fn convert_ie_to_entry() {
        let mut fields = HashMap::new();

        fields.insert("k1".to_string(), "v1".to_string());
        fields.insert("k2".to_string(), "v2".to_string());

        let intended = Entry {
            contents: String::new(),
            title: "My Title".to_string(),
            url: "https://example.com".to_string(),
            fields: fields.clone(),
        };

        let generated = Entry::from(&NormalizedEntry {
            annotated_word_list: AnnotatedWordList { word_list: vec![] },
            stem_algorithm: None,
            title: "My Title".to_string(),
            url: "https://example.com".to_string(),
            fields: fields.clone(),
        });

        assert_eq!(generated.contents, intended.contents);
        assert_eq!(generated.title, intended.title);
        assert_eq!(generated.url, intended.url);
        assert_eq!(generated.fields, intended.fields);
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/build/nudger.rs:
--------------------------------------------------------------------------------
use crate::config::Config;

/**
 * Nudge users to build better config files.
 *
 * Config files have to be backwards compatible, so we can't remove any fields
 * that the user might want to deserialize. But we _can_ detect that those
 * fields are being used, ignore them, and throw up a warning saying they're
 * being ignored.
 */

#[derive(Debug, PartialEq)]
pub(super) struct Nudger {
    nudges: Vec<Nudge>,
}

/// A deprecated config option that was found in the user's config.
#[derive(Debug, PartialEq)]
enum Nudge {
    InputSurroundingWordCount,
    OutputFile,
}

impl Nudge {
    /// The warning text shown to the user for this nudge.
    fn description(&self) -> &str {
        match self {
            Nudge::InputSurroundingWordCount => "The config option `input.surrounding_word_count` is deprecated and has no effect. Please use output.excerpt_buffer instead.",
            Nudge::OutputFile => "The config option `output.filename` is deprecated and has no effect. Please use the --output command line option instead."
        }
    }
}

impl From<&Config> for Nudger {
    /// Collects one nudge for each deprecated option that is set in `config`.
    fn from(config: &Config) -> Self {
        let mut nudges: Vec<Nudge> = vec![];

        if config.input.UNUSED_surrounding_word_count.is_some() {
            nudges.push(Nudge::InputSurroundingWordCount);
        }

        if config.output.UNUSED_filename.is_some() {
            nudges.push(Nudge::OutputFile);
        }

        Nudger { nudges }
    }
}

impl Nudger {
    /// Prints the collected warnings to stderr; prints nothing when there
    /// are none.
    pub(super) fn print(&self) {
        if !self.nudges.is_empty() {
            eprintln!("Config Warnings:");
        }

        for nudge in &self.nudges {
            eprintln!("{}", nudge.description());
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{Config, InputConfig, OutputConfig};

    #[test]
    fn create_nudge() {
        let intended = Nudger {
            nudges: vec![Nudge::InputSurroundingWordCount],
        };

        let generated = Nudger::from(&Config {
            input: InputConfig {
                UNUSED_surrounding_word_count: Some(12),
                ..InputConfig::default()
            },
            output: OutputConfig::default(),
        });

        assert_eq!(intended, generated);
    }

    #[test]
    fn default_config_creates_empty_nudge() {
        let intended = Nudger { nudges: vec![] };
        let generated = Nudger::from(&Config::default());
        assert_eq!(intended, generated);
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/read.rs:
--------------------------------------------------------------------------------
use super::Index;
use bytes::Bytes;
use std::convert::{TryFrom, TryInto};

impl TryFrom<&[u8]> for Index {
    type Error = rmp_serde::decode::Error;

    /// Parses an index from a byte slice laid out as two length-prefixed
    /// segments: a big-endian `u64` length followed by the version bytes,
    /// then a big-endian `u64` length followed by the MessagePack-encoded
    /// index. The version bytes and anything after the index segment are
    /// ignored.
    ///
    /// # Panics
    /// Panics when the slice is shorter than the lengths it declares, or when
    /// a declared length does not fit in `usize`.
    fn try_from(file: &[u8]) -> Result<Self, Self::Error> {
        let (version_size_bytes, rest) = file.split_at(std::mem::size_of::<u64>());
        let version_size = u64::from_be_bytes(version_size_bytes.try_into().unwrap());
        let version_size: usize = version_size.try_into().unwrap();
        let (_version_bytes, rest) = rest.split_at(version_size);

        let (index_size_bytes, rest) = rest.split_at(std::mem::size_of::<u64>());
        let index_size = u64::from_be_bytes(index_size_bytes.try_into().unwrap());
        let index_size: usize = index_size.try_into().unwrap();
        let (index_bytes, _rest) = rest.split_at(index_size);

        rmp_serde::from_read_ref(index_bytes)
    }
}

impl TryFrom<Bytes> for Index {
    type Error = rmp_serde::decode::Error;

    /// Decodes `value` directly as a MessagePack-encoded index.
    ///
    /// Unlike the `&[u8]` conversion, no length-prefixed header is skipped
    /// here; presumably callers pass only the index segment (confirm).
    fn try_from(value: Bytes) -> Result<Self, Self::Error> {
        rmp_serde::from_read_ref(value.as_ref())
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/scores.rs:
--------------------------------------------------------------------------------
pub const MATCHED_WORD_SCORE: u8 = 128;
pub const PREFIX_SCORE: u8 = 127;
pub const STEM_SCORE: u8 = 64;
pub const STOPWORD_SCORE: u8 = 16;

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/search/intermediate_excerpt.rs:
--------------------------------------------------------------------------------
use std::cmp::Ordering;

use crate::{
    index_v3::{EntryIndex, Score, WordListSource},
    Fields, InternalWordAnnotation,
};

/// A single query match inside an entry, before excerpts are assembled.
///
/// NOTE(review): ordering compares `score` while equality compares
/// `entry_index`, so `a.cmp(&b) == Ordering::Equal` does not imply `a == b`.
/// This departs from the consistency `Ord` and `PartialEq` are expected to
/// have; confirm callers rely on it before changing either impl.
#[derive(Clone, Debug, Default)]
pub(super) struct IntermediateExcerpt {
    pub(super) query: String,
    pub(super) entry_index: EntryIndex,
    pub(super) score: Score,
    pub(super) source: WordListSource,
    pub(super) word_index: usize,
    pub(super) internal_annotations: Vec<InternalWordAnnotation>,
    pub(super) fields: Fields,
}

impl Ord for IntermediateExcerpt {
    /// Orders excerpts by score only.
    fn cmp(&self, other: &Self) -> Ordering {
        self.score.cmp(&other.score)
    }
}

impl PartialOrd for IntermediateExcerpt {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Eq for IntermediateExcerpt {}

impl PartialEq for IntermediateExcerpt {
    /// Two excerpts are equal when they belong to the same entry.
    fn eq(&self, other: &Self) -> bool {
        self.entry_index == other.entry_index
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/index_v3/write.rs:
--------------------------------------------------------------------------------
use std::mem;

use bytes::{BufMut, Bytes, BytesMut};

use super::Index;

impl From<&Index> for Bytes {
    /// Serializes the index as two length-prefixed segments: a big-endian
    /// `u64` length and the version string `stork-3`, then a big-endian `u64`
    /// length and the MessagePack-encoded index.
    ///
    /// # Panics
    /// Panics when the index cannot be serialized.
    fn from(value: &Index) -> Self {
        let index_bytes = rmp_serde::to_vec(&value).unwrap();
        let index_bytes = Bytes::from(index_bytes);
        let version_bytes = Bytes::from("stork-3");

        // Two payloads plus one u64 length prefix for each.
        let mut buf = BytesMut::with_capacity(
            index_bytes.len() + version_bytes.len() + 2 * mem::size_of::<u64>(),
        );
        buf.put_u64(version_bytes.len() as u64);

        buf.put(version_bytes);
        buf.put_u64(index_bytes.len() as u64);
        buf.put(index_bytes);

        buf.freeze()
    }
}

--------------------------------------------------------------------------------
/stork-lib/src/input.rs:
--------------------------------------------------------------------------------
use bytes::{Buf, Bytes};
use thiserror::Error;

/// Raw index bytes tagged with the index format version they belong to.
#[derive(Debug, PartialEq)]
pub enum VersionedIndex {
    V2(Bytes),
    V3(Bytes),
}

impl TryFrom for VersionedIndex { 11 
| type Error = IndexVersioningError; 12 | 13 | fn try_from(value: Bytes) -> Result { 14 | let mut buffer = value; 15 | let u64_size = std::mem::size_of::(); 16 | 17 | if buffer.len() <= u64_size { 18 | return Err(IndexVersioningError::FileTooShort); 19 | } 20 | 21 | let version_size = { 22 | let version_size = buffer.get_u64(); 23 | let version_size: usize = version_size 24 | .try_into() 25 | .map_err(|_| IndexVersioningError::BadSegmentSize(version_size))?; 26 | 27 | if !(1..=32).contains(&version_size) { 28 | return Err(IndexVersioningError::BadVersionSize( 29 | version_size.try_into().unwrap(), 30 | )); 31 | } 32 | 33 | Ok::(version_size) 34 | }?; 35 | 36 | if buffer.len() < version_size { 37 | return Err(IndexVersioningError::FileTooShort); 38 | } 39 | 40 | let version_string = { 41 | let split = buffer.split_to(version_size); 42 | String::from_utf8(Vec::from(split.as_ref())) 43 | }?; 44 | 45 | match version_string.as_str() { 46 | "stork-2" => Ok(VersionedIndex::V2(buffer)), 47 | "stork-3" => { 48 | let index_size = { 49 | let index_size = buffer.get_u64(); 50 | let index_size: usize = index_size 51 | .try_into() 52 | .map_err(|_| IndexVersioningError::BadSegmentSize(index_size))?; 53 | Ok::(index_size) 54 | }?; 55 | 56 | let index_bytes = buffer.split_to(index_size); 57 | 58 | Ok(VersionedIndex::V3(index_bytes)) 59 | } 60 | _ => Err(IndexVersioningError::UnknownVersionString(version_string)), 61 | } 62 | } 63 | } 64 | 65 | #[derive(Error, Debug, PartialEq)] 66 | pub enum IndexVersioningError { 67 | #[error("Invalid index: index is too short and its version could not be determined.")] 68 | FileTooShort, 69 | 70 | #[error("Invalid index: found segment size `{0}`")] 71 | BadSegmentSize(u64), 72 | 73 | #[error("Invalid index: found version string that is `{0}` bytes long. The version string must be between 1 and 32 bytes long.")] 74 | BadVersionSize(u64), 75 | 76 | #[error( 77 | "Invalid index: could not parse version string as valid UTF8. 
Stork recieved error `{0}`" 78 | )] 79 | VersionStringUtf8Error(#[from] std::string::FromUtf8Error), 80 | 81 | #[error("Invalid index: unknown index version. Got `{0}`")] 82 | UnknownVersionString(String), 83 | } 84 | 85 | #[cfg(test)] 86 | mod tests { 87 | use super::*; 88 | use hex_literal::hex; 89 | use pretty_assertions::assert_eq; 90 | 91 | #[test] 92 | fn happy_path_v2_parse() { 93 | let bytes = Bytes::try_from(hex!("0000000000000007 73746F726B2D32 00").as_ref()).unwrap(); 94 | let versioned_index = VersionedIndex::try_from(bytes).unwrap(); 95 | assert_eq!( 96 | versioned_index, 97 | VersionedIndex::V2(Bytes::try_from(hex!("00").as_ref()).unwrap()) 98 | ); 99 | } 100 | 101 | #[test] 102 | fn happy_path_v3_parse() { 103 | let bytes = 104 | Bytes::try_from(hex!("0000000000000007 73746F726B2D33 0000000000000001 00").as_ref()) 105 | .unwrap(); 106 | let versioned_index = VersionedIndex::try_from(bytes).unwrap(); 107 | assert_eq!( 108 | versioned_index, 109 | VersionedIndex::V3(Bytes::try_from(hex!("00").as_ref()).unwrap()) 110 | ); 111 | } 112 | 113 | #[test] 114 | fn ascii_string_does_not_parse() { 115 | let bytes = Bytes::try_from("this is not an index".as_bytes()).unwrap(); 116 | assert_eq!( 117 | VersionedIndex::try_from(bytes).unwrap_err(), 118 | IndexVersioningError::BadVersionSize(8_388_070_249_163_485_984) 119 | ); 120 | } 121 | 122 | #[test] 123 | fn stated_33_byte_version_does_not_parse() { 124 | let bytes = Bytes::try_from(hex!("00000000 00000021 00").as_ref()).unwrap(); 125 | assert_eq!( 126 | VersionedIndex::try_from(bytes).unwrap_err(), 127 | IndexVersioningError::BadVersionSize(33) 128 | ); 129 | } 130 | 131 | #[test] 132 | fn stated_32_byte_version_parses() { 133 | let bytes = Bytes::try_from(hex!("00000000 00000020 00").as_ref()).unwrap(); 134 | assert_eq!( 135 | VersionedIndex::try_from(bytes).unwrap_err(), 136 | IndexVersioningError::FileTooShort // Because the version string isn't 32 bytes long 137 | ); 138 | } 139 | 140 | #[test] 141 | fn 
stated_1_byte_version_parses() { 142 | let bytes = Bytes::try_from(hex!("00000000 00000001 00").as_ref()).unwrap(); 143 | assert_eq!( 144 | VersionedIndex::try_from(bytes).unwrap_err(), 145 | IndexVersioningError::UnknownVersionString("\x00".into()) 146 | ); 147 | } 148 | 149 | #[test] 150 | fn stated_0_byte_version_does_not_parse() { 151 | let bytes = Bytes::try_from(hex!("00000000 00000000 00").as_ref()).unwrap(); 152 | assert_eq!( 153 | VersionedIndex::try_from(bytes).unwrap_err(), 154 | IndexVersioningError::BadVersionSize(0) 155 | ); 156 | } 157 | 158 | #[test] 159 | fn short_index_does_not_parse() { 160 | let bytes = Bytes::try_from(hex!("000000000000FF").as_ref()).unwrap(); 161 | assert_eq!( 162 | VersionedIndex::try_from(bytes).unwrap_err(), 163 | IndexVersioningError::FileTooShort 164 | ); 165 | } 166 | 167 | #[test] 168 | fn invalid_utf8_version_does_not_parse() { 169 | let bytes = Bytes::try_from(hex!("0000000000000004 F0288CBC").as_ref()).unwrap(); 170 | 171 | // This is an invalid 4-octet sequence where the second octet is invalid, 172 | // according to https://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805 173 | let utf8_error = String::from_utf8(hex!("f0 28 8c bc").as_ref().to_vec()).unwrap_err(); 174 | assert_eq!( 175 | VersionedIndex::try_from(bytes.clone()).unwrap_err(), 176 | IndexVersioningError::VersionStringUtf8Error(utf8_error) 177 | ); 178 | 179 | assert_eq!( 180 | VersionedIndex::try_from(bytes).unwrap_err().to_string(), 181 | "Invalid index: could not parse version string as valid UTF8. Stork recieved error `invalid utf-8 sequence of 1 bytes from index 0`" 182 | ); 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /stork-lib/src/output.rs: -------------------------------------------------------------------------------- 1 | use crate::Fields; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | /** 5 | * The set of data needed to display search results to a user. 
6 | */ 7 | #[derive(Serialize, Deserialize, Debug, Default, PartialEq)] 8 | pub struct Output { 9 | pub results: Vec, 10 | pub total_hit_count: usize, 11 | pub url_prefix: String, 12 | } 13 | 14 | /** 15 | * A single document in the list of matches for a search query, 16 | * along with its display information and excerpts. 17 | */ 18 | #[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] 19 | pub struct Result { 20 | pub entry: Entry, 21 | pub excerpts: Vec, 22 | pub title_highlight_ranges: Vec, 23 | pub score: usize, 24 | } 25 | 26 | /** 27 | * A document present in the search results. 28 | */ 29 | #[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] 30 | pub struct Entry { 31 | pub url: String, 32 | pub title: String, 33 | pub fields: Fields, 34 | } 35 | 36 | /** 37 | * An excerpt of a document's contents, that contains words that 38 | * were part of the search query. 39 | */ 40 | #[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] 41 | pub struct Excerpt { 42 | pub text: String, 43 | pub highlight_ranges: Vec, 44 | pub score: usize, 45 | pub internal_annotations: Vec, 46 | pub fields: Fields, 47 | } 48 | 49 | /** 50 | * An annotation attached to a given excerpt. 51 | * 52 | * This should not be displayed directly to users, but instead should 53 | * change some aspect of how that excerpt is rendered. 54 | */ 55 | #[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] 56 | pub enum InternalWordAnnotation { 57 | #[serde(rename = "a")] 58 | UrlSuffix(String), 59 | } 60 | 61 | /** 62 | * A range of characters in a string that should be highlighted. 63 | * The start and end indices are inclusive. 64 | */ 65 | #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 66 | pub struct HighlightRange { 67 | pub beginning: usize, 68 | pub end: usize, 69 | } 70 | 71 | /** 72 | * Contains metadata about an index, to be displayed to the user, often for debugging. 
73 | */ 74 | #[derive(Serialize, Clone, Debug, PartialEq)] 75 | pub struct IndexMetadata { 76 | #[serde(rename = "indexVersion")] 77 | pub index_version: String, 78 | } 79 | -------------------------------------------------------------------------------- /stork-lib/src/stopwords.rs: -------------------------------------------------------------------------------- 1 | pub const STOPWORDS: &[&str] = &[ 2 | "a", 3 | "about", 4 | "above", 5 | "above", 6 | "across", 7 | "after", 8 | "afterwards", 9 | "again", 10 | "against", 11 | "all", 12 | "almost", 13 | "alone", 14 | "along", 15 | "already", 16 | "also", 17 | "although", 18 | "always", 19 | "am", 20 | "among", 21 | "amongst", 22 | "amoungst", 23 | "amount", 24 | "an", 25 | "and", 26 | "another", 27 | "any", 28 | "anyhow", 29 | "anyone", 30 | "anything", 31 | "anyway", 32 | "anywhere", 33 | "are", 34 | "around", 35 | "as", 36 | "at", 37 | "back", 38 | "be", 39 | "became", 40 | "because", 41 | "become", 42 | "becomes", 43 | "becoming", 44 | "been", 45 | "before", 46 | "beforehand", 47 | "behind", 48 | "being", 49 | "below", 50 | "beside", 51 | "besides", 52 | "between", 53 | "beyond", 54 | "bill", 55 | "both", 56 | "bottom", 57 | "but", 58 | "by", 59 | "call", 60 | "can", 61 | "cannot", 62 | "cant", 63 | "co", 64 | "con", 65 | "could", 66 | "couldnt", 67 | "cry", 68 | "de", 69 | "describe", 70 | "detail", 71 | "do", 72 | "done", 73 | "down", 74 | "due", 75 | "during", 76 | "each", 77 | "eg", 78 | "eight", 79 | "either", 80 | "eleven", 81 | "else", 82 | "elsewhere", 83 | "empty", 84 | "enough", 85 | "etc", 86 | "even", 87 | "ever", 88 | "every", 89 | "everyone", 90 | "everything", 91 | "everywhere", 92 | "except", 93 | "few", 94 | "fifteen", 95 | "fify", 96 | "fill", 97 | "find", 98 | "fire", 99 | "first", 100 | "five", 101 | "for", 102 | "former", 103 | "formerly", 104 | "forty", 105 | "found", 106 | "four", 107 | "from", 108 | "front", 109 | "full", 110 | "further", 111 | "get", 112 | "give", 113 | "go", 114 | "had", 
115 | "has", 116 | "hasnt", 117 | "have", 118 | "he", 119 | "hence", 120 | "her", 121 | "here", 122 | "hereafter", 123 | "hereby", 124 | "herein", 125 | "hereupon", 126 | "hers", 127 | "herself", 128 | "him", 129 | "himself", 130 | "his", 131 | "how", 132 | "however", 133 | "hundred", 134 | "ie", 135 | "if", 136 | "in", 137 | "inc", 138 | "indeed", 139 | "interest", 140 | "into", 141 | "is", 142 | "it", 143 | "its", 144 | "itself", 145 | "keep", 146 | "last", 147 | "latter", 148 | "latterly", 149 | "least", 150 | "less", 151 | "ltd", 152 | "made", 153 | "many", 154 | "may", 155 | "me", 156 | "meanwhile", 157 | "might", 158 | "mill", 159 | "mine", 160 | "more", 161 | "moreover", 162 | "most", 163 | "mostly", 164 | "move", 165 | "much", 166 | "must", 167 | "my", 168 | "myself", 169 | "name", 170 | "namely", 171 | "neither", 172 | "never", 173 | "nevertheless", 174 | "next", 175 | "nine", 176 | "no", 177 | "nobody", 178 | "none", 179 | "noone", 180 | "nor", 181 | "not", 182 | "nothing", 183 | "now", 184 | "nowhere", 185 | "of", 186 | "off", 187 | "often", 188 | "on", 189 | "once", 190 | "one", 191 | "only", 192 | "onto", 193 | "or", 194 | "other", 195 | "others", 196 | "otherwise", 197 | "our", 198 | "ours", 199 | "ourselves", 200 | "out", 201 | "over", 202 | "own", 203 | "part", 204 | "per", 205 | "perhaps", 206 | "please", 207 | "put", 208 | "rather", 209 | "re", 210 | "same", 211 | "see", 212 | "seem", 213 | "seemed", 214 | "seeming", 215 | "seems", 216 | "serious", 217 | "several", 218 | "she", 219 | "should", 220 | "show", 221 | "side", 222 | "since", 223 | "sincere", 224 | "six", 225 | "sixty", 226 | "so", 227 | "some", 228 | "somehow", 229 | "someone", 230 | "something", 231 | "sometime", 232 | "sometimes", 233 | "somewhere", 234 | "still", 235 | "such", 236 | "system", 237 | "take", 238 | "ten", 239 | "than", 240 | "that", 241 | "the", 242 | "their", 243 | "them", 244 | "themselves", 245 | "then", 246 | "thence", 247 | "there", 248 | "thereafter", 249 | 
"thereby", 250 | "therefore", 251 | "therein", 252 | "thereupon", 253 | "these", 254 | "they", 255 | "thick", 256 | "thin", 257 | "third", 258 | "this", 259 | "those", 260 | "though", 261 | "three", 262 | "through", 263 | "throughout", 264 | "thru", 265 | "thus", 266 | "to", 267 | "together", 268 | "too", 269 | "top", 270 | "toward", 271 | "towards", 272 | "twelve", 273 | "twenty", 274 | "two", 275 | "un", 276 | "under", 277 | "until", 278 | "up", 279 | "upon", 280 | "us", 281 | "very", 282 | "via", 283 | "was", 284 | "we", 285 | "well", 286 | "were", 287 | "what", 288 | "whatever", 289 | "when", 290 | "whence", 291 | "whenever", 292 | "where", 293 | "whereafter", 294 | "whereas", 295 | "whereby", 296 | "wherein", 297 | "whereupon", 298 | "wherever", 299 | "whether", 300 | "which", 301 | "while", 302 | "whither", 303 | "who", 304 | "whoever", 305 | "whole", 306 | "whom", 307 | "whose", 308 | "why", 309 | "will", 310 | "with", 311 | "within", 312 | "without", 313 | "would", 314 | "yet", 315 | "you", 316 | "your", 317 | "yours", 318 | "yourself", 319 | "yourselves", 320 | "the", 321 | ]; 322 | -------------------------------------------------------------------------------- /stork-wasm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "stork-wasm" 3 | version = "1.6.0" 4 | edition = "2021" 5 | 6 | [lib] 7 | crate-type = ["cdylib", "rlib"] 8 | 9 | [features] 10 | default = ["v3"] 11 | v2 = ["stork-lib/search-v2"] 12 | v3 = ["stork-lib/search-v3"] 13 | 14 | [dependencies] 15 | console_error_panic_hook = "0.1.6" 16 | serde = "1.0.130" 17 | serde_json = "1.0.68" 18 | wasm-bindgen = "0.2.78" 19 | stork-lib = { path = "../stork-lib", default-features = false } 20 | bytes = "1.1.0" 21 | thiserror = "1.0.29" 22 | -------------------------------------------------------------------------------- /stork-wasm/src/lib.rs: -------------------------------------------------------------------------------- 1 | 
#![warn(clippy::pedantic)] 2 | #![allow(clippy::must_use_candidate)] 3 | 4 | use bytes::Bytes; 5 | use serde::Serialize; 6 | use std::{convert::From, fmt::Display}; 7 | use wasm_bindgen::prelude::*; 8 | 9 | struct JsonSerializationError {} 10 | 11 | impl Display for JsonSerializationError { 12 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 13 | write!(f, "Could not convert Stork data to JSON. If you see this, please file a bug: https://jil.im/storkbug") 14 | } 15 | } 16 | 17 | struct WasmOutput(String); 18 | 19 | impl From> for WasmOutput { 20 | fn from(r: Result) -> Self { 21 | fn wasm_format_error(e: E) -> String { 22 | format!("{{\"error\": \"{e}\"}}") 23 | } 24 | 25 | let value = match r { 26 | Ok(object) => match serde_json::to_string(&object) { 27 | Ok(s) => s, 28 | Err(_e) => wasm_format_error(JsonSerializationError {}), 29 | }, 30 | Err(e) => wasm_format_error(e), 31 | }; 32 | 33 | WasmOutput(value) 34 | } 35 | } 36 | 37 | #[wasm_bindgen] 38 | pub fn wasm_register_index(name: &str, data: &[u8]) -> String { 39 | console_error_panic_hook::set_once(); 40 | let data = Bytes::from(Vec::from(data)); // TODO: This seems questionable 41 | let result = stork_lib::register_index(name, data); 42 | WasmOutput::from(result).0 43 | } 44 | 45 | #[wasm_bindgen] 46 | pub fn wasm_search(name: &str, query: &str) -> String { 47 | console_error_panic_hook::set_once(); 48 | let result = stork_lib::search_from_cache(name, query); 49 | WasmOutput::from(result).0 50 | } 51 | 52 | #[wasm_bindgen] 53 | pub fn wasm_stork_version() -> String { 54 | env!("CARGO_PKG_VERSION").to_string() 55 | } 56 | 57 | #[cfg(test)] 58 | mod tests { 59 | 60 | #[derive(Serialize)] 61 | struct MyData { 62 | one: u32, 63 | two: String, 64 | three: bool, 65 | } 66 | 67 | use std::{ 68 | fs, 69 | io::{BufReader, Read}, 70 | }; 71 | 72 | use super::*; 73 | #[test] 74 | fn serializiable_value_can_be_deserialized() { 75 | let my_data_val = MyData { 76 | one: 42, 77 | two: "This is a 
string".to_string(), 78 | three: true, 79 | }; 80 | 81 | let result: Result = Ok(my_data_val); 82 | 83 | let computed = WasmOutput::from(result).0; 84 | let expected = "{\"one\":42,\"two\":\"This is a string\",\"three\":true}".to_string(); 85 | assert_eq!(computed, expected); 86 | } 87 | 88 | #[test] 89 | fn error_result_gives_error_json() { 90 | let my_error = JsonSerializationError {}; 91 | let result: Result = Err(my_error); 92 | 93 | let computed = WasmOutput::from(result).0; 94 | let expected = "{\"error\": \"Could not convert Stork data to JSON. If you see this, please file a bug: https://jil.im/storkbug\"}".to_string(); 95 | assert_eq!(computed, expected); 96 | } 97 | 98 | #[test] 99 | fn retrieve_v3_from_cache() { 100 | let file = fs::File::open("../test-assets/federalist-min-0.7.0.st").unwrap(); 101 | let mut buf_reader = BufReader::new(file); 102 | let mut index_bytes: Vec = Vec::new(); 103 | let _bytes_read = buf_reader.read_to_end(&mut index_bytes).unwrap(); 104 | 105 | let _str = wasm_register_index("zero-seven-zero", index_bytes.as_slice()); 106 | let str = wasm_register_index("zero-zeven-zero-again", index_bytes.as_slice()); 107 | assert_eq!(str, r#"{"indexVersion":"stork-3"}"#); 108 | 109 | let results = wasm_search("zero-seven-zero", "liberty"); 110 | assert!(results.contains("despotic power and hostile to the principles of liberty. 
An over-scrupulous jealousy of danger to the")); 111 | assert_eq!(results.len(), 1254); 112 | } 113 | 114 | #[test] 115 | fn cache_miss_errors_as_expected() { 116 | let file = fs::File::open("../test-assets/federalist-min-0.7.0.st").unwrap(); 117 | let mut buf_reader = BufReader::new(file); 118 | let mut index_bytes: Vec = Vec::new(); 119 | let _bytes_read = buf_reader.read_to_end(&mut index_bytes).unwrap(); 120 | 121 | let str = wasm_register_index("cache-name-one", index_bytes.as_slice()); 122 | assert_eq!(str, r#"{"indexVersion":"stork-3"}"#); 123 | 124 | let results = wasm_search("cache-name-two", "liberty"); 125 | assert_eq!( 126 | results, 127 | r#"{"error": "Index `cache-name-two` has not been registered. You need to register the index before performing searches with it."}"# 128 | ); 129 | } 130 | 131 | #[cfg(feature = "v2")] 132 | #[test] 133 | fn retrieve_v2_from_cache() { 134 | let file = fs::File::open("../test-assets/federalist-min-0.5.3.st").unwrap(); 135 | let mut buf_reader = BufReader::new(file); 136 | let mut index_bytes: Vec = Vec::new(); 137 | let _bytes_read = buf_reader.read_to_end(&mut index_bytes).unwrap(); 138 | 139 | let str = wasm_register_index("zero-five-three", index_bytes.as_slice()); 140 | assert_eq!(str, r#"{"indexVersion":"stork-2"}"#); 141 | 142 | let results = wasm_search("zero-five-three", "liberty"); 143 | assert!(results.contains("despotic power and hostile to the principles of liberty. 
An over-scrupulous jealousy of danger to the")); 144 | assert_eq!(results.len(), 1254); 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /test-assets/federalist-min-0.5.3.st: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jameslittle230/stork/efa98dad15b52bd6da9c9e87d612f0913431a95e/test-assets/federalist-min-0.5.3.st -------------------------------------------------------------------------------- /test-assets/federalist-min-0.6.0.st: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jameslittle230/stork/efa98dad15b52bd6da9c9e87d612f0913431a95e/test-assets/federalist-min-0.6.0.st -------------------------------------------------------------------------------- /test-assets/federalist-min-0.7.0.st: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jameslittle230/stork/efa98dad15b52bd6da9c9e87d612f0913431a95e/test-assets/federalist-min-0.7.0.st -------------------------------------------------------------------------------- /test-assets/federalist.toml: -------------------------------------------------------------------------------- 1 | # The main Federalist index configuration file. This produces the Federalist 2 | # search index seen on the home page of stork-search.net. 
3 | [input] 4 | base_directory = "./local-dev/test-corpora/federalist" 5 | url_prefix = "https://www.gutenberg.org/files/1404/1404-h/1404-h.htm#link2H_4_" 6 | files = [ 7 | { path = "federalist-1.txt", url = "0001", title = "General Introduction" }, 8 | { path = "federalist-2.txt", url = "0002", title = "Concerning Dangers from Foreign Force and Influence" }, 9 | { path = "federalist-3.txt", url = "0003", title = "Concerning Dangers from Foreign Force and Influence pt 2" }, 10 | { path = "federalist-4.txt", url = "0004", title = "Concerning Dangers from Foreign Force and Influence pt 3" }, 11 | { path = "federalist-5.txt", url = "0005", title = "Concerning Dangers from Foreign Force and Influence pt 4" }, 12 | { path = "federalist-6.txt", url = "0006", title = "Concerning Dangers from Dissensions Between the States" }, 13 | { path = "federalist-7.txt", url = "0007", title = "Concerning Dangers from Dissensions Between the States pt 2" }, 14 | { path = "federalist-8.txt", url = "0008", title = "The Consequences of Hostilities Between the States" }, 15 | { path = "federalist-9.txt", url = "0009", title = "The Union as a Safeguard Against Domestic Faction and Insurrection" }, 16 | { path = "federalist-10.txt", url = "0010", title = "The Union as a Safeguard Against Domestic Faction and Insurrection pt 2" }, 17 | { path = "federalist-11.txt", url = "0011", title = "The Utility of the Union in Respect to Commercial Relations and a Navy" }, 18 | { path = "federalist-12.txt", url = "0012", title = "The Utility of the Union In Respect to Revenue" }, 19 | { path = "federalist-13.txt", url = "0013", title = "Advantage of the Union in Respect to Economy in Government" }, 20 | { path = "federalist-14.txt", url = "0014", title = "Objections to the Proposed Constitution From Extent of Territory Answered" }, 21 | { path = "federalist-15.txt", url = "0015", title = "The Insufficiency of the Present Confederation to Preserve the Union" }, 22 | { path = "federalist-16.txt", url = 
"0016", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 2" }, 23 | { path = "federalist-17.txt", url = "0017", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 3" }, 24 | { path = "federalist-18.txt", url = "0018", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 4" }, 25 | { path = "federalist-19.txt", url = "0019", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 5" }, 26 | { path = "federalist-20.txt", url = "0020", title = "The Insufficiency of the Present Confederation to Preserve the Union pt 6" } 27 | ] 28 | 29 | [output] 30 | debug = false 31 | -------------------------------------------------------------------------------- /themes/basic.css: -------------------------------------------------------------------------------- 1 | .stork-wrapper { 2 | position: relative; 3 | font-family: inherit; 4 | box-sizing: border-box; 5 | font-size: 1em; 6 | 7 | --stork-blue-2: #a5d8ff; 8 | --stork-blue-3: #74c0fc; 9 | --stork-blue-4: #4dabf7; 10 | --stork-blue-5: #339af0; 11 | --stork-blue-7: #1c7ed6; 12 | --stork-gray-8: #343a40; 13 | --stork-gray-9: #212529; 14 | --stork-yellow-2: #ffec99; 15 | 16 | --stork-border-color: hsl(0, 0%, 65%); 17 | --stork-background-color: hsla(0, 0%, 97%); 18 | --stork-text-color: var(--stork-gray-9); 19 | 20 | --stork-input-border-radius: 8px; 21 | } 22 | 23 | .stork-wrapper *, 24 | .stork-wrapper *:before, 25 | .stork-wrapper *:after { 26 | box-sizing: border-box; 27 | } 28 | 29 | .stork-wrapper .stork-input { 30 | width: 100%; 31 | height: 2.4em; 32 | font-size: 1em; 33 | padding: 0.4em 0.8em; 34 | position: relative; 35 | box-shadow: inset 0 0.1em 0.3em hsla(0, 0%, 0%, 0.1); 36 | border: 1px solid var(--stork-border-color); 37 | border-radius: var(--stork-input-border-radius); 38 | background-color: var(--stork-background-color); 39 | color: var(--stork-text-color); 40 | font-family: inherit; 41 | } 42 | 43 | 
.stork-wrapper .stork-input:focus { 44 | outline: none; 45 | } 46 | 47 | .stork-wrapper .stork-progress { 48 | position: absolute; 49 | display: block; 50 | content: ""; 51 | bottom: 1px; 52 | background-color: var(--stork-blue-5); 53 | box-shadow: 0 0 8px var(--stork-blue-4); 54 | height: 1px; 55 | transition: width 0.25s ease, opacity 0.4s ease 0.4s; 56 | margin-left: var(--stork-input-border-radius); 57 | max-width: calc(100% - 2 * var(--stork-input-border-radius)); 58 | } 59 | 60 | .stork-wrapper .stork-output { 61 | position: absolute; 62 | width: 100%; 63 | margin-top: 0.5em; 64 | border-radius: 4px; 65 | display: flex; 66 | flex-direction: column; 67 | z-index: 100; 68 | color: var(--stork-text-color); 69 | font-weight: 400; 70 | font-family: inherit; 71 | } 72 | 73 | .stork-wrapper .stork-attribution a:link, 74 | .stork-wrapper .stork-attribution a:visited { 75 | color: var(--stork-blue-7); 76 | } 77 | 78 | .stork-wrapper .stork-output-visible { 79 | border: 1px solid var(--stork-border-color); 80 | box-shadow: 0px 0.4px 2.2px rgba(0, 0, 0, 0.011), 81 | 0px 1px 5.3px rgba(0, 0, 0, 0.016), 0px 1.9px 10px rgba(0, 0, 0, 0.02), 82 | 0px 3.4px 17.9px rgba(0, 0, 0, 0.024), 0px 6.2px 33.4px rgba(0, 0, 0, 0.029), 83 | 0px 15px 80px rgba(0, 0, 0, 0.04); 84 | background: var(--stork-background-color); 85 | } 86 | 87 | .stork-wrapper .stork-message { 88 | width: 100%; 89 | padding: 0.5em 1em; 90 | color: var(--stork-text-color); 91 | } 92 | 93 | .stork-wrapper .stork-attribution { 94 | width: 100%; 95 | padding: 0.5em 1em; 96 | font-size: 0.8em; 97 | color: var(--stork-text-color); 98 | } 99 | 100 | .stork-wrapper .stork-results { 101 | margin: 0; 102 | padding: 0; 103 | width: 100%; 104 | list-style-type: none; 105 | max-height: 25em; 106 | overflow-y: scroll; 107 | border-top: 1px solid var(--stork-border-color); 108 | border-bottom: 1px solid var(--stork-border-color); 109 | box-shadow: inset 0em 0.7em 0.2em -0.5em hsla(0, 0%, 0%, 0.08), 110 | inset 0em -0.7em 
0.2em -0.5em hsla(0, 0%, 0%, 0.08); 111 | } 112 | 113 | .stork-wrapper .stork-result:not(:last-child) { 114 | border-bottom: 1px solid var(--stork-border-color); 115 | } 116 | 117 | .stork-wrapper .stork-result.selected { 118 | background: var(--stork-blue-2); 119 | } 120 | 121 | .stork-wrapper .stork-result a:link { 122 | padding: 1em; 123 | display: block; 124 | color: currentcolor; 125 | text-decoration: none; 126 | } 127 | 128 | .stork-wrapper .stork-result p { 129 | margin: 0; 130 | } 131 | 132 | .stork-wrapper .stork-title { 133 | font-weight: bold; 134 | font-size: 0.95em; 135 | margin: 0; 136 | color: var(--stork-text-color); 137 | 138 | /* Flexbox container for the title and the score, when debugging */ 139 | display: flex; 140 | justify-content: space-between; 141 | } 142 | 143 | .stork-wrapper .stork-excerpt-container { 144 | margin-top: 0.75em; 145 | } 146 | 147 | .stork-wrapper .stork-excerpt { 148 | font-size: 0.8em; 149 | line-height: 1; 150 | margin: 0; 151 | color: var(--stork-gray-8); 152 | 153 | /* Flexbox container for the title and the score, when debugging */ 154 | display: flex; 155 | justify-content: space-between; 156 | } 157 | 158 | .stork-wrapper .stork-excerpt:not(:last-of-type) { 159 | margin-bottom: 0.6em; 160 | } 161 | 162 | .stork-wrapper .stork-highlight { 163 | background-color: var(--stork-yellow-2); 164 | padding: 0 0.1em; 165 | } 166 | 167 | .stork-wrapper .stork-error { 168 | outline: 2px solid #c92a2a; 169 | } 170 | 171 | .stork-wrapper .stork-close-button { 172 | position: absolute; 173 | /* bottom: 0; */ 174 | right: 0; 175 | margin: 0.7em 0.6em; 176 | height: 1.2em; 177 | width: 1.2em; 178 | padding: 0px; 179 | background: linear-gradient( 180 | to bottom, 181 | hsl(0, 0%, 85%) 0%, 182 | hsl(0, 0%, 83%) 100% 183 | ); 184 | border: 1px solid hsla(0, 0%, 50%, 0.3); 185 | font-size: 1em; 186 | color: hsl(0, 0%, 50%); 187 | border-radius: 15%; 188 | line-height: 1; 189 | } 190 | 191 | .stork-wrapper .stork-close-button svg { 
192 | width: 0.8em; 193 | position: relative; 194 | top: 1px; 195 | margin: auto; 196 | } 197 | 198 | .stork-wrapper .stork-close-button:hover { 199 | background: hsla(0, 0%, 78%); 200 | cursor: pointer; 201 | } 202 | 203 | .stork-wrapper .stork-close-button:active { 204 | background: hsla(0, 0%, 65%); 205 | } 206 | -------------------------------------------------------------------------------- /themes/dark.css: -------------------------------------------------------------------------------- 1 | .stork-wrapper-dark { 2 | position: relative; 3 | font-family: inherit; 4 | box-sizing: border-box; 5 | font-size: 1em; 6 | 7 | --stork-blue-2: #a5d8ff; 8 | 9 | --stork-violet-4: #9775fa; 10 | --stork-violet-5: #845ef7; 11 | --stork-violet-10: #52366d; 12 | 13 | --stork-lime-3: #c0eb75; 14 | 15 | --stork-gray-1: #f1f3f5; 16 | --stork-gray-3: #dee2e6; 17 | --stork-gray-7: #495057; 18 | --stork-gray-8: #343a40; 19 | 20 | --stork-border-color: var(--stork-gray-7); 21 | --stork-background-color: var(--stork-gray-8); 22 | --stork-text-color: var(--stork-gray-1); 23 | 24 | --stork-input-border-radius: 8px; 25 | } 26 | 27 | .stork-wrapper-dark *, 28 | .stork-wrapper-dark *:before, 29 | .stork-wrapper-dark *:after { 30 | box-sizing: border-box; 31 | } 32 | 33 | .stork-wrapper-dark .stork-input { 34 | width: 100%; 35 | height: 2.4em; 36 | font-size: 1em; 37 | padding: 0.4em 0.8em; 38 | position: relative; 39 | box-shadow: inset 0 0.1em 0.3em hsla(0, 0%, 0%, 0.1); 40 | border: 1px solid var(--stork-border-color); 41 | border-radius: var(--stork-input-border-radius); 42 | background-color: var(--stork-background-color); 43 | color: var(--stork-text-color); 44 | font-family: inherit; 45 | } 46 | 47 | .stork-wrapper-dark .stork-input:focus { 48 | outline: none; 49 | } 50 | 51 | .stork-wrapper-dark .stork-progress { 52 | position: absolute; 53 | display: block; 54 | content: ""; 55 | bottom: 1px; 56 | background-color: var(--stork-violet-5); 57 | box-shadow: 0 0 8px 
var(--stork-violet-4); 58 | height: 1px; 59 | transition: width 0.25s ease, opacity 0.4s ease 0.4s; 60 | margin-left: var(--stork-input-border-radius); 61 | max-width: calc(100% - 2 * var(--stork-input-border-radius)); 62 | } 63 | 64 | .stork-wrapper-dark .stork-output { 65 | position: absolute; 66 | width: 100%; 67 | margin-top: 0.5em; 68 | border-radius: 4px; 69 | display: flex; 70 | flex-direction: column; 71 | z-index: 100; 72 | color: var(--stork-text-color); 73 | font-weight: 400; 74 | font-family: inherit; 75 | } 76 | 77 | .stork-wrapper-dark .stork-attribution a:link, 78 | .stork-wrapper-dark .stork-attribution a:visited { 79 | color: var(--stork-blue-2); 80 | } 81 | 82 | .stork-wrapper-dark .stork-output-visible { 83 | border: 1px solid var(--stork-border-color); 84 | box-shadow: 0px 0.4px 2.2px rgba(0, 0, 0, 0.011), 85 | 0px 1px 5.3px rgba(0, 0, 0, 0.016), 0px 1.9px 10px rgba(0, 0, 0, 0.02), 86 | 0px 3.4px 17.9px rgba(0, 0, 0, 0.024), 0px 6.2px 33.4px rgba(0, 0, 0, 0.029), 87 | 0px 15px 80px rgba(0, 0, 0, 0.04); 88 | background: var(--stork-background-color); 89 | } 90 | 91 | .stork-wrapper-dark .stork-message { 92 | width: 100%; 93 | padding: 0.5em 1em; 94 | color: var(--stork-text-color); 95 | } 96 | 97 | .stork-wrapper-dark .stork-attribution { 98 | width: 100%; 99 | padding: 0.5em 1em; 100 | font-size: 0.8em; 101 | color: var(--stork-text-color); 102 | } 103 | 104 | .stork-wrapper-dark .stork-results { 105 | margin: 0; 106 | padding: 0; 107 | width: 100%; 108 | list-style-type: none; 109 | max-height: 25em; 110 | overflow-y: scroll; 111 | border-top: 1px solid var(--stork-border-color); 112 | border-bottom: 1px solid var(--stork-border-color); 113 | box-shadow: inset 0em 0.7em 0.2em -0.5em hsla(0, 0%, 0%, 0.08), 114 | inset 0em -0.7em 0.2em -0.5em hsla(0, 0%, 0%, 0.08); 115 | } 116 | 117 | .stork-wrapper-dark .stork-result:not(:last-child) { 118 | border-bottom: 1px solid var(--stork-border-color); 119 | } 120 | 121 | .stork-wrapper-dark 
.stork-result.selected { 122 | background: var(--stork-violet-10); 123 | } 124 | 125 | .stork-wrapper-dark .stork-result a:link { 126 | padding: 1em; 127 | display: block; 128 | color: currentcolor; 129 | text-decoration: none; 130 | } 131 | 132 | .stork-wrapper-dark .stork-result p { 133 | margin: 0; 134 | } 135 | 136 | .stork-wrapper-dark .stork-title { 137 | font-weight: bold; 138 | font-size: 0.95em; 139 | margin: 0; 140 | color: var(--stork-text-color); 141 | 142 | /* Flexbox container for the title and the score, when debugging */ 143 | display: flex; 144 | justify-content: space-between; 145 | } 146 | 147 | .stork-wrapper-dark .stork-excerpt-container { 148 | margin-top: 0.75em; 149 | } 150 | 151 | .stork-wrapper-dark .stork-excerpt { 152 | font-size: 0.8em; 153 | line-height: 1; 154 | margin: 0; 155 | color: var(--stork-gray-3); 156 | 157 | /* Flexbox container for the title and the score, when debugging */ 158 | display: flex; 159 | justify-content: space-between; 160 | } 161 | 162 | .stork-wrapper-dark .stork-excerpt:not(:last-of-type) { 163 | margin-bottom: 0.6em; 164 | } 165 | 166 | .stork-wrapper-dark .stork-highlight { 167 | background-color: var(--stork-lime-3); 168 | padding: 0 0.1em; 169 | } 170 | 171 | .stork-wrapper-dark .stork-error { 172 | outline: 2px solid #c92a2a; 173 | } 174 | 175 | .stork-wrapper-dark .stork-close-button { 176 | position: absolute; 177 | /* bottom: 0; */ 178 | right: 0; 179 | margin: 0.7em 0.6em; 180 | height: 1.2em; 181 | width: 1.2em; 182 | padding: 0px; 183 | background: linear-gradient( 184 | to bottom, 185 | hsl(0, 0%, 85%) 0%, 186 | hsl(0, 0%, 83%) 100% 187 | ); 188 | border: 1px solid hsla(0, 0%, 50%, 0.3); 189 | font-size: 1em; 190 | color: hsl(0, 0%, 50%); 191 | border-radius: 15%; 192 | line-height: 1; 193 | } 194 | 195 | .stork-wrapper-dark .stork-close-button svg { 196 | width: 0.8em; 197 | position: relative; 198 | top: 1px; 199 | margin: auto; 200 | } 201 | 202 | .stork-wrapper-dark .stork-close-button:hover 
{ 203 | background: hsla(0, 0%, 78%); 204 | cursor: pointer; 205 | } 206 | 207 | .stork-wrapper-dark .stork-close-button:active { 208 | background: hsla(0, 0%, 65%); 209 | } 210 | -------------------------------------------------------------------------------- /themes/edible-dark.css: -------------------------------------------------------------------------------- 1 | .stork-wrapper-edible-dark { 2 | position: relative; 3 | font-family: inherit; 4 | box-sizing: border-box; 5 | font-size: 1em; 6 | 7 | --stork-blue-2: #a5d8ff; 8 | 9 | --stork-violet-4: #9775fa; 10 | --stork-violet-5: #845ef7; 11 | --stork-violet-10: #52366d; 12 | 13 | --stork-lime-3: #c0eb75; 14 | 15 | --stork-gray-1: #f1f3f5; 16 | --stork-gray-3: #dee2e6; 17 | --stork-gray-7: #495057; 18 | --stork-gray-8: #343a40; 19 | 20 | --stork-border-color: var(--stork-gray-7); 21 | --stork-background-color: var(--stork-gray-8); 22 | --stork-text-color: var(--stork-gray-1); 23 | } 24 | 25 | .stork-wrapper-edible-dark *, 26 | .stork-wrapper-edible-dark *:before, 27 | .stork-wrapper-edible-dark *:after { 28 | box-sizing: border-box; 29 | } 30 | 31 | .stork-wrapper-edible-dark .stork-input { 32 | width: 100%; 33 | height: 2.4em; 34 | font-size: 1em; 35 | padding: 0.4em 0.8em; 36 | position: relative; 37 | box-shadow: inset 0 0.1em 0.3em hsla(0, 0%, 0%, 0.1); 38 | border: 1px solid var(--stork-border-color); 39 | border-radius: 8px; 40 | background-color: var(--stork-background-color); 41 | color: var(--stork-text-color); 42 | font-family: inherit; 43 | } 44 | 45 | .stork-wrapper-edible-dark .stork-input:focus { 46 | outline: none; 47 | } 48 | 49 | .stork-wrapper-edible-dark .stork-progress { 50 | position: absolute; 51 | display: block; 52 | content: ""; 53 | transition: width 0.25s ease, opacity 0.4s ease 0.4s; 54 | box-shadow: none; 55 | background: linear-gradient( 56 | #8bb7daff, 57 | #a3cff6ff, 58 | #a8d0f5ff, 59 | #a8d0f5ff, 60 | #a3cff6ff, 61 | #98ccf6ff, 62 | #8cc9f8ff, 63 | #82c6f8ff, 64 | #77c4f8ff, 65 | 
#77c4f8ff, 66 | #81c5f8ff, 67 | #88c7f8ff, 68 | #93c9f7ff, 69 | #9ccdf6ff, 70 | #a3d0f5ff, 71 | #a8d1f5ff, 72 | #add3f4ff, 73 | #add3f4ff, 74 | #a9d1f5ff, 75 | #a8d1f5ff 76 | ); 77 | border-radius: 10px; /* input border radius + 2 */ 78 | border: 1px solid #0000; 79 | height: 2.4em; 80 | bottom: 0; 81 | pointer-events: none; 82 | background-repeat: no-repeat; 83 | border-top-right-radius: 0; 84 | border-bottom-right-radius: 0; 85 | mix-blend-mode: hard-light; 86 | filter: brightness(0.75); 87 | } 88 | 89 | .stork-wrapper-edible-dark .stork-output { 90 | position: absolute; 91 | width: 100%; 92 | margin-top: 0.5em; 93 | border-radius: 4px; 94 | display: flex; 95 | flex-direction: column; 96 | z-index: 100; 97 | color: var(--stork-text-color); 98 | font-weight: 400; 99 | font-family: inherit; 100 | } 101 | 102 | .stork-wrapper-edible-dark .stork-attribution a:link, 103 | .stork-wrapper-edible-dark .stork-attribution a:visited { 104 | color: var(--stork-blue-2); 105 | } 106 | 107 | .stork-wrapper-edible-dark .stork-output-visible { 108 | border: 1px solid var(--stork-border-color); 109 | box-shadow: 0px 0.4px 2.2px rgba(0, 0, 0, 0.011), 110 | 0px 1px 5.3px rgba(0, 0, 0, 0.016), 0px 1.9px 10px rgba(0, 0, 0, 0.02), 111 | 0px 3.4px 17.9px rgba(0, 0, 0, 0.024), 0px 6.2px 33.4px rgba(0, 0, 0, 0.029), 112 | 0px 15px 80px rgba(0, 0, 0, 0.04); 113 | background: var(--stork-background-color); 114 | } 115 | 116 | .stork-wrapper-edible-dark .stork-message { 117 | width: 100%; 118 | padding: 0.5em 1em; 119 | color: var(--stork-text-color); 120 | } 121 | 122 | .stork-wrapper-edible-dark .stork-attribution { 123 | width: 100%; 124 | padding: 0.5em 1em; 125 | font-size: 0.8em; 126 | color: var(--stork-text-color); 127 | } 128 | 129 | .stork-wrapper-edible-dark .stork-results { 130 | margin: 0; 131 | padding: 0; 132 | width: 100%; 133 | list-style-type: none; 134 | max-height: 25em; 135 | overflow-y: scroll; 136 | border-top: 1px solid var(--stork-border-color); 137 | border-bottom: 
1px solid var(--stork-border-color); 138 | box-shadow: inset 0em 0.7em 0.2em -0.5em hsla(0, 0%, 0%, 0.08), 139 | inset 0em -0.7em 0.2em -0.5em hsla(0, 0%, 0%, 0.08); 140 | } 141 | 142 | .stork-wrapper-edible-dark .stork-result:not(:last-child) { 143 | border-bottom: 1px solid var(--stork-border-color); 144 | } 145 | 146 | .stork-wrapper-edible-dark .stork-result.selected { 147 | background: var(--stork-violet-10); 148 | } 149 | 150 | .stork-wrapper-edible-dark .stork-result a:link { 151 | padding: 1em; 152 | display: block; 153 | color: currentcolor; 154 | text-decoration: none; 155 | } 156 | 157 | .stork-wrapper-edible-dark .stork-result p { 158 | margin: 0; 159 | } 160 | 161 | .stork-wrapper-edible-dark .stork-title { 162 | font-weight: bold; 163 | font-size: 0.95em; 164 | margin: 0; 165 | color: var(--stork-text-color); 166 | 167 | /* Flexbox container for the title and the score, when debugging */ 168 | display: flex; 169 | justify-content: space-between; 170 | } 171 | 172 | .stork-wrapper-edible-dark .stork-excerpt-container { 173 | margin-top: 0.75em; 174 | } 175 | 176 | .stork-wrapper-edible-dark .stork-excerpt { 177 | font-size: 0.8em; 178 | line-height: 1; 179 | margin: 0; 180 | color: var(--stork-gray-3); 181 | 182 | /* Flexbox container for the title and the score, when debugging */ 183 | display: flex; 184 | justify-content: space-between; 185 | } 186 | 187 | .stork-wrapper-edible-dark .stork-excerpt:not(:last-of-type) { 188 | margin-bottom: 0.6em; 189 | } 190 | 191 | .stork-wrapper-edible-dark .stork-highlight { 192 | background-color: var(--stork-lime-3); 193 | padding: 0 0.1em; 194 | } 195 | 196 | .stork-wrapper-edible-dark .stork-error { 197 | outline: 2px solid #c92a2a; 198 | } 199 | 200 | .stork-wrapper-edible-dark .stork-close-button { 201 | position: absolute; 202 | /* bottom: 0; */ 203 | right: 0; 204 | margin: 0.7em 0.6em; 205 | height: 1.2em; 206 | width: 1.2em; 207 | padding: 0px; 208 | background: linear-gradient( 209 | to bottom, 210 | 
hsl(0, 0%, 85%) 0%, 211 | hsl(0, 0%, 83%) 100% 212 | ); 213 | border: 1px solid hsla(0, 0%, 50%, 0.3); 214 | font-size: 1em; 215 | color: hsl(0, 0%, 50%); 216 | border-radius: 15%; 217 | line-height: 1; 218 | } 219 | 220 | .stork-wrapper-edible-dark .stork-close-button svg { 221 | width: 0.8em; 222 | position: relative; 223 | top: 1px; 224 | margin: auto; 225 | } 226 | 227 | .stork-wrapper-edible-dark .stork-close-button:hover { 228 | background: hsla(0, 0%, 78%); 229 | cursor: pointer; 230 | } 231 | 232 | .stork-wrapper-edible-dark .stork-close-button:active { 233 | background: hsla(0, 0%, 65%); 234 | } 235 | -------------------------------------------------------------------------------- /themes/edible.css: -------------------------------------------------------------------------------- 1 | .stork-wrapper-edible { 2 | position: relative; 3 | font-family: inherit; 4 | box-sizing: border-box; 5 | font-size: 1em; 6 | 7 | --stork-blue-2: #a5d8ff; 8 | --stork-blue-3: #74c0fc; 9 | --stork-blue-4: #4dabf7; 10 | --stork-blue-5: #339af0; 11 | --stork-blue-7: #1c7ed6; 12 | --stork-gray-8: #343a40; 13 | --stork-gray-9: #212529; 14 | --stork-yellow-2: #ffec99; 15 | 16 | --stork-border-color: hsl(0, 0%, 65%); 17 | --stork-background-color: hsla(0, 0%, 97%); 18 | --stork-text-color: var(--stork-gray-9); /* var() takes a custom property name directly; nesting var() inside it is invalid */ 19 | } 20 | 21 | .stork-wrapper-edible *, 22 | .stork-wrapper-edible *:before, 23 | .stork-wrapper-edible *:after { 24 | box-sizing: border-box; 25 | } 26 | 27 | .stork-wrapper-edible .stork-input { 28 | width: 100%; 29 | height: 2.4em; 30 | font-size: 1em; 31 | padding: 0.4em 0.8em; 32 | position: relative; 33 | box-shadow: inset 0 0.1em 0.3em hsla(0, 0%, 0%, 0.1); 34 | border: 1px solid var(--stork-border-color); 35 | border-radius: 8px; 36 | background-color: var(--stork-background-color); 37 | color: var(--stork-text-color); 38 | font-family: inherit; 39 | } 40 | 41 | .stork-wrapper-edible .stork-input:focus { 42 | outline: none; 43 | } 44 | 45 | .stork-wrapper-edible
.stork-progress { 46 | position: absolute; 47 | display: block; 48 | content: ""; 49 | transition: width 0.25s ease, opacity 0.4s ease 0.4s; 50 | box-shadow: none; 51 | background: linear-gradient( 52 | #8bb7daff, 53 | #a3cff6ff, 54 | #a8d0f5ff, 55 | #a8d0f5ff, 56 | #a3cff6ff, 57 | #98ccf6ff, 58 | #8cc9f8ff, 59 | #82c6f8ff, 60 | #77c4f8ff, 61 | #77c4f8ff, 62 | #81c5f8ff, 63 | #88c7f8ff, 64 | #93c9f7ff, 65 | #9ccdf6ff, 66 | #a3d0f5ff, 67 | #a8d1f5ff, 68 | #add3f4ff, 69 | #add3f4ff, 70 | #a9d1f5ff, 71 | #a8d1f5ff 72 | ); 73 | border-radius: 10px; /* input border radius + 2 */ 74 | border: 1px solid #0000; 75 | height: 2.4em; 76 | bottom: 0; 77 | mix-blend-mode: multiply; 78 | pointer-events: none; 79 | background-repeat: no-repeat; 80 | border-top-right-radius: 0; 81 | border-bottom-right-radius: 0; 82 | } 83 | 84 | .stork-wrapper-edible .stork-output { 85 | position: absolute; 86 | width: 100%; 87 | margin-top: 0.5em; 88 | border-radius: 4px; 89 | display: flex; 90 | flex-direction: column; 91 | z-index: 100; 92 | color: var(--stork-text-color); 93 | font-weight: 400; 94 | font-family: inherit; 95 | } 96 | 97 | .stork-wrapper-edible .stork-attribution a:link, 98 | .stork-wrapper-edible .stork-attribution a:visited { 99 | color: var(--stork-blue-7); 100 | } 101 | 102 | .stork-wrapper-edible .stork-output-visible { 103 | border: 1px solid var(--stork-border-color); 104 | box-shadow: 0px 0.4px 2.2px rgba(0, 0, 0, 0.011), 105 | 0px 1px 5.3px rgba(0, 0, 0, 0.016), 0px 1.9px 10px rgba(0, 0, 0, 0.02), 106 | 0px 3.4px 17.9px rgba(0, 0, 0, 0.024), 0px 6.2px 33.4px rgba(0, 0, 0, 0.029), 107 | 0px 15px 80px rgba(0, 0, 0, 0.04); 108 | background: var(--stork-background-color); 109 | } 110 | 111 | .stork-wrapper-edible .stork-message { 112 | width: 100%; 113 | padding: 0.5em 1em; 114 | color: var(--stork-text-color); 115 | } 116 | 117 | .stork-wrapper-edible .stork-attribution { 118 | width: 100%; 119 | padding: 0.5em 1em; 120 | font-size: 0.8em; 121 | color: 
var(--stork-text-color); 122 | } 123 | 124 | .stork-wrapper-edible .stork-results { 125 | margin: 0; 126 | padding: 0; 127 | width: 100%; 128 | list-style-type: none; 129 | max-height: 25em; 130 | overflow-y: scroll; 131 | border-top: 1px solid var(--stork-border-color); 132 | border-bottom: 1px solid var(--stork-border-color); 133 | box-shadow: inset 0em 0.7em 0.2em -0.5em hsla(0, 0%, 0%, 0.08), 134 | inset 0em -0.7em 0.2em -0.5em hsla(0, 0%, 0%, 0.08); 135 | } 136 | 137 | .stork-wrapper-edible .stork-result:not(:last-child) { 138 | border-bottom: 1px solid var(--stork-border-color); 139 | } 140 | 141 | .stork-wrapper-edible .stork-result.selected { 142 | background: var(--stork-blue-2); 143 | } 144 | 145 | .stork-wrapper-edible .stork-result a:link { 146 | padding: 1em; 147 | display: block; 148 | color: currentcolor; 149 | text-decoration: none; 150 | } 151 | 152 | .stork-wrapper-edible .stork-result p { 153 | margin: 0; 154 | } 155 | 156 | .stork-wrapper-edible .stork-title { 157 | font-weight: bold; 158 | font-size: 0.95em; 159 | margin: 0; 160 | color: var(--stork-text-color); 161 | 162 | /* Flexbox container for the title and the score, when debugging */ 163 | display: flex; 164 | justify-content: space-between; 165 | } 166 | 167 | .stork-wrapper-edible .stork-excerpt-container { 168 | margin-top: 0.75em; 169 | } 170 | 171 | .stork-wrapper-edible .stork-excerpt { 172 | font-size: 0.8em; 173 | line-height: 1; 174 | margin: 0; 175 | color: var(--stork-gray-8); 176 | 177 | /* Flexbox container for the title and the score, when debugging */ 178 | display: flex; 179 | justify-content: space-between; 180 | } 181 | 182 | .stork-wrapper-edible .stork-excerpt:not(:last-of-type) { 183 | margin-bottom: 0.6em; 184 | } 185 | 186 | .stork-wrapper-edible .stork-highlight { 187 | background-color: var(--stork-yellow-2); 188 | padding: 0 0.1em; 189 | } 190 | 191 | .stork-wrapper-edible .stork-error { 192 | outline: 2px solid #c92a2a; 193 | } 194 | 195 | .stork-wrapper-edible 
.stork-close-button { 196 | position: absolute; 197 | /* bottom: 0; */ 198 | right: 0; 199 | margin: 0.7em 0.6em; 200 | height: 1.2em; 201 | width: 1.2em; 202 | padding: 0px; 203 | background: linear-gradient( 204 | to bottom, 205 | hsl(0, 0%, 85%) 0%, 206 | hsl(0, 0%, 83%) 100% 207 | ); 208 | border: 1px solid hsla(0, 0%, 50%, 0.3); 209 | font-size: 1em; 210 | color: hsl(0, 0%, 50%); 211 | border-radius: 15%; 212 | line-height: 1; 213 | } 214 | 215 | .stork-wrapper-edible .stork-close-button svg { 216 | width: 0.8em; 217 | position: relative; 218 | top: 1px; 219 | margin: auto; 220 | } 221 | 222 | .stork-wrapper-edible .stork-close-button:hover { 223 | background: hsla(0, 0%, 78%); 224 | cursor: pointer; 225 | } 226 | 227 | .stork-wrapper-edible .stork-close-button:active { 228 | background: hsla(0, 0%, 65%); 229 | } 230 | -------------------------------------------------------------------------------- /themes/flat.css: -------------------------------------------------------------------------------- 1 | .stork-wrapper-flat { 2 | position: relative; 3 | font-family: inherit; 4 | box-sizing: border-box; 5 | font-size: 1em; 6 | 7 | --stork-blue-2: #a5d8ff; 8 | --stork-blue-3: #74c0fc; 9 | --stork-blue-4: #4dabf7; 10 | --stork-blue-5: #339af0; 11 | --stork-blue-7: #1c7ed6; 12 | --stork-gray-8: #343a40; 13 | --stork-gray-9: #212529; 14 | --stork-yellow-2: #ffec99; 15 | 16 | --stork-border-color: hsl(0, 0%, 80%); 17 | --stork-background-color: hsla(0, 0%, 97%); 18 | --stork-text-color: var(--stork-gray-9); 19 | 20 | --stork-input-height: 2.4em; 21 | } 22 | 23 | .stork-wrapper-flat *, 24 | .stork-wrapper-flat *:before, 25 | .stork-wrapper-flat *:after { 26 | box-sizing: border-box; 27 | } 28 | 29 | .stork-wrapper-flat .stork-input { 30 | width: 100%; 31 | height: var(--stork-input-height); 32 | font-size: 1em; 33 | padding: 0.4em 0.8em; 34 | position: relative; 35 | border: 2px solid var(--stork-border-color); 36 | border-radius: calc(var(--stork-input-height) / 2); 37 | 
background-color: var(--stork-background-color); 38 | color: var(--stork-text-color); 39 | font-family: inherit; 40 | } 41 | 42 | .stork-wrapper-flat .stork-input:focus { 43 | outline: none; 44 | } 45 | 46 | .stork-wrapper-flat .stork-progress { 47 | position: absolute; 48 | display: block; 49 | content: ""; 50 | bottom: 1px; 51 | background-color: var(--stork-blue-5); 52 | box-shadow: 0 0 8px var(--stork-blue-4); 53 | height: 1px; 54 | transition: width 0.25s ease, opacity 0.4s ease 0.4s; 55 | margin-left: calc(var(--stork-input-height) / 2); 56 | max-width: calc(100% - var(--stork-input-height)); 57 | } 58 | 59 | .stork-wrapper-flat .stork-output { 60 | position: absolute; 61 | width: 100%; 62 | margin-top: 0.5em; 63 | border-radius: 6px; 64 | display: flex; 65 | flex-direction: column; 66 | z-index: 100; 67 | color: var(--stork-text-color); 68 | font-weight: 400; 69 | font-family: inherit; 70 | } 71 | 72 | .stork-wrapper-flat .stork-attribution a:link, 73 | .stork-wrapper-flat .stork-attribution a:visited { 74 | color: var(--stork-blue-7); 75 | } 76 | 77 | .stork-wrapper-flat .stork-output-visible { 78 | border: 2px solid var(--stork-border-color); 79 | background: var(--stork-background-color); 80 | } 81 | 82 | .stork-wrapper-flat .stork-message { 83 | width: 100%; 84 | padding: 0.5em 1em; 85 | color: var(--stork-text-color); 86 | } 87 | 88 | .stork-wrapper-flat .stork-attribution { 89 | width: 100%; 90 | padding: 0.5em 1em; 91 | font-size: 0.8em; 92 | color: var(--stork-text-color); 93 | } 94 | 95 | .stork-wrapper-flat .stork-results { 96 | margin: 0; 97 | padding: 0; 98 | width: 100%; 99 | list-style-type: none; 100 | max-height: 25em; 101 | overflow-y: scroll; 102 | border-top: 1px solid var(--stork-border-color); 103 | border-bottom: 1px solid var(--stork-border-color); 104 | } 105 | 106 | .stork-wrapper-flat .stork-result:not(:last-child) { 107 | border-bottom: 1px solid var(--stork-border-color); 108 | } 109 | 110 | .stork-wrapper-flat 
.stork-result.selected { 111 | background: var(--stork-blue-2); 112 | } 113 | 114 | .stork-wrapper-flat .stork-result a:link { 115 | padding: 1em; 116 | display: block; 117 | color: currentcolor; 118 | text-decoration: none; 119 | } 120 | 121 | .stork-wrapper-flat .stork-result p { 122 | margin: 0; 123 | } 124 | 125 | .stork-wrapper-flat .stork-title { 126 | font-weight: bold; 127 | font-size: 0.95em; 128 | margin: 0; 129 | color: var(--stork-text-color); 130 | 131 | /* Flexbox container for the title and the score, when debugging */ 132 | display: flex; 133 | justify-content: space-between; 134 | } 135 | 136 | .stork-wrapper-flat .stork-excerpt-container { 137 | margin-top: 0.75em; 138 | } 139 | 140 | .stork-wrapper-flat .stork-excerpt { 141 | font-size: 0.8em; 142 | line-height: 1; 143 | margin: 0; 144 | color: var(--stork-gray-8); 145 | 146 | /* Flexbox container for the title and the score, when debugging */ 147 | display: flex; 148 | justify-content: space-between; 149 | } 150 | 151 | .stork-wrapper-flat .stork-excerpt:not(:last-of-type) { 152 | margin-bottom: 0.6em; 153 | } 154 | 155 | .stork-wrapper-flat .stork-highlight { 156 | background-color: var(--stork-yellow-2); 157 | padding: 0 0.1em; 158 | } 159 | 160 | .stork-wrapper-flat .stork-error { 161 | outline: 2px solid #c92a2a; 162 | } 163 | 164 | .stork-wrapper-flat .stork-close-button { 165 | position: absolute; 166 | right: 0; 167 | margin: 0.5em 0.5em; 168 | height: 1.4em; 169 | width: 1.4em; 170 | padding: 0px; 171 | background: hsl(0, 0%, 85%); 172 | border: 1px solid hsla(0, 0%, 70%); 173 | font-size: 1em; 174 | color: hsl(0, 0%, 50%); 175 | border-radius: 50%; 176 | line-height: 1; 177 | } 178 | 179 | .stork-wrapper-flat .stork-close-button svg { 180 | width: 11px; 181 | height: 11px; 182 | margin: auto; 183 | } 184 | 185 | .stork-wrapper-flat .stork-close-button:hover { 186 | background: hsla(0, 0%, 78%); 187 | cursor: pointer; 188 | } 189 | 190 | .stork-wrapper-flat .stork-close-button:active { 
191 | background: hsla(0, 0%, 65%); 192 | } 193 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "outDir": "./built", 4 | "allowJs": true, 5 | "target": "es5", 6 | "noImplicitAny": true, 7 | "strictNullChecks": true, 8 | "sourceMap": true 9 | }, 10 | "include": ["./js/**/*", "./pkg/*.js"] 11 | } 12 | -------------------------------------------------------------------------------- /webpack.common.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const CopyPlugin = require("copy-webpack-plugin"); 3 | const { CleanWebpackPlugin } = require("clean-webpack-plugin"); 4 | const { version } = require("./package.json"); 5 | const { DefinePlugin } = require("webpack"); 6 | 7 | module.exports = { 8 | resolve: { 9 | extensions: [".ts", ".tsx", ".js"] 10 | }, 11 | entry: { 12 | index: "./js/main.ts" 13 | }, 14 | output: { 15 | path: path.resolve(__dirname, "dist"), 16 | filename: "stork.js", 17 | library: "stork" 18 | }, 19 | experiments: { 20 | asyncWebAssembly: true 21 | }, 22 | devtool: "inline-source-map", 23 | plugins: [ 24 | new DefinePlugin({ 25 | "process.env.VERSION": JSON.stringify(version) 26 | }), 27 | new CleanWebpackPlugin(), 28 | new CopyPlugin({ 29 | patterns: [ 30 | { 31 | from: path.resolve(__dirname, "stork-wasm", "pkg", "stork_bg.wasm"), 32 | to: "stork.wasm" 33 | } 34 | ] 35 | }) 36 | ], 37 | module: { 38 | rules: [ 39 | { test: /\.ts?$/, loader: "ts-loader" }, 40 | { test: /\.js$/, loader: "source-map-loader", sideEffects: true }, 41 | { 42 | test: /\.js$/, 43 | loader: require.resolve("@open-wc/webpack-import-meta-loader") 44 | } 45 | ] 46 | } 47 | }; 48 | -------------------------------------------------------------------------------- /webpack.dev.js: 
-------------------------------------------------------------------------------- 1 | const { merge } = require("webpack-merge"); 2 | const common = require("./webpack.common.js"); 3 | 4 | module.exports = merge(common, { 5 | mode: "development", 6 | devtool: "inline-source-map", 7 | devServer: { 8 | static: "./dist" 9 | } 10 | }); 11 | -------------------------------------------------------------------------------- /webpack.prod.js: -------------------------------------------------------------------------------- 1 | const { merge } = require("webpack-merge"); 2 | const common = require("./webpack.common.js"); 3 | 4 | module.exports = merge(common, { 5 | mode: "production", 6 | devtool: "source-map" 7 | }); 8 | --------------------------------------------------------------------------------