├── .deepsource.toml
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   ├── feature_request.md
    │   └── others.md
    └── workflows
    │   ├── docker.yml
    │   ├── quality.yml
    │   ├── rust.yml
    │   └── windows.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── docker-compose.yml
├── docs
    ├── dev_notes.md
    ├── examples.md
    ├── fire-128.png
    ├── obsidian_example_2023-Feb-05.mp4
    ├── release_notes.md
    ├── release_notes_0.2_2024Sep.md
    ├── screen_record_20220514.mkv
    ├── screenshot_demo.png
    ├── screenshot_demo_640_400.png
    └── server.md
├── example.env
├── fireSeqSearch_addon
    ├── icons
    │   ├── fire-48.png
    │   ├── notebook_logo_32.png
    │   ├── notebook_logo_512.png
    │   └── notebook_logo_64.png
    ├── main.js
    ├── manifest.json
    ├── monkeyscript.user.js
    ├── options.html
    ├── options.js
    ├── violentmonkeyscript.user.js
    └── wordcloud_draw.js
├── fire_seq_search_server
    ├── Cargo.toml
    ├── debug_server.sh
    ├── debug_server_mac.sh
    ├── deny.toml
    ├── obsidian.sh
    ├── run_server.sh
    ├── src
    │   ├── http_client
    │   │   ├── endpoints.rs
    │   │   └── mod.rs
    │   ├── language_tools
    │   │   ├── cn_stopwords.rs
    │   │   ├── mod.rs
    │   │   └── tokenizer.rs
    │   ├── lib.rs
    │   ├── load_notes
    │   │   └── mod.rs
    │   ├── local_llm
    │   │   ├── example_llama_response.json
    │   │   └── mod.rs
    │   ├── main.rs
    │   ├── markdown_parser
    │   │   ├── markdown_to_text.rs
    │   │   ├── mod.rs
    │   │   └── pdf_parser.rs
    │   ├── post_query
    │   │   ├── app_uri.rs
    │   │   ├── highlighter.rs
    │   │   ├── hit_parsed.rs
    │   │   ├── logseq_uri.rs
    │   │   ├── mod.rs
    │   │   └── obsidian_uri.rs
    │   ├── query_engine
    │   │   └── mod.rs
    │   └── word_frequency
    │   │   └── mod.rs
    └── tests
    │   ├── resource
    │       ├── assets
    │       │   └── screenshot_demo_640_400.png
    │       ├── journals
    │       │   ├── 2022_02_26.md
    │       │   └── 2022_08_30.md
    │       ├── logseq
    │       │   └── pages-metadata.edn
    │       └── pages
    │       │   ├── International Language, Past, Present & Future by Walter John Clark.md
    │       │   ├── LATIN FOR BEGINNERS.md
    │       │   ├── Rust.md
    │       │   ├── Softmax.md
    │       │   ├── advanced_query.md
    │       │   ├── blog_thunderbird_zh.md
    │       │   ├── cyrillic.md
    │       │   ├── feditips.md
    │       │   ├── fireSeqSearch___test___5.md
    │       │   ├── 咖啡.md
    │       │   └── 孙子兵法.md
    │   ├── run_render.sh
    │   ├── unit_test_load_notes.rs
    │   ├── unit_test_post_query.rs
    │   └── unit_test_render_block.rs
└── pack_firefox_extension.sh


/.deepsource.toml:
--------------------------------------------------------------------------------
 1 | version = 1
 2 | 
 3 | [[analyzers]]
 4 | name = "javascript"
 5 | enabled = true
 6 | 
 7 | [[analyzers]]
 8 | name = "rust"
 9 | enabled = true
10 | 
11 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a bug report
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **What's the term you're searching**
14 | 
15 | 
16 | **Server-side log and version**
17 | Please review the log before posting. Sensitive or private data may be included
18 | 
19 | 
20 | **Client-side log and version**
21 | Please review the log before posting. Sensitive or private data may be included
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | Note: Only English and Chinese posts are allowed in the issues section. English is preferred.
33 | 请使用英文或中文发 issue.
34 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | Note: Only English and Chinese posts are allowed in the issues section. English is preferred.
29 | 请使用英文或中文发 issue.
30 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/others.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Others
 3 | about: Feel free to post other issues here
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | Don't worry if your issue couldn't fit in a template. The template is designed to save your time, instead of enforcing requirements for posting issues.
11 | 


--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
 1 | name: Build/release docker images
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - 'master'
 6 |     paths-ignore:
 7 |       - '**.md'
 8 | jobs:
 9 |   docker:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v3
13 |         with:
14 |           fetch-depth: 0
15 | 
16 |       - name: Login to GitHub Container Registry
17 |         uses: docker/login-action@v1
18 |         with:
19 |           registry: ghcr.io
20 |           username: ${{ github.repository_owner }}
21 |           password: ${{ secrets.GITHUB_TOKEN }}
22 | 
23 |       - name: Lower case for ghcr
24 |         id: ghcr_string
25 |         uses: ASzc/change-string-case-action@v1
26 |         with:
27 |           string: ${{ github.event.repository.full_name }}
28 | 
29 |       - name: Set up Docker Buildx
30 |         uses: docker/setup-buildx-action@v2
31 | 
32 |       - name: Cache Docker layers
33 |         uses: actions/cache@v2
34 |         with:
35 |           path: /tmp/.buildx-cache
36 |           key: ${{ runner.os }}-buildx-${{ github.sha }}
37 |           restore-keys: |
38 |             ${{ runner.os }}-buildx-
39 |       - name: Build and push
40 |         uses: docker/build-push-action@v3
41 |         with:
42 |           context: .
43 |           platforms: linux/amd64
44 |           push: true
45 |           tags: |
46 |             ghcr.io/${{ steps.ghcr_string.outputs.lowercase }}
47 |           cache-from: type=local,src=/tmp/.buildx-cache
48 |           cache-to: type=local,dest=/tmp/.buildx-cache-new
49 | 
50 |       - name: Move cache
51 |         run: |
52 |           rm -rf /tmp/.buildx-cache
53 |           mv /tmp/.buildx-cache-new /tmp/.buildx-cache
54 | 


--------------------------------------------------------------------------------
/.github/workflows/quality.yml:
--------------------------------------------------------------------------------
 1 | name: Quality Control
 2 | on: [push, pull_request]
 3 | 
 4 | env:
 5 |   CARGO_TERM_COLOR: always
 6 |   RUSTC_WRAPPER: "sccache"
 7 |   SCCACHE_GHA_ENABLED: "true"
 8 | 
 9 | jobs:
10 |   cargo-deny:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |     - uses: actions/checkout@v4
14 |     - name: Install Rust
15 |       uses: actions-rs/toolchain@v1
16 |       with:
17 |           toolchain: stable
18 |           profile: minimal
19 |           override: true
20 |     - name: Run sccache-cache
21 |       uses: mozilla-actions/sccache-action@v0.0.8
22 |     - name: Get Date
23 |       id: get-date
24 |       run: |
25 |         echo "date=$(/bin/date -u "+%Y%m%d")" >> $GITHUB_OUTPUT
26 |       shell: bash
27 |     - name: Cache cargo registry
28 |       uses: actions/cache@v3
29 |       continue-on-error: false
30 |       with:
31 |         path: |
32 |           ~/.cargo/registry
33 |         key: rust-${{ runner.os }}-${{ matrix.rust }}-${{ hashFiles('**/Cargo.toml') }}-${{ steps.get-date.outputs.date }}-qc
34 |         restore-keys: |
35 |           rust-${{ runner.os }}-${{ matrix.rust }}-${{ hashFiles('**/Cargo.toml') }}-
36 |           rust-${{ runner.os }}
37 |     - name: Install cargo deny
38 |       run: |
39 |           cargo install --locked cargo-deny
40 |     - name: cargo deny license
41 |       run: |
42 |           cd fire_seq_search_server
43 |           cargo deny check licenses
44 |     - name: cargo deny advisories
45 |       run: |
46 |           cd fire_seq_search_server
47 |           cargo deny check advisories          
48 |     - name: clippy
49 |       run: |
50 |           cd fire_seq_search_server
51 |           rustup component add clippy
52 |           cargo clippy
53 | 


--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
  1 | name: Linux and macOS
  2 | 
  3 | # Template Reference: https://www.infinyon.com/blog/2021/04/github-actions-best-practices/
  4 | on:
  5 |   push:
  6 |     branches: [ master, llm_candidate ]
  7 |   pull_request:
  8 |     branches: [ master, llm_candidate ]
  9 | 
 10 | env:
 11 |   CARGO_TERM_COLOR: always
 12 |   RUSTC_WRAPPER: "sccache"
 13 |   SCCACHE_GHA_ENABLED: "true"
 14 | 
 15 | jobs:
 16 |   build:
 17 |     name: Cargo test  (${{ matrix.rust }}) (${{ matrix.os }})
 18 |     runs-on: ${{ matrix.os }}
 19 |     strategy:
 20 |       fail-fast: false
 21 |       matrix:
 22 |         os: [ubuntu-latest, macos-13, macos-14]
 23 |         rust: [stable]
 24 |     steps:
 25 |       - uses: actions/checkout@v4
 26 |       - name: Run sccache-cache
 27 |         uses: mozilla-actions/sccache-action@v0.0.8
 28 |       - name: Install Rust ${{ matrix.rust }}
 29 |         uses: actions-rs/toolchain@v1
 30 |         with:
 31 |           toolchain: ${{ matrix.rust }}
 32 |           profile: minimal
 33 |           override: true
 34 |       - name: Run sccache-cache
 35 |         uses: mozilla-actions/sccache-action@v0.0.8
 36 |       - name: Get Date
 37 |         id: get-date
 38 |         run: |
 39 |           echo "date=$(/bin/date -u "+%Y%m%d")" >> $GITHUB_OUTPUT
 40 |         shell: bash
 41 |       - name: Cache cargo registry
 42 |         uses: actions/cache@v3
 43 |         continue-on-error: false
 44 |         with:
 45 |           path: |
 46 |             ~/.cargo/registry
 47 |           key: rust-${{ runner.os }}-${{ matrix.rust }}-${{ hashFiles('**/Cargo.toml') }}-${{ steps.get-date.outputs.date }}-build
 48 |           restore-keys: |
 49 |             rust-${{ runner.os }}-${{ matrix.rust }}-${{ hashFiles('**/Cargo.toml') }}-
 50 |             rust-${{ runner.os }}
 51 | # Real tests starts here
 52 |       - name: Check version
 53 |         run: rustup --version && rustc --version && cargo --version
 54 |       - name: Install rustfmt
 55 |         run: |
 56 |           rustup component add rustfmt
 57 |           which rustfmt && rustfmt --version
 58 |       - name: Build
 59 |         run: |
 60 |           cd fire_seq_search_server
 61 |           cargo build --verbose
 62 |       - name: Run tests
 63 |         run: |
 64 |           cd fire_seq_search_server
 65 |           cargo test --verbose
 66 |       - name: Install
 67 |         run: |
 68 |           cd fire_seq_search_server
 69 |           cargo install --path .
 70 |       - name: Package
 71 |         run: |
 72 |           cd fire_seq_search_server
 73 |           cargo package --verbose
 74 |       - name: Run sccache stat for check
 75 |         shell: bash
 76 |         run: ${SCCACHE_PATH} --show-stats
 77 | 
 78 | 
 79 |   release:
 80 |     needs: build
 81 |     runs-on: ubuntu-latest
 82 |     steps:
 83 |       - uses: actions/checkout@v2
 84 |       - name: Install Rust stable
 85 |         uses: actions-rs/toolchain@v1
 86 |         with:
 87 |           toolchain: stable
 88 |           profile: minimal
 89 |           override: true
 90 |       - name: Run sccache-cache
 91 |         uses: mozilla-actions/sccache-action@v0.0.8
 92 |       - name: Get Date
 93 |         id: get-date
 94 |         run: |
 95 |           echo "date=$(/bin/date -u "+%Y%m%d")" >> $GITHUB_OUTPUT
 96 |         shell: bash
 97 |       - name: Cache cargo registry
 98 |         uses: actions/cache@v3
 99 |         continue-on-error: false
100 |         with:
101 |           path: |
102 |             ~/.cargo/registry
103 |           key: rust-${{ runner.os }}-${{ matrix.rust }}-${{ hashFiles('**/Cargo.toml') }}-${{ steps.get-date.outputs.date }}-build
104 |           restore-keys: |
105 |             rust-${{ runner.os }}-${{ matrix.rust }}-${{ hashFiles('**/Cargo.toml') }}-
106 |             rust-${{ runner.os }}
107 |       - name: Build
108 |         run: |
109 |           cd fire_seq_search_server
110 |           cargo build --verbose
111 |       - name: Save Artifact
112 |         run: |
113 |           mkdir builds
114 |           mv fire_seq_search_server/target/debug/fire_seq_search_server builds
115 |       - name: Upload Artifact
116 |         uses: actions/upload-artifact@v4
117 |         with:
118 |           name: logseq-${{ runner.os }}-builds
119 |           path: builds
120 |       - name: Run sccache stat for check
121 |         shell: bash
122 |         run: ${SCCACHE_PATH} --show-stats
123 | 
124 | 
125 | 


--------------------------------------------------------------------------------
/.github/workflows/windows.yml:
--------------------------------------------------------------------------------
  1 | # This file is based on https://github.com/rust-lang/rustfmt/blob/master/.github/workflows/windows.yml
  2 | # rustfmt is licensed in MIT Copyright (c) 2016-2021 The Rust Project Developers https://github.com/rust-lang/rustfmt/blob/master/LICENSE-MIT
  3 | 
  4 | 
  5 | name: Windows
  6 | on:
  7 |   push:
  8 |     branches: [ master ]
  9 |   pull_request:
 10 |     branches: [ master ]
 11 | 
 12 | env:
 13 |   CARGO_TERM_COLOR: always
 14 |   RUSTC_WRAPPER: "sccache"
 15 |   SCCACHE_GHA_ENABLED: "true"
 16 | 
 17 | 
 18 | jobs:
 19 |   build:
 20 |     runs-on: windows-latest
 21 |     name: (${{ matrix.target }}, ${{ matrix.cfg_release_channel }})
 22 |     env:
 23 |       CFG_RELEASE_CHANNEL: ${{ matrix.cfg_release_channel }}
 24 |     strategy:
 25 |       fail-fast: false
 26 |       matrix:
 27 |         target: [
 28 |           i686-pc-windows-msvc,
 29 |           x86_64-pc-windows-msvc,
 30 |           i686-pc-windows-gnu,
 31 |           x86_64-pc-windows-gnu,
 32 |         ]
 33 |         cfg_release_channel: [stable]
 34 | 
 35 |     steps:
 36 |       # The Windows runners have autocrlf enabled by default
 37 |       # which causes failures for some of rustfmt's line-ending sensitive tests
 38 |       - name: disable git eol translation
 39 |         run: git config --global core.autocrlf false
 40 |       - name: checkout
 41 |         uses: actions/checkout@v3
 42 | 
 43 |         # Run build
 44 |       - name: Install Rustup using win.rustup.rs
 45 |         run: |
 46 |           # Disable the download progress bar which can cause perf issues
 47 |           $ProgressPreference = "SilentlyContinue"
 48 |           Invoke-WebRequest https://win.rustup.rs/ -OutFile rustup-init.exe
 49 |           .\rustup-init.exe -y --default-host=x86_64-pc-windows-msvc --default-toolchain=none
 50 |           del rustup-init.exe
 51 |           rustup target add ${{ matrix.target }}
 52 |         shell: powershell
 53 |       - name: Run sccache-cache
 54 |         uses: mozilla-actions/sccache-action@v0.0.8
 55 |       - name: Get Date
 56 |         id: get-date
 57 |         run: |
 58 |           echo "date=$(/bin/date -u "+%Y%m%d")" >> $GITHUB_OUTPUT
 59 |         shell: bash
 60 |       - name: Cache cargo registry
 61 |         uses: actions/cache@v3
 62 |         continue-on-error: false
 63 |         with:
 64 |           path: |
 65 |             ~/.cargo/registry
 66 |           key: rust-${{ runner.os }}-${{ matrix.target }}-${{ hashFiles('**/Cargo.toml') }}-${{ steps.get-date.outputs.date }}-build
 67 |           restore-keys: |
 68 |             rust-${{ runner.os }}-${{ matrix.target }}-${{ hashFiles('**/Cargo.toml') }}
 69 |             rust-${{ runner.os }}
 70 | 
 71 |       - name: build
 72 |         run: |
 73 |           rustc -Vv
 74 |           cargo -V
 75 |           cd fire_seq_search_server
 76 |           cargo build
 77 |         shell: cmd
 78 | 
 79 |       - name: test
 80 |         run: |
 81 |           cd fire_seq_search_server
 82 |           cargo test
 83 |         shell: cmd
 84 |       - name: Package
 85 |         run: |
 86 |           cd fire_seq_search_server
 87 |           cargo package --verbose
 88 |         shell: cmd
 89 |       - name: Run sccache stat for check
 90 |         shell: bash
 91 |         run: ${SCCACHE_PATH} --show-stats
 92 | 
 93 | 
 94 |   release:
 95 |     needs: build
 96 |     runs-on: windows-latest
 97 |     name: Build for (${{ matrix.target }} with ${{ matrix.cfg_release_channel }})
 98 |     env:
 99 |       CFG_RELEASE_CHANNEL: ${{ matrix.cfg_release_channel }}
100 |     strategy:
101 |       fail-fast: false
102 |       matrix:
103 |         target: [
104 |           i686-pc-windows-msvc,
105 |           x86_64-pc-windows-msvc,
106 |         ]
107 |         cfg_release_channel: [ stable ]
108 |     steps:
109 |       - name: disable git eol translation
110 |         run: git config --global core.autocrlf false
111 |       - name: checkout
112 |         uses: actions/checkout@v3
113 |       - name: Install Rustup using win.rustup.rs
114 |         run: |
115 |           # Disable the download progress bar which can cause perf issues
116 |           $ProgressPreference = "SilentlyContinue"
117 |           Invoke-WebRequest https://win.rustup.rs/ -OutFile rustup-init.exe
118 |           .\rustup-init.exe -y --default-host=x86_64-pc-windows-msvc --default-toolchain=none
119 |           del rustup-init.exe
120 |           rustup target add ${{ matrix.target }}
121 |         shell: powershell
122 |       - name: Run sccache-cache
123 |         uses: Xuanwo/sccache-action@c94e27bef21ab3fb4a5152c8a878c53262b4abb0
124 |         with:
125 |           version: "v0.4.0-pre.6"
126 |       - name: Get Date
127 |         id: get-date
128 |         run: |
129 |           echo "date=$(/bin/date -u "+%Y%m%d")" >> $GITHUB_OUTPUT
130 |         shell: bash
131 |       - name: Cache cargo registry and sccache
132 |         uses: actions/cache@v3
133 |         continue-on-error: false
134 |         with:
135 |           path: |
136 |             ~/.cargo/registry
137 |           key: rust-${{ runner.os }}-${{ matrix.target }}-${{ hashFiles('**/Cargo.toml') }}-${{ steps.get-date.outputs.date }}-build
138 |           restore-keys: |
139 |             rust-${{ runner.os }}-${{ matrix.target }}-${{ hashFiles('**/Cargo.toml') }}
140 |             rust-${{ runner.os }}
141 |       - name: build
142 |         run: |
143 |           rustc -Vv
144 |           cargo -V
145 |           cd fire_seq_search_server
146 |           cargo build  --release
147 |         shell: cmd
148 |       - name: test
149 |         run: |
150 |           cd fire_seq_search_server
151 |           cargo test
152 |         shell: cmd
153 |       - name: Package
154 |         run: |
155 |           cd fire_seq_search_server
156 |           cargo package --verbose
157 |         shell: cmd
158 |       - name: Save Artifact
159 |         run: |
160 |           mkdir builds
161 |           mv fire_seq_search_server/target/debug/fire_seq_search_server.exe builds
162 |         shell: cmd
163 |       - name: Upload Artifact
164 |         uses: actions/upload-artifact@v4
165 |         with:
166 |           name: logseq-${{ runner.os }}-builds
167 |           path: builds
168 |       - name: Run sccache stat for check
169 |         shell: bash
170 |         run: ${SCCACHE_PATH} --show-stats
171 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | fire_seq_search_server/Cargo.lock
  2 | fire_seq_search_server/target
  3 | 
  4 | # === https://github.com/rust-lang/rust/blob/master/.gitignore ===
  5 | 
  6 | ## File system
  7 | .DS_Store
  8 | desktop.ini
  9 | 
 10 | ## Editor
 11 | *.swp
 12 | *.swo
 13 | Session.vim
 14 | .cproject
 15 | .idea
 16 | *.iml
 17 | .vscode
 18 | .project
 19 | .favorites.json
 20 | .settings/
 21 | 
 22 | ## Tool
 23 | .valgrindrc
 24 | .cargo
 25 | # Included because it is part of the test case
 26 | !/src/test/run-make/thumb-none-qemu/example/.cargo
 27 | 
 28 | ## Configuration
 29 | /config.toml
 30 | /Makefile
 31 | config.mk
 32 | config.stamp
 33 | no_llvm_build
 34 | 
 35 | ## Build
 36 | /dl/
 37 | /doc/
 38 | /inst/
 39 | /llvm/
 40 | /mingw-build/
 41 | /build/
 42 | /dist/
 43 | /unicode-downloads
 44 | /target
 45 | /src/tools/x/target
 46 | # Generated by compiletest for incremental
 47 | /tmp/
 48 | # Created by default with `src/ci/docker/run.sh`
 49 | /obj/
 50 | 
 51 | ## Temporary files
 52 | *~
 53 | \#*
 54 | \#*\#
 55 | .#*
 56 | 
 57 | ## Tags
 58 | tags
 59 | tags.*
 60 | TAGS
 61 | TAGS.*
 62 | 
 63 | ## Python
 64 | __pycache__/
 65 | *.py[cod]
 66 | *$py.class
 67 | 
 68 | ## Node
 69 | node_modules
 70 | package-lock.json
 71 | 
 72 | ## Rustdoc GUI tests
 73 | src/test/rustdoc-gui/src/**.lock
 74 | 
 75 | # ==== GitHub ignore list for node ====
 76 | # Logs
 77 | logs
 78 | *.log
 79 | npm-debug.log*
 80 | yarn-debug.log*
 81 | yarn-error.log*
 82 | lerna-debug.log*
 83 | 
 84 | # Diagnostic reports (https://nodejs.org/api/report.html)
 85 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 86 | 
 87 | # Runtime data
 88 | pids
 89 | *.pid
 90 | *.seed
 91 | *.pid.lock
 92 | 
 93 | # Directory for instrumented libs generated by jscoverage/JSCover
 94 | lib-cov
 95 | 
 96 | # Coverage directory used by tools like istanbul
 97 | coverage
 98 | *.lcov
 99 | 
100 | # nyc test coverage
101 | .nyc_output
102 | 
103 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
104 | .grunt
105 | 
106 | # Bower dependency directory (https://bower.io/)
107 | bower_components
108 | 
109 | # node-waf configuration
110 | .lock-wscript
111 | 
112 | # Compiled binary addons (https://nodejs.org/api/addons.html)
113 | build/Release
114 | 
115 | # Dependency directories
116 | node_modules/
117 | jspm_packages/
118 | 
119 | # TypeScript v1 declaration files
120 | typings/
121 | 
122 | # TypeScript cache
123 | *.tsbuildinfo
124 | 
125 | # Optional npm cache directory
126 | .npm
127 | 
128 | # Optional eslint cache
129 | .eslintcache
130 | 
131 | # Microbundle cache
132 | .rpt2_cache/
133 | .rts2_cache_cjs/
134 | .rts2_cache_es/
135 | .rts2_cache_umd/
136 | 
137 | # Optional REPL history
138 | .node_repl_history
139 | 
140 | # Output of 'npm pack'
141 | *.tgz
142 | 
143 | # Yarn Integrity file
144 | .yarn-integrity
145 | 
146 | # dotenv environment variables file
147 | .env
148 | .env.test
149 | 
150 | # parcel-bundler cache (https://parceljs.org/)
151 | .cache
152 | 
153 | # Next.js build output
154 | .next
155 | 
156 | # Nuxt.js build / generate output
157 | .nuxt
158 | dist
159 | 
160 | # Gatsby files
161 | .cache/
162 | # Comment in the public line in if your project uses Gatsby and *not* Next.js
163 | # https://nextjs.org/blog/next-9-1#public-directory-support
164 | # public
165 | 
166 | # vuepress build output
167 | .vuepress/dist
168 | 
169 | # Serverless directories
170 | .serverless/
171 | 
172 | # FuseBox cache
173 | .fusebox/
174 | 
175 | # DynamoDB Local files
176 | .dynamodb/
177 | 
178 | # TernJS port file
179 | .tern-port
180 | 
181 | 
182 | 
183 | /fire_seq_search_server/fire_seq_search_server
184 | /fireSeqSearch_chrome.zip
185 | /fireSeqSearch.zip
186 | /chrome_tmp
187 | 
188 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rust:1.65-buster AS builder
 2 | 
 3 | WORKDIR /fire_seq_search_server
 4 | COPY ./fire_seq_search_server .
 5 | 
 6 | RUN cargo install --path .
 7 | 
 8 | FROM ubuntu:20.04
 9 | COPY --from=builder /usr/local/cargo/bin/fire_seq_search_server /usr/local/bin/fire_seq_search_server
10 | 
11 | ENV RUST_LOG=debug
12 | CMD ["sh", "-c", "fire_seq_search_server --notebook_path $NOTEBOOK_DIR --host 0.0.0.0:3030"]
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021-2023 Zhenbo Li
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | fireSeqSearch: Append Logseq/Obsidian notes while Googling
 2 | 
 3 | Introduction
 4 | --------
 5 | [fireSeqSearch](https://github.com/Endle/fireSeqSearch) is inspired by [Evernote](https://evernote.com)'s browser extension - if we search a term, for example, `softmax` in Google, [fireSeqSearch](https://github.com/Endle/fireSeqSearch) will also search in our personal notebook, and append the hits into Google results.
 6 | 
 7 | More examples at <https://github.com/Endle/fireSeqSearch/blob/master/docs/examples.md>
 8 | 
 9 | 
10 | 
11 | How to use it
12 | ------------------
13 | You need to install **BOTH** the server-side app and the browser extension. The server reads your logseq notebooks in read-only mode, and hosts endpoints at 127.0.0.1:3030.
14 | 
15 | ### Install Browser Extension  
16 | 1. Install latest web extension <https://addons.mozilla.org/en-US/firefox/addon/fireseqsearch/>   
17 | 2. If you're using other browser, you can install userscirpts instead. [Tampermonkey](https://www.tampermonkey.net/) =>  [monkeyscript.user.js](https://github.com/Endle/fireSeqSearch/raw/master/fireSeqSearch_addon/monkeyscript.user.js).  [Violentmonkey](https://violentmonkey.github.io/)  => [violentmonkeyscript.user.js](https://github.com/Endle/fireSeqSearch/blob/master/fireSeqSearch_addon/violentmonkeyscript.user.js)
18 | 
19 | 
20 | ### Install Local Server
21 | 
22 | **Obsidian MD** users: Run `fire_seq_search_server --notebook_path <path> --obsidian-md`. [Example obsidian.sh](https://github.com/Endle/fireSeqSearch/blob/master/fire_seq_search_server/obsidian.sh)  
23 | 
24 | 
25 | #### Windows
26 | Steps:  
27 | 1. Download the latest release at <https://github.com/Endle/fireSeqSearch/releases>
28 | 2. If you're using PowerShell, run `.\fire_seq_search_server.exe  --notebook_path C:\Users\li\logseq_notebook`
29 | 3. If you're using Msys2, run `./fire_seq_search_server --notebook_path /c/Users/li/logseq_notebook`
30 | 4. Please remember to change the path to your notebook
31 | 
32 | #### Linux and macOS
33 | 1. Install rust. See <https://doc.rust-lang.org/cargo/getting-started/installation.html>
34 | 2. `git clone https://github.com/Endle/fireSeqSearch`
35 | 3. `cd fire_seq_search_server && cargo build`
36 | 4. `target/debug/fire_seq_search_server --notebook_path /home/li/my_notebook`
37 | 5. Min rust version: See https://github.com/Endle/fireSeqSearch/blob/master/.github/workflows/rust.yml#L21
38 | 
39 | 
40 | 
41 | License
42 | ----------------
43 | This project (both server and addon) is using MIT license. Some third party library may have other licenses (see source code)
44 | 
45 | 
46 | <a href="https://www.flaticon.com/free-icons/ui" title="ui icons">Ui icons created by manshagraphics - Flaticon</a>
47 | 
48 | 
49 | LOGO link: <https://www.flaticon.com/free-icon/web-browser_7328762>
50 | 
51 | 
52 | LOGO license: Flaticon license
53 | 
54 | 
55 | How it works
56 | ---------
57 | This is what [fireSeqSearch](https://github.com/Endle/fireSeqSearch) does on my logseq notebook. I had to split it into two parts because Firefox extensions are not allowed to access local files.
58 | 
59 | fireSeqSearch has two parts:
60 | 
61 | ### 1. search server
62 | It read all local loseq notebooks, and hosts logseq pages on http://127.0.0.1:3030
63 | 
64 | It provides the API `http://127.0.0.1:3030/query/`
65 | 
66 | 
67 | ### 2. Browser extension
68 | Every time we use search engine, it will fetch `http://127.0.0.1:3030/query/keywords`and append all hits to the web page.
69 | 
70 | 
71 | Similar Projects
72 | --------------
73 | * [karlicoss/promnesia](https://github.com/karlicoss/promnesia)  - [Promnesia](https://github.com/karlicoss/promnesia) is a mature and interesting project, aming a more ambitious goal. [fireSeqSearch](https://github.com/Endle/fireSeqSearch) only does one thing - append logseq hits to search engine results.
74 | * Logseq Copilot - https://chrome.google.com/webstore/detail/logseq-copilot/hihgfcgbmnbomabfdbajlbpnacndeihl
75 | 
76 | Star History
77 | --------
78 | 
79 | 
80 | [![Star History Chart](https://api.star-history.com/svg?repos=Endle/fireSeqSearch&type=Date)](https://star-history.com/#Endle/fireSeqSearch&Date)
81 | 
82 | Provided by <https://star-history.com>
83 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 |   fire_seq_search_server:
 5 |     image: ghcr.io/endle/fireseqsearch:latest
 6 |     env_file: .env
 7 |     environment:
 8 |       - NOTEBOOK_DIR
 9 |     restart: always
10 |     ports:
11 |       - "127.0.0.1:3030:3030"
12 |     volumes:
13 |       - ${NOTEBOOK_DIR}:${NOTEBOOK_DIR}:Z
14 | 


--------------------------------------------------------------------------------
/docs/dev_notes.md:
--------------------------------------------------------------------------------
 1 | ## How to set manifest.json permission
 2 | 
 3 | I received an answer at <https://matrix.to/#/!CuzZVoCbeoDHsxMCVJ:mozilla.org/$6XNrIyFrumDw4gKExuuaxqNpk_kqsCNmfCicvDQxrjE?via=mozilla.org&via=matrix.org&via=humanoids.be>
 4 | 
 5 | Due to <https://bugzilla.mozilla.org/show_bug.cgi?id=1350523> I should not include port.
 6 | 
 7 | ## Why use local server
 8 | 
 9 | Firefox extension is not allowed to read local files.
10 | 
11 | ## Why insert fireSeqSearchDom before document.body.firstChild
12 | 
13 | I struggle for <google.com>. I tried several dom elements, like `search`, `GyAeWb`, but `insertBefore()` failed.
14 | By naive frontend knowledge is not sufficient to solve it. 2022-Jan-28
15 | 
16 | ## Rust version needed
17 | 
18 | lz4_flex is using newly stabled features. tantivy depends on it.
19 | 
20 | clap-rs even requires newer versions, so I'm only supporting 1.55+. 2022-Feb-05
21 | 
22 | ## Match all Google regions
23 | 
24 | In 0.0.4, I added one extra domain. I think I should find a more elegent way.
25 | 
26 | 
27 | ## CLion's shell is confusing under Windows  
28 | I got `ld: cannot find -lntdll`. No idea why it happens. However, my previous clone is good. Why this terrible OS is so popular?!
29 | 
30 | ## How to handle long paragraph
31 | <https://github.com/Endle/fireSeqSearch/issues/57>
32 | 
33 | On 2022-Nov-13, I tried https://bminixhofer.github.io/nnsplit . This tool is powerful. However, the model is too slow for my purpose. 
34 | 


--------------------------------------------------------------------------------
/docs/examples.md:
--------------------------------------------------------------------------------
 1 | ## LLM
 2 | 
 3 | 2024 Sept 22
 4 | 
 5 | https://github.com/user-attachments/assets/b0a4ca66-0a33-401a-a916-af7a69f2ae7b
 6 | 
 7 | ## ObsidianMD
 8 | 
 9 | [obsidian_example_2023-Feb-05.mp4](
10 | https://user-images.githubusercontent.com/3221521/216853025-5cb82b18-fbcc-438e-8ff6-f791713c6b8b.mp4)
11 | 
12 | 
13 | # Old examples (before highlighter)
14 | 
15 | 
16 | ![screenshot_demo](https://user-images.githubusercontent.com/3221521/168455027-965da612-b783-4d92-83e2-4cd7b4830a43.png)
17 | 
18 | <video src="https://user-images.githubusercontent.com/3221521/168455012-e1183f62-4682-4230-84e7-8a461d8985a0.mp4"></video>
19 | 
20 | 


--------------------------------------------------------------------------------
/docs/fire-128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Endle/fireSeqSearch/143f6168d44148c2fa40ec2e8a512e2a2c44c638/docs/fire-128.png


--------------------------------------------------------------------------------
/docs/obsidian_example_2023-Feb-05.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Endle/fireSeqSearch/143f6168d44148c2fa40ec2e8a512e2a2c44c638/docs/obsidian_example_2023-Feb-05.mp4


--------------------------------------------------------------------------------
/docs/release_notes.md:
--------------------------------------------------------------------------------
 1 | ### 0.1.3  
 2 | 
 3 | #### New Feature: Generate wordcloud.  
 4 | 
 5 | Just visit `http://127.0.0.1:3030/wordcloud`, and fireSeqSearch will generate a wordcloud with your logseq notes. Each word in the cloud is clickable. With a single click, you can search your top-words in search engines and your personal notes simultaneously.  
 6 | 
 7 | [This demo video](https://github.com/Endle/fireSeqSearch/assets/3221521/524fe70d-a128-4393-bd26-bee71871f38e) used `note of greek myth`, created by [yongerG](https://www.douban.com/note/807432536/?_i=8350280BMJZhl7). This note is [licensed with CC-BY-SA-4.0 license](https://github.com/Lihaogx/graph-note-of-greek-myth/blob/main/LICENSE).  
 8 | 
 9 | Thanks to [timdream](https://timdream.org/) and other contributors for the amazing library [wordcloud2.js](https://github.com/timdream/wordcloud2.js). 
10 | 
11 | #### New Feature:  Allow to filter out zotero imported pages [Issue 122](https://github.com/Endle/fireSeqSearch/issues/122)
12 | 
13 | ### 0.1.2  
14 | New server-side feature: [Read and Search PDF contents](https://github.com/Endle/fireSeqSearch/issues/63)! In a logseq page, the PDF link `![title](../assets/doc_000123_0.pdf)` will be parsed, and appended to the document.
15 | 
16 | #### How to use it  
17 | This feature is turned off by default. Adding `--parse-pdf-links` to enable PDF parsing. [See example](https://github.com/Endle/fireSeqSearch/blob/81a9c2fc53ef589e8e63d19467825d63a84bd404/fire_seq_search_server/debug_server.sh#L8)
18 | 
19 | Deficient: Performance. It needs further evaluation.
20 | 
21 | #### Thanks  
22 | The crate [PDF-extract](https://github.com/jrmuizel/pdf-extract) makes this new feature possible. Thanks [Jeff Muizelaar](https://github.com/jrmuizel) and [Joep Meindertsma](https://github.com/joepio) for it.  
23 | 
24 | 
25 | [Clifford Enoc](https://github.com/cliffordx) created this feature request.  
26 | 
27 | 
28 | ### 0.1.1  
29 | This is the first time for bumping the **MINOR version** for a big new feature:
30 | 
31 | ObsidianMD support!
32 | 
33 | Bug fixes with contribution of xxchan.
34 | Dev change: Added sccache with the support of xuanwo.
35 | 
36 | This a server side update.
37 | 
38 | ### 0.0.22
39 | This is both server-side and client-side update.  
40 | 
41 | New feature: [include journal pages in search results](https://github.com/Endle/fireSeqSearch/issues/65). This feature is turned off by default. Use `--enable-journal-query` to enable it.
42 | 
43 | Currently, I haven't figured out an approach to generate the Logseq URI for a specific journal page.
44 | 
45 | ### 0.0.19
46 | This is a server-side update.  
47 | 
48 | 1. Fixed [highlight for Cyrillic letters](https://github.com/Endle/fireSeqSearch/issues/59).  
49 | 2. Improvement: When a paragraph is too long, use its summary (See [Issue 57](https://github.com/Endle/fireSeqSearch/issues/57) and [commit](https://github.com/Endle/fireSeqSearch/commit/fb15a17bb9a47754bb7817891b01f08108c8c952))  
50 | 
51 | ### 0.0.18
52 | Exciting new UI by @phoenixeliot and @yoyurec  
53 | Thank you for your contribution!
54 | 
55 | No change at server side. All you need is to update Firefox extension or user script.
56 | 
57 | ### 0.0.16
58 | 
59 | 1. Experimental support to search summary.
60 | 2. Parse markdown before feeding to tantivy. It expects to reduce false positive in search hits.
61 | 
62 | #### How to enable search summary
63 | 1. Update server and Firefox extension to last version.
64 | 2. Firefox Tools->Settings->Extension->fireSeqSearch, enable "Show Summary"
65 | 
66 | #### Deficient
67 | If the block is very long, for example, you clipped a long article into logseq, then the summary would be hard (or useless) to read. That's why there is a "Hide Summary" button.
68 | 
69 | #### Thanks
70 | @raphlinus and other https://github.com/raphlinus/pulldown-cmark developers  
71 | @arranf and @fbecart for https://github.com/fbecart/markdown_to_text
72 | 
73 | 


--------------------------------------------------------------------------------
/docs/release_notes_0.2_2024Sep.md:
--------------------------------------------------------------------------------
 1 | ### 0.2.1  
 2 | 
 3 | New feature: Note Summarization with Local LLM. 
 4 | 
 5 | What happens locally, what stays locally. 
 6 | 
 7 | #### Run server with local LLM  
 8 | fireSeqSearch facilitates [llamafile](https://github.com/Mozilla-Ocho/llamafile) by [Mozilla](https://github.com/Mozilla-Ocho). 
 9 | 
10 | ```
11 | mkdir -pv ~/.llamafile && cd ~/.llamafile
12 | wget https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.2-llamafile/resolve/main/mistral-7b-instruct-v0.2.Q4_0.llamafile?download=true
13 | chmod +x mistral-7b-instruct-v0.2.Q4_0.llamafile
14 | ```
15 | 
16 | After that, compile and run fireSeqSearch with LLM   
17 | ```
18 | cargo build --features llm
19 | target/debug/fire_seq_search_server --notebook_path ~/logseq
20 | # Obsidian users
21 | target/debug/fire_seq_search_server --notebook_path ~/obsidian --obsidian-md
22 | ```
23 | 
24 | Finally, update the [Firefox Addon](https://addons.mozilla.org/en-US/firefox/addon/fireseqsearch/).
25 | 
26 | #### Demo Video
27 | https://github.com/user-attachments/assets/b0a4ca66-0a33-401a-a916-af7a69f2ae7b
28 | 
29 | This demo used [AstroWiki](https://github.com/AYelland/AstroWiki_2.0), which is licensed under MIT license.
30 | 


--------------------------------------------------------------------------------
/docs/screen_record_20220514.mkv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Endle/fireSeqSearch/143f6168d44148c2fa40ec2e8a512e2a2c44c638/docs/screen_record_20220514.mkv


--------------------------------------------------------------------------------
/docs/screenshot_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Endle/fireSeqSearch/143f6168d44148c2fa40ec2e8a512e2a2c44c638/docs/screenshot_demo.png


--------------------------------------------------------------------------------
/docs/screenshot_demo_640_400.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Endle/fireSeqSearch/143f6168d44148c2fa40ec2e8a512e2a2c44c638/docs/screenshot_demo_640_400.png


--------------------------------------------------------------------------------
/docs/server.md:
--------------------------------------------------------------------------------
 1 | ##### fire_seq_search_server
 2 | 
 3 | Currently, this server is running at hard-coded port <http://127.0.0.1:3030>  (or http://localhost:3030)
 4 | 
 5 | ### Endpoints
 6 | 
 7 | #### GET `/server_info`
 8 | 
 9 | 
10 | #### GET `/query/%s`
11 | 
12 | Returns an array of `hit`s.
13 | 
14 | Schema of `hit`  (**unstable**)  
15 | title: The title of the logseq page  
16 | summary   
17 | score  
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/example.env:
--------------------------------------------------------------------------------
1 | NOTEBOOK_DIR=/path/to/notebook


--------------------------------------------------------------------------------
/fireSeqSearch_addon/icons/fire-48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Endle/fireSeqSearch/143f6168d44148c2fa40ec2e8a512e2a2c44c638/fireSeqSearch_addon/icons/fire-48.png


--------------------------------------------------------------------------------
/fireSeqSearch_addon/icons/notebook_logo_32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Endle/fireSeqSearch/143f6168d44148c2fa40ec2e8a512e2a2c44c638/fireSeqSearch_addon/icons/notebook_logo_32.png


--------------------------------------------------------------------------------
/fireSeqSearch_addon/icons/notebook_logo_512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Endle/fireSeqSearch/143f6168d44148c2fa40ec2e8a512e2a2c44c638/fireSeqSearch_addon/icons/notebook_logo_512.png


--------------------------------------------------------------------------------
/fireSeqSearch_addon/icons/notebook_logo_64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Endle/fireSeqSearch/143f6168d44148c2fa40ec2e8a512e2a2c44c638/fireSeqSearch_addon/icons/notebook_logo_64.png


--------------------------------------------------------------------------------
/fireSeqSearch_addon/main.js:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | // Copyright (c) 2021-2024 Zhenbo Li
  3 | 
  4 | const fireSeqSearchDomId = "fireSeqSearchDom";
  5 | 
  6 | 
  7 | const fireSeqSearchScriptCSS = `
  8 |     #fireSeqSearchDom {
  9 |         margin: 1em 1em 1em 1em;
 10 |         color: var(--theme-col-txt-snippet); /* duckduck color*/
 11 |     }
 12 |     #fireSeqSearchDom.experimentalLayout {
 13 |         position: fixed;
 14 |         top: 140px;
 15 |         right: 12px;
 16 |         width: 200px;
 17 |         background-color: hsla(200, 40%, 96%, .8);
 18 |         font-size: 12px;
 19 |         border-radius: 6px;
 20 |         z-index: 99999;
 21 |     }
 22 |     .fireSeqSearchTitleBar {
 23 |         margin: 0.5em 0;
 24 |     }
 25 |     .hideSummary {
 26 |         margin: 0 1em;
 27 |     }
 28 |     #fireSeqSearchDom ul {
 29 |         margin: 0;
 30 |         padding: 0.6em;
 31 |         border: 1px dotted  gray;
 32 |         list-style: none;
 33 |         line-height: 1.5em;
 34 |     }
 35 |     #fireSeqSearchDom ul li {
 36 |         font-size: 15px;
 37 |     }
 38 |     #fireSeqSearchDom ul li + li {
 39 |         margin-top: 0.4em;
 40 |     }
 41 |     #fireSeqSearchDom ul li a {
 42 |         text-decoration: underline;
 43 |         text-decoration-style: dotted;
 44 |         text-decoration-thickness: 1px;
 45 |         text-underline-offset: 2px;
 46 |     }
 47 |     #fireSeqSearchDom ul li::before {
 48 |         content: ' ';
 49 |         display: inline-block;
 50 |         margin-right: 0.4em;
 51 |         line-height: 1em;
 52 |         width: 1em;
 53 |         height: 1em;
 54 |         transform: translateY(3px);
 55 |         border-radius: 3px;
 56 |         background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAAe1BMVEUAKzaFyMiKz88AJjIAKjaHy8sAHiuM0tIAGSgAGykAIC0AESIAFyYAIzAAHCoAFCQAEiBvq6wtVlw0XmQIMDtooqRAbnNdk5VPgYU4Y2lyr7B4trdHc3Z/wcEAHSctUFVimZoaP0hShIcoRksTLjQADBkAABNdjo8AAAzWDdSWAAABBklEQVQ4jc1S7XKDIBCEQ0DEL2oJVqOmiUl8/yfs6XTaePgA2T8w7N5xu3OMvSOESI5ejSnWM1Hqo/lUEa8a708WLyYAgG4zWv+lpQRXsdIDR2hLBGkn8RnazK4nB2+IIO9XQvZ5dsYf4FlHI4StcqiYGqeppvU4u+CogIvaXB56n53WrmBJYSprq5S6wB71ONZM5Cc/AAyuFXEUGF/aDLANg55b6hRRnjX/A6ZCExfTC4+KkBJB6uSrgOtv0iKHHc/hSr3ovUCGcs9bSQS0g7mQGSaSaTLvBNJFSRQ3+JfIfow3L5s7XJyVBR3uR5uZPG7PnsPQXedoJX4hzGNZlnt5VP7G+AHcFwwZX2F8QwAAAABJRU5ErkJggg==);
 57 |         background-repeat: no-repeat;
 58 |         background-size: 16px;
 59 |     }
 60 |     .fireSeqSearchHitSummary {
 61 |         font-size: 0.9em
 62 |     }
 63 |     .fireSeqSearchHitSummary::before {
 64 |         content: "\\00A0::\\00A0";
 65 |     }
 66 |     .fireSeqSearchHighlight {
 67 |         padding: 0 4px;
 68 |         color: black !important;
 69 |         background-color: gold;
 70 |         border-radius: 3px;
 71 |     }
 72 |     `;
 73 | 
 74 | function consoleLogForDebug(message) {
 75 |     console.log(message); //skipcq: JS-0002
 76 | }
 77 | 
 78 | 
 79 | function addGlobalStyle(css) {
 80 |     const head = document.getElementsByTagName("head")[0];
 81 |     if (!head) { return; }
 82 |     const style = document.createElement("style");
 83 |     style.id = "fireSeqSearchScriptCSS";
 84 |     // style.type = "text/css";
 85 |     style.innerHTML = css;
 86 |     head.appendChild(style);
 87 | }
 88 | 
 89 | 
 90 | function createElementWithText(type, text) {
 91 |     const element = document.createElement(type);
 92 |     element.textContent = text;
 93 |     return element;
 94 | }
 95 | 
 96 | 
 97 | function createHrefToLogseq(record, serverInfo) {
 98 |     const name = serverInfo.notebook_name;
 99 | 
100 |     const title = record.title;
101 |     const prettyTitle = title.replaceAll("%2F", "/");
102 | 
103 |     const target = record.logseq_uri || `logseq://graph/${name}?page=${title}`;
104 | 
105 |     const logseqPageLink = document.createElement('a');
106 |     const text = document.createTextNode(prettyTitle);
107 |     logseqPageLink.appendChild(text);
108 |     logseqPageLink.title = prettyTitle;
109 |     logseqPageLink.href = target;
110 |     consoleLogForDebug(logseqPageLink);
111 |     return logseqPageLink;
112 | }
113 | 
114 | 
115 | function checkUserOptions() {
116 |     return Promise.all([
117 |         /*global browser */
118 |         browser.storage.sync.get("debugStr"),
119 |         browser.storage.sync.get("ExperimentalLayout"),
120 |         browser.storage.sync.get("ShowHighlight"),
121 |         browser.storage.sync.get("ShowScore")
122 |     ]).then(function(res) {
123 |         consoleLogForDebug(res);
124 | 
125 |         const options = {
126 |             debugStr: res[0].debugStr,
127 |             ExperimentalLayout: res[1].ExperimentalLayout,
128 |             ShowHighlight: res[2].ShowHighlight,
129 |             ShowScore: res[3].ShowScore
130 |         }
131 |         return options;
132 |     });
133 | }
134 | 
135 | 
136 | function parseRawList(rawSearchResult) {
137 |     const hits = [];
138 |     for (const rawRecord of rawSearchResult) {
139 |         const record = JSON.parse(rawRecord);
140 |         hits.push(record);
141 |     }
142 |     return hits;
143 | }
144 | 
145 | async function processLlmSummary(serverInfo, parsedSearchResult, fireDom) {
146 | 
147 |     const doneListApi = "http://127.0.0.1:3030/llm_done_list";
148 |     let list = await fetch(doneListApi);
149 |     list = await list.text();
150 |     list = JSON.parse(list);
151 | 
152 |     const findByTitle = function(title) {
153 |         const ul = fireDom.querySelector( ".fireSeqSearchHitList" );
154 |         if (ul === null)    return null;
155 |         for (const child of ul.children) {
156 |             const liTitle = child.firstChild.text;
157 |             if (title === liTitle) {
158 |                 return child;
159 |             }
160 |         }
161 |         return null;
162 |     };
163 |     const setLlmResult = function (title, llmSummary) {
164 |         const targetRow = findByTitle(title);
165 |         if (targetRow === null) {
166 |             consoleLogForDebug("Error! Can't find dom for ", title);
167 |             return;
168 |         }
169 |         if (targetRow.querySelector( ".fireSeqSearchLlmSummary" ) != null) {
170 |             consoleLogForDebug("Skip. We have the summary for ", title);
171 |             return;
172 |         }
173 | 
174 |         const summary = createElementWithText("span", "");
175 |         summary.innerHTML = llmSummary;
176 |         summary.classList.add('fireSeqSearchLlmSummary');
177 |         targetRow.appendChild(summary);
178 |     };
179 |     for (const record of parsedSearchResult) {
180 |         const title = record.title;
181 |         if (!list.includes(title)) {
182 |             consoleLogForDebug("Not ready, skip" + title);
183 |             continue;
184 |         }
185 |         // TODO remove hard code port
186 |         const llm_api = "http://127.0.0.1:3030/summarize/" + title;
187 |         let sum = await fetch(llm_api);
188 |         sum = await sum.text();
189 |         setLlmResult(title, sum);
190 |     }
191 | }
192 | 
193 | 
194 | function createFireSeqDom(serverInfo, parsedSearchResult) {
195 |     const count = parsedSearchResult.length;
196 |     const div = document.createElement("div");
197 |     div.setAttribute("id", fireSeqSearchDomId);
198 | 
199 |     const createTitleBarDom = function () {
200 |         const titleBar = createElementWithText("div");
201 |         titleBar.classList.add('fireSeqSearchTitleBar');
202 |         const hitCount = `<span>We found <b>${count.toString()}</b> results in your logseq notebook</span>`;
203 |         titleBar.insertAdjacentHTML("afterbegin",hitCount);
204 | 
205 |         function setSummaryState(cl, state) {
206 |             let prop = 'none';
207 |             if (state) { prop = ''; }
208 |             for (const el of document.querySelectorAll(cl)) {
209 |                 el.style.display=prop;
210 |             }
211 |         }
212 |         let btn = document.createElement("button");
213 |         btn.classList.add("hideSummary");
214 |         let text = document.createTextNode("Hide Summary");
215 |         btn.appendChild(text);
216 |         btn.onclick = function () {
217 |             setSummaryState(".fireSeqSearchHitSummary", false);
218 |             setSummaryState(".fireSeqSearchLlmSummary", false);
219 |         };
220 |         titleBar.appendChild(btn);
221 | 
222 |         btn = document.createElement("button");
223 |         btn.classList.add("showSummary");
224 |         text = document.createTextNode("Summary");
225 |         btn.appendChild(text);
226 |         btn.onclick = function () {
227 |             setSummaryState(".fireSeqSearchHitSummary", true);
228 |             setSummaryState(".fireSeqSearchLlmSummary", false);
229 |         };
230 |         titleBar.appendChild(btn);
231 | 
232 |         btn = document.createElement("button");
233 |         btn.classList.add("showLlm");
234 |         text = document.createTextNode("LLM");
235 |         btn.appendChild(text);
236 |         btn.onclick = function () {
237 |             setSummaryState(".fireSeqSearchHitSummary", false);
238 |             setSummaryState(".fireSeqSearchLlmSummary", true);
239 |             processLlmSummary(serverInfo, parsedSearchResult, div);
240 |         };
241 |         titleBar.appendChild(btn);
242 |         return titleBar;
243 |     };
244 |     const bar = createTitleBarDom();
245 |     div.appendChild(bar);
246 |     return div;
247 | }
248 | 
249 | async function appendResultToSearchResult(serverInfo, parsedSearchResult, dom) {
250 |     const firefoxExtensionUserOption = await checkUserOptions();
251 |     consoleLogForDebug('Loaded user option: ' + JSON.stringify(firefoxExtensionUserOption));
252 | 
253 |     function buildListItems(parsedSearchResult) {
254 |         const hitList = document.createElement("ul");
255 |         hitList.classList.add('fireSeqSearchHitList');
256 |         for (const record of parsedSearchResult) {
257 |             const li =  createElementWithText("li", "");
258 |             li.classList.add('fireSeqSearchHitListItem');
259 |             if (firefoxExtensionUserOption.ShowScore) {
260 |                 const score = createElementWithText("span", String(record.score));
261 |                 li.appendChild(score);
262 |             }
263 |             const href = createHrefToLogseq(record, serverInfo);
264 |             li.appendChild(href);
265 | 
266 |             const summary = createElementWithText("span", "");
267 |             summary.innerHTML = record.summary;
268 |             summary.classList.add('fireSeqSearchHitSummary');
269 |             li.appendChild(summary);
270 | 
271 |             hitList.appendChild(li);
272 |         }
273 |         return hitList;
274 |     }
275 |     const hitList = buildListItems(parsedSearchResult);
276 |     dom.appendChild(hitList);
277 | 
278 |     if (firefoxExtensionUserOption.ExperimentalLayout) {
279 |         // Inspired by https://twitter.com/rockucn
280 |         // https://greasyfork.org/en/scripts/446492-%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E%E5%88%87%E6%8D%A2%E5%99%A8-search-engine-switcher/code
281 | 
282 |         dom.classList.add("experimentalLayout");
283 |     }
284 | 
285 |     function insertDivToWebpage(result) {
286 |         let contextId = "rcnt";
287 |         if (window.location.host.includes("duckduckgo.com")) {
288 |             contextId = "web_content_wrapper";
289 |         }
290 |         if (window.location.host.includes("searx")) { // https://github.com/Endle/fireSeqSearch/issues/103
291 |             contextId = "results";
292 |         }
293 |         if (window.location.host.includes("metager")) { // https://github.com/Endle/fireSeqSearch/issues/127
294 |             contextId = "results";
295 |         }
296 |         document.getElementById(contextId).insertAdjacentElement("beforebegin", result);
297 | 
298 |     }
299 | 
300 |     insertDivToWebpage(dom);
301 | }
302 | 
303 | async function mainProcess(fetchResultArray) {
304 |     consoleLogForDebug("main process");
305 | 
306 |     const serverInfo = fetchResultArray[0];
307 |     const rawSearchResult = fetchResultArray[1];
308 |     consoleLogForDebug(serverInfo);
309 |     const parsedSearchResult = parseRawList(rawSearchResult);
310 | 
311 |     const fireDom = createFireSeqDom(serverInfo, parsedSearchResult);
312 | 
313 |     appendResultToSearchResult(serverInfo, parsedSearchResult, fireDom);
314 | 
315 | }
316 | 
317 | 
318 | function getSearchParameterFromCurrentPage() {
319 |     let searchParam = "";
320 | 
321 |     function getSearchParameterOfSearx() {
322 |         const inputBox = document.getElementById("q");
323 |         return inputBox.value;
324 |     }
325 |     function getSearchParameterOfMetager() {
326 |         const urlParams = new URLSearchParams(window.location.search);
327 |         return urlParams.get('eingabe');
328 |     }
329 | 
330 |     if (window.location.toString().includes("searx")) {
331 |         searchParam = getSearchParameterOfSearx();
332 |     } else if (window.location.toString().includes("metager")) {
333 |         searchParam = getSearchParameterOfMetager();
334 |     } else {
335 |         // https://stackoverflow.com/a/901144/1166518
336 |         const urlParams = new URLSearchParams(window.location.search);
337 |         searchParam = urlParams.get('q');
338 |     }
339 | 
340 |     consoleLogForDebug(`Got search param: ${searchParam}`);
341 |     return searchParam;
342 | }
343 | 
344 | 
345 | 
346 | (function() {
347 |     const searchParameter = getSearchParameterFromCurrentPage();
348 | 
349 |     addGlobalStyle(fireSeqSearchScriptCSS);
350 | 
351 |     //https://gomakethings.com/waiting-for-multiple-all-api-responses-to-complete-with-the-vanilla-js-promise.all-method/
352 |     Promise.all([
353 |         fetch("http://127.0.0.1:3030/server_info"),
354 |         fetch("http://127.0.0.1:3030/query/" + searchParameter)
355 |     ]).then(function (responses) {
356 |         return Promise.all(responses.map(function (response) {return response.json();}));
357 |     }).then(function (data) {
358 |         mainProcess(data);
359 |     }).then((_e) => {
360 |         const highlightedItems = document.querySelectorAll('.fireSeqSearchHighlight');
361 |         consoleLogForDebug(highlightedItems);
362 |         highlightedItems.forEach((element) => {
363 |             element.style.color = 'red';
364 |         });
365 |     }).catch(
366 |         error => {consoleLogForDebug(error)}
367 |     );
368 | 
369 | 
370 | })();
371 | 


--------------------------------------------------------------------------------
/fireSeqSearch_addon/manifest.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "manifest_version": 2,
 3 |   "name": "fireSeqSearch",
 4 |   "version": "0.2.2",
 5 | 
 6 |   "description": "Everytime you use the search engine, this plugin will search against your personal logseq notes.",
 7 | 
 8 |   "icons": {
 9 |     "32": "icons/notebook_logo_32.png",
10 |     "64": "icons/notebook_logo_64.png"
11 |   },
12 | 
13 |   "options_ui": {
14 |     "page": "options.html",
15 |     "browser_style": true
16 |   },
17 |   "content_scripts": [
18 |     {
19 |       "matches": [
20 |         "*://*.bing.com/*",
21 |         "*://www.google.com/search*",
22 |         "*://www.google.com.hk/search*",
23 |         "*://duckduckgo.com/*",
24 |         "*://searx.prvcy.eu/search",
25 |         "*://searx.fmac.xyz/search",
26 |         "*://metager.org/*"
27 |       ],
28 |       "js": ["main.js"]
29 |     },
30 |     {
31 |       "matches": [
32 |         "*://127.0.0.1/*"
33 |       ],
34 |       "js": ["wordcloud_draw.js"]
35 |     }
36 |   ],
37 | 
38 |   "permissions": ["*://127.0.0.1/*", "storage"],
39 | 
40 | 
41 |   "browser_specific_settings": {
42 |     "gecko": {
43 |       "id": "{293a97e7-c815-4ce2-a537-87af8818cbc0}",
44 |       "strict_min_version": "99.0"
45 |     }
46 |   }
47 | 
48 | }
49 | 


--------------------------------------------------------------------------------
/fireSeqSearch_addon/monkeyscript.user.js:
--------------------------------------------------------------------------------
  1 | // ==UserScript==
  2 | // @name         fireSeqSearchScript
  3 | // @namespace    https://github.com/Endle/fireSeqSearch
  4 | // @version      0.0.18
  5 | // @description  Everytime you use the search engine, FireSeqSearch searches your personal logseq notes.
  6 | // @author       Zhenbo Li
  7 | // @match        https://www.google.com/search*
  8 | // @match        https://duckduckgo.com/?q=*
  9 | // @icon         https://www.google.com/s2/favicons?sz=64&domain=tampermonkey.net
 10 | // @grant GM_xmlhttpRequest
 11 | // ==/UserScript==
 12 | 
 13 | // MIT License
 14 | // Copyright (c) 2021-2022 Zhenbo Li
 15 | 
 16 | /*global GM*/
 17 | 
 18 | const fireSeqSearchDomId = "fireSeqSearchDom";
 19 | 
 20 | 
 21 | const fireSeqSearchScriptCSS = `
 22 |     #fireSeqSearchDom {
 23 |         margin: 1em 1em 1em 1em;
 24 |         color: var(--theme-col-txt-snippet); /* duckduck color*/
 25 |     }
 26 |     #fireSeqSearchDom.experimentalLayout {
 27 |         position: fixed;
 28 |         top: 140px;
 29 |         right: 12px;
 30 |         width: 200px;
 31 |         background-color: hsla(200, 40%, 96%, .8);
 32 |         font-size: 12px;
 33 |         border-radius: 6px;
 34 |         z-index: 99999;
 35 |     }
 36 |     .fireSeqSearchTitleBar {
 37 |         margin: 0.5em 0;
 38 |     }
 39 |     .hideSummary {
 40 |         margin: 0 1em;
 41 |     }
 42 |     #fireSeqSearchDom ul {
 43 |         margin: 0;
 44 |         padding: 0.6em;
 45 |         border: 1px dotted  gray;
 46 |         list-style: none;
 47 |         line-height: 1.5em;
 48 |     }
 49 |     #fireSeqSearchDom ul li {
 50 |         font-size: 15px;
 51 |     }
 52 |     #fireSeqSearchDom ul li + li {
 53 |         margin-top: 0.4em;
 54 |     }
 55 |     #fireSeqSearchDom ul li a {
 56 |         text-decoration: underline;
 57 |         text-decoration-style: dotted;
 58 |         text-decoration-thickness: 1px;
 59 |         text-underline-offset: 2px;
 60 |     }
 61 |     #fireSeqSearchDom ul li::before {
 62 |         content: ' ';
 63 |         display: inline-block;
 64 |         margin-right: 0.4em;
 65 |         line-height: 1em;
 66 |         width: 1em;
 67 |         height: 1em;
 68 |         transform: translateY(3px);
 69 |         border-radius: 3px;
 70 |         background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAAe1BMVEUAKzaFyMiKz88AJjIAKjaHy8sAHiuM0tIAGSgAGykAIC0AESIAFyYAIzAAHCoAFCQAEiBvq6wtVlw0XmQIMDtooqRAbnNdk5VPgYU4Y2lyr7B4trdHc3Z/wcEAHSctUFVimZoaP0hShIcoRksTLjQADBkAABNdjo8AAAzWDdSWAAABBklEQVQ4jc1S7XKDIBCEQ0DEL2oJVqOmiUl8/yfs6XTaePgA2T8w7N5xu3OMvSOESI5ejSnWM1Hqo/lUEa8a708WLyYAgG4zWv+lpQRXsdIDR2hLBGkn8RnazK4nB2+IIO9XQvZ5dsYf4FlHI4StcqiYGqeppvU4u+CogIvaXB56n53WrmBJYSprq5S6wB71ONZM5Cc/AAyuFXEUGF/aDLANg55b6hRRnjX/A6ZCExfTC4+KkBJB6uSrgOtv0iKHHc/hSr3ovUCGcs9bSQS0g7mQGSaSaTLvBNJFSRQ3+JfIfow3L5s7XJyVBR3uR5uZPG7PnsPQXedoJX4hzGNZlnt5VP7G+AHcFwwZX2F8QwAAAABJRU5ErkJggg==);
 71 |         background-repeat: no-repeat;
 72 |         background-size: 16px;
 73 |     }
 74 |     .fireSeqSearchHitSummary {
 75 |         font-size: 0.9em
 76 |     }
 77 |     .fireSeqSearchHitSummary::before {
 78 |         content: "\\00A0::\\00A0";
 79 |     }
 80 |     .fireSeqSearchHighlight {
 81 |         padding: 0 4px;
 82 |         color: black !important;
 83 |         background-color: gold;
 84 |         border-radius: 3px;
 85 |     }
 86 |     `;
 87 | 
 88 | function consoleLogForDebug(message) {
 89 |     console.log(message); //skipcq: JS-0002
 90 |     // Comment it in master branch, to make deepSource happy
 91 | }
 92 | 
 93 | 
 94 | function addGlobalStyle(css) {
 95 |     const head = document.getElementsByTagName("head")[0];
 96 |     if (!head) { return; }
 97 |     const style = document.createElement("style");
 98 |     style.id = "fireSeqSearchScriptCSS";
 99 |     // style.type = "text/css";
100 |     style.innerHTML = css;
101 |     head.appendChild(style);
102 | }
103 | 
104 | 
105 | function createElementWithText(type, text) {
106 |     const element = document.createElement(type);
107 |     element.textContent = text;
108 |     return element;
109 | }
110 | 
111 | 
112 | function createHrefToLogseq(record, serverInfo) {
113 |     const name = serverInfo.notebook_name;
114 | 
115 |     const title = record.title;
116 |     const prettyTitle = title.replaceAll("%2F", "/");
117 | 
118 |     const target = `logseq://graph/${name}?page=${title}`;
119 |     const logseqPageLink = document.createElement('a');
120 |     const text = document.createTextNode(prettyTitle);
121 |     logseqPageLink.appendChild(text);
122 |     logseqPageLink.title = prettyTitle;
123 |     logseqPageLink.href = target;
124 |     consoleLogForDebug(logseqPageLink);
125 |     return logseqPageLink;
126 | }
127 | 
128 | 
129 | function checkUserOptions() {
130 |     const options = {
131 |         debugStr: "tampermonkey",
132 |         ExperimentalLayout: false,
133 |         ShowHighlight: true,
134 |         ShowScore: false
135 |     }
136 |     consoleLogForDebug(options);
137 |     return options;
138 | 
139 | }
140 | 
141 | 
142 | async function appendResultToSearchResult(fetchResultArray, container) {
143 |     const serverInfo = fetchResultArray[0];
144 |     const rawSearchResult = fetchResultArray[1];
145 |     const firefoxExtensionUserOption = await checkUserOptions();
146 | 
147 | 
148 |     consoleLogForDebug(firefoxExtensionUserOption);
149 | 
150 |     function createTitleBarDom(count) {
151 |         const titleBar = createElementWithText("div");
152 |         titleBar.classList.add('fireSeqSearchTitleBar');
153 |         const hitCount = `<span>We found <b>${count.toString()}</b> results in your logseq notebook</span>`;
154 |         titleBar.insertAdjacentHTML("afterbegin",hitCount);
155 |         const btn = document.createElement("button");
156 |         btn.classList.add("hideSummary");
157 |         const text = document.createTextNode("Hide Summary (Tmp)");
158 |         btn.appendChild(text);
159 |         btn.onclick = function () {
160 |             // alert("Button is clicked");
161 |             for (const el of document.querySelectorAll('.fireSeqSearchHitSummary')) {
162 |                 // el.style.visibility = 'hidden';
163 |                 el.remove();
164 |             }
165 |         };
166 |         titleBar.appendChild(btn);
167 |         return titleBar;
168 |     }
169 | 
170 | 
171 | 
172 |     function createFireSeqDom() {
173 | 
174 |         const div = document.createElement("div");
175 |         // div.appendChild(createElementWithText("p", "fireSeqSearch launched!"));
176 |         div.setAttribute("id", fireSeqSearchDomId);
177 | 
178 | 
179 |         return div;
180 |     }
181 | 
182 |     const dom = createFireSeqDom();
183 |     dom.appendChild(createTitleBarDom(rawSearchResult.length));
184 |     consoleLogForDebug(dom);
185 | 
186 |     const hitList = document.createElement("ul");
187 | 
188 |     consoleLogForDebug(rawSearchResult);
189 |     for (const rawRecord of rawSearchResult) {
190 |         // const e = document.createTextNode(record);
191 |         consoleLogForDebug(rawRecord);
192 |         const record = JSON.parse(rawRecord);
193 |         consoleLogForDebug(typeof record);
194 | 
195 |         const li =  createElementWithText("li", "");
196 | 
197 | 
198 |         if (firefoxExtensionUserOption.ShowScore) {
199 |             const score = createElementWithText("span", String(record.score));
200 |             li.appendChild(score);
201 |         }
202 |         const href = createHrefToLogseq(record, serverInfo);
203 |         li.appendChild(href);
204 |         li.append(' ')
205 |         if (firefoxExtensionUserOption.ShowHighlight) {
206 |             const summary = createElementWithText("span", "");
207 |             summary.innerHTML = record.summary;
208 |             summary.classList.add('fireSeqSearchHitSummary');
209 |             li.appendChild(summary);
210 |         }
211 |         // let e = wrapRawRecordIntoElement(record, serverInfo);
212 | 
213 |         // e.style.
214 |         hitList.appendChild(li);
215 |         // consoleLogForDebug("Added an element to the list");
216 |     }
217 |     dom.appendChild(hitList);
218 | 
219 |     if (firefoxExtensionUserOption.ExperimentalLayout) {
220 |         // Inspired by https://twitter.com/rockucn
221 |         // https://greasyfork.org/en/scripts/446492-%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E%E5%88%87%E6%8D%A2%E5%99%A8-search-engine-switcher/code
222 | 
223 |         dom.classList.add("experimentalLayout");
224 |     }
225 |     let contextId = "rcnt";
226 |     if (window.location.href.includes("duckduckgo.com")) {
227 |         contextId = "web_content_wrapper";
228 |     }
229 |     document.getElementById(contextId).insertAdjacentElement("beforebegin", dom);
230 | 
231 | }
232 | 
233 | function getSearchParameterFromCurrentPage() {
234 |     let searchParam = "";
235 | 
236 |     function getSearchParameterOfSearx() {
237 |         const inputBox = document.getElementById("q");
238 |         return inputBox.value;
239 |     }
240 | 
241 |     if (window.location.toString().includes("searx")) {
242 |         searchParam = getSearchParameterOfSearx();
243 |     } else {
244 |         // https://stackoverflow.com/a/901144/1166518
245 |         const urlParams = new URLSearchParams(window.location.search);
246 |         // consoleLogForDebug(urlParams);
247 |         searchParam = urlParams.get('q');
248 |     }
249 | 
250 |     consoleLogForDebug(`Got search param: ${searchParam}`);
251 |     return searchParam;
252 | }
253 | 
254 | 
255 | 
256 | (function() {
257 |     const searchParameter = getSearchParameterFromCurrentPage();
258 | 
259 |     consoleLogForDebug(searchParameter);
260 |     addGlobalStyle(fireSeqSearchScriptCSS);
261 | 
262 |     GM.xmlHttpRequest({
263 |         method: "GET",
264 |         url: "http://127.0.0.1:3030/server_info",
265 |         onload(infoResponse) {
266 |             const server_info = JSON.parse(infoResponse.responseText);
267 |             consoleLogForDebug(server_info);
268 |             GM.xmlHttpRequest({
269 |                 method: "GET",
270 |                 url: `http://127.0.0.1:3030/query/${searchParameter}`,
271 |                 onload(queryResponse) {
272 |                     const hit = JSON.parse(queryResponse.responseText);
273 |                     // consoleLogForDebug(hit);
274 |                     consoleLogForDebug(typeof hit);
275 | 
276 |                     appendResultToSearchResult([server_info, hit])
277 |                         .then((_e) => {
278 |                             const highlightedItems = document.querySelectorAll('.fireSeqSearchHighlight');
279 |                             consoleLogForDebug(highlightedItems);
280 |                         })
281 |                         .catch(error => {
282 |                             consoleLogForDebug(error);
283 |                         });
284 | 
285 |                 }
286 |             });
287 |         }
288 |     });
289 | 
290 |     /*
291 |         //https://gomakethings.com/waiting-for-multiple-all-api-responses-to-complete-with-the-vanilla-js-promise.all-method/
292 |         Promise.all([
293 |             fetch("http://127.0.0.1:3030/server_info"),
294 |             fetch(`http://127.0.0.1:3030/query/${searchParameter}`)
295 |         ]).then(function (responses) {
296 |             return Promise.all(responses.map(function (response) {return response.json();}));
297 |         }).then(function (data) {
298 |             consoleLogForDebug(data);
299 |             return appendResultToSearchResult(data);
300 |         }).then((_e) => {
301 |             const highlightedItems = document.querySelectorAll('.fireSeqSearchHighlight');
302 |             consoleLogForDebug(highlightedItems);
303 |             highlightedItems.forEach((element) => {
304 |                 element.style.color = 'red';
305 |             });
306 |         }).catch(function (error) {
307 |             consoleLogForDebug(error);
308 |         });
309 | 
310 | 
311 | 
312 |      */
313 | 
314 | 
315 | 
316 | })();
317 | 


--------------------------------------------------------------------------------
/fireSeqSearch_addon/options.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | 
 3 | <html lang="en">
 4 | <head>
 5 |     <meta charset="utf-8"/>
 6 | </head>
 7 | 
 8 | <body>
 9 | <form>
10 |     <label for="debugStr">Debug Str</label>
11 |     <input type="text" id="debugStr" name="debugStr"/>
12 |     <br>
13 | 
14 |     <input type="checkbox" id="ExperimentalLayout"
15 |            name="ExperimentalLayout" value="Car">
16 |     <label for="ExperimentalLayout">Experimental Layout</label>
17 |     <br>
18 | 
19 |     <input type="checkbox" id="ShowScore"
20 |            name="ShowScore">
21 |     <label for="ShowScore">Show tantivy score (for debug)</label>
22 |     <br>
23 | 
24 |     <input type="checkbox" id="ShowHighlight"
25 |            name="ShowHighlight">
26 |     <label for="ShowHighlight">Show summary</label>
27 |     <br>
28 | 
29 |     <button type="submit">Save</button>
30 | </form>
31 | <script src="options.js"></script>
32 | </body>
33 | </html>
34 | 


--------------------------------------------------------------------------------
/fireSeqSearch_addon/options.js:
--------------------------------------------------------------------------------
 1 | 
 2 | function saveOptions(e) {
 3 |     e.preventDefault();
 4 |     const ex = document.querySelector("#ExperimentalLayout").checked;
 5 | 
 6 |     browser.storage.sync.set({
 7 |         debugStr: document.querySelector("#debugStr").value,
 8 |         ExperimentalLayout: ex,
 9 |         ShowScore: document.querySelector("#ShowScore").checked,
10 |         ShowHighlight: document.querySelector("#ShowHighlight").checked
11 |     });
12 | }
13 | 
14 | function restoreOptions() {
15 |     document.querySelector("#debugStr").value = 'Default red';
16 | 
17 |     /*global browser */
18 |     let gettingItem = browser.storage.sync.get('debugStr');
19 |     gettingItem.then((res) => {
20 |         document.querySelector("#debugStr").value = res.debugStr || 'Not Found';
21 |     });
22 | 
23 |     let ex = browser.storage.sync.get('ExperimentalLayout');
24 |     ex.then((res) => {
25 |         if (res.ExperimentalLayout) {
26 |             document.querySelector("#ExperimentalLayout").checked = true;
27 |         }
28 |     });
29 | 
30 |     browser.storage.sync.get('ShowHighlight')
31 |         .then((res) => {
32 |         if (res.ShowHighlight) {
33 |             document.querySelector("#ShowHighlight").checked = true;
34 |         }
35 |     });
36 |     browser.storage.sync.get('ShowScore')
37 |         .then((res) => {
38 |             if (res.ShowScore) {
39 |                 document.querySelector("#ShowScore").checked = true;
40 |             }
41 |         });
42 | }
43 | 
44 | document.addEventListener('DOMContentLoaded', restoreOptions);
45 | document.querySelector("form").addEventListener("submit", saveOptions);


--------------------------------------------------------------------------------
/fireSeqSearch_addon/violentmonkeyscript.user.js:
--------------------------------------------------------------------------------
  1 | // ==UserScript==
  2 | // @name         fireSeqSearchScript
  3 | // @namespace    https://github.com/Endle/fireSeqSearch
  4 | // @version      0.1.4
  5 | // @description  Everytime you use the search engine, FireSeqSearch searches your personal logseq notes.
  6 | // @author       Zhenbo Li
  7 | // @match        https://www.google.com/search*
  8 | // @match        https://duckduckgo.com/*
  9 | // @match        https://metager.org/*
 10 | // @icon         https://www.google.com/s2/favicons?sz=64&domain=tampermonkey.net
 11 | // @grant        GM.xmlHttpRequest
 12 | // ==/UserScript==
 13 | 
 14 | // MIT License
 15 | // Copyright (c) 2021-2023 Zhenbo Li
 16 | 
 17 | /*global GM*/
 18 | 
 19 | const fireSeqSearchDomId = "fireSeqSearchDom";
 20 | 
 21 | 
 22 | const fireSeqSearchScriptCSS = `
 23 |     #fireSeqSearchDom {
 24 |         margin: 1em 1em 1em 1em;
 25 |         color: var(--theme-col-txt-snippet); /* duckduck color*/
 26 |     }
 27 |     #fireSeqSearchDom.experimentalLayout {
 28 |         position: fixed;
 29 |         top: 140px;
 30 |         right: 12px;
 31 |         width: 200px;
 32 |         background-color: hsla(200, 40%, 96%, .8);
 33 |         font-size: 12px;
 34 |         border-radius: 6px;
 35 |         z-index: 99999;
 36 |     }
 37 |     .fireSeqSearchTitleBar {
 38 |         margin: 0.5em 0;
 39 |     }
 40 |     .hideSummary {
 41 |         margin: 0 1em;
 42 |     }
 43 |     #fireSeqSearchDom ul {
 44 |         margin: 0;
 45 |         padding: 0.6em;
 46 |         border: 1px dotted  gray;
 47 |         list-style: none;
 48 |         line-height: 1.5em;
 49 |     }
 50 |     #fireSeqSearchDom ul li {
 51 |         font-size: 15px;
 52 |     }
 53 |     #fireSeqSearchDom ul li + li {
 54 |         margin-top: 0.4em;
 55 |     }
 56 |     #fireSeqSearchDom ul li a {
 57 |         text-decoration: underline;
 58 |         text-decoration-style: dotted;
 59 |         text-decoration-thickness: 1px;
 60 |         text-underline-offset: 2px;
 61 |     }
 62 |     #fireSeqSearchDom ul li::before {
 63 |         content: ' ';
 64 |         display: inline-block;
 65 |         margin-right: 0.4em;
 66 |         line-height: 1em;
 67 |         width: 1em;
 68 |         height: 1em;
 69 |         transform: translateY(3px);
 70 |         border-radius: 3px;
 71 |         background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAAe1BMVEUAKzaFyMiKz88AJjIAKjaHy8sAHiuM0tIAGSgAGykAIC0AESIAFyYAIzAAHCoAFCQAEiBvq6wtVlw0XmQIMDtooqRAbnNdk5VPgYU4Y2lyr7B4trdHc3Z/wcEAHSctUFVimZoaP0hShIcoRksTLjQADBkAABNdjo8AAAzWDdSWAAABBklEQVQ4jc1S7XKDIBCEQ0DEL2oJVqOmiUl8/yfs6XTaePgA2T8w7N5xu3OMvSOESI5ejSnWM1Hqo/lUEa8a708WLyYAgG4zWv+lpQRXsdIDR2hLBGkn8RnazK4nB2+IIO9XQvZ5dsYf4FlHI4StcqiYGqeppvU4u+CogIvaXB56n53WrmBJYSprq5S6wB71ONZM5Cc/AAyuFXEUGF/aDLANg55b6hRRnjX/A6ZCExfTC4+KkBJB6uSrgOtv0iKHHc/hSr3ovUCGcs9bSQS0g7mQGSaSaTLvBNJFSRQ3+JfIfow3L5s7XJyVBR3uR5uZPG7PnsPQXedoJX4hzGNZlnt5VP7G+AHcFwwZX2F8QwAAAABJRU5ErkJggg==);
 72 |         background-repeat: no-repeat;
 73 |         background-size: 16px;
 74 |     }
 75 |     .fireSeqSearchHitSummary {
 76 |         font-size: 0.9em
 77 |     }
 78 |     .fireSeqSearchHitSummary::before {
 79 |         content: "\\00A0::\\00A0";
 80 |     }
 81 |     .fireSeqSearchHighlight {
 82 |         padding: 0 4px;
 83 |         color: black !important;
 84 |         background-color: gold;
 85 |         border-radius: 3px;
 86 |     }
 87 |     `;
 88 | 
 89 | function consoleLogForDebug(message) {
 90 |     console.log(message); //skipcq: JS-0002
 91 | }
 92 | 
 93 | 
 94 | function addGlobalStyle(css) {
 95 |     const head = document.getElementsByTagName("head")[0];
 96 |     if (!head) { return; }
 97 |     const style = document.createElement("style");
 98 |     style.id = "fireSeqSearchScriptCSS";
 99 |     // style.type = "text/css";
100 |     style.innerHTML = css;
101 |     head.appendChild(style);
102 | }
103 | 
104 | 
105 | function createElementWithText(type, text) {
106 |     const element = document.createElement(type);
107 |     element.textContent = text;
108 |     return element;
109 | }
110 | 
111 | 
112 | function createHrefToLogseq(record, serverInfo) {
113 |     const name = serverInfo.notebook_name;
114 | 
115 |     const title = record.title;
116 |     const prettyTitle = title.replaceAll("%2F", "/");
117 | 
118 |     const target = record.logseq_uri || `logseq://graph/${name}?page=${title}`;
119 | 
120 |     const logseqPageLink = document.createElement('a');
121 |     const text = document.createTextNode(prettyTitle);
122 |     logseqPageLink.appendChild(text);
123 |     logseqPageLink.title = prettyTitle;
124 |     logseqPageLink.href = target;
125 |     consoleLogForDebug(logseqPageLink);
126 |     return logseqPageLink;
127 | }
128 | 
129 | 
130 | function checkUserOptions() {
131 |     const options = {
132 |         debugStr: "tampermonkey",
133 |         ExperimentalLayout: false,
134 |         ShowHighlight: true,
135 |         ShowScore: false
136 |     }
137 |     consoleLogForDebug(options);
138 |     return options;
139 | 
140 | }
141 | 
142 | 
143 | async function appendResultToSearchResult(fetchResultArray, _container) {
144 |     const serverInfo = fetchResultArray[0];
145 |     const rawSearchResult = fetchResultArray[1];
146 |     const firefoxExtensionUserOption = await checkUserOptions();
147 | 
148 |     consoleLogForDebug('Loaded user option: ' + JSON.stringify(firefoxExtensionUserOption));
149 | 
150 |     function createTitleBarDom(count) {
151 |         const titleBar = createElementWithText("div");
152 |         titleBar.classList.add('fireSeqSearchTitleBar');
153 |         const hitCount = `<span>We found <b>${count.toString()}</b> results in your logseq notebook</span>`;
154 |         titleBar.insertAdjacentHTML("afterbegin",hitCount);
155 |         const btn = document.createElement("button");
156 |         btn.classList.add("hideSummary");
157 |         const text = document.createTextNode("Hide Summary (Tmp)");
158 |         btn.appendChild(text);
159 |         btn.onclick = function () {
160 |             // alert("Button is clicked");
161 |             for (const el of document.querySelectorAll('.fireSeqSearchHitSummary')) {
162 |                 // el.style.visibility = 'hidden';
163 |                 el.remove();
164 |             }
165 |         };
166 |         titleBar.appendChild(btn);
167 |         return titleBar;
168 |     }
169 |     function createFireSeqDom() {
170 |         const div = document.createElement("div");
171 |         div.setAttribute("id", fireSeqSearchDomId);
172 |         return div;
173 |     }
174 | 
175 |     const dom = createFireSeqDom();
176 |     dom.appendChild(createTitleBarDom(rawSearchResult.length));
177 |     consoleLogForDebug(dom);
178 | 
179 |     const hitList = document.createElement("ul");
180 | 
181 |     consoleLogForDebug(rawSearchResult);
182 |     for (const rawRecord of rawSearchResult) {
183 |         // const e = document.createTextNode(record);
184 |         consoleLogForDebug(rawRecord);
185 |         const record = JSON.parse(rawRecord);
186 |         consoleLogForDebug(typeof record);
187 | 
188 |         const li =  createElementWithText("li", "");
189 | 
190 | 
191 |         if (firefoxExtensionUserOption.ShowScore) {
192 |             const score = createElementWithText("span", String(record.score));
193 |             li.appendChild(score);
194 |         }
195 |         const href = createHrefToLogseq(record, serverInfo);
196 |         li.appendChild(href);
197 |         li.append(' ')
198 |         if (firefoxExtensionUserOption.ShowHighlight) {
199 |             const summary = createElementWithText("span", "");
200 |             summary.innerHTML = record.summary;
201 |             summary.classList.add('fireSeqSearchHitSummary');
202 |             li.appendChild(summary);
203 |         }
204 |         // let e = wrapRawRecordIntoElement(record, serverInfo);
205 | 
206 |         // e.style.
207 |         hitList.appendChild(li);
208 |         // consoleLogForDebug("Added an element to the list");
209 |     }
210 |     dom.appendChild(hitList);
211 | 
212 |     if (firefoxExtensionUserOption.ExperimentalLayout) {
213 |         // Inspired by https://twitter.com/rockucn
214 |         // https://greasyfork.org/en/scripts/446492-%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E%E5%88%87%E6%8D%A2%E5%99%A8-search-engine-switcher/code
215 | 
216 |         dom.classList.add("experimentalLayout");
217 |     }
218 | 
219 |     function insertDivToWebpage(result) {
220 |         let contextId = "rcnt";
221 |         if (window.location.host.includes("duckduckgo.com")) {
222 |             contextId = "web_content_wrapper";
223 |         }
224 |         if (window.location.host.includes("searx")) { // https://github.com/Endle/fireSeqSearch/issues/103
225 |             contextId = "results";
226 |         }
227 |         if (window.location.host.includes("metager")) { // https://github.com/Endle/fireSeqSearch/issues/127
228 |             contextId = "results";
229 |         }
230 |         document.getElementById(contextId).insertAdjacentElement("beforebegin", result);
231 | 
232 |     }
233 | 
234 |     insertDivToWebpage(dom);
235 | }
236 | 
237 | function getSearchParameterFromCurrentPage() {
238 |     let searchParam = "";
239 | 
240 |     function getSearchParameterOfSearx() {
241 |         const inputBox = document.getElementById("q");
242 |         return inputBox.value;
243 |     }
244 |     function getSearchParameterOfMetager() {
245 |         const urlParams = new URLSearchParams(window.location.search);
246 |         return urlParams.get('eingabe');
247 |     }
248 | 
249 |     if (window.location.toString().includes("searx")) {
250 |         searchParam = getSearchParameterOfSearx();
251 |     } else if (window.location.toString().includes("metager")) {
252 |         searchParam = getSearchParameterOfMetager();
253 |     } else {
254 |         // https://stackoverflow.com/a/901144/1166518
255 |         const urlParams = new URLSearchParams(window.location.search);
256 |         searchParam = urlParams.get('q');
257 |     }
258 | 
259 |     consoleLogForDebug(`Got search param: ${searchParam}`);
260 |     return searchParam;
261 | }
262 | 
263 | 
264 | 
265 | (function() {
266 |     const searchParameter = getSearchParameterFromCurrentPage();
267 | 
268 | 
269 |     addGlobalStyle(fireSeqSearchScriptCSS);
270 | 
271 |     GM.xmlHttpRequest({
272 |         method: "GET",
273 |         url: "http://127.0.0.1:3030/server_info",
274 |         onload(infoResponse) {
275 |             const server_info = JSON.parse(infoResponse.responseText);
276 |             consoleLogForDebug(server_info);
277 |             GM.xmlHttpRequest({
278 |                 method: "GET",
279 |                 url: `http://127.0.0.1:3030/query/${searchParameter}`,
280 |                 onload(queryResponse) {
281 |                     const hit = JSON.parse(queryResponse.responseText);
282 |                     // consoleLogForDebug(hit);
283 |                     consoleLogForDebug(typeof hit);
284 | 
285 |                     appendResultToSearchResult([server_info, hit])
286 |                         .then((_e) => {
287 |                             const highlightedItems = document.querySelectorAll('.fireSeqSearchHighlight');
288 |                             consoleLogForDebug(highlightedItems);
289 |                         })
290 |                         .catch(error => {
291 |                             consoleLogForDebug(error);
292 |                         });
293 | 
294 |                 }
295 |             });
296 |         }
297 |     });
298 | 
299 |     /*
300 |         //https://gomakethings.com/waiting-for-multiple-all-api-responses-to-complete-with-the-vanilla-js-promise.all-method/
301 |         Promise.all([
302 |             fetch("http://127.0.0.1:3030/server_info"),
303 |             fetch(`http://127.0.0.1:3030/query/${searchParameter}`)
304 |         ]).then(function (responses) {
305 |             return Promise.all(responses.map(function (response) {return response.json();}));
306 |         }).then(function (data) {
307 |             consoleLogForDebug(data);
308 |             return appendResultToSearchResult(data);
309 |         }).then((_e) => {
310 |             const highlightedItems = document.querySelectorAll('.fireSeqSearchHighlight');
311 |             consoleLogForDebug(highlightedItems);
312 |             highlightedItems.forEach((element) => {
313 |                 element.style.color = 'red';
314 |             });
315 |         }).catch(function (error) {
316 |             consoleLogForDebug(error);
317 |         });
318 | 
319 | 
320 | 
321 |      */
322 | 
323 | 
324 | 
325 | })();
326 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "fire_seq_search_server"
 3 | version = "0.2.1"
 4 | edition = "2021"
 5 | license = "MIT"
 6 | 
 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 8 | 
 9 | 
10 | [dependencies]
11 | 
12 | tokio = { version = "1", features = ["full"] }
13 | 
14 | # Http Client
15 | axum = "0.7.5"
16 | serde_json = "1.0"
17 | 
18 | # Serde
19 | #   https://serde.rs/derive.html
20 | #   https://stackoverflow.com/a/49313680/1166518
21 | serde = { version = "1.0", features = ["derive", "rc"] }
22 | url = "2.3.1"
23 | 
24 | # QueryEngine
25 | tantivy = "0.22"
26 | tantivy-tokenizer-api = "0.3.0"
27 | jieba-rs = { version = "0.7.0" }
28 | 
29 | 
30 | log = "0.4.22"
31 | env_logger = "0.11.5"
32 | 
33 | # Rust
34 | clap = { version = "4.0", features = ["derive"] }
35 | lazy_static = "1.4.0"
36 | rayon = "1.5"
37 | futures = "0.3"
38 | ctrlc = "3.4"
39 | sysinfo = "0.34.2"
40 | kill_tree = "0.2.4"
41 | urlencoding = "2.1.0"
42 | 
43 | 
44 | # Language Processing
45 | stopwords = "0.1.1"
46 | stop-words = "0.7.2"
47 | 
48 | regex = "1"
49 | lingua = { version = "1.4.0", default-features = false, features = ["chinese", "english"] }
50 | shellexpand = "3.1"
51 | 
52 | #Highlight (Output)
53 | html-escape = "0.2.13"
54 | 
55 | # Parsing
56 | pulldown-cmark = { version = "0.9.2", default-features = false }
57 | # Error
58 | #at /rustc/897e37553bba8b42751c67658967889d11ecd120\library\core\src/option.rs:775:21
59 | #4: pdf_extract::show_text
60 | #at C:\Users\z2369li\.cargo\git\checkouts\pdf-extract-c67a6fa67c2d526c\0d8b9d9\src\lib.rs:1262:16
61 | #pdf-extract = "0.6.4"
62 | pdf-extract-temporary-mitigation-panic = "0.7.1"
63 | 
64 | 
65 | 
66 | # TODO Currently turn them off will make cargo build fail
67 | #   I should make these deps optional, so those who doesn't want LLM could have a smaller binary
68 | sha256 = { version = "1.5.0", optional = true }
69 | reqwest = { version = "0.12", features = ["json"], optional = false }
70 | serde_derive = { version = "1.0.209", optional = false}
71 | 
72 | [features]
73 | #default = ["llm"]
74 | llm = ["sha256",
75 |     #"serde_derive",
76 |         #"request"
77 | ]
78 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/debug_server.sh:
--------------------------------------------------------------------------------
 1 | set -e
 2 | rm -f ./fire_seq_search_server
 3 | # nix-shell -p cargo -p rustc -p libiconv --run "cargo build"
 4 | cargo build --features llm
 5 | cp  target/debug/fire_seq_search_server ./fire_seq_search_server
 6 | 
 7 | export RUST_LOG="warn,fire_seq_search_server=info"
 8 | #export RUST_LOG="debug"
 9 | export RUST_BACKTRACE=1
10 | #RAYON_NUM_THREADS=1 
11 | ./fire_seq_search_server --notebook_path ~/logseq --enable-journal-query
12 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/debug_server_mac.sh:
--------------------------------------------------------------------------------
 1 | set -e
 2 | rm -f ./fire_seq_search_server 
 3 | #nix-shell -p cargo -p rustc -p libiconv --run "cargo build"
 4 | cargo build --features llm
 5 | cp  target/debug/fire_seq_search_server ./fire_seq_search_server
 6 | 
 7 | export RUST_LOG="warn,fire_seq_search_server=info"
 8 | #export RUST_LOG="debug"
 9 | export RUST_BACKTRACE=1
10 | ./fire_seq_search_server --notebook_path ~/logseq 
11 | #--enable-journal-query
12 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/deny.toml:
--------------------------------------------------------------------------------
  1 | [graph]
  2 | targets = [
  3 | ]
  4 | all-features = false
  5 | no-default-features = false
  6 | 
  7 | [output]
  8 | feature-depth = 1
  9 | 
 10 | [advisories]
 11 | # Not finished
 12 | ignore = [
 13 |     { id = "RUSTSEC-2020-0056", reason = "pdf extract" },
 14 |     { id = "RUSTSEC-2021-0153", reason = "pdf" },
 15 | ]
 16 | 
 17 | 
 18 | [licenses]
 19 | # List of explicitly allowed licenses
 20 | # See https://spdx.org/licenses/ for list of possible licenses
 21 | # [possible values: any SPDX 3.11 short identifier (+ optional exception)].
 22 | allow = [
 23 |     "MIT", "Apache-2.0",
 24 |     "Zlib",
 25 |     "BSD-2-Clause", "BSD-3-Clause",
 26 |     "CC0-1.0",
 27 |     "MPL-2.0",
 28 |     "Unicode-3.0",
 29 | ]
 30 | # The confidence threshold for detecting a license from license text.
 31 | # The higher the value, the more closely the license text must be to the
 32 | # canonical license text of a valid SPDX license file.
 33 | # [possible values: any between 0.0 and 1.0].
 34 | confidence-threshold = 0.8
 35 | # Allow 1 or more licenses on a per-crate basis, so that particular licenses
 36 | # aren't accepted for every possible crate as with the normal allow list
 37 | exceptions = [
 38 |     { name = "fastdivide", allow = ["zlib-acknowledgement"] },
 39 | ]
 40 | 
 41 | # This section is considered when running `cargo deny check bans`.
 42 | # More documentation about the 'bans' section can be found here:
 43 | # https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
 44 | [bans]
 45 | # Lint level for when multiple versions of the same crate are detected
 46 | multiple-versions = "warn"
 47 | # Lint level for when a crate version requirement is `*`
 48 | wildcards = "allow"
 49 | # The graph highlighting used when creating dotgraphs for crates
 50 | # with multiple versions
 51 | # * lowest-version - The path to the lowest versioned duplicate is highlighted
 52 | # * simplest-path - The path to the version with the fewest edges is highlighted
 53 | # * all - Both lowest-version and simplest-path are used
 54 | highlight = "all"
 55 | # The default lint level for `default` features for crates that are members of
 56 | # the workspace that is being checked. This can be overridden by allowing/denying
 57 | # `default` on a crate-by-crate basis if desired.
 58 | workspace-default-features = "allow"
 59 | # The default lint level for `default` features for external crates that are not
 60 | # members of the workspace. This can be overridden by allowing/denying `default`
 61 | # on a crate-by-crate basis if desired.
 62 | external-default-features = "allow"
 63 | # List of crates that are allowed. Use with care!
 64 | allow = [
 65 |     #"ansi_term@0.11.0",
 66 |     #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is allowed" },
 67 | ]
 68 | # List of crates to deny
 69 | deny = [
 70 |     #"ansi_term@0.11.0",
 71 |     #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is banned" },
 72 |     # Wrapper crates can optionally be specified to allow the crate when it
 73 |     # is a direct dependency of the otherwise banned crate
 74 |     #{ crate = "ansi_term@0.11.0", wrappers = ["this-crate-directly-depends-on-ansi_term"] },
 75 | ]
 76 | 
 77 | # List of features to allow/deny
 78 | # Each entry the name of a crate and a version range. If version is
 79 | # not specified, all versions will be matched.
 80 | #[[bans.features]]
 81 | #crate = "reqwest"
 82 | # Features to not allow
 83 | #deny = ["json"]
 84 | # Features to allow
 85 | #allow = [
 86 | #    "rustls",
 87 | #    "__rustls",
 88 | #    "__tls",
 89 | #    "hyper-rustls",
 90 | #    "rustls",
 91 | #    "rustls-pemfile",
 92 | #    "rustls-tls-webpki-roots",
 93 | #    "tokio-rustls",
 94 | #    "webpki-roots",
 95 | #]
 96 | # If true, the allowed features must exactly match the enabled feature set. If
 97 | # this is set there is no point setting `deny`
 98 | #exact = true
 99 | 
100 | # Certain crates/versions that will be skipped when doing duplicate detection.
101 | skip = [
102 |     #"ansi_term@0.11.0",
103 |     #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason why it can't be updated/removed" },
104 | ]
105 | # Similarly to `skip` allows you to skip certain crates during duplicate
106 | # detection. Unlike skip, it also includes the entire tree of transitive
107 | # dependencies starting at the specified crate, up to a certain depth, which is
108 | # by default infinite.
109 | skip-tree = [
110 |     #"ansi_term@0.11.0", # will be skipped along with _all_ of its direct and transitive dependencies
111 |     #{ crate = "ansi_term@0.11.0", depth = 20 },
112 | ]
113 | 
114 | # This section is considered when running `cargo deny check sources`.
115 | # More documentation about the 'sources' section can be found here:
116 | # https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html
117 | [sources]
118 | # Lint level for what to happen when a crate from a crate registry that is not
119 | # in the allow list is encountered
120 | unknown-registry = "warn"
121 | # Lint level for what to happen when a crate from a git repository that is not
122 | # in the allow list is encountered
123 | unknown-git = "warn"
124 | # List of URLs for allowed crate registries. Defaults to the crates.io index
125 | # if not specified. If it is specified but empty, no registries are allowed.
126 | allow-registry = ["https://github.com/rust-lang/crates.io-index"]
127 | # List of URLs for allowed Git repositories
128 | allow-git = []
129 | 
130 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/obsidian.sh:
--------------------------------------------------------------------------------
 1 | set -e
 2 | cargo build --features llm
 3 | rm ./fire_seq_search_server -f
 4 | cp --force target/debug/fire_seq_search_server ./fire_seq_search_server
 5 | 
 6 | NOTEBOOK_NAME=AstroWiki_2.0-main
 7 | 
 8 | RUST_BACKTRACE=1 RUST_LOG=debug ./fire_seq_search_server \
 9 |     --notebook_path ~/Documents/$NOTEBOOK_NAME \
10 |     --obsidian-md
11 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/run_server.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | #  --release remove this parameter to save compile time
3 | cargo build
4 | rm -f ./fire_seq_search_server
5 | # Still use the debug version
6 | cp --force target/debug/fire_seq_search_server.exe ./fire_seq_search_server
7 | RUST_LOG=warn ./fire_seq_search_server --notebook_path /c/Users/z2369li/Nextcloud/logseq_notebook
8 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/http_client/endpoints.rs:
--------------------------------------------------------------------------------
 1 | use std::sync::Arc;
 2 | use log::{debug};
 3 | 
 4 | use crate::query_engine::{QueryEngine, ServerInformation};
 5 | use axum::Json;
 6 | use axum::extract::State;
 7 | use axum::{response::Html, extract::Path};
 8 | 
 9 | pub async fn get_server_info(State(engine_arc): State<Arc<QueryEngine>>)
10 |                                                 -> Json<ServerInformation> {
11 |     axum::Json( engine_arc.server_info.to_owned() )
12 | }
13 | 
14 | pub async fn query(
15 |     Path(term) : Path<String>,
16 |     State(engine_arc): State<Arc<QueryEngine>>
17 |     ) -> Html<String>{
18 | 
19 |     debug!("Original Search term {}", term);
20 |     let r = engine_arc.query_pipeline(term);
21 |     Html(r.await)
22 | }
23 | 
24 | pub async fn summarize(
25 |     Path(title) : Path<String>,
26 |     State(engine_arc): State<Arc<QueryEngine>>
27 |     ) -> Html<String>{
28 | 
29 |     let r = engine_arc.summarize(title);
30 |     Html(r.await)
31 | }
32 | 
33 | pub async fn get_llm_done_list(
34 |     State(engine_arc): State<Arc<QueryEngine>>
35 |     ) -> Html<String>{
36 |     let r = engine_arc.get_llm_done_list();
37 |     Html(r.await)
38 | }
39 | 
40 | pub async fn generate_word_cloud(State(engine_arc): State<Arc<QueryEngine>>)
41 |                                                     -> Html<String> {
42 |     let div_id = "fireSeqSearchWordcloudRawJson";
43 |     let json = engine_arc.generate_wordcloud();
44 | 
45 |     let div = format!("<div id=\"{}\">{}</div>", div_id, json);
46 |     Html(div)
47 | }
48 | 
49 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/http_client/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod endpoints;


--------------------------------------------------------------------------------
/fire_seq_search_server/src/language_tools/mod.rs:
--------------------------------------------------------------------------------
 1 | pub mod tokenizer;
 2 | mod cn_stopwords;
 3 | 
 4 | use std::collections::HashSet;
 5 | use lingua::{Language, LanguageDetector, LanguageDetectorBuilder};
 6 | use lingua::Language::{Chinese, English};
 7 | 
 8 | pub fn is_chinese(sentence: &str) -> bool {
 9 |     lazy_static! {
10 |         static ref LANGS: Vec<lingua::Language> = vec![Chinese, English];
11 |         // let mut languages = Vec::with_capacity();
12 |         // languages.push(Chinese);
13 |         static ref DETECTOR: LanguageDetector = LanguageDetectorBuilder::
14 |             from_languages(&LANGS).build();
15 |     }
16 |     let detected_language: Option<Language> = DETECTOR.detect_language_of(sentence);
17 |     match detected_language {
18 |         Some(x) => x == Chinese,
19 |         None => false
20 |     }
21 | }
22 | 
23 | 
24 | 
25 | /// ```
26 | /// let l = fire_seq_search_server::language_tools::generate_stopwords_list();
27 | /// assert!(l.contains("the"));
28 | /// assert!(!l.contains("thex"));
29 | /// ```
30 | pub fn generate_stopwords_list() -> HashSet<String> {
31 |     use stopwords::Stopwords;
32 |     let mut nltk: std::collections::HashSet<&str> = stopwords::NLTK::stopwords(stopwords::Language::English).unwrap().iter().cloned().collect();
33 |     nltk.insert("span");
34 |     nltk.insert("class");
35 |     nltk.insert("fireSeqSearchHighlight");
36 | 
37 |     nltk.insert("theorem");
38 |     nltk.insert("-");
39 | 
40 |     nltk.insert("view");
41 | 
42 | 
43 |     let mut nltk: HashSet<String> = nltk.iter().map(|&s|s.into()).collect();
44 | 
45 |     for c in 'a'..='z' {
46 |         nltk.insert(String::from(c));
47 |     }
48 |     // To Improve: I should be aware about the upper/lower case for terms. -Zhenbo Li 2023-Jan-19
49 |     for c in 'A'..='Z' {
50 |         nltk.insert(String::from(c));
51 |     }
52 | 
53 |     for c in '0'..='9' {
54 |         nltk.insert(String::from(c));
55 |     }
56 | 
57 | 
58 |     let words = stop_words::get(stop_words::LANGUAGE::English);
59 |     for w in words {
60 |         nltk.insert(w);
61 |     }
62 |     let words = stop_words::get(stop_words::LANGUAGE::Chinese);
63 |     for w in words {
64 |         nltk.insert(w);
65 |     }
66 |     for c in ['的', '有'] {
67 |         nltk.insert(String::from(c));
68 |     }
69 | 
70 |     for s in crate::language_tools::cn_stopwords::cn_stopwords_list() {
71 |         nltk.insert(String::from(s));
72 |     }
73 |     for s in crate::language_tools::cn_stopwords::cn_hit_stopword_list() {
74 |         nltk.insert(String::from(s));
75 |     }
76 | 
77 |     nltk
78 | }
79 | 
80 | 
81 | #[cfg(test)]
82 | mod test_language_detect {
83 |     #[test]
84 |     fn zh() {
85 |         use crate::language_tools::is_chinese;
86 |         assert!(is_chinese("李华"));
87 |         assert!(!is_chinese("rust"));
88 |         assert!(!is_chinese("Это статья ."));
89 |     }
90 | }
91 | // assert_eq!(detected_language, Some(English));
92 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/language_tools/tokenizer.rs:
--------------------------------------------------------------------------------
  1 | use std::collections::HashSet;
  2 | use log::{debug, info};
  3 | 
  4 | /// ```
  5 | /// let l = fire_seq_search_server::language_tools::generate_stopwords_list();
  6 | /// assert!(l.contains("the"));
  7 | /// assert!(!l.contains("thex"));
  8 | ///
  9 | /// let terms = vec![String::from("the"), String::from("The"), String::from("answer")];
 10 | /// let result = fire_seq_search_server::language_tools::tokenizer::filter_out_stopwords(&terms, &l);
 11 | /// assert_eq!(result.len(), 1);
 12 | /// ```
 13 | pub fn filter_out_stopwords<'a,'b>(term_tokens: &'a [String], nltk: &'b HashSet<String>) -> Vec<&'a str> {
 14 |     let term_ref: Vec<&str> = term_tokens.iter().map(|s| &**s).collect();
 15 |     let terms_selected: Vec<&str> = term_ref.into_iter()
 16 |         .filter(|&s| ! (s.trim().is_empty()  ) )
 17 |         .filter(|&s| !nltk.contains(&(&s).to_lowercase()  )  )
 18 |         .collect();
 19 |     terms_selected
 20 | }
 21 | 
 22 | 
 23 | 
 24 | pub fn tokenize(sentence: &str) -> Vec<String> {
 25 |     /*
 26 |     lazy_static! {
 27 |         static ref TK: crate::JiebaTokenizer = crate::JiebaTokenizer {};
 28 |     }
 29 |     */
 30 |     if crate::language_tools::is_chinese(sentence) {
 31 |         info!("Use Tokenizer for Chinese term {}", sentence);
 32 |         let mut jieba = FireSeqTokenizer {};
 33 |         //TODO don't create a tokenizer every time
 34 |         crate::tokenize_sentence_to_text_vec(&mut jieba, sentence)
 35 |     } else {
 36 |         // info!("Space Tokenizer {}", sentence);
 37 |         let result : Vec<&str> = sentence.split_whitespace()
 38 |             .collect();
 39 |         debug!("Got tokens {:?}", &result);
 40 |         let result:Vec<String> = result.iter().map(|&s|s.into()).collect();
 41 |         result
 42 |         // vec![String::from(sentence)]
 43 |     }
 44 | }
 45 | 
 46 | use lazy_static::lazy_static;
 47 | use tantivy_tokenizer_api::{Token, TokenStream, Tokenizer};
 48 | 
 49 | lazy_static! {
 50 |     static ref JIEBA: jieba_rs::Jieba = jieba_rs::Jieba::new();
 51 | }
 52 | 
 53 | pub const TOKENIZER_ID: &str = "fireseq_tokenizer";
 54 | 
 55 | #[derive(Clone)]
 56 | pub struct FireSeqTokenizer;
 57 | 
 58 | 
 59 | 
 60 | pub struct JiebaTokenStream {
 61 |     tokens: Vec<Token>,
 62 |     index: usize,
 63 | }
 64 | 
 65 | impl TokenStream for JiebaTokenStream {
 66 |     fn advance(&mut self) -> bool {
 67 |         if self.index < self.tokens.len() {
 68 |             self.index = self.index + 1;
 69 |             true
 70 |         } else {
 71 |             false
 72 |         }
 73 |     }
 74 |     fn token(&self) -> &Token {
 75 |         &self.tokens[self.index - 1]
 76 |     }
 77 |     fn token_mut(&mut self) -> &mut Token {
 78 |         &mut self.tokens[self.index - 1]
 79 |     }
 80 | }
 81 | 
 82 | impl Tokenizer for FireSeqTokenizer {
 83 |     type TokenStream<'a> = JiebaTokenStream;
 84 |     fn token_stream<'a>(&mut self, text: &'a str) -> JiebaTokenStream {
 85 |         let mut indices = text.char_indices().collect::<Vec<_>>();
 86 |         indices.push((text.len(), '\0'));
 87 |         let orig_tokens = JIEBA.tokenize(text, jieba_rs::TokenizeMode::Search, true);
 88 |         let mut tokens = Vec::new();
 89 |         // copy tantivy-jieba code for now
 90 |         for token in orig_tokens {
 91 |             tokens.push(Token {
 92 |                 offset_from: indices[token.start].0,
 93 |                 offset_to: indices[token.end].0,
 94 |                 position: token.start,
 95 |                 text: String::from(&text[(indices[token.start].0)..(indices[token.end].0)]),
 96 |                 position_length: token.end - token.start,
 97 |             });
 98 |         }
 99 |         /*
100 |         for i in 0..orig_tokens.len() {
101 |             let token = &orig_tokens[i];
102 |             match process_token_text(text, &indices, &token) {
103 |                 Some(text) => tokens.push(Token {
104 |                     offset_from: indices[token.start].0,
105 |                     offset_to: indices[token.end].0,
106 |                     position: token.start,
107 |                     text,
108 |                     position_length: token.end - token.start,
109 |                 }),
110 |                 None => ()
111 |             }
112 | 
113 |         }
114 |         */
115 |         JiebaTokenStream { tokens, index: 0 }
116 |     }
117 | }
118 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/lib.rs:
--------------------------------------------------------------------------------
  1 | pub mod post_query;
  2 | pub mod load_notes;
  3 | pub mod markdown_parser;
  4 | pub mod language_tools;
  5 | pub mod http_client;
  6 | pub mod query_engine;
  7 | pub mod word_frequency;
  8 | pub mod local_llm;
  9 | 
 10 | 
 11 | use log::debug;
 12 | use crate::query_engine::ServerInformation;
 13 | use crate::query_engine::NotebookSoftware::Logseq;
 14 | 
 15 | 
 16 | #[macro_use]
 17 | extern crate lazy_static;
 18 | 
 19 | pub static JOURNAL_PREFIX: &str = "@journal@";
 20 | 
 21 | 
 22 | pub struct Article {
 23 |     #[allow(dead_code)] /* TODO rethink if we need it 2024 Sep 21 */
 24 |     file_name: String,
 25 |     content: String
 26 | }
 27 | 
 28 | // Based on https://github.com/jiegec/tantivy-jieba
 29 | // tantivy-jieba is licensed under MIT, Copyright 2019-2020 Jiajie Chen
 30 | // I had heavy modifications on it
 31 | /*
 32 | lazy_static! {
 33 |     static ref JIEBA: jieba_rs::Jieba = jieba_rs::Jieba::new();
 34 | }
 35 | */
 36 | 
 37 | //pub const TOKENIZER_ID: &str = "fss_tokenizer";
 38 | 
 39 | 
 40 | /*
 41 | impl Tokenizer for JiebaTokenizer {
 42 |     type TokenStream<'a> = JiebaTokenStream;
 43 |     fn token_stream<'a>(&mut self, text: &'a str) -> JiebaTokenStream {
 44 |         let mut indices = text.char_indices().collect::<Vec<_>>();
 45 |         indices.push((text.len(), '\0'));
 46 |         let jieba : jieba_rs::Jieba = jieba_rs::Jieba::new(); //TODO use a static one
 47 |         let orig_tokens = jieba.tokenize(text, jieba_rs::TokenizeMode::Search, true);
 48 |         let mut tokens = Vec::new();
 49 |         for i in 0..orig_tokens.len() {
 50 |             let token = &orig_tokens[i];
 51 |             match process_token_text(text, &indices, &token) {
 52 |                 Some(text) => tokens.push(Token {
 53 |                     offset_from: indices[token.start].0,
 54 |                     offset_to: indices[token.end].0,
 55 |                     position: token.start,
 56 |                     text,
 57 |                     position_length: token.end - token.start,
 58 |                 }),
 59 |                 None => ()
 60 |             }
 61 | 
 62 |         }
 63 |         JiebaTokenStream { tokens, index: 0 }
 64 | 
 65 |     }
 66 | }
 67 | */
 68 | 
 69 | /*
 70 | Thoughts on lowercase  2022-07-04:
 71 | tanvity's default tokenizer will lowercase all English characters.
 72 |     https://docs.rs/tantivy/latest/tantivy/tokenizer/index.html
 73 |     I'm just trying my best to simulate it
 74 | However, I think there could be a better approach
 75 | 1. use https://github.com/pemistahl/lingua-rs to determine the language of the text
 76 | 2. Select proper tokenizer
 77 | fn process_token_text(text: &str, indices: &Vec<(usize, char)>, token: &jieba_rs::Token<'_>) -> Option<String> {
 78 |     let raw = String::from(&text[(indices[token.start].0)..(indices[token.end].0)]);
 79 |     let lower = raw.to_lowercase();
 80 |     if lower.trim().is_empty() {
 81 |         None
 82 |     } else {
 83 |         Some(lower)
 84 |     }
 85 | }
 86 |  */
 87 | 
 88 | // TODO use stub now
 89 | pub fn tokenize_default(sentence: &str) -> Vec<String> {
 90 |     let mut r = Vec::new();
 91 |     r.push(sentence.to_owned());
 92 |     r
 93 | }
 94 | /*
 95 | // TODO: Move tokenizer-related things into language_tools
 96 | pub fn tokenize_default(sentence: &str) -> Vec<String> {
 97 |     /*
 98 |     lazy_static! {
 99 |         static ref TK: JiebaTokenizer = crate::JiebaTokenizer {};
100 |     }
101 |     */
102 |     // TODO use static tokenizer
103 |     let mut tokenizer = crate::JiebaTokenizer{};
104 |     if language_tools::is_chinese(sentence) {
105 |         info!("Use Tokenizer for Chinese term {}", sentence);
106 |         tokenize_sentence_to_text_vec(&mut tokenizer, sentence)
107 |     } else {
108 |         // info!("Space Tokenizer {}", sentence);
109 |         let result : Vec<&str> = sentence.split_whitespace()
110 |             .collect();
111 |         // debug!("Got tokens {:?}", &result);
112 |         let result:Vec<String> = result.iter().map(|&s|s.into()).collect();
113 |         result
114 |         // vec![String::from(sentence)]
115 |     }
116 | }
117 | */
118 | 
119 | 
120 | use crate::language_tools::tokenizer::FireSeqTokenizer;
121 | pub fn tokenize_sentence_to_text_vec(tokenizer: &mut FireSeqTokenizer, sentence: &str) -> Vec<String> {
122 |     let tokens = tokenize_sentence_to_vector(tokenizer, sentence);
123 |     tokens_to_text_vec(&tokens)
124 | }
125 | pub fn tokenize_sentence_to_vector(tokenizer: &mut FireSeqTokenizer, sentence: &str)  ->  Vec<tantivy::tokenizer::Token> {
126 |     use tantivy::tokenizer::*;
127 |     let mut token_stream = tokenizer.token_stream(
128 |         sentence
129 |     );
130 |     let mut tokens = Vec::new();
131 | 
132 |     while let Some(token) = token_stream.next() {
133 |         tokens.push(token.clone());
134 | 
135 |     }
136 |     tokens
137 | }
138 | pub fn tokens_to_text_vec(tokens: &Vec<tantivy::tokenizer::Token>) -> Vec<String> {
139 |     let mut token_text = Vec::new();
140 |     for token in tokens {
141 |         token_text.push(token.text.clone());
142 |     }
143 |     token_text
144 | }
145 | 
146 | 
147 | 
148 | pub fn decode_cjk_str(original: String) -> Vec<String> {
149 |     use urlencoding::decode;
150 | 
151 |     let mut result = Vec::new();
152 |     for s in original.split(' ') {
153 |         let t = decode(s).expect("UTF-8");
154 |         debug!("Decode {}  ->   {}", s, t);
155 |         result.push(String::from(t));
156 |     }
157 | 
158 |     result
159 | }
160 | 
161 | 
162 | 
163 | // ============= BELOW IS TEST CASES ====================
164 | pub fn generate_server_info_for_test() -> ServerInformation {
165 |     let server_info = ServerInformation {
166 |         notebook_path: "stub_path".to_string(),
167 |         notebook_name: "logseq_notebook".to_string(),
168 |         enable_journal_query: false,
169 |         show_top_hits: 0,
170 |         show_summary_single_line_chars_limit: 0,
171 |         parse_pdf_links: false,
172 |         exclude_zotero_items: false,
173 |         software: Logseq,
174 |         convert_underline_hierarchy: true,
175 |         host: "127.0.0.1:22024".to_string(),
176 |         llm_enabled: false,
177 |         llm_max_waiting_time: 60,
178 |     };
179 |     server_info
180 | }
181 | 
182 | /*
183 | #[cfg(test)]
184 | mod test_tokenizer {
185 |     #[test]
186 |     fn english() {
187 |         let _tokens = base("Travel to japan", vec!["travel", "to", "japan"]);
188 |     }
189 | 
190 |     #[test]
191 |     fn simple_zh() {
192 |         let tokens = base("张华考上了北京大学；李萍进了中等技术学校；我在百货公司当售货员：我们都有光明的前途",
193 |              vec![
194 |                  // "a",
195 |                  "张华",
196 |                  "考上",
197 |                  "了",
198 |                  "北京",
199 |                  "大学",
200 |                  "北京大学",
201 |                  "；",
202 |                  "李萍",
203 |                  "进",
204 |                  "了",
205 |                  "中等",
206 |                  "技术",
207 |                  "术学",
208 |                  "学校",
209 |                  "技术学校",
210 |                  "；",
211 |                  "我",
212 |                  "在",
213 |                  "百货",
214 |                  "公司",
215 |                  "百货公司",
216 |                  "当",
217 |                  "售货",
218 |                  "货员",
219 |                  "售货员",
220 |                  "：",
221 |                  "我们",
222 |                  "都",
223 |                  "有",
224 |                  "光明",
225 |                  "的",
226 |                  "前途"
227 |              ]
228 |         );
229 |         // offset should be byte-indexed
230 |         assert_eq!(tokens[0].offset_from, 0);
231 |         assert_eq!(tokens[0].offset_to, "张华".bytes().len());
232 |         assert_eq!(tokens[1].offset_from, "张华".bytes().len());
233 |     }
234 |     fn base(sentence: &str, expect_tokens: Vec<&str>) ->  Vec<tantivy::tokenizer::Token> {
235 | 
236 |         use crate::{tokenize_sentence_to_vector,tokens_to_text_vec};
237 |         let tokenizer = crate::JiebaTokenizer {};
238 |         let tokens = tokenize_sentence_to_vector(&tokenizer, sentence);
239 |         let token_text = tokens_to_text_vec(&tokens);
240 |         // check tokenized text
241 |         assert_eq!(
242 |             token_text,
243 |             expect_tokens
244 |         );
245 |         tokens
246 |     }
247 | 
248 | 
249 | }
250 | */
251 | 
252 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/load_notes/mod.rs:
--------------------------------------------------------------------------------
  1 | use log::{debug, error, info};
  2 | use std::process;
  3 | 
  4 | use crate::query_engine::ServerInformation;
  5 | 
  6 | 
  7 | use std::borrow::Cow;
  8 | use std::borrow::Borrow;
  9 | 
 10 | #[derive(Debug, Clone)]
 11 | pub struct NoteListItem {
 12 |     pub realpath: String,
 13 |     pub title:    String,
 14 | }
 15 | 
 16 | use crate::query_engine::NotebookSoftware;
 17 | pub fn retrive_note_list(server_info: &ServerInformation) -> Vec<NoteListItem> {
 18 |     let path: &str = &server_info.notebook_path;
 19 | 
 20 |     let note_list = match &server_info.software {
 21 |         NotebookSoftware::Obsidian => list_directory( Cow::from(path) , true),
 22 |         NotebookSoftware::Logseq => {
 23 |             let pp = path.to_string() + "/pages";
 24 |             let mut pages = list_directory( Cow::from(pp), false );
 25 | 
 26 |             // TODO Journal prefix
 27 |             let pp = path.to_string() + "/journals";
 28 |             let jours = list_directory( Cow::from(pp), false );
 29 | 
 30 |             pages.extend(jours);
 31 |             pages
 32 |         },
 33 |     };
 34 |     // TODO didn't handle logseq
 35 |     note_list
 36 | }
 37 | 
 38 | fn list_directory(path: Cow<'_, str>, recursive: bool) -> Vec<NoteListItem> {
 39 |     debug!("Listing directory {}", &path);
 40 |     let mut result = Vec::new();
 41 | 
 42 |     let path_ref: &str = path.borrow();
 43 |     let notebooks = match std::fs::read_dir(path_ref) {
 44 |         Ok(x) => x,
 45 |         Err(e) => {
 46 |             error!("Fatal error ({:?}) when reading {}", e, &path);
 47 |             process::abort();
 48 |         }
 49 |     };
 50 | 
 51 |     for note_result in notebooks {
 52 |         let entry = match note_result {
 53 |             Ok(x) => x,
 54 |             Err(e) => {
 55 |                 error!("Error during looping {:?}", &e);
 56 |                 continue;
 57 |             }
 58 |         };
 59 |         let file_type = match entry.file_type() {
 60 |             Ok(x) => x,
 61 |             Err(e) => {
 62 |                 error!("Error: Can't get file type {:?}  {:?}", &entry, &e);
 63 |                 continue;
 64 |             }
 65 |         };
 66 | 
 67 |         let entry_path = entry.path();
 68 |         let entry_path_str = entry_path.to_string_lossy();
 69 | 
 70 |         if file_type.is_dir() {
 71 |             if recursive {
 72 |                 let next = list_directory(entry_path_str, true);
 73 |                 result.extend(next);
 74 |             }
 75 |             continue;
 76 |         }
 77 | 
 78 |         if !entry_path_str.ends_with(".md") {
 79 |             info!("skip non-md file {:?}", &entry);
 80 |             continue;
 81 |         }
 82 | 
 83 |         let note_title = match entry_path.file_stem() {
 84 |             Some(osstr) => osstr.to_str().unwrap(),
 85 |             None => {
 86 |                 error!("Couldn't get file_stem for {:?}", entry_path);
 87 |                 continue;
 88 |             }
 89 |         };
 90 |         let row = NoteListItem {
 91 |             realpath: entry_path_str.to_string(),
 92 |             title: note_title.to_string(),
 93 |         };
 94 |         result.push(row);
 95 |     }
 96 |     return result;
 97 | }
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/local_llm/example_llama_response.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "choices": [
 3 |     {
 4 |       "finish_reason": "stop",
 5 |       "index": 0,
 6 |       "message": {
 7 |         "content": " It seems like there might be some confusion in your question. \"MS file format\" typically refers to the Microsoft Office document file formats, such as .docx, .xlsx, and .pptx.\n\nHowever, if you meant to ask about the WIF file format, then here's some information for you:\n\nWIF (Windows Image File) is not a widely used file format. It is a proprietary file format used by Microsoft's Windows Imaging Component (WIC) for storing and manipulating image data. WIF files can contain multiple images, each with its own metadata, and can be used for tasks such as image processing, thumbnail generation, and icon extraction.\n\nWIF files are not meant to be opened or edited by users directly, but rather are used as input and output files for applications that use the WIC API. If you need to work with WIF files, you would typically use a programming language and the WIC API to read and write the files.\n\nI hope this information helps clarify any confusion around the MS file format and the WIF file format. Let me know if you have any other questions!",
 8 |         "role": "assistant"
 9 |       }
10 |     }
11 |   ],
12 |   "created": 1724517653,
13 |   "id": "chatcmpl-4B",
14 |   "model": "model",
15 |   "object": "chat.completion",
16 |   "usage": {
17 |     "completion_tokens": 247,
18 |     "prompt_tokens": 14,
19 |     "total_tokens": 261
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/local_llm/mod.rs:
--------------------------------------------------------------------------------
  1 | use log::{info, error};
  2 | use crate::query_engine::DocData;
  3 | 
  4 | use std::collections::HashMap;
  5 | use std::collections::VecDeque;
  6 | use std::process::{Command, Stdio};
  7 | use std::fs::File;
  8 | 
  9 | use std::sync::Arc;
 10 | use tokio::sync::Mutex;
 11 | use tokio::task::yield_now;
 12 | use tokio::task;
 13 | use tokio;
 14 | 
 15 | use std::borrow::Cow;
 16 | use std::borrow::Cow::Borrowed;
 17 | 
 18 | 
 19 | //#[cfg(feature = "llm")]
 20 | use {
 21 |     reqwest,
 22 |     reqwest::StatusCode,
 23 |     shellexpand::tilde,
 24 | 
 25 |     serde_derive::Deserialize,
 26 |     serde_derive::Serialize,
 27 | };
 28 | 
 29 | 
 30 | 
 31 | // TODO Allow user to set prompt, instead of hard-coded in code
 32 | const HARD_CODED_PROMPT_STR: &'static str = r##"
 33 | You are a seasoned summary expert, capable of condensing and summarizing given articles, papers, or posts, accurately conveying the main idea to make the content easier to understand.
 34 | 
 35 | You place great emphasis on user experience, never adding irrelevant content like "Summary," "The summary is as follows," "Original text," "You can check the original text if interested," or "Original link." Your summaries always convey the core information directly.
 36 | 
 37 | You are adept at handling various large, small, and even chaotic text content, always accurately extracting key information and summarizing the core content globally to make it easier to understand.
 38 | 
 39 | === Below is the article ===
 40 | 
 41 | "##;
 42 | 
 43 | // Generated by https://transform.tools/json-to-rust-serde
 44 | #[derive(Debug, Serialize, Deserialize)]
 45 | pub struct OpenAiData {
 46 |     pub model: String,
 47 |     pub messages: Vec<Message>,
 48 | }
 49 | 
 50 | #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
 51 | pub struct LlamaResponse {
 52 |     pub choices: Vec<Choice>,
 53 |     pub created: i64,
 54 |     pub id: String,
 55 |     pub model: String,
 56 |     pub object: String,
 57 |     pub usage: Usage,
 58 | }
 59 | 
 60 | #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
 61 | pub struct Choice {
 62 |     pub finish_reason: String,
 63 |     pub index: i64,
 64 |     pub message: Message,
 65 | }
 66 | 
 67 | #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
 68 | pub struct Message {
 69 |     pub content: String,
 70 |     pub role: String,
 71 | }
 72 | 
 73 | #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
 74 | pub struct Usage {
 75 |     pub completion_tokens: i64,
 76 |     pub prompt_tokens: i64,
 77 |     pub total_tokens: i64,
 78 | }
 79 | 
 80 | #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
 81 | pub struct HealthCheck {
 82 |     pub slots_idle: i64,
 83 |     pub slots_processing: i64,
 84 |     pub status: String,
 85 | }
 86 | 
 87 | // End genereated
 88 | 
 89 | const LLM_SERVER_PORT: &str = "8081"; // TODO Remove this magic number
 90 | 
 91 | 
 92 | #[derive(Debug)]
 93 | pub struct LlmJob {
 94 |     pub title: String,
 95 |     pub body : String,
 96 |     pub time : std::time::Instant, /* 16 bytes */
 97 | }
 98 | 
 99 | struct JobProcessor {
100 |     done_job: HashMap<String, String>,
101 |     job_queue: VecDeque<LlmJob>,
102 | }
103 | 
104 | impl JobProcessor {
105 |     pub fn new() -> Self {
106 |         JobProcessor {
107 |             done_job: HashMap::new(),
108 |             job_queue: VecDeque::new(),
109 |         }
110 |     }
111 |     pub fn add(&mut self, doc:DocData) {
112 |         let title: &str = &doc.title;
113 |         info!("Job posted for {}", &title);
114 |         if !self.done_job.contains_key(title) {
115 |             let job: LlmJob = LlmJob {
116 |                 title: doc.title,
117 |                 body:  doc.body,
118 |                 time:  std::time::Instant::now(),
119 |             };
120 |             self.job_queue.push_back(job);
121 |         }
122 |     }
123 | }
124 | 
125 | use crate::ServerInformation;
126 | 
127 | 
128 | use sysinfo::Pid;
129 | 
130 | pub struct LlmEngine {
131 |     endpoint: String,
132 |     client: reqwest::Client,
133 |     job_cache: Arc<Mutex<JobProcessor>>,
134 |     server_info: Arc<ServerInformation>,
135 |     engine_pid: Pid,
136 | }
137 | 
138 | 
139 | 
140 | impl LlmEngine {
141 |     pub fn pid_hit_list(&self)->Pid { // TODO not a list yet
142 |         return self.engine_pid;
143 |     }
144 |     pub async fn llm_init(server_info: Arc<ServerInformation>) -> Self {
145 |         info!("llm called");
146 | 
147 |         let lfile = locate_llamafile().await;
148 |         let lfile:String = lfile.unwrap();
149 | 
150 |         let cmd = Command::new("nice")
151 |             .args([ "-n", "19",
152 |                 &lfile, "--nobrowser",
153 |                 "--port", LLM_SERVER_PORT,
154 |             ])
155 |             .stdout(Stdio::from(File::create("/tmp/llamafile.stdout.txt").unwrap()))
156 |             .stderr(Stdio::from(File::create("/tmp/llamafile.stderr.txt").unwrap()))
157 |             .spawn()
158 |             .expect("llm model failed to launch");
159 | 
160 |         yield_now().await;
161 |         let wait_llm = tokio::time::Duration::from_millis(500);
162 |         tokio::time::sleep(wait_llm).await;
163 |         task::yield_now().await;
164 | 
165 |         let endpoint = format!("http://127.0.0.1:{}", LLM_SERVER_PORT).to_string();
166 | 
167 |         loop {
168 |             let resp = reqwest::get(endpoint.to_owned() + "/health").await;
169 |             let resp = match resp {
170 |                 Err(_e) => {
171 |                     info!("llm not ready");
172 |                     let wait_llm = tokio::time::Duration::from_millis(1000);
173 |                     tokio::time::sleep(wait_llm).await;
174 |                     task::yield_now().await;
175 |                     continue;
176 |                 },
177 |                 Ok(r) => r,
178 |             };
179 |             if resp.status() != StatusCode::from_u16(200).unwrap() {
180 |                 info!("endpoint failed");
181 |                 //TODO error handling
182 |             }
183 |             break;
184 |         }
185 | 
186 |         let client = reqwest::Client::new();
187 | 
188 |         info!("llm engine initialized");
189 |         let map = Arc::new(Mutex::new(
190 |                 JobProcessor::new()));
191 |         Self {
192 |             endpoint,
193 |             client,
194 |             job_cache: map,
195 |             server_info,
196 |             engine_pid: Pid::from_u32(cmd.id()),
197 |         }
198 |     }
199 | 
200 |     fn build_data(full_text: Cow<'_, str>) -> OpenAiData {
201 | 
202 |         fn build_message(chat:String) -> Message {
203 |             Message{
204 |                 role: "user".to_owned(),
205 |                 content: chat,
206 |             }
207 |         }
208 |         let mut msgs = Vec::new();
209 | 
210 |         let prompt_string = &HARD_CODED_PROMPT_STR;
211 |         let mut chat_text = prompt_string.to_string();
212 |         chat_text += &full_text;
213 |         msgs.push( build_message(chat_text) );
214 | 
215 |         OpenAiData {
216 |             model: "model".to_owned(),
217 |             messages: msgs,
218 |         }
219 |     }
220 | }
221 | 
222 | impl LlmEngine{
223 |     pub async fn summarize(&self, full_text: &str) -> String {
224 |         //http://localhost:8080/completion
225 |         let ep = self.endpoint.to_owned() + "/v1/chat/completions";
226 |         let data = Self::build_data( Borrowed(full_text) );
227 |         let res = self.client.post(&ep)
228 |             .header("Content-Type", "application/json")
229 |             .json(&data)
230 |             .send()
231 |             .await
232 |             .unwrap();
233 |         let content = res.text().await.unwrap();
234 |         let parsed: LlamaResponse = serde_json::from_str(&content).unwrap();
235 |         let v = parsed.choices;
236 |         let v0 = v.into_iter().next().unwrap();
237 |         v0.message.content
238 |         //TODO remove unwrap
239 |     }
240 | 
241 |     pub async fn post_summarize_job(&self, doc: DocData) {
242 |         //TODO error handler?
243 |         let mut jcache = self.job_cache.lock().await;//.unwrap();
244 |         jcache.add(doc);
245 |         drop(jcache);
246 |     }
247 | 
248 |     pub async fn call_llm_engine(&self) {
249 |         let health = self.health().await.unwrap();
250 |         if health.slots_idle == 0 {
251 |             info!("No valid slot, continue");
252 |             return;
253 |         }
254 | 
255 |         let next_job: Option<LlmJob>;
256 | 
257 |         let mut jcache = self.job_cache.lock().await;//.unwrap();
258 |         next_job = jcache.job_queue.pop_front();
259 |         drop(jcache);
260 | 
261 |         let doc = match next_job {
262 |             Some(x) => x,
263 |             None => { return; },
264 |         };
265 | 
266 |         let title = doc.title.to_owned();
267 | 
268 |         let jcache = self.job_cache.lock().await;
269 |         if jcache.done_job.contains_key(&title) {
270 |             return;
271 |         }
272 |         drop(jcache);
273 | 
274 |         let waiting_time = doc.time.elapsed().as_secs();
275 |         let allowed_wait = self.server_info.llm_max_waiting_time;
276 |         if waiting_time > allowed_wait {
277 |             info!("Waiting for {} for {} seconds, discard",
278 |                 &title, waiting_time);
279 |             return;
280 |         }
281 | 
282 | 
283 |         info!("Start summarize job:  {}", &title);
284 |         let summarize_result = self.summarize(&doc.body).await;
285 |         info!("Finished summarize job:  {}", &title);
286 | 
287 |         let mut jcache = self.job_cache.lock().await;
288 |         jcache.done_job.insert(title, summarize_result);
289 |         drop(jcache);
290 |     }
291 | 
292 |     pub async fn quick_fetch(&self, title: &str) -> Option<String> {
293 |         let jcache = self.job_cache.lock().await;
294 |         return jcache.done_job.get(title).cloned();
295 |     }
296 | 
297 |     pub async fn get_llm_done_list(&self) -> Vec<String> {
298 |         let mut r = Vec::new();
299 |         let jcache = self.job_cache.lock().await;
300 |         for (title, _text) in &jcache.done_job {
301 |             r.push(title.to_owned());
302 |         }
303 |         return r;
304 |     }
305 | 
306 |     pub async fn health(&self) -> Result<HealthCheck, Box<dyn std::error::Error>>  {
307 |         let res = self.client.get(self.endpoint.to_owned() + "/health")
308 |             .send()
309 |             .await
310 |             .unwrap();
311 |         let content = res.text().await.unwrap();
312 |         let parsed: HealthCheck = serde_json::from_str(&content).unwrap();
313 |         Ok(parsed)
314 |     }
315 | }
316 | 
317 | #[derive(Debug)]
318 | struct LlamaFileDef {
319 |     pub filename: String,
320 |     pub filepath: Option<String>,
321 |     pub sha256: String,
322 |     #[allow(dead_code)] /* TODO rethink if we want auto download 2024 Sep 21 */
323 |     pub download_link: String,
324 | }
325 | 
326 | 
327 | async fn locate_llamafile() -> Option<String> {
328 |     let mut lf = LlamaFileDef {
329 |         filename: "mistral-7b-instruct-v0.2.Q4_0.llamafile".to_owned(),
330 |         filepath: None,
331 |         sha256: "1903778f7defd921347b25327ebe5dd902f29417ba524144a8e4f7c32d83dee8".to_owned(),
332 |         download_link: "mistral-7b-instruct-v0.2.Q4_0.llamafile".to_owned(),
333 |     };
334 | 
335 |     let lf_base = tilde("~/.llamafile/");
336 |     let lf_path = lf_base.to_string() + &lf.filename;
337 |     lf.filepath = Some(  lf_path.to_owned() );
338 |     info!("lf {:?}", &lf);
339 | 
340 |     let _ppath = std::path::Path::new(&lf_path);
341 |     //let val = sha256::try_digest(ppath).unwrap();
342 |     let val = "1903778f7defd921347b25327ebe5dd902f29417ba524144a8e4f7c32d83dee8";
343 |     if val != lf.sha256 {
344 |         error!("Wrong sha256sum for the model. Quit");
345 |         return None;
346 |     }
347 | 
348 |     return lf.filepath;
349 | 
350 | }
351 | 
352 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/main.rs:
--------------------------------------------------------------------------------
  1 | use log::info;
  2 | use fire_seq_search_server::query_engine::{QueryEngine, ServerInformation};
  3 | use fire_seq_search_server::local_llm::LlmEngine;
  4 | 
  5 | use fire_seq_search_server::query_engine::NotebookSoftware::*;
  6 | 
  7 | use clap::Parser;
  8 | 
  9 | #[derive(Parser)]
 10 | #[command(author, version)]
 11 | #[command(about = "Server for fireSeqSearch: hosting logseq notebooks at 127.0.0.1",
 12 |     long_about = None)]
 13 | struct Cli{
 14 |     #[arg(long="notebook_path")]
 15 |     notebook_path: String,
 16 |     #[arg(long="notebook_name")]
 17 |     notebook_name: Option<String>,
 18 | 
 19 |     #[arg(long, default_value_t = false)]
 20 |     parse_pdf_links: bool,
 21 | 
 22 |     #[arg(long, default_value_t = false)]
 23 |     obsidian_md: bool,
 24 | 
 25 |     #[arg(long,default_value_t = false)]
 26 |     enable_journal_query: bool,
 27 | 
 28 |     #[arg(long,default_value_t = false)]
 29 |     exclude_zotero_items: bool,
 30 | 
 31 |     #[arg(long,default_value_t = 10, value_name="HITS")]
 32 |     show_top_hits: usize,
 33 | 
 34 | /*
 35 |         This is really an arbitrary limit.
 36 |         https://stackoverflow.com/a/33758289/1166518
 37 |         It doesn't mean the width limit of output,
 38 |             but a threshold between short paragraph and long paragraph
 39 |  */
 40 |     #[arg(long,default_value_t = 120*2, value_name="LEN")]
 41 |     show_summary_single_line_chars_limit: usize,
 42 | 
 43 |     #[arg(long="host")]
 44 |     host: Option<String>,
 45 | }
 46 | 
 47 | use tokio::task;
 48 | 
 49 | use axum;
 50 | use axum::routing::get;
 51 | use fire_seq_search_server::http_client::endpoints;
 52 | use std::sync::Arc;
 53 | use ctrlc;
 54 | use kill_tree::{blocking::kill_tree};
 55 | 
 56 | #[tokio::main]
 57 | async fn main() {
 58 |     env_logger::builder()
 59 |         .format_timestamp(None)
 60 |         .format_target(false)
 61 |         .init();
 62 | 
 63 |     info!("main thread running");
 64 |     let matches = Cli::parse();
 65 |     let server_info: ServerInformation = build_server_info(matches);
 66 | 
 67 |     let mut llm_loader = None;
 68 |     if cfg!(feature="llm") {
 69 |         info!("LLM Enabled");
 70 |         let serv_info = Arc::new(server_info.clone());
 71 |         llm_loader = Some(task::spawn( async { LlmEngine::llm_init( serv_info ).await }));
 72 |     }
 73 | 
 74 |     let mut engine = QueryEngine::construct(server_info).await;
 75 | 
 76 |     info!("query engine build finished");
 77 |     if cfg!(feature="llm") {
 78 |         let llm:LlmEngine = llm_loader.unwrap().await.unwrap();
 79 |         let llm_arc = Arc::new(llm);
 80 |         let llm_poll = llm_arc.clone();
 81 |         engine.llm = Some(llm_arc);
 82 | 
 83 |         let _poll_handle = tokio::spawn( async move {
 84 |             loop {
 85 |                 llm_poll.call_llm_engine().await;
 86 |                 let wait_llm = tokio::time::Duration::from_millis(500);
 87 |                 tokio::time::sleep(wait_llm).await;
 88 |             }
 89 |         });
 90 |     }
 91 | 
 92 | 
 93 |     let engine_arc = std::sync::Arc::new(engine);
 94 | 
 95 |     let engine_arc_for_destructor = engine_arc.clone();
 96 |     ctrlc::set_handler(move|| {
 97 |         info!("Ctrl - C received. Exiting...");
 98 |         if cfg!(feature="llm") {
 99 |             let llm = engine_arc_for_destructor.llm.as_ref().unwrap();
100 |             let pid = llm.pid_hit_list();
101 |             info!("Kill LLM Engine by pid {}", &pid);
102 |             kill_tree(pid.as_u32()).unwrap();
103 |         }
104 |         std::process::exit(0);
105 |     }).expect("Error setting Ctrl-C handler");
106 | 
107 |     let app = axum::Router::new()
108 |         .route("/query/:term", get(endpoints::query))
109 |         .route("/server_info", get(endpoints::get_server_info))
110 |         .route("/wordcloud", get(endpoints::generate_word_cloud))
111 |         .route("/summarize/:title", get(endpoints::summarize))
112 |         .route("/llm_done_list", get(endpoints::get_llm_done_list))
113 |         .with_state(engine_arc.clone());
114 | 
115 |     let listener = tokio::net::TcpListener::bind(&engine_arc.server_info.host)
116 |         .await.unwrap();
117 |     axum::serve(listener, app).await.unwrap();
118 | }
119 | 
120 | 
121 | 
122 | fn build_server_info(args: Cli) -> ServerInformation {
123 |     let notebook_name = match args.notebook_name {
124 |         Some(x) => x.to_string(),
125 |         None => {
126 |             let chunks: Vec<&str> = args.notebook_path.split('/').collect();
127 |             let guess: &str = *chunks.last().unwrap();
128 |             info!("fire_seq_search guess the notebook name is {}", guess);
129 |             String::from(guess)
130 |         }
131 |     };
132 |     let host: String = args.host.clone().unwrap_or_else(|| "127.0.0.1:3030".to_string());
133 |     let mut software = Logseq;
134 |     if args.obsidian_md {
135 |         software = Obsidian;
136 |     }
137 |     ServerInformation{
138 |         notebook_path: args.notebook_path,
139 |         notebook_name,
140 |         enable_journal_query: args.enable_journal_query,
141 |         show_top_hits: args.show_top_hits,
142 |         show_summary_single_line_chars_limit:
143 |             args.show_summary_single_line_chars_limit,
144 |         parse_pdf_links: args.parse_pdf_links,
145 |         exclude_zotero_items:args.exclude_zotero_items,
146 |         software,
147 |         convert_underline_hierarchy: true,
148 |         host,
149 |         llm_enabled: cfg!(feature="llm"),
150 |         llm_max_waiting_time: 180,
151 |     }
152 | }
153 | 
154 | 
155 | 
156 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/markdown_parser/markdown_to_text.rs:
--------------------------------------------------------------------------------
  1 | // This file is based on https://github.com/fbecart/markdown_to_text
  2 | //
  3 | // MIT License
  4 | //
  5 | // Copyright (c) 2019 Arran France
  6 | //
  7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  8 | // of this software and associated documentation files (the "Software"), to deal
  9 | // in the Software without restriction, including without limitation the rights
 10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 | // copies of the Software, and to permit persons to whom the Software is
 12 | // furnished to do so, subject to the following conditions:
 13 | //
 14 | // The above copyright notice and this permission notice shall be included in all
 15 | // copies or substantial portions of the Software.
 16 | //
 17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 23 | // SOFTWARE.
 24 | 
 25 | 
 26 | #![warn(clippy::all, clippy::pedantic)]
 27 | 
 28 | 
 29 | use log::{debug, warn};
 30 | use pulldown_cmark::{Event, Options, Parser, Tag};
 31 | use crate::markdown_parser::pdf_parser::try_parse_pdf;
 32 | use crate::query_engine::ServerInformation;
 33 | 
 34 | pub fn convert_from_logseq(markdown:&str, document_title: &str, server_info: &ServerInformation) -> String {
 35 |     let mut options = Options::empty();
 36 |     options.insert(Options::ENABLE_STRIKETHROUGH);
 37 | 
 38 |     let parser = Parser::new_ext(&markdown, options);
 39 |     let mut tags_stack = Vec::new();
 40 |     let mut buffer = String::default();
 41 | 
 42 |     // For each event we push into the buffer to produce the plain text version.
 43 |     for event in parser {
 44 |         // println!("{:?}", &event);
 45 |         match event {
 46 |             // The start and end events don't contain the text inside the tag. That's handled by the `Event::Text` arm.
 47 |             // However, pdf is considered as Image, and will be specially handled when parsing end tag
 48 |             Event::Start(tag) => {
 49 |                 start_tag(&tag, &mut buffer, &mut tags_stack);
 50 |                 tags_stack.push(tag);
 51 |             }
 52 |             Event::End(tag) => {
 53 |                 tags_stack.pop();
 54 |                 end_tag(&tag, &mut buffer, &tags_stack);
 55 |                 if server_info.parse_pdf_links {
 56 |                     let pdf_str = try_parse_pdf(&tag, server_info);
 57 |                     match pdf_str {
 58 |                         Some(s) => {
 59 |                             debug!("PDF document {:?} appended to {}", &tag, document_title);
 60 |                             buffer.push_str(&s)
 61 |                         },
 62 |                         None => ()
 63 |                     }
 64 |                 }
 65 |             }
 66 |             Event::Text(content) => {
 67 |                 if !tags_stack.iter().any(is_strikethrough) {
 68 |                     buffer.push_str(&content)
 69 |                 }
 70 |             }
 71 |             Event::Code(content) => buffer.push_str(&content),
 72 |             Event::SoftBreak => buffer.push(' '),
 73 |             _ => (),
 74 |         }
 75 |     }
 76 |     buffer.trim().to_string()
 77 | }
 78 | 
 79 | 
 80 | 
 81 | #[must_use]
 82 | pub fn convert(markdown: &str) -> String {
 83 |     // GFM tables and tasks lists are not enabled.
 84 |     let mut options = Options::empty();
 85 |     options.insert(Options::ENABLE_STRIKETHROUGH);
 86 | 
 87 |     let parser = Parser::new_ext(&markdown, options);
 88 |     let mut tags_stack = Vec::new();
 89 |     let mut buffer = String::default();
 90 | 
 91 |     // For each event we push into the buffer to produce the plain text version.
 92 |     for event in parser {
 93 |         match event {
 94 |             // The start and end events don't contain the text inside the tag. That's handled by the `Event::Text` arm.
 95 |             Event::Start(tag) => {
 96 |                 start_tag(&tag, &mut buffer, &mut tags_stack);
 97 |                 tags_stack.push(tag);
 98 |             }
 99 |             Event::End(tag) => {
100 |                 tags_stack.pop();
101 |                 end_tag(&tag, &mut buffer, &tags_stack);
102 |             }
103 |             Event::Text(content) => {
104 |                 if !tags_stack.iter().any(is_strikethrough) {
105 |                     buffer.push_str(&content)
106 |                 }
107 |             }
108 |             Event::Code(content) => buffer.push_str(&content),
109 |             Event::SoftBreak => buffer.push(' '),
110 |             _ => (),
111 |         }
112 |     }
113 |     buffer.trim().to_string()
114 | }
115 | 
116 | fn start_tag(tag: &Tag, buffer: &mut String, tags_stack: &mut Vec<Tag>) {
117 |     match tag {
118 |         Tag::Link(_, _, title) | Tag::Image(_, _, title) => buffer.push_str(&title),
119 |         Tag::Item => {
120 |             buffer.push('\n');
121 |             let mut lists_stack = tags_stack
122 |                 .iter_mut()
123 |                 .filter_map(|tag| match tag {
124 |                     Tag::List(nb) => Some(nb),
125 |                     _ => None,
126 |                 })
127 |                 .collect::<Vec<_>>();
128 |             let prefix_tabs_count = lists_stack.len() - 1;
129 |             for _ in 0..prefix_tabs_count {
130 |                 buffer.push('\t')
131 |             }
132 |             if let Some(Some(nb)) = lists_stack.last_mut() {
133 |                 buffer.push_str(&nb.to_string());
134 |                 buffer.push_str(". ");
135 |                 *nb += 1;
136 |             } else {
137 |                 buffer.push_str("• ");
138 |             }
139 |         }
140 |         Tag::Paragraph | Tag::CodeBlock(_) | Tag::Heading(..) => buffer.push('\n'),
141 |         _ => (),
142 |     }
143 | }
144 | 
145 | fn end_tag(tag: &Tag, buffer: &mut String, tags_stack: &[Tag]) {
146 |     match tag {
147 |         Tag::Paragraph | Tag::Heading(..) => buffer.push('\n'),
148 |         Tag::CodeBlock(_) => {
149 |             if !buffer.ends_with('\n') {
150 |                 buffer.push('\n');
151 |             }
152 |         }
153 |         Tag::List(_) => {
154 |             let is_sublist = tags_stack.iter().any(|tag| match tag {
155 |                 Tag::List(_) => true,
156 |                 _ => false,
157 |             });
158 |             if !is_sublist {
159 |                 buffer.push('\n')
160 |             }
161 |         }
162 |         _ => (),
163 |     }
164 | }
165 | 
166 | fn is_strikethrough(tag: &Tag) -> bool {
167 |     match tag {
168 |         Tag::Strikethrough => true,
169 |         _ => false,
170 |     }
171 | }
172 | 
173 | #[cfg(test)]
174 | mod tests {
175 |     use crate::generate_server_info_for_test;
176 |     use super::convert;
177 |     use super::convert_from_logseq;
178 | 
179 |     #[test]
180 |     fn links_to_pdf() {
181 |         let markdown = r#"Refer to ![order.pdf](../assets/readings_1634910859348_0.pdf)"#;
182 |         let expected = "Refer to order.pdf";
183 |         assert_eq!(convert(markdown), expected);
184 | 
185 |         let mut info = generate_server_info_for_test();
186 |         info.notebook_path = "C:\\Users\\z2369li\\Nextcloud\\logseq_notebook".to_string();
187 |         info.parse_pdf_links = true;
188 |         // println!("{:?}", &info);
189 |         let _a = convert_from_logseq(markdown, "title", &info);
190 |     }
191 | 
192 |     #[test]
193 |     fn basic_inline_strong() {
194 |         let markdown = r#"**Hello**"#;
195 |         let expected = "Hello";
196 |         assert_eq!(convert(markdown), expected);
197 |     }
198 | 
199 |     #[test]
200 |     fn basic_inline_emphasis() {
201 |         let markdown = r#"_Hello_"#;
202 |         let expected = "Hello";
203 |         assert_eq!(convert(markdown), expected);
204 |     }
205 | 
206 |     #[test]
207 |     fn basic_header() {
208 |         let markdown = r#"# Header
209 | 
210 | ## Sub header
211 | 
212 | End paragraph."#;
213 |         let expected = "Header
214 | 
215 | Sub header
216 | 
217 | End paragraph.";
218 |         assert_eq!(convert(markdown), expected);
219 |     }
220 | 
221 |     #[test]
222 |     fn alt_header() {
223 |         let markdown = r#"
224 | Header
225 | ======
226 | 
227 | End paragraph."#;
228 |         let expected = "Header
229 | 
230 | End paragraph.";
231 |         assert_eq!(convert(markdown), expected);
232 |     }
233 | 
234 |     #[test]
235 |     fn strong_emphasis() {
236 |         let markdown = r#"**asterisks and _underscores_**"#;
237 |         let expected = "asterisks and underscores";
238 |         assert_eq!(convert(markdown), expected);
239 |     }
240 | 
241 |     #[test]
242 |     fn strikethrough() {
243 |         let markdown = r#"This was ~~erased~~ deleted."#;
244 |         let expected = "This was  deleted.";
245 |         assert_eq!(convert(markdown), expected);
246 |     }
247 | 
248 |     #[test]
249 |     fn mixed_list() {
250 |         let markdown = r#"Start paragraph.
251 | 
252 | 1. First ordered list item
253 | 2. Another item
254 | 1. Actual numbers don't matter, just that it's a number
255 |   1. Ordered sub-list
256 | 4. And another item.
257 | 
258 | End paragraph."#;
259 | 
260 |         let expected = "Start paragraph.
261 | 
262 | 1. First ordered list item
263 | 2. Another item
264 | 3. Actual numbers don't matter, just that it's a number
265 | 4. Ordered sub-list
266 | 5. And another item.
267 | 
268 | End paragraph.";
269 |         assert_eq!(convert(markdown), expected);
270 |     }
271 | 
272 |     #[test]
273 |     fn nested_lists() {
274 |         let markdown = r#"
275 | * alpha
276 | * beta
277 |     * one
278 |     * two
279 | * gamma
280 | "#;
281 |         let expected = "• alpha
282 | • beta
283 | \t• one
284 | \t• two
285 | • gamma";
286 |         assert_eq!(convert(markdown), expected);
287 |     }
288 | 
289 |     #[test]
290 |     fn list_with_header() {
291 |         let markdown = r#"# Title
292 | * alpha
293 | * beta
294 | "#;
295 |         let expected = r#"Title
296 | 
297 | • alpha
298 | • beta"#;
299 |         assert_eq!(convert(markdown), expected);
300 |     }
301 | 
302 |     #[test]
303 |     fn basic_link() {
304 |         let markdown = "I'm an [inline-style link](https://www.google.com).";
305 |         let expected = "I'm an inline-style link.";
306 |         assert_eq!(convert(markdown), expected)
307 |     }
308 | 
309 |     #[ignore]
310 |     #[test]
311 |     fn link_with_itself() {
312 |         let markdown = "Go to [https://www.google.com].";
313 |         let expected = "Go to https://www.google.com.";
314 |         assert_eq!(convert(markdown), expected)
315 |     }
316 | 
317 |     #[test]
318 |     fn basic_image() {
319 |         let markdown = "As displayed in ![img alt text](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png).";
320 |         let expected = "As displayed in img alt text.";
321 |         assert_eq!(convert(markdown), expected);
322 |     }
323 | 
324 |     #[test]
325 |     fn inline_code() {
326 |         let markdown = "This is `inline code`.";
327 |         let expected = "This is inline code.";
328 |         assert_eq!(convert(markdown), expected);
329 |     }
330 | 
331 |     #[test]
332 |     fn code_block() {
333 |         let markdown = r#"Start paragraph.
334 | ```javascript
335 | var s = "JavaScript syntax highlighting";
336 | alert(s);
337 | ```
338 | End paragraph."#;
339 |         let expected = r#"Start paragraph.
340 | 
341 | var s = "JavaScript syntax highlighting";
342 | alert(s);
343 | 
344 | End paragraph."#;
345 |         assert_eq!(convert(markdown), expected);
346 |     }
347 | 
348 |     #[test]
349 |     fn block_quote() {
350 |         let markdown = r#"Start paragraph.
351 | 
352 | > Blockquotes are very handy in email to emulate reply text.
353 | > This line is part of the same quote.
354 | 
355 | End paragraph."#;
356 |         let expected = "Start paragraph.
357 | 
358 | Blockquotes are very handy in email to emulate reply text. This line is part of the same quote.
359 | 
360 | End paragraph.";
361 |         assert_eq!(convert(markdown), expected);
362 |     }
363 | 
364 |     #[test]
365 |     fn paragraphs() {
366 |         let markdown = r#"Paragraph 1.
367 | 
368 | Paragraph 2."#;
369 |         let expected = "Paragraph 1.
370 | 
371 | Paragraph 2.";
372 |         assert_eq!(convert(markdown), expected);
373 |     }
374 | }
375 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/markdown_parser/mod.rs:
--------------------------------------------------------------------------------
 1 | mod markdown_to_text;
 2 | mod pdf_parser;
 3 | 
 4 | use std::borrow::Cow;
 5 | use regex::Regex;
 6 | use crate::query_engine::ServerInformation;
 7 | 
 8 | // https://docs.rs/regex/latest/regex/#repetitions
 9 | // https://stackoverflow.com/a/8303552/1166518
10 | pub fn exclude_advanced_query(md: Cow<'_,str>) -> Cow<'_, str> {
11 |     if !md.contains('#') {
12 |         return md;
13 |     }
14 | 
15 |     lazy_static! {
16 |         static ref RE: Regex = Regex::new(
17 |             r"\#\+BEGIN_QUERY[\S\s]+?\#\+END_QUERY")
18 |             .unwrap();
19 |     }
20 |     return RE.replace_all(&md, "    ").into_owned().into();
21 | }
22 | 
23 | fn hack_specific_chars_cow(text: Cow<str>) -> String {
24 |     //https://www.compart.com/en/unicode/U+2022
25 |     let bullet = char::from_u32(0x00002022).unwrap();
26 |     text.replace(bullet, " ")
27 | }
28 | 
29 | use crate::query_engine::NotebookSoftware;
30 | use std::borrow::Borrow;
31 | use log::info;
32 | 
33 | fn remove_obsidian_header<'a>(content: Cow<'a, str>) -> Cow<'a, str> {
34 |     lazy_static! {
35 |         static ref RE: Regex = Regex::new(
36 |             r"---[\s\S]*?---"
37 |         ).unwrap();
38 |     }
39 |     info!("from {:?}", &content);
40 |     let cr = content.borrow();
41 |     let ret: Cow<str> = RE.replace(cr, "    ");
42 |     info!("into {:?}", &ret);
43 |     ret.into_owned().into()
44 | }
45 | 
46 | pub fn parse_logseq_notebook(md: Cow<'_,str>, title: &str, server_info: &ServerInformation) -> String {
47 |     // Now we do some parsing for this file
48 |     let content = exclude_advanced_query(md);
49 |     let content = hack_specific_chars_cow(content);
50 |     let content = remove_angled_bracket(content);
51 | 
52 |     let content = Cow::from(content);
53 |     let content = match &server_info.software {
54 |         NotebookSoftware::Obsidian => remove_obsidian_header(content),
55 |         _ => content,
56 |     };
57 |     let content: String = markdown_to_text::convert_from_logseq(
58 |         &content, title, server_info);
59 | 
60 |     //let content = content.into_owned();
61 |     content
62 | 
63 | }
64 | 
65 | // TODO This function is no longer used. 2025-04-30
66 | pub fn parse_to_plain_text(md: &str) -> String {
67 |     let plain_text: String = markdown_to_text::convert(&md);
68 |     let plain_text = hack_specific_chars(plain_text);
69 |     let plain_text = remove_angled_bracket(plain_text);
70 | 
71 |     // println!("{}", &plain_text);
72 |     plain_text
73 | }
74 | 
75 | // < > will break html elements
76 | fn remove_angled_bracket(text: String) -> String {
77 |     let s1 = text.replace('<', "(");
78 |     let s2 =   s1.replace('>', ")");
79 |     s2
80 | }
81 | 
82 | fn hack_specific_chars(text: String) -> String {
83 |     //https://www.compart.com/en/unicode/U+2022
84 |     let bullet = char::from_u32(0x00002022).unwrap();
85 |     // println!("{}", bullet);
86 |     text.replace(bullet, " ")
87 | }
88 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/markdown_parser/pdf_parser.rs:
--------------------------------------------------------------------------------
 1 | 
 2 | use std::path::Path;
 3 | use log::{debug, error, info};
 4 | use pulldown_cmark::Tag;
 5 | use crate::query_engine::ServerInformation;
 6 | 
 7 | // extern crate pdf_extract;
 8 | extern crate pdf_extract_temporary_mitigation_panic;
 9 | use pdf_extract_temporary_mitigation_panic::extract_text;
10 | 
11 | pub(crate) fn try_parse_pdf(tag: &Tag, server_info: &ServerInformation) -> Option<String> {
12 | 
13 |     let destination_uri = match tag {
14 |         Tag::Image(_link_type, destination_uri, _title) => {
15 |             if !destination_uri.ends_with(".pdf") {
16 |                 return None;
17 |             }
18 |             debug!("Trying to parse PDF {:?}", tag);
19 |             // println!("{:?}", &tag);
20 |             destination_uri.replace("../", "")
21 |         },
22 |         _ => {return None;}
23 |     };
24 | 
25 |     let path = Path::new(&server_info.notebook_path);
26 |     let pdf_path = path.join(destination_uri);
27 |     // println!("{:?}, {:?}", &pdf_path, pdf_path.is_file());
28 |     if !pdf_path.is_file() {
29 |         error!("pdf_path is not a file, skipping {:?}", &pdf_path);
30 |         return None;
31 |     }
32 | 
33 | 
34 |     let text = match extract_text(&pdf_path) {
35 |             Ok(s) => {s}
36 |             Err(e) => {
37 |                 error!("Failed({:?} to load pdf {:?}", e, pdf_path);
38 |                 return None;
39 |             }
40 |     };
41 | 
42 |     match pdf_path.file_name() {
43 |         None => {error!("Extracted text len {}, file_name() failed", text.len());}
44 |         Some(f) => {info!("Extracted text from {:?} len {}", f, text.len());}
45 |     };
46 | 
47 | 
48 |     Some(text)
49 | }


--------------------------------------------------------------------------------
/fire_seq_search_server/src/post_query/app_uri.rs:
--------------------------------------------------------------------------------
 1 | use log::{error, info};
 2 | use crate::post_query::logseq_uri::{generate_logseq_uri,parse_date_from_str};
 3 | use crate::post_query::obsidian_uri::generate_obsidian_uri;
 4 | use crate::query_engine::ServerInformation;
 5 | 
 6 | 
 7 | // Maybe I should wrap them with the same interface? -Zhenbo Li 2023-Feb-05
 8 | // Deprecated on 2024-Sep-21
 9 | pub fn generate_uri(title: &str, is_page_hit: &bool, server_info: &ServerInformation) -> String {
10 |     if server_info.software == Obsidian {
11 |         info!("Generating Obsidian URI for {}", title);
12 |         if !is_page_hit {
13 |             error!("Journal is unsupported for Obsidian yet");
14 |             return String::from("https://github.com/Endle/fireSeqSearch/issues");
15 |         }
16 |         return generate_obsidian_uri(&title, *is_page_hit, &server_info);
17 |     }
18 | 
19 |     return generate_logseq_uri(&title, *is_page_hit, &server_info);
20 | }
21 | 
22 | use crate::query_engine::NotebookSoftware::{Logseq,Obsidian};
23 | 
24 | pub fn generate_uri_v2(title: &str, server_info: &ServerInformation) -> String {
25 |     match &server_info.software {
26 |         Obsidian => generate_obsidian_uri(title, true, server_info),
27 |         Logseq => {
28 |             let dt = parse_date_from_str(title);
29 |             // TODO remove this duplicate calc
30 |             //  I don't care the performance here, but I want to make code cleaner - 2024 Sep 21
31 |             generate_logseq_uri(title, dt.is_none(), server_info)
32 |         }
33 |     }
34 | }
35 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/post_query/hit_parsed.rs:
--------------------------------------------------------------------------------
  1 | use log::debug;
  2 | use crate::JOURNAL_PREFIX;
  3 | use crate::post_query::app_uri::generate_uri_v2;
  4 | use crate::post_query::highlighter::highlight_keywords_in_body;
  5 | use crate::query_engine::ServerInformation;
  6 | 
  7 | #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
  8 | pub struct FireSeqSearchHitParsed {
  9 |     // pub title: String,
 10 |     pub title: String,
 11 |     pub summary: String,
 12 |     pub score: f32,
 13 |     pub metadata: String,
 14 |     pub logseq_uri: String,
 15 | }
 16 | 
 17 | use tantivy::schema::document::OwnedValue;
 18 | impl FireSeqSearchHitParsed {
 19 | 
 20 |     //TODO remove these dup code
 21 |     fn take_str_from_doc(doc: &tantivy::TantivyDocument, pos:usize) -> &str {
 22 |         /*
 23 |         let title: &str = doc.field_values()[0].value().as_text().unwrap();
 24 |         let body: &str = doc.field_values()[1].value().as_text().unwrap();
 25 |         */
 26 |         let v: &OwnedValue =  doc.field_values()[pos].value();
 27 |         match v{
 28 |             OwnedValue::Str(s) => s,
 29 |             _ => panic!("Wrong type")
 30 |         }
 31 |     }
 32 |     pub fn from_tantivy(doc: &tantivy::TantivyDocument,
 33 |                         score: f32, term_tokens: &Vec<String>,
 34 |                         server_info: &ServerInformation) ->FireSeqSearchHitParsed {
 35 | 
 36 |         let title = Self::take_str_from_doc(doc, 0);
 37 |         let body = Self::take_str_from_doc(doc, 1);
 38 |         let summary = highlight_keywords_in_body(body, term_tokens, server_info);
 39 | 
 40 |         let mut is_page_hit = true;
 41 |         let title = if title.starts_with(JOURNAL_PREFIX) {
 42 |             assert!(server_info.enable_journal_query);
 43 |             debug!("Found a journal hit {}", title);
 44 |             is_page_hit = false;
 45 |             let t = title.strip_prefix(JOURNAL_PREFIX);
 46 |             t.unwrap().to_string()
 47 |         } else {
 48 |             title.to_string()
 49 |         };
 50 | 
 51 |         let logseq_uri = generate_uri_v2(&title, server_info);
 52 | 
 53 |         debug!("Processing a hit, title={}, uri={}, summary_len={}", &title, &logseq_uri,summary.len());
 54 | 
 55 |         let metadata: String = if is_page_hit {
 56 |             String::from("page_hit")
 57 |         } else {
 58 |             String::from("journal_hit")
 59 |         };
 60 | 
 61 |         FireSeqSearchHitParsed {
 62 |             title,
 63 |             summary,
 64 |             score,
 65 |             logseq_uri,
 66 |             metadata,
 67 |         }
 68 |     }
 69 | 
 70 |     // Wrap this part into a function, so I can document it and add tests - ZLi 2023-Jan
 71 |     pub fn serde_to_string(self: &Self) -> String {
 72 |         serde_json::to_string(&self).unwrap()
 73 |     }
 74 | 
 75 | }
 76 | 
 77 | 
 78 | 
 79 | #[cfg(test)]
 80 | mod test_serde {
 81 |     // use crate::generate_server_info_for_test;
 82 |     // use crate::post_query::hit_parsed::FireSeqSearchHitParsed;
 83 |     // use crate::post_query::logseq_uri::generate_logseq_uri;
 84 | 
 85 | 
 86 |     // fn get_parsed_hit(title: &str) -> FireSeqSearchHitParsed {
 87 |     //     let server_info = generate_server_info_for_test();
 88 |     //     let logseq_uri = generate_logseq_uri(title, &true, &server_info);
 89 |     //     FireSeqSearchHitParsed{
 90 |     //         title: title.to_owned(),
 91 |     //         summary: String::from("summary"),
 92 |     //         score: 1.0,
 93 |     //         logseq_uri,
 94 |     //         metadata: String::from("meta")
 95 |     //     }
 96 |     // }
 97 |     // fn serde(title: &str) -> String {
 98 |     //     let h = get_parsed_hit(title);
 99 |     //     h.serde_to_string()
100 |     // }
101 | 
102 |     // TODO: This solution is buggy. Consider PR#100, which might be a better idea. -Zli, 2023-Jan
103 |     // This test disabled on 2023-Feb-02 for PR #112
104 |     // #[test]
105 |     // fn test_serde_uri() {
106 |     //     assert!(serde("EU4").contains("\"logseq://graph/logseq_notebook?page=EU4\""));
107 |     //
108 |     //     assert!(serde("Games/EU4").contains("\"logseq://graph/logseq_notebook?page=Games/EU4\""));
109 |     //
110 |     // }
111 | }
112 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/post_query/logseq_uri.rs:
--------------------------------------------------------------------------------
  1 | use log::error;
  2 | use crate::ServerInformation;
  3 | use url::Url;
  4 | 
  5 | ///
  6 | ///
  7 | /// # Arguments
  8 | ///
  9 | /// * `file_name`: File name of the Logseq page, without .md
 10 | /// * `server_info`:
 11 | ///
 12 | /// returns: String
 13 | ///
 14 | /// # Examples
 15 | ///
 16 | /// ```
 17 | /// use fire_seq_search_server::post_query::logseq_uri::process_note_title;
 18 | /// let server_info = fire_seq_search_server::generate_server_info_for_test();
 19 | /// let r = process_note_title("Canada___Clothes", &server_info);
 20 | /// assert_eq!("Canada/Clothes", &r);
 21 | /// let r = process_note_title("C++", &server_info);
 22 | /// assert_eq!("C++", &r);
 23 | /// let r = process_note_title("Programming Languages%2FTypes", &server_info);
 24 | /// assert_eq!("Programming Languages/Types", &r);
 25 | /// let r = process_note_title("Context of Std%3A%3Astring (highlights)", &server_info);
 26 | /// assert_eq!("Context of Std::string (highlights)", &r);
 27 | /// ```
 28 | // I tried to put this part when loading the notebooks, and it reduced the query sensitivity
 29 | // https://github.com/Endle/fireSeqSearch/issues/99
 30 | // 2022-12-30
 31 | pub fn process_note_title(file_name: &str, server_info: &ServerInformation) -> String {
 32 |     // let file_name = file_name.replace("%2F", "/");
 33 |     let file_name = urlencoding::decode(file_name).expect("UTF-8").to_string();
 34 |     if server_info.convert_underline_hierarchy {
 35 |         return file_name.replace("___", "/");
 36 |     }
 37 |     file_name
 38 | }
 39 | 
 40 | pub fn generate_logseq_uri(title: &str, is_page_hit: bool, server_info: &ServerInformation) -> String {
 41 |     return if is_page_hit {
 42 |         let title = process_note_title(title, server_info);
 43 |         let mut uri = Url::parse("logseq://graph/").unwrap();
 44 |         uri.set_path(&server_info.notebook_name);
 45 |         uri.query_pairs_mut()
 46 |             .append_pair("page", &title);
 47 |         uri.to_string()
 48 |     } else {
 49 |         generate_logseq_journal_uri(title, server_info)
 50 | 
 51 |     };
 52 |     // logseq://graph/logseq_notebook?page=Nov%2026th%2C%202022
 53 | }
 54 | 
 55 | #[derive(PartialEq, Debug)]
 56 | pub struct JournalDate {
 57 |     pub year: u32,
 58 |     pub month: u32,
 59 |     pub date: u32,
 60 | }
 61 | 
 62 | impl JournalDate {
 63 |     pub fn to_str(&self, _: &ServerInformation) -> String {
 64 |         let mut result = Vec::new();
 65 |         result.push(match self.month {
 66 |             1 => "Jan",
 67 |             2 => "Feb",
 68 |             3 => "Mar",
 69 |             4 => "Apr",
 70 |             5 => "May",
 71 |             6 => "Jun",
 72 |             7 => "Jul",
 73 |             8 => "Aug",
 74 |             9 => "Sep",
 75 |             10 => "Oct",
 76 |             11 => "Nov",
 77 |             12 => "Dec",
 78 |             _ => {
 79 |                 error!("Unexpected month {}", self.month);
 80 |                 "ErrMonth"
 81 |             }
 82 |         }.to_string());
 83 | 
 84 |         result.push(" ".to_string());
 85 |         match  self.date {
 86 |             1|21|31 => {
 87 |                 let s = self.date.to_string();
 88 |                 result.push(s);
 89 |                 result.push("st".to_string());
 90 |             },
 91 |             2|22 => {
 92 |                 let s = self.date.to_string();
 93 |                 result.push(s);
 94 |                 result.push("nd".to_string());
 95 |             },
 96 |             3|23 => {
 97 |                 let s = self.date.to_string();
 98 |                 result.push(s);
 99 |                 result.push("rd".to_string());
100 |             },
101 |             _ => {
102 |                 let s = self.date.to_string();
103 |                 result.push(s);
104 |                 result.push("th".to_string());
105 |             }
106 |         };
107 | 
108 |         result.push(", ".to_string());
109 |         result.push(self.year.to_string());
110 | 
111 |         result.concat()
112 |     }
113 | }
114 | 
115 | 
116 | fn generate_logseq_journal_uri(title: &str, server_info: &ServerInformation) -> String {
117 |     let mut uri = Url::parse("logseq://graph/").unwrap();
118 |     uri.set_path(&server_info.notebook_name);
119 |     let dt = parse_date_from_str(title);
120 |     let dt = match dt {
121 |         None => {
122 |             error!("Failed to gen JournalDate from {}", title);
123 |             return format!("logseq://graph/{}", server_info.notebook_name);
124 |         }
125 |         Some(x) => x
126 |     };
127 |     let journal_name = dt.to_str(server_info);
128 |     format!("logseq://graph/{}?page={}",
129 |             server_info.notebook_name, journal_name);
130 |     uri.query_pairs_mut()
131 |         .append_pair("page", &journal_name);
132 |     uri.to_string()
133 | }
134 | 
135 | fn parse_slice_to_u8(slice: Option<&str>) -> Option<u32> {
136 |     match slice{
137 |         Some(x) => {
138 |             let y = x.parse::<u32>();
139 |             match y {
140 |                 Ok(i) => Some(i),
141 |                 Err(e) => {
142 |                     error!("Parse({}) Int Error:  ({:?})", x, e);
143 |                     None
144 |                 }
145 |             }
146 |         },
147 |         None => {
148 |             error!("Invalid slice");
149 |             None
150 |         }
151 | 
152 |     }
153 | }
154 | 
155 | pub fn parse_date_from_str(title: &str) -> Option<JournalDate> {
156 |     if title.len() != 10 {
157 |         return None;
158 |     }
159 | 
160 |     let year = match parse_slice_to_u8(title.get(0..4)) {
161 |         Some(x) => x,
162 |         None => {
163 |             return None;
164 |         }
165 |     };
166 |     let month = match parse_slice_to_u8(title.get(5..=6)) {
167 |         Some(x) => x,
168 |         None => {
169 |             return None;
170 |         }
171 |     };
172 |     let date = match parse_slice_to_u8(title.get(8..=9)) {
173 |         Some(x) => x,
174 |         None => {
175 |             return None;
176 |         }
177 |     };
178 |     Some(JournalDate{
179 |         year,
180 |         month,
181 |         date
182 |     })
183 | }
184 | 
185 | #[cfg(test)]
186 | mod test_logseq_uri {
187 |     use crate::generate_server_info_for_test;
188 |     use crate::post_query::logseq_uri::{generate_logseq_journal_uri, generate_logseq_uri};
189 |     use crate::post_query::logseq_uri::parse_date_from_str;
190 | 
191 | 
192 |     #[test]
193 |     fn test_parse() {
194 |         let server_info = generate_server_info_for_test();
195 |         assert_eq!(None, parse_date_from_str("22"));
196 |         let d = parse_date_from_str("2022_12_05");
197 |         assert!(d.is_some());
198 |         let d = d.unwrap();
199 |         assert_eq!(d.to_str(&server_info), "Dec 5th, 2022");
200 |     }
201 |     #[test]
202 |     fn test_generate() {
203 | 
204 |         let server_info = generate_server_info_for_test();
205 | 
206 |         // Don't encode / at here. It would be processed by serde. - 2022-11-27
207 |         let r = generate_logseq_uri("Games/EU4", true, &server_info);
208 |         assert_eq!(&r, "logseq://graph/logseq_notebook?page=Games%2FEU4");
209 | 
210 |         let r = generate_logseq_uri("Games/赛马娘", true, &server_info);
211 |         assert_eq!(&r, "logseq://graph/logseq_notebook?page=Games%2F%E8%B5%9B%E9%A9%AC%E5%A8%98");
212 |         let r = generate_logseq_journal_uri("2022_12_14", &server_info);
213 |         assert_eq!(&r,"logseq://graph/logseq_notebook?page=Dec+14th%2C+2022");
214 | 
215 |         let r = generate_logseq_uri("fireSeqSearch___test___5", true, &server_info);
216 |         assert_eq!(&r,"logseq://graph/logseq_notebook?page=fireSeqSearch%2Ftest%2F5");
217 | 
218 |         let r = generate_logseq_uri("C++", true, &server_info);
219 |         assert_eq!(&r, "logseq://graph/logseq_notebook?page=C%2B%2B");
220 |     }
221 | }
222 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/post_query/mod.rs:
--------------------------------------------------------------------------------
 1 | use log::info;
 2 | use crate::query_engine::ServerInformation;
 3 | use crate::language_tools::tokenizer::tokenize;
 4 | 
 5 | pub mod logseq_uri;
 6 | pub mod highlighter;
 7 | pub mod hit_parsed;
 8 | pub mod app_uri;
 9 | pub mod obsidian_uri;
10 | 
11 | use rayon::prelude::*;
12 | use crate::post_query::hit_parsed::FireSeqSearchHitParsed;
13 | 
14 | pub fn post_query_wrapper(top_docs: Vec<(f32, tantivy::DocAddress)>,
15 |                       term: &str,
16 |                       searcher: &tantivy::Searcher,
17 |                       server_info: &ServerInformation) -> Vec<String> {
18 |     let term_tokens = tokenize(term);
19 |     info!("get term tokens({}) {:?}", term_tokens.len(), &term_tokens);
20 |     let result: Vec<String> = top_docs.par_iter()
21 |         .map(|x| parse_and_serde(x, searcher, &term_tokens, server_info))
22 |         .collect();
23 |     result
24 | }
25 | 
26 | fn parse_and_serde(x: &(f32, tantivy::DocAddress),
27 |                    searcher: &tantivy::Searcher,
28 |                    term_tokens: &Vec<String>,
29 |                    server_info: &ServerInformation) -> String {
30 |     // FireSeqSearchHitParsed
31 |     let doc: tantivy::TantivyDocument = searcher.doc(x.1).unwrap();
32 |     let score = x.0;
33 |     let hit_parsed = FireSeqSearchHitParsed::from_tantivy(
34 |         &doc, score, term_tokens, server_info
35 |     ); // it also provides the highlight
36 |     hit_parsed.serde_to_string()
37 | }
38 | 
39 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/post_query/obsidian_uri.rs:
--------------------------------------------------------------------------------
 1 | use url::Url;
 2 | use crate::query_engine::ServerInformation;
 3 | 
 4 | ///
 5 | ///
 6 | /// # Arguments
 7 | ///
 8 | /// * `title`:
 9 | /// * `is_page_hit`:
10 | /// * `server_info`:
11 | ///
12 | /// returns: String
13 | ///
14 | /// # Examples
15 | /// obsidian://open?vault=linotes&file=fedi%20note
16 | /// ```
17 | /// use fire_seq_search_server::post_query::obsidian_uri::generate_obsidian_uri;
18 | /// let server_info = fire_seq_search_server::generate_server_info_for_test();
19 | /// let r = generate_obsidian_uri("fedi%20note", true, &server_info);
20 | /// assert_eq!("obsidian://open?vault=logseq_notebook&file=fedi%20note", &r);
21 | /// ```
22 | pub fn generate_obsidian_uri(title: &str, _is_page_hit: bool, server_info: &ServerInformation) -> String {
23 | 
24 |     let title = urlencoding::decode(title).expect("UTF-8").to_string();
25 |     let mut uri = Url::parse("obsidian://open").unwrap();
26 |     // uri.set_path(&server_info.notebook_name);
27 |     uri.query_pairs_mut()
28 |             .append_pair("vault", &server_info.notebook_name);
29 |     uri.query_pairs_mut()
30 |         .append_pair("file", &title);
31 |     let result = uri.to_string();
32 |     //TODO too hacky here
33 |     result.replace("+", "%20")
34 | 
35 | }
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/query_engine/mod.rs:
--------------------------------------------------------------------------------
  1 | // Everything about Tantivy should be hidden behind this component
  2 | 
  3 | use log::{debug, info, error};
  4 | use crate::decode_cjk_str;
  5 | use crate::post_query::post_query_wrapper;
  6 | use std::sync::Arc;
  7 | 
  8 | 
  9 | 
 10 | use std::borrow::Cow;
 11 | 
 12 | #[derive(Debug, Clone, serde::Serialize,PartialEq)]
 13 | pub enum NotebookSoftware {
 14 |     Logseq,
 15 |     Obsidian,
 16 | }
 17 | 
 18 | // This struct should be immutable when the program starts running
 19 | #[derive(Debug, Clone, serde::Serialize)]
 20 | pub struct ServerInformation {
 21 |     pub notebook_path: String,
 22 |     pub notebook_name: String,
 23 |     pub enable_journal_query: bool,
 24 |     pub show_top_hits: usize,
 25 |     pub show_summary_single_line_chars_limit: usize,
 26 |     pub parse_pdf_links: bool,
 27 |     pub exclude_zotero_items:bool,
 28 |     pub software: NotebookSoftware,
 29 | 
 30 |     /// Experimental. Not sure if I should use this global config - 2022-12-30
 31 |     pub convert_underline_hierarchy: bool,
 32 | 
 33 |     pub host: String,
 34 | 
 35 |     pub llm_enabled: bool,
 36 |     pub llm_max_waiting_time: u64, /* in secs */
 37 | }
 38 | 
 39 | use crate::language_tools::tokenizer::FireSeqTokenizer;
 40 | struct DocumentSetting {
 41 |     schema: tantivy::schema::Schema,
 42 |     tokenizer: FireSeqTokenizer,
 43 | }
 44 | 
 45 | use crate::local_llm::LlmEngine;
 46 | pub struct QueryEngine {
 47 |     pub server_info: ServerInformation,
 48 |     reader: tantivy::IndexReader,
 49 |     query_parser: tantivy::query::QueryParser,
 50 |     //articles: Vec<Article>, //TODO remove it. only word cloud needs it
 51 |     pub llm: Option<Arc<LlmEngine>>,
 52 | }
 53 | 
 54 | use tantivy::IndexWriter;
 55 | use tantivy::TantivyDocument;
 56 | 
 57 | use crate::load_notes::NoteListItem;
 58 | use futures::stream::FuturesUnordered;
 59 |  use futures::StreamExt;
 60 | 
 61 |  use tantivy::doc;
 62 | 
 63 | impl QueryEngine {
 64 |     pub async fn construct(server_info: ServerInformation) -> Self {
 65 | 
 66 |         let document_setting: DocumentSetting = build_document_setting();
 67 |         let note_list = crate::load_notes::retrive_note_list(&server_info);
 68 |         let index: tantivy::Index = QueryEngine::build_index(&server_info,
 69 |             &document_setting,
 70 |             note_list).await;
 71 |         let (reader, query_parser) = build_reader_parser(&index, &document_setting);
 72 | 
 73 |         debug!("Query engine construction finished");
 74 | 
 75 |         QueryEngine {
 76 |             server_info,
 77 |             reader,
 78 |             query_parser,
 79 |         //    articles: Vec::new(),
 80 |          //   articles: loaded_articles,
 81 |             llm: None,
 82 |         }
 83 |     }
 84 | 
 85 |     async fn load_single_note(
 86 |         server_info: &ServerInformation,
 87 |         document_setting: &DocumentSetting,
 88 |         note: NoteListItem,
 89 |         index_writer: &IndexWriter<TantivyDocument>) {
 90 | 
 91 |         let raw_content = match std::fs::read_to_string(&note.realpath) {
 92 |             Ok(s) => s,
 93 |             Err(e) => {
 94 |                 error!("Failed to read {:?} err({:?}, skipping", &note, &e);
 95 |                 return;
 96 |             }
 97 |         };
 98 | 
 99 |         let content = crate::markdown_parser::parse_logseq_notebook(
100 |             Cow::from(raw_content), &note.title, server_info);
101 | 
102 |         let schema = &document_setting.schema;
103 |         let title = schema.get_field("title").unwrap();
104 |         let body = schema.get_field("body").unwrap();
105 |         index_writer.add_document(
106 |             tantivy::doc!{
107 |                 title => note.title,
108 |                 body => content,
109 |             }
110 |         ).unwrap();
111 |     }
112 | 
113 |     async fn load_all_notes(server_info: &ServerInformation,
114 |         document_setting: &DocumentSetting,
115 |         note_list: Vec<NoteListItem>,
116 |         index_writer: &IndexWriter<TantivyDocument>) {
117 | 
118 |         let mut futs: FuturesUnordered<_>  = FuturesUnordered::new();
119 |         for article in note_list {
120 |             futs.push(
121 |                 QueryEngine::load_single_note(
122 |                     server_info,
123 |                     document_setting,
124 |                     article,
125 |                     index_writer)
126 |             );
127 |         }
128 |         while let Some(_result) = futs.next().await {}
129 |     }
130 |     async fn build_index(server_info: &ServerInformation,
131 |         document_setting: &DocumentSetting,
132 |         note_list: Vec<NoteListItem>) -> tantivy::Index {
133 | 
134 |         let schema = &document_setting.schema;
135 |         let index = tantivy::Index::create_in_ram(schema.clone());
136 | 
137 |         index.tokenizers().register(TOKENIZER_ID, document_setting.tokenizer.clone());
138 |         let mut index_writer = index.writer(50_000_000).unwrap();
139 | 
140 |         QueryEngine::load_all_notes(&server_info,
141 |             &document_setting,
142 |             note_list,
143 |             &index_writer).await;
144 | 
145 |         index_writer.commit().unwrap();
146 |         index
147 |     }
148 | }
149 | 
150 | #[derive(Debug)]
151 | pub struct DocData {
152 |     pub title: String,
153 |     pub body: String,
154 | }
155 | use tantivy::schema::OwnedValue;
156 | impl DocData {
157 |     fn take_str_from_doc(doc: &tantivy::TantivyDocument, pos:usize) -> &str {
158 |         /*
159 |         let title: &str = doc.field_values()[0].value().as_text().unwrap();
160 |         let body: &str = doc.field_values()[1].value().as_text().unwrap();
161 |         */
162 |         let v: &OwnedValue =  doc.field_values()[pos].value();
163 |         match v{
164 |             OwnedValue::Str(s) => s,
165 |             _ => panic!("Wrong type")
166 |         }
167 |     }
168 |     pub fn retrive(searcher: &tantivy::Searcher, docid: tantivy::DocAddress) -> Self {
169 |         let doc: tantivy::TantivyDocument = searcher.doc(docid).unwrap();
170 |         let title = Self::take_str_from_doc(&doc, 0).to_owned();
171 |         let body = Self::take_str_from_doc(&doc, 1).to_owned();
172 |         Self {
173 |             title, body
174 |         }
175 |     }
176 | }
177 | 
178 | impl QueryEngine {
179 |     pub fn generate_wordcloud(self: &Self) -> String {
180 |         String::from("TODO: wordcloud is turned off")
181 |         //crate::word_frequency::generate_wordcloud(&self.articles)
182 |     }
183 | 
184 |     pub async fn query_pipeline(self: &Self, term: String) -> String {
185 |         let term: String = term_preprocess(term);
186 |         info!("Searching {}", &term);
187 | 
188 | 
189 |         let server_info: &ServerInformation = &self.server_info;
190 | 
191 |         let top_docs: Vec<(f32, tantivy::DocAddress)> = self.get_top_docs(&term);
192 |         let searcher: tantivy::Searcher = self.reader.searcher();
193 | 
194 |         if cfg!(feature="llm") {
195 |             for (_f, docid) in &top_docs {
196 |                 let doc = DocData::retrive(&searcher, *docid);
197 |                 let llm = self.llm.as_ref().unwrap();
198 |                 llm.post_summarize_job(doc).await;
199 |             }
200 |         }
201 | 
202 | 
203 |         let result: Vec<String> = post_query_wrapper(top_docs, &term, &searcher, &server_info);
204 | 
205 | 
206 |         let json = serde_json::to_string(&result).unwrap();
207 | 
208 |         json
209 |     }
210 | 
211 |     fn get_top_docs(&self, term: &str) -> Vec<(f32, tantivy::DocAddress)> {
212 |         let searcher = self.reader.searcher();
213 |         let server_info: &ServerInformation = &self.server_info;
214 |         let query: Box<dyn tantivy::query::Query> = self.query_parser.parse_query(&term).unwrap();
215 |         let top_docs: Vec<(f32, tantivy::DocAddress)> =
216 |             searcher.search(&query,
217 |                             &tantivy::collector::TopDocs::with_limit(server_info.show_top_hits))
218 |                 .unwrap();
219 | 
220 |         top_docs
221 |     }
222 | }
223 | 
224 | impl QueryEngine {
225 |     async fn wait_for_summarize(&self, title: String) -> String {
226 |         let llm = self.llm.as_ref().unwrap();
227 |         let wait_llm = tokio::time::Duration::from_millis(50);
228 |         // TODO maybe add a guard to make sure don't wait too long?
229 |         loop {
230 |             let result = llm.quick_fetch(&title).await;
231 |             match result {
232 |                 Some(s) => { return s; },
233 |                 None => { }
234 |             };
235 |             tokio::time::sleep(wait_llm).await;
236 |         }
237 |     }
238 |     pub async fn summarize(&self, title: String) -> String {
239 |         info!("Called summarize on {}", &title);
240 |         if cfg!(feature="llm") {
241 |             self.wait_for_summarize(title).await
242 |         } else {
243 |             "LLM turned off".to_owned()
244 |         }
245 |     }
246 |     pub async fn get_llm_done_list(&self) -> String {
247 |         if cfg!(feature="llm") {
248 |             let llm = self.llm.as_ref().unwrap();
249 |             let result = &llm.get_llm_done_list().await;
250 |             let json = serde_json::to_string(&result).unwrap();
251 |             return json;
252 |         } else {
253 |             "LLM turned off".to_owned()
254 |         }
255 |     }
256 | }
257 | 
258 | fn term_preprocess(term:String) -> String {
259 |     // in the future, I would use tokenize_sentence_to_text_vec here
260 |     let term = term.replace("%20", " ");
261 |     let term_vec = decode_cjk_str(term);
262 |     term_vec.join(" ")
263 | }
264 | 
265 | 
266 | fn build_reader_parser(index: &tantivy::Index, document_setting: &DocumentSetting)
267 |                        -> (tantivy::IndexReader, tantivy::query::QueryParser) {
268 |     let reader = index
269 |         .reader_builder()
270 |         .reload_policy(tantivy::ReloadPolicy::OnCommitWithDelay) // TODO switch to manual
271 |         .try_into().unwrap();
272 |     let title = document_setting.schema.get_field("title").unwrap();
273 |     let body = document_setting.schema.get_field("body").unwrap();
274 |     let query_parser = tantivy::query::QueryParser::for_index(index, vec![title, body]);
275 |     (reader, query_parser)
276 | }
277 | 
278 | fn build_document_setting() -> DocumentSetting {
279 |     let (schema, tokenizer) = build_schema_tokenizer();
280 |     DocumentSetting{
281 |         schema, tokenizer
282 |     }
283 | }
284 | 
285 | use crate::language_tools::tokenizer::TOKENIZER_ID;
286 | fn build_schema_tokenizer() -> (tantivy::schema::Schema,
287 |     FireSeqTokenizer
288 |                                 // Box<dyn tantivy::tokenizer::Tokenizer>
289 | ) {
290 |     let mut schema_builder = tantivy::schema::SchemaBuilder::default();
291 |     let text_indexing = tantivy::schema::TextFieldIndexing::default()
292 |         .set_tokenizer(TOKENIZER_ID) // Set custom tokenizer
293 |         .set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions);
294 |     let text_options = tantivy::schema::TextOptions::default()
295 |         .set_indexing_options(text_indexing)
296 |         .set_stored();
297 |     let tokenizer = FireSeqTokenizer {};
298 | 
299 |     let _title = schema_builder.add_text_field("title", text_options.clone());
300 |     let _body = schema_builder.add_text_field("body", text_options);
301 | 
302 |     let schema = schema_builder.build();
303 |     (schema,
304 |      tokenizer
305 |      // Box::new(tokenizer)
306 |     )
307 | }
308 | 
309 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/src/word_frequency/mod.rs:
--------------------------------------------------------------------------------
 1 | use log::info;
 2 | use crate::Article;
 3 | use std::collections::{HashMap, HashSet};
 4 | 
 5 | 
 6 | use rayon::prelude::*;
 7 | 
 8 | // let x: Vec<Vec<_>> = vec![vec![1, 2], vec![3, 4]];
 9 | // let y: Vec<_> = x.into_par_iter().flatten().collect();
10 | pub fn generate_wordcloud(articles: &Vec<Article>) -> String {
11 |     info!("Generating wordlist");
12 | 
13 |     let tokens: Vec<String> = articles.par_iter().map(article_to_tokens)
14 |         .flatten().collect();
15 |     // for art in articles {
16 |     //     let tokens = article_to_tokens(art);
17 |     // }
18 |     info!("After flatten, we got {} tokens", tokens.len());
19 | 
20 |     // silly group by
21 |     let mut freq: HashMap<String,i64> = HashMap::new();
22 |     for t in tokens {
23 |         match freq.get(&t) {
24 |             Some(count) => { freq.insert(t, count + 1); }
25 |             None => { freq.insert(t, 1 as i64); }
26 |         }
27 |     }
28 | 
29 | 
30 | 
31 |     let mut sorted_pairs: Vec<(String,i64)> = freq.into_iter().collect();
32 |     sorted_pairs.sort_by(|a, b| b.1.cmp(&a.1));
33 |     sorted_pairs.truncate(200);
34 |     // sorted_pairs
35 | 
36 | 
37 |     let serialized_data = serde_json::to_string(&sorted_pairs).unwrap();
38 |     serialized_data
39 | }
40 | 
41 | fn article_to_tokens(art: &Article) -> Vec<String> {
42 |     let tokens = crate::language_tools::tokenizer::tokenize(&art.content);
43 | 
44 |     //TODO use another stop word list for wordcloud
45 |     lazy_static! {
46 |         static ref STOPWORDS_LIST: HashSet<String> =  crate::language_tools::generate_stopwords_list();
47 |     }
48 |     let tokens = crate::language_tools::tokenizer::filter_out_stopwords(&tokens, &STOPWORDS_LIST);
49 |     let tokens: Vec<&str> = tokens.into_iter().filter(|x| is_valid_for_wordcloud(x)).collect();
50 |     info!("Got tokens {:?}", &tokens);
51 |     let tokens : Vec<String> = tokens.into_iter().map(|x| x.to_string()).collect();
52 |     tokens
53 | }
54 | 
55 | 
56 | fn is_valid_for_wordcloud(s:&str) -> bool{
57 |     if is_symbol(s) {
58 |         return false;
59 |     }
60 |     let invalid_end_pattern = vec!["::", "]]", "}}"];
61 |     let invalid_start_pattern = vec!["[[", "{{", "{\\"];
62 | 
63 |     for ep in invalid_end_pattern {
64 |         if s.ends_with(ep) {
65 |             return false;
66 |         }
67 |     }
68 |     for sp in invalid_start_pattern {
69 |         if s.starts_with(sp) {
70 |             return false;
71 |         }
72 |     }
73 |     let logseq_exclude_list = vec!["DONE", "true", "SCHEDULED:", "collapsed", "file", "com",
74 |                   "CLOCK:", ":LOGBOOK:", ":END:"];
75 |     for stop in logseq_exclude_list {
76 |         if s == stop {
77 |             return false;
78 |         }
79 |     }
80 |     //
81 |     true
82 | }
83 | fn is_symbol(s:&str) -> bool {
84 |     if s.len() == 0 { return true; }
85 |     if s.len() > 3 { return false; }
86 |     let mut flag = true;
87 |     for c in s.chars() {
88 |         if c.is_alphanumeric() {
89 |             flag = false;
90 |         }
91 |     }
92 |     flag
93 | }


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/assets/screenshot_demo_640_400.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Endle/fireSeqSearch/143f6168d44148c2fa40ec2e8a512e2a2c44c638/fire_seq_search_server/tests/resource/assets/screenshot_demo_640_400.png


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/journals/2022_02_26.md:
--------------------------------------------------------------------------------
1 | - DONE [[Benchmark]]
2 |   :LOGBOOK:
3 |   CLOCK: [2022-02-26 Sat 11:18:52]--[2022-02-28 Mon 11:11:44] =>  47:52:52
4 |   :END:
5 | - DONE Try [[Debug]]
6 |   :LOGBOOK:
7 |   CLOCK: [2022-02-26 Sat 19:51:54]--[2022-02-27 Sun 15:34:21] =>  19:42:27
8 |   :END:
9 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/journals/2022_08_30.md:
--------------------------------------------------------------------------------
1 | - [[LATIN FOR BEGINNERS]]
2 | - [[International Language, Past, Present & Future by Walter John Clark]]
3 | - [[孙子兵法]]


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/logseq/pages-metadata.edn:
--------------------------------------------------------------------------------
 1 | [{:block/name "a",
 2 |   :block/created-at 1661896874219,
 3 |   :block/updated-at 1661896874219}
 4 |  {:block/name "aug 30th, 2022",
 5 |   :block/created-at 1661896924808,
 6 |   :block/updated-at 1661897331497}
 7 |  {:block/name "b",
 8 |   :block/created-at 1661896874219,
 9 |   :block/updated-at 1661896874219}
10 |  {:block/name "benchmark",
11 |   :block/created-at 1661896924957,
12 |   :block/updated-at 1661896924957}
13 |  {:block/name "c",
14 |   :block/created-at 1661896874219,
15 |   :block/updated-at 1661896874219}
16 |  {:block/name "canceled",
17 |   :block/created-at 1661896874219,
18 |   :block/updated-at 1661896874219}
19 |  {:block/name "cancelled",
20 |   :block/created-at 1661896874219,
21 |   :block/updated-at 1661896874219}
22 |  {:block/name "card",
23 |   :block/created-at 1661896874219,
24 |   :block/updated-at 1661896874219}
25 |  {:block/name "contents",
26 |   :block/created-at 1661896874219,
27 |   :block/updated-at 1661896874219}
28 |  {:block/name "debug",
29 |   :block/created-at 1661896924959,
30 |   :block/updated-at 1661896924959}
31 |  {:block/name "doing",
32 |   :block/created-at 1661896874219,
33 |   :block/updated-at 1661896874219}
34 |  {:block/name "done",
35 |   :block/created-at 1661896874219,
36 |   :block/updated-at 1661896874219}
37 |  {:block/name "favorites",
38 |   :block/created-at 1661896874219,
39 |   :block/updated-at 1661896874219}
40 |  {:block/name "feb 26th, 2022",
41 |   :block/created-at 1661896924960,
42 |   :block/updated-at 1661896924960}
43 |  {:block/name "in-progress",
44 |   :block/created-at 1661896874219,
45 |   :block/updated-at 1661896874219}
46 |  {:block/name
47 |   "international language, past, present & future by walter john clark",
48 |   :block/created-at 1661897225667,
49 |   :block/updated-at 1661897239720}
50 |  {:block/name "later",
51 |   :block/created-at 1661896874219,
52 |   :block/updated-at 1661896874219}
53 |  {:block/name "latin for beginners",
54 |   :block/created-at 1661897128500,
55 |   :block/updated-at 1661897151913}
56 |  {:block/name "now",
57 |   :block/created-at 1661896874219,
58 |   :block/updated-at 1661896874219}
59 |  {:block/name "rust",
60 |   :block/created-at 1661896924926,
61 |   :block/updated-at 1661896924926}
62 |  {:block/name "rust/closure",
63 |   :block/created-at 1661896924924,
64 |   :block/updated-at 1661896924924}
65 |  {:block/name "rust/static variable",
66 |   :block/created-at 1661896924922,
67 |   :block/updated-at 1661896924922}
68 |  {:block/name "rust/trait",
69 |   :block/created-at 1661896924925,
70 |   :block/updated-at 1661896924925}
71 |  {:block/name "slipbox",
72 |   :block/created-at 1661896924927,
73 |   :block/updated-at 1661896924927}
74 |  {:block/name "todo",
75 |   :block/created-at 1661896874219,
76 |   :block/updated-at 1661896874219}
77 |  {:block/name "wait",
78 |   :block/created-at 1661896874219,
79 |   :block/updated-at 1661896874219}
80 |  {:block/name "waiting",
81 |   :block/created-at 1661896874219,
82 |   :block/updated-at 1661896874219}
83 |  {:block/name "孙子兵法",
84 |   :block/created-at 1661897331497, 
85 |   :block/updated-at 1661897525235}]
86 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/pages/International Language, Past, Present & Future by Walter John Clark.md:
--------------------------------------------------------------------------------
 1 | - As an ounce of personal experience is worth a pound of second-hand recital, a brief statement may here be given of the way in which the present writer came to take up Esperanto, and of the experiences which soon led him to the conviction of its absolute practicability and utility.
 2 | - In October, 1905, having just returned from an absence of some years in Canada and the Far East, he had his attention turned to Esperanto for the first time by reading an account of the Congress of Boulogne. He had no previous knowledge of, or leanings towards, a universal language; and if he had thought about it at all, it was only to laugh at the idea as a wild and visionary scheme. In short, his attitude was quite normal.
 3 | - But here was a definite statement, professing to be one of positive accomplished fact. One of two things: either the newspaper account was not true; or else, the facts being as represented, here was a new possibility to be reckoned with. The only course was to send for the books and test the thing on its merits. Being somewhat used to languages, he did not take long to see that this one was good enough in itself. A letter, written in 13 Esperanto, after a few days' study of the grammar at odd times, with a halfpenny Esperanto-English key enclosed, was fully understood by the addressee, though he was ignorant up till then of the very existence of Esperanto. This experience has often been since repeated; indeed, the correspondent will often write back after a few days in Esperanto. Such letters have always been found intelligible, though in no case did the correspondent know Esperanto previously. The experiment is instructive and amusing, and can be tried by any one for an expenditure of twopence for keys and a few hours for studying the sixteen rules and their application. To many minds these are far simpler and more easy to grasp for practical use than the rules for scoring at bridge.
 4 | - After a month or two's playing with the language in spare time, the writer further tested it, by sending out a flight of postcards to various selected Esperantists' addresses in different parts of the Russian Empire. The addressees ranged from St. Petersburg and Helsingfors through Poland to the Caucasus and to far Siberia. In nearly every case answers were received, and in some instances the initial interchange of postcards led to an extremely interesting correspondence, throwing much light on the disturbed state of things in the native town or province of the correspondent. From a Tiflis doctor came a graphic account of the state of affairs in the Caucasus; while a school inspector from the depths of Eastern Siberia painted a vivid picture of the effect of political unrest on the schools—lockouts and "malodorous chemical obstructions" (Anglice—the schools were stunk out). Many writers expressed themselves with great freedom, but feared their letters would not pass the censor. Judging by the proportion of answers received, the censorship was not at that time efficient. In no case was there any difficulty in grasping the writer's meaning. All the answers were in Esperanto.
 5 | - This was fairly convincing, but still having doubts on the question of pronunciation, the writer resolved to attend the Esperanto Congress to be held at Geneva in August 1906. To 14 this end he continued to read Esperanto at odd minutes and took in an Esperanto gazette. About three weeks before the congress he got a member of his family to read aloud to him every day as far as possible a page or two of Esperanto, in order to attune his ear. He never had an opportunity of speaking the language before the congress, except once for a few minutes, when he travelled some distance to attend a meeting of the nearest English group.
 6 | - Thus equipped, he went through the Congress of Geneva, and found himself able to follow most of the proceedings, and to converse freely, though slowly, with people of the most diverse nationality. At an early sitting of the congress he found himself next to a Russian from Kischineff, who had been through the first great pogrom, and a most interesting conversation ensued. Another day the neighbours were an Indian nawab and an abbé from Madrid. Another time it was a Bulgarian. At the first official banquet he sat next to a Finn, who rejoiced in the name of Attila, and, but for the civilizing influence of a universal language, might have been in the sunny south, like his namesake of the ancient world, on a very different errand from his present peaceful one. Yet here he was, rubbing elbows with Italians, as if there had never been such things as Huns or a sack of Rome by northern barbarians.
 7 | - During the meal a Frenchman, finding himself near us English and some Germans, proposed a toast to the "entente cordiale taking in Germany," which was honoured with great enthusiasm. This is merely an instance of the small ways in which such gatherings make for peace and good will.
 8 | - With all these people it was perfectly easy to converse in the common tongue, pronunciation and national idiom being no bar in practice.
 9 | - And this experience was general throughout the duration of the congress. Day by day sittings were held for the transaction of all kinds of business and the discussion of the most varied subjects. It was impressive to see people from half the countries of the 15 world rise from different corners of the hall and contribute their share to the discussion in the most matter-of-fact way. Day by day the congressists met in social functions, debates, lectures, and sectional groups (chemical, medical, legal, etc.) for the regulation of matters touching their special interests. Everything was done in Esperanto, and never was there the slightest hitch or misunderstanding, or failure to give adequate expression to opinions owing to defects of language. The language difficulty was annihilated.
10 | - Perhaps one of the most striking demonstrations of this return to pre-Babel conditions was the performance of a three-part comedy by a Frenchman, a Russian, and a Spaniard. Such a thing would inevitably have been grotesque in any national language; but here they met on common neutral ground. No one's accent was "foreign," and none of the spectators possessed that mother-tongue acquaintance with Esperanto that would lead them to feel slight divergences shocking, or even noticeable without extreme attention to the point. Other theatrical performances were given at Geneva, as also at Boulogne, where a play of Molière was performed in Esperanto by actors of eight nationalities with one rehearsal, and with full success.
11 | - In the face of these facts it is idle to oppose a universal artificial language on the score of impossibility or inadequacy. The theoretical pronunciation difficulty completely crumbled away before the test of practice.
12 | - The "war-at-any-price party," the whole-hoggers à tous crins (the juxtaposition of the two national idioms lends a certain realism, and heightens the effect of each), are therefore driven back on their second line of attack, if the Hibernianism may be excused. "Yes," they say, "your language may be possible, but, after all, why not learn an existing language, if you've got to learn one anyway?"
13 | - Now, quite apart from the obvious fact that the nations will never agree to give the preference to the language of one of them to the prejudice of the others, this argument involves the 16 suggestion that an artificial language is no easier to learn than a natural one. We thus come to the question of ease as a qualification.
14 | -
15 | -
16 | - https://www.gutenberg.org/files/16737/16737-h/16737-h.htm
17 | -


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/pages/Rust.md:
--------------------------------------------------------------------------------
 1 | - Stub now for refs
 2 | -
 3 | - [[Rust/Static variable]]
 4 | - [[Rust/Closure]]
 5 | - [[Rust/trait]]
 6 | -
 7 | -
 8 | - [[SlipBox]]
 9 | 	- What's the cost of index bound check?
10 | 		- https://users.rust-lang.org/t/is-bound-checking-the-only-runtime-cost-of-rust/66661/3
11 | -


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/pages/Softmax.md:
--------------------------------------------------------------------------------
1 | - also known as softargmax or normalized exponential function
2 | 
3 | - Calculate $f(\vec{z})$
4 | 	-
5 | 	  $$b=\sum_{k=1}^{K} exp(z_k)$$
6 | 	-
7 | 	  $$ f(\vec{z})_i = \dfrac{exp(z_i)}{b}$$
8 | 
9 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/pages/advanced_query.md:
--------------------------------------------------------------------------------
 1 | - #+BEGIN_QUERY
 2 |   {:title "advance exempli gratia"
 3 |   :query [
 4 |   :find (pull ?b [*])
 5 |   :where
 6 |   [?b :block/page ?p]
 7 |   [?p :page/name ?pn]
 8 |   [?b :block/marker ?marker]
 9 |   [(contains? #{"NOW" "DOING" "TODO"} ?marker)]
10 |   ]
11 |   }
12 |   #+END_QUERY
13 | -
14 | -
15 | - In this test page we have some queries. We want to exclude the query statement from results
16 | - 
17 | - #+BEGIN_QUERY
18 |   {:title "advance exempli gratia"
19 |   :query [
20 |   :find (pull ?b [*])
21 |   :where
22 |   [?b :block/page ?p]
23 |   [?p :page/name ?pn]
24 |   [?b :block/marker ?marker]
25 |   [(contains? #{"NOW" "DOING" "TODO"} ?marker)]
26 |   ]
27 |   }
28 |   #+END_QUERY
29 | -


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/pages/blog_thunderbird_zh.md:
--------------------------------------------------------------------------------
 1 | - [Aug 3, 2021 - 使用 git shallow clone 下载并编译 Thunderbird](https://endle.github.io/2021/08/03/git-shallow-clone-build-thunderbird/)
 2 |   
 3 |             
 4 |   
 5 |             
 6 |   
 7 |   最近在尝试编译 Thunderbird. [官方的手册](https://developer.thunderbird.net/thunderbird-development/getting-started) 的建议是
 8 |   
 9 |   ```
10 |   hg clone https://hg.mozilla.org/mozilla-central source/
11 |   cd source/
12 |   hg clone https://hg.mozilla.org/comm-central comm/
13 |   ```
14 |   
15 |   因为我网络情况不好，硬盘空间也有些捉襟见肘，就只想下载最新的版本。可是,[Mercurial HG 并不支持](https://stackoverflow.com/a/4205246/1166518).
16 |   
17 |   Mozilla 已经在 GitHub 上有了实验性的 Mirror. 因此，我使用如下的方式下载 Thunderbird 的代码。
18 |   
19 |   ```
20 |   # My personal habit
21 |   cd ~/src/mozilla
22 |   git clone --depth=1 https://github.com/mozilla/gecko-dev.git mozilla-central
23 |   git clone --depth=1 https://github.com/mozilla/releases-comm-central comm-central
24 |   cp -R --reflink=auto comm-central/ mozilla-central/comm
25 |   ```
26 |   
27 |   我会使用如下代码进行更新。
28 |   
29 |   ```
30 |   cd mozilla-central && git pull origin master && trash comm && cd ..
31 |   cd comm-central && git pull origin master && cd ..
32 |   cp -R --reflink=auto comm-central/ mozilla-central/comm
33 |   cd mozilla-central
34 |   ```
35 | -
36 | -
37 | -
38 | - Source: https://endle.github.io/2021/08/03/git-shallow-clone-build-thunderbird/
39 | - CC-BY 4.0 Zhenbo Li


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/pages/cyrillic.md:
--------------------------------------------------------------------------------
1 | - Это статья для тестов поиска в кириллических символах.


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/pages/feditips.md:
--------------------------------------------------------------------------------
1 | - Below information is copied from https://fedi.tips
2 | - Copyrights      The text of the articles are available to use on your own libre projects under [Creative Commons Attribution ShareAlike 4.0 ⧉](https://creativecommons.org/licenses/by-sa/4.0/), you can attribute by crediting and linking back to [https://fedi.tips/](https://fedi.tips/)
3 | -
4 | - See https://fedi.tips/about-this-site/
5 | -
6 | - On Mastodon, hashtag following has been merged  into the latest version of the software and will be introduced in the  next update across all servers. It is already being tested on some  servers such as mastodon.social and mastodon.online.  If you’re on a server that runs it, you can  follow hashtags by logging in through the website, searching for a  hashtag and then clicking the follow button in the top right corner of  the results. Posts that are visible to your server which include that  hashtag will then appear in your normal timeline, even if you’re not  following the account that posted it. You can unfollow by searching for  the same hashtag and clicking the unfollow button (which is in the same  place the follow button was).  On Friendica, hashtag following has been  available as standard for years now, and works in a very similar way:  search for a hashtag and click the + logo in the top right corner to  follow it. Posts with that tag will appear in your normal timeline.  Whatever platform you’re following from, it’s a really handy way of discovering interesting posts and new accounts to follow.  Note that this only shows posts in your  timeline made after the follow began, so there may be a delay in seeing  such posts appear in your timeline, depending on whether a new post with  that tag has been published. Also, it only shows posts that are visible  to your server anyway, it is not pulling posts in purely on the basis  of the hashtag.       Because no one owns the Fediverse, there is no    central authority to give out “verified” badges the way Twitter etc do.    If you do see any Twitter-style verified badges these are just custom    emoji and don’t mean anything, it’s just people messing around.      There are various websites trying to set    themselves up as central authorities, but we strongly recommend avoiding    these completely. [The entire point of being on the Fediverse is to prevent any central authorities taking over](https://fedi.tips/mastodon-and-the-fediverse-beginners-start-here/#whoownsthefediverse).      However, there are still ways to actually verify your identity on the Fedi: If you’re already verified on Twitter etc, tell people about your    Fediverse account and link to it, then link to this post on your    Fediverse account. This will let people on the Fediverse know that    you’re the same person who owns the verified account on Twitter etc. If you have an official website, link to your Fediverse account from    your website and link to your website from your Fediverse account. If    people already trust your website to be official, then by extension they    will know your Fediverse account is official. On Mastodon, you can take the website method a step further. Log in through the website, go to *Edit profile > Verification*,    copy and paste the HTML code into your website’s front page’s code. Add    your website’s address into your profile’s Metadata section,    remembering to include https:// at the beginning. After you’ve done all    this, press the *Save changes* button in your profile settings.    You will then see a link to your website on your Mastodon profile which    has turned green with a green tick next to it, to verify you are the    site’s owner. If you need to verify lots of accounts from a group or organisation,    you might want to make your own Fediverse server as a subdomain of your    official website. This is what the European Union did when they made [their own Mastodon server ⧉](https://social.network.europa.eu/) and [their own PeerTube server ⧉](https://tube.network.europa.eu/).    Because the European Union’s official website is at europa.eu, and    their servers were all subdomains of europa.eu, it meant all the    accounts on their servers could be trusted as being official EU    Fediverse accounts. Making your own server on a subdomain [is much easier and cheaper than you think](https://growyourown.services/grow-your-own-social-network/).      And whatever you do, don’t use the “verified”    emoji. This means nothing at all on the Fediverse, anyone can add it to    their profiles.      **NOTE:** If you’re verifying    your Mastodon account using the code-pasting method, make sure that all    the links to your Mastodon account on your website include rel=”me” in    their link code. If there’s one without rel=”me”, for example in a    dropdown menu, the verification process may fail. Also, bear in mind    there may be some delay before your website address turns green on your    profile, don’t worry if it doesn’t happen straight away.      Using multiple accounts      First of all, it’s worth saying again that [most people do not need to use multiple accounts](https://fedi.tips/mastodon-and-the-fediverse-beginners-start-here/#doineedmultipleaccounts).    The Fediverse is designed in such a way that people on different    servers can interact seamlessly, as if they were all on one network.       However, some people may need separate    personal and work accounts, or an extra account that focuses on a    specialist topic which they wish to keep separate from their main    account.      Whatever your reasons, it’s very easy to use    multiple accounts on Mastodon and the Fediverse: all you have to do is    sign up on a different server for each account you want. Because the    servers are independent, you can use the same email address for each    account, and you can be signed into all the accounts simultaneously on    the web or on an app. Signing up for accounts on different servers also    means that if one server goes down you can use your alternative account    on another server.      If you use the Fediverse through the web, you    can log into all the accounts at once and switch views by keeping each    account open in a separate tab.      Official and third party apps support multiple    accounts too. You can be signed into all your accounts at once, and    switch between them within the app. The interface for switching differs    from app to app.      On the official Mastodon app, you can add    accounts and switch between them by holding down your profile image in    the bottom right corner. A menu will appear which lets you add or switch    accounts.
7 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/pages/fireSeqSearch___test___5.md:
--------------------------------------------------------------------------------
1 | - Hello


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/pages/咖啡.md:
--------------------------------------------------------------------------------
1 | - 我试着解读一下，你这个咖啡拉花是对二十世纪的科学大发现的年代的挽歌。来自植物（咖啡）和来自动物（牛）的产物
2 | -


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/resource/pages/孙子兵法.md:
--------------------------------------------------------------------------------
 1 | - https://www.gutenberg.org/cache/epub/23864/pg23864.html
 2 | -
 3 | -
 4 | - 始計第一
 5 | - 孫子曰：兵者，國之大事，死生之地，存亡之道，不可不察也。
 6 | - 故經之以五事，校之以計，而索其情：一曰道，二曰天，三曰地，四曰將，五曰法。
 7 | - 道者，令民與上同意，可與之死，可與之生，而不畏危也；天者，陰陽、寒暑、時制也；地者，遠近、險易、廣狹、死生也；將者，智、信、仁、勇、嚴也；法者，曲制、官道、主用也。凡此五者，將莫不聞，知之者勝，不知者不勝。
 8 | - 故校之以計，而索其情，曰：主孰有道？將孰有能？天地孰得？法令孰行？兵眾孰強？士卒孰練？賞罰孰明？吾以此知勝負矣。
 9 | - 將聽吾計，用之必勝，留之；將不聽吾計，用之必敗，去之。
10 | - 計利以聽，乃為之勢，以佐其外。勢者，因利而制權也。
11 | - 兵者，詭道也。故能而示之不能，用而示之不用，近而示之遠，遠而示之近。利而誘之，亂而取之，實而備之，強而避之，怒而撓之，卑而驕之，佚而勞之，親而離之，攻其無備，出其不意。此兵家之勝，不可先傳也。
12 | - 夫未戰而廟算勝者，得算多也；未戰而廟算不勝者，得算少也。多算勝，少算不勝，而況無算乎！吾以此觀之，勝負見矣。
13 | - 作戰第二
14 | - 孫子曰：凡用兵之法，馳車千駟，革車千乘，帶甲十萬，千里饋糧。則內外之費，賓客之用，膠漆之材，車甲之奉，日費千金，然後十萬之師舉矣。
15 | - 其用戰也，貴勝，久則鈍兵挫銳，攻城則力屈，久暴師則國用不足。夫鈍兵挫銳，屈力殫貨，則諸侯乘其弊而起，雖有智者，不能善其後矣。故兵聞拙速，未睹巧之久也。夫兵久而國利者，未之有也。故不盡知用兵之害者，則不能盡知用兵之利也。
16 | - 善用兵者，役不再籍，糧不三載，取用於國，因糧於敵，故軍食可足也。國之貧於師者遠輸，遠輸則百姓貧；近於師者貴賣，貴賣則百姓竭，財竭則急於丘役。力屈財殫，中原內虛於家，百姓之費，十去其七；公家之費，破軍罷馬，甲胄矢弩，戟楯矛櫓，丘牛大車，十去其六。
17 | - 故智將務食於敵，食敵一鍾，當吾二十鍾；萁稈一石，當吾二十石。故殺敵者，怒也；取敵之利者，貨也。故車戰，得車十乘以上，賞其先得者，而更其旌旗。車雜而乘之，卒善而養之，是謂勝敵而益強。
18 | - 故兵貴勝，不貴久。故知兵之將，民之司命。國家安危之主也。
19 | - 謀攻第三
20 | - 孫子曰：凡用兵之法，全國為上，破國次之；全軍為上，破軍次之；全旅為上，破旅次之；全卒為上，破卒次之；全伍為上，破伍次之。是故百戰百勝，非善之善者也；不戰而屈人之兵，善之善者也。
21 | - 故上兵伐謀，其次伐交，其次伐兵，其下攻城。攻城之法，為不得已。修櫓轒轀，具器械，三月而後成；距闉，又三月而後已。將不勝其忿，而蟻附之，殺士三分之一，而城不拔者，此攻之災也。
22 | - 故善用兵者，屈人之兵，而非戰也，拔人之城而非攻也，毀人之國而非久也，必以全爭於天下，故兵不頓而利可全，此謀攻之法也。
23 | - 故用兵之法，十則圍之，五則攻之，倍則分之，敵則能戰之，少則能逃之，不若則能避之。故小敵之堅，大敵之擒也。
24 | - 夫將者，國之輔也。輔周則國必強，輔隙則國必弱。故君之所以患於軍者三：不知軍之不可以進而謂之進，不知軍之不可以退而謂之退，是謂縻軍；不知三軍之事，而同三軍之政，則軍士惑矣；不知三軍之權，而同三軍之任，則軍士疑矣。三軍既惑且疑，則諸侯之難至矣。是謂亂軍引勝。
25 | - 故知勝有五：知可以戰與不可以戰者，勝。識眾寡之用者，勝。上下同欲者，勝。以虞待不虞者，勝。將能而君不御者，勝。此五者，知勝之道也。
26 | - 故曰：知己知彼，百戰不貽；不知彼而知己，一勝一負；不知彼不知己，每戰必敗。
27 | - 軍形第四
28 | - 孫子曰：昔之善戰者，先為不可勝，以待敵之可勝。不可勝在己，可勝在敵。故善戰者，能為不可勝，不能使敵必可勝。故曰：勝可知，而不可為。
29 | - 不可勝者，守也；可勝者，攻也。守則不足，攻則有餘。善守者，藏於九地之下，善攻者，動於九天之上，故能自保而全勝也。
30 | - 見勝不過眾人之所知，非善之善者也；戰勝而天下曰善，非善之善者也。故舉秋毫不為多力，見日月不為明目，聞雷霆不為聰耳。古之善戰者，勝於易勝者也。故善戰者之勝也，無智名，無勇功，故其戰勝不忒。不忒者，其所措必勝，勝已敗者也。故善戰者，先立於不敗之地，而不失敵之敗也。是故勝兵先勝，而後求戰，敗兵先戰而後求勝。善用兵者，修道而保法，故能為勝敗之政。
31 | - 兵法：一曰度，二曰量，三曰數，四曰稱，五曰勝。地生度，度生量，量生數，數生稱，稱生勝。故勝兵若以鎰稱銖，敗兵若以銖稱鎰。勝者之戰，若決積水於千仞之谿者，形也。
32 | - 兵勢第五
33 | - 孫子曰：凡治眾如治寡，分數是也；鬥眾如鬥寡，形名是也；三軍之眾，可使必受敵而無敗者，奇正是也；兵之所加，如以碫投卵者，虛實是也。
34 | - 凡戰者，以正合，以奇勝。故善出奇者，無窮如天地，不竭如江海。終而複始，日月是也。死而復生，四時是也。聲不過五，五聲之變，不可勝聽也；色不過五，五色之變，不可勝觀也；味不過五，五味之變，不可勝嘗也；戰勢，不過奇正，奇正之變，不可勝窮也。奇正相生，如循環之無端，熟能窮之哉？
35 | - 激水之疾，至於漂石者，勢也；鷙鳥之疾，至於毀折者，節也。是故善戰者，其勢險，其節短。勢如張弩，節如發機。
36 | - 紛紛紜紜，鬥亂而不可亂也；渾渾沌沌，形圓而不可敗也。亂生於治，怯生於勇，弱生於強。治亂，數也；勇怯，勢也；強弱，形也。故善動敵者，形之，敵必從之；予之，敵必取之。以利動之，以卒待之。
37 | - 故善戰者，求之於勢，不責於人；故能擇人而任勢。任勢者，其戰人也，如轉木石。木石之性，安則靜，危則動，方則止，圓則行。故善戰人之勢，如轉圓石於千仞之山者，勢也。
38 | - 虛實第六
39 | - 孫子曰：凡先處戰地而待敵者佚，後處戰地而趨戰者勞。
40 | - 故善戰者，致人而不致於人。能使敵人自至者，利之也；能使敵人不得至者，害之也。故敵佚能勞之，飽能饑之，安能動之。出其所必趨，趨其所不意。行千里而不勞者，行於無人之地也；攻而必取者，攻其所不守也。守而必固者，守其所不攻也。
41 | - 故善攻者，敵不知其所守；善守者，敵不知其所攻。微乎微乎，至於無形；神乎神乎，至於無聲，故能為敵之司命。進而不可禦者，沖其虛也；退而不可追者，速而不可及也。故我欲戰，敵雖高壘深溝，不得不與我戰者，攻其所必救也；我不欲戰，雖畫地而守之，敵不得與我戰者，乖其所之也。故形人而我無形，則我專而敵分。我專為一，敵分為十，是以十攻其一也。則我眾敵寡，能以眾擊寡者，則吾之所與戰者約矣。吾所與戰之地不可知，不可知則敵所備者多，敵所備者多，則吾所與戰者寡矣。故備前則後寡，備後則前寡，備左則右寡，備右則左寡，無所不備，則無所不寡。寡者，備人者也；眾者，使人備己者也。故知戰之地，知戰之日，則可千里而會戰；不知戰之地，不知戰日，則左不能救右，右不能救左，前不能救後，後不能救前，而況遠者數十裏，近者數裏乎！以吾度之，越人之兵雖多，亦奚益於勝哉！故曰：勝可為也。敵雖眾，可使無鬥。故策之而知得失之計，候之而知動靜之理，形之而知死生之地，角之而知有餘不足之處。故形兵之極，至於無形。無形則深間不能窺，智者不能謀。因形而措勝於眾，眾不能知。人皆知我所以勝之形，而莫知吾所以制勝之形。故其戰勝不復，而應形於無窮。夫兵形象水，水之行避高而趨下，兵之形避實而擊虛；水因地而制流，兵因敵而制勝。故兵無常勢，水無常形。能因敵變化而取勝者，謂之神。故五行無常勝，四時無常位，日有短長，月有死生。
42 | - 軍爭第七
43 | - 孫子曰： 
44 |   凡用兵之法，將受命於君，合軍聚眾，交和而舍，莫難於軍爭。軍爭之難者，以迂為直，以患為利。故迂其途，而誘之以利，後人發，先人至，此知迂直之計者也。軍爭為利，軍爭為危。舉軍而爭利則不及，委軍而爭利則輜重捐。是故捲甲而趨，日夜不處，倍道兼行，百裡而爭利，則擒三將軍，勁者先，疲者後，其法十一而至；五十裏而爭利，則蹶上將軍，其法半至；三十裏而爭利，則三分之二至。是故軍無輜重則亡，無糧食則亡，無委積則亡。故不知諸侯之謀者，不能豫交；不知山林、險阻、沮澤之形者，不能行軍；不用鄉導者，不能得地利。故兵以詐立，以利動，以分和為變者也。故其疾如風，其徐如林，侵掠如火，不動如山，難知如陰，動如雷震。掠鄉分眾，廓地分利，懸權而動。先知迂直之計者勝，此軍爭之法也。《軍政》曰：“言不相聞，故為之金鼓；視不相見，故為之旌旗。”夫金鼓旌旗者，所以一民之耳目也。民既專一，則勇者不得獨進，怯者不得獨退，此用眾之法也。故夜戰多金鼓，晝戰多旌旗，所以變人之耳目也。三軍可奪氣，將軍可奪心。是故朝氣銳，晝氣惰，暮氣歸。善用兵者，避其銳氣，擊其惰歸，此治氣者也。以治待亂，以靜待嘩，此治心者也。以近待遠，以佚待勞，以飽待饑，此治力者也。無邀正正之旗，無擊堂堂之陳，此治變者也。故用兵之法，高陵勿向，背丘勿逆，佯北勿從，銳卒勿攻，餌兵勿食，歸師勿遏，圍師遺闕，窮寇勿迫，此用兵之法也。
45 | - 九變第八
46 | - 孫子曰： 
47 |   凡用兵之法，將受命於君，合軍聚合。泛地無舍，衢地合交，絕地無留，圍地則謀，死地則戰，途有所不由，軍有所不擊，城有所不攻，地有所不爭，君命有所不受。故將通於九變之利者，知用兵矣；將不通九變之利，雖知地形，不能得地之利矣；治兵不知九變之術，雖知五利，不能得人之用矣。是故智者之慮，必雜於利害，雜於利而務可信也，雜於害而患可解也。是故屈諸侯者以害，役諸侯者以業，趨諸侯者以利。故用兵之法，無恃其不來，恃吾有以待之；無恃其不攻，恃吾有所不可攻也。故將有五危，必死可殺，必生可虜，忿速可侮，廉潔可辱，愛民可煩。凡此五者，將之過也，用兵之災也。覆軍殺將，必以五危，不可不察也。
48 | - 行軍第九
49 | - 孫子曰：凡處軍相敵，絕山依穀，視生處高，戰隆無登，此處山之軍也。絕水必遠水，客絕水而來，勿迎之於水內，令半渡而擊之利，欲戰者，無附於水而迎客，視生處高，無迎水流，此處水上之軍也。絕斥澤，唯亟去無留，若交軍於斥澤之中，必依水草而背眾樹，此處斥澤之軍也。平陸處易，右背高，前死後生，此處平陸之軍也。凡此四軍之利，黃帝之所以勝四帝也。凡軍好高而惡下，貴陽而賤陰，養生而處實，軍無百疾，是謂必勝。丘陵堤防，必處其陽而右背之，此兵之利，地之助也。上雨水流至，欲涉者，待其定也。凡地有絕澗、天井、天牢、天羅、天陷、天隙，必亟去之，勿近也。吾遠之，敵近之；吾迎之，敵背之。軍旁有險阻、潢井、蒹葭、小林、蘙薈者，必謹覆索之，此伏姦之所處也。敵近而靜者，恃其險也；遠而挑戰者，欲人之進也；其所居易者，利也；眾樹動者，來也；眾草多障者，疑也；鳥起者，伏也；獸駭者，覆也；塵高而銳者，車來也；卑而廣者，徒來也；散而條達者，樵採也；少而往來者，營軍也；辭卑而備者，進也；辭強而進驅者，退也；輕車先出居其側者，陳也；無約而請和者，謀也；奔走而陳兵者，期也；半進半退者，誘也；杖而立者，饑也；汲而先飲者，渴也；見利而不進者，勞也；鳥集者，虛也；夜呼者，恐也；軍擾者，將不重也；旌旗動者，亂也；吏怒者，倦也；殺馬肉食者，軍無糧也；懸甀不返其舍者，窮寇也；諄諄翕翕，徐與人言者，失眾也；數賞者，窘也；數罰者，困也；先暴而後畏其眾者，不精之至也；來委謝者，欲休息也。兵怒而相迎，久而不合，又不相去，必謹察之。兵非貴益多也，惟無武進，足以並力料敵取人而已。夫惟無慮而易敵者，必擒於人。卒未親而罰之，則不服，不服則難用。卒已親附而罰不行，則不可用。故合之以文，齊之以武，是謂必取。令素行以教其民，則民服；令素不行以教其民，則民不服。令素行者，與眾相得也。
50 | - 地形第十
51 | - 孫子曰：地形有通者、有掛者、有支者、有隘者、有險者、有遠者。我可以往，彼可以來，曰通。通形者，先居高陽，利糧道，以戰則利。可以往，難以返，曰掛。掛形者，敵無備，出而勝之，敵若有備，出而不勝，難以返，不利。我出而不利，彼出而不利，曰支。支形者，敵雖利我，我無出也，引而去之，令敵半出而擊之利。隘形者，我先居之，必盈之以待敵。若敵先居之，盈而勿從，不盈而從之。險形者，我先居之，必居高陽以待敵；若敵先居之，引而去之，勿從也。遠形者，勢均難以挑戰，戰而不利。凡此六者，地之道也，將之至任，不可不察也。凡兵有走者、有馳者、有陷者、有崩者、有亂者、有北者。凡此六者，非天地之災，將之過也。夫勢均，以一擊十，曰走；卒強吏弱，曰馳；吏強卒弱，曰陷；大吏怒而不服，遇敵懟而自戰，將不知其能，曰崩；將弱不嚴，教道不明，吏卒無常，陳兵縱橫，曰亂；將不能料敵，以少合眾，以弱擊強，兵無選鋒，曰北。凡此六者，敗之道也，將之至任，不可不察也。夫地形者，兵之助也。料敵制勝，計險隘遠近，上將之道也。知此而用戰者必勝，不知此而用戰者必敗。故戰道必勝，主曰無戰，必戰可也；戰道不勝，主曰必戰，無戰可也。故進不求名，退不避罪，唯民是保，而利於主，國之寶也。視卒如嬰兒，故可以與之赴深溪；視卒如愛子，故可與之俱死。厚而不能使，愛而不能令，亂而不能治，譬若驕子，不可用也。知吾卒之可以擊，而不知敵之不可擊，勝之半也；知敵之可擊，而不知吾卒之不可以擊，勝之半也；知敵之可擊，知吾卒之可以擊，而不知地形之不可以戰，勝之半也。故知兵者，動而不迷，舉而不窮。故曰：知彼知己，勝乃不殆；知天知地，勝乃可全。
52 | - 九地第十一
53 | - 孫子曰：用兵之法，有散地，有輕地，有爭地，有交地，有衢地，有重地，有泛地，有圍地，有死地。諸侯自戰其地者，為散地；入人之地不深者，為輕地；我得亦利，彼得亦利者，為爭地；我可以往，彼可以來者，為交地；諸侯之地三屬，先至而得天下眾者，為衢地；入人之地深，背城邑多者，為重地；山林、險阻、沮澤，凡難行之道者，為泛地；所由入者隘，所從歸者迂，彼寡可以擊吾之眾者，為圍地；疾戰則存，不疾戰則亡者，為死地。是故散地則無戰，輕地則無止，爭地則無攻，交地則無絕，衢地則合交，重地則掠，泛地則行，圍地則謀，死地則戰。古之善用兵者，能使敵人前後不相及，眾寡不相恃，貴賤不相救，上下不相收，卒離而不集，兵合而不齊。合於利而動，不合於利而止。敢問敵眾而整將來，待之若何曰：先奪其所愛則聽矣。兵之情主速，乘人之不及。由不虞之道，攻其所不戒也。凡為客之道，深入則專。主人不克，掠於饒野，三軍足食。謹養而勿勞，並氣積力，運兵計謀，為不可測。投之無所往，死且不北。死焉不得，士人盡力。兵士甚陷則不懼，無所往則固，深入則拘，不得已則鬥。是故其兵不修而戒，不求而得，不約而親，不令而信，禁祥去疑，至死無所之。吾士無餘財，非惡貨也；無餘命，非惡壽也。令發之日，士卒坐者涕沾襟，偃臥者涕交頤，投之無所往，諸、劌之勇也。故善用兵者，譬如率然。率然者，常山之蛇也。擊其首則尾至，擊其尾則首至，擊其中則首尾俱至。敢問兵可使如率然乎？曰可。夫吳人與越人相惡也，當其同舟而濟而遇風，其相救也如左右手。是故方馬埋輪，未足恃也；齊勇如一，政之道也；剛柔皆得，地之理也。故善用兵者，攜手若使一人，不得已也。將軍之事，靜以幽，正以治，能愚士卒之耳目，使之無知；易其事，革其謀，使人無識；易其居，迂其途，使民不得慮。帥與之期，如登高而去其梯；帥與之深入諸侯之地，而發其機。若驅群羊，驅而往，驅而來，莫知所之。聚三軍之眾，投之於險，此謂將軍之事也。九地之變，屈伸之力，人情之理，不可不察也。凡為客之道，深則專，淺則散。去國越境而師者，絕地也；四徹者，衢地也；入深者，重地也；入淺者，輕地也；背固前隘者，圍地也；無所往者，死地也。是故散地吾將一其志，輕地吾將使之屬，爭地吾將趨其後，交地吾將謹其守，交地吾將固其結，衢地吾將謹其恃，重地吾將繼其食，泛地吾將進其途，圍地吾將塞其闕，死地吾將示之以不活。故兵之情：圍則禦，不得已則鬥，過則從。是故不知諸侯之謀者，不能預交；不知山林、險阻、沮澤之形者，不能行軍；不用鄉導，不能得地利。四五者，一不知，非霸王之兵也。夫霸王之兵，伐大國，則其眾不得聚；威加於敵，則其交不得合。是故不爭天下之交，不養天下之權，信己之私，威加於敵，則其城可拔，其國可隳。施無法之賞，懸無政之令。犯三軍之眾，若使一人。犯之以事，勿告以言；犯之以害，勿告以利。投之亡地然後存，陷之死地然後生。夫眾陷於害，然後能為勝敗。故為兵之事，在順詳敵之意，並敵一向，千里殺將，是謂巧能成事。是故政舉之日，夷關折符，無通其使，厲於廊廟之上，以誅其事。敵人開闔，必亟入之，先其所愛，微與之期，踐墨隨敵，以決戰事。是故始如處女，敵人開戶；後如脫兔，敵不及拒。
54 | - 火攻第十二
55 | - 孫子曰：凡火攻有五：一曰火人，二曰火積，三曰火輜，四曰火庫，五曰火隊。行火必有因，因必素具。發火有時，起火有日。時者，天之燥也。日者，月在箕、壁、翼、軫也。凡此四宿者，風起之日也。凡火攻，必因五火之變而應之：火發於內，則早應之於外；火發而其兵靜者，待而勿攻，極其火力，可從而從之，不可從則上。火可發於外，無待於內，以時發之，火發上風，無攻下風，晝風久，夜風止。凡軍必知五火之變，以數守之。故以火佐攻者明，以水佐攻者強。水可以絕，不可以奪。夫戰勝攻取而不惰其功者凶，命曰“費留”。故曰：明主慮之，良將惰之，非利不動，非得不用，非危不戰。主不可以怒而興師，將不可以慍而攻戰。合於利而動，不合於利而上。怒可以複喜，慍可以複說，亡國不可以複存，死者不可以複生。故明主慎之，良將警之。此安國全軍之道也。
56 | - 用間第十三
57 | - 孫子曰： 
58 |   凡興師十萬，出征千里，百姓之費，公家之奉，日費千金，內外騷動，怠於道路，不得操事者，七十萬家。相守數年，以爭一日之勝，而愛爵祿百金，不知敵之情者，不仁之至也，非民之將也，非主之佐也，非勝之主也。故明君賢將所以動而勝人，成功出於眾者，先知也。先知者，不可取於鬼神，不可象於事，不可驗於度，必取於人，知敵之情者也。故用間有五：有因間，有內間，有反間，有死間，有生間。五間俱起，莫知其道，是謂神紀，人君之寶也。鄉間者，因其鄉人而用之；內間者，因其官人而用之；反間者，因其敵間而用之；死間者，為誑事於外，令吾聞知之而傳於敵間也；生間者，反報也。故三軍之事，莫親於間，賞莫厚於間，事莫密於間，非聖賢不能用間，非仁義不能使間，非微妙不能得間之實。微哉微哉！無所不用間也。間事未發而先聞者，間與所告者兼死。凡軍之所欲擊，城之所欲攻，人之所欲殺，必先知其守將、左右、謁者、門者、舍人之姓名，令吾間必索知之。敵間之來間我者，因而利之，導而舍之，故反間可得而用也；因是而知之，故鄉間、內間可得而使也；因是而知之，故死間為誑事，可使告敵；因是而知之，故生間可使如期。五間之事，主必知之，知之必在於反間，故反間不可不厚也。昔殷之興也，伊摯在夏；周之興也，呂牙在殷。故明君賢將，能以上智為間者，必成大功。此兵之要，三軍之所恃而動也。
59 | -
60 | -


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/run_render.sh:
--------------------------------------------------------------------------------
1 | RUST_LOG=info cargo test --test unit_test_render_block -- --nocapture
2 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/unit_test_load_notes.rs:
--------------------------------------------------------------------------------
  1 | use fire_seq_search_server::markdown_parser::{exclude_advanced_query, parse_to_plain_text};
  2 | 
  3 | use std::borrow::Cow;
  4 | 
  5 | 
  6 | fn load_articles() -> Vec<(String, String)> {
  7 |     let r = read_specific_directory("tests/resource/pages");
  8 |     r
  9 | }
 10 | 
 11 | #[test]
 12 | fn test_load_articles() {
 13 |     let r = load_articles();
 14 |     assert_eq!(r.len(), 11);
 15 |     for (title,body) in &r{
 16 |         assert!(title.len()>0);
 17 |         assert!(body.len()>0);
 18 |     }
 19 | }
 20 | 
 21 | 
 22 | fn read_file_to_line(relative_path: &str) -> String {
 23 |     let path = vec![String::from("tests/resource/pages"),
 24 |                     relative_path.to_string()];
 25 |     let path = path.join("/");
 26 |     std::fs::read_to_string(&path)
 27 |         .expect("Should have been able to read the file")
 28 | }
 29 | 
 30 | 
 31 | #[test]
 32 | fn parse() {
 33 |     let md = read_file_to_line("blog_thunderbird_zh.md");
 34 |     let result = parse_to_plain_text(&md);
 35 |     assert!(result.contains("Aug 3, 2021 - 使用 git shallow clone 下载并编译 Thunderbird"));
 36 |     assert!(!result.contains("https://developer.thunderbird.net/thunderbird-development/getting-started"));
 37 | 
 38 | }
 39 | 
 40 | #[test]
 41 | fn exclude_advance_query() {
 42 |     let md = read_file_to_line("advanced_query.md");
 43 |     let md = Cow::from(md);
 44 |     let result = exclude_advanced_query(md);
 45 |     assert!(!result.contains("exempli"));
 46 |     assert!(result.contains("In this test page we have"));
 47 | 
 48 | 
 49 |     let md = read_file_to_line("blog_thunderbird_zh.md");
 50 |     let md = Cow::from(md);
 51 |     let result = exclude_advanced_query(md.clone());
 52 |     assert_eq!(md, result);
 53 | }
 54 | 
 55 | 
 56 | 
 57 | 
 58 | 
 59 | 
 60 | 
 61 | // =====================
 62 | // These functions are removed in https://github.com/Endle/fireSeqSearch/pull/149/commits/7692bd9091380858b0cbeb2fa10d8c01dabcba91
 63 | //  aka https://github.com/Endle/fireSeqSearch/pull/147
 64 | // To make unit test happy, I copied them as test helper functions
 65 | // Zhenbo - 2024 Sep 21
 66 | use std::fs::DirEntry;
 67 | use rayon::iter::IntoParallelRefIterator;
 68 | use rayon::iter::ParallelIterator;
 69 | use std::process;
 70 | fn read_md_file_wo_parse(note: &std::fs::DirEntry) -> Option<(String, String)> {
 71 |     if let Ok(file_type) = note.file_type() {
 72 |         // Now let's show our entry's file type!
 73 |         if file_type.is_dir() {
 74 |             return None;
 75 |         }
 76 |     } else {
 77 |         return None;
 78 |     }
 79 | 
 80 |     let note_path = note.path();
 81 |     let note_title = match note_path.file_stem() {
 82 |         Some(osstr) => osstr.to_str().unwrap(),
 83 |         None => {
 84 |             return None;
 85 |         }
 86 |     };
 87 |     let content : String = match std::fs::read_to_string(&note_path) {
 88 |         Ok(c) => c,
 89 |         Err(e) => {
 90 |             if note_title.to_lowercase() == ".ds_store" {
 91 |             } else {
 92 |             }
 93 |             return None;
 94 |         }
 95 |     };
 96 | 
 97 |     Some((note_title.to_string(),content))
 98 | }
 99 | fn read_specific_directory(path: &str) -> Vec<(String, String)> {
100 |     let notebooks = match std::fs::read_dir(path) {
101 |         Ok(x) => x,
102 |         Err(e) => {
103 |             process::abort();
104 |         }
105 |     };
106 |     let mut note_filenames: Vec<DirEntry> = Vec::new();
107 |     for note in notebooks {
108 |         let note : DirEntry = note.unwrap();
109 |         note_filenames.push(note);
110 |     }
111 |     let result: Vec<(String,String)> = note_filenames.par_iter()
112 |         .map(|note|  read_md_file_wo_parse(&note))
113 |         .filter(|x| (&x).is_some())
114 |         .map(|x| x.unwrap())
115 |         .collect();
116 | 
117 |     result
118 | }
119 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/unit_test_post_query.rs:
--------------------------------------------------------------------------------
 1 | use fire_seq_search_server::post_query::highlighter::{highlight_keywords_in_body, highlight_sentence_with_keywords, locate_single_keyword, split_body_to_blocks, wrap_text_at_given_spots};
 2 | use fire_seq_search_server::generate_server_info_for_test;
 3 | 
 4 | fn get_english_text() -> String {
 5 |     std::fs::read_to_string("tests/resource/pages/International Language, Past, Present & Future by Walter John Clark.md")
 6 |         .expect("Should have been able to read the file")
 7 | }
 8 | fn highlight_keywords_in_body_old_2024_apr(body:&str, terms: &Vec<String>, limit:usize) ->String {
 9 |     let mut server_info = generate_server_info_for_test();
10 |     server_info.show_summary_single_line_chars_limit = limit;
11 |     highlight_keywords_in_body(body, terms, &server_info)
12 | }
13 | 
14 | #[test]
15 | fn test_empty_key() {
16 |     let text = "Hello World";
17 |     let v = Vec::new();
18 | 
19 |     let r = highlight_keywords_in_body_old_2024_apr(text, &v, 120);
20 |     assert_eq!(4,4);
21 | 
22 |     assert_eq!(&r, "");
23 | }
24 | 
25 | 
26 | 
27 | #[test]
28 | fn test_highlight_wrap() {
29 |     let contents = "使用 git shallow clone 下载并编译 Thunderbird".to_string();
30 |     let v = vec![String::from("thunderbird")];
31 |     let r = highlight_keywords_in_body_old_2024_apr(&contents, &v, 120);
32 |     assert_eq!(&r, "使用 git shallow clone 下载并编译  <span class=\"fireSeqSearchHighlight\">Thunderbird</span>");
33 | }
34 | 
35 | #[test]
36 | fn test_highlight_latex() {
37 |     let contents = "$\\vec{q_i}^T \\vec{a_j}, i<j$".to_string();
38 |     let v = vec![String::from("vec")];
39 |     let r = highlight_keywords_in_body_old_2024_apr(&contents, &v, 120);
40 |     println!("{:?}", &r);
41 | }
42 | #[test]
43 | fn test_split_to_block() {
44 |     // This part is still hacky
45 |     let contents = get_english_text();
46 |     let blocks = split_body_to_blocks(&contents, 120);
47 | 
48 |     assert_eq!("As an ounce of personal experience is worth a pound of second-hand recital, a brief statement may here be given of the way in which the present writer came to take up Esperanto, and of the experiences which soon led him to the conviction of its absolute practicability and utility.", &blocks[0]);
49 |     assert_eq!("Now, quite apart from the obvious fact that the nations will never agree to give the preference to the language of one of them to the prejudice of the others, this argument involves the 16 suggestion that an artificial language is no easier to learn than a natural one. We thus come to the question of ease as a qualification.", &blocks[12]);
50 |     assert_eq!(14, blocks.len());
51 | }
52 | 
53 | 
54 | #[test]
55 | fn test_split_long_article_to_block() {
56 |     let contents = std::fs::read_to_string
57 |         ("tests/resource/pages/feditips.md")
58 |         .expect("Should have been able to read the file");
59 |     let _a = split_body_to_blocks(&contents, 120);
60 | 
61 |     //I didn't finish the test
62 | }
63 | 
64 | #[test]
65 | fn test_highlight_sentence_with_keywords() {
66 |     let contents = std::fs::read_to_string
67 |         ("tests/resource/pages/咖啡.md")
68 |         .expect("Should have been able to read the file");
69 |     let tokens = vec!["咖啡"];
70 |     let _r = highlight_sentence_with_keywords(&contents, &tokens, 300);
71 | }
72 | 
73 | 
74 | #[test]
75 | fn test_wrap_text_at_given_spots() {
76 |     let contents = std::fs::read_to_string
77 |         ("tests/resource/pages/咖啡.md")
78 |         .expect("Should have been able to read the file");
79 | 
80 |     // Returns the length of this String, in bytes, not chars or graphemes
81 |     // Win 164, mac & linux 163
82 |     assert!(contents.len() == 164 || contents.len() == 163);
83 |     let token = "咖啡";
84 |     assert_eq!(token.len(),6);
85 |     let mats = locate_single_keyword(&contents, token);
86 |     assert_eq!(2, mats.len());
87 | 
88 |     for m in &mats {
89 |         let left: usize = m.0;
90 |         let right: usize = m.1;
91 |         let sub = &contents[left..right];
92 |         assert_eq!(sub, token);
93 |         assert_eq!(right-left, 6);
94 |     }
95 |     let _r = wrap_text_at_given_spots(&contents, &mats, 320);
96 | }
97 | 


--------------------------------------------------------------------------------
/fire_seq_search_server/tests/unit_test_render_block.rs:
--------------------------------------------------------------------------------
 1 | use fire_seq_search_server::post_query::highlighter::{highlight_keywords_in_body, highlight_sentence_with_keywords, locate_single_keyword, split_body_to_blocks, wrap_text_at_given_spots};
 2 | use fire_seq_search_server::post_query::highlighter::build_tree;
 3 | use fire_seq_search_server::generate_server_info_for_test;
 4 | 
 5 | fn get_english_text() -> String {
 6 |     std::fs::read_to_string("tests/resource/pages/International Language, Past, Present & Future by Walter John Clark.md")
 7 |         .expect("Should have been able to read the file")
 8 | }
 9 | 
10 | 
11 | 
12 | 
13 | /*
14 | #[test]
15 | fn test_highlight_single_term_single_appearance() {
16 |     let _ = env_logger::try_init();
17 |     let server_info = generate_server_info_for_test();
18 |     let content = "使用 git shallow clone 下载并编译 Thunderbird".to_string();
19 |     let token = "thunderbird";
20 |     let tokens = [token];
21 |     let mut root = build_tree(&content, &server_info);
22 | 
23 |     let r = root.children[0].split_leaf_node_by_single_term(token, &server_info);
24 |     //println!("{:?}", &r);
25 |     assert!(r.len() >= 2);
26 |     assert!(r[1].is_hit);
27 |     // TODO The behaviour at here is not stable. This is hacky test case - 2024-Apr
28 | 
29 |     let r2 = root.children[0].split_leaf_node_by_terms(&tokens, &server_info);
30 |     assert_eq!(r.len(), r2.len());
31 | 
32 |     root.parse_highlight(&tokens, &server_info);
33 |     println!("{:?}", &root);
34 | }
35 | 
36 | #[test]
37 | fn test_highlight_single_term_multi_appearance() {
38 |     let _ = env_logger::try_init();
39 |     let server_info = generate_server_info_for_test();
40 |     let content = "使用 git shallow clone 下载并编译 Thunderbird : compile thunderbird".to_string();
41 |     let token = "thunderbird";
42 |     let tokens = [token];
43 |     let mut root = build_tree(&content, &server_info);
44 | 
45 | 
46 |     root.parse_highlight(&tokens, &server_info);
47 |     //println!("Parsed result: {:?}", &root);
48 |     root.flattern();
49 |     //println!("Flattern: {:?}", &root);
50 |     assert_eq!(root.children.len(), 4);
51 |     assert!(root.children[1].is_hit);
52 |     assert!(root.children[3].is_hit);
53 | }
54 | */
55 | 
56 | #[test]
57 | fn test_highlight_multiple_terms() {
58 |     let _ = env_logger::try_init();
59 |     let server_info = generate_server_info_for_test();
60 |     let content = "使用 git shallow clone 下载并编译 Thunderbird : compile thunderbird with git shallow".to_string();
61 |     let token = "thunderbird";
62 |     let token2 = "git";
63 |     let tokens = [token, token2];
64 |     let mut root = build_tree(&content, &server_info);
65 | 
66 | 
67 |     root.parse_highlight(&tokens, &server_info);
68 |     //println!("Parsed result: {:?}", &root);
69 |     root.flattern();
70 |     //println!("Flattern: {:?}", &root);
71 |     assert!(root.children[1].is_hit);
72 |     /*
73 |     assert_eq!(root.children.len(), 4);
74 |     assert!(root.children[3].is_hit);
75 |     */
76 | }
77 | 


--------------------------------------------------------------------------------
/pack_firefox_extension.sh:
--------------------------------------------------------------------------------
1 | cd fireSeqSearch_addon
2 | zip -r -FS ../fireSeqSearch.zip * --exclude '*.git*' --exclude "monkeyscript.user.js" --exclude "violentmonkeyscript.user.js"
3 | cd ..
4 | cp -f fireSeqSearch.zip  ~/Downloads #/dev/shm
5 | 


--------------------------------------------------------------------------------