3 |
5 |
6 | # Full Text Tabs Forever
7 |
8 | Search everything you read online. FTTF lets you search the full text of every web page you visit.
9 |
10 | Available in the [Chrome Web Store](https://chrome.google.com/webstore/detail/full-text-tabs-forever/gfmbnlbnapjmffgcnbopfgmflmlfghel).
11 |
12 | Available in the [Firefox Add-ons Store](https://addons.mozilla.org/en-US/firefox/addon/full-text-tabs-forever/).
13 |
14 | > **IMPORTANT FOR v2.0 USERS:** If you're upgrading from v1.x, see the [Database Migration](#database-migration-v20) section for instructions on migrating your existing data.
15 |
16 | _Firefox requires additional permissions. See [below](#firefox)._
17 |
18 |
19 |
20 | **Doesn't the browser do that already? How is this different?**
21 |
22 | Chrome does not let you search the text of pages you've visited, **only their URLs and titles**, and it deletes your history after a few months. Firefox keeps your history longer, but likewise only lets you search URLs and titles, not page content.
23 |
24 |
25 | FTTF is different:
26 |
27 | - **Full-Text Search Capabilities:** The full content of every page you've visited becomes searchable.
28 | - **Permanent History:** Your digital footprints are yours to keep. Nothing is deleted automatically or without your approval.
29 | - **Instant indexing:** FTTF builds its search index as you browse, so a page becomes searchable as soon as you visit it.
30 | - **For your eyes only:** Your browsing history is stored locally on your device, not on any external servers. Note that if you switch computers your FTTF history will not automatically come with you, though it can be exported.
31 |
32 |
33 |
34 | 
35 |
36 |
37 |
38 | **Who is it for?**
39 |
40 | Data hoarders like me who never want to delete anything and want everything to be searchable. More generally, if you've ever felt limited by the standard history search, you should try this out.
41 |
42 | **How it works:**
43 |
44 | Browser extensions have access to the pages you visit, which lets FTTF make an index of the content on any page. When a page loads, its content is extracted and indexed.
45 |
46 | Extracted? Yes, or "distilled" if you prefer. Full web pages are huge and carry a lot of markup and chrome unrelated to the content itself. FTTF ignores all of that: it acts like "reader mode", finding the relevant content on a page and indexing only that.
47 |
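Concretely, the extraction step looks roughly like the sketch below. It uses [Mozilla's Readability](https://github.com/mozilla/readability), which FTTF builds on, but this is a simplified illustration rather than the extension's actual content script (which also sends fields such as a markdown conversion of the page):

```ts
import { Readability } from "@mozilla/readability";

// Simplified sketch of the content script's job: distill the page,
// then hand the result to the background script for indexing.
async function extractAndIndex() {
  // Readability mutates the DOM it parses, so give it a clone.
  const article = new Readability(document.cloneNode(true) as Document).parse();

  if (!article) {
    // Nothing readable here (blank tab, login wall, etc.)
    await chrome.runtime.sendMessage(["nothingToIndex"]);
    return;
  }

  await chrome.runtime.sendMessage([
    "indexPage",
    {
      title: article.title,
      text_content: article.textContent,
      extractor: "readability",
    },
  ]);
}
```
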
48 | # Installation
49 |
50 | Install in your browser via the [Chrome Web Store](https://chrome.google.com/webstore/detail/full-text-tabs-forever/gfmbnlbnapjmffgcnbopfgmflmlfghel) or the [Firefox Add-ons Store](https://addons.mozilla.org/en-US/firefox/addon/full-text-tabs-forever/).
51 |
52 | # Testing
53 |
54 | This project uses `bun` as its unit-test runner, but not (currently) as a bundler. You will need to install `bun`, then:
55 |
56 | `bun test`
57 |
58 | Or, `bun run test` if you prefer.
59 |
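Tests live alongside the source as `*.test.ts` files and use the `bun:test` API. A minimal test in the style used in this repo:

```ts
import { describe, it, expect } from "bun:test";

describe("example", () => {
  it("does arithmetic", () => {
    expect(1 + 1).toBe(2);
  });
});
```
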
60 | # Note to self: Submitting a new version manually
61 |
62 | > How could this be automated? A rough sketch follows the list below.
63 |
64 | - Manually bump the version in the manifest file
65 | - Run the build
66 |   - `bun run build:chrome`
67 |   - `bun run build:firefox`
68 | - Submit
69 |   - Chrome
70 |     - Go to: https://chrome.google.com/webstore/devconsole/bc898ad5-018e-4774-b9ab-c4bef7b7f92b/gfmbnlbnapjmffgcnbopfgmflmlfghel/edit/package
71 |     - Upload the `fttf-chrome.zip` file
72 |   - Firefox
73 |     - Go to: https://addons.mozilla.org/en-US/developers/addon/full-text-tabs-forever/edit
74 |     - Upload the `fttf-firefox.zip` file
75 |     - Zip the original source code and upload that too: `zip -r src.zip src`
76 |
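As a partial answer to the automation question above, here is a rough sketch of scripting the build-and-zip half using the Bun shell. This script is hypothetical (not part of the repo), and the store uploads themselves would still be manual or require each store's publish API:

```ts
// scripts/package.ts (hypothetical helper); run with: bun scripts/package.ts
import { $ } from "bun";

// Keep the packaged version in sync with package.json
const { version } = await Bun.file("package.json").json();
console.log(`Packaging v${version}...`);

// Produce fttf-chrome.zip and fttf-firefox.zip
await $`bun run build:chrome`;
await $`bun run build:firefox`;

// Firefox review also wants the original source uploaded alongside the build
await $`zip -r src.zip src`;
```
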
77 | # Firefox
78 |
79 | Install here: https://addons.mozilla.org/en-US/firefox/addon/full-text-tabs-forever/
80 |
81 | Currently you have to manually enable additional permissions in Firefox.
84 |
85 | See this comment for more details: https://github.com/iansinnott/full-text-tabs-forever/issues/3#issuecomment-1963238416
86 |
87 | Support was added in: https://github.com/iansinnott/full-text-tabs-forever/pull/4.
88 |
89 | # Database Migration (v2.0)
90 |
91 | With version 2.0, Full Text Tabs Forever has migrated from SQLite (VLCN) to PostgreSQL (PgLite) as its database backend. This change brings several improvements:
92 |
93 | - Better full-text search capabilities with PostgreSQL's advanced text search
94 | - Support for vector embeddings for semantic search (coming soon)
95 | - Improved performance for large databases
96 | - More efficient storage of document fragments
97 |
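As a flavor of what the PostgreSQL backend enables, here is a minimal, self-contained full-text search example using PGlite. The `page` table and its columns are illustrative only, not the extension's actual schema:

```ts
import { PGlite } from "@electric-sql/pglite";

const db = new PGlite(); // in-memory here; the extension uses a persistent database

await db.exec(`
  CREATE TABLE page (
    id    SERIAL PRIMARY KEY,
    title TEXT,
    body  TEXT,
    -- generated column keeps the search vector in sync with the row
    search tsvector GENERATED ALWAYS AS (
      to_tsvector('english', coalesce(title, '') || ' ' || coalesce(body, ''))
    ) STORED
  );
  CREATE INDEX page_search_idx ON page USING GIN (search);
`);

await db.query("INSERT INTO page (title, body) VALUES ($1, $2)", [
  "Hello",
  "Full text search, running entirely in the browser.",
]);

const { rows } = await db.query(
  `SELECT title, ts_rank(search, websearch_to_tsquery('english', $1)) AS rank
     FROM page
    WHERE search @@ websearch_to_tsquery('english', $1)
    ORDER BY rank DESC`,
  ["full text"]
);
console.log(rows); // => [{ title: "Hello", rank: ... }]
```
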
98 | ## For Existing Users
99 |
100 | If you're upgrading from a previous version (v1.x), your data will not be lost! The extension includes a migration system that will:
101 |
102 | 1. Detect your existing VLCN (SQLite) database
103 | 2. Provide a simple one-click migration option in the Settings page
104 | 3. Transfer all your saved pages to the new PostgreSQL database
105 | 4. Show real-time progress during migration
106 | 5. Preserve all your searchable content
107 |
108 | To migrate your data:
109 |
110 | 1. After upgrading, open the extension
111 | 2. Go to the Settings page
112 | 3. Find the "Import VLCN Database (v1)" section
113 | 4. Click the "Import VLCN Database" button
114 | 5. Wait for the migration to complete - this may take several minutes depending on how many pages you've saved
115 | 6. Your data is now accessible in the new database system!
116 |
117 | The migration happens entirely on your device, and no data is sent to external servers. Your privacy remains protected throughout the process.
118 |
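Under the hood, the Settings page drives the migration through the extension's message channel. The snippet below is a simplified sketch of that flow; the message names match the adapter code, but the surrounding logic is abbreviated:

```ts
// Ask the background script whether a v1 (VLCN) database exists
const status = await chrome.runtime.sendMessage(["checkVLCNMigrationStatus"]);

if (status.available && !status.migrated) {
  // Kick off the batched import. Progress updates arrive separately
  // as "vlcnMigrationStatus" runtime messages.
  const result = await chrome.runtime.sendMessage(["importVLCNDocumentsV1"]);
  console.log(`Imported ${result.imported} documents (${result.duplicates} duplicates)`);
}
```
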
119 | # TODO
120 |
121 | - [ ] Backfill history
122 |   Currently only new pages you visit are indexed, but we could backfill by opening every page in the browser's history that hasn't yet been indexed. An optional feature, but a useful one.
123 | - [ ] Backup and sync
124 |   Improved export/import capabilities for moving data between devices.
125 | - [ ] Semantic search
126 |   Leverage vector embeddings in the new PostgreSQL backend for more intelligent searching.
127 | - [ ] Integrate with [browser-gopher](https://github.com/iansinnott/browser-gopher)
128 |   Browser gopher and [BrowserParrot](https://www.browserparrot.com/) were the initial impetus to create a better way to ingest full text web pages, without triggering a Cloudflare captcha party on your home connection.
129 | - [x] Migrate to PostgreSQL
130 |   Replace SQLite with a more powerful database backend using PgLite.
131 | - [x] Improve discoverability of functionality.
132 |   There is now a button to open the command palette. Still not much GUI, but enough to be discovered.
133 | - [x] Firefox
134 |   ~~This should not be too difficult since this project was started with web extension polyfills. However, there is currently some chrome specific code.~~
135 |   It appears that the APIs do not have to be rewritten to work in Firefox. See this PR for details: https://github.com/iansinnott/full-text-tabs-forever/pull/4
136 |
137 | # Contributing
138 |
139 | PRs welcome!
140 |
--------------------------------------------------------------------------------
/scripts/release.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | main() {
4 | echo "Releasing new version..."
5 | echo
6 | echo " PWD: $PWD"
7 |
8 | local version=$(jq -r '.version' package.json)
9 |
10 | # Replace version in src/manifest.json
11 | sed -i '' -e "s/\"version\": \".*\"/\"version\": \"$version\"/g" src/manifest.json
12 |
13 | # amend last commit
14 | git add src/manifest.json > /dev/null
15 | git commit --amend --no-edit > /dev/null
16 |
17 | # upsert the tag. if running yarn version the tag will have been created already
18 | git tag -d "v$version" > /dev/null 2>&1 || true
19 | git tag -a "v$version" -m "v$version" > /dev/null
20 |
21 | echo " Tag: v$version"
22 | echo " Commit: $(git rev-parse HEAD)"
23 | echo
24 | echo "Don't forget to push the tag to GitHub: git push --tags"
25 | }
26 |
27 | main
--------------------------------------------------------------------------------
/scripts/replace-manifest.cjs:
--------------------------------------------------------------------------------
1 | /**
2 | * Because chrome is so sensitive about the manifest file this script serves to
3 | * modify it for distribution.
4 | */
5 | const { readFileSync, writeFileSync } = require("fs");
6 | const path = require("path");
7 |
8 | const modifyManifest = (manifest) => {
9 | delete manifest["$schema"];
10 | };
11 |
12 | try {
13 | const manifestV3 = JSON.parse(
14 | readFileSync(path.resolve(__dirname, "../dist/manifest.json"), "utf8")
15 | );
16 |
17 | // Mutate the manifest object
18 | modifyManifest(manifestV3);
19 |
20 | writeFileSync(
21 | path.resolve(__dirname, "../dist/manifest.json"),
22 | JSON.stringify(manifestV3, null, 2)
23 | );
24 |
25 | console.log("Manifest converted v3 -> v2");
26 | } catch (err) {
27 | console.error("Could not build manifest", err);
28 | }
29 |
--------------------------------------------------------------------------------
/scripts/resize-images.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Input file
4 | input_file="$1"
5 |
6 | if [[ ! -e $input_file ]]; then
7 | echo "File does not exist"
8 | exit 1
9 | fi
10 |
11 | if [[ ${input_file: -4} != ".png" ]]; then
12 | echo "File is not a PNG"
13 | exit 1
14 | fi
15 |
16 | # Output directory
17 | output_dir="src/assets"
18 |
19 | # Create the output directory if it doesn't exist
20 | mkdir -p $output_dir
21 |
22 | # Icon sizes
23 | sizes=(16 48 128)
24 |
25 | # Generate the icons
26 | for size in "${sizes[@]}"; do
27 | base_name=$(basename "$input_file" .png)
28 | echo "Generating ${size}x${size} icon..."
29 | convert "$input_file" -resize "${size}x${size}" "$output_dir/${base_name}_${size}.png"
30 | done
--------------------------------------------------------------------------------
/src/assets/icon-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-1.png
--------------------------------------------------------------------------------
/src/assets/icon-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-2.png
--------------------------------------------------------------------------------
/src/assets/icon-cropped-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-cropped-1.png
--------------------------------------------------------------------------------
/src/assets/icon-cropped-1_128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-cropped-1_128.png
--------------------------------------------------------------------------------
/src/assets/icon-cropped-1_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-cropped-1_16.png
--------------------------------------------------------------------------------
/src/assets/icon-cropped-1_48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-cropped-1_48.png
--------------------------------------------------------------------------------
/src/assets/icon-cropped-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-cropped-2.png
--------------------------------------------------------------------------------
/src/assets/icon_128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon_128.png
--------------------------------------------------------------------------------
/src/assets/icon_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon_16.png
--------------------------------------------------------------------------------
/src/assets/icon_48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon_48.png
--------------------------------------------------------------------------------
/src/assets/star-empty-38.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/star-empty-38.png
--------------------------------------------------------------------------------
/src/assets/star-filled-38.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/star-filled-38.png
--------------------------------------------------------------------------------
/src/background.ts:
--------------------------------------------------------------------------------
1 | // import browser, { omnibox, Runtime } from "webextension-polyfill";
2 |
3 | import { PgLiteBackend } from "./background/backend-pglite";
4 | import { log } from "./common/logs";
5 | import { debounce } from "./common/utils";
6 | import { BackendAdapter } from "./background/backend-adapter";
7 |
8 | // Although there were initially multiple adapters there is no mainly one.
9 | const adapter = new BackendAdapter({
10 | backend: new PgLiteBackend(),
11 | runtime: chrome.runtime,
12 | });
13 |
14 | /**
15 | * Expose for debugging
16 | * @example await fttf.backend._db.execO(`select * from sqlite_master;`)
17 | */
18 | globalThis.fttf = adapter;
19 |
20 | export type FTTF = {
21 | adapter: BackendAdapter;
22 | };
23 |
24 | if (adapter.onMessage) {
25 | chrome.runtime.onMessage.addListener((...args) => adapter.onMessage(...args));
26 | }
27 |
28 | // @note We do not support spas currently. URL changes trigger here, but we do
29 | // not then instruct the frontend to send the full text.
30 | const updateHandler = debounce(
31 | async (tabId: number, changeInfo: chrome.tabs.TabChangeInfo, tab: chrome.tabs.Tab) => {
32 | console.debug("%ctab update", "color:gray;", "no action performed", tab.url);
33 | // browser.tabs.sendMessage(tabId, ["onTabUpdated", { tabId, changeInfo }]);
34 | },
35 | 200
36 | );
37 |
38 | // Listen for tab updates, because the content script normally only runs on load. This is for SPA apps
39 | chrome.tabs.onUpdated.addListener((...args) => updateHandler(...args));
40 |
41 | // When the extension button is clicked, log a message
42 | chrome.action.onClicked.addListener(async () => {
43 | await adapter.openIndexPage();
44 | });
45 |
--------------------------------------------------------------------------------
/src/background/backend-adapter.ts:
--------------------------------------------------------------------------------
1 | import type { SendResponse } from "./backend";
2 | import { VLCN } from "./backend-vlcn";
3 | import { PgLiteBackend } from "./backend-pglite";
4 | import { log } from "../common/logs";
5 |
6 | export type BackendAdapterRuntime = {
7 | sendMessage: typeof chrome.runtime.sendMessage;
8 | getURL: typeof chrome.runtime.getURL;
9 | };
10 |
11 | export class BackendAdapter {
12 | backend: PgLiteBackend;
13 | runtime: BackendAdapterRuntime;
14 | _vlcn: VLCN | null = null;
15 |
16 | constructor({ backend, runtime }: { backend: PgLiteBackend; runtime: BackendAdapterRuntime }) {
17 | this.backend = backend;
18 | this.runtime = runtime;
19 | }
20 |
21 | onMessage(message: any, sender: chrome.runtime.MessageSender, sendResponse: SendResponse) {
22 | // Special case for migrating from VLCN to PgLite
23 | if (message[0] === "importVLCNDocuments" || message[0] === "importVLCNDocumentsV1") {
24 | this.importVLCNDocumentsV1()
25 | .then((result) => {
26 | sendResponse({ ok: true, ...result });
27 | })
28 | .catch((err) => {
29 | sendResponse({ error: err.message });
30 | });
31 | return true;
32 | }
33 |
34 | // Add handler for checking VLCN migration status
35 | if (message[0] === "checkVLCNMigrationStatus") {
36 | this.checkVLCNMigrationStatus()
37 | .then((result) => {
38 | sendResponse(result);
39 | })
40 | .catch((err) => {
41 | sendResponse({ error: err.message });
42 | });
43 | return true;
44 | }
45 |
46 | let waitForResponse = false;
47 | try {
48 | const { tab } = sender;
49 | const [method, payload] = message as [string, any];
50 |
51 | if (sender.url !== tab?.url) {
52 | console.log(`%cinfo`, "color:yellow;", "sender URL and tab URL differ. probably iframe");
53 | }
54 |
55 | // @ts-ignore This could be handled better. unimportant for now
56 | if (typeof this.backend[method] === "function") {
57 | waitForResponse = true;
58 | // @ts-ignore
59 | this.backend[method](payload, sender)
60 | .then((ret) => {
61 | sendResponse(ret);
62 | })
63 | .catch((err) => {
64 | console.error(`backend :: err :: ${method} ::`, payload);
65 | console.error(err);
66 | sendResponse({ error: err.message, stack: err.stack });
67 | });
68 | } else {
69 | console.warn(`%c${method}`, "color:yellow;", "is not a valid method", payload);
70 | sendResponse({ error: `'${method}' is not a valid RPC` });
71 | }
72 | } catch (err) {
73 | console.error("Could not parse message", message, sender, err);
74 | sendResponse({ error: err.message });
75 | }
76 |
77 | return waitForResponse; // Keep channel open for async response. Yikes
78 | }
79 |
80 | async checkVLCNMigrationStatus() {
81 | try {
82 | const isComplete = await this.isMigrationComplete();
83 |
84 | if (isComplete) {
85 | return { available: true, migrated: true };
86 | }
87 |
88 | if (!this._vlcn) {
89 | this._vlcn = new VLCN();
90 | try {
91 | await this._vlcn.readyPromise;
92 | } catch (err) {
93 | console.error("Failed to initialize VLCN", err);
94 | return { available: false, error: err.message };
95 | }
96 | }
97 |
98 | const status = await this._vlcn.getStatus();
99 | if (!status.ok) {
100 | return { available: false, error: status.error };
101 | }
102 |
103 | // Check if there are documents to migrate
104 | const count = await this._vlcn.sql<{
105 | count: number;
106 | }>`select count(*) as count from "document";`;
107 |
108 | const documentCount = count[0].count;
109 |
110 | // Flag the migration as complete so that we don't continue to initialize
111 | // VLCN every time. Ultimately we will remove VLCN completely.
112 | if (documentCount === 0) {
113 | await this.setMigrationComplete();
114 | }
115 |
116 | return {
117 | available: true,
118 | migrated: false,
119 | documentCount,
120 | };
121 | } catch (err) {
122 | console.error("Error checking VLCN migration status", err);
123 | return { available: false, error: err.message };
124 | }
125 | }
126 |
127 | // Created for debugging workflow
128 | async openIndexPage() {
129 | const [existingTab] = await chrome.tabs.query({
130 | url: this.runtime.getURL("index.html"),
131 | });
132 |
133 | if (existingTab) {
134 | await chrome.tabs.update(existingTab.id!, { active: true });
135 | } else {
136 | await chrome.tabs.create({ url: chrome.runtime.getURL("index.html") });
137 | }
138 | }
139 |
140 | async setMigrationComplete() {
141 | // First create the table if it doesn't exist
142 | await this.backend.db!.exec(
143 | `CREATE TABLE IF NOT EXISTS migration_info (key TEXT PRIMARY KEY, value TEXT);`
144 | );
145 |
146 | // Then insert the migration flag
147 | await this.backend.db!.exec(
148 | `INSERT INTO migration_info (key, value) VALUES ('migrated_to_pglite', '1') ON CONFLICT(key) DO UPDATE SET value = '1';`
149 | );
150 | }
151 |
152 | async isMigrationComplete() {
153 | try {
154 | const result = await this.backend.db!.query<{ value: string }>(
155 | `SELECT value FROM migration_info WHERE key = 'migrated_to_pglite';`
156 | );
157 | return result.rows[0]?.value === "1";
158 | } catch (error) {
159 | // If we haven't run the migration yet don't consider this an error
160 | if (error instanceof Error && error.message.includes("does not exist")) {
161 | return false;
162 | }
163 |
164 | throw error;
165 | }
166 | }
167 |
168 | async importVLCNDocumentsV1() {
169 | try {
170 | // Send initial status update
171 | this.runtime.sendMessage({
172 | type: "vlcnMigrationStatus",
173 | status: "starting",
174 | message: "Initializing VLCN database...",
175 | });
176 |
177 | if (!this._vlcn) {
178 | this._vlcn = new VLCN();
179 | await this._vlcn.readyPromise;
180 | }
181 |
182 | // Check document count
183 | const count = await this._vlcn.sql<{
184 | count: number;
185 | }>`select count(*) as count from "document";`;
186 |
187 | console.log("vlcnAdapter :: count", count);
188 |
189 | if (count[0].count === 0) {
190 | this.runtime.sendMessage({
191 | type: "vlcnMigrationStatus",
192 | status: "empty",
193 | message: "No documents found in the VLCN database.",
194 | });
195 | return { imported: 0, message: "No documents found in VLCN database" };
196 | }
197 |
198 | // Send update with document count
199 | this.runtime.sendMessage({
200 | type: "vlcnMigrationStatus",
201 | status: "fetching",
202 | message: `Found ${count[0].count} documents to migrate...`,
203 | });
204 |
205 | // Process documents in batches
206 | const BATCH_SIZE = 100;
207 | let imported = 0;
208 | let duplicates = 0;
209 | let processed = 0;
210 | const totalDocuments = count[0].count;
211 |
212 | // Send update before importing
213 | this.runtime.sendMessage({
214 | type: "vlcnMigrationStatus",
215 | status: "importing",
216 | message: `Beginning import of ${totalDocuments} documents...`,
217 | total: totalDocuments,
218 | current: 0,
219 | });
220 |
221 | while (processed < totalDocuments) {
222 | // Fetch batch of documents
223 | const batchQuery = `SELECT
224 | id,
225 | title,
226 | url,
227 | excerpt,
228 | mdContent,
229 | mdContentHash,
230 | publicationDate,
231 | hostname,
232 | lastVisit,
233 | lastVisitDate,
234 | extractor,
235 | createdAt,
236 | updatedAt
237 | FROM "document"
238 | LIMIT ${BATCH_SIZE} OFFSET ${processed};`;
239 |
240 | const batch = await this._vlcn?.db.execA(batchQuery);
241 |
242 | if (batch.length === 0) {
243 | break; // No more documents to process
244 | }
245 |
246 | if (processed === 0) {
247 | // Log sample of first batch only
248 | console.log(
249 | "vlcnAdapter :: docs sample",
250 | batch.slice(0, 3).map((d) => ({ id: d[0], title: d[1], url: d[2] }))
251 | );
252 | }
253 |
254 | // Import current batch
255 | const batchResult = await this.backend.importDocumentsJSONv1({ document: batch });
256 |
257 | imported += batchResult.imported;
258 | duplicates += batchResult.duplicates;
259 | processed += batch.length;
260 |
261 | // Update progress
262 | this.runtime.sendMessage({
263 | type: "vlcnMigrationStatus",
264 | status: "importing",
265 | message: `Imported ${processed} of ${totalDocuments} documents...`,
266 | total: totalDocuments,
267 | current: processed,
268 | });
269 | }
270 |
271 | const result = { imported, duplicates };
272 |
273 | // Send completion status
274 | this.runtime.sendMessage({
275 | type: "vlcnMigrationStatus",
276 | status: "complete",
277 | message: `Migration complete. Imported ${result.imported} documents (${result.duplicates} were duplicates).`,
278 | result,
279 | });
280 |
281 | // Mark VLCN database as migrated to prevent duplicate migrations
282 | try {
283 | await this.setMigrationComplete();
284 |
285 | console.log("Marked VLCN database as migrated successfully");
286 | } catch (err) {
287 | console.error("Error marking VLCN database as migrated", err);
288 | }
289 |
290 | return result;
291 | } catch (error) {
292 | console.error("VLCN migration failed", error);
293 |
294 | // Send error status
295 | this.runtime.sendMessage({
296 | type: "vlcnMigrationStatus",
297 | status: "error",
298 | message: `Migration failed: ${error.message}`,
299 | error: error.message,
300 | });
301 |
302 | return { error: error.message };
303 | }
304 | }
305 | }
306 |
--------------------------------------------------------------------------------
/src/background/backend-debug.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * This backend is used for debugging purposes. It does not index anything.
3 | */
4 |
5 | import { formatDebuggablePayload } from "../common/utils";
6 | import { Backend, DetailRow } from "./backend";
7 |
8 | export class DebugBackend implements Backend {
9 | getStatus: Backend["getStatus"] = async () => {
10 | return {
11 | ok: true,
12 | };
13 | };
14 |
15 | search: Backend["search"] = async (search) => {
16 | console.debug(`backend#%c${"search"}`, "color:lime;", search);
17 | return {
18 | ok: true,
19 | results: [],
20 | count: 0,
21 | perfMs: 0,
22 | query: search.query,
23 | };
24 | };
25 |
26 | async findOne(query: { where: { url: string } }): Promise<DetailRow | null> {
27 | console.debug(`backend#%c${"findOne"}`, "color:lime;", query);
28 | return null;
29 | }
30 |
31 | getPageStatus: Backend["getPageStatus"] = async (payload, sender) => {
32 | const { tab } = sender;
33 | let shouldIndex = tab?.url?.startsWith("http"); // ignore chrome extensions, about:blank, etc
34 |
35 | try {
36 | const url = new URL(tab?.url || "");
37 | if (url.hostname === "localhost") shouldIndex = false;
38 | if (url.hostname.endsWith(".local")) shouldIndex = false;
39 | } catch (err) {
40 | // should not happen
41 | throw err;
42 | }
43 |
44 | console.debug(`%c${"getPageStatus"}`, "color:lime;", { shouldIndex, url: tab?.url }, payload);
45 |
46 | return {
47 | shouldIndex,
48 | };
49 | };
50 |
51 | indexPage: Backend["indexPage"] = async (payload, sender) => {
52 | const { tab } = sender;
53 |
54 | // remove adjacent whitespace since it serves no purpose. The html or
55 | // markdown content stores formatting.
56 | const plainText = payload.text_content?.replace(/[ \t]+/g, " ").replace(/\n+/g, "\n");
57 |
58 | console.debug(`%c${"indexPage"}`, "color:lime;", tab?.url);
59 | console.debug(formatDebuggablePayload({ ...payload, textContent: plainText }));
60 | return {
61 | message: "debug backend does not index pages",
62 | };
63 | };
64 |
65 | nothingToIndex: Backend["nothingToIndex"] = async (payload, sender) => {
66 | const { tab } = sender;
67 | console.debug(`%c${"nothingToIndex"}`, "color:beige;", tab?.url);
68 | return {
69 | ok: true,
70 | };
71 | };
72 |
73 | getRecent: Backend["getRecent"] = async (options) => {
74 | console.debug(`backend#%c${"getRecent"}`, "color:lime;", options);
75 | return {
76 | ok: true,
77 | results: [],
78 | count: 0,
79 | perfMs: 0,
80 | };
81 | };
82 | }
83 |
--------------------------------------------------------------------------------
/src/background/backend.ts:
--------------------------------------------------------------------------------
1 | import type { Runtime } from "webextension-polyfill";
2 | import type { Readability } from "@mozilla/readability";
3 |
4 | export type SendResponse = (response?: any) => void;
5 |
6 | export type RemoteProcWithSender<T = any, R = any> = (
7 |   payload: T,
8 |   sender: Runtime.MessageSender
9 | ) => Promise<R>;
10 | export type RemoteProc<T = any, R = any> = (payload: T) => Promise<R>;
11 |
12 | type ReadabilityArticle = Omit<NonNullable<ReturnType<Readability["parse"]>>, "content">;
13 |
14 | export type Article = ReadabilityArticle & {
15 | extractor: string;
16 | /** Optional for now b/c i'm not sending it over the wire if turndown is used in the content script */
17 | html_content?: string;
18 | /** Optional because the parsing can fail */
19 | md_content?: string;
20 | text_content?: string;
21 | date?: string;
22 | _extraction_time: number;
23 | };
24 |
25 | export type ArticleRow = Omit<Article, "date"> & {
26 | id: number;
27 | md_content_hash?: string;
28 | md_content?: string;
29 | url: string;
30 | hostname: string;
31 | search_words?: string[];
32 | last_visit?: number; // Timestamp
33 | last_visit_date?: string;
34 | updated_at: number;
35 | created_at: number; // Timestamp
36 | publication_date?: number;
37 | };
38 |
39 | /** @deprecated don't use urls directly for now. use documents which have URLs */
40 | export type UrlRow = {
41 | url: string;
42 | url_hash: string;
43 | title?: string;
44 | last_visit?: number; // Timestamp
45 | hostname: string;
46 | text_content_hash?: string;
47 | search_words?: string[];
48 | };
49 |
50 | export type ResultRow = {
51 | rowid: number;
52 | id: number;
53 | entity_id: number;
54 | attribute: string;
55 | snippet?: string;
56 | url: string;
57 | hostname: string;
58 | title?: string;
59 | excerpt?: string;
60 | last_visit?: number; // Timestamp
61 | last_visit_date?: string;
62 | md_content_hash?: string;
63 | updated_at: number;
64 | created_at: number; // Timestamp
65 | };
66 |
67 | export type DetailRow = ResultRow & {
68 | md_content?: string;
69 | };
70 |
71 | type FirstArg<T> = T extends (arg: infer U, ...args: any[]) => any ? U : never;
72 |
73 | export type RpcMessage =
74 | | [method: "getPageStatus"]
75 | | [method: "indexPage", payload: FirstArg]
76 | | [method: "nothingToIndex"]
77 | | [method: "getStats"]
78 | | [method: "getStatus"]
79 | | [method: "exportJson"]
80 | | [method: "importJson"]
81 | | [method: "reindex"]
82 | | [method: "search", payload: FirstArg]
83 | | [method: string, payload?: any];
84 |
85 | export type DBDump = Record<string, any[][]>;
86 |
87 | export interface Backend {
88 | getStatus(): Promise<{ ok: true } | { ok: false; error: string; detail?: any }>;
89 | getPageStatus: (_: any, sender: { tab: { url: string } }) => Promise<any>;
90 | indexPage: (payload: Article, sender: { tab: { url: string } }) => Promise<any>;
91 | nothingToIndex: RemoteProcWithSender;
92 | search: RemoteProc<
93 | {
94 | query: string;
95 | limit?: number;
96 | offset?: number;
97 | orderBy: "updated_at" | "rank" | "last_visit";
98 | preprocessQuery?: boolean;
99 | },
100 | {
101 | ok: boolean;
102 | results: ResultRow[];
103 | count?: number;
104 | perfMs: number;
105 | query: string;
106 | }
107 | >;
108 | getRecent(options: { limit?: number; offset?: number }): Promise<{
109 | ok: boolean;
110 | results: ResultRow[];
111 | count?: number;
112 | perfMs: number;
113 | }>;
114 | findOne(query: { where: { url: string } }): Promise<DetailRow | null>;
115 | exportJson?(): Promise<DBDump>;
116 | importDocumentsJSONv1?(payload: {
117 | document: any[][];
118 | }): Promise<{ imported: number; duplicates: number }>;
119 | }
120 |
--------------------------------------------------------------------------------
/src/background/embedding/pipeline.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * For use in background.js - Handles requests from the UI, runs the model, then
3 | * sends back a response
4 | */
5 |
6 | import { pipeline, env, type FeatureExtractionPipeline } from "@xenova/transformers";
7 |
8 | export type TransformersProgress =
9 | | {
10 | status: "done" | "initiate" | "download";
11 | name: string;
12 | file: string;
13 | }
14 | | {
15 | status: "progress";
16 | name: string;
17 | file: string;
18 | progress: number;
19 | loaded: number;
20 | total: number;
21 | }
22 | | {
23 | status: "ready";
24 | task: string;
25 | model: string;
26 | };
27 |
28 | // Skip initial check for local models, since we are not loading any local models.
29 | env.allowLocalModels = false;
30 |
31 | // Due to a bug in onnxruntime-web, we must disable multithreading for now.
32 | // See https://github.com/microsoft/onnxruntime/issues/14445 for more information.
33 | env.backends.onnx.wasm.numThreads = 1;
34 |
35 | class PipelineSingleton {
36 | static task = "feature-extraction" as const;
37 | static model = "Xenova/all-MiniLM-L6-v2";
38 | static instance: FeatureExtractionPipeline | null = null;
39 |
40 | static async getInstance(progress_callback?: (x: TransformersProgress) => void) {
41 | if (this.instance === null) {
42 | console.time("loading pipeline");
43 | this.instance = await pipeline(this.task, this.model, { progress_callback });
44 | console.timeEnd("loading pipeline");
45 | }
46 |
47 | return this.instance;
48 | }
49 | }
50 |
51 | export const createTensor = async (text: string) => {
52 | // Get the pipeline instance. This will load and build the model when run for the first time.
53 | let model = await PipelineSingleton.getInstance((data) => {
54 | console.log("progress ::", data);
55 | });
56 |
57 | // Actually run the model on the input text
58 | let tensor = await model(text, { pooling: "mean", normalize: true });
59 |
60 | return tensor;
61 | };
62 |
63 | // Create generic classify function, which will be reused for the different types of events.
64 | export const createEmbedding = async (text: string) => {
65 | const tensor = await createTensor(text);
66 | return tensor.tolist()?.[0] as number[];
67 | };
68 |
--------------------------------------------------------------------------------
/src/background/pglite/HAX_pglite.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * HAX: Load PGlite in a service worker
3 | *
4 | * This is a temporary solution to allow PGlite to work in a service worker.
5 | * Hopefully in future versions this will not be necessary. The core issue here
6 | * is that PGlite, perhaps via some internal emscripten logic, is using the
7 | * _synchronous_ XMLHttpRequest API to load assets. This poses two issues:
8 | *
9 | * - chrome does not support XMLHttpRequest AT ALL in service workers
10 | * - we cannot create a full polyfill for XMLHttpRequest because we cannot mimic the synchronous behavior
11 | *
12 | * Thus this script simply loads the relevant bytes into memory and hands them
13 | * back if requested via the correct URL.
14 | *
15 | * @todo Not sure if vite grabs the relevant asset and puts it in the build;
16 | * might need to create a plugin for that. Works for the `dev` command but might
17 | * not work for `build`.
18 | */
19 |
20 | const assetCache = new Map<string, ArrayBuffer>();
21 |
22 | async function preloadAssets() {
23 | // NOTE: The wasm file exists in the pglite package but does not seem to be used. preloading the data file was enough
24 | const assetUrls = [
25 | chrome.runtime.getURL("/assets/postgres-CkP7QCDB.data"), // 0.2.17
26 | ];
27 |
28 | for (const url of assetUrls) {
29 | try {
30 | const response = await fetch(url);
31 | if (!response.ok) {
32 | console.log(`failed to fetch asset :: ${url}`);
33 | continue;
34 | }
35 | const arrayBuffer = await response.arrayBuffer();
36 | assetCache.set(url, arrayBuffer);
37 | } catch (error) {
38 | console.error(`failed to preload asset :: ${url}`, error);
39 | }
40 | }
41 | }
42 |
43 | // As with XMLHttpRequest, this is not supported in the service worker context.
44 | class ProgressEventPolyfill {
45 | type: string;
46 | constructor(type: string) {
47 | this.type = type;
48 | }
49 | }
50 |
51 | // A partial polyfill for XMLHttpRequest to support the loading of pglite in a
52 | // service worker
53 | class XMLHttpRequestPolyfill {
54 | private url: string = "";
55 | public onload: ((this: XMLHttpRequest, ev: ProgressEvent) => any) | null = null;
56 | public onerror: ((this: XMLHttpRequest, ev: ProgressEvent) => any) | null = null;
57 | public status: number = 0;
58 | public responseText: string = "";
59 | public response: any = null;
60 |
61 | open(method: string, url: string) {
62 | console.log("open ::", { method, url });
63 | this.url = url;
64 | }
65 |
66 | send(body: any = null) {
67 | console.log("send ::", { body });
68 | if (assetCache.has(this.url)) {
69 | this.response = assetCache.get(this.url);
70 | this.status = 200;
71 | if (this.onload) {
72 | // @ts-expect-error
73 | this.onload.call(this, new ProgressEventPolyfill("load") as any);
74 | }
75 | } else {
76 | console.error(`asset not preloaded :: ${this.url}`);
77 | this.status = 404;
78 | if (this.onerror) {
79 | // @ts-expect-error
80 | this.onerror.call(this, new ProgressEventPolyfill("error") as any);
81 | }
82 | }
83 | }
84 | }
85 |
86 | (globalThis as any).XMLHttpRequest = XMLHttpRequestPolyfill;
87 | (globalThis as any).ProgressEvent = ProgressEventPolyfill;
88 |
89 | // Preload assets BEFORE importing PGlite
90 | //
91 | // NOTE: This will require vite-plugin-top-level-await. Chrome will not allow
92 | // top level await in service workers even if supported by the browser in other
93 | // context.
94 | await preloadAssets();
95 |
96 | import { PGlite } from "@electric-sql/pglite";
97 |
98 | export { PGlite };
99 |
--------------------------------------------------------------------------------
/src/background/pglite/defaultBlacklistRules.ts:
--------------------------------------------------------------------------------
1 | export const defaultBlacklistRules: Array<[string, "url_only" | "no_index"]> = [
2 | ["https://news.ycombinator.com", "url_only"],
3 | ["https://news.ycombinator.com/news", "url_only"],
4 | ["https://news.ycombinator.com/new", "url_only"],
5 | ["https://news.ycombinator.com/best", "url_only"],
6 | ["http://localhost%", "no_index"],
7 | ["https://localhost%", "no_index"],
8 | ["https://www.bankofamerica.com%", "url_only"],
9 | ["https://www.chase.com%", "url_only"],
10 | ["https://www.wellsfargo.com%", "url_only"],
11 | ["https://www.citibank.com%", "url_only"],
12 | ["https://www.capitalone.com%", "url_only"],
13 | ["https://app.mercury.com%", "url_only"],
14 | ["https://www.schwab.com%", "url_only"],
15 | ["https://www.fidelity.com%", "url_only"],
16 | ["https://www.vanguard.com%", "url_only"],
17 | ["https://www.etrade.com%", "url_only"],
18 | ["https://www.tdameritrade.com%", "url_only"],
19 | ["https://www.robinhood.com%", "url_only"],
20 | ["https://www.paypal.com%", "url_only"],
21 | ["https://www.venmo.com%", "url_only"],
22 | ["https://www.facebook.com", "url_only"],
23 | ["https://www.amazon.com%", "url_only"],
24 | ["https://www.ebay.com%", "url_only"],
25 | ["https://www.dropbox.com", "url_only"],
26 | ["https://drive.google.com%", "url_only"],
27 | ["https://www.coinbase.com%", "url_only"],
28 | ["https://www.webmd.com", "url_only"],
29 | ["https://%.local", "no_index"],
30 | ["https://%.internal", "no_index"],
31 | ["https://twitter.com", "url_only"],
32 | ["https://twitter.com/home", "url_only"],
33 | ["https://x.com", "url_only"],
34 | ["https://x.com/home", "url_only"],
35 | ["https://www.linkedin.com", "url_only"],
36 | ["https://www.tiktok.com", "url_only"],
37 | ["https://mail.google.com", "no_index"],
38 | ["https://outlook.live.com%", "no_index"],
39 | ["https://docs.google.com%", "url_only"],
40 | ["https://www.office.com%", "url_only"],
41 | ["https://slack.com", "url_only"],
42 | ["https://zoom.us%", "url_only"],
43 | ["https://www.ask.com/web?q=%", "url_only"],
44 | ["https://www.baidu.com/s?%", "url_only"],
45 | ["https://www.reddit.com/search%", "url_only"],
46 | ["https://www.bing.com/search%", "url_only"],
47 | ["https://search.yahoo.com/search%", "url_only"],
48 | ["https://www.duckduckgo.com/?q=%", "url_only"],
49 | ["https://yandex.com/search/?%", "url_only"],
50 | ["https://%dashlane.com%", "no_index"],
51 | ["https://%bitwarden.com%", "no_index"],
52 | ["https://%lastpass.com%", "no_index"],
53 | ["https://%1password.com%", "no_index"],
54 | ["https://kagi.com/search%", "url_only"],
55 | ["https://www.google.com/search%", "url_only"],
56 | ];
57 |
--------------------------------------------------------------------------------
/src/background/pglite/job_queue.test.ts:
--------------------------------------------------------------------------------
1 | // @ts-nocheck
2 | import { describe, it, expect, beforeEach, afterEach, mock } from "bun:test";
3 | import { JobQueue, JOB_QUEUE_SCHEMA } from "./job_queue";
4 | import { PGlite } from "@electric-sql/pglite";
5 | import * as defaultTasks from "./tasks";
6 |
7 | describe("JobQueue", () => {
8 | let db: PGlite;
9 | let jobQueue: JobQueue;
10 | let mockTasks: typeof defaultTasks;
11 |
12 | beforeEach(async () => {
13 | // Create an in-memory PGLite instance
14 | db = new PGlite("memory://");
15 | await db.query(JOB_QUEUE_SCHEMA);
16 |
17 | // Create mock tasks
18 | mockTasks = {
19 | ...defaultTasks,
20 | generate_fragments: {
21 | handler: mock(() => Promise.resolve()),
22 | params: { parse: (p: any) => p },
23 | },
24 | };
25 |
26 | jobQueue = new JobQueue(db, mockTasks, 100);
27 | await jobQueue.initialize();
28 | });
29 |
30 | afterEach(async () => {
31 | // Clean up the database
32 | await db.query("DROP TABLE IF EXISTS task");
33 | await db.close();
34 | });
35 |
36 | it("should initialize the job queue", async () => {
37 | const result = await db.query<{ count: number }>("SELECT COUNT(*) as count FROM task");
38 | expect(result.rows[0].count).toBe(0);
39 | });
40 |
41 | it("should enqueue a task", async () => {
42 | const taskType = "generate_fragments";
43 | const params = { articleId: 1 };
44 |
45 | const taskId = await jobQueue.enqueue(taskType, params);
46 | expect(taskId).toBeGreaterThan(0);
47 |
48 | const result = await db.query<{ count: number }>("SELECT COUNT(*) as count FROM task");
49 | expect(result.rows[0].count).toBe(1);
50 | });
51 |
52 | it("should not enqueue duplicate tasks", async () => {
53 | const taskType = "generate_fragments";
54 | const params = { articleId: 1 };
55 |
56 | const taskId1 = await jobQueue.enqueue(taskType, params);
57 | const taskId2 = await jobQueue.enqueue(taskType, params);
58 |
59 | expect(taskId1).toBeGreaterThan(0);
60 | expect(taskId2).toBeUndefined();
61 |
62 | const result = await db.query<{ count: number }>("SELECT COUNT(*) as count FROM task");
63 | expect(result.rows[0].count).toBe(1);
64 | });
65 |
66 | it("should process pending tasks", async () => {
67 | const taskType = "generate_fragments";
68 | const params = { articleId: 1 };
69 |
70 | await jobQueue.enqueue(taskType, params);
71 |
72 | await jobQueue.processPendingTasks();
73 |
74 | // Wait for a short time to allow the task to be processed
75 | await new Promise((resolve) => setTimeout(resolve, 100));
76 |
77 | const result = await db.query<{ count: number }>("SELECT COUNT(*) as count FROM task");
78 | expect(result.rows[0].count).toBe(0);
79 | expect(mockTasks[taskType].handler).toHaveBeenCalledTimes(1);
80 | });
81 |
82 | it("should mark failed tasks", async () => {
83 | const taskType = "generate_fragments";
84 | const params = { articleId: 1 };
85 |
86 | // Mock the task handler to throw an error
87 | mockTasks[taskType] = {
88 | handler: mock(() => Promise.reject(new Error("Test error"))),
89 | params: { parse: (p: any) => p },
90 | };
91 |
92 | await jobQueue.enqueue(taskType, params);
93 |
94 | await jobQueue.processPendingTasks();
95 |
96 | // Wait for a short time to allow the task to be processed
97 | await new Promise((resolve) => setTimeout(resolve, 100));
98 |
99 | const result = await db.query<{ count: number; failed_count: number }>(
100 | "SELECT COUNT(*) as count, COUNT(failed_at) as failed_count FROM task"
101 | );
102 | expect(result.rows[0].count).toBe(1);
103 | expect(result.rows[0].failed_count).toBe(1);
104 | });
105 |
106 | it("should stop processing tasks when requested", async () => {
107 | const taskType = "generate_fragments";
108 | const params = { articleId: 1 };
109 |
110 | // Mock the task handler
111 | mockTasks[taskType] = {
112 | handler: mock(() => new Promise((resolve) => setTimeout(resolve, 500))),
113 | params: { parse: (p: any) => p },
114 | };
115 |
116 | await jobQueue.enqueue(taskType, params);
117 | await jobQueue.enqueue(taskType, { articleId: 2 });
118 |
119 | const processPromise = jobQueue.processPendingTasks();
120 |
121 | // Stop the queue after a short delay
122 | setTimeout(() => jobQueue.stop(), 100);
123 |
124 | await processPromise;
125 |
126 | const result = await db.query<{ count: number }>("SELECT COUNT(*) as count FROM task");
127 | expect(result.rows[0].count).toBe(1); // One task should remain unprocessed
128 | });
129 | });
130 |
--------------------------------------------------------------------------------
/src/background/pglite/job_queue.ts:
--------------------------------------------------------------------------------
1 | import type { PGlite, Transaction } from "@electric-sql/pglite";
2 | import type { TaskDefinition } from "./tasks";
3 | import * as defaultTasks from "./tasks";
4 |
5 | const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
6 |
7 | type DBWriter = Pick<PGlite | Transaction, "query">;
8 |
9 | export const JOB_QUEUE_SCHEMA = `
10 | CREATE TABLE IF NOT EXISTS task (
11 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
12 | task_type TEXT NOT NULL,
13 | params JSONB DEFAULT '{}'::jsonb NOT NULL,
14 | created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL,
15 | failed_at TIMESTAMP WITH TIME ZONE,
16 | error TEXT,
17 | CONSTRAINT task_task_type_params_unique UNIQUE(task_type, params)
18 | );
19 | `;
20 |
21 | export class JobQueue {
22 | private isProcessing: boolean = false;
23 | private shouldStop: boolean = false;
24 |
25 | constructor(
26 | private db: PGlite,
27 | private tasks: typeof defaultTasks = defaultTasks,
28 | private taskInterval: number = 1000
29 | ) {}
30 |
31 | async initialize() {
32 | await this.db.query(JOB_QUEUE_SCHEMA);
33 | }
34 |
35 | async enqueue(
36 | taskType: keyof typeof this.tasks,
37 | params: object = {},
38 | tx: DBWriter = this.db
39 | ): Promise<number | undefined> {
40 | const task = this.tasks[taskType];
41 |
42 | if (!task) {
43 | throw new Error(`Task type ${taskType} not implemented`);
44 | }
45 |
46 | // Make sure params are valid before adding to queue
47 | task.params?.parse(params);
48 |
49 | const result = await tx.query<{ id: number }>(
50 | `
51 | INSERT INTO task (task_type, params)
52 | VALUES ($1, $2::jsonb)
53 | ON CONFLICT (task_type, params) DO NOTHING
54 | RETURNING id
55 | `,
56 | [taskType, params]
57 | );
58 |
59 | const taskId = result.rows[0]?.id;
60 |
61 | return taskId;
62 | }
63 |
64 | /**
65 | * Process a single task from the queue
66 | *
67 | * NOTE: a few things about this queue strategy:
68 | * - priority queue based on logic in the ORDER BY clause. add cases as needed
69 | * - random order if no priority is set
70 | */
71 | private async processQueue() {
72 | let processedId: number | null = null;
73 |
74 | try {
75 | await this.db.transaction(async (tx) => {
76 | const result = await tx.query<{
77 | id: number;
78 | task_type: string;
79 | params: Record<string, any>;
80 | }>(`
81 | SELECT id, task_type, params::jsonb
82 | FROM task
83 | WHERE failed_at IS NULL
84 | ORDER BY
85 | CASE
86 | WHEN task_type = 'generate_fragments' THEN 0
87 | ELSE random()
88 | END,
89 | created_at
90 | LIMIT 1
91 | FOR UPDATE SKIP LOCKED
92 | `);
93 |
94 | if (!result.rows.length) {
95 | console.log("task :: empty queue");
96 | return;
97 | }
98 |
99 | const { id, task_type, params } = result.rows[0];
100 |
101 | processedId = id;
102 |
103 | if (!(task_type in this.tasks)) {
104 | console.warn(`task :: ${task_type} :: not implemented`);
105 | await this.markTaskAsFailed(tx, id, "Task type not implemented");
106 | return;
107 | }
108 |
109 | const task = this.tasks[task_type as keyof typeof this.tasks] as TaskDefinition;
110 | const start = performance.now();
111 | try {
112 | await task.handler(tx, task.params?.parse(params));
113 | await tx.query("DELETE FROM task WHERE id = $1", [id]);
114 | } catch (error) {
115 | console.error(`task :: error`, error.message);
116 | throw error;
117 | } finally {
118 | console.log(
119 | `task :: ${performance.now() - start}ms :: ${task_type} :: ${JSON.stringify(params)}`
120 | );
121 | }
122 | });
123 | } catch (error) {
124 | console.error(`task :: processQueue :: error`, error);
125 |
126 | // NOTE this cannot be done within the transaction. using the tx after a
127 | // failure will result in an error saying the transaction is aborted.
128 | if (processedId) {
129 | await this.markTaskAsFailed(this.db, processedId, error.message);
130 | }
131 | }
132 | }
133 |
134 | private async markTaskAsFailed(tx: DBWriter, id: number, errorMessage: string) {
135 | await tx.query(
136 | `
137 | UPDATE task
138 | SET failed_at = CURRENT_TIMESTAMP, error = $1
139 | WHERE id = $2
140 | `,
141 | [errorMessage, id]
142 | );
143 | }
144 |
145 | async processPendingTasks() {
146 | if (this.isProcessing) {
147 | return;
148 | }
149 |
150 | this.isProcessing = true;
151 | this.shouldStop = false;
152 |
153 | const getPendingCount = async () => {
154 | const pendingTasks = await this.db.query<{ count: number }>(`
155 | SELECT COUNT(*) as count FROM task
156 | WHERE failed_at IS NULL
157 | `);
158 | return pendingTasks.rows[0].count;
159 | };
160 |
161 | try {
162 | while ((await getPendingCount()) > 0 && !this.shouldStop) {
163 | await this.processQueue();
164 | await sleep(this.taskInterval);
165 | }
166 | } finally {
167 | this.isProcessing = false;
168 | }
169 | }
170 |
171 | stop() {
172 | this.shouldStop = true;
173 | }
174 | }
175 |
--------------------------------------------------------------------------------
/src/background/pglite/migration-manager.test.ts:
--------------------------------------------------------------------------------
1 | import { describe, it, expect, beforeEach, mock } from "bun:test";
2 | import { PGlite } from "@electric-sql/pglite"; // Use the direct import for testing
3 | import { MigrationManager, Migration } from "./migration-manager";
4 |
5 | describe("MigrationManager", () => {
6 | let db: PGlite;
7 | let migrationManager: MigrationManager;
8 |
9 | beforeEach(async () => {
10 | if (db) {
11 | await db.close();
12 | }
13 |
14 | // Create a new in-memory database for each test
15 | db = new PGlite();
16 | migrationManager = new MigrationManager(db);
17 | });
18 |
19 | it("should initialize with no migrations", async () => {
20 | const status = await migrationManager.applyMigrations();
21 |
22 | expect(status.ok).toBe(true);
23 | expect(status.currentVersion).toBe(0);
24 | expect(status.availableVersion).toBe(0);
25 | expect(status.pendingCount).toBe(0);
26 | });
27 |
28 | it("should register migrations correctly", () => {
29 | const migration1: Migration = {
30 | version: 1,
31 | name: "test_migration_1",
32 | description: "Test migration 1",
33 | sql: "CREATE TABLE test1 (id SERIAL PRIMARY KEY);",
34 | };
35 |
36 | const migration2: Migration = {
37 | version: 2,
38 | name: "test_migration_2",
39 | description: "Test migration 2",
40 | sql: "CREATE TABLE test2 (id SERIAL PRIMARY KEY);",
41 | };
42 |
43 | migrationManager.registerMigration(migration1);
44 | migrationManager.registerMigration(migration2);
45 |
46 | // We're testing internal state here, so we need to cast to access private properties
47 | const migrations = (migrationManager as any).migrations;
48 | expect(migrations.length).toBe(2);
49 | expect(migrations[0].version).toBe(1);
50 | expect(migrations[1].version).toBe(2);
51 | });
52 |
53 | it("should apply migrations in order", async () => {
54 | const migration1: Migration = {
55 | version: 1,
56 | name: "test_migration_1",
57 | description: "Test migration 1",
58 | sql: "CREATE TABLE test1 (id SERIAL PRIMARY KEY);",
59 | };
60 |
61 | const migration2: Migration = {
62 | version: 2,
63 | name: "test_migration_2",
64 | description: "Test migration 2",
65 | sql: "CREATE TABLE test2 (id SERIAL PRIMARY KEY);",
66 | };
67 |
68 | migrationManager.registerMigration(migration1);
69 | migrationManager.registerMigration(migration2);
70 |
71 | const status = await migrationManager.applyMigrations();
72 |
73 | expect(status.ok).toBe(true);
74 | expect(status.currentVersion).toBe(2);
75 | expect(status.availableVersion).toBe(2);
76 | expect(status.pendingCount).toBe(0);
77 |
78 | // Verify tables were created
79 | const result1 = await db.query(
80 | "SELECT table_name FROM information_schema.tables WHERE table_name = 'test1'"
81 | );
82 | const result2 = await db.query(
83 | "SELECT table_name FROM information_schema.tables WHERE table_name = 'test2'"
84 | );
85 |
86 | expect(result1.rows.length).toBe(1);
87 | expect(result2.rows.length).toBe(1);
88 | });
89 |
90 | it("should only apply pending migrations", async () => {
91 | // First migration
92 | const migration1: Migration = {
93 | version: 1,
94 | name: "test_migration_1",
95 | description: "Test migration 1",
96 | sql: "CREATE TABLE test1 (id SERIAL PRIMARY KEY);",
97 | };
98 |
99 | migrationManager.registerMigration(migration1);
100 | await migrationManager.applyMigrations();
101 |
102 | // Second migration
103 | const migration2: Migration = {
104 | version: 2,
105 | name: "test_migration_2",
106 | description: "Test migration 2",
107 | sql: "CREATE TABLE test2 (id SERIAL PRIMARY KEY);",
108 | };
109 |
110 | migrationManager.registerMigration(migration2);
111 | const status = await migrationManager.applyMigrations();
112 |
113 | expect(status.ok).toBe(true);
114 | expect(status.currentVersion).toBe(2);
115 |
116 | // Verify the migrations table has 2 records
117 | const migrationsResult = await db.query<{ version: number }>(
118 | "SELECT * FROM migrations ORDER BY version"
119 | );
120 | expect(migrationsResult.rows.length).toBe(2);
121 | expect(migrationsResult.rows[0].version).toBe(1);
122 | expect(migrationsResult.rows[1].version).toBe(2);
123 | });
124 |
125 | it("should handle errors in migrations", async () => {
126 | const migration1: Migration = {
127 | version: 1,
128 | name: "test_migration_1",
129 | description: "Test migration 1",
130 | sql: "CREATE TABLE test1 (id SERIAL PRIMARY KEY);",
131 | };
132 |
133 | // This migration has invalid SQL
134 | const migration2: Migration = {
135 | version: 2,
136 | name: "invalid_migration",
137 | description: "Invalid SQL migration",
138 | sql: "CREATE TABLE WITH INVALID SYNTAX!!!",
139 | };
140 |
141 | migrationManager.registerMigration(migration1);
142 | migrationManager.registerMigration(migration2);
143 |
144 | const status = await migrationManager.applyMigrations();
145 |
146 | expect(status.ok).toBe(false);
147 | expect(status.currentVersion).toBe(1); // Only the first migration should be applied
148 |
149 | // Verify only the first table exists
150 | const result1 = await db.query(
151 | "SELECT table_name FROM information_schema.tables WHERE table_name = 'test1'"
152 | );
153 | const result2 = await db.query(
154 | "SELECT table_name FROM information_schema.tables WHERE table_name = 'test2'"
155 | );
156 |
157 | expect(result1.rows.length).toBe(1);
158 | expect(result2.rows.length).toBe(0);
159 | });
160 |
161 | it("should handle migrations with out-of-order versions", async () => {
162 | const migration2: Migration = {
163 | version: 2,
164 | name: "test_migration_2",
165 | description: "Test migration 2",
166 | sql: "CREATE TABLE test2 (id SERIAL PRIMARY KEY);",
167 | };
168 |
169 | const migration1: Migration = {
170 | version: 1,
171 | name: "test_migration_1",
172 | description: "Test migration 1",
173 | sql: "CREATE TABLE test1 (id SERIAL PRIMARY KEY);",
174 | };
175 |
176 | // Register in reverse order
177 | migrationManager.registerMigration(migration2);
178 | migrationManager.registerMigration(migration1);
179 |
180 | const status = await migrationManager.applyMigrations();
181 |
182 | expect(status.ok).toBe(true);
183 | expect(status.currentVersion).toBe(2);
184 |
185 | // Verify migrations were applied in correct order
186 | const migrationsResult = await db.query<{ version: number }>(
187 | "SELECT * FROM migrations ORDER BY version"
188 | );
189 | expect(migrationsResult.rows.length).toBe(2);
190 | expect(migrationsResult.rows[0].version).toBe(1);
191 | expect(migrationsResult.rows[1].version).toBe(2);
192 | });
193 | });
194 |
--------------------------------------------------------------------------------
/src/background/pglite/migration-manager.ts:
--------------------------------------------------------------------------------
1 | import { PGlite } from "./HAX_pglite";
2 | import { Transaction } from "@electric-sql/pglite";
3 |
4 | /**
5 | * Simple migration interface for defining database schema changes
6 | * with forward-only migrations
7 | */
8 | export interface Migration {
9 | version: number;
10 | name: string;
11 | description: string;
12 | sql: string; // SQL to execute for this migration
13 | }
14 |
15 | /**
16 | * Migration status
17 | */
18 | export interface MigrationStatus {
19 | ok: boolean;
20 | currentVersion: number;
21 | availableVersion: number;
22 | pendingCount: number;
23 | }
24 |
25 | /**
26 | * A simple, forward-only migration manager for PGlite
27 | */
28 | export class MigrationManager {
29 | private db: PGlite;
30 | private migrations: Migration[] = [];
31 | private currentVersion = 0;
32 | private highestVersion = 0;
33 |
34 | constructor(db: PGlite) {
35 | this.db = db;
36 | }
37 |
38 | /**
39 | * Register a migration with the manager
40 | */
41 | registerMigration(migration: Migration): void {
42 | this.migrations.push(migration);
43 |
44 | // Update highest available version
45 | this.highestVersion = Math.max(this.highestVersion, migration.version);
46 |
47 | // Sort migrations by version
48 | this.migrations.sort((a, b) => a.version - b.version);
49 | }
50 |
51 | /**
52 | * Check if a table exists
53 | */
54 | private async checkTableExists(tableName: string): Promise<boolean> {
55 | try {
56 | const result = await this.db.query<{ exists: boolean }>(
57 | "SELECT EXISTS (SELECT FROM pg_tables WHERE tablename = $1) as exists",
58 | [tableName]
59 | );
60 | return result.rows[0]?.exists || false;
61 | } catch (error) {
62 | // If this fails, assume table doesn't exist
63 | console.warn(`Error checking if table ${tableName} exists:`, error);
64 | return false;
65 | }
66 | }
67 |
68 | /**
69 | * Get current migration version from the database
70 | */
71 | private async getCurrentVersion(): Promise<number> {
72 | try {
73 | const migrationsTableExists = await this.checkTableExists('migrations');
74 |
75 | if (!migrationsTableExists) {
76 | return 0; // No migrations applied yet
77 | }
78 |
79 | const result = await this.db.query<{ max_version: number }>(
80 | "SELECT MAX(version) as max_version FROM migrations"
81 | );
82 |
83 | return result.rows[0]?.max_version || 0;
84 | } catch (error) {
85 | console.error("Error getting current migration version:", error);
86 | return 0;
87 | }
88 | }
89 |
90 | /**
91 | * Apply a single migration
92 | */
93 | private async applyMigration(migration: Migration): Promise<boolean> {
94 | try {
95 | console.debug(`Applying migration ${migration.name} (v${migration.version})...`);
96 |
97 | const startTime = performance.now();
98 |
99 | await this.db.transaction(async (tx) => {
100 | // Execute migration SQL
101 | await tx.exec(migration.sql);
102 |
103 | // Record migration in the migrations table
104 | await tx.query(
105 | "INSERT INTO migrations (version, name, description, applied_at) VALUES ($1, $2, $3, $4)",
106 | [migration.version, migration.name, migration.description, Date.now()]
107 | );
108 | });
109 |
110 | const duration = Math.round(performance.now() - startTime);
111 | console.debug(`Migration ${migration.name} (v${migration.version}) applied successfully in ${duration}ms`);
112 |
113 | return true;
114 | } catch (error) {
115 | console.error(`Error applying migration ${migration.name} (v${migration.version}):`, error);
116 | return false;
117 | }
118 | }
119 |
120 | /**
121 | * Apply all pending migrations
122 | */
123 | async applyMigrations(): Promise<MigrationStatus> {
124 | try {
125 | // Ensure migrations table exists
126 | const migrationsTableExists = await this.checkTableExists('migrations');
127 |
128 | if (!migrationsTableExists) {
129 | // Create migrations table if it doesn't exist
130 | await this.db.exec(`
131 | CREATE TABLE IF NOT EXISTS migrations (
132 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
133 | version INTEGER UNIQUE NOT NULL,
134 | name TEXT NOT NULL,
135 | description TEXT,
136 | applied_at BIGINT NOT NULL
137 | );
138 | `);
139 | }
140 |
141 | // Check if the migrations table has the required columns
142 | try {
143 | await this.db.query("SELECT name FROM migrations LIMIT 0");
144 | } catch (error) {
145 | console.warn("Migrations table exists but may be missing columns. Attempting to upgrade schema...");
146 | // Add missing columns if they don't exist
147 | try {
148 | await this.db.exec("ALTER TABLE migrations ADD COLUMN IF NOT EXISTS name TEXT NOT NULL DEFAULT 'legacy_migration'");
149 | await this.db.exec("ALTER TABLE migrations ADD COLUMN IF NOT EXISTS description TEXT");
150 | console.debug("Successfully upgraded migrations table schema");
151 | } catch (alterError) {
152 | console.error("Failed to alter migrations table:", alterError);
153 | throw alterError;
154 | }
155 | }
156 |
157 | // Get current version
158 | this.currentVersion = await this.getCurrentVersion();
159 | console.debug(`Current migration version: ${this.currentVersion}`);
160 |
161 | // Find pending migrations
162 | const pendingMigrations = this.migrations.filter(m => m.version > this.currentVersion);
163 | console.debug(`Found ${pendingMigrations.length} pending migrations`);
164 |
165 | if (pendingMigrations.length === 0) {
166 | return {
167 | ok: true,
168 | currentVersion: this.currentVersion,
169 | availableVersion: this.highestVersion,
170 | pendingCount: 0
171 | };
172 | }
173 |
174 | // Apply migrations in order
175 | for (const migration of pendingMigrations) {
176 | const success = await this.applyMigration(migration);
177 |
178 | if (!success) {
179 | return {
180 | ok: false,
181 | currentVersion: this.currentVersion,
182 | availableVersion: this.highestVersion,
183 | pendingCount: pendingMigrations.length
184 | };
185 | }
186 |
187 | this.currentVersion = migration.version;
188 | }
189 |
190 | return {
191 | ok: true,
192 | currentVersion: this.currentVersion,
193 | availableVersion: this.highestVersion,
194 | pendingCount: 0
195 | };
196 | } catch (error) {
197 | console.error("Error applying migrations:", error);
198 |
199 | return {
200 | ok: false,
201 | currentVersion: this.currentVersion,
202 | availableVersion: this.highestVersion,
203 | pendingCount: this.migrations.filter(m => m.version > this.currentVersion).length
204 | };
205 | }
206 | }
207 | }
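
Not part of the file above — a minimal usage sketch, assuming callers construct the manager directly (import paths mirror this repo's layout; the no-arg `PGlite()` constructor is used here just to keep the sketch self-contained):

```ts
import { PGlite } from "./HAX_pglite";
import { MigrationManager } from "./migration-manager";
import { migration as initialSchema } from "./migrations/001_init";

// Register every known migration, then apply whatever is pending.
// applyMigrations() skips versions already recorded in the migrations
// table, so running it on every startup is safe.
const db = new PGlite(); // no-arg constructor gives an in-memory database
const manager = new MigrationManager(db);
manager.registerMigration(initialSchema);

const status = await manager.applyMigrations();
if (!status.ok) {
  console.error(`Migrations halted at v${status.currentVersion}`, status);
}
```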
--------------------------------------------------------------------------------
/src/background/pglite/migrations/001_init.ts:
--------------------------------------------------------------------------------
1 | import { Migration } from '../migration-manager';
2 |
3 | export const migration: Migration = {
4 | version: 1,
5 | name: 'initial_schema',
6 | description: 'Initial schema creation with base tables for documents and search',
7 | sql: `
8 | -- make sure pgvector is enabled
9 | CREATE EXTENSION IF NOT EXISTS vector;
10 | CREATE EXTENSION IF NOT EXISTS pg_trgm;
11 |
12 | CREATE TABLE IF NOT EXISTS document (
13 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
14 | title TEXT,
15 | url TEXT UNIQUE NOT NULL,
16 | excerpt TEXT,
17 | md_content TEXT,
18 | md_content_hash TEXT,
19 | publication_date BIGINT,
20 | hostname TEXT,
21 | last_visit BIGINT,
22 | last_visit_date TEXT,
23 | extractor TEXT,
24 | created_at BIGINT NOT NULL DEFAULT EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000,
25 | updated_at BIGINT
26 | );
27 |
28 | CREATE INDEX IF NOT EXISTS document_hostname ON document (hostname);
29 |
30 | CREATE TABLE IF NOT EXISTS document_fragment (
31 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
32 | entity_id BIGINT NOT NULL REFERENCES document (id) ON DELETE CASCADE,
33 | attribute TEXT,
34 | value TEXT,
35 | fragment_order INTEGER,
36 | created_at BIGINT NOT NULL DEFAULT EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000,
37 | search_vector tsvector,
38 | content_vector vector(384)
39 | );
40 |
41 | CREATE OR REPLACE FUNCTION update_document_fragment_fts() RETURNS TRIGGER AS $$
42 | BEGIN
43 | NEW.search_vector := to_tsvector('simple', NEW.value);
44 | RETURN NEW;
45 | END;
46 | $$ LANGUAGE plpgsql;
47 |
48 | -- Trigger to update search vector
49 | DROP TRIGGER IF EXISTS update_document_fragment_fts_trigger ON document_fragment;
50 | CREATE TRIGGER update_document_fragment_fts_trigger
51 | BEFORE INSERT OR UPDATE ON document_fragment
52 | FOR EACH ROW EXECUTE FUNCTION update_document_fragment_fts();
53 |
54 | -- Index for full-text search
55 | CREATE INDEX IF NOT EXISTS idx_document_fragment_search_vector ON document_fragment USING GIN(search_vector);
56 |
57 | -- Index for trigram similarity search, i.e. postgres trigram
58 | -- NOTE: Disabled for now. Takes up a significant amount of space and not yet proven useful for this project
59 | --CREATE INDEX IF NOT EXISTS trgm_idx_document_fragment_value ON document_fragment USING GIN(value gin_trgm_ops);
60 |
61 | CREATE TABLE IF NOT EXISTS blacklist_rule (
62 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
63 | pattern TEXT UNIQUE NOT NULL,
64 | level TEXT NOT NULL CHECK (level IN ('no_index', 'url_only')),
65 | created_at BIGINT NOT NULL DEFAULT EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000
66 | );
67 |
68 | CREATE INDEX IF NOT EXISTS idx_blacklist_rule_pattern ON blacklist_rule (pattern);
69 |
70 | CREATE TABLE IF NOT EXISTS migrations (
71 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
72 | version INTEGER UNIQUE NOT NULL,
73 | name TEXT NOT NULL,
74 | description TEXT,
75 | applied_at BIGINT NOT NULL
76 | );
77 | `
78 | };
79 |
80 | // For backward compatibility with existing code
81 | export const sql = migration.sql;
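
The trigger defined above keeps `search_vector` in sync on every insert or update, so consumers only ever write plain text and query with tsquery operators. A hedged sketch of a search against this schema — `db` is assumed to be the extension's PGlite handle, and the `ts_rank`/`websearch_to_tsquery` choices are illustrative, not taken from this repo:

```ts
// Full-text search over indexed fragments, joined back to their documents.
// 'simple' matches the config used by update_document_fragment_fts().
const results = await db.query<{ url: string; title: string; rank: number }>(
  `SELECT d.url, d.title, ts_rank(df.search_vector, q) AS rank
     FROM document_fragment df
     JOIN document d ON d.id = df.entity_id,
          websearch_to_tsquery('simple', $1) q
    WHERE df.search_vector @@ q
    ORDER BY rank DESC
    LIMIT 20`,
  ["full text tabs"]
);
```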
--------------------------------------------------------------------------------
/src/background/pglite/tasks.ts:
--------------------------------------------------------------------------------
1 | import type { Transaction } from "@electric-sql/pglite";
2 | import { z } from "zod";
3 | import { createEmbedding } from "../embedding/pipeline";
4 | import { getArticleFragments, segment } from "../../common/utils";
5 |
6 | /**
7 | * A helper for type inference.
8 | */
9 | function createTask<T extends z.AnyZodObject | undefined = undefined>({
10 | params = z.object({}),
11 | handler,
12 | }: {
13 | params?: T;
14 | handler: (
15 | tx: Transaction,
16 | params: T extends z.AnyZodObject ? z.infer<T> : undefined
17 | ) => Promise<void>;
18 | }) {
19 | return { params, handler } as const;
20 | }
21 |
22 | export type TaskDefinition = ReturnType<
23 | typeof createTask
24 | >;
25 |
26 | export const generate_vector = createTask({
27 | params: z.object({
28 | fragment_id: z.number(),
29 | }),
30 | handler: async (tx, params) => {
31 | const result = await tx.query<{ value: string }>(
32 | "SELECT value FROM document_fragment WHERE id = $1",
33 | [params.fragment_id]
34 | );
35 | const embedding = await createEmbedding(result.rows[0].value);
36 | await tx.query("UPDATE document_fragment SET content_vector = $1 WHERE id = $2", [
37 | JSON.stringify(embedding),
38 | params.fragment_id,
39 | ]);
40 | },
41 | });
42 |
43 | export const generate_fragments = createTask({
44 | params: z.object({
45 | document_id: z.number(),
46 | }),
47 | handler: async (tx, params) => {
48 | const document = await tx.query<{
49 | id: number;
50 | title: string;
51 | url: string;
52 | excerpt: string;
53 | md_content: string;
54 | }>("SELECT * FROM document WHERE id = $1", [params.document_id]);
55 | const row = document.rows[0];
56 |
57 | if (!row) {
58 | throw new Error("Document not found");
59 | }
60 |
61 | const fragments = getArticleFragments(row.md_content || "");
62 |
63 | const sql = `
64 | INSERT INTO document_fragment (
65 | entity_id,
66 | attribute,
67 | value,
68 | fragment_order
69 | ) VALUES ($1, $2, $3, $4)
70 | ON CONFLICT DO NOTHING;
71 | `;
72 |
73 | let triples: [e: number, a: string, v: string, o: number][] = [];
74 | if (row.title) triples.push([params.document_id, "title", segment(row.title), 0]);
75 | if (row.excerpt) triples.push([params.document_id, "excerpt", segment(row.excerpt), 0]);
76 | if (row.url) triples.push([params.document_id, "url", row.url, 0]);
77 | triples = triples.concat(
78 | fragments
79 | .filter((x) => x.trim())
80 | .map((fragment, i) => {
81 | return [params.document_id, "content", fragment, i];
82 | })
83 | );
84 |
85 | const logLimit = 5;
86 | console.debug(
87 | `generate_fragments :: triples :: ${triples.length} (${Math.max(0, triples.length - logLimit)} omitted)`,
88 | triples.slice(0, logLimit)
89 | );
90 |
91 | for (const param of triples) {
92 | await tx.query(sql, param);
93 | }
94 | },
95 | });
96 |
97 | export const ping = createTask({
98 | handler: async () => {
99 | console.log("Pong!");
100 | },
101 | });
102 |
103 | export const failing_task = createTask({
104 | handler: async () => {
105 | throw new Error("This task always fails");
106 | },
107 | });
108 |
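
The tasks above are plain `{ params, handler }` pairs; whatever runs them is expected to validate the raw params against the zod schema and hand the handler an open transaction. A minimal, hypothetical runner illustrating that contract (the repo's actual task queue lives elsewhere; `runTask` and its signature are assumptions for the sketch):

```ts
import { PGlite } from "./HAX_pglite";
import * as tasks from "./tasks";

// Hypothetical: look up a task by name, validate its params, and run the
// handler inside a transaction so a throwing task rolls back cleanly.
async function runTask(db: PGlite, name: keyof typeof tasks, rawParams: unknown) {
  const task = tasks[name];
  const params = task.params.parse(rawParams ?? {});
  await db.transaction((tx) => task.handler(tx, params as never));
}

// e.g. await runTask(db, "generate_fragments", { document_id: 42 });
```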
--------------------------------------------------------------------------------
/src/common/logs.ts:
--------------------------------------------------------------------------------
1 | export function log(...args: string[]) {
2 | console.log(...args);
3 | }
4 |
--------------------------------------------------------------------------------
/src/common/utils.test.ts:
--------------------------------------------------------------------------------
1 | import { describe, it, expect } from "bun:test";
2 |
3 | import { getArticleFragments, segment, sanitizeHtmlAllowMark } from "./utils";
4 |
5 | describe("getArticleFragments", () => {
6 | it("should handle longform, multi-paragraph text", () => {
7 | const longText = `# Introduction
8 |
9 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
10 |
11 | ## Section 1
12 |
13 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
14 |
15 | ### Subsection 1.1
16 |
17 | Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo.`;
18 |
19 | const fragments = getArticleFragments(longText);
20 | expect(fragments.length).toBeGreaterThan(1);
21 | expect(fragments[0]).toBe(
22 | "# Introduction Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat."
23 | );
24 | expect(fragments[1]).toBe(
25 | "## Section 1 Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
26 | );
27 | expect(fragments[2]).toBe(
28 | "### Subsection 1.1 Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo."
29 | );
30 | });
31 |
32 | it("should handle short text below minFragmentLength", () => {
33 | const shortText = "This is a short text.";
34 | const fragments = getArticleFragments(shortText);
35 | expect(fragments).toHaveLength(1);
36 | expect(fragments[0]).toBe(shortText);
37 | });
38 |
39 | it("should handle empty input", () => {
40 | const fragments = getArticleFragments("");
41 | expect(fragments).toHaveLength(0);
42 | });
43 |
44 | it("should handle input with only headings", () => {
45 | const headingsOnly = `# Heading 1
46 | ## Heading 2
47 | ### Heading 3`;
48 | const fragments = getArticleFragments(headingsOnly);
49 | expect(fragments).toHaveLength(3);
50 | expect(fragments[0]).toBe("# Heading 1");
51 | expect(fragments[1]).toBe("## Heading 2");
52 | expect(fragments[2]).toBe("### Heading 3");
53 | });
54 |
55 | it("should handle input with very long paragraphs", () => {
56 | const longParagraph = "Lorem ipsum ".repeat(100);
57 | const fragments = getArticleFragments(longParagraph);
58 | expect(fragments.length).toBe(1);
59 | expect(fragments[0].length).toBeGreaterThan(100);
60 | });
61 |
62 | it("should respect custom minFragmentLength", () => {
63 | const text = `Short para 1.
64 |
65 | Slightly longer paragraph 2.
66 |
67 | Even longer paragraph 3 with more content.`;
68 |
69 | const fragments = getArticleFragments(text);
70 | expect(fragments[0]).toBe(
71 | "Short para 1. Slightly longer paragraph 2. Even longer paragraph 3 with more content."
72 | );
73 | });
74 | });
75 |
76 | describe("getArticleFragments with plain text", () => {
77 | it("should handle a single long paragraph", () => {
78 | const text =
79 | "This is a long paragraph that should be treated as a single fragment. It contains multiple sentences and goes on for a while to ensure it exceeds the minimum fragment length of 100 characters. The content is not particularly meaningful, but it serves the purpose of this test case.";
80 | const fragments = getArticleFragments(text);
81 | expect(fragments).toHaveLength(1);
82 | expect(fragments[0]).toBe(text);
83 | });
84 |
85 | it("should split long text into multiple fragments", () => {
86 | const text =
87 | "First paragraph that is long enough to be its own fragment. It contains multiple sentences to exceed the minimum length of 100 characters.\n\nSecond paragraph that is also long enough to be a separate fragment. It also has multiple sentences and exceeds 100 characters.\n\nThird paragraph, again long enough to be distinct and over 100 characters in length.";
88 | const fragments = getArticleFragments(text);
89 | expect(fragments).toHaveLength(3);
90 | expect(fragments[0]).toContain("First paragraph");
91 | expect(fragments[1]).toContain("Second paragraph");
92 | expect(fragments[2]).toContain("Third paragraph");
93 | });
94 |
95 | it("should combine short paragraphs", () => {
96 | const text =
97 | "Short para 1.\n\nAnother short one.\n\nYet another.\n\nStill short.\n\nNeed more text to reach 100 characters. This should do it, creating a single fragment.";
98 | const fragments = getArticleFragments(text);
99 | expect(fragments).toHaveLength(1);
100 | expect(fragments[0]).toContain("Short para 1.");
101 | expect(fragments[0]).toContain("Need more text to reach 100 characters.");
102 | });
103 |
104 | it("should handle text with varying paragraph lengths", () => {
105 | const text =
106 | "Short intro.\n\nThis is a much longer paragraph that should be its own fragment because it exceeds the minimum length of 100 characters. It contains multiple sentences to ensure it's long enough.\n\nAnother short paragraph.\n\nYet another long paragraph that should be separate. It also contains multiple sentences and exceeds the minimum length of 100 characters to be its own fragment.";
107 | const fragments = getArticleFragments(text);
108 | expect(fragments).toHaveLength(2);
109 | expect(fragments[0]).toContain("This is a much longer paragraph");
110 | expect(fragments[1]).toContain("Yet another long paragraph");
111 | });
112 |
113 | it("should handle text with line breaks but no paragraphs", () => {
114 | const text =
115 | "This is a text\nwith line breaks\nbut no paragraph\nbreaks. It should\nbe treated as one\nfragment. We need to add more text to ensure it exceeds 100 characters and becomes a valid fragment.";
116 | const fragments = getArticleFragments(text);
117 | expect(fragments).toHaveLength(1);
118 | expect(fragments[0]).toBe(
119 | "This is a text with line breaks but no paragraph breaks. It should be treated as one fragment. We need to add more text to ensure it exceeds 100 characters and becomes a valid fragment."
120 | );
121 | });
122 | });
123 |
124 | describe("segment", () => {
125 | it("should not affect normal English text", () => {
126 | const text = "This is a normal English sentence.";
127 | expect(segment(text)).toBe(text);
128 | });
129 |
130 | it("should handle empty string", () => {
131 | expect(segment("")).toBe("");
132 | });
133 |
134 | it("should handle text with numbers and punctuation", () => {
135 | const text = "Hello, world! This is test #123.";
136 | expect(segment(text)).toBe(text);
137 | });
138 |
139 | it("should segment text with non-Latin characters", () => {
140 | const text = "こんにちは世界";
141 | const segmented = segment(text);
142 | expect(segmented).toBe("こんにちは 世界");
143 | });
144 |
145 | it("should handle mixed Latin and non-Latin text", () => {
146 | const text = "Hello こんにちは world 世界";
147 | const segmented = segment(text);
148 | expect(segmented).toBe("Hello こんにちは world 世界");
149 | });
150 |
151 | it("should handle mixed Latin and Mandarin Chinese text", () => {
152 | const text = "Hello 你好世界我是一个人工智能助手 world 这是一个测试";
153 | const segmented = segment(text);
154 | expect(segmented).toBe("Hello 你好 世界 我是 一个 人工 智能 助手 world 这 是 一个 测试");
155 | });
156 |
157 | it("should handle chinese with punctuation", () => {
158 | const text =
159 | "你好,世界!这是一个测试句子,用于检查中文文本的分段功能。我们希望确保即使在有标点符号的情况下,文本也能正确分段。";
160 | const segmented = segment(text);
161 | expect(segmented).toBe(
162 | "你好 , 世界 ! 这 是 一个 测试 句子 , 用于 检查 中文 文本 的 分段 功能 。 我们 希望 确保 即使 在 有 标点 符号 的 情况 下 , 文本 也能 正确 分段 。"
163 | );
164 | });
165 | });
166 |
167 | describe("sanitizeHtmlAllowMark", () => {
168 | it("should preserve mark tags while removing all other HTML tags", () => {
169 | const html = '<div>Text with <mark>highlighted</mark> and <b>bold</b> and <i>italic</i> parts</div>';
170 | const sanitized = sanitizeHtmlAllowMark(html);
171 | expect(sanitized).toBe('Text with <mark>highlighted</mark> and bold and italic parts');
172 | });
173 |
174 | it("should strip attributes from mark tags", () => {
175 | const html = 'Text with <mark class="highlight" style="color: red">attributes</mark>';
176 | const sanitized = sanitizeHtmlAllowMark(html);
177 | expect(sanitized).toBe('Text with <mark>attributes</mark>');
178 | });
179 |
180 | it("should handle empty input", () => {
181 | expect(sanitizeHtmlAllowMark("")).toBe("");
182 | expect(sanitizeHtmlAllowMark(null as any)).toBe("");
183 | expect(sanitizeHtmlAllowMark(undefined as any)).toBe("");
184 | });
185 |
186 | it("should remove script tags and their content", () => {
187 | const html = 'Text with <script>alert("xss");</script>scripts';
188 | const sanitized = sanitizeHtmlAllowMark(html);
189 | expect(sanitized).toBe('Text with scripts');
190 | });
191 |
192 | it("should remove style tags and their content", () => {
193 | const html = 'Text with <style>.danger { color: red; }</style>styles';
194 | const sanitized = sanitizeHtmlAllowMark(html);
195 | expect(sanitized).toBe('Text with styles');
196 | });
197 |
198 | it("should handle complex nested HTML while preserving mark tags", () => {
199 | const html = `
200 | <div class="container">
201 | <h1>Title</h1>
202 | <p>Paragraph with <mark>highlighted</mark> text and <script>alert("danger")</script>dangerous content</p>