├── .github
│   ├── FUNDING.yml
│   └── workflows
│       └── pr-checks.yml
├── .gitignore
├── .npmrc
├── .routify
│   └── routes.js
├── CLAUDE.md
├── LICENSE
├── bun.lockb
├── index.html
├── package.json
├── postcss.config.cjs
├── readme.md
├── scripts
│   ├── release.sh
│   ├── replace-manifest.cjs
│   └── resize-images.sh
├── src
│   ├── assets
│   │   ├── icon-1.png
│   │   ├── icon-2.png
│   │   ├── icon-cropped-1.png
│   │   ├── icon-cropped-1_128.png
│   │   ├── icon-cropped-1_16.png
│   │   ├── icon-cropped-1_48.png
│   │   ├── icon-cropped-2.png
│   │   ├── icon_128.png
│   │   ├── icon_16.png
│   │   ├── icon_48.png
│   │   ├── star-empty-38.png
│   │   └── star-filled-38.png
│   ├── background.ts
│   ├── background
│   │   ├── backend-adapter.ts
│   │   ├── backend-debug.ts
│   │   ├── backend-pglite.ts
│   │   ├── backend-vlcn.ts
│   │   ├── backend.ts
│   │   ├── embedding
│   │   │   └── pipeline.ts
│   │   └── pglite
│   │       ├── HAX_pglite.ts
│   │       ├── defaultBlacklistRules.ts
│   │       ├── job_queue.test.ts
│   │       ├── job_queue.ts
│   │       ├── migration-manager.test.ts
│   │       ├── migration-manager.ts
│   │       ├── migrations
│   │       │   └── 001_init.ts
│   │       └── tasks.ts
│   ├── common
│   │   ├── logs.ts
│   │   ├── utils.test.ts
│   │   └── utils.ts
│   ├── content-scripts
│   │   └── content-script.ts
│   ├── manifest.json
│   ├── types.ts
│   └── ui
│       ├── .routify
│       │   └── urlIndex.json
│       ├── App.svelte
│       ├── DetailsPanel.svelte
│       ├── ExportProgress.svelte
│       ├── LayoutWrapper.svelte
│       ├── Menu.svelte
│       ├── MigrationModal.svelte
│       ├── Modal.svelte
│       ├── RecentItems.svelte
│       ├── ResultItem.svelte
│       ├── ResultRowView.svelte
│       ├── global.css
│       ├── lib
│       │   ├── commands.ts
│       │   ├── constants.ts
│       │   ├── dom.test.ts
│       │   ├── dom.ts
│       │   ├── rpc.ts
│       │   └── streaming-export.ts
│       ├── main.ts
│       ├── pages
│       │   ├── index.html
│       │   ├── _layout.svelte
│       │   ├── database-repl.svelte
│       │   ├── dev.svelte
│       │   ├── doc
│       │   │   └── [url].svelte
│       │   ├── index.svelte
│       │   ├── settings.svelte
│       │   └── task-queue.svelte
│       ├── index.svelte
│       ├── routes.js
│       └── store
│           ├── displaySettings.ts
│           ├── menuState.ts
│           └── statsStore.ts
├── static
│   ├── screenshot-1.png
│   ├── screenshot-2.png
│   ├── screenshot-3.png
│   └── screenshot-4.png
├── tailwind.config.cjs
├── tsconfig.json
├── vite.config.content-script.ts
└── vite.config.ts

--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
# These are supported funding model platforms

github: [iansinnott]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
polar: # Replace with a single Polar username
buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
thanks_dev: # Replace with a single thanks.dev username
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

--------------------------------------------------------------------------------
/.github/workflows/pr-checks.yml:
--------------------------------------------------------------------------------
name: PR Checks

on:
  pull_request:
    branches: [main, master]

jobs:
  check:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Setup Node.js
        uses: actions/setup-node@v3
        with:
          node-version: "20"

      - name: Install pnpm
        uses: pnpm/action-setup@v2
        with:
          version: 8

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2

      - name: Install dependencies
        run: pnpm install

      - name: Run type check
        run: pnpm run type-check

      - name: Run tests
        run: pnpm run test

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
node_modules
tmp
dist*
*.zip

vite.config.ts.timestamp-*

.env*
src/ui/.routify/config.js
src/ui/.routify/routes.js

--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
auto-install-peers=true

--------------------------------------------------------------------------------
/.routify/routes.js:
--------------------------------------------------------------------------------
export * from "@roxi/routify/runtime/defaultTmp/routes"

--------------------------------------------------------------------------------
/CLAUDE.md:
--------------------------------------------------------------------------------
# CLAUDE.md - Full Text Tabs Forever

This is a Chrome extension, not a normal web app.

## Build/Test/Lint Commands

- Build: `bun run build` (combines content script and main build)
- Watch/Dev: `bun run dev` (concurrently watches both content script and main)
- Dev Chrome: `bun run dev:chrome` (Chrome-specific dev mode)
- Test: `bun run test`
- Run single test: `bun test src/path/to/file.test.ts`
- Type check: `bun run type-check` (runs `tsc --noEmit`)
- Svelte check: `bun run check` (runs svelte-check)
- Chrome build: `bun run build:chrome`
- Firefox build: `bun run build:firefox`
- Clean build: `bun run build:clean` (removes dist directory contents)

## Code Style Guidelines

- TypeScript with strict mode enabled
- Use ES modules (import/export)
- Follow functional programming patterns where possible
- Tests use Bun's built-in test runner with describe/it/expect pattern
- 2-space indentation
- Semicolons at line ends (Prettier defaults)
- Use arrow functions for callbacks
- Prefer const over let, avoid var
- Use utility functions from src/common/utils.ts
- Handle errors with try/catch blocks and proper logging
- Use async/await for asynchronous code
- Prefer explicit typing over 'any'
- Use camelCase for variables/functions, PascalCase for classes/interfaces
- Tailwind CSS for styling
- Svelte components with TypeScript
- Zod for validation

## Naming Conventions

- Component files: PascalCase.svelte
- Utility files: kebab-case.ts
- Test files: name.test.ts adjacent to implementation
- Use descriptive, meaningful names

## Project Organization

- Background service worker in src/background/
- UI components in src/ui/
- Common utilities in src/common/
- Content scripts in src/content-scripts/
- Assets in src/assets/
- Types in src/types.ts
- Embedding pipeline in src/background/embedding/
- PGLite database functionality in src/background/pglite/
- UI stores in src/ui/store/
- UI pages in src/ui/pages/

## Technologies

- Vite for building
- Svelte for UI components
- SPA routing with svelte-spa-router
- Database backends (PGLite and VLCN options)
- Embedded Postgres with @electric-sql/pglite
- Embedding functionality with @xenova/transformers
- Supports sortable views by last visited date or rank

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Ian Sinnott

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/bun.lockb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/bun.lockb

--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------

FTTF

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "full-text-tabs-forever",
  "version": "2.1.2",
  "description": "A web extension for searching your full browsing history.",
  "main": "index.js",
  "scripts": {
    "//dev": "vite build -w was failing. something about the chrome extension vite plugin and the manifest. re-saving the manifest makes -w work, but that defeats the purpose of automated watch",
    "dev:chrome": "TARGET=chrome concurrently 'bun run build:content --watch' 'bun run build:main --watch'",
    "dev": "concurrently 'bun run build:content --watch' 'bun run build:main --watch'",
    "//build": "it's ultimately chrome's fault that we need a separate build for content-script. Unlike the service worker you cannot specify type:module, so... the script can't have imports.",
    "build": "bun run build:content && bun run build:main",
    "build:content": "vite build -c vite.config.content-script.ts",
    "build:main": "vite build",
    "release": "yarn version && ./scripts/release.sh",
    "build:clean": "rm -rf ./dist/*",
    "build:chrome": "TARGET=chrome bun run build",
    "build:firefox": "TARGET=firefox bun run build",
    "check": "svelte-check --tsconfig ./tsconfig.json",
    "type-check": "bun tsc --noEmit",
    "test": "bun test"
  },
  "type": "module",
  "keywords": [
    "search",
    "browser history",
    "postgres",
    "pgvector",
    "vector search",
    "chrome extension"
  ],
  "author": "",
  "license": "MIT",
  "devDependencies": {
    "@tailwindcss/forms": "~0.5.3",
    "@tailwindcss/line-clamp": "~0.4.2",
    "@tailwindcss/typography": "~0.5.9",
    "@types/archiver": "6.0.1",
    "@types/bun": "1.1.6",
    "@types/chrome": "~0.0.210",
    "@types/node": "~20.15.0",
    "archiver": "6.0.1",
    "autoprefixer": "~10.4.13",
    "concurrently": "~7.6.0",
    "postcss": "~8.4.21",
    "prettier": "3.1.0",
    "svelte-check": "~3.6.3",
    "svelte-preprocess": "~5.1.3",
    "tailwindcss": "~3.2.4",
    "tsx": "~3.12.2",
    "typescript": "~5.5.4",
    "vite": "~5.0.10",
    "vite-plugin-top-level-await": "1.4.4"
  },
  "dependencies": {
    "@electric-sql/pglite": "0.2.17",
    "@electric-sql/pglite-repl": "0.2.17",
    "@mozilla/readability": "~0.5.0",
    "@sveltejs/vite-plugin-svelte": "~3.1.2",
    "@types/turndown": "~5.0.1",
    "@types/webextension-polyfill": "~0.10.0",
    "@vlcn.io/crsqlite-wasm": "0.16.0",
    "@xenova/transformers": "2.17.2",
    "classnames": "~2.3.2",
    "lucide-svelte": "0.435.0",
    "svelte": "~4.0.0",
    "svelte-markdown": "~0.4.1",
    "svelte-spa-router": "^4.0.1",
    "turndown": "^5",
    "webextension-polyfill": "~0.10.0",
    "zod": "3.23.8"
  },
  "volta": {
    "node": "20.18.1"
  },
  "trustedDependencies": [
    "@swc/core",
    "protobufjs",
    "svelte-preprocess"
  ]
}

--------------------------------------------------------------------------------
/postcss.config.cjs:
--------------------------------------------------------------------------------
module.exports = {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
}

--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------

[full text tabs forever (FTTF) logo](https://chrome.google.com/webstore/detail/full-text-tabs-forever/gfmbnlbnapjmffgcnbopfgmflmlfghel)

# Full Text Tabs Forever

Search everything you read online. FTTF lets you search the full text of every web page you visit.

Available in the [Chrome Web Store](https://chrome.google.com/webstore/detail/full-text-tabs-forever/gfmbnlbnapjmffgcnbopfgmflmlfghel).

Available in the [Firefox Add-ons Store](https://addons.mozilla.org/en-US/firefox/addon/full-text-tabs-forever/).

> **IMPORTANT FOR v2.0 USERS:** If you're upgrading from v1.x, see the [Database Migration](#database-migration-v20) section for instructions on migrating your existing data.

_Firefox requires additional permissions. See [below](#firefox)._

**Doesn't the browser do that already? How is this different?**

Chrome does not let you search the text of pages you've visited, **only the URLs and titles**, and it deletes your history after a number of months. Firefox will keep your history for longer, but likewise doesn't let you search page content, only URLs and titles.

FTTF is different:

- **Full-Text Search Capabilities:** The full content of every page you've visited becomes searchable.
- **Permanent History:** Your digital footprints are yours to keep. Your data is yours, so it should not be removed without your approval. Nothing is deleted automatically.
- **Instant indexing:** FTTF creates a search index as you browse, so pages become searchable the moment you land on them.
- **For your eyes only:** Your browsing history is stored locally on your device, and not on any external servers. Beware that if you switch computers, your FTTF history will not automatically come with you, although it can be exported.

![](static/screenshot-1.png)

**Who is it for?**

Data hoarders like myself who never want to delete anything, and want everything to be searchable. More generally, if you've ever felt limited by the standard history search, you should try this out.

**How it works:**

Browser extensions have access to the pages you visit, which lets FTTF make an index of the content on any page. When a page loads, its content is extracted and indexed.

Extracted? Yes, or "distilled" if you prefer. Full web pages are huge and contain a lot of information that's unrelated to the content itself. FTTF ignores all of that. It acts like "reader mode" to find the relevant content on a page and indexes only that.

# Installation

Install in your browser via the [Chrome Web Store](https://chrome.google.com/webstore/detail/full-text-tabs-forever/gfmbnlbnapjmffgcnbopfgmflmlfghel) or the [Firefox Add-ons Store](https://addons.mozilla.org/en-US/firefox/addon/full-text-tabs-forever/).

# Testing

This project uses `bun` as its test runner, but not (currently) as a bundler. You will need to install `bun`, then:

`bun test`

Or, `bun run test` if you prefer.

# Note to self: Submitting a new version manually

> How could this be automated?

- Manually bump the version in the manifest file
- Run the build
  - `bun run build:chrome`
  - `bun run build:firefox`
- Submit
  - Chrome
    - Go to: https://chrome.google.com/webstore/devconsole/bc898ad5-018e-4774-b9ab-c4bef7b7f92b/gfmbnlbnapjmffgcnbopfgmflmlfghel/edit/package
    - Upload the `fttf-chrome.zip` file
  - Firefox
    - Go to: https://addons.mozilla.org/en-US/developers/addon/full-text-tabs-forever/edit
    - Upload the `fttf-firefox.zip` file
    - Zip the original source code and upload that too: `zip -r src.zip src`

# Firefox

Install here: https://addons.mozilla.org/en-US/firefox/addon/full-text-tabs-forever/

Currently you have to manually enable additional permissions in Firefox like so:

![Firefox permissions](https://drive.zenture.cloud/s/d3mboA7GwPCXH8b/download)

See this comment for more details: https://github.com/iansinnott/full-text-tabs-forever/issues/3#issuecomment-1963238416

Support was added in: https://github.com/iansinnott/full-text-tabs-forever/pull/4.

# Database Migration (v2.0)

With version 2.0, Full Text Tabs Forever has migrated from SQLite (VLCN) to PostgreSQL (PgLite) as its database backend. This change brings several improvements:

- Better full-text search capabilities with PostgreSQL's advanced text search
- Support for vector embeddings for semantic search (coming soon)
- Improved performance for large databases
- More efficient storage of document fragments

## For Existing Users

If you're upgrading from a previous version (v1.x), your data will not be lost! The extension includes a migration system that will:

1. Detect your existing VLCN (SQLite) database
2. Provide a simple one-click migration option in the Settings page
3. Transfer all your saved pages to the new PostgreSQL database
4. Show real-time progress during migration
5. Preserve all your searchable content

To migrate your data:

1. After upgrading, open the extension
2. Go to the Settings page
3. Find the "Import VLCN Database (v1)" section
4. Click the "Import VLCN Database" button
5. Wait for the migration to complete - this may take several minutes depending on how many pages you've saved
6. Your data is now accessible in the new database system!

The migration happens entirely on your device, and no data is sent to external servers. Your privacy remains protected throughout the process.

# TODO

- [ ] Backfill history
      Currently only new pages you visit are indexed, but we could backfill by opening every page in the browser's history that hasn't yet been indexed. An optional feature, but a useful one.
- [ ] Backup and sync
      Improved export/import capabilities for moving data between devices.
- [ ] Semantic search
      Leverage vector embeddings in the new PostgreSQL backend for more intelligent searching.
- [ ] Integrate with [browser-gopher](https://github.com/iansinnott/browser-gopher)
      Browser gopher and [BrowserParrot](https://www.browserparrot.com/) were the initial impetus to create a better way to ingest full text web pages, without triggering a Cloudflare captcha party on your home connection.
- [x] Migrate to PostgreSQL
      Replace SQLite with a more powerful database backend using PgLite.
- [x] Improve discoverability of functionality.
      There is now a button to open the command palette. Still not much GUI, but enough to be discovered.
- [x] Firefox
      ~~This should not be too difficult since this project was started with web extension polyfills. However, there is currently some chrome specific code.~~
      It appears that the APIs do not have to be rewritten to work in Firefox. See this PR for details: https://github.com/iansinnott/full-text-tabs-forever/pull/4

# Contributing

PRs welcome!

--------------------------------------------------------------------------------
/scripts/release.sh:
--------------------------------------------------------------------------------
#!/bin/bash

main() {
  echo "Releasing new version..."
  echo
  echo "  PWD: $PWD"

  local version=$(jq -r '.version' package.json)

  # Replace version in src/manifest.json
  sed -i '' -e "s/\"version\": \".*\"/\"version\": \"$version\"/g" src/manifest.json

  # amend last commit
  git add src/manifest.json > /dev/null
  git commit --amend --no-edit > /dev/null

  # upsert the tag. if running yarn version the tag will have been created already
  git tag -d "v$version" > /dev/null 2>&1 || true
  git tag -a "v$version" -m "v$version" > /dev/null

  echo "  Tag: v$version"
  echo "  Commit: $(git rev-parse HEAD)"
  echo
  echo "Don't forget to push the tag to GitHub: git push --tags"
}

main

--------------------------------------------------------------------------------
/scripts/replace-manifest.cjs:
--------------------------------------------------------------------------------
/**
 * Because Chrome is so sensitive about the manifest file, this script serves to
 * modify it for distribution.
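 *
 * Illustrative invocation (an assumption -- the release flow is not shown in
 * this repo snapshot):
 *
 *   node scripts/replace-manifest.cjs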
 */
const { readFileSync, writeFileSync } = require("fs");
const path = require("path");

const modifyManifest = (manifest) => {
  delete manifest["$schema"];
};

try {
  const manifestV3 = JSON.parse(
    readFileSync(path.resolve(__dirname, "../dist/manifest.json"), "utf8")
  );

  // Mutate the manifest object
  modifyManifest(manifestV3);

  writeFileSync(
    path.resolve(__dirname, "../dist/manifest.json"),
    JSON.stringify(manifestV3, null, 2)
  );

  console.log("Manifest converted v3 -> v2");
} catch (err) {
  console.error("Could not build manifest", err);
}

--------------------------------------------------------------------------------
/scripts/resize-images.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Input file
input_file="$1"

if [[ ! -e $input_file ]]; then
  echo "File does not exist"
  exit 1
fi

if [[ ${input_file: -4} != ".png" ]]; then
  echo "File is not a PNG"
  exit 1
fi

# Output directory
output_dir="src/assets"

# Create the output directory if it doesn't exist
mkdir -p $output_dir

# Icon sizes
sizes=(16 48 128)

# Generate the icons
for size in "${sizes[@]}"; do
  base_name=$(basename "$input_file" .png)
  echo "Generating ${size}x${size} icon..."
  convert "$input_file" -resize "${size}x${size}" "$output_dir/${base_name}_${size}.png"
done

--------------------------------------------------------------------------------
/src/assets/icon-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-1.png

--------------------------------------------------------------------------------
/src/assets/icon-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-2.png

--------------------------------------------------------------------------------
/src/assets/icon-cropped-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-cropped-1.png

--------------------------------------------------------------------------------
/src/assets/icon-cropped-1_128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-cropped-1_128.png

--------------------------------------------------------------------------------
/src/assets/icon-cropped-1_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-cropped-1_16.png

--------------------------------------------------------------------------------
/src/assets/icon-cropped-1_48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-cropped-1_48.png
--------------------------------------------------------------------------------
/src/assets/icon-cropped-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon-cropped-2.png

--------------------------------------------------------------------------------
/src/assets/icon_128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon_128.png

--------------------------------------------------------------------------------
/src/assets/icon_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon_16.png

--------------------------------------------------------------------------------
/src/assets/icon_48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/icon_48.png

--------------------------------------------------------------------------------
/src/assets/star-empty-38.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/star-empty-38.png

--------------------------------------------------------------------------------
/src/assets/star-filled-38.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/src/assets/star-filled-38.png

--------------------------------------------------------------------------------
/src/background.ts:
--------------------------------------------------------------------------------
// import browser, { omnibox, Runtime } from "webextension-polyfill";

import { PgLiteBackend } from "./background/backend-pglite";
import { log } from "./common/logs";
import { debounce } from "./common/utils";
import { BackendAdapter } from "./background/backend-adapter";

// Although there were initially multiple adapters, there is now mainly one.
const adapter = new BackendAdapter({
  backend: new PgLiteBackend(),
  runtime: chrome.runtime,
});

/**
 * Expose for debugging
 * @example await fttf.backend.db?.query(`select * from pg_tables;`)
 */
globalThis.fttf = adapter;

export type FTTF = {
  adapter: BackendAdapter;
};

if (adapter.onMessage) {
  chrome.runtime.onMessage.addListener((...args) => adapter.onMessage(...args));
}

// @note We do not support SPAs currently. URL changes trigger here, but we do
// not then instruct the frontend to send the full text.
const updateHandler = debounce(
  async (tabId: number, changeInfo: chrome.tabs.TabChangeInfo, tab: chrome.tabs.Tab) => {
    console.debug("%ctab update", "color:gray;", "no action performed", tab.url);
    // browser.tabs.sendMessage(tabId, ["onTabUpdated", { tabId, changeInfo }]);
  },
  200
);

// Listen for tab updates, because the content script normally only runs on
// load. This is for SPA apps.
chrome.tabs.onUpdated.addListener((...args) => updateHandler(...args));

// When the extension button is clicked, open the extension's index page
chrome.action.onClicked.addListener(async () => {
  await adapter.openIndexPage();
});
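
// Illustrative console session (a sketch, not part of the extension's runtime
// -- `fttf` is the adapter exposed above, and `search` is declared on the
// Backend interface in ./background/backend.ts):
//
//   const res = await fttf.backend.search({ query: "postgres", orderBy: "rank" });
//   console.table(res.results.map((r) => ({ title: r.title, url: r.url })));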

--------------------------------------------------------------------------------
/src/background/backend-adapter.ts:
--------------------------------------------------------------------------------
import type { SendResponse } from "./backend";
import { VLCN } from "./backend-vlcn";
import { PgLiteBackend } from "./backend-pglite";
import { log } from "../common/logs";

export type BackendAdapterRuntime = {
  sendMessage: typeof chrome.runtime.sendMessage;
  getURL: typeof chrome.runtime.getURL;
};

export class BackendAdapter {
  backend: PgLiteBackend;
  runtime: BackendAdapterRuntime;
  _vlcn: VLCN | null = null;

  constructor({ backend, runtime }: { backend: PgLiteBackend; runtime: BackendAdapterRuntime }) {
    this.backend = backend;
    this.runtime = runtime;
  }

  onMessage(message: any, sender: chrome.runtime.MessageSender, sendResponse: SendResponse) {
    // Special case for migrating from VLCN to PgLite
    if (message[0] === "importVLCNDocuments" || message[0] === "importVLCNDocumentsV1") {
      this.importVLCNDocumentsV1()
        .then((result) => {
          sendResponse({ ok: true, ...result });
        })
        .catch((err) => {
          sendResponse({ error: err.message });
        });
      return true;
    }

    // Add handler for checking VLCN migration status
    if (message[0] === "checkVLCNMigrationStatus") {
      this.checkVLCNMigrationStatus()
        .then((result) => {
          sendResponse(result);
        })
        .catch((err) => {
          sendResponse({ error: err.message });
        });
      return true;
    }

    let waitForResponse = false;
    try {
      const { tab } = sender;
      const [method, payload] = message as [string, any];

      if (sender.url !== tab?.url) {
        console.log(`%cinfo`, "color:yellow;", "sender URL and tab URL differ. probably iframe");
      }

      // @ts-ignore This could be handled better. unimportant for now
      if (typeof this.backend[method] === "function") {
        waitForResponse = true;
        // @ts-ignore
        this.backend[method](payload, sender)
          .then((ret) => {
            sendResponse(ret);
          })
          .catch((err) => {
            console.error(`backend :: err :: ${method} ::`, payload);
            console.error(err);
            sendResponse({ error: err.message, stack: err.stack });
          });
      } else {
        console.warn(`%c${method}`, "color:yellow;", "is not a valid method", payload);
        sendResponse({ error: `'${method}' is not a valid RPC` });
      }
    } catch (err) {
      console.error("Could not parse message", message, sender, err);
      sendResponse({ error: err.message });
    }

    return waitForResponse; // Keep channel open for async response. Yikes
  }
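
  // Illustrative call site (an assumption -- the actual client appears to live
  // in src/ui/lib/rpc.ts): callers send a [method, payload] tuple, matching the
  // destructuring above, and receive the backend method's return value:
  //
  //   chrome.runtime.sendMessage(["search", { query: "postgres", orderBy: "rank" }],
  //     (res) => console.log(res.results));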

  async checkVLCNMigrationStatus() {
    try {
      const isComplete = await this.isMigrationComplete();

      if (isComplete) {
        return { available: true, migrated: true };
      }

      if (!this._vlcn) {
        this._vlcn = new VLCN();
        try {
          await this._vlcn.readyPromise;
        } catch (err) {
          console.error("Failed to initialize VLCN", err);
          return { available: false, error: err.message };
        }
      }

      const status = await this._vlcn.getStatus();
      if (!status.ok) {
        return { available: false, error: status.error };
      }

      // Check if there are documents to migrate
      const count = await this._vlcn.sql<{
        count: number;
      }>`select count(*) as count from "document";`;

      const documentCount = count[0].count;

      // Flag the migration as complete so that we don't continue to initialize
      // VLCN every time. Ultimately we will remove VLCN completely.
      if (documentCount === 0) {
        await this.setMigrationComplete();
      }

      return {
        available: true,
        migrated: false,
        documentCount,
      };
    } catch (err) {
      console.error("Error checking VLCN migration status", err);
      return { available: false, error: err.message };
    }
  }

  // Created for debugging workflow
  async openIndexPage() {
    const [existingTab] = await chrome.tabs.query({
      url: this.runtime.getURL("index.html"),
    });

    if (existingTab) {
      await chrome.tabs.update(existingTab.id!, { active: true });
    } else {
      await chrome.tabs.create({ url: chrome.runtime.getURL("index.html") });
    }
  }

  async setMigrationComplete() {
    // First create the table if it doesn't exist
    await this.backend.db!.exec(
      `CREATE TABLE IF NOT EXISTS migration_info (key TEXT PRIMARY KEY, value TEXT);`
    );

    // Then insert the migration flag
    await this.backend.db!.exec(
      `INSERT INTO migration_info (key, value) VALUES ('migrated_to_pglite', '1') ON CONFLICT(key) DO UPDATE SET value = '1';`
    );
  }

  async isMigrationComplete() {
    try {
      const result = await this.backend.db!.query<{ value: string }>(
        `SELECT value FROM migration_info WHERE key = 'migrated_to_pglite';`
      );
      return result.rows[0]?.value === "1";
    } catch (error) {
      // If we haven't run the migration yet don't consider this an error
      if (error instanceof Error && error.message.includes("does not exist")) {
        return false;
      }

      throw error;
    }
  }

  async importVLCNDocumentsV1() {
    try {
      // Send initial status update
      this.runtime.sendMessage({
        type: "vlcnMigrationStatus",
        status: "starting",
        message: "Initializing VLCN database...",
      });

      if (!this._vlcn) {
        this._vlcn = new VLCN();
        await this._vlcn.readyPromise;
      }

      // Check document count
      const count = await this._vlcn.sql<{
        count: number;
      }>`select count(*) as count from "document";`;

      console.log("vlcnAdapter :: count", count);

      if (count[0].count === 0) {
        this.runtime.sendMessage({
          type: "vlcnMigrationStatus",
          status: "empty",
          message: "No documents found in the VLCN database.",
        });
        return { imported: 0, message: "No documents found in VLCN database" };
      }

      // Send update with document count
      this.runtime.sendMessage({
        type: "vlcnMigrationStatus",
        status: "fetching",
        message: `Found ${count[0].count} documents to migrate...`,
      });

      // Process documents in batches
      const BATCH_SIZE = 100;
      let imported = 0;
      let duplicates = 0;
      let processed = 0;
      const totalDocuments = count[0].count;

      // Send update before importing
      this.runtime.sendMessage({
        type: "vlcnMigrationStatus",
        status: "importing",
        message: `Beginning import of ${totalDocuments} documents...`,
        total: totalDocuments,
        current: 0,
      });

      while (processed < totalDocuments) {
        // Fetch batch of documents
        const batchQuery = `SELECT
          id,
          title,
          url,
          excerpt,
          mdContent,
          mdContentHash,
          publicationDate,
          hostname,
          lastVisit,
          lastVisitDate,
          extractor,
          createdAt,
          updatedAt
        FROM "document"
        LIMIT ${BATCH_SIZE} OFFSET ${processed};`;

        const batch = await this._vlcn?.db.execA(batchQuery);

        if (batch.length === 0) {
          break; // No more documents to process
        }

        if (processed === 0) {
          // Log sample of first batch only
          console.log(
            "vlcnAdapter :: docs sample",
            batch.slice(0, 3).map((d) => ({ id: d[0], title: d[1], url: d[2] }))
          );
        }

        // Import current batch
        const batchResult = await this.backend.importDocumentsJSONv1({ document: batch });

        imported += batchResult.imported;
        duplicates += batchResult.duplicates;
        processed += batch.length;

        // Update progress
        this.runtime.sendMessage({
          type: "vlcnMigrationStatus",
          status: "importing",
          message: `Imported ${processed} of ${totalDocuments} documents...`,
          total: totalDocuments,
          current: processed,
        });
      }

      const result = { imported, duplicates };

      // Send completion status
      this.runtime.sendMessage({
        type: "vlcnMigrationStatus",
        status: "complete",
        message: `Migration complete. Imported ${result.imported} documents (${result.duplicates} were duplicates).`,
        result,
      });

      // Mark VLCN database as migrated to prevent duplicate migrations
      try {
        await this.setMigrationComplete();

        console.log("Marked VLCN database as migrated successfully");
      } catch (err) {
        console.error("Error marking VLCN database as migrated", err);
      }

      return result;
    } catch (error) {
      console.error("VLCN migration failed", error);

      // Send error status
      this.runtime.sendMessage({
        type: "vlcnMigrationStatus",
        status: "error",
        message: `Migration failed: ${error.message}`,
        error: error.message,
      });

      return { error: error.message };
    }
  }
}

--------------------------------------------------------------------------------
/src/background/backend-debug.ts:
--------------------------------------------------------------------------------
/**
 * This backend is used for debugging purposes. It does not index anything.
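 *
 * Illustrative wiring (an assumption -- BackendAdapter currently types its
 * backend as PgLiteBackend, so this swap is for manual debugging only):
 *
 *   const adapter = new BackendAdapter({ backend: new DebugBackend(), runtime: chrome.runtime });
 *
 * Handy for watching RPC traffic in the console without touching a database.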
 */

import { formatDebuggablePayload } from "../common/utils";
import { Backend, DetailRow } from "./backend";

export class DebugBackend implements Backend {
  getStatus: Backend["getStatus"] = async () => {
    return {
      ok: true,
    };
  };

  search: Backend["search"] = async (search) => {
    console.debug(`backend#%c${"search"}`, "color:lime;", search);
    return {
      ok: true,
      results: [],
      count: 0,
      perfMs: 0,
      query: search.query,
    };
  };

  async findOne(query: { where: { url: string } }): Promise<DetailRow | null> {
    console.debug(`backend#%c${"findOne"}`, "color:lime;", query);
    return null;
  }

  getPageStatus: Backend["getPageStatus"] = async (payload, sender) => {
    const { tab } = sender;
    let shouldIndex = tab?.url?.startsWith("http"); // ignore chrome extensions, about:blank, etc

    try {
      const url = new URL(tab?.url || "");
      if (url.hostname === "localhost") shouldIndex = false;
      if (url.hostname.endsWith(".local")) shouldIndex = false;
    } catch (err) {
      // should not happen
      throw err;
    }

    console.debug(`%c${"getPageStatus"}`, "color:lime;", { shouldIndex, url: tab?.url }, payload);

    return {
      shouldIndex,
    };
  };

  indexPage: Backend["indexPage"] = async (payload, sender) => {
    const { tab } = sender;

    // remove adjacent whitespace since it serves no purpose. The html or
    // markdown content stores formatting.
    const plainText = payload.text_content?.replace(/[ \t]+/g, " ").replace(/\n+/g, "\n");

    console.debug(`%c${"indexPage"}`, "color:lime;", tab?.url);
    console.debug(formatDebuggablePayload({ ...payload, textContent: plainText }));
    return {
      message: "debug backend does not index pages",
    };
  };

  nothingToIndex: Backend["nothingToIndex"] = async (payload, sender) => {
    const { tab } = sender;
    console.debug(`%c${"nothingToIndex"}`, "color:beige;", tab?.url);
    return {
      ok: true,
    };
  };

  getRecent: Backend["getRecent"] = async (options) => {
    console.debug(`backend#%c${"getRecent"}`, "color:lime;", options);
    return {
      ok: true,
      results: [],
      count: 0,
      perfMs: 0,
    };
  };
}

--------------------------------------------------------------------------------
/src/background/backend.ts:
--------------------------------------------------------------------------------
import type { Runtime } from "webextension-polyfill";
import type { Readability } from "@mozilla/readability";

export type SendResponse = (response?: any) => void;

export type RemoteProcWithSender<T = any, R = any> = (
  payload: T,
  sender: Runtime.MessageSender
) => Promise<R>;
export type RemoteProc<T = any, R = any> = (payload: T) => Promise<R>;

type ReadabilityArticle = Omit<NonNullable<ReturnType<Readability["parse"]>>, "content">;

export type Article = ReadabilityArticle & {
  extractor: string;
  /** Optional for now b/c i'm not sending it over the wire if turndown is used in the content script */
  html_content?: string;
  /** Optional because the parsing can fail */
  md_content?: string;
  text_content?: string;
  date?: string;
  _extraction_time: number;
};

export type ArticleRow = Omit<Article, "html_content" | "md_content"> & {
  id: number;
  md_content_hash?: string;
  md_content?: string;
  url: string;
  hostname: string;
  search_words?: string[];
  last_visit?: number; // Timestamp
  last_visit_date?: string;
  updated_at: number;
  created_at: number; // Timestamp
  publication_date?: number;
};

/** @deprecated don't use urls directly for now. use documents which have URLs */
export type UrlRow = {
  url: string;
  url_hash: string;
  title?: string;
  last_visit?: number; // Timestamp
  hostname: string;
  text_content_hash?: string;
  search_words?: string[];
};

export type ResultRow = {
  rowid: number;
  id: number;
  entity_id: number;
  attribute: string;
  snippet?: string;
  url: string;
  hostname: string;
  title?: string;
  excerpt?: string;
  last_visit?: number; // Timestamp
  last_visit_date?: string;
  md_content_hash?: string;
  updated_at: number;
  created_at: number; // Timestamp
};

export type DetailRow = ResultRow & {
  md_content?: string;
};

type FirstArg<T> = T extends (arg: infer U, ...args: any[]) => any ? U : never;

export type RpcMessage =
  | [method: "getPageStatus"]
  | [method: "indexPage", payload: FirstArg<Backend["indexPage"]>]
  | [method: "nothingToIndex"]
  | [method: "getStats"]
  | [method: "getStatus"]
  | [method: "exportJson"]
  | [method: "importJson"]
  | [method: "reindex"]
  | [method: "search", payload: FirstArg<Backend["search"]>]
  | [method: string, payload?: any];

export type DBDump = Record<string, any[][]>;

export interface Backend {
  getStatus(): Promise<{ ok: true } | { ok: false; error: string; detail?: any }>;
  getPageStatus: (_: any, sender: { tab: { url: string } }) => Promise<any>;
  indexPage: (payload: Article, sender: { tab: { url: string } }) => Promise<any>;
  nothingToIndex: RemoteProcWithSender;
  search: RemoteProc<
    {
      query: string;
      limit?: number;
      offset?: number;
      orderBy: "updated_at" | "rank" | "last_visit";
      preprocessQuery?: boolean;
    },
    {
      ok: boolean;
      results: ResultRow[];
      count?: number;
      perfMs: number;
      query: string;
    }
  >;
  getRecent(options: { limit?: number; offset?: number }): Promise<{
    ok: boolean;
    results: ResultRow[];
    count?: number;
    perfMs: number;
  }>;
  findOne(query: { where: { url: string } }): Promise<DetailRow | null>;
  exportJson?(): Promise<DBDump>;
  importDocumentsJSONv1?(payload: {
    document: any[][];
  }): Promise<{ imported: number; duplicates: number }>;
}

--------------------------------------------------------------------------------
/src/background/embedding/pipeline.ts:
--------------------------------------------------------------------------------
/**
 * For use in background.js - Handles requests from the UI, runs the model, then
 * sends back a response
 */

import { pipeline, env, type FeatureExtractionPipeline } from "@xenova/transformers";

export type TransformersProgress =
  | {
      status: "done" | "initiate" | "download";
      name: string;
      file: string;
    }
  | {
      status: "progress";
      name: string;
      file: string;
      progress: number;
      loaded: number;
      total: number;
    }
  | {
      status: "ready";
      task: string;
      model: string;
    };

// Skip initial check for local models, since we are not loading any local models.
env.allowLocalModels = false;

// Due to a bug in onnxruntime-web, we must disable multithreading for now.
// See https://github.com/microsoft/onnxruntime/issues/14445 for more information.
env.backends.onnx.wasm.numThreads = 1;

class PipelineSingleton {
  static task = "feature-extraction" as const;
  static model = "Xenova/all-MiniLM-L6-v2";
  static instance: FeatureExtractionPipeline | null = null;

  static async getInstance(progress_callback?: (x: TransformersProgress) => void) {
    if (this.instance === null) {
      console.time("loading pipeline");
      this.instance = await pipeline(this.task, this.model, { progress_callback });
      console.timeEnd("loading pipeline");
    }

    return this.instance;
  }
}

export const createTensor = async (text: string) => {
  // Get the pipeline instance. This will load and build the model when run for the first time.
  let model = await PipelineSingleton.getInstance((data) => {
    console.log("progress ::", data);
  });

  // Actually run the model on the input text
  let tensor = await model(text, { pooling: "mean", normalize: true });

  return tensor;
};

// Create generic classify function, which will be reused for the different types of events.
export const createEmbedding = async (text: string) => {
  const tensor = await createTensor(text);
  return tensor.tolist()?.[0] as number[];
};

--------------------------------------------------------------------------------
/src/background/pglite/HAX_pglite.ts:
--------------------------------------------------------------------------------
/**
 * HAX: Load PGlite in a service worker
 *
 * This is a temporary solution to allow PGlite to work in a service worker.
 * Hopefully in future versions this will not be necessary. The core issue here
 * is that PGlite, perhaps via some internal emscripten logic, is using the
 * _synchronous_ XMLHttpRequest API to load assets. This poses two issues:
 *
 * - chrome does not support XMLHttpRequest AT ALL in service workers
 * - we cannot create a full polyfill for XMLHttpRequest because we cannot mimic the synchronous behavior
 *
 * Thus this script simply loads the relevant bytes into memory and hands them
 * back if requested via the correct URL.
 *
 * @todo Not sure if vite grabs the relevant asset and puts it in the build;
 * might need to create a plugin for that. Works for the `dev` command but might
 * not work for `build`.
 */

const assetCache = new Map<string, ArrayBuffer>();

async function preloadAssets() {
  // NOTE: The wasm file exists in the pglite package but does not seem to be used. preloading the data file was enough
  const assetUrls = [
    chrome.runtime.getURL("/assets/postgres-CkP7QCDB.data"), // 0.2.17
  ];

  for (const url of assetUrls) {
    try {
      const response = await fetch(url);
      if (!response.ok) {
        console.log(`failed to fetch asset :: ${url}`);
        continue;
      }
      const arrayBuffer = await response.arrayBuffer();
      assetCache.set(url, arrayBuffer);
    } catch (error) {
      console.error(`failed to preload asset :: ${url}`, error);
    }
  }
}

// As with XMLHttpRequest, this is not supported in the service worker context.
class ProgressEventPolyfill {
  type: string;
  constructor(type: string) {
    this.type = type;
  }
}

// A partial polyfill for XMLHttpRequest to support the loading of pglite in a
// service worker
class XMLHttpRequestPolyfill {
  private url: string = "";
  public onload: ((this: XMLHttpRequest, ev: ProgressEvent) => any) | null = null;
  public onerror: ((this: XMLHttpRequest, ev: ProgressEvent) => any) | null = null;
  public status: number = 0;
  public responseText: string = "";
  public response: any = null;

  open(method: string, url: string) {
    console.log("open ::", { method, url });
    this.url = url;
  }

  send(body: any = null) {
    console.log("send ::", { body });
    if (assetCache.has(this.url)) {
      this.response = assetCache.get(this.url);
      this.status = 200;
      if (this.onload) {
        // @ts-expect-error
        this.onload.call(this, new ProgressEventPolyfill("load") as any);
      }
    } else {
      console.error(`asset not preloaded :: ${this.url}`);
      this.status = 404;
      if (this.onerror) {
        // @ts-expect-error
        this.onerror.call(this, new ProgressEventPolyfill("error") as any);
      }
    }
  }
}

(globalThis as any).XMLHttpRequest = XMLHttpRequestPolyfill;
(globalThis as any).ProgressEvent = ProgressEventPolyfill;

// Preload assets BEFORE importing PGlite
//
// NOTE: This will require vite-plugin-top-level-await. Chrome will not allow
// top level await in service workers even if supported by the browser in other
// contexts.
await preloadAssets();

import { PGlite } from "@electric-sql/pglite";

export { PGlite };

--------------------------------------------------------------------------------
/src/background/pglite/defaultBlacklistRules.ts:
--------------------------------------------------------------------------------
export const defaultBlacklistRules: Array<[string, "url_only" | "no_index"]> = [
  ["https://news.ycombinator.com", "url_only"],
  ["https://news.ycombinator.com/news", "url_only"],
  ["https://news.ycombinator.com/new", "url_only"],
  ["https://news.ycombinator.com/best", "url_only"],
  ["http://localhost%", "no_index"],
  ["https://localhost%", "no_index"],
  ["https://www.bankofamerica.com%", "url_only"],
  ["https://www.chase.com%", "url_only"],
  ["https://www.wellsfargo.com%", "url_only"],
  ["https://www.citibank.com%", "url_only"],
  ["https://www.capitalone.com%", "url_only"],
  ["https://app.mercury.com%", "url_only"],
  ["https://www.schwab.com%", "url_only"],
  ["https://www.fidelity.com%", "url_only"],
  ["https://www.vanguard.com%", "url_only"],
  ["https://www.etrade.com%", "url_only"],
  ["https://www.tdameritrade.com%", "url_only"],
  ["https://www.robinhood.com%", "url_only"],
  ["https://www.paypal.com%", "url_only"],
  ["https://www.venmo.com%", "url_only"],
  ["https://www.facebook.com", "url_only"],
  ["https://www.amazon.com%", "url_only"],
  ["https://www.ebay.com%", "url_only"],
  ["https://www.dropbox.com", "url_only"],
  ["https://drive.google.com%", "url_only"],
  ["https://www.coinbase.com%", "url_only"],
  ["https://www.webmd.com", "url_only"],
  ["https://%.local", "no_index"],
  ["https://%.internal", "no_index"],
  ["https://twitter.com", "url_only"],
  ["https://twitter.com/home", "url_only"],
  ["https://x.com", "url_only"],
  ["https://x.com/home", "url_only"],
  ["https://www.linkedin.com", "url_only"],
  ["https://www.tiktok.com", "url_only"],
  ["https://mail.google.com", "no_index"],
  ["https://outlook.live.com%", "no_index"],
  ["https://docs.google.com%", "url_only"],
  ["https://www.office.com%", "url_only"],
  ["https://slack.com", "url_only"],
  ["https://zoom.us%", "url_only"],
  ["https://www.ask.com/web?q=%", "url_only"],
  ["https://www.baidu.com/s?%", "url_only"],
  ["https://www.reddit.com/search%", "url_only"],
  ["https://www.bing.com/search%", "url_only"],
  ["https://search.yahoo.com/search%", "url_only"],
  ["https://www.duckduckgo.com/?q=%", "url_only"],
  ["https://yandex.com/search/?%", "url_only"],
  ["https://%dashlane.com%", "no_index"],
  ["https://%bitwarden.com%", "no_index"],
  ["https://%lastpass.com%", "no_index"],
  ["https://%1password.com%", "no_index"],
  ["https://kagi.com/search%", "url_only"],
  ["https://www.google.com/search%", "url_only"],
];

--------------------------------------------------------------------------------
/src/background/pglite/job_queue.test.ts:
--------------------------------------------------------------------------------
// @ts-nocheck
import { describe, it, expect, beforeEach, afterEach, mock } from "bun:test";
import { JobQueue, JOB_QUEUE_SCHEMA } from "./job_queue";
import { PGlite } from "@electric-sql/pglite";
import * as defaultTasks from "./tasks";

describe("JobQueue", () => {
  let db: PGlite;
  let jobQueue: JobQueue;
  let mockTasks: typeof defaultTasks;

  beforeEach(async () => {
    // Create an in-memory PGLite instance
    db = new PGlite("memory://");
    await db.query(JOB_QUEUE_SCHEMA);

    // Create mock tasks
    mockTasks = {
      ...defaultTasks,
      generate_fragments: {
        handler: mock(() => Promise.resolve()),
        params: { parse: (p: any) => p },
      },
    };

    jobQueue = new JobQueue(db, mockTasks, 100);
    await jobQueue.initialize();
  });

  afterEach(async () => {
    // Clean up the database
    await db.query("DROP TABLE IF EXISTS task");
    await db.close();
  });

  it("should initialize the job queue", async () => {
    const result = await db.query<{ count: number }>("SELECT COUNT(*) as count FROM task");
    expect(result.rows[0].count).toBe(0);
  });

  it("should enqueue a task", async () => {
    const taskType = "generate_fragments";
    const params = { articleId: 1 };

    const taskId = await jobQueue.enqueue(taskType, params);
    expect(taskId).toBeGreaterThan(0);

    const result = await db.query<{ count: number }>("SELECT COUNT(*) as count FROM task");
    expect(result.rows[0].count).toBe(1);
  });

  it("should not enqueue duplicate tasks", async () => {
    const taskType = "generate_fragments";
    const params = { articleId: 1 };

    const taskId1 = await jobQueue.enqueue(taskType, params);
    const taskId2 = await jobQueue.enqueue(taskType, params);

    expect(taskId1).toBeGreaterThan(0);
    expect(taskId2).toBeUndefined();

    const result = await db.query<{ count: number }>("SELECT COUNT(*) as count FROM task");
    expect(result.rows[0].count).toBe(1);
  });

  it("should process pending tasks", async () => {
    const taskType = "generate_fragments";
    const params = { articleId: 1 };

    await jobQueue.enqueue(taskType, params);

    await
jobQueue.processPendingTasks();

    // Wait for a short time to allow the task to be processed
    await new Promise((resolve) => setTimeout(resolve, 100));

    const result = await db.query<{ count: number }>("SELECT COUNT(*) as count FROM task");
    expect(result.rows[0].count).toBe(0);
    expect(mockTasks[taskType].handler).toHaveBeenCalledTimes(1);
  });

  it("should mark failed tasks", async () => {
    const taskType = "generate_fragments";
    const params = { articleId: 1 };

    // Mock the task handler to throw an error
    mockTasks[taskType] = {
      handler: mock(() => Promise.reject(new Error("Test error"))),
      params: { parse: (p: any) => p },
    };

    await jobQueue.enqueue(taskType, params);

    await jobQueue.processPendingTasks();

    // Wait for a short time to allow the task to be processed
    await new Promise((resolve) => setTimeout(resolve, 100));

    const result = await db.query<{ count: number; failed_count: number }>(
      "SELECT COUNT(*) as count, COUNT(failed_at) as failed_count FROM task"
    );
    expect(result.rows[0].count).toBe(1);
    expect(result.rows[0].failed_count).toBe(1);
  });

  it("should stop processing tasks when requested", async () => {
    const taskType = "generate_fragments";
    const params = { articleId: 1 };

    // Mock the task handler
    mockTasks[taskType] = {
      handler: mock(() => new Promise((resolve) => setTimeout(resolve, 500))),
      params: { parse: (p: any) => p },
    };

    await jobQueue.enqueue(taskType, params);
    await jobQueue.enqueue(taskType, { articleId: 2 });

    const processPromise = jobQueue.processPendingTasks();

    // Stop the queue after a short delay
    setTimeout(() => jobQueue.stop(), 100);

    await processPromise;

    const result = await db.query<{ count: number }>("SELECT COUNT(*) as count FROM task");
    expect(result.rows[0].count).toBe(1); // One task should remain unprocessed
  });
});

--------------------------------------------------------------------------------
/src/background/pglite/job_queue.ts:
--------------------------------------------------------------------------------
import type { PGlite, Transaction } from "@electric-sql/pglite";
import type { TaskDefinition } from "./tasks";
import * as defaultTasks from "./tasks";

const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

type DBWriter = Pick<PGlite | Transaction, "query">;

export const JOB_QUEUE_SCHEMA = `
CREATE TABLE IF NOT EXISTS task (
  id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
  task_type TEXT NOT NULL,
  params JSONB DEFAULT '{}'::jsonb NOT NULL,
  created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL,
  failed_at TIMESTAMP WITH TIME ZONE,
  error TEXT,
  CONSTRAINT task_task_type_params_unique UNIQUE(task_type, params)
);
`;
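
// Illustrative usage (a sketch -- this mirrors the setup in job_queue.test.ts
// above; the production wiring lives in the pglite backend):
//
//   const queue = new JobQueue(db);
//   await queue.initialize();
//   await queue.enqueue("generate_fragments", { articleId: 1 });
//   await queue.processPendingTasks();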
| if (!task) { 43 | throw new Error(`Task type ${taskType} not implemented`); 44 | } 45 | 46 | // Make sure params are valid before adding to queue 47 | task.params?.parse(params); 48 | 49 | const result = await tx.query<{ id: number }>( 50 | ` 51 | INSERT INTO task (task_type, params) 52 | VALUES ($1, $2::jsonb) 53 | ON CONFLICT (task_type, params) DO NOTHING 54 | RETURNING id 55 | `, 56 | [taskType, params] 57 | ); 58 | 59 | const taskId = result.rows[0]?.id; 60 | 61 | return taskId; 62 | } 63 | 64 | /** 65 | * Process a single task from the queue 66 | * 67 | * NOTE: a few things about this queue strategy: 68 | * - priority queue based on logic in the ORDER BY clause. add cases as needed 69 | * - random order if no priority is set 70 | */ 71 | private async processQueue() { 72 | let processedId: number | null = null; 73 | 74 | try { 75 | await this.db.transaction(async (tx) => { 76 | const result = await tx.query<{ 77 | id: number; 78 | task_type: string; 79 | params: Record; 80 | }>(` 81 | SELECT id, task_type, params::jsonb 82 | FROM task 83 | WHERE failed_at IS NULL 84 | ORDER BY 85 | CASE 86 | WHEN task_type = 'generate_fragments' THEN 0 87 | ELSE random() 88 | END, 89 | created_at 90 | LIMIT 1 91 | FOR UPDATE SKIP LOCKED 92 | `); 93 | 94 | if (!result.rows.length) { 95 | console.log("task :: empty queue"); 96 | return; 97 | } 98 | 99 | const { id, task_type, params } = result.rows[0]; 100 | 101 | processedId = id; 102 | 103 | if (!(task_type in this.tasks)) { 104 | console.warn(`task :: ${task_type} :: not implemented`); 105 | await this.markTaskAsFailed(tx, id, "Task type not implemented"); 106 | return; 107 | } 108 | 109 | const task = this.tasks[task_type as keyof typeof this.tasks] as TaskDefinition; 110 | const start = performance.now(); 111 | try { 112 | await task.handler(tx, task.params?.parse(params)); 113 | await tx.query("DELETE FROM task WHERE id = $1", [id]); 114 | } catch (error) { 115 | console.error(`task :: error`, error.message); 116 | throw error; 117 | } finally { 118 | console.log( 119 | `task :: ${performance.now() - start}ms :: ${task_type} :: ${JSON.stringify(params)}` 120 | ); 121 | } 122 | }); 123 | } catch (error) { 124 | console.error(`task :: processQueue :: error`, error); 125 | 126 | // NOTE this cannot be done within the transaction. using the tx after a 127 | // failure will result in an error saying the transaction is aborted. 
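// In Postgres that error typically reads "current transaction is aborted, commands ignored until end of transaction block", which is why the failure is recorded below against this.db rather than the now-dead tx.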
128 | if (processedId) { 129 | await this.markTaskAsFailed(this.db, processedId, error.message); 130 | } 131 | } 132 | } 133 | 134 | private async markTaskAsFailed(tx: DBWriter, id: number, errorMessage: string) { 135 | await tx.query( 136 | ` 137 | UPDATE task 138 | SET failed_at = CURRENT_TIMESTAMP, error = $1 139 | WHERE id = $2 140 | `, 141 | [errorMessage, id] 142 | ); 143 | } 144 | 145 | async processPendingTasks() { 146 | if (this.isProcessing) { 147 | return; 148 | } 149 | 150 | this.isProcessing = true; 151 | this.shouldStop = false; 152 | 153 | const getPendingCount = async () => { 154 | const pendingTasks = await this.db.query<{ count: number }>(` 155 | SELECT COUNT(*) as count FROM task 156 | WHERE failed_at IS NULL 157 | `); 158 | return pendingTasks.rows[0].count; 159 | }; 160 | 161 | try { 162 | while ((await getPendingCount()) > 0 && !this.shouldStop) { 163 | await this.processQueue(); 164 | await sleep(this.taskInterval); 165 | } 166 | } finally { 167 | this.isProcessing = false; 168 | } 169 | } 170 | 171 | stop() { 172 | this.shouldStop = true; 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/background/pglite/migration-manager.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect, beforeEach, mock } from "bun:test"; 2 | import { PGlite } from "@electric-sql/pglite"; // Use the direct import for testing 3 | import { MigrationManager, Migration } from "./migration-manager"; 4 | 5 | describe("MigrationManager", () => { 6 | let db: PGlite; 7 | let migrationManager: MigrationManager; 8 | 9 | beforeEach(async () => { 10 | if (db) { 11 | await db.close(); 12 | } 13 | 14 | // Create a new in-memory database for each test 15 | db = new PGlite(); 16 | migrationManager = new MigrationManager(db); 17 | }); 18 | 19 | it("should initialize with no migrations", async () => { 20 | const status = await migrationManager.applyMigrations(); 21 | 22 | expect(status.ok).toBe(true); 23 | expect(status.currentVersion).toBe(0); 24 | expect(status.availableVersion).toBe(0); 25 | expect(status.pendingCount).toBe(0); 26 | }); 27 | 28 | it("should register migrations correctly", () => { 29 | const migration1: Migration = { 30 | version: 1, 31 | name: "test_migration_1", 32 | description: "Test migration 1", 33 | sql: "CREATE TABLE test1 (id SERIAL PRIMARY KEY);", 34 | }; 35 | 36 | const migration2: Migration = { 37 | version: 2, 38 | name: "test_migration_2", 39 | description: "Test migration 2", 40 | sql: "CREATE TABLE test2 (id SERIAL PRIMARY KEY);", 41 | }; 42 | 43 | migrationManager.registerMigration(migration1); 44 | migrationManager.registerMigration(migration2); 45 | 46 | // We're testing internal state here, so we need to cast to access private properties 47 | const migrations = (migrationManager as any).migrations; 48 | expect(migrations.length).toBe(2); 49 | expect(migrations[0].version).toBe(1); 50 | expect(migrations[1].version).toBe(2); 51 | }); 52 | 53 | it("should apply migrations in order", async () => { 54 | const migration1: Migration = { 55 | version: 1, 56 | name: "test_migration_1", 57 | description: "Test migration 1", 58 | sql: "CREATE TABLE test1 (id SERIAL PRIMARY KEY);", 59 | }; 60 | 61 | const migration2: Migration = { 62 | version: 2, 63 | name: "test_migration_2", 64 | description: "Test migration 2", 65 | sql: "CREATE TABLE test2 (id SERIAL PRIMARY KEY);", 66 | }; 67 | 68 | migrationManager.registerMigration(migration1); 69 | 
migrationManager.registerMigration(migration2); 70 | 71 | const status = await migrationManager.applyMigrations(); 72 | 73 | expect(status.ok).toBe(true); 74 | expect(status.currentVersion).toBe(2); 75 | expect(status.availableVersion).toBe(2); 76 | expect(status.pendingCount).toBe(0); 77 | 78 | // Verify tables were created 79 | const result1 = await db.query( 80 | "SELECT table_name FROM information_schema.tables WHERE table_name = 'test1'" 81 | ); 82 | const result2 = await db.query( 83 | "SELECT table_name FROM information_schema.tables WHERE table_name = 'test2'" 84 | ); 85 | 86 | expect(result1.rows.length).toBe(1); 87 | expect(result2.rows.length).toBe(1); 88 | }); 89 | 90 | it("should only apply pending migrations", async () => { 91 | // First migration 92 | const migration1: Migration = { 93 | version: 1, 94 | name: "test_migration_1", 95 | description: "Test migration 1", 96 | sql: "CREATE TABLE test1 (id SERIAL PRIMARY KEY);", 97 | }; 98 | 99 | migrationManager.registerMigration(migration1); 100 | await migrationManager.applyMigrations(); 101 | 102 | // Second migration 103 | const migration2: Migration = { 104 | version: 2, 105 | name: "test_migration_2", 106 | description: "Test migration 2", 107 | sql: "CREATE TABLE test2 (id SERIAL PRIMARY KEY);", 108 | }; 109 | 110 | migrationManager.registerMigration(migration2); 111 | const status = await migrationManager.applyMigrations(); 112 | 113 | expect(status.ok).toBe(true); 114 | expect(status.currentVersion).toBe(2); 115 | 116 | // Verify the migrations table has 2 records 117 | const migrationsResult = await db.query<{ version: number }>( 118 | "SELECT * FROM migrations ORDER BY version" 119 | ); 120 | expect(migrationsResult.rows.length).toBe(2); 121 | expect(migrationsResult.rows[0].version).toBe(1); 122 | expect(migrationsResult.rows[1].version).toBe(2); 123 | }); 124 | 125 | it("should handle errors in migrations", async () => { 126 | const migration1: Migration = { 127 | version: 1, 128 | name: "test_migration_1", 129 | description: "Test migration 1", 130 | sql: "CREATE TABLE test1 (id SERIAL PRIMARY KEY);", 131 | }; 132 | 133 | // This migration has invalid SQL 134 | const migration2: Migration = { 135 | version: 2, 136 | name: "invalid_migration", 137 | description: "Invalid SQL migration", 138 | sql: "CREATE TABLE WITH INVALID SYNTAX!!!", 139 | }; 140 | 141 | migrationManager.registerMigration(migration1); 142 | migrationManager.registerMigration(migration2); 143 | 144 | const status = await migrationManager.applyMigrations(); 145 | 146 | expect(status.ok).toBe(false); 147 | expect(status.currentVersion).toBe(1); // Only the first migration should be applied 148 | 149 | // Verify only the first table exists 150 | const result1 = await db.query( 151 | "SELECT table_name FROM information_schema.tables WHERE table_name = 'test1'" 152 | ); 153 | const result2 = await db.query( 154 | "SELECT table_name FROM information_schema.tables WHERE table_name = 'test2'" 155 | ); 156 | 157 | expect(result1.rows.length).toBe(1); 158 | expect(result2.rows.length).toBe(0); 159 | }); 160 | 161 | it("should handle migrations with out-of-order versions", async () => { 162 | const migration2: Migration = { 163 | version: 2, 164 | name: "test_migration_2", 165 | description: "Test migration 2", 166 | sql: "CREATE TABLE test2 (id SERIAL PRIMARY KEY);", 167 | }; 168 | 169 | const migration1: Migration = { 170 | version: 1, 171 | name: "test_migration_1", 172 | description: "Test migration 1", 173 | sql: "CREATE TABLE test1 (id SERIAL PRIMARY 
KEY);", 174 | }; 175 | 176 | // Register in reverse order 177 | migrationManager.registerMigration(migration2); 178 | migrationManager.registerMigration(migration1); 179 | 180 | const status = await migrationManager.applyMigrations(); 181 | 182 | expect(status.ok).toBe(true); 183 | expect(status.currentVersion).toBe(2); 184 | 185 | // Verify migrations were applied in correct order 186 | const migrationsResult = await db.query<{ version: number }>( 187 | "SELECT * FROM migrations ORDER BY version" 188 | ); 189 | expect(migrationsResult.rows.length).toBe(2); 190 | expect(migrationsResult.rows[0].version).toBe(1); 191 | expect(migrationsResult.rows[1].version).toBe(2); 192 | }); 193 | }); 194 | -------------------------------------------------------------------------------- /src/background/pglite/migration-manager.ts: -------------------------------------------------------------------------------- 1 | import { PGlite } from "./HAX_pglite"; 2 | import { Transaction } from "@electric-sql/pglite"; 3 | 4 | /** 5 | * Simple migration interface for defining database schema changes 6 | * with forward-only migrations 7 | */ 8 | export interface Migration { 9 | version: number; 10 | name: string; 11 | description: string; 12 | sql: string; // SQL to execute for this migration 13 | } 14 | 15 | /** 16 | * Migration status 17 | */ 18 | export interface MigrationStatus { 19 | ok: boolean; 20 | currentVersion: number; 21 | availableVersion: number; 22 | pendingCount: number; 23 | } 24 | 25 | /** 26 | * A simple, forward-only migration manager for PGlite 27 | */ 28 | export class MigrationManager { 29 | private db: PGlite; 30 | private migrations: Migration[] = []; 31 | private currentVersion = 0; 32 | private highestVersion = 0; 33 | 34 | constructor(db: PGlite) { 35 | this.db = db; 36 | } 37 | 38 | /** 39 | * Register a migration with the manager 40 | */ 41 | registerMigration(migration: Migration): void { 42 | this.migrations.push(migration); 43 | 44 | // Update highest available version 45 | this.highestVersion = Math.max(this.highestVersion, migration.version); 46 | 47 | // Sort migrations by version 48 | this.migrations.sort((a, b) => a.version - b.version); 49 | } 50 | 51 | /** 52 | * Check if a table exists 53 | */ 54 | private async checkTableExists(tableName: string): Promise { 55 | try { 56 | const result = await this.db.query<{ exists: boolean }>( 57 | "SELECT EXISTS (SELECT FROM pg_tables WHERE tablename = $1) as exists", 58 | [tableName] 59 | ); 60 | return result.rows[0]?.exists || false; 61 | } catch (error) { 62 | // If this fails, assume table doesn't exist 63 | console.warn(`Error checking if table ${tableName} exists:`, error); 64 | return false; 65 | } 66 | } 67 | 68 | /** 69 | * Get current migration version from the database 70 | */ 71 | private async getCurrentVersion(): Promise { 72 | try { 73 | const migrationsTableExists = await this.checkTableExists('migrations'); 74 | 75 | if (!migrationsTableExists) { 76 | return 0; // No migrations applied yet 77 | } 78 | 79 | const result = await this.db.query<{ max_version: number }>( 80 | "SELECT MAX(version) as max_version FROM migrations" 81 | ); 82 | 83 | return result.rows[0]?.max_version || 0; 84 | } catch (error) { 85 | console.error("Error getting current migration version:", error); 86 | return 0; 87 | } 88 | } 89 | 90 | /** 91 | * Apply a single migration 92 | */ 93 | private async applyMigration(migration: Migration): Promise { 94 | try { 95 | console.debug(`Applying migration ${migration.name} (v${migration.version})...`); 96 | 
97 | const startTime = performance.now(); 98 | 99 | await this.db.transaction(async (tx) => { 100 | // Execute migration SQL 101 | await tx.exec(migration.sql); 102 | 103 | // Record migration in the migrations table 104 | await tx.query( 105 | "INSERT INTO migrations (version, name, description, applied_at) VALUES ($1, $2, $3, $4)", 106 | [migration.version, migration.name, migration.description, Date.now()] 107 | ); 108 | }); 109 | 110 | const duration = Math.round(performance.now() - startTime); 111 | console.debug(`Migration ${migration.name} (v${migration.version}) applied successfully in ${duration}ms`); 112 | 113 | return true; 114 | } catch (error) { 115 | console.error(`Error applying migration ${migration.name} (v${migration.version}):`, error); 116 | return false; 117 | } 118 | } 119 | 120 | /** 121 | * Apply all pending migrations 122 | */ 123 | async applyMigrations(): Promise { 124 | try { 125 | // Ensure migrations table exists 126 | const migrationsTableExists = await this.checkTableExists('migrations'); 127 | 128 | if (!migrationsTableExists) { 129 | // Create migrations table if it doesn't exist 130 | await this.db.exec(` 131 | CREATE TABLE IF NOT EXISTS migrations ( 132 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY, 133 | version INTEGER UNIQUE NOT NULL, 134 | name TEXT NOT NULL, 135 | description TEXT, 136 | applied_at BIGINT NOT NULL 137 | ); 138 | `); 139 | } 140 | 141 | // Check if the migrations table has the required columns 142 | try { 143 | await this.db.query("SELECT name FROM migrations LIMIT 0"); 144 | } catch (error) { 145 | console.warn("Migrations table exists but may be missing columns. Attempting to upgrade schema..."); 146 | // Add missing columns if they don't exist 147 | try { 148 | await this.db.exec("ALTER TABLE migrations ADD COLUMN IF NOT EXISTS name TEXT NOT NULL DEFAULT 'legacy_migration'"); 149 | await this.db.exec("ALTER TABLE migrations ADD COLUMN IF NOT EXISTS description TEXT"); 150 | console.debug("Successfully upgraded migrations table schema"); 151 | } catch (alterError) { 152 | console.error("Failed to alter migrations table:", alterError); 153 | throw alterError; 154 | } 155 | } 156 | 157 | // Get current version 158 | this.currentVersion = await this.getCurrentVersion(); 159 | console.debug(`Current migration version: ${this.currentVersion}`); 160 | 161 | // Find pending migrations 162 | const pendingMigrations = this.migrations.filter(m => m.version > this.currentVersion); 163 | console.debug(`Found ${pendingMigrations.length} pending migrations`); 164 | 165 | if (pendingMigrations.length === 0) { 166 | return { 167 | ok: true, 168 | currentVersion: this.currentVersion, 169 | availableVersion: this.highestVersion, 170 | pendingCount: 0 171 | }; 172 | } 173 | 174 | // Apply migrations in order 175 | for (const migration of pendingMigrations) { 176 | const success = await this.applyMigration(migration); 177 | 178 | if (!success) { 179 | return { 180 | ok: false, 181 | currentVersion: this.currentVersion, 182 | availableVersion: this.highestVersion, 183 | pendingCount: pendingMigrations.length 184 | }; 185 | } 186 | 187 | this.currentVersion = migration.version; 188 | } 189 | 190 | return { 191 | ok: true, 192 | currentVersion: this.currentVersion, 193 | availableVersion: this.highestVersion, 194 | pendingCount: 0 195 | }; 196 | } catch (error) { 197 | console.error("Error applying migrations:", error); 198 | 199 | return { 200 | ok: false, 201 | currentVersion: this.currentVersion, 202 | availableVersion: this.highestVersion, 203 
| pendingCount: this.migrations.filter(m => m.version > this.currentVersion).length 204 | }; 205 | } 206 | } 207 | } -------------------------------------------------------------------------------- /src/background/pglite/migrations/001_init.ts: -------------------------------------------------------------------------------- 1 | import { Migration } from '../migration-manager'; 2 | 3 | export const migration: Migration = { 4 | version: 1, 5 | name: 'initial_schema', 6 | description: 'Initial schema creation with base tables for documents and search', 7 | sql: ` 8 | -- make sure pgvector is enabled 9 | CREATE EXTENSION IF NOT EXISTS vector; 10 | CREATE EXTENSION IF NOT EXISTS pg_trgm; 11 | 12 | CREATE TABLE IF NOT EXISTS document ( 13 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY, 14 | title TEXT, 15 | url TEXT UNIQUE NOT NULL, 16 | excerpt TEXT, 17 | md_content TEXT, 18 | md_content_hash TEXT, 19 | publication_date BIGINT, 20 | hostname TEXT, 21 | last_visit BIGINT, 22 | last_visit_date TEXT, 23 | extractor TEXT, 24 | created_at BIGINT NOT NULL DEFAULT EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000, 25 | updated_at BIGINT 26 | ); 27 | 28 | CREATE INDEX IF NOT EXISTS document_hostname ON document (hostname); 29 | 30 | CREATE TABLE IF NOT EXISTS document_fragment ( 31 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY, 32 | entity_id BIGINT NOT NULL REFERENCES document (id) ON DELETE CASCADE, 33 | attribute TEXT, 34 | value TEXT, 35 | fragment_order INTEGER, 36 | created_at BIGINT NOT NULL DEFAULT EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000, 37 | search_vector tsvector, 38 | content_vector vector(384) 39 | ); 40 | 41 | CREATE OR REPLACE FUNCTION update_document_fragment_fts() RETURNS TRIGGER AS $$ 42 | BEGIN 43 | NEW.search_vector := to_tsvector('simple', NEW.value); 44 | RETURN NEW; 45 | END; 46 | $$ LANGUAGE plpgsql; 47 | 48 | -- Trigger to update search vector 49 | DROP TRIGGER IF EXISTS update_document_fragment_fts_trigger ON document_fragment; 50 | CREATE TRIGGER update_document_fragment_fts_trigger 51 | BEFORE INSERT OR UPDATE ON document_fragment 52 | FOR EACH ROW EXECUTE FUNCTION update_document_fragment_fts(); 53 | 54 | -- Index for full-text search 55 | CREATE INDEX IF NOT EXISTS idx_document_fragment_search_vector ON document_fragment USING GIN(search_vector); 56 | 57 | -- Index for trigram similarity search, i.e. postgres trigram 58 | -- NOTE: Disabled for now. 
Takes up a significant amount of space and not yet proven useful for this project 59 | --CREATE INDEX IF NOT EXISTS trgm_idx_document_fragment_value ON document_fragment USING GIN(value gin_trgm_ops); 60 | 61 | CREATE TABLE IF NOT EXISTS blacklist_rule ( 62 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY, 63 | pattern TEXT UNIQUE NOT NULL, 64 | level TEXT NOT NULL CHECK (level IN ('no_index', 'url_only')), 65 | created_at BIGINT NOT NULL DEFAULT EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000 66 | ); 67 | 68 | CREATE INDEX IF NOT EXISTS idx_blacklist_rule_pattern ON blacklist_rule (pattern); 69 | 70 | CREATE TABLE IF NOT EXISTS migrations ( 71 | id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY, 72 | version INTEGER UNIQUE NOT NULL, 73 | name TEXT NOT NULL, 74 | description TEXT, 75 | applied_at BIGINT NOT NULL 76 | ); 77 | ` 78 | }; 79 | 80 | // For backward compatibility with existing code 81 | export const sql = migration.sql;
-------------------------------------------------------------------------------- /src/background/pglite/tasks.ts: -------------------------------------------------------------------------------- 1 | import type { Transaction } from "@electric-sql/pglite"; 2 | import { z } from "zod"; 3 | import { createEmbedding } from "../embedding/pipeline"; 4 | import { getArticleFragments, segment } from "../../common/utils"; 5 | 6 | /** 7 | * A helper for type inference. 8 | */ 9 | function createTask<T extends z.AnyZodObject>({ 10 | params = z.object({}), 11 | handler, 12 | }: { 13 | params?: T; 14 | handler: ( 15 | tx: Transaction, 16 | params: T extends z.AnyZodObject ? z.infer<T> : undefined 17 | ) => Promise<void>; 18 | }) { 19 | return { params, handler } as const; 20 | } 21 | 22 | export type TaskDefinition = ReturnType< 23 | typeof createTask 24 | >; 25 | 26 | export const generate_vector = createTask({ 27 | params: z.object({ 28 | fragment_id: z.number(), 29 | }), 30 | handler: async (tx, params) => { 31 | const result = await tx.query<{ value: string }>( 32 | "SELECT value FROM document_fragment WHERE id = $1", 33 | [params.fragment_id] 34 | ); 35 | const embedding = await createEmbedding(result.rows[0].value); 36 | await tx.query("UPDATE document_fragment SET content_vector = $1 WHERE id = $2", [ 37 | JSON.stringify(embedding), 38 | params.fragment_id, 39 | ]); 40 | }, 41 | }); 42 | 43 | export const generate_fragments = createTask({ 44 | params: z.object({ 45 | document_id: z.number(), 46 | }), 47 | handler: async (tx, params) => { 48 | const document = await tx.query<{ 49 | id: number; 50 | title: string; 51 | url: string; 52 | excerpt: string; 53 | md_content: string; 54 | }>("SELECT * FROM document WHERE id = $1", [params.document_id]); 55 | const row = document.rows[0]; 56 | 57 | if (!row) { 58 | throw new Error("Document not found"); 59 | } 60 | 61 | const fragments = getArticleFragments(row.md_content || ""); 62 | 63 | const sql = ` 64 | INSERT INTO document_fragment ( 65 | entity_id, 66 | attribute, 67 | value, 68 | fragment_order 69 | ) VALUES ($1, $2, $3, $4) 70 | ON CONFLICT DO NOTHING; 71 | `; 72 | 73 | let triples: [e: number, a: string, v: string, o: number][] = []; 74 | if (row.title) triples.push([params.document_id, "title", segment(row.title), 0]); 75 | if (row.excerpt) triples.push([params.document_id, "excerpt", segment(row.excerpt), 0]); 76 | if (row.url) triples.push([params.document_id, "url", row.url, 0]); 77 | triples = triples.concat( 78 | fragments 79 | .filter((x) => x.trim()) 80 | .map((fragment, i) => { 81 | return [params.document_id, "content", fragment, i]; 82
}) 83 | ); 84 | 85 | const logLimit = 5; 86 | console.debug( 87 | `generate_fragments :: triples :: ${triples.length} (${triples.length - logLimit} omitted)`, 88 | triples.slice(0, logLimit) 89 | ); 90 | 91 | for (const param of triples) { 92 | await tx.query(sql, param); 93 | } 94 | }, 95 | }); 96 | 97 | export const ping = createTask({ 98 | handler: async () => { 99 | console.log("Pong!"); 100 | }, 101 | }); 102 | 103 | export const failing_task = createTask({ 104 | handler: async () => { 105 | throw new Error("This task always fails"); 106 | }, 107 | }); 108 | -------------------------------------------------------------------------------- /src/common/logs.ts: -------------------------------------------------------------------------------- 1 | export function log(...args: string[]) { 2 | console.log(...args); 3 | } 4 | -------------------------------------------------------------------------------- /src/common/utils.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from "bun:test"; 2 | 3 | import { getArticleFragments, segment, sanitizeHtmlAllowMark } from "./utils"; 4 | 5 | describe("getArticleFragments", () => { 6 | it("should handle longform, multi-paragraph text", () => { 7 | const longText = `# Introduction 8 | 9 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 10 | 11 | ## Section 1 12 | 13 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 14 | 15 | ### Subsection 1.1 16 | 17 | Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo.`; 18 | 19 | const fragments = getArticleFragments(longText); 20 | expect(fragments.length).toBeGreaterThan(1); 21 | expect(fragments[0]).toBe( 22 | "# Introduction Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat." 23 | ); 24 | expect(fragments[1]).toBe( 25 | "## Section 1 Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." 26 | ); 27 | expect(fragments[2]).toBe( 28 | "### Subsection 1.1 Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo." 
29 | ); 30 | }); 31 | 32 | it("should handle short text below minFragmentLength", () => { 33 | const shortText = "This is a short text."; 34 | const fragments = getArticleFragments(shortText); 35 | expect(fragments).toHaveLength(1); 36 | expect(fragments[0]).toBe(shortText); 37 | }); 38 | 39 | it("should handle empty input", () => { 40 | const fragments = getArticleFragments(""); 41 | expect(fragments).toHaveLength(0); 42 | }); 43 | 44 | it("should handle input with only headings", () => { 45 | const headingsOnly = `# Heading 1 46 | ## Heading 2 47 | ### Heading 3`; 48 | const fragments = getArticleFragments(headingsOnly); 49 | expect(fragments).toHaveLength(3); 50 | expect(fragments[0]).toBe("# Heading 1"); 51 | expect(fragments[1]).toBe("## Heading 2"); 52 | expect(fragments[2]).toBe("### Heading 3"); 53 | }); 54 | 55 | it("should handle input with very long paragraphs", () => { 56 | const longParagraph = "Lorem ipsum ".repeat(100); 57 | const fragments = getArticleFragments(longParagraph); 58 | expect(fragments.length).toBe(1); 59 | expect(fragments[0].length).toBeGreaterThan(100); 60 | }); 61 | 62 | it("should respect custom minFragmentLength", () => { 63 | const text = `Short para 1. 64 | 65 | Slightly longer paragraph 2. 66 | 67 | Even longer paragraph 3 with more content.`; 68 | 69 | const fragments = getArticleFragments(text); 70 | expect(fragments[0]).toBe( 71 | "Short para 1. Slightly longer paragraph 2. Even longer paragraph 3 with more content." 72 | ); 73 | }); 74 | }); 75 | 76 | describe("getArticleFragments with plain text", () => { 77 | it("should handle a single long paragraph", () => { 78 | const text = 79 | "This is a long paragraph that should be treated as a single fragment. It contains multiple sentences and goes on for a while to ensure it exceeds the minimum fragment length of 100 characters. The content is not particularly meaningful, but it serves the purpose of this test case."; 80 | const fragments = getArticleFragments(text); 81 | expect(fragments).toHaveLength(1); 82 | expect(fragments[0]).toBe(text); 83 | }); 84 | 85 | it("should split long text into multiple fragments", () => { 86 | const text = 87 | "First paragraph that is long enough to be its own fragment. It contains multiple sentences to exceed the minimum length of 100 characters.\n\nSecond paragraph that is also long enough to be a separate fragment. It also has multiple sentences and exceeds 100 characters.\n\nThird paragraph, again long enough to be distinct and over 100 characters in length."; 88 | const fragments = getArticleFragments(text); 89 | expect(fragments).toHaveLength(3); 90 | expect(fragments[0]).toContain("First paragraph"); 91 | expect(fragments[1]).toContain("Second paragraph"); 92 | expect(fragments[2]).toContain("Third paragraph"); 93 | }); 94 | 95 | it("should combine short paragraphs", () => { 96 | const text = 97 | "Short para 1.\n\nAnother short one.\n\nYet another.\n\nStill short.\n\nNeed more text to reach 100 characters. This should do it, creating a single fragment."; 98 | const fragments = getArticleFragments(text); 99 | expect(fragments).toHaveLength(1); 100 | expect(fragments[0]).toContain("Short para 1."); 101 | expect(fragments[0]).toContain("Need more text to reach 100 characters."); 102 | }); 103 | 104 | it("should handle text with varying paragraph lengths", () => { 105 | const text = 106 | "Short intro.\n\nThis is a much longer paragraph that should be its own fragment because it exceeds the minimum length of 100 characters. 
It contains multiple sentences to ensure it's long enough.\n\nAnother short paragraph.\n\nYet another long paragraph that should be separate. It also contains multiple sentences and exceeds the minimum length of 100 characters to be its own fragment."; 107 | const fragments = getArticleFragments(text); 108 | expect(fragments).toHaveLength(2); 109 | expect(fragments[0]).toContain("This is a much longer paragraph"); 110 | expect(fragments[1]).toContain("Yet another long paragraph"); 111 | }); 112 | 113 | it("should handle text with line breaks but no paragraphs", () => { 114 | const text = 115 | "This is a text\nwith line breaks\nbut no paragraph\nbreaks. It should\nbe treated as one\nfragment. We need to add more text to ensure it exceeds 100 characters and becomes a valid fragment."; 116 | const fragments = getArticleFragments(text); 117 | expect(fragments).toHaveLength(1); 118 | expect(fragments[0]).toBe( 119 | "This is a text with line breaks but no paragraph breaks. It should be treated as one fragment. We need to add more text to ensure it exceeds 100 characters and becomes a valid fragment." 120 | ); 121 | }); 122 | }); 123 | 124 | describe("segment", () => { 125 | it("should not affect normal English text", () => { 126 | const text = "This is a normal English sentence."; 127 | expect(segment(text)).toBe(text); 128 | }); 129 | 130 | it("should handle empty string", () => { 131 | expect(segment("")).toBe(""); 132 | }); 133 | 134 | it("should handle text with numbers and punctuation", () => { 135 | const text = "Hello, world! This is test #123."; 136 | expect(segment(text)).toBe(text); 137 | }); 138 | 139 | it("should segment text with non-Latin characters", () => { 140 | const text = "こんにちは世界"; 141 | const segmented = segment(text); 142 | expect(segmented).toBe("こんにちは 世界"); 143 | }); 144 | 145 | it("should handle mixed Latin and non-Latin text", () => { 146 | const text = "Hello こんにちは world 世界"; 147 | const segmented = segment(text); 148 | expect(segmented).toBe("Hello こんにちは world 世界"); 149 | }); 150 | 151 | it("should handle mixed Latin and Mandarin Chinese text", () => { 152 | const text = "Hello 你好世界我是一个人工智能助手 world 这是一个测试"; 153 | const segmented = segment(text); 154 | expect(segmented).toBe("Hello 你好 世界 我是 一个 人工 智能 助手 world 这 是 一个 测试"); 155 | }); 156 | 157 | it("should handle chinese with punctuation", () => { 158 | const text = 159 | "你好,世界!这是一个测试句子,用于检查中文文本的分段功能。我们希望确保即使在有标点符号的情况下,文本也能正确分段。"; 160 | const segmented = segment(text); 161 | expect(segmented).toBe( 162 | "你好 , 世界 ! 这 是 一个 测试 句子 , 用于 检查 中文 文本 的 分段 功能 。 我们 希望 确保 即使 在 有 标点 符号 的 情况 下 , 文本 也能 正确 分段 。" 163 | ); 164 | }); 165 | }); 166 | 167 | describe("sanitizeHtmlAllowMark", () => { 168 | it("should preserve mark tags while removing all other HTML tags", () => { 169 | const html = '
Text with highlighted and bold and italic parts
'; 170 | const sanitized = sanitizeHtmlAllowMark(html); 171 | expect(sanitized).toBe('Text with highlighted and bold and italic parts'); 172 | }); 173 | 174 | it("should strip attributes from mark tags", () => { 175 | const html = 'Text with attributes'; 176 | const sanitized = sanitizeHtmlAllowMark(html); 177 | expect(sanitized).toBe('Text with attributes'); 178 | }); 179 | 180 | it("should handle empty input", () => { 181 | expect(sanitizeHtmlAllowMark("")).toBe(""); 182 | expect(sanitizeHtmlAllowMark(null as any)).toBe(""); 183 | expect(sanitizeHtmlAllowMark(undefined as any)).toBe(""); 184 | }); 185 | 186 | it("should remove script tags and their content", () => { 187 | const html = 'Text with scripts'; 188 | const sanitized = sanitizeHtmlAllowMark(html); 189 | expect(sanitized).toBe('Text with scripts'); 190 | }); 191 | 192 | it("should remove style tags and their content", () => { 193 | const html = 'Text with styles'; 194 | const sanitized = sanitizeHtmlAllowMark(html); 195 | expect(sanitized).toBe('Text with styles'); 196 | }); 197 | 198 | it("should handle complex nested HTML while preserving mark tags", () => { 199 | const html = ` 200 |
201 |

Title

202 |

Paragraph with highlighted text and dangerous content

203 | 204 |
    205 |
  • Item 1 with highlight
  • 206 |
  • Item 2
  • 207 |
208 |
209 | `; 210 | const sanitized = sanitizeHtmlAllowMark(html); 211 | expect(sanitized).toContain('highlighted'); 212 | expect(sanitized).toContain('highlight'); 213 | expect(sanitized).not.toContain(''); 215 | expect(sanitized).not.toContain('

'); 216 | expect(sanitized).not.toContain(' 5 | 6 | -------------------------------------------------------------------------------- /src/ui/DetailsPanel.svelte: -------------------------------------------------------------------------------- 1 | 23 | 24 |

25 | {#if row} 26 |
27 | 35 | 43 |
44 |

45 | {row?.title} 46 | {row.url} 49 |

50 | 51 | {:else if err} 52 |
53 | {err.message} 54 |
55 | {:else} 56 | 57 | {/if} 58 |
59 | -------------------------------------------------------------------------------- /src/ui/ExportProgress.svelte: -------------------------------------------------------------------------------- 1 | 17 | 18 | {#if visible} 19 |
20 |
21 |

Exporting Database

22 | 23 |
24 |
25 |
29 |
30 |
31 | {$progress} of {total} documents 32 | {$percentage}% 33 |
34 |
35 | 36 |

37 | Please don't close this window while export is in progress. 38 |

39 |
40 |
41 | {/if} -------------------------------------------------------------------------------- /src/ui/LayoutWrapper.svelte: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/ui/Menu.svelte: -------------------------------------------------------------------------------- 1 | 124 | 125 |
135 |
136 | (currentIndex = 0)} 140 | data-menu-input 141 | class="appearance-none w-full outline-none focus:ring-0 text-white text-lg bg-[#1d1d1d] rounded-t-lg px-3 py-3 border-none border-b border-zinc-600" 142 | /> 143 |
144 |
145 | {#each filteredCommands as command, i (command.name)} 146 |
{ 148 | currentIndex = i; 149 | }} 150 | on:click={async () => { 151 | const shouldClose = await command.exec(); 152 | if (shouldClose) onClose(); 153 | }} 154 | class={classNames("command p-2 rounded", { "bg-white/10": currentIndex === i })} 155 | > 156 | {command.name} 157 |
158 | {:else} 159 |
No commands
160 | {/each} 161 |
162 |
163 | -------------------------------------------------------------------------------- /src/ui/MigrationModal.svelte: -------------------------------------------------------------------------------- 1 | 149 | 150 | 156 |
157 | {#if migrationComplete} 158 |
159 | 166 | 172 | 173 |

Migration Complete!

174 |

{vlcnImportMessage}

175 |

176 | This dialog will close automatically in a few seconds... 177 |

178 |
179 | {:else} 180 |

181 | We detected data from a previous version of Full Text Tabs Forever. Would you like to 182 | migrate your search index to the new version? 183 |

184 |

185 | The import process may take several minutes depending on the size of your database. 186 |

187 |
188 | 201 |
202 | {#if vlcnImportMessage && !migrationComplete} 203 |
204 |

210 | {vlcnImportMessage} 211 |

212 | 213 | {#if isImporting && totalDocuments > 0} 214 |
215 |
216 |
220 |
221 |

222 | {migrationProgress} of {totalDocuments} documents 223 |

224 |
225 | {/if} 226 |
227 | {/if} 228 | {/if} 229 |
230 |
231 | -------------------------------------------------------------------------------- /src/ui/Modal.svelte: -------------------------------------------------------------------------------- 1 | 30 | 31 | {#if open} 32 |
38 |
39 |
40 |

{title}

41 | {#if showClose} 42 | 51 | {/if} 52 |
53 |
54 | 55 |
56 |
57 |
58 | {/if} -------------------------------------------------------------------------------- /src/ui/RecentItems.svelte: -------------------------------------------------------------------------------- 1 | 62 | 63 |
64 | {#if loading} 65 |
Loading recent pages...
66 | {:else if error} 67 |
There was an error loading recent pages.
68 | {:else if recentItems.length === 0} 69 |
No recent pages found.
70 | {:else} 71 |
72 | {#each Object.entries(groupedItems) as [date, items], i (date)} 73 |
74 |
{date}
75 | {#each items as item, j} 76 | { 83 | currentGroupIndex = i; 84 | currentItemIndex = j; 85 | }} 86 | on:mouseover={() => { 87 | if (enableMouseEvents) { 88 | currentGroupIndex = i; 89 | currentItemIndex = j; 90 | } 91 | }} 92 | /> 93 | {/each} 94 |
95 | {/each} 96 |
97 | {/if} 98 |
99 | -------------------------------------------------------------------------------- /src/ui/ResultItem.svelte: -------------------------------------------------------------------------------- 1 | 32 | 33 | 34 |
48 | 53 | {#if showTime && item.last_visit} 54 |
55 | {getRelativeTime(item.last_visit)} 56 |
57 | {/if} 58 |
59 | favicon for {urlObj.hostname} 64 |
65 | {#if item.title} 66 |
67 | {@html sanitizeHtmlAllowMark(item.title)} 68 |
69 | {/if} 70 |
71 | {cleanUrl(url)} 72 |
73 |
74 | 75 | {#if showSnippets && item.snippet && item.attribute !== "title" && item.attribute !== "url"} 76 |
77 | 78 | {@html sanitizeHtmlAllowMark(item.snippet)} 79 |
80 | {/if} 81 | 82 | 83 |
84 | 85 | 100 | -------------------------------------------------------------------------------- /src/ui/ResultRowView.svelte: -------------------------------------------------------------------------------- 1 | 9 | 10 |
15 | 16 | {@html item.snippet} 17 |
18 | -------------------------------------------------------------------------------- /src/ui/global.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | body { 6 | @apply bg-slate-900 text-slate-100; 7 | } 8 | 9 | mark { 10 | @apply bg-pink-900/50 border-dashed border-b border-pink-600 text-pink-200; 11 | } 12 | 13 | ::highlight(snippet) { 14 | @apply bg-pink-900/50 bg-pink-200 text-pink-900 underline; 15 | } 16 | -------------------------------------------------------------------------------- /src/ui/lib/commands.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | import { readFileAsText, pickFile } from "./dom"; 3 | import { rpc, fttf } from "./rpc"; 4 | import { updateStats, stats } from "../store/statsStore"; 5 | import { get } from "svelte/store"; 6 | import { streamingExport } from "./streaming-export"; 7 | 8 | const dbImportSchema = z.object({ 9 | document: z.array(z.any()), 10 | }); 11 | 12 | export const handleImport = async (): Promise<{ success: boolean; message?: string }> => { 13 | try { 14 | const file = await pickFile(".json"); 15 | if (file.type !== "application/json") { 16 | return { success: false, message: "Please upload a JSON file." }; 17 | } 18 | 19 | const text = await readFileAsText(file); 20 | const content = JSON.parse(text); 21 | const result = dbImportSchema.safeParse(content); 22 | if (!result.success) { 23 | console.error("Error parsing JSON:", result); 24 | return { success: false, message: "Invalid JSON file. Please upload a valid JSON file." }; 25 | } 26 | 27 | const documents = result.data.document; 28 | await rpc(["importDocumentsJSONv1", { document: documents }]); 29 | console.log("Imported:", documents.length, "documents"); 30 | 31 | await updateStats(); 32 | return { success: true }; 33 | } catch (error) { 34 | if (error instanceof Error && error.message === "No file selected") { 35 | return { success: false }; 36 | } 37 | console.error("Error importing JSON:", error); 38 | return { success: false, message: "Error importing file. Please try again." }; 39 | } 40 | }; 41 | 42 | export const vacuumFull = async () => { 43 | await updateStats(); 44 | let x = get(stats); 45 | const before = x?.Size; 46 | await rpc(["pg.exec", { sql: "VACUUM FULL" }]); 47 | await updateStats(); 48 | x = get(stats); 49 | const after = x?.Size; 50 | return { before, after }; 51 | }; 52 | 53 | /** 54 | * Export the database to a JSON file 55 | * For large databases, this will use the streaming export API if available 56 | * Otherwise falls back to the regular export method 57 | * @param options Optional configuration including progress callback 58 | * 59 | * @todo This doesn't do much... probably remove it in favor of streamingExport directly. 60 | */ 61 | export const exportJson = async (options?: { 62 | onProgress?: (progress: { current: number; total: number }) => void; 63 | }): Promise<{ success: boolean; message?: string }> => { 64 | try { 65 | // Use streaming export if available, which will fall back to regular export if needed 66 | const result = await streamingExport({ 67 | batchSize: 200, 68 | onProgress: options?.onProgress, 69 | }); 70 | 71 | return result; 72 | } catch (error) { 73 | console.error("Error exporting JSON:", error); 74 | return { success: false, message: "Error exporting file. Please try again." 
}; 75 | } 76 | }; 77 |
-------------------------------------------------------------------------------- /src/ui/lib/constants.ts: -------------------------------------------------------------------------------- 1 | /** the minimum char length of a query */ 2 | export const MIN_QUERY_LENGTH = 3; 3 | 4 | export const routeLabels: Record<string, string> = { 5 | index: "Search", 6 | "database-repl": "SQL", 7 | settings: "Settings", 8 | }; 9 |
-------------------------------------------------------------------------------- /src/ui/lib/dom.test.ts: -------------------------------------------------------------------------------- 1 | import { expect, test } from "bun:test"; 2 | import { findRanges } from "./dom"; 3 | 4 | test("findRanges", () => { 5 | const testCases = [ 6 | { input: "hello world", search: "hello", expected: [[0, 5]] }, 7 | { input: "hey there you", search: "you", expected: [[10, 13]] }, 8 | { input: "hey there you", search: "there", expected: [[4, 9]] }, 9 | { 10 | input: "recently trying sqlite in the browser using a vfs", 11 | search: "sqlite", 12 | expected: [[16, 22]], 13 | }, 14 | { 15 | input: "recently trying sqlite in the browser using a vfs", 16 | search: "browser", 17 | expected: [[30, 37]], 18 | }, 19 | { 20 | input: "recently trying sqlite in the browser using a vfs", 21 | search: "sqlite browser", 22 | expected: [ 23 | [16, 22], 24 | [30, 37], 25 | ], 26 | }, 27 | ]; 28 | 29 | for (const { input, search, expected } of testCases) { 30 | const ranges = findRanges(input, search); 31 | expect(ranges).toEqual(expected as [number, number][]); 32 | } 33 | }); 34 |
-------------------------------------------------------------------------------- /src/ui/lib/dom.ts: -------------------------------------------------------------------------------- 1 | import { MIN_QUERY_LENGTH } from "./constants"; 2 | 3 | export const findRanges = (str: string, query: string) => { 4 | const ranges: [number, number][] = []; 5 | const s = str.toLowerCase(); 6 | const queries = query 7 | .toLowerCase() 8 | .split(" ") 9 | .map((x) => x.trim()) 10 | .filter((x) => x.length >= MIN_QUERY_LENGTH); 11 | 12 | for (const q of queries) { 13 | let i = 0; 14 | while (i < s.length) { 15 | const idx = s.indexOf(q, i); 16 | if (idx === -1) { 17 | break; 18 | } 19 | ranges.push([idx, idx + q.length]); 20 | i = idx + q.length; 21 | } 22 | } 23 | return ranges; 24 | }; 25 | 26 | /** 27 | * Highlight functionality is not used as of this commit. It was used when 28 | * sqlite fts wasn't available to provide highlighting via the SNIPPET function. 29 | * Since then we've moved to pg fts and can use the ts_headline function, 30 | * however it may still be useful to have highlighting for other functionality.
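 * For example (an illustrative sketch, not a current code path): the returned Highlight is meant for the CSS Custom Highlight API, e.g. CSS.highlights.set("snippet", makeHighlights([textNode], query)), which would pair with the ::highlight(snippet) rule in global.css.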
31 | */ 32 | export const makeHighlights = (nodes: Node[], query: string) => { 33 | const rs: Range[] = []; 34 | for (const node of nodes) { 35 | if (node.nodeType !== Node.TEXT_NODE) { 36 | console.warn("Tried to highlight non-text node", node); 37 | continue; 38 | } 39 | 40 | const text = node.textContent || ""; 41 | const xs = findRanges(text, query); 42 | for (const [qstart, qend] of xs) { 43 | const r = new Range(); 44 | r.setStart(node, qstart); 45 | r.setEnd(node, qend); 46 | rs.push(r); 47 | } 48 | } 49 | 50 | return new Highlight(...rs); 51 | }; 52 | 53 | export const readFileAsText = (file: File): Promise<string> => { 54 | return new Promise<string>((resolve, reject) => { 55 | const reader = new FileReader(); 56 | reader.onload = (e) => { 57 | if (typeof e.target?.result === "string") { 58 | resolve(e.target.result); 59 | } else { 60 | reject(new Error("Failed to read file as text")); 61 | } 62 | }; 63 | reader.onerror = (e) => reject(e); 64 | reader.readAsText(file); 65 | }); 66 | }; 67 | 68 | export const pickFile = (accept?: string): Promise<File> => { 69 | return new Promise<File>((resolve, reject) => { 70 | const input = document.createElement("input"); 71 | input.type = "file"; 72 | if (accept) input.accept = accept; 73 | input.style.display = "none"; 74 | document.body.appendChild(input); 75 | 76 | input.onchange = (event) => { 77 | const file = (event.target as HTMLInputElement).files?.[0]; 78 | if (file) { 79 | resolve(file); 80 | } else { 81 | reject(new Error("No file selected")); 82 | } 83 | document.body.removeChild(input); 84 | }; 85 | 86 | input.click(); 87 | }); 88 | }; 89 |
-------------------------------------------------------------------------------- /src/ui/lib/rpc.ts: -------------------------------------------------------------------------------- 1 | import type { RpcMessage } from "@/background/backend"; 2 | import type { FTTF } from "@/background"; 3 | 4 | export type BrowserFTTF = FTTF & { 5 | rpc: (message: RpcMessage) => Promise<any>; 6 | }; 7 | 8 | /** 9 | * NOTE: when the backend wants to send an error the frontend will know about, 10 | * it passes the error prop. We re-throw it to match async/await error expectations. 11 | */ 12 | export const rpc = async (message: RpcMessage): Promise<any> => { 13 | const response = await chrome.runtime.sendMessage(message); 14 | if (response && response.error) { 15 | throw new Error(response.error); 16 | } 17 | return response; 18 | }; 19 | 20 | export const fttf: BrowserFTTF = { 21 | rpc, 22 | adapter: { 23 | onInstalled: async () => {}, 24 | onMessage: () => true, 25 | openIndexPage() { 26 | return chrome.runtime.sendMessage(["openIndexPage"]); 27 | }, 28 | // @ts-expect-error fixing these types is an exercise for another day...
29 | backend: { 30 | getStatus() { 31 | return chrome.runtime.sendMessage(["getStatus"]); 32 | }, 33 | search: async (query) => { 34 | return chrome.runtime.sendMessage(["search", query]); 35 | }, 36 | getPageStatus: async (url) => { 37 | return chrome.runtime.sendMessage(["getPageStatus", url]); 38 | }, 39 | indexPage: async (url) => { 40 | return chrome.runtime.sendMessage(["indexPage", url]); 41 | }, 42 | nothingToIndex: async (url) => { 43 | return chrome.runtime.sendMessage(["nothingToIndex", url]); 44 | }, 45 | findOne: async (url) => { 46 | return chrome.runtime.sendMessage(["findOne", url]); 47 | }, 48 | getRecent: async (options) => { 49 | return chrome.runtime.sendMessage(["getRecent", options]); 50 | }, 51 | }, 52 | }, 53 | };
-------------------------------------------------------------------------------- /src/ui/lib/streaming-export.ts: -------------------------------------------------------------------------------- 1 | import { rpc } from './rpc'; 2 | 3 | // Polyfill for browsers that don't support the File System Access API 4 | // This is a simplified version; for production, consider using a more robust library 5 | const getFileSystemAccessPolyfill = () => { 6 | return { 7 | showSaveFilePicker: async (options: any) => { 8 | throw new Error('File System Access API not supported in this browser'); 9 | } 10 | }; 11 | }; 12 | 13 | // Check if File System Access API is supported 14 | const isFileSystemAccessSupported = () => { 15 | return typeof window !== 'undefined' && 'showSaveFilePicker' in window; 16 | }; 17 | 18 | interface FileSystemAccessAPI { 19 | showSaveFilePicker: (options: any) => Promise<any>; 20 | } 21 | 22 | // Get the File System Access API or a polyfill 23 | const getFileSystemAccess = (): FileSystemAccessAPI => { 24 | if (isFileSystemAccessSupported()) { 25 | // We've already checked that showSaveFilePicker exists on window 26 | return window as any as FileSystemAccessAPI; 27 | } 28 | return getFileSystemAccessPolyfill(); 29 | }; 30 | 31 | /** 32 | * Stream export documents to a file 33 | * @param options Configuration options 34 | * @returns Promise that resolves when export is complete 35 | */ 36 | export const streamingExport = async (options: { 37 | batchSize?: number, 38 | onProgress?: (progress: { current: number, total: number }) => void, 39 | }) => { 40 | const { batchSize = 100, onProgress } = options; 41 | 42 | // Get database stats to estimate workload 43 | const stats = await rpc(['getStats']) as { 44 | document: { count: number }; 45 | document_fragment: { count: number }; 46 | db: { size_bytes: number }; 47 | }; 48 | 49 | // Update progress with initial values 50 | if (onProgress) { 51 | onProgress({ current: 0, total: stats.document.count }); 52 | } 53 | 54 | // Check if File System Access API is supported 55 | if (!isFileSystemAccessSupported()) { 56 | console.log('File System Access API not supported, falling back to regular export'); 57 | // Fall back to the existing exportJson function 58 | return rpc(['exportJson']); 59 | } 60 | 61 | try { 62 | // Use File System Access API to get a file handle 63 | const fileHandle = await getFileSystemAccess().showSaveFilePicker({ 64 | suggestedName: `fttf-${Date.now()}.json`, 65 | types: [{ 66 | description: 'JSON Files', 67 | accept: { 'application/json': ['.json'] } 68 | }], 69 | excludeAcceptAllOption: false, 70 | }); 71 | 72 | // Create a writable stream 73 | const writableStream = await fileHandle.createWritable(); 74 | 75 | // Write the opening of the JSON 76 | const encoder = new TextEncoder(); 77
| await writableStream.write(encoder.encode('{"document":[')); 78 | 79 | // Get total count for progress reporting 80 | const totalDocs = stats.document.count; 81 | let processedCount = 0; 82 | let isFirstBatch = true; 83 | 84 | // Process in batches 85 | while (processedCount < totalDocs) { 86 | // Fetch a batch of documents 87 | const batch = await rpc(['getDocumentBatch', { 88 | offset: processedCount, 89 | limit: batchSize 90 | }]); 91 | 92 | if (!batch || !batch.rows || batch.rows.length === 0) { 93 | break; 94 | } 95 | 96 | // Convert batch to JSON string 97 | let batchJson = ''; 98 | 99 | for (let i = 0; i < batch.rows.length; i++) { 100 | // Add comma separator between batches, but not before the first one 101 | if (i === 0 && !isFirstBatch) { 102 | batchJson += ','; 103 | } else if (i > 0) { 104 | batchJson += ','; 105 | } 106 | 107 | batchJson += JSON.stringify(batch.rows[i]); 108 | } 109 | 110 | // Write batch to file 111 | await writableStream.write(encoder.encode(batchJson)); 112 | 113 | // Update progress 114 | processedCount += batch.rows.length; 115 | isFirstBatch = false; 116 | 117 | if (onProgress) { 118 | onProgress({ current: processedCount, total: totalDocs }); 119 | } 120 | } 121 | 122 | // Write the closing of the JSON 123 | await writableStream.write(encoder.encode(']}')); 124 | 125 | // Close the stream 126 | await writableStream.close(); 127 | 128 | return { success: true }; 129 | } catch (error) { 130 | console.error('Error during streaming export:', error); 131 | 132 | // If there was an error with the File System API, fall back to the regular export 133 | if (error.name === 'NotSupportedError' || error.message.includes('not supported')) { 134 | console.log('Falling back to regular export method'); 135 | return rpc(['exportJson']); 136 | } 137 | 138 | return { 139 | success: false, 140 | message: error instanceof Error ? error.message : 'Unknown error during export' 141 | }; 142 | } 143 | }; -------------------------------------------------------------------------------- /src/ui/main.ts: -------------------------------------------------------------------------------- 1 | import "./global.css"; 2 | import App from "./App.svelte"; 3 | 4 | // Connect to the background page to signal the extension is open 5 | // This is used to trigger automatic migration if needed 6 | const port = chrome.runtime.connect({ name: "extension-page" }); 7 | 8 | const app = new App({ 9 | target: document.getElementById("app")!, 10 | }); 11 | 12 | export default app; 13 | -------------------------------------------------------------------------------- /src/ui/pages/index.html/_layout.svelte: -------------------------------------------------------------------------------- 1 | 40 | 41 | { 43 | if (e.key === "k" && (e.ctrlKey || e.metaKey)) { 44 | e.preventDefault(); 45 | handleCmdK(); 46 | } 47 | }} 48 | /> 49 | 50 |
51 |
52 | 53 |
54 | 55 |
56 | 57 | {#if $menuOpen} 58 | { 60 | $menuOpen = false; 61 | }} 62 | /> 63 | {/if} 64 | 65 |
66 | 76 | 82 |
83 | 84 | 103 | -------------------------------------------------------------------------------- /src/ui/pages/index.html/database-repl.svelte: -------------------------------------------------------------------------------- 1 | 53 | 54 |
55 | 61 | 62 | {#if showInstructions} 63 |
64 |

SQL Playground

65 |

66 | You can directly access the browsing history database here. If you're not familiar with SQL, 67 | ChatGPT can likely help you. 68 |

69 |

Here's an example query that shows the 10 most recently visited documents:

70 |
SELECT * FROM document order by last_visit desc limit 10;
71 |
72 | {/if} 73 | 74 | 75 |
76 | 77 | 87 | -------------------------------------------------------------------------------- /src/ui/pages/index.html/dev.svelte: -------------------------------------------------------------------------------- 1 | 91 | 92 |
93 |

Development Page

94 |

This is a blank development page. Content will be added later.

95 | 96 |
97 | 103 | 104 | 110 |
111 | 112 |
113 |

114 | Open browser console (F12) to see log messages during file operations. 115 |

116 |
117 |
118 | -------------------------------------------------------------------------------- /src/ui/pages/index.html/doc/[url].svelte: -------------------------------------------------------------------------------- 1 | 34 | 35 |
36 | {#if row} 37 |
38 |
39 | { 42 | window.history.back(); 43 | }} 44 | > 45 | Back to Search 46 | 47 | 53 | Open URL 54 | 55 |
56 | 62 |
63 |
64 | 69 |
70 | 71 |
72 |

73 | {row.title} 74 | {row.url} 77 |

78 | {#if showRawContent} 79 |
{row.md_content}
80 | {:else} 81 | 82 | {/if} 83 |
84 | {:else if err} 85 |
86 | {err.message} 87 |
88 | {:else} 89 |
Loading...
90 | {/if} 91 |
92 | 93 | 107 | -------------------------------------------------------------------------------- /src/ui/pages/index.html/index.svelte: -------------------------------------------------------------------------------- 1 | 307 | 308 | { 311 | const key = e.key; 312 | if (keybinds[key]) { 313 | if (enableMouseEvents) enableMouseEvents = false; 314 | keybinds[key](e); 315 | } 316 | }} 317 | /> 318 | 319 |
--------------------------------------------------------------------------------
/src/ui/pages/index.html/index.svelte:
--------------------------------------------------------------------------------
1 | 
307 | 
308 | 
309 | <svelte:window
310 |   on:keydown={(e) => {
311 |     const key = e.key;
312 |     if (keybinds[key]) {
313 |       if (enableMouseEvents) enableMouseEvents = false;
314 |       keybinds[key](e);
315 |     }
316 |   }}
317 | />
318 | 
319 | 
320 | 
321 | 
328 | 
329 | 
332 | 
333 | {#if res}
334 |   Showing {results?.length} of {res.count}. Took
335 |   {Math.round(10 * res.perfMs) / 10}ms.
336 | {:else if $stats && $displaySettings.showStats}
337 | 
338 |   {#each Object.entries($stats) as [k, v]}
339 |     {k}: {v}
340 |   {/each}
341 | 
342 | {/if}
343 | 
344 | 
345 | {#if res}
346 | 
347 | 
348 |   Sort by:
349 | 
359 | 
369 | 
370 | 
371 | {/if}
372 | 
373 | {#if error}
374 | 
377 | 
378 |   Error: {error}
379 |   {errorDetail?.stack}
380 | {/if}
381 | {#if !res}
382 | 
383 | 
384 | 
385 | {/if}
386 | 
387 | {#each Object.entries(dateGroupedResults) as [date, dateGroup], dateIndex (date)}
388 |   {#if Object.keys(dateGroup).length > 0}
389 | 
390 |     {date}
391 |     {#each Object.entries(dateGroup) as [url, group], urlIndex (url)}
392 | 
412 |         (currentIndex = urls.indexOf(url))}
413 |         on:mouseover={() => {
414 |           if (enableMouseEvents) {
415 |             currentIndex = urls.indexOf(url);
416 |           }
417 |         }}
418 |       >
419 | 
420 |       {#each group.hits as hit (hit.rowid)}
421 | 
422 |       {/each}
423 | 
424 |     {/each}
425 | 
426 |   {/if}
427 | {/each}
428 | 
429 | 
430 | 
431 | <MigrationModal on:close={() => (showMigrationModal = false)} />
432 | 
433 | 
440 | 
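The nested {#each} blocks above iterate a two-level grouping: visit date, then URL, then individual hits. A minimal sketch of the shape this implies; apart from hits and rowid, which are visible above, the field names are illustrative:

// Shape implied by the template: results grouped by date, then by URL.
type SearchHit = { rowid: number; snippet?: string };
type DateGroupedResults = Record<
  string, // date label for the group header
  Record<string, { hits: SearchHit[] }> // keyed by URL
>;

// Iteration mirrors the component: Object.entries at both levels.
export function countHits(groups: DateGroupedResults): number {
  let n = 0;
  for (const [, dateGroup] of Object.entries(groups)) {
    for (const [, group] of Object.entries(dateGroup)) {
      n += group.hits.length;
    }
  }
  return n;
}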
--------------------------------------------------------------------------------
/src/ui/pages/index.html/settings.svelte:
--------------------------------------------------------------------------------
1 | 
120 | 
121 | 
122 | 
123 | 
124 | 
150 | 
151 | 
152 |   Settings
153 | 
154 | 
155 |   Import/Export JSON Database
156 | 
157 |   Upload a JSON file to import your database or export your current database to a JSON file.
158 | 
159 | 
160 | 
166 | 
167 | 
210 | 
211 | 
212 | {#if errorMessage}
213 |   {errorMessage}
214 | {/if}
215 | {#if exportErrorMessage}
216 |   {exportErrorMessage}
217 | {/if}
218 | 
219 | 
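The import/export section deals with the same envelope the streaming exporter shown earlier writes: a single top-level document array ({"document":[ ... ]}). A minimal sketch of that shape; the per-document fields beyond those visible in this listing (url, title, last_visit, md_content) are illustrative:

// Envelope written by the streaming exporter: {"document":[ ... ]}
type ExportedDocument = {
  url: string;
  title: string | null;
  last_visit: number | null;
  md_content: string | null; // markdown body, as rendered on the /doc/:url page
};

type ExportFile = { document: ExportedDocument[] };

// Hypothetical validation step when importing such a file:
export function parseExport(json: string): ExportFile {
  const data = JSON.parse(json);
  if (!data || !Array.isArray(data.document)) {
    throw new Error("Not a valid export: missing top-level 'document' array");
  }
  return data as ExportFile;
}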
220 | 
221 | 
222 |   Blacklist Rules
223 | 
224 |   Manage your blacklist rules. These rules determine which pages should not be indexed at
225 |   all, and which should have only their URL indexed.
226 | 
227 |   Note: The % character is used as a wildcard in these rules. It matches any sequence of
228 |   characters. For example, https://example.com/% would match any URL on example.com.
229 | 
230 | 
231 |   You can choose to only index the URL of a page (url_only), or to not index the page at
232 |   all (no_index).
233 | 
234 | 
235 | 
236 | 
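The % wildcard described above follows SQL LIKE semantics. A minimal sketch of equivalent matching in TypeScript, assuming the rules are applied as LIKE patterns; the likeToRegExp function is illustrative, not the extension's implementation:

// Convert a LIKE-style pattern ('%' matches any sequence of characters) to a RegExp.
function likeToRegExp(pattern: string): RegExp {
  const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return new RegExp(`^${escaped.replace(/%/g, ".*")}$`);
}

likeToRegExp("https://example.com/%").test("https://example.com/about"); // true
likeToRegExp("https://example.com/%").test("https://other.com/"); // false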
239 |   Add New Blacklist Rule
240 | 
241 | 
247 | 
251 | 
254 | 
255 | {#if addRuleError}
256 |   {addRuleError}
257 | {/if}
258 | 
259 | 
260 | 
261 | 
262 | 
263 |   Pattern
264 |   Level
265 |   Actions
266 | 
267 | 
268 | 
269 | {#each blacklistRules as rule}
270 | 
271 | 
272 |   {#if rule.pattern.includes("%")}
273 |     {@html rule.pattern.replace(/%/g, "<strong>%</strong>")}
274 |   {:else}
275 |     {rule.pattern}
276 |   {/if}
277 | 
278 |   {rule.level}
279 | 
280 | 
286 | 
287 | 
288 | {/each}
289 | 
290 | 
291 | 
292 | 
293 | 
294 | 
295 | <MigrationModal on:close={() => (showMigrationModal = false)} />
296 | 
297 | 
302 | 
--------------------------------------------------------------------------------
/src/ui/pages/index.html/task-queue.svelte:
--------------------------------------------------------------------------------
1 | 
97 | 
98 | 
99 | 
100 |   Task Queue
101 | 
102 |   This page displays the current state of the task queue. It shows both pending and failed
103 |   tasks, and automatically refreshes to provide up-to-date information.
104 | 
105 | 
106 |   When the system is functioning normally, you can expect the number of pending tasks to
107 |   either be zero or be decreasing.
108 | 
109 | 
110 | 
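The tables below render rows with id, task_type, params, and created_at, plus failed_at and error for failures. A minimal sketch of the row shape this implies; the field types are inferred, not taken from the extension's job-queue schema:

// Row shape implied by the tables below; types are inferred.
type TaskRow = {
  id: number;
  task_type: string;
  params: unknown; // rendered via JSON.stringify(task.params)
  created_at: string | number; // passed through formatDate(...)
  failed_at?: string | number; // failed tasks only
  error?: string; // failed tasks only
};

// Illustrative formatDate matching how the page displays timestamps:
const formatDate = (d: string | number) => new Date(d).toLocaleString();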
111 | 
112 | {#if error}
113 |   Error: {error}
114 | {:else}
115 |   Total tasks in queue: {totalTasks}
116 | 
117 | 
118 |   Pending Tasks
119 | 
120 | 
121 | 
122 |     ID
123 |     Type
124 |     Params
125 |     Created At
126 | 
127 | 
128 |   {#if tasks.length === 0}
129 | 
130 |     There are currently no pending tasks.
131 | 
132 |   {:else}
133 |     {#each tasks as task}
134 | 
135 |       {task.id}
136 |       {task.task_type}
137 |       {JSON.stringify(task.params)}
138 |       {formatDate(task.created_at)}
139 | 
140 |     {/each}
141 |   {/if}
142 | 
143 | 
144 |   {#if tasks.length > 0 && tasks.length < totalTasks - totalFailedTasks}
145 |     Showing {tasks.length} of {totalTasks - totalFailedTasks} pending tasks.
146 |   {/if}
147 | 
148 |   {#if failedTasks.length > 0}
149 | 
150 |     Failed Tasks
151 | 
152 | 
153 | 
154 |       ID
155 |       Type
156 |       Params
157 |       Created At
158 |       Failed At
159 |       Error
160 | 
161 | 
162 | 
163 |     {#each failedTasks as task}
164 | 
165 |       {task.id}
166 |       {task.task_type}
167 |       {JSON.stringify(task.params)}
168 |       {formatDate(task.created_at)}
169 |       {formatDate(task.failed_at)}
170 |       {task.error}
171 | 
172 | 
173 | 
179 | 
185 | 
186 | 
187 | 
188 |     {/each}
189 | 
190 | 
191 |   {#if failedTasks.length < totalFailedTasks}
192 |     Showing {failedTasks.length} of {totalFailedTasks} failed tasks.
193 |   {/if}
194 |   {/if}
195 | {/if}
196 | 
197 | 
198 | 
211 | 
--------------------------------------------------------------------------------
/src/ui/pages/index.svelte:
--------------------------------------------------------------------------------

1 | Not used
2 | This page is not used, because chrome extensions do not support `/` routes.
3 | -------------------------------------------------------------------------------- /src/ui/routes.js: -------------------------------------------------------------------------------- 1 | import { wrap } from "svelte-spa-router/wrap"; 2 | 3 | // Import layout component 4 | import Layout from "./pages/index.html/_layout.svelte"; 5 | import LayoutWrapper from "./LayoutWrapper.svelte"; 6 | 7 | // Import page components 8 | import Index from "./pages/index.html/index.svelte"; 9 | import DatabaseRepl from "./pages/index.html/database-repl.svelte"; 10 | import Settings from "./pages/index.html/settings.svelte"; 11 | import TaskQueue from "./pages/index.html/task-queue.svelte"; 12 | import DocView from "./pages/index.html/doc/[url].svelte"; 13 | import Dev from "./pages/index.html/dev.svelte"; 14 | 15 | // Route definitions with layout wrapper 16 | export const routes = { 17 | // Home page 18 | "/": wrap({ 19 | component: LayoutWrapper, 20 | props: { 21 | layout: Layout, 22 | component: Index, 23 | }, 24 | }), 25 | 26 | // Database REPL page 27 | "/database-repl": wrap({ 28 | component: LayoutWrapper, 29 | props: { 30 | layout: Layout, 31 | component: DatabaseRepl, 32 | }, 33 | }), 34 | 35 | // Settings page 36 | "/settings": wrap({ 37 | component: LayoutWrapper, 38 | props: { 39 | layout: Layout, 40 | component: Settings, 41 | }, 42 | }), 43 | 44 | // Task Queue page 45 | "/task-queue": wrap({ 46 | component: LayoutWrapper, 47 | props: { 48 | layout: Layout, 49 | component: TaskQueue, 50 | }, 51 | }), 52 | 53 | // Document view page with URL parameter 54 | "/doc/:url": wrap({ 55 | component: LayoutWrapper, 56 | props: { 57 | layout: Layout, 58 | component: DocView, 59 | }, 60 | }), 61 | 62 | // Development page (for dev use only) 63 | "/dev": wrap({ 64 | component: LayoutWrapper, 65 | props: { 66 | layout: Layout, 67 | component: Dev, 68 | }, 69 | }), 70 | }; 71 | 72 | // Navigation routes for menu display 73 | export const navigationRoutes = [ 74 | { path: "/", name: "index", label: "Search" }, 75 | { path: "/database-repl", name: "database-repl", label: "SQL" }, 76 | { path: "/settings", name: "settings", label: "Settings" }, 77 | { path: "/task-queue", name: "task-queue", label: "Task Queue" }, 78 | ]; 79 | -------------------------------------------------------------------------------- /src/ui/store/displaySettings.ts: -------------------------------------------------------------------------------- 1 | import { writable } from "svelte/store"; 2 | 3 | // Load preferences from localStorage if available 4 | const loadFromStorage = () => { 5 | try { 6 | const storedSettings = localStorage.getItem("displaySettings"); 7 | if (storedSettings) { 8 | return JSON.parse(storedSettings); 9 | } 10 | } catch (e) { 11 | console.error("Failed to load settings from localStorage", e); 12 | } 13 | return {}; 14 | }; 15 | 16 | // Default settings 17 | const defaultSettings = { 18 | showStats: true, 19 | preprocessQuery: true, 20 | sortMode: "last_visit", // Default sort mode 21 | }; 22 | 23 | // Combine stored settings with defaults 24 | const initialSettings = { 25 | ...defaultSettings, 26 | ...loadFromStorage(), 27 | }; 28 | 29 | // Create the writable store 30 | const settings = writable(initialSettings); 31 | 32 | // Subscribe to changes and save to localStorage 33 | settings.subscribe((value) => { 34 | try { 35 | localStorage.setItem("displaySettings", JSON.stringify(value)); 36 | } catch (e) { 37 | console.error("Failed to save settings to localStorage", e); 38 | } 39 | }); 40 | 41 | export const 
displaySettings = settings; 42 | -------------------------------------------------------------------------------- /src/ui/store/menuState.ts: -------------------------------------------------------------------------------- 1 | import { writable } from "svelte/store"; 2 | 3 | export const menuOpen = writable(false); 4 | -------------------------------------------------------------------------------- /src/ui/store/statsStore.ts: -------------------------------------------------------------------------------- 1 | import { rpc } from "@/ui/lib/rpc"; 2 | import { writable } from "svelte/store"; 3 | 4 | export type Stats = { 5 | Documents: string; 6 | Fragments: string; 7 | Size: string; 8 | }; 9 | 10 | export const stats = writable(null); 11 | 12 | export async function updateStats() { 13 | try { 14 | const _stats = (await rpc(["getStats"])) as { 15 | document: { count: number }; 16 | document_fragment: { count: number }; 17 | db: { size_bytes: number }; 18 | }; 19 | 20 | stats.set({ 21 | Documents: _stats.document.count.toLocaleString(), 22 | Fragments: _stats.document_fragment.count.toLocaleString(), 23 | Size: (_stats.db.size_bytes / 1024 / 1024).toFixed(2) + "MB", 24 | }); 25 | } catch (error) { 26 | console.error("Error updating stats:", error); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /static/screenshot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/static/screenshot-1.png -------------------------------------------------------------------------------- /static/screenshot-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/static/screenshot-2.png -------------------------------------------------------------------------------- /static/screenshot-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/static/screenshot-3.png -------------------------------------------------------------------------------- /static/screenshot-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iansinnott/full-text-tabs-forever/25795d53a1522841956f26b1f6772d9cb340b51a/static/screenshot-4.png -------------------------------------------------------------------------------- /tailwind.config.cjs: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: ["./index.html", "./src/**/*.{js,ts,jsx,tsx,svelte}"], 4 | theme: { 5 | extend: {}, 6 | }, 7 | plugins: [require("@tailwindcss/forms"), require("@tailwindcss/typography"), require('@tailwindcss/line-clamp')], 8 | }; 9 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "esnext", 4 | "module": "esnext", 5 | "moduleResolution": "node", 6 | "useUnknownInCatchVariables": false, 7 | "strict": true, 8 | "sourceMap": true, 9 | "resolveJsonModule": true, 10 | "esModuleInterop": true, 11 | "lib": ["esnext", "dom"], 12 | "typeRoots": 
["node_modules/@types"], 13 | "skipLibCheck": true, 14 | "allowJs": true, 15 | "importHelpers": true, 16 | "removeComments": true, 17 | "forceConsistentCasingInFileNames": true, 18 | "noImplicitAny": false, 19 | "baseUrl": ".", 20 | "paths": { 21 | "@/*": ["src/*"] 22 | } 23 | }, 24 | "include": ["src/**/*.d.ts", "src/**/*.js", "src/**/*.ts", "src/**/*.svelte"] 25 | } 26 | -------------------------------------------------------------------------------- /vite.config.content-script.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from "vite"; 2 | import { svelte, vitePreprocess } from "@sveltejs/vite-plugin-svelte"; 3 | 4 | // https://vitejs.dev/config/ 5 | export default defineConfig({ 6 | build: { 7 | rollupOptions: { 8 | // Vite was not ignoring tmp dir, which is the only reason I added this 9 | input: { 10 | "content-scripts/content-script": "src/content-scripts/content-script.ts", 11 | }, 12 | output: { 13 | inlineDynamicImports: true, 14 | entryFileNames: "[name].js", 15 | }, 16 | }, 17 | minify: false, 18 | emptyOutDir: false, 19 | }, 20 | plugins: [ 21 | svelte({ 22 | preprocess: vitePreprocess(), 23 | onwarn: (warning, handler) => { 24 | // Ignore all a11y warnings 25 | if (warning.code?.startsWith('a11y-')) { 26 | return; 27 | } 28 | handler(warning); 29 | } 30 | }), 31 | ], 32 | }); 33 | -------------------------------------------------------------------------------- /vite.config.ts: -------------------------------------------------------------------------------- 1 | import { svelte, vitePreprocess } from "@sveltejs/vite-plugin-svelte"; 2 | import { defineConfig } from "vite"; 3 | import path from "node:path"; 4 | import fs, { readFileSync, writeFileSync } from "node:fs"; 5 | import archiver from "archiver"; 6 | import type { Manifest } from "webextension-polyfill"; 7 | import vitePluginTopLevelAwait from "vite-plugin-top-level-await"; 8 | 9 | const TARGET: "chrome" | "firefox" = process.env.TARGET as "chrome" | "firefox"; 10 | const FF_ADDON_ID = process.env.FF_ADDON_ID as string; 11 | 12 | if (!["chrome", "firefox"].includes(TARGET)) { 13 | throw new Error(`Invalid TARGET: ${TARGET}. 
Specify TARGET=chrome or TARGET=firefox`); 14 | } 15 | 16 | if (TARGET === "firefox" && !FF_ADDON_ID) { 17 | throw new Error(`FF_ADDON_ID is required for firefox builds`); 18 | } 19 | 20 | const isFirefox = TARGET === "firefox"; 21 | 22 | // https://vitejs.dev/config/ 23 | export default defineConfig({ 24 | build: { 25 | rollupOptions: { 26 | input: { 27 | index: "index.html", 28 | background: "src/background.ts", 29 | }, 30 | output: { 31 | entryFileNames: "[name].js", 32 | }, 33 | }, 34 | minify: false, 35 | emptyOutDir: false, 36 | }, 37 | server: { 38 | headers: { 39 | "Cross-Origin-Opener-Policy": "same-origin", 40 | "Cross-Origin-Embedder-Policy": "require-corp", 41 | }, 42 | }, 43 | optimizeDeps: { 44 | exclude: ["@sqlite.org/sqlite-wasm", "@vlcn.io/crsqlite-wasm"], 45 | }, 46 | resolve: { 47 | alias: { 48 | "@": path.resolve(__dirname, "./src"), 49 | }, 50 | }, 51 | plugins: [ 52 | vitePluginTopLevelAwait(), 53 | svelte({ 54 | preprocess: vitePreprocess(), 55 | onwarn: (warning, handler) => { 56 | // Ignore all a11y warnings 57 | if (warning.code.startsWith('a11y-')) { 58 | return; 59 | } 60 | handler(warning); 61 | } 62 | }), 63 | 64 | // Watch additional files 65 | { 66 | name: "watch-additional-files", 67 | buildStart() { 68 | for (const pathName of ["src/manifest.json", `src/manifest-${TARGET}.json`]) { 69 | if (fs.existsSync(path.resolve(__dirname, pathName))) { 70 | this.addWatchFile(path.resolve(__dirname, pathName)); 71 | } 72 | } 73 | }, 74 | }, 75 | 76 | // Copy assets to dist 77 | { 78 | name: "copy-plugin", 79 | apply: "build", 80 | enforce: "post", 81 | generateBundle() { 82 | const sourceDir = path.resolve(__dirname, "src/assets"); 83 | const destinationDir = path.resolve(__dirname, "dist/assets"); 84 | 85 | fs.mkdirSync(destinationDir, { recursive: true }); 86 | 87 | const files = fs.readdirSync(sourceDir); 88 | 89 | for (const filepath of files) { 90 | const sourcePath = path.join(sourceDir, filepath); 91 | const destinationPath = path.join(destinationDir, filepath); 92 | 93 | fs.copyFileSync(sourcePath, destinationPath); 94 | console.log(`[copy-plugin] ${sourcePath} -> ${destinationPath}`); 95 | } 96 | 97 | try { 98 | const manifest = JSON.parse( 99 | readFileSync(path.resolve(__dirname, "src/manifest.json"), "utf8") 100 | ); 101 | 102 | // Mutate the manifest object 103 | delete manifest["$schema"]; // Schema is just provided for autocomplete but chrome doesn't like it 104 | 105 | // Handle FF special cases 106 | if (isFirefox) { 107 | // Case 1: FF doesn't support service_worker, it prefers a background.scripts array 108 | manifest.background.scripts = [manifest.background.service_worker]; 109 | delete manifest.background.service_worker; 110 | 111 | // Case 2: FF requires an id. See: https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/manifest.json/browser_specific_settings 112 | manifest.browser_specific_settings = { 113 | gecko: { 114 | id: `{${FF_ADDON_ID}}`, // FF loves those braces 115 | strict_min_version: "109.0", // When they added (partial) MV3 support. 
No love for MV3 here, but since it's a hard req for chrome we use it for FF too 116 | }, 117 | }; 118 | } 119 | 120 | writeFileSync( 121 | path.join(__dirname, "dist/manifest.json"), 122 | JSON.stringify(manifest, null, 2) 123 | ); 124 | console.log(`[copy-plugin] copied manifest`); 125 | } catch (err) { 126 | console.error("Could not build manifest", err); 127 | } 128 | }, 129 | }, 130 | 131 | // Create zip dist file for upload to chrome web store 132 | { 133 | name: "zip-plugin", 134 | apply: "build", 135 | enforce: "post", 136 | writeBundle() { 137 | const output = fs.createWriteStream(__dirname + `/fttf-${TARGET}.zip`); 138 | const archive = archiver("zip", { 139 | zlib: { level: 9 }, 140 | }); 141 | 142 | // listen for all archive data to be processed 143 | output.on("close", function () { 144 | console.log(archive.pointer() + " total bytes"); 145 | console.log("Archiver has been finalized and the output file descriptor has closed."); 146 | }); 147 | 148 | // good practice to catch warnings (ie stat failures and other non-blocking errors) 149 | archive.on("warning", function (err) { 150 | if (err.code === "ENOENT") { 151 | console.warn("no file", err); 152 | } else { 153 | // throw error 154 | throw err; 155 | } 156 | }); 157 | 158 | // good practice to catch this error explicitly 159 | archive.on("error", function (err) { 160 | throw err; 161 | }); 162 | 163 | // pipe archive data to the file 164 | archive.pipe(output); 165 | 166 | // append files from a directory 167 | archive.directory(__dirname + "/dist/", false); 168 | 169 | // finalize the archive (ie we are done appending files but streams have to finish yet) 170 | archive.finalize(); 171 | }, 172 | }, 173 | ], 174 | }); 175 | --------------------------------------------------------------------------------
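To make the Firefox branch of the copy-plugin above concrete, a minimal before/after sketch of the manifest rewrite; the manifest fields and the add-on id below are illustrative, not the project's actual values:

// Illustrative input: a Chrome MV3 manifest fragment.
const manifest: any = {
  manifest_version: 3,
  background: { service_worker: "background.js" },
};

// The same mutation the copy-plugin applies for TARGET=firefox:
manifest.background.scripts = [manifest.background.service_worker];
delete manifest.background.service_worker;
manifest.browser_specific_settings = {
  gecko: { id: "{00000000-0000-0000-0000-000000000000}", strict_min_version: "109.0" },
};

// Result: background.scripts === ["background.js"] and no service_worker key.
console.log(JSON.stringify(manifest, null, 2));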