├── .DS_Store
├── .eslintrc.cjs
├── .github
│   ├── FUNDING.yml
│   └── workflows
│       └── node.js.yml
├── .gitignore
├── .npm.release
├── .npmignore
├── .npmrelease
├── CONTRIBUTING.md
├── NOTICE
├── README.md
├── TODO
├── docs
│   ├── OLD-README.md
│   ├── SECURITY.md
│   ├── features.md
│   ├── issues
│   └── todo
├── eslint.config.js
├── exec.js
├── global-run.cjs
├── icons
│   ├── cd.ico
│   ├── dk.icns
│   └── dk.ico
├── package-lock.json
├── package.json
├── public
│   ├── favicon.ico
│   ├── find_cleaned_duplicates.mjs
│   ├── find_crawlable.mjs
│   ├── injection.js
│   ├── library
│   │   └── README.md
│   ├── make_top.mjs
│   ├── old-index.html
│   ├── problem_find.mjs
│   ├── redirector.html
│   ├── style.css
│   ├── test-injection.html
│   └── top.html
├── scripts
│   ├── build_only.sh
│   ├── clean.sh
│   ├── downloadnet-entitlements.xml
│   ├── go_build.sh
│   ├── go_dev.sh
│   ├── postinstall.sh
│   ├── publish.sh
│   ├── release.sh
│   └── sign_windows_release.ps1
├── sign-win.ps1
├── src
│   ├── app.js
│   ├── archivist.js
│   ├── args.js
│   ├── blockedResponse.js
│   ├── bookmarker.js
│   ├── common.js
│   ├── gem-highlighter.js
│   ├── hello.js
│   ├── highlighter.js
│   ├── index.js
│   ├── installBrowser.js
│   ├── launcher.js
│   ├── libraryServer.js
│   ├── protocol.js
│   ├── root.cjs
│   └── root.js
├── stampers
│   ├── macos-new.sh
│   ├── macos.sh
│   ├── nix.sh
│   ├── notarize_macos.sh
│   └── win.bat
└── test.sh
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DO-SAY-GO/dn/c757475ca9f14dd46549774a8a6a89656df362b2/.DS_Store
--------------------------------------------------------------------------------
/.eslintrc.cjs:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | "env": {
3 | "es2021": true,
4 | "node": true
5 | },
6 | "extends": "eslint:recommended",
7 | "parserOptions": {
8 | "ecmaVersion": 13,
9 | "sourceType": "module"
10 | },
11 | "ignorePatterns": [
12 | "build/**/*.js"
13 | ],
14 | "rules": {
15 | }
16 | };
17 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | custom: https://dosaygo.com/downloadnet
4 |
--------------------------------------------------------------------------------
/.github/workflows/node.js.yml:
--------------------------------------------------------------------------------
1 | # This workflow will do a clean installation of node dependencies, cache/restore them, build the source code and run tests across different versions of node
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-nodejs
3 |
4 | name: Node.js CI
5 |
6 | on:
7 |   push:
8 |     branches: [ "fun" ]
9 |   pull_request:
10 |     branches: [ "fun" ]
11 |
12 | jobs:
13 |   build:
14 |
15 |     runs-on: ubuntu-latest
16 |
17 |     strategy:
18 |       matrix:
19 |         node-version: [16.x, 18.x, 19.x]
20 |         # See supported Node.js release schedule at https://nodejs.org/en/about/releases/
21 |
22 |     steps:
23 |     - uses: actions/checkout@v3
24 |     - name: Use Node.js ${{ matrix.node-version }}
25 |       uses: actions/setup-node@v3
26 |       with:
27 |         node-version: ${{ matrix.node-version }}
28 |         cache: 'npm'
29 |     - run: npm ci
30 |     - run: npm run build --if-present
31 |     - run: npm test
32 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pkg
2 | "
3 | "*
4 | *~
5 | .*.un~
6 | *.blob
7 | .\build\*
8 | 22120-arc
9 |
10 | .*.swp
11 |
12 | # Bundling and packaging
13 | 22120.exe
14 | 22120.nix
15 | 22120.mac
16 | 22120.win32.exe
17 | 22120.nix32
18 | bin/*
19 | build/*
20 |
21 | #Leave these to allow install by npm -g
22 | #22120.js
23 | #*.22120.js
24 |
25 | # Library
26 | public/library/cache.json
27 | public/library/http*
28 |
29 |
30 | # Logs
31 | logs
32 | *.log
33 | npm-debug.log*
34 | yarn-debug.log*
35 | yarn-error.log*
36 | lerna-debug.log*
37 |
38 | # Diagnostic reports (https://nodejs.org/api/report.html)
39 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
40 |
41 | # Runtime data
42 | pids
43 | *.pid
44 | *.seed
45 | *.pid.lock
46 |
47 | # Directory for instrumented libs generated by jscoverage/JSCover
48 | lib-cov
49 |
50 | # Coverage directory used by tools like istanbul
51 | coverage
52 | *.lcov
53 |
54 | # nyc test coverage
55 | .nyc_output
56 |
57 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
58 | .grunt
59 |
60 | # Bower dependency directory (https://bower.io/)
61 | bower_components
62 |
63 | # node-waf configuration
64 | .lock-wscript
65 |
66 | # Compiled binary addons (https://nodejs.org/api/addons.html)
67 | build/Release
68 |
69 | # Dependency directories
70 | node_modules/
71 | jspm_packages/
72 |
73 | # TypeScript v1 declaration files
74 | typings/
75 |
76 | # TypeScript cache
77 | *.tsbuildinfo
78 |
79 | # Optional npm cache directory
80 | .npm
81 |
82 | # Optional eslint cache
83 | .eslintcache
84 |
85 | # Microbundle cache
86 | .rpt2_cache/
87 | .rts2_cache_cjs/
88 | .rts2_cache_es/
89 | .rts2_cache_umd/
90 |
91 | # Optional REPL history
92 | .node_repl_history
93 |
94 | # Output of 'npm pack'
95 | *.tgz
96 |
97 | # Yarn Integrity file
98 | .yarn-integrity
99 |
100 | # dotenv environment variables file
101 | .env
102 | .env.test
103 |
104 | # parcel-bundler cache (https://parceljs.org/)
105 | .cache
106 |
107 | # Next.js build output
108 | .next
109 |
110 | # Nuxt.js build / generate output
111 | .nuxt
112 | dist
113 |
114 | # Gatsby files
115 | .cache/
116 | # Comment in the public line in if your project uses Gatsby and *not* Next.js
117 | # https://nextjs.org/blog/next-9-1#public-directory-support
118 | # public
119 |
120 | # vuepress build output
121 | .vuepress/dist
122 |
123 | # Serverless directories
124 | .serverless/
125 |
126 | # FuseBox cache
127 | .fusebox/
128 |
129 | # DynamoDB Local files
130 | .dynamodb/
131 |
132 | # TernJS port file
133 | .tern-port
134 |
--------------------------------------------------------------------------------
/.npm.release:
--------------------------------------------------------------------------------
1 | Sun Jan 15 15:11:49 CST 2023
2 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 |
2 | .*.swp
3 | *~
4 | .*un~
5 |
6 | # Bundling and packaging
7 | build/bin/*
8 |
9 | build/cjs/*
10 |
11 |
--------------------------------------------------------------------------------
/.npmrelease:
--------------------------------------------------------------------------------
1 | Fri Aug 30 00:09:47 CST 2024
2 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | When contributing to this repository, please first discuss the change you wish to make via issue,
4 | email, or any other method with the owners of this repository before making a change.
5 |
6 | Please note we have a code of conduct, please follow it in all your interactions with the project.
7 |
8 | ## Pull Request Process
9 |
10 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a
11 | build.
12 | 2. Update the README.md with details of changes to the interface, this includes new environment
13 | variables, exposed ports, useful file locations and container parameters.
14 | 3. Increase the version numbers in any examples files and the README.md to the new version that this
15 | Pull Request would represent. The versioning scheme we use is [SemVer](http://semver.org/).
16 | 4. You may merge the Pull Request in once you have the sign-off of two other developers, or if you
17 | do not have permission to do that, you may request the second reviewer to merge it for you.
18 |
19 | ## Code of Conduct
20 |
21 | ### Our Pledge
22 |
23 | In the interest of fostering an open and welcoming environment, we as
24 | contributors and maintainers pledge to making participation in our project and
25 | our community a harassment-free experience for everyone, regardless of age, body
26 | size, disability, ethnicity, gender identity and expression, level of experience,
27 | nationality, personal appearance, race, religion, or sexual identity and
28 | orientation.
29 |
30 | ### Our Standards
31 |
32 | Examples of behavior that contributes to creating a positive environment
33 | include:
34 |
35 | * Using welcoming and inclusive language
36 | * Being respectful of differing viewpoints and experiences
37 | * Gracefully accepting constructive criticism
38 | * Focusing on what is best for the community
39 | * Showing empathy towards other community members
40 |
41 | Examples of unacceptable behavior by participants include:
42 |
43 | * The use of sexualized language or imagery and unwelcome sexual attention or
44 | advances
45 | * Trolling, insulting/derogatory comments, and personal or political attacks
46 | * Public or private harassment
47 | * Publishing others' private information, such as a physical or electronic
48 | address, without explicit permission
49 | * Other conduct which could reasonably be considered inappropriate in a
50 | professional setting
51 |
52 | ### Our Responsibilities
53 |
54 | Project maintainers are responsible for clarifying the standards of acceptable
55 | behavior and are expected to take appropriate and fair corrective action in
56 | response to any instances of unacceptable behavior.
57 |
58 | Project maintainers have the right and responsibility to remove, edit, or
59 | reject comments, commits, code, wiki edits, issues, and other contributions
60 | that are not aligned to this Code of Conduct, or to ban temporarily or
61 | permanently any contributor for other behaviors that they deem inappropriate,
62 | threatening, offensive, or harmful.
63 |
64 | ### Scope
65 |
66 | This Code of Conduct applies both within project spaces and in public spaces
67 | when an individual is representing the project or its community. Examples of
68 | representing a project or community include using an official project e-mail
69 | address, posting via an official social media account, or acting as an appointed
70 | representative at an online or offline event. Representation of a project may be
71 | further defined and clarified by project maintainers.
72 |
73 | ### Enforcement
74 |
75 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
76 | reported by contacting the project team at [INSERT EMAIL ADDRESS]. All
77 | complaints will be reviewed and investigated and will result in a response that
78 | is deemed necessary and appropriate to the circumstances. The project team is
79 | obligated to maintain confidentiality with regard to the reporter of an incident.
80 | Further details of specific enforcement policies may be posted separately.
81 |
82 | Project maintainers who do not follow or enforce the Code of Conduct in good
83 | faith may face temporary or permanent repercussions as determined by other
84 | members of the project's leadership.
85 |
86 | ### Attribution
87 |
88 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
89 | available at [http://contributor-covenant.org/version/1/4][version]
90 |
91 | [homepage]: http://contributor-covenant.org
92 | [version]: http://contributor-covenant.org/version/1/4/
93 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright Dosyago Corporation & Cris Stringfellow (https://dosaygo.com)
2 |
3 | 22120 and all previously released versions, including binaries, NPM packages, and
4 | Docker images (including those published under the name archivist1, and all other
5 | previous names), are re-licensed under the following PolyForm Strict License 1.0.0,
6 | and all previous licenses are revoked.
7 |
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # :floppy_disk: [DownloadNet (dn)](https://github.com/dosyago/DownloadNet) – Your Offline Web Archive with Full Text Search
2 |
3 | 
4 | 
5 | 
6 |
7 | Imagine a world where everything you browse online is saved and accessible, even when you're offline. That's the magic of DownloadNet (dn).
8 |
9 | ## Why dn?
10 |
11 | - **Seamless Offline Experience** :earth_africa:: With dn, your offline browsing feels exactly like being online. It hooks directly into your browser, caching every page you visit, so you never lose track of that one article or resource you meant to revisit.
12 | - **Full Text Search** :mag:: Unlike other archiving tools, dn gives you the power to search through your entire archive. No more digging through countless files—just search and find.
13 | - **Completely Private** :lock:: Everything is stored locally on your machine. Browse whatever you want, with the peace of mind that it's all private and secure.
14 |
15 | ## Getting Started
16 |
17 | ### 1. **Download a Pre-built Binary (Simplest Option)** :package:
18 | If you’re not familiar with Git or npm, this is the easiest way to get started:
19 |
20 | 1. **Go to the [Releases Page](https://github.com/dosyago/DownloadNet/releases)**
21 | 2. **Download** the binary for your operating system (e.g., Windows, macOS, Linux).
22 | 3. **Run** the downloaded application. That’s it! You’re ready to start archiving.
23 |
24 | > [!NOTE]
25 | > macOS now has a proper package installer, so it will be even easier.
26 |
27 | ### 2. **Install via npm (For Users Familiar with Command Line)** :rocket:
28 |
29 | 1. **Open your terminal** (Command Prompt on Windows, Terminal on macOS/Linux).
30 | 2. **Install dn globally** with npm:
31 | ```sh
32 | npm i -g downloadnet@latest
33 | ```
34 | 3. **Start dn** by typing:
35 | ```sh
36 | dn
37 | ```
38 |
39 | > [!NOTE]
40 | > Make sure you have Node.js installed before attempting to use npm. If you're new to npm, see the next section for guidance.
41 |
42 | ### 3. **New to npm? No Problem!** :bulb:
43 |
44 | If you’ve never used npm before, don’t worry—it’s easy to get started.
45 |
46 | - **What is npm?** npm is a package manager for Node.js, a JavaScript runtime that allows you to run server-side code. You’ll use npm to install and manage software like dn.
47 | - **Installing Node.js and npm:** The easiest way to install Node.js (which includes npm) is by using Node Version Manager (nvm). This tool allows you to easily install, manage, and switch between different versions of Node.js.
48 |
49 | **To install nvm:**
50 |
51 | 1. **Visit the [nvm GitHub page](https://github.com/nvm-sh/nvm#installing-and-updating)** for installation instructions.
52 | 2. **Follow the steps** to install nvm on your system.
53 | 3. Once nvm is installed, **install the latest version of Node.js** by running:
54 | ```sh
55 | nvm install node
56 | ```
57 | 4. Now you can install dn using npm as described in the section above!
58 |
59 | > [!TIP]
60 | > Using nvm allows you to easily switch between Node.js versions and manage your environment more effectively.
61 |
62 | ### 4. **Build Your Own Binary (For Developers or Power Users)** :hammer_and_wrench:
63 |
64 | If you like to tinker and want to build the binary yourself, here’s how:
65 |
66 | 1. **Download Git:** If you haven’t used Git before, download and install it from [git-scm.com](https://git-scm.com/).
67 | 2. **Clone the Repository:**
68 | ```sh
69 | git clone https://github.com/dosyago/DownloadNet.git
70 | ```
71 | 3. **Navigate to the Project Directory:**
72 | ```sh
73 | cd DownloadNet
74 | ```
75 | 4. **Install Dependencies:**
76 | ```sh
77 | npm i
78 | ```
79 | 5. **Build the Binary:**
80 | ```sh
81 | npm run build
82 | ```
83 |
84 | 6. **Find Your Binary:** The newly built binary will be in the `./build/bin` directory, ready to be executed!
85 |
86 | ### 5. **Run Directly from the Repository (Quick Start)** :runner:
87 |
88 | Want to get dn up and running without building a binary? No problem!
89 |
90 | 1. **Clone the Repository:**
91 | ```sh
92 | git clone https://github.com/dosyago/DownloadNet.git
93 | ```
94 | 2. **Navigate to the Project Directory:**
95 | ```sh
96 | cd DownloadNet
97 | ```
98 | 3. **Install Dependencies:**
99 | ```sh
100 | npm i
101 | ```
102 | 4. **Start dn:**
103 | ```sh
104 | npm start
105 | ```
106 |
107 | And just like that, you’re archiving!
108 |
109 | ## How It Works
110 |
111 | dn runs as an intercepting proxy, hooking into your browser's internal fetch cycle. Once you fire up dn, it automatically configures your browser, and you’re good to go. Everything you browse is archived, and you can choose to save everything or just what you bookmark.
112 |
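Under the hood this is DevTools-protocol response interception. Here is a minimal sketch of the idea using the `chrome-remote-interface` package (dn ships its own protocol plumbing in `src/protocol.js`, so treat this as an illustration, not the actual implementation):

```js
// Illustrative sketch only: pause each response, read its body, and
// hand it to an archiving routine. archive() is a hypothetical helper.
import CDP from 'chrome-remote-interface';

const client = await CDP(); // connect to the browser's debugging port
const { Fetch } = client;

// Pause requests at the Response stage so the body is available
await Fetch.enable({ patterns: [{ requestStage: 'Response' }] });

Fetch.requestPaused(async ({ requestId, request, responseStatusCode, responseHeaders }) => {
  const { body, base64Encoded } = await Fetch.getResponseBody({ requestId });
  archive({ // hypothetical: persist the response keyed by method + URL
    key: `${request.method} ${request.url}`,
    responseCode: responseStatusCode,
    headers: responseHeaders,
    body,
    base64Encoded,
  });
  await Fetch.continueRequest({ requestId }); // let the browser proceed
});
```
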
113 | ### Modes:
114 |
115 | - **Save Mode** :floppy_disk:: Archive and index as you browse.
116 | - **Serve Mode** :open_file_folder:: Browse your saved content as if you were still online.
117 |
118 | > [!CAUTION]
119 | > As your archive grows, you may encounter performance issues. If that happens, you can raise Node's memory ceiling by passing runtime arguments such as `--max-old-space-size` (for example, via the `NODE_OPTIONS` environment variable).
120 |
121 | ## Accessing Your Archive
122 |
123 | Once dn is running, your archive is at your fingertips. Just go to `http://localhost:22120` in your browser. Your archive’s control panel opens automatically, and from there, you can search, configure settings, and explore everything you’ve saved.
124 |
125 | ## Minimalistic Interface, Maximum Power
126 |
127 | dn’s interface is basic but functional. It’s not about flashy design; it’s about delivering what you need—offline access to the web, as if you were still connected.
128 |
129 | ## Advanced Settings (If Needed)
130 |
131 | As your archive grows, you may want to adjust where it's stored, manage memory settings, or blacklist domains you don’t want to archive. All of these settings can be tweaked directly from the control panel or command line.
132 |
133 | ## Get Started Now
134 |
135 | With dn, you’ll never lose track of anything you’ve read online. It’s all right there in your own offline archive, fully searchable and always accessible. Whether you're in save mode or serve mode, dn keeps your digital life intact.
136 |
137 | **:arrow_down: Download** | **:rocket: Install** | **:runner: Run** | **:mag_right: Never Lose Anything Again**
138 |
139 | [Get Started with dn](https://github.com/dosyago/DownloadNet)
140 |
141 | ----
142 |
--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | Ultimate Goal
2 |
3 | - stable across releases (binaries, npm, can add to winget/choco in future)
4 | - revenue
5 |
6 | ----
7 |
8 | Releases
9 |
10 | - macos signed
11 | - win signed
12 | - linux
13 | - release per arch where relevant as well
14 |
15 | Future
16 |
17 | - UX to select an existing Chrome profile from standard locations. Identify via Google Profile Picture.png, then copy to a $USER_DATA_DIR/Default directory (rsync or robocopy maybe) and pass --user-data-dir=$USER_DATA_DIR for it to just work, as of Chrome 136 anyway.
18 | - consider other mitigations if these are ineffective (watermark in archives, other limitations, closed source / more advanced features unlocked, etc)
19 | - More future tasks:
20 | Marketing
21 |
22 | - /download-net page on dosaygo.com
23 |
24 | Crawl fixes
25 |
26 | - make batch size work
27 | - ensure no more than one tab from any domain per batch (so that timeouts between loads are enforced)
28 | - save crawl in a "running crawls page"
29 | - be able to pause a crawl and restart it (should be simple), with crawl state persisted to disk.
30 |
31 | Add product key
32 |
33 | - product key section in crawl and settings
34 | - 15 minutes then shutdown
35 | - no free eval license key
36 | - license is 69 per seat per year
37 | - plumbing for the backend
38 |
39 | Dev
40 |
41 | - add cross plat node exec.js for scripts
42 |
43 |
--------------------------------------------------------------------------------
/docs/OLD-README.md:
--------------------------------------------------------------------------------
1 | # :floppy_disk: [DownloadNet](https://github.com/c9fe/22120) [](https://sloc.xyz) [](https://npmjs.com/package/archivist1) [](https://npmjs.com/package/downloadnet) [](https://GitHub.com/crisdosyago/DownloadNet/releases) [](https://hits.seeyoufarm.com) 
2 |
3 | :floppy_disk: - an internet on yer Disk
4 |
5 | **DownloadNet** (codename *PROJECT 22120*) is an archivist browser controller that caches everything you browse, plus a library server with full text search to serve your archive.
6 |
7 | **Now with full text search over your archive.**
8 |
9 | This feature was just released in version 2, so it will improve over time.
10 |
11 | ## And one more thing...
12 |
13 | **Coming to a future release, soon!**: The ability to publish your own search engine that you curated with the best resources based on your expert knowledge and experience.
14 |
15 | ## Get it
16 |
17 | [Download a release](https://github.com/crisdosyago/Diskernet/releases)
18 |
19 | or ...
20 |
21 | **Get it on [npm](https://www.npmjs.com/package/downloadnet):**
22 |
23 | ```sh
24 | $ npm i -g downloadnet@latest
25 | ```
26 |
27 | or...
28 |
29 | **Build your own binaries:**
30 |
31 | ```sh
32 | $ git clone https://github.com/crisdosyago/DownloadNet
33 | $ cd DownloadNet
34 | $ npm i
35 | $ ./scripts/build_setup.sh
36 | $ ./scripts/compile.sh
37 | $ cd bin/
38 | ```
39 |
40 |
41 | ----------------
42 | - [Overview](#classical_building-22120---)
43 | * [License](#license)
44 | * [About](#about)
45 | * [Get 22120](#get-22120)
46 | * [Using](#using)
47 | + [Pick save mode or serve mode](#pick-save-mode-or-serve-mode)
48 | + [Exploring your 22120 archive](#exploring-your-22120-archive)
49 | * [Format](#format)
50 | * [Why not WARC (or another format like MHTML) ?](#why-not-warc-or-another-format-like-mhtml-)
51 | * [How it works](#how-it-works)
52 | * [FAQ](#faq)
53 | + [Do I need to download something?](#do-i-need-to-download-something)
54 | + [Can I use this with a browser that's not Chrome-based?](#can-i-use-this-with-a-browser-thats-not-chrome-based)
55 | + [How does this interact with Ad blockers?](#how-does-this-interact-with-ad-blockers)
56 | + [How secure is running chrome with remote debugging port open?](#how-secure-is-running-chrome-with-remote-debugging-port-open)
57 | + [Is this free?](#is-this-free)
58 | + [What if it can't find my chrome?](#what-if-it-cant-find-my-chrome)
59 | + [What's the roadmap?](#whats-the-roadmap)
60 | + [What about streaming content?](#what-about-streaming-content)
61 | + [Can I black list domains to not archive them?](#can-i-black-list-domains-to-not-archive-them)
62 | + [Is there a DEBUG mode for troubleshooting?](#is-there-a-debug-mode-for-troubleshooting)
63 | + [Can I version the archive?](#can-i-version-the-archive)
64 | + [Can I change the archive path?](#can-i-change-the-archive-path)
65 | + [Can I change this other thing?](#can-i-change-this-other-thing)
66 |
67 | ------------------
68 |
69 | ## License
70 |
71 | 22120 is licensed under Polyform Strict License 1.0.0 (no modification, no distribution). You can purchase a license for different uses below:
72 |
73 |
74 | - for personal, research, noncommercial purposes:
75 | [Buy a Perpetual Non-commercial Use License of the current Version re-upped Monthly to the Latest Version, USD$1.99 per month](https://buy.stripe.com/fZeg0a45zdz58U028z) [Read license](https://github.com/DOSYCORPS/polyform-licenses/blob/1.0.0/PolyForm-Noncommercial-1.0.0.md)
76 | - for part of your internal tooling in your org: [Buy a Perpetual Internal Use License of the current Version re-upped Monthly to the Latest Version, USD $12.99 per month](https://buy.stripe.com/00g4hsgSlbqXb288wY) [Read license](https://github.com/DOSYCORPS/polyform-licenses/blob/1.0.0/PolyForm-Internal-Use-1.0.0.md)
77 | - for anywhere in your business: [Buy a Perpetual Small-medium Business License of the current Version re-upped Monthly to the Latest Version, USD $99 per month](https://buy.stripe.com/aEUbJUgSl2UreekdRj) [Read license](https://github.com/DOSYCORPS/polyform-licenses/blob/1.0.0/PolyForm-Small-Business-1.0.0.md)
78 |
79 |
79 | Top
80 |
81 | ## About
82 |
83 | **This project literally makes your web browsing available COMPLETELY OFFLINE.** Your browser does not even know the difference. It's literally that amazing. Yes.
84 |
85 | Save your browsing, then switch off the net and go to `http://localhost:22120` and switch mode to **serve** then browse what you browsed before. It all still works.
86 |
87 | **warning: if you have Chrome open, it will close it automatically when you open 22120, and relaunch it. You may lose any unsaved work.**
88 |
89 | Top
90 |
91 | ## Get 22120
92 |
93 | 3 ways to get it:
94 |
95 | 1. Get a binary from the [releases page](https://github.com/c9fe/22120/releases), or
96 | 2. Run with npx: `npx downloadnet@latest`, or
97 | - `npm i -g downloadnet@latest && exlibris`
98 | 3. Clone this repo and run as a Node.JS app: `npm i && npm start`
99 |
100 | Top
101 |
102 | ## Using
103 |
104 | ### Pick save mode or serve mode
105 |
106 | Go to http://localhost:22120 in your browser,
107 | and follow the instructions.
108 |
109 | Top
110 |
111 | ### Exploring your 22120 archive
112 |
113 | The archive will be located in `22120-arc/public/library`.\*
114 |
115 | But it's not public, don't worry!
116 |
117 | You can also check out the archive index, for a listing of every title in the archive. The index is accessible from the control page, which by default is at [http://localhost:22120](http://localhost:22120) (unless you changed the port).
118 |
119 | \**Note: `22120-arc` is the archive root of a single archive, and by default it is placed in your home directory. But you can change the parent directory for `22120-arc` to have multiple archives.*
120 |
121 | Top
122 |
123 | ## Format
124 |
125 | The archive format is:
126 |
127 | `22120-arc/public/library//.json`
128 |
129 | Inside the JSON file is a JSON object with the headers, response code, key, and a base64-encoded response body.
130 |
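A minimal sketch of writing such a record (the `pathForKey` helper is hypothetical, named here only for illustration):

```js
// Sketch: persist one archived response as a JSON metadata record.
// pathForKey() is a hypothetical mapping from a key to a file path.
import fs from 'fs';

function saveResponse(request, response) {
  const key = `${request.method} ${request.url}`;
  const record = {
    key,
    responseCode: response.status,
    headers: response.headers,
    body: Buffer.from(response.body).toString('base64'), // body stored as base64
  };
  fs.writeFileSync(pathForKey(key), JSON.stringify(record));
}
```
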
131 | Top
132 |
133 | ## Why not WARC (or another format like MHTML) ?
134 |
135 | **The case for the 22120 format.**
136 |
137 | Other formats (like MHTML and SingleFile) save translations of the resources you archive. They create modifications, such as altering the internal structure of the HTML, changing hyperlinks and URLs into "flat" embedded data URIs or local references, and require other "hacks" in order to save a "perceptually similar" copy of the archived resource.
138 |
139 | 22120 throws all that out, and calls rubbish on it. 22120 saves a *verbatim* **high-fidelity** copy of the resources you archive. It does not alter their internal structure in any way. Instead it records each resource in its own metadata file. In that way it is more similar to HAR and WARC, but still radically different. Compared to WARC and HAR, our format is radically simplified, throwing out most of the metadata information and unnecessary fields these formats collect.
140 |
141 | **Why?**
142 |
143 | At 22120, we believe in the resources and in verbatim copies. We don't anoint ourselves as all-knowing enough to modify the resource source of truth before we archive it, just so it can "fit the format" we choose. We don't believe we need to decorate with obtuse and superfluous metadata. We don't believe we should be modifying or altering resources we archive. We believe we should save them exactly as they were presented. We believe in simplicity. We believe the format should fit (or at least accommodate, and be suited to) the resource, not the other way around. We don't believe in conflating **metadata** with **content**; so we separate them. We believe separating metadata and content, and keeping the content pure and unaltered throughout the archiving process, is not only the right thing to do, it simplifies every part of the audit trail, because we know that the modifications between archived copies of a resource are due to changes to the resources themselves, not artefacts of the format or archiving process.
144 |
145 | Both SingleFile and MHTML require mutilating modifications of the resources so that the resources can be "forced to fit" the format. At 22120, we believe this is not required (and in any case should never be performed). We see it as akin to lopping off the arms of a Roman statue in order to fit it into a presentation and security display box. How ridiculous! The web may be a more "pliable" medium but that does not mean we should treat it without respect for its inherent content.
146 |
147 | **Why is changing the internal structure of resources so bad?**
148 |
149 | In our view, the internal structure of the resource as presented *is the canon*. Internal structure is not just substitutable "presentation" - no, in fact it encodes vital semantic information such as hyperlink relationships, source choices, and the "strokes" of the resource author as they create their content, even if it's mediated through a web server or web framework.
150 |
151 | **Why else is 22120 the obvious and natural choice?**
152 |
153 | 22120 also archives resources exactly as they are sent to the browser. It runs connected to a browser, and so is able to access the full scope of resources the browser receives (with the exception, for now, of video, audio and WebSockets) in their highest fidelity, without modification, and to archive them in the exact format presented to the user. Many resources undergo presentational and processing changes before they are presented to the user. This is the ubiquitous "web app", where client-side scripting enabled by JavaScript creates resources and resource views on the fly. These sorts of "hyper", "realtime" or "client side" resources, prevalent in SPAs, cannot be archived by traditional `wget`-based archiving tools, at least not via the normal archive flow.
154 |
155 | In short, the web is an *online* medium, and it should be archived and presented in the same fashion. 22120 archives content exactly as it is received and presented by a browser, and it also replays that content exactly as if the resource were being taken from online. Yes, it requires a browser for this exercise, but that browser need not be connected to the internet. It is only natural that viewing a web resource requires the web browser. And because of 22120 the browser doesn't know the difference! Resources presented to the browser from a remote web site, and resources given to the browser by 22120, are seen by the browser as ***exactly the same.*** This ensures that the people viewing the archive are also not let down, and are given the chance to have the exact same experience as if they were viewing the resource online.
156 |
157 | Top
158 |
159 | ## How it works
160 |
161 | Uses the DevTools protocol to intercept all requests, and caches responses onto disk against a key made of (METHOD and URL). It also maintains an in-memory set of keys so it knows what it has on disk.
162 |
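A sketch of that in-memory bookkeeping (illustrative names, not the actual source):

```js
// Sketch: a Set of "METHOD URL" keys answers "do we already have
// this response on disk?" without touching the disk.
const keys = new Set();

function remember(method, url) {
  keys.add(`${method} ${url}`);
}

function hasArchived(method, url) {
  return keys.has(`${method} ${url}`);
}
```
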
163 | Top
164 |
165 | ## FAQ
166 |
167 | ### Do I need to download something?
168 |
169 | Yes. But... if you like **22120**, you might love the clientless hosted version coming in future. You'll be able to build your archives online from any device, without any download, then download the archive to run on any desktop. You'll need to sign up to use it, but you can jump the queue and sign up [today](https://dosyago.com).
170 |
171 | ### Can I use this with a browser that's not Chrome-based?
172 |
173 | No.
174 |
175 | Top
176 |
177 | ### How does this interact with Ad blockers?
178 |
179 | Interacts just fine. The things ad blockers stop will not be archived.
180 |
181 | Top
182 |
183 | ### How secure is running chrome with remote debugging port open?
184 |
185 | Seems pretty secure. It's not exposed to the public internet, and pages you load that try to use it cannot use the protocol for anything (except to open a new tab, which they can do anyway). It seems there's a potential risk from malicious browser extensions, but we'd need to confirm that and, if that's so, work out blocks. See [this useful security related post](https://github.com/c9fe/22120/issues/67) for some info.
186 |
187 | Top
188 |
189 | ### Is this free?
190 |
191 | Yes this is totally free to download and use for personal non-commercial use. If you want to modify or distribute it, or use it commercially (either internally or for customer functions) you need to purchase a [Noncommercial, internal use, or SMB license](#license).
192 |
193 | Top
194 |
195 | ### What if it can't find my chrome?
196 |
197 | See this useful [issue](https://github.com/c9fe/22120/issues/68).
198 |
199 | Top
200 |
201 | ### What's the roadmap?
202 |
203 | - Full text search ✅
204 | - Library server to serve archive publicly.
205 | - Distributed p2p web browser on IPFS
206 |
207 | Top
208 |
209 | ### What about streaming content?
210 |
211 | The following are probably hard (and I haven't thought much about):
212 |
213 | - Streaming content (audio, video)
214 | - "Impure" request response pairs (such as if you call GET /endpoint 1 time you get "A", if you call it a second time you get "AA", and other examples like this).
215 | - WebSockets (how to capture and replay that faithfully?)
216 |
217 | Probably some way to do this tho.
218 |
219 | Top
220 |
221 | ### Can I black list domains to not archive them?
222 |
223 | Yes! Put any domains into `22120-arc/no.json`\*, e.g.:
224 |
225 | ```json
226 | [
227 | "*.horribleplantations.com",
228 | "*.cactusfernfurniture.com",
229 | "*.gustymeadows.com",
230 | "*.nytimes.com",
231 | "*.cnn.co?"
232 | ]
233 | ```
234 |
235 | 22120 will not cache any resource with a host matching those. Wildcards (a matching sketch follows the list):
236 |
237 | - `*` (0 or more anything) and
238 | - `?` (0 or 1 anything)
239 |
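A minimal sketch of how such wildcards could translate into a host test (assuming a regex-based matcher; not necessarily what the source does):

```js
// Sketch only: turn a blacklist pattern like "*.nytimes.com" into a
// RegExp and test hostnames against it.
function patternToRegExp(pattern) {
  const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&'); // escape regex specials, but keep * and ?
  return new RegExp('^' + escaped
    .replace(/\*/g, '.*')  // * matches 0 or more of anything
    .replace(/\?/g, '.?')  // ? matches 0 or 1 of anything
    + '$');
}

patternToRegExp('*.cnn.co?').test('edition.cnn.com'); // true
```
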
240 | \**Note: the `no` file is per-archive. `22120-arc` is the archive root of a single archive, and by default it is placed in your home directory. But you can change the parent directory for `22120-arc` to have multiple archives, and each archive requires its own `no` file, if you want a blacklist in that archive.*
241 |
242 | Top
243 |
244 | ### Is there a DEBUG mode for troubleshooting?
245 |
246 | Yes, just make sure you set an environment variable called `DEBUG_22120` to anything non-empty.
247 |
248 | So for example on POSIX systems:
249 |
250 | ```bash
251 | export DEBUG_22120=True
252 | ```
253 |
254 | Top
255 |
256 | ### Can I version the archive?
257 |
258 | Yes! But you need to use `git` for versioning. Just initialize a git repo in your archive directory, and when you want to save a snapshot, make a new git commit.
259 |
260 | Top
261 |
262 | ### Can I change the archive path?
263 |
264 | Yes, there's a control for changing the archive path in the control page: http://localhost:22120
265 |
266 | Top
267 |
268 | ### Can I change this other thing?
269 |
270 | There are a few command line arguments. You'll see the format printed as the first line when you start the program.
271 |
272 | For other things you can examine the source code.
273 |
274 | Top
275 |
276 |
--------------------------------------------------------------------------------
/docs/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | The following versions of this project are currently being supported with
6 | security updates.
7 |
8 | | Version | Supported |
9 | | ------- | ------------------ |
10 | | Latest | :white_check_mark: |
11 |
12 |
13 | ## Reporting a Vulnerability
14 |
15 | To report a vulnerability, contact: cris@dosycorp.com
16 |
17 | To view previous responsible disclosure vulnerability reports, remediation write-ups, notes and other information, please visit the [Dosyago Responsible Disclosure Center](https://github.com/dosyago/vulnerability-reports)
18 |
--------------------------------------------------------------------------------
/docs/features.md:
--------------------------------------------------------------------------------
1 | Cool Possible Feature Ideas
2 |
3 | - might be nice to have historical documents indexed as well. For example, every time we reload a page we could add a new copy to the index if it's different, or we could add a new copy if it's been more than X time since the last time we added it (say 1 day or 1 week). Then we show all results in search (maybe in an expander under the main URL, like "historical URL"), so you can find a result that was on the front page of HN 1 year ago or 3 weeks ago, even if you revisit and reindex HN every day. (A sketch of this policy follows below.)
4 |
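A sketch of the proposed snapshot policy (illustrative only, not existing code):

```js
// Sketch: add a historical copy to the index only when the content
// changed, or when enough time has passed since the last snapshot.
const MIN_SNAPSHOT_GAP = 7 * 24 * 60 * 60 * 1000; // e.g. one week, in ms

function shouldAddHistoricalCopy(lastSnapshotTime, now, contentChanged) {
  return contentChanged || (now - lastSnapshotTime) > MIN_SNAPSHOT_GAP;
}
```
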
5 |
--------------------------------------------------------------------------------
/docs/issues:
--------------------------------------------------------------------------------
1 | - ndx index seems to lose documents.
2 | - e.g.
3 | 1. visit goog:hell
4 | 2. visit top link: wiki - hell
5 | 3. visit hellomagainze.com
6 | 4. search hell
7 | 5. see results: goog/hell, wiki/hell, hellomag
8 | 6. reload wiki - hell
9 | 7. search hell
10 | 8. see results: wiki/hell, hellomag
11 | - WHERE THE HELL DID goog/hell go?
12 |
13 |
--------------------------------------------------------------------------------
/docs/todo:
--------------------------------------------------------------------------------
1 | - complete snippet generation
2 | - sometimes we are not getting any segments. In that case we should just show the first part of the file.
3 | - improve trigram segmenter: lower max segment length, increase fore and aft context
4 | - Index.json is randomly getting clobbered sometimes. Investigate and fix. Important because this breaks the whole archive.
5 | - No idea what's causing this after a small investigation. But I've added a log on saveIndex to see when it writes.
6 | - publish button
7 | - way to selectively add (bookmark mode)
8 | - way to remove (all modes) items from index
9 | - save trigram index to disk
10 | - let's not reindex unless we have changed contentSignature
11 | - let's not write FTS indexes unless we have changed them since last time (UpdatedKeys)
12 | - result paging
13 | - We need to not open other localhosts if we already have one open
14 | - We need to reload on localhost 22120 if we open with that
15 | - throttle how often this can occur per URL
16 | - search improvements
17 | - use different min score options for different sources (noticed URL not match meghan highlight for hello mag even tho query got megan and did match and highlight queen in url)
18 | - get snippets earlier (before rendering in lib server) and use to add to signal
19 | - if we have multiple query terms (multiple determined by some form of tokenization) then try to show all terms present in the snippet. even tho one term may be higher scoring. Should we do multiple passes of ukkonen distance one for whole query and one for each term? This will be easier / faster with trigrams I guess. Basically we want snippet to be a relevant summary that provides signal.
20 | - Another way to improve snippet highlight is to 'revert back' the highlighted text, and calculate their match/ukkonen on the query term. So e.g. if we get q:'israle beverly', hl:['beverly', 'beverly'], it's good overlap, but if we get hl:['is it really'] even tho that might score ok for israle, it's not a good match. so can we 'score that back' if we go match('is it really', 'israel') and see it is low, so we exclude it?
21 | - try an exact match on the query term if possible for highlight. first one.
22 | - we could also add signal from the highlighting to just in time alter the order (e.g. 'hell wiki' search brings google search to top rank, but the Hell wikipedia page has more highlight visible)
23 | - Create instant search (or at least instant queries (so search over previous queries -- not results necessarily))
24 | - an error in Full text search can corrupt the index and make it unrecoverable...we need to guard against this
25 | - this is still happening. sometimes the index is not saved, even on a normal error free restart. unknown why.
26 |
--------------------------------------------------------------------------------
/eslint.config.js:
--------------------------------------------------------------------------------
1 | import globals from "globals";
2 | import pluginJs from "@eslint/js";
3 |
4 |
5 | export default [
6 | {languageOptions: { globals: globals.browser }},
7 | pluginJs.configs.recommended,
8 | ];
--------------------------------------------------------------------------------
/exec.js:
--------------------------------------------------------------------------------
1 | import path from 'path';
2 | import {execSync} from 'child_process';
3 |
4 | const runPath = path.resolve(process.argv[2]);
5 | execSync(`"${runPath}"`,{stdio:'inherit'});
6 |
--------------------------------------------------------------------------------
/global-run.cjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | const os = require('os');
4 | const { spawn, spawnSync } = require('child_process');
5 | const fs = require('fs');
6 | const path = require('path');
7 |
8 | // Installing dependencies synchronously if they are missing, so the
9 | // app does not launch before the install completes
10 | if (!fs.existsSync(path.join(process.cwd(), 'node_modules'))) {
11 | spawnSync('npm', ['i'], { stdio: 'inherit' });
12 | }
13 |
14 | // Getting the total system memory
15 | const totalMemory = os.totalmem();
16 |
17 | // Allocating 80% of the total memory
18 | const memoryAllocation = Math.floor((totalMemory / (1024 * 1024)) * 0.8); // Converted bytes to MB and took 80% of it
19 |
20 | console.log(`Index can use up to: ${memoryAllocation}MB RAM`);
21 |
22 | // Running the application with the enlarged heap
23 | spawn('node', [`--max-old-space-size=${memoryAllocation}`, path.resolve(__dirname, 'build', 'global', 'downloadnet.cjs')], { stdio: 'inherit' });
24 |
--------------------------------------------------------------------------------
/icons/cd.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DO-SAY-GO/dn/c757475ca9f14dd46549774a8a6a89656df362b2/icons/cd.ico
--------------------------------------------------------------------------------
/icons/dk.icns:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DO-SAY-GO/dn/c757475ca9f14dd46549774a8a6a89656df362b2/icons/dk.icns
--------------------------------------------------------------------------------
/icons/dk.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DO-SAY-GO/dn/c757475ca9f14dd46549774a8a6a89656df362b2/icons/dk.ico
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "downloadnet",
3 | "version": "4.5.2",
4 | "type": "module",
5 | "description": "Library server and an archivist browser controller.",
6 | "main": "global-run.cjs",
7 | "module": "build/esm/downloadnet.mjs",
8 | "bin": {
9 | "dn": "global-run.cjs"
10 | },
11 | "scripts": {
12 | "start": "node --max-old-space-size=4096 src/app.js",
13 | "build": "node exec.js \"./scripts/build_only.sh\"",
14 | "parcel": "node exec.js \"./scripts/parcel.sh\"",
15 | "clean": "node exec.js \"./scripts/clean.sh\"",
16 | "test": "node --watch src/app.js",
17 | "inspect": "node --inspect-brk=127.0.0.1:9999 src/app.js",
18 | "save": "node src/app.js DownloadNet save",
19 | "serve": "node src/app.js DownloadNet serve",
20 | "lint": "watch -n 5 npx eslint .",
21 | "test-hl": "node src/highlighter.js",
22 | "prepublishOnly": "npm run build"
23 | },
24 | "repository": {
25 | "type": "git",
26 | "url": "git+https://github.com/dosyago/DownloadNet.git"
27 | },
28 | "keywords": [
29 | "archivist",
30 | "library"
31 | ],
32 | "author": "@dosy",
33 | "bugs": {
34 | "url": "https://github.com/dosyago/DownloadNet/issues"
35 | },
36 | "homepage": "https://github.com/dosyago/DownloadNet#readme",
37 | "dependencies": {
38 | "@667/ps-list": "latest",
39 | "@dosyago/rainsum": "latest",
40 | "chalk": "latest",
41 | "chrome-launcher": "latest",
42 | "express": "latest",
43 | "flexsearch": "latest",
44 | "fz-search": "latest",
45 | "inquirer": "latest",
46 | "natural": "latest",
47 | "ndx": "^1.0.2",
48 | "ndx-query": "^1.0.1",
49 | "ndx-serializable": "^1.0.0",
50 | "ukkonen": "latest",
51 | "ws": "latest"
52 | },
53 | "devDependencies": {
54 | "@eslint/js": "latest",
55 | "esbuild": "latest",
56 | "eslint": "latest",
57 | "globals": "latest",
58 | "postject": "latest"
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DO-SAY-GO/dn/c757475ca9f14dd46549774a8a6a89656df362b2/public/favicon.ico
--------------------------------------------------------------------------------
/public/find_cleaned_duplicates.mjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | import fs from 'node:fs';
4 | import path from 'node:path';
5 | import child_process from 'node:child_process';
6 |
7 | import {
8 | loadPref,
9 | cache_file,
10 | index_file,
11 | } from '../src/args.js';
12 |
13 | const CLEAN = true;
14 | const CONCURRENT = 7;
15 | const sleep = ms => new Promise(res => setTimeout(res, ms));
16 | const problems = new Map();
17 | let cleaning = false;
18 | let made = false;
19 |
20 | process.on('exit', cleanup);
21 | process.on('SIGINT', cleanup);
22 | process.on('SIGTERM', cleanup);
23 | process.on('SIGHUP', cleanup);
24 | process.on('SIGUSR2', cleanup);
25 | process.on('beforeExit', cleanup);
26 |
27 | console.log({Pref:loadPref(), cache_file: cache_file(), index_file: index_file()});
28 | make();
29 |
30 | async function make() {
31 | const indexFile = fs.readFileSync(index_file()).toString();
32 | JSON.parse(indexFile).map(([key, value]) => {
33 | if ( typeof key === "number" ) return;
34 | if ( key.startsWith('ndx') ) return;
35 | if ( value.title === undefined ) {
36 | console.log('no title property', {key, value});
37 | }
38 | const url = key;
39 | const title = value.title.toLocaleLowerCase();
40 | if ( title.length === 0 || title.includes('404') || title.includes('not found') ) {
41 | if ( problems.has(url) ) {
42 | console.log('Found duplicate', url, title, problems.get(url));
43 | }
44 | const prob = {title, dupes:[], dupe:false};
45 | problems.set(url, prob);
46 | const cleaned1 = clean(url);
47 | if ( problems.has(cleaned1) ) {
48 | console.log(`Found duplicate`, {url, title, cleaned1, dupeEntry:problems.get(cleaned1)});
49 | prob.dupe = true;
50 | prob.dupes.push(cleaned1);
51 | url !== cleaned1 && (problems.delete(cleaned1), prob.diff = true);
52 | }
53 | const cleaned2 = clean2(url);
54 | if ( problems.has(cleaned2) ) {
55 | console.log(`Found duplicate`, {url, title, cleaned2, dupeEntry: problems.get(cleaned2)});
56 | prob.dupe = true;
57 | prob.dupes.push(cleaned2);
58 | url !== cleaned2 && (problems.delete(cleaned2), prob.diff = true);
59 | }
60 | }
61 | });
62 |
63 | made = true;
64 |
65 | cleanup();
66 | }
67 |
68 | function cleanup() {
69 | if ( cleaning ) return;
70 | if ( ! made ) return;
71 | cleaning = true;
72 | console.log('cleanup running');
73 | const outData = [...problems.entries()].filter(([key, {dupe}]) => dupe);
74 | outData.sort(([a], [b]) => a.localeCompare(b));
75 | fs.writeFileSync(
76 | path.resolve('.', 'url-cleaned-dupes.json'),
77 | JSON.stringify(outData, null, 2)
78 | );
79 | const {size:bytesWritten} = fs.statSync(
80 | path.resolve('.', 'url-cleaned-dupes.json'),
81 | {bigint: true}
82 | );
83 | console.log(`Wrote ${outData.length} dupe urls in ${bytesWritten} bytes.`);
84 | process.exit(0);
85 | }
86 |
87 | function clean(urlString) {
88 | const url = new URL(urlString);
89 | if ( url.hash.startsWith('#!') || url.hostname.includes('google.com') || url.hostname.includes('80s.nyc') ) {
90 | } else {
91 | url.hash = '';
92 | }
93 | for ( const [key] of [...url.searchParams] ) { // iterate over a copy: deleting during live iteration skips entries
94 | if ( key.startsWith('utm_') ) {
95 | url.searchParams.delete(key);
96 | }
97 | }
98 | url.pathname = url.pathname.replace(/\/$/, '');
99 | url.protocol = 'https:';
100 | url.pathname = url.pathname.replace(/(\.htm.?|\.php|\.asp.?)$/, '');
101 | if ( url.hostname.startsWith('www.') ) {
102 | url.hostname = url.hostname.replace(/^www\./, '');
103 | }
104 | const key = url.toString();
105 | return key;
106 | }
107 |
108 | function clean2(urlString) {
109 | const url = new URL(urlString);
110 | url.pathname = '';
111 | return url.toString();
112 | }
113 |
114 | function curlCommand(url) {
115 | return `curl -k -L -s -o /dev/null -w '%{url_effective}' ${JSON.stringify(url)} \
116 | -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
117 | -H 'Accept-Language: en,en-US;q=0.9,zh-TW;q=0.8,zh-CN;q=0.7,zh;q=0.6,ja;q=0.5' \
118 | -H 'Cache-Control: no-cache' \
119 | -H 'Connection: keep-alive' \
120 | -H 'DNT: 1' \
121 | -H 'Pragma: no-cache' \
122 | -H 'Sec-Fetch-Dest: document' \
123 | -H 'Sec-Fetch-Mode: navigate' \
124 | -H 'Sec-Fetch-Site: none' \
125 | -H 'Sec-Fetch-User: ?1' \
126 | -H 'Upgrade-Insecure-Requests: 1' \
127 | -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36' \
128 | -H 'sec-ch-ua: "Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"' \
129 | -H 'sec-ch-ua-mobile: ?0' \
130 | -H 'sec-ch-ua-platform: "macOS"' \
131 | --compressed ;
132 | `;
133 | }
134 |
--------------------------------------------------------------------------------
/public/find_crawlable.mjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | import fs from 'node:fs';
4 | import path from 'node:path';
5 | import child_process from 'node:child_process';
6 |
7 | const CLEAN = false;
8 | const CONCURRENT = 7;
9 | const sleep = ms => new Promise(res => setTimeout(res, ms));
10 | const entries = [];
11 | let cleaning = false;
12 |
13 | process.on('exit', cleanup);
14 | process.on('SIGINT', cleanup);
15 | process.on('SIGTERM', cleanup);
16 | process.on('SIGHUP', cleanup);
17 | process.on('SIGUSR2', cleanup);
18 | process.on('beforeExit', cleanup);
19 |
20 | make();
21 |
22 | async function make() {
23 | const titlesFile = fs.readFileSync(path.resolve('.', 'topTitles.json')).toString();
24 | const titles = new Map(JSON.parse(titlesFile).map(([url, title]) => [url, {url,title}]));
25 | titles.forEach(({url,title}) => {
26 | if ( title.length === 0 && url.startsWith('https:') && !url.endsWith('.pdf') ) {
27 | entries.push(url);
28 | }
29 | });
30 |
31 | cleanup();
32 | }
33 |
34 | function cleanup() {
35 | if ( cleaning ) return;
36 | cleaning = true;
37 | console.log('cleanup running');
38 | fs.writeFileSync(
39 | path.resolve('.', 'recrawl-https-3.json'),
40 | JSON.stringify(entries, null, 2)
41 | );
42 | console.log(`Wrote recrawlable urls`);
43 | process.exit(0);
44 | }
45 |
46 | function clean(urlString) {
47 | const url = new URL(urlString);
48 | if ( url.hash.startsWith('#!') || url.hostname.includes('google.com') || url.hostname.includes('80s.nyc') ) {
49 | } else {
50 | url.hash = '';
51 | }
52 | for ( const [key] of [...url.searchParams] ) { // iterate over a copy: deleting during live iteration skips entries
53 | if ( key.startsWith('utm_') ) {
54 | url.searchParams.delete(key);
55 | }
56 | }
57 | url.pathname = url.pathname.replace(/\/$/, '');
58 | url.protocol = 'https:';
59 | url.pathname = url.pathname.replace(/(\.htm.?|\.php)$/, '');
60 | if ( url.hostname.startsWith('www.') ) {
61 | url.hostname = url.hostname.replace(/^www\./, '');
62 | }
63 | const key = url.toString();
64 | return key;
65 | }
66 |
67 | function clean2(urlString) {
68 | const url = new URL(urlString);
69 | url.pathname = '';
70 | return url.toString();
71 | }
72 |
73 | function curlCommand(url) {
74 | return `curl -k -L -s -o /dev/null -w '%{url_effective}' ${JSON.stringify(url)} \
75 | -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
76 | -H 'Accept-Language: en,en-US;q=0.9,zh-TW;q=0.8,zh-CN;q=0.7,zh;q=0.6,ja;q=0.5' \
77 | -H 'Cache-Control: no-cache' \
78 | -H 'Connection: keep-alive' \
79 | -H 'DNT: 1' \
80 | -H 'Pragma: no-cache' \
81 | -H 'Sec-Fetch-Dest: document' \
82 | -H 'Sec-Fetch-Mode: navigate' \
83 | -H 'Sec-Fetch-Site: none' \
84 | -H 'Sec-Fetch-User: ?1' \
85 | -H 'Upgrade-Insecure-Requests: 1' \
86 | -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36' \
87 | -H 'sec-ch-ua: "Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"' \
88 | -H 'sec-ch-ua-mobile: ?0' \
89 | -H 'sec-ch-ua-platform: "macOS"' \
90 | --compressed ;
91 | `;
92 | }
93 |
--------------------------------------------------------------------------------
/public/injection.js:
--------------------------------------------------------------------------------
1 | import {DEBUG as debug} from '../src/common.js';
2 |
3 | const DEBUG = debug || false;
4 |
5 | export function getInjection({sessionId}) {
6 | // Notes:
7 | // say() function
8 | // why aliased? Resistant to page overwriting
9 | // just a precaution as we are already in an isolated world here, but this makes
10 | // this script more portable if it were introduced globally as well as robust
11 | // against API or behaviour changes of the browser or its remote debugging protocol
12 | // in future
13 | return `
14 | {
15 | const X = 1;
16 | const DEBUG = ${JSON.stringify(DEBUG, null, 2)};
17 | const MIN_CHECK_TEXT = 3000; // min time between checking documentElement.innerText
18 | const MIN_NOTIFY = 5000; // min time between telling controller text maybe changed
19 | const MAX_NOTIFICATIONS = 13; // max times we will tell controller text maybe changed
20 | const OBSERVER_OPTS = {
21 | subtree: true,
22 | childList: true,
23 | characterData: true
24 | };
25 | const Top = globalThis.top;
26 | let lastInnerText;
27 |
28 | if ( Top === globalThis ) {
29 | const ConsoleInfo = console.info.bind(console);
30 | const JSONStringify = JSON.stringify.bind(JSON);
31 | const TITLE_CHANGES = 10;
32 | const INITIAL_CHECK_TIME = 500;
33 | const TIME_MULTIPLIER = Math.E;
34 | const sessionId = "${sessionId}";
35 | const sleep = ms => new Promise(res => setTimeout(res, ms));
36 | const handler = throttle(handleFrameMessage, MIN_NOTIFY);
37 | let count = 0;
38 |
39 | installTop();
40 |
41 | async function installTop() {
42 | console.log("Installing in top frame...");
43 | self.startUrl = location.href;
44 | say({install: { sessionId, startUrl }});
45 | await sleep(1000);
46 | beginTitleChecks();
47 | // start monitoring text changes from 30 seconds after load
48 | setTimeout(() => beginTextNotifications(), 30000);
49 | console.log("Installed.");
50 | }
51 |
52 | function beginTitleChecks() {
53 | let lastTitle = null;
54 | let checker;
55 | let timeToNextCheck = INITIAL_CHECK_TIME;
56 | let changesLogged = 0;
57 |
58 | check();
59 | console.log('Begun logging title changes.');
60 |
61 | function check() {
62 | clearTimeout(checker);
63 | const currentTitle = document.title;
64 | if ( lastTitle !== currentTitle ) {
65 | say({titleChange: {lastTitle, currentTitle, url: location.href, sessionId}});
66 | lastTitle = currentTitle;
67 | changesLogged++;
68 | } else {
69 | // increase check time if there's no change
70 | timeToNextCheck *= TIME_MULTIPLIER;
71 | }
72 | if ( changesLogged < TITLE_CHANGES ) {
73 | checker = setTimeout(check, timeToNextCheck);
74 | } else {
75 | console.log('Finished logging title changes.');
76 | }
77 | }
78 | }
79 |
80 | function say(thing) {
81 | ConsoleInfo(JSONStringify(thing));
82 | }
83 |
84 | function beginTextNotifications() {
85 | // listen for {textChange:true} messages
86 | // throttle them
87 | // on leading throttle edge send message to controller with
88 | // console.info(JSON.stringify({textChange:...}));
89 | self.addEventListener('message', messageParser);
90 |
91 | console.log('Begun notifying of text changes.');
92 |
93 | function messageParser({data, origin}) {
94 | let source;
95 | try {
96 | ({source} = data.frameTextChangeNotification);
97 | if ( count > MAX_NOTIFICATIONS ) {
98 | self.removeEventListener('message', messageParser);
99 | return;
100 | }
101 | count++;
102 | handler({textChange:{source}});
103 | } catch(e) {
104 | DEBUG.verboseSlow && console.warn('could not parse message', data, e);
105 | }
106 | }
107 | }
108 |
109 | function handleFrameMessage({textChange}) {
110 | const {source} = textChange;
111 | console.log('Telling controller that text changed');
112 | say({textChange:{source, sessionId, count}});
113 | }
114 | }
115 |
116 | beginTextMutationChecks();
117 |
118 | function beginTextMutationChecks() {
119 | // create mutation observer for text
120 | // throttle output
121 |
122 | const observer = new MutationObserver(throttle(check, MIN_CHECK_TEXT));
123 | observer.observe(document.documentElement || document, OBSERVER_OPTS);
124 |
125 | console.log('Begun observing text changes.');
126 |
127 | function check() {
128 | console.log('check');
129 | const textMutated = document.documentElement.innerText !== lastInnerText;
130 | if ( textMutated ) {
131 | DEBUG.verboseSlow && console.log('Text changed');
132 | lastInnerText = document.documentElement.innerText;
133 | Top.postMessage({frameTextChangeNotification:{source:location.href}}, '*');
134 | }
135 | }
136 | }
137 |
138 | // javascript throttle function
139 | // source: https://stackoverflow.com/a/59378445
140 | /*
141 | function throttle(func, timeFrame) {
142 | var lastTime = 0;
143 | return function (...args) {
144 | var now = new Date();
145 | if (now - lastTime >= timeFrame) {
146 | func.apply(this, args);
147 | lastTime = now;
148 | }
149 | };
150 | }
151 | */
152 |
153 | // alternate throttle function with trailing edge call
154 | // source: https://stackoverflow.com/a/27078401
155 | ///*
156 | // Notes
157 | // Returns a function, that, when invoked, will only be triggered at most once
158 | // during a given window of time. Normally, the throttled function will run
159 | // as much as it can, without ever going more than once per \`wait\` duration;
160 | // but if you'd like to disable the execution on the leading edge, pass
161 | // \`{leading: false}\`. To disable execution on the trailing edge, ditto.
162 | function throttle(func, wait, options) {
163 | var context, args, result;
164 | var timeout = null;
165 | var previous = 0;
166 | if (!options) options = {};
167 | var later = function() {
168 | previous = options.leading === false ? 0 : Date.now();
169 | timeout = null;
170 | result = func.apply(context, args);
171 | if (!timeout) context = args = null;
172 | };
173 | return function() {
174 | var now = Date.now();
175 | if (!previous && options.leading === false) previous = now;
176 | var remaining = wait - (now - previous);
177 | context = this;
178 | args = arguments;
179 | if (remaining <= 0 || remaining > wait) {
180 | if (timeout) {
181 | clearTimeout(timeout);
182 | timeout = null;
183 | }
184 | previous = now;
185 | result = func.apply(context, args);
186 | if (!timeout) context = args = null;
187 | } else if (!timeout && options.trailing !== false) {
188 | timeout = setTimeout(later, remaining);
189 | }
190 | return result;
191 | };
192 | }
193 | //*/
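// Usage sketch (illustrative): the MIN_NOTIFY-throttled handler wired up in
// installTop() above,
//   const handler = throttle(handleFrameMessage, MIN_NOTIFY);
// forwards the first frame message immediately (leading edge), coalesces any
// burst within the next 5 seconds, and fires once more on the trailing edge
// if messages arrived in the meantime.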
194 | }
195 | `;
196 | }
197 |
--------------------------------------------------------------------------------
/public/library/README.md:
--------------------------------------------------------------------------------
1 | # ALT: Default storage directory for the library
2 |
3 | If you forked this repo and want to commit your library with git, remove `public/library/http*` and `public/library/cache.json` from `.gitignore`.
4 |
5 | ## Clearing your cache
6 |
7 | To clear everything, delete all directories whose names start with `http` or `https`, and delete `cache.json` as well.
8 |
9 | To clear only content from domains you don't want, delete just those unwanted `http`/`https` directories, and do NOT delete `cache.json`; a sketch of this selective clean-up follows below.
10 |
11 |
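Below is a minimal Node sketch of that selective clean-up, assuming the layout described above. The script name, argument handling, and hostname matching are all hypothetical (not part of this repo), and `fs.rmSync` needs Node 14.14+. Run it from inside the library directory:

```js
#!/usr/bin/env node
// clear-domains.mjs (hypothetical helper): remove archived origins whose
// directory name mentions a host you pass in, but keep cache.json intact.
import fs from 'node:fs';

const unwanted = process.argv.slice(2); // e.g. node clear-domains.mjs example.com

for ( const dirent of fs.readdirSync('.', {withFileTypes: true}) ) {
  if ( ! dirent.isDirectory() ) continue;            // cache.json is a file, so it survives
  if ( ! dirent.name.startsWith('http') ) continue;  // only archive directories
  if ( unwanted.some(host => dirent.name.includes(host)) ) {
    fs.rmSync(dirent.name, {recursive: true, force: true});
    console.log(`Removed ${dirent.name}`);
  }
}
```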
--------------------------------------------------------------------------------
/public/make_top.mjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | import fs from 'node:fs';
4 | import path from 'node:path';
5 | import child_process from 'node:child_process';
6 |
7 | const CLEAN = false;
8 | const CONCURRENT = 7;
9 | const sleep = ms => new Promise(res => setTimeout(res, ms));
10 | const entries = [];
11 | const counted = new Set();
12 | const errors = new Map();
13 | let counts;
14 | let cleaning = false;
15 |
16 | process.on('exit', cleanup);
17 | process.on('SIGINT', cleanup);
18 | process.on('SIGTERM', cleanup);
19 | process.on('SIGHUP', cleanup);
20 | process.on('SIGUSR2', cleanup);
21 | process.on('beforeExit', cleanup);
22 |
23 | make();
24 |
25 | async function make() {
26 | const titlesFile = fs.readFileSync(path.resolve('.', 'topTitles.json')).toString();
27 | const titles = new Map(JSON.parse(titlesFile).map(([url, title]) => [url, {url,title}]));
28 | if ( CLEAN ) {
29 | for ( const [url, obj] of titles ) {
30 | const k1 = clean(url);
31 | const k2 = clean2(url);
32 | if ( !titles.has(k1) ) {
33 | titles.set(k1, obj);
34 | }
35 | if ( !titles.has(k2) ) {
36 | titles.set(k2, obj);
37 | }
38 | }
39 | }
40 | const remainingFile = fs.readFileSync(path.resolve('.', 'remainingFile.json')).toString();
41 | const remainingSet = new Set(JSON.parse(remainingFile));
42 | const countsFile = fs.readFileSync(path.resolve('.', 'ran-counts.json')).toString();
43 | counts = new Map(JSON.parse(countsFile).filter(([url, count]) => remainingSet.has(url)));
44 | let current = 0;
45 | for ( const [url, count] of counts ) {
46 | let title;
47 | let realUrl;
48 | if ( titles.has(url) ) {
49 | ({title} = titles.get(url));
50 | entries.push({
51 | url,
52 | title,
53 | count,
54 | });
55 | counted.add(url);
56 | } else {
57 | console.log(`Curl call for ${url} in progress...`);
58 | let notifyCurlComplete;
59 | const curlCall = new Promise(res => notifyCurlComplete = res);
60 | do {
61 | await sleep(1000);
62 | } while ( current >= CONCURRENT );
63 | child_process.exec(curlCommand(url), (err, stdout, stderr) => {
64 | if ( ! err && (!stderr || stderr.length == 0)) {
65 | realUrl = stdout;
66 | if ( titles.has(realUrl) ) {
67 | ({title} = titles.get(realUrl));
68 | entries.push({
69 | url,
70 | realUrl,
71 | title,
72 | count,
73 | });
74 | counted.add(url);
75 | }
76 | } else {
77 | console.log(`Error on curl for ${url}`, {err, stderr});
78 | errors.set(url, {err, stderr});
79 | }
80 | console.log(`Curl call for ${url} complete!`);
81 | notifyCurlComplete();
82 | });
83 | current += 1;
84 | curlCall.then(() => current -= 1);
85 | }
86 | }
87 |   while ( current > 0 ) { await sleep(1000); } cleanup(); // drain in-flight curl calls before writing results, else their entries are lost to process.exit
88 | }
89 |
90 | async function make_v2() {
91 | const titlesFile = fs.readFileSync(path.resolve('.', 'topTitles.json')).toString();
92 | const titles = new Map(JSON.parse(titlesFile).map(([url, title]) => [url, {url,title}]));
93 | if ( CLEAN ) {
94 | for ( const [url, obj] of titles ) {
95 | const k1 = clean(url);
96 | const k2 = clean2(url);
97 | if ( !titles.has(k1) ) {
98 | titles.set(k1, obj);
99 | }
100 | if ( !titles.has(k2) ) {
101 | titles.set(k2, obj);
102 | }
103 | }
104 | }
105 | const countsFile = fs.readFileSync(path.resolve('.', 'ran-counts.json')).toString();
106 | counts = new Map(JSON.parse(countsFile));
107 | let current = 0;
108 | for ( const [url, count] of counts ) {
109 | let title;
110 | let realUrl;
111 | if ( titles.has(url) ) {
112 | ({title} = titles.get(url));
113 | entries.push({
114 | url,
115 | title,
116 | count,
117 | });
118 | counted.add(url);
119 | } else {
120 | console.log(`Curl call for ${url} in progress...`);
121 | let notifyCurlComplete;
122 | const curlCall = new Promise(res => notifyCurlComplete = res);
123 | do {
124 | await sleep(250);
125 | } while ( current >= CONCURRENT );
126 | child_process.exec(curlCommand(url), (err, stdout, stderr) => {
127 | if ( ! err && (!stderr || stderr.length == 0)) {
128 | realUrl = stdout;
129 | if ( titles.has(realUrl) ) {
130 | ({title} = titles.get(realUrl));
131 | entries.push({
132 | url,
133 | realUrl,
134 | title,
135 | count,
136 | });
137 | counted.add(url);
138 | }
139 | } else {
140 | console.log(`Error on curl for ${url}`, {err, stderr});
141 | errors.set(url, {err, stderr});
142 | }
143 | console.log(`Curl call for ${url} complete!`);
144 | notifyCurlComplete();
145 | });
146 | current += 1;
147 | curlCall.then(() => current -= 1);
148 | }
149 | }
150 |   while ( current > 0 ) { await sleep(250); } cleanup(); // drain in-flight curl calls before writing results, else their entries are lost to process.exit
151 | }
152 |
153 | function cleanup() {
154 | if ( cleaning ) return;
155 | cleaning = true;
156 | console.log('cleanup running');
157 | if ( errors.size ) {
158 | fs.writeFileSync(
159 | path.resolve('.', 'errorLinks4.json'),
160 | JSON.stringify([...errors.keys()], null, 2)
161 | );
162 | console.log(`Wrote errors`);
163 | }
164 | if ( counted.size !== counts.size ) {
165 | counted.forEach(url => counts.delete(url));
166 | fs.writeFileSync(
167 | path.resolve('.', 'noTitleFound4.json'),
168 | JSON.stringify([...counts.keys()], null, 2)
169 | )
170 | console.log(`Wrote noTitleFound`);
171 | }
172 | fs.writeFileSync(
173 | path.resolve('.', 'topFrontPageLinksWithCounts4.json'),
174 | JSON.stringify(entries, null, 2)
175 | );
176 | console.log(`Wrote top links with counts`);
177 | process.exit(0);
178 | }
179 |
180 | async function make_v1() {
181 | const titlesFile = fs.readFileSync(path.resolve('.', 'topTitles.json')).toString();
182 | const titles = new Map(JSON.parse(titlesFile).map(([url, title]) => [clean(url), {url,title}]));
183 | const countsFile = fs.readFileSync(path.resolve('.', 'counts.json')).toString();
184 | const counts = new Map(JSON.parse(countsFile).map(([url, count]) => [clean(url), count]));
185 | for ( const [key, count] of counts ) {
186 | counts.set(clean2(key), count);
187 | }
188 | const entries = [];
189 | for ( const [key, {url,title}] of titles ) {
190 | entries.push({
191 | url, title,
192 | count: counts.get(key) ||
193 | counts.get(url) ||
194 | counts.get(clean2(key)) ||
195 | console.log(`No count found for`, {key, url, title, c2key: clean2(key)})
196 | });
197 | }
198 | fs.writeFileSync(
199 | path.resolve('.', 'topFrontPageLinks.json'),
200 | JSON.stringify(entries, null, 2)
201 | );
202 | }
203 |
204 | function clean(urlString) {
205 | const url = new URL(urlString);
206 |   // keep hashbang routes and site-specific hashes; otherwise strip the fragment
207 |   if ( !(url.hash.startsWith('#!') || url.hostname.includes('google.com') || url.hostname.includes('80s.nyc')) ) {
208 |     url.hash = '';
209 |   }
210 |   for ( const key of [...url.searchParams.keys()] ) { // snapshot the keys: deleting while iterating live searchParams skips entries
211 |     if ( key.startsWith('utm_') ) {
212 |       url.searchParams.delete(key);
213 |     }
214 |   }
215 | url.pathname = url.pathname.replace(/\/$/, '');
216 | url.protocol = 'https:';
217 | url.pathname = url.pathname.replace(/(\.htm.?|\.php)$/, '');
218 | if ( url.hostname.startsWith('www.') ) {
219 |     url.hostname = url.hostname.replace(/^www\./, ''); // escape the literal dot
220 | }
221 | const key = url.toString();
222 | return key;
223 | }
224 |
225 | function clean2(urlString) {
226 | const url = new URL(urlString);
227 | url.pathname = '';
228 | return url.toString();
229 | }
230 |
231 | function curlCommand(url) {
232 | return `curl -k -L -s -o /dev/null -w '%{url_effective}' ${JSON.stringify(url)} \
233 | -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
234 | -H 'Accept-Language: en,en-US;q=0.9,zh-TW;q=0.8,zh-CN;q=0.7,zh;q=0.6,ja;q=0.5' \
235 | -H 'Cache-Control: no-cache' \
236 | -H 'Connection: keep-alive' \
237 | -H 'DNT: 1' \
238 | -H 'Pragma: no-cache' \
239 | -H 'Sec-Fetch-Dest: document' \
240 | -H 'Sec-Fetch-Mode: navigate' \
241 | -H 'Sec-Fetch-Site: none' \
242 | -H 'Sec-Fetch-User: ?1' \
243 | -H 'Upgrade-Insecure-Requests: 1' \
244 | -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36' \
245 | -H 'sec-ch-ua: "Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"' \
246 | -H 'sec-ch-ua-mobile: ?0' \
247 | -H 'sec-ch-ua-platform: "macOS"' \
248 | --compressed ;
249 | `;
250 | }
251 |
--------------------------------------------------------------------------------
/public/old-index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Your Personal Search Engine and Archive
4 |
5 |
8 |
9 | View your index
10 |
11 |
147 |
154 |
198 |
228 |
239 |
240 | Notice a bug? Open an issue!
241 |
242 |
247 |
--------------------------------------------------------------------------------
/public/problem_find.mjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | import fs from 'node:fs';
4 | import path from 'node:path';
5 | import child_process from 'node:child_process';
6 |
7 | import {
8 | loadPref,
9 | cache_file,
10 | index_file,
11 | } from '../src/args.js';
12 |
13 | const CLEAN = false;
14 | const CONCURRENT = 7;
15 | const sleep = ms => new Promise(res => setTimeout(res, ms));
16 | const problems = new Map();
17 | let cleaning = false;
18 | let made = false;
19 |
20 | process.on('exit', cleanup);
21 | process.on('SIGINT', cleanup);
22 | process.on('SIGTERM', cleanup);
23 | process.on('SIGHUP', cleanup);
24 | process.on('SIGUSR2', cleanup);
25 | process.on('beforeExit', cleanup);
26 |
27 | console.log({Pref:loadPref(), cache_file: cache_file(), index_file: index_file()});
28 | make();
29 |
30 | async function make() {
31 | const indexFile = fs.readFileSync(index_file()).toString();
32 |   JSON.parse(indexFile).forEach(([key, value]) => { // forEach: we iterate for side effects, not to build an array
33 | if ( typeof key === "number" ) return;
34 | if ( key.startsWith('ndx') ) return;
35 | if ( value.title === undefined ) {
36 |       console.log('no title property', {key, value}); return; // skip this entry: reading .title below would throw
37 | }
38 | const url = key;
39 | const title = value.title.toLocaleLowerCase();
40 | if ( title.length === 0 || title.includes('404') || title.includes('not found') ) {
41 | if ( problems.has(url) ) {
42 | console.log('Found duplicate', url, title, problems.get(url));
43 | }
44 | problems.set(url, title);
45 | }
46 | });
47 |
48 | made = true;
49 |
50 | cleanup();
51 | }
52 |
53 | function cleanup() {
54 | if ( cleaning ) return;
55 | if ( ! made ) return;
56 | cleaning = true;
57 | console.log('cleanup running');
58 | const outData = [...problems.entries()];
59 | fs.writeFileSync(
60 | path.resolve('.', 'url-problems.json'),
61 | JSON.stringify(outData, null, 2)
62 | );
63 | const {size:bytesWritten} = fs.statSync(
64 | path.resolve('.', 'url-problems.json'),
65 | {bigint: true}
66 | );
67 | console.log(`Wrote ${outData.length} problem urls in ${bytesWritten} bytes.`);
68 | process.exit(0);
69 | }
70 |
71 | function clean(urlString) {
72 | const url = new URL(urlString);
73 |   // keep hashbang routes and site-specific hashes; otherwise strip the fragment
74 |   if ( !(url.hash.startsWith('#!') || url.hostname.includes('google.com') || url.hostname.includes('80s.nyc')) ) {
75 |     url.hash = '';
76 |   }
77 |   for ( const key of [...url.searchParams.keys()] ) { // snapshot the keys: deleting while iterating live searchParams skips entries
78 |     if ( key.startsWith('utm_') ) {
79 |       url.searchParams.delete(key);
80 |     }
81 |   }
82 | url.pathname = url.pathname.replace(/\/$/, '');
83 | url.protocol = 'https:';
84 | url.pathname = url.pathname.replace(/(\.htm.?|\.php)$/, '');
85 | if ( url.hostname.startsWith('www.') ) {
86 |     url.hostname = url.hostname.replace(/^www\./, ''); // escape the literal dot
87 | }
88 | const key = url.toString();
89 | return key;
90 | }
91 |
92 | function clean2(urlString) {
93 | const url = new URL(urlString);
94 | url.pathname = '';
95 | return url.toString();
96 | }
97 |
98 | function curlCommand(url) {
99 | return `curl -k -L -s -o /dev/null -w '%{url_effective}' ${JSON.stringify(url)} \
100 | -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
101 | -H 'Accept-Language: en,en-US;q=0.9,zh-TW;q=0.8,zh-CN;q=0.7,zh;q=0.6,ja;q=0.5' \
102 | -H 'Cache-Control: no-cache' \
103 | -H 'Connection: keep-alive' \
104 | -H 'DNT: 1' \
105 | -H 'Pragma: no-cache' \
106 | -H 'Sec-Fetch-Dest: document' \
107 | -H 'Sec-Fetch-Mode: navigate' \
108 | -H 'Sec-Fetch-Site: none' \
109 | -H 'Sec-Fetch-User: ?1' \
110 | -H 'Upgrade-Insecure-Requests: 1' \
111 | -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36' \
112 | -H 'sec-ch-ua: "Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"' \
113 | -H 'sec-ch-ua-mobile: ?0' \
114 | -H 'sec-ch-ua-platform: "macOS"' \
115 | --compressed ;
116 | `;
117 | }
118 |
--------------------------------------------------------------------------------
/public/redirector.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | About to index archive and index
4 |
22 |
--------------------------------------------------------------------------------
/public/style.css:
--------------------------------------------------------------------------------
1 | /* public/style.css */
2 |
3 | /* 1. Modern CSS Reset (Simplified) */
4 | *, *::before, *::after {
5 | box-sizing: border-box;
6 | margin: 0;
7 | padding: 0;
8 | }
9 |
10 | html {
11 | -webkit-text-size-adjust: 100%;
12 | tab-size: 4;
13 | font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
14 | line-height: 1.5;
15 | }
16 |
17 | body {
18 | min-height: 100vh;
19 | display: flex;
20 | flex-direction: column;
21 | }
22 |
23 | img, picture, video, canvas, svg {
24 | display: block;
25 | max-width: 100%;
26 | }
27 |
28 | input, button, textarea, select {
29 | font: inherit;
30 | }
31 |
32 | button {
33 | cursor: pointer;
34 | }
35 |
36 | a {
37 | text-decoration: none;
38 | color: inherit;
39 | }
40 |
41 | ul, ol {
42 | list-style: none;
43 | }
44 |
45 | /* 2. CSS Custom Properties (Variables) & Theming */
46 | :root {
47 | /* Light Mode (Default) */
48 | --font-primary: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
49 | --font-monospace: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, Courier, monospace;
50 |
51 | --color-text: #222;
52 | --color-text-muted: #555;
53 | --color-background: #f8f9fa;
54 | --color-surface: #ffffff;
55 | --color-primary: #007bff;
56 | --color-primary-hover: #0056b3;
57 | --color-secondary: #6c757d;
58 | --color-border: #dee2e6;
59 | --color-accent: #17a2b8;
60 | --color-success: #28a745;
61 | --color-danger: #dc3545;
62 | --color-warning: #ffc107;
63 | --color-highlight-bg: #ffe082; /* For search term highlighting */
64 |
65 | --spacing-xs: 0.25rem;
66 | --spacing-sm: 0.5rem;
67 | --spacing-md: 1rem;
68 | --spacing-lg: 1.5rem;
69 | --spacing-xl: 2rem;
70 |
71 | --border-radius: 0.375rem;
72 | --shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
73 | --shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
74 | }
75 |
76 | @media (prefers-color-scheme: dark) {
77 | :root {
78 | --color-text: #e9ecef;
79 | --color-text-muted: #adb5bd;
80 | --color-background: #121212; /* Slightly off-black for depth */
81 | --color-surface: #1e1e1e; /* For cards, modals, etc. */
82 | --color-primary: #0d6efd;
83 | --color-primary-hover: #0b5ed7;
84 | --color-secondary: #495057;
85 | --color-border: #343a40;
86 | --color-accent: #20c997;
87 | --color-success: #198754;
88 | --color-danger: #dc3545;
89 | --color-warning: #ffca2c;
90 | --color-highlight-bg: #4a3c00; /* Darker highlight for dark mode */
91 | }
92 | }
93 |
94 | /* 3. Base & Layout Styles */
95 | body {
96 | font-family: var(--font-primary);
97 | background-color: var(--color-background);
98 | color: var(--color-text);
99 | display: flex;
100 | flex-direction: column;
101 | min-height: 100vh;
102 | }
103 |
104 | .container {
105 | width: 90%;
106 | max-width: 1000px;
107 | margin: 0 auto;
108 | padding: var(--spacing-lg) var(--spacing-md);
109 | flex-grow: 1;
110 | display: flex;
111 | flex-direction: column;
112 | }
113 |
114 | .site-header {
115 | padding-bottom: var(--spacing-md);
116 | margin-bottom: var(--spacing-lg);
117 | border-bottom: 1px solid var(--color-border);
118 | display: flex;
119 | justify-content: space-between;
120 | align-items: center;
121 | flex-wrap: wrap; /* Allow wrapping on small screens */
122 | }
123 |
124 | .site-header h1 {
125 | font-size: 1.75rem;
126 | font-weight: 600;
127 | margin: 0;
128 | }
129 | .site-header h1 a {
130 | color: var(--color-primary);
131 | transition: color 0.2s ease-in-out;
132 | }
133 | .site-header h1 a:hover {
134 | color: var(--color-primary-hover);
135 | }
136 |
137 | .main-nav ul {
138 | display: flex;
139 | gap: var(--spacing-md);
140 | }
141 | .main-nav a {
142 | color: var(--color-text-muted);
143 | font-weight: 500;
144 | transition: color 0.2s ease-in-out;
145 | }
146 | .main-nav a:hover, .main-nav a.active {
147 | color: var(--color-primary);
148 | }
149 |
150 | main {
151 | flex-grow: 1;
152 | }
153 |
154 | .page-title {
155 | font-size: 1.5rem;
156 | margin-bottom: var(--spacing-lg);
157 | color: var(--color-text);
158 | }
159 |
160 | .site-footer {
161 | text-align: center;
162 | padding: var(--spacing-md);
163 | margin-top: var(--spacing-xl);
164 | border-top: 1px solid var(--color-border);
165 | font-size: 0.9rem;
166 | color: var(--color-text-muted);
167 | }
168 |
169 | /* 4. Form Elements */
170 | form {
171 | background-color: var(--color-surface);
172 | padding: var(--spacing-lg);
173 | border-radius: var(--border-radius);
174 | box-shadow: var(--shadow-sm);
175 | margin-bottom: var(--spacing-lg);
176 | }
177 |
178 | fieldset {
179 | border: none;
180 | padding: 0;
181 | margin: 0;
182 | }
183 |
184 | legend {
185 | font-size: 1.2rem;
186 | font-weight: 600;
187 | margin-bottom: var(--spacing-md);
188 | color: var(--color-text);
189 | padding: 0; /* Resetting some browser defaults */
190 | display: block; /* Ensure it takes full width if needed */
191 | width: 100%;
192 | }
193 |
194 | .form-group {
195 | margin-bottom: var(--spacing-md);
196 | }
197 |
198 | .form-group label {
199 | display: block;
200 | margin-bottom: var(--spacing-sm);
201 | font-weight: 500;
202 | color: var(--color-text-muted);
203 | }
204 |
205 | .form-group label small {
206 | font-weight: normal;
207 | font-size: 0.85em;
208 | display: block;
209 | }
210 |
211 | input[type="text"],
212 | input[type="search"],
213 | input[type="url"],
214 | input[type="number"],
215 | input[type="email"],
216 | textarea,
217 | select {
218 | width: 100%;
219 | padding: var(--spacing-sm) var(--spacing-md);
220 | border: 1px solid var(--color-border);
221 | border-radius: var(--border-radius);
222 | background-color: var(--color-background); /* Slightly different from surface for depth */
223 | color: var(--color-text);
224 | transition: border-color 0.2s ease-in-out, box-shadow 0.2s ease-in-out;
225 | }
226 |
227 | input[type="text"]:focus,
228 | input[type="search"]:focus,
229 | input[type="url"]:focus,
230 | input[type="number"]:focus,
231 | input[type="email"]:focus,
232 | textarea:focus,
233 | select:focus {
234 | outline: none;
235 | border-color: var(--color-primary);
236 |   box-shadow: 0 0 0 0.2rem color-mix(in srgb, var(--color-primary) 25%, transparent); /* rgba() cannot consume a hex custom property */
237 | }
238 |
239 | textarea {
240 | min-height: 100px;
241 | resize: vertical;
242 | }
243 |
244 | .input-group {
245 | display: flex;
246 | }
247 | .input-group input[type="search"] {
248 | border-top-right-radius: 0;
249 | border-bottom-right-radius: 0;
250 | flex-grow: 1;
251 | }
252 | .input-group button {
253 | border-top-left-radius: 0;
254 | border-bottom-left-radius: 0;
255 | }
256 |
257 |
258 | button, .button {
259 | display: inline-block;
260 | padding: var(--spacing-sm) var(--spacing-lg);
261 | font-weight: 500;
262 | text-align: center;
263 | vertical-align: middle;
264 | border: 1px solid transparent;
265 | border-radius: var(--border-radius);
266 | background-color: var(--color-primary);
267 | color: #fff;
268 | transition: background-color 0.2s ease-in-out, border-color 0.2s ease-in-out;
269 | line-height: 1.5; /* Ensure consistent height with inputs */
270 | }
271 |
272 | button:hover, .button:hover {
273 | background-color: var(--color-primary-hover);
274 | }
275 |
276 | button.secondary, .button.secondary {
277 | background-color: var(--color-secondary);
278 | color: #fff;
279 | }
280 | button.secondary:hover, .button.secondary:hover {
281 |   background-color: color-mix(in srgb, var(--color-secondary) 90%, black); /* darken() is Sass, not CSS */
282 | }
283 |
284 | button.danger, .button.danger {
285 | background-color: var(--color-danger);
286 | color: #fff;
287 | }
288 | button.danger:hover, .button.danger:hover {
289 |   background-color: color-mix(in srgb, var(--color-danger) 90%, black); /* darken() is Sass, not CSS */
290 | }
291 |
292 | button.icon-button {
293 | background: none;
294 | border: none;
295 | color: var(--color-text-muted);
296 | padding: var(--spacing-xs);
297 | font-size: 1.2em; /* Adjust as needed */
298 | line-height: 1;
299 | }
300 | button.icon-button:hover {
301 | color: var(--color-primary);
302 | }
303 |
304 |
305 | /* 5. List & Item Styles (for search results, index) */
306 | .item-list {
307 | margin-top: var(--spacing-lg);
308 | }
309 |
310 | .item-list li {
311 | background-color: var(--color-surface);
312 | padding: var(--spacing-md);
313 | margin-bottom: var(--spacing-md);
314 | border-radius: var(--border-radius);
315 | box-shadow: var(--shadow-sm);
316 | border: 1px solid var(--color-border);
317 | }
318 |
319 | .item-list li .item-title {
320 | font-size: 1.15rem;
321 | font-weight: 600;
322 | margin-bottom: var(--spacing-xs);
323 | }
324 | .item-list li .item-title a {
325 | color: var(--color-primary);
326 | }
327 | .item-list li .item-title a:hover {
328 | text-decoration: underline;
329 | }
330 |
331 | .item-list li .item-url {
332 | font-size: 0.9rem;
333 | color: var(--color-text-muted);
334 | word-break: break-all;
335 | margin-bottom: var(--spacing-sm);
336 | display: block; /* Ensure it's on its own line if needed */
337 | }
338 | .item-list li .item-url a {
339 | color: var(--color-secondary);
340 | }
341 | .item-list li .item-url a:hover {
342 | text-decoration: underline;
343 | }
344 |
345 |
346 | .item-list li .item-snippet {
347 | font-size: 0.95rem;
348 | line-height: 1.6;
349 | color: var(--color-text);
350 | }
351 | .item-list li .item-snippet mark { /* For highlighted search terms */
352 | background-color: var(--color-highlight-bg);
353 | color: var(--color-text); /* Ensure text is readable on highlight */
354 | padding: 0.1em 0.2em;
355 | border-radius: 0.2em;
356 | }
357 |
358 | .item-actions {
359 | margin-top: var(--spacing-sm);
360 | display: flex;
361 | gap: var(--spacing-sm);
362 | }
363 |
364 |
365 | /* Pagination */
366 | .pagination {
367 | display: flex;
368 | justify-content: center;
369 | align-items: center;
370 | gap: var(--spacing-sm);
371 | margin-top: var(--spacing-lg);
372 | padding: var(--spacing-md);
373 | }
374 | .pagination a, .pagination span {
375 | padding: var(--spacing-sm) var(--spacing-md);
376 | border-radius: var(--border-radius);
377 | color: var(--color-primary);
378 | }
379 | .pagination a {
380 | border: 1px solid var(--color-primary);
381 | }
382 | .pagination a:hover {
383 | background-color: var(--color-primary);
384 | color: #fff;
385 | }
386 | .pagination span { /* Current page */
387 | background-color: var(--color-primary);
388 | color: #fff;
389 | font-weight: 600;
390 | }
391 | .pagination .disabled {
392 | color: var(--color-text-muted);
393 | pointer-events: none;
394 | border-color: var(--color-border);
395 | }
396 |
397 |
398 | /* Utilities */
399 | .text-center {
400 | text-align: center;
401 | }
402 | .text-muted {
403 | color: var(--color-text-muted) !important;
404 | }
405 | .mb-0 { margin-bottom: 0 !important; }
406 | .mt-0 { margin-top: 0 !important; }
407 | .debug-info {
408 | font-size: 0.8rem;
409 | color: var(--color-accent);
410 | font-family: var(--font-monospace);
411 | }
412 |
413 | /* Specific for edit index delete button */
414 | .delete-form {
415 | display: inline; /* Keep it on the same line */
416 | }
417 | .delete-button {
418 | background: none;
419 | border: none;
420 | color: var(--color-danger);
421 | padding: 0 var(--spacing-xs);
422 | font-size: 1em;
423 | cursor: pointer;
424 | margin-left: var(--spacing-sm);
425 | }
426 | .delete-button:hover {
427 |   color: color-mix(in srgb, var(--color-danger) 85%, black); /* darken() is Sass, not CSS */
428 | }
429 | .strikethrough {
430 | text-decoration: line-through;
431 | opacity: 0.7;
432 | }
433 |
434 | /* Edit toggle */
435 | .edit-toggle-section {
436 | display: flex;
437 | justify-content: flex-end;
438 | margin-bottom: var(--spacing-md);
439 | }
440 | .edit-toggle-section details {
441 | position: relative; /* For absolute positioning of the button */
442 | }
443 | .edit-toggle-section summary {
444 | display: inline-block;
445 | cursor: pointer;
446 | padding: var(--spacing-xs) var(--spacing-sm);
447 | border-radius: var(--border-radius);
448 | background-color: var(--color-surface);
449 | border: 1px solid var(--color-border);
450 | color: var(--color-text-muted);
451 | }
452 | .edit-toggle-section summary:hover {
453 | border-color: var(--color-primary);
454 | color: var(--color-primary);
455 | }
456 | .edit-toggle-section summary::-webkit-details-marker { /* Hide default arrow */
457 | display: none;
458 | }
459 | .edit-toggle-section summary { /* Hide default arrow in FF: display:none has no effect on ::marker */
460 |   list-style: none;
461 | }
462 | .edit-toggle-section .details-content {
463 | position: absolute;
464 | right: 0;
465 | top: calc(100% + var(--spacing-xs)); /* Position below the summary */
466 | background-color: var(--color-surface);
467 | border: 1px solid var(--color-border);
468 | border-radius: var(--border-radius);
469 | padding: var(--spacing-sm);
470 | box-shadow: var(--shadow-md);
471 | z-index: 10;
472 | white-space: nowrap; /* Prevent button text from wrapping */
473 | }
474 |
475 |
476 | /* Responsive adjustments */
477 | @media (max-width: 768px) {
478 | .site-header {
479 | flex-direction: column;
480 | align-items: flex-start;
481 | gap: var(--spacing-sm);
482 | }
483 | .main-nav ul {
484 | flex-direction: column;
485 | gap: var(--spacing-xs);
486 | }
487 | .input-group {
488 | flex-direction: column;
489 | }
490 | .input-group input[type="search"], .input-group button {
491 | border-radius: var(--border-radius); /* Reset individual border radius */
492 | }
493 | .input-group input[type="search"] {
494 | margin-bottom: var(--spacing-sm);
495 | }
496 | }
497 |
498 | @media (max-width: 480px) {
499 | .container {
500 | width: 95%;
501 | padding-left: var(--spacing-sm);
502 | padding-right: var(--spacing-sm);
503 | }
504 | .site-header h1 {
505 | font-size: 1.5rem;
506 | }
507 | .page-title {
508 | font-size: 1.3rem;
509 | }
510 | button, .button {
511 | padding: var(--spacing-sm) var(--spacing-md); /* Slightly smaller padding */
512 | }
513 | }
514 |
515 | /* 6. Sidebar Layout, Section Toggling & Form Errors */
519 |
520 | /* Layout for pages with a sidebar */
521 | .page-with-sidebar {
522 | display: grid;
523 | grid-template-columns: 220px 1fr; /* Sidebar width and main content */
524 | gap: var(--spacing-lg);
525 | flex-grow: 1; /* Ensure it takes available space in the container */
526 | }
527 |
528 | .page-sidebar {
529 | background-color: var(--color-surface);
530 | padding: var(--spacing-md);
531 | border-radius: var(--border-radius);
532 | box-shadow: var(--shadow-sm);
533 | border-right: 1px solid var(--color-border);
534 | height: fit-content; /* So it doesn't stretch unnecessarily if content is short */
535 | position: sticky; /* Make sidebar sticky */
536 | top: var(--spacing-lg); /* Adjust based on your header or desired spacing */
537 | }
538 |
539 | .page-sidebar h3 {
540 | font-size: 1.1rem;
541 | font-weight: 600;
542 | margin-bottom: var(--spacing-md);
543 | padding-bottom: var(--spacing-sm);
544 | border-bottom: 1px solid var(--color-border);
545 | color: var(--color-text);
546 | }
547 |
548 | .sidebar-nav ul {
549 | list-style: none;
550 | padding: 0;
551 | margin: 0;
552 | }
553 |
554 | .sidebar-nav li a {
555 | display: block;
556 | padding: var(--spacing-sm) var(--spacing-md);
557 | color: var(--color-text-muted);
558 | text-decoration: none;
559 | border-radius: calc(var(--border-radius) / 2);
560 | transition: background-color 0.2s ease-in-out, color 0.2s ease-in-out;
561 | margin-bottom: var(--spacing-xs);
562 | }
563 |
564 | .sidebar-nav li a:hover {
565 | background-color: var(--color-background); /* Subtle hover */
566 | color: var(--color-primary);
567 | }
568 |
569 | .sidebar-nav li a.active {
570 | background-color: var(--color-primary);
571 | color: #fff;
572 | font-weight: 500;
573 | }
574 |
575 | .main-content-area {
576 | /* This will hold the sections that are shown/hidden */
577 | }
578 |
579 | .main-content-area > section {
580 | display: none; /* Hide all sections by default */
581 | animation: fadeIn 0.3s ease-in-out;
582 | }
583 |
584 | .main-content-area > section.active-section {
585 | display: block; /* Show only the active section */
586 | }
587 |
588 | @keyframes fadeIn {
589 | from { opacity: 0; transform: translateY(10px); }
590 | to { opacity: 1; transform: translateY(0); }
591 | }
592 |
593 |
594 | /* Responsive adjustments for sidebar layout */
595 | @media (max-width: 992px) { /* Adjust breakpoint as needed */
596 | .page-with-sidebar {
597 | grid-template-columns: 1fr; /* Stack sidebar and content */
598 | }
599 | .page-sidebar {
600 | position: static; /* Remove stickiness on smaller screens */
601 | margin-bottom: var(--spacing-lg);
602 | border-right: none;
603 | border-bottom: 1px solid var(--color-border);
604 | }
605 | }
606 |
607 | /* Styling for form error messages (if not already present or to refine) */
608 | .form-error-message {
609 | color: var(--color-danger);
610 | background-color: var(--color-surface); /* Or a light red like #f8d7da */
611 | border: 1px solid var(--color-danger);
612 | padding: var(--spacing-md);
613 | margin-bottom: var(--spacing-md);
614 | border-radius: var(--border-radius);
615 | }
616 |
--------------------------------------------------------------------------------
/public/test-injection.html:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/public/top.html:
--------------------------------------------------------------------------------
1 |
4 |
--------------------------------------------------------------------------------
/scripts/build_only.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | #set -x
4 | source "$HOME/.nvm/nvm.sh"
5 |
6 | rm -rf build
7 | mkdir -p build/esm/
8 | mkdir -p build/cjs/
9 | mkdir -p build/global/
10 | mkdir -p build/bin/
11 | nvm use v22
12 | if [[ ! -d "node_modules" ]]; then
13 | npm i
14 | fi
15 | if [[ -n "$NO_MINIFY" ]]; then
16 | ./node_modules/.bin/esbuild src/app.js --bundle --outfile=build/esm/downloadnet.mjs --format=esm --platform=node --analyze
17 | ./node_modules/.bin/esbuild src/app.js --bundle --outfile=build/cjs/out.cjs --platform=node --analyze
18 | else
19 | ./node_modules/.bin/esbuild src/app.js --bundle --outfile=build/esm/downloadnet.mjs --format=esm --platform=node --minify --analyze
20 | ./node_modules/.bin/esbuild src/app.js --bundle --outfile=build/cjs/out.cjs --platform=node --minify --analyze
21 | fi
22 | cp -r public build/
23 | echo "const bigR = require('module').createRequire(__dirname); require = bigR; process.traceProcessWarnings = true; " > build/cjs/dn.cjs
24 | # polyfill for process.disableWarning idea as node arg --disableWarning=ExperimentalWarning is likely not accessible in this setup
25 | #echo "const __orig_emit = process.emit; process.emit = (event, error) => event === 'warning' && error.name === 'ExperimentalWarning' ? false : originalEmit.call(process, event, error);" >> build/cjs/dn.cjs
26 | # although we can use the sea config key disableExperimentalSEAWarning to achieve same
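# illustrative sea-config.json using that key (paths assumed from this build layout):
#   { "main": "build/cjs/dn.cjs", "output": "build/bin/sea-prep.blob", "disableExperimentalSEAWarning": true }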
27 | cat build/cjs/out.cjs >> build/cjs/dn.cjs
28 | echo "#!/usr/bin/env node" > build/global/downloadnet.cjs
29 | cat build/cjs/dn.cjs >> build/global/downloadnet.cjs
30 | chmod +x build/global/downloadnet.cjs
31 | if [[ "$OSTYPE" == darwin* ]]; then
32 | echo "Using macOS builder..." >&2
33 | ./stampers/macos-new.sh dn-macos build/cjs/dn.cjs build/bin/
34 | #./stampers/macos.sh dn-macos build/cjs/dn.cjs build/bin/
35 | elif [[ "$(node.exe -p process.platform)" == win* ]]; then
36 | echo "Using windows builder..." >&2
37 | ./stampers/win.bat dn-win.exe ./build/cjs/dn.cjs ./build/bin/
38 | else
39 | echo "Using linux builder..." >&2
40 | ./stampers/nix.sh dn-nix build/cjs/dn.cjs build/bin/
41 | fi
42 | echo "Done"
43 |
44 | read -p "Any key to exit"
45 |
46 |
--------------------------------------------------------------------------------
/scripts/clean.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | rm package-lock.json; rm -rf node_modules; rm -rf build/*
4 |
--------------------------------------------------------------------------------
/scripts/downloadnet-entitlements.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3 | <plist version="1.0">
4 | <dict>
5 |   <key>com.apple.security.network.server</key>
6 |   <true/>
7 |   <key>com.apple.security.cs.allow-jit</key>
8 |   <true/>
9 |   <key>com.apple.security.cs.allow-unsigned-executable-memory</key>
10 |   <true/>
11 |   <key>com.apple.security.cs.disable-library-validation</key>
12 |   <true/>
13 |   <key>com.apple.security.cs.disable-executable-page-protection</key>
14 |   <true/>
15 | </dict>
16 | </plist>
17 |
18 |
--------------------------------------------------------------------------------
/scripts/go_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cp ./.package.build.json ./package.json
4 | cp ./src/.common.build.js ./src/common.js
5 |
6 |
--------------------------------------------------------------------------------
/scripts/go_dev.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | gut "Just built"
4 | cp ./.package.dev.json ./package.json
5 | cp ./src/.common.dev.js ./src/common.js
6 |
7 |
--------------------------------------------------------------------------------
/scripts/postinstall.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | which brew || /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
4 | which mkcert || brew install mkcert
5 | mkdir -p "$HOME/local-sslcerts"
6 | cd "$HOME/local-sslcerts" || exit 1
7 |
8 | mkcert -key-file privkey.pem -cert-file fullchain.pem localhost
9 | mkcert -install
10 |
11 |
--------------------------------------------------------------------------------
/scripts/publish.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | ./scripts/go_build.sh
4 | gpush minor "$@"
5 | ./scripts/go_dev.sh
6 |
7 |
--------------------------------------------------------------------------------
/scripts/release.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | #./scripts/compile.sh
4 | description=$1
5 | latest_tag=$(git describe --abbrev=0)
6 | grel release -u o0101 -r dn --tag $latest_tag --name "New release" --description '"'"$description"'"'
7 | grel upload -u o0101 -r dn --tag $latest_tag --name "downloadnet-win.exe" --file bin/downloadnet-win.exe
8 | grel upload -u o0101 -r dn --tag $latest_tag --name "downloadnet-linux" --file bin/downloadnet-linux
9 | grel upload -u o0101 -r dn --tag $latest_tag --name "downloadnet-macos" --file bin/downloadnet-macos
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/scripts/sign_windows_release.ps1:
--------------------------------------------------------------------------------
1 | param (
2 | [Parameter(Mandatory=$true)]
3 | [string]$ExePath,
4 |
5 | [Parameter(Mandatory=$true)]
6 | [string]$KeyVaultName,
7 |
8 | [string]$SubscriptionId,
9 | [string]$ResourceGroup,
10 | [string]$CertificateName,
11 | [string]$AppId,
12 | [string]$ClientSecret,
13 | [string]$TenantId,
14 |
15 | # --- Version Info Metadata ---
16 | [string]$CompanyName = "DOSAYGO",
17 | [string]$ProductName = "DownloadNet",
18 | [string]$FileDescription = "Offline full-text search archive of what you browse",
19 | [string]$FileVersion = "4.5.1.0",
20 | [string]$ProductVersion = "4.5.1.0",
21 |
22 | # --- Signature Metadata ---
23 | [string]$SignatureDescription = "DownloadNet - offline full-text search archive of the web for you.",
24 | [string]$SignatureUrl = "https://github.com/DO-SAY-GO/dn"
25 | )
26 |
27 | # --- Function to check/install resedit-cli via npm ---
28 | function Ensure-ReseditInstalled {
29 | $isInstalled = Get-Command "resedit" -ErrorAction SilentlyContinue
30 |
31 | if (-not $isInstalled) {
32 | Write-Host "resedit-cli not found. Attempting to install with npm..." -ForegroundColor Yellow
33 | npm i -g resedit-cli
34 | if ($LASTEXITCODE -ne 0) {
35 | Write-Error "Failed to install resedit-cli using npm. Ensure npm is installed and accessible."
36 | exit 1
37 | }
38 | # Refresh PATH to include newly installed resedit-cli
39 | $env:Path = [System.Environment]::GetEnvironmentVariable("Path", "User") + ";" + [System.Environment]::GetEnvironmentVariable("Path", "Machine")
40 | } else {
41 | Write-Host "resedit-cli is already installed." -ForegroundColor Green
42 | }
43 | }
44 |
45 | # --- Call resedit-cli to update version metadata ---
46 | function Set-VersionMetadata {
47 | Ensure-ReseditInstalled
48 |
49 | Write-Host "Setting executable metadata using resedit-cli..." -ForegroundColor Yellow
50 | $tempOutput = "$ExePath.tmp.exe"
51 | $reseditArgs = @(
52 | "--in", "`"$ExePath`"",
53 | "--out", "`"$tempOutput`"",
54 | "--company-name", "`"$CompanyName`"",
55 | "--product-name", "`"$ProductName`"",
56 | "--file-description", "`"$FileDescription`"",
57 | "--file-version", "`"$FileVersion`"",
58 | "--product-version", "`"$ProductVersion`""
59 | )
60 |
61 | $reseditCommand = "resedit $reseditArgs"
62 | Write-Verbose "Executing: $reseditCommand"
63 | Invoke-Expression $reseditCommand
64 |
65 | if ($LASTEXITCODE -ne 0) {
66 | Write-Error "resedit-cli failed to apply version metadata."
67 | if (Test-Path $tempOutput) { Remove-Item $tempOutput -Force }
68 | exit 1
69 | }
70 |
71 | # Replace original file with updated one
72 | Move-Item -Path $tempOutput -Destination $ExePath -Force
73 | Write-Host "Version metadata applied successfully." -ForegroundColor Green
74 | }
75 |
76 | # --- RUN METADATA SETTING STEP FIRST ---
77 | Set-VersionMetadata
78 |
79 | # --- Configuration (Defaults from original script) ---
80 | $DefaultSPNName = "CodeSigningSP" # Original SPN name
81 | $TimestampServer = "http://timestamp.digicert.com"
82 | $AzureSignToolExe = "AzureSignTool.exe" # Assumes in PATH
83 | $SignToolExe = "signtool.exe" # Assumes in PATH
84 |
85 | # --- Original Script's Flow (unchanged) ---
86 |
87 | function Show-Usage {
88 | Write-Host "Usage: .\sign_windows_downloadnet_configurable_metadata.ps1 -ExePath -KeyVaultName [-SubscriptionId ] [-ResourceGroup ] [-CertificateName ] [-AppId -ClientSecret -TenantId ] [-SignatureDescription ] [-SignatureUrl ]"
89 | exit 1
90 | }
91 |
92 | if (-not $ExePath -or -not $KeyVaultName) { Show-Usage }
93 | if ($AppId -and (-not $ClientSecret -or -not $TenantId)) { Write-Error "Error: If -AppId is provided, -ClientSecret and -TenantId must also be provided."; Show-Usage }
94 | if (-not (Test-Path $ExePath -PathType Leaf)) { Write-Error "Error: Executable not found at path: $ExePath"; exit 1 }
95 |
96 | if (-not $SubscriptionId) {
97 | Write-Host "Fetching the active Azure subscription..."
98 | $subscriptionOutput = az account show | ConvertFrom-Json -ErrorAction SilentlyContinue
99 | if ($LASTEXITCODE -ne 0 -or !$subscriptionOutput.id) { Write-Error "Error: Failed to retrieve active subscription. Ensure 'az' CLI is installed and you are logged in with 'az login'."; exit 1 }
100 | $SubscriptionId = $subscriptionOutput.id
101 | Write-Host "Using active subscription ID: $SubscriptionId"
102 | }
103 |
104 | Write-Host "Setting active subscription to: $SubscriptionId"
105 | az account set --subscription $SubscriptionId
106 | if ($LASTEXITCODE -ne 0) { Write-Error "Error: Failed to set active subscription."; exit 1 }
107 |
108 | Write-Host "Fetching Key Vault details for: $KeyVaultName"
109 | $keyVaultOutput = az keyvault show --name $KeyVaultName --subscription $SubscriptionId | ConvertFrom-Json -ErrorAction SilentlyContinue
110 | if ($LASTEXITCODE -ne 0 -or !$keyVaultOutput.properties.vaultUri) { Write-Error "Error: Failed to retrieve Key Vault details."; exit 1 }
111 | $KeyVaultUrl = $keyVaultOutput.properties.vaultUri
112 | Write-Host "Key Vault URL: $KeyVaultUrl"
113 |
114 | if (-not $ResourceGroup) {
115 | $ResourceGroup = $keyVaultOutput.resourceGroup
116 | if (-not $ResourceGroup) { Write-Error "Error: Could not retrieve resource group from Key Vault details."; exit 1 }
117 | Write-Host "Using resource group from Key Vault: $ResourceGroup"
118 | }
119 |
120 | if (-not $CertificateName) {
121 | Write-Host "CertificateName not provided. Fetching available certificates in Key Vault: $KeyVaultName"
122 | $certListOutput = az keyvault certificate list --vault-name $KeyVaultName | ConvertFrom-Json -ErrorAction SilentlyContinue
123 | if ($LASTEXITCODE -ne 0 -or !$certListOutput) { Write-Error "Error: Failed to list certificates in Key Vault, or no certificates found."; exit 1 }
124 | $certificates = @($certListOutput)
125 | if ($certificates.Count -eq 0) { Write-Error "Error: No certificates found in Key Vault: $KeyVaultName"; exit 1 }
126 | Write-Host "Available certificates:"
127 | $certificates | ForEach-Object { Write-Host " - $($_.name)" }
128 | $CertificateName = $certificates[0].name
129 | Write-Host "Using first available certificate: $CertificateName" -ForegroundColor Green
130 | }
131 |
132 | if (-not $AppId) {
133 | Write-Host "Service Principal AppId not provided. Creating a new service principal named '$DefaultSPNName'..."
134 | $scope = "/subscriptions/$SubscriptionId/resourceGroups/$ResourceGroup/providers/Microsoft.KeyVault/vaults/$KeyVaultName"
135 | # Using "Contributor" role as in the original script.
136 | # For production, consider least privilege (e.g., custom role with only cert get & key sign).
137 | $spnOutput = az ad sp create-for-rbac --name $DefaultSPNName --role Contributor --scopes $scope | ConvertFrom-Json -ErrorAction SilentlyContinue
138 | if ($LASTEXITCODE -ne 0 -or !$spnOutput.appId) { Write-Error "Error: Failed to create service principal."; exit 1 }
139 | $AppId = $spnOutput.appId
140 | $ClientSecret = $spnOutput.password
141 | $TenantId = $spnOutput.tenant
142 | Write-Host "Service principal '$DefaultSPNName' created successfully." -ForegroundColor Green
143 | Write-Host "AppId : $AppId"
144 | Write-Host "Secret : $ClientSecret (Note: This secret is shown only once. Store it securely.)"
145 | Write-Host "TenantId: $TenantId"
146 |
147 | # Grant permissions using set-policy as in the original script
148 | Write-Host "Setting Key Vault access policy for SPN '$AppId'..."
149 | az keyvault set-policy --name $KeyVaultName --spn $AppId --key-permissions sign --certificate-permissions get
150 | if ($LASTEXITCODE -ne 0) { Write-Error "Error: Failed to set Key Vault policy."; exit 1 }
151 | Write-Host "Key Vault access policy set successfully." -ForegroundColor Green
152 | }
153 |
154 | # --- Construct AzureSignTool command with metadata flags ---
155 | $signToolBaseArgs = @(
156 | "sign",
157 | "-kvu", "`"$KeyVaultUrl`"",
158 | "-kvi", "`"$AppId`"",
159 | "-kvs", "`"$ClientSecret`"", # ClientSecret might contain special characters
160 | "-kvt", "`"$TenantId`"",
161 | "-kvc", "`"$CertificateName`"",
162 | "-tr", "`"$TimestampServer`""
163 | )
164 | # Add description if provided
165 | if ($SignatureDescription) {
166 | $signToolBaseArgs += "-d", "`"$SignatureDescription`""
167 | }
168 | # Add description URL if provided
169 | if ($SignatureUrl) {
170 | $signToolBaseArgs += "-du", "`"$SignatureUrl`""
171 | }
172 | # Add verbose flag and executable path
173 | $signToolBaseArgs += "-v", "`"$ExePath`""
174 |
175 | $signCommand = "$AzureSignToolExe $($signToolBaseArgs -join ' ')"
176 |
177 | Write-Host "Signing the executable: $ExePath (Cert: $CertificateName, KV: $KeyVaultName)" -ForegroundColor Yellow
178 | Write-Verbose "Executing: $signCommand"
179 | $signOutput = Invoke-Expression $signCommand
180 |
181 | if ($LASTEXITCODE -ne 0) {
182 | Write-Error "Error: Failed to sign the executable with AzureSignTool. Exit code: $LASTEXITCODE"
183 | Write-Error "AzureSignTool Output: $signOutput"
184 | exit 1
185 | }
186 | Write-Host "Executable signed successfully by AzureSignTool." -ForegroundColor Green
187 | $signOutput | Write-Host
188 |
189 | Write-Host "Verifying the signature using $SignToolExe..." -ForegroundColor Yellow
190 | $verifyCommand = "$SignToolExe verify /pa `"$ExePath`""
191 | Write-Verbose "Executing: $verifyCommand"
192 | $verifyOutput = Invoke-Expression $verifyCommand
193 |
194 | if ($LASTEXITCODE -ne 0) {
195 | Write-Error "Error: Signature verification failed with $SignToolExe. Exit code: $LASTEXITCODE"
196 | Write-Error "$SignToolExe Output: $verifyOutput"
197 | exit 1
198 | }
199 | Write-Host "Signature verified successfully by $SignToolExe." -ForegroundColor Green
200 | $verifyOutput | Write-Host
201 |
202 | Write-Host "Signing process completed." -ForegroundColor Green
203 |
--------------------------------------------------------------------------------
/sign-win.ps1:
--------------------------------------------------------------------------------
1 | .\scripts\sign_windows_release.ps1 -ExePath .\build\bin\dn-win.exe -KeyVaultName codeSigningForever
2 |
3 |
--------------------------------------------------------------------------------
/src/args.js:
--------------------------------------------------------------------------------
1 | import os from 'os';
2 | import path from 'path';
3 | import fs from 'fs';
4 |
5 | const server_port = process.env.PORT || process.argv[2] || 22120;
6 | const mode = process.argv[3] || 'save';
7 | const chrome_port = process.argv[4] || 9222;
8 |
9 | const Pref = {};
10 | export const CONFIG_DIR = path.resolve(os.homedir(), '.config', 'dosyago', 'DownloadNet');
11 | fs.mkdirSync(CONFIG_DIR, {recursive:true});
12 | const pref_file = path.resolve(CONFIG_DIR, 'config.json');
13 | const cacheId = Math.random();
14 |
15 | loadPref();
16 |
17 | let BasePath = Pref.BasePath;
18 | export const archive_root = () => path.resolve(BasePath, '22120-arc');
19 | export const no_file = () => path.resolve(archive_root(), 'no.json');
20 | export const temp_browser_cache = () => path.resolve(archive_root(), 'temp-browser-cache' + cacheId);
21 | export const library_path = () => path.resolve(archive_root(), 'public', 'library');
22 | export const cache_file = () => path.resolve(library_path(), 'cache.json');
23 | export const index_file = () => path.resolve(library_path(), 'index.json');
24 | export const fts_index_dir = () => path.resolve(library_path(), 'fts');
25 |
26 | const flex_fts_index_dir = base => path.resolve(base || fts_index_dir(), 'flex');
27 | const ndx_fts_index_dir = base => path.resolve(base || fts_index_dir(), 'ndx');
28 | const fuzzy_fts_index_dir = base => path.resolve(base || fts_index_dir(), 'fuzzy');
29 |
30 | const results_per_page = 10;
31 |
32 | updateBasePath(process.argv[5] || Pref.BasePath || CONFIG_DIR);
33 |
34 | const args = {
35 | mode,
36 |
37 | server_port,
38 | chrome_port,
39 |
40 | updateBasePath,
41 | getBasePath,
42 |
43 | library_path,
44 | no_file,
45 | temp_browser_cache,
46 | cache_file,
47 | index_file,
48 | fts_index_dir,
49 | flex_fts_index_dir,
50 | ndx_fts_index_dir,
51 | fuzzy_fts_index_dir,
52 |
53 | results_per_page,
54 | CONFIG_DIR
55 | };
56 |
57 | export default args;
58 |
59 | function updateBasePath(new_base_path, {force:force = false, before: before = []} = {}) {
60 | new_base_path = path.resolve(new_base_path);
61 | if ( !force && (BasePath == new_base_path) ) {
62 | return false;
63 | }
64 |
65 | console.log(`Updating base path from ${BasePath} to ${new_base_path}...`);
66 | BasePath = new_base_path;
67 |
68 | if ( Array.isArray(before) ) {
69 | for( const task of before ) {
70 | try { task(); } catch(e) {
71 |         console.error(`before updateBasePath task failed. Task: ${task}`, e);
72 | }
73 | }
74 | } else {
75 | throw new TypeError(`If given, argument before to updateBasePath() must be an array of functions.`);
76 | }
77 |
78 | if ( !fs.existsSync(library_path()) ) {
79 | console.log(`Archive directory (${library_path()}) does not exist, creating...`);
80 | fs.mkdirSync(library_path(), {recursive:true});
81 | console.log(`Created.`);
82 | }
83 |
84 | if ( !fs.existsSync(cache_file()) ) {
85 | console.log(`Cache file does not exist, creating...`);
86 | fs.writeFileSync(cache_file(), JSON.stringify([]));
87 | console.log(`Created!`);
88 | }
89 |
90 | if ( !fs.existsSync(index_file()) ) {
91 | //console.log(`INDEXLOG: Index file does not exist, creating...`);
92 | fs.writeFileSync(index_file(), JSON.stringify([]));
93 | console.log(`Created!`);
94 | }
95 |
96 | if ( !fs.existsSync(flex_fts_index_dir()) ) {
97 | console.log(`FTS Index directory does not exist, creating...`);
98 | fs.mkdirSync(flex_fts_index_dir(), {recursive:true});
99 | console.log(`Created!`);
100 | }
101 |
102 | if ( !fs.existsSync(ndx_fts_index_dir()) ) {
103 | console.log(`NDX FTS Index directory does not exist, creating...`);
104 | fs.mkdirSync(ndx_fts_index_dir(), {recursive:true});
105 | console.log(`Created!`);
106 | }
107 |
108 | if ( !fs.existsSync(fuzzy_fts_index_dir()) ) {
109 | console.log(`FUZZY FTS Index directory does not exist, creating...`);
110 | fs.mkdirSync(fuzzy_fts_index_dir(), {recursive:true});
111 | fs.writeFileSync(path.resolve(fuzzy_fts_index_dir(), 'docs.fzz'), JSON.stringify([]));
112 | console.log('Also creating FUZZY FTS Index docs file...');
113 | console.log(`Created all!`);
114 | }
115 |
116 |
117 |
118 | console.log(`Base path updated to: ${BasePath}. Saving to preferences...`);
119 | Pref.BasePath = BasePath;
120 | savePref();
121 | console.log(`Saved!`);
122 |
123 | return true;
124 | }
125 |
126 | function getBasePath() {
127 | return BasePath;
128 | }
129 |
130 | export function loadPref() {
131 | if ( fs.existsSync(pref_file) ) {
132 | try {
133 | Object.assign(Pref, JSON.parse(fs.readFileSync(pref_file)));
134 | } catch(e) {
135 | console.warn("Error reading from preferences file", e);
136 | }
137 | } else {
138 | console.log("Preferences file does not exist. Creating one...");
139 | savePref();
140 | }
141 | return clone(Pref);
142 | }
143 |
144 | function savePref() {
145 | try {
146 | fs.writeFileSync(pref_file, JSON.stringify(Pref,null,2));
147 | } catch(e) {
148 | console.warn("Error writing preferences file", pref_file, Pref, e);
149 | }
150 | }
151 |
152 | function clone(o) {
153 | return JSON.parse(JSON.stringify(o));
154 | }
155 |
156 |
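A hypothetical call site for `updateBasePath`, sketching the `{force, before}` options the function validates above; the target path and the task in the list are made up for illustration:

```js
import args from './src/args.js';

// Switch the archive root; each `before` task runs first (failures are
// caught and logged inside updateBasePath), then any missing library
// files are recreated under the new base path.
const moved = args.updateBasePath('/mnt/big-disk/dn', {
  force: false, // returns false without doing anything if the path is unchanged
  before: [
    () => console.log('about to switch archive roots'),
  ],
});
console.log(moved ? 'base path updated' : 'base path unchanged');
```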
--------------------------------------------------------------------------------
/src/blockedResponse.js:
--------------------------------------------------------------------------------
1 | export const BLOCKED_CODE = 200;
2 | export const BLOCKED_BODY = Buffer.from(`
3 |
4 |   <h1>Request blocked</h1>
5 |   <p>This navigation was prevented by 22120 as a Chrome bug fix for some requests causing issues.</p>
6 | `).toString("base64");
7 | export const BLOCKED_HEADERS = [
8 | {name: "X-Powered-By", value: "Dosyago-Corporation"},
9 | {name: "X-Blocked-Internally", value: "Custom 22120 Chrome bug fix"},
10 | {name: "Accept-Ranges", value: "bytes"},
11 | {name: "Cache-Control", value: "public, max-age=0"},
12 | {name: "Content-Type", value: "text/html; charset=UTF-8"},
13 | {name: "Content-Length", value: `${BLOCKED_BODY.length}`}
14 | ];
15 |
16 | const BLOCKED_RESPONSE = `
17 | HTTP/1.1 ${BLOCKED_CODE} OK
18 | X-Powered-By: Zanj-Dosyago-Corporation
19 | X-Blocked-Internally: Custom ad blocking
20 | Accept-Ranges: bytes
21 | Cache-Control: public, max-age=0
22 | Content-Type: text/html; charset=UTF-8
23 | Content-Length: ${BLOCKED_BODY.length}
24 |
25 | ${BLOCKED_BODY}
26 | `;
27 |
28 | export default BLOCKED_RESPONSE;
29 |
30 |
--------------------------------------------------------------------------------
/src/bookmarker.js:
--------------------------------------------------------------------------------
1 | import os from 'os';
2 | import Path from 'path';
3 | import fs from 'fs';
4 |
5 | import {DEBUG as debug} from './common.js';
6 |
7 | const DEBUG = debug || false;
8 | // Chrome user data directories by platform.
9 | // Source 1: https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
10 | // Source 2: https://superuser.com/questions/329112/where-are-the-user-profile-directories-of-google-chrome-located-in
11 |
12 | const FS_WATCH_OPTS = {
13 | persistent: false,
14 | };
15 |
16 | // Note:
17 | // Not all of the paths below are currently used or supported by this code
18 | const UDD_PATHS = {
19 | 'win': '%LOCALAPPDATA%\\Google\\Chrome\\User Data',
20 | 'winxp' : '%USERPROFILE%\\Local Settings\\Application Data\\Google\\Chrome\\User Data',
21 | 'macos' : Path.resolve(os.homedir(), 'Library/Application Support/Google/Chrome'),
22 | 'nix' : Path.resolve(os.homedir(), '.config/google-chrome'),
23 | 'chromeos': '/home/chronos', /* no support */
24 | 'ios': 'Library/Application Support/Google/Chrome', /* no support */
25 | };
26 | const PLAT_TABLE = {
27 | 'darwin': 'macos',
28 | 'linux': 'nix'
29 | };
30 | const PROFILE_DIR_NAME_REGEX = /^(Default|Profile \d+)$/i;
31 | const isProfileDir = name => PROFILE_DIR_NAME_REGEX.test(name);
32 | const BOOKMARK_FILE_NAME_REGEX = /^Bookmarks$/i;
33 | const isBookmarkFile = name => BOOKMARK_FILE_NAME_REGEX.test(name);
34 | const State = {
35 | active: new Set(), /* active Bookmark files (we don't know these until file changes) */
36 | books: {
37 |
38 | }
39 | };
40 |
41 | export async function* bookmarkChanges() {
42 | // try to get the profile directory
43 | const rootDir = getProfileRootDir();
44 |
45 | if ( !fs.existsSync(rootDir) ) {
46 | throw new TypeError(`Sorry! The directory where we thought the Chrome profile directories may be found (${rootDir}), does not exist. We can't monitor changes to your bookmarks, so Bookmark Select Mode is not supported.`);
47 | }
48 |
49 | // state constants and variables (including chokidar file glob observer)
50 | const observers = [];
51 | const ps = [];
52 | let change = false;
53 | let notifyChange = false;
54 | let stopLooping = false;
55 | let shuttingDown = false;
56 |
57 | // create sufficient observers
58 | const dirs = fs.readdirSync(rootDir, {withFileTypes:true}).reduce((Files, dirent) => {
59 | if ( dirent.isDirectory() && isProfileDir(dirent.name) ) {
60 | const filePath = Path.resolve(rootDir, dirent.name);
61 |
62 | if ( fs.existsSync(filePath) ) {
63 | Files.push(filePath);
64 | }
65 | }
66 | return Files;
67 | }, []);
68 | for( const dirPath of dirs ) {
69 | // first read it in
70 | const filePath = Path.resolve(dirPath, 'Bookmarks');
71 | if ( fs.existsSync(filePath) ) {
72 | const data = fs.readFileSync(filePath);
73 | const jData = JSON.parse(data);
74 | State.books[filePath] = flatten(jData, {toMap:true});
75 | }
76 |
77 | const observer = fs.watch(dirPath, FS_WATCH_OPTS);
78 | console.log(`Observing ${dirPath}`);
79 | // Note:
80 | // allow the parent process to exit
81 | // even if the observer is still active somehow
82 | observer.unref();
83 |
84 | // listen for all events from the observer
85 | observer.on('change', (event, filename) => {
86 | filename = filename || '';
87 | // listen to everything
88 | const path = Path.resolve(dirPath, filename);
89 | DEBUG.verboseSlow && console.log(event, path);
90 | if ( isBookmarkFile(filename) ) {
91 | if ( ! State.active.has(path) ) {
92 | State.active.add(path);
93 | }
94 | // but only act if it is a bookmark file
95 | DEBUG.verboseSlow && console.log(event, path, notifyChange);
96 | // save the event type and file it happened to
97 | change = {event, path};
98 | // drop the most recently pushed promise from our bookkeeping list
99 | ps.pop();
100 | // resolve the promise in the wait loop to process the bookmark file and emit the changes
101 | notifyChange && notifyChange();
102 | }
103 | });
104 | observer.on('error', error => {
105 | console.warn(`Bookmark file observer for ${dirPath} error`, error);
106 | observers.splice(observers.indexOf(observer), 1);
107 | if ( observers.length ) {
108 | notifyChange && notifyChange();
109 | } else {
110 | stopLooping && stopLooping();
111 | }
112 | });
113 | observer.on('close', () => {
114 | console.info(`Observer for ${dirPath} closed`);
115 | observers.splice(observers.indexOf(observer), 1);
116 | if ( observers.length ) {
117 | notifyChange && notifyChange();
118 | } else {
119 | stopLooping && stopLooping();
120 | }
121 | });
122 |
123 | observers.push(observer);
124 | }
125 |
126 | // make sure we kill the watcher on process restart or shutdown
127 | process.on('SIGTERM', shutdown);
128 | process.on('SIGHUP', shutdown);
129 | process.on('SIGINT', shutdown);
130 | process.on('SIGBRK', shutdown);
131 |
132 | // the main wait loop that enables us to turn a traditional NodeJS EventEmitter
133 | // into an asynchronous stream generator
134 | waiting: while(true) {
135 | // Note: code resilience
136 | // the two statements below can come in any order in this loop; both work
137 |
138 | // get, process and publish changes
139 | // only do if the change is there (first time it won't be because
140 | // we haven't yielded out (async or yield) yet)
141 | if ( change ) {
142 | const {path} = change;
143 | change = false;
144 |
145 | try {
146 | const changes = flatten(
147 | JSON.parse(fs.readFileSync(path)),
148 | {toMap:true, map: State.books[path]}
149 | );
150 |
151 | for( const changeEvent of changes ) yield changeEvent;
152 | } catch(e) {
153 | console.warn(`Error publishing Bookmarks changes`, e);
154 | }
155 | }
156 |
157 | // wait for the next change
158 | // always wait tho (to allow queueing of the next event to process)
159 | try {
160 | await new Promise((res, rej) => {
161 | // save these
162 | notifyChange = res; // so we can trigger the next turn of the loop
163 | stopLooping = rej; // so we can break out of the loop (on shutdown)
164 | ps.push({res,rej}); // so we can clean up any left over promises
165 | });
166 | } catch {
167 | ps.pop();
168 | break waiting;
169 | }
170 | }
171 |
172 | shutdown();
173 |
174 | return true;
175 |
176 | async function shutdown() {
177 | if ( shuttingDown ) return;
178 | shuttingDown = true;
179 | console.log('Bookmark observer shutting down...');
180 | // clean up any outstanding waiting promises
181 | while ( ps.length ) {
182 | /* eslint-disable no-empty */
183 | try { ps.pop().rej(); } catch(e) {}
184 | /* eslint-enable no-empty */
185 | }
186 | // stop the waiting loop
187 | stopLooping && setTimeout(() => stopLooping('bookmark watching stopped'), 0);
188 | // clean up any observers
189 | while(observers.length) {
190 | /* eslint-disable no-empty */
191 | try { observers.pop().close(); } catch(e) {}
192 | /* eslint-enable no-empty */
193 | }
194 | console.log('Bookmark observer shut down cleanly.');
195 | }
196 | }
197 |
198 | export function hasBookmark(url) {
199 | return Object.keys(State.books).filter(key => {
200 | if ( State.active.size == 0 ) return true;
201 | return State.active.has(key);
202 | }).map(key => State.books[key])
203 | .some(map => map.has(url));
204 | }
205 |
206 | function getProfileRootDir() {
207 | const plat = os.platform();
208 | let name = PLAT_TABLE[plat];
209 | let rootDir;
210 |
211 | DEBUG.verboseSlow && console.log({plat, name});
212 |
213 | if ( !name ) {
214 | if ( plat === 'win32' ) {
215 | // because Chrome profile dir location only changes in XP
216 | // we only care if it's XP or not and so
217 | // we try to resolve based on the version major and minor (given by release)
218 | // source: https://docs.microsoft.com/en-us/windows/win32/sysinfo/operating-system-version?redirectedfrom=MSDN
219 | const rel = os.release();
220 | const ver = parseFloat(rel);
221 | if ( !Number.isNaN(ver) && ver <= 5.2 ) {
222 | // this should be reliable
223 | name = 'winxp';
224 | } else {
225 | // this may not be reliable, but we just do it
226 | name = 'win';
227 | }
228 | } else {
229 | throw new TypeError(
230 | `Sorry! We don't know how to find the default Chrome profile on OS platform: ${plat}`
231 | );
232 | }
233 | }
234 |
235 | if ( UDD_PATHS[name] ) {
236 | rootDir = Path.resolve(resolveEnvironmentVariablesToPathSegments(UDD_PATHS[name]));
237 | } else {
238 | throw new TypeError(
239 | `Sorry! We don't know how to find the default Chrome profile on OS name: ${name}`
240 | );
241 | }
242 |
243 | return rootDir;
244 | }
245 |
246 | function flatten(bookmarkObj, {toMap: toMap = false, map} = {}) {
247 | const nodes = [...Object.values(bookmarkObj.roots)];
248 | const urls = toMap? (map || new Map()) : [];
249 | const urlSet = new Set();
250 | const changes = [];
251 |
252 | while(nodes.length) {
253 | const next = nodes.pop();
254 | const {name, type, url} = next;
255 | switch(type) {
256 | case "url":
257 | if ( toMap ) {
258 | if ( map ) {
259 | if ( urls.has(url) ) {
260 | const {name:oldName} = urls.get(url);
261 | if ( name !== oldName ) {
262 | if ( !urlSet.has(url) ) {
263 | changes.push({
264 | type: "Title updated",
265 | url,
266 | oldName,
267 | name
268 | });
269 | }
270 | }
271 | } else {
272 | changes.push({
273 | type: "new",
274 | name, url
275 | });
276 | }
277 | }
278 | if ( !urlSet.has(url) ) {
279 | urls.set(url, next);
280 | }
281 | urlSet.add(url);
282 | } else {
283 | urls.push(next);
284 | }
285 | break;
286 | case "folder":
287 | nodes.push(...next.children);
288 | break;
289 | default:
290 | console.info("New type", type, next);
291 | break;
292 |
293 | }
294 | }
295 |
296 | if (map) {
297 | [...map.keys()].forEach(url => {
298 | if ( !urlSet.has(url) ) {
299 | changes.push({
300 | type: "delete",
301 | url
302 | });
303 | map.delete(url);
304 | }
305 | });
306 | }
307 |
308 | return map ? changes : urls;
309 | }
310 |
311 | // source: https://stackoverflow.com/a/33017068
312 | function resolveEnvironmentVariablesToPathSegments(path) {
313 | return path.replace(/%([^%]+)%/g, function(_, key) {
314 | return process.env[key];
315 | });
316 | }
317 |
318 | /*
319 | test();
320 | async function test() {
321 | for await ( const change of bookmarkChanges() ) {
322 | console.log(change);
323 | }
324 | }
325 | */
326 |
327 |
328 | /*
329 | function* profileDirectoryEnumerator(maxN = 9999) {
330 | let index = 0;
331 | while(index <= maxN) {
332 | const profileDirName = index ? `Profile ${index}` : `Default`;
333 | yield profileDirName; index++;
334 | }
335 | }
336 | */
337 |
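338 | /*
339 | // A minimal sketch of flatten() (editor's addition), using the node shape of a
340 | // Chrome "Bookmarks" file: roots -> folders with children -> {type, name, url}.
341 | const sample = {
342 |   roots: {
343 |     bookmark_bar: {
344 |       type: "folder",
345 |       children: [
346 |         {type: "url", name: "Example", url: "https://example.com"}
347 |       ]
348 |     }
349 |   }
350 | };
351 | // With no {toMap} option, flatten returns a flat array of the url nodes:
352 | console.log(flatten(sample)); // [{type: "url", name: "Example", url: "https://example.com"}]
353 | */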
--------------------------------------------------------------------------------
/src/common.js:
--------------------------------------------------------------------------------
1 | import path from 'path';
2 | import {fileURLToPath} from 'url';
3 | import fs from 'fs';
4 | import os from 'os';
5 | import { root } from './root.js';
6 |
7 | const { APP_ROOT: __ROOT } = root;
8 |
9 | const DEEB = process.env.DEBUG_22120_VERBOSE || false;
10 |
11 | export const DEBUG = {
12 | showBrowser: false,
13 | verboseBrowser: false,
14 | showList: false,
15 | showStatus: false,
16 | debugSec: false,
17 | askFirst: true,
18 | verboseSlow: process.env.VERBOSE_DEBUG_22120 || DEEB,
19 | debug: process.env.DEBUG_22120 || DEEB,
20 | verbose: DEEB || process.env.VERBOSE_DEBUG_22120 || process.env.DEBUG_22120,
21 | checkPred: false,
22 | }
23 | export const SHOW_FETCH = false;
24 |
25 | if ( DEBUG.debug ) {
26 | console.log({__ROOT});
27 | }
28 |
29 | // server related
30 | export const PUBLIC_SERVER = true;
31 |
32 | // crawl related
33 | export const MIN_TIME_PER_PAGE = 10000;
34 | export const MAX_TIME_PER_PAGE = 32000;
35 | export const MIN_WAIT = 200;
36 | export const MAX_WAITS = 300;
37 | export const BATCH_SIZE = 5; // crawl batch size (how many concurrent tabs for crawling)
38 | export const MAX_REAL_URL_LENGTH = 2**15 - 1;
39 |
40 | export const CHECK_INTERVAL = 400;
41 | export const TEXT_NODE = 3;
42 | export const MAX_HIGHLIGHTABLE_LENGTH = 0; /* 0 is no max length for highlight */
43 | export const MAX_TITLE_LENGTH = 140;
44 | export const MAX_URL_LENGTH = 140;
45 | export const MAX_HEAD = 140;
46 |
47 | const LOCALP = path.resolve(os.homedir(), 'local-sslcerts', 'privkey.pem');
48 | const ANYP = path.resolve(os.homedir(), 'sslcerts', 'privkey.pem');
49 | export const GO_SECURE = fs.existsSync(LOCALP) || fs.existsSync(ANYP);
50 | const cert_path = GO_SECURE ? path.dirname(fs.existsSync(LOCALP) ? LOCALP : fs.existsSync(ANYP) ? ANYP : null) : null;
51 | export const CERT_PATH = () => GO_SECURE ? cert_path : false;
52 |
53 | export class RichError extends Error {
54 |   constructor(msg) {
55 |     // serialize first: a derived constructor may only call super() once
56 |     let textMessage;
57 |     try {
58 |       textMessage = JSON.stringify(msg);
59 |     } catch(e) {
60 |       console.warn(`Could not create RichError from argument ${msg.toString ? msg.toString() : msg} as JSON serialization failed. RichError argument MUST be JSON serializable. Failure error was:`, e);
61 |       textMessage = String(msg);
62 |     }
63 |     super(textMessage);
64 |   }
65 | }
66 |
67 | /* text nodes inside these elements that are ignored */
68 | export const FORBIDDEN_TEXT_PARENT = new Set([
69 | 'STYLE',
70 | 'SCRIPT',
71 | 'NOSCRIPT',
72 | /* we could remove these last two so as to index them as well */
73 | 'DATALIST',
74 | 'OPTION'
75 | ]);
76 | export const ERROR_CODE_SAFE_TO_IGNORE = new Set([
77 | -32000, /* message:
78 | Can only get response body on requests captured after headers received.
79 | * ignore because:
80 | seems to only happen when new navigation aborts all
81 | pending requests of the unloading page
82 | */
83 | -32602, /* message:
84 | Invalid InterceptionId.
85 | * ignore because:
86 | seems to only happen when new navigation aborts all
87 | pending requests of the unloading page
88 | */
89 | ]);
90 |
91 | export const SNIP_CONTEXT = 31;
92 |
93 | export const NO_SANDBOX = (process.env.DEBUG_22120 && process.env.SET_22120_NO_SANDBOX) || false;
94 |
95 | export const APP_ROOT = __ROOT;
96 |
97 | export const sleep = ms => new Promise(res => setTimeout(res, ms));
98 |
99 | export function say(o) {
100 | console.log(JSON.stringify(o));
101 | }
102 |
103 | export function clone(o) {
104 | return JSON.parse(JSON.stringify(o));
105 | }
106 |
107 | export async function untilTrue(pred, waitOverride = MIN_WAIT, maxWaits = MAX_WAITS) {
108 | if ( waitOverride < 0 ) {
109 | maxWaits = -1;
110 | waitOverride = MIN_WAIT;
111 | }
112 | let waitCount = 0;
113 | let resolve;
114 | const pr = new Promise(res => resolve = res);
115 | setTimeout(checkPred, 0);
116 | return pr;
117 |
118 | async function checkPred() {
119 | DEBUG.checkPred && console.log('Checking', pred.toString());
120 | if ( await pred() ) {
121 | return resolve(true);
122 | } else {
123 | waitCount++;
124 | if ( waitCount < maxWaits || maxWaits < 0 ) {
125 | setTimeout(checkPred, waitOverride);
126 | } else {
127 | resolve(false);
128 | }
129 | }
130 | }
131 | }
132 |
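133 | // A minimal usage sketch of untilTrue (editor's addition): poll a predicate
134 | // every MIN_WAIT ms until it returns true, giving up after MAX_WAITS checks.
135 | /*
136 |   let ready = false;
137 |   setTimeout(() => ready = true, 1000);
138 |   const ok = await untilTrue(() => ready); // true once the flag flips
139 |   // a negative waitOverride (e.g. untilTrue(pred, -1)) polls forever
140 | */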
--------------------------------------------------------------------------------
/src/hello.js:
--------------------------------------------------------------------------------
1 | console.log(`hello...is it me you're looking for?`);
2 |
--------------------------------------------------------------------------------
/src/highlighter.js:
--------------------------------------------------------------------------------
1 | // highlighter.js
2 |
3 | import ukkonen from 'ukkonen';
4 | import {DEBUG} from './common.js';
5 |
6 | const MAX_ACCEPT_SCORE = 0.5;
7 | const CHUNK_SIZE = 12;
8 |
9 | // Helper to wrap query terms with tags within a text
10 | // This function will be used by both highlight and trilight before returning results.
11 | function internalMarkText(textToMark, queryToFind) {
12 | if (!textToMark || !queryToFind) return textToMark;
13 | try {
14 | // Case-insensitive replacement, escaping regex special characters in query
15 | const escapedQuery = queryToFind.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
16 | const regex = new RegExp('(' + escapedQuery + ')', 'gi');
17 | return textToMark.replace(regex, '<mark>$1</mark>');
18 | } catch (e) {
19 | console.warn("internalMarkText: Regex failed for query:", queryToFind, e);
20 | return textToMark; // Fallback to original text if regex fails
21 | }
22 | }
23 |
24 |
25 | function calculateUkkonenParams(queryLength, chunkSize = CHUNK_SIZE) {
26 | // Renamed from 'params' for clarity
27 | const maxDistance = chunkSize; // Max edit distance for Ukkonen
28 | const minPossibleScore = Math.abs(queryLength - chunkSize); // Minimum edits based on length difference
29 | // Max possible score range (denominator for scaling)
30 | let maxScoreRange = Math.max(queryLength, chunkSize) - minPossibleScore;
31 | if (maxScoreRange === 0) maxScoreRange = 1; // Avoid division by zero
32 |
33 | return {maxDistance, minPossibleScore, maxScoreRange};
34 | }
35 |
36 | export function highlight(query, docString, {
37 | maxLength = 0,
38 | maxAcceptScore = MAX_ACCEPT_SCORE,
39 | chunkSize = CHUNK_SIZE,
40 | // Options from the server (around, before) are now handled internally;
41 | // numResults and contextChars are effectively handled by the original logic's
42 | // "better.slice(0,3)" and "extra" context, respectively.
43 | } = {}) {
44 | if (chunkSize % 2) {
45 | // Original code threw an error. Preserving this behavior.
46 | throw new TypeError(`chunkSize must be even. Was: ${chunkSize} which is odd.`);
47 | }
48 |
49 | let docChars = Array.from(docString); // Use character array for Unicode safety
50 | if (maxLength > 0 && docChars.length > maxLength) {
51 | docChars = docChars.slice(0, maxLength);
52 | }
53 |
54 | if (docChars.length === 0 || query.trim() === "") {
55 | return [];
56 | }
57 |
58 | const queryLower = query.toLocaleLowerCase(); // Lowercase query once
59 | const queryLength = Array.from(query).length; // Unicode-safe query length
60 |
61 | if (queryLength === 0) return [];
62 |
63 | const {maxDistance, minPossibleScore, maxScoreRange} = calculateUkkonenParams(queryLength, chunkSize);
64 |
65 | // --- Fragment Generation (Identical to original) ---
66 | // First set of fragments (docChars1)
67 | const docChars1 = [...docChars]; // Create a mutable copy
68 | // Pad to make length a multiple of chunkSize
69 | const padding1Length = (chunkSize - docChars1.length % chunkSize) % chunkSize;
70 | docChars1.push(...Array(padding1Length).fill(' '));
71 | const fragments1 = docChars1.reduce(getFragmenter(chunkSize, {symbolsArray: docChars}), []); // Pass original docChars for context
72 |
73 | // Second set of fragments (docChars2) with offset
74 | const docChars2 = [...docChars]; // Create another mutable copy
75 | // Pad start by half chunkSize
76 | docChars2.splice(0, 0, ...Array(chunkSize / 2).fill(' '));
77 | // Pad end to make length a multiple of chunkSize
78 | const padding2Length = (chunkSize - docChars2.length % chunkSize) % chunkSize;
79 | docChars2.push(...Array(padding2Length).fill(' '));
80 | const fragments2 = docChars2.reduce(getFragmenter(chunkSize, {symbolsArray: docChars, initialOffset: -(chunkSize/2)}), []); // Adjust offset
81 |
82 | DEBUG.verboseSlow && console.log("highlight: fragments1 count:", fragments1.length, "fragments2 count:", fragments2.length);
83 |
84 | const allFragments = [...fragments1, ...fragments2];
85 | const scoredFragments = allFragments.map(fragment => {
86 | // fragment.text is already from the original doc, no need to lowercase it here for distance calculation
87 | // ukkonen should compare queryLower with fragment.text.toLocaleLowerCase()
88 | const distance = ukkonen(queryLower, fragment.text.toLocaleLowerCase(), maxDistance);
89 |
90 | let scaledScore;
91 | if (distance === -1) { // Exceeded maxDistance
92 | scaledScore = Infinity;
93 | } else {
94 | scaledScore = (distance - minPossibleScore) / maxScoreRange;
95 | }
96 | return {score: scaledScore, fragment}; // fragment object contains {text, offset, symbols}
97 | });
98 |
99 | // Sort ascending (smallest scores win)
100 | scoredFragments.sort((a, b) => a.score - b.score);
101 |
102 | const initialHighlights = [];
103 | for (const {score, fragment} of scoredFragments) {
104 | if (score > maxAcceptScore) {
105 | // If we already have some highlights, we can stop if scores get too bad.
106 | // If we have none, we might continue to find at least one, even if poor.
107 | if (initialHighlights.length > 0) break;
108 | }
109 | initialHighlights.push({score, fragment});
110 | if (initialHighlights.length >= 10 + 1) break; // Get a bit more than needed for the "better" selection (original took 10 for "better")
111 | }
112 |
113 | DEBUG.verboseSlow && console.log("highlight: initialHighlights count:", initialHighlights.length);
114 |
115 | let topSnippets;
116 |
117 | if (initialHighlights.length === 0) {
118 | DEBUG.verboseSlow && console.log('highlight: Zero initial highlights. Considering first scored fragment if available.');
119 | // Original logic: scores.slice(0,1) - this implies taking the best raw score if no "good" highlights
120 | if (scoredFragments.length > 0 && scoredFragments[0].score !== Infinity) {
121 | // Take the single best fragment, expand context, and mark it.
122 | const bestFragment = scoredFragments[0].fragment;
123 | const contextChars = chunkSize; // Original 'extra' was chunkSize
124 | const start = Math.max(0, bestFragment.offset - contextChars);
125 | const end = Math.min(docChars.length, bestFragment.offset + Array.from(bestFragment.text).length + contextChars);
126 | const snippetText = docChars.slice(start, end).join('');
127 |
128 | topSnippets = [{
129 | // score: scoredFragments[0].score, // Keep score if needed
130 | fragment: {
131 | text: internalMarkText(snippetText, query),
132 | offset: bestFragment.offset // Original offset of the core matched chunk
133 | }
134 | }];
135 | } else {
136 | topSnippets = []; // Truly no usable fragments
137 | }
138 | } else {
139 | // --- "Better" loop for context expansion and re-scoring (Identical logic to original) ---
140 | const contextCharsForBetterLoop = chunkSize; // Original 'extra' was chunkSize
141 | let betterScoredSnippets = initialHighlights.slice(0, 10).map(hl => {
142 | const originalFragment = hl.fragment;
143 | const originalFragmentTextChars = Array.from(originalFragment.text); // Unicode safe length
144 | const originalFragmentLength = originalFragmentTextChars.length;
145 |
146 | // Expand context using original document characters (hl.fragment.symbols)
147 | const startContext = Math.max(0, originalFragment.offset - contextCharsForBetterLoop);
148 | const endContext = Math.min(originalFragment.symbols.length, originalFragment.offset + originalFragmentLength + contextCharsForBetterLoop);
149 |
150 | const expandedText = originalFragment.symbols.slice(startContext, endContext).join('');
151 | const expandedTextLength = Array.from(expandedText).length; // Unicode safe
152 |
153 | // Re-calculate Ukkonen parameters for this new expanded text against the query
154 | const {
155 | maxDistance: newMaxDist,
156 | minPossibleScore: newMinScore,
157 | maxScoreRange: newMaxScoreRange
158 | } = calculateUkkonenParams(queryLength, expandedTextLength); // chunkSize is now expandedTextLength
159 |
160 | const newDistance = ukkonen(queryLower, expandedText.toLocaleLowerCase(), newMaxDist);
161 |
162 | let newScaledScore;
163 | if (newDistance === -1) {
164 | newScaledScore = Infinity;
165 | } else {
166 | newScaledScore = (newDistance - newMinScore) / newMaxScoreRange;
167 | }
168 |
169 | // The fragment text for output is the expanded text
170 | return {
171 | score: newScaledScore,
172 | fragment: { // New fragment object
173 | text: expandedText, // This text will be marked later
174 | // The offset should ideally be the start of this expanded snippet in the original document
175 | offset: startContext,
176 | // symbols: originalFragment.symbols // Not needed in final output
177 | }
178 | };
179 | });
180 |
181 | betterScoredSnippets.sort((a, b) => a.score - b.score);
182 | DEBUG.verboseSlow && console.log("highlight: betterScoredSnippets (after re-scoring with context):", JSON.stringify(betterScoredSnippets.slice(0,3),null,2));
183 |
184 | // Take top 3 from these "better" snippets and apply marking
185 | topSnippets = betterScoredSnippets.slice(0, 3).map(item => ({
186 | // score: item.score, // Keep score if needed
187 | fragment: {
188 | text: internalMarkText(item.fragment.text, query),
189 | offset: item.fragment.offset
190 | }
191 | }));
192 | }
193 |
194 | DEBUG.verboseSlow && console.log("highlight: final topSnippets to return:", topSnippets);
195 | return topSnippets;
196 | }
197 |
198 |
199 | // --- getFragmenter (Helper for highlight and trilight) ---
200 | // Preserving its original logic as much as possible, with clearer parameters.
201 | // The `symbolsArray` and `initialOffset` are for `highlight`'s specific needs.
202 | function getFragmenter(chunkSize, {overlap = false, symbolsArray = null, initialOffset = 0} = {}) {
203 | if (!Number.isInteger(chunkSize) || chunkSize < 1) {
204 | throw new TypeError(`chunkSize needs to be a whole number greater than 0`);
205 | }
206 |
207 | let currentFragmentCharCount; // Renamed from currentLength for clarity
208 |
209 | return function fragmentReducer(fragmentsAccumulator, nextCharSymbol, charIndex, fullSymbolArray) {
210 | // `fullSymbolArray` is the array being reduced.
211 | // `symbolsArray` (passed in options) is the *original* document characters,
212 | // used by `highlight` to ensure fragment.symbols points to the original doc.
213 | const effectiveSymbolsArray = symbolsArray || fullSymbolArray;
214 | const effectiveCharIndex = charIndex + initialOffset; // Adjust index for highlight's second pass
215 |
216 | if (overlap) {
217 | // Logic for overlapping fragments (primarily for trilight's n-grams)
218 | // This part of original getFragmenter was complex and seemed to modify previous frags.
219 | // For n-grams, it's simpler: create a new fragment for each possible n-gram.
220 | if (charIndex <= fullSymbolArray.length - chunkSize) {
221 | const ngramChars = fullSymbolArray.slice(charIndex, charIndex + chunkSize);
222 | fragmentsAccumulator.push({
223 | text: ngramChars.join(''),
224 | offset: effectiveCharIndex, // Offset in the original document
225 | symbols: effectiveSymbolsArray
226 | });
227 | }
228 | } else {
229 | // Logic for non-overlapping fragments (for highlight's chunking)
230 | if (fragmentsAccumulator.length === 0 || currentFragmentCharCount >= chunkSize) {
231 | // Start a new fragment
232 | fragmentsAccumulator.push({
233 | text: nextCharSymbol,
234 | offset: effectiveCharIndex, // Offset in the original document
235 | symbols: effectiveSymbolsArray
236 | });
237 | currentFragmentCharCount = 1;
238 | } else {
239 | // Add to the current fragment
240 | const currentFragment = fragmentsAccumulator[fragmentsAccumulator.length - 1];
241 | currentFragment.text += nextCharSymbol;
242 | currentFragmentCharCount++;
243 | }
244 | }
245 | return fragmentsAccumulator;
246 | };
247 | }
248 |
249 |
250 | // --- trilight function ---
251 | // Preserving original algorithm and segment generation logic with clarity and support.
252 | export function trilight(query, docString, {
253 | maxLength = 0,
254 | ngramSize = 3,
255 | maxSegmentSize = 140,
256 | // numResults is implicitly 3 due to .slice(0,3) at the end
257 | } = {}) {
258 | const originalDocChars = Array.from(docString); // For final slicing, Unicode safe
259 | const queryChars = Array.from(query.toLocaleLowerCase()); // Lowercase query once
260 |
261 | let docCharsForProcessing = Array.from(docString.toLocaleLowerCase());
262 | if (maxLength > 0 && docCharsForProcessing.length > maxLength) {
263 | docCharsForProcessing = docCharsForProcessing.slice(0, maxLength);
264 | }
265 |
266 | if (docCharsForProcessing.length < ngramSize || queryChars.length < ngramSize) {
267 | return [];
268 | }
269 |
270 | // Generate n-grams for document and query using the getFragmenter
271 | // For n-grams, getFragmenter should be called with overlap: true
272 | const docNgrams = docCharsForProcessing.reduce(getFragmenter(ngramSize, {overlap: true, symbolsArray: originalDocChars}), []);
273 | const queryNgrams = queryChars.reduce(getFragmenter(ngramSize, {overlap: true, symbolsArray: queryChars}), []); // symbolsArray here is queryChars
274 |
275 | if (docNgrams.length === 0 || queryNgrams.length === 0) return [];
276 |
277 | // Index document n-grams by their text
278 | const docNgramIndex = new Map();
279 | docNgrams.forEach(ngram => {
280 | if (!docNgramIndex.has(ngram.text)) {
281 | docNgramIndex.set(ngram.text, []);
282 | }
283 | // Store original character offset of the ngram in the document
284 | docNgramIndex.get(ngram.text).push(ngram.offset);
285 | });
286 |
287 | // --- Find matching entries (Identical to original logic) ---
288 | const matchingEntries = [];
289 | queryNgrams.forEach((queryNgram, queryNgramIndex) => {
290 | const docOffsetsForNgram = docNgramIndex.get(queryNgram.text);
291 | if (docOffsetsForNgram) {
292 | docOffsetsForNgram.forEach(docCharOffset => {
293 | matchingEntries.push({
294 | ngramText: queryNgram.text,
295 | queryNgramIndex: queryNgramIndex, // Index of ngram within queryNgrams list
296 | docCharOffset: docCharOffset // Character offset of ngram in original document
297 | });
298 | });
299 | }
300 | });
301 | matchingEntries.sort((a, b) => a.docCharOffset - b.docCharOffset); // Sort by document offset
302 |
303 | // --- Identify runs of consecutive matching n-grams (Identical to original logic) ---
304 | const runs = [];
305 | if (matchingEntries.length > 0) {
306 | let currentRun = {
307 | ngramsInRun: [matchingEntries[0].ngramText],
308 | startQueryNgramIndex: matchingEntries[0].queryNgramIndex,
309 | startDocCharOffset: matchingEntries[0].docCharOffset
310 | };
311 | let lastQueryNgramIndexInRun = matchingEntries[0].queryNgramIndex;
312 | let lastDocCharOffsetInRun = matchingEntries[0].docCharOffset;
313 |
314 | for (let i = 1; i < matchingEntries.length; i++) {
315 | const entry = matchingEntries[i];
316 | const queryIndexDiff = entry.queryNgramIndex - lastQueryNgramIndexInRun;
317 | const docOffsetDiff = entry.docCharOffset - lastDocCharOffsetInRun;
318 |
319 | if (queryIndexDiff === 1 && docOffsetDiff === 1) { // Consecutive in both query and doc
320 | currentRun.ngramsInRun.push(entry.ngramText);
321 | } else {
322 | // End current run, add its length, then push
323 | currentRun.charLengthInDoc = currentRun.ngramsInRun.length + (ngramSize - 1);
324 | runs.push(currentRun);
325 | // Start new run
326 | currentRun = {
327 | ngramsInRun: [entry.ngramText],
328 | startQueryNgramIndex: entry.queryNgramIndex,
329 | startDocCharOffset: entry.docCharOffset
330 | };
331 | }
332 | lastQueryNgramIndexInRun = entry.queryNgramIndex;
333 | lastDocCharOffsetInRun = entry.docCharOffset;
334 | }
335 | // Add the last run
336 | currentRun.charLengthInDoc = currentRun.ngramsInRun.length + (ngramSize - 1);
337 | runs.push(currentRun);
338 | }
339 |
340 | DEBUG.verboseSlow && console.log("trilight: identified runs:", runs.length);
341 |
342 | // --- Calculate gaps between runs (Identical to original logic) ---
343 | const gaps = [];
344 | if (runs.length > 1) {
345 | for (let i = 0; i < runs.length - 1; i++) {
346 | const run1 = runs[i];
347 | const run2 = runs[i+1];
348 | gaps.push({
349 | connectedRuns: [run1, run2],
350 | gapSize: run2.startDocCharOffset - (run1.startDocCharOffset + run1.charLengthInDoc)
351 | });
352 | }
353 | }
354 | gaps.sort((a, b) => a.gapSize - b.gapSize); // Sort by smallest gap
355 |
356 | // --- Merge runs into segments (Identical to original logic) ---
357 | const segments = [];
358 | const runToSegmentMap = new Map(); // Maps run's startDocCharOffset to the segment it belongs to
359 |
360 | // Initialize segments with individual runs if they are not too long
361 | runs.forEach(run => {
362 | if (run.charLengthInDoc <= maxSegmentSize) {
363 | const newSegment = {
364 | startOffset: run.startDocCharOffset,
365 | endOffset: run.startDocCharOffset + run.charLengthInDoc,
366 | score: run.charLengthInDoc // Initial score is its own length
367 | };
368 | segments.push(newSegment);
369 | runToSegmentMap.set(run.startDocCharOffset, newSegment);
370 | }
371 | });
372 |
373 |
374 | for (const gapInfo of gaps) {
375 | const runLeft = gapInfo.connectedRuns[0];
376 | const runRight = gapInfo.connectedRuns[1];
377 |
378 | const segmentForLeftRun = runToSegmentMap.get(runLeft.startDocCharOffset);
379 | const segmentForRightRun = runToSegmentMap.get(runRight.startDocCharOffset);
380 |
381 | if (segmentForLeftRun && segmentForRightRun && segmentForLeftRun === segmentForRightRun) {
382 | continue; // Already in the same segment
383 | }
384 |
385 | let merged = false;
386 | if (segmentForLeftRun && !segmentForRightRun) { // Try to extend left segment with right run
387 | const potentialNewEnd = runRight.startDocCharOffset + runRight.charLengthInDoc;
388 | if ((potentialNewEnd - segmentForLeftRun.startOffset) <= maxSegmentSize) {
389 | segmentForLeftRun.endOffset = potentialNewEnd;
390 | segmentForLeftRun.score += runRight.charLengthInDoc; // Add length of right run
391 | runToSegmentMap.set(runRight.startDocCharOffset, segmentForLeftRun); // Right run now points to left's segment
392 | // Remove standalone segment for right run if it existed (it shouldn't if !segmentForRightRun)
393 | const rightRunStandaloneSegmentIndex = segments.findIndex(s => s.startOffset === runRight.startDocCharOffset && s.endOffset === runRight.startDocCharOffset + runRight.charLengthInDoc);
394 | if (rightRunStandaloneSegmentIndex > -1) segments.splice(rightRunStandaloneSegmentIndex, 1);
395 | merged = true;
396 | }
397 | } else if (!segmentForLeftRun && segmentForRightRun) { // Try to extend right segment with left run
398 | const potentialNewStart = runLeft.startDocCharOffset;
399 | if ((segmentForRightRun.endOffset - potentialNewStart) <= maxSegmentSize) {
400 | segmentForRightRun.startOffset = potentialNewStart;
401 | segmentForRightRun.score += runLeft.charLengthInDoc;
402 | runToSegmentMap.set(runLeft.startDocCharOffset, segmentForRightRun);
403 | const leftRunStandaloneSegmentIndex = segments.findIndex(s => s.startOffset === runLeft.startDocCharOffset && s.endOffset === runLeft.startDocCharOffset + runLeft.charLengthInDoc);
404 | if (leftRunStandaloneSegmentIndex > -1) segments.splice(leftRunStandaloneSegmentIndex, 1);
405 | merged = true;
406 | }
407 | } else if (segmentForLeftRun && segmentForRightRun) { // Both runs are in existing (different) segments, try to merge these segments
408 | const potentialNewLength = segmentForRightRun.endOffset - segmentForLeftRun.startOffset;
409 | if (potentialNewLength <= maxSegmentSize) {
410 | segmentForLeftRun.endOffset = segmentForRightRun.endOffset;
411 | segmentForLeftRun.score += segmentForRightRun.score; // Combine scores
412 |
413 | // All runs that were part of segmentForRightRun now point to segmentForLeftRun
414 | for (const [runStartOffset, seg] of runToSegmentMap.entries()) {
415 | if (seg === segmentForRightRun) {
416 | runToSegmentMap.set(runStartOffset, segmentForLeftRun);
417 | }
418 | }
419 | // Remove segmentForRightRun from segments array
420 | const rightSegmentIndex = segments.indexOf(segmentForRightRun);
421 | if (rightSegmentIndex > -1) segments.splice(rightSegmentIndex, 1);
422 | merged = true;
423 | }
424 | }
425 | // Original code also had a case for creating a new segment from two runs not yet in segments.
426 | // This is covered by the initialization of segments with individual runs, and then merging.
427 | // The provided logic for merging was:
428 | // else { /* if (!leftSeg && !rightSeg) */
429 | // const newSegment = { start: runs[0].di, end: runs[0].di + runs[0].length + nextGap.gap + runs[1].length, score: runs[0].length + runs[1].length };
430 | // if ( newSegment.end - newSegment.start <= maxSegmentSize ) { runSegMap[runs[0].di] = newSegment; runSegMap[runs[1].di] = newSegment; segments.push(newSegment); assigned = newSegment; }
431 | // }
432 | // This specific "else" is tricky to map directly if segments are pre-initialized.
433 | // The current merging logic tries to extend existing segments. If two runs are not in segments
434 | // and their combined length (including gap) is <= maxSegmentSize, they should form a new segment.
435 | // This is implicitly handled if they were small enough to be individual segments initially and then get merged.
436 | // The key is that `runToSegmentMap` correctly tracks which segment a run belongs to.
437 |
438 | if (merged) {
439 | DEBUG.verboseSlow && console.log('trilight: Merged gap, new segment length:', segmentForLeftRun ? segmentForLeftRun.endOffset - segmentForLeftRun.startOffset : segmentForRightRun.endOffset - segmentForRightRun.startOffset);
440 | } else {
441 | DEBUG.verboseSlow && console.log('trilight: Gap could not be merged or runs not in mappable segments.');
442 | }
443 | }
444 |
445 | // Deduplicate segments that might have become identical after merges (e.g., if map pointed multiple runs to same segment object)
446 | const uniqueSegments = Array.from(new Set(segments.filter(s => s))); // Filter out undefined/null if any
447 | uniqueSegments.sort((a, b) => b.score - a.score); // Sort by score (descending)
448 |
449 | const textSegments = uniqueSegments.slice(0, 3).map(segment => {
450 | const snippetText = originalDocChars.slice(segment.startOffset, segment.endOffset).join('');
451 | return { // Return in the same format as highlight()
452 | fragment: {
453 | text: internalMarkText(snippetText, query),
454 | offset: segment.startOffset
455 | }
456 | };
457 | });
458 |
459 | DEBUG.verboseSlow && console.log("trilight: final textSegments:", textSegments.length);
460 |
461 | if (textSegments.length === 0 && originalDocChars.length > 0) {
462 | DEBUG.verboseSlow && console.log("trilight: No segments found, returning beginning of doc.");
463 | const fallbackText = originalDocChars.slice(0, Math.min(maxSegmentSize, originalDocChars.length)).join('');
464 | return [{ fragment: { text: internalMarkText(fallbackText, query), offset: 0 } }];
465 | }
466 |
467 | return textSegments;
468 | }
469 |
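470 | // A minimal usage sketch (editor's addition): both exports return up to three
471 | // snippets shaped like {fragment: {text, offset}}, with matches wrapped in
472 | // <mark> tags by internalMarkText.
473 | /*
474 |   const doc = "The quick brown fox jumps over the lazy dog.";
475 |   console.log(highlight("quick brown", doc));
476 |   console.log(trilight("quick brown", doc, {maxSegmentSize: 40}));
477 | */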
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 |
2 | require = require('esm')(module/*, options*/);
3 | module.exports = require('./app.js');
4 |
5 |
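6 | // The `esm` package shims require() above so that this CommonJS entry point
7 | // can load the ES-module code in app.js.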
--------------------------------------------------------------------------------
/src/installBrowser.js:
--------------------------------------------------------------------------------
1 | import { exec } from 'child_process';
2 | import { promisify } from 'util';
3 | import { createWriteStream } from 'fs';
4 | import { pipeline } from 'stream/promises';
5 | import { readFile } from 'fs/promises';
6 | import { Readable } from 'node:stream';
7 |
8 | // Constants
9 | const execPromise = promisify(exec);
10 |
11 | const SUPPORTED_BROWSERS = ['chrome', 'brave', 'vivaldi', 'edge', 'chromium'];
12 | const PLATFORM = process.platform; // 'win32', 'darwin', 'linux'
13 | const ARCH = process.arch; // 'x64', 'arm64', etc.
14 |
15 | // Logic
16 | // None; function is exported for use
17 |
18 | // Functions
19 | export async function installBrowser(browserName) {
20 | if (!SUPPORTED_BROWSERS.includes(browserName)) {
21 | throw new Error(`Unsupported browser: ${browserName}. Supported: ${SUPPORTED_BROWSERS.join(', ')}`);
22 | }
23 |
24 | console.log(`Installing ${browserName} on ${PLATFORM} (${ARCH})...`);
25 |
26 | await checkBrowserAvailability(browserName);
27 | const binaryPath = await installBrowserForPlatform(browserName);
28 | console.log(`${browserName} installed at: ${binaryPath}`);
29 | return binaryPath;
30 | }
31 |
32 | async function checkBrowserAvailability(browserName) {
33 | if (PLATFORM === 'linux' && ARCH === 'arm64' && browserName === 'chrome') {
34 | throw new Error('Chrome is not available for ARM64 Linux. Try Brave or Chromium instead.');
35 | }
36 | // Add more checks for other browsers if needed (e.g., Vivaldi ARM64 stability)
37 | }
38 |
39 | async function installBrowserForPlatform(browserName) {
40 | if (PLATFORM === 'win32') {
41 | return await installOnWindows(browserName);
42 | } else if (PLATFORM === 'darwin') {
43 | return await installOnMacOS(browserName);
44 | } else if (PLATFORM === 'linux') {
45 | return await installOnLinux(browserName);
46 | } else {
47 | throw new Error(`Unsupported platform: ${PLATFORM}`);
48 | }
49 | }
50 |
51 | async function installOnWindows(browserName) {
52 | try {
53 | // Check if winget is installed
54 | try {
55 | await execPromise('winget --version');
56 | } catch {
57 | console.log('winget not found. Installing winget...');
58 | await execPromise('powershell -Command "irm asheroto.com/winget | iex"');
59 | console.log('winget installed successfully.');
60 | }
61 |
62 | if (browserName === 'chrome') {
63 | await execPromise('winget install Google.Chrome --silent --accept-package-agreements --accept-source-agreements');
64 | return 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
65 | } else if (browserName === 'brave') {
66 | await execPromise('winget install Brave.Brave --silent --accept-package-agreements --accept-source-agreements');
67 | return 'C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe';
68 | } else if (browserName === 'vivaldi') {
69 | await execPromise('winget install Vivaldi.Vivaldi --silent --accept-package-agreements --accept-source-agreements');
70 | return 'C:\\Program Files\\Vivaldi\\Application\\vivaldi.exe';
71 | } else if (browserName === 'edge') {
72 | await execPromise('winget install Microsoft.Edge --silent --accept-package-agreements --accept-source-agreements');
73 | return 'C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe';
74 | } else if (browserName === 'chromium') {
75 | const url = getDownloadUrl(browserName, PLATFORM, ARCH);
76 | if (!url) throw new Error('Chromium download not supported on Windows');
77 | const outputPath = 'C:\\Program Files\\Chromium\\chromium.exe';
78 | await downloadBinary(url, outputPath);
79 | return outputPath;
80 | }
81 | } catch (error) {
82 | console.error(`Windows install failed: ${error.message}`);
83 | throw error;
84 | }
85 | }
86 |
87 | async function installOnMacOS(browserName) {
88 | try {
89 | // Check if brew is installed
90 | try {
91 | await execPromise('brew --version');
92 | } catch {
93 | throw new Error('Homebrew is not installed. Please install it from https://brew.sh and try again.');
94 | }
95 |
96 | if (browserName === 'chrome') {
97 | await execPromise('brew install --cask google-chrome');
98 | return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
99 | } else if (browserName === 'brave') {
100 | await execPromise('brew install --cask brave-browser');
101 | return '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser';
102 | } else if (browserName === 'vivaldi') {
103 | await execPromise('brew install --cask vivaldi');
104 | return '/Applications/Vivaldi.app/Contents/MacOS/Vivaldi';
105 | } else if (browserName === 'edge') {
106 | await execPromise('brew install --cask microsoft-edge');
107 | return '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge';
108 | } else if (browserName === 'chromium') {
109 | await execPromise('brew install --cask chromium');
110 | return '/Applications/Chromium.app/Contents/MacOS/Chromium';
111 | }
112 | } catch (error) {
113 | console.error(`macOS install failed: ${error.message}`);
114 | throw error;
115 | }
116 | }
117 |
118 | async function installOnLinux(browserName) {
119 | try {
120 | const distro = await getLinuxDistro();
121 | if (distro === 'debian') {
122 | return await installOnDebian(browserName);
123 | } else if (distro === 'fedora') {
124 | return await installOnFedora(browserName);
125 | } else {
126 | throw new Error(`Unsupported Linux distribution: ${distro}`);
127 | }
128 | } catch (error) {
129 | console.error(`Linux install failed: ${error.message}`);
130 | throw error;
131 | }
132 | }
133 |
134 | async function installOnDebian(browserName) {
135 | let binaryPath = '/usr/bin/' + browserName;
136 | if (browserName === 'chrome') {
137 | await execPromise('wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add -');
138 | await execPromise(`sudo sh -c 'echo "deb [arch=${ARCH === 'arm64' ? 'arm64' : 'amd64'}] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list'`);
139 | await execPromise('sudo apt-get update && sudo apt-get install -y google-chrome-stable');
140 | binaryPath = '/usr/bin/google-chrome';
141 | } else if (browserName === 'brave') {
142 | await execPromise('sudo curl -fsSLo /usr/share/keyrings/brave-browser-archive-keyring.gpg https://brave-browser-apt-release.s3.brave.com/brave-browser-archive-keyring.gpg');
143 | await execPromise(`echo "deb [signed-by=/usr/share/keyrings/brave-browser-archive-keyring.gpg arch=${ARCH === 'arm64' ? 'arm64' : 'amd64'}] https://brave-browser-apt-release.s3.brave.com/ stable main" | sudo tee /etc/apt/sources.list.d/brave-browser-release.list`);
144 | await execPromise('sudo apt update && sudo apt install -y brave-browser');
145 | binaryPath = '/usr/bin/brave-browser';
146 | } else if (browserName === 'vivaldi') {
147 | await execPromise('wget -qO- https://repo.vivaldi.com/archive/linux_signing_key.pub | sudo apt-key add -');
148 | await execPromise(`sudo add-apt-repository "deb [arch=${ARCH === 'arm64' ? 'arm64' : 'amd64'}] https://repo.vivaldi.com/archive/deb/ stable main"`);
149 | await execPromise('sudo apt update && sudo apt install -y vivaldi-stable');
150 | binaryPath = '/usr/bin/vivaldi';
151 | } else if (browserName === 'edge') {
152 | await execPromise('curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg');
153 | await execPromise('sudo mv microsoft.gpg /usr/share/keyrings/microsoft-archive-keyring.gpg');
154 | await execPromise(`sudo sh -c 'echo "deb [arch=${ARCH === 'arm64' ? 'arm64' : 'amd64'} signed-by=/usr/share/keyrings/microsoft-archive-keyring.gpg] https://packages.microsoft.com/repos/edge stable main" > /etc/apt/sources.list.d/microsoft-edge.list'`);
155 | await execPromise('sudo apt update && sudo apt install -y microsoft-edge-stable');
156 | binaryPath = '/usr/bin/microsoft-edge';
157 | } else if (browserName === 'chromium') {
158 | await execPromise('sudo apt update && sudo apt install -y chromium-browser');
159 | // Check for Snap installation
160 | try {
161 | const { stdout } = await execPromise('which chromium');
162 | binaryPath = stdout.trim();
163 | if (binaryPath.includes('/snap/')) {
164 | binaryPath = '/snap/bin/chromium';
165 | } else {
166 | binaryPath = '/usr/bin/chromium-browser';
167 | }
168 | } catch {
169 | binaryPath = '/usr/bin/chromium-browser';
170 | }
171 | }
172 | return binaryPath;
173 | }
174 |
175 | async function installOnFedora(browserName) {
176 | let binaryPath = '/usr/bin/' + browserName;
177 | if (browserName === 'chrome') {
178 | await execPromise('sudo dnf config-manager --add-repo https://dl.google.com/linux/chrome/rpm/stable/x86_64');
179 | await execPromise('sudo rpm --import https://dl.google.com/linux/linux_signing_key.pub');
180 | await execPromise('sudo dnf install -y google-chrome-stable');
181 | binaryPath = '/usr/bin/google-chrome';
182 | } else if (browserName === 'brave') {
183 | await execPromise('sudo dnf config-manager --add-repo https://brave-browser-rpm-release.s3.brave.com/x86_64/');
184 | await execPromise('sudo rpm --import https://brave-browser-rpm-release.s3.brave.com/brave-core.asc');
185 | await execPromise('sudo dnf install -y brave-browser');
186 | binaryPath = '/usr/bin/brave-browser';
187 | } else if (browserName === 'vivaldi') {
188 | await execPromise('sudo dnf config-manager --add-repo https://repo.vivaldi.com/archive/vivaldi-fedora.repo');
189 | await execPromise('sudo dnf install -y vivaldi-stable');
190 | binaryPath = '/usr/bin/vivaldi';
191 | } else if (browserName === 'edge') {
192 | await execPromise('sudo rpm --import https://packages.microsoft.com/keys/microsoft.asc');
193 | await execPromise('sudo dnf config-manager --add-repo https://packages.microsoft.com/yumrepos/edge');
194 | await execPromise('sudo dnf install -y microsoft-edge-stable');
195 | binaryPath = '/usr/bin/microsoft-edge';
196 | } else if (browserName === 'chromium') {
197 | await execPromise('sudo dnf install -y chromium');
198 | binaryPath = '/usr/bin/chromium-browser';
199 | }
200 | return binaryPath;
201 | }
202 |
203 | async function getLinuxDistro() {
204 | try {
205 | const osRelease = await readFile('/etc/os-release', 'utf8');
206 | const lines = osRelease.split('\n');
207 | const releaseInfo = {};
208 | for (const line of lines) {
209 | const [key, value] = line.split('=');
210 | if (key && value) {
211 | releaseInfo[key] = value.replace(/"/g, '');
212 | }
213 | }
214 |
215 | if (releaseInfo.ID === 'fedora' || releaseInfo.ID_LIKE?.includes('fedora')) {
216 | return 'fedora';
217 | } else if (releaseInfo.ID === 'debian' || releaseInfo.ID === 'ubuntu' || releaseInfo.ID_LIKE?.includes('debian')) {
218 | return 'debian';
219 | } else {
220 | return releaseInfo.ID || 'unknown';
221 | }
222 | } catch (error) {
223 | console.error(`Failed to read /etc/os-release: ${error.message}`);
224 | return 'unknown';
225 | }
226 | }
227 |
228 | // Helper functions
229 | async function downloadBinary(url, outputPath) {
230 |   // global fetch (undici) ignores node-style `agent` options, so none is passed;
231 |   // response.body is a web ReadableStream and needs conversion before piping
232 |   const response = await fetch(url);
233 |   if (!response.ok) {
234 |     throw new Error(`Failed to download ${url}: ${response.statusText}`);
235 |   }
236 |   await pipeline(Readable.fromWeb(response.body), createWriteStream(outputPath));
237 | }
238 |
239 | function getDownloadUrl(browserName, platform, arch) {
240 | const urls = {
241 | chrome: {
242 | win32: { x64: 'https://dl.google.com/chrome/install/ChromeSetup.exe', arm64: 'https://dl.google.com/chrome/install/ChromeSetup.exe' },
243 | darwin: { x64: 'https://dl.google.com/chrome/mac/stable/GGRO/googlechrome.dmg', arm64: 'https://dl.google.com/chrome/mac/arm64/googlechrome.dmg' },
244 | linux: { x64: 'https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb', arm64: null }
245 | },
246 | brave: {
247 | win32: { x64: 'https://referrals.brave.com/latest/BraveBrowserSetup.exe', arm64: 'https://referrals.brave.com/latest/BraveBrowserSetup.exe' },
248 | darwin: { x64: 'https://laptop-updates.brave.com/latest/osx/Brave-Browser.dmg', arm64: 'https://laptop-updates.brave.com/latest/osx-arm64/Brave-Browser.dmg' },
249 | linux: { x64: 'https://laptop-updates.brave.com/latest/linux64', arm64: 'https://laptop-updates.brave.com/latest/linux-arm64' }
250 | },
251 | vivaldi: {
252 | win32: { x64: 'https://downloads.vivaldi.com/stable/Vivaldi_Setup.exe', arm64: 'https://downloads.vivaldi.com/stable/Vivaldi_Setup.exe' },
253 | darwin: { x64: 'https://downloads.vivaldi.com/stable/Vivaldi.dmg', arm64: 'https://downloads.vivaldi.com/stable/Vivaldi.dmg' },
254 | linux: { x64: 'https://downloads.vivaldi.com/stable/vivaldi-stable_amd64.deb', arm64: 'https://downloads.vivaldi.com/stable/vivaldi-stable_arm64.deb' }
255 | },
256 | edge: {
257 | win32: { x64: 'https://go.microsoft.com/fwlink/?linkid=2069148', arm64: 'https://go.microsoft.com/fwlink/?linkid=2069148' },
258 | darwin: { x64: 'https://go.microsoft.com/fwlink/?linkid=2069324', arm64: 'https://go.microsoft.com/fwlink/?linkid=2069324' },
259 | linux: { x64: 'https://packages.microsoft.com/repos/edge/pool/main/m/microsoft-edge-stable/microsoft-edge-stable_latest_amd64.deb', arm64: 'https://packages.microsoft.com/repos/edge/pool/main/m/microsoft-edge-stable/microsoft-edge-stable_latest_arm64.deb' }
260 | },
261 | chromium: {
262 | win32: { x64: 'https://download-chromium.appspot.com/dl/Win_x64?type=snapshots', arm64: 'https://download-chromium.appspot.com/dl/Win_arm64?type=snapshots' },
263 | darwin: { x64: 'https://download-chromium.appspot.com/dl/Mac?type=snapshots', arm64: 'https://download-chromium.appspot.com/dl/Mac_Arm?type=snapshots' },
264 | linux: { x64: 'https://download-chromium.appspot.com/dl/Linux_x64?type=snapshots', arm64: 'https://download-chromium.appspot.com/dl/Linux_Arm?type=snapshots' }
265 | }
266 | };
267 | return urls[browserName]?.[platform]?.[arch] || null;
268 | }
269 |
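270 | // A minimal usage sketch (editor's addition). The Linux and Windows paths shell
271 | // out to system package managers and may require elevated privileges:
272 | /*
273 |   import {installBrowser} from './installBrowser.js';
274 |
275 |   const binaryPath = await installBrowser('brave');
276 |   console.log(`Browser binary at: ${binaryPath}`);
277 | */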
--------------------------------------------------------------------------------
/src/launcher.js:
--------------------------------------------------------------------------------
1 | // launcher.js
2 | import { spawn } from 'child_process';
3 | import { DEBUG } from './common.js'; // Assuming common.js is accessible
4 |
5 | /**
6 | * Launches a browser executable with specified arguments.
7 | * @param {string} executablePath - Absolute path to the browser executable.
8 | * @param {string[]} browserArgs - Array of arguments to pass to the browser.
9 | * @param {object} [options={}] - Options for child_process.spawn.
10 | * @returns {import('child_process').ChildProcess | null} The spawned browser process or null on error.
11 | */
12 | function launch(executablePath, browserArgs = [], options = {}) {
13 | if (!executablePath) {
14 | console.error('launcher.js: Executable path is required.');
15 | return null;
16 | }
17 |
18 | DEBUG.verbose && console.log(`launcher.js: Spawning '${executablePath}' with args:`, browserArgs);
19 |
20 | try {
21 | const defaultSpawnOptions = {
22 | detached: process.platform !== 'win32', // Detach by default on non-Windows for independent exit
23 | stdio: ['ignore', 'pipe', 'pipe'],
24 | };
25 |
26 | const spawnOptions = { ...defaultSpawnOptions, ...options };
27 |
28 | const browserProcess = spawn(executablePath, browserArgs, spawnOptions);
29 |
30 | browserProcess.on('error', (err) => {
31 | console.error(`launcher.js: Failed to start browser process for ${executablePath}: ${err.message}`);
32 | });
33 |
34 | if (DEBUG.verboseBrowser) {
35 | const browserName = executablePath.split(/[/\\]/).pop();
36 | browserProcess.stdout.on('data', (data) => {
37 | DEBUG.verbose && process.stdout.write(`[BROWSER STDOUT - ${browserName}]: ${data}`);
38 | });
39 | browserProcess.stderr.on('data', (data) => {
40 | DEBUG.verbose && process.stderr.write(`[BROWSER STDERR - ${browserName}]: ${data}`);
41 | });
42 | }
43 |
44 | // If detached, unref() allows the parent to exit independently.
45 | // This is often desired so closing the terminal doesn't kill the browser launched by the script.
46 | if (spawnOptions.detached) {
47 | browserProcess.unref();
48 | }
49 |
50 | return browserProcess;
51 | } catch (error) {
52 | console.error(`launcher.js: Error spawning browser ${executablePath}: ${error.message}`);
53 | DEBUG.verbose && console.error(error);
54 | return null;
55 | }
56 | }
57 |
58 | export default {
59 | launch,
60 | };
61 |
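62 | // A minimal usage sketch (editor's addition): launch a browser with a DevTools
63 | // debugging port that protocol.js can attach to. The executable path and the
64 | // profile directory here are illustrative only.
65 | /*
66 |   import launcher from './launcher.js';
67 |
68 |   const proc = launcher.launch('/usr/bin/google-chrome', [
69 |     '--remote-debugging-port=9222',
70 |     '--user-data-dir=/tmp/22120-profile'
71 |   ]);
72 | */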
--------------------------------------------------------------------------------
/src/protocol.js:
--------------------------------------------------------------------------------
1 | import Ws from 'ws';
2 | import {sleep, untilTrue, SHOW_FETCH, DEBUG, ERROR_CODE_SAFE_TO_IGNORE} from './common.js';
3 |
4 | const ROOT_SESSION = "browser";
5 | const MESSAGES = new Map();
6 |
7 | const RANDOM_LOCAL = () => [
8 | '127.0.0.1',
9 | '[::1]',
10 | 'localhost',
11 | '127.0.0.1',
12 | '[::1]',
13 | 'localhost'
14 | ][Math.floor(Math.random()*6)];
15 |
16 | export async function connect({port:port = 9222} = {}) {
17 | let webSocketDebuggerUrl, socket;
18 | let url;
19 | try {
20 | await untilTrue(async () => {
21 | let result = false;
22 | try {
23 | url = `http://${RANDOM_LOCAL()}:${port}/json/version`;
24 | DEBUG.verbose && console.log(`Trying browser at ${url}...`, url);
25 | const {webSocketDebuggerUrl} = await Promise.race([
26 | fetch(url).then(r => r.json()),
27 | (async () => {
28 | await sleep(2500);
29 | throw new Error(`Connect took too long.`)
30 | })(),
31 | ]);
32 | if ( webSocketDebuggerUrl ) {
33 | result = true;
34 | }
35 | } catch(e) {
36 | DEBUG.verbose && console.error('Error while checking browser', e);
37 | } finally {
38 | return result;
39 | }
40 | });
41 | ({webSocketDebuggerUrl} = await fetch(url).then(r => r.json()));
42 | let isOpen = false;
43 | socket = new Ws(webSocketDebuggerUrl);
44 | socket.on('open', () => { isOpen = true });
45 | await untilTrue(() => isOpen);
46 | DEBUG.verbose && console.log(`Connected to browser`);
47 | } catch(e) {
48 | console.log("Error communicating with browser", e);
49 | process.exit(1);
50 | }
51 |
52 | const Resolvers = {};
53 | const Handlers = {};
54 | socket.on('message', handle);
55 | let id = 0;
56 |
57 | let resolve, reject;
58 | const promise = new Promise((res, rej) => (resolve = res, reject = rej));
59 |
60 | switch(socket.readyState) {
61 | case Ws.CONNECTING:
62 | socket.on('open', () => resolve()); break;
63 | case Ws.OPEN:
64 | resolve(); break;
65 | case Ws.CLOSED:
66 | case Ws.CLOSING:
67 | reject(); break;
68 | }
69 |
70 | await promise;
71 |
72 | return {
73 | send,
74 | on, ons, ona,
75 | close
76 | };
77 |
78 | async function send(method, params = {}, sessionId) {
79 | const message = {
80 | method, params, sessionId,
81 | id: ++id
82 | };
83 | if ( ! sessionId ) {
84 |       delete message.sessionId;
85 | }
86 | const key = `${sessionId||ROOT_SESSION}:${message.id}`;
87 | let resolve;
88 | const promise = new Promise(res => resolve = res);
89 | Resolvers[key] = resolve;
90 | const outGoing = JSON.stringify(message);
91 | MESSAGES.set(key, outGoing);
92 | socket.send(outGoing);
93 | DEBUG.verboseSlow && (SHOW_FETCH || !method.startsWith('Fetch')) && console.log("Sent", message);
94 | return promise;
95 | }
96 |
97 | async function handle(message) {
98 | if ( typeof message !== "string" ) {
99 | try {
100 | message += '';
101 | } catch(e) {
102 | message = message.toString();
103 | }
104 | }
105 | const stringMessage = message;
106 | message = JSON.parse(message);
107 | if ( message.error ) {
108 | const showError = DEBUG.protocol || !ERROR_CODE_SAFE_TO_IGNORE.has(message.error.code);
109 | if ( showError ) {
110 | DEBUG.protocol && console.warn(message);
111 | }
112 | }
113 | const {sessionId} = message;
114 | const {method} = message;
115 | const {id, result} = message;
116 |
117 | if ( id ) {
118 | const key = `${sessionId||ROOT_SESSION}:${id}`;
119 | const resolve = Resolvers[key];
120 | if ( ! resolve ) {
121 | DEBUG.protocol && console.warn(`No resolver for key`, key, stringMessage.slice(0,140));
122 | } else {
123 | Resolvers[key] = undefined;
124 | try {
125 | await resolve(result);
126 | } catch(e) {
127 | console.warn(`Resolver failed`, e, key, stringMessage.slice(0,140), resolve);
128 | }
129 | }
130 | if ( DEBUG ) {
131 | if ( message.error ) {
132 |           const showError = DEBUG.protocol || !ERROR_CODE_SAFE_TO_IGNORE.has(message.error.code);
133 | if ( showError ) {
134 | const originalMessage = MESSAGES.get(key);
135 | DEBUG.protocol && console.warn({originalMessage});
136 | }
137 | }
138 | }
139 | MESSAGES.delete(key);
140 | } else if ( method ) {
141 | const listeners = Handlers[method];
142 | if ( Array.isArray(listeners) ) {
143 | for( const func of listeners ) {
144 | try {
145 | func({message, sessionId});
146 | } catch(e) {
147 | console.warn(`Listener failed`, method, e, func.toString().slice(0,140), stringMessage.slice(0,140));
148 | }
149 | }
150 | }
151 | } else {
152 | console.warn(`Unknown message on socket`, message);
153 | }
154 | }
155 |
156 | function on(method, handler) {
157 | let listeners = Handlers[method];
158 | if ( ! listeners ) {
159 | Handlers[method] = listeners = [];
160 | }
161 | listeners.push(wrap(handler));
162 | }
163 |
164 | function ons(method, handler) {
165 | let listeners = Handlers[method];
166 | if ( ! listeners ) {
167 | Handlers[method] = listeners = [];
168 | }
169 | listeners.push(handler);
170 | }
171 |
172 | function ona(method, handler, sessionId) {
173 | let listeners = Handlers[method];
174 | if ( ! listeners ) {
175 | Handlers[method] = listeners = [];
176 | }
177 | listeners.push(({message}) => {
178 | if ( message.sessionId === sessionId ) {
179 | handler(message.params);
180 | } else {
181 |         DEBUG.verbose && console.log(`ona: ignoring message for a different session`, {method, handler, sessionId, message});
182 | }
183 | });
184 | }
185 |
186 | function close() {
187 | socket.close();
188 | }
189 |
190 | function wrap(fn) {
191 | return ({message}) => fn(message.params)
192 | }
193 | }
194 |
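195 | // Example usage (illustrative sketch, not part of the original module; the method
196 | // names are standard Chrome DevTools Protocol, but the surrounding flow is an
197 | // assumption):
198 | //
199 | //   import { connect } from './protocol.js';
200 | //   const browser = await connect({ port: 9222 });
201 | //   // `on` registers a handler that receives message.params for that CDP event.
202 | //   browser.on('Target.targetCreated', ({ targetInfo }) => console.log('created', targetInfo));
203 | //   await browser.send('Target.setDiscoverTargets', { discover: true });
204 | //   const { targetInfos } = await browser.send('Target.getTargets');
205 | //   console.log(`found ${targetInfos.length} targets`);
206 | //   browser.close();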
--------------------------------------------------------------------------------
/src/root.cjs:
--------------------------------------------------------------------------------
1 | const path = require('path');
2 |
3 |
4 | const file = __filename;
5 | const dir = path.dirname(file);
6 | const APP_ROOT = dir;
7 |
8 | //console.log({APP_ROOT});
9 |
10 | module.exports = {
11 | APP_ROOT,
12 | dir,
13 | file
14 | }
15 |
16 |
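17 | // Example usage (illustrative; the asset path is an assumption):
18 | //   const { APP_ROOT } = require('./root.cjs');
19 | //   const assetPath = require('path').resolve(APP_ROOT, '..', 'public', 'style.css');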
--------------------------------------------------------------------------------
/src/root.js:
--------------------------------------------------------------------------------
1 | import path from 'path';
2 | import url from 'url';
3 |
4 | let mod;
5 | let esm = false;
6 |
7 | try {
8 |   const [a, b] = [__dirname, __filename]; // eslint-disable-line no-unused-vars -- probe: throws ReferenceError under ESM
9 | } catch(e) {
10 | esm = true;
11 | }
12 |
13 | if ( ! esm ) {
14 | mod = require('./root.cjs');
15 | } else {
16 | const file = url.fileURLToPath(import.meta.url);
17 | const dir = path.dirname(file);
18 | mod = {
19 | dir,
20 | file,
21 | APP_ROOT: dir
22 | };
23 | }
24 |
25 | //console.log({root});
26 |
27 | export const root = mod;
28 |
29 |
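30 | // Example usage (illustrative): the same import works whether this file runs as
31 | // native ESM or as transpiled CommonJS, thanks to the __dirname probe above.
32 | //   import { root } from './root.js';
33 | //   console.log(root.APP_ROOT); // directory containing this module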
--------------------------------------------------------------------------------
/stampers/macos-new.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # macOS Single Executable Application (SEA) Stamper, Signer, and Conditional Notarizer for DownloadNet
4 |
5 | set -e
6 | # set -x
7 |
8 | # --- Configuration & Variables ---
9 | DEFAULT_NODE_VERSION="22"
10 | MACOS_APP_BUNDLE_ID="com.DOSAYGO.DownloadNet" # Your registered Bundle ID
11 | ENTITLEMENTS_FILE_PATH="scripts/downloadnet-entitlements.xml"
12 | NOTARIZE_SCRIPT_PATH="./stampers/notarize_macos.sh" # Path to your notarization script
13 |
14 | # --- NEW: Check for Notarization Environment Variables ---
15 | CAN_ATTEMPT_NOTARIZATION=true
16 | echo "INFO: Checking for notarization prerequisites..." >&2
17 | if [ -z "$API_KEY_ID" ]; then
18 | echo "WARNING: Environment variable API_KEY_ID is not set. Notarization will be skipped." >&2
19 | CAN_ATTEMPT_NOTARIZATION=false
20 | fi
21 | if [ -z "$API_KEY_ISSUER_ID" ]; then
22 | echo "WARNING: Environment variable API_KEY_ISSUER_ID is not set. Notarization will be skipped." >&2
23 | CAN_ATTEMPT_NOTARIZATION=false
24 | fi
25 | if [ -z "$API_KEY_P8_PATH" ]; then
26 | echo "WARNING: Environment variable API_KEY_P8_PATH is not set. Notarization will be skipped." >&2
27 | CAN_ATTEMPT_NOTARIZATION=false
28 | elif [ ! -f "$API_KEY_P8_PATH" ]; then # Also check if the path points to an actual file
29 | echo "WARNING: API Key .p8 file not found at path specified by API_KEY_P8_PATH: '$API_KEY_P8_PATH'. Notarization will be skipped." >&2
30 | CAN_ATTEMPT_NOTARIZATION=false
31 | fi
32 |
33 | if [ "$CAN_ATTEMPT_NOTARIZATION" = true ]; then
34 | echo "INFO: Notarization environment variables appear to be set." >&2
35 | else
36 | echo "INFO: One or more required environment variables for notarization are missing or invalid." >&2
37 | echo " To enable notarization, please set: API_KEY_ID, API_KEY_ISSUER_ID, API_KEY_P8_PATH." >&2
38 | fi
39 | echo "-----------------------------------------------------" >&2
40 |
41 |
42 | # --- Helper Functions (source_nvm, find_developer_id_identities - keep as is) ---
43 | source_nvm() {
44 | if [ -n "$NVM_DIR" ] && [ -s "$NVM_DIR/nvm.sh" ]; then source "$NVM_DIR/nvm.sh";
45 | elif [ -s "$HOME/.nvm/nvm.sh" ]; then source "$HOME/.nvm/nvm.sh"; fi
46 | if ! command -v nvm &> /dev/null; then echo "ERROR: NVM command not found." >&2; return 1; fi
47 | return 0
48 | }
49 |
50 | find_developer_id_identities() {
51 | local identities_output developer_id_identities=() identity_line
52 | echo "INFO: Searching for valid 'Developer ID Application' signing identities in keychain..." >&2
53 | identities_output=$(security find-identity -v -p codesigning | awk '{$1=$1;print}')
54 | while IFS= read -r identity_line; do
55 | if [[ "$identity_line" == *"Developer ID Application:"* ]]; then
56 | local name; name=$(echo "$identity_line" | awk -F '"' '{print $2}')
57 | if [ -n "$name" ]; then developer_id_identities+=("$name"); fi
58 | fi
59 | done <<< "$identities_output"; for id_name in "${developer_id_identities[@]}"; do echo "$id_name"; done
60 | }
61 | # --- End Helper Functions ---
62 |
63 | if [ "$#" -ne 3 ]; then
64 | echo "Usage: $0 " >&2
65 | exit 1
66 | fi
67 |
68 | EXE_NAME_ARG="$1"
69 | JS_SOURCE_FILE_ARG="$2"
70 | OUTPUT_FOLDER_ARG="$3"
71 |
72 | echo "--- DownloadNet macOS SEA Stamper, Signer & Conditional Notarizer ---"
73 | # Steps 1-5: Setup, SEA generation, Node binary prep, Injection (keep as is)
74 | echo "[Step 1/8] Setting up Node.js environment..." >&2
75 | if ! source_nvm; then exit 1; fi
76 | nvm install "$DEFAULT_NODE_VERSION" > /dev/null || { echo "ERROR: Failed to install Node $DEFAULT_NODE_VERSION" >&2; exit 1; }
77 | nvm use "$DEFAULT_NODE_VERSION" > /dev/null || { echo "ERROR: Failed to use Node $DEFAULT_NODE_VERSION" >&2; exit 1; }
78 | echo "INFO: Using Node version: $(node -v)" >&2
79 | if [ ! -f "$ENTITLEMENTS_FILE_PATH" ]; then echo "ERROR: Entitlements file not found at $ENTITLEMENTS_FILE_PATH" >&2; exit 1; fi
80 | echo "INFO: Using entitlements file: $ENTITLEMENTS_FILE_PATH" >&2
81 | mkdir -p "$OUTPUT_FOLDER_ARG"
82 | TEMP_EXE_PATH="./${EXE_NAME_ARG}_sea_final_build"
83 | echo "[Step 2/8] Creating sea-config.json..." >&2
84 | cat <<EOF > sea-config.json
85 | {
86 | "main": "${JS_SOURCE_FILE_ARG}",
87 | "output": "sea-prep.blob",
88 | "disableExperimentalSEAWarning": true,
89 | "useCodeCache": true,
90 | "assets": {
91 | "favicon.ico": "public/favicon.ico",
92 | "top.html": "public/top.html",
93 | "style.css": "public/style.css",
94 | "injection.js": "public/injection.js",
95 | "redirector.html": "public/redirector.html"
96 | }
97 | }
98 | EOF
99 | echo "[Step 3/8] Generating SEA blob..." >&2
100 | node --experimental-sea-config sea-config.json || { echo "ERROR: Failed to generate SEA blob." >&2; rm -f sea-config.json; exit 1; }
101 | echo "[Step 4/8] Preparing Node binary..." >&2
102 | NODE_EXECUTABLE_PATH="$(command -v node)"
103 | cp "$NODE_EXECUTABLE_PATH" "$TEMP_EXE_PATH" || { echo "ERROR: Failed to copy node binary." >&2; rm -f sea-config.json sea-prep.blob; exit 1; }
104 | echo "INFO: Removing existing signature from copied Node binary $TEMP_EXE_PATH..." >&2
105 | codesign --remove-signature "$TEMP_EXE_PATH" 2>/dev/null || echo "INFO: No existing signature or removal failed (okay)." >&2
106 | echo "[Step 5/8] Injecting SEA blob into $TEMP_EXE_PATH..." >&2
107 | NPX_CMD="npx"; if ! command -v npx &> /dev/null; then NODE_BIN_PATH=$(dirname "$(command -v node)"); if [ -x "$NODE_BIN_PATH/npx" ]; then NPX_CMD="$NODE_BIN_PATH/npx"; else echo "ERROR: npx not found." >&2; exit 1; fi; fi
108 | "$NPX_CMD" postject "$TEMP_EXE_PATH" NODE_SEA_BLOB sea-prep.blob \
109 | --sentinel-fuse NODE_SEA_FUSE_fce680ab2cc467b6e072b8b5df1996b2 \
110 | --macho-segment-name NODE_SEA || { echo "ERROR: postject failed."; rm -f sea-config.json sea-prep.blob "$TEMP_EXE_PATH"; exit 1; }
111 | echo "INFO: SEA blob injected." >&2
112 |
113 | # Step 6: Code Signing (keep as is)
114 | echo "[Step 6/8] Code Signing Process..." >&2
115 | SELECTED_SIGNING_IDENTITY=""
116 | if [ -n "${MACOS_CODESIGN_IDENTITY_DOWNLOADNET}" ]; then SELECTED_SIGNING_IDENTITY="${MACOS_CODESIGN_IDENTITY_DOWNLOADNET}"; echo "INFO: Using pre-set signing identity: ${SELECTED_SIGNING_IDENTITY}" >&2
117 | else
118 | DEVELOPER_ID_CANDIDATES=(); while IFS= read -r line; do DEVELOPER_ID_CANDIDATES+=("$line"); done < <(find_developer_id_identities)
119 | NUM_CANDIDATES=${#DEVELOPER_ID_CANDIDATES[@]}
120 | if [ "$NUM_CANDIDATES" -eq 0 ]; then SELECTED_SIGNING_IDENTITY="-"; echo "WARNING: No Developer ID certs found. Ad-hoc signing." >&2
121 | elif [ "$NUM_CANDIDATES" -eq 1 ]; then SELECTED_SIGNING_IDENTITY="${DEVELOPER_ID_CANDIDATES[0]}"; echo "INFO: Auto-selected unique Developer ID cert: $SELECTED_SIGNING_IDENTITY" >&2
122 | else
123 |     if [ -t 0 ]; then PS3="Select certificate by number (or 'a' for ad-hoc, 'q' to quit): "; select opt in "${DEVELOPER_ID_CANDIDATES[@]}" "Ad-hoc Sign (not for distribution)" "Quit"; do case $REPLY in q|$(($NUM_CANDIDATES+2))) exit 1;; a|$(($NUM_CANDIDATES+1))) SELECTED_SIGNING_IDENTITY="-"; break;; *) if [[ "$REPLY" -ge 1 && "$REPLY" -le "$NUM_CANDIDATES" ]]; then SELECTED_SIGNING_IDENTITY="${DEVELOPER_ID_CANDIDATES[$((REPLY-1))]}"; break; else echo "Invalid selection."; fi;; esac; done;
124 | else SELECTED_SIGNING_IDENTITY="${DEVELOPER_ID_CANDIDATES[0]}"; echo "WARNING: Non-interactive, multiple certs, using first: $SELECTED_SIGNING_IDENTITY" >&2; fi
125 | echo "INFO: You selected: $SELECTED_SIGNING_IDENTITY" >&2
126 | fi
127 | fi
128 | if [ -z "$SELECTED_SIGNING_IDENTITY" ]; then echo "ERROR: No signing identity selected." >&2; exit 1; fi
129 | echo "INFO: Signing $TEMP_EXE_PATH with identity: '$SELECTED_SIGNING_IDENTITY', bundle ID: '$MACOS_APP_BUNDLE_ID', entitlements: '$ENTITLEMENTS_FILE_PATH'" >&2
130 | SIGN_OPTIONS="--force --deep --timestamp --identifier \"$MACOS_APP_BUNDLE_ID\" --entitlements \"$ENTITLEMENTS_FILE_PATH\""
131 | if [ "$SELECTED_SIGNING_IDENTITY" != "-" ]; then SIGN_OPTIONS="$SIGN_OPTIONS --options runtime"; fi
132 | eval "codesign $SIGN_OPTIONS --sign \"$SELECTED_SIGNING_IDENTITY\" \"$TEMP_EXE_PATH\""
133 | if [ $? -ne 0 ]; then echo "ERROR: codesign failed." >&2; exit 1; fi
134 | echo "INFO: Code signing successful." >&2
135 |
136 | # Step 7: Verifying Signature and Testing Execution
137 | echo "[Step 7/8] Verifying Signature and Testing Execution..." >&2
138 | echo "INFO: Verifying signature for $TEMP_EXE_PATH..." >&2
139 | codesign --verify --strict --verbose=4 "$TEMP_EXE_PATH" || { echo "ERROR: codesign --verify failed." >&2; exit 1; }
140 | echo "INFO: Signature verified." >&2
141 | echo "INFO: Displaying signature details (check entitlements)..." >&2
142 | codesign --display --entitlements - --verbose=2 "$TEMP_EXE_PATH"
143 | echo "INFO: Assessing with spctl for $TEMP_EXE_PATH..." >&2
144 | spctl_output=$(spctl --assess --type execute --verbose "$TEMP_EXE_PATH" 2>&1) || true
145 | echo "$spctl_output"
146 |
147 | APP_SIGNED_WITH_DEV_ID=false
148 | if [ "$SELECTED_SIGNING_IDENTITY" != "-" ]; then
149 | APP_SIGNED_WITH_DEV_ID=true
150 | fi
151 |
152 | ELIGIBLE_FOR_NOTARIZATION=false
153 | if [ "$APP_SIGNED_WITH_DEV_ID" = true ] && [[ "$spctl_output" == *"source=Unnotarized Developer ID"* || "$spctl_output" == *"rejected"* ]]; then
154 | echo "INFO: App signed with Developer ID and appears unnotarized. Eligible for notarization attempt." >&2
155 | ELIGIBLE_FOR_NOTARIZATION=true
156 | elif [ "$APP_SIGNED_WITH_DEV_ID" = true ] && [[ "$spctl_output" == *": accepted"* && ("$spctl_output" == *"source=Notarized Developer ID"* || "$spctl_output" == *"source=Apple notarization"*) ]]; then
157 | echo "INFO: App appears to be already signed with Developer ID and notarized." >&2
158 | elif [ "$SELECTED_SIGNING_IDENTITY" == "-" ]; then
159 | echo "INFO: App is ad-hoc signed. Notarization is not applicable." >&2
160 | else
161 | echo "WARNING: App status is unclear or not suitable for notarization based on spctl assessment." >&2
162 | fi
163 |
164 | PROCEED_WITH_NOTARIZATION_USER_CONFIRMED="no"
165 | if [ "$ELIGIBLE_FOR_NOTARIZATION" = true ]; then
166 | echo "---------------------------------------------------------------------"
167 | echo "TESTING EXECUTABLE: The application '$TEMP_EXE_PATH' will now run in the foreground."
168 | echo "Please interact with it to verify its basic functionality."
169 | echo "Once you are done testing and have exited the application (or used Ctrl+C), "
170 | echo "this script will ask for your confirmation to notarize."
171 | echo "---------------------------------------------------------------------"
172 | chmod +x "$TEMP_EXE_PATH"
173 | if ! "$TEMP_EXE_PATH"; then
174 | echo "WARNING: Application exited with a non-zero status during test run." >&2
175 | fi
176 | echo "---------------------------------------------------------------------"
177 | if [ -t 0 ]; then
178 | read -r -p "Do you want to proceed with notarization for '$EXE_NAME_ARG' ? (y/N): " USER_CONFIRM_SUCCESS
179 | if [[ "$USER_CONFIRM_SUCCESS" =~ ^[Yy]$ ]]; then
180 | echo "INFO: User confirmed successful execution."
181 | PROCEED_WITH_NOTARIZATION_USER_CONFIRMED="yes"
182 | else
183 | echo "INFO: Person indicated a preference to skip notarization."
184 | fi
185 | else
186 | echo "WARNING: Non-interactive environment. Cannot get user confirmation for test run." >&2
187 | echo " To notarize in CI, ensure MACOS_CODESIGN_IDENTITY_DOWNLOADNET is set and notarization env vars are present." >&2
188 | echo " And consider adding an automated test or always notarizing if Dev ID signed." >&2
189 | fi
190 | fi
191 |
192 |
193 | # Step 8: Conditional Notarization and Finalization
194 | echo "[Step 8/8] Conditional Notarization and Finalization..." >&2
195 | FINAL_NOTARIZATION_DECISION="no"
196 |
197 | if [ "$ELIGIBLE_FOR_NOTARIZATION" = true ] && [ "$PROCEED_WITH_NOTARIZATION_USER_CONFIRMED" = "yes" ] && [ "$CAN_ATTEMPT_NOTARIZATION" = true ]; then
198 | if [ -x "$NOTARIZE_SCRIPT_PATH" ]; then
199 | echo "INFO: Proceeding to notarization for $TEMP_EXE_PATH..." >&2
200 | # Pass the temporary executable path and bundle ID to the notarization script
201 | if "$NOTARIZE_SCRIPT_PATH" "$TEMP_EXE_PATH" "$MACOS_APP_BUNDLE_ID"; then
202 | echo "INFO: Notarization process reported success for $TEMP_EXE_PATH." >&2
203 | FINAL_NOTARIZATION_DECISION="yes" # Assume success from script
204 | else
205 | echo "ERROR: Notarization process reported failure for $TEMP_EXE_PATH." >&2
206 | # Notarization script should output details. The main build might still succeed but app won't be notarized.
207 | fi
208 | else
209 | echo "WARNING: Notarization script $NOTARIZE_SCRIPT_PATH not found or not executable. Skipping actual notarization." >&2
210 | echo " (CAN_ATTEMPT_NOTARIZATION was true, but script is missing)" >&2
211 | fi
212 | elif [ "$ELIGIBLE_FOR_NOTARIZATION" = true ]; then # Eligible, but user said no or env vars missing
213 | if [ "$CAN_ATTEMPT_NOTARIZATION" = false ]; then
214 | echo "INFO: Notarization skipped because required environment variables (API_KEY_ID, etc.) are not set." >&2
215 | elif [ "$PROCEED_WITH_NOTARIZATION_USER_CONFIRMED" = "no" ]; then
216 | echo "INFO: Notarization skipped based on test run outcome or user choice." >&2
217 | fi
218 | fi
219 |
220 |
221 | FINAL_EXE_PATH="$OUTPUT_FOLDER_ARG/$EXE_NAME_ARG"
222 | echo "INFO: Moving $TEMP_EXE_PATH to $FINAL_EXE_PATH..." >&2
223 | mv "$TEMP_EXE_PATH" "$FINAL_EXE_PATH" || { echo "ERROR: Failed to move executable."; exit 1; }
224 |
225 | echo "INFO: Cleaning up temporary files..." >&2
226 | rm -f sea-config.json sea-prep.blob
227 |
228 | echo "--- DownloadNet macOS SEA Stamping & Signing Complete ---" >&2
229 | echo "SUCCESS: Executable created at: $FINAL_EXE_PATH" >&2
230 | if [ "$FINAL_NOTARIZATION_DECISION" = "yes" ]; then
231 | echo "INFO: The executable should be notarized."
232 | elif [ "$ELIGIBLE_FOR_NOTARIZATION" = true ]; then # Was eligible but didn't get notarized for some reason
233 | echo "WARNING: The executable is signed with Developer ID but was NOT notarized."
234 | elif [ "$SELECTED_SIGNING_IDENTITY" == "-" ]; then
235 | echo "INFO: The executable is ad-hoc signed (not for distribution, notarization not applicable)."
236 | else
237 | echo "INFO: Notarization was not attempted or was not applicable for other reasons."
238 | fi
239 |
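240 | # Example invocation (illustrative; the executable name, paths, and key values
241 | # are assumptions):
242 | #   export API_KEY_ID=... API_KEY_ISSUER_ID=... API_KEY_P8_PATH=...
243 | #   ./stampers/macos-new.sh dn src/app.js bin
244 | # Note: this script invokes $NOTARIZE_SCRIPT_PATH with two positional arguments,
245 | # while stampers/notarize_macos.sh parses flag-style arguments (--binary, ...),
246 | # so that hand-off may need adapting to your notarization script's interface.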
--------------------------------------------------------------------------------
/stampers/macos.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "$HOME/.nvm/nvm.sh"
4 |
5 | # Variables
6 | EXE_NAME="$1"
7 | JS_SOURCE_FILE="$2"
8 | OUTPUT_FOLDER="$3"
9 |
10 | # Ensure nvm is installed
11 | if ! command -v nvm &> /dev/null
12 | then
13 | echo "nvm not found. Installing..."
14 | curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash
15 | # shellcheck source=/dev/null
16 | source ~/.nvm/nvm.sh
17 | fi
18 |
19 | # Use Node 22
20 | nvm install 22
21 | nvm use 22
22 |
23 | # Create sea-config.json
24 | cat <<EOF > sea-config.json
25 | {
26 | "main": "${JS_SOURCE_FILE}",
27 | "output": "sea-prep.blob",
28 | "disableExperimentalSEAWarning": true,
29 | "useCodeCache": true,
30 | "assets": {
31 | "favicon.ico": "public/favicon.ico",
32 | "top.html": "public/top.html",
33 | "style.css": "public/style.css",
34 | "injection.js": "public/injection.js",
35 | "redirector.html": "public/redirector.html"
36 | }
37 | }
38 | EOF
39 |
40 | # Generate the blob
41 | node --experimental-sea-config sea-config.json
42 |
43 | # Copy node binary
44 | cp "$(command -v node)" "$EXE_NAME"
45 |
46 | # Remove the signature of the binary
47 | codesign --remove-signature "$EXE_NAME"
48 |
49 | # Inject the blob
50 | npx postject "$EXE_NAME" NODE_SEA_BLOB sea-prep.blob \
51 | --sentinel-fuse NODE_SEA_FUSE_fce680ab2cc467b6e072b8b5df1996b2 \
52 | --macho-segment-name NODE_SEA
53 |
54 | # Sign the binary
55 | codesign --sign - "$EXE_NAME"
56 |
57 | # Move the executable to the output folder
58 | mv "$EXE_NAME" "$OUTPUT_FOLDER"
59 |
60 | # Clean up
61 | rm sea-config.json sea-prep.blob
62 |
63 |
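64 | # Example invocation (illustrative; argument order matches the Variables above):
65 | #   ./stampers/macos.sh dn src/app.js bin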
--------------------------------------------------------------------------------
/stampers/nix.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "$HOME/.nvm/nvm.sh"
4 |
5 | # Variables
6 | EXE_NAME="$1"
7 | JS_SOURCE_FILE="$2"
8 | OUTPUT_FOLDER="$3"
9 |
10 | # Ensure nvm is installed
11 | if ! command -v nvm &> /dev/null
12 | then
13 | echo "nvm not found. Installing..."
14 | curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash
15 | # shellcheck source=/dev/null
16 | source ~/.nvm/nvm.sh
17 | fi
18 |
19 | # Use Node 22
20 | nvm install 22
21 | nvm use 22
22 |
23 | # Create sea-config.json
24 | cat <<EOF > sea-config.json
25 | {
26 | "main": "${JS_SOURCE_FILE}",
27 | "output": "sea-prep.blob",
28 | "disableExperimentalSEAWarning": true,
29 | "useCodeCache": true,
30 | "assets": {
31 | "favicon.ico": "public/favicon.ico",
32 | "top.html": "public/top.html",
33 | "style.css": "public/style.css",
34 | "injection.js": "public/injection.js",
35 | "redirector.html": "public/redirector.html"
36 | }
37 | }
38 | EOF
39 |
40 | # Generate the blob
41 | node --experimental-sea-config sea-config.json
42 |
43 | # Copy node binary
44 | cp "$(command -v node)" "$EXE_NAME"
45 |
46 | # Inject the blob
47 | npx postject "$EXE_NAME" NODE_SEA_BLOB sea-prep.blob \
48 | --sentinel-fuse NODE_SEA_FUSE_fce680ab2cc467b6e072b8b5df1996b2
49 |
50 | # Move the executable to the output folder
51 | mv "$EXE_NAME" "$OUTPUT_FOLDER"
52 |
53 | # Clean up
54 | rm sea-config.json sea-prep.blob
55 |
56 |
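57 | # Example invocation (illustrative):
58 | #   ./stampers/nix.sh dn src/app.js bin
59 | # Note: unlike macos.sh there is no codesign step, and postject needs no
60 | # --macho-segment-name flag for Linux ELF binaries.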
--------------------------------------------------------------------------------
/stampers/notarize_macos.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # notarize_macos.sh
4 | # Creates a notarized and stapled .pkg installer from a code-signed binary, signing the entire package.
5 |
6 | # Usage
7 | usage() {
8 | echo "Usage: $0 --binary --keychain-profile --bundle-id --version --installer-cert "
9 | echo "Example: $0 --binary ./bin/dn-macos --keychain-profile notarization-profile --bundle-id com.DOSAYGO.DownloadNet --version 4.5.1 --installer-cert 'Developer ID Installer: DOSAYGO"
10 | exit 1
11 | }
12 |
13 | # Parse command-line arguments
14 | while [ "$#" -gt 0 ]; do
15 | case "$1" in
16 | --binary) BINARY_PATH="$2"; shift 2 ;;
17 | --keychain-profile) KEYCHAIN_PROFILE="$2"; shift 2 ;;
18 | --bundle-id) BUNDLE_ID="$2"; shift 2 ;;
19 | --version) VERSION="$2"; shift 2 ;;
20 | --installer-cert) INSTALLER_CERT="$2"; shift 2 ;;
21 | *) echo "Unknown option: $1"; usage ;;
22 | esac
23 | done
24 |
25 | # Validate inputs
26 | if [ -z "$BINARY_PATH" ] || [ -z "$KEYCHAIN_PROFILE" ] || [ -z "$BUNDLE_ID" ] || [ -z "$VERSION" ] || [ -z "$INSTALLER_CERT" ]; then
27 | echo "Error: All arguments are required."
28 | usage
29 | fi
30 |
31 | if [ ! -f "$BINARY_PATH" ]; then
32 | echo "Error: Binary not found at $BINARY_PATH"
33 | exit 1
34 | fi
35 |
36 | # Verify binary is code-signed
37 | echo "Verifying signature of input binary: $BINARY_PATH"
38 | if ! codesign --verify --verbose "$BINARY_PATH"; then
39 | echo "Error: Input binary is not code-signed or signature is invalid."
40 | exit 1
41 | fi
42 |
43 | # Set up working directory
44 | BUILD_DIR="$HOME/build"
45 | echo "Cleaning and setting up working directory: $BUILD_DIR"
46 | rm -rf "$BUILD_DIR"
47 | mkdir -p "$BUILD_DIR/pkg_root/usr/local/bin"
48 |
49 | # Copy binary to package root
50 | BINARY_NAME=$(basename "$BINARY_PATH")
51 | cp "$BINARY_PATH" "$BUILD_DIR/pkg_root/usr/local/bin/$BINARY_NAME"
52 | chmod +x "$BUILD_DIR/pkg_root/usr/local/bin/$BINARY_NAME"
53 |
54 | # Verify signature after copying
55 | echo "Verifying signature of copied binary: $BUILD_DIR/pkg_root/usr/local/bin/$BINARY_NAME"
56 | if ! codesign --verify --verbose "$BUILD_DIR/pkg_root/usr/local/bin/$BINARY_NAME"; then
57 | echo "Error: Copied binary lost its signature or is invalid."
58 | exit 1
59 | fi
60 |
61 | # Create component package
62 | COMPONENT_PKG="$BUILD_DIR/component.pkg"
63 | pkgbuild --root "$BUILD_DIR/pkg_root" \
64 | --identifier "$BUNDLE_ID" \
65 | --version "$VERSION" \
66 | --install-location "/" \
67 | "$COMPONENT_PKG"
68 |
69 | if [ $? -ne 0 ]; then
70 | echo "Error: Failed to create component package."
71 | exit 1
72 | fi
73 |
74 | # Create distribution package
75 | UNSIGNED_DISTRIBUTION_PKG="$BUILD_DIR/unsigned-notarized-$BINARY_NAME-$VERSION.pkg"
76 | productbuild --package "$COMPONENT_PKG" \
77 | --identifier "$BUNDLE_ID" \
78 | --version "$VERSION" \
79 | "$UNSIGNED_DISTRIBUTION_PKG"
80 |
81 | if [ $? -ne 0 ]; then
82 | echo "Error: Failed to create distribution package."
83 | exit 1
84 | fi
85 |
86 | # Sign the distribution package
87 | DISTRIBUTION_PKG="notarized-$BINARY_NAME-$VERSION.pkg"
88 | echo "Signing distribution package with Installer certificate: $INSTALLER_CERT"
89 | productsign --sign "$INSTALLER_CERT" "$UNSIGNED_DISTRIBUTION_PKG" "$DISTRIBUTION_PKG"
90 |
91 | if [ $? -ne 0 ]; then
92 | echo "Error: Failed to sign distribution package."
93 | exit 1
94 | fi
95 |
96 | # Notarize the package
97 | echo "Submitting $DISTRIBUTION_PKG for notarization..."
98 | SUBMISSION_OUTPUT=$(xcrun notarytool submit "$DISTRIBUTION_PKG" --keychain-profile "$KEYCHAIN_PROFILE" --wait 2>&1)
99 |
100 | if [ $? -ne 0 ]; then
101 | echo "Error: Notarization submission failed."
102 | echo "$SUBMISSION_OUTPUT"
103 | exit 1
104 | fi
105 |
106 | # Extract submission ID
107 | SUBMISSION_ID=$(echo "$SUBMISSION_OUTPUT" | grep "id:" | head -1 | awk '{print $2}')
108 |
109 | if [ -z "$SUBMISSION_ID" ]; then
110 | echo "Error: Could not retrieve submission ID."
111 | exit 1
112 | fi
113 |
114 | echo "Notarization submission ID: $SUBMISSION_ID"
115 |
116 | # Check notarization status
117 | LOG_OUTPUT=$(xcrun notarytool log "$SUBMISSION_ID" --keychain-profile "$KEYCHAIN_PROFILE")
118 | STATUS=$(echo "$LOG_OUTPUT" | grep '"status":' | awk -F'"' '{print $4}')
119 |
120 | if [ "$STATUS" != "Accepted" ]; then
121 | echo "Error: Notarization failed. Status: $STATUS"
122 | echo "Notarization log:"
123 | echo "$LOG_OUTPUT"
124 | exit 1
125 | fi
126 |
127 | echo "Notarization successful. Status: $STATUS"
128 |
129 | # Staple the notarization ticket
130 | xcrun stapler staple "$DISTRIBUTION_PKG"
131 |
132 | if [ $? -ne 0 ]; then
133 | echo "Error: Failed to staple notarization ticket."
134 | exit 1
135 | fi
136 |
137 | echo "Successfully created notarized and stapled package: $DISTRIBUTION_PKG"
138 |
139 | # Clean up
140 | rm -rf "$BUILD_DIR"
141 |
142 | echo "Package is ready for distribution. Upload $DISTRIBUTION_PKG to your GitHub release."
143 |
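144 | # The keychain profile passed via --keychain-profile can be created once
145 | # beforehand (illustrative sketch; the profile name and key values are assumptions):
146 | #   xcrun notarytool store-credentials notarization-profile \
147 | #     --key "$API_KEY_P8_PATH" --key-id "$API_KEY_ID" --issuer "$API_KEY_ISSUER_ID"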
--------------------------------------------------------------------------------
/stampers/win.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | setlocal
3 |
4 | :: Check for required arguments
5 | if "%~3"=="" (
6 | echo Usage: %0 executable_name js_source_file output_folder
7 | exit /b 1
8 | )
9 |
10 | :: Define variables from command line arguments
11 | set "EXE_NAME=%~1"
12 | set "JS_SOURCE_FILE=%~2"
13 | set "OUTPUT_FOLDER=%~3"
14 | set "SEA_CONFIG=sea-config.json"
15 |
16 | echo "Exe name: %EXE_NAME%"
17 | echo "JS source: %JS_SOURCE_FILE%"
18 | echo "Output folder: %OUTPUT_FOLDER%"
19 | echo "SEA Config file: %SEA_CONFIG%"
20 |
21 | set /p "user_input=Press enter to continue"
22 |
23 | :: Ensure output folder exists
24 | if not exist "%OUTPUT_FOLDER%" mkdir "%OUTPUT_FOLDER%"
25 |
26 | :: Create configuration file for SEA
27 | (
28 | echo {
29 | echo "main": "%JS_SOURCE_FILE%",
30 | echo "output": "sea-prep.blob",
31 | echo "disableExperimentalSEAWarning": true,
32 | echo "useCodeCache": true,
33 | echo "assets": {
34 | echo "favicon.ico": "public/favicon.ico",
35 | echo "top.html": "public/top.html",
36 | echo "style.css": "public/style.css",
37 | echo "injection.js": "public/injection.js",
38 | echo "redirector.html": "public/redirector.html"
39 | echo }
40 | echo }
41 | ) > "%OUTPUT_FOLDER%\%SEA_CONFIG%"
42 |
43 | :: Generate the blob to be injected
44 | node --experimental-sea-config "%OUTPUT_FOLDER%\%SEA_CONFIG%"
45 |
46 | :: Copy the node executable and rename
47 | node -e "require('fs').copyFileSync(process.execPath, '%OUTPUT_FOLDER%/%EXE_NAME%')"
48 |
49 | :: Optionally, remove signature from the binary (use signtool if necessary, or skip this step)
50 | signtool.exe remove /s "%OUTPUT_FOLDER%\%EXE_NAME%"
51 |
52 | :: Inject the blob into the copied binary
53 | npx postject "%OUTPUT_FOLDER%\%EXE_NAME%" NODE_SEA_BLOB sea-prep.blob --sentinel-fuse NODE_SEA_FUSE_fce680ab2cc467b6e072b8b5df1996b2
54 |
55 | :: Done (note: sea-prep.blob and the generated %SEA_CONFIG% are left in place)
56 | echo Application built successfully.
57 |
58 | :end
59 |
60 |
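61 | :: Example invocation (illustrative):
62 | ::   stampers\win.bat dn.exe src\app.js build
63 | :: Signing the stamped executable is handled separately
64 | :: (see scripts/sign_windows_release.ps1).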
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | [ -s "$HOME/.nvm/nvm.sh" ] && source "$HOME/.nvm/nvm.sh"
3 | # Variables
4 | EXE_NAME="$1"
5 | JS_SOURCE_FILE="$2"
6 | OUTPUT_FOLDER="$3"
7 |
8 | # Ensure nvm is installed
9 | if ! command -v nvm &> /dev/null
10 | then
11 | echo "nvm not found. Installing..."
12 | curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash
13 | # shellcheck source=/dev/null
14 | source ~/.nvm/nvm.sh
15 | fi
16 |
17 | # Use Node 22
18 | nvm install 22
19 | nvm use 22
20 |
21 | # Create sea-config.json
22 | cat <<EOF > sea-config.json
23 | {
24 | "main": "${JS_SOURCE_FILE}",
25 | "output": "sea-prep.blob",
26 | "assets": {
27 | "index.html": "public/index.html",
28 | "top.html": "public/top.html",
29 | "style.css": "public/style.css",
30 | "injection.js": "public/injection.js",
31 | "redirector.html": "public/redirector.html"
32 | }
33 | }
34 | EOF
35 |
36 | # Generate the blob
37 | node --experimental-sea-config sea-config.json
38 |
39 | # Copy node binary
40 | cp "$(command -v node)" "$EXE_NAME"
41 |
42 | # Remove the signature of the binary
43 | codesign --remove-signature "$EXE_NAME"
44 |
45 | # Inject the blob
46 | npx postject "$EXE_NAME" NODE_SEA_BLOB sea-prep.blob \
47 | --sentinel-fuse NODE_SEA_FUSE_fce680ab2cc467b6e072b8b5df1996b2 \
48 | --macho-segment-name NODE_SEA
49 |
50 | # Sign the binary
51 | codesign --sign - "$EXE_NAME"
52 |
53 | # Move the executable to the output folder
54 | mv "$EXE_NAME" "$OUTPUT_FOLDER"
55 |
56 | # Clean up
57 | rm sea-config.json sea-prep.blob
58 |
59 |
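60 | # Example invocation (illustrative; mirrors stampers/macos.sh but with a
61 | # test-oriented sea-config):
62 | #   ./test.sh dn-test src/app.js bin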
--------------------------------------------------------------------------------