├── .github
│   ├── FUNDING.yml
│   └── workflows
│       ├── docker-image.yml
│       └── docker-test.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── bun.lockb
├── bunenv.txt
├── dockerfile
├── index.ts
├── models
│   ├── apiconfig.model.ts
│   ├── csstarget.model.ts
│   └── imapconfig.model.ts
├── node
│   └── imap-watch.utility.ts
├── package.json
├── public
│   ├── index.html
│   ├── logo.ico
│   ├── logo.png
│   └── logo.svg
├── tsconfig.json
├── utilities
│   ├── data-handler.utility.ts
│   ├── imap.utility.ts
│   ├── rss-builder.utility.ts
│   ├── security.utility.ts
│   └── suggestion-engine.utility.ts
└── workers
    ├── feed-updater.worker.ts
    └── imap-feed.worker.ts
/.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [TBosak] 4 | -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- 1 | name: Build and Push Docker Images 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | 7 | jobs: 8 | build-and-push: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | contents: read 12 | packages: write 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v3 17 | 18 | - name: Set up Docker Buildx 19 | uses: docker/setup-buildx-action@v2 20 | 21 | # Login to Docker Hub 22 | - name: Log in to Docker Hub 23 | uses: docker/login-action@v2 24 | with: 25 | registry: docker.io 26 | username: ${{ secrets.DOCKERHUB_USERNAME }} 27 | password: ${{ secrets.DOCKERHUB_TOKEN }} 28 | 29 | # Login to GitHub Container Registry 30 | - name: Log in to GitHub Container Registry 31 | uses: docker/login-action@v2 32 | with: 33 | registry: ghcr.io 34 | username: ${{ github.actor }} 35 | password: ${{ secrets.GHCR_TOKEN }} 36 | 37 | - name: Extract version number 38 | id: vars 39 | run: | 40 | VERSION=${{ github.run_number }} 41 | echo "VERSION=$VERSION" >> $GITHUB_ENV 42 | 43 | - name: Build and push Docker images to Docker Hub and GHCR 44 | uses: docker/build-push-action@v4 45 | with: 46 | context: . 
47 | push: true 48 | tags: | 49 | docker.io/tbosk/mkfd:latest 50 | docker.io/tbosk/mkfd:${{ env.VERSION }} 51 | ghcr.io/tbosak/mkfd:latest 52 | ghcr.io/tbosak/mkfd:${{ env.VERSION }} 53 | -------------------------------------------------------------------------------- /.github/workflows/docker-test.yml: -------------------------------------------------------------------------------- 1 | name: Build and Push Test Docker Images 2 | 3 | on: 4 | push: 5 | branches: [test] 6 | 7 | jobs: 8 | build-and-push: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | contents: read 12 | packages: write 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v3 17 | 18 | - name: Set up Docker Buildx 19 | uses: docker/setup-buildx-action@v2 20 | 21 | # Login to Docker Hub 22 | - name: Log in to Docker Hub 23 | uses: docker/login-action@v2 24 | with: 25 | registry: docker.io 26 | username: ${{ secrets.DOCKERHUB_USERNAME }} 27 | password: ${{ secrets.DOCKERHUB_TOKEN }} 28 | 29 | # Login to GitHub Container Registry 30 | - name: Log in to GitHub Container Registry 31 | uses: docker/login-action@v2 32 | with: 33 | registry: ghcr.io 34 | username: ${{ github.actor }} 35 | password: ${{ secrets.GHCR_TOKEN }} 36 | 37 | - name: Extract version number 38 | id: vars 39 | run: | 40 | VERSION=${{ github.run_number }} 41 | echo "VERSION=$VERSION" >> $GITHUB_ENV 42 | 43 | - name: Build and push Docker images to Docker Hub and GHCR 44 | uses: docker/build-push-action@v4 45 | with: 46 | context: . 47 | push: true 48 | tags: | 49 | docker.io/tbosk/mkfd:test-latest 50 | docker.io/tbosk/mkfd:test-${{ env.VERSION }} 51 | ghcr.io/tbosak/mkfd:test-latest 52 | ghcr.io/tbosak/mkfd:test-${{ env.VERSION }} 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore 2 | 3 | # Logs 4 | 5 | logs 6 | _.log 7 | npm-debug.log_ 8 | yarn-debug.log* 9 | yarn-error.log* 10 | lerna-debug.log* 11 | .pnpm-debug.log* 12 | 13 | # Diagnostic reports (https://nodejs.org/api/report.html) 14 | 15 | report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json 16 | 17 | # Runtime data 18 | 19 | pids 20 | _.pid 21 | _.seed 22 | \*.pid.lock 23 | 24 | # Directory for instrumented libs generated by jscoverage/JSCover 25 | 26 | lib-cov 27 | 28 | # Coverage directory used by tools like istanbul 29 | 30 | coverage 31 | \*.lcov 32 | 33 | # nyc test coverage 34 | 35 | .nyc_output 36 | 37 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 38 | 39 | .grunt 40 | 41 | # Bower dependency directory (https://bower.io/) 42 | 43 | bower_components 44 | 45 | # node-waf configuration 46 | 47 | .lock-wscript 48 | 49 | # Compiled binary addons (https://nodejs.org/api/addons.html) 50 | 51 | build/Release 52 | 53 | # Dependency directories 54 | 55 | node_modules/ 56 | jspm_packages/ 57 | 58 | # Snowpack dependency directory (https://snowpack.dev/) 59 | 60 | web_modules/ 61 | 62 | # TypeScript cache 63 | 64 | \*.tsbuildinfo 65 | 66 | # Optional npm cache directory 67 | 68 | .npm 69 | 70 | # Optional eslint cache 71 | 72 | .eslintcache 73 | 74 | # Optional stylelint cache 75 | 76 | .stylelintcache 77 | 78 | # Microbundle cache 79 | 80 | .rpt2_cache/ 81 | .rts2_cache_cjs/ 82 | .rts2_cache_es/ 83 | .rts2_cache_umd/ 84 | 85 | # Optional REPL history 86 | 87 | .node_repl_history 88 | 89 | # Output of 'npm pack' 90 | 91 | \*.tgz 92 | 93 | # 
Yarn Integrity file 94 | 95 | .yarn-integrity 96 | 97 | # dotenv environment variable files 98 | 99 | .env 100 | .env.development.local 101 | .env.test.local 102 | .env.production.local 103 | .env.local 104 | 105 | # parcel-bundler cache (https://parceljs.org/) 106 | 107 | .cache 108 | .parcel-cache 109 | 110 | # Next.js build output 111 | 112 | .next 113 | out 114 | 115 | # Nuxt.js build / generate output 116 | 117 | .nuxt 118 | dist 119 | 120 | # Gatsby files 121 | 122 | .cache/ 123 | 124 | # Comment in the public line in if your project uses Gatsby and not Next.js 125 | 126 | # https://nextjs.org/blog/next-9-1#public-directory-support 127 | 128 | # public 129 | 130 | # vuepress build output 131 | 132 | .vuepress/dist 133 | 134 | # vuepress v2.x temp and cache directory 135 | 136 | .temp 137 | .cache 138 | 139 | # Docusaurus cache and generated files 140 | 141 | .docusaurus 142 | 143 | # Serverless directories 144 | 145 | .serverless/ 146 | 147 | # FuseBox cache 148 | 149 | .fusebox/ 150 | 151 | # DynamoDB Local files 152 | 153 | .dynamodb/ 154 | 155 | # TernJS port file 156 | 157 | .tern-port 158 | 159 | # Stores VSCode versions used for testing VSCode extensions 160 | 161 | .vscode-test 162 | 163 | # yarn v2 164 | 165 | .yarn/cache 166 | .yarn/unplugged 167 | .yarn/build-state.yml 168 | .yarn/install-state.gz 169 | .pnp.\* 170 | public/feeds/ 171 | configs/ -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## 🤝 Contributing to Mkfd 2 | 3 | Thanks for considering a contribution to **Mkfd**! This project turns webpages, email folders, or REST APIs into structured RSS feeds using Bun and Hono. Below are ways you can get involved and help improve the project. 4 | 5 | --- 6 | 7 | ### 🧑‍💻 Code Contributions 8 | 9 | - **Fix Bugs** 10 | - Help identify and patch issues in feed parsing, scheduling, or UI logic. 11 | - **Complete To-Do Items** 12 | - Check off tasks listed in [`README.md`](https://github.com/TBosak/mkfd#-to-do). 13 | - **Feature Development** 14 | - Propose and implement new features. 15 | - **Improve the Web UI** 16 | - Enhance the `index.html` GUI or improve its responsiveness and user experience. 17 | 18 | ### 📄 Documentation 19 | 20 | - **Improve Existing Docs** 21 | - Expand `README.md` with usage examples and troubleshooting tips. 22 | - **Add New Guides** 23 | - Create tutorials for building feeds from various sources. 24 | - Document how CSS selectors and API mappings work. 25 | 26 | ### 🧪 Testing 27 | 28 | - **Write Tests** 29 | - Add unit/integration tests for utilities and adapters. 30 | - **Report Bugs** 31 | - Open detailed GitHub issues with steps to reproduce problems. 32 | - **Cross-Browser Testing** 33 | - Ensure GUI functionality across different browsers. 34 | 35 | ### 🛠️ DevOps 36 | 37 | - **Improve Docker Setup** 38 | - Optimize the Dockerfile or suggest multi-arch improvements. 39 | - **Create Helm Charts** 40 | - Add Kubernetes deployment files for streamlined hosting. 41 | - Work on TrueNAS app catalog integration. 42 | - **Add CI for Testing** 43 | - Propose GitHub Actions for test and lint automation. 44 | 45 | ### 🌍 Community & Support 46 | 47 | - **Help in Issues or Discussions** 48 | - Answer questions and guide new users. 49 | - **Feature Requests** 50 | - Suggest new functionalities via GitHub issues. 51 | - **Translations** 52 | - Contribute internationalization for UI or docs. 
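
If you want to pick up the testing or CI items above, a minimal sketch of a Bun test for the string helpers exported from `utilities/data-handler.utility.ts` could look like the following. The `test/` file path and the exact assertions are only suggestions; no test suite ships with the repo yet.

```ts
// test/data-handler.test.ts - hypothetical file path; not part of the repo yet.
import { describe, expect, it } from "bun:test";
import { appendUrl, stripHtml, titleCase } from "../utilities/data-handler.utility";

describe("data-handler utilities", () => {
  it("strips HTML tags from a fragment", () => {
    expect(stripHtml("<p>Hello <b>world</b></p>")).toBe("Hello world");
  });

  it("title-cases each word", () => {
    expect(titleCase("latest feed items")).toBe("Latest Feed Items");
  });

  it("joins a root URL and a relative link without doubling slashes", () => {
    expect(appendUrl("https://example.com/", "/post/1")).toBe("https://example.com/post/1");
  });
});
```

Running `bun test` would pick a file like this up automatically, and the same command could back a GitHub Actions workflow for the "Add CI for Testing" item.
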
53 | 54 | ### 💡 Ideas & Feedback 55 | 56 | - **Use the App** 57 | - Try building your own feeds and tell us what works or what doesn't. 58 | - **Feedback Matters** 59 | - Open issues for ideas, bugs, or UI/UX improvements. 60 | 61 | --- 62 | 63 | ## 🛠️ Getting Started 64 | 65 | 1. Install dependencies: 66 | ```bash 67 | bun install 68 | ``` 69 | 2. Run the app locally: 70 | ```bash 71 | bun run index.ts --passkey=your_passkey --cookieSecret=your_cookie_secret --encryptionKey=your_encryption_key_here 72 | ``` 73 | 3. Access it via: 74 | ``` 75 | http://localhost:5000/ 76 | ``` 77 | 78 | --- 79 | 80 | ## 🚀 How to Contribute 81 | 82 | 1. **Fork** the repository on GitHub. 83 | 2. **Clone** your fork locally: 84 | ```bash 85 | git clone https://github.com/your-username/mkfd.git 86 | cd mkfd 87 | ``` 88 | 3. **Create a new branch** for your feature or fix: 89 | ```bash 90 | git checkout -b your-feature-name 91 | ``` 92 | 4. **Make your changes**, test them locally, and commit: 93 | ```bash 94 | git commit -am "Add your message here" 95 | ``` 96 | 5. **Push** the branch to your fork: 97 | ```bash 98 | git push origin your-feature-name 99 | ``` 100 | 6. **Open a Pull Request** against the `main` branch on the original repository. 101 | 102 | We’ll review your PR and work with you to get it merged. Welcome aboard ✨ 103 | 104 | Thanks again for helping improve Mkfd 💜 105 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Tim Barani 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | mkfd

6 | Docker Pulls 7 | GitHub Repo stars 8 |

9 | 10 | ## 🏃 Running locally 11 | 12 | ### 🍞 Bun installation (visit [bun.sh](https://bun.sh/) for more info) 13 | 14 | ```bash 15 | curl https://bun.sh/install | bash 16 | ``` 17 | 18 | If you are using email feeds, you will need to install a version of Node.js that can run TypeScript natively. Mkfd creates a Node process for email feeds, as Bun does not currently play well with the popular IMAP packages that are built to run in Node. 19 | 20 | ### 📦 To install dependencies 21 | 22 | ```bash 23 | bun install 24 | ``` 25 | 26 | ### 🚀 To run 27 | 28 | ```bash 29 | bun run index.ts --passkey=your_passkey_here --cookieSecret=your_cookie_secret_here --encryptionKey=your_encryption_key_here --ssl=true/false 30 | ``` 31 | 32 | ➡️ Access the GUI at `http://localhost:5000/` 33 | 34 | --- 35 | 36 | ## 🐳 Running with Docker 37 | 38 | ### 🏠 Locally 39 | 40 | ```bash 41 | docker build -t mkfd . 42 | docker run -p 5000:5000 -v /local/mount/path:/app/configs -e PASSKEY=your_passkey -e COOKIE_SECRET=your_cookie_secret -e ENCRYPTION_KEY=your_encryption_key -e SSL=true/false mkfd 43 | ``` 44 | 45 | ### 📥 From Docker Hub 46 | 47 | ```bash 48 | docker pull tbosk/mkfd:latest 49 | docker run -p 5000:5000 -v /local/mount/path:/app/configs -e PASSKEY=your_passkey -e COOKIE_SECRET=your_cookie_secret -e ENCRYPTION_KEY=your_encryption_key -e SSL=true/false tbosk/mkfd:latest 50 | ``` 51 | 52 | If you don't supply the passkey, cookie secret, and encryption key, the app will prompt you for them (just make sure to run `docker run` with the `-it` flags to get an interactive shell). Make sure to reuse your encryption key for email feeds. 53 | 54 | ## 📧 Email Feeds 55 | 56 | Mkfd supports email feeds via IMAP. You can use any email provider that supports IMAP, such as Gmail, Yahoo, or Outlook. To set up an email feed, you need to provide the following information: 57 | 58 | - **Email address**: The email address you want to use for the feed. 59 | - **IMAP server**: The IMAP server address for your email provider. For example, Gmail's IMAP server is `imap.gmail.com`. 60 | - **IMAP port**: The port number for the IMAP server. For Gmail, this is `993`. 61 | - **IMAP password**: The password for your email account. You may need to generate an app password if you have two-factor authentication enabled. 62 | - **Folder**: The folder you want to monitor for new emails, e.g., `INBOX`. 63 | 64 | The encryption key is used to encrypt your password before storing it in the YAML config file. This is important for security reasons, as it prevents anyone from accessing your password in plain text. Make sure to use an encryption key that is 16, 24, or 32 characters long. 65 | 66 | Email feeds do not refresh on intervals. The process runs continuously and updates when a new email is received. 67 | 68 | ## 🖼️ GUI 69 | 70 | ![mkfdgui](https://github.com/user-attachments/assets/620d4f1f-15a6-4120-8265-6ba07aa4aa27) 71 | 72 | --- 73 | 74 | ## 🌎 Environment Variables / Command Line Arguments 75 | 76 | - **Passkey**: A passkey is a unique identifier that is used to authenticate requests to the Mkfd API. It is used to ensure that only authorized users can access the API and perform actions such as creating, updating, or deleting feeds. 77 | 78 | - **Cookie secret**: A cookie secret is a random string that is used to sign cookies in the Mkfd application. It is used to ensure that cookies cannot be tampered with by malicious users. The cookie secret must be at least 32 characters long. 
79 | 80 | - **Encryption key**: An encryption key is a random string that is used to encrypt sensitive data in the Mkfd application. It is used to ensure that sensitive data, such as passwords, cannot be accessed by unauthorized users. The encryption key must be 16, 24, or 32 characters long. 81 | 82 | - **SSL**: A boolean value that indicates whether to use SSL for the Mkfd application. Set to true if serving over HTTPS, false if serving over HTTP. 83 | 84 | --- 85 | 86 | ## 🔗 What is a Drill Chain? 87 | 88 | A drill chain is a sequence of CSS selector steps used to navigate through multiple HTML pages to extract a final value (e.g., a link, image, or text). This is useful when data cannot be accessed from a single page or selector alone—such as when a link leads to another page that contains the actual data of interest (see the config sketch after the To-Do list below). 89 | 90 | ### 🔍 Why Use Drill Chains? 91 | 92 | Some websites structure content across multiple layers: 93 | - The first page contains a list of links. 94 | - The second page (linked from the first) contains the actual title, image, or description. 95 | - Drill chains automate that navigation process. 96 | 97 | --- 98 | 99 | ## 🔧 To Do 100 | 101 | - [ ] Add ALL possible RSS fields to models 102 | - [x] Add option for parallel iterators 103 | - [ ] Scraping how-to video 104 | - [x] Add feed preview pane 105 | - [ ] Store/compare feed data to enable timestamping feed items 106 | - [x] Create dockerfile 107 | - [ ] Create Helm chart files 108 | - [x] Create GUI 109 | - [ ] Utilities 110 | - [x] HTML stripper 111 | - [x] Source URL wrapper for relative links 112 | - [ ] Nested link follower/drilldown functionality for each feed item property 113 | - [x] Adjust date parser logic with overrides from an optional date format input 114 | - [ ] Add selector suggestion engine 115 | - [ ] Amass contributors 116 | 117 | 
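
Here is a sketch of how a drill chain for a feed item's link can end up in the generated `configs/<feedId>.yaml`. The field names follow `models/csstarget.model.ts` and the `parseDrillChain` helper in `index.ts`, but the selectors, URLs, and two-step chain are invented for illustration:

```yaml
# Illustration only: selectors and URLs are invented.
article:
  link:
    selector: "a.post-link"
    attribute: "href"
    relativeLink: true
    rootUrl: "https://example.com"
    drillChain:
      # Step 1: follow the detail-page link found on the listing page.
      - selector: "a.read-more"
        attribute: "href"
        isRelative: true
        baseUrl: "https://example.com"
        stripHtml: false
      # Step 2: on the detail page, take the canonical URL as the final value.
      - selector: "link[rel='canonical']"
        attribute: "href"
        isRelative: false
        baseUrl: ""
        stripHtml: false
```

Every step except the last yields a URL that is fetched next; the value extracted by the final step becomes the item's value (see `resolveDrillChain` in `utilities/data-handler.utility.ts`).
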
118 | 119 | [![paypal](https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=V5LC4XTQDDE82&source=url) 120 | -------------------------------------------------------------------------------- /bun.lockb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TBosak/mkfd/38384769c31531fef0726e6d2f2da4c744a168ec/bun.lockb -------------------------------------------------------------------------------- /bunenv.txt: -------------------------------------------------------------------------------- 1 | export BUN_INSTALL="$HOME/.bun" 2 | export PATH="$BUN_INSTALL/bin:$PATH" -------------------------------------------------------------------------------- /dockerfile: -------------------------------------------------------------------------------- 1 | FROM oven/bun:1.2.2-debian 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | curl \ 5 | && rm -rf /var/lib/apt/lists/* 6 | 7 | RUN curl -fsSL https://deb.nodesource.com/setup_23.x | bash - \ 8 | && apt-get install -y nodejs 9 | 10 | ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright 11 | RUN bunx patchright install --with-deps chrome 12 | 13 | WORKDIR /app 14 | 15 | COPY package.json bun.lockb ./ 16 | RUN bun install 17 | 18 | COPY . . 19 | 20 | EXPOSE 5000 21 | 22 | VOLUME ["/app/configs"] 23 | 24 | CMD ["bun", "run", "index.ts"] -------------------------------------------------------------------------------- /index.ts: -------------------------------------------------------------------------------- 1 | import { file } from "bun"; 2 | import { existsSync, mkdirSync, unlink } from "fs"; 3 | import { readFile, readdir, writeFile } from "fs/promises"; 4 | import { Context, Hono } from "hono"; 5 | import { serveStatic, getConnInfo } from "hono/bun"; 6 | import { except } from "hono/combine"; 7 | import * as yaml from "js-yaml"; 8 | import minimist from "minimist"; 9 | import { basename, join } from "path"; 10 | import { v4 as uuidv4 } from "uuid"; 11 | import { DOMParser } from "xmldom"; 12 | import ApiConfig from "./models/apiconfig.model"; 13 | import CSSTarget from "./models/csstarget.model"; 14 | import axios from "axios"; 15 | import { createInterface } from "readline"; 16 | import { buildRSS, buildRSSFromApiData } from "./utilities/rss-builder.utility"; 17 | import { Config } from "node-imap"; 18 | import { listImapFolders } from "./utilities/imap.utility"; 19 | import { encrypt } from "./utilities/security.utility"; 20 | import { CookieStore, sessionMiddleware } from "hono-sessions"; 21 | import { suggestSelectors } from "./utilities/suggestion-engine.utility"; 22 | import { parseCookiesForPlaywright } from "./utilities/data-handler.utility"; 23 | import { chromium } from "patchright"; 24 | 25 | const app = new Hono(); 26 | const store = new CookieStore(); 27 | const args = minimist(process.argv.slice(3)); 28 | 29 | async function prompt(question: string): Promise { 30 | const rl = createInterface({ 31 | input: process.stdin, 32 | output: process.stdout, 33 | }); 34 | 35 | return new Promise((resolve) => { 36 | rl.question(question, (answer) => { 37 | rl.close(); 38 | resolve(answer); 39 | }); 40 | }); 41 | } 42 | 43 | const SSL = process.env.SSL === "true" || args.ssl === true; 44 | 45 | async function getSecrets() { 46 | const passkey = 47 | process.env.PASSKEY ?? args.passkey ?? (await prompt("Enter passkey: ")); 48 | const cookieSecret = 49 | process.env.COOKIE_SECRET ?? 50 | args.cookieSecret ?? 
51 | (await prompt("Enter cookie secret: ")); 52 | const encryptionKey = 53 | process.env.ENCRYPTION_KEY ?? 54 | args.encryptionKey ?? 55 | (await prompt("Enter encryption key: ")); 56 | return { passkey, cookieSecret, encryptionKey }; 57 | } 58 | 59 | const { passkey, cookieSecret, encryptionKey } = await getSecrets(); 60 | var feedUpdaters: Map = new Map(); 61 | var feedIntervals: Map = new Map(); 62 | 63 | const feedPath = join(__dirname, "/public/feeds"); 64 | if (!existsSync(feedPath)) { 65 | mkdirSync(feedPath); 66 | } 67 | 68 | const configsDir = join(__dirname, "configs"); 69 | if (!existsSync(configsDir)) { 70 | mkdirSync(configsDir); 71 | } 72 | 73 | // Start processing immediately on startup 74 | processFeedsAtStart(); 75 | //ALLOW LOCAL NETWORK TO ACCESS API 76 | const middleware = async (c: Context, next) => { 77 | const connInfo = await getConnInfo(c); 78 | const isLocal = 79 | !connInfo?.remote?.address || 80 | ["127.0.0.1", "::1"].includes(connInfo.remote.address); 81 | 82 | if (isLocal) return await next(); 83 | 84 | const session = c.get("session"); 85 | const authenticated = session.get("authenticated"); 86 | 87 | if (authenticated === true) { 88 | return await next(); 89 | } 90 | 91 | if (c.req.method === "POST" && c.req.path === "/passkey") { 92 | const body = await c.req.parseBody(); 93 | const inputKey = body["passkey"]; 94 | 95 | if (inputKey === passkey) { 96 | session.set("authenticated", true); 97 | return c.redirect("/"); 98 | } else { 99 | return c.html( 100 | '

Incorrect passkey. Try again.

' 101 | ); 102 | } 103 | } 104 | 105 | if (c.req.path === "/passkey") { 106 | return await next(); 107 | } 108 | 109 | return c.redirect("/passkey"); 110 | }; 111 | 112 | app.use( 113 | "*", 114 | sessionMiddleware({ 115 | store, 116 | encryptionKey: cookieSecret, 117 | expireAfterSeconds: 60 * 60 * 24, 118 | cookieOptions: { 119 | path: "/", 120 | httpOnly: true, 121 | secure: SSL, 122 | sameSite: "lax", 123 | }, 124 | }) 125 | ); 126 | app.use("/*", except("/public/feeds/*", middleware)); 127 | app.use("/public/*", serveStatic({ root: "./" })); 128 | app.use("/configs/*", serveStatic({ root: "./" })); 129 | app.get("/", (ctx) => ctx.html(file("./public/index.html").text())); 130 | app.post("/", async (ctx) => { 131 | const feedId = uuidv4(); 132 | const contentType = ctx.req.header("Content-Type") || ""; 133 | 134 | let body: Record; 135 | 136 | try { 137 | if (contentType.includes("application/json")) { 138 | body = await ctx.req.json(); 139 | } else if ( 140 | contentType.includes("multipart/form-data") || 141 | contentType.includes("application/x-www-form-urlencoded") 142 | ) { 143 | const formData = await ctx.req.formData(); 144 | body = Object.fromEntries(formData); 145 | } else { 146 | return ctx.text("Unsupported Content-Type.", 415); 147 | } 148 | } catch { 149 | return ctx.text("Invalid request body.", 400); 150 | } 151 | 152 | const extract = (key: string, fallback: any = undefined) => 153 | body[key] ?? fallback; 154 | 155 | const cookieNames = extract("cookieNames[]") || []; 156 | const cookieValues = extract("cookieValues[]") || []; 157 | const cookieString = cookieNames 158 | .map((rawName: string, i: number) => { 159 | const rawValue = cookieValues[i] ?? ""; 160 | const name = rawName.trim(); 161 | const value = rawValue.trim(); 162 | return `${name}=${value}`; 163 | }) 164 | .join("; "); 165 | 166 | const feedType = extract("feedType", "webScraping"); 167 | 168 | const apiConfig: ApiConfig = { 169 | title: extract("feedName", "RSS Feed"), 170 | baseUrl: extract("feedUrl"), 171 | method: extract("apiMethod", "GET"), 172 | route: extract("apiRoute"), 173 | params: JSON.parse(extract("apiParams", "{}")), 174 | headers: JSON.parse(extract("apiHeaders", "{}")), 175 | cookieString: cookieString, 176 | body: JSON.parse(extract("apiBody", "{}")), 177 | advanced: ["on", true, "true"].includes(extract("advanced")), 178 | }; 179 | 180 | const emailConfig = { 181 | host: extract("emailHost"), 182 | port: parseInt(extract("emailPort", "993")), 183 | user: extract("emailUsername"), 184 | encryptedPassword: encrypt(extract("emailPassword"), encryptionKey), 185 | folder: extract("emailFolder"), 186 | }; 187 | 188 | const feedConfig = { 189 | feedId, 190 | feedName: apiConfig.title, 191 | feedType, 192 | config: feedType === "email" ? emailConfig : apiConfig, 193 | article: 194 | feedType === "webScraping" 195 | ? { 196 | iterator: new CSSTarget(extract("itemSelector")), 197 | title: buildCSSTarget("title", body), 198 | description: buildCSSTarget("description", body), 199 | link: buildCSSTarget("link", body), 200 | enclosure: buildCSSTarget("enclosure", body), 201 | date: buildCSSTarget("date", body), 202 | } 203 | : {}, 204 | apiMapping: 205 | feedType === "api" 206 | ? 
{ 207 | items: extract("apiItemsPath"), 208 | title: extract("apiTitleField"), 209 | description: extract("apiDescriptionField"), 210 | link: extract("apiLinkField"), 211 | date: extract("apiDateField"), 212 | } 213 | : {}, 214 | refreshTime: parseInt(extract("refreshTime", "5")), 215 | reverse: ["on", true, "true"].includes(extract("reverse")), 216 | strict: ["on", true, "true"].includes(extract("strict")), 217 | }; 218 | 219 | const yamlStr = yaml.dump(feedConfig); 220 | const yamlFilePath = join(configsDir, `${feedId}.yaml`); 221 | await writeFile(yamlFilePath, yamlStr, "utf8"); 222 | 223 | setFeedUpdaterInterval(feedConfig); 224 | 225 | if (contentType.includes("application/json")) { 226 | return ctx.json({ 227 | message: "RSS feed is being generated.", 228 | feedUrl: `public/feeds/${feedId}.xml`, 229 | }); 230 | } 231 | 232 | return ctx.html(` 233 |

Your RSS feed is being generated and will update every ${feedConfig.refreshTime} minutes.

234 |

Access it at: public/feeds/${feedId}.xml

235 | `); 236 | }); 237 | 238 | app.post("/preview", async (ctx) => { 239 | try { 240 | const jsonData = await ctx.req.json(); 241 | 242 | const extract = (key: string, fallback: any = undefined) => 243 | jsonData[key] ?? fallback; 244 | 245 | const feedType = extract("feedType", "webScraping"); 246 | const cookieNames = extract("cookieNames[]") || []; 247 | const cookieValues = extract("cookieValues[]") || []; 248 | 249 | const cookieString = cookieNames 250 | .map((rawName: string, i: number) => { 251 | const rawValue = cookieValues[i] ?? ""; 252 | const name = rawName.trim(); 253 | const value = rawValue.trim(); 254 | return `${name}=${value}`; 255 | }) 256 | .join("; "); 257 | 258 | const apiConfig: ApiConfig = { 259 | title: extract("feedName", "RSS Feed"), 260 | baseUrl: extract("feedUrl"), 261 | method: extract("apiMethod", "GET"), 262 | route: extract("apiRoute"), 263 | params: JSON.parse(extract("apiParams", "{}")), 264 | headers: JSON.parse(extract("apiHeaders", "{}")), 265 | cookieString: cookieString, 266 | body: JSON.parse(extract("apiBody", "{}")), 267 | advanced: ["on", true, "true"].includes(extract("advanced")), 268 | }; 269 | 270 | const feedConfig = { 271 | feedId: "preview", 272 | feedName: apiConfig.title, 273 | feedType, 274 | config: apiConfig, 275 | article: 276 | feedType === "webScraping" 277 | ? { 278 | iterator: new CSSTarget(extract("itemSelector")), 279 | title: buildCSSTarget("title", jsonData), 280 | description: buildCSSTarget("description", jsonData), 281 | link: buildCSSTarget("link", jsonData), 282 | author: buildCSSTarget("author", jsonData), 283 | date: buildCSSTarget("date", jsonData), 284 | enclosure: buildCSSTarget("enclosure", jsonData), 285 | } 286 | : {}, 287 | apiMapping: 288 | feedType === "api" 289 | ? { 290 | items: extract("apiItemsPath"), 291 | title: extract("apiTitleField"), 292 | description: extract("apiDescriptionField"), 293 | link: extract("apiLinkField"), 294 | date: extract("apiDateField"), 295 | } 296 | : {}, 297 | refreshTime: parseInt(extract("refreshTime", "5")), 298 | reverse: ["on", true, "true"].includes(extract("reverse")), 299 | strict: ["on", true, "true"].includes(extract("strict")), 300 | }; 301 | 302 | const response = await generatePreview(feedConfig); 303 | 304 | return ctx.text(response, 200, { 305 | "Content-Type": "application/rss+xml", 306 | "Cache-Control": "no-cache, no-store, must-revalidate", 307 | }); 308 | } catch (error) { 309 | console.error("Error generating preview:", error); 310 | return ctx.text("Invalid request.", 400); 311 | } 312 | }); 313 | 314 | app.get("/feeds", async (ctx) => { 315 | const files = await readdir(configsDir); 316 | const yamlFiles = files.filter((file) => file.endsWith(".yaml")); 317 | const configs = []; 318 | 319 | // Read feed configurations 320 | for (const file of yamlFiles) { 321 | const filePath = join(configsDir, file); 322 | const yamlContent = await readFile(filePath, "utf8"); 323 | const feedConfig = yaml.load(yamlContent); 324 | configs.push(feedConfig); 325 | } 326 | 327 | // Start building the HTML response 328 | let response = ` 329 | 330 | 331 | 332 | 333 | Feeds 334 | 335 | 336 | 337 |
338 | 343 |

Active RSS Feeds

344 |
345 | `; 346 | 347 | // Process each feed to extract information 348 | for (const config of configs) { 349 | const feedId = config.feedId; 350 | const feedName = config.feedName; 351 | const feedType = config.feedType; 352 | 353 | // Read the corresponding XML file 354 | const xmlFilePath = join(feedPath, `${feedId}.xml`); 355 | let lastBuildDate = "N/A"; 356 | try { 357 | const xmlContent = await readFile(xmlFilePath, "utf8"); 358 | // Parse the XML to extract lastBuildDate 359 | const parser = new DOMParser(); 360 | const xmlDoc = parser.parseFromString(xmlContent, "application/xml"); 361 | const lastBuildDateNode = xmlDoc.getElementsByTagName("lastBuildDate")[0]; 362 | if (lastBuildDateNode && lastBuildDateNode.textContent) { 363 | lastBuildDate = new Date( 364 | lastBuildDateNode.textContent 365 | ).toLocaleString(); 366 | } 367 | } catch (error) { 368 | console.error(`Error reading XML for feedId ${feedId}:`, error); 369 | } 370 | 371 | // Build the card for this feed 372 | response += ` 373 |
374 |
375 |

${feedName}

376 |
377 |

Feed ID: ${feedId}

378 |

Build Time: ${lastBuildDate}

379 |

Feed Type: ${feedType}

380 |
381 |
382 | View Feed 383 |
384 | 385 | 386 |
387 | 388 |
389 |
390 | `; 391 | } 392 | 393 | // Close the grid and body 394 | response += ` 395 |
396 |
397 | 398 | 399 | `; 400 | 401 | return ctx.html(response); 402 | }); 403 | 404 | function injectSelectorGadget(html) { 405 | const SG_SCRIPT = ` 406 | 450 | `; 451 | 452 | let modified = html; 453 | if (modified.includes("")) { 454 | modified = modified.replace("", SG_SCRIPT + "\n"); 455 | } else { 456 | modified += SG_SCRIPT; 457 | } 458 | return modified; 459 | } 460 | 461 | app.get("/proxy", async (ctx) => { 462 | // 1) Read the remote URL from query params 463 | const targetUrl = ctx.req.query("url"); 464 | if (!targetUrl) { 465 | return ctx.text('Missing "url" parameter', 400); 466 | } 467 | 468 | try { 469 | const response = await axios.get(targetUrl); 470 | let html = response.data; 471 | 472 | html = injectSelectorGadget(html); 473 | 474 | return ctx.html(html); 475 | } catch (error) { 476 | console.error("Error fetching remote URL:", error); 477 | return ctx.text("Could not fetch the target URL", 500); 478 | } 479 | }); 480 | 481 | // Passkey entry routes 482 | app.get("/passkey", (c) => { 483 | return c.html(` 484 | 485 | 486 | 487 | Enter Passkey 488 | 489 | 490 | 491 |
492 |

Enter Passkey

493 |
494 | 495 | 496 | 497 |
498 |
499 | 500 | 501 | `); 502 | }); 503 | 504 | app.post("/delete-feed", async (c) => { 505 | const data = await c.req.parseBody(); 506 | const feedId = data["feedId"]; 507 | 508 | if (!feedId) { 509 | return c.text("Feed name is required.", 400); 510 | } 511 | 512 | const sanitizedFeedName = basename(feedId as string); // Prevent path traversal 513 | const success = await deleteFeed(sanitizedFeedName); 514 | 515 | if (success) { 516 | return c.redirect("/feeds"); 517 | } else { 518 | return c.text("Failed to delete feed.", 500); 519 | } 520 | }); 521 | 522 | app.post("/imap/folders", async (c) => { 523 | const config = await c.req.json(); 524 | console.log("IMAP config:", config); 525 | const folders = await listImapFolders(config); 526 | console.log("IMAP folders:", folders); 527 | return c.json({ folders }); 528 | }); 529 | 530 | app.post("/utils/suggest-selectors", async (c) => { 531 | const { url } = await c.req.json(); 532 | try { 533 | const selectors = await suggestSelectors(url); 534 | return c.json(selectors); 535 | } catch (err) { 536 | return c.json({ error: err.message }, 500); 537 | } 538 | }); 539 | 540 | app.post("/utils/root-url", async (c) => { 541 | const { url } = await c.req.json(); 542 | try { 543 | const parsed = new URL(url); 544 | return c.json({ origin: parsed.origin }); 545 | } catch { 546 | return c.json({ origin: "" }, 400); 547 | } 548 | }); 549 | 550 | function buildCSSTarget(prefix: string, body: Record): CSSTarget { 551 | const extract = (k: string) => body[k]?.toString() ?? ""; 552 | 553 | const dateFormat = extract(`${prefix}Format`); 554 | const customDateFormat = 555 | dateFormat === "other" ? extract("customDateFormat") : undefined; 556 | 557 | const target = new CSSTarget( 558 | extract(`${prefix}Selector`), 559 | extract(`${prefix}Attribute`), 560 | ["on", "true", true].includes(extract(`${prefix}StripHtml`)), 561 | extract(`${prefix}BaseUrl`), 562 | ["on", "true", true].includes(extract(`${prefix}RelativeLink`)), 563 | ["on", "true", true].includes(extract(`${prefix}TitleCase`)), 564 | extract(`${prefix}Iterator`), 565 | dateFormat === "other" ? customDateFormat : dateFormat 566 | ); 567 | 568 | // Parse the chain 569 | target.drillChain = parseDrillChain(prefix, body); 570 | return target; 571 | } 572 | 573 | function parseDrillChain( 574 | prefix: string, 575 | body: Record 576 | ): Array<{ 577 | selector: string; 578 | attribute: string; 579 | isRelative: boolean; 580 | baseUrl: string; 581 | stripHtml: boolean; 582 | }> { 583 | const key = `${prefix}DrillChain`; 584 | const rawChain = body[key]; 585 | 586 | if (Array.isArray(rawChain)) { 587 | return rawChain.map((step) => ({ 588 | selector: step.selector ?? "", 589 | attribute: step.attribute ?? "", 590 | isRelative: ["on", "true", true].includes(step.isRelative), 591 | baseUrl: step.baseUrl ?? 
"", 592 | stripHtml: ["on", "true", true].includes(step.stripHtml), 593 | })); 594 | } 595 | 596 | const chainSteps = []; 597 | const chainKeyRegex = new RegExp(`^${key}\\[(\\d+)\\]\\[(.*?)\\]$`); 598 | const tempStore: Record> = {}; 599 | 600 | for (const key of Object.keys(body)) { 601 | const match = chainKeyRegex.exec(key); 602 | if (match) { 603 | const index = match[1]; 604 | const fieldName = match[2]; 605 | if (!tempStore[index]) tempStore[index] = {}; 606 | tempStore[index][fieldName] = body[key]; 607 | } 608 | } 609 | 610 | const sortedKeys = Object.keys(tempStore).sort( 611 | (a, b) => parseInt(a) - parseInt(b) 612 | ); 613 | for (const idx of sortedKeys) { 614 | const row = tempStore[idx]; 615 | chainSteps.push({ 616 | selector: row.selector ?? "", 617 | attribute: row.attribute ?? "", 618 | isRelative: ["on", "true", true].includes(row.isRelative), 619 | baseUrl: row.baseUrl ?? "", 620 | }); 621 | } 622 | 623 | return chainSteps; 624 | } 625 | 626 | function initializeWorker(feedConfig: any) { 627 | feedUpdaters.set( 628 | feedConfig.feedId, 629 | new Worker( 630 | feedConfig.feedType === "email" 631 | ? "./workers/imap-feed.worker.ts" 632 | : "./workers/feed-updater.worker.ts", 633 | { type: "module" } 634 | ) 635 | ); 636 | 637 | feedUpdaters.get(feedConfig.feedId).onmessage = (message) => { 638 | if (message.data.status === "done") { 639 | console.log(`Feed updates completed for ${feedConfig.feedId}.`); 640 | } else if (message.data.status === "error") { 641 | console.error( 642 | `Feed updates for ${feedConfig.feedId} encountered an error:`, 643 | message.data.error 644 | ); 645 | } 646 | }; 647 | 648 | feedUpdaters.get(feedConfig.feedId).onerror = (error) => { 649 | console.error("Worker error:", error); 650 | }; 651 | } 652 | 653 | async function processFeedsAtStart() { 654 | try { 655 | const files = await readdir(configsDir); 656 | const yamlFiles = files.filter((file) => file.endsWith(".yaml")); 657 | 658 | for (const file of yamlFiles) { 659 | const filePath = join(configsDir, file); 660 | const yamlContent = await readFile(filePath, "utf8"); 661 | const feedConfig = yaml.load(yamlContent); 662 | console.log("Processing feed:", feedConfig.feedId); 663 | setFeedUpdaterInterval(feedConfig); 664 | } 665 | } catch (error) { 666 | console.error("Error processing feeds:", error); 667 | } 668 | } 669 | 670 | async function generatePreview(feedConfig: any) { 671 | try { 672 | let rssXml; 673 | 674 | if (feedConfig.feedType === "webScraping") { 675 | if (feedConfig.config.advanced) { 676 | const context = await chromium.launch({ 677 | channel: "chrome", 678 | headless: true, 679 | }); 680 | const page = await context.newPage(); 681 | 682 | if ( 683 | feedConfig.config.headers && 684 | Object.keys(feedConfig.config.headers).length 685 | ) { 686 | await page.setExtraHTTPHeaders(feedConfig.config.headers); 687 | } 688 | 689 | if (feedConfig.config.cookieString) { 690 | const domain = new URL(feedConfig.config.baseUrl).hostname; 691 | const cookiesArray = parseCookiesForPlaywright( 692 | feedConfig.config.cookieString, 693 | domain 694 | ); 695 | if (cookiesArray.length) await page.context().addCookies(cookiesArray); 696 | } 697 | 698 | await page.goto(feedConfig.config.baseUrl, { 699 | waitUntil: "networkidle", 700 | }); 701 | const html = await page.content(); 702 | await context.close(); 703 | return buildRSS(html, feedConfig); 704 | } else { 705 | // Otherwise, use axios 706 | const response = await axios.get(feedConfig.config.baseUrl, { 707 | headers: { 708 | 
...(feedConfig.config.headers || {}), 709 | Cookie: feedConfig.config.cookieString || "", 710 | }, 711 | }); 712 | const html = response.data; 713 | rssXml = await buildRSS(html, feedConfig); 714 | } 715 | } else if (feedConfig.feedType === "api") { 716 | const axiosConfig = { 717 | method: feedConfig.config.method || "GET", 718 | url: feedConfig.config.baseUrl + (feedConfig.config.route || ""), 719 | headers: feedConfig.config.headers || {}, 720 | params: feedConfig.config.params || {}, 721 | data: feedConfig.config.body || {}, 722 | withCredentials: feedConfig.config.withCredentials || false, 723 | }; 724 | 725 | console.log("axiosConfig:", axiosConfig); 726 | const response = await axios(axiosConfig); 727 | const apiData = response.data; 728 | 729 | rssXml = buildRSSFromApiData(apiData, feedConfig); 730 | } 731 | return rssXml; 732 | } catch (error) { 733 | console.error( 734 | `Error fetching data for feedId ${feedConfig.feedId}:`, 735 | error.message 736 | ); 737 | } 738 | } 739 | 740 | function setFeedUpdaterInterval(feedConfig: any) { 741 | const feedId = feedConfig.feedId; 742 | 743 | if (!feedUpdaters.has(feedId)) { 744 | console.log("Initializing worker for feed:", feedId); 745 | initializeWorker(feedConfig); 746 | feedUpdaters.get(feedId).postMessage({ 747 | command: "start", 748 | config: feedConfig, 749 | encryptionKey: encryptionKey, 750 | }); 751 | } 752 | 753 | if (feedConfig.feedType !== "email") { 754 | if (!feedIntervals.has(feedId)) { 755 | console.log("Setting interval for feed:", feedId); 756 | 757 | const interval = setInterval(() => { 758 | console.log("Engaging worker for feed:", feedId); 759 | feedUpdaters 760 | .get(feedId) 761 | .postMessage({ command: "start", config: feedConfig }); 762 | }, feedConfig.refreshTime * 60 * 1000); 763 | 764 | feedIntervals.set(feedId, interval); 765 | } 766 | } 767 | } 768 | 769 | function clearAllFeedUpdaterIntervals() { 770 | for (const [feedId, intervalId] of feedIntervals.entries()) { 771 | clearFeedUpdaterInterval(feedId); 772 | 773 | const worker = feedUpdaters.get(feedId); 774 | if (worker) { 775 | worker.terminate(); 776 | feedUpdaters.delete(feedId); 777 | } 778 | } 779 | } 780 | 781 | function clearFeedUpdaterInterval(feedId: string) { 782 | const interval = feedIntervals.get(feedId); 783 | if (interval) { 784 | clearInterval(interval); 785 | feedIntervals.delete(feedId); 786 | } 787 | } 788 | 789 | async function deleteFeed(feedId: string): Promise { 790 | try { 791 | const feedFilePath = join("configs", `${feedId}.yaml`); 792 | await unlink(feedFilePath, (error) => { 793 | if (error) { 794 | console.error(`Failed to delete feed file ${feedId}.yaml:`, error); 795 | } 796 | }); 797 | 798 | console.log(`Feed ${feedId} deleted.`); 799 | return true; 800 | } catch (error) { 801 | console.error(`Failed to delete feed ${feedId}:`, error); 802 | return false; 803 | } 804 | } 805 | 806 | export default { 807 | port: 5000, 808 | fetch: app.fetch, 809 | }; 810 | 811 | process.on("exit", () => { 812 | clearAllFeedUpdaterIntervals(); 813 | }); 814 | 815 | process.on("SIGINT", () => { 816 | clearAllFeedUpdaterIntervals(); 817 | process.exit(); 818 | }); 819 | 820 | process.on("SIGTERM", () => { 821 | clearAllFeedUpdaterIntervals(); 822 | process.exit(); 823 | }); 824 | -------------------------------------------------------------------------------- /models/apiconfig.model.ts: -------------------------------------------------------------------------------- 1 | export default class ApiConfig { 2 | constructor( 3 | public title?: 
string, 4 | public baseUrl?: string, 5 | public method?: string, 6 | public route?: string, 7 | public params?: { [key: string]: string }, 8 | public headers?: { [key: string]: string }, 9 | public cookieString?: string, 10 | public body?: any, 11 | public withCredentials?: boolean, 12 | public contributor?: string, 13 | public advanced?: boolean, 14 | ) {} 15 | } 16 | -------------------------------------------------------------------------------- /models/csstarget.model.ts: -------------------------------------------------------------------------------- 1 | export default class CSSTarget { 2 | constructor( 3 | public selector: string, 4 | public attribute?: string, 5 | public stripHtml?: boolean, 6 | public rootUrl?: string, 7 | public relativeLink?: boolean, 8 | public titleCase?: boolean, 9 | public iterator?: string, 10 | public dateFormat?: string, 11 | public drillChain?: Array<{ 12 | selector: string; 13 | attribute: string; 14 | isRelative: boolean; 15 | baseUrl: string; 16 | stripHtml: boolean; 17 | }>) {} 18 | } 19 | -------------------------------------------------------------------------------- /models/imapconfig.model.ts: -------------------------------------------------------------------------------- 1 | export interface ImapConfig { 2 | host: string; 3 | port: number; 4 | tls: boolean; 5 | user: string; 6 | encryptedPassword: string; 7 | folder: string; 8 | } 9 | -------------------------------------------------------------------------------- /node/imap-watch.utility.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import yaml from "js-yaml"; 3 | import path from "path"; 4 | import Imap from "node-imap"; 5 | import libmime from "libmime"; 6 | import minimist from "minimist"; 7 | import RSS from "rss"; 8 | import { existsSync, readFileSync, writeFileSync } from "fs"; 9 | import { simpleParser } from "mailparser"; 10 | import { decrypt } from "../utilities/security.utility.ts"; 11 | import { fileURLToPath } from "url"; 12 | import { dirname } from "path"; 13 | 14 | const __filename = fileURLToPath(import.meta.url); 15 | const __dirname = dirname(__filename); 16 | 17 | const args = minimist(process.argv.slice(2)); 18 | const encryptionKey: string = args.key || ""; 19 | const configHash: string = args.hash || ""; 20 | 21 | export interface Email { 22 | UID: number; 23 | subject: string; 24 | from: string; 25 | date: string; 26 | content: string; 27 | } 28 | 29 | if (!encryptionKey || !configHash) { 30 | console.error( 31 | "Usage: node imap-watcher.service.ts --key= --hash=", 32 | ); 33 | process.exit(1); 34 | } 35 | 36 | const yamlPath: string = path.join( 37 | __dirname, 38 | "../configs", 39 | `${configHash}.yaml`, 40 | ); 41 | if (!existsSync(yamlPath)) { 42 | console.error(`YAML config not found at: ${yamlPath}`); 43 | process.exit(1); 44 | } 45 | 46 | const fileContents = readFileSync(yamlPath, "utf8"); 47 | const rawConfig = yaml.load(fileContents); 48 | 49 | const imapConfig = { 50 | host: rawConfig.config.host, 51 | port: rawConfig.config.port, 52 | user: rawConfig.config.user, 53 | password: decrypt(rawConfig.config.encryptedPassword, encryptionKey), 54 | folder: rawConfig.config.folder || "INBOX", 55 | }; 56 | 57 | class ImapWatcher { 58 | private config: any; 59 | private imap: Imap; 60 | 61 | constructor(config: any) { 62 | this.config = config; 63 | this.imap = new Imap({ 64 | user: config.user, 65 | password: config.password, 66 | host: config.host, 67 | port: config.port, 68 | tls: true, 69 | }); 70 
| } 71 | 72 | async start(): Promise { 73 | try { 74 | await this.connect(); 75 | await this.openBox(this.config.folder); 76 | this.fetchRecentStartupEmails(); 77 | 78 | this.imap.on("mail", (n) => { 79 | console.log(`[IMAP] New mail event: ${n}`); 80 | this.fetchNewEmails(); 81 | }); 82 | 83 | this.imap.on("close", () => this.reconnect()); 84 | this.imap.on("error", () => this.reconnect()); 85 | } catch (err) { 86 | console.error("[IMAP] Failed to start:", err); 87 | this.reconnect(); 88 | } 89 | } 90 | 91 | private connect(): Promise { 92 | return new Promise((resolve, reject) => { 93 | this.imap.once("ready", () => { 94 | console.log("[IMAP] Connected"); 95 | resolve(); 96 | }); 97 | this.imap.once("error", reject); 98 | this.imap.connect(); 99 | }); 100 | } 101 | 102 | private openBox(boxName: string): Promise { 103 | return new Promise((resolve, reject) => { 104 | this.imap.openBox(boxName, false, (err) => { 105 | if (err) return reject(err); 106 | console.log(`[IMAP] Box "${boxName}" opened`); 107 | resolve(); 108 | }); 109 | }); 110 | } 111 | 112 | private fetchRecentStartupEmails(): void { 113 | console.log("[IMAP] Fetching emails..."); 114 | const twoDaysAgo = new Date(); 115 | twoDaysAgo.setDate(twoDaysAgo.getDate() - 2); 116 | 117 | this.imap.search([["SINCE", twoDaysAgo.toUTCString()]], (err, results) => { 118 | if (err || !results || results.length === 0) { 119 | console.log("[IMAP] No recent emails found on startup."); 120 | return; 121 | } 122 | 123 | const recentUids = results 124 | .sort((a: number, b: number) => a - b) 125 | .slice(-10); 126 | const fetch = this.imap.fetch(recentUids, { bodies: [""], struct: true }); 127 | const tasks: Promise[] = []; 128 | 129 | fetch.on("message", (msg, seqno) => { 130 | const chunks: Buffer[] = []; 131 | msg.on("body", (stream) => { 132 | stream.on("data", (chunk: Buffer | string) => { 133 | if (typeof chunk === "string") { 134 | chunks.push(Buffer.from(chunk, "utf-8")); 135 | } else { 136 | chunks.push(chunk); 137 | } 138 | }); 139 | }); 140 | 141 | const task = new Promise((resolveTask, rejectTask) => { 142 | msg.once("end", async () => { 143 | try { 144 | const raw = Buffer.concat(chunks); 145 | const parsed = await simpleParser(raw); 146 | 147 | const subject = parsed.subject 148 | ? libmime.decodeWords(parsed.subject) 149 | : "(No Subject)"; 150 | const from = parsed.from?.text 151 | ? 
libmime.decodeWords(parsed.from.text) 152 | : "(Unknown Sender)"; 153 | const date = 154 | parsed.date?.toISOString() || new Date().toISOString(); 155 | const content = parsed.text || parsed.html || "(No content)"; 156 | const email: Email = { 157 | UID: seqno, 158 | subject, 159 | from, 160 | date, 161 | content, 162 | }; 163 | resolveTask(email); 164 | } catch (parseErr) { 165 | console.error( 166 | `[IMAP] Failed to parse message ${seqno}:`, 167 | parseErr, 168 | ); 169 | rejectTask(parseErr); 170 | } 171 | }); 172 | }); 173 | tasks.push(task); 174 | }); 175 | 176 | fetch.once("end", async () => { 177 | await Promise.allSettled(tasks).then((results) => { 178 | const emails = results 179 | .filter((result) => result.status === "fulfilled") 180 | .map((result) => (result as PromiseFulfilledResult).value); 181 | 182 | if (emails.length > 0) { 183 | console.log("[IMAP] Startup emails fetched"); 184 | const rss = buildRSSFromEmailFolder(emails, this.config); 185 | writeFileSync( 186 | path.join(__dirname, "../public/feeds", `${configHash}.xml`), 187 | rss, 188 | ); 189 | console.log("[IMAP] RSS Feed generated"); 190 | } else { 191 | console.log("[IMAP] No valid emails found."); 192 | } 193 | }); 194 | console.log("[IMAP] Finished processing emails."); 195 | }); 196 | 197 | fetch.once("error", (fetchErr) => { 198 | console.error("[IMAP] Startup fetch error:", fetchErr); 199 | }); 200 | }); 201 | } 202 | 203 | private fetchNewEmails(): void { 204 | console.log("[IMAP] Fetching emails..."); 205 | 206 | this.imap.search(["ALL"], (err, results) => { 207 | if (err || !results || results.length === 0) { 208 | console.error("[IMAP] Error or no emails found:", err); 209 | return; 210 | } 211 | 212 | const recentUids = results.sort((a, b) => a - b).slice(-10); 213 | const fetch = this.imap.fetch(recentUids, { bodies: [""], struct: true }); 214 | const tasks: Promise[] = []; 215 | 216 | fetch.on("message", (msg, seqno) => { 217 | const chunks: Buffer[] = []; 218 | msg.on("body", (stream) => { 219 | stream.on("data", (chunk: Buffer | string) => { 220 | if (typeof chunk === "string") { 221 | chunks.push(Buffer.from(chunk, "utf-8")); 222 | } else { 223 | chunks.push(chunk); 224 | } 225 | }); 226 | }); 227 | 228 | const task = new Promise((resolveTask, rejectTask) => { 229 | msg.once("end", async () => { 230 | try { 231 | const raw = Buffer.concat(chunks); 232 | const parsed = await simpleParser(raw); 233 | 234 | const subject = parsed.subject 235 | ? libmime.decodeWords(parsed.subject) 236 | : "(No Subject)"; 237 | const from = parsed.from?.text 238 | ? 
libmime.decodeWords(parsed.from.text) 239 | : "(Unknown Sender)"; 240 | const date = 241 | parsed.date?.toISOString() || new Date().toISOString(); 242 | const content = parsed.text || parsed.html || "(No content)"; 243 | const email: Email = { 244 | UID: seqno, 245 | subject, 246 | from, 247 | date, 248 | content, 249 | }; 250 | resolveTask(email); 251 | } catch (parseErr) { 252 | console.error( 253 | `[IMAP] Failed to parse message ${seqno}:`, 254 | parseErr, 255 | ); 256 | rejectTask(parseErr); 257 | } 258 | }); 259 | }); 260 | tasks.push(task); 261 | }); 262 | 263 | fetch.once("end", async () => { 264 | await Promise.allSettled(tasks).then((results) => { 265 | const emails = results 266 | .filter((result) => result.status === "fulfilled") 267 | .map((result) => (result as PromiseFulfilledResult).value); 268 | 269 | if (emails.length > 0) { 270 | console.log("[IMAP] Recent emails fetched, updating RSS..."); 271 | const rss = buildRSSFromEmailFolder(emails, this.config); 272 | writeFileSync( 273 | path.join(__dirname, "../public/feeds", `${configHash}.xml`), 274 | rss, 275 | ); 276 | console.log("[IMAP] RSS Feed regenerated"); 277 | } else { 278 | console.log("[IMAP] No valid emails found."); 279 | } 280 | }); 281 | console.log("[IMAP] Completed processing new emails."); 282 | }); 283 | 284 | fetch.once("error", (fetchErr) => { 285 | console.error("[IMAP] Error fetching new emails:", fetchErr); 286 | }); 287 | }); 288 | } 289 | 290 | private reconnect(): void { 291 | console.log("[IMAP] Reconnecting in 10s..."); 292 | setTimeout(() => this.start(), 10000); 293 | } 294 | 295 | public stop(): void { 296 | if (this.imap) { 297 | console.log("[IMAP] Stopping watcher..."); 298 | this.imap.end(); 299 | } 300 | } 301 | } 302 | 303 | export function buildRSSFromEmailFolder(emails, config) { 304 | const feed = new RSS({ 305 | title: config.title || "Email RSS Feed", 306 | description: "RSS feed generated from IMAP email folder", 307 | pubDate: new Date(), 308 | }); 309 | 310 | emails.forEach((email) => { 311 | feed.item({ 312 | title: email.subject, 313 | description: email.content, 314 | author: email.from, 315 | url: email.link, 316 | guid: email.UID, 317 | date: email.date || new Date(), 318 | }); 319 | }); 320 | 321 | return feed.xml({ indent: true }); 322 | } 323 | 324 | const watcher = new ImapWatcher(imapConfig); 325 | 326 | watcher.start(); 327 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mkfd", 3 | "module": "index.ts", 4 | "type": "module", 5 | "version": "1.0.0", 6 | "scripts": { 7 | "start": "bun run index.ts", 8 | "dev": "bun run index.ts --watch --passkey=admin123 --cookieSecret=a18c1fd2211edd76a18c1fd2211edd76 --encryptionKey=a18c1fd2211edd76" 9 | }, 10 | "dependencies": { 11 | "axios": "^1.8.2", 12 | "bun": "^1.1.33", 13 | "bun-types": "^0.1.0", 14 | "cheerio": "^1.0.0", 15 | "dayjs": "^1.11.13", 16 | "hono": "^4.6.8", 17 | "hono-sessions": "^0.7.0", 18 | "js-yaml": "^4.1.0", 19 | "mailparser": "^3.7.2", 20 | "minimist": "^1.2.8", 21 | "node-forge": "^1.3.1", 22 | "node-imap": "^0.9.6", 23 | "patchright": "^1.51.3", 24 | "readline": "^1.3.0", 25 | "rss": "1.2.2", 26 | "uuid": "^11.0.2", 27 | "xmldom": "^0.6.0" 28 | }, 29 | "devDependencies": { 30 | "@types/bun": "^1.1.12", 31 | "@types/cheerio": "0.22.31", 32 | "@types/xml": "1.0.8", 33 | "@types/node-imap": "^0.9.3" 34 | } 35 | } 36 | 
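
Stepping back to the IMAP watcher above: `node/imap-watch.utility.ts` loads `configs/<feedId>.yaml`, which is the file the `POST /` handler in `index.ts` writes when `feedType` is `email`. A sketch of its shape, with every value invented and the ciphertext shown as a placeholder:

```yaml
# Illustration only: every value here is a placeholder.
feedId: 123e4567-e89b-12d3-a456-426614174000
feedName: Inbox Feed
feedType: email
config:
  host: imap.example.com
  port: 993
  user: me@example.com
  encryptedPassword: "<ciphertext produced by utilities/security.utility.ts>"
  folder: INBOX
article: {}
apiMapping: {}
refreshTime: 5
reverse: false
strict: false
```

The watcher decrypts `encryptedPassword` with the `--key` argument it is started with, which is why the same encryption key has to be reused for email feeds across restarts.
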
-------------------------------------------------------------------------------- /public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Feed Builder 7 | 8 | 9 | 61 | 62 | 63 | 64 | 65 | 75 |
76 |
77 | 78 |

Feed Builder

79 |

Enter details below to create a new RSS feed

80 |
81 |
82 |
83 | 84 |

85 |
86 | 87 | 88 | 89 |
90 |
91 |
92 | 93 |

98 | 115 |
116 |

CSS Selectors for RSS Feed Items 🪄

117 | 118 |

119 | 120 |

121 | 122 |

123 | 124 |

125 |

129 |

133 | Title Drill Chain (Optional) 134 | ℹ️ 136 |
137 | 140 |

141 |
142 | Description 143 | 144 |

145 | 146 |

147 | 148 |

149 |

153 |

157 | Description Drill Chain (Optional) 158 | ℹ️ 160 |
161 | 164 |
165 |

166 |
167 | Link 168 | 169 |

170 | 171 |

172 | 173 |

174 |

178 | 179 | 180 | Link Drill Chain (Optional) 181 | ℹ️ 183 |
184 | 187 |
188 |

189 |
190 | Enclosure (Image, Video, Etc.) 191 | 192 |

193 | 194 |

195 | 196 |

197 |

201 | 202 | 204 | Enclosure Drill Chain (Optional) 205 | ℹ️ 207 |
208 | 211 |
212 |

213 |
214 | Author 215 | 216 |

217 | 218 |

219 | 220 |

221 |

225 | 229 |

230 | Author Drill Chain (Optional) 231 | ℹ️ 233 |
234 | 237 |
238 |

239 |
240 | Date 241 | 242 |

243 | 244 |

245 | 246 |

247 | 248 | ℹ️ 250 |

258 | 259 | 261 | Date Drill Chain (Optional) 262 | ℹ️ 264 |
265 | 268 |
269 |
270 | 297 |

298 |
299 | Additional Options ⚙️ 300 | 301 |
302 | 303 | 304 | 305 | 306 | 307 | 311 | 317 | 322 |
323 |

324 |
325 | 326 | 327 |
328 |
329 |
330 | 336 | 343 | 351 |
352 | 802 | 803 | 804 | -------------------------------------------------------------------------------- /public/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TBosak/mkfd/38384769c31531fef0726e6d2f2da4c744a168ec/public/logo.ico -------------------------------------------------------------------------------- /public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TBosak/mkfd/38384769c31531fef0726e6d2f2da4c744a168ec/public/logo.png -------------------------------------------------------------------------------- /public/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 15 | 18 | 21 | 22 | 25 | 30 | 31 | 32 | 35 | 37 | 40 | 43 | 45 | 48 | 50 | 53 | 55 | 58 | 62 | 63 | 64 | 65 | 68 | 72 | 73 | 76 | 80 | 81 | 84 | 88 | 89 | 92 | 96 | 97 | 100 | 104 | 105 | 106 | 107 | 112 | 116 | 120 | 121 | 122 | 123 | 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "lib": ["dom","ESNext"], 4 | "module": "esnext", 5 | "target": "esnext", 6 | "moduleResolution": "node", 7 | "typeRoots": ["node_modules/@types"], 8 | "allowSyntheticDefaultImports": true, 9 | "allowImportingTsExtensions": true, 10 | "resolveJsonModule": true, 11 | "noEmit": true, 12 | "allowJs": true, 13 | "types": ["bun-types", "cheerio"] 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /utilities/data-handler.utility.ts: -------------------------------------------------------------------------------- 1 | import axios from "axios"; 2 | import dayjs from "dayjs"; 3 | import * as cheerio from "cheerio"; 4 | import customParseFormat from "dayjs/plugin/customParseFormat"; 5 | import { Browser, chromium, Cookie, Page } from "patchright"; 6 | import { discoverUrl, looksLikeUrl } from "./rss-builder.utility"; 7 | 8 | dayjs.extend(customParseFormat); 9 | 10 | export function stripHtml(html: string) { 11 | return html.replace(/<(?:.|\n)*?>/gm, ""); 12 | } 13 | 14 | export function titleCase(words: string) { 15 | return words.replace(/\w\S*/g, function (txt) { 16 | return txt.charAt(0).toUpperCase() + txt.substring(1).toLowerCase(); 17 | }); 18 | } 19 | 20 | export function appendUrl(url?: string, link?: string) { 21 | if (!!url && !!link) { 22 | if (link.startsWith("/")) { 23 | return url.endsWith("/") 24 | ? `${url.substring(0, url.length - 1)}${link}` 25 | : `${url}${link}`; 26 | } 27 | return url.endsWith("/") ? `${url}${link}` : `${url}/${link}`; 28 | } 29 | } 30 | 31 | export function processWords( 32 | words?: string, 33 | title?: boolean, 34 | removeHtml?: boolean, 35 | ) { 36 | var result = words ?? ""; 37 | if (removeHtml) result = stripHtml(result); 38 | if (title) result = titleCase(result); 39 | return result; 40 | } 41 | 42 | export function processLinks( 43 | words?: string, 44 | removeHtml?: boolean, 45 | relativeLink?: boolean, 46 | rootUrl?: string, 47 | ) { 48 | var result = words ?? ""; 49 | if (removeHtml) result = stripHtml(result); 50 | if (relativeLink && rootUrl) result = appendUrl(rootUrl, result); 51 | return result; 52 | } 53 | 54 | export function processDates( 55 | date?: any, 56 | removeHtml?: boolean, 57 | userDateFormat?: string, 58 | ) { 59 | let result = date ?? 
""; 60 | if (removeHtml) result = stripHtml(result); 61 | 62 | if (userDateFormat) { 63 | const parsed = dayjs(result, userDateFormat); 64 | if (parsed.isValid()) return parsed.toDate().toLocaleString(); 65 | } 66 | 67 | const patterns = [ 68 | { regex: /\b\d{10}\b/, type: "unix" }, 69 | { regex: /\b\d{13}\b/, type: "unixMillis" }, 70 | { 71 | regex: /\b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z?\b/, 72 | type: "iso", 73 | }, 74 | { regex: /\b\d{4}-\d{2}-\d{2}\b/, type: "yyyy-mm-dd" }, 75 | { 76 | regex: /\b\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\b/, 77 | type: "yyyy-mm-dd hh:mm:ss", 78 | }, 79 | { 80 | regex: /\b\w{3}, \d{2} \w{3} \d{4} \d{2}:\d{2}:\d{2} GMT\b/, 81 | type: "utc", 82 | }, 83 | ]; 84 | 85 | function parseDate(value: string, type: string): Date | null { 86 | switch (type) { 87 | case "unix": 88 | return new Date(parseInt(value) * 1000); 89 | case "unixMillis": 90 | return new Date(parseInt(value)); 91 | case "iso": 92 | return new Date(value); 93 | case "yyyy-mm-dd": 94 | return new Date(`${value}T00:00:00Z`); 95 | case "yyyy-mm-dd hh:mm:ss": 96 | return new Date(value + "Z"); 97 | case "utc": 98 | return new Date(value); 99 | default: 100 | return null; 101 | } 102 | } 103 | 104 | for (const { regex, type } of patterns) { 105 | const match = result.match(regex); 106 | if (match) { 107 | const parsedDate = parseDate(match[0], type); 108 | if (parsedDate && !isNaN(parsedDate.getTime())) { 109 | return parsedDate.toLocaleString(); 110 | } 111 | } 112 | } 113 | 114 | return result; 115 | } 116 | 117 | export function get(obj, path, defaultValue) { 118 | const keys = path.split("."); 119 | let result = obj; 120 | for (let key of keys) { 121 | if (result == null || !(key in result)) { 122 | return defaultValue; 123 | } 124 | result = result[key]; 125 | } 126 | return result; 127 | } 128 | 129 | export async function resolveDrillChain( 130 | startingHtmlOrUrl: string, 131 | chain: Array<{ 132 | selector: string; 133 | attribute: string; 134 | isRelative: boolean; 135 | baseUrl: string; 136 | stripHtml: boolean; 137 | }>, 138 | useAdvanced: boolean = false, 139 | expectUrl: boolean = false 140 | ): Promise { 141 | if (!chain || chain.length === 0) return ""; 142 | 143 | let currentHtml = ""; 144 | let browser: Browser | null = null; 145 | let page: Page | null = null; 146 | 147 | try { 148 | if (startingHtmlOrUrl.startsWith("http://") || startingHtmlOrUrl.startsWith("https://")) { 149 | if (useAdvanced) { 150 | browser = await chromium.launch({ headless: true }); 151 | const context = await browser.newContext(); 152 | page = await context.newPage(); 153 | await page.goto(startingHtmlOrUrl, { waitUntil: "networkidle" }); 154 | currentHtml = await page.content(); 155 | } else { 156 | const resp = await axios.get(startingHtmlOrUrl); 157 | currentHtml = resp.data; 158 | } 159 | } else { 160 | currentHtml = startingHtmlOrUrl; 161 | } 162 | 163 | let finalValue = ""; 164 | 165 | for (let i = 0; i < chain.length; i++) { 166 | const { selector, attribute, isRelative, baseUrl } = chain[i]; 167 | const $ = cheerio.load(currentHtml); 168 | const el = $(selector).first(); 169 | if (!el || el.length === 0) { 170 | finalValue = ""; 171 | break; 172 | } 173 | 174 | const rawValue = attribute ? (el.attr(attribute) ?? "") : (chain[i].stripHtml ? (el.text() ?? "") : (el.html() ?? 
"")); 175 | 176 | if (i === chain.length - 1) { 177 | let val = rawValue; 178 | 179 | if (expectUrl && !looksLikeUrl(val)) { 180 | const $frag = cheerio.load(val); // rawValue might be HTML 181 | const mined = discoverUrl($frag, $frag.root()); 182 | if (mined) val = mined; 183 | } 184 | 185 | finalValue = val; 186 | } else { 187 | let absoluteUrl = rawValue; 188 | if (isRelative && baseUrl) { 189 | absoluteUrl = 190 | baseUrl.endsWith("/") || rawValue.startsWith("/") 191 | ? baseUrl + rawValue 192 | : `${baseUrl}/${rawValue}`; 193 | } 194 | 195 | if (useAdvanced && browser && page) { 196 | try { 197 | await page.goto(absoluteUrl, { waitUntil: "networkidle" }); 198 | currentHtml = await page.content(); 199 | } catch { 200 | finalValue = ""; 201 | break; 202 | } 203 | } else { 204 | try { 205 | const resp = await axios.get(absoluteUrl); 206 | currentHtml = resp.data; 207 | } catch { 208 | finalValue = ""; 209 | break; 210 | } 211 | } 212 | } 213 | } 214 | 215 | return finalValue; 216 | } finally { 217 | if (browser) await browser.close(); 218 | } 219 | } 220 | 221 | export function parseCookiesForPlaywright(cookieString: string, domain: string): Cookie[] { 222 | return cookieString.split(";").map(part => { 223 | const [name, ...valuePieces] = part.trim().split("="); 224 | return { 225 | name, 226 | value: valuePieces.join("="), 227 | domain, // or supply `url: feedConfig.config.baseUrl` 228 | path: "/" 229 | } as Cookie; 230 | }); 231 | } -------------------------------------------------------------------------------- /utilities/imap.utility.ts: -------------------------------------------------------------------------------- 1 | import Imap, { Config } from "node-imap"; 2 | 3 | export async function listImapFolders(config: Config): Promise { 4 | const imap = new Imap({ 5 | user: config.user, 6 | password: config.password, 7 | host: config.host, 8 | port: config.port, 9 | tls: true, 10 | }); 11 | 12 | const openImapConnection = (): Promise => 13 | new Promise((resolve, reject) => { 14 | imap.once("ready", resolve); 15 | imap.once("error", reject); 16 | imap.connect(); 17 | }); 18 | 19 | const getMailboxes = (): Promise => 20 | new Promise((resolve, reject) => { 21 | imap.getBoxes((err, boxes) => { 22 | if (err) reject(err); 23 | else resolve(boxes); 24 | }); 25 | }); 26 | 27 | const closeConnection = (): Promise => 28 | new Promise((resolve) => { 29 | imap.once("close", resolve); 30 | imap.end(); 31 | }); 32 | 33 | try { 34 | await openImapConnection(); 35 | const mailboxes = await getMailboxes(); 36 | await closeConnection(); 37 | 38 | const flattenFolders = (boxes: Imap.MailBoxes, prefix = ""): string[] => { 39 | let folders: string[] = []; 40 | for (const box in boxes) { 41 | folders.push(prefix + box); 42 | if (boxes[box].children) { 43 | folders = folders.concat( 44 | flattenFolders( 45 | boxes[box].children!, 46 | prefix + box + boxes[box].delimiter, 47 | ), 48 | ); 49 | } 50 | } 51 | return folders; 52 | }; 53 | 54 | return flattenFolders(mailboxes); 55 | } catch (err) { 56 | console.error("IMAP Error:", err); 57 | return []; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /utilities/rss-builder.utility.ts: -------------------------------------------------------------------------------- 1 | import * as cheerio from "cheerio"; 2 | import RSS from "rss"; 3 | import CSSTarget from "../models/csstarget.model"; 4 | import { 5 | processDates, 6 | processLinks, 7 | processWords, 8 | get, 9 | resolveDrillChain, 10 | } from 
"./data-handler.utility"; 11 | import ApiConfig from "./../models/apiconfig.model"; 12 | 13 | export async function buildRSS(res: any, feedConfig: any): Promise { 14 | const apiConfig: ApiConfig = feedConfig.config; 15 | const article = feedConfig.article as { 16 | iterator: CSSTarget; 17 | title?: CSSTarget; 18 | description?: CSSTarget; 19 | author?: CSSTarget; 20 | link?: CSSTarget; 21 | date?: CSSTarget; 22 | enclosure?: CSSTarget; 23 | }; 24 | const reverse: boolean = feedConfig.reverse || false; 25 | const strict: boolean = feedConfig.strict || false; 26 | const advanced: boolean = apiConfig.advanced || false; 27 | const $ = cheerio.load(res); 28 | const elements = $(article.iterator.selector).toArray(); 29 | 30 | if (article) { 31 | var input = await Promise.all( 32 | elements.map(async (el, i) => { 33 | const itemData = { 34 | title: processWords( 35 | await extractField($, el, article.title, advanced), 36 | article.title?.titleCase, 37 | article.title?.stripHtml 38 | ), 39 | description: processWords( 40 | await extractField($, el, article.description, advanced), 41 | article.description?.titleCase, 42 | article.description?.stripHtml 43 | ), 44 | url: processLinks( 45 | await extractField($, el, article.link, advanced, false, true), 46 | article.link?.stripHtml, 47 | article.link?.relativeLink, 48 | article.link?.rootUrl 49 | ), 50 | author: processWords( 51 | await extractField($, el, article.author, advanced), 52 | article.author?.titleCase, 53 | article.author?.stripHtml 54 | ), 55 | date: processDates( 56 | await extractField($, el, article.date, advanced), 57 | article.date?.stripHtml, 58 | article.date?.dateFormat 59 | ), 60 | enclosure: { 61 | url: processLinks( 62 | await extractField($, el, article.enclosure, advanced, true, false), 63 | article.enclosure?.stripHtml, 64 | article.enclosure?.relativeLink, 65 | article.enclosure?.rootUrl 66 | ), 67 | size: 0, 68 | type: "application/octet-stream", 69 | }, 70 | }; 71 | if (itemData.enclosure.url) { 72 | if (itemData.enclosure.url.startsWith("//")) { 73 | itemData.enclosure.url = "http:" + itemData.enclosure.url; 74 | } 75 | try { 76 | const url = itemData.enclosure.url; 77 | const response = await fetch(url); 78 | if (response.ok) { 79 | const contentLength = response.headers.get("content-length"); 80 | const contentType = response.headers.get("content-type"); 81 | itemData.enclosure["size"] = parseInt(contentLength) || 0; 82 | itemData.enclosure["type"] = 83 | contentType || "application/octet-stream"; 84 | } 85 | } catch (err) { 86 | console.error( 87 | "Failed to fetch enclosure:", 88 | itemData.enclosure.url, 89 | err 90 | ); 91 | } 92 | } 93 | 94 | return itemData; // This is the resolved value of the Promise 95 | }) 96 | ); 97 | 98 | if (strict) { 99 | input = filterStrictly(input); 100 | } 101 | 102 | if (reverse) { 103 | input.reverse(); 104 | } 105 | 106 | const feed = new RSS({ 107 | title: apiConfig?.title || $("title")?.text()?.trim(), 108 | description: $('meta[property="twitter:description"]')?.attr("content"), 109 | author: "mkfd", 110 | site_url: apiConfig.baseUrl, 111 | generator: "Generated by mkfd", 112 | }); 113 | 114 | for (const item of input) { 115 | feed.item({ 116 | title: item.title, 117 | description: item.description, 118 | url: item.url, 119 | guid: Bun.hash(JSON.stringify(item)), 120 | author: item.author, 121 | date: item.date, 122 | enclosure: { 123 | url: item.enclosure.url, 124 | size: item.enclosure.size, 125 | type: item.enclosure.type, 126 | }, 127 | }); 128 | } 129 | 130 | return 
feed.xml({ indent: true }); 131 | } 132 | } 133 | 134 | export function buildRSSFromApiData(apiData, feedConfig) { 135 | const feed = new RSS({ 136 | title: feedConfig.config.title || "API RSS Feed", 137 | description: "RSS feed generated from API data", 138 | feed_url: feedConfig.config.baseUrl + (feedConfig.config.route || ""), 139 | site_url: feedConfig.config.baseUrl, 140 | pubDate: new Date(), 141 | }); 142 | 143 | const itemsPath = feedConfig.apiMapping.items || ""; 144 | var items = get(apiData, itemsPath, []); 145 | 146 | if (feedConfig.strict) { 147 | items = filterStrictly(items); 148 | } 149 | 150 | if (feedConfig.reverse) { 151 | items.reverse(); 152 | } 153 | 154 | items.forEach((item) => { 155 | feed.item({ 156 | title: get(item, feedConfig.apiMapping.title, ""), 157 | description: get(item, feedConfig.apiMapping.description, ""), 158 | url: get(item, feedConfig.apiMapping.link, ""), 159 | guid: Bun.hash(JSON.stringify(item)), 160 | date: get(item, feedConfig.apiMapping.date, "") || new Date(), 161 | }); 162 | }); 163 | 164 | return feed.xml({ indent: true }); 165 | } 166 | 167 | function getNonNullProps(item: any): Set { 168 | const nonNull = new Set(); 169 | 170 | for (const [key, val] of Object.entries(item)) { 171 | if (key === "enclosure") { 172 | const eUrl = (val as any)?.url; 173 | if (eUrl !== null && eUrl !== undefined && eUrl !== "") { 174 | nonNull.add("enclosure"); 175 | } 176 | } else { 177 | if (val !== null && val !== undefined && val !== "") { 178 | nonNull.add(key); 179 | } 180 | } 181 | } 182 | 183 | return nonNull; 184 | } 185 | function filterStrictly(items: any[]): any[] { 186 | const itemPropsSets = items.map((item) => getNonNullProps(item)); 187 | const maxSize = Math.max(...itemPropsSets.map((s) => s.size), 0); 188 | const topIndices = itemPropsSets 189 | .map((propsSet, i) => (propsSet.size === maxSize ? i : -1)) 190 | .filter((i) => i !== -1); 191 | let intersect: Set = new Set(itemPropsSets[topIndices[0]] ?? 
[]); 192 | for (let i = 1; i < topIndices.length; i++) { 193 | const s = itemPropsSets[topIndices[i]]; 194 | const temp = new Set(); 195 | for (const prop of intersect) { 196 | if (s.has(prop)) { 197 | temp.add(prop); 198 | } 199 | } 200 | intersect = temp; 201 | } 202 | const requiredProps = intersect; 203 | const filtered = items.filter((_, idx) => { 204 | const itemSet = itemPropsSets[idx]; 205 | for (const prop of requiredProps) { 206 | if (!itemSet.has(prop)) { 207 | return false; 208 | } 209 | } 210 | return true; 211 | }); 212 | return filtered; 213 | } 214 | 215 | async function extractField( 216 | $: cheerio.Root, 217 | el: cheerio.Element, 218 | field: CSSTarget, 219 | advanced: boolean = false, 220 | forEnclosure: boolean = false, 221 | forLink: boolean = false 222 | ): Promise { 223 | if (!field) return ""; 224 | 225 | if (field.drillChain?.length) { 226 | const itemHtml = $.html(el); 227 | return await resolveDrillChain(itemHtml, field.drillChain, advanced, forLink || forEnclosure); 228 | } 229 | 230 | const target = $(el).find(field.selector); 231 | 232 | if (field.attribute) { 233 | const rawAttr = target.attr(field.attribute); 234 | if (rawAttr) return rawAttr; 235 | } 236 | 237 | const rawText = target.html()?.trim(); 238 | 239 | if (rawText && /^https?:\/\//i.test(rawText)) { 240 | return rawText; 241 | } 242 | 243 | if (rawText && !forEnclosure && !forLink) { 244 | return rawText; 245 | } 246 | 247 | if (forLink) { 248 | const directHref = 249 | target.attr("href") || 250 | target.attr("src") || 251 | target.attr("data-src") || 252 | target.attr("srcset"); 253 | if (directHref) return directHref; 254 | 255 | const nestedHref = target 256 | .find("*") 257 | .toArray() 258 | .map((child) => $(child).attr("href")) 259 | .find((url) => url && /^https?:\/\//i.test(url)); 260 | if (nestedHref) return nestedHref; 261 | } 262 | 263 | if (forEnclosure || forLink) { 264 | const url = discoverUrl($, target); 265 | if (url) return url; 266 | } 267 | return ""; 268 | } 269 | 270 | export function looksLikeUrl(str: string) { 271 | return /^https?:\/\//i.test(str) || str.startsWith("//"); 272 | } 273 | 274 | export function discoverUrl($: cheerio.Root, target: cheerio.Cheerio): string { 275 | const directAttr = 276 | target.attr("href") || target.attr("src") || target.attr("data-src") || target.attr("srcset"); 277 | if (directAttr) { 278 | const first = directAttr.split(/[,\s]/)[0]; 279 | return decodeURIComponent(first); 280 | } 281 | const ld = target.find('script[type="application/ld+json"]').first().html(); 282 | if (ld) { 283 | try { 284 | const data = JSON.parse(ld); 285 | const candidate = 286 | data?.contentUrl ?? 287 | data?.thumbnailUrl ?? 288 | (Array.isArray(data?.image) ? 
data.image[0] : data?.image); 289 | if (candidate && looksLikeMedia(candidate)) return candidate; 290 | } catch { /* ignore bad JSON */ } 291 | } 292 | // inline/background style on element 293 | const inlineStyle = target.attr("style"); 294 | let m = inlineStyle?.match(/background(?:-image)?:[^;]*url\(["']?(.*?)["']?\)/i); 295 | if (m?.[1]) return m[1]; 296 | 297 | // walk up DOM for bg-image urls 298 | let p = target.parent(); 299 | while (p.length) { 300 | const ps = p.attr("style"); 301 | m = ps?.match(/background(?:-image)?:[^;]*url\(["']?(.*?)["']?\)/i); 302 | if (m?.[1]) return m[1]; 303 | p = p.parent(); 304 | } 305 | 306 | // direct attrs 307 | const direct = 308 | target.attr("href") || 309 | target.attr("src") || 310 | target.attr("data-src") || 311 | target.attr("srcset"); 312 | if (direct) return decodeURIComponent(direct.split(/[,\s]/)[0]); 313 | 314 | // nested 315 | const nestedSrc = target 316 | .find("img, video, audio") 317 | .toArray() 318 | .map((c) => $(c).attr("src")) 319 | .find((url) => looksLikeUrl(url ?? "")); 320 | if (nestedSrc) return nestedSrc; 321 | 322 | /* ----- catch-all: mine outerHTML ----- */ 323 | const html = $.html(target); 324 | const abs = nextUsefulAbs(html); 325 | if (abs) return abs; 326 | 327 | const attr = html.match(/\b(?:src|href|data-[\w-]+)=["']([^"']+)["']/i); 328 | if (attr?.[1]) return decodeURIComponent(attr[1]); 329 | 330 | return ""; 331 | } 332 | 333 | const ABS_URL_RE = /https?:\/\/[^\s"'<>]+/ig; // global, to keep scanning 334 | const BORING = /^https?:\/\/(?:schema\.org|www\.w3\.org)\b/i; 335 | 336 | function nextUsefulAbs(html: string): string { 337 | let m: RegExpExecArray | null; 338 | while ((m = ABS_URL_RE.exec(html))) { 339 | const u = decodeURIComponent(m[0]); 340 | if (!BORING.test(u) && (looksLikeMedia(u))) 341 | return u; 342 | } 343 | return ""; 344 | } 345 | 346 | function looksLikeMedia(url: string): boolean { 347 | return /\.(jpe?g|png|gif|webp|bmp|svg|mp4|m4v|mov|webm|m3u8|mp3|aac|ogg|wav)$/i 348 | .test(url.split('?')[0]); // ignore query-string when matching 349 | } -------------------------------------------------------------------------------- /utilities/security.utility.ts: -------------------------------------------------------------------------------- 1 | import forge from "node-forge"; 2 | 3 | export function encrypt(text: string, encryptionKey: string): string { 4 | const iv = forge.random.getBytesSync(16); 5 | const key = forge.util.createBuffer(encryptionKey, "utf8").getBytes(32); 6 | 7 | const cipher = forge.cipher.createCipher("AES-CBC", key); 8 | cipher.start({ iv }); 9 | cipher.update(forge.util.createBuffer(text, "utf8")); 10 | cipher.finish(); 11 | 12 | const encrypted = cipher.output.getBytes(); 13 | 14 | return forge.util.encode64(iv + encrypted); 15 | } 16 | 17 | export function decrypt(encryptedText: string, encryptionKey: string): string { 18 | const raw = forge.util.decode64(encryptedText); 19 | const iv = raw.substring(0, 16); 20 | const encrypted = raw.substring(16); 21 | const key = forge.util.createBuffer(encryptionKey, "utf8").getBytes(32); 22 | 23 | const decipher = forge.cipher.createDecipher("AES-CBC", key); 24 | decipher.start({ iv }); 25 | decipher.update(forge.util.createBuffer(encrypted)); 26 | const success = decipher.finish(); 27 | 28 | if (!success) { 29 | throw new Error( 30 | "Decryption failed. 
Possibly due to invalid key or corrupted data.", 31 | ); 32 | } 33 | 34 | const plainText = decipher.output.toString("utf8"); 35 | return plainText.trim(); 36 | } 37 | -------------------------------------------------------------------------------- /utilities/suggestion-engine.utility.ts: -------------------------------------------------------------------------------- 1 | import axios from "axios"; 2 | import * as cheerio from "cheerio"; 3 | import CSSTarget from "../models/csstarget.model"; 4 | 5 | interface SuggestedSelectors { 6 | iterator: string; 7 | title: CSSTarget; 8 | description: CSSTarget; 9 | link: CSSTarget; 10 | enclosure: CSSTarget; 11 | date: CSSTarget; 12 | author: CSSTarget; 13 | } 14 | 15 | export async function suggestSelectors( 16 | url: string 17 | ): Promise { 18 | const response = await axios.get(url); 19 | const $ = cheerio.load(response.data); 20 | 21 | const fieldCandidates: Record = { 22 | title: [ 23 | "h1", 24 | "h2", 25 | "h3", 26 | "header h1", 27 | "header h2", 28 | "header h3", 29 | '[class*="title"]', 30 | '[id*="title"]', 31 | '[class*="headline"]', 32 | '[id*="headline"]', 33 | '[class*="heading"]', 34 | '[id*="heading"]', 35 | '[class*="post-title"]', 36 | '[id*="post-title"]', 37 | '[class*="entry-title"]', 38 | '[id*="entry-title"]', 39 | '[class*="article-title"]', 40 | '[id*="article-title"]', 41 | '[class*="page-title"]', 42 | '[id*="page-title"]', 43 | '[itemprop="headline"]', 44 | '[itemtype*="schema.org"] [itemprop="headline"]', 45 | '[itemtype*="schema.org/Article"] [itemprop="name"]', 46 | '[itemtype*="schema.org/BlogPosting"] [itemprop="name"]', 47 | ], 48 | description: [ 49 | "p", 50 | "article p", 51 | "div p", 52 | "section p", 53 | '[class*="desc"]', 54 | '[id*="desc"]', 55 | '[class*="summary"]', 56 | '[id*="summary"]', 57 | '[class*="body"]', 58 | '[id*="body"]', 59 | '[class*="content"]', 60 | '[id*="content"]', 61 | '[class*="excerpt"]', 62 | '[id*="excerpt"]', 63 | '[class*="text"]', 64 | '[id*="text"]', 65 | '[class*="blurb"]', 66 | '[id*="blurb"]', 67 | '[itemprop="description"]', 68 | '[itemtype*="schema.org"] [itemprop="description"]', 69 | '[itemtype*="schema.org"] [itemprop="articleBody"]', 70 | '[itemtype*="schema.org/Article"] [itemprop="description"]', 71 | ], 72 | link: [ 73 | "a", 74 | '[class*="link"]', 75 | '[id*="link"]', 76 | '[class*="url"]', 77 | '[id*="url"]', 78 | '[class*="href"]', 79 | '[id*="href"]', 80 | '[class*="readmore"]', 81 | '[id*="readmore"]', 82 | '[class*="entry-link"]', 83 | '[id*="entry-link"]', 84 | '[itemprop="url"]', 85 | '[itemtype*="schema.org"] [itemprop="url"]', 86 | '[itemtype*="schema.org/Article"] a', 87 | '[itemtype*="schema.org/BlogPosting"] a', 88 | ], 89 | enclosure: [ 90 | "img", 91 | "video", 92 | "audio", 93 | '[class*="media"]', 94 | '[id*="media"]', 95 | '[class*="thumbnail"]', 96 | '[id*="thumbnail"]', 97 | '[class*="image"]', 98 | '[id*="image"]', 99 | '[class*="photo"]', 100 | '[id*="photo"]', 101 | '[class*="video"]', 102 | '[id*="video"]', 103 | '[class*="audio"]', 104 | '[id*="audio"]', 105 | '[itemprop="image"]', 106 | '[itemprop="thumbnailUrl"]', 107 | "figure img", 108 | ".post img", 109 | "img.attachment", 110 | '[itemtype*="schema.org"] [itemprop="image"]', 111 | '[itemtype*="schema.org/Article"] img', 112 | '[itemtype*="schema.org/BlogPosting"] img', 113 | ], 114 | date: [ 115 | "time", 116 | "span", 117 | "div", 118 | '[class*="date"]', 119 | '[id*="date"]', 120 | '[class*="time"]', 121 | '[id*="time"]', 122 | '[class*="published"]', 123 | '[id*="published"]', 124 | 
'[class*="updated"]', 125 | '[id*="updated"]', 126 | '[class*="timestamp"]', 127 | '[id*="timestamp"]', 128 | '[itemprop="datePublished"]', 129 | '[itemprop="dateModified"]', 130 | "[datetime]", 131 | 'meta[itemprop="datePublished"]', 132 | '[itemtype*="schema.org"] [itemprop="datePublished"]', 133 | '[itemtype*="schema.org"] [itemprop="dateModified"]', 134 | '[itemtype*="schema.org/Article"] [itemprop="datePublished"]', 135 | '[itemtype*="schema.org/BlogPosting"] [itemprop="datePublished"]', 136 | ], 137 | author: [ 138 | '[class*="author"]', 139 | '[id*="author"]', 140 | '[class*="byline"]', 141 | '[id*="byline"]', 142 | '[class*="writer"]', 143 | '[id*="writer"]', 144 | '[class*="contributor"]', 145 | '[id*="contributor"]', 146 | '[class*="creator"]', 147 | '[id*="creator"]', 148 | '[class*="name"]', 149 | '[id*="name"]', 150 | '[itemprop="author"]', 151 | '[rel="author"]', 152 | '[class*="posted-by"]', 153 | '[id*="posted-by"]', 154 | '[href*="author="]', 155 | '[href*="/author/"]', 156 | '[itemtype*="schema.org"] [itemprop="author"]', 157 | '[itemtype*="schema.org/Person"] [itemprop="name"]', 158 | '[itemtype*="schema.org/Article"] [itemprop="author"]', 159 | '[itemtype*="schema.org/BlogPosting"] [itemprop="author"]', 160 | ], 161 | }; 162 | 163 | const structuralIterators = [ 164 | "ul > li", 165 | "ol > li", 166 | "nav > ul > li", 167 | "div[class*='post']", 168 | "div[class*='entry']", 169 | "div[class*='item']", 170 | "div[class*='card']", 171 | "[class*='feed-item']", 172 | "[class*='post-item']", 173 | "article", 174 | "section[class*='post']", 175 | "section[class*='story']", 176 | ].filter((sel) => $(sel).length >= 3); 177 | 178 | // Then combine them with your other approach: 179 | const parentBased = findCommonParents($, Object.values(fieldCandidates)); 180 | const allCandidates = [...new Set([...parentBased, ...structuralIterators])]; 181 | 182 | if (allCandidates.length === 0) { 183 | throw new Error("No common repeating parent structures identified."); 184 | } 185 | 186 | const iteratorSelector = allCandidates[0]; 187 | const firstItem = $(iteratorSelector).first(); 188 | const childSelectors = suggestChildSelectors( 189 | $, 190 | iteratorSelector, 191 | fieldCandidates 192 | ); 193 | const rawDateText = $(iteratorSelector) 194 | .find(childSelectors.date.selector) 195 | .first() 196 | .text() 197 | .trim(); 198 | const inferredDateFormat = detectDateFormat(rawDateText); 199 | 200 | const linkElem = firstItem.find(childSelectors.link.selector ?? "").first(); 201 | const href = linkElem.attr("href"); 202 | const isLinkRelative = isRelativeUrl(href); 203 | 204 | const enclosureElem = firstItem 205 | .find(childSelectors.enclosure.selector ?? "") 206 | .first(); 207 | const src = enclosureElem.attr("src"); 208 | const isEnclosureRelative = isRelativeUrl(src); 209 | 210 | const baseUrl = extractRootUrl(url); 211 | 212 | return { 213 | iterator: iteratorSelector, 214 | title: { 215 | selector: childSelectors.title.selector ?? "", 216 | attribute: childSelectors.title.attribute ?? "", 217 | stripHtml: false, 218 | }, 219 | description: { 220 | selector: childSelectors.description.selector ?? "", 221 | attribute: childSelectors.description.attribute ?? "", 222 | stripHtml: false, 223 | }, 224 | link: { 225 | selector: childSelectors.link.selector ?? "", 226 | attribute: childSelectors.link.attribute ?? "", 227 | relativeLink: isLinkRelative, 228 | rootUrl: isLinkRelative ? baseUrl : undefined, 229 | }, 230 | enclosure: { 231 | selector: childSelectors.enclosure.selector ?? 
"", 232 | attribute: childSelectors.enclosure.attribute ?? "", 233 | relativeLink: isEnclosureRelative, 234 | rootUrl: isEnclosureRelative ? baseUrl : undefined, 235 | }, 236 | date: { 237 | selector: childSelectors.date.selector ?? "", 238 | attribute: childSelectors.date.attribute ?? "", 239 | dateFormat: inferredDateFormat ?? undefined, 240 | }, 241 | author: { 242 | selector: childSelectors.author.selector ?? "", 243 | attribute: childSelectors.author.attribute ?? "", 244 | }, 245 | }; 246 | } 247 | 248 | function findCommonParents( 249 | $: cheerio.Root, 250 | candidateGroups: string[][] 251 | ): string[] { 252 | const candidateSelectors: string[] = []; 253 | 254 | // First, explicitly check for known good container elements like
255 | const semanticTags = ["article", "li", "section", "div"]; 256 | 257 | for (const tag of semanticTags) { 258 | const matches = $(tag); 259 | if (matches.length >= 3) { 260 | candidateSelectors.push(tag); 261 | } 262 | } 263 | 264 | // If nothing semantic was found, fall back to heuristic detection 265 | if (candidateSelectors.length === 0) { 266 | const selectorCounts: Record = {}; 267 | 268 | // Flatten all possible selectors 269 | const flatSelectors = candidateGroups.flat(); 270 | 271 | flatSelectors.forEach((sel) => { 272 | $(sel).each((_, el) => { 273 | const parent = $(el).parent(); 274 | const tagName = parent.prop("tagName")?.toLowerCase() || ""; 275 | const classList = (parent.attr("class") || "") 276 | .trim() 277 | .split(/\s+/) 278 | .join("."); 279 | const parentSelector = `${tagName}${classList ? "." + classList : ""}`; 280 | selectorCounts[parentSelector] = 281 | (selectorCounts[parentSelector] || 0) + 1; 282 | }); 283 | }); 284 | 285 | const sorted = Object.entries(selectorCounts).sort((a, b) => b[1] - a[1]); 286 | 287 | if (sorted.length > 0 && sorted[0][1] >= 3) { 288 | candidateSelectors.push(sorted[0][0]); 289 | } 290 | } 291 | 292 | return candidateSelectors; 293 | } 294 | 295 | function suggestChildSelectors( 296 | $: cheerio.Root, 297 | parentSelector: string, 298 | fieldCandidates: Record 299 | ): Record { 300 | const results: Record = {}; 301 | 302 | for (const field of Object.keys(fieldCandidates)) { 303 | let bestScore = -Infinity; 304 | let bestTarget: CSSTarget = new CSSTarget(""); 305 | 306 | for (const candidate of fieldCandidates[field]) { 307 | const fullSelector = `${parentSelector} ${candidate}`.trim(); 308 | $(fullSelector).each((_, el) => { 309 | let localScore = scoreElementByField(field, el, $); 310 | 311 | if (field === "enclosure") { 312 | const isMediaTag = 313 | el.type === "tag" && 314 | ["img", "video", "audio"].includes(el.tagName?.toLowerCase() || ""); 315 | const hasValidSrc = 316 | isMediaTag && /^https?:\/\//i.test($(el).attr("src") || ""); 317 | if (!hasValidSrc) { 318 | const nestedMediaEl = $(el) 319 | .find("img[src^='http'], audio[src^='http'], video[src^='http']") 320 | .first(); 321 | if (nestedMediaEl.length) { 322 | el = nestedMediaEl.get(0); 323 | 324 | localScore += 20; 325 | } 326 | } 327 | } 328 | 329 | if (localScore > bestScore) { 330 | bestScore = localScore; 331 | 332 | // Compute selector *relative* to parent 333 | const pathFromParent = $(el) 334 | .parentsUntil(parentSelector) 335 | .toArray() 336 | .reverse(); 337 | pathFromParent.push(el); // include the element itself 338 | const relSelector = pathFromParent 339 | .map((e) => { 340 | const tag = e.type === "tag" ? e.tagName?.toLowerCase() : "*"; 341 | if (!tag) return "*"; 342 | const classes = ($(e).attr("class") || "") 343 | .split(/\s+/) 344 | .filter(Boolean) 345 | .map((cls) => `.${cls}`) 346 | .join(""); 347 | return tag + classes; 348 | }) 349 | .join(" > "); 350 | 351 | const attr = 352 | field === "link" || field === "enclosure" 353 | ? $(el).attr("href") || $(el).attr("src") 354 | : undefined; 355 | let attribute = attr 356 | ? Object.keys($(el).attr() || {}).find( 357 | (k) => $(el).attr(k) === attr 358 | ) 359 | : undefined; 360 | 361 | const tagName = el.type === "tag" ? 
el.tagName.toLowerCase() : ""; 362 | if (field === "date" && tagName === "time") { 363 | const dtValue = $(el).attr("datetime"); 364 | if (dtValue) { 365 | attribute = "datetime"; 366 | } 367 | } 368 | 369 | const isRelative = attr && !/^https?:\/\//i.test(attr); 370 | bestTarget = new CSSTarget( 371 | relSelector, 372 | attribute, 373 | false, 374 | isRelative ? "" : undefined, 375 | isRelative 376 | ); 377 | 378 | if (field === "date") { 379 | const format = detectDateFormat($(el).text()); 380 | if (format) bestTarget.dateFormat = format; 381 | } 382 | } 383 | }); 384 | } 385 | 386 | results[field] = bestTarget; 387 | } 388 | 389 | return results; 390 | } 391 | 392 | function detectDateFormat(dateStr: string): string | null { 393 | const patterns: { regex: RegExp; format: string }[] = [ 394 | { regex: /^\d{4}[-/]\d{2}[-/]\d{2}/, format: "YYYY-MM-DD" }, 395 | { regex: /^\d{2}[-/]\d{2}[-/]\d{4}/, format: "MM-DD-YYYY" }, 396 | { regex: /^\d{2}[-/]\d{2}[-/]\d{2}/, format: "MM-DD-YY" }, 397 | { regex: /^\d{2}\.\d{2}\.\d{4}/, format: "DD.MM.YYYY" }, 398 | { regex: /^\d{4}\.\d{2}\.\d{2}/, format: "YYYY.MM.DD" }, 399 | { regex: /^\d{8}$/, format: "YYYYMMDD" }, 400 | { regex: /^\d{1,2} [A-Za-z]+ \d{4}/, format: "D MMMM YYYY" }, 401 | { regex: /^[A-Za-z]+ \d{1,2}, \d{4}/, format: "MMMM D, YYYY" }, 402 | { regex: /^[A-Za-z]+ \d{4}/, format: "MMMM YYYY" }, 403 | { regex: /^\d{1,2}\/\d{1,2}\/\d{4}/, format: "M/D/YYYY" }, 404 | { regex: /^[A-Za-z]{3} \d{1,2}, \d{4}/, format: "MMM D, YYYY" }, 405 | { regex: /^\d{1,2} [A-Za-z]{3} \d{4}/, format: "D MMM YYYY" }, 406 | ]; 407 | 408 | for (const { regex, format } of patterns) { 409 | if (regex.test(dateStr.trim())) { 410 | return format; 411 | } 412 | } 413 | 414 | return null; 415 | } 416 | 417 | function isRelativeUrl(value: string | undefined): boolean { 418 | if (!value) return false; 419 | if (value.startsWith("//")) { 420 | return false; 421 | } 422 | return value.startsWith("/") || !/^https?:\/\//i.test(value); 423 | } 424 | 425 | function extractRootUrl(url: string): string { 426 | try { 427 | const u = new URL(url); 428 | return u.origin; 429 | } catch { 430 | return ""; 431 | } 432 | } 433 | 434 | function scoreElementByField( 435 | field: string, 436 | el: cheerio.Element, 437 | $: cheerio.Root 438 | ): number { 439 | const text = $(el).text().trim(); 440 | const len = text.length; 441 | const words = text.split(/\s+/).length; 442 | const tag = el.type === "tag" ? 
el.tagName.toLowerCase() : ""; 443 | 444 | if (len === 0) return 0; 445 | 446 | let score = 0; 447 | 448 | if (["h1", "h2", "h3", "p", "time", "a"].includes(tag)) score += 20; 449 | if (["div", "span"].includes(tag)) score += 5; 450 | 451 | switch (field) { 452 | case "title": 453 | if (len >= 10 && len <= 100) score += 50; 454 | if (tag.startsWith("h")) score += 10; 455 | break; 456 | case "description": 457 | if (len >= 80 && len <= 600) score += 50; 458 | if (tag === "p") score += 10; 459 | break; 460 | case "date": 461 | const hasDateTokens = 462 | /\b\d{1,4}\b/.test(text) || 463 | /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\b/i.test(text); 464 | if (hasDateTokens) score += 50; 465 | if (tag === "time") score += 20; 466 | break; 467 | case "author": 468 | if (words >= 1 && words <= 4) score += 40; 469 | if (/by\s/i.test(text)) score += 10; 470 | break; 471 | case "link": 472 | case "enclosure": 473 | const attr = $(el).attr("href") || $(el).attr("src"); 474 | if (attr) score += 30; 475 | if (attr && attr.startsWith("http")) score += 20; 476 | if (field === "enclosure") { 477 | if ( 478 | ["img", "audio", "video"].includes(tag) && 479 | attr && 480 | /^https?:\/\//i.test(attr) 481 | ) { 482 | score += 50; 483 | } else { 484 | const nestedMedia = $(el) 485 | .find("img[src^='http'], audio[src^='http'], video[src^='http']") 486 | .first(); 487 | if (nestedMedia.length > 0) { 488 | score += 30; 489 | } 490 | } 491 | } 492 | break; 493 | } 494 | 495 | if (new Set(text.split(/\s+/)).size === words) score += 10; 496 | if (len < 5 || words < 2) score -= 30; 497 | if (len > 1000) score -= 30; 498 | 499 | return score; 500 | } 501 | -------------------------------------------------------------------------------- /workers/feed-updater.worker.ts: -------------------------------------------------------------------------------- 1 | import { writeFile } from "fs/promises" 2 | import axios, { AxiosRequestConfig } from "axios" 3 | import { buildRSS, buildRSSFromApiData } from "../utilities/rss-builder.utility" 4 | import { join } from "path" 5 | import { parseCookiesForPlaywright } from "../utilities/data-handler.utility" 6 | import { chromium } from "patchright"; 7 | 8 | declare var self: Worker 9 | const rssDir = "./public/feeds" 10 | 11 | async function fetchDataAndUpdateFeed(feedConfig: any) { 12 | try { 13 | let rssXml: string | undefined 14 | if (feedConfig.feedType === "webScraping" && !feedConfig.config.advanced) { 15 | const response = await axios.get(feedConfig.config.baseUrl, { 16 | headers: { 17 | ...(feedConfig.config.headers || {}), 18 | Cookie: feedConfig.config.cookieString || "" 19 | } 20 | }); 21 | const html = response.data 22 | rssXml = await buildRSS(html, feedConfig) 23 | 24 | } else if (feedConfig.feedType === "webScraping" && feedConfig.config.advanced) { 25 | const context = await chromium.launch({ 26 | channel: "chrome", 27 | headless: true, 28 | }); 29 | const page = await context.newPage(); 30 | 31 | if (feedConfig.config.headers && Object.keys(feedConfig.config.headers).length) { 32 | await page.setExtraHTTPHeaders(feedConfig.config.headers); 33 | } 34 | 35 | if (feedConfig.config.cookieString) { 36 | const domain = new URL(feedConfig.config.baseUrl).hostname; 37 | const cookiesArray = parseCookiesForPlaywright(feedConfig.config.cookieString, domain); 38 | if (cookiesArray.length) await page.context().addCookies(cookiesArray); 39 | } 40 | 41 | await page.goto(feedConfig.config.baseUrl, { waitUntil: "networkidle" }); 42 | const html = await page.content(); 43 | 
await context.close(); 44 | rssXml = await buildRSS(html, feedConfig) 45 | 46 | } else if (feedConfig.feedType === "api") { 47 | const axiosConfig: AxiosRequestConfig = { 48 | method: feedConfig.config.method || "GET", 49 | url: feedConfig.config.baseUrl + (feedConfig.config.route || ""), 50 | headers: { 51 | ...feedConfig.config.headers, 52 | Cookie: feedConfig.config.cookieString || "" 53 | }, 54 | params: feedConfig.config.params || {}, 55 | data: feedConfig.config.body || {}, 56 | withCredentials: feedConfig.config.withCredentials || false 57 | } 58 | const response = await axios(axiosConfig) 59 | const apiData = response.data 60 | rssXml = buildRSSFromApiData(apiData, feedConfig) 61 | 62 | } 63 | 64 | if (rssXml) { 65 | const rssFilePath = join(rssDir, `${feedConfig.feedId}.xml`) 66 | await writeFile(rssFilePath, rssXml, "utf8") 67 | } 68 | 69 | } catch (error) { 70 | console.error(`Error fetching data for feedId ${feedConfig.feedId}:`, error.message) 71 | } 72 | } 73 | 74 | self.onmessage = (message) => { 75 | if (message.data.command === "start") { 76 | fetchDataAndUpdateFeed(message.data.config) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /workers/imap-feed.worker.ts: -------------------------------------------------------------------------------- 1 | declare var self: Worker; 2 | 3 | import { spawn } from "bun"; 4 | 5 | let childProcess: any = null; 6 | 7 | self.onmessage = (message) => { 8 | if (message.data.command === "start" && !childProcess) { 9 | const encryptionKey = message.data.encryptionKey; 10 | const configHash = message.data.config.feedId; 11 | 12 | if (!encryptionKey || typeof encryptionKey !== "string") { 13 | console.error("[IMAP WORKER] Invalid encryption key:", encryptionKey); 14 | self.postMessage({ status: "error", error: "Invalid encryption key" }); 15 | return; 16 | } 17 | 18 | console.log("[IMAP WORKER] Spawning Node IMAP watcher subprocess..."); 19 | 20 | childProcess = spawn({ 21 | cmd: [ 22 | "node", 23 | "./node/imap-watch.utility.ts", 24 | `--key=${encryptionKey}`, 25 | `--hash=${configHash}`, 26 | ], 27 | stdout: "inherit", 28 | stderr: "inherit", 29 | }); 30 | 31 | // Now we can handle output 32 | // childProcess.stdout.ondata = (chunk) => { 33 | // console.log("[Node IMAP stdout]", chunk.toString()); 34 | // }; 35 | // if (childProcess.stderr) { 36 | // childProcess.stderr.ondata = (chunk) => { 37 | // console.error("[Node IMAP stderr]", chunk.toString()); 38 | // }; 39 | // } 40 | 41 | childProcess.onexit = (exitCode) => { 42 | console.log( 43 | "[IMAP WORKER] Node IMAP process exited with code:", 44 | exitCode, 45 | ); 46 | childProcess = null; 47 | }; 48 | 49 | self.postMessage({ status: "IMAP worker started." }); 50 | } else if (message.data.command === "stop" && childProcess) { 51 | console.log("[IMAP WORKER] Stopping Node IMAP watcher..."); 52 | childProcess.kill(); 53 | childProcess = null; 54 | self.postMessage({ status: "IMAP worker stopped." }); 55 | } 56 | }; 57 | --------------------------------------------------------------------------------
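Usage sketch (illustrative, not part of the repository): the helpers in utilities/data-handler.utility.ts can be exercised directly under Bun. The sample strings, URLs, and the hypothetical root-level file name below are assumptions made for this sketch only.

// data-handler-example.ts (hypothetical file at the repository root)
import { appendUrl, processDates, stripHtml } from "./utilities/data-handler.utility";

// Pattern detection: a 10-digit unix timestamp embedded in scraped text is
// converted to a locale date string (exact output depends on the host locale).
console.log(processDates("posted 1700000000 by admin"));

// An explicit dayjs format string takes precedence over pattern detection.
console.log(processDates("14.11.2023", false, "DD.MM.YYYY"));

// Relative links are joined onto a root URL without doubling the slash.
console.log(appendUrl("https://example.com", "/posts/1")); // https://example.com/posts/1

// Tags are stripped with a simple regex, leaving only the text content.
console.log(stripHtml("<p>Hello <b>world</b></p>")); // Hello world
--------------------------------------------------------------------------------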
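Usage sketch (illustrative, not part of the repository): round-tripping a secret through utilities/security.utility.ts. The passphrase value is an assumption; the helpers read 32 bytes of it as the AES-CBC key, so it should be at least 32 characters long.

// security-example.ts (hypothetical file at the repository root)
import { encrypt, decrypt } from "./utilities/security.utility";

const key = "0123456789abcdef0123456789abcdef"; // 32 ASCII chars -> 32-byte AES key (assumed value)
const token = encrypt("imap password", key);     // base64(iv + ciphertext)
console.log(decrypt(token, key));                // "imap password"
--------------------------------------------------------------------------------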
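Usage sketch (illustrative, not part of the repository): building a feed from JSON with buildRSSFromApiData in utilities/rss-builder.utility.ts. The API payload, dot-path mappings, and URLs are invented for the sketch; run it with Bun, since the builder uses Bun.hash for item GUIDs.

// api-feed-example.ts (hypothetical file at the repository root)
import { buildRSSFromApiData } from "./utilities/rss-builder.utility";

const apiData = {
  data: {
    posts: [
      { heading: "Hello", body: "First post", url: "https://example.com/posts/1", published: "2024-01-01" },
    ],
  },
};

const feedConfig = {
  config: { title: "Demo feed", baseUrl: "https://example.com", route: "/api/posts" },
  apiMapping: { items: "data.posts", title: "heading", description: "body", link: "url", date: "published" },
};

console.log(buildRSSFromApiData(apiData, feedConfig)); // prints the generated RSS XML string
--------------------------------------------------------------------------------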