├── .nvmrc
├── .cursorignore
├── services
    └── meridian-ml-service
    │   ├── src
    │       └── meridian_ml_service
    │       │   ├── __init__.py
    │       │   ├── schemas.py
    │       │   ├── config.py
    │       │   ├── main.py
    │       │   ├── dependencies.py
    │       │   └── embeddings.py
    │   ├── .env.example
    │   ├── .dockerignore
    │   ├── fly.toml
    │   ├── README.md
    │   ├── pyproject.toml
    │   └── Dockerfile
├── apps
    ├── frontend
    │   ├── src
    │   │   ├── public
    │   │   │   ├── robots.txt
    │   │   │   ├── favicon.ico
    │   │   │   ├── favicon-16x16.png
    │   │   │   ├── favicon-32x32.png
    │   │   │   ├── apple-touch-icon.png
    │   │   │   ├── android-chrome-192x192.png
    │   │   │   ├── android-chrome-512x512.png
    │   │   │   └── site.webmanifest
    │   │   ├── server
    │   │   │   ├── tsconfig.json
    │   │   │   ├── api
    │   │   │   │   ├── briefs
    │   │   │   │   │   ├── latest.get.ts
    │   │   │   │   │   ├── index.get.ts
    │   │   │   │   │   └── [slug]
    │   │   │   │   │   │   └── index.get.ts
    │   │   │   │   ├── reports.get.ts
    │   │   │   │   ├── admin
    │   │   │   │   │   ├── login.post.ts
    │   │   │   │   │   └── sources
    │   │   │   │   │   │   ├── [id]
    │   │   │   │   │   │       ├── init-dos.post.ts
    │   │   │   │   │   │       ├── index.delete.ts
    │   │   │   │   │   │       └── details.get.ts
    │   │   │   │   │   │   ├── index.post.ts
    │   │   │   │   │   │   └── index.get.ts
    │   │   │   │   └── subscribe.post.ts
    │   │   │   └── lib
    │   │   │   │   └── utils.ts
    │   │   ├── app.vue
    │   │   ├── shared
    │   │   │   └── types.ts
    │   │   ├── composables
    │   │   │   ├── useSEO.ts
    │   │   │   ├── useReadingProgess.ts
    │   │   │   ├── useStickyElement.ts
    │   │   │   └── useTableOfContents.ts
    │   │   ├── pages
    │   │   │   ├── briefs
    │   │   │   │   ├── latest.vue
    │   │   │   │   └── index.vue
    │   │   │   ├── admin
    │   │   │   │   └── login.vue
    │   │   │   └── index.vue
    │   │   ├── layouts
    │   │   │   ├── admin.vue
    │   │   │   └── default.vue
    │   │   ├── plugins
    │   │   │   └── markdown.ts
    │   │   └── components
    │   │   │   └── SubscriptionForm.vue
    │   ├── tsconfig.json
    │   ├── eslint.config.mjs
    │   ├── .env.example
    │   ├── .gitignore
    │   ├── tailwind.config.ts
    │   ├── package.json
    │   ├── nuxt.config.ts
    │   └── README.md
    ├── backend
    │   ├── tsconfig.build.json
    │   ├── test
    │   │   ├── tsconfig.json
    │   │   ├── parseRss.spec.ts
    │   │   ├── utils.spec.ts
    │   │   ├── parseArticle.spec.ts
    │   │   ├── rateLimiter.spec.ts
    │   │   └── fixtures
    │   │   │   └── ft_com.xml
    │   ├── biome.json
    │   ├── src
    │   │   ├── lib
    │   │   │   ├── tryCatchAsync.ts
    │   │   │   ├── embeddings.ts
    │   │   │   ├── utils.ts
    │   │   │   ├── logger.ts
    │   │   │   ├── parsers.ts
    │   │   │   ├── rateLimiter.ts
    │   │   │   └── articleFetchers.ts
    │   │   ├── app.ts
    │   │   ├── prompts
    │   │   │   └── articleRepresentation.prompt.ts
    │   │   ├── routers
    │   │   │   ├── reports.router.ts
    │   │   │   ├── events.router.ts
    │   │   │   ├── sources.router.ts
    │   │   │   ├── openGraph.router.ts
    │   │   │   └── durableObjects.router.ts
    │   │   └── index.ts
    │   ├── package.json
    │   ├── tsconfig.json
    │   └── wrangler.jsonc
    └── briefs
    │   └── src
    │       ├── events.py
    │       └── llm.py
├── packages
    └── database
    │   ├── migrations
    │       ├── 0000_blushing_boomerang.sql
    │       ├── 0003_regular_swordsman.sql
    │       ├── 0002_calm_sebastian_shaw.sql
    │       ├── meta
    │       │   ├── 0000_snapshot.json
    │       │   └── _journal.json
    │       └── 0001_premium_wolfpack.sql
    │   ├── .env.example
    │   ├── README.MD
    │   ├── Makefile
    │   ├── drizzle.config.ts
    │   ├── src
    │       ├── index.ts
    │       ├── database.ts
    │       ├── seed.ts
    │       ├── validators
    │       │   ├── dataSourceConfig.ts
    │       │   └── analysisPayload.ts
    │       └── schema.ts
    │   ├── tsconfig.json
    │   └── package.json
├── screenshot.png
├── .vscode
    ├── extensions.json
    └── settings.json
├── pnpm-workspace.yaml
├── .prettierrc
├── turbo.json
├── package.json
├── LICENSE
├── .gitignore
├── .github
    └── workflows
    │   └── deploy-services.yaml
└── README.md


/.nvmrc:
--------------------------------------------------------------------------------
1 | 22.14.0


--------------------------------------------------------------------------------
/.cursorignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .venv


--------------------------------------------------------------------------------
/services/meridian-ml-service/src/meridian_ml_service/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/apps/frontend/src/public/robots.txt:
--------------------------------------------------------------------------------
1 | User-agent: *
2 | Allow: /
3 | Disallow: /api


--------------------------------------------------------------------------------
/packages/database/migrations/0000_blushing_boomerang.sql:
--------------------------------------------------------------------------------
1 | CREATE EXTENSION vector;


--------------------------------------------------------------------------------
/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iliane5/meridian/HEAD/screenshot.png


--------------------------------------------------------------------------------
/apps/frontend/src/server/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "../../.nuxt/tsconfig.server.json"
3 | }
4 | 


--------------------------------------------------------------------------------
/packages/database/.env.example:
--------------------------------------------------------------------------------
1 | DATABASE_URL="postgresql://postgres:mysecretpassword@localhost:5432/postgres"


--------------------------------------------------------------------------------
/packages/database/migrations/0003_regular_swordsman.sql:
--------------------------------------------------------------------------------
1 | ALTER TABLE "ingested_items" ADD COLUMN "embedding_text" text;


--------------------------------------------------------------------------------
/apps/frontend/src/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iliane5/meridian/HEAD/apps/frontend/src/public/favicon.ico


--------------------------------------------------------------------------------
/apps/frontend/src/public/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iliane5/meridian/HEAD/apps/frontend/src/public/favicon-16x16.png


--------------------------------------------------------------------------------
/apps/frontend/src/public/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iliane5/meridian/HEAD/apps/frontend/src/public/favicon-32x32.png


--------------------------------------------------------------------------------
/apps/frontend/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   // https://nuxt.com/docs/guide/concepts/typescript
3 |   "extends": "./.nuxt/tsconfig.json"
4 | }
5 | 


--------------------------------------------------------------------------------
/packages/database/README.MD:
--------------------------------------------------------------------------------
1 | docker run --name my-postgres -e POSTGRES_PASSWORD=mysecretpassword -p 5432:5432 pgvector/pgvector:pg16
2 | 


--------------------------------------------------------------------------------
/apps/frontend/src/public/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iliane5/meridian/HEAD/apps/frontend/src/public/apple-touch-icon.png


--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 |   "recommendations": ["astro-build.astro-vscode", "unifiedjs.vscode-mdx"],
3 |   "unwantedRecommendations": [],
4 | }
5 | 


--------------------------------------------------------------------------------
/apps/frontend/src/public/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iliane5/meridian/HEAD/apps/frontend/src/public/android-chrome-192x192.png


--------------------------------------------------------------------------------
/apps/frontend/src/public/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iliane5/meridian/HEAD/apps/frontend/src/public/android-chrome-512x512.png


--------------------------------------------------------------------------------
/packages/database/Makefile:
--------------------------------------------------------------------------------
1 | db:
2 | 	@docker run -d --name my-postgres -e POSTGRES_PASSWORD=mysecretpassword -p 5432:5432 pgvector/pgvector:pg16 && pnpm migrate && pnpm studio


--------------------------------------------------------------------------------
/apps/backend/tsconfig.build.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "./tsconfig.json",
3 |   "compilerOptions": {
4 |     "outDir": "./dist",
5 |     "declaration": true,
6 |     "declarationMap": true
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/pnpm-workspace.yaml:
--------------------------------------------------------------------------------
 1 | packages:
 2 |   - apps/*
 3 |   - packages/*
 4 | 
 5 | ignoredBuiltDependencies:
 6 |   - unrs-resolver
 7 |   - workerd
 8 | 
 9 | onlyBuiltDependencies:
10 |   - '@biomejs/biome'
11 | 


--------------------------------------------------------------------------------
/apps/frontend/src/app.vue:
--------------------------------------------------------------------------------
1 | <template>
2 |   <div class="min-h-screen flex flex-col">
3 |     <NuxtRouteAnnouncer />
4 |     <NuxtLayout>
5 |       <NuxtPage />
6 |     </NuxtLayout>
7 |   </div>
8 | </template>
9 | 


--------------------------------------------------------------------------------
/apps/backend/test/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "../tsconfig.json",
3 |   "compilerOptions": {
4 |     "types": ["@cloudflare/vitest-pool-workers"]
5 |   },
6 |   "include": ["./**/*.ts", "../worker-configuration.d.ts"],
7 |   "exclude": []
8 | }
9 | 


--------------------------------------------------------------------------------
/apps/frontend/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | // @ts-check
 2 | import withNuxt from './.nuxt/eslint.config.mjs';
 3 | 
 4 | export default withNuxt(
 5 |   // Your custom configs here,
 6 |   {
 7 |     rules: {
 8 |       'vue/html-self-closing': 'off',
 9 |     },
10 |   }
11 | );
12 | 


--------------------------------------------------------------------------------
/services/meridian-ml-service/.env.example:
--------------------------------------------------------------------------------
1 | # Model to use for embeddings (example)
2 | EMBEDDING_MODEL_NAME=intfloat/multilingual-e5-small
3 | 
4 | # Port for local development (if not using Cloud Run's $PORT)
5 | # PORT=8080
6 | 
7 | # API Token for auth
8 | # API_TOKEN=hunter2


--------------------------------------------------------------------------------
/apps/frontend/.env.example:
--------------------------------------------------------------------------------
1 | NUXT_DATABASE_URL="postgresql://postgres:mysecretpassword@localhost:5432/postgres"
2 | NUXT_SESSION_PASSWORD=password-with-at-least-32-characters
3 | NUXT_PUBLIC_WORKER_API="http://localhost:8787"
4 | NUXT_ADMIN_USERNAME="admin"
5 | NUXT_ADMIN_PASSWORD="hunter2"
6 | NUXT_WORKER_API_TOKEN="hunter2"


--------------------------------------------------------------------------------
/apps/frontend/.gitignore:
--------------------------------------------------------------------------------
 1 | # Nuxt dev/build outputs
 2 | .output
 3 | .data
 4 | .nuxt
 5 | .nitro
 6 | .cache
 7 | dist
 8 | 
 9 | # Node dependencies
10 | node_modules
11 | 
12 | # Logs
13 | logs
14 | *.log
15 | 
16 | # Misc
17 | .DS_Store
18 | .fleet
19 | .idea
20 | 
21 | # Local env files
22 | .env
23 | .env.*
24 | !.env.example
25 | 


--------------------------------------------------------------------------------
/packages/database/drizzle.config.ts:
--------------------------------------------------------------------------------
 1 | import 'dotenv/config';
 2 | import { defineConfig } from 'drizzle-kit';
 3 | 
 4 | export default defineConfig({
 5 |   out: './migrations',
 6 |   schema: './src/schema.ts',
 7 |   dialect: 'postgresql',
 8 |   dbCredentials: {
 9 |     url: process.env.DATABASE_URL || '',
10 |   },
11 | });
12 | 


--------------------------------------------------------------------------------
/apps/frontend/tailwind.config.ts:
--------------------------------------------------------------------------------
 1 | import typography from '@tailwindcss/typography';
 2 | import type { Config } from 'tailwindcss';
 3 | 
 4 | export default {
 5 |   content: ['./src/**/*.{js,ts,jsx,tsx,vue}'],
 6 |   darkMode: 'class',
 7 |   theme: {
 8 |     extend: {},
 9 |   },
10 |   plugins: [typography],
11 | } satisfies Config;
12 | 


--------------------------------------------------------------------------------
/packages/database/migrations/0002_calm_sebastian_shaw.sql:
--------------------------------------------------------------------------------
1 | ALTER TYPE "public"."ingested_item_status" ADD VALUE 'FAILED_RENDER' BEFORE 'FAILED_FETCH';--> statement-breakpoint
2 | ALTER TYPE "public"."ingested_item_status" ADD VALUE 'FAILED_EMBEDDING' BEFORE 'SKIPPED_PDF';--> statement-breakpoint
3 | ALTER TYPE "public"."ingested_item_status" ADD VALUE 'FAILED_R2_UPLOAD' BEFORE 'SKIPPED_PDF';


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "semi": true,
 3 |   "singleQuote": true,
 4 |   "tabWidth": 2,
 5 |   "printWidth": 120,
 6 |   "trailingComma": "es5",
 7 |   "bracketSpacing": true,
 8 |   "arrowParens": "avoid",
 9 |   "endOfLine": "lf",
10 |   "overrides": [
11 |     {
12 |       "files": "*.astro",
13 |       "options": {
14 |         "parser": "astro"
15 |       }
16 |     }
17 |   ]
18 | }
19 | 


--------------------------------------------------------------------------------
/packages/database/src/index.ts:
--------------------------------------------------------------------------------
1 | export * from './schema';
2 | export { and, inArray, desc, eq, gte, isNull, sql, lte, isNotNull, not, cosineDistance, gt } from 'drizzle-orm';
3 | export * from './database';
4 | export { RssSourceConfigV1, DataSourceConfigWrapper } from './validators/dataSourceConfig';
5 | export { AnalysisPayloadBaseV1, AnalysisPayloadWrapper } from './validators/analysisPayload';
6 | 


--------------------------------------------------------------------------------
/apps/frontend/src/shared/types.ts:
--------------------------------------------------------------------------------
 1 | export interface Brief {
 2 |   slug: string;
 3 |   date: {
 4 |     month: string;
 5 |     day: number;
 6 |     year: number;
 7 |   };
 8 |   id: number;
 9 |   title: string;
10 |   content: string;
11 |   createdAt: Date;
12 |   totalArticles: number;
13 |   totalSources: number;
14 |   usedArticles: number;
15 |   usedSources: number;
16 |   model_author: string | null;
17 | }
18 | 


--------------------------------------------------------------------------------
/apps/frontend/src/public/site.webmanifest:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "Meridian",
 3 |   "short_name": "Meridian",
 4 |   "icons": [
 5 |     { "src": "/android-chrome-192x192.png", "sizes": "192x192", "type": "image/png" },
 6 |     { "src": "/android-chrome-512x512.png", "sizes": "512x512", "type": "image/png" }
 7 |   ],
 8 |   "theme_color": "#ffffff",
 9 |   "background_color": "#ffffff",
10 |   "display": "standalone"
11 | }
12 | 


--------------------------------------------------------------------------------
/packages/database/src/database.ts:
--------------------------------------------------------------------------------
 1 | import { drizzle } from 'drizzle-orm/postgres-js';
 2 | import postgres from 'postgres';
 3 | 
 4 | import * as schema from './schema';
 5 | 
 6 | export const client = (url: string, options?: postgres.Options<{}> | undefined) => postgres(url, options);
 7 | 
 8 | export const getDb = (url: string, options?: postgres.Options<{}> | undefined) =>
 9 |   drizzle(client(url, options), { schema });
10 | 


--------------------------------------------------------------------------------
/packages/database/migrations/meta/0000_snapshot.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "id": "558fb55b-0e9b-4a90-b83b-7add24c77c0b",
 3 |   "prevId": "00000000-0000-0000-0000-000000000000",
 4 |   "version": "7",
 5 |   "dialect": "postgresql",
 6 |   "tables": {},
 7 |   "enums": {},
 8 |   "schemas": {},
 9 |   "views": {},
10 |   "sequences": {},
11 |   "roles": {},
12 |   "policies": {},
13 |   "_meta": {
14 |     "columns": {},
15 |     "schemas": {},
16 |     "tables": {}
17 |   }
18 | }


--------------------------------------------------------------------------------
/services/meridian-ml-service/src/meridian_ml_service/schemas.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | 
 3 | 
 4 | class EmbeddingRequest(BaseModel):
 5 |     texts: list[str] = Field(..., min_length=1, description="List of texts to embed")
 6 | 
 7 | 
 8 | class EmbeddingResponse(BaseModel):
 9 |     embeddings: list[list[float]] = Field(
10 |         ..., description="List of computed embeddings"
11 |     )
12 |     model_name: str = Field(..., description="Name of the model used")
13 | 


--------------------------------------------------------------------------------
/turbo.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://turbo.build/schema.json",
 3 |   "ui": "stream",
 4 |   "tasks": {
 5 |     "build": {
 6 |       "dependsOn": ["^build"],
 7 |       "inputs": ["$TURBO_DEFAULT$", ".env*"],
 8 |       "outputs": ["dist/**"]
 9 |     },
10 |     "lint": {
11 |       "dependsOn": ["^lint"]
12 |     },
13 |     "typecheck": {
14 |       "dependsOn": ["^typecheck"]
15 |     },
16 |     "dev": {
17 |       "cache": false,
18 |       "persistent": true
19 |     }
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/apps/frontend/src/composables/useSEO.ts:
--------------------------------------------------------------------------------
 1 | export function useSEO(opts: { title: string; description: string; ogImage: string; ogUrl: string }) {
 2 |   return useSeoMeta({
 3 |     title: opts.title,
 4 |     description: opts.description,
 5 |     ogTitle: opts.title,
 6 |     ogDescription: opts.description,
 7 |     twitterTitle: opts.title,
 8 |     twitterDescription: opts.description,
 9 |     ogImage: opts.ogImage,
10 |     twitterImage: opts.ogImage,
11 |     twitterCard: 'summary_large_image',
12 |     ogLocale: 'en_US',
13 |     ogUrl: opts.ogUrl,
14 |   });
15 | }
16 | 


--------------------------------------------------------------------------------
/services/meridian-ml-service/.dockerignore:
--------------------------------------------------------------------------------
 1 | Dockerfile
 2 | .dockerignore
 3 | .git
 4 | .gitignore
 5 | __pycache__/
 6 | *.pyc
 7 | *.pyo
 8 | *.pyd
 9 | .env
10 | .venv/
11 | venv/
12 | env/
13 | *.env.*
14 | !.env.example
15 | # Additional exclusions for ML projects
16 | .pytest_cache/
17 | .mypy_cache/
18 | .ruff_cache/
19 | **/.cache/
20 | **/__pycache__/
21 | **/*.egg-info/
22 | dist/
23 | build/
24 | .coverage
25 | htmlcov/
26 | .ipynb_checkpoints/
27 | **/*.ipynb
28 | .DS_Store
29 | # Documentation
30 | docs/
31 | README.md
32 | # Tests
33 | tests/
34 | # Dev tooling
35 | .github/


--------------------------------------------------------------------------------
/packages/database/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://json.schemastore.org/tsconfig",
 3 |   "display": "Node 20",
 4 |   "_version": "20.1.0",
 5 | 
 6 |   "compilerOptions": {
 7 |     "lib": ["es2023"],
 8 |     "module": "nodenext",
 9 |     "target": "es2022",
10 | 
11 |     "strict": true,
12 |     "esModuleInterop": true,
13 |     "skipLibCheck": true,
14 |     "moduleResolution": "node16",
15 | 
16 |     "baseUrl": ".",
17 |     "paths": {
18 |       "@/*": ["./src/*"]
19 |     }
20 |   },
21 |   "include": ["**/*.ts"],
22 |   "exclude": ["dist", "build", "node_modules"]
23 | }
24 | 


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/briefs/latest.get.ts:
--------------------------------------------------------------------------------
 1 | import { $reports, desc } from '@meridian/database';
 2 | import { ensureDate, generateReportSlug, getDB } from '~/server/lib/utils';
 3 | 
 4 | export default defineEventHandler(async event => {
 5 |   const latestReport = await getDB(event).query.$reports.findFirst({
 6 |     orderBy: desc($reports.createdAt),
 7 |     columns: { id: true, createdAt: true, title: true },
 8 |   });
 9 |   if (latestReport === undefined) {
10 |     throw createError({ statusCode: 404, statusMessage: 'No reports found' });
11 |   }
12 | 
13 |   return generateReportSlug(ensureDate(latestReport.createdAt));
14 | });
15 | 


--------------------------------------------------------------------------------
/apps/backend/biome.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
 3 |   "vcs": {
 4 |     "enabled": false,
 5 |     "clientKind": "git",
 6 |     "useIgnoreFile": false
 7 |   },
 8 |   "files": {
 9 |     "ignoreUnknown": false,
10 |     "ignore": ["worker-configuration.d.ts"]
11 |   },
12 |   "formatter": {
13 |     "enabled": false
14 |   },
15 |   "organizeImports": {
16 |     "enabled": true
17 |   },
18 |   "linter": {
19 |     "enabled": true,
20 |     "rules": {
21 |       "recommended": true
22 |     }
23 |   },
24 |   "javascript": {
25 |     "formatter": {
26 |       "quoteStyle": "double"
27 |     }
28 |   }
29 | }
30 | 


--------------------------------------------------------------------------------
/apps/backend/src/lib/tryCatchAsync.ts:
--------------------------------------------------------------------------------
 1 | import { type Result, err, ok } from 'neverthrow';
 2 | 
 3 | /**
 4 |  * Wraps an existing Promise, converting resolution to Ok and rejection/throw to Err.
 5 |  * The error type is 'unknown' because anything can be thrown.
 6 |  *
 7 |  * @param promise The promise to wrap.
 8 |  * @returns A Promise resolving to a Result<T, unknown>.
 9 |  */
10 | export async function tryCatchAsync<T>(promise: Promise<T>): Promise<Result<T, unknown>> {
11 |   try {
12 |     const value = await promise;
13 |     return ok(value);
14 |   } catch (error) {
15 |     // Catches synchronous throws during promise creation *and* promise rejections.
16 |     return err(error);
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/briefs/index.get.ts:
--------------------------------------------------------------------------------
 1 | import { $reports, desc } from '@meridian/database';
 2 | import { ensureDate, formatReportDate, generateReportSlug, getDB } from '~/server/lib/utils';
 3 | 
 4 | export default defineEventHandler(async event => {
 5 |   const reports = await getDB(event).query.$reports.findMany({
 6 |     orderBy: desc($reports.createdAt),
 7 |     columns: { id: true, createdAt: true, title: true },
 8 |   });
 9 | 
10 |   // Process reports to add date and slug
11 |   return reports.map(report => {
12 |     const createdAt = ensureDate(report.createdAt);
13 |     return {
14 |       ...report,
15 |       date: formatReportDate(createdAt),
16 |       slug: generateReportSlug(createdAt),
17 |     };
18 |   });
19 | });
20 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "meridian",
 3 |   "private": true,
 4 |   "license": "MIT",
 5 |   "author": {
 6 |     "name": "Iliane Amadou",
 7 |     "email": "mail@iliane.xyz",
 8 |     "url": "https://iliane.xyz"
 9 |   },
10 |   "scripts": {
11 |     "build": "turbo run build",
12 |     "dev": "turbo run dev",
13 |     "lint": "turbo run lint",
14 |     "format": "prettier --write \"**/*.{ts,tsx,md,vue}\"",
15 |     "typecheck": "turbo run typecheck"
16 |   },
17 |   "devDependencies": {
18 |     "prettier": "^3.5.3",
19 |     "turbo": "^2.4.4",
20 |     "typescript": "5.8.2",
21 |     "@biomejs/biome": "^1.9.4",
22 |     "eslint": "^9.27.0"
23 |   },
24 |   "packageManager": "pnpm@10.9.0",
25 |   "engines": {
26 |     "node": ">=22"
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/packages/database/src/seed.ts:
--------------------------------------------------------------------------------
 1 | import 'dotenv/config';
 2 | 
 3 | import { $sources } from './schema';
 4 | import { getDb } from './database';
 5 | 
 6 | async function main() {
 7 |   await getDb(process.env.DATABASE_URL!)
 8 |     .insert($sources)
 9 |     .values({
10 |       id: 1,
11 |       name: 'Hacker news',
12 |       url: 'https://news.ycombinator.com/rss',
13 |       scrape_frequency: 1,
14 |       category: 'news',
15 |       paywall: false,
16 |       lastChecked: new Date(),
17 |     })
18 |     .onConflictDoNothing();
19 | }
20 | 
21 | main()
22 |   .then(() => {
23 |     console.log('✅ Seeded database');
24 |     process.exit(0);
25 |   })
26 |   .catch(err => {
27 |     console.error('Error seeding database', err);
28 |     process.exit(1);
29 |   });
30 | 


--------------------------------------------------------------------------------
/apps/frontend/src/pages/briefs/latest.vue:
--------------------------------------------------------------------------------
 1 | <script setup lang="ts">
 2 | useSEO({
 3 |   title: 'latest report | meridian',
 4 |   description:
 5 |     'a daily brief of everything important happening that i care about, with actual analysis beyond headlines',
 6 |   ogImage: `${useRuntimeConfig().public.WORKER_API}/og/default`,
 7 |   ogUrl: `https://news.iliane.xyz/latest`,
 8 | });
 9 | 
10 | // redirect to the latest report
11 | const { data: latestSlug, error } = await useFetch('/api/briefs/latest');
12 | if (error.value !== null) {
13 |   throw createError({ statusCode: 500 });
14 | }
15 | if (latestSlug.value !== null) {
16 |   await navigateTo(`/briefs/${latestSlug.value}`);
17 | }
18 | </script>
19 | 
20 | <template>
21 |   <div>
22 |     <p>Redirecting to the latest report...</p>
23 |   </div>
24 | </template>
25 | 


--------------------------------------------------------------------------------
/packages/database/src/validators/dataSourceConfig.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod';
 2 | 
 3 | // RSS Source Configuration Schema v1.0
 4 | export const RssSourceConfigV1 = z.object({
 5 |   url: z.string().url(),
 6 |   rss_paywall: z.boolean().optional().default(false),
 7 |   config_schema_version: z.literal('1.0'),
 8 | });
 9 | 
10 | // Base Data Source Configuration Wrapper
11 | // Discriminated union that can wrap different source configs
12 | export const DataSourceConfigWrapper = z.discriminatedUnion('source_type', [
13 |   z.object({
14 |     source_type: z.literal('RSS'),
15 |     config: RssSourceConfigV1,
16 |   }),
17 | ]);
18 | 
19 | // Type exports for TypeScript usage
20 | export type RssSourceConfigV1Type = z.infer<typeof RssSourceConfigV1>;
21 | export type DataSourceConfigWrapperType = z.infer<typeof DataSourceConfigWrapper>;
22 | 


--------------------------------------------------------------------------------
/packages/database/migrations/meta/_journal.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "7",
 3 |   "dialect": "postgresql",
 4 |   "entries": [
 5 |     {
 6 |       "idx": 0,
 7 |       "version": "7",
 8 |       "when": 1745430466343,
 9 |       "tag": "0000_blushing_boomerang",
10 |       "breakpoints": true
11 |     },
12 |     {
13 |       "idx": 1,
14 |       "version": "7",
15 |       "when": 1748143359245,
16 |       "tag": "0001_premium_wolfpack",
17 |       "breakpoints": true
18 |     },
19 |     {
20 |       "idx": 2,
21 |       "version": "7",
22 |       "when": 1748146595683,
23 |       "tag": "0002_calm_sebastian_shaw",
24 |       "breakpoints": true
25 |     },
26 |     {
27 |       "idx": 3,
28 |       "version": "7",
29 |       "when": 1748576379169,
30 |       "tag": "0003_regular_swordsman",
31 |       "breakpoints": true
32 |     }
33 |   ]
34 | }


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/reports.get.ts:
--------------------------------------------------------------------------------
 1 | import { ensureDate, formatReportDate, generateReportSlug, getDB } from '~/server/lib/utils';
 2 | 
 3 | export default defineEventHandler(async event => {
 4 |   const reports = await getDB(event).query.$reports.findMany();
 5 | 
 6 |   // Process reports to add date and slug
 7 |   const processedReports = reports
 8 |     .map(report => {
 9 |       const createdAt = ensureDate(report.createdAt);
10 |       return {
11 |         ...report,
12 |         date: formatReportDate(createdAt),
13 |         slug: generateReportSlug(createdAt),
14 |       };
15 |     })
16 |     .sort((a, b) => {
17 |       const dateA = a.createdAt ? new Date(a.createdAt).getTime() : 0;
18 |       const dateB = b.createdAt ? new Date(b.createdAt).getTime() : 0;
19 |       return dateB - dateA;
20 |     });
21 | 
22 |   return processedReports;
23 | });
24 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "files.associations": {
 3 |     "wrangler.json": "jsonc"
 4 |   },
 5 |   "typescript.tsdk": "node_modules/typescript/lib",
 6 | 
 7 |   "editor.defaultFormatter": "esbenp.prettier-vscode",
 8 | 
 9 |   "eslint.useFlatConfig": true,
10 |   "eslint.validate": ["vue", "typescript", "javascript"],
11 |   "eslint.workingDirectories": ["./apps/frontend"],
12 | 
13 |   // hide git ignored files
14 |   "files.exclude": {
15 |     "**/*.turbo": true,
16 |     "**/.turbo": true,
17 |     "**/.venv": true,
18 |     "**/node_modules": true,
19 |     "**/.nuxt": true,
20 |     "**/.output": true,
21 |     "**/dist": true,
22 |     "**/.wrangler": true,
23 |     "**/.mypy_cache": true,
24 |     "**/.ruff_cache": true,
25 |     "**/*.egg-info": true,
26 |     "**/__pycache__": true
27 |   },
28 |   "[python]": {
29 |     "editor.defaultFormatter": "charliermarsh.ruff"
30 |   }
31 | }
32 | 


--------------------------------------------------------------------------------
/packages/database/src/validators/analysisPayload.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod';
 2 | import type { DataSourceConfigWrapper, RssSourceConfigV1 } from './dataSourceConfig';
 3 | 
 4 | // RSS Source Configuration Schema v1.0
 5 | export const AnalysisPayloadBaseV1 = z.object({
 6 |   schema_version: z.literal('1.0'),
 7 |   analysis_type: z.string(),
 8 |   data: z.record(z.unknown()),
 9 | });
10 | 
11 | // Base Analysis Payload Wrapper
12 | // Discriminated union that can wrap different analysis payloads
13 | export const AnalysisPayloadWrapper = z.discriminatedUnion('analysis_type', [
14 |   z.object({
15 |     analysis_type: z.literal('RSS'),
16 |     data: AnalysisPayloadBaseV1,
17 |   }),
18 | ]);
19 | 
20 | // Type exports for TypeScript usage
21 | export type RssSourceConfigV1Type = z.infer<typeof RssSourceConfigV1>;
22 | export type DataSourceConfigWrapperType = z.infer<typeof DataSourceConfigWrapper>;
23 | 


--------------------------------------------------------------------------------
/apps/backend/src/app.ts:
--------------------------------------------------------------------------------
 1 | import { Hono } from 'hono';
 2 | import { trimTrailingSlash } from 'hono/trailing-slash';
 3 | import type { Env } from './index';
 4 | import durableObjectsRouter from './routers/durableObjects.router';
 5 | import eventsRouter from './routers/events.router';
 6 | import openGraph from './routers/openGraph.router';
 7 | import reportsRouter from './routers/reports.router';
 8 | import sourcesRouter from './routers/sources.router';
 9 | 
10 | export type HonoEnv = { Bindings: Env };
11 | 
12 | const app = new Hono<HonoEnv>()
13 |   .use(trimTrailingSlash())
14 |   .get('/favicon.ico', async c => c.notFound()) // disable favicon
15 |   .route('/reports', reportsRouter)
16 |   .route('/sources', sourcesRouter)
17 |   .route('/openGraph', openGraph)
18 |   .route('/events', eventsRouter)
19 |   .route('/do', durableObjectsRouter)
20 |   .get('/ping', async c => c.json({ pong: true }));
21 | 
22 | export default app;
23 | 


--------------------------------------------------------------------------------
/apps/frontend/src/layouts/admin.vue:
--------------------------------------------------------------------------------
 1 | <script lang="ts" setup>
 2 | if (useUserSession().loggedIn.value === false) {
 3 |   await navigateTo('/admin/login');
 4 | }
 5 | 
 6 | async function logout() {
 7 |   try {
 8 |     await useUserSession().clear();
 9 |     location.reload();
10 |   } catch (error) {
11 |     console.error(error);
12 |     alert('Failed to log out');
13 |   }
14 | }
15 | </script>
16 | 
17 | <template>
18 |   <div class="min-h-screen bg-gray-50 p-4">
19 |     <div class="flex justify-between items-center mb-4 border-b pb-2">
20 |       <NuxtLink to="/admin">
21 |         <h1 class="text-lg font-medium text-gray-800">Admin Panel</h1>
22 |       </NuxtLink>
23 |       <button
24 |         class="text-sm text-gray-600 hover:cursor-pointer hover:text-gray-900 border px-2 py-0.5 rounded"
25 |         @click="logout"
26 |       >
27 |         Log out
28 |       </button>
29 |     </div>
30 |     <slot />
31 |   </div>
32 | </template>
33 | 


--------------------------------------------------------------------------------
/packages/database/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@meridian/database",
 3 |   "version": "0.0.0",
 4 |   "private": true,
 5 |   "license": "MIT",
 6 |   "author": {
 7 |     "name": "Iliane Amadou",
 8 |     "email": "mail@iliane.xyz",
 9 |     "url": "https://iliane.xyz"
10 |   },
11 |   "publishConfig": {
12 |     "access": "public"
13 |   },
14 |   "exports": {
15 |     ".": "./src/index.ts"
16 |   },
17 |   "scripts": {
18 |     "migrate": "drizzle-kit migrate",
19 |     "generate": "drizzle-kit generate",
20 |     "studio": "drizzle-kit studio",
21 |     "typecheck": "tsc --noEmit",
22 |     "seed": "tsx src/seed.ts"
23 |   },
24 |   "dependencies": {
25 |     "drizzle-orm": "^0.42.0",
26 |     "postgres": "^3.4.5",
27 |     "tsx": "^4.19.3",
28 |     "zod": "^3.22.4"
29 |   },
30 |   "devDependencies": {
31 |     "@types/node": "^22.13.14",
32 |     "dotenv": "^16.4.7",
33 |     "drizzle-kit": "^0.31.0",
34 |     "typescript": "^5.8.2"
35 |   }
36 | }
37 | 


--------------------------------------------------------------------------------
/apps/frontend/src/pages/briefs/index.vue:
--------------------------------------------------------------------------------
 1 | <script setup lang="ts">
 2 | useSEO({
 3 |   title: 'briefs | meridian',
 4 |   description: 'list of all briefs',
 5 |   ogImage: `${useRuntimeConfig().public.WORKER_API}/og/default`,
 6 |   ogUrl: `https://news.iliane.xyz/briefs`,
 7 | });
 8 | 
 9 | const { data: briefsList, error } = await useFetch('/api/briefs');
10 | if (error.value !== null) {
11 |   console.error('Failed to fetch briefs list');
12 |   throw createError({ statusCode: 500, statusMessage: 'Failed to fetch briefs list' });
13 | }
14 | </script>
15 | 
16 | <template>
17 |   <div class="flex flex-col gap-6">
18 |     <NuxtLink v-for="brief in briefsList" :key="brief.id" class="group" :to="`/briefs/${brief.slug}`">
19 |       <p class="text-xl font-bold group-hover:underline">{{ brief.title }}</p>
20 |       <p class="text-sm text-gray-600 mt-1">
21 |         {{ brief.date?.month.toLowerCase() }} {{ brief.date?.day }}, {{ brief.date?.year }}
22 |       </p>
23 |     </NuxtLink>
24 |   </div>
25 | </template>
26 | 


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/admin/login.post.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod';
 2 | 
 3 | const loginSchema = z.object({ username: z.string(), password: z.string() });
 4 | 
 5 | export default eventHandler(async event => {
 6 |   const config = useRuntimeConfig(event);
 7 | 
 8 |   const bodyResult = loginSchema.safeParse(await readBody(event));
 9 |   if (bodyResult.success === false) {
10 |     throw createError({ statusCode: 400, message: 'Invalid request body' });
11 |   }
12 | 
13 |   const { username, password } = bodyResult.data;
14 |   if (username !== config.admin.username || password !== config.admin.password) {
15 |     throw createError({ statusCode: 401, message: 'Wrong password' });
16 |   }
17 | 
18 |   try {
19 |     await setUserSession(event, { user: { login: 'admin' }, loggedInAt: Date.now() });
20 |   } catch (error) {
21 |     console.error('Failed to set user session', error);
22 |     throw createError({ statusCode: 500, message: 'Failed to set user session' });
23 |   }
24 | 
25 |   return setResponseStatus(event, 201);
26 | });
27 | 


--------------------------------------------------------------------------------
/services/meridian-ml-service/fly.toml:
--------------------------------------------------------------------------------
 1 | # fly.toml app configuration file generated for meridian-ml-service on 2025-04-25T16:34:14+02:00
 2 | #
 3 | # See https://fly.io/docs/reference/configuration/ for information about how to use this file.
 4 | #
 5 | 
 6 | app = 'meridian-ml-service'
 7 | primary_region = 'cdg'
 8 | 
 9 | [build]
10 | 
11 | [http_service]
12 |   internal_port = 8080
13 |   force_https = true
14 |   auto_stop_machines = 'stop'
15 |   auto_start_machines = true
16 |   min_machines_running = 0
17 |   processes = ['app']
18 | 
19 | [[vm]]
20 |   memory = '1gb'
21 |   cpu_kind = 'shared'
22 |   cpus = 1
23 | 
24 | # Add a health check pointing to your root or ping endpoint
25 | [[services.http_checks]]
26 |    interval = "10s"
27 |    timeout = "2s"
28 |    grace_period = "5s" # Give it time to start, especially with model download
29 |    method = "GET"
30 |    path = "/ping" # or "/"
31 |    protocol = "http"
32 |    port = 8080
33 | 
34 | [[services.ports]]
35 |     handlers = ["http"]
36 |     port = 8080
37 |     force_https = true  # optional


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Iliane Amadou
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/apps/frontend/src/server/lib/utils.ts:
--------------------------------------------------------------------------------
 1 | import { getDb } from '@meridian/database';
 2 | import type { H3Event } from 'h3';
 3 | 
 4 | export const MONTH_NAMES = [
 5 |   'January',
 6 |   'February',
 7 |   'March',
 8 |   'April',
 9 |   'May',
10 |   'June',
11 |   'July',
12 |   'August',
13 |   'September',
14 |   'October',
15 |   'November',
16 |   'December',
17 | ];
18 | 
19 | export interface FormattedDate {
20 |   month: string;
21 |   day: number;
22 |   year: number;
23 | }
24 | 
25 | export function formatReportDate(date: Date): FormattedDate {
26 |   return {
27 |     month: MONTH_NAMES[date.getUTCMonth()],
28 |     day: date.getUTCDate(),
29 |     year: date.getUTCFullYear(),
30 |   };
31 | }
32 | 
33 | export function generateReportSlug(date: Date): string {
34 |   const { month, day, year } = formatReportDate(date);
35 |   return `${month.toLowerCase()}-${day}-${year}`;
36 | }
37 | 
38 | export function ensureDate(dateInput: Date | string | null | undefined): Date {
39 |   return dateInput ? new Date(dateInput) : new Date();
40 | }
41 | 
42 | export function getDB(event: H3Event) {
43 |   return getDb(useRuntimeConfig(event).database.url);
44 | }
45 | 


--------------------------------------------------------------------------------
/apps/frontend/src/plugins/markdown.ts:
--------------------------------------------------------------------------------
 1 | import MarkdownIt from 'markdown-it';
 2 | import mdColorDefault from 'markdown-it-color';
 3 | // @ts-expect-error - no types for this package
 4 | import mdTaskListsDefault from 'markdown-it-deflist';
 5 | 
 6 | import { defineNuxtPlugin } from '#app';
 7 | 
 8 | // Helper to get the actual function, handling CJS/ESM differences
 9 | // eslint-disable-next-line @typescript-eslint/no-explicit-any
10 | const unwrapDefault = (mod: any) => mod.default || mod;
11 | 
12 | const markdownItColor = unwrapDefault(mdColorDefault);
13 | const markdownItTaskLists = unwrapDefault(mdTaskListsDefault);
14 | 
15 | export default defineNuxtPlugin({
16 |   name: 'markdown-it',
17 |   setup() {
18 |     const md = new MarkdownIt({
19 |       linkify: true,
20 |       breaks: true,
21 |       typographer: true,
22 |       html: true, // Be careful with this if markdown comes from users!
23 |     })
24 |       .use(markdownItTaskLists)
25 |       .use(markdownItColor, { defaultClassName: 'text-primary' });
26 | 
27 |     return {
28 |       provide: {
29 |         md: md, // Provide the configured instance
30 |       },
31 |     };
32 |   },
33 | });
34 | 


--------------------------------------------------------------------------------
/services/meridian-ml-service/README.md:
--------------------------------------------------------------------------------
 1 | **3. Development Workflow & VS Code:**
 2 | 
 3 | - **Setup:**
 4 |   1.  Install `uv`: Follow instructions at [https://github.com/astral-sh/uv](https://github.com/astral-sh/uv)
 5 |   2.  Create a virtual environment: `uv venv` (creates `.venv`)
 6 |   3.  Activate it: `source .venv/bin/activate`
 7 |   4.  Install dependencies: `uv pip install -e .[dev]` (Installs package in editable mode + dev deps)
 8 |   5.  Copy `.env.example` to `.env` if needed for local settings.
 9 | - **Running Locally:**
10 |   `uvicorn meridian_ml_service.main:app --reload --host 0.0.0.0 --port 8080`
11 | - **Linting/Formatting:**
12 |   `uv run ruff check . --fix`
13 |   `uv run ruff format .`
14 | - **Type Checking:**
15 |   `uv run mypy src/`
16 | - **VS Code:**
17 |   1.  Install the official **Python** extension (Microsoft).
18 |   2.  Install the **Ruff** extension (Astral Software). Configure it to use `ruff format` on save if desired.
19 |   3.  Install the **Mypy Type Checker** extension (Microsoft).
20 |   4.  Ensure VS Code detects and uses the `.venv` virtual environment. Your editor should now show linting/formatting/type errors inline.
21 | 


--------------------------------------------------------------------------------
/apps/backend/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@meridian/backend",
 3 |   "version": "0.0.0",
 4 |   "private": true,
 5 |   "license": "MIT",
 6 |   "author": {
 7 |     "name": "Iliane Amadou",
 8 |     "email": "mail@iliane.xyz",
 9 |     "url": "https://iliane.xyz"
10 |   },
11 |   "scripts": {
12 |     "dev": "wrangler dev",
13 |     "test": "vitest run",
14 |     "cf-typegen": "wrangler types",
15 |     "lint": "biome check .",
16 |     "lint:fix": "biome check --write .",
17 |     "typecheck": "tsc --noEmit"
18 |   },
19 |   "devDependencies": {
20 |     "@biomejs/biome": "1.9.4",
21 |     "@cloudflare/vitest-pool-workers": "^0.8.19",
22 |     "@types/node": "^22.14.1",
23 |     "typescript": "^5.8.2",
24 |     "vitest": "^3.1.2",
25 |     "wrangler": "^4.14.0"
26 |   },
27 |   "dependencies": {
28 |     "@ai-sdk/google": "^1.2.13",
29 |     "@cloudflare/puppeteer": "^1.0.2",
30 |     "@hono/zod-validator": "^0.4.3",
31 |     "@meridian/database": "workspace:*",
32 |     "@mozilla/readability": "^0.6.0",
33 |     "ai": "^4.3.9",
34 |     "fast-xml-parser": "^5.2.1",
35 |     "hono": "^4.7.7",
36 |     "linkedom": "^0.18.9",
37 |     "neverthrow": "^8.2.0",
38 |     "workers-og": "^0.0.25",
39 |     "zod": "^3.24.3"
40 |   }
41 | }
42 | 


--------------------------------------------------------------------------------
/services/meridian-ml-service/src/meridian_ml_service/config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from functools import lru_cache
 3 | from typing import Optional
 4 | 
 5 | from dotenv import load_dotenv
 6 | from pydantic import BaseModel, Field
 7 | 
 8 | # Load environment variables from .env file
 9 | load_dotenv()
10 | 
11 | 
12 | # Using a simple class for now, can switch to pydantic-settings later if needed
13 | class Settings(BaseModel):
14 |     embedding_model_name: str = "intfloat/multilingual-e5-small"  # Default
15 |     api_token: Optional[str] = Field(
16 |         default=None, description="Optional API token for authentication"
17 |     )
18 | 
19 | 
20 | @lru_cache  # Cache the settings object
21 | def get_settings() -> Settings:
22 |     """Loads settings, prioritizing environment variables."""
23 |     model_name_from_env = os.getenv("EMBEDDING_MODEL_NAME")
24 |     api_token_from_env = os.getenv("API_TOKEN")
25 |     return Settings(
26 |         embedding_model_name=(
27 |             model_name_from_env
28 |             if model_name_from_env
29 |             else "intfloat/multilingual-e5-small"
30 |         ),
31 |         api_token=api_token_from_env,
32 |     )
33 | 
34 | 
35 | settings = get_settings()  # Load settings once on module import
36 | 


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/admin/sources/[id]/init-dos.post.ts:
--------------------------------------------------------------------------------
 1 | import { getDB } from '~/server/lib/utils';
 2 | import { $data_sources, eq } from '@meridian/database';
 3 | 
 4 | export default defineEventHandler(async event => {
 5 |   await requireUserSession(event); // require auth
 6 | 
 7 |   const sourceId = Number(getRouterParam(event, 'id'));
 8 |   if (Number.isNaN(sourceId)) {
 9 |     throw createError({ statusCode: 400, statusMessage: 'Invalid source ID' });
10 |   }
11 | 
12 |   const db = getDB(event);
13 |   const config = useRuntimeConfig();
14 | 
15 |   const source = await db.query.$data_sources.findFirst({ where: eq($data_sources.id, sourceId) });
16 |   if (source === undefined) {
17 |     throw createError({ statusCode: 404, statusMessage: 'Source not found' });
18 |   }
19 | 
20 |   try {
21 |     await fetch(`${config.public.WORKER_API}/do/admin/source/${sourceId}/init`, {
22 |       method: 'POST',
23 |       headers: {
24 |         Authorization: `Bearer ${config.worker.api_token}`,
25 |       },
26 |     });
27 |   } catch (error) {
28 |     console.error('Failed to initialize DO', error);
29 |     throw createError({ statusCode: 500, statusMessage: 'Failed to initialize DO' });
30 |   }
31 | 
32 |   return { success: true };
33 | });
34 | 


--------------------------------------------------------------------------------
/apps/backend/src/lib/embeddings.ts:
--------------------------------------------------------------------------------
 1 | import { err, ok } from 'neverthrow';
 2 | import { z } from 'zod';
 3 | import type { Env } from '../index';
 4 | import { tryCatchAsync } from './tryCatchAsync';
 5 | 
 6 | const embeddingsResponseSchema = z.object({
 7 |   embeddings: z.array(z.array(z.number())),
 8 | });
 9 | 
10 | export async function createEmbeddings(env: Env, texts: string[]) {
11 |   const response = await tryCatchAsync(
12 |     fetch(`${env.MERIDIAN_ML_SERVICE_URL}/embeddings`, {
13 |       method: 'POST',
14 |       body: JSON.stringify({ texts }),
15 |       headers: {
16 |         Authorization: `Bearer ${env.MERIDIAN_ML_SERVICE_API_KEY}`,
17 |         'Content-Type': 'application/json',
18 |       },
19 |     })
20 |   );
21 |   if (response.isErr()) {
22 |     return err(response.error);
23 |   }
24 |   if (!response.value.ok) {
25 |     return err(new Error(`Failed to fetch embeddings: ${response.value.statusText}`));
26 |   }
27 | 
28 |   const jsonResult = await tryCatchAsync(response.value.json());
29 |   if (jsonResult.isErr()) {
30 |     return err(jsonResult.error);
31 |   }
32 | 
33 |   const parsedResponse = embeddingsResponseSchema.safeParse(jsonResult.value);
34 |   if (parsedResponse.success === false) {
35 |     return err(new Error(`Invalid response ${JSON.stringify(parsedResponse.error)}`));
36 |   }
37 | 
38 |   return ok(parsedResponse.data.embeddings);
39 | }
40 | 


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/admin/sources/[id]/index.delete.ts:
--------------------------------------------------------------------------------
 1 | import { $data_sources, eq } from '@meridian/database';
 2 | import { getDB } from '~/server/lib/utils';
 3 | 
 4 | export default defineEventHandler(async event => {
 5 |   await requireUserSession(event); // require auth
 6 | 
 7 |   const sourceId = Number(getRouterParam(event, 'id'));
 8 |   if (Number.isNaN(sourceId)) {
 9 |     throw createError({ statusCode: 400, statusMessage: 'Invalid source ID' });
10 |   }
11 | 
12 |   const db = getDB(event);
13 |   const source = await db.query.$data_sources.findFirst({ where: eq($data_sources.id, sourceId) });
14 |   if (source === undefined) {
15 |     throw createError({ statusCode: 404, statusMessage: 'Source not found' });
16 |   }
17 | 
18 |   const config = useRuntimeConfig();
19 | 
20 |   try {
21 |     const response = await fetch(`${config.public.WORKER_API}/do/admin/source/${sourceId}`, {
22 |       method: 'DELETE',
23 |       headers: {
24 |         Authorization: `Bearer ${config.worker.api_token}`,
25 |       },
26 |     });
27 |     if (!response.ok) {
28 |       throw new Error(`Failed to delete source: ${response.statusText}`);
29 |     }
30 |   } catch (error) {
31 |     console.error('Failed to delete source:', error);
32 |     throw createError({ statusCode: 500, statusMessage: 'Failed to delete source' });
33 |   }
34 | 
35 |   return { success: true };
36 | });
37 | 


--------------------------------------------------------------------------------
/apps/frontend/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@meridian/frontend",
 3 |   "private": true,
 4 |   "type": "module",
 5 |   "version": "0.0.0",
 6 |   "license": "MIT",
 7 |   "author": {
 8 |     "name": "Iliane Amadou",
 9 |     "email": "mail@iliane.xyz",
10 |     "url": "https://iliane.xyz"
11 |   },
12 |   "scripts": {
13 |     "dev": "nuxt dev",
14 |     "build": "nuxt build",
15 |     "lint": "eslint .",
16 |     "lint:fix": "eslint . --fix",
17 |     "preview": "nuxt preview",
18 |     "postinstall": "nuxt prepare",
19 |     "typecheck": "nuxt typecheck"
20 |   },
21 |   "devDependencies": {
22 |     "@headlessui/vue": "^1.7.23",
23 |     "@heroicons/vue": "^2.2.0",
24 |     "@mailerlite/mailerlite-nodejs": "^1.4.0",
25 |     "@meridian/database": "workspace:*",
26 |     "@nuxt/eslint": "1.4.1",
27 |     "@nuxtjs/color-mode": "3.5.2",
28 |     "@radix-ui/colors": "^3.0.0",
29 |     "@tailwindcss/typography": "^0.5.16",
30 |     "@tailwindcss/vite": "^4.1.4",
31 |     "@types/markdown-it": "^14.1.2",
32 |     "@unhead/vue": "^2.0.8",
33 |     "markdown-it": "^14.1.0",
34 |     "markdown-it-color": "^2.1.1",
35 |     "markdown-it-deflist": "^3.0.0",
36 |     "nuxt": "^3.16.2",
37 |     "nuxt-auth-utils": "0.5.20",
38 |     "tailwindcss": "^4.1.4",
39 |     "vue": "^3.5.13",
40 |     "vue-router": "^4.5.0",
41 |     "vue-tsc": "^2.2.10",
42 |     "wrangler": "^4.13.0",
43 |     "zod": "^3.24.3"
44 |   },
45 |   "dependencies": {
46 |     "date-fns": "^4.1.0"
47 |   }
48 | }
49 | 


--------------------------------------------------------------------------------
/apps/frontend/src/pages/admin/login.vue:
--------------------------------------------------------------------------------
 1 | <script setup lang="ts">
 2 | definePageMeta({
 3 |   layout: false,
 4 |   middleware() {
 5 |     if (useUserSession().loggedIn.value === true) {
 6 |       return navigateTo('/admin');
 7 |     }
 8 |   },
 9 | });
10 | 
11 | const errorMessage = ref('');
12 | 
13 | async function login(event: Event) {
14 |   const form = event.target as HTMLFormElement;
15 |   const formData = new FormData(form);
16 |   try {
17 |     await $fetch('/api/admin/login', {
18 |       method: 'POST',
19 |       body: {
20 |         username: formData.get('username') as string,
21 |         password: formData.get('password') as string,
22 |       },
23 |     });
24 |     await navigateTo('/admin', { external: true });
25 |   } catch (error) {
26 |     console.error('Failed to login', error);
27 |     errorMessage.value = 'Invalid username or password';
28 |   }
29 | }
30 | </script>
31 | 
32 | <template>
33 |   <div class="flex flex-col items-center justify-center h-screen py-12">
34 |     <form class="flex flex-col justify-center gap-4 items-center border p-4" @submit.prevent="login">
35 |       <input type="text" name="username" placeholder="username" class="border border-black p-1" />
36 |       <input type="password" name="password" placeholder="password" class="border border-black p-1" />
37 |       <p v-if="errorMessage" class="text-red-500">{{ errorMessage }}</p>
38 |       <button type="submit" class="bg-black text-white w-full py-px">Login</button>
39 |     </form>
40 |   </div>
41 | </template>
42 | 


--------------------------------------------------------------------------------
/apps/backend/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     /* Visit https://aka.ms/tsconfig.json to read more about this file */
 4 | 
 5 |     /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
 6 |     "target": "es2022",
 7 |     /* Specify a set of bundled library declaration files that describe the target runtime environment. */
 8 |     "lib": ["es2022"],
 9 |     /* Specify what JSX code is generated. */
10 |     "jsx": "react-jsx",
11 | 
12 |     /* Specify what module code is generated. */
13 |     "module": "es2022",
14 |     /* Specify how TypeScript looks up a file from a given module specifier. */
15 |     "moduleResolution": "bundler",
16 |     /* Enable importing .json files */
17 |     "resolveJsonModule": true,
18 | 
19 |     /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */
20 |     "allowJs": true,
21 |     /* Enable error reporting in type-checked JavaScript files. */
22 |     "checkJs": false,
23 | 
24 |     /* Disable emitting files from a compilation. */
25 |     "noEmit": true,
26 | 
27 |     /* Ensure that each file can be safely transpiled without relying on other imports. */
28 |     "isolatedModules": true,
29 |     /* Allow 'import x from y' when a module doesn't have a default export. */
30 |     "allowSyntheticDefaultImports": true,
31 |     /* Ensure that casing is correct in imports. */
32 |     "forceConsistentCasingInFileNames": true,
33 | 
34 |     /* Enable all strict type-checking options. */
35 |     "strict": true,
36 | 
37 |     /* Skip type checking all .d.ts files. */
38 |     "skipLibCheck": true
39 |   }
40 | }
41 | 


--------------------------------------------------------------------------------
/apps/backend/src/lib/utils.ts:
--------------------------------------------------------------------------------
 1 | import { getDb as getDbFromDatabase } from '@meridian/database';
 2 | import type { Context } from 'hono';
 3 | import type { HonoEnv } from '../app';
 4 | 
 5 | export function getDb(hyperdrive: Hyperdrive) {
 6 |   return getDbFromDatabase(hyperdrive.connectionString, {
 7 |     // Workers limit the number of concurrent external connections, so be sure to limit
 8 |     // the size of the local connection pool that postgres.js may establish.
 9 |     max: 5,
10 |     // If you are not using array types in your Postgres schema,
11 |     // disabling this will save you an extra round-trip every time you connect.
12 |     fetch_types: false,
13 |   });
14 | }
15 | 
16 | export function hasValidAuthToken(c: Context<HonoEnv>) {
17 |   const auth = c.req.header('Authorization');
18 |   if (auth === undefined || auth !== `Bearer ${c.env.API_TOKEN}`) {
19 |     return false;
20 |   }
21 |   return true;
22 | }
23 | 
24 | export const userAgents = [
25 |   // ios (golden standard for publishers)
26 |   'Mozilla/5.0 (iPhone; CPU iPhone OS 17_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1', // iphone safari (best overall)
27 |   'Mozilla/5.0 (iPhone; CPU iPhone OS 17_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/123.0.6312.87 Mobile/15E148 Safari/604.1', // iphone chrome
28 | 
29 |   // android (good alternatives)
30 |   'Mozilla/5.0 (Linux; Android 14; SM-S908B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Mobile Safari/537.36', // samsung flagship
31 |   'Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Mobile Safari/537.36', // pixel
32 | ];
33 | 


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/admin/sources/index.post.ts:
--------------------------------------------------------------------------------
 1 | import { getDB } from '~/server/lib/utils';
 2 | import { z } from 'zod';
 3 | import type { DataSourceConfigWrapper } from '@meridian/database';
 4 | import { $data_sources } from '@meridian/database';
 5 | 
 6 | const schema = z.object({
 7 |   url: z.string().url(),
 8 | });
 9 | 
10 | export default defineEventHandler(async event => {
11 |   await requireUserSession(event); // require auth
12 | 
13 |   const bodyResult = schema.safeParse(await readBody(event));
14 |   if (bodyResult.success === false) {
15 |     throw createError({ statusCode: 400, statusMessage: 'Invalid request body' });
16 |   }
17 | 
18 |   try {
19 |     await getDB(event)
20 |       .insert($data_sources)
21 |       .values({
22 |         name: 'Unknown',
23 |         source_type: 'RSS',
24 |         config: {
25 |           source_type: 'RSS',
26 |           config: {
27 |             config_schema_version: '1.0',
28 |             rss_paywall: false,
29 |             url: bodyResult.data.url,
30 |           },
31 |         } satisfies z.infer<typeof DataSourceConfigWrapper>,
32 |       });
33 |   } catch (error) {
34 |     console.error('Failed to add source', error);
35 |     throw createError({ statusCode: 500, statusMessage: 'Failed to add source' });
36 |   }
37 | 
38 |   const config = useRuntimeConfig();
39 | 
40 |   try {
41 |     await fetch(`${config.public.WORKER_API}/do/admin/initialize-dos`, {
42 |       method: 'POST',
43 |       headers: {
44 |         Authorization: `Bearer ${config.worker.api_token}`,
45 |       },
46 |     });
47 |   } catch (error) {
48 |     console.error('Failed to initialize DOs', error);
49 |     throw createError({ statusCode: 500, statusMessage: 'Failed to initialize DOs' });
50 |   }
51 | 
52 |   return {
53 |     success: true,
54 |   };
55 | });
56 | 


--------------------------------------------------------------------------------
/services/meridian-ml-service/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "meridian_ml_service"
 3 | version = "0.1.0"
 4 | description = "Python service for ML tasks (embeddings, clustering) for Meridian."
 5 | authors = [{ name = "Iliane Amadou", email = "mail@iliane.xyz" }]
 6 | requires-python = ">=3.11" # Stable, well-supported, performant
 7 | dependencies = [
 8 |     "fastapi>=0.115.12",  # Last version with Pydantic v2 support
 9 |     "uvicorn[standard]>=0.34.2", # Includes performance extras
10 |     "pydantic>=2.11.3",
11 |     "numpy>=2.2.0",
12 |     "torch>=2.6.0",  # CPU version will be installed via extra-index-url
13 |     "transformers>=4.51.3",
14 |     "sentence-transformers>=4.1.0", # Often simplifies embedding tasks
15 |     "python-dotenv>=1.1.0",
16 |     # Add later when needed:
17 |     # "umap-learn>=0.5.5",
18 |     # "hdbscan>=0.8.33",
19 | ]
20 | 
21 | [project.optional-dependencies]
22 | dev = [
23 |     "ruff>=0.4.4",   # Fast linter/formatter
24 |     "mypy>=1.10.0",  # Static type checker
25 | ]
26 | 
27 | # Configuration for Ruff (Linter/Formatter)
28 | [tool.ruff]
29 | line-length = 88
30 | target-version = "py311"
31 | 
32 | [tool.ruff.lint]
33 | # See https://docs.astral.sh/ruff/rules/ for rule codes
34 | select = ["E", "F", "W", "I", "N", "UP", "B", "A", "C4", "T20", "SIM", "PTH"]
35 | ignore = ["E501"] # Ignore line length rule (handled by formatter)
36 | 
37 | [tool.ruff.format]
38 | quote-style = "double"
39 | 
40 | # Configuration for Mypy (Type Checker)
41 | [tool.mypy]
42 | python_version = "3.11"
43 | warn_return_any = true
44 | warn_unused_configs = true
45 | ignore_missing_imports = true # Be pragmatic initially
46 | # Add stricter checks as needed
47 | 
48 | # Build system config (standard for setuptools/uv)
49 | [build-system]
50 | requires = ["setuptools>=61.0"]
51 | build-backend = "setuptools.build_meta"


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/subscribe.post.ts:
--------------------------------------------------------------------------------
 1 | import MailerLite from '@mailerlite/mailerlite-nodejs';
 2 | import { $newsletter } from '@meridian/database';
 3 | import { z } from 'zod';
 4 | import { getDB } from '../lib/utils';
 5 | 
 6 | export default defineEventHandler(async event => {
 7 |   const config = useRuntimeConfig(event);
 8 | 
 9 |   // Parse the request body to get the email
10 |   const body = await readBody(event);
11 |   const bodyContent = z.object({ email: z.string().email() }).safeParse(body);
12 |   if (bodyContent.success === false) {
13 |     throw createError({ statusCode: 400, statusMessage: 'Invalid email format' });
14 |   }
15 | 
16 |   try {
17 |     // Insert email into the newsletter table
18 |     await Promise.all([
19 |       getDB(event).insert($newsletter).values({ email: bodyContent.data.email }).onConflictDoNothing(),
20 |       (async () => {
21 |         if (config.mailerlite.api_key === undefined || config.mailerlite.group_id === undefined) {
22 |           console.warn('MailerLite is not configured');
23 |           return; // nothing if mailerlite is not configured
24 |         }
25 |         const mailerlite = new MailerLite({ api_key: config.mailerlite.api_key });
26 |         try {
27 |           await mailerlite.subscribers.createOrUpdate({
28 |             email: bodyContent.data.email,
29 |             groups: [config.mailerlite.group_id],
30 |           });
31 |         } catch (error) {
32 |           console.error('MailerLite error:', error);
33 |           throw createError({ statusCode: 500, statusMessage: 'MailerLite error' });
34 |         }
35 |       })(),
36 |     ]);
37 | 
38 |     return { success: true, message: 'Successfully subscribed' };
39 |   } catch (error) {
40 |     console.error('Database error:', error);
41 |     throw createError({ statusCode: 500, statusMessage: 'Database error' });
42 |   }
43 | });
44 | 


--------------------------------------------------------------------------------
/apps/frontend/src/pages/index.vue:
--------------------------------------------------------------------------------
 1 | <script setup lang="ts">
 2 | import SubscriptionForm from '../components/SubscriptionForm.vue';
 3 | 
 4 | const { $md } = useNuxtApp();
 5 | const config = useRuntimeConfig();
 6 | 
 7 | const text = ref(
 8 |   `
 9 | ## what is this?
10 | 
11 | my personal intelligence agency. i built a daily brief of everything important happening that i care about, with *actual analysis* beyond headlines.
12 | 
13 | gives me the "what", "why", and "so what" behind global events in 15min.
14 | 
15 | ### how it works
16 | - scrapes hundreds of news sources
17 | - clusters related articles
18 | - separates facts from disputed claims
19 | - spots bias patterns
20 | - runs targeted web searches to find context and fill knowledge gaps
21 | - identifies what actually matters
22 | - delivers one clean & engaging brief
23 | 
24 | ### why i built this
25 | 
26 | always thought it was cool how presidents get those daily briefings - everything they need to know, perfectly prepared by a team of analysts. figured *why can't i have that too*?
27 | 
28 | now with ai, i can. what would've required an *entire* intelligence agency now costs about *a dollar a day* in compute.
29 | 
30 | this brief is tuned to what i care about (geopolitics, french news, tech, actual good news).
31 | 
32 | just what's happening and why it matters. no middleman deciding what reaches me.
33 | `.trim()
34 | );
35 | 
36 | useSEO({
37 |   title: 'home | meridian',
38 |   description:
39 |     'a daily brief of everything important happening that i care about, with actual analysis beyond headlines',
40 |   ogImage: `${config.public.WORKER_API}/openGraph/default`,
41 |   ogUrl: `https://news.iliane.xyz/`,
42 | });
43 | </script>
44 | 
45 | <template>
46 |   <div class="flex flex-col gap-6">
47 |     <div class="prose" v-html="$md.render(text)" />
48 | 
49 |     <!-- Subscription area -->
50 |     <div class="mt-4 pt-8 pb-6 border-t border-gray-300">
51 |       <SubscriptionForm />
52 |     </div>
53 |   </div>
54 | </template>
55 | 


--------------------------------------------------------------------------------
/apps/frontend/src/composables/useReadingProgess.ts:
--------------------------------------------------------------------------------
 1 | function throttle<T extends (...args: unknown[]) => unknown>(func: T, wait: number) {
 2 |   let timeout: ReturnType<typeof setTimeout> | null = null;
 3 |   let lastArgs: Parameters<T> | null = null;
 4 | 
 5 |   const throttled = (...args: Parameters<T>) => {
 6 |     lastArgs = args;
 7 | 
 8 |     if (!timeout) {
 9 |       func(...args);
10 |       timeout = setTimeout(() => {
11 |         if (lastArgs) func(...lastArgs);
12 |         timeout = null;
13 |         lastArgs = null;
14 |       }, wait);
15 |     }
16 |   };
17 | 
18 |   throttled.cancel = () => {
19 |     if (timeout) {
20 |       clearTimeout(timeout);
21 |       timeout = null;
22 |       lastArgs = null;
23 |     }
24 |   };
25 | 
26 |   return throttled;
27 | }
28 | 
29 | export function useReadingProgress() {
30 |   const readingProgress = ref(0);
31 |   const showBackToTop = ref(false);
32 |   let scrollListener: () => void;
33 | 
34 |   const calculateProgress = () => {
35 |     const scrollTop = document.documentElement.scrollTop;
36 |     const scrollHeight = document.documentElement.scrollHeight - document.documentElement.clientHeight;
37 |     readingProgress.value = scrollHeight > 0 ? (scrollTop / scrollHeight) * 100 : 0;
38 |     showBackToTop.value = scrollTop > 500; // Show back to top button after scrolling down 500px
39 |   };
40 | 
41 |   const throttledCalculateProgress = throttle(calculateProgress, 25);
42 | 
43 |   const scrollToTop = () => {
44 |     window.scrollTo({ top: 0, behavior: 'smooth' });
45 |   };
46 | 
47 |   onMounted(() => {
48 |     scrollListener = throttledCalculateProgress;
49 |     window.addEventListener('scroll', scrollListener);
50 |     calculateProgress(); // Initial calculation
51 |   });
52 | 
53 |   onUnmounted(() => {
54 |     window.removeEventListener('scroll', scrollListener);
55 |     throttledCalculateProgress.cancel();
56 |   });
57 | 
58 |   return {
59 |     readingProgress,
60 |     showBackToTop,
61 |     scrollToTop,
62 |   };
63 | }
64 | 


--------------------------------------------------------------------------------
/services/meridian-ml-service/src/meridian_ml_service/main.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from fastapi import Depends, FastAPI, HTTPException
 3 | 
 4 | from .config import settings
 5 | from .dependencies import (
 6 |     ModelDep,
 7 |     verify_token,
 8 |     get_embedding_model,
 9 | )  # Import auth dependency
10 | from .embeddings import compute_embeddings
11 | from .schemas import EmbeddingRequest, EmbeddingResponse
12 | 
13 | app = FastAPI(
14 |     title="Meridian ML Service",
15 |     description="Handles ML tasks like embeddings and clustering.",
16 |     version="0.1.0",
17 | )
18 | 
19 | 
20 | # Simple root endpoint for health check
21 | @app.get("/")
22 | async def read_root():
23 |     return {"status": "ok", "service": "Meridian ML Service"}
24 | 
25 | 
26 | @app.get("/ping")
27 | async def ping():
28 |     return {"pong": True}
29 | 
30 | 
31 | @app.post("/embeddings", response_model=EmbeddingResponse)
32 | async def api_compute_embeddings(
33 |     request: EmbeddingRequest,
34 |     model_components: ModelDep,  # ModelDep already includes Depends
35 |     _: None = Depends(verify_token),
36 | ):
37 |     """
38 |     Computes embeddings for the provided list of texts.
39 |     """
40 |     print(f"Received request to embed {len(request.texts)} texts.")
41 |     try:
42 |         embeddings_np: np.ndarray = compute_embeddings(
43 |             texts=request.texts,
44 |             model_components=model_components,
45 |         )
46 | 
47 |         embeddings_list: list[list[float]] = embeddings_np.tolist()
48 | 
49 |         return EmbeddingResponse(
50 |             embeddings=embeddings_list, model_name=settings.embedding_model_name
51 |         )
52 |     except Exception as e:
53 |         print(f"ERROR during embedding computation: {e}")
54 |         # Consider more specific error handling based on exception types
55 |         raise HTTPException(
56 |             status_code=500,
57 |             detail=f"Internal server error during embedding computation: {str(e)}",
58 |         ) from e
59 | 


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/briefs/[slug]/index.get.ts:
--------------------------------------------------------------------------------
 1 | import { $reports, and, gte, lte } from '@meridian/database';
 2 | import { ensureDate, formatReportDate, getDB } from '~/server/lib/utils';
 3 | 
 4 | interface Brief {
 5 |   id: number;
 6 |   createdAt: Date;
 7 |   title: string;
 8 |   content: string;
 9 |   model_author: string | null;
10 |   totalArticles: number;
11 |   totalSources: number;
12 |   usedSources: number;
13 |   usedArticles: number;
14 |   slug: string;
15 |   date: {
16 |     month: string;
17 |     day: number;
18 |     year: number;
19 |   };
20 | }
21 | 
22 | export default defineEventHandler(async event => {
23 |   const slug = getRouterParam(event, 'slug');
24 |   if (slug === undefined) {
25 |     throw createError({ statusCode: 400, statusMessage: 'Slug is required' });
26 |   }
27 | 
28 |   // decode slug & get date
29 |   const date = new Date(slug);
30 |   if (Number.isNaN(date.getTime())) {
31 |     throw createError({ statusCode: 400, statusMessage: 'Invalid slug' });
32 |   }
33 | 
34 |   // set start/end of the day for date range query
35 |   const startOfDay = new Date(date.getFullYear(), date.getMonth(), date.getDate());
36 |   const endOfDay = new Date(date.getFullYear(), date.getMonth(), date.getDate() + 1);
37 | 
38 |   // get report created on this day
39 |   const report = await getDB(event).query.$reports.findFirst({
40 |     where: and(gte($reports.createdAt, startOfDay), lte($reports.createdAt, endOfDay)),
41 |     columns: {
42 |       id: true,
43 |       createdAt: true,
44 |       title: true,
45 |       content: true,
46 |       model_author: true,
47 |       totalArticles: true,
48 |       totalSources: true,
49 |       usedSources: true,
50 |       usedArticles: true,
51 |     },
52 |   });
53 |   if (report === undefined) {
54 |     throw createError({ statusCode: 404, statusMessage: 'Report not found' });
55 |   }
56 | 
57 |   return {
58 |     ...report,
59 |     slug,
60 |     date: formatReportDate(ensureDate(report.createdAt)),
61 |   } satisfies Brief;
62 | });
63 | 


--------------------------------------------------------------------------------
/apps/briefs/src/events.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import os
 3 | from datetime import date
 4 | from pydantic import BaseModel, field_validator
 5 | from typing import Optional
 6 | import pandas as pd
 7 | from datetime import datetime
 8 | from typing import Optional
 9 | from pydantic import BaseModel, field_validator
10 | from dotenv import load_dotenv
11 | 
12 | load_dotenv()
13 | 
14 | 
15 | class Source(BaseModel):
16 |     id: int
17 |     name: str
18 | 
19 | 
20 | class Event(BaseModel):
21 |     id: int
22 |     sourceId: int
23 |     url: str
24 |     title: str
25 |     publishDate: datetime  # changed from date to datetime
26 |     contentFileKey: str
27 |     primary_location: str
28 |     completeness: str
29 |     content_quality: str
30 |     event_summary_points: list[str]
31 |     thematic_keywords: list[str]
32 |     topic_tags: list[str]
33 |     key_entities: list[str]
34 |     content_focus: list[str]
35 |     embedding: list[float]
36 |     createdAt: datetime
37 | 
38 |     @field_validator("publishDate", mode="before")
39 |     @classmethod
40 |     def parse_date(cls, value):
41 |         if value is None:
42 |             return None
43 | 
44 |         # Handle ISO format with timezone info
45 |         try:
46 |             return datetime.fromisoformat(value)
47 |         except ValueError:
48 |             # For older Python versions or non-standard formats
49 |             # you might need dateutil
50 |             from dateutil import parser
51 | 
52 |             return parser.parse(value)
53 | 
54 | 
55 | def get_events(date: str = None):
56 |     url = f"http://localhost:8787/events"
57 | 
58 |     if date:
59 |         url += f"?date={date}"
60 | 
61 |     response = requests.get(
62 |         url,
63 |         headers={"Authorization": f"Bearer {os.environ.get('MERIDIAN_SECRET_KEY')}"},
64 |     )
65 |     data = response.json()
66 | 
67 |     sources = [Source(**source) for source in data["sources"]]
68 |     events = [Event(**event) for event in data["events"]]
69 | 
70 |     return sources, events
71 | 


--------------------------------------------------------------------------------
/apps/backend/src/prompts/articleRepresentation.prompt.ts:
--------------------------------------------------------------------------------
 1 | export function getArticleRepresentationPrompt(title: string, url: string, text: string) {
 2 |   return `
 3 | Transform article into standardized format. No repeated info across fields.
 4 | 
 5 | Fields:
 6 | Topic: technology/politics/business/health/agriculture/sports/international
 7 | Subtopic: specific area (ai-research, elections, trade-policy)  
 8 | Geography: global/us/china/europe/[city]/[region] 
 9 | Scope: policy/technical/market/social-impact/breaking-news/analysis
10 | Urgency: breaking/developing/routine/historical
11 | Source: mainstream/trade/academic/government/blog
12 | Entities: [max 5 key people/orgs/products/places]
13 | Tags: [max 5 additional specifics not covered above]
14 | 
15 | Examples:
16 | 
17 | INPUT: """
18 | Nvidia CEO Jensen Huang Warns Companies to Adopt AI Now
19 | 
20 | Nvidia CEO delivered stark warning to business leaders yesterday, stating companies must integrate AI immediately or face obsolescence. Speaking to Fortune 500 executives, emphasized current AI revolution represents 'once-in-a-lifetime transformation'. Stock surged 180% this year as AI chip demand accelerates.
21 | """
22 | 
23 | OUTPUT:
24 | Topic: technology
25 | Subtopic: business-strategy
26 | Geography: us
27 | Scope: market
28 | Urgency: routine
29 | Source: mainstream
30 | Entities: [Jensen Huang, Nvidia, Fortune 500]
31 | Tags: [stock-surge, 180-percent, chip-demand]
32 | 
33 | INPUT: """
34 | Breaking: Emergency Wheat Export Ban by Inner Mongolia Agricultural Ministry
35 | 
36 | Ministry announced immediate wheat export suspension today, citing food security concerns amid drought. Affects 2.3 million tons scheduled for neighboring provinces. Farmers concerned about revenue losses, traders predict price volatility.
37 | """
38 | 
39 | OUTPUT:
40 | Topic: agriculture
41 | Subtopic: trade-policy
42 | Geography: inner-mongolia
43 | Scope: breaking-news
44 | Urgency: breaking
45 | Source: mainstream
46 | Entities: [Inner Mongolia Agricultural Ministry]
47 | Tags: [export-ban, drought, 2.3-million-tons, price-volatility]
48 | 
49 | INPUT: """"
50 | # (${title})[${url}]
51 | 
52 | ${text.slice(0, 1500)}...
53 | """
54 | 
55 | OUTPUT:
56 | `.trim();
57 | }
58 | 


--------------------------------------------------------------------------------
/apps/frontend/nuxt.config.ts:
--------------------------------------------------------------------------------
 1 | import tailwindcss from '@tailwindcss/vite';
 2 | 
 3 | // https://nuxt.com/docs/api/configuration/nuxt-config
 4 | export default defineNuxtConfig({
 5 |   app: {
 6 |     head: {
 7 |       htmlAttrs: { lang: 'en' },
 8 |       link: [{ rel: 'icon', type: 'image/png', href: '/favicon.ico' }],
 9 |     },
10 |   },
11 | 
12 |   colorMode: { classSuffix: '', preference: 'system', fallback: 'system' },
13 |   compatibilityDate: '2025-03-01',
14 |   css: ['~/assets/css/main.css'],
15 | 
16 |   devtools: { enabled: true },
17 |   devServer: { host: '0.0.0.0' },
18 | 
19 |   modules: ['@nuxtjs/color-mode', 'nuxt-auth-utils', '@nuxt/eslint'],
20 | 
21 |   nitro: { prerender: { autoSubfolderIndex: false }, cloudflare: { nodeCompat: true, deployConfig: true } },
22 | 
23 |   routeRules: {
24 |     // Cache the list of briefs for 1 hour on CDN, 15 mins in browser
25 |     // Allow serving stale data for up to a day while revalidating
26 |     '/api/briefs': {
27 |       cache: {
28 |         maxAge: 60 * 15, // 15 minutes browser cache
29 |         staleMaxAge: 60 * 60 * 24, // 1 day stale-while-revalidate on CDN
30 |       },
31 |     },
32 |     // Cache individual briefs for longer (assuming they don't change once published)
33 |     // Cache for 1 day on CDN, 1 hour in browser
34 |     '/api/briefs/**': {
35 |       // Matches /api/briefs/some-slug, /api/briefs/another-slug etc.
36 |       cache: {
37 |         maxAge: 60 * 60, // 1 hour browser cache
38 |         staleMaxAge: 60 * 60 * 24 * 7, // 1 week stale-while-revalidate on CDN
39 |       },
40 |     },
41 |   },
42 | 
43 |   // In production, these are set via the environment variables
44 |   // NUXT_+{key}
45 |   runtimeConfig: {
46 |     database: { url: '' }, // NUXT_DATABASE_URL
47 |     mailerlite: { api_key: '', group_id: '' }, // NUXT_MAILERLITE_API_KEY, NUXT_MAILERLITE_GROUP_ID
48 |     admin: { username: 'admin', password: 'hunter2' }, // NUXT_ADMIN_USERNAME, NUXT_ADMIN_PASSWORD
49 |     worker: { api_token: 'hunter2' }, // NUXT_WORKER_API_TOKEN
50 | 
51 |     // IMPORTANT: all "public" config is exposed to the client
52 |     public: { WORKER_API: 'http://localhost:8787' }, // NUXT_PUBLIC_WORKER_API
53 |   },
54 | 
55 |   srcDir: 'src',
56 | 
57 |   vite: { plugins: [tailwindcss()] },
58 | });
59 | 


--------------------------------------------------------------------------------
/apps/backend/src/lib/logger.ts:
--------------------------------------------------------------------------------
 1 | // Define the basic structure for your logs
 2 | interface LogEntry {
 3 |   level: 'debug' | 'info' | 'warn' | 'error';
 4 |   message: string;
 5 |   timestamp: string;
 6 |   context?: Record<string, unknown>;
 7 |   error?: {
 8 |     message: string;
 9 |     stack?: string;
10 |     cause?: unknown;
11 |   };
12 | }
13 | 
14 | // Basic logger class
15 | export class Logger {
16 |   private baseContext: Record<string, unknown>;
17 | 
18 |   constructor(baseContext: Record<string, unknown> = {}) {
19 |     // Clone the context to prevent mutation issues if the source object changes
20 |     this.baseContext = { ...baseContext };
21 |   }
22 | 
23 |   // Method to create a "child" logger with additional context
24 |   child(additionalContext: Record<string, unknown>): Logger {
25 |     return new Logger({ ...this.baseContext, ...additionalContext });
26 |   }
27 | 
28 |   // Central logging function
29 |   private log(level: LogEntry['level'], message: string, context?: Record<string, unknown>, error?: Error) {
30 |     const entry: LogEntry = {
31 |       level,
32 |       message,
33 |       timestamp: new Date().toISOString(),
34 |       // Merge base context, method-specific context
35 |       context: { ...this.baseContext, ...context },
36 |     };
37 | 
38 |     if (error) {
39 |       entry.error = {
40 |         message: error.message,
41 |         stack: error.stack,
42 |         // Include cause if available
43 |         ...(error.cause ? { cause: error.cause } : {}),
44 |       };
45 |     }
46 | 
47 |     // The core idea: output structured JSON via console.log
48 |     // Logpush / Tail Workers will pick this up.
49 |     console.log(JSON.stringify(entry));
50 |   }
51 | 
52 |   // Convenience methods for different levels
53 |   debug(message: string, context?: Record<string, unknown>) {
54 |     this.log('debug', message, context);
55 |   }
56 | 
57 |   info(message: string, context?: Record<string, unknown>) {
58 |     this.log('info', message, context);
59 |   }
60 | 
61 |   warn(message: string, context?: Record<string, unknown>, error?: Error) {
62 |     this.log('warn', message, context, error);
63 |   }
64 | 
65 |   error(message: string, context?: Record<string, unknown>, error?: Error) {
66 |     this.log('error', message, context, error);
67 |   }
68 | }
69 | 


--------------------------------------------------------------------------------
/services/meridian-ml-service/src/meridian_ml_service/dependencies.py:
--------------------------------------------------------------------------------
 1 | from typing import Annotated, Union
 2 | import asyncio
 3 | from functools import lru_cache
 4 | 
 5 | from fastapi import Depends, HTTPException, Security
 6 | from fastapi.security import APIKeyHeader
 7 | from starlette.status import HTTP_403_FORBIDDEN
 8 | 
 9 | from .config import settings
10 | from .embeddings import ModelComponents, load_embedding_model
11 | 
12 | # Global lock for model loading
13 | _model_lock = asyncio.Lock()
14 | _model_instance: Union[ModelComponents, None] = None
15 | 
16 | 
17 | async def get_embedding_model() -> ModelComponents:
18 |     """FastAPI dependency to get the loaded embedding model components in a thread-safe way."""
19 |     global _model_instance
20 | 
21 |     if _model_instance is not None:
22 |         return _model_instance
23 | 
24 |     async with _model_lock:
25 |         # double-check pattern to avoid race conditions
26 |         if _model_instance is not None:
27 |             return _model_instance
28 | 
29 |         try:
30 |             _model_instance = load_embedding_model()
31 |             return _model_instance
32 |         except Exception as e:
33 |             # Consider how to handle model loading failure more gracefully in API
34 |             # Maybe return HTTP 503 Service Unavailable?
35 |             print(f"FATAL: Could not provide embedding model: {e}")
36 |             raise  # Let FastAPI handle internal server error for now
37 | 
38 | 
39 | ModelDep = Annotated[ModelComponents, Depends(get_embedding_model)]
40 | 
41 | api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
42 | 
43 | 
44 | async def verify_token(api_key: Union[str, None] = Security(api_key_header)) -> None:
45 |     if settings.api_token is None:
46 |         return  # auth is disabled if no token is configured
47 | 
48 |     if api_key is None:
49 |         raise HTTPException(
50 |             status_code=HTTP_403_FORBIDDEN, detail="Invalid or missing API token"
51 |         )
52 | 
53 |     # Extract token from Bearer format
54 |     token = api_key
55 |     if api_key.startswith("Bearer "):
56 |         token = api_key[7:]  # Remove "Bearer " prefix
57 | 
58 |     if token != settings.api_token:
59 |         raise HTTPException(
60 |             status_code=HTTP_403_FORBIDDEN, detail="Invalid or missing API token"
61 |         )
62 | 


--------------------------------------------------------------------------------
/apps/frontend/src/composables/useStickyElement.ts:
--------------------------------------------------------------------------------
 1 | export function useStickyElement(
 2 |   targetRef: Ref<HTMLElement | null>,
 3 |   options: IntersectionObserverInit = { threshold: 0 }
 4 | ) {
 5 |   const isSticky = ref(false);
 6 |   let observer: IntersectionObserver | null = null;
 7 | 
 8 |   const setupObserver = () => {
 9 |     if (observer) observer.disconnect(); // Clean up previous observer
10 | 
11 |     if (targetRef.value) {
12 |       // Use a placeholder element *before* the target to detect when the target *would* leave the screen top
13 |       // Or observe the target itself and check entry.boundingClientRect.top <= options.rootMargin top value (if set)
14 |       // Simpler approach: Observe the target and set sticky when it's *not* intersecting the *top* of the viewport.
15 |       // Let's observe a sentinel element placed *above* the header for simplicity if possible,
16 |       // otherwise observe the header itself and use rootMargin.
17 | 
18 |       // Assuming we observe the element *itself* and want it sticky when it scrolls *off* the top.
19 |       // We need a negative top margin equal to the element's height or just 1px if we only care when it *starts* scrolling off.
20 |       // Let's stick to the original logic: observe the element, become sticky when *not* intersecting.
21 |       // This requires the element to *start* within the viewport.
22 | 
23 |       observer = new IntersectionObserver(
24 |         ([entry]) => {
25 |           // Becomes sticky when the *observed element* is no longer intersecting the viewport (at the top)
26 |           // This interpretation might depend on where the observed element is relative to the sticky element itself.
27 |           // If targetRef *is* the element becoming sticky, this works.
28 |           isSticky.value = !entry.isIntersecting;
29 |         },
30 |         options // Use provided options (e.g., { threshold: 0 })
31 |       );
32 |       observer.observe(targetRef.value);
33 |     }
34 |   };
35 | 
36 |   onMounted(() => {
37 |     // Need to wait for the element to be mounted and potentially rendered
38 |     nextTick(setupObserver);
39 |   });
40 | 
41 |   onUnmounted(() => {
42 |     if (observer) {
43 |       observer.disconnect();
44 |     }
45 |   });
46 | 
47 |   // Re-setup if the target element changes (e.g., v-if)
48 |   watch(targetRef, () => {
49 |     nextTick(setupObserver);
50 |   });
51 | 
52 |   return {
53 |     isSticky,
54 |   };
55 | }
56 | 


--------------------------------------------------------------------------------
/apps/backend/src/routers/reports.router.ts:
--------------------------------------------------------------------------------
 1 | import { zValidator } from '@hono/zod-validator';
 2 | import { $reports, desc } from '@meridian/database';
 3 | import { Hono } from 'hono';
 4 | import { z } from 'zod';
 5 | import type { HonoEnv } from '../app';
 6 | import { tryCatchAsync } from '../lib/tryCatchAsync';
 7 | import { getDb, hasValidAuthToken } from '../lib/utils';
 8 | 
 9 | const route = new Hono<HonoEnv>()
10 |   .get('/last-report', async c => {
11 |     // check auth token
12 |     const hasValidToken = hasValidAuthToken(c);
13 |     if (!hasValidToken) {
14 |       return c.json({ error: 'Unauthorized' }, 401);
15 |     }
16 | 
17 |     const reportResult = await tryCatchAsync(
18 |       getDb(c.env.HYPERDRIVE).query.$reports.findFirst({
19 |         orderBy: desc($reports.createdAt),
20 |       })
21 |     );
22 |     if (reportResult.isErr()) {
23 |       return c.json({ error: 'Failed to fetch last report' }, 500);
24 |     }
25 | 
26 |     const report = reportResult.value;
27 |     if (report === undefined) {
28 |       return c.json({ error: 'No report found' }, 404);
29 |     }
30 | 
31 |     return c.json(report);
32 |   })
33 |   .post(
34 |     '/report',
35 |     zValidator(
36 |       'json',
37 |       z.object({
38 |         title: z.string(),
39 |         content: z.string(),
40 |         totalArticles: z.number(),
41 |         totalSources: z.number(),
42 |         usedArticles: z.number(),
43 |         usedSources: z.number(),
44 |         tldr: z.string(),
45 |         createdAt: z.coerce.date(),
46 |         model_author: z.string(),
47 |         clustering_params: z.object({
48 |           umap: z.object({
49 |             n_neighbors: z.number(),
50 |           }),
51 |           hdbscan: z.object({
52 |             min_cluster_size: z.number(),
53 |             min_samples: z.number(),
54 |             epsilon: z.number(),
55 |           }),
56 |         }),
57 |       })
58 |     ),
59 |     async c => {
60 |       if (!hasValidAuthToken(c)) {
61 |         return c.json({ error: 'Unauthorized' }, 401);
62 |       }
63 | 
64 |       const db = getDb(c.env.HYPERDRIVE);
65 |       const body = c.req.valid('json');
66 | 
67 |       const reportResult = await tryCatchAsync(db.insert($reports).values(body));
68 |       if (reportResult.isErr()) {
69 |         return c.json({ error: 'Failed to insert report' }, 500);
70 |       }
71 | 
72 |       return c.json({ success: true });
73 |     }
74 |   );
75 | 
76 | export default route;
77 | 


--------------------------------------------------------------------------------
/apps/frontend/src/layouts/default.vue:
--------------------------------------------------------------------------------
 1 | <script lang="ts" setup>
 2 | import { SunIcon, MoonIcon } from '@heroicons/vue/20/solid';
 3 | </script>
 4 | 
 5 | <template>
 6 |   <div>
 7 |     <div class="max-w-3xl mx-auto px-6 w-full flex-1">
 8 |       <nav class="py-4">
 9 |         <ul class="flex space-x-4 items-center font-medium">
10 |           <div class="flex w-full flex-row gap-4">
11 |             <li>
12 |               <NuxtLink class="hover:underline" active-class="underline" to="/"> home </NuxtLink>
13 |             </li>
14 |             <li>
15 |               <NuxtLink class="hover:underline" active-class="underline" to="/briefs"> briefs </NuxtLink>
16 |             </li>
17 |             <li v-if="useUserSession().loggedIn.value">
18 |               <NuxtLink class="hover:underline" active-class="underline" to="/admin"> admin </NuxtLink>
19 |             </li>
20 |           </div>
21 |           <ClientOnly>
22 |             <button
23 |               v-if="$colorMode.value === 'dark'"
24 |               class="hover:cursor-pointer"
25 |               aria-label="Switch to light mode"
26 |               @click="$colorMode.preference = 'light'"
27 |             >
28 |               <SunIcon class="w-5 h-5 text-zinc-700 dark:text-zinc-300 hover:text-zinc-900 dark:hover:text-zinc-600" />
29 |             </button>
30 |             <button
31 |               v-if="$colorMode.value === 'light'"
32 |               class="hover:cursor-pointer"
33 |               aria-label="Switch to dark mode"
34 |               @click="$colorMode.preference = 'dark'"
35 |             >
36 |               <MoonIcon class="w-5 h-5 text-zinc-700 dark:text-zinc-300 hover:text-zinc-900 dark:hover:text-zinc-600" />
37 |             </button>
38 |           </ClientOnly>
39 |         </ul>
40 |       </nav>
41 |       <div class="h-px w-full bg-gray-300 mb-4" />
42 |       <NuxtPage />
43 |     </div>
44 |     <footer>
45 |       <div class="max-w-3xl mx-auto px-6 py-4">
46 |         <div class="h-px w-full bg-gray-300 mb-4" />
47 |         <p class="text-sm text-center">
48 |           built by
49 |           <strong class="underline"><NuxtLink :to="`https://iliane.xyz`">iliane</NuxtLink></strong>
50 |           ·
51 |           <span
52 |             >open source on
53 |             <strong class="underline"
54 |               ><NuxtLink to="https://github.com/iliane5/meridian" target="_blank" rel="noopener noreferrer"
55 |                 >github</NuxtLink
56 |               ></strong
57 |             ></span
58 |           >
59 |         </p>
60 |       </div>
61 |     </footer>
62 |   </div>
63 | </template>
64 | 


--------------------------------------------------------------------------------
/apps/backend/src/routers/events.router.ts:
--------------------------------------------------------------------------------
 1 | import { $data_sources, $ingested_items, and, eq, gte, isNotNull, lte, not } from '@meridian/database';
 2 | import { Hono } from 'hono';
 3 | import type { HonoEnv } from '../app';
 4 | import { getDb, hasValidAuthToken } from '../lib/utils';
 5 | 
 6 | const route = new Hono<HonoEnv>().get('/', async c => {
 7 |   // require bearer auth token
 8 |   const hasValidToken = hasValidAuthToken(c);
 9 |   if (!hasValidToken) {
10 |     return c.json({ error: 'Unauthorized' }, 401);
11 |   }
12 | 
13 |   // Check if a date query parameter was provided in yyyy-mm-dd format
14 |   const dateParam = c.req.query('date');
15 | 
16 |   let endDate: Date;
17 |   if (dateParam) {
18 |     // Parse the date parameter explicitly with UTC
19 |     // Append T07:00:00Z to ensure it's 7am UTC
20 |     endDate = new Date(`${dateParam}T07:00:00Z`);
21 |     // Check if date is valid
22 |     if (Number.isNaN(endDate.getTime())) {
23 |       return c.json({ error: 'Invalid date format. Please use yyyy-mm-dd' }, 400);
24 |     }
25 |   } else {
26 |     // Use current date if no date parameter was provided
27 |     endDate = new Date();
28 |     // Set to 7am UTC today
29 |     endDate.setUTCHours(7, 0, 0, 0);
30 |   }
31 | 
32 |   // Create a 30-hour window ending at 7am UTC on the specified date
33 |   const startDate = new Date(endDate.getTime() - 30 * 60 * 60 * 1000);
34 | 
35 |   const db = getDb(c.env.HYPERDRIVE);
36 |   const [allSources, events] = await Promise.all([
37 |     db.select({ id: $data_sources.id, name: $data_sources.name }).from($data_sources),
38 |     db
39 |       .select({
40 |         id: $ingested_items.id,
41 |         sourceId: $ingested_items.data_source_id,
42 |         url: $ingested_items.url_to_original,
43 |         title: $ingested_items.display_title,
44 |         publishDate: $ingested_items.published_at,
45 |         contentFileKey: $ingested_items.raw_data_r2_key,
46 |         embedding: $ingested_items.embedding,
47 |         createdAt: $ingested_items.ingested_at,
48 |       })
49 |       .from($ingested_items)
50 |       .where(
51 |         and(
52 |           isNotNull($ingested_items.embedding),
53 |           gte($ingested_items.published_at, startDate),
54 |           lte($ingested_items.published_at, endDate),
55 |           isNotNull($ingested_items.processed_at)
56 |         )
57 |       ),
58 |   ]);
59 | 
60 |   return c.json({
61 |     sources: allSources,
62 |     events,
63 |     dateRange: {
64 |       startDate: startDate.toISOString(),
65 |       endDate: endDate.toISOString(),
66 |     },
67 |   });
68 | });
69 | 
70 | export default route;
71 | 


--------------------------------------------------------------------------------
/apps/frontend/README.md:
--------------------------------------------------------------------------------
 1 | # Meridian Frontend
 2 | 
 3 | This is the Nuxt 3 frontend application for the [Meridian project](https://github.com/iliane5/meridian) (your personal AI intelligence agency). It provides the web interface for viewing generated intelligence briefs and managing sources (admin).
 4 | 
 5 | Built with:
 6 | 
 7 | - [Nuxt 3](https://nuxt.com/) (Vue 3)
 8 | - [Tailwind CSS](https://tailwindcss.com/) (with Radix UI colors)
 9 | - [TypeScript](https://www.typescriptlang.org/)
10 | 
11 | ## Key Features
12 | 
13 | - Displays daily intelligence briefs with rich formatting (`/briefs/[slug]`).
14 | - Interactive Table of Contents for easy navigation within briefs.
15 | - Subscription form for updates (`/`).
16 | - Consumes the Meridian API (via Nitro server routes in `/server/api` and potentially external workers).
17 | 
18 | ## Setup
19 | 
20 | Make sure you have [Node.js](https://nodejs.org/) (v22+ recommended) and [pnpm](https://pnpm.io/) installed.
21 | 
22 | From the _root_ of the Meridian monorepo:
23 | 
24 | ```bash
25 | # Install all workspace dependencies
26 | pnpm install
27 | ```
28 | 
29 | Or, if you're only working within this app (less common in a monorepo):
30 | 
31 | ```bash
32 | cd apps/frontend
33 | pnpm install
34 | ```
35 | 
36 | You'll also need to ensure the necessary environment variables are configured (likely in a `.env` file in the root or this directory, depending on your setup) – particularly for the database connection (`DATABASE_URL`) and any external API endpoints (`WORKER_API`). See the [main project README](https://github.com/iliane5/meridian#setup) for full setup details.
37 | 
38 | ## Development Server
39 | 
40 | Start the Nuxt development server (usually on `http://localhost:3000`):
41 | 
42 | ```bash
43 | # From the root directory
44 | pnpm --filter @meridian/frontend dev
45 | 
46 | # Or from the apps/frontend directory
47 | pnpm dev
48 | ```
49 | 
50 | ## Production Build
51 | 
52 | Build the application for production:
53 | 
54 | ```bash
55 | # From the root directory
56 | pnpm --filter @meridian/frontend build
57 | 
58 | # Or from the apps/frontend directory
59 | pnpm build
60 | ```
61 | 
62 | Locally preview the production build:
63 | 
64 | ```bash
65 | # From the root directory
66 | pnpm --filter @meridian/frontend preview
67 | 
68 | # Or from the apps/frontend directory
69 | pnpm preview
70 | ```
71 | 
72 | ## Deployment
73 | 
74 | This application is typically deployed using [Cloudflare Pages](https://pages.cloudflare.com/).
75 | 
76 | Check out the [Nuxt deployment documentation](https://nuxt.com/docs/getting-started/deployment) for general deployment information.
77 | 


--------------------------------------------------------------------------------
/apps/backend/src/routers/sources.router.ts:
--------------------------------------------------------------------------------
 1 | import { zValidator } from '@hono/zod-validator';
 2 | import { $data_sources, eq } from '@meridian/database';
 3 | import { Hono } from 'hono';
 4 | import { z } from 'zod';
 5 | import type { HonoEnv } from '../app';
 6 | import { Logger } from '../lib/logger';
 7 | import { tryCatchAsync } from '../lib/tryCatchAsync';
 8 | import { getDb, hasValidAuthToken } from '../lib/utils';
 9 | 
10 | const logger = new Logger({ router: 'sources' });
11 | 
12 | const route = new Hono<HonoEnv>().delete(
13 |   '/:id',
14 |   zValidator(
15 |     'param',
16 |     z.object({
17 |       id: z.coerce.number(),
18 |     })
19 |   ),
20 |   async c => {
21 |     if (!hasValidAuthToken(c)) {
22 |       return c.json({ error: 'Unauthorized' }, 401);
23 |     }
24 | 
25 |     const routeLogger = logger.child({
26 |       operation: 'delete-source',
27 |       source_id: c.req.valid('param').id,
28 |     });
29 |     routeLogger.info('Attempting to delete source');
30 | 
31 |     const db = getDb(c.env.HYPERDRIVE);
32 | 
33 |     const sourceResult = await tryCatchAsync(
34 |       db.query.$data_sources.findFirst({
35 |         where: eq($data_sources.id, c.req.valid('param').id),
36 |       })
37 |     );
38 |     if (sourceResult.isErr()) {
39 |       const error = sourceResult.error instanceof Error ? sourceResult.error : new Error(String(sourceResult.error));
40 |       routeLogger.error('Failed to fetch source', undefined, error);
41 |       return c.json({ error: 'Failed to fetch source' }, 500);
42 |     }
43 | 
44 |     const source = sourceResult.value;
45 |     if (source === undefined) {
46 |       routeLogger.warn('Source not found');
47 |       return c.json({ error: "Source doesn't exist" }, 404);
48 |     }
49 | 
50 |     routeLogger.debug('Source found, proceeding with deletion', { source_url: source.config.config.url });
51 |     const doId = c.env.DATA_SOURCE_INGESTOR.idFromName(source.config.config.url); // Use URL for ID stability
52 |     const stub = c.env.DATA_SOURCE_INGESTOR.get(doId);
53 | 
54 |     const deleteResult = await tryCatchAsync(
55 |       Promise.all([db.delete($data_sources).where(eq($data_sources.id, c.req.valid('param').id)), stub.destroy()])
56 |     );
57 |     if (deleteResult.isErr()) {
58 |       const error = deleteResult.error instanceof Error ? deleteResult.error : new Error(String(deleteResult.error));
59 |       routeLogger.error('Failed to delete source', undefined, error);
60 |       return c.json({ error: 'Failed to delete source' }, 500);
61 |     }
62 | 
63 |     routeLogger.info('Source deleted successfully');
64 |     return c.json({ success: true });
65 |   }
66 | );
67 | 
68 | export default route;
69 | 


--------------------------------------------------------------------------------
/apps/backend/test/parseRss.spec.ts:
--------------------------------------------------------------------------------
 1 | import { readFileSync } from 'node:fs';
 2 | import path from 'node:path';
 3 | import { describe, expect, it } from 'vitest';
 4 | import { parseRSSFeed } from '../src/lib/parsers';
 5 | 
 6 | describe('parseRssFeed', () => {
 7 |   // helper to load fixtures
 8 |   const loadFixture = (filename: string) => readFileSync(path.join(__dirname, 'fixtures', filename), 'utf-8');
 9 | 
10 |   it('handles independant.co.uk feed', async () => {
11 |     const xml = loadFixture('independant_co_uk.xml');
12 |     const result = await parseRSSFeed(xml);
13 |     if (result.isErr()) throw result.error;
14 | 
15 |     expect(result.value).toHaveLength(100);
16 | 
17 |     expect(result.value[0].title).toBe(
18 |       'Trump makes good on promise as thousands of JFK assassination files are released: Live updates'
19 |     );
20 |     expect(result.value[0].link).toBe(
21 |       'https://www.independent.co.uk/news/world/americas/us-politics/jfk-files-released-assassination-trump-b2717229.html'
22 |     );
23 |     expect(result.value[0].pubDate).toStrictEqual(new Date('Tue, 18 Mar 2025 23:24:58 GMT'));
24 |   });
25 | 
26 |   it('handles cn.nytimes.com feed', async () => {
27 |     const xml = loadFixture('cn_nytimes_com.xml');
28 |     const result = await parseRSSFeed(xml);
29 |     if (result.isErr()) throw result.error;
30 | 
31 |     expect(result.value).toHaveLength(20);
32 | 
33 |     expect(result.value[0].title).toBe('前高管揭Facebook内幕：配合北京开发审查工具');
34 |     expect(result.value[0].link).toBe('https://cn.nytimes.com/culture/20250318/careless-people-sarah-wynn-williams/');
35 |     expect(result.value[0].pubDate).toStrictEqual(new Date('Tue, 18 Mar 2025 04:59:35 +0800'));
36 |   });
37 | 
38 |   it('handles ft.com feed', async () => {
39 |     const xml = loadFixture('ft_com.xml');
40 |     const result = await parseRSSFeed(xml);
41 | 
42 |     if (result.isErr()) throw result.error;
43 | 
44 |     expect(result.value).toHaveLength(25);
45 | 
46 |     expect(result.value[0].title).toBe('‘If Trump defies a Supreme Court order, will it matter to markets?’');
47 |     expect(result.value[0].link).toBe('https://www.ft.com/content/2e579290-fc0c-4b88-8703-f0bae45266d9');
48 |     expect(result.value[0].pubDate).toStrictEqual(new Date('Tue, 18 Mar 2025 23:34:47 GMT'));
49 |   });
50 | 
51 |   it('handles theverge.com feed', async () => {
52 |     const xml = loadFixture('theverge_com.xml');
53 |     const result = await parseRSSFeed(xml);
54 |     if (result.isErr()) throw result.error;
55 | 
56 |     expect(result.value).toHaveLength(10);
57 | 
58 |     expect(result.value[0].title).toBe('The Boeing Starliner astronauts returned to Earth today');
59 |     expect(result.value[0].link).toBe(
60 |       'https://www.theverge.com/news/628311/nasa-crew-10-mission-starliner-astronauts-return-spacex'
61 |     );
62 |     expect(result.value[0].pubDate).toStrictEqual(new Date('2025-03-18T18:04:44-04:00'));
63 |   });
64 | });
65 | 


--------------------------------------------------------------------------------
/apps/frontend/src/components/SubscriptionForm.vue:
--------------------------------------------------------------------------------
 1 | <script setup lang="ts">
 2 | const COOKIE_NAME = 'meridian_subscribed';
 3 | const LEGACY_STORAGE_KEY = 'meridian_subscribed';
 4 | 
 5 | // Subscription state
 6 | const email = ref('');
 7 | const isSubmitting = ref(false);
 8 | const errorMessage = ref('');
 9 | const hasSubscribed = useCookie<string | null>(COOKIE_NAME);
10 | 
11 | // Migrate old localStorage data if it exists
12 | onMounted(() => {
13 |   const legacyValue = localStorage.getItem(LEGACY_STORAGE_KEY);
14 |   if (legacyValue === 'true' && !hasSubscribed.value) {
15 |     hasSubscribed.value = 'true';
16 |     localStorage.removeItem(LEGACY_STORAGE_KEY);
17 |   }
18 | });
19 | 
20 | /**
21 |  * Subscription form handlers
22 |  */
23 | const handleSubmit = async () => {
24 |   isSubmitting.value = true;
25 |   errorMessage.value = '';
26 | 
27 |   try {
28 |     const response = await $fetch('/api/subscribe', {
29 |       method: 'POST',
30 |       body: { email: email.value },
31 |     });
32 | 
33 |     if (!response.success) {
34 |       throw new Error(response.message || 'Failed to subscribe');
35 |     }
36 | 
37 |     email.value = '';
38 |     hasSubscribed.value = 'true';
39 |   } catch (error: unknown) {
40 |     errorMessage.value = error instanceof Error ? error.message : 'Something went wrong, please try again.';
41 |     console.error('Subscription error:', error);
42 |   } finally {
43 |     isSubmitting.value = false;
44 |   }
45 | };
46 | 
47 | const handleChangeEmail = () => {
48 |   hasSubscribed.value = null;
49 |   errorMessage.value = '';
50 | };
51 | </script>
52 | 
53 | <template>
54 |   <div>
55 |     <div v-if="!hasSubscribed" class="gap-2 text-sm flex flex-col items-center">
56 |       <p>Want this brief in your inbox? Sign up for updates</p>
57 |       <form class="flex flex-col group max-w-md mx-auto" @submit.prevent="handleSubmit">
58 |         <div class="flex border border-zinc-300">
59 |           <input
60 |             v-model="email"
61 |             type="email"
62 |             placeholder="your@email.com"
63 |             required
64 |             :aria-invalid="!!errorMessage"
65 |             aria-describedby="subscription-error"
66 |             class="flex-grow px-4 py-2 focus:outline-zinc-400"
67 |           />
68 |           <button
69 |             type="submit"
70 |             class="bg-zinc-300 text-zinc-700 hover:cursor-pointer px-4 py-2 font-medium hover:bg-zinc-400 dark:hover:bg-zinc-400 transition-colors"
71 |             :disabled="isSubmitting"
72 |           >
73 |             {{ isSubmitting ? 'Sending...' : 'Subscribe' }}
74 |           </button>
75 |         </div>
76 |         <div v-if="errorMessage" id="subscription-error" class="text-red-600 text-xs mt-2" role="alert">
77 |           {{ errorMessage }}
78 |         </div>
79 |       </form>
80 |     </div>
81 |     <div v-else class="text-center text-sm">
82 |       <p>You're subscribed to our updates!</p>
83 |       <button class="underline mt-2 text-xs" @click="handleChangeEmail">Change email</button>
84 |     </div>
85 |   </div>
86 | </template>
87 | 


--------------------------------------------------------------------------------
/packages/database/migrations/0001_premium_wolfpack.sql:
--------------------------------------------------------------------------------
 1 | CREATE TYPE "public"."ingested_item_status" AS ENUM('NEW', 'PENDING_PROCESSING', 'PROCESSED', 'FAILED_FETCH', 'FAILED_PROCESSING', 'SKIPPED_PDF', 'SKIPPED_TOO_OLD');--> statement-breakpoint
 2 | CREATE TYPE "public"."source_type" AS ENUM('RSS');--> statement-breakpoint
 3 | CREATE TABLE IF NOT EXISTS "data_sources" (
 4 | 	"id" serial PRIMARY KEY NOT NULL,
 5 | 	"name" text NOT NULL,
 6 | 	"source_type" "source_type" NOT NULL,
 7 | 	"config" jsonb NOT NULL,
 8 | 	"config_version_hash" text,
 9 | 	"publisher_id" integer,
10 | 	"scrape_frequency_minutes" integer DEFAULT 240 NOT NULL,
11 | 	"last_checked" timestamp,
12 | 	"do_initialized_at" timestamp,
13 | 	"created_at" timestamp DEFAULT now() NOT NULL,
14 | 	"updated_at" timestamp DEFAULT now() NOT NULL
15 | );
16 | --> statement-breakpoint
17 | CREATE TABLE IF NOT EXISTS "ingested_items" (
18 | 	"id" bigserial PRIMARY KEY NOT NULL,
19 | 	"item_id_from_source" text NOT NULL,
20 | 	"raw_data_r2_key" text NOT NULL,
21 | 	"display_title" text,
22 | 	"url_to_original" text NOT NULL,
23 | 	"published_at" timestamp,
24 | 	"status" "ingested_item_status" DEFAULT 'NEW',
25 | 	"content_body_r2_key" text,
26 | 	"content_body_text" text,
27 | 	"word_count" integer,
28 | 	"analysis_payload" jsonb,
29 | 	"source_specific_metadata" jsonb,
30 | 	"used_browser" boolean,
31 | 	"embedding" vector(384),
32 | 	"fail_reason" text,
33 | 	"data_source_id" integer NOT NULL,
34 | 	"processed_at" timestamp,
35 | 	"ingested_at" timestamp DEFAULT CURRENT_TIMESTAMP,
36 | 	CONSTRAINT "ingested_items_url_to_original_unique" UNIQUE("url_to_original"),
37 | 	CONSTRAINT "uniqueSourceItem" UNIQUE("data_source_id","item_id_from_source")
38 | );
39 | --> statement-breakpoint
40 | CREATE TABLE IF NOT EXISTS "newsletter" (
41 | 	"id" serial PRIMARY KEY NOT NULL,
42 | 	"email" text NOT NULL,
43 | 	"created_at" timestamp DEFAULT CURRENT_TIMESTAMP,
44 | 	CONSTRAINT "newsletter_email_unique" UNIQUE("email")
45 | );
46 | --> statement-breakpoint
47 | CREATE TABLE IF NOT EXISTS "publishers" (
48 | 	"id" serial PRIMARY KEY NOT NULL,
49 | 	"name" text NOT NULL,
50 | 	"base_url" text,
51 | 	"created_at" timestamp DEFAULT now() NOT NULL
52 | );
53 | --> statement-breakpoint
54 | CREATE TABLE IF NOT EXISTS "reports" (
55 | 	"id" serial PRIMARY KEY NOT NULL,
56 | 	"title" text NOT NULL,
57 | 	"content" text NOT NULL,
58 | 	"total_articles" integer NOT NULL,
59 | 	"total_sources" integer NOT NULL,
60 | 	"used_articles" integer NOT NULL,
61 | 	"used_sources" integer NOT NULL,
62 | 	"tldr" text,
63 | 	"clustering_params" jsonb,
64 | 	"model_author" text,
65 | 	"created_at" timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL
66 | );
67 | --> statement-breakpoint
68 | ALTER TABLE IF EXISTS "data_sources" ADD CONSTRAINT "data_sources_publisher_id_publishers_id_fk" FOREIGN KEY ("publisher_id") REFERENCES "public"."publishers"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
69 | ALTER TABLE IF EXISTS "ingested_items" ADD CONSTRAINT "ingested_items_data_source_id_data_sources_id_fk" FOREIGN KEY ("data_source_id") REFERENCES "public"."data_sources"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
70 | CREATE INDEX IF NOT EXISTS "embeddingIndex" ON "ingested_items" USING hnsw ("embedding" vector_cosine_ops);


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/admin/sources/[id]/details.get.ts:
--------------------------------------------------------------------------------
 1 | import { $ingested_items, $data_sources, eq, and, desc, ingestedItemStatusEnum } from '@meridian/database';
 2 | import { getDB } from '~/server/lib/utils';
 3 | 
 4 | // to access the enums
 5 | type ArticleStatus = (typeof ingestedItemStatusEnum.enumValues)[number];
 6 | 
 7 | export default defineEventHandler(async event => {
 8 |   await requireUserSession(event); // require auth
 9 | 
10 |   const sourceId = Number(getRouterParam(event, 'id'));
11 |   if (Number.isNaN(sourceId)) {
12 |     throw createError({ statusCode: 400, statusMessage: 'Invalid source ID' });
13 |   }
14 | 
15 |   // get source details
16 |   const db = getDB(event);
17 |   const source = await db.query.$data_sources.findFirst({ where: eq($data_sources.id, sourceId) });
18 |   if (source === undefined) {
19 |     throw createError({ statusCode: 404, statusMessage: 'Source not found' });
20 |   }
21 | 
22 |   // get query params for filtering and sorting
23 |   const query = getQuery(event);
24 |   const page = Number(query.page) || 1;
25 |   const pageSize = 50;
26 |   const status = query.status as string;
27 |   const sortBy = (query.sortBy as string) || 'createdAt';
28 |   const sortOrder = query.sortOrder === 'asc' ? 'asc' : 'desc';
29 | 
30 |   // build where clause
31 |   const conditions = [eq($ingested_items.data_source_id, sourceId)];
32 | 
33 |   // only add conditions if they're valid enum values
34 |   if (ingestedItemStatusEnum.enumValues.includes(status as ArticleStatus)) {
35 |     conditions.push(eq($ingested_items.status, status as ArticleStatus));
36 |   }
37 | 
38 |   const whereClause = and(...conditions);
39 | 
40 |   // determine sort field
41 |   const sortField =
42 |     sortBy === 'publishedAt'
43 |       ? $ingested_items.published_at
44 |       : sortBy === 'processedAt'
45 |         ? $ingested_items.processed_at
46 |         : $ingested_items.ingested_at;
47 | 
48 |   // get articles with filters and sorting
49 |   const articles = await db.query.$ingested_items.findMany({
50 |     where: whereClause,
51 |     orderBy: sortOrder === 'asc' ? sortField : desc(sortField),
52 |     limit: pageSize,
53 |     offset: (page - 1) * pageSize,
54 |   });
55 | 
56 |   // get total count with filters
57 |   const totalCount = await db.query.$ingested_items.findMany({
58 |     where: whereClause,
59 |     columns: { id: true },
60 |   });
61 | 
62 |   return {
63 |     id: source.id,
64 |     name: source.name,
65 |     url: source.config.config.url,
66 |     initialized: source.do_initialized_at !== null,
67 |     frequency:
68 |       source.scrape_frequency_minutes <= 60
69 |         ? 'Hourly'
70 |         : source.scrape_frequency_minutes <= 120
71 |           ? '4 Hours'
72 |           : source.scrape_frequency_minutes <= 180
73 |             ? '6 Hours'
74 |             : 'Daily',
75 |     lastFetched: source.lastChecked?.toISOString(),
76 |     articles: articles.map(article => ({
77 |       id: article.id,
78 |       title: article.display_title ?? 'Unknown',
79 |       url: article.url_to_original ?? 'Unknown',
80 |       publishedAt: article.published_at?.toISOString(),
81 |       status: article.status,
82 |       failReason: article.fail_reason,
83 |       processedAt: article.processed_at?.toISOString(),
84 |       createdAt: article.ingested_at?.toISOString(),
85 |       hasEmbedding: article.embedding !== null,
86 |     })),
87 |     pagination: {
88 |       currentPage: page,
89 |       totalPages: Math.ceil(totalCount.length / pageSize),
90 |       totalItems: totalCount.length,
91 |     },
92 |   };
93 | });
94 | 


--------------------------------------------------------------------------------
/services/meridian-ml-service/Dockerfile:
--------------------------------------------------------------------------------
 1 | # syntax=docker/dockerfile:1
 2 | 
 3 | # --- Builder Stage ---
 4 | FROM python:3.11-slim AS builder
 5 | 
 6 | # Install uv (your project uses it, it's fast)
 7 | RUN pip install uv
 8 | 
 9 | WORKDIR /app
10 | 
11 | # Copy only dependency definitions first for better layer caching
12 | COPY pyproject.toml ./
13 | 
14 | # Install dependencies efficiently in one step and clean up
15 | RUN uv pip install --system --no-cache --index-strategy unsafe-best-match --extra-index-url https://download.pytorch.org/whl/cpu --requirement pyproject.toml && \
16 |     rm -rf /root/.cache /tmp/* /var/tmp/*
17 | 
18 | # Pre-download the model and save it to a known location
19 | RUN mkdir -p /app/models && \
20 |     python3 -c "from transformers import AutoTokenizer, AutoModel; \
21 |     model_name = 'intfloat/multilingual-e5-small'; \
22 |     tokenizer = AutoTokenizer.from_pretrained(model_name); \
23 |     model = AutoModel.from_pretrained(model_name); \
24 |     tokenizer.save_pretrained('/app/models'); \
25 |     model.save_pretrained('/app/models')"
26 | 
27 | # --- Runtime Stage ---
28 | FROM python:3.11-slim
29 | 
30 | # Install only runtime essentials and clean up in one layer
31 | RUN apt-get update && \
32 |     apt-get install -y --no-install-recommends \
33 |     ca-certificates && \
34 |     apt-get clean && \
35 |     rm -rf /var/lib/apt/lists/*
36 | 
37 | WORKDIR /app
38 | 
39 | # Create a non-root user for security
40 | RUN useradd --create-home --shell /bin/bash appuser
41 | USER appuser
42 | WORKDIR /home/appuser/app
43 | 
44 | # Copy installed dependencies from the builder stage's system python env
45 | COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
46 | COPY --from=builder /usr/local/bin /usr/local/bin
47 | 
48 | # Copy the pre-downloaded model
49 | COPY --from=builder /app/models /home/appuser/app/models
50 | 
51 | # Copy your application code
52 | # Important: Ensure the path matches your project structure relative to the Dockerfile
53 | # Assuming Dockerfile is in 'meridian-ml-service' directory
54 | COPY --chown=appuser:appuser ./src ./src
55 | # Good practice, though likely not needed at runtime here
56 | COPY --chown=appuser:appuser pyproject.toml ./
57 | 
58 | # Environment variables
59 | ENV PYTHONUNBUFFERED=1 \
60 |     # Add src directory to Python path so modules can be found
61 |     PYTHONPATH=/home/appuser/app:${PYTHONPATH} \
62 |     # Fly.io sets PORT automatically, uvicorn will pick it up via $PORT in CMD
63 |     # Default model from your config. Can be overridden via fly secrets.
64 |     EMBEDDING_MODEL_NAME="/home/appuser/app/models" \
65 |     # API_TOKEN should be provided via secrets at runtime, not in the Dockerfile
66 |     # Set Hugging Face cache directory to somewhere writeable by appuser
67 |     HF_HOME=/home/appuser/.cache/huggingface \
68 |     TRANSFORMERS_CACHE=/home/appuser/.cache/huggingface/transformers \
69 |     HF_HUB_CACHE=/home/appuser/.cache/huggingface/hub
70 | 
71 | # Ensure the cache directory exists and is owned by the app user
72 | # This RUN command executes as root before switching back to appuser implicitly for CMD
73 | USER root
74 | RUN mkdir -p /home/appuser/.cache/huggingface && \
75 |     chown -R appuser:appuser /home/appuser/.cache
76 | USER appuser
77 | 
78 | # Expose the default port. Fly will map this.
79 | EXPOSE 8080
80 | 
81 | # Run the application using uvicorn
82 | # Update the import path to match your module structure
83 | # Use $PORT which fly provides.
84 | CMD ["uvicorn", "src.meridian_ml_service.main:app", "--host", "0.0.0.0", "--port", "8080"]


--------------------------------------------------------------------------------
/apps/backend/wrangler.jsonc:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * For more details on how to configure Wrangler, refer to:
  3 |  * https://developers.cloudflare.com/workers/wrangler/configuration/
  4 |  */
  5 | {
  6 |   "$schema": "node_modules/wrangler/config-schema.json",
  7 |   "name": "meridian-backend",
  8 |   "main": "src/index.ts",
  9 |   "compatibility_date": "2025-04-30",
 10 |   "compatibility_flags": ["nodejs_compat"],
 11 |   "migrations": [
 12 |     {
 13 |       "new_sqlite_classes": ["DataSourceIngestorDO"],
 14 |       "tag": "v1",
 15 |     },
 16 |   ],
 17 |   "durable_objects": {
 18 |     "bindings": [
 19 |       {
 20 |         "class_name": "DataSourceIngestorDO",
 21 |         "name": "DATA_SOURCE_INGESTOR",
 22 |       },
 23 |     ],
 24 |   },
 25 |   "observability": {
 26 |     "enabled": true,
 27 |   },
 28 |   "hyperdrive": [
 29 |     {
 30 |       "binding": "HYPERDRIVE",
 31 |       "id": "b748bf8359b74c519d64501151cecd80",
 32 |       "localConnectionString": "postgresql://postgres:mysecretpassword@localhost:5432/postgres",
 33 |     },
 34 |   ],
 35 |   /**
 36 |    * Smart Placement
 37 |    * Docs: https://developers.cloudflare.com/workers/configuration/smart-placement/#smart-placement
 38 |    */
 39 |   "placement": { "mode": "smart" },
 40 |   /**
 41 |    * Bindings
 42 |    * Bindings allow your Worker to interact with resources on the Cloudflare Developer Platform, including
 43 |    * databases, object storage, AI inference, real-time communication and more.
 44 |    * https://developers.cloudflare.com/workers/runtime-apis/bindings/
 45 |    */
 46 |   "queues": {
 47 |     "producers": [
 48 |       {
 49 |         "queue": "meridian-article-processing-queue-prod",
 50 |         "binding": "ARTICLE_PROCESSING_QUEUE",
 51 |       },
 52 |     ],
 53 |     "consumers": [
 54 |       {
 55 |         "queue": "meridian-article-processing-queue-prod",
 56 |         "max_batch_size": 100,
 57 |         "max_batch_timeout": 30,
 58 |         "max_retries": 5,
 59 |         "dead_letter_queue": "meridian-article-processing-dlq",
 60 |         // "retry_delay": 60
 61 |       },
 62 |     ],
 63 |   },
 64 |   "r2_buckets": [
 65 |     {
 66 |       "binding": "ARTICLES_BUCKET",
 67 |       "bucket_name": "meridian-articles-prod",
 68 |       "preview_bucket_name": "meridian-articles-dev",
 69 |       "jurisdiction": "eu",
 70 |     },
 71 |   ],
 72 |   "workflows": [
 73 |     {
 74 |       "name": "meridian_process_ingested_item",
 75 |       "binding": "PROCESS_INGESTED_ITEM",
 76 |       "class_name": "ProcessIngestedItemWorkflow",
 77 |     },
 78 |   ],
 79 |   // !!! NOTE !!! : tail workers make workers with durable objects CRASH for now - 30/04/2025
 80 |   // "tail_consumers": [
 81 |   //   {
 82 |   //     "service": "meridian-backend",
 83 |   //   },
 84 |   // ],
 85 |   /**
 86 |    * Environment Variables
 87 |    * https://developers.cloudflare.com/workers/wrangler/configuration/#environment-variables
 88 |    */
 89 |   // "vars": { "MY_VARIABLE": "production_value" },
 90 |   /**
 91 |    * Note: Use secrets to store sensitive data.
 92 |    * https://developers.cloudflare.com/workers/configuration/secrets/
 93 |    */
 94 | 
 95 |   /**
 96 |    * Static Assets
 97 |    * https://developers.cloudflare.com/workers/static-assets/binding/
 98 |    */
 99 |   // "assets": { "directory": "./public/", "binding": "ASSETS" },
100 | 
101 |   /**
102 |    * Service Bindings (communicate between multiple Workers)
103 |    * https://developers.cloudflare.com/workers/wrangler/configuration/#service-bindings
104 |    */
105 |   // "services": [{ "binding": "MY_SERVICE", "service": "my-service" }]
106 | }
107 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore
  2 | 
  3 | # Logs
  4 | 
  5 | logs
  6 | _.log
  7 | npm-debug.log_
  8 | yarn-debug.log*
  9 | yarn-error.log*
 10 | lerna-debug.log*
 11 | .pnpm-debug.log*
 12 | 
 13 | # Caches
 14 | 
 15 | .cache
 16 | 
 17 | # Diagnostic reports (https://nodejs.org/api/report.html)
 18 | 
 19 | report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
 20 | 
 21 | # Runtime data
 22 | 
 23 | pids
 24 | _.pid
 25 | _.seed
 26 | *.pid.lock
 27 | 
 28 | # Directory for instrumented libs generated by jscoverage/JSCover
 29 | 
 30 | lib-cov
 31 | 
 32 | # Coverage directory used by tools like istanbul
 33 | 
 34 | coverage
 35 | *.lcov
 36 | 
 37 | # nyc test coverage
 38 | 
 39 | .nyc_output
 40 | 
 41 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
 42 | 
 43 | .grunt
 44 | 
 45 | # Bower dependency directory (https://bower.io/)
 46 | 
 47 | bower_components
 48 | 
 49 | # node-waf configuration
 50 | 
 51 | .lock-wscript
 52 | 
 53 | # Compiled binary addons (https://nodejs.org/api/addons.html)
 54 | 
 55 | build/Release
 56 | 
 57 | # Dependency directories
 58 | 
 59 | node_modules/
 60 | jspm_packages/
 61 | 
 62 | # Snowpack dependency directory (https://snowpack.dev/)
 63 | 
 64 | web_modules/
 65 | 
 66 | # TypeScript cache
 67 | 
 68 | *.tsbuildinfo
 69 | 
 70 | # Optional npm cache directory
 71 | 
 72 | .npm
 73 | 
 74 | # Optional eslint cache
 75 | 
 76 | .eslintcache
 77 | 
 78 | # Optional stylelint cache
 79 | 
 80 | .stylelintcache
 81 | 
 82 | # Microbundle cache
 83 | 
 84 | .rpt2_cache/
 85 | .rts2_cache_cjs/
 86 | .rts2_cache_es/
 87 | .rts2_cache_umd/
 88 | 
 89 | # Optional REPL history
 90 | 
 91 | .node_repl_history
 92 | 
 93 | # Output of 'npm pack'
 94 | 
 95 | *.tgz
 96 | 
 97 | # Yarn Integrity file
 98 | 
 99 | .yarn-integrity
100 | 
101 | # dotenv environment variable files
102 | 
103 | .env
104 | .env.development.local
105 | .env.test.local
106 | .env.production.local
107 | .env.local
108 | 
109 | # parcel-bundler cache (https://parceljs.org/)
110 | 
111 | .parcel-cache
112 | 
113 | # Next.js build output
114 | 
115 | .next
116 | out
117 | 
118 | # Nuxt.js build / generate output
119 | 
120 | .nuxt
121 | dist
122 | 
123 | # Gatsby files
124 | 
125 | # Comment in the public line in if your project uses Gatsby and not Next.js
126 | 
127 | # https://nextjs.org/blog/next-9-1#public-directory-support
128 | 
129 | # public
130 | 
131 | # vuepress build output
132 | 
133 | .vuepress/dist
134 | 
135 | # vuepress v2.x temp and cache directory
136 | 
137 | .temp
138 | 
139 | # Docusaurus cache and generated files
140 | 
141 | .docusaurus
142 | 
143 | # Serverless directories
144 | 
145 | .serverless/
146 | 
147 | # FuseBox cache
148 | 
149 | .fusebox/
150 | 
151 | # DynamoDB Local files
152 | 
153 | .dynamodb/
154 | 
155 | # TernJS port file
156 | 
157 | .tern-port
158 | 
159 | # Stores VSCode versions used for testing VSCode extensions
160 | 
161 | .vscode-test
162 | 
163 | # yarn v2
164 | 
165 | .yarn/cache
166 | .yarn/unplugged
167 | .yarn/build-state.yml
168 | .yarn/install-state.gz
169 | .pnp.*
170 | 
171 | # IntelliJ based IDEs
172 | .idea
173 | 
174 | # Finder (MacOS) folder config
175 | .DS_Store
176 | 
177 | .venv
178 | 
179 | .wrangler
180 | 
181 | .turbo
182 | 
183 | notebooks
184 | 
185 | browser-worker
186 | 
187 | .dev.vars
188 | 
189 | apps/scrapers/feeds.json
190 | 
191 | __pycache__
192 | 
193 | apps/briefs
194 | 
195 | # python stuff
196 | .mypy_cache
197 | .ruff_cache
198 | *.egg-info
199 | 
200 | 
201 | # TEMP
202 | feeds.json
203 | CONTRIBUTING.md
204 | MAIN_LIST.JSON
205 | NOTES.MD
206 | SETUP.MD
207 | TODO.MD
208 | reportV5-confidential.ipynb
209 | 


--------------------------------------------------------------------------------
/apps/backend/src/index.ts:
--------------------------------------------------------------------------------
 1 | import app from './app';
 2 | import { DataSourceIngestorDO } from './durable_objects/dataSourceIngestorDO';
 3 | import { Logger } from './lib/logger';
 4 | import { type ProcessArticlesParams, startProcessArticleWorkflow } from './workflows/processIngestedItem.workflow';
 5 | 
 6 | export type Env = {
 7 |   // Bindings
 8 |   ARTICLES_BUCKET: R2Bucket;
 9 |   ARTICLE_PROCESSING_QUEUE: Queue<ProcessArticlesParams>;
10 |   DATA_SOURCE_INGESTOR: DurableObjectNamespace<DataSourceIngestorDO>;
11 |   PROCESS_INGESTED_ITEM: Workflow<ProcessArticlesParams>;
12 |   HYPERDRIVE: Hyperdrive;
13 | 
14 |   // Secrets
15 |   API_TOKEN: string;
16 | 
17 |   AXIOM_DATASET: string | undefined; // optional, use if you want to send logs to axiom
18 |   AXIOM_TOKEN: string | undefined; // optional, use if you want to send logs to axiom
19 | 
20 |   CLOUDFLARE_API_TOKEN: string;
21 |   CLOUDFLARE_ACCOUNT_ID: string;
22 | 
23 |   DATABASE_URL: string;
24 | 
25 |   GEMINI_API_KEY: string;
26 |   GEMINI_BASE_URL: string;
27 | 
28 |   MERIDIAN_ML_SERVICE_URL: string;
29 |   MERIDIAN_ML_SERVICE_API_KEY: string;
30 | };
31 | 
32 | // Create a base logger for the queue handler
33 | const queueLogger = new Logger({ service: 'article-queue-handler' });
34 | 
35 | export default {
36 |   fetch: app.fetch,
37 |   async queue(batch: MessageBatch<unknown>, env: Env): Promise<void> {
38 |     const batchLogger = queueLogger.child({ batch_size: batch.messages.length });
39 |     batchLogger.info('Received batch of articles to process');
40 | 
41 |     const articlesToProcess: number[] = [];
42 |     for (const message of batch.messages) {
43 |       const { ingested_item_ids } = message.body as ProcessArticlesParams;
44 |       batchLogger.debug('Processing message', { message_id: message.id, article_count: ingested_item_ids.length });
45 | 
46 |       for (const id of ingested_item_ids) {
47 |         articlesToProcess.push(id);
48 |       }
49 |     }
50 | 
51 |     batchLogger.info('Articles extracted from batch', { total_articles: articlesToProcess.length });
52 | 
53 |     if (articlesToProcess.length === 0) {
54 |       batchLogger.info('Queue batch was empty, nothing to process');
55 |       batch.ackAll(); // Acknowledge the empty batch
56 |       return;
57 |     }
58 | 
59 |     // Process articles in chunks of 96
60 |     const CHUNK_SIZE = 96;
61 |     const articleChunks = [];
62 |     for (let i = 0; i < articlesToProcess.length; i += CHUNK_SIZE) {
63 |       articleChunks.push(articlesToProcess.slice(i, i + CHUNK_SIZE));
64 |     }
65 | 
66 |     batchLogger.info('Split articles into chunks', { chunk_count: articleChunks.length });
67 | 
68 |     // Process each chunk sequentially
69 |     for (const chunk of articleChunks) {
70 |       const workflowResult = await startProcessArticleWorkflow(env, { ingested_item_ids: chunk });
71 |       if (workflowResult.isErr()) {
72 |         batchLogger.error(
73 |           'Failed to trigger ProcessArticles Workflow',
74 |           { error_message: workflowResult.error.message, chunk_size: chunk.length },
75 |           workflowResult.error
76 |         );
77 |         // Retry the entire batch if Workflow creation failed
78 |         batch.retryAll({ delaySeconds: 30 }); // Retry after 30 seconds
79 |         return;
80 |       }
81 | 
82 |       batchLogger.info('Successfully triggered ProcessArticles Workflow for chunk', {
83 |         workflow_id: workflowResult.value.id,
84 |         chunk_size: chunk.length,
85 |       });
86 |     }
87 | 
88 |     batch.ackAll(); // Acknowledge the entire batch after all chunks are processed
89 |   },
90 | } satisfies ExportedHandler<Env>;
91 | 
92 | export { DataSourceIngestorDO };
93 | export { ProcessIngestedItemWorkflow } from './workflows/processIngestedItem.workflow';
94 | 


--------------------------------------------------------------------------------
/apps/backend/test/utils.spec.ts:
--------------------------------------------------------------------------------
  1 | import type { Context } from 'hono';
  2 | import { beforeEach, describe, expect, it, vi } from 'vitest';
  3 | import type { HonoEnv } from '../src/app';
  4 | import { hasValidAuthToken } from '../src/lib/utils';
  5 | 
  6 | describe('hasValidAuthToken', () => {
  7 |   // Mock Context object
  8 |   let mockContext: Context<HonoEnv>;
  9 |   const validToken = 'valid-token-12345';
 10 | 
 11 |   beforeEach(() => {
 12 |     // Reset mocks
 13 |     vi.resetAllMocks();
 14 | 
 15 |     // Create a mock context with request headers and environment
 16 |     mockContext = {
 17 |       req: {
 18 |         header: vi.fn(),
 19 |       },
 20 |       env: {
 21 |         API_TOKEN: validToken,
 22 |       },
 23 |     } as unknown as Context<HonoEnv>;
 24 |   });
 25 | 
 26 |   it('should return true when Authorization header has the correct Bearer token', () => {
 27 |     // Setup header mock to return the valid token
 28 |     mockContext.req.header = vi.fn().mockImplementation((name: string) => {
 29 |       if (name === 'Authorization') return `Bearer ${validToken}`;
 30 |       return undefined;
 31 |     });
 32 | 
 33 |     // Call the function
 34 |     const result = hasValidAuthToken(mockContext);
 35 | 
 36 |     // Assert
 37 |     expect(result).toBe(true);
 38 |     expect(mockContext.req.header).toHaveBeenCalledWith('Authorization');
 39 |   });
 40 | 
 41 |   it('should return false when Authorization header is missing', () => {
 42 |     // Setup header mock to return undefined
 43 |     mockContext.req.header = vi.fn().mockImplementation((name: string) => {
 44 |       return undefined;
 45 |     });
 46 | 
 47 |     // Call the function
 48 |     const result = hasValidAuthToken(mockContext);
 49 | 
 50 |     // Assert
 51 |     expect(result).toBe(false);
 52 |     expect(mockContext.req.header).toHaveBeenCalledWith('Authorization');
 53 |   });
 54 | 
 55 |   it('should return false when Authorization header has incorrect token value', () => {
 56 |     // Setup header mock to return an invalid token
 57 |     mockContext.req.header = vi.fn().mockImplementation((name: string) => {
 58 |       if (name === 'Authorization') return 'Bearer wrong-token';
 59 |       return undefined;
 60 |     });
 61 | 
 62 |     // Call the function
 63 |     const result = hasValidAuthToken(mockContext);
 64 | 
 65 |     // Assert
 66 |     expect(result).toBe(false);
 67 |     expect(mockContext.req.header).toHaveBeenCalledWith('Authorization');
 68 |   });
 69 | 
 70 |   it('should return false when Authorization header uses a scheme other than Bearer', () => {
 71 |     // Setup header mock to return a non-Bearer token
 72 |     mockContext.req.header = vi.fn().mockImplementation((name: string) => {
 73 |       if (name === 'Authorization') return `Basic ${validToken}`;
 74 |       return undefined;
 75 |     });
 76 | 
 77 |     // Call the function
 78 |     const result = hasValidAuthToken(mockContext);
 79 | 
 80 |     // Assert
 81 |     expect(result).toBe(false);
 82 |     expect(mockContext.req.header).toHaveBeenCalledWith('Authorization');
 83 |   });
 84 | 
 85 |   it('should return false when API_TOKEN environment variable is not set or empty', () => {
 86 |     // Mock the environment with an empty API_TOKEN
 87 |     mockContext.env.API_TOKEN = '';
 88 | 
 89 |     // Setup header mock to return a valid token format
 90 |     mockContext.req.header = vi.fn().mockImplementation((name: string) => {
 91 |       if (name === 'Authorization') return `Bearer ${validToken}`;
 92 |       return undefined;
 93 |     });
 94 | 
 95 |     // Call the function
 96 |     const result = hasValidAuthToken(mockContext);
 97 | 
 98 |     // Assert
 99 |     expect(result).toBe(false);
100 |     expect(mockContext.req.header).toHaveBeenCalledWith('Authorization');
101 |   });
102 | });
103 | 


--------------------------------------------------------------------------------
/.github/workflows/deploy-services.yaml:
--------------------------------------------------------------------------------
  1 | name: Deploy services
  2 | on:
  3 |   push:
  4 |     branches:
  5 |       - main
  6 | jobs:
  7 |   deploy:
  8 |     runs-on: ubuntu-latest
  9 |     timeout-minutes: 10
 10 |     steps:
 11 |       - uses: actions/checkout@v4
 12 |       - uses: pnpm/action-setup@v4
 13 |         with:
 14 |           version: 10.9.0
 15 | 
 16 |       - uses: actions/setup-node@v4
 17 |         with:
 18 |           node-version: '22.14.0'
 19 |           cache: 'pnpm'
 20 | 
 21 |       - name: Install dependencies
 22 |         run: pnpm install
 23 | 
 24 |       - name: Check Git status before generating migrations
 25 |         run: git status --porcelain
 26 |         id: pre_migration_status
 27 |         working-directory: 'packages/database'
 28 | 
 29 |       - name: Generate migrations
 30 |         run: pnpm generate
 31 |         working-directory: 'packages/database'
 32 | 
 33 |       - name: Check if new migrations were created
 34 |         id: check_migrations
 35 |         run: |
 36 |           git status --porcelain
 37 |           if [[ $(git status --porcelain | grep -E "^\?\?" | wc -l) -gt 0 ]]; then
 38 |             echo "New migration files were created during CI. Please run 'pnpm generate' locally and commit the changes."
 39 |             echo "new_files=true" >> $GITHUB_OUTPUT
 40 |             exit 1
 41 |           fi
 42 |           if [[ $(git status --porcelain | grep -E "^M" | wc -l) -gt 0 ]]; then
 43 |             echo "Existing migration files were modified during CI. Please run 'pnpm generate' locally and commit the changes."
 44 |             echo "modified_files=true" >> $GITHUB_OUTPUT
 45 |             exit 1
 46 |           fi
 47 |           echo "No new or modified migration files detected."
 48 |         working-directory: 'packages/database'
 49 | 
 50 |       - name: Run database migrations
 51 |         run: pnpm migrate
 52 |         working-directory: 'packages/database'
 53 |         env:
 54 |           DATABASE_URL: ${{ secrets.DATABASE_URL }}
 55 | 
 56 |       - name: Build & Deploy Worker
 57 |         uses: cloudflare/wrangler-action@v3
 58 |         with:
 59 |           apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
 60 |           accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
 61 |           packageManager: pnpm
 62 |           workingDirectory: 'apps/backend'
 63 |           environment: production
 64 |           secrets: |
 65 |             API_TOKEN
 66 |             AXIOM_DATASET
 67 |             AXIOM_TOKEN
 68 |             CLOUDFLARE_API_TOKEN
 69 |             CLOUDFLARE_ACCOUNT_ID
 70 |             DATABASE_URL
 71 |             GEMINI_BASE_URL
 72 |             GEMINI_API_KEY
 73 |             MERIDIAN_ML_SERVICE_URL
 74 |             MERIDIAN_ML_SERVICE_API_KEY
 75 |         env:
 76 |           API_TOKEN: ${{ secrets.API_TOKEN }}
 77 |           AXIOM_DATASET: ${{ secrets.AXIOM_DATASET }}
 78 |           AXIOM_TOKEN: ${{ secrets.AXIOM_TOKEN }}
 79 |           DATABASE_URL: ${{ secrets.DATABASE_URL }}
 80 |           CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
 81 |           CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
 82 |           GEMINI_BASE_URL: ${{ secrets.GEMINI_BASE_URL }}
 83 |           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
 84 |           MERIDIAN_ML_SERVICE_URL: ${{ secrets.MERIDIAN_ML_SERVICE_URL }}
 85 |           MERIDIAN_ML_SERVICE_API_KEY: ${{ secrets.MERIDIAN_ML_SERVICE_API_KEY }}
 86 | 
 87 |       # - name: Build Nuxt Application
 88 |       #   run: pnpm build --filter=@meridian/frontend # Or 'yarn generate', ensure this matches your static build script in package.json (npx nuxi generate)
 89 |       #   env:
 90 |       #     NUXT_DATABASE_URL: ${{ secrets.DATABASE_URL }}
 91 | 
 92 |       # - name: Publish to Cloudflare Pages
 93 |       #   uses: cloudflare/wrangler-action@v3 # Use the official Cloudflare Wrangler action
 94 |       #   with:
 95 |       #     apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} # Use the secret token
 96 |       #     accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} # Use the secret account ID
 97 |       #     command: pages deploy apps/frontend/dist --project-name=meridian-frontend --branch=main
 98 |       #     secrets: |
 99 |       #       NUXT_DATABASE_URL
100 |       #   env:
101 |       #     NUXT_DATABASE_URL: ${{ secrets.DATABASE_URL }}
102 |       # Replace YOUR_CLOUDFLARE_PAGES_PROJECT_NAME with the actual name from Step 3
103 |       # The --branch flag tells Cloudflare which production branch this deployment corresponds to
104 | 


--------------------------------------------------------------------------------
/services/meridian-ml-service/src/meridian_ml_service/embeddings.py:
--------------------------------------------------------------------------------
  1 | from functools import lru_cache
  2 | from typing import Any
  3 | 
  4 | import numpy as np
  5 | import torch
  6 | import torch.nn.functional as F  # noqa: N812
  7 | from tqdm import tqdm
  8 | from transformers import AutoModel, AutoTokenizer
  9 | 
 10 | from .config import settings  # Import settings instance
 11 | 
 12 | # Re-using your type alias and functions, adding type hints and minor adjustments
 13 | ModelComponents = tuple[Any, Any, torch.device]
 14 | 
 15 | 
 16 | @lru_cache(maxsize=1)  # Cache the loaded model globally
 17 | def load_embedding_model() -> ModelComponents:
 18 |     """Loads tokenizer, model from HuggingFace based on settings."""
 19 |     model_name = settings.embedding_model_name
 20 |     print(f"Attempting to load embedding model: {model_name}")
 21 |     try:
 22 |         tokenizer = AutoTokenizer.from_pretrained(
 23 |             model_name, local_files_only=True, trust_remote_code=True
 24 |         )
 25 |         model = AutoModel.from_pretrained(
 26 |             model_name, local_files_only=True, trust_remote_code=True
 27 |         )
 28 | 
 29 |         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 30 |         model.to(device)
 31 |         model.eval()
 32 |         print(f"Embedding model '{model_name}' loaded successfully on device: {device}")
 33 |         return tokenizer, model, device
 34 |     except Exception as e:
 35 |         print(f"ERROR: Failed to load model: {e}")
 36 |         raise  # Critical failure
 37 | 
 38 | 
 39 | def _average_pool(
 40 |     last_hidden_states: torch.Tensor, attention_mask: torch.Tensor
 41 | ) -> torch.Tensor:
 42 |     """Helper function for pooling."""
 43 |     last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
 44 |     return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
 45 | 
 46 | 
 47 | def compute_embeddings(
 48 |     texts: list[str],
 49 |     model_components: ModelComponents,
 50 |     batch_size: int = 32,  # Make configurable later if needed
 51 |     normalize: bool = True,
 52 |     e5_prefix: str | None = None,
 53 | ) -> np.ndarray:
 54 |     """Computes embeddings for a list of texts using the provided model components."""
 55 |     tokenizer, model, device = model_components
 56 |     all_embeddings: list[np.ndarray] = []
 57 | 
 58 |     if e5_prefix:
 59 |         texts_to_embed = [f"{e5_prefix}{text}" for text in texts]
 60 |         print(f"Adding prefix '{e5_prefix}' to texts for embedding.")
 61 |     else:
 62 |         texts_to_embed = texts
 63 | 
 64 |     print(f"Computing embeddings for {len(texts_to_embed)} texts...")
 65 |     for i in tqdm(
 66 |         range(0, len(texts_to_embed), batch_size),
 67 |         desc="Computing Embeddings",
 68 |         leave=False,
 69 |     ):
 70 |         batch_texts = texts_to_embed[i : i + batch_size]
 71 |         try:
 72 |             batch_dict = tokenizer(
 73 |                 batch_texts,
 74 |                 max_length=512,
 75 |                 padding=True,
 76 |                 truncation=True,
 77 |                 return_tensors="pt",
 78 |             ).to(device)
 79 |         except Exception as e:
 80 |             print(f"ERROR: Tokenization failed for batch starting at index {i}: {e}")
 81 |             raise
 82 | 
 83 |         with torch.no_grad():
 84 |             try:
 85 |                 outputs = model(**batch_dict)
 86 |                 embeddings = _average_pool(
 87 |                     outputs.last_hidden_state, batch_dict["attention_mask"]
 88 |                 )
 89 |             except Exception as e:
 90 |                 print(
 91 |                     f"ERROR: Model inference failed for batch starting at index {i}: {e}"
 92 |                 )
 93 |                 raise
 94 | 
 95 |         if normalize:
 96 |             embeddings = F.normalize(embeddings, p=2, dim=1)
 97 | 
 98 |         all_embeddings.append(embeddings.cpu().numpy())
 99 | 
100 |     if not all_embeddings:
101 |         print("Warning: No embeddings generated.")
102 |         # Determine embedding dimension dynamically or return empty array of correct shape if possible
103 |         # Example: get embedding dim from model config if loaded
104 |         # embedding_dim = model.config.hidden_size
105 |         # return np.empty((0, embedding_dim), dtype=np.float32)
106 |         # Fallback for now:
107 |         return np.empty((0, 0), dtype=np.float32)
108 | 
109 |     final_embeddings = np.vstack(all_embeddings)
110 |     print(f"Embeddings computed. Shape: {final_embeddings.shape}")
111 |     return final_embeddings
112 | 


--------------------------------------------------------------------------------
/packages/database/src/schema.ts:
--------------------------------------------------------------------------------
  1 | import {
  2 |   boolean,
  3 |   index,
  4 |   integer,
  5 |   jsonb,
  6 |   pgEnum,
  7 |   pgTable,
  8 |   serial,
  9 |   text,
 10 |   timestamp,
 11 |   vector,
 12 |   bigserial,
 13 |   unique,
 14 | } from 'drizzle-orm/pg-core';
 15 | import { sql } from 'drizzle-orm';
 16 | import type { DataSourceConfigWrapperType } from './validators/dataSourceConfig';
 17 | import type { AnalysisPayloadWrapper } from './validators/analysisPayload';
 18 | 
 19 | /**
 20 |  * Note: We use $ to denote the table objects
 21 |  * This frees up the uses of sources, articles, reports, etc as variables in the codebase
 22 |  **/
 23 | 
 24 | export const ingestedItemStatusEnum = pgEnum('ingested_item_status', [
 25 |   'NEW',
 26 |   'PENDING_PROCESSING',
 27 |   'PROCESSED',
 28 |   'FAILED_RENDER',
 29 |   'FAILED_FETCH',
 30 |   'FAILED_PROCESSING',
 31 |   'FAILED_EMBEDDING',
 32 |   'FAILED_R2_UPLOAD',
 33 |   'SKIPPED_PDF',
 34 |   'SKIPPED_TOO_OLD',
 35 | ]);
 36 | 
 37 | export const sourceTypeEnum = pgEnum('source_type', ['RSS']);
 38 | 
 39 | export const $publishers = pgTable('publishers', {
 40 |   id: serial('id').primaryKey(),
 41 |   name: text('name').notNull(),
 42 |   base_url: text('base_url'),
 43 |   created_at: timestamp('created_at', { mode: 'date' }).defaultNow().notNull(),
 44 | });
 45 | 
 46 | export const $data_sources = pgTable('data_sources', {
 47 |   id: serial('id').primaryKey(),
 48 |   name: text('name').notNull(),
 49 |   source_type: sourceTypeEnum().notNull(),
 50 |   config: jsonb('config').$type<DataSourceConfigWrapperType>().notNull(), // Stores source-specific config like {"url": "...", "config_schema_version": "1.0", "paywall": false, "category": "..."}
 51 |   config_version_hash: text('config_version_hash'), // Hash of config to detect changes
 52 |   publisher_id: integer('publisher_id').references(() => $publishers.id),
 53 |   scrape_frequency_minutes: integer('scrape_frequency_minutes').notNull().default(240), // Default: 4 hours
 54 |   lastChecked: timestamp('last_checked', { mode: 'date' }),
 55 |   do_initialized_at: timestamp('do_initialized_at', { mode: 'date' }),
 56 |   created_at: timestamp('created_at', { mode: 'date' }).defaultNow().notNull(),
 57 |   updated_at: timestamp('updated_at', { mode: 'date' }).defaultNow().notNull(),
 58 | });
 59 | 
 60 | export const $ingested_items = pgTable(
 61 |   'ingested_items',
 62 |   {
 63 |     id: bigserial('id', { mode: 'number' }).primaryKey(),
 64 | 
 65 |     item_id_from_source: text('item_id_from_source').notNull(), // RSS guid, Tweet ID, etc.
 66 |     raw_data_r2_key: text('raw_data_r2_key').notNull(), // R2 key for original payload
 67 | 
 68 |     display_title: text('display_title'), // nullable, might be derived later
 69 |     url_to_original: text('url_to_original').notNull().unique(),
 70 |     published_at: timestamp('published_at', { mode: 'date' }),
 71 | 
 72 |     status: ingestedItemStatusEnum().default('NEW'),
 73 | 
 74 |     content_body_r2_key: text('content_body_r2_key'), // R2 key for processed text
 75 |     content_body_text: text('content_body_text'), // inline snippet or full text if small
 76 |     word_count: integer('word_count'),
 77 | 
 78 |     embedding_text: text('embedding_text'), // text used to generate embedding
 79 |     analysis_payload: jsonb('analysis_payload').$type<typeof AnalysisPayloadWrapper>(), // structured LLM analysis
 80 |     source_specific_metadata: jsonb('source_specific_metadata'), // small, queryable metadata
 81 | 
 82 |     usedBrowser: boolean('used_browser'),
 83 |     embedding: vector('embedding', { dimensions: 384 }),
 84 |     fail_reason: text('fail_reason'),
 85 | 
 86 |     data_source_id: integer('data_source_id')
 87 |       .references(() => $data_sources.id)
 88 |       .notNull(),
 89 | 
 90 |     processed_at: timestamp('processed_at', { mode: 'date' }),
 91 |     ingested_at: timestamp('ingested_at', { mode: 'date' }).default(sql`CURRENT_TIMESTAMP`),
 92 |   },
 93 |   table => [
 94 |     index('embeddingIndex').using('hnsw', table.embedding.op('vector_cosine_ops')),
 95 |     unique('uniqueSourceItem').on(table.data_source_id, table.item_id_from_source),
 96 |   ]
 97 | );
 98 | 
 99 | export const $reports = pgTable('reports', {
100 |   id: serial('id').primaryKey(),
101 |   title: text('title').notNull(),
102 |   content: text('content').notNull(),
103 | 
104 |   totalArticles: integer('total_articles').notNull(),
105 |   totalSources: integer('total_sources').notNull(),
106 | 
107 |   usedArticles: integer('used_articles').notNull(),
108 |   usedSources: integer('used_sources').notNull(),
109 | 
110 |   tldr: text('tldr'),
111 | 
112 |   clustering_params: jsonb('clustering_params'),
113 | 
114 |   model_author: text('model_author'),
115 | 
116 |   createdAt: timestamp('created_at', { mode: 'date' })
117 |     .default(sql`CURRENT_TIMESTAMP`)
118 |     .notNull(),
119 | });
120 | 
121 | export const $newsletter = pgTable('newsletter', {
122 |   id: serial('id').primaryKey(),
123 |   email: text('email').notNull().unique(),
124 |   createdAt: timestamp('created_at', { mode: 'date' }).default(sql`CURRENT_TIMESTAMP`),
125 | });
126 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Meridian: Your Personal Intelligence Agency
  2 | 
  3 | [![Build Status](https://img.shields.io/github/actions/workflow/status/iliane5/meridian/deploy-services.yaml?branch=main)](https://github.com/iliane5/meridian/actions/workflows/deploy-services.yaml)
  4 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
  5 | 
  6 | **Presidential-level intelligence briefings, built with AI, tailored for you.**
  7 | 
  8 | Meridian cuts through news noise by scraping hundreds of sources, analyzing stories with AI, and delivering concise, personalized daily briefs.
  9 | 
 10 | <p align="center">
 11 |   <img src="./screenshot.png" alt="Meridian Brief Example" width="700"/>
 12 | </p>
 13 | 
 14 | ## Why It Exists
 15 | 
 16 | Presidents get tailored daily intelligence briefs. Now with AI, you can too. Meridian delivers:
 17 | 
 18 | - Key global events filtered by relevance
 19 | - Context and underlying drivers
 20 | - Analysis of implications
 21 | - Open-source transparency
 22 | 
 23 | Built for the curious who want depth beyond headlines without the time sink.
 24 | 
 25 | ## Key Features
 26 | 
 27 | - **Source Coverage**: Hundreds of diverse news sources
 28 | - **AI Analysis**: Multi-stage LLM processing (Gemini) for article and cluster analysis
 29 | - **Smart Clustering**: Embeddings + UMAP + HDBSCAN to group related articles
 30 | - **Personalized Briefing**: Daily brief with analytical voice and continuity tracking
 31 | - **Web Interface**: Clean Nuxt 3 frontend
 32 | 
 33 | ## How It Works
 34 | 
 35 | ```mermaid
 36 | graph TD
 37 |     A[RSS Feed URLs] --> B(Scraper Workflow CF);
 38 |     B --> C[Article Metadata DB];
 39 |     C --> D(Article Processor Workflow CF);
 40 |     D -- Fetches --> E{Content Extraction};
 41 |     E -- Standard --> F[Direct Fetch];
 42 |     E -- Complex/Paywall --> G[Browser Rendering API];
 43 |     F --> H[LLM Article Analysis];
 44 |     G --> H;
 45 |     H --> I[Processed Articles DB];
 46 |     I --> J(Brief Generation Python);
 47 |     J -- Embeddings --> K[UMAP/HDBSCAN Clustering];
 48 |     K --> L[LLM Cluster Review];
 49 |     L --> M[LLM Deep Analysis JSON];
 50 |     M --> N[Markdown Summary Generation];
 51 |     O[Previous Day TLDR DB] --> P{Final Briefing LLM};
 52 |     N --> P;
 53 |     P --> Q[Final Brief Markdown];
 54 |     Q --> R[Reports DB];
 55 |     R --> S(Frontend API CF);
 56 |     S --> T[Frontend UI Nuxt];
 57 | ```
 58 | 
 59 | 1. **Scraping**: Cloudflare Workers fetch RSS feeds, store metadata
 60 | 2. **Processing**: Extract text, analyze with Gemini for relevance and structure
 61 | 3. **Brief Generation**: Cluster articles, generate analysis, synthesize final brief
 62 | 4. **Frontend**: Display briefs via Nuxt/Cloudflare
 63 | 
 64 | ## Tech Stack
 65 | 
 66 | - **Infrastructure**: Turborepo, Cloudflare (Workers, Workflows, Pages)
 67 | - **Backend**: Hono, TypeScript, PostgreSQL, Drizzle
 68 | - **AI/ML**: Gemini models, multilingual-e5-small embeddings, UMAP, HDBSCAN
 69 | - **Frontend**: Nuxt 3, Vue 3, Tailwind
 70 | 
 71 | ## Setup
 72 | 
 73 | **Prerequisites**: Node.js v22+, pnpm v9.15+, Python 3.10+, PostgreSQL, Cloudflare account, Google AI API key
 74 | 
 75 | ```bash
 76 | git clone https://github.com/iliane5/meridian.git
 77 | cd meridian
 78 | pnpm install
 79 | # Configure .env files
 80 | pnpm --filter @meridian/database migrate
 81 | # Deploy via Wrangler, run Python briefing notebook manually
 82 | ```
 83 | 
 84 | ## Status & Next Steps
 85 | 
 86 | - ✅ **Core Pipeline**: Scraping, processing, analysis working
 87 | - ⏳ **Top Priority**: Automate brief generation (currently manual Python notebook)
 88 | - ⚠️ **Monitoring**: Improve scraping robustness
 89 | - 🔜 **Future**: Add testing, newsletter distribution
 90 | 
 91 | ## AI Collaboration
 92 | 
 93 | This project benefited significantly from AI assistance:
 94 | 
 95 | - **Claude 3.7 Sonnet**: Contributed to early architecture brainstorming, generated browser js scraping scripts, refined prompts, and called me out when I was overthinking or overengineering.
 96 | - **Gemini 2.5 Pro**: Excelled with long-context tasks - comparing outputs across different prompt variants, reviewing the entire codebase before opensourcing, and nailing the analytical tone for briefs
 97 | - **Gemini 2.0 Flash**: The true unsung hero of this project - blazing fast, dirt cheap, and surprisingly capable when prompted well. It's the workhorse that makes running meridian economically viable without sponsors or grants. Essentially free intelligence at scale.
 98 | 
 99 | The first two compressed months of dev work into days and made building this way more fun. But Flash isn't just a time-saver—it's the engine that makes Meridian possible at all. No human is reading 2000+ articles daily and analyzing 100+ story clusters. Having AI peers for brainstorming felt like cheating; having AI workers for the actual intelligence pipeline feels like living in the future.
100 | 
101 | ## License
102 | 
103 | MIT License - See [LICENSE](./LICENSE) file for details.
104 | 
105 | ---
106 | 
107 | _Built because we live in an age of magic, and we keep forgetting to use it._
108 | 


--------------------------------------------------------------------------------
/apps/backend/src/routers/openGraph.router.ts:
--------------------------------------------------------------------------------
  1 | import { zValidator } from '@hono/zod-validator';
  2 | import { Hono } from 'hono';
  3 | import { ImageResponse } from 'workers-og';
  4 | import { z } from 'zod';
  5 | import type { HonoEnv } from '../app';
  6 | 
  7 | const getBriefOpenGraph = (opts: { title: string; date: Date; totalArticles: number; totalSources: number }) =>
  8 |   `
  9 | <div style="display: flex; width: 100%; height: 100%; background-color: white">
 10 |   <div
 11 |     style="
 12 |       display: flex;
 13 |       height: 100%;
 14 |       width: 100%;
 15 |       align-items: center;
 16 |       justify-content: center;
 17 |       letter-spacing: -0.02em;
 18 |       font-weight: 700;
 19 |       position: relative;
 20 |     "
 21 |   >
 22 |     <div style="right: 42px; top: 42px; position: absolute; display: flex; align-items: center">
 23 |       <span style="margin-left: 8px; font-size: 25px; font-weight: normal; letter-spacing: normal">
 24 |         ${opts.date.toLocaleDateString('en-US', { month: 'long', day: 'numeric', year: 'numeric' })}
 25 |       </span>
 26 |     </div>
 27 | 
 28 |     <div style="left: 42px; bottom: 42px; position: absolute; display: flex; align-items: center">
 29 |       <span style="margin-left: 8px; font-size: 25px; font-weight: normal; letter-spacing: normal">
 30 |         Intelligence brief · ${opts.totalArticles} articles · ${opts.totalSources} sources
 31 |       </span>
 32 |     </div>
 33 | 
 34 |     <div style="right: 42px; bottom: 42px; position: absolute; display: flex; align-items: center">
 35 |       <span style="margin-left: 8px; font-size: 25px; font-weight: normal; letter-spacing: normal"> news.iliane.xyz </span>
 36 |     </div>
 37 | 
 38 |     <div
 39 |       style="
 40 |         display: flex;
 41 |         flex-wrap: wrap;
 42 |         justify-content: center;
 43 |         padding: 10px 25px;
 44 |         margin: 0 42px;
 45 |         font-size: 60px;
 46 |         width: auto;
 47 |         max-width: 1000px;
 48 |         text-align: center;
 49 |         background-color: white;
 50 |         color: black;
 51 |         line-height: 1.4;
 52 |       "
 53 |     >
 54 |       ${decodeURIComponent(opts.title.trim())}
 55 |     </div>
 56 |   </div>
 57 | </div>`;
 58 | 
 59 | const getHomeOpenGraph = () => `
 60 | <div style="display: flex; width: 100%; height: 100%; background-color: white">
 61 |   <div
 62 |     style="
 63 |       display: flex;
 64 |       flex-direction: column;
 65 |       height: 100%;
 66 |       width: 100%;
 67 |       align-items: center;
 68 |       justify-content: center;
 69 |       letter-spacing: -0.02em;
 70 |       font-weight: 700;
 71 |       position: relative;
 72 |     "
 73 |   >
 74 |     <div
 75 |       style="
 76 |         display: flex;
 77 |         flex-wrap: wrap;
 78 |         justify-content: center;
 79 |         padding: 10px 25px;
 80 |         margin: 0 42px;
 81 |         font-size: 110px;
 82 |         width: auto;
 83 |         max-width: 600px;
 84 |         text-align: center;
 85 |         background-color: white;
 86 |         color: black;
 87 |         line-height: 1.4;
 88 |       "
 89 |     >
 90 |       Meridian
 91 |     </div>
 92 | 
 93 |     <div
 94 |       style="
 95 |         display: flex;
 96 |         flex-wrap: wrap;
 97 |         justify-content: center;
 98 |         padding: 10px 25px;
 99 |         margin: 0 42px;
100 |         font-size: 35px;
101 |         width: auto;
102 |         max-width: 900px;
103 |         text-align: center;
104 |         background-color: white;
105 |         color: black;
106 |         font-weight: 100;
107 |       "
108 |     >
109 |       a daily brief of everything important happening that i care about, with actual analysis beyond headlines
110 |     </div>
111 |   </div>
112 | </div>`;
113 | 
114 | const route = new Hono<HonoEnv>()
115 |   .get('/default', async c => {
116 |     const response = new ImageResponse(getHomeOpenGraph(), { width: 1200, height: 630 });
117 |     response.headers.set('Cache-Control', 'public, max-age=86400'); // Cache for 1 day
118 |     return response;
119 |   })
120 |   .get(
121 |     '/brief',
122 |     zValidator(
123 |       'query',
124 |       z.object({
125 |         title: z.string(),
126 |         date: z.string().transform(val => new Date(Number.parseInt(val))),
127 |         articles: z.string().transform(val => Number.parseInt(val)),
128 |         sources: z.string().transform(val => Number.parseInt(val)),
129 |       })
130 |     ),
131 |     async c => {
132 |       const query = c.req.valid('query');
133 |       const response = new ImageResponse(
134 |         getBriefOpenGraph({
135 |           title: query.title,
136 |           date: query.date,
137 |           totalArticles: query.articles,
138 |           totalSources: query.sources,
139 |         }),
140 |         { width: 1200, height: 630 }
141 |       );
142 |       // Cache brief images for longer since they don't change much despite having params
143 |       response.headers.set('Cache-Control', 'public, max-age=86400, stale-while-revalidate=43200');
144 |       return response;
145 |     }
146 |   );
147 | 
148 | export default route;
149 | 


--------------------------------------------------------------------------------
/apps/backend/test/parseArticle.spec.ts:
--------------------------------------------------------------------------------
  1 | import { Readability } from '@mozilla/readability';
  2 | import * as linkedom from 'linkedom';
  3 | import { beforeEach, describe, expect, it, vi } from 'vitest';
  4 | import { parseArticle } from '../src/lib/parsers';
  5 | 
  6 | // Mock the Readability and parseHTML dependencies
  7 | vi.mock('@mozilla/readability', () => {
  8 |   return {
  9 |     Readability: vi.fn(),
 10 |   };
 11 | });
 12 | 
 13 | vi.mock('linkedom', () => {
 14 |   return {
 15 |     parseHTML: vi.fn(),
 16 |   };
 17 | });
 18 | 
 19 | describe('parseArticle', () => {
 20 |   // Note: Testing Readability itself is hard. Focus on the wrapper.
 21 | 
 22 |   beforeEach(() => {
 23 |     vi.resetAllMocks();
 24 | 
 25 |     // Default mocks for linkedom
 26 |     vi.mocked(linkedom.parseHTML).mockReturnValue({
 27 |       document: 'mock-document',
 28 |     } as unknown);
 29 |   });
 30 | 
 31 |   it('should return an error Result if Readability constructor or parse() throws an exception', () => {
 32 |     // Setup: Make Readability throw an error
 33 |     vi.mocked(Readability).mockImplementation(() => {
 34 |       throw new Error('Readability error');
 35 |     });
 36 | 
 37 |     // Execute
 38 |     const result = parseArticle({ html: '<html><body>Test</body></html>' });
 39 | 
 40 |     // Verify
 41 |     expect(result.isErr()).toBe(true);
 42 |     if (result.isErr()) {
 43 |       expect(result.error.type).toBe('READABILITY_ERROR');
 44 |     }
 45 |   });
 46 | 
 47 |   it('should return an error Result if Readability returns null', () => {
 48 |     // Setup: Make Readability.parse() return null
 49 |     vi.mocked(Readability).mockImplementation(() => {
 50 |       return {
 51 |         parse: () => null,
 52 |       } as unknown as Readability;
 53 |     });
 54 | 
 55 |     // Execute
 56 |     const result = parseArticle({ html: '<html><body>Test</body></html>' });
 57 | 
 58 |     // Verify
 59 |     expect(result.isErr()).toBe(true);
 60 |     if (result.isErr()) {
 61 |       expect(result.error.type).toBe('NO_ARTICLE_FOUND');
 62 |     }
 63 |   });
 64 | 
 65 |   it('should return an error Result if Readability result is missing title', () => {
 66 |     // Setup: Make Readability.parse() return an object without a title
 67 |     vi.mocked(Readability).mockImplementation(() => {
 68 |       return {
 69 |         parse: () => ({
 70 |           title: '', // empty title
 71 |           textContent: 'Some content',
 72 |         }),
 73 |       } as unknown as Readability;
 74 |     });
 75 | 
 76 |     // Execute
 77 |     const result = parseArticle({ html: '<html><body>Test</body></html>' });
 78 | 
 79 |     // Verify
 80 |     expect(result.isErr()).toBe(true);
 81 |     if (result.isErr()) {
 82 |       expect(result.error.type).toBe('NO_ARTICLE_FOUND');
 83 |     }
 84 |   });
 85 | 
 86 |   it('should return an error Result if Readability result is missing textContent', () => {
 87 |     // Setup: Make Readability.parse() return an object without textContent
 88 |     vi.mocked(Readability).mockImplementation(() => {
 89 |       return {
 90 |         parse: () => ({
 91 |           title: 'Article Title',
 92 |           textContent: '', // empty textContent
 93 |         }),
 94 |       } as unknown as Readability;
 95 |     });
 96 | 
 97 |     // Execute
 98 |     const result = parseArticle({ html: '<html><body>Test</body></html>' });
 99 | 
100 |     // Verify
101 |     expect(result.isErr()).toBe(true);
102 |     if (result.isErr()) {
103 |       expect(result.error.type).toBe('NO_ARTICLE_FOUND');
104 |     }
105 |   });
106 | 
107 |   it('should return the extracted title, cleaned textContent, and publishedTime when successful', () => {
108 |     // Setup: Make Readability.parse() return a valid article
109 |     vi.mocked(Readability).mockImplementation(() => {
110 |       return {
111 |         parse: () => ({
112 |           title: 'Article Title',
113 |           textContent: 'Article content here',
114 |           publishedTime: '2025-03-18T18:04:44-04:00',
115 |         }),
116 |       } as unknown as Readability;
117 |     });
118 | 
119 |     // Execute
120 |     const result = parseArticle({ html: '<html><body>Test</body></html>' });
121 | 
122 |     // Verify
123 |     expect(result.isOk()).toBe(true);
124 |     if (result.isOk()) {
125 |       expect(result.value).toEqual({
126 |         title: 'Article Title',
127 |         text: 'Article content here',
128 |         publishedTime: '2025-03-18T18:04:44-04:00',
129 |       });
130 |     }
131 |   });
132 | 
133 |   it('should clean and normalize whitespace in the extracted textContent', () => {
134 |     // Setup: Make Readability.parse() return messy text content
135 |     const messyText = '  Multiple    spaces  \n\n\n  and \t\t tabs \n   and extra newlines  ';
136 |     vi.mocked(Readability).mockImplementation(() => {
137 |       return {
138 |         parse: () => ({
139 |           title: 'Article Title',
140 |           textContent: messyText,
141 |         }),
142 |       } as unknown as Readability;
143 |     });
144 | 
145 |     // Execute
146 |     const result = parseArticle({ html: '<html><body>Test</body></html>' });
147 | 
148 |     // Verify
149 |     expect(result.isOk()).toBe(true);
150 |     if (result.isOk()) {
151 |       // The text should be cleaned according to the cleanString function logic
152 |       expect(result.value.text).toBe('Multiple spaces\nand tabs\nand extra newlines');
153 |     }
154 |   });
155 | });
156 | 


--------------------------------------------------------------------------------
/apps/frontend/src/server/api/admin/sources/index.get.ts:
--------------------------------------------------------------------------------
  1 | import { sql, $ingested_items, and, gte } from '@meridian/database';
  2 | import { getDB } from '~/server/lib/utils';
  3 | 
  4 | export default defineEventHandler(async event => {
  5 |   await requireUserSession(event); // require auth
  6 | 
  7 |   const db = getDB(event);
  8 |   const sources = await db.query.$data_sources.findMany();
  9 |   if (sources.length === 0) {
 10 |     return { overview: null, sources: [] };
 11 |   }
 12 | 
 13 |   // get article stats for last 7 days
 14 |   const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
 15 |   const articleStats = await db.query.$ingested_items.findMany({
 16 |     where: sql`ingested_at >= ${sevenDaysAgo.toISOString()}`,
 17 |     columns: {
 18 |       data_source_id: true,
 19 |       status: true,
 20 |       ingested_at: true,
 21 |       processed_at: true,
 22 |     },
 23 |   });
 24 | 
 25 |   // calculate per-source stats
 26 |   const sourceStats = sources.map(source => {
 27 |     const sourceArticles = articleStats.filter(a => a.data_source_id === source.id);
 28 |     const last24hArticles = sourceArticles.filter(
 29 |       a => a.ingested_at && new Date(a.ingested_at) > new Date(Date.now() - 24 * 60 * 60 * 1000)
 30 |     );
 31 | 
 32 |     // calculate health metrics
 33 |     const totalArticles = sourceArticles.length;
 34 |     const processedArticles = sourceArticles.filter(a => a.status === 'PROCESSED');
 35 |     const failedArticles = sourceArticles.filter(a => a.status?.endsWith('_FAILED'));
 36 | 
 37 |     // calculate processing time for processed articles
 38 |     const processingTimes = processedArticles
 39 |       .map(a =>
 40 |         a.processed_at && a.ingested_at ? new Date(a.processed_at).getTime() - new Date(a.ingested_at).getTime() : null
 41 |       )
 42 |       .filter(time => time !== null);
 43 | 
 44 |     const avgProcessingTime = processingTimes.length
 45 |       ? Math.round(processingTimes.reduce((a, b) => a + b, 0) / processingTimes.length / 1000) // in seconds
 46 |       : null;
 47 | 
 48 |     return {
 49 |       id: source.id,
 50 |       name: source.name,
 51 |       url: source.config.config.url,
 52 |       paywall: source.config.config.rss_paywall,
 53 |       frequency:
 54 |         source.scrape_frequency_minutes <= 60
 55 |           ? 'Hourly'
 56 |           : source.scrape_frequency_minutes <= 120
 57 |             ? '4 Hours'
 58 |             : source.scrape_frequency_minutes <= 180
 59 |               ? '6 Hours'
 60 |               : 'Daily',
 61 |       lastChecked: source.lastChecked?.toISOString(),
 62 | 
 63 |       // article counts
 64 |       totalArticles: sourceArticles.length,
 65 |       avgPerDay: last24hArticles.length / 24,
 66 | 
 67 |       // health metrics
 68 |       processSuccessRate: totalArticles ? (processedArticles.length / totalArticles) * 100 : null,
 69 |       errorRate: totalArticles ? (failedArticles.length / totalArticles) * 100 : null,
 70 |       avgProcessingTime,
 71 |     };
 72 |   });
 73 | 
 74 |   // get global stats
 75 |   const startOfToday = new Date();
 76 |   startOfToday.setUTCHours(0, 0, 0, 0);
 77 | 
 78 |   const [lastSourceCheck, lastArticleProcessed, lastArticleFetched, todayStats, staleSources] = await Promise.all([
 79 |     // get latest source check
 80 |     db.query.$data_sources.findFirst({
 81 |       orderBy: sql`last_checked DESC NULLS LAST`,
 82 |       columns: { lastChecked: true },
 83 |     }),
 84 |     // get latest processed article
 85 |     db.query.$ingested_items.findFirst({
 86 |       where: sql`status = 'PROCESSED'`,
 87 |       orderBy: sql`processed_at DESC NULLS LAST`,
 88 |       columns: { processed_at: true },
 89 |     }),
 90 |     // get latest fetched article
 91 |     db.query.$ingested_items.findFirst({
 92 |       orderBy: sql`ingested_at DESC NULLS LAST`,
 93 |       columns: { ingested_at: true },
 94 |     }),
 95 |     // get today's stats
 96 |     db.query.$ingested_items.findMany({
 97 |       where: and(gte($ingested_items.ingested_at, startOfToday)),
 98 |       columns: {
 99 |         status: true,
100 |         ingested_at: true,
101 |         processed_at: true,
102 |       },
103 |     }),
104 |     // get stale sources count
105 |     db.query.$data_sources.findMany({
106 |       where: sql`(
107 |         (scrape_frequency_minutes <= 60 AND last_checked < NOW() - INTERVAL '2 hours') OR
108 |         (scrape_frequency_minutes <= 120 AND last_checked < NOW() - INTERVAL '8 hours') OR
109 |         (scrape_frequency_minutes <= 180 AND last_checked < NOW() - INTERVAL '12 hours') OR
110 |         (scrape_frequency_minutes <= 240 AND last_checked < NOW() - INTERVAL '48 hours')
111 |       )`,
112 |       columns: { id: true },
113 |     }),
114 |   ]);
115 | 
116 |   const overview = {
117 |     lastSourceCheck: lastSourceCheck?.lastChecked?.toISOString() ?? null,
118 |     lastArticleProcessed: lastArticleProcessed?.processed_at?.toISOString() ?? null,
119 |     lastArticleFetched: lastArticleFetched?.ingested_at?.toISOString() ?? null,
120 |     articlesProcessedToday: todayStats.filter(a => a.status === 'PROCESSED').length,
121 |     articlesFetchedToday: todayStats.length,
122 |     errorsToday: todayStats.filter(a => a.status?.endsWith('_FAILED')).length,
123 |     staleSourcesCount: staleSources.length,
124 |     totalSourcesCount: sources.length,
125 |   };
126 | 
127 |   return {
128 |     overview,
129 |     sources: sourceStats,
130 |   };
131 | });
132 | 


--------------------------------------------------------------------------------
/apps/backend/src/lib/parsers.ts:
--------------------------------------------------------------------------------
  1 | import { Readability } from '@mozilla/readability';
  2 | import { XMLParser } from 'fast-xml-parser';
  3 | import { parseHTML } from 'linkedom';
  4 | import { Result, err, ok } from 'neverthrow';
  5 | import { z } from 'zod';
  6 | 
  7 | const rssFeedSchema = z.object({
  8 |   title: z.string().min(1),
  9 |   link: z.string(),
 10 |   pubDate: z.date().nullable(),
 11 | });
 12 | 
 13 | function cleanString(text: string) {
 14 |   return text
 15 |     .replace(/[ \t]+/g, ' ') // collapse spaces/tabs
 16 |     .replace(/\n\s+/g, '\n') // clean spaces after newlines
 17 |     .replace(/\s+\n/g, '\n') // clean spaces before newlines
 18 |     .replace(/\n{3,}/g, '\n\n') // keep max 2 consecutive newlines
 19 |     .trim(); // clean edges
 20 | }
 21 | 
 22 | function cleanUrl(url: string) {
 23 |   const u = new URL(url);
 24 | 
 25 |   const paramsToRemove = ['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content', 'fbclid', 'gclid'];
 26 |   for (const param of paramsToRemove) {
 27 |     u.searchParams.delete(param);
 28 |   }
 29 | 
 30 |   return u.toString();
 31 | }
 32 | 
 33 | /**
 34 |  * Parses an RSS/XML feed content to extract article information
 35 |  *
 36 |  * Handles various RSS feed formats and structures while normalizing the output.
 37 |  * Extracts titles, links, and publication dates from the feed items.
 38 |  *
 39 |  * @param xml The XML content of the RSS feed as a string
 40 |  * @returns A Result containing either an array of parsed feed items or an error
 41 |  */
 42 | export async function parseRSSFeed(xml: string): Promise<Result<z.infer<typeof rssFeedSchema>[], Error>> {
 43 |   const safeParser = Result.fromThrowable(
 44 |     (xml: string) => new XMLParser({ ignoreAttributes: false, attributeNamePrefix: '@_' }).parse(xml),
 45 |     e => (e instanceof Error ? e : new Error(String(e)))
 46 |   );
 47 | 
 48 |   const parsedXml = safeParser(xml);
 49 |   if (parsedXml.isErr()) {
 50 |     return err(new Error(`Parse error: ${parsedXml.error.message}`));
 51 |   }
 52 | 
 53 |   const result = parsedXml.value;
 54 | 
 55 |   // handle various feed structures
 56 |   let items = result.rss?.channel?.item || result.feed?.entry || result.item || result['rdf:RDF']?.item || [];
 57 | 
 58 |   // handle single item case
 59 |   items = Array.isArray(items) ? items : [items];
 60 | 
 61 |   // biome-ignore lint/suspicious/noExplicitAny: <explanation>
 62 |   const properItems = items.map((item: any) => {
 63 |     let title = '';
 64 |     let link = '';
 65 |     let id = '';
 66 |     let pubDateString: string | null = null;
 67 | 
 68 |     if (typeof item.title === 'string') {
 69 |       title = item.title;
 70 |     } else if (typeof item.title === 'object' && item.title['#text']) {
 71 |       title = item.title['#text'];
 72 |     } else {
 73 |       title = 'UNKNOWN';
 74 |     }
 75 | 
 76 |     if (typeof item.link === 'string') {
 77 |       link = item.link;
 78 |     } else if (typeof item.link === 'object' && item.link['@_href']) {
 79 |       link = item.link['@_href'];
 80 |     } else if (typeof item.guid === 'string') {
 81 |       link = item.guid;
 82 |     } else {
 83 |       link = 'UNKNOWN';
 84 |     }
 85 | 
 86 |     if (typeof item.guid === 'string') {
 87 |       id = item.guid;
 88 |     } else if (typeof item.guid === 'object' && item.guid['#text']) {
 89 |       id = item.guid['#text'];
 90 |     } else {
 91 |       id = 'UNKNOWN';
 92 |     }
 93 | 
 94 |     if (typeof item.pubDate === 'string') {
 95 |       pubDateString = item.pubDate;
 96 |     } else if (typeof item.published === 'string') {
 97 |       pubDateString = item.published;
 98 |     } else if (typeof item.updated === 'string') {
 99 |       pubDateString = item.updated;
100 |     }
101 | 
102 |     let pubDate: Date | null = null;
103 |     if (pubDateString) {
104 |       pubDate = new Date(pubDateString);
105 |       if (Number.isNaN(pubDate.getTime())) {
106 |         pubDate = null;
107 |       }
108 |     }
109 | 
110 |     return {
111 |       title: cleanString(title),
112 |       link: cleanUrl(cleanString(link)),
113 |       id: cleanString(id),
114 |       pubDate,
115 |     };
116 |   });
117 | 
118 |   // standardize the items
119 |   const parsedItems = z.array(rssFeedSchema).safeParse(properItems);
120 |   if (parsedItems.success === false) {
121 |     return err(new Error(`Validation error: ${parsedItems.error.message}`));
122 |   }
123 | 
124 |   return ok(parsedItems.data);
125 | }
126 | 
127 | /**
128 |  * Parses HTML content to extract article text and metadata
129 |  *
130 |  * Uses Mozilla Readability to identify and extract the main content
131 |  * from an HTML document, ignoring navigation, ads, and other non-content elements.
132 |  *
133 |  * @param opts Object containing the HTML content to parse
134 |  * @returns A Result containing either the parsed article data or an error object
135 |  */
136 | export function parseArticle(opts: { html: string }) {
137 |   const safeReadability = Result.fromThrowable(
138 |     (html: string) => new Readability(parseHTML(html).document).parse(),
139 |     e => (e instanceof Error ? e : new Error(String(e)))
140 |   );
141 | 
142 |   const articleResult = safeReadability(opts.html);
143 |   if (articleResult.isErr()) {
144 |     return err({ type: 'READABILITY_ERROR', error: articleResult.error });
145 |   }
146 | 
147 |   // if we can't parse the article or there is no article, not much we can do
148 |   const article = articleResult.value;
149 |   if (article === null || !article.title || !article.textContent) {
150 |     return err({ type: 'NO_ARTICLE_FOUND', error: new Error('No article found') });
151 |   }
152 | 
153 |   return ok({
154 |     title: article.title,
155 |     text: cleanString(article.textContent),
156 |     publishedTime: article.publishedTime || undefined,
157 |   });
158 | }
159 | 


--------------------------------------------------------------------------------
/apps/backend/src/lib/rateLimiter.ts:
--------------------------------------------------------------------------------
  1 | import type { WorkflowStep } from 'cloudflare:workers';
  2 | import { Logger } from './logger';
  3 | 
  4 | /**
  5 |  * Configuration options for the rate limiter
  6 |  */
  7 | type RateLimiterOptions = {
  8 |   maxConcurrent: number;
  9 |   globalCooldownMs: number;
 10 |   domainCooldownMs: number;
 11 | };
 12 | 
 13 | /**
 14 |  * Represents a batch item with an ID and URL
 15 |  */
 16 | type BatchItem<IdType = number | string> = {
 17 |   id: IdType;
 18 |   url: string;
 19 | };
 20 | 
 21 | /**
 22 |  * Rate limiter that respects per-domain cooldowns to prevent overloading specific domains
 23 |  * when making HTTP requests. Handles batching and throttling of requests.
 24 |  *
 25 |  * @template T Type of the batch items, must extend BatchItem
 26 |  * @template I Type of the ID field, defaults to number | string
 27 |  */
 28 | export class DomainRateLimiter<T extends BatchItem<I>, I = number | string> {
 29 |   private lastDomainAccess = new Map<string, number>();
 30 |   private options: RateLimiterOptions;
 31 |   private logger: Logger;
 32 | 
 33 |   /**
 34 |    * Creates a new DomainRateLimiter instance
 35 |    *
 36 |    * @param options Configuration options for throttling
 37 |    */
 38 |   constructor(options: RateLimiterOptions) {
 39 |     this.options = options;
 40 |     this.logger = new Logger({ service: 'DomainRateLimiter' });
 41 |   }
 42 | 
 43 |   /**
 44 |    * Processes a batch of items with domain-aware rate limiting
 45 |    *
 46 |    * @param items Array of items to process
 47 |    * @param step Workflow step instance for handling sleeps/delays
 48 |    * @param processItem Function that processes a single item and returns a result
 49 |    * @returns Promise resolving to an array of results in the same order as input items
 50 |    *
 51 |    * @template R The return type of the processItem function
 52 |    */
 53 |   async processBatch<R>(
 54 |     items: T[],
 55 |     step: WorkflowStep,
 56 |     processItem: (item: T, domain: string) => Promise<R>
 57 |   ): Promise<R[]> {
 58 |     const batchLogger = this.logger.child({ batch_size: items.length });
 59 |     batchLogger.info('Starting batch processing');
 60 | 
 61 |     const results: R[] = [];
 62 |     const remainingItems = [...items];
 63 | 
 64 |     while (remainingItems.length > 0) {
 65 |       const currentBatch: T[] = [];
 66 |       const currentTime = Date.now();
 67 | 
 68 |       // Select items for current batch based on domain cooldown
 69 |       for (const item of [...remainingItems]) {
 70 |         if (currentBatch.length >= this.options.maxConcurrent) break;
 71 | 
 72 |         try {
 73 |           const domain = new URL(item.url).hostname;
 74 |           const lastAccess = this.lastDomainAccess.get(domain) || 0;
 75 | 
 76 |           if (currentTime - lastAccess >= this.options.domainCooldownMs) {
 77 |             currentBatch.push(item);
 78 |             // Remove from remaining items
 79 |             const idx = remainingItems.findIndex(i => i.id === item.id);
 80 |             if (idx >= 0) remainingItems.splice(idx, 1);
 81 |           }
 82 |         } catch (e) {
 83 |           // Skip invalid URLs
 84 |           const idx = remainingItems.findIndex(i => i.id === item.id);
 85 |           if (idx >= 0) remainingItems.splice(idx, 1);
 86 |         }
 87 |       }
 88 | 
 89 |       if (currentBatch.length === 0) {
 90 |         // Nothing ready yet, wait for next domain to be ready
 91 |         const nextReady = Math.min(
 92 |           ...remainingItems
 93 |             .map(item => {
 94 |               try {
 95 |                 const domain = new URL(item.url).hostname;
 96 |                 const lastAccess = this.lastDomainAccess.get(domain) || 0;
 97 |                 return this.options.domainCooldownMs - (currentTime - lastAccess);
 98 |               } catch {
 99 |                 return Number.POSITIVE_INFINITY; // Skip invalid URLs
100 |               }
101 |             })
102 |             .filter(time => time > 0) // Only consider positive wait times
103 |         );
104 |         batchLogger.debug('Waiting for domain cooldown', { wait_time_ms: Math.max(500, nextReady) });
105 |         await step.sleep(`waiting for domain cooldown (${Math.round(nextReady / 1000)}s)`, Math.max(500, nextReady));
106 |         continue;
107 |       }
108 | 
109 |       batchLogger.debug('Processing batch', { batch_size: currentBatch.length, remaining: remainingItems.length });
110 | 
111 |       // Process current batch in parallel
112 |       const batchResults = await Promise.allSettled(
113 |         currentBatch.map(async item => {
114 |           try {
115 |             const domain = new URL(item.url).hostname;
116 |             this.lastDomainAccess.set(domain, Date.now());
117 |             return await processItem(item, domain);
118 |           } catch (error) {
119 |             const itemLogger = batchLogger.child({ item_id: item.id });
120 |             itemLogger.error(
121 |               'Error processing item',
122 |               undefined,
123 |               error instanceof Error ? error : new Error(String(error))
124 |             );
125 |             throw error;
126 |           }
127 |         })
128 |       );
129 | 
130 |       // Add results
131 |       for (const result of batchResults) {
132 |         if (result.status === 'fulfilled') {
133 |           results.push(result.value);
134 |         }
135 |       }
136 | 
137 |       // Apply global cooldown between batches if we have more items to process
138 |       if (remainingItems.length > 0) {
139 |         batchLogger.debug('Applying global rate limit', { cooldown_ms: this.options.globalCooldownMs });
140 |         await step.sleep(
141 |           `global rate limit (${Math.round(this.options.globalCooldownMs / 1000)}s)`,
142 |           this.options.globalCooldownMs
143 |         );
144 |       }
145 |     }
146 | 
147 |     batchLogger.info('Batch processing complete', { processed_count: results.length });
148 |     return results;
149 |   }
150 | }
151 | 


--------------------------------------------------------------------------------
/apps/briefs/src/llm.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from openai import OpenAI
  3 | from dotenv import load_dotenv
  4 | import json
  5 | import re
  6 | from typing import Dict, Optional, Any, Tuple, List
  7 | 
  8 | load_dotenv()
  9 | 
 10 | client = OpenAI(
 11 |     # This is the default and can be omitted
 12 |     api_key=os.environ.get("GOOGLE_API_KEY"),
 13 |     base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
 14 | )
 15 | 
 16 | # ---- NEW: Global list to store LLM calls ----
 17 | LLM_CALL_LOG = []
 18 | # --------------------------------------------
 19 | 
 20 | 
 21 | def call_llm(
 22 |     model: str, messages: list[dict], temperature: float = 0
 23 | ) -> Tuple[Optional[str], Optional[Tuple[int, int]]]:
 24 |     """
 25 |     Calls the LLM API, logs the interaction, and returns content and usage.
 26 |     Returns (None, None) on API error.
 27 |     """
 28 |     try:
 29 |         response = client.chat.completions.create(
 30 |             model=model,
 31 |             messages=messages,
 32 |             n=1,
 33 |             temperature=temperature,
 34 |         )
 35 | 
 36 |         output_content = response.choices[0].message.content
 37 |         usage_stats = (
 38 |             response.usage.prompt_tokens,
 39 |             response.usage.completion_tokens,
 40 |         )
 41 | 
 42 |         # ---- NEW: Log the call details ----
 43 |         log_entry = {
 44 |             "model": model,
 45 |             "messages": messages,
 46 |             "temperature": temperature,
 47 |             "output": output_content,
 48 |             "usage": {
 49 |                 "prompt_tokens": usage_stats[0],
 50 |                 "completion_tokens": usage_stats[1],
 51 |             },
 52 |         }
 53 |         LLM_CALL_LOG.append(log_entry)
 54 |         # -----------------------------------
 55 | 
 56 |         return output_content, usage_stats
 57 | 
 58 |     except Exception as e:
 59 |         print(f"ERROR: LLM API call failed for model {model}: {e}")
 60 |         # Log the error attempt? Maybe not for the clean log requested.
 61 |         # You could add a separate error log if needed.
 62 |         # ---- NEW: Log the failed attempt ----
 63 |         # log_entry = {
 64 |         #     "model": model,
 65 |         #     "messages": messages,
 66 |         #     "temperature": temperature,
 67 |         #     "output": None,
 68 |         #     "error": str(e),
 69 |         #     "usage": None
 70 |         # }
 71 |         # LLM_CALL_LOG.append(log_entry)
 72 |         # ------------------------------------
 73 |         return None, None  # Indicate failure
 74 | 
 75 | 
 76 | # ---- NEW: Function to save the log ----
 77 | def save_llm_log_to_json(filename: str = "llm_calls_log.json"):
 78 |     """Saves the accumulated LLM call log to a JSON file."""
 79 |     print(f"Attempting to save {len(LLM_CALL_LOG)} LLM calls to {filename}...")
 80 |     try:
 81 |         with open(filename, "w", encoding="utf-8") as f:
 82 |             json.dump(LLM_CALL_LOG, f, indent=2, ensure_ascii=False)
 83 |         print(f"LLM call log successfully saved to {filename}")
 84 |     except Exception as e:
 85 |         print(f"ERROR: Failed to save LLM call log to {filename}: {e}")
 86 | 
 87 | 
 88 | # Option 1: Automatically save on exit (use with caution in notebooks)
 89 | # atexit.register(save_llm_log_to_json, filename="llm_calls_log_autosave.json")
 90 | # ---------------------------------------
 91 | 
 92 | 
 93 | def parse_llm_json_output(
 94 |     llm_output: Optional[str],
 95 |     expected_schema: Optional[
 96 |         Dict
 97 |     ] = None,  # Currently unused, but placeholder for future validation
 98 | ) -> Optional[Dict]:
 99 |     """
100 |     Safely parses JSON from LLM output, handling markdown code fences.
101 | 
102 |     Args:
103 |         llm_output: The raw string output from the LLM.
104 |         expected_schema: Optional schema for basic validation (not implemented yet).
105 | 
106 |     Returns:
107 |         The parsed dictionary or None if parsing fails or input is None.
108 |     """
109 |     if llm_output is None:
110 |         # print("Debug: parse_llm_json_output received None input.") # Optional debug print
111 |         return None
112 | 
113 |     # Regex to find JSON within ```json ... ``` blocks
114 |     match = re.search(r"```json\s*(\{.*?\})\s*```", llm_output, re.DOTALL)
115 | 
116 |     json_string = None
117 |     if match:
118 |         json_string = match.group(1).strip()
119 |         # print("Debug: Found JSON within ```json fences.") # Optional debug print
120 |     else:
121 |         # print("Debug: No ```json fences found. Checking if entire string is JSON.") # Optional debug print
122 |         # Fallback: Check if the entire string is valid JSON (maybe without fences)
123 |         # Be cautious with this fallback as LLMs often add extra text
124 |         temp_string = llm_output.strip()
125 |         if temp_string.startswith("{") and temp_string.endswith("}"):
126 |             # print("Debug: Entire string looks like JSON.") # Optional debug print
127 |             json_string = temp_string
128 |         # else: # Removed risky loose brace finding
129 | 
130 |     if json_string:
131 |         try:
132 |             parsed_json = json.loads(json_string)
133 |             if isinstance(parsed_json, dict):
134 |                 # Optional: Add basic schema validation here if needed
135 |                 # if expected_schema and not all(key in parsed_json for key in expected_schema):
136 |                 #     print("Warning: Parsed JSON missing expected keys.")
137 |                 #     return None # Or handle differently
138 |                 # print("Debug: Successfully parsed JSON string into dict.") # Optional debug print
139 |                 return parsed_json
140 |             else:
141 |                 print(f"Warning: Parsed JSON is not a dictionary: {type(parsed_json)}")
142 |                 return None
143 |         except json.JSONDecodeError as e:
144 |             print(
145 |                 f"ERROR: Failed to decode JSON: {e}\nInput string (first 500 chars): {json_string[:500]}..."
146 |             )
147 |             return None
148 |         except Exception as e:
149 |             print(f"ERROR: Unexpected error parsing JSON: {e}")
150 |             return None
151 |     else:
152 |         # print(f"Warning: No valid JSON structure found in LLM output (first 500 chars): {llm_output[:500]}...") # Keep this warning
153 |         return None
154 | 


--------------------------------------------------------------------------------
/apps/frontend/src/composables/useTableOfContents.ts:
--------------------------------------------------------------------------------
  1 | export interface TocItem {
  2 |   id: string;
  3 |   text: string;
  4 |   level: number;
  5 | }
  6 | 
  7 | export interface UseTableOfContentsOptions {
  8 |   contentRef: Ref<HTMLElement | null>;
  9 |   headerOffset?: number;
 10 |   selectors?: string; // e.g., 'h2, h3, u > strong'
 11 | }
 12 | 
 13 | const DEFAULT_HEADER_OFFSET = 80;
 14 | const DEFAULT_SELECTORS = 'h2, h3, u > strong';
 15 | 
 16 | // Simple slugify, might need refinement depending on edge cases
 17 | const generateSlug = (text: string): string => {
 18 |   return text
 19 |     .toLowerCase()
 20 |     .replace(/[^a-z0-9\s-]/g, '') // Remove special chars except space/hyphen
 21 |     .trim()
 22 |     .replace(/\s+/g, '-') // Replace spaces with hyphens
 23 |     .slice(0, 50); // Limit length
 24 | };
 25 | 
 26 | export function useTableOfContents({
 27 |   contentRef,
 28 |   headerOffset = DEFAULT_HEADER_OFFSET,
 29 |   selectors = DEFAULT_SELECTORS,
 30 | }: UseTableOfContentsOptions) {
 31 |   const tocItems = ref<TocItem[]>([]);
 32 |   const activeHeadingId = ref<string | null>(null);
 33 |   const mobileMenuOpen = ref(false); // Keep mobile state here if tied to TOC display
 34 | 
 35 |   let observer: IntersectionObserver | null = null;
 36 | 
 37 |   const generateToc = () => {
 38 |     if (!contentRef.value) return;
 39 | 
 40 |     const elements = contentRef.value.querySelectorAll(selectors);
 41 |     const newTocItems: TocItem[] = [];
 42 |     const observedElements: Element[] = []; // Keep track of elements to observe
 43 | 
 44 |     elements.forEach((el, index) => {
 45 |       let level: number;
 46 |       const text = el.textContent?.trim() || '';
 47 |       let targetElement: HTMLElement = el as HTMLElement;
 48 | 
 49 |       if (el.tagName === 'H2') level = 2;
 50 |       else if (el.tagName === 'H3') level = 3;
 51 |       else if (el.tagName === 'STRONG' && el.parentElement?.tagName === 'U') {
 52 |         level = 5; // Special level for topics
 53 |         targetElement = el.parentElement; // Target the <u> tag
 54 |       } else {
 55 |         return; // Skip unrecognized elements
 56 |       }
 57 | 
 58 |       // Ensure unique ID even if slug is identical
 59 |       const id = `${level === 5 ? 'topic' : 'section'}-${index}-${generateSlug(text)}`;
 60 | 
 61 |       if (text && targetElement) {
 62 |         targetElement.id = id; // Assign ID
 63 |         newTocItems.push({ id, text, level });
 64 |         observedElements.push(targetElement); // Add element for intersection observer
 65 |       }
 66 |     });
 67 | 
 68 |     tocItems.value = newTocItems;
 69 |     setupIntersectionObserver(observedElements); // Setup observer after generating TOC
 70 |   };
 71 | 
 72 |   const setupIntersectionObserver = (elements: Element[]) => {
 73 |     // Disconnect previous observer if exists
 74 |     if (observer) {
 75 |       observer.disconnect();
 76 |     }
 77 | 
 78 |     // Observer options: trigger when heading is near the top of the viewport
 79 |     const options = {
 80 |       rootMargin: `-${headerOffset - 1}px 0px -${window.innerHeight - headerOffset - 50}px 0px`, // Adjust bottom margin as needed
 81 |       threshold: 0, // Trigger as soon as any part enters/leaves the rootMargin
 82 |     };
 83 | 
 84 |     observer = new IntersectionObserver(entries => {
 85 |       // Find the topmost visible entry
 86 |       let topmostVisibleEntry: IntersectionObserverEntry | null = null;
 87 |       entries.forEach(entry => {
 88 |         if (entry.isIntersecting) {
 89 |           // Prioritize the entry closest to the top boundary defined by rootMargin
 90 |           if (!topmostVisibleEntry || entry.boundingClientRect.top < topmostVisibleEntry.boundingClientRect.top) {
 91 |             topmostVisibleEntry = entry;
 92 |           }
 93 |         }
 94 |       });
 95 | 
 96 |       if (topmostVisibleEntry) {
 97 |         activeHeadingId.value = (topmostVisibleEntry as IntersectionObserverEntry).target.id;
 98 |       } else {
 99 |         // If no entry is intersecting within the top margin, check if we scrolled past the first item
100 |         if (tocItems.value.length > 0 && window.scrollY > document.getElementById(tocItems.value[0].id)!.offsetTop) {
101 |           // Potentially keep the last active ID, or find the last item scrolled past
102 |           // For simplicity, let's just keep the *last* one that *was* active if nothing is currently in the top zone
103 |           // activeHeadingId.value remains unchanged unless explicitly cleared or updated
104 |         } else {
105 |           // Scrolled to the very top above the first item
106 |           activeHeadingId.value = null;
107 |         }
108 |       }
109 |     }, options);
110 | 
111 |     elements.forEach(el => observer!.observe(el));
112 |   };
113 | 
114 |   // Computed property for the "current section name" shown in mobile/dropdown
115 |   const currentSectionName = computed(() => {
116 |     if (!activeHeadingId.value) {
117 |       return 'on this page'; // Default text
118 |     }
119 |     const activeItem = tocItems.value.find(item => item.id === activeHeadingId.value);
120 |     // Maybe find the parent H2 if the active item is H3/topic? Depends on desired UX.
121 |     // For now, just use the active item's text.
122 |     return activeItem ? activeItem.text.toLowerCase() : 'on this page';
123 |   });
124 | 
125 |   const scrollToSection = (id: string) => {
126 |     const el = document.getElementById(id);
127 |     if (el) {
128 |       const elementPosition = el.getBoundingClientRect().top;
129 |       const offsetPosition = elementPosition + window.pageYOffset - headerOffset;
130 |       window.scrollTo({ top: offsetPosition, behavior: 'smooth' });
131 |       mobileMenuOpen.value = false; // Close mobile menu on selection
132 |     }
133 |   };
134 | 
135 |   onMounted(() => {
136 |     // Ensure DOM is ready before querying elements
137 |     nextTick(() => {
138 |       generateToc();
139 |     });
140 |   });
141 | 
142 |   onUnmounted(() => {
143 |     if (observer) {
144 |       observer.disconnect();
145 |     }
146 |   });
147 | 
148 |   // Optional: Watch for content changes if the article content could be dynamic
149 |   // watch(contentRef, () => { nextTick(generateToc); });
150 | 
151 |   return {
152 |     tocItems,
153 |     activeHeadingId,
154 |     currentSectionName,
155 |     mobileMenuOpen,
156 |     generateToc, // Expose if manual regeneration is needed
157 |     scrollToSection,
158 |   };
159 | }
160 | 


--------------------------------------------------------------------------------
/apps/backend/src/lib/articleFetchers.ts:
--------------------------------------------------------------------------------
  1 | import { err, ok } from 'neverthrow';
  2 | import { z } from 'zod';
  3 | import type { Env } from '../index';
  4 | import { parseArticle } from './parsers';
  5 | import { tryCatchAsync } from './tryCatchAsync';
  6 | import { userAgents } from './utils';
  7 | 
  8 | /**
  9 |  * Schema for validating responses from the Cloudflare Browser Rendering API
 10 |  */
 11 | export const articleSchema = z.object({
 12 |   status: z.coerce.boolean(),
 13 |   errors: z.array(z.object({ code: z.number(), message: z.string() })).optional(),
 14 |   result: z.string(),
 15 | });
 16 | 
 17 | /**
 18 |  * Fetches an article using Cloudflare's Browser Rendering API
 19 |  *
 20 |  * This method simulates a real browser to handle modern websites with complex
 21 |  * JavaScript, cookie consent walls, paywalls, and other obstacles that might
 22 |  * prevent content scraping with a regular HTTP client.
 23 |  *
 24 |  * @param env Application environment with Cloudflare credentials
 25 |  * @param url URL of the article to fetch
 26 |  * @returns Result containing either the parsed article content or an error object
 27 |  */
 28 | export async function getArticleWithBrowser(env: Env, url: string) {
 29 |   const response = await tryCatchAsync(
 30 |     fetch(`https://api.cloudflare.com/client/v4/accounts/${env.CLOUDFLARE_ACCOUNT_ID}/browser-rendering/content`, {
 31 |       method: 'POST',
 32 |       headers: {
 33 |         'Content-Type': 'application/json',
 34 |         Authorization: `Bearer ${env.CLOUDFLARE_API_TOKEN}`,
 35 |       },
 36 |       body: JSON.stringify({
 37 |         url,
 38 |         userAgent: userAgents[Math.floor(Math.random() * userAgents.length)],
 39 |         setExtraHTTPHeaders: {
 40 |           Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
 41 |           'Accept-Encoding': 'gzip, deflate, br',
 42 |           Connection: 'keep-alive',
 43 |           DNT: '1',
 44 |           'Accept-Language': 'en-US,en;q=0.5',
 45 |           'Sec-Fetch-Dest': 'document',
 46 |           'Sec-Fetch-Mode': 'navigate',
 47 |           'Sec-Fetch-Site': 'none',
 48 |           'Sec-Fetch-User': '?1',
 49 |           'Upgrade-Insecure-Requests': '1',
 50 |         },
 51 |         cookies: [],
 52 |         gotoOptions: {
 53 |           waitUntil: 'networkidle0',
 54 |           timeout: 30000,
 55 |           referer: 'https://www.google.com/',
 56 |         },
 57 |         viewport: {
 58 |           width: 390,
 59 |           height: 844,
 60 |           deviceScaleFactor: 3,
 61 |           isMobile: true,
 62 |           hasTouch: true,
 63 |           isLandscape: false,
 64 |         },
 65 |         rejectResourceTypes: ['image', 'media', 'font', 'websocket'],
 66 |         bestAttempt: true,
 67 |         // all of these are very brittle, like all script tag usage
 68 |         // this mostly works for now but good to revisit every once in a while
 69 |         addScriptTag: [
 70 |           // Ensures consistent date formatting by overriding Intl.DateTimeFormat
 71 |           // to always use 'en-US' locale regardless of browser settings
 72 |           // This prevents inconsistent date parsing across different environments
 73 |           {
 74 |             content:
 75 |               "(() => { Object.defineProperty(Intl, 'DateTimeFormat', { \n    writable: true, \n    value: new Proxy(Intl.DateTimeFormat, { \n      construct: (target, args) => new target('en-US', Object.assign({}, args[1])) \n    })\n  }); })();",
 76 |           },
 77 |           // Automatically accepts cookie consent popups by finding buttons that contain
 78 |           // 'accept' and 'cookie'/'consent' text, then programmatically clicking the first match
 79 |           // This bypasses cookie walls that would otherwise block content access
 80 |           {
 81 |             content:
 82 |               "(() => { const cookieButtons = Array.from(document.querySelectorAll(\'button, a\')).filter(el => el.textContent.toLowerCase().includes(\'accept\') && (el.textContent.toLowerCase().includes(\'cookie\') || el.textContent.toLowerCase().includes(\'consent\'))); if(cookieButtons.length > 0) { cookieButtons[0].click(); } })();",
 83 |           },
 84 |           // Circumvents paywalls by:
 85 |           // 1. Removing elements with paywall/subscribe identifiers in id/class
 86 |           // 2. Removing modal overlays and fixed position barriers
 87 |           // 3. Restoring normal page scroll behavior
 88 |           // This targets common paywall implementations across various sites
 89 |           {
 90 |             content:
 91 |               "(() => { const paywallElements = Array.from(document.querySelectorAll(\'div, section\')).filter(el => el.id.toLowerCase().includes(\'paywall\') || el.className.toLowerCase().includes(\'paywall\') || el.id.toLowerCase().includes(\'subscribe\') || el.className.toLowerCase().includes(\'subscribe\')); paywallElements.forEach(el => el.remove()); document.querySelectorAll(\'.modal, .modal-backdrop, body > div[style*=\"position: fixed\"]\').forEach(el => el.remove()); document.body.style.overflow = \'auto\'; })();",
 92 |           },
 93 |           // Cleans up the DOM by removing non-content elements that interfere with article parsing:
 94 |           // - Scripts, styles, iframes that might contain tracking or ads
 95 |           // - Ad containers and advertisement blocks
 96 |           // - Social media widgets and sharing buttons
 97 |           // - Comments sections, navbars, headers, footers (except those within articles)
 98 |           // - Various UI elements not relevant to the core article content
 99 |           {
100 |             content:
101 |               '(() => { document.querySelectorAll(\'script, style, iframe, .ad, .ads, .advertisement, [class*="social"], [id*="social"], .share, .comments, aside, nav, header:not(article header), footer:not(article footer), [role="complementary"], [role="banner"], [role="navigation"], form, .related, .recommended, .newsletter, .subscription\').forEach(el => el.remove()); })();',
102 |           },
103 |           // Simplifies the DOM by stripping all HTML attributes except essential ones:
104 |           // - href: preserves links
105 |           // - src: maintains images and embedded content
106 |           // - alt: keeps accessibility text for images
107 |           // - title: retains tooltip text
108 |           // This reduces noise and potential tracking parameters in the parsed content
109 |           {
110 |             content:
111 |               "(() => { const keepAttributes = [\'href\', \'src\', \'alt\', \'title\']; document.querySelectorAll(\'*\').forEach(el => { [...el.attributes].forEach(attr => { if (!keepAttributes.includes(attr.name.toLowerCase())) { el.removeAttribute(attr.name); }}); }); })();",
112 |           },
113 |           // Recursively removes empty elements to clean up the DOM structure
114 |           // Continues removing elements until no more empty ones are found
115 |           // This eliminates spacing artifacts and layout containers that serve no content purpose
116 |           {
117 |             content:
118 |               "(() => { function removeEmpty() { let removed = 0; document.querySelectorAll(\'div, span, p, section, article\').forEach(el => { if (!el.hasChildNodes() || el.textContent.trim() === \'\') { el.remove(); removed++; } }); return removed; } let pass; do { pass = removeEmpty(); } while(pass > 0); })();",
119 |           },
120 |           // Removes simple meta tags that provide minimal information value
121 |           // Meta tags with only one attribute are typically not useful for content analysis
122 |           // This helps reduce noise in the document head
123 |           {
124 |             content:
125 |               "(() => { document.querySelectorAll(\'meta\').forEach(meta => { if (meta.attributes.length <= 1) { meta.remove(); } }); })();",
126 |           },
127 |         ],
128 |         waitForSelector: {
129 |           selector: 'article, .article, .content, .post, #article, main',
130 |           timeout: 5000,
131 |         },
132 |       }),
133 |     })
134 |   );
135 |   if (response.isErr()) {
136 |     return err({ type: 'FETCH_ERROR', error: response.error });
137 |   }
138 | 
139 |   const parsedPageContent = articleSchema.safeParse(await response.value.json());
140 |   if (parsedPageContent.success === false) {
141 |     return err({ type: 'VALIDATION_ERROR', error: parsedPageContent.error });
142 |   }
143 | 
144 |   const articleResult = parseArticle({ html: parsedPageContent.data.result });
145 |   if (articleResult.isErr()) {
146 |     return err({ type: 'PARSE_ERROR', error: articleResult.error });
147 |   }
148 | 
149 |   return ok(articleResult.value);
150 | }
151 | 
152 | /**
153 |  * Fetches an article using a simple HTTP request
154 |  *
155 |  * This is a lighter-weight alternative to browser rendering that works for
156 |  * simpler websites that don't rely heavily on client-side JavaScript for content.
157 |  *
158 |  * @param url URL of the article to fetch
159 |  * @returns Result containing either the parsed article content or an error object
160 |  */
161 | export async function getArticleWithFetch(url: string) {
162 |   const response = await tryCatchAsync(
163 |     fetch(url, {
164 |       method: 'GET',
165 |       headers: {
166 |         'User-Agent': userAgents[Math.floor(Math.random() * userAgents.length)],
167 |         Referer: 'https://www.google.com/',
168 |       },
169 |     })
170 |   );
171 |   if (response.isErr()) {
172 |     return err({ type: 'FETCH_ERROR', error: response.error });
173 |   }
174 | 
175 |   const articleResult = parseArticle({ html: await response.value.text() });
176 |   if (articleResult.isErr()) {
177 |     return err({ type: 'PARSE_ERROR', error: articleResult.error });
178 |   }
179 | 
180 |   return ok(articleResult.value);
181 | }
182 | 


--------------------------------------------------------------------------------
/apps/backend/test/rateLimiter.spec.ts:
--------------------------------------------------------------------------------
  1 | import type { WorkflowStep } from 'cloudflare:workers';
  2 | import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
  3 | import { DomainRateLimiter } from '../src/lib/rateLimiter';
  4 | 
  5 | type BatchItem = {
  6 |   id: number | string;
  7 |   url: string;
  8 | };
  9 | 
 10 | describe('DomainRateLimiter', () => {
 11 |   // Mock 'step.sleep' to track calls and resolve immediately or after checking delays
 12 |   let mockSleep: ReturnType<typeof vi.fn>;
 13 |   let step: WorkflowStep; // Mocked step object
 14 |   let rateLimiter: DomainRateLimiter<BatchItem>;
 15 |   let processItem: ReturnType<typeof vi.fn>;
 16 | 
 17 |   beforeEach(() => {
 18 |     mockSleep = vi.fn().mockImplementation((reason, ms) => {
 19 |       // Advance time when sleep is called
 20 |       vi.advanceTimersByTime(ms);
 21 |       return Promise.resolve();
 22 |     });
 23 |     step = { sleep: mockSleep } as unknown as WorkflowStep;
 24 |     // Setup rateLimiter with specific options for testing
 25 |     rateLimiter = new DomainRateLimiter({ maxConcurrent: 2, globalCooldownMs: 100, domainCooldownMs: 200 });
 26 |     processItem = vi.fn().mockImplementation(async (item: BatchItem) => `processed-${item.id}`);
 27 | 
 28 |     // Mock Date.now() to control time
 29 |     vi.useFakeTimers();
 30 |     vi.setSystemTime(new Date(2023, 1, 1, 0, 0, 0));
 31 |   });
 32 | 
 33 |   afterEach(() => {
 34 |     vi.useRealTimers();
 35 |   });
 36 | 
 37 |   it('should process all items concurrently if limits are not hit', async () => {
 38 |     const items = [
 39 |       { id: 1, url: 'https://domain1.com/page1' },
 40 |       { id: 2, url: 'https://domain2.com/page1' },
 41 |     ];
 42 | 
 43 |     const results = await rateLimiter.processBatch(items, step, processItem);
 44 | 
 45 |     expect(results).toHaveLength(2);
 46 |     expect(results).toEqual(['processed-1', 'processed-2']);
 47 |     expect(processItem).toHaveBeenCalledTimes(2);
 48 |     expect(mockSleep).not.toHaveBeenCalled();
 49 |   });
 50 | 
 51 |   it('should not exceed maxConcurrent processing simultaneously', async () => {
 52 |     const items = [
 53 |       { id: 1, url: 'https://domain1.com/page1' },
 54 |       { id: 2, url: 'https://domain2.com/page1' },
 55 |       { id: 3, url: 'https://domain3.com/page1' },
 56 |     ];
 57 | 
 58 |     await rateLimiter.processBatch(items, step, processItem);
 59 | 
 60 |     // Should process first two items concurrently (maxConcurrent: 2), then apply global cooldown
 61 |     expect(mockSleep).toHaveBeenCalledWith(expect.any(String), 100);
 62 |     expect(processItem).toHaveBeenCalledTimes(3);
 63 |   });
 64 | 
 65 |   it('should call step.sleep for globalCooldownMs between batches if needed', async () => {
 66 |     const items = [
 67 |       { id: 1, url: 'https://domain1.com/page1' },
 68 |       { id: 2, url: 'https://domain2.com/page1' },
 69 |       { id: 3, url: 'https://domain3.com/page1' },
 70 |       { id: 4, url: 'https://domain4.com/page1' },
 71 |       { id: 5, url: 'https://domain5.com/page1' },
 72 |     ];
 73 | 
 74 |     await rateLimiter.processBatch(items, step, processItem);
 75 | 
 76 |     // Should have 3 batches: 2 items, 2 items, 1 item
 77 |     // Sleep should be called twice for global cooldown between batches
 78 |     expect(mockSleep).toHaveBeenCalledTimes(2);
 79 |     expect(mockSleep).toHaveBeenCalledWith(expect.stringContaining('global rate limit'), 100);
 80 |   });
 81 | 
 82 |   it('should call step.sleep for domainCooldownMs if processing the same domain twice quickly', async () => {
 83 |     const items = [
 84 |       { id: 1, url: 'https://domain1.com/page1' },
 85 |       { id: 2, url: 'https://domain1.com/page2' }, // Same domain
 86 |     ];
 87 | 
 88 |     // Process first item
 89 |     await rateLimiter.processBatch([items[0]], step, processItem);
 90 | 
 91 |     // Reset mock to track calls separately
 92 |     mockSleep.mockClear();
 93 |     processItem.mockClear();
 94 | 
 95 |     // Advance time but not enough to clear domain cooldown
 96 |     vi.advanceTimersByTime(100);
 97 | 
 98 |     // Process second item
 99 |     await rateLimiter.processBatch([items[1]], step, processItem);
100 | 
101 |     // Should wait for domain cooldown
102 |     expect(mockSleep).toHaveBeenCalledWith(expect.stringContaining('waiting for domain cooldown'), expect.any(Number));
103 |   });
104 | 
105 |   it('should allow different domains to be processed concurrently without domain cooldown', async () => {
106 |     const items = [
107 |       { id: 1, url: 'https://domain1.com/page1' },
108 |       { id: 2, url: 'https://domain2.com/page1' },
109 |     ];
110 | 
111 |     await rateLimiter.processBatch(items, step, processItem);
112 | 
113 |     // Should process both concurrently without domain cooldown
114 |     expect(processItem).toHaveBeenCalledTimes(2);
115 |     expect(mockSleep).not.toHaveBeenCalled();
116 |   });
117 | 
118 |   it('should skip items with invalid URLs without throwing an error', async () => {
119 |     const items = [
120 |       { id: 1, url: 'https://domain1.com/page1' },
121 |       { id: 2, url: 'invalid-url' }, // Invalid URL
122 |     ];
123 | 
124 |     const results = await rateLimiter.processBatch(items, step, processItem);
125 | 
126 |     // Should only process valid URLs
127 |     expect(results).toHaveLength(1);
128 |     expect(results).toEqual(['processed-1']);
129 |     expect(processItem).toHaveBeenCalledTimes(1);
130 |   });
131 | 
132 |   it('should call step.sleep with calculated wait time if all available items are domain-limited', async () => {
133 |     // Process first item
134 |     await rateLimiter.processBatch([{ id: 1, url: 'https://domain1.com/page1' }], step, processItem);
135 | 
136 |     // Reset mocks
137 |     mockSleep.mockClear();
138 |     processItem.mockClear();
139 | 
140 |     // Advance time to 100ms
141 |     vi.advanceTimersByTime(100);
142 | 
143 |     // Try to process the same domain again (should need to wait 100ms more)
144 |     await rateLimiter.processBatch([{ id: 2, url: 'https://domain1.com/page2' }], step, processItem);
145 | 
146 |     // Should wait for remaining time on domain cooldown (200ms - 100ms = 100ms)
147 |     expect(mockSleep).toHaveBeenCalledWith(expect.stringContaining('waiting for domain cooldown'), expect.any(Number));
148 | 
149 |     // Should eventually process the item
150 |     expect(processItem).toHaveBeenCalledTimes(1);
151 |   });
152 | 
153 |   it('should call the processItem function with the correct item and extracted domain', async () => {
154 |     const item = { id: 1, url: 'https://example.com/page1' };
155 | 
156 |     await rateLimiter.processBatch([item], step, processItem);
157 | 
158 |     expect(processItem).toHaveBeenCalledWith(item, 'example.com');
159 |   });
160 | 
161 |   it('should return results for all successfully processed items', async () => {
162 |     const items = [
163 |       { id: 1, url: 'https://domain1.com/page1' },
164 |       { id: 2, url: 'https://domain2.com/page1' },
165 |     ];
166 | 
167 |     const results = await rateLimiter.processBatch(items, step, processItem);
168 | 
169 |     expect(results).toEqual(['processed-1', 'processed-2']);
170 |   });
171 | 
172 |   it('should handle errors during processItem gracefully and continue processing others', async () => {
173 |     const items = [
174 |       { id: 1, url: 'https://domain1.com/page1' },
175 |       { id: 2, url: 'https://domain2.com/page1' },
176 |     ];
177 | 
178 |     // Make the first item fail
179 |     processItem.mockImplementation(async (item: BatchItem) => {
180 |       if (item.id === 1) throw new Error('Processing failed');
181 |       return `processed-${item.id}`;
182 |     });
183 | 
184 |     const results = await rateLimiter.processBatch(items, step, processItem);
185 | 
186 |     // Should have only the successful result
187 |     expect(results).toEqual(['processed-2']);
188 |     expect(processItem).toHaveBeenCalledTimes(2);
189 |   });
190 | 
191 |   it('should update internal lastDomainAccess times correctly', async () => {
192 |     const items = [
193 |       { id: 1, url: 'https://domain1.com/page1' },
194 |       { id: 2, url: 'https://domain1.com/page2' }, // Same domain
195 |     ];
196 | 
197 |     // Process first item
198 |     await rateLimiter.processBatch([items[0]], step, processItem);
199 | 
200 |     // Advance time past domain cooldown
201 |     vi.advanceTimersByTime(250);
202 | 
203 |     // Reset mock to track calls separately
204 |     mockSleep.mockClear();
205 |     processItem.mockClear();
206 | 
207 |     // Process second item
208 |     await rateLimiter.processBatch([items[1]], step, processItem);
209 | 
210 |     // Should not wait for domain cooldown since time has advanced past cooldown period
211 |     expect(mockSleep).not.toHaveBeenCalled();
212 |     expect(processItem).toHaveBeenCalledTimes(1);
213 |   });
214 | 
215 |   it('should only wait for cooldowns of domains with pending items', async () => {
216 |     // First, process items from two different domains
217 |     await rateLimiter.processBatch(
218 |       [
219 |         { id: 1, url: 'https://domain1.com/page1' },
220 |         { id: 2, url: 'https://domain2.com/page1' },
221 |       ],
222 |       step,
223 |       processItem
224 |     );
225 | 
226 |     // Reset mocks
227 |     mockSleep.mockClear();
228 |     processItem.mockClear();
229 | 
230 |     // Advance time partially through cooldown period
231 |     vi.advanceTimersByTime(50);
232 | 
233 |     // Set up domain1 with a much longer remaining cooldown (by manipulating lastDomainAccess)
234 |     // @ts-expect-error accessing private property for testing
235 |     rateLimiter.lastDomainAccess.set('domain1.com', Date.now());
236 | 
237 |     // Now process only domain2 item
238 |     await rateLimiter.processBatch([{ id: 3, url: 'https://domain2.com/page2' }], step, processItem);
239 | 
240 |     // Should wait for domain2's cooldown (150ms) not domain1's longer cooldown (200ms)
241 |     expect(mockSleep).toHaveBeenCalledWith(expect.stringContaining('waiting for domain cooldown'), expect.any(Number));
242 | 
243 |     // Verify the wait time is for domain2 not domain1
244 |     const sleepTime = mockSleep.mock.calls[0][1];
245 |     expect(sleepTime).toBe(500); // There's a minimum 500ms wait time enforced in the code
246 | 
247 |     // Should eventually process the item
248 |     expect(processItem).toHaveBeenCalledTimes(1);
249 |   });
250 | });
251 | 


--------------------------------------------------------------------------------
/apps/backend/src/routers/durableObjects.router.ts:
--------------------------------------------------------------------------------
  1 | import { zValidator } from '@hono/zod-validator';
  2 | import { $data_sources, $ingested_items, eq, isNull } from '@meridian/database';
  3 | import { Hono } from 'hono';
  4 | import { z } from 'zod';
  5 | import type { HonoEnv } from '../app';
  6 | import { Logger } from '../lib/logger';
  7 | import { tryCatchAsync } from '../lib/tryCatchAsync';
  8 | import { getDb, hasValidAuthToken } from '../lib/utils';
  9 | 
 10 | const logger = new Logger({ router: 'durable-objects' });
 11 | 
 12 | const route = new Hono<HonoEnv>()
 13 |   // handle DO-specific routes
 14 |   .get(
 15 |     '/source/:sourceId/*',
 16 |     zValidator(
 17 |       'param',
 18 |       z.object({
 19 |         sourceId: z.string().min(1, 'Source ID is required'),
 20 |       })
 21 |     ),
 22 |     async c => {
 23 |       const { sourceId } = c.req.valid('param');
 24 |       const doId = c.env.DATA_SOURCE_INGESTOR.idFromName(decodeURIComponent(sourceId));
 25 |       const stub = c.env.DATA_SOURCE_INGESTOR.get(doId);
 26 | 
 27 |       // reconstruct path for the DO
 28 |       const url = new URL(c.req.url);
 29 |       const pathParts = url.pathname.split('/');
 30 |       const doPath = `/${pathParts.slice(4).join('/')}`;
 31 |       const doUrl = new URL(doPath + url.search, 'http://do');
 32 | 
 33 |       const doRequest = new Request(doUrl.toString(), c.req.raw);
 34 |       return stub.fetch(doRequest);
 35 |     }
 36 |   )
 37 |   // admin endpoints
 38 |   .post(
 39 |     '/admin/source/:sourceId/init',
 40 |     zValidator(
 41 |       'param',
 42 |       z.object({
 43 |         sourceId: z.string().min(1, 'Source ID is required'),
 44 |       })
 45 |     ),
 46 |     async c => {
 47 |       // auth check
 48 |       if (!hasValidAuthToken(c)) {
 49 |         return c.json({ error: 'Unauthorized' }, 401);
 50 |       }
 51 | 
 52 |       const initLogger = logger.child({ operation: 'init-source' });
 53 |       const { sourceId } = c.req.valid('param');
 54 | 
 55 |       const db = getDb(c.env.HYPERDRIVE);
 56 | 
 57 |       // Get the source first
 58 |       const sourceResult = await tryCatchAsync(
 59 |         db.query.$data_sources.findFirst({
 60 |           where: eq($data_sources.id, Number(sourceId)),
 61 |         })
 62 |       );
 63 | 
 64 |       if (sourceResult.isErr()) {
 65 |         const error = sourceResult.error instanceof Error ? sourceResult.error : new Error(String(sourceResult.error));
 66 |         initLogger.error('Failed to fetch source', { sourceId }, error);
 67 |         return c.json({ error: 'Failed to fetch source' }, 500);
 68 |       }
 69 | 
 70 |       const source = sourceResult.value;
 71 |       if (!source) {
 72 |         return c.json({ error: 'Source not found' }, 404);
 73 |       }
 74 | 
 75 |       // Initialize the DO
 76 |       const doId = c.env.DATA_SOURCE_INGESTOR.idFromName(source.config.config.url);
 77 |       const stub = c.env.DATA_SOURCE_INGESTOR.get(doId);
 78 | 
 79 |       const initResult = await tryCatchAsync(
 80 |         stub.initialize({
 81 |           id: source.id,
 82 |           source_type: source.source_type,
 83 |           config: source.config,
 84 |           config_version_hash: source.config_version_hash,
 85 |           scrape_frequency_tier: source.scrape_frequency_minutes,
 86 |         })
 87 |       );
 88 |       if (initResult.isErr()) {
 89 |         const error = initResult.error instanceof Error ? initResult.error : new Error(String(initResult.error));
 90 |         initLogger.error('Failed to initialize source DO', { sourceId, url: source.config.config.url }, error);
 91 |         return c.json({ error: 'Failed to initialize source DO' }, 500);
 92 |       }
 93 | 
 94 |       initLogger.info('Successfully initialized source DO', { sourceId, url: source.config.config.url });
 95 |       return c.json({ success: true });
 96 |     }
 97 |   )
 98 |   .post('/admin/initialize-dos', async c => {
 99 |     // auth check
100 |     if (!hasValidAuthToken(c)) {
101 |       return c.json({ error: 'Unauthorized' }, 401);
102 |     }
103 | 
104 |     const initLogger = logger.child({ operation: 'initialize-dos' });
105 |     initLogger.info('Initializing SourceScraperDOs from database');
106 | 
107 |     const db = getDb(c.env.HYPERDRIVE);
108 | 
109 |     // Get batch size from query params, default to 100
110 |     const batchSize = Number(c.req.query('batchSize')) || 100;
111 |     initLogger.info('Using batch size', { batchSize });
112 | 
113 |     const allSourcesResult = await tryCatchAsync(
114 |       db
115 |         .select({
116 |           id: $data_sources.id,
117 |           source_type: $data_sources.source_type,
118 |           config: $data_sources.config,
119 |           config_version_hash: $data_sources.config_version_hash,
120 |           scrape_frequency_tier: $data_sources.scrape_frequency_minutes,
121 |         })
122 |         .from($data_sources)
123 |         .where(isNull($data_sources.do_initialized_at))
124 |     );
125 |     if (allSourcesResult.isErr()) {
126 |       const error =
127 |         allSourcesResult.error instanceof Error ? allSourcesResult.error : new Error(String(allSourcesResult.error));
128 |       initLogger.error('Failed to fetch sources from database', undefined, error);
129 |       return c.json({ error: 'Failed to fetch sources from database' }, 500);
130 |     }
131 | 
132 |     const allSources = allSourcesResult.value;
133 |     initLogger.info('Sources fetched from database', { source_count: allSources.length });
134 | 
135 |     // Process sources in batches
136 |     let processedCount = 0;
137 |     let successCount = 0;
138 | 
139 |     // Create batches of sources
140 |     const batches = [];
141 |     for (let i = 0; i < allSources.length; i += batchSize) {
142 |       batches.push(allSources.slice(i, i + batchSize));
143 |     }
144 | 
145 |     // Process each batch sequentially
146 |     for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
147 |       const batch = batches[batchIndex];
148 |       initLogger.info('Processing batch', { batchIndex: batchIndex + 1, batchSize: batch.length });
149 | 
150 |       const batchResults = await Promise.all(
151 |         batch.map(async source => {
152 |           const sourceLogger = initLogger.child({ source_id: source.id, url: source.config.config.url });
153 |           const doId = c.env.DATA_SOURCE_INGESTOR.idFromName(source.config.config.url);
154 |           const stub = c.env.DATA_SOURCE_INGESTOR.get(doId);
155 | 
156 |           sourceLogger.debug('Initializing DO');
157 |           const result = await tryCatchAsync(stub.initialize(source));
158 |           if (result.isErr()) {
159 |             const error = result.error instanceof Error ? result.error : new Error(String(result.error));
160 |             sourceLogger.error('Failed to initialize DO', undefined, error);
161 |             return false;
162 |           }
163 | 
164 |           sourceLogger.debug('Successfully initialized DO');
165 |           return true;
166 |         })
167 |       );
168 | 
169 |       processedCount += batch.length;
170 |       successCount += batchResults.filter(success => success).length;
171 | 
172 |       initLogger.info('Batch completed', {
173 |         batchIndex: batchIndex + 1,
174 |         batchSuccessful: batchResults.filter(success => success).length,
175 |         totalProcessed: processedCount,
176 |         totalSuccessful: successCount,
177 |       });
178 |     }
179 | 
180 |     initLogger.info('Initialization process complete', { total: allSources.length, successful: successCount });
181 |     return c.json({ initialized: successCount, total: allSources.length });
182 |   })
183 |   .delete(
184 |     '/admin/source/:sourceId',
185 |     zValidator(
186 |       'param',
187 |       z.object({
188 |         sourceId: z.string().min(1, 'Source ID is required'),
189 |       })
190 |     ),
191 |     async c => {
192 |       // auth check
193 |       if (!hasValidAuthToken(c)) {
194 |         return c.json({ error: 'Unauthorized' }, 401);
195 |       }
196 | 
197 |       const deleteLogger = logger.child({ operation: 'delete-source' });
198 |       const { sourceId } = c.req.valid('param');
199 | 
200 |       const db = getDb(c.env.HYPERDRIVE);
201 | 
202 |       // Get the source first to get its URL
203 |       const sourceResult = await tryCatchAsync(
204 |         db.query.$data_sources.findFirst({
205 |           where: eq($data_sources.id, Number(sourceId)),
206 |         })
207 |       );
208 | 
209 |       if (sourceResult.isErr()) {
210 |         const error = sourceResult.error instanceof Error ? sourceResult.error : new Error(String(sourceResult.error));
211 |         deleteLogger.error('Failed to fetch source', { sourceId }, error);
212 |         return c.json({ error: 'Failed to fetch source' }, 500);
213 |       }
214 | 
215 |       const source = sourceResult.value;
216 |       if (!source) {
217 |         return c.json({ error: 'Source not found' }, 404);
218 |       }
219 | 
220 |       // Delete the durable object first
221 |       const doId = c.env.DATA_SOURCE_INGESTOR.idFromName(source.config.config.url);
222 |       const stub = c.env.DATA_SOURCE_INGESTOR.get(doId);
223 | 
224 |       const deleteResult = await tryCatchAsync(
225 |         stub.fetch('http://do/delete', {
226 |           method: 'DELETE',
227 |         })
228 |       );
229 |       if (deleteResult.isErr()) {
230 |         const error = deleteResult.error instanceof Error ? deleteResult.error : new Error(String(deleteResult.error));
231 |         deleteLogger.error('Failed to delete source DO', { sourceId, url: source.config.config.url }, error);
232 |         return c.json({ error: 'Failed to delete source DO' }, 500);
233 |       }
234 | 
235 |       // Then delete from database
236 |       // delete the articles first
237 |       const articlesResult = await tryCatchAsync(
238 |         db.delete($ingested_items).where(eq($ingested_items.data_source_id, Number(sourceId)))
239 |       );
240 |       if (articlesResult.isErr()) {
241 |         const error =
242 |           articlesResult.error instanceof Error ? articlesResult.error : new Error(String(articlesResult.error));
243 |         deleteLogger.error('Failed to delete articles', { sourceId }, error);
244 |         return c.json({ error: 'Failed to delete articles' }, 500);
245 |       }
246 | 
247 |       const dbDeleteResult = await tryCatchAsync(
248 |         db.delete($data_sources).where(eq($data_sources.id, Number(sourceId)))
249 |       );
250 |       if (dbDeleteResult.isErr()) {
251 |         const error =
252 |           dbDeleteResult.error instanceof Error ? dbDeleteResult.error : new Error(String(dbDeleteResult.error));
253 |         deleteLogger.error('Failed to delete source from database', { sourceId }, error);
254 |         return c.json({ error: 'Failed to delete source from database' }, 500);
255 |       }
256 | 
257 |       deleteLogger.info('Successfully deleted source', { sourceId, url: source.config.config.url });
258 |       return c.json({ success: true });
259 |     }
260 |   );
261 | 
262 | export default route;
263 | 


--------------------------------------------------------------------------------
/apps/backend/test/fixtures/ft_com.xml:
--------------------------------------------------------------------------------
1 | 
2 | <?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0"><channel><title><![CDATA[World]]></title><description><![CDATA[World]]></description><link>https://www.ft.com/stream/82645c31-4426-4ef5-99c9-9df6e0940c00</link><generator>RSS for Node</generator><lastBuildDate>Tue, 18 Mar 2025 23:53:48 GMT</lastBuildDate><atom:link href="https://www.ft.com/stream/82645c31-4426-4ef5-99c9-9df6e0940c00?format=rss" rel="self" type="application/rss+xml"/><copyright><![CDATA[© Copyright The Financial Times Ltd 2025. "FT" and "Financial Times" are trademarks of the Financial Times. See http://www.ft.com/servicestools/help/terms#legal1 for the terms and conditions of reuse.]]></copyright><language><![CDATA[en]]></language><webMaster><![CDATA[client.support@ft.com (FT Client Support)]]></webMaster><ttl>15</ttl><category><![CDATA[Newspapers]]></category><item><title><![CDATA[‘If Trump defies a Supreme Court order, will it matter to markets?’]]></title><description><![CDATA[Uncomfortable sell-side headlines for $1,000 please Alex]]></description><link>https://www.ft.com/content/2e579290-fc0c-4b88-8703-f0bae45266d9</link><guid isPermaLink="false">2e579290-fc0c-4b88-8703-f0bae45266d9</guid><pubDate>Tue, 18 Mar 2025 23:34:47 GMT</pubDate></item><item><title><![CDATA[Putin agrees 30-day halt to strikes on Ukrainian energy infrastructure in call with Trump]]></title><description><![CDATA[Russian president sticks to tough demands for overall ceasefire and end to war]]></description><link>https://www.ft.com/content/75b37ad2-0f35-4fe7-b3cf-a36b965c9a71</link><guid isPermaLink="false">75b37ad2-0f35-4fe7-b3cf-a36b965c9a71</guid><pubDate>Tue, 18 Mar 2025 23:17:18 GMT</pubDate></item><item><title><![CDATA[Why has Netanyahu renewed Israel’s offensive against Hamas?]]></title><description><![CDATA[Israeli premier had sought to change terms of ceasefire deal, but critics say he has a personal interest in prolonging the war]]></description><link>https://www.ft.com/content/f190b582-7b24-4e30-9d9a-024bff2c6f6f</link><guid isPermaLink="false">f190b582-7b24-4e30-9d9a-024bff2c6f6f</guid><pubDate>Tue, 18 Mar 2025 22:52:03 GMT</pubDate></item><item><title><![CDATA[US chief justice rebukes Trump after president’s threat to impeach judges]]></title><description><![CDATA[John Roberts issues rare statement to say attacks on judicial branch of government are not ‘appropriate’]]></description><link>https://www.ft.com/content/86b3b77a-2986-4f0f-9475-886c846dfd68</link><guid isPermaLink="false">86b3b77a-2986-4f0f-9475-886c846dfd68</guid><pubDate>Tue, 18 Mar 2025 22:42:48 GMT</pubDate></item><item><title><![CDATA[FirstFT: US chief justice issues rare rebuke to Donald Trump]]></title><description><![CDATA[Also in today’s newsletter, Putin agrees temporary halt to attacks on Ukrainian energy infrastructure, and BYD shares hit record high]]></description><link>https://www.ft.com/content/e615e03f-c0b8-4f79-9ad8-4763a22cff87</link><guid isPermaLink="false">e615e03f-c0b8-4f79-9ad8-4763a22cff87</guid><pubDate>Tue, 18 Mar 2025 22:01:17 GMT</pubDate></item><item><title><![CDATA[Reeves to squeeze public spending further in Spring Statement ]]></title><description><![CDATA[Billions of pounds of savings sought from Whitehall budgets to fill a hole in the public finances ]]></description><link>https://www.ft.com/content/2f72d3c9-4508-40f7-a4a9-07d7b0750c71</link><guid isPermaLink="false">2f72d3c9-4508-40f7-a4a9-07d7b0750c71</guid><pubDate>Tue, 18 Mar 2025 21:34:29 GMT</pubDate></item><item><title><![CDATA[Howard Lutnick touts Elon Musk’s Starlink for US broadband scheme]]></title><description><![CDATA[Commerce secretary urges officials to consider Trump adviser’s satellite connection for rural households ]]></description><link>https://www.ft.com/content/ae99e775-cc64-4831-9ace-6853d0f457ed</link><guid isPermaLink="false">ae99e775-cc64-4831-9ace-6853d0f457ed</guid><pubDate>Tue, 18 Mar 2025 21:31:29 GMT</pubDate></item><item><title><![CDATA[The Capital One shakedown]]></title><description><![CDATA[Business-friendly-as-a-service]]></description><link>https://www.ft.com/content/3007e000-7e61-4e70-86e8-07c52df6b52f</link><guid isPermaLink="false">3007e000-7e61-4e70-86e8-07c52df6b52f</guid><pubDate>Tue, 18 Mar 2025 20:35:56 GMT</pubDate></item><item><title><![CDATA[Wall Street stocks slide as sell-off in tech shares picks up pace]]></title><description><![CDATA[Tesla sinks as investors shift away from riskier holdings]]></description><link>https://www.ft.com/content/ed38a070-38d3-4ba4-908c-aeaba8a8f185</link><guid isPermaLink="false">ed38a070-38d3-4ba4-908c-aeaba8a8f185</guid><pubDate>Tue, 18 Mar 2025 20:05:36 GMT</pubDate></item><item><title><![CDATA[Netanyahu says Israel has resumed ‘fighting with force’ against Hamas]]></title><description><![CDATA[Israeli military’s renewed air strikes in Gaza kill more than 400 people, say Palestinian officials]]></description><link>https://www.ft.com/content/880503da-a915-4fe5-ad5e-deab8a00b669</link><guid isPermaLink="false">880503da-a915-4fe5-ad5e-deab8a00b669</guid><pubDate>Tue, 18 Mar 2025 20:00:03 GMT</pubDate></item><item><title><![CDATA[Erdoğan’s main rival risks ban from Turkish vote after degree annulled]]></title><description><![CDATA[Popular Istanbul mayor faces potential disqualification from elections amid crackdown on opposition]]></description><link>https://www.ft.com/content/a7b053f8-9762-4dd5-a786-25071d4cc233</link><guid isPermaLink="false">a7b053f8-9762-4dd5-a786-25071d4cc233</guid><pubDate>Tue, 18 Mar 2025 19:21:18 GMT</pubDate></item><item><title><![CDATA[How the UK’s welfare cuts will change claimants’ lives]]></title><description><![CDATA[Radical changes that will cut income for 1mn people may not get many more into work, say analysts ]]></description><link>https://www.ft.com/content/131d963c-ca3d-40d5-a0f4-45ced7e72098</link><guid isPermaLink="false">131d963c-ca3d-40d5-a0f4-45ced7e72098</guid><pubDate>Tue, 18 Mar 2025 18:30:39 GMT</pubDate></item><item><title><![CDATA[Tennis stars accuse governing bodies of ‘cartel’ to deny them more prize money ]]></title><description><![CDATA[Players’ group behind lawsuit is affiliated with sports company backed by hedge fund tycoon Bill Ackman]]></description><link>https://www.ft.com/content/06b2464a-d913-4ca0-9d50-cfd89f580b72</link><guid isPermaLink="false">06b2464a-d913-4ca0-9d50-cfd89f580b72</guid><pubDate>Tue, 18 Mar 2025 18:25:08 GMT</pubDate></item><item><title><![CDATA[Former Russian politician goes on trial in UK for breaching sanctions]]></title><description><![CDATA[The ex-governor of Sevastopol used funds for school fees and to access a Mercedes-Benz, court hears  ]]></description><link>https://www.ft.com/content/ad585753-e04b-4534-ae6b-85ff95619ed5</link><guid isPermaLink="false">ad585753-e04b-4534-ae6b-85ff95619ed5</guid><pubDate>Tue, 18 Mar 2025 18:04:16 GMT</pubDate></item><item><title><![CDATA[The White House war on federal statistics]]></title><description><![CDATA[Economists and scientists worry over disappearing data and advisory groups]]></description><link>https://www.ft.com/content/a2cbb4e6-c0d8-49ee-84db-a708fdfb7c52</link><guid isPermaLink="false">a2cbb4e6-c0d8-49ee-84db-a708fdfb7c52</guid><pubDate>Tue, 18 Mar 2025 17:40:30 GMT</pubDate></item><item><title><![CDATA[Sterling climbs above $1.30 for first time since November]]></title><description><![CDATA[Pound has rebounded this year amid stubbornly high UK inflation and a broad dollar retreat]]></description><link>https://www.ft.com/content/7e45cce6-e8bf-4a7b-baf6-2a0634f53c1e</link><guid isPermaLink="false">7e45cce6-e8bf-4a7b-baf6-2a0634f53c1e</guid><pubDate>Tue, 18 Mar 2025 17:02:50 GMT</pubDate></item><item><title><![CDATA[Germany’s parliament approves Merz’s €1tn spending plan]]></title><description><![CDATA[Chancellor-in-waiting uses outgoing Bundestag to loosen debt brake and unleash investments in defence and infrastructure]]></description><link>https://www.ft.com/content/80742c32-1af3-4881-a935-f3045df12b12</link><guid isPermaLink="false">80742c32-1af3-4881-a935-f3045df12b12</guid><pubDate>Tue, 18 Mar 2025 16:27:09 GMT</pubDate></item><item><title><![CDATA[Will anybody buy a ‘Mar-a-Lago accord’?]]></title><description><![CDATA[The US president wants both to protect domestic manufacturing and hold the dollar as the reserve currency]]></description><link>https://www.ft.com/content/9fa4a76d-60bb-45cd-aba0-744973f98dea</link><guid isPermaLink="false">9fa4a76d-60bb-45cd-aba0-744973f98dea</guid><pubDate>Tue, 18 Mar 2025 16:06:56 GMT</pubDate></item><item><title><![CDATA[Brussels seeks to purchase weapons for entire EU]]></title><description><![CDATA[European Commission president Ursula von der Leyen proposes central arms reserve capitals could draw from]]></description><link>https://www.ft.com/content/aedd1e6b-fb4f-41fd-af10-af9dce6f88dc</link><guid isPermaLink="false">aedd1e6b-fb4f-41fd-af10-af9dce6f88dc</guid><pubDate>Tue, 18 Mar 2025 15:49:36 GMT</pubDate></item><item><title><![CDATA[Europe’s moment is more than reheated Gaullism]]></title><description><![CDATA[Macron may not yet be bragging about it, but his crusade for strategic autonomy seems at long last to be vindicated]]></description><link>https://www.ft.com/content/af23ff94-5578-4d7a-b4db-47d010cb7b11</link><guid isPermaLink="false">af23ff94-5578-4d7a-b4db-47d010cb7b11</guid><pubDate>Tue, 18 Mar 2025 15:26:41 GMT</pubDate></item><item><title><![CDATA[Labour unveils disability benefits cuts that aim to save over £5bn]]></title><description><![CDATA[Move risks stoking biggest backbench rebellion of Keir Starmer’s premiership]]></description><link>https://www.ft.com/content/0cc2d3f0-7ed8-4ee4-aa41-313fd3fb4463</link><guid isPermaLink="false">0cc2d3f0-7ed8-4ee4-aa41-313fd3fb4463</guid><pubDate>Tue, 18 Mar 2025 14:58:43 GMT</pubDate></item><item><title><![CDATA[What auto bosses worry will be Trump’s next target in tariff war]]></title><description><![CDATA[Executives are concerned that levies on components could be in the president’s sights]]></description><link>https://www.ft.com/content/e50a432d-454b-4a13-924d-98657498ba81</link><guid isPermaLink="false">e50a432d-454b-4a13-924d-98657498ba81</guid><pubDate>Tue, 18 Mar 2025 14:38:31 GMT</pubDate></item><item><title><![CDATA[Hong Kong’s cargo sector faces a tariff test]]></title><description><![CDATA[City’s strategic position makes it increasingly vulnerable to escalating geopolitical tensions between US and China]]></description><link>https://www.ft.com/content/01e24e19-9987-40c1-a408-762f1d821c3b</link><guid isPermaLink="false">01e24e19-9987-40c1-a408-762f1d821c3b</guid><pubDate>Tue, 18 Mar 2025 13:18:23 GMT</pubDate></item><item><title><![CDATA[UK’s red tape cut should follow business world’s dotted lines]]></title><description><![CDATA[Companies undergoing efficiency drives know that such a mission calls for hard numbers and goals]]></description><link>https://www.ft.com/content/db69dc53-719b-4add-81f5-13543a7a839e</link><guid isPermaLink="false">db69dc53-719b-4add-81f5-13543a7a839e</guid><pubDate>Tue, 18 Mar 2025 13:12:49 GMT</pubDate></item><item><title><![CDATA[Economists forecast slowing US growth and increased inflation ]]></title><description><![CDATA[Also in today’s newsletter, US’s campaign against the Houthis and the man moving Meta to the right]]></description><link>https://www.ft.com/content/58a5a30a-485c-4ac3-9987-a3b7cef5da6c</link><guid isPermaLink="false">58a5a30a-485c-4ac3-9987-a3b7cef5da6c</guid><pubDate>Tue, 18 Mar 2025 13:00:50 GMT</pubDate></item></channel></rss>


--------------------------------------------------------------------------------