├── src ├── env.ts ├── types │ ├── globals.d.ts │ ├── Embedded.ts │ ├── AiClient.ts │ ├── Feeds.ts │ ├── Webhook.ts │ ├── OmnivoreArticle.ts │ ├── Bedrock.ts │ └── OmnivoreSchema.ts ├── clients │ ├── ai │ │ ├── client.ts │ │ ├── prompt.ts │ │ ├── openAi.ts │ │ └── bedrock.ts │ └── omnivore │ │ └── omnivore.ts ├── lib │ ├── article.ts │ ├── store │ │ ├── Store.ts │ │ ├── labelLocal.ts │ │ └── labelDynamoDBStore.ts │ ├── util │ │ ├── math.ts │ │ ├── cache.ts │ │ └── logger.ts │ ├── service │ │ └── page.ts │ ├── labels.ts │ └── embedding.ts ├── app.ts ├── lambda.ts ├── routes │ └── webhook.ts └── resources │ └── config.ts ├── .dockerignore ├── infra └── cdk │ ├── package-lock.json │ ├── .npmignore │ ├── jest.config.js │ ├── README.md │ ├── test │ └── cdk.test.ts │ ├── package.json │ ├── tsconfig.json │ ├── bin │ └── cdk.ts │ ├── lib │ └── omnivore-tagging-stack.ts │ └── cdk.json ├── .eslintignore ├── .prettierrc ├── docs ├── labels.png ├── set_webhook.png └── setting_webhook.png ├── .gitignore ├── tsconfig.json ├── .eslintrc ├── package.json └── README.md /src/env.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | cdk* -------------------------------------------------------------------------------- /src/types/globals.d.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /infra/cdk/package-lock.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | src/lib/util/logger.ts 
-------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": false, 3 | "singleQuote": true 4 | } 5 | -------------------------------------------------------------------------------- /docs/labels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Podginator/omnivore-automatic-labelling/HEAD/docs/labels.png -------------------------------------------------------------------------------- /infra/cdk/.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | .cdk.staging 6 | cdk.out 7 | -------------------------------------------------------------------------------- /docs/set_webhook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Podginator/omnivore-automatic-labelling/HEAD/docs/set_webhook.png -------------------------------------------------------------------------------- /docs/setting_webhook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Podginator/omnivore-automatic-labelling/HEAD/docs/setting_webhook.png -------------------------------------------------------------------------------- /src/types/Embedded.ts: -------------------------------------------------------------------------------- 1 | import { Embedding } from './AiClient' 2 | 3 | export type Embedded = { 4 | base: T 5 | embedding: Embedding 6 | } 7 | -------------------------------------------------------------------------------- /src/clients/ai/client.ts: -------------------------------------------------------------------------------- 1 | import { AiClient } from '../../types/AiClient' 2 | import { OpenAiClient } from './openAi' 3 | 4 | export const client: AiClient = 
new OpenAiClient() 5 | -------------------------------------------------------------------------------- /infra/cdk/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: 'node', 3 | roots: ['/test'], 4 | testMatch: ['**/*.test.ts'], 5 | transform: { 6 | '^.+\\.tsx?$': 'ts-jest' 7 | } 8 | }; 9 | -------------------------------------------------------------------------------- /src/types/AiClient.ts: -------------------------------------------------------------------------------- 1 | export type Embedding = Array 2 | export interface AiClient { 3 | getEmbeddings(text: string): Promise 4 | summarizeText(text: string): Promise 5 | tokenLimit: number 6 | embeddingLimit: number 7 | } 8 | -------------------------------------------------------------------------------- /src/clients/ai/prompt.ts: -------------------------------------------------------------------------------- 1 | export const SUMMARISE_PROMPT = (articleContent: string) => 2 | `Please create a summary of the article below. Please Do not exceed 25 words. 
Please do not add any of your own prose.\n${articleContent}\n' Here is a 25 word summary of the article:\n` 3 | -------------------------------------------------------------------------------- /src/types/Feeds.ts: -------------------------------------------------------------------------------- 1 | export type OmnivoreFeed = { 2 | id: string 3 | description?: string 4 | image?: string 5 | link: string 6 | title: string 7 | type: string 8 | } 9 | 10 | export type OmnivoreContentFeed = { 11 | feed: OmnivoreFeed 12 | content: string 13 | } 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IntelliJ IDEA 2 | .idea/ 3 | 4 | # Compiled files 5 | node_modules/ 6 | dist/ 7 | out/ 8 | 9 | # Editor-specific files 10 | *.iml 11 | *.ipr 12 | *.iws 13 | .idea/ 14 | 15 | # Logs and databases 16 | *.log 17 | *.sqlite 18 | *.sqlite3 19 | 20 | # OS-specific files 21 | .DS_Store 22 | Thumbs.db 23 | 24 | cdk.out -------------------------------------------------------------------------------- /src/types/Webhook.ts: -------------------------------------------------------------------------------- 1 | export interface OmnivorePage { 2 | type: 'page' 3 | userId: string 4 | id: string 5 | description: string 6 | title: string 7 | slug: string 8 | state: string 9 | } 10 | 11 | export interface PageWebhookInput { 12 | action: string 13 | userId: string 14 | page: OmnivorePage 15 | } 16 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json.schemastore.org/tsconfig", 3 | "display": "Node 16", 4 | "include": ["./src/**/*"], 5 | "exclude": ["./cdk/**/*"], 6 | "compilerOptions": { 7 | "outDir": "dist", 8 | "lib": ["es2020", "dom"], 9 | "module": "commonjs", 10 | "target": "es2020", 11 | 12 | "strict": true, 
13 | "esModuleInterop": true, 14 | "skipLibCheck": true, 15 | "forceConsistentCasingInFileNames": true, 16 | "experimentalDecorators": true 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/types/OmnivoreArticle.ts: -------------------------------------------------------------------------------- 1 | export type OmnivoreArticle = { 2 | slug: string 3 | title: string 4 | description: string 5 | summary: string 6 | image?: string 7 | authors: string 8 | site: string 9 | url: string 10 | publishedAt: Date 11 | type: 'community' | 'rss' 12 | feedId: string 13 | } 14 | 15 | export type RSSArticle = { 16 | title: string 17 | link: string 18 | description: string 19 | 'media:thumbnail': { '@_url': string } 20 | 'dc:creator': string 21 | pubDate: string 22 | } 23 | -------------------------------------------------------------------------------- /src/lib/article.ts: -------------------------------------------------------------------------------- 1 | import { OmnivorePage } from '../types/Webhook' 2 | import { Embedded } from '../types/Embedded' 3 | import { client as aiClient } from '../clients/ai/client' 4 | 5 | export const getArticleEmbedding = async ( 6 | article: OmnivorePage, 7 | ): Promise> => { 8 | const articleTitleAndDescription = `${article.title}: ${article.description}` 9 | const embedding = await aiClient.getEmbeddings(articleTitleAndDescription) 10 | 11 | return { 12 | base: article, 13 | embedding: embedding, 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/lib/store/Store.ts: -------------------------------------------------------------------------------- 1 | import { LocalLabelStore } from './labelLocal' 2 | import { LabelDynamoDBStore } from './labelDynamoDBStore' 3 | import { DynamoDBClient } from '@aws-sdk/client-dynamodb' 4 | 5 | export interface Store { 6 | get(): Promise 7 | put(val: T): Promise 8 | } 9 | 10 | export const store = (() => { 11 
| const localStore = new LocalLabelStore() 12 | if (process.env.DYNAMODB_TABLE_NAME != undefined) { 13 | return new LabelDynamoDBStore(localStore, new DynamoDBClient()) 14 | } 15 | 16 | return localStore 17 | })() 18 | -------------------------------------------------------------------------------- /src/types/Bedrock.ts: -------------------------------------------------------------------------------- 1 | import { Embedding } from './AiClient' 2 | 3 | export type BedrockClientParams = { 4 | region: string 5 | endpoint: string 6 | } 7 | 8 | export type BedrockClientResponse = { 9 | completion: string 10 | embedding: Embedding 11 | embeddings?: Embedding[] 12 | } 13 | 14 | export type BedrockInvokeParams = { 15 | model: string 16 | max_tokens_to_sample: number 17 | temperature: number 18 | top_k: number 19 | top_p: number 20 | stop_sequences: string[] 21 | anthropic_version?: string //TODO: Add the actual params. 22 | prompt: string 23 | } 24 | -------------------------------------------------------------------------------- /infra/cdk/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to your CDK TypeScript project 2 | 3 | This is a blank project for CDK development with TypeScript. 4 | 5 | The `cdk.json` file tells the CDK Toolkit how to execute your app. 
// 6 | 7 | ## Useful commands 8 | 9 | * `npm run build` compile typescript to js 10 | * `npm run watch` watch for changes and compile 11 | * `npm run test` perform the jest unit tests 12 | * `npx cdk deploy` deploy this stack to your default AWS account/region 13 | * `npx cdk diff` compare deployed stack with current state 14 | * `npx cdk synth` emits the synthesized CloudFormation template 15 | -------------------------------------------------------------------------------- /src/lib/util/math.ts: --------------------------------------------------------------------------------

/**
 * Euclidean (L2) length of a vector: the square root of the sum of the
 * squared components. An empty vector has length 0.
 */
function calcVectorSize(vec: number[]): number {
  return Math.sqrt(vec.reduce((accum, curr) => accum + curr * curr, 0))
}

/**
 * Cosine similarity of two equally sized vectors: their dot product divided
 * by the product of their Euclidean lengths.
 *
 * @param vec1 first vector
 * @param vec2 second vector; must have the same number of components as vec1
 * @returns the cosine of the angle between the vectors, or 0 when either
 *   vector has zero length (including two empty vectors), since the angle is
 *   undefined there and dividing would yield NaN
 * @throws RangeError when the vectors differ in length
 */
export function cosineSimilarity(vec1: number[], vec2: number[]): number {
  // A length mismatch used to produce either NaN or a silently wrong number,
  // depending on which argument was shorter. Fail loudly instead.
  if (vec1.length !== vec2.length) {
    throw new RangeError(
      `cosineSimilarity: vector lengths differ (${vec1.length} vs ${vec2.length})`,
    )
  }

  const dotProduct = vec1.reduce((accum, val, i) => accum + val * vec2[i], 0)
  const magnitude = calcVectorSize(vec1) * calcVectorSize(vec2)

  // Guard the division: a zero-length vector would otherwise give 0 / 0 = NaN.
  return magnitude === 0 ? 0 : dotProduct / magnitude
}

/**
 * Linearly rescales `value` so that `min` maps to 0 and `max` maps to 1.
 * The result is not clamped: values outside [min, max] map outside [0, 1].
 *
 * @param min lower bound of the source range
 * @param max upper bound of the source range
 * @param value the value to rescale
 * @returns the rescaled value, or 0 when `max === min` (a degenerate range
 *   would otherwise divide by zero)
 */
export function normalizeValue(min: number, max: number, value: number): number {
  const range = max - min
  if (range === 0) {
    return 0
  }

  return (value - min) / range
}

// -------------------------------------------------------------------------------- /src/lib/store/labelLocal.ts: -------------------------------------------------------------------------------- 1 | import { Store } from './Store' 2 | import { Label } from '../../types/OmnivoreSchema' 3 | import { Embedded } from '../../types/Embedded' 4 | import { Cache } from '../util/cache' 5 | 6 | export class LocalLabelStore implements Store> { 7 | private cache: Cache> 8 | 9 | constructor() { 10 | this.cache = new Cache>([], (label) => label.base.name) 11 | } 12 | 13 | async get(): Promise[]> { 14 | return this.cache.getAll() 15 | } 16 | 17 | async put(val: Embedded