├── .dockerignore ├── .eslintrc.js ├── .github └── workflows │ └── deploy.yml ├── .gitignore ├── .npmrc ├── .prettierrc ├── .vscode └── settings.json ├── LICENCE.md ├── README.md ├── apps ├── api │ ├── .env.example │ ├── .gitignore │ ├── Dockerfile │ ├── fly.toml │ ├── package.json │ ├── src │ │ ├── common │ │ │ ├── content.ts │ │ │ ├── db.ts │ │ │ ├── filter.ts │ │ │ ├── meta.ts │ │ │ ├── openai.ts │ │ │ ├── postType.ts │ │ │ ├── split.ts │ │ │ └── typesense.ts │ │ ├── crawler │ │ │ ├── main.ts │ │ │ └── routes.ts │ │ ├── index.ts │ │ └── routes │ │ │ ├── add.ts │ │ │ ├── backfill.ts │ │ │ ├── crawl.ts │ │ │ ├── getIndexedNowPages.ts │ │ │ ├── hello.ts │ │ │ └── index.ts │ └── tsconfig.json └── web │ ├── .env.example │ ├── .gitignore │ ├── .prettierignore │ ├── README.md │ ├── package.json │ ├── postcss.config.js │ ├── src │ ├── app.css │ ├── app.d.ts │ ├── app.html │ ├── common │ │ ├── cohere.ts │ │ ├── constants.ts │ │ ├── formActions.ts │ │ ├── openai.ts │ │ ├── typesense.ts │ │ └── util.ts │ ├── components │ │ ├── Form.svelte │ │ ├── Header.svelte │ │ ├── IdeaCard.svelte │ │ └── icons │ │ │ └── x.svelte │ └── routes │ │ ├── +layout.svelte │ │ ├── +page.server.ts │ │ ├── +page.svelte │ │ └── about │ │ ├── +page.server.ts │ │ └── +page.svelte │ ├── static │ ├── favicon.png │ ├── fonts │ │ ├── DMSans-VariableFont.ttf │ │ ├── PTMono-Regular.ttf │ │ └── Reckless-VariableFont.woff2 │ ├── logo.svg │ ├── metaimage.png │ ├── telephone.png │ └── typewriter.png │ ├── svelte.config.js │ ├── tailwind.config.js │ ├── tsconfig.json │ ├── vercel.json │ ├── vite.config.ts │ └── yarn.lock ├── package.json ├── packages ├── core │ ├── .env.example │ ├── .gitignore │ ├── README.md │ ├── package.json │ ├── prisma │ │ ├── migrations │ │ │ ├── 20240111115033_add_post │ │ │ │ └── migration.sql │ │ │ ├── 20240111121259_add_scrape_state │ │ │ │ └── migration.sql │ │ │ ├── 20240111152613_convert_post_date │ │ │ │ └── migration.sql │ │ │ ├── 20240111153141_convert_post_date_back │ │ │ │ 
└── migration.sql │ │ │ ├── 20240111155745_add_redirect_status │ │ │ │ └── migration.sql │ │ │ ├── 20240126145714_add_directory_scrape_state │ │ │ │ └── migration.sql │ │ │ ├── 20240128184424_add_vector_search │ │ │ │ └── migration.sql │ │ │ ├── 20240128193052_fix_search_functions │ │ │ │ └── migration.sql │ │ │ ├── 20240128193504_change_post_id │ │ │ │ └── migration.sql │ │ │ ├── 20240128200314_add_post_id │ │ │ │ └── migration.sql │ │ │ ├── 20240129141512_remove_post_id │ │ │ │ └── migration.sql │ │ │ ├── 20240129141653_edit_embeddings │ │ │ │ └── migration.sql │ │ │ ├── 20240222114727_enable_domain_scrape │ │ │ │ └── migration.sql │ │ │ ├── 20240222153334_add_submitted_domain_table │ │ │ │ └── migration.sql │ │ │ ├── 20240223142705_per_post_scrape_state │ │ │ │ └── migration.sql │ │ │ ├── 20240223150159_change_scrapestate_id │ │ │ │ └── migration.sql │ │ │ └── migration_lock.toml │ │ └── schema.prisma │ ├── src │ │ └── index.ts │ └── tsconfig.json ├── eslint-config │ ├── README.md │ ├── library.js │ ├── next.js │ ├── package.json │ └── react-internal.js └── typescript-config │ ├── base.json │ ├── nextjs.json │ ├── package.json │ └── react-library.json ├── pnpm-lock.yaml ├── pnpm-workspace.yaml ├── tsconfig.json └── turbo.json /.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | tsconfig.tsbuildinfo 4 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | // This configuration only applies to the package manager root. 
# Deploys the API app to Fly.io whenever API-relevant paths change on the default branch.
name: 🚀 Deploy

on:
  push:
    branches:
      - master
      - main

permissions:
  contents: read

env:
  FLYCTL_VERSION: 0.0.500

jobs:
  # Detect whether anything that affects the API image changed in this push.
  changes:
    runs-on: ubuntu-latest
    outputs:
      api: ${{ steps.changes.outputs.api }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v2
        id: changes
        with:
          filters: |
            api:
              - 'packages/core/**'
              - 'apps/api/**'
              - '.github/**'

  # Build and deploy the API from the monorepo root so the Dockerfile can see all workspaces.
  api:
    needs: changes
    if: ${{ needs.changes.outputs.api == 'true' }}
    runs-on: ubuntu-latest
    env:
      FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
    steps:
      - uses: actions/checkout@v4
      - uses: superfly/flyctl-actions@master
        with:
          args: deploy --config apps/api/fly.toml .
          # `with:` values are not shell-expanded, so `$FLYCTL_VERSION` would be passed
          # literally; read the workflow-level variable through the `env` context instead.
          version: ${{ env.FLYCTL_VERSION }}
2 | 3 | # Dependencies 4 | node_modules 5 | .pnp 6 | .pnp.js 7 | 8 | # Local env files 9 | .env 10 | .env.local 11 | .env.development.local 12 | .env.test.local 13 | .env.production.local 14 | 15 | # Testing 16 | coverage 17 | 18 | # Turbo 19 | .turbo 20 | 21 | # Vercel 22 | .vercel 23 | 24 | # Build Outputs 25 | .next/ 26 | out/ 27 | build 28 | dist 29 | 30 | 31 | # Debug 32 | npm-debug.log* 33 | yarn-debug.log* 34 | yarn-error.log* 35 | 36 | # Misc 37 | .DS_Store 38 | *.pem 39 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/.npmrc -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "tabWidth": 4, 3 | "useTabs": false, 4 | "singleQuote": false, 5 | "trailingComma": "none", 6 | "printWidth": 100, 7 | // "plugins": ["prettier-plugin-svelte", "prettier-plugin-tailwindcss"], 8 | "pluginSearchDirs": ["."], 9 | "overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }] 10 | } 11 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "eslint.workingDirectories": [ 3 | { 4 | "mode": "auto" 5 | } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /LICENCE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Peter Hagen & Louis Barclay 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, 
including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # About Ideas Now 2 | 3 | aboutideasnow.com indexes the /about, /ideas, and /now pages of 1000s of personal websites, and lets you search across them. 4 | The purpose is to help you find interesting people and talk to them. 5 | 6 | If you want to help improve the website, please open a Github issue! 
7 | 8 | ## Code structure 9 | 10 | This project uses Turborepo with a number of packages: 11 | 12 | - `apps/web`: A SvelteKit web app for the aboutideasnow.com website 13 | - `apps/api`: A Node.js server to run periodic website scrapes 14 | - `packages/core`: Shared utility functions and the Prisma Postgres database config 15 | 16 | ## Installation 17 | 18 | - Create `.env` files in `apps/api`, `apps/web`, and `packages/core` (use the `.env.example` files as templates) 19 | - Run `pnpm install` to install the dependencies 20 | 21 | ## Development 22 | 23 | - Run `pnpm dev` to run all apps 24 | - Run `cd packages/core && pnpm generate` to regenerate the database types after schema changes, or `cd packages/core && pnpm migrate` to apply those changes to the database 25 | 26 | ## Deployment 27 | 28 | - Create a Postgres database (e.g. via Supabase), and use its connection string as the `DATABASE_URL` secret in the following deployments. 29 | - Deploy the web app JavaScript build. If using Vercel, you'll have to override the CI/CD install command with `pnpm install --unsafe-perm` so it runs the `packages/core` postinstall step. Vercel fills in all other build settings correctly on its own. 30 | - Deploy the api app using Docker (from the root monorepo context). This repo already has a GitHub Action to automatically deploy to Fly.io. 
31 | 32 | 1 33 | -------------------------------------------------------------------------------- /apps/api/.env.example: -------------------------------------------------------------------------------- 1 | DATABASE_URL="" 2 | OPENAI_API_KEY="" 3 | TYPESENSE_URL="" 4 | TYPESENSE_ADMIN_API_KEY="" 5 | -------------------------------------------------------------------------------- /apps/api/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | .env 4 | -------------------------------------------------------------------------------- /apps/api/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax = docker/dockerfile:1 2 | 3 | # Adjust NODE_VERSION as desired 4 | ARG NODE_VERSION=18.16.1 5 | FROM node:${NODE_VERSION}-slim as base 6 | 7 | LABEL fly_launch_runtime="Node.js/Prisma" 8 | 9 | # Node.js/Prisma app lives here 10 | WORKDIR /app 11 | 12 | # Throw-away build stage to reduce size of final image 13 | FROM base as build 14 | 15 | # Install packages needed to build node modules 16 | RUN apt-get update -qq && \ 17 | apt-get install -y build-essential openssl pkg-config python-is-python3 18 | 19 | # Copy application code 20 | COPY --link . . 
21 | 22 | # Install dev CLIs 23 | RUN npm install -g pnpm turbo 24 | 25 | # Install dependencies 26 | RUN pnpm install --frozen-lockfile 27 | 28 | # Build application 29 | RUN pnpm build --filter=@repo/api 30 | 31 | # Remove development dependencies 32 | RUN pnpm install --prod --frozen-lockfile 33 | 34 | # Final stage for app image 35 | FROM base 36 | ENV NODE_ENV="production" 37 | 38 | # Install packages needed for deployment 39 | RUN apt-get update -qq && \ 40 | apt-get install --no-install-recommends -y chromium chromium-sandbox openssl && \ 41 | rm -rf /var/lib/apt/lists /var/cache/apt/archives 42 | 43 | # Copy built application 44 | COPY --from=build /app /app 45 | 46 | # Start the server by default, this can be overwritten at runtime 47 | EXPOSE 3000 48 | ENV PUPPETEER_EXECUTABLE_PATH="/usr/bin/chromium" 49 | CMD [ "node", "apps/api/dist/index.js" ] 50 | -------------------------------------------------------------------------------- /apps/api/fly.toml: -------------------------------------------------------------------------------- 1 | # fly.toml app configuration file generated for ideasideasideas on 2024-01-11T10:40:26+01:00 2 | # 3 | # See https://fly.io/docs/reference/configuration/ for information about how to use this file. 
4 | # 5 | 6 | app = "ideasideasideas" 7 | primary_region = "iad" 8 | 9 | [build] 10 | dockerfile = "Dockerfile" 11 | 12 | [env] 13 | PORT = "3000" 14 | 15 | [http_service] 16 | internal_port = 3000 17 | force_https = true 18 | auto_stop_machines = true 19 | auto_start_machines = true 20 | min_machines_running = 1 21 | processes = ["app"] 22 | 23 | [[vm]] 24 | cpu_kind = "shared" 25 | cpus = 1 26 | memory_mb = 1024 27 | -------------------------------------------------------------------------------- /apps/api/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@repo/api", 3 | "version": "1.0.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "nodemon src/index.ts", 7 | "build": "tsc" 8 | }, 9 | "dependencies": { 10 | "@crawlee/memory-storage": "^3.7.2", 11 | "@mozilla/readability": "^0.5.0", 12 | "@postlight/parser": "^2.2.3", 13 | "@repo/core": "workspace:*", 14 | "cheerio": "1.0.0-rc.12", 15 | "cors": "^2.8.5", 16 | "crawlee": "^3.7.2", 17 | "dotenv": "^16.4.1", 18 | "express": "^4.18.2", 19 | "jsdom": "^23.2.0", 20 | "langchain": "^0.1.11", 21 | "metascraper": "^5.43.4", 22 | "metascraper-date": "^5.43.4", 23 | "normalize-url": "^8.0.0", 24 | "openai": "^4.26.0", 25 | "turndown": "^7.1.2", 26 | "typesense": "^1.7.2" 27 | }, 28 | "devDependencies": { 29 | "@flydotio/dockerfile": "^0.4.11", 30 | "@types/cors": "^2.8.17", 31 | "@types/express": "^4.17.21", 32 | "@types/node": "^20.11.10", 33 | "@types/turndown": "^5.0.4", 34 | "nodemon": "^3.0.3", 35 | "parser": "link:@types/@postlight/parser", 36 | "ts-node": "^10.9.2", 37 | "typescript": "^5.3.3" 38 | }, 39 | "type": "module" 40 | } 41 | -------------------------------------------------------------------------------- /apps/api/src/common/content.ts: -------------------------------------------------------------------------------- 1 | import { Readability } from "@mozilla/readability"; 2 | // @ts-ignore types missing 3 | import Parser from 
/**
 * Parse a fetched page and extract its text in two forms.
 *
 * @param url  Page URL; passed to JSDOM as the document URL and to the fallback parser.
 * @param html Raw HTML of the page.
 * @returns `rawContent`: the body text with every whitespace run collapsed to one space
 *          (empty string if there is none); `articleContent`: the main article converted
 *          to Markdown, or `undefined` when no extractor produced any content.
 */
export async function getPageContent(url: string, html: string) {
    const { document } = new JSDOM(html, { url }).window;

    // Read the body text first, before the document is handed to the article extractors
    const bodyText = document.body.textContent;
    const rawContent = bodyText ? bodyText.replace(/\s+/g, " ") : "";

    const articleContent = await getReaderModeHtml(url, html, document);

    return { rawContent, articleContent };
}

/**
 * Extract the main article as Markdown, trying Readability on the parsed document first
 * and falling back to the Postlight parser on the raw HTML.
 * Returns `undefined` when neither yields non-empty content.
 */
async function getReaderModeHtml(url: string, html: string, document: Document) {
    // First choice: Readability
    const readabilityMarkdown = htmlToMarkdown(new Readability(document).parse()?.content);
    if (readabilityMarkdown) {
        return readabilityMarkdown;
    }

    // Fallback: Postlight
    const postlightArticle = await Parser.parse(url, { html });
    const postlightMarkdown = htmlToMarkdown(postlightArticle?.content);

    // An empty conversion result counts as "no article"
    return postlightMarkdown || undefined;
}

/**
 * Convert an HTML fragment to Markdown, dropping figures/images and keeping only the
 * text of links. Returns `undefined` for missing or empty input.
 */
function htmlToMarkdown(html?: string) {
    if (!html) {
        return undefined;
    }

    const turndown = new TurndownService();

    // Figures and images contribute nothing to the text
    turndown.addRule("remove-tags", {
        filter: ["figure", "img"],
        replacement: () => ""
    });

    // Keep the link text, discard the link markup
    turndown.addRule("unwrap-links", {
        filter: ["a"],
        replacement: (content) => content
    });

    return turndown.turndown(html);
}
// Phrases (lowercase) that mark error pages, placeholders, or cookie/JavaScript walls.
// Matched as substrings of the lowercased title and raw page text.
const WORD_BLOCKLIST = [
    "404",
    "oops",
    "missing",
    "not found",
    "does not exist",
    "cannot find",
    "cannot be found",
    "couldn't find",
    "nothing found",
    "no page",
    "private site",
    "you're lost",
    "this website uses cookies",
    "you need to enable javascript",
    "server error",
    "| substack",
    "existiert noch nicht"
];

// Domains excluded manually
const EXCLUDED_DOMAINS = new Set([
    // company websites
    "founderslist.com",
    "notesnook.com",
    "aisnakeoil.com",
    "dev.to",
    "github.com",
    "blog.tjcx.me",
    "ethical.net",
    "newstalk.com",
    "thoughtdistillery.com",
    "archive.blogs.harvard.edu",
    "indieweb.org",
    "wrightplacetv.com",
    "zotero.org",
    "theatlantic.com",
    "roarmag.org",
    "profiles.wordpress.org",
    "privacytools.io",
    "openthemagazine.com",
    "newstatesman.com",
    "learninpublic.org",
    "vibilagare.se",
    // bad date parsing
    "manojranaweera.me",
    "blog.halfvast.com"
]);

// Individual pages excluded manually (exact URL match)
const EXCLUDED_URLS = new Set([
    "https://jonathanhaslett.com/ideas",
    "https://levinofearth.com/ideas",
    "https://andreiluca.com/ideas",
    "https://bunniestudios.com/ideas",
    "https://drorpoleg.com/ideas",
    "https://verraes.net/ideas",
    "https://blog.nateliason.com/ideas",
    "https://ajain.tech/ideas",
    "https://tracingwoodgrains.com/ideas",
    "https://secretorum.life/ideas",
    "https://blazsemprimoznik.com/ideas",
    "https://xan.lol/ideas",
    "https://hardtowrite.com/ideas",
    "https://flaviocopes.com/ideas",
    "https://kvark.github.io/ideas",
    "https://cwinters.com/ideas",
    "https://simply.joejenett.com/ideas",
    "https://joshj.blog/ideas",
    "https://ianbrodie.com/ideas",
    "https://marcjenkins.co.uk/ideas",
    "https://hans.gerwitz.com/ideas",
    "https://reesskennedy.com/ideas",
    "https://blog.bitsapien.dev/ideas",
    "https://jfpenn.com/ideas",
    "https://jonathontoon.com/ideas",
    "https://whatlisacooks.com/ideas",
    "https://thistooshallgrow.com/ideas",
    "https://teodorapetkova.com/ideas",
    "https://sohl-dickstein.github.io/ideas",
    "https://scottaaronson.blog/ideas",
    "https://ryanholiday.net/ideas",
    "https://rocketcrab.com/ideas",
    "https://neilkakkar.com/ideas",
    "https://nadia.xyz/ideas",
    "https://mywiki.wooledge.org/ideas",
    "https://michaelnielsen.org/ideas",
    "https://matthiasportzel.com/ideas",
    "https://markuskaarlonen.com/ideas",
    "https://liw.fi/ideas",
    "https://literacyenquirer.blogspot.com/ideas"
]);

/**
 * Decide whether a scraped page should be left out of the index.
 *
 * @param url            Full page URL; compared exactly against the manual URL list.
 * @param domain         Page domain; compared exactly against the manual domain list.
 * @param title          Page title.
 * @param pathname       URL pathname, e.g. "/ideas".
 * @param rawContent     Whole-page text.
 * @param articleContent Extracted article text, if any.
 * @returns `true` when the page is missing real content, looks like an error or placeholder
 *          page, is on an excluded platform/domain, or is an individually excluded URL.
 */
export function isExcludedPage(
    url: string,
    domain: string,
    title: string,
    pathname: string,
    rawContent: string,
    articleContent?: string
) {
    // Missing article content
    const wordCount = articleContent?.split(/\s+/).length || 0;
    if (!articleContent || wordCount < 10) {
        return true;
    }

    // Lowercase once; rawContent can be large and is checked against every phrase
    const lowerTitle = title.toLowerCase();
    const lowerContent = rawContent.toLowerCase();

    // Check words in title & content
    if (WORD_BLOCKLIST.some((w) => lowerTitle.includes(w) || lowerContent.includes(w))) {
        return true;
    }

    // Broken platform links such as https://hiradnotes.substack.com/now
    if (domain.includes("substack") || domain === "soundcloud.com") {
        return true;
    }

    // Ensure /ideas pages actually talk about ideas (and are not fallback pages)
    if (pathname === "/ideas" && !(lowerTitle.includes("idea") || lowerContent.includes("idea"))) {
        return true;
    }

    // Exclude some domains & pages manually
    return EXCLUDED_DOMAINS.has(domain) || EXCLUDED_URLS.has(url);
}
on https://francescasciandra.art/now, https://nycsubwaygirl.com/now 38 | // But include the current date in case people create their now page before submitting it 39 | if (date && date.getTime() > new Date().getTime() - 1000 * 60) { 40 | date = undefined; 41 | } 42 | } 43 | 44 | return { 45 | domain, 46 | date 47 | }; 48 | } 49 | 50 | async function findDateUsingGPT(text: string): Promise { 51 | if (!text) { 52 | return undefined; 53 | } 54 | 55 | const response = await openai.chat.completions.create({ 56 | model: "gpt-3.5-turbo-1106", // 3.5 seems enough for this 57 | max_tokens: 50, 58 | temperature: 0, 59 | messages: [ 60 | { 61 | role: "system", 62 | content: `You are an API that extracts the lastUpdated full ISO date from a text. Return null if there's no date mentioned. Return only the data as JSON.` 63 | }, 64 | { 65 | role: "user", 66 | content: text 67 | } 68 | ], 69 | response_format: { type: "json_object" } 70 | }); 71 | const completion = response.choices[0].message.content; 72 | // Parse again with Chrono for error handling 73 | 74 | try { 75 | const isoString = JSON.parse(completion!).lastUpdated; 76 | const date = new Date(isoString); 77 | 78 | // GTP returns 1970-01-01 for empty dates 79 | if (date && date.toISOString().slice(0, 10) < "2010-01-01") { 80 | return undefined; 81 | } 82 | // Don't trust future dates, e.g. 
/**
 * Return the hostname of `url` without a leading "www." label.
 *
 * Only a prefix is removed: the previous unanchored `replace("www.", "")` stripped the
 * first "www." found anywhere in the hostname (e.g. "awww.example.com" became
 * "aexample.com"), producing a different host.
 *
 * @param url Absolute URL.
 * @returns The hostname, e.g. "example.com" for "https://www.example.com/now".
 * @throws TypeError if `url` cannot be parsed by the `URL` constructor.
 */
export function getDomain(url: string) {
    return new URL(url).hostname.replace(/^www\./, "");
}
/**
 * Split a post into paragraph chunks.
 *
 * Leading lines are dropped for as long as they are non-empty and shorter than 100
 * characters (heading lines). The rest is passed through the module-level `splitter`;
 * chunks of 100 characters or fewer are discarded and at most the first 10 are returned.
 *
 * @param text Post content.
 * @returns Up to 10 chunks, each longer than 100 characters.
 */
export async function getPostParagraphs(text: string) {
    const allLines = text.trim().split("\n");

    // First line that ends the heading run: an empty line or one of 100+ characters
    const bodyStart = allLines.findIndex((line) => !line || line.length >= 100);
    const bodyLines = bodyStart === -1 ? [] : allLines.slice(bodyStart);

    const chunks = await splitter.splitText(bodyLines.join("\n"));

    const paragraphs: string[] = [];
    for (const chunk of chunks) {
        if (chunk.length > 100) {
            paragraphs.push(chunk);
        }
    }
    return paragraphs.slice(0, 10);
}
/**
 * (Re)index a post in the Typesense "paragraphs" collection.
 *
 * The post content is split into paragraphs, all documents previously stored for the
 * same domain and post type are deleted, and one document per paragraph is imported.
 * Nothing is imported when the post yields no paragraphs.
 *
 * Errors are never thrown to the caller; they are reported through `logger`.
 *
 * @param post   Post to index.
 * @param logger Function receiving the error message on failure (defaults to `console.log`).
 */
export async function indexPost(post: Post, logger = console.log) {
    try {
        const paragraphs = await getPostParagraphs(post.content);

        // Delete existing paragraphs for this post (the number might have changed)
        await unIndexPost(post.domain, post.type);

        if (paragraphs.length === 0) {
            return;
        }

        await typesense
            .collections("paragraphs")
            .documents()
            .import(
                paragraphs.map((p) => ({
                    url: post.url,
                    domain: post.domain,
                    type: post.type,
                    content: p,
                    // epoch milliseconds
                    updatedAt: post.updatedAt.getTime()
                }))
            );
    } catch (e) {
        logger(`Error indexing post ${post.url}: ${e}`);
    }
}

/**
 * Delete every document in the "paragraphs" collection that matches the given domain
 * and post type. Errors from the Typesense client propagate to the caller.
 */
export async function unIndexPost(domain: string, postType: PostType) {
    await typesense
        .collections("paragraphs")
        .documents()
        .delete({ filter_by: `domain:${domain} && type:${postType}` });
}
11 | import { router } from "./routes.js"; 12 | import normalizeUrl from "normalize-url"; 13 | import { db } from "../common/db.js"; 14 | import { getDomain } from "../common/meta.js"; 15 | import { ScrapeStatus } from "@repo/core/generated/prisma-client"; 16 | import { getPostType } from "../common/postType.js"; 17 | 18 | export async function runCrawler(directoryUrls: string[], documentUrls: string[]) { 19 | // Seed URLs 20 | const crawlerQueue: RequestOptions[] = [ 21 | ...directoryUrls.map((url) => ({ 22 | url, 23 | label: "directory" 24 | })), 25 | ...documentUrls.map((url) => ({ 26 | url, 27 | label: "document" 28 | })) 29 | ]; 30 | 31 | // Run crawler 32 | const proxyConfiguration = new ProxyConfiguration({ 33 | // proxyUrls: ["URL"] 34 | // proxyUrls: Array.from(Array(100).keys()).map( 35 | // (i) => `URL:${10000 + i + 1}` 36 | // ) 37 | }); 38 | await purgeDefaultStorages(); 39 | const crawler = new CheerioCrawler( 40 | { 41 | // proxyConfiguration, 42 | // useSessionPool: true, 43 | // persistCookiesPerSession: true, 44 | // additionalHttpErrorStatusCodes: [403, 444, 503], 45 | 46 | minConcurrency: 1, 47 | maxConcurrency: 10, 48 | retryOnBlocked: false, 49 | maxRequestRetries: 1, 50 | maxRequestsPerMinute: 120, 51 | sameDomainDelaySecs: 0, 52 | 53 | requestHandler: router, 54 | failedRequestHandler: async ({ request, log, enqueueLinks }) => { 55 | const url = normalizeUrl(request.url); 56 | const domain = getDomain(url); 57 | const pathname = new URL(url).pathname; 58 | const postType = getPostType(pathname); 59 | 60 | log.info(`Failed to crawl ${url}`); 61 | 62 | // Mark as unavailable 63 | if (postType) { 64 | await db.scrapeState.upsert({ 65 | where: { domain_type: { domain, type: postType } }, 66 | create: { 67 | domain, 68 | type: postType, 69 | status: ScrapeStatus.UNAVAILABLE, 70 | scapedAt: new Date() 71 | }, 72 | update: { 73 | status: ScrapeStatus.UNAVAILABLE, 74 | scapedAt: new Date() 75 | } 76 | }); 77 | } 78 | 79 | // Try other paths 80 | if 
// Scrape a directory of links: collect the external sites a page links to and enqueue
// the /about, /now and /ideas pages of every site that has no scrape state yet.
router.addHandler("directory", async ({ $, request, enqueueLinks, log }) => {
    const url = request.loadedUrl!;
    const domain = getDomain(url);
    log.info(`crawling directory: ${url}`);

    // Extract links
    const links = $("a[href]")
        .map((_, el) => $(el).attr("href"))
        .get()
        // map to absolute urls
        .map((link) => {
            try {
                const obj = new URL(link, url);
                // skip mailto:, tel:, javascript: etc. - they have no site to crawl and
                // would otherwise turn into requests like https:///about
                if (obj.protocol !== "http:" && obj.protocol !== "https:") {
                    return null;
                }
                // filter out current-domain links; normalise the link the same way as
                // `domain` so that "www." variants of the current site are caught too
                if (getDomain(obj.href) === domain) {
                    return null;
                }
                return obj.toString();
            } catch {
                // unparsable href
                return null;
            }
        })
        .filter((link): link is string => link !== null);

    // Exclude already checked links
    const scrapeStates = await db.scrapeState.findMany({
        where: { domain: { in: links.map(getDomain) } }
    });
    const excludedDomains = new Set<string>(scrapeStates.map((s) => s.domain));
    const newLinks = links.filter((link) => !excludedDomains.has(getDomain(link)));
    log.info(`Found ${newLinks.length} new links`);

    // Randomize order to work around network errors.
    // Fisher-Yates shuffle: sorting with a random comparator is biased and gives the
    // sort an inconsistent ordering.
    for (let i = newLinks.length - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1));
        [newLinks[i], newLinks[j]] = [newLinks[j], newLinks[i]];
    }

    // Scrape new links
    await enqueueLinks({
        strategy: "all",
        label: "document",
        urls: newLinks.flatMap((link) => {
            const linkDomain = getDomain(link);
            return [
                `https://${linkDomain}/about`,
                `https://${linkDomain}/now`,
                `https://${linkDomain}/ideas`
            ];
        })
    });
});
existingPost = await db.post.findFirst({ where: { domain, type: postType } }); 97 | 98 | // Extract content 99 | const title = $("title").text(); 100 | const html = $.html(); 101 | const { rawContent, articleContent } = await getPageContent(url, html); 102 | const wordCount = articleContent?.split(/\s+/).length || 0; 103 | 104 | // Check if should exclude / delete post 105 | if ( 106 | !articleContent || 107 | isExcludedPage(url, domain, title, pathname, rawContent, articleContent) 108 | ) { 109 | if (pathname === "/about") { 110 | log.info(`Trying / instead of /about for ${domain}\n`); 111 | enqueueLinks({ 112 | strategy: "all", 113 | label: "document", 114 | urls: [`https://${domain}/`] 115 | }); 116 | return; 117 | } 118 | log.info(`excluding ${url} (title: ${title})\n`); 119 | 120 | // Update scrape time if exists, otherwise save as no content 121 | await db.scrapeState.upsert({ 122 | where: { domain_type: { domain, type: postType } }, 123 | create: { 124 | domain, 125 | type: postType, 126 | status: ScrapeStatus.NO_CONTENT, 127 | scapedAt: new Date() 128 | }, 129 | update: { 130 | scapedAt: new Date() 131 | } 132 | }); 133 | 134 | // Delete post if existed before 135 | if (existingPost) { 136 | await deletePost(domain, postType); 137 | } 138 | 139 | return; 140 | } 141 | 142 | // Check if content has changed 143 | if (existingPost && existingPost.content === articleContent) { 144 | log.info(`skipping ${url} (content unchanged)\n`); 145 | 146 | // Update scrape time 147 | await db.scrapeState.upsert({ 148 | where: { domain_type: { domain, type: postType } }, 149 | create: { 150 | domain, 151 | type: postType, 152 | status: ScrapeStatus.SCRAPED, 153 | scapedAt: new Date() 154 | }, 155 | update: { 156 | scapedAt: new Date() 157 | } 158 | }); 159 | 160 | return; 161 | } 162 | 163 | // Use rawContent in case date is outside main text (e.g. 
on https://alexcarpenter.me/now) 164 | const meta = await getMeta(url, html, rawContent, log.info.bind(log)); 165 | // Log debug stats 166 | log.info(`scraped ${url}:`); 167 | log.info(`\ttitle: ${title}`); 168 | log.info(`\twords: ${wordCount}`); 169 | log.info(`\tdate: ${meta.date?.toISOString().slice(0, 10)}`); 170 | log.info(``); 171 | 172 | // Update post 173 | const post = { 174 | url, 175 | domain, 176 | type: postType, 177 | content: articleContent, 178 | updatedAt: meta.date || new Date("1970-01-01") 179 | }; 180 | await db.post.upsert({ 181 | where: { url }, 182 | create: post, 183 | update: post 184 | }); 185 | 186 | // Index for search async 187 | indexPost(post, log.info.bind(log)); 188 | 189 | // Save scrape success 190 | await db.scrapeState.upsert({ 191 | where: { domain_type: { domain, type: postType } }, 192 | create: { 193 | domain, 194 | type: postType, 195 | status: ScrapeStatus.SCRAPED, 196 | scapedAt: new Date() 197 | }, 198 | update: { 199 | status: ScrapeStatus.SCRAPED, 200 | scapedAt: new Date() 201 | } 202 | }); 203 | }); 204 | -------------------------------------------------------------------------------- /apps/api/src/index.ts: -------------------------------------------------------------------------------- 1 | import "dotenv/config"; 2 | import express, { Application } from "express"; 3 | import cors from "cors"; 4 | import { router } from "./routes/index.js"; 5 | 6 | // Setup express server 7 | const app: Application = express(); 8 | 9 | // Process middleware 10 | app.use(cors()); 11 | app.use(express.json({ limit: "5mb" })); 12 | 13 | // Handle requests 14 | app.use(router); 15 | 16 | // Start server 17 | const PORT: number = parseInt(process.env.PORT as string, 10) || 7101; 18 | app.listen(PORT, () => { 19 | console.log(`API listening on port ${PORT}`); 20 | }); 21 | -------------------------------------------------------------------------------- /apps/api/src/routes/add.ts: 
--------------------------------------------------------------------------------
import type { Request, Response } from "express";
import { runCrawler } from "../crawler/main.js";
import { db } from "../common/db.js";
import { getDomain } from "../common/meta.js";
import { SubmittedDomain } from "@repo/core/generated/prisma-client";
import { unIndexPost } from "../common/typesense.js";

/** Returns the three candidate page URLs (/about, /now, /ideas) for a domain. */
function getCandidateUrls(domain: string): string[] {
    return [`https://${domain}/about`, `https://${domain}/now`, `https://${domain}/ideas`];
}

/**
 * Starts a crawl without waiting for it, logging a failure instead of leaving
 * the rejection unhandled.
 */
function runCrawlerInBackground(directories: string[], documents: string[]): void {
    runCrawler(directories, documents).catch((err) => {
        console.error("Background crawl failed:", err);
    });
}

/**
 * POST /add-directory?url=...
 *
 * Starts a background crawl of the given directory page and stores a
 * DIRECTORY scrape state for its domain. Responds 400 when `url` is missing.
 */
export async function addDirectory(req: Request, res: Response) {
    const url = req.query.url as string | undefined;
    if (!url) {
        return res.status(400).json({ message: "Missing url" });
    }

    // Don't await
    runCrawlerInBackground([url], []);

    const domain = getDomain(url);
    await db.scrapeState.upsert({
        where: { domain_type: { domain, type: "ABOUT" } },
        create: {
            domain,
            domainType: "DIRECTORY",
            type: "ABOUT",
            status: "SCRAPED",
            scapedAt: new Date()
        },
        update: { scapedAt: new Date() }
    });

    return res.json({ message: "Pending" });
}

/**
 * POST /add-batch-domains with JSON body `{ domains: string[] }`.
 *
 * Starts a background crawl of the candidate pages of every submitted domain
 * that has no scrape state yet. Responds 400 when `domains` is not an array.
 */
export async function addBatchDomains(req: Request, res: Response) {
    const submitted: unknown = req.body?.domains;
    if (!Array.isArray(submitted)) {
        return res.status(400).json({ message: "Missing domains" });
    }
    const domains = submitted.filter(
        (domain): domain is string => typeof domain === "string" && domain.includes(".")
    );

    const links = domains.flatMap(getCandidateUrls);

    // Exclude existing domains
    const excludedDomains = new Set<string>();
    const scrapeStates = await db.scrapeState.findMany({
        where: { domain: { in: links.map(getDomain) } }
    });
    scrapeStates.forEach((s) => excludedDomains.add(s.domain));
    const newLinks = links.filter((link) => !excludedDomains.has(getDomain(link)));
    console.log(`Found ${newLinks.length} new links`);

    // Crawl only the new links (previously the unfiltered list was passed,
    // so the exclusion above had no effect).
    if (newLinks.length > 0) {
        runCrawlerInBackground([], newLinks);
    }

    return res.json({ message: "Pending" });
}

/**
 * POST /add-domain?url=...&email=...
 *
 * Crawls the candidate pages of one domain synchronously, records the
 * submission, and responds with the posts stored for that domain
 * (or 500 when the crawl failed, 400 when `url` is missing or invalid).
 */
export async function addDomain(req: Request, res: Response) {
    // Parse params
    const url = parseUrl(req);
    if (!url) {
        return res.status(400).json({ message: "Missing url" });
    }
    const domain = getDomain(url);
    const email = (req.query.email as string) || null;

    // Scrape website
    let success = true;
    try {
        await runCrawler([], getCandidateUrls(domain));
    } catch (err) {
        console.error(`Failed to crawl ${domain}:`, err);
        success = false;
    }

    // Save submitted info if not exists
    try {
        await db.submittedDomain.create({
            data: {
                domain,
                email,
                success,
                submittedAt: new Date()
            }
        });
    } catch (err) {
        // Best effort: the record may already exist. Log instead of swallowing silently.
        console.warn(`Could not save submission for ${domain}:`, err);
    }

    // Return results
    if (success) {
        const posts = await db.post.findMany({
            where: { domain },
            orderBy: { updatedAt: "desc" }
        });
        return res.json(posts);
    } else {
        return res.status(500).json({ message: "Failed to scrape website :(" });
    }
}

/**
 * Reads the `url` query parameter and returns it as an absolute URL string,
 * prefixing `https://` when no scheme is given.
 *
 * Returns undefined when the parameter is missing, not a single string,
 * contains no "." or cannot be parsed by `new URL`.
 */
function parseUrl(req: Request) {
    const rawUrl = req.query.url;
    // Express yields arrays/objects for repeated or nested params; reject those explicitly.
    if (typeof rawUrl !== "string" || rawUrl.trim() === "") {
        return;
    }

    let url = rawUrl.trim();
    if (!url.startsWith("http")) {
        url = `https://${url}`;
    }

    // Validate url
    if (!url.includes(".")) {
        return;
    }
    try {
        new URL(url);
    } catch (error) {
        return;
    }

    return url;
}

--------------------------------------------------------------------------------
/apps/api/src/routes/backfill.ts:
--------------------------------------------------------------------------------
import type { Request, Response } from "express";
import { db } from "../common/db.js";
import { indexPost } from "../common/typesense.js";

/**
 * POST /backfill?start=0&limit=10000
 *
 * Re-indexes stored posts for search, newest first, one at a time with a
 * 300ms pause between posts. Indexing errors are logged and do not stop the run.
 */
export async function runBackfill(req: Request, res: Response) {
    const start = Number.parseInt(req.query.start as string, 10) || 0;
    const limit = Number.parseInt(req.query.limit as string, 10) || 10000;

    const posts = await db.post.findMany({
        orderBy: { updatedAt: "desc" },
        take: limit,
        skip: start
    });

    let index = start;
    for (const post of posts) {
        console.log(`(${index}/${posts.length + start}) Backfilling post ${post.url}`);

        try {
            // Await so that an asynchronous indexing failure reaches the catch below;
            // without it the try/catch only covered synchronous throws.
            await indexPost(post);
        } catch (e) {
            console.error(e);
        }

        // Pause between posts
        await new Promise((resolve) => setTimeout(resolve, 300));

        index++;
    }

    console.log(`Backfill complete`);
    return res.json({ success: true });
}

--------------------------------------------------------------------------------
/apps/api/src/routes/crawl.ts:
--------------------------------------------------------------------------------
import type { Request, Response } from "express";
import { runCrawler } from "../crawler/main.js";
import { db } from "../common/db.js";

/**
 * POST /periodic-crawl?directories=true&limit=10000
 *
 * Starts a background re-crawl of the least recently scraped individual-site
 * pages (at most `limit`), optionally together with all known directories.
 */
export async function periodicCrawl(req: Request, res: Response) {
    const includeDirectories = req.query.directories === "true";
    const limit = Number.parseInt(req.query.limit as string, 10) || 10_000;

    // Check all directories for new links (query skipped unless requested)
    let directories: string[] = [];
    if (includeDirectories) {
        const directoryScrapes = await db.scrapeState.findMany({
            where: { domainType: "DIRECTORY" }
        });
        directories = directoryScrapes.map((s) => `https://${s.domain}`);
    }

    // Re-scrape all indexed pages
    const scrapeStates = await db.scrapeState.findMany({
        where: { status: "SCRAPED", domainType: "INDIVIDUAL_SITE" },
        orderBy: { scapedAt: "asc" },
        take: limit
    });
    const documents = scrapeStates.map((s) => `https://${s.domain}/${s.type.toLowerCase()}`);

    // const posts = await db.post.findMany({
    //     where: { type: "NOW", updatedAt: { lt: new Date("2010-01-01") } },
    //     select: { url: true },
    //     take: limit
    // });
    // const documents = posts.map((d) => d.url);

    // const docs = await db.post.findMany({
    //     where: { type: "ABOUT" },
    //     orderBy: { updatedAt: "asc" },
    //     select: { url: true },
    //     take: limit
    // });
    // const documents = docs.map((d) => d.url);

    // Don't await response, but log a failure instead of leaving it unhandled
    runCrawler(directories, documents).catch((err) => {
        console.error("Periodic crawl failed:", err);
    });

    return res.json({ message: "Pending" });
}

--------------------------------------------------------------------------------
/apps/api/src/routes/getIndexedNowPages.ts:
--------------------------------------------------------------------------------
import { Request, Response } from "express";
import { db } from "../common/db.js";

/**
 * GET /indexed-now-pages
 *
 * Responds with the number of stored /now posts, the most recent NOW scrape
 * time, and a map of domain -> last update day (YYYY-MM-DD, or null when the
 * stored date is the 1970-01-01 placeholder).
 */
export async function getIndexedNowPages(req: Request, res: Response) {
    // The two queries are independent, so run them in parallel
    const [posts, lastScrapeState] = await Promise.all([
        // List valid /now posts
        db.post.findMany({
            where: {
                type: "NOW"
            },
            select: {
                domain: true,
                // url: true,
                updatedAt: true
            },
            orderBy: {
                updatedAt: "desc"
            }
        }),
        db.scrapeState.findFirst({
            where: {
                type: "NOW"
            },
            select: {
                scapedAt: true
            },
            orderBy: {
                scapedAt: "desc"
            }
        })
    ]);

    // Map format (built in one pass instead of re-spreading the object per post)
    const websitesUpdatedAt = Object.fromEntries(
        posts.map((post) => {
            const updatedAt = post.updatedAt.toISOString().slice(0, 10);
            return [post.domain, updatedAt === "1970-01-01" ? null : updatedAt];
        })
    );

    return res.json({
        validWebsitesCount: posts.length,
        lastScrapedAt: lastScrapeState?.scapedAt,
        websitesUpdatedAt
    });
}

--------------------------------------------------------------------------------
/apps/api/src/routes/hello.ts:
--------------------------------------------------------------------------------
import type { Request, Response } from "express";

/** GET /hello - responds with a fixed greeting. */
export async function hello(req: Request, res: Response) {
    return res.json({ message: "Hello World!!!" });
}

--------------------------------------------------------------------------------
/apps/api/src/routes/index.ts:
--------------------------------------------------------------------------------
import { Router } from "express";
import { hello } from "./hello.js";
import { periodicCrawl } from "./crawl.js";
import { addBatchDomains, addDirectory, addDomain } from "./add.js";
import { runBackfill } from "./backfill.js";
import { getIndexedNowPages } from "./getIndexedNowPages.js";

// Maps each API path to its handler
export const router = Router();

router.get("/hello", hello);
router.post("/periodic-crawl", periodicCrawl);
router.post("/add-directory", addDirectory);
router.post("/add-domain", addDomain);
router.post("/add-batch-domains", addBatchDomains);
router.post("/backfill", runBackfill);

router.get("/indexed-now-pages", getIndexedNowPages);

--------------------------------------------------------------------------------
/apps/api/tsconfig.json:
--------------------------------------------------------------------------------
 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "NodeNext", 5 | "outDir": "./dist", 6 | "rootDir": "./src", 7 | "strict": true, 8 | "strictNullChecks": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 |
"forceConsistentCasingInFileNames": true 12 | }, 13 | "include": ["src"], 14 | "exclude": ["node_modules", "dist", "openapi"], 15 | "ts-node": { 16 | "esm": true 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /apps/web/.env.example: -------------------------------------------------------------------------------- 1 | ########### Public secrets ########### 2 | 3 | PUBLIC_TYPESENSE_URL="o8k5amdihc3vegn7p-1.a1.typesense.net" 4 | PUBLIC_TYPESENSE_SEARCH_API_KEY="IRHY5vFh26RNZxoYem0hFs6Gb6ilSOnv" 5 | 6 | ########### Private secrets ########### 7 | 8 | DATABASE_URL="" 9 | INTERNAL_API_URL="https://api.aboutideasnow.com" 10 | 11 | OPENAI_API_KEY="" 12 | COHERE_API_KEY="" 13 | -------------------------------------------------------------------------------- /apps/web/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | /build 4 | /.svelte-kit 5 | /package 6 | .env 7 | .env.* 8 | !.env.example 9 | vite.config.js.timestamp-* 10 | vite.config.ts.timestamp-* 11 | -------------------------------------------------------------------------------- /apps/web/.prettierignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | /build 4 | /.svelte-kit 5 | /package 6 | .env 7 | .env.* 8 | !.env.example 9 | 10 | # Ignore files for PNPM, NPM and YARN 11 | pnpm-lock.yaml 12 | package-lock.json 13 | yarn.lock 14 | -------------------------------------------------------------------------------- /apps/web/README.md: -------------------------------------------------------------------------------- 1 | # create-svelte 2 | 3 | Everything you need to build a Svelte project, powered by [`create-svelte`](https://github.com/sveltejs/kit/tree/master/packages/create-svelte). 4 | 5 | ## Creating a project 6 | 7 | If you're seeing this, you've probably already done this step. Congrats! 
8 | 9 | ```bash 10 | # create a new project in the current directory 11 | npm create svelte@latest 12 | 13 | # create a new project in my-app 14 | npm create svelte@latest my-app 15 | ``` 16 | 17 | ## Developing 18 | 19 | Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server: 20 | 21 | ```bash 22 | npm run dev 23 | 24 | # or start the server and open the app in a new browser tab 25 | npm run dev -- --open 26 | ``` 27 | 28 | ## Building 29 | 30 | To create a production version of your app: 31 | 32 | ```bash 33 | npm run build 34 | ``` 35 | 36 | You can preview the production build with `npm run preview`. 37 | 38 | > To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment. 39 | -------------------------------------------------------------------------------- /apps/web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@repo/web", 3 | "version": "1.0.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "vite dev --port 7100", 7 | "build": "vite build", 8 | "preview": "vite preview", 9 | "test": "playwright test", 10 | "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json", 11 | "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch", 12 | "lint": "prettier --plugin-search-dir . --check .", 13 | "format": "prettier --plugin-search-dir . --write ." 
14 | }, 15 | "dependencies": { 16 | "@langchain/community": "^0.0.21", 17 | "@langchain/openai": "^0.0.13", 18 | "@repo/core": "workspace:*", 19 | "@supabase/supabase-js": "^2.39.3", 20 | "@vercel/analytics": "^1.2.2", 21 | "axios": "^1.6.7", 22 | "clsx": "^2.1.0", 23 | "cohere-ai": "^7.7.3", 24 | "lodash": "^4.17.21", 25 | "loglevel": "^1.9.1", 26 | "openai": "^4.26.0", 27 | "posthog-js": "^1.108.3", 28 | "typesense": "^1.7.2" 29 | }, 30 | "devDependencies": { 31 | "@csstools/postcss-oklab-function": "^3.0.3", 32 | "@playwright/test": "^1.28.1", 33 | "@sveltejs/adapter-vercel": "^4.0.4", 34 | "@sveltejs/kit": "^2.0.0", 35 | "@sveltejs/vite-plugin-svelte": "^3.0.0", 36 | "@types/lodash": "^4.14.202", 37 | "autoprefixer": "^10.4.15", 38 | "postcss": "^8.4.29", 39 | "prettier": "^3.0.3", 40 | "prettier-plugin-svelte": "^2.10.1", 41 | "prettier-plugin-tailwindcss": "^0.5.4", 42 | "svelte": "^4.0.5", 43 | "svelte-check": "^3.4.3", 44 | "tailwindcss": "^3.3.3", 45 | "tslib": "^2.4.1", 46 | "typescript": "^5.0.0", 47 | "vite": "^5.0.0" 48 | }, 49 | "type": "module" 50 | } -------------------------------------------------------------------------------- /apps/web/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | "@csstools/postcss-oklab-function": { preserve: true }, 5 | autoprefixer: {} 6 | } 7 | }; 8 | -------------------------------------------------------------------------------- /apps/web/src/app.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | @layer base { 6 | h2 { 7 | @apply mb-4 text-3xl font-bold text-center font-title; 8 | } 9 | p { 10 | @apply mb-2; 11 | } 12 | a { 13 | @apply font-bold transition-opacity hover:opacity-50; 14 | } 15 | ul, 16 | ol { 17 | @apply pr-5 ml-5 list-outside w-max; 18 | } 19 | ul { 20 | @apply list-disc; 21 | } 22 
| ol { 23 | @apply list-decimal; 24 | } 25 | li { 26 | @apply mb-2; 27 | } 28 | } 29 | 30 | @media (min-width: 768px) { 31 | ul, 32 | ol { 33 | @apply pr-0 ml-10; 34 | } 35 | } 36 | 37 | .highlight { 38 | @apply font-bold bg-yellow-200; 39 | } 40 | 41 | .loader { 42 | width: 20px; 43 | height: 20px; 44 | border-radius: 50%; 45 | position: relative; 46 | animation: rotate 1s linear infinite; 47 | } 48 | .loader::before { 49 | content: ""; 50 | box-sizing: border-box; 51 | position: absolute; 52 | inset: 0px; 53 | border-radius: 50%; 54 | border: 2px solid black; 55 | animation: prixClipFix 2s linear infinite; 56 | } 57 | 58 | @keyframes rotate { 59 | 100% { 60 | transform: rotate(360deg); 61 | } 62 | } 63 | 64 | @keyframes prixClipFix { 65 | 0% { 66 | clip-path: polygon(50% 50%, 0 0, 0 0, 0 0, 0 0, 0 0); 67 | } 68 | 25% { 69 | clip-path: polygon(50% 50%, 0 0, 100% 0, 100% 0, 100% 0, 100% 0); 70 | } 71 | 50% { 72 | clip-path: polygon(50% 50%, 0 0, 100% 0, 100% 100%, 100% 100%, 100% 100%); 73 | } 74 | 75% { 75 | clip-path: polygon(50% 50%, 0 0, 100% 0, 100% 100%, 0 100%, 0 100%); 76 | } 77 | 100% { 78 | clip-path: polygon(50% 50%, 0 0, 100% 0, 100% 100%, 0 100%, 0 0); 79 | } 80 | } 81 | 82 | @font-face { 83 | font-family: "Reckless"; 84 | font-weight: 100 1000; 85 | font-display: block; 86 | src: url(/fonts/Reckless-VariableFont.woff2) format("woff2"); 87 | } 88 | 89 | @font-face { 90 | font-family: "DM Sans"; 91 | font-weight: 100 1000; 92 | font-display: block; 93 | src: 94 | local("DM Sans"), 95 | url(/fonts/DMSans-VariableFont.ttf) format("truetype"); 96 | } 97 | 98 | @font-face { 99 | font-family: "PT Mono"; 100 | font-weight: 400; 101 | font-display: block; 102 | src: 103 | local("PT Mono"), 104 | url(/fonts/PTMono-Regular.ttf) format("truetype"); 105 | } 106 | -------------------------------------------------------------------------------- /apps/web/src/app.d.ts: -------------------------------------------------------------------------------- 1 | // See 
https://kit.svelte.dev/docs/types#app 2 | // for information about these interfaces 3 | declare global { 4 | namespace App { 5 | // interface Error {} 6 | // interface Locals {} 7 | // interface PageData {} 8 | // interface Platform {} 9 | } 10 | } 11 | 12 | export {}; 13 | -------------------------------------------------------------------------------- /apps/web/src/app.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | About Ideas Now | Search 1000s of personal sites 9 | 10 | 14 | 15 | 16 | 17 | 18 | 19 | 23 | 24 | 25 | 26 | 30 | 31 | 32 | 36 | 37 | %sveltekit.head% 38 | 39 | 40 | %sveltekit.body% 41 | 42 | 43 |
--------------------------------------------------------------------------------
/apps/web/src/common/cohere.ts:
--------------------------------------------------------------------------------
import { COHERE_API_KEY } from "$env/static/private";
import { CohereClient } from "cohere-ai";

// Shared Cohere client, authenticated with the private API key
export const cohere = new CohereClient({
    token: COHERE_API_KEY
});

--------------------------------------------------------------------------------
/apps/web/src/common/constants.ts:
--------------------------------------------------------------------------------
export const colorPalette = ["#ce7e8f", "#ebbe6b", "#6eaecf", "#F9B09A", "#8e79af", "#479A70"];

// Example search terms, each paired with an emoji
export const exampleSearchQueries = [
    {
        term: "building open source",
        emoji: "📖"
    },
    {
        term: "starting a community",
        emoji: "👥"
    },
    // {
    //     term: "looking for a cofounder",
    //     emoji: "🤝"
    // },
    {
        term: "founding a company",
        emoji: "🏢"
    },
    {
        term: "writing a book",
        emoji: "🖊️"
    },
    {
        term: "building an app",
        emoji: "📱"
    },
    {
        term: "quitting social media",
        emoji: "📵"
    },
    // {
    //     term: "taking a break",
    //     emoji: "🏖️"
    // },
    {
        term: "traveling the world",
        emoji: "🌎"
    },
    {
        term: "taking photos",
        emoji: "📸"
    },
    // {
    //     term: "spending time with my kids",
    //     emoji: "👨‍👩‍👧‍👦"
    // },
    // {
    //     term: "learning to code",
    //     emoji: "👨‍💻"
    // },
    {
        term: "making music",
        emoji: "🎵"
    },
    {
        term: "volunteering",
        emoji: "🤲"
    }
];

--------------------------------------------------------------------------------
/apps/web/src/common/formActions.ts:
--------------------------------------------------------------------------------
// Import necessary types and constants
import { INTERNAL_API_URL } from "$env/static/private";
import type { Post } from "@repo/core/generated/prisma-client";

/**
 * Abstracted form action logic: submits the "domain" (and optional "email")
 * form fields to the internal API's /add-domain endpoint.
 *
 * @param request - the form submission request
 * @returns `{ addedDomain: false }` when the domain field is missing or the API
 *   responds with a non-OK status, otherwise `{ addedDomain: true, scrapedPosts }`
 *   with the posts returned by the API.
 */
export async function handleSubmit(
    request: Request
): Promise<{ addedDomain: boolean; scrapedPosts?: Post[] }> {
    const data = await request.formData();
    const domain = data.get("domain");
    const email = data.get("email");

    // A missing field used to be interpolated as the literal text "null"
    if (typeof domain !== "string" || domain.trim() === "") {
        return { addedDomain: false };
    }

    // Build the query string via URLSearchParams so values containing "&", "+",
    // "#" or spaces are encoded instead of corrupting the request.
    const endpoint = new URL(`${INTERNAL_API_URL}/add-domain`);
    endpoint.searchParams.set("url", domain.trim());
    if (typeof email === "string" && email.trim() !== "") {
        endpoint.searchParams.set("email", email.trim());
    }

    const response = await fetch(endpoint, {
        method: "POST"
    });
    if (!response.ok) {
        return { addedDomain: false };
    }
    const scrapedPosts: Post[] = await response.json();
    return { addedDomain: true, scrapedPosts };
}

--------------------------------------------------------------------------------
/apps/web/src/common/openai.ts:
--------------------------------------------------------------------------------
import { OPENAI_API_KEY } from "$env/static/private";
import OpenAI from "openai";

// Shared OpenAI client, authenticated with the private API key
export const openai = new OpenAI({
    apiKey: OPENAI_API_KEY
});

/**
 * Requests a 512-dimension "text-embedding-3-small" embedding for `text`
 * and returns the embedding of the first result.
 */
export async function generateEmbedding(text: string) {
    const result = await openai.embeddings.create({
        input: text,
        model: "text-embedding-3-small",
        dimensions: 512
    });

    const [{ embedding }] = result.data;
    return embedding;
}

--------------------------------------------------------------------------------
/apps/web/src/common/typesense.ts:
--------------------------------------------------------------------------------
import { PUBLIC_TYPESENSE_SEARCH_API_KEY, PUBLIC_TYPESENSE_URL } from "$env/static/public";
import type { Post, PostType } from "@repo/core/generated/prisma-client";
import Typesense from "typesense";

// Typesense client built from the public URL and search API key
export const typesense = new Typesense.Client({
    nodes: [
        {
            host: PUBLIC_TYPESENSE_URL,
            port: 443,
            protocol: "https"
        }
    ],
    apiKey: PUBLIC_TYPESENSE_SEARCH_API_KEY,
    connectionTimeoutSeconds: 2
});

// A Post as returned from the search index, with an added numeric id
export type SearchedPost = Post & {
    id: number;
};

/**
 * Searches the "paragraphs" collection by keyword over `content` and `domain`,
 * optionally filtered to one post type, and returns the top hit of each
 * (domain, type) group with matches highlighted in `content`.
 *
 * NOTE(review): this text appears to have lost markup during extraction - the
 * return type reads `Promise` with no type argument (presumably
 * `Promise<SearchedPost[]>`), and the highlight strings below are empty.
 * Restore them from the original file before relying on this code.
 */
export async function searchPosts(query: string, postType?: PostType): Promise {
    const searchResults = await typesense
        .collections("paragraphs")
        .documents()
        .search({
            q: query,

            // query_by: "embedding",
            // uncomment this to enable keyword search
            query_by: "content,domain",

            filter_by: postType ? `type:${postType}` : undefined,

            // required for embeddings
            exclude_fields: "embedding",
            prefix: false,

            // group by domain to return only best paragraph
            group_by: "domain,type",
            group_limit: 1,

            limit: 50
        });

    // Keep only the first hit of each group
    const hits = searchResults.grouped_hits?.map((hit) => hit.hits[0]) || [];
    // NOTE(review): looks like leftover debug output - logs every hit on each search
    console.log(hits);
    return (
        hits.map((hit) => {
            // Highlight search matches
            let htmlContent = hit.document.content;
            hit.highlights?.forEach((highlight) => {
                if (highlight.field !== "content") {
                    return;
                }

                if (highlight.snippet) {
                    // Use TypeSense snippet to avoid highlighting small words in random places
                    // However this shrinks the displayed paragraph :(
                    // NOTE(review): both arguments are empty strings here, so as written this
                    // replaces nothing; presumably they were markup tags - verify.
                    htmlContent = highlight?.snippet.replaceAll(
                        "",
                        ''
                    );
                } else {
                    highlight.matched_tokens?.forEach((token) => {
                        // Exclude small words like "an"
                        if (typeof token !== "string" || token.length <= 3) {
                            return;
                        }

                        // NOTE(review): as written the token is replaced by itself; a
                        // wrapping highlight tag was presumably lost - verify.
                        htmlContent = htmlContent.replace(
                            token,
                            `${token}`
                        );
                    });
                }
            });

            return {
                ...hit.document,
                // Convert the stored value into a Date object
                updatedAt: new Date(hit.document.updatedAt),
                content: htmlContent
            };
        }) || []
    );
}

--------------------------------------------------------------------------------
/apps/web/src/common/util.ts:
--------------------------------------------------------------------------------
 1 | // Update timestamps are saved at UTC in the database. 2 | // Interpreting the dates in the client timezone may change the date, so parse date explicitly.
// See https://github.com/lindylearn/aboutideasnow/issues/8
/**
 * Formats the UTC calendar day of `date` as e.g. "January 5, 2024" (en-US).
 * The time of day is discarded and the result does not depend on the
 * timezone of the machine running the code.
 */
export function getUTCDate(date: Date) {
    // "YYYY-MM-DD" part of the ISO timestamp; parsing it yields UTC midnight of that day
    const isoDay = date.toISOString().slice(0, 10);
    const formatter = new Intl.DateTimeFormat("en-US", {
        timeZone: "UTC",
        year: "numeric",
        month: "long",
        day: "numeric"
    });

    return formatter.format(new Date(isoDay));
}

--------------------------------------------------------------------------------
/apps/web/src/components/Form.svelte:
--------------------------------------------------------------------------------
 1 | 12 | 13 |
14 | {#if form?.addedDomain === false} 15 | Error indexing your domain :(
We will take a look and add your site as soon as 16 | possible! 17 | {:else if form?.scrapedPosts} 18 | {#if form?.scrapedPosts.length === 0} 19 | We didn't find a /now, /about, or /ideas page on your website. Add one and try again! 20 | {:else} 21 | Indexed your website successfully! Found posts: 22 |
    23 | {#each form.scrapedPosts as post} 24 |
  • 25 | 26 | {new URL(post.url).hostname}{new URL(post.url).pathname} 27 | 28 | {#if post.updatedAt && new Date(post.updatedAt).getFullYear() > 1970} 29 | last updated at {getUTCDate(new Date(post.updatedAt))} 30 | {:else} 31 | without update time 32 | {/if} 33 |
  • 34 | {/each} 35 |
36 | {/if} 37 | {:else if isAddingDomain} 38 | Indexing your domain... 39 | {:else} 40 |
{ 44 | // Show loading state until page data is reloaded 45 | isAddingDomain = true; 46 | 47 | posthog.capture("siteAdded"); 48 | }} 49 | > 50 |
51 | 57 | 62 |
63 | 69 |
70 | {/if} 71 |
72 | -------------------------------------------------------------------------------- /apps/web/src/components/Header.svelte: -------------------------------------------------------------------------------- 1 | 9 | 10 | 28 | -------------------------------------------------------------------------------- /apps/web/src/components/IdeaCard.svelte: -------------------------------------------------------------------------------- 1 | 19 | 20 | { 26 | posthog.capture("postClicked"); 27 | }} 28 | > 29 |
30 |
31 | {post.domain} 36 |
37 |

38 | {post.domain} 39 |

40 |
44 | /{post.type.toLowerCase()} 45 |
46 |
47 |
48 |
49 |
50 |
53 | {#if date && date.getFullYear() > 1970} 54 | Updated {getUTCDate(date)} 55 | 56 |

{/if}{@html post.content} 57 | 61 |
62 |
63 |
64 | -------------------------------------------------------------------------------- /apps/web/src/components/icons/x.svelte: -------------------------------------------------------------------------------- 1 | 12 | -------------------------------------------------------------------------------- /apps/web/src/routes/+layout.svelte: -------------------------------------------------------------------------------- 1 | 26 | 27 | 28 | 50 | 51 |
54 | 55 | 56 | {#if href !== "/"} 57 | 76 | {/if} 77 |
78 | 79 | 106 |
--------------------------------------------------------------------------------
/apps/web/src/routes/+page.server.ts:
--------------------------------------------------------------------------------
1 | import { getDatabaseClient } from "@repo/core/dist";
2 | import { type Post, PostType, PrismaClient } from "@repo/core/generated/prisma-client";
3 | import { handleSubmit } from "../common/formActions.js";
4 |
5 | /**
6 |  * Server-side loader for the home page.
7 |  *
8 |  * Fetches the posts shown before any search and counts the scrape states with status
9 |  * SCRAPED and type ABOUT. If anything throws, the error is logged and a fallback
10 |  * (hard-coded count, no posts) is returned instead of failing the page.
11 |  */
12 | export async function load({ url, setHeaders }): Promise<{
13 |     websiteCount: number;
14 |     defaultPosts: Post[];
15 | }> {
16 |     let db: PrismaClient | undefined;
17 |     try {
18 |         db = getDatabaseClient();
19 |
20 |         // Accept only known post types; a missing or unknown ?filter= means "no filter"
21 |         const requestedType = url.searchParams.get("filter")?.toUpperCase();
22 |         const postTypeFilter = Object.values(PostType).find((type) => type === requestedType);
23 |
24 |         // The two queries are independent, so run them concurrently
25 |         const [defaultPosts, websiteCount] = await Promise.all([
26 |             getRepresentativePosts(postTypeFilter, db),
27 |             db.scrapeState.count({
28 |                 where: {
29 |                     status: "SCRAPED",
30 |                     type: "ABOUT"
31 |                 }
32 |             })
33 |         ]);
34 |
35 |         // Cache for 1 hour in shared caches (the directive is `s-maxage`, not `s-max-age`)
36 |         setHeaders({
37 |             "Cache-Control": "max-age=0, s-maxage=3600"
38 |         });
39 |
40 |         return { websiteCount, defaultPosts };
41 |     } catch (err) {
42 |         console.error(`load() function failed: ${err}`);
43 |
44 |         return { websiteCount: 7591, defaultPosts: [] };
45 |     } finally {
46 |         // Release the connection on success and failure alike. A failing disconnect is
47 |         // logged instead of thrown so it cannot turn a finished response into an error.
48 |         await db?.$disconnect().catch((disconnectErr) => {
49 |             console.error(`Failed to disconnect database client: ${disconnectErr}`);
50 |         });
51 |     }
52 | }
53 |
54 | /**
55 |  * Returns the posts to show when no search is active.
56 |  *
57 |  * With a filter: up to `limit` posts of that type. Without one: up to `limit / 3`
58 |  * (rounded down) posts of each of the three types, sorted by domain.
59 |  */
60 | async function getRepresentativePosts(
61 |     postTypeFilter: PostType | undefined,
62 |     db: PrismaClient,
63 |     limit = 12
64 | ) {
65 |     // Apply filter if present
66 |     if (postTypeFilter) {
67 |         return await getPosts(postTypeFilter, db, limit);
68 |     }
69 |
70 |     // Ensure that all three post types exist
71 |     const postsByType = await Promise.all(
72 |         [PostType.ABOUT, PostType.IDEAS, PostType.NOW].map((type) =>
73 |             getPosts(type, db, Math.floor(limit / 3))
74 |         )
75 |     );
76 |     return postsByType.flat().sort((a, b) => a.domain.localeCompare(b.domain));
77 | }
78 |
79 | /** Returns up to `limit` posts of the given type, most recently updated first. */
80 | async function getPosts(postTypeFilter: PostType, db: PrismaClient, limit = 12) {
81 |     return await db.post.findMany({
82 |         where: { type: postTypeFilter },
83 |         orderBy: { updatedAt: "desc" },
84 |         take: limit
85 |     });
86 | }
87 |
88 | /** Form actions for the home page: the default action passes the request to handleSubmit. */
89 | export const actions = {
90 |     default: async ({ request }) => {
91 |         return await handleSubmit(request);
92 |     }
93 | };
94 |
--------------------------------------------------------------------------------
/apps/web/src/routes/+page.svelte:
--------------------------------------------------------------------------------
1 | 92 | 93 |
94 | 95 |
96 |

97 | Find people to talk to or collaborate with by searching across the /about, /ideas and /now 98 | pages of {data.websiteCount} 99 | personal websites. 100 |

101 | Read the manifesto 104 |
105 | 106 |
107 |
111 | 112 | 122 |
123 | {#if isSearching || $navigating} 124 |
125 | {:else if searchQuery} 126 | 136 | {/if} 137 |
138 |
139 | 140 | 164 |
165 | 166 | 188 | 189 |
193 | {#each exampleSearchQueries as exampleSearchQuery} 194 | 206 | {/each} 207 |
208 | 209 |
213 | {#each searchedPosts.length ? searchedPosts : data.defaultPosts as post, index (post.url)} 214 | 215 | {/each} 216 |
217 | 218 |
219 |

220 | Find more posts by searching for things you're interested in!
Or click the 221 | AboutIdeasNow logo to filter by a specific post type. 222 |

223 |
224 | 225 |
229 |

Add your site here!

230 |

231 | Help other people find you by adding your website to aboutideasnow.com. 232 | Learn more 233 |

234 |
235 |
236 | -------------------------------------------------------------------------------- /apps/web/src/routes/about/+page.server.ts: -------------------------------------------------------------------------------- 1 | import { handleSubmit } from "../../common/formActions.js"; 2 | 3 | /** Form actions for the /about route: the default action hands the incoming request to handleSubmit and returns its result. */ export const actions = { 4 | default: async ({ request }) => { 5 | return await handleSubmit(request); 6 | } 7 | }; 8 | -------------------------------------------------------------------------------- /apps/web/src/routes/about/+page.svelte: -------------------------------------------------------------------------------- 1 | 10 | 11 |
{ 13 | window.location.href = "/"; 14 | }} 15 | /> 16 | 17 |
18 |
19 |

20 | aboutideasnow.com exists to help you find your people. 21 |

22 |

23 | 1000s of personal websites exist on the internet, outside of social media — 24 | created by creators, thinkers, and doers of all sorts. 25 |

26 |

27 | We index the /about, /ideas, and /now pages of these independent sites to give you a 28 | handy way of searching through them. 29 |

30 |
31 | 32 |
33 |

What's /ideas?

34 |

35 | The way we see it, the /about, /ideas, and /now pages on websites each have a different 36 | purpose. And all three should be there. 37 |

38 |
    39 |
  • 40 | /about pages are about the past: how people see themselves 41 | and what brought them there. That's useful as a general overview. 42 |
  • 43 |
  • 44 | /now is a more personal look at what people are doing 45 | right now, what they care about, and what they could use help with. 46 |
  • 47 |
  • 48 | /ideas should be about the future: the crazy things people 49 | always wanted to make, concepts they're mulling over, or planned projects. 50 |
  • 51 |
52 |

53 | If you create an /ideas page on your website, people who are 54 | looking for collaborators are much more likely to find you. 55 |

56 |
57 | 58 |
59 |

Add your website

60 |

61 | Create any of the above pages on your website. Mention when you last updated each page 62 | so people know it's fresh. 63 |

64 |

65 | Then submit your website here to add it to 66 | aboutideasnow.com. 67 |

68 |

69 | If you enter your email address, we'll let you know if someone mentions similar ideas to 70 | you on their website. 71 |

72 | 73 |
74 | 75 |
76 |

Who built this?

77 |

78 | aboutideasnow.com is a decentralized solution on the increasingly centralized web. We 79 | exist only to direct you to people's personal websites. You take it from there. 80 |

81 |

82 | Peter Hagen 83 | and 84 | Louis Barclay built this 85 | website in February 2024. 86 |

87 |

88 | Artem Tyurin came up with the notion of /ideas pages. 90 |

91 |

92 | Derek Sivers started the /now page 93 | movement without which this all wouldn't be possible. 94 |

95 |

96 | Contribute 97 | on GitHub to add 98 | your name here! 99 |

100 |
101 |
102 | -------------------------------------------------------------------------------- /apps/web/static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/favicon.png -------------------------------------------------------------------------------- /apps/web/static/fonts/DMSans-VariableFont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/fonts/DMSans-VariableFont.ttf -------------------------------------------------------------------------------- /apps/web/static/fonts/PTMono-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/fonts/PTMono-Regular.ttf -------------------------------------------------------------------------------- /apps/web/static/fonts/Reckless-VariableFont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/fonts/Reckless-VariableFont.woff2 -------------------------------------------------------------------------------- /apps/web/static/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /apps/web/static/metaimage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/metaimage.png 
-------------------------------------------------------------------------------- /apps/web/static/telephone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/telephone.png -------------------------------------------------------------------------------- /apps/web/static/typewriter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/typewriter.png -------------------------------------------------------------------------------- /apps/web/svelte.config.js: -------------------------------------------------------------------------------- 1 | import adapter from "@sveltejs/adapter-vercel"; 2 | import { vitePreprocess } from "@sveltejs/vite-plugin-svelte"; 3 | 4 | /** @type {import('@sveltejs/kit').Config} */ 5 | const config = { 6 | // Consult https://kit.svelte.dev/docs/integrations#preprocessors 7 | // for more information about preprocessors 8 | preprocess: vitePreprocess(), 9 | 10 | kit: { 11 | // adapter-auto only supports some environments, see https://kit.svelte.dev/docs/adapter-auto for a list. 12 | // If your environment is not supported or you settled on a specific environment, switch out the adapter. 13 | // See https://kit.svelte.dev/docs/adapters for more information about adapters. 
14 | adapter: adapter() 15 | } 16 | }; 17 | 18 | export default config; 19 | -------------------------------------------------------------------------------- /apps/web/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: ["./src/**/*.{html,js,svelte,ts}"], 4 | 5 | plugins: [], 6 | theme: { 7 | extend: { 8 | colors: { 9 | background: "#fafafa", 10 | light: "rgba(255, 255, 255, 0.32)", 11 | border: "rgba(33, 35, 36, 0.04)", 12 | bold: "rgb(213, 199, 162)", 13 | text: "#2c2c2c" 14 | }, 15 | fontFamily: { 16 | title: ["Reckless", "serif"], 17 | text: ["DM Sans", "sans-serif"], 18 | mono: ["PT Mono", "monospace"] 19 | }, 20 | keyframes: { 21 | fadein: { 22 | "0%": { opacity: "0" }, 23 | "100%": { opacity: "1" } 24 | }, 25 | cardFadein: { 26 | "0%": { opacity: "0", transform: "translateY(5px)" }, 27 | "100%": { opacity: "1", transform: "translateY(0)" } 28 | } 29 | }, 30 | animation: { 31 | fadein: "fadein 200ms ease-out backwards", 32 | cardFadein: "cardFadein 600ms cubic-bezier(0.16, 1, 0.3, 1) backwards" 33 | } 34 | } 35 | } 36 | }; 37 | -------------------------------------------------------------------------------- /apps/web/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./.svelte-kit/tsconfig.json", 3 | "compilerOptions": { 4 | "allowJs": true, 5 | "checkJs": true, 6 | "esModuleInterop": true, 7 | "forceConsistentCasingInFileNames": true, 8 | "resolveJsonModule": true, 9 | "skipLibCheck": true, 10 | "sourceMap": true, 11 | "strict": true, 12 | "strictNullChecks": true 13 | } 14 | // Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias 15 | // 16 | // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes 17 | // from the referenced tsconfig.json - TypeScript does not merge them in 18 | } 19 | 
-------------------------------------------------------------------------------- /apps/web/vercel.json: -------------------------------------------------------------------------------- 1 | { 2 | "redirects": [ 3 | { 4 | "source": "/now", 5 | "destination": "https://github.com/lindylearn/aboutideasnow/pulls", 6 | "permanent": true 7 | }, 8 | { 9 | "source": "/ideas", 10 | "destination": "https://github.com/lindylearn/aboutideasnow/issues", 11 | "permanent": true 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /apps/web/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { sveltekit } from "@sveltejs/kit/vite"; 2 | import { defineConfig } from "vite"; 3 | 4 | export default defineConfig({ 5 | plugins: [sveltekit()], 6 | resolve: { 7 | preserveSymlinks: true 8 | } 9 | }); 10 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "project-template", 3 | "private": true, 4 | "scripts": { 5 | "build": "turbo build", 6 | "dev": "turbo dev", 7 | "lint": "turbo lint", 8 | "format": "prettier --write \"**/*.{ts,tsx,md}\"" 9 | }, 10 | "devDependencies": { 11 | "@repo/eslint-config": "workspace:*", 12 | "@repo/typescript-config": "workspace:*", 13 | "prettier": "^3.1.1", 14 | "turbo": "latest" 15 | }, 16 | "packageManager": "pnpm@8.9.0", 17 | "engines": { 18 | "node": ">=18" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /packages/core/.env.example: -------------------------------------------------------------------------------- 1 | # Supabase database URL 2 | DATABASE_URL="" 3 | -------------------------------------------------------------------------------- /packages/core/.gitignore: -------------------------------------------------------------------------------- 1 | 
generated 2 | -------------------------------------------------------------------------------- /packages/core/README.md: -------------------------------------------------------------------------------- 1 | # Core package 2 | 3 | Common util functions and the database config. 4 | 5 | ## Development 6 | 7 | After changing `schema.prisma`, run `pnpm generate` and `pnpm migrate $migration-name`. 8 | -------------------------------------------------------------------------------- /packages/core/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@repo/core", 3 | "version": "1.0.0", 4 | "description": "", 5 | "exports": { 6 | "./dist": "./dist/index.js", 7 | "./generated/prisma-client": "./generated/prisma-client/index.js" 8 | }, 9 | "scripts": { 10 | "postinstall": "pnpm generate", 11 | "dev": "tsc -w", 12 | "build": "tsc", 13 | "generate": "prisma generate", 14 | "migrate": "prisma migrate dev --name" 15 | }, 16 | "keywords": [], 17 | "author": "", 18 | "license": "ISC", 19 | "dependencies": { 20 | "@prisma/client": "^5.8.0", 21 | "extract-date": "^2.8.2", 22 | "prisma": "^5.8.0" 23 | }, 24 | "devDependencies": { 25 | "@repo/typescript-config": "workspace:^", 26 | "typescript": "^5.3.3" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240111115033_add_post/migration.sql: -------------------------------------------------------------------------------- 1 | -- CreateTable 2 | CREATE TABLE "Post" ( 3 | "domain" TEXT NOT NULL, 4 | "url" TEXT NOT NULL, 5 | "content" TEXT NOT NULL, 6 | "updatedAt" TIMESTAMP(3), 7 | "authorName" TEXT, 8 | 9 | CONSTRAINT "Post_pkey" PRIMARY KEY ("domain") 10 | ); 11 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240111121259_add_scrape_state/migration.sql: 
-------------------------------------------------------------------------------- 1 | /* 2 | Warnings: 3 | 4 | - Added the required column `type` to the `Post` table without a default value. This is not possible if the table is not empty. 5 | 6 | */ 7 | -- CreateEnum 8 | CREATE TYPE "PostType" AS ENUM ('NOW', 'IDEAS'); 9 | 10 | -- CreateEnum 11 | CREATE TYPE "ScrapeStatus" AS ENUM ('UNAVAILABLE', 'NO_CONTENT', 'SCRAPED'); 12 | 13 | -- AlterTable 14 | ALTER TABLE "Post" ADD COLUMN "type" "PostType" NOT NULL; 15 | 16 | -- CreateTable 17 | CREATE TABLE "ScrapeState" ( 18 | "domain" TEXT NOT NULL, 19 | "status" "ScrapeStatus" NOT NULL, 20 | "scapedAt" TIMESTAMP(3), 21 | 22 | CONSTRAINT "ScrapeState_pkey" PRIMARY KEY ("domain") 23 | ); 24 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240111152613_convert_post_date/migration.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Warnings: 3 | 4 | - Made the column `updatedAt` on table `Post` required. This step will fail if there are existing NULL values in that column. 5 | 6 | */ 7 | -- AlterTable 8 | ALTER TABLE "Post" ALTER COLUMN "updatedAt" SET NOT NULL, 9 | ALTER COLUMN "updatedAt" SET DEFAULT '1970-01-01', 10 | ALTER COLUMN "updatedAt" SET DATA TYPE TEXT; 11 | 12 | -- AlterTable 13 | ALTER TABLE "ScrapeState" ADD COLUMN "url" TEXT; 14 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240111153141_convert_post_date_back/migration.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Warnings: 3 | 4 | - Changed the type of `updatedAt` on the `Post` table. No cast exists, the column would be dropped and recreated, which cannot be done if there is data, since the column is required. 
5 | 6 | */ 7 | -- AlterTable 8 | ALTER TABLE "Post" DROP COLUMN "updatedAt", 9 | ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL; 10 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240111155745_add_redirect_status/migration.sql: -------------------------------------------------------------------------------- 1 | -- AlterEnum 2 | ALTER TYPE "ScrapeStatus" ADD VALUE 'REDIRECTED'; 3 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240126145714_add_directory_scrape_state/migration.sql: -------------------------------------------------------------------------------- 1 | -- CreateEnum 2 | CREATE TYPE "DomainType" AS ENUM ('INDIVIDUAL_SITE', 'DIRECTORY'); 3 | 4 | -- AlterTable 5 | ALTER TABLE "ScrapeState" ADD COLUMN "domainType" "DomainType" NOT NULL DEFAULT 'INDIVIDUAL_SITE'; 6 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240128184424_add_vector_search/migration.sql: -------------------------------------------------------------------------------- 1 | -- From https://js.langchain.com/docs/integrations/retrievers/supabase-hybrid/?ref=blog.langchain.dev#create-a-table-and-search-functions-in-your-database 2 | 3 | -- Enable the pgvector extension to work with embedding vectors 4 | create extension vector; 5 | 6 | -- Create a table to store your documents 7 | alter table "Post" 8 | add column embedding vector(1024); 9 | 10 | -- Create a function to similarity search for documents 11 | create function match_posts ( 12 | query_embedding vector(1024), 13 | match_count int DEFAULT null, 14 | filter jsonb DEFAULT '{}' 15 | ) returns table ( 16 | id bigint, 17 | content text, 18 | similarity float 19 | ) 20 | language plpgsql 21 | as $$ 22 | #variable_conflict use_column 23 | begin 24 | return query 25 | select 26 | id, 27 | content, 28 | 1 - (documents.embedding <=> 
query_embedding) as similarity 29 | from documents 30 | order by documents.embedding <=> query_embedding 31 | limit match_count; 32 | end; 33 | $$; 34 | 35 | -- Create a function to keyword search for documents 36 | create function kw_match_posts(query_text text, match_count int) 37 | returns table (id bigint, content text, similarity real) 38 | as $$ 39 | 40 | begin 41 | return query execute 42 | format('select id, content, ts_rank(to_tsvector(content), plainto_tsquery($1)) as similarity 43 | from documents 44 | where to_tsvector(content) @@ plainto_tsquery($1) 45 | order by similarity desc 46 | limit $2') 47 | using query_text, match_count; 48 | end; 49 | $$ language plpgsql; 50 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240128193052_fix_search_functions/migration.sql: -------------------------------------------------------------------------------- 1 | drop function if exists match_posts; 2 | drop function if exists kw_match_posts; 3 | 4 | -- Create a function to similarity search for "Post" 5 | create or replace function match_posts ( 6 | query_embedding vector(1024), 7 | match_count int DEFAULT null, 8 | filter jsonb DEFAULT '{}' 9 | ) returns table ( 10 | "url" text, 11 | "domain" text, 12 | "type" "PostType", 13 | "content" text, 14 | "updatedAt" timestamp, 15 | similarity float 16 | ) 17 | language plpgsql 18 | as $$ 19 | #variable_conflict use_column 20 | begin 21 | return query 22 | select 23 | "url", 24 | "domain", 25 | "type", 26 | "content", 27 | "updatedAt", 28 | 1 - ("Post".embedding <=> query_embedding) as similarity 29 | from "Post" 30 | order by "Post".embedding <=> query_embedding 31 | limit match_count; 32 | end; 33 | $$; 34 | 35 | -- Create a function to keyword search for "Post" 36 | create or replace function kw_match_posts(query_text text, match_count int) 37 | returns table ( 38 | "url" text, 39 | "domain" text, 40 | "type" "PostType", 41 | "content" text, 42 | 
"updatedAt" timestamp, 43 | similarity real 44 | ) 45 | as $$ 46 | begin 47 | return query execute 48 | format(' 49 | select 50 | "url", 51 | "domain", 52 | "type", 53 | "content", 54 | "updatedAt", 55 | ts_rank(to_tsvector(content), plainto_tsquery($1)) as similarity 56 | from "Post" 57 | where to_tsvector(content) @@ plainto_tsquery($1) 58 | order by similarity desc 59 | limit $2') 60 | using query_text, match_count; 61 | end; 62 | $$ language plpgsql; 63 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240128193504_change_post_id/migration.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Warnings: 3 | 4 | - The primary key for the `Post` table will be changed. If it partially fails, the table could be left without primary key constraint. 5 | 6 | */ 7 | -- AlterTable 8 | ALTER TABLE "Post" DROP CONSTRAINT "Post_pkey", 9 | ADD CONSTRAINT "Post_pkey" PRIMARY KEY ("url"); 10 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240128200314_add_post_id/migration.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Warnings: 3 | 4 | - The primary key for the `Post` table will be changed. If it partially fails, the table could be left without primary key constraint. 5 | 6 | */ 7 | -- AlterTable 8 | ALTER TABLE "Post" DROP CONSTRAINT "Post_pkey", 9 | ADD COLUMN "id" SERIAL NOT NULL, 10 | ADD CONSTRAINT "Post_pkey" PRIMARY KEY ("id"); 11 | CREATE UNIQUE INDEX "Post_url_key" ON "Post"("url"); 12 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240129141512_remove_post_id/migration.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Warnings: 3 | 4 | - The primary key for the `Post` table will be changed. 
If it partially fails, the table could be left without primary key constraint. 5 | - You are about to drop the column `authorName` on the `Post` table. All the data in the column will be lost. 6 | - You are about to drop the column `id` on the `Post` table. All the data in the column will be lost. 7 | 8 | */ 9 | -- DropIndex 10 | DROP INDEX "Post_url_key"; 11 | 12 | -- AlterTable 13 | ALTER TABLE "Post" DROP CONSTRAINT "Post_pkey", 14 | DROP COLUMN "authorName", 15 | DROP COLUMN "id", 16 | ADD CONSTRAINT "Post_pkey" PRIMARY KEY ("url"); 17 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240129141653_edit_embeddings/migration.sql: -------------------------------------------------------------------------------- 1 | alter table "Post" 2 | alter COLUMN embedding TYPE vector(512); 3 | 4 | CREATE INDEX "Post_content_search" ON "Post" USING GIN (to_tsvector('english', content)); 5 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240222114727_enable_domain_scrape/migration.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Warnings: 3 | 4 | - You are about to drop the column `url` on the `ScrapeState` table. All the data in the column will be lost. 
5 | 6 | */ 7 | -- AlterEnum 8 | ALTER TYPE "PostType" ADD VALUE 'ABOUT'; 9 | 10 | -- AlterTable 11 | ALTER TABLE "ScrapeState" DROP COLUMN "url"; 12 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240222153334_add_submitted_domain_table/migration.sql: -------------------------------------------------------------------------------- 1 | -- CreateTable 2 | CREATE TABLE "SubmittedDomain" ( 3 | "domain" TEXT NOT NULL, 4 | "email" TEXT, 5 | "success" BOOLEAN NOT NULL, 6 | "submittedAt" TIMESTAMP(3) NOT NULL, 7 | 8 | CONSTRAINT "SubmittedDomain_pkey" PRIMARY KEY ("domain") 9 | ); 10 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240223142705_per_post_scrape_state/migration.sql: -------------------------------------------------------------------------------- 1 | -- AlterTable 2 | ALTER TABLE "ScrapeState" ADD COLUMN "type" "PostType" NOT NULL DEFAULT 'NOW'; 3 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/20240223150159_change_scrapestate_id/migration.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Warnings: 3 | 4 | - The primary key for the `ScrapeState` table will be changed. If it partially fails, the table could be left without primary key constraint. 5 | 6 | */ 7 | -- AlterTable 8 | ALTER TABLE "ScrapeState" DROP CONSTRAINT "ScrapeState_pkey", 9 | ALTER COLUMN "type" DROP DEFAULT, 10 | ADD CONSTRAINT "ScrapeState_pkey" PRIMARY KEY ("domain", "type"); 11 | -------------------------------------------------------------------------------- /packages/core/prisma/migrations/migration_lock.toml: -------------------------------------------------------------------------------- 1 | # Please do not edit this file manually 2 | # It should be added in your version-control system (i.e. 
Git) 3 | provider = "postgresql" -------------------------------------------------------------------------------- /packages/core/prisma/schema.prisma: -------------------------------------------------------------------------------- 1 | generator client { 2 | provider = "prisma-client-js" 3 | output = "../generated/prisma-client" 4 | } 5 | 6 | datasource db { 7 | provider = "postgresql" 8 | url = env("DATABASE_URL") 9 | } 10 | 11 | model Post { 12 | url String @id 13 | domain String 14 | type PostType 15 | 16 | content String 17 | updatedAt DateTime 18 | 19 | embedding Unsupported("vector(512)")? 20 | } 21 | 22 | enum PostType { 23 | ABOUT 24 | NOW 25 | IDEAS 26 | } 27 | 28 | // Separate scrape status from posts to track failures 29 | model ScrapeState { 30 | domain String 31 | type PostType 32 | 33 | domainType DomainType @default(INDIVIDUAL_SITE) 34 | status ScrapeStatus 35 | scapedAt DateTime? 36 | 37 | @@id([domain, type]) 38 | } 39 | 40 | enum DomainType { 41 | INDIVIDUAL_SITE 42 | DIRECTORY 43 | } 44 | 45 | enum ScrapeStatus { 46 | UNAVAILABLE 47 | NO_CONTENT 48 | REDIRECTED 49 | SCRAPED 50 | } 51 | 52 | model SubmittedDomain { 53 | domain String @id 54 | email String? 
55 | 56 | success Boolean 57 | submittedAt DateTime 58 | } 59 | -------------------------------------------------------------------------------- /packages/core/src/index.ts: -------------------------------------------------------------------------------- 1 | import { PrismaClient } from "../generated/prisma-client"; 2 | 3 | export function getDatabaseClient(): PrismaClient { 4 | console.log("Initializing database client"); 5 | 6 | return new PrismaClient(); 7 | } 8 | -------------------------------------------------------------------------------- /packages/core/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@repo/typescript-config/base.json", 3 | "include": ["src/**/*.ts"], 4 | "exclude": ["node_modules"], 5 | "compilerOptions": { 6 | "outDir": "dist" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /packages/eslint-config/README.md: -------------------------------------------------------------------------------- 1 | # `@repo/eslint-config` 2 | 3 | Collection of internal eslint configurations.
4 | -------------------------------------------------------------------------------- /packages/eslint-config/library.js: -------------------------------------------------------------------------------- 1 | const { resolve } = require("node:path"); 2 | 3 | const project = resolve(process.cwd(), "tsconfig.json"); 4 | 5 | /** @type {import("eslint").Linter.Config} */ 6 | module.exports = { 7 | extends: ["eslint:recommended", "prettier", "eslint-config-turbo"], 8 | plugins: ["only-warn"], 9 | globals: { 10 | React: true, 11 | JSX: true 12 | }, 13 | env: { 14 | node: true 15 | }, 16 | settings: { 17 | "import/resolver": { 18 | typescript: { 19 | project 20 | } 21 | } 22 | }, 23 | ignorePatterns: [ 24 | // Ignore dotfiles 25 | ".*.js", 26 | "node_modules/", 27 | "dist/" 28 | ], 29 | overrides: [ 30 | { 31 | files: ["*.js?(x)", "*.ts?(x)"] 32 | } 33 | ] 34 | }; 35 | -------------------------------------------------------------------------------- /packages/eslint-config/next.js: -------------------------------------------------------------------------------- 1 | const { resolve } = require("node:path"); 2 | 3 | const project = resolve(process.cwd(), "tsconfig.json"); 4 | 5 | /** @type {import("eslint").Linter.Config} */ 6 | module.exports = { 7 | extends: [ 8 | "eslint:recommended", 9 | "prettier", 10 | require.resolve("@vercel/style-guide/eslint/next"), 11 | "eslint-config-turbo" 12 | ], 13 | globals: { 14 | React: true, 15 | JSX: true 16 | }, 17 | env: { 18 | node: true, 19 | browser: true 20 | }, 21 | plugins: ["only-warn"], 22 | settings: { 23 | "import/resolver": { 24 | typescript: { 25 | project 26 | } 27 | } 28 | }, 29 | ignorePatterns: [ 30 | // Ignore dotfiles 31 | ".*.js", 32 | "node_modules/" 33 | ], 34 | overrides: [{ files: ["*.js?(x)", "*.ts?(x)"] }] 35 | }; 36 | -------------------------------------------------------------------------------- /packages/eslint-config/package.json: -------------------------------------------------------------------------------- 
1 | { 2 | "name": "@repo/eslint-config", 3 | "version": "0.0.0", 4 | "private": true, 5 | "files": [ 6 | "library.js", 7 | "next.js", 8 | "react-internal.js" 9 | ], 10 | "devDependencies": { 11 | "@vercel/style-guide": "^5.1.0", 12 | "eslint-config-turbo": "^1.11.3", 13 | "eslint-config-prettier": "^9.1.0", 14 | "eslint-plugin-only-warn": "^1.1.0", 15 | "@typescript-eslint/parser": "^6.17.0", 16 | "@typescript-eslint/eslint-plugin": "^6.17.0", 17 | "typescript": "^5.3.3" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /packages/eslint-config/react-internal.js: -------------------------------------------------------------------------------- 1 | const { resolve } = require("node:path"); 2 | 3 | const project = resolve(process.cwd(), "tsconfig.json"); 4 | 5 | /* 6 | * This is a custom ESLint configuration for use with 7 | * internal (bundled by their consumer) libraries 8 | * that utilize React. 9 | * 10 | * This config extends the Vercel Engineering Style Guide. 
11 | * For more information, see https://github.com/vercel/style-guide 12 | * 13 | */ 14 | 15 | /** @type {import("eslint").Linter.Config} */ 16 | module.exports = { 17 | extends: ["eslint:recommended", "prettier", "eslint-config-turbo"], 18 | plugins: ["only-warn"], 19 | globals: { 20 | React: true, 21 | JSX: true 22 | }, 23 | env: { 24 | browser: true 25 | }, 26 | settings: { 27 | "import/resolver": { 28 | typescript: { 29 | project 30 | } 31 | } 32 | }, 33 | ignorePatterns: [ 34 | // Ignore dotfiles 35 | ".*.js", 36 | "node_modules/", 37 | "dist/" 38 | ], 39 | overrides: [ 40 | // Force ESLint to detect .tsx files 41 | { files: ["*.js?(x)", "*.ts?(x)"] } 42 | ] 43 | }; 44 | -------------------------------------------------------------------------------- /packages/typescript-config/base.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json.schemastore.org/tsconfig", 3 | "display": "Default", 4 | "compilerOptions": { 5 | "declaration": true, 6 | "declarationMap": true, 7 | "esModuleInterop": true, 8 | "incremental": false, 9 | "isolatedModules": true, 10 | "lib": ["es2022", "DOM", "DOM.Iterable"], 11 | "module": "NodeNext", 12 | "moduleDetection": "force", 13 | "moduleResolution": "NodeNext", 14 | "noUncheckedIndexedAccess": true, 15 | "resolveJsonModule": true, 16 | "skipLibCheck": true, 17 | "strict": true, 18 | "strictNullChecks": true, 19 | "target": "ES2022" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /packages/typescript-config/nextjs.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json.schemastore.org/tsconfig", 3 | "display": "Next.js", 4 | "extends": "./base.json", 5 | "compilerOptions": { 6 | "plugins": [{ "name": "next" }], 7 | "module": "ESNext", 8 | "moduleResolution": "Bundler", 9 | "allowJs": true, 10 | "jsx": "preserve", 11 | "noEmit": true 12 | } 13 | } 14 | 
-------------------------------------------------------------------------------- /packages/typescript-config/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@repo/typescript-config", 3 | "version": "0.0.0", 4 | "private": true, 5 | "license": "MIT", 6 | "publishConfig": { 7 | "access": "public" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /packages/typescript-config/react-library.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json.schemastore.org/tsconfig", 3 | "display": "React Library", 4 | "extends": "./base.json", 5 | "compilerOptions": { 6 | "jsx": "react-jsx" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /pnpm-workspace.yaml: -------------------------------------------------------------------------------- 1 | packages: 2 | - "apps/*" 3 | - "packages/*" 4 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@repo/typescript-config/base.json" 3 | } 4 | -------------------------------------------------------------------------------- /turbo.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://turbo.build/schema.json", 3 | "globalDependencies": ["**/.env.*local"], 4 | "globalEnv": [ 5 | "PUBLIC_TYPESENSE_URL", 6 | "PUBLIC_TYPESENSE_SEARCH_API_KEY", 7 | "SUPABASE_URL", 8 | "SUPABASE_PRIVATE_KEY", 9 | "DATABASE_URL", 10 | "INTERNAL_API_URL", 11 | "OPENAI_API_KEY", 12 | "COHERE_API_KEY", 13 | "PINECONE_API_KEY", 14 | "TYPESENSE_URL", 15 | "TYPESENSE_ADMIN_API_KEY" 16 | ], 17 | "tasks": { 18 | "build": { 19 | "dependsOn": ["^build"], 20 | "outputs": ["dist/**", ".next/**", "!.next/cache/**", ".svelte-kit/**", ".svelte/**"] 
21 | }, 22 | "lint": { 23 | "dependsOn": ["^lint"] 24 | }, 25 | "dev": { 26 | "cache": false, 27 | "persistent": true 28 | } 29 | } 30 | } 31 | --------------------------------------------------------------------------------