├── .dockerignore
├── .eslintrc.js
├── .github
    └── workflows
    │   └── deploy.yml
├── .gitignore
├── .npmrc
├── .prettierrc
├── .vscode
    └── settings.json
├── LICENCE.md
├── README.md
├── apps
    ├── api
    │   ├── .env.example
    │   ├── .gitignore
    │   ├── Dockerfile
    │   ├── fly.toml
    │   ├── package.json
    │   ├── src
    │   │   ├── common
    │   │   │   ├── content.ts
    │   │   │   ├── db.ts
    │   │   │   ├── filter.ts
    │   │   │   ├── meta.ts
    │   │   │   ├── openai.ts
    │   │   │   ├── postType.ts
    │   │   │   ├── split.ts
    │   │   │   └── typesense.ts
    │   │   ├── crawler
    │   │   │   ├── main.ts
    │   │   │   └── routes.ts
    │   │   ├── index.ts
    │   │   └── routes
    │   │   │   ├── add.ts
    │   │   │   ├── backfill.ts
    │   │   │   ├── crawl.ts
    │   │   │   ├── getIndexedNowPages.ts
    │   │   │   ├── hello.ts
    │   │   │   └── index.ts
    │   └── tsconfig.json
    └── web
    │   ├── .env.example
    │   ├── .gitignore
    │   ├── .prettierignore
    │   ├── README.md
    │   ├── package.json
    │   ├── postcss.config.js
    │   ├── src
    │       ├── app.css
    │       ├── app.d.ts
    │       ├── app.html
    │       ├── common
    │       │   ├── cohere.ts
    │       │   ├── constants.ts
    │       │   ├── formActions.ts
    │       │   ├── openai.ts
    │       │   ├── typesense.ts
    │       │   └── util.ts
    │       ├── components
    │       │   ├── Form.svelte
    │       │   ├── Header.svelte
    │       │   ├── IdeaCard.svelte
    │       │   └── icons
    │       │   │   └── x.svelte
    │       └── routes
    │       │   ├── +layout.svelte
    │       │   ├── +page.server.ts
    │       │   ├── +page.svelte
    │       │   └── about
    │       │       ├── +page.server.ts
    │       │       └── +page.svelte
    │   ├── static
    │       ├── favicon.png
    │       ├── fonts
    │       │   ├── DMSans-VariableFont.ttf
    │       │   ├── PTMono-Regular.ttf
    │       │   └── Reckless-VariableFont.woff2
    │       ├── logo.svg
    │       ├── metaimage.png
    │       ├── telephone.png
    │       └── typewriter.png
    │   ├── svelte.config.js
    │   ├── tailwind.config.js
    │   ├── tsconfig.json
    │   ├── vercel.json
    │   ├── vite.config.ts
    │   └── yarn.lock
├── package.json
├── packages
    ├── core
    │   ├── .env.example
    │   ├── .gitignore
    │   ├── README.md
    │   ├── package.json
    │   ├── prisma
    │   │   ├── migrations
    │   │   │   ├── 20240111115033_add_post
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240111121259_add_scrape_state
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240111152613_convert_post_date
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240111153141_convert_post_date_back
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240111155745_add_redirect_status
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240126145714_add_directory_scrape_state
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240128184424_add_vector_search
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240128193052_fix_search_functions
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240128193504_change_post_id
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240128200314_add_post_id
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240129141512_remove_post_id
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240129141653_edit_embeddings
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240222114727_enable_domain_scrape
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240222153334_add_submitted_domain_table
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240223142705_per_post_scrape_state
    │   │   │   │   └── migration.sql
    │   │   │   ├── 20240223150159_change_scrapestate_id
    │   │   │   │   └── migration.sql
    │   │   │   └── migration_lock.toml
    │   │   └── schema.prisma
    │   ├── src
    │   │   └── index.ts
    │   └── tsconfig.json
    ├── eslint-config
    │   ├── README.md
    │   ├── library.js
    │   ├── next.js
    │   ├── package.json
    │   └── react-internal.js
    └── typescript-config
    │   ├── base.json
    │   ├── nextjs.json
    │   ├── package.json
    │   └── react-library.json
├── pnpm-lock.yaml
├── pnpm-workspace.yaml
├── tsconfig.json
└── turbo.json


/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | dist
3 | tsconfig.tsbuildinfo
4 | 


--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
 1 | // This configuration only applies to the package manager root.
 2 | /** @type {import("eslint").Linter.Config} */
 3 | module.exports = {
 4 |     ignorePatterns: ["apps/**", "packages/**"],
 5 |     extends: ["@repo/eslint-config/library.js"],
 6 |     parser: "@typescript-eslint/parser",
 7 |     parserOptions: {
 8 |         project: true
 9 |     }
10 | };
11 | 


--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
 1 | name: 🚀 Deploy
 2 | 
 3 | on:
 4 |     push:
 5 |         branches:
 6 |             - master
 7 |             - main
 8 | 
 9 | permissions:
10 |     contents: read
11 | 
12 | env:
13 |     FLYCTL_VERSION: 0.0.500
14 | 
15 | jobs:
16 |     changes:
17 |         runs-on: ubuntu-latest
18 |         outputs:
19 |             api: ${{ steps.changes.outputs.api }}
20 |         steps:
21 |             - uses: actions/checkout@v4
22 |             - uses: dorny/paths-filter@v2
23 |               id: changes
24 |               with:
25 |                   filters: |
26 |                       api:
27 |                         - 'packages/core/**'
28 |                         - 'apps/api/**'
29 |                         - '.github/**'
30 | 
31 |     api:
32 |         needs: changes
33 |         if: ${{ needs.changes.outputs.api == 'true' }}
34 |         runs-on: ubuntu-latest
35 |         env:
36 |             FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
37 |         steps:
38 |             - uses: actions/checkout@v4
39 |             - uses: superfly/flyctl-actions@master
40 |               with:
41 |                   args: deploy --config apps/api/fly.toml .
42 |                   version: $FLYCTL_VERSION
43 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
 2 | 
 3 | # Dependencies
 4 | node_modules
 5 | .pnp
 6 | .pnp.js
 7 | 
 8 | # Local env files
 9 | .env
10 | .env.local
11 | .env.development.local
12 | .env.test.local
13 | .env.production.local
14 | 
15 | # Testing
16 | coverage
17 | 
18 | # Turbo
19 | .turbo
20 | 
21 | # Vercel
22 | .vercel
23 | 
24 | # Build Outputs
25 | .next/
26 | out/
27 | build
28 | dist
29 | 
30 | 
31 | # Debug
32 | npm-debug.log*
33 | yarn-debug.log*
34 | yarn-error.log*
35 | 
36 | # Misc
37 | .DS_Store
38 | *.pem
39 | 


--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/.npmrc


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
 1 | {
 2 |     "tabWidth": 4,
 3 |     "useTabs": false,
 4 |     "singleQuote": false,
 5 |     "trailingComma": "none",
 6 |     "printWidth": 100,
 7 |     // "plugins": ["prettier-plugin-svelte", "prettier-plugin-tailwindcss"],
 8 |     "pluginSearchDirs": ["."],
 9 |     "overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }]
10 | }
11 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "eslint.workingDirectories": [
3 |         {
4 |             "mode": "auto"
5 |         }
6 |     ]
7 | }
8 | 


--------------------------------------------------------------------------------
/LICENCE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Peter Hagen & Louis Barclay
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # About Ideas Now
 2 | 
 3 | aboutideasnow.com indexes the /about, /ideas, and /now pages of 1000s of personal websites, and lets you search across them.
 4 | The purpose is to help you find interesting people and talk to them.
 5 | 
 6 | If you want to help improve the website, please open a GitHub issue!
 7 | 
 8 | ## Code structure
 9 | 
10 | This project uses Turborepo with a number of packages:
11 | 
12 | -   `apps/web`: A SvelteKit web app for the aboutideasnow.com website
13 | -   `apps/api`: A Node.js server to run periodic website scrapes
14 | -   `packages/core`: Shared util functions and the Prisma Postgres database config
15 | 
16 | ## Installation
17 | 
18 | -   Create `.env` files in `apps/api`, `apps/web`, and `packages/core` (look at the `.env.example` files)
19 | -   `pnpm install` to install the dependencies
20 | 
21 | ## Development
22 | 
23 | -   `pnpm dev` to run all apps
24 | -   `cd packages/core && pnpm generate/migrate` to regenerate the database types after changes, or to apply them to the database
25 | 
26 | ## Deployment
27 | 
28 | -   Create a Postgres database (e.g. via Supabase), and use its connection string as the `DATABASE_URL` secret in the following deployments.
29 | -   Deploy the web app JavaScript build. If using Vercel, you'll have to override the CI/CD install command with `pnpm install --unsafe-perm` so it runs the `packages/core` postinstall step. It correctly fills in all other steps automatically.
30 | -   Deploy the api app using Docker (from the root monorepo context). This repo already has a GitHub Action to automatically deploy to fly.io.
31 | 
32 | 1
33 | 


--------------------------------------------------------------------------------
/apps/api/.env.example:
--------------------------------------------------------------------------------
1 | DATABASE_URL=""
2 | OPENAI_API_KEY=""
3 | TYPESENSE_URL=""
4 | TYPESENSE_ADMIN_API_KEY=""
5 | 


--------------------------------------------------------------------------------
/apps/api/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | dist
3 | .env
4 | 


--------------------------------------------------------------------------------
/apps/api/Dockerfile:
--------------------------------------------------------------------------------
 1 | # syntax = docker/dockerfile:1
 2 | 
 3 | # Adjust NODE_VERSION as desired
 4 | ARG NODE_VERSION=18.16.1
 5 | FROM node:${NODE_VERSION}-slim as base
 6 | 
 7 | LABEL fly_launch_runtime="Node.js/Prisma"
 8 | 
 9 | # Node.js/Prisma app lives here
10 | WORKDIR /app
11 | 
12 | # Throw-away build stage to reduce size of final image
13 | FROM base as build
14 | 
15 | # Install packages needed to build node modules
16 | RUN apt-get update -qq && \
17 |     apt-get install -y build-essential openssl pkg-config python-is-python3
18 | 
19 | # Copy application code
20 | COPY --link . .
21 | 
22 | # Install dev CLIs
23 | RUN npm install -g pnpm turbo
24 | 
25 | # Install dependencies
26 | RUN pnpm install --frozen-lockfile
27 | 
28 | # Build application
29 | RUN pnpm build --filter=@repo/api
30 | 
31 | # Remove development dependencies
32 | RUN pnpm install --prod --frozen-lockfile
33 | 
34 | # Final stage for app image
35 | FROM base
36 | ENV NODE_ENV="production"
37 | 
38 | # Install packages needed for deployment
39 | RUN apt-get update -qq && \
40 |     apt-get install --no-install-recommends -y chromium chromium-sandbox openssl && \
41 |     rm -rf /var/lib/apt/lists /var/cache/apt/archives
42 | 
43 | # Copy built application
44 | COPY --from=build /app /app
45 | 
46 | # Start the server by default, this can be overwritten at runtime
47 | EXPOSE 3000
48 | ENV PUPPETEER_EXECUTABLE_PATH="/usr/bin/chromium"
49 | CMD [ "node", "apps/api/dist/index.js" ]
50 | 


--------------------------------------------------------------------------------
/apps/api/fly.toml:
--------------------------------------------------------------------------------
 1 | # fly.toml app configuration file generated for ideasideasideas on 2024-01-11T10:40:26+01:00
 2 | #
 3 | # See https://fly.io/docs/reference/configuration/ for information about how to use this file.
 4 | #
 5 | 
 6 | app = "ideasideasideas"
 7 | primary_region = "iad"
 8 | 
 9 | [build]
10 |   dockerfile = "Dockerfile"
11 | 
12 | [env]
13 |   PORT = "3000"
14 | 
15 | [http_service]
16 |   internal_port = 3000
17 |   force_https = true
18 |   auto_stop_machines = true
19 |   auto_start_machines = true
20 |   min_machines_running = 1
21 |   processes = ["app"]
22 | 
23 | [[vm]]
24 |   cpu_kind = "shared"
25 |   cpus = 1
26 |   memory_mb = 1024
27 | 


--------------------------------------------------------------------------------
/apps/api/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "@repo/api",
 3 |     "version": "1.0.0",
 4 |     "private": true,
 5 |     "scripts": {
 6 |         "dev": "nodemon src/index.ts",
 7 |         "build": "tsc"
 8 |     },
 9 |     "dependencies": {
10 |         "@crawlee/memory-storage": "^3.7.2",
11 |         "@mozilla/readability": "^0.5.0",
12 |         "@postlight/parser": "^2.2.3",
13 |         "@repo/core": "workspace:*",
14 |         "cheerio": "1.0.0-rc.12",
15 |         "cors": "^2.8.5",
16 |         "crawlee": "^3.7.2",
17 |         "dotenv": "^16.4.1",
18 |         "express": "^4.18.2",
19 |         "jsdom": "^23.2.0",
20 |         "langchain": "^0.1.11",
21 |         "metascraper": "^5.43.4",
22 |         "metascraper-date": "^5.43.4",
23 |         "normalize-url": "^8.0.0",
24 |         "openai": "^4.26.0",
25 |         "turndown": "^7.1.2",
26 |         "typesense": "^1.7.2"
27 |     },
28 |     "devDependencies": {
29 |         "@flydotio/dockerfile": "^0.4.11",
30 |         "@types/cors": "^2.8.17",
31 |         "@types/express": "^4.17.21",
32 |         "@types/node": "^20.11.10",
33 |         "@types/turndown": "^5.0.4",
34 |         "nodemon": "^3.0.3",
35 |         "parser": "link:@types/@postlight/parser",
36 |         "ts-node": "^10.9.2",
37 |         "typescript": "^5.3.3"
38 |     },
39 |     "type": "module"
40 | }
41 | 


--------------------------------------------------------------------------------
/apps/api/src/common/content.ts:
--------------------------------------------------------------------------------
 1 | import { Readability } from "@mozilla/readability";
 2 | // @ts-ignore types missing
 3 | import Parser from "@postlight/parser";
 4 | import { JSDOM } from "jsdom";
 5 | import TurndownService from "turndown";
 6 | 
 7 | /** Parses `html` and returns its whitespace-collapsed body text plus reader-mode markdown. */
 8 | export async function getPageContent(url: string, html: string) {
 9 |     const { document } = new JSDOM(html, { url }).window;
10 |     // Collapse every whitespace run to a single space; fall back to "" without body text
11 |     const bodyText = document.body.textContent;
12 |     const rawContent = bodyText?.replace(/\s+/g, " ") || "";
13 |     // Markdown of the main article, or undefined when no extractor succeeds
14 |     const articleContent = await getReaderModeHtml(url, html, document);
15 |     return { rawContent, articleContent };
16 | }
17 | 
18 | /** Extracts the main article as markdown: Readability first, Postlight as the fallback. */
19 | async function getReaderModeHtml(url: string, html: string, document: Document) {
20 |     // First attempt: Mozilla Readability on the already-parsed document
21 |     const readabilityResult = new Readability(document).parse();
22 |     const readabilityMarkdown = htmlToMarkdown(readabilityResult?.content);
23 |     if (readabilityMarkdown) {
24 |         return readabilityMarkdown;
25 |     }
26 | 
27 |     // Second attempt: Postlight parser on the raw HTML string
28 |     const postlightResult = await Parser.parse(url, { html });
29 |     const postlightMarkdown = htmlToMarkdown(postlightResult?.content);
30 | 
31 |     // Empty or missing output from both extractors is reported as undefined,
32 |     // so callers only ever see a non-empty markdown string or undefined
33 |     return postlightMarkdown || undefined;
34 | }
35 | 
36 | /** Converts HTML to markdown without figures/images and with links reduced to their text. */
37 | function htmlToMarkdown(html?: string) {
38 |     // Nothing to convert for undefined or empty input
39 |     if (!html) {
40 |         return undefined;
41 |     }
42 |     const converter = new TurndownService();
43 |     // Drop <figure> and <img> elements entirely
44 |     converter.addRule("remove-tags", {
45 |         filter: ["figure", "img"],
46 |         replacement: () => ""
47 |     });
48 |     // Keep only the converted inner content of <a> elements
49 |     converter.addRule("unwrap-links", {
50 |         filter: ["a"],
51 |         replacement: (content) => content
52 |     });
53 | 
54 |     return converter.turndown(html);
55 | }
56 | 


--------------------------------------------------------------------------------
/apps/api/src/common/db.ts:
--------------------------------------------------------------------------------
 1 | import { getDatabaseClient } from "@repo/core/dist";
 2 | import { unIndexPost } from "./typesense.js";
 3 | import { PostType } from "@repo/core/generated/prisma-client";
 4 | 
 5 | export const db = getDatabaseClient();
 6 | 
 7 | // Best-effort delete of all post state; a failing step is logged and the rest still run
 8 | export async function deletePost(domain: string, postType: PostType) {
 9 |     const where = { domain, type: postType };
10 |     const steps: [string, () => Promise<unknown>][] = [
11 |         ["post rows", async () => db.post.deleteMany({ where })],
12 |         ["scrape state", async () => db.scrapeState.deleteMany({ where })],
13 |         ["search index", async () => unIndexPost(domain, postType)]
14 |     ];
15 |     for (const [step, run] of steps) {
16 |         await run().catch((e) => console.error(`deletePost ${domain}: ${step} failed: ${e}`));
17 |     }
18 | }
19 | 


--------------------------------------------------------------------------------
/apps/api/src/common/filter.ts:
--------------------------------------------------------------------------------
  1 | /** Returns true when a crawled page should be skipped rather than indexed. */
  2 | export function isExcludedPage(
  3 |     url: string,
  4 |     domain: string,
  5 |     title: string,
  6 |     pathname: string,
  7 |     rawContent: string,
  8 |     articleContent?: string
  9 | ) {
 10 |     // Missing article content
 11 |     const wordCount = articleContent?.split(/\s+/).length || 0;
 12 |     if (!articleContent || wordCount < 10) {
 13 |         return true;
 14 |     }
 15 | 
 16 |     // Lowercase once up front instead of once per blocklist entry
 17 |     const lowerTitle = title.toLowerCase();
 18 |     const lowerContent = rawContent.toLowerCase();
 19 | 
 20 |     // Check words in title & content
 21 |     const wordBlocklist = [
 22 |         "404",
 23 |         "oops",
 24 |         "missing",
 25 |         "not found",
 26 |         "does not exist",
 27 |         "cannot find",
 28 |         "cannot be found",
 29 |         "couldn't find",
 30 |         "nothing found",
 31 |         "no page",
 32 |         "private site",
 33 |         "you're lost",
 34 |         "this website uses cookies",
 35 |         "you need to enable javascript",
 36 |         "server error",
 37 |         "| substack",
 38 |         "existiert noch nicht"
 39 |     ];
 40 |     if (wordBlocklist.some((w) => lowerTitle.includes(w) || lowerContent.includes(w))) {
 41 |         return true;
 42 |     }
 43 | 
 44 |     // Broken platform links such as https://hiradnotes.substack.com/now
 45 |     if (domain.includes("substack") || ["soundcloud.com"].includes(domain)) {
 46 |         return true;
 47 |     }
 48 | 
 49 |     // Ensure /ideas pages actually talk about ideas (and are not fallback pages)
 50 |     if (
 51 |         pathname === "/ideas" &&
 52 |         !(lowerTitle.includes("idea") || lowerContent.includes("idea"))
 53 |     ) {
 54 |         return true;
 55 |     }
 56 | 
 57 |     // Exclude some domains & pages manually
 58 |     if (
 59 |         [
 60 |             // company websites
 61 |             "founderslist.com",
 62 |             "notesnook.com",
 63 |             "aisnakeoil.com",
 64 |             "dev.to",
 65 |             "github.com",
 66 |             "blog.tjcx.me",
 67 |             "ethical.net",
 68 |             "newstalk.com",
 69 |             "thoughtdistillery.com",
 70 |             "archive.blogs.harvard.edu",
 71 |             "indieweb.org",
 72 |             "wrightplacetv.com",
 73 |             "zotero.org",
 74 |             "theatlantic.com",
 75 |             "roarmag.org",
 76 |             "profiles.wordpress.org",
 77 |             "privacytools.io",
 78 |             "openthemagazine.com",
 79 |             "newstatesman.com",
 80 |             "learninpublic.org",
 81 |             "vibilagare.se",
 82 |             // bad date parsing
 83 |             "manojranaweera.me",
 84 |             "blog.halfvast.com"
 85 |         ].includes(domain)
 86 |     ) {
 87 |         return true;
 88 |     }
 89 |     if (
 90 |         [
 91 |             "https://jonathanhaslett.com/ideas",
 92 |             "https://levinofearth.com/ideas",
 93 |             "https://andreiluca.com/ideas",
 94 |             "https://bunniestudios.com/ideas",
 95 |             "https://drorpoleg.com/ideas",
 96 |             "https://verraes.net/ideas",
 97 |             "https://blog.nateliason.com/ideas",
 98 |             "https://ajain.tech/ideas",
 99 |             "https://tracingwoodgrains.com/ideas",
100 |             "https://secretorum.life/ideas",
101 |             "https://blazsemprimoznik.com/ideas",
102 |             "https://xan.lol/ideas",
103 |             "https://hardtowrite.com/ideas",
104 |             "https://flaviocopes.com/ideas",
105 |             "https://kvark.github.io/ideas",
106 |             "https://cwinters.com/ideas",
107 |             "https://simply.joejenett.com/ideas",
108 |             "https://joshj.blog/ideas",
109 |             "https://ianbrodie.com/ideas",
110 |             "https://marcjenkins.co.uk/ideas",
111 |             "https://hans.gerwitz.com/ideas",
112 |             "https://reesskennedy.com/ideas",
113 |             "https://blog.bitsapien.dev/ideas",
114 |             "https://jfpenn.com/ideas",
115 |             "https://jonathontoon.com/ideas",
116 |             "https://whatlisacooks.com/ideas",
117 |             "https://thistooshallgrow.com/ideas",
118 |             "https://teodorapetkova.com/ideas",
119 |             "https://sohl-dickstein.github.io/ideas",
120 |             "https://scottaaronson.blog/ideas",
121 |             "https://ryanholiday.net/ideas",
122 |             "https://rocketcrab.com/ideas",
123 |             "https://neilkakkar.com/ideas",
124 |             "https://nadia.xyz/ideas",
125 |             "https://mywiki.wooledge.org/ideas",
126 |             "https://michaelnielsen.org/ideas",
127 |             "https://matthiasportzel.com/ideas",
128 |             "https://markuskaarlonen.com/ideas",
129 |             "https://liw.fi/ideas",
130 |             "https://literacyenquirer.blogspot.com/ideas"
131 |         ].includes(url)
132 |     ) {
133 |         return true;
134 |     }
135 | 
136 |     return false;
137 | }
138 | 


--------------------------------------------------------------------------------
/apps/api/src/common/meta.ts:
--------------------------------------------------------------------------------
 1 | // @ts-ignore
 2 | import Parser from "@postlight/parser";
 3 | import _metascraper from "metascraper";
 4 | import _metascraperDate from "metascraper-date";
 5 | import { openai } from "./openai.js";
 6 | 
 7 | // @ts-ignore
 8 | const metascraper = _metascraper([_metascraperDate()]);
 9 | 
10 | /**
11 |  * Resolves the domain and last-updated date of a page. A date found by GPT in `content`
12 |  * wins; the metascraper date is the fallback. Debug messages are sent to `log`.
13 |  */
14 | export async function getMeta(url: string, html: string, content?: string, log = console.log) {
15 |     const meta = await metascraper({ url, html });
16 |     const domain = getDomain(url);
17 | 
18 |     let date: Date | undefined = undefined;
19 | 
20 |     // Use GPT date parse by default as it's the most reliable
21 |     if (content) {
22 |         date = await findDateUsingGPT(content.slice(0, 1000));
23 |         if (!date && content.length > 2000) {
24 |             // Try end of large pages
25 |             date = await findDateUsingGPT(content.slice(-1000));
26 |         }
27 |     }
28 | 
29 |     // Report through the caller-supplied logger rather than the global console
30 |     log(`GPT date: ${date?.toISOString().slice(0, 10)}`);
31 |     log(`meta date: ${meta.date}`);
32 | 
33 |     if (!date && meta.date) {
34 |         const metaDate = new Date(meta.date);
35 |         // An unparseable value yields an Invalid Date, on which toISOString() would throw
36 |         const isParseable = !Number.isNaN(metaDate.getTime());
37 |         // Ignore dates a long time in the past
38 |         const isTooOld = isParseable && metaDate.toISOString().slice(0, 10) < "2010-01-01";
39 |         // Don't trust dates from the last minute or the future, e.g. on
40 |         // https://francescasciandra.art/now, https://nycsubwaygirl.com/now
41 |         const isTooRecent = metaDate.getTime() > Date.now() - 1000 * 60;
42 |         if (isParseable && !isTooOld && !isTooRecent) {
43 |             date = metaDate;
44 |         }
45 |     }
46 | 
47 |     return { domain, date };
48 | }
49 | 
50 | /**
51 |  * Asks GPT for the lastUpdated date mentioned in `text`.
52 |  * Returns undefined for empty text and for missing, unparseable, pre-2010 or future dates.
53 |  */
54 | async function findDateUsingGPT(text: string): Promise<Date | undefined> {
55 |     if (!text) {
56 |         return undefined;
57 |     }
58 | 
59 |     const systemPrompt = `You are an API that extracts the lastUpdated full ISO date from a text. Return null if there's no date mentioned. Return only the data as JSON.`;
60 |     const response = await openai.chat.completions.create({
61 |         model: "gpt-3.5-turbo-1106", // 3.5 seems enough for this
62 |         max_tokens: 50,
63 |         temperature: 0,
64 |         messages: [
65 |             {
66 |                 role: "system",
67 |                 content: systemPrompt
68 |             },
69 |             {
70 |                 role: "user",
71 |                 content: text
72 |             }
73 |         ],
74 |         response_format: { type: "json_object" }
75 |     });
76 |     const completion = response.choices[0].message.content;
77 | 
78 |     try {
79 |         // Malformed JSON throws here; an unparseable date throws in toISOString() below
80 |         const { lastUpdated } = JSON.parse(completion!);
81 |         const parsed = new Date(lastUpdated);
82 |         const day = parsed.toISOString().slice(0, 10);
83 |         const today = new Date().toISOString().slice(0, 10);
84 |         // GPT returns 1970-01-01 for empty dates, and future dates are not trusted,
85 |         // e.g. on https://kunalmarwaha.com/now
86 |         const isUsable = day >= "2010-01-01" && day <= today;
87 |         return isUsable ? parsed : undefined;
88 |     } catch (err) {
89 |         console.error(`Could not parse date from string with GPT: ${completion}`);
90 |         return undefined;
91 |     }
92 | }
93 | 
94 | export function getDomain(url: string) {
95 |     return new URL(url).hostname.replace(/^www\./, ""); // only a leading "www." is removed
96 | }
97 | 


--------------------------------------------------------------------------------
/apps/api/src/common/openai.ts:
--------------------------------------------------------------------------------
 1 | import OpenAI from "openai";
 2 | import { env } from "process";
 3 | 
 4 | export const openai = new OpenAI({
 5 |     apiKey: env.OPENAI_API_KEY
 6 | });
 7 | 
 8 | /** Requests a 512-dimension "text-embedding-3-small" embedding for `text`. */
 9 | export async function generateEmbedding(text: string) {
10 |     const { data } = await openai.embeddings.create({
11 |         model: "text-embedding-3-small",
12 |         dimensions: 512,
13 |         input: text
14 |     });
15 |     // Only the first returned item is used
16 |     return data[0].embedding;
17 | }
18 | 


--------------------------------------------------------------------------------
/apps/api/src/common/postType.ts:
--------------------------------------------------------------------------------
 1 | import { PostType } from "@repo/core/generated/prisma-client";
 2 | 
 3 | /**
 4 |  * Maps a URL pathname to the PostType it represents.
 5 |  * Returns undefined for every path that is not an about, now or ideas page.
 6 |  */
 7 | export function getPostType(pathname: string) {
 8 |     // Drop one trailing slash so "/now/" and "/now" are treated alike
 9 |     const trimmed = pathname.endsWith("/") ? pathname.slice(0, -1) : pathname;
10 | 
11 |     // The site root ("" once trimmed) counts as the about page, as do short variants
12 |     // like /about-me; paths longer than 20 characters are taken to be article slugs
13 |     const isRoot = trimmed === "";
14 |     const isAboutVariant = trimmed.includes("about") && trimmed.length <= 20;
15 |     if (isRoot || isAboutVariant) {
16 |         return PostType.ABOUT;
17 |     }
18 | 
19 |     switch (trimmed) {
20 |         case "/now":
21 |             return PostType.NOW;
22 |         case "/ideas":
23 |             return PostType.IDEAS;
24 |         default:
25 |             return undefined;
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/apps/api/src/common/split.ts:
--------------------------------------------------------------------------------
 1 | import { MarkdownTextSplitter } from "langchain/text_splitter";
 2 | 
 3 | const splitter = new MarkdownTextSplitter({
 4 |     chunkSize: 500,
 5 |     chunkOverlap: 0
 6 | });
 7 | 
 8 | /**
 9 |  * Splits markdown into at most 10 paragraphs that are each longer than 100 characters,
10 |  * after dropping leading short lines (treated as headings).
11 |  */
12 | export async function getPostParagraphs(text: string) {
13 |     const lines = text.trim().split("\n");
14 | 
15 |     // Skip leading lines under 100 chars; an empty line or a long line ends the skip
16 |     const firstKept = lines.findIndex((line) => !line || line.length >= 100);
17 |     const body = firstKept === -1 ? "" : lines.slice(firstKept).join("\n");
18 | 
19 |     const chunks = await splitter.splitText(body);
20 |     return chunks.filter((chunk) => chunk.length > 100).slice(0, 10);
21 | }
22 | 


--------------------------------------------------------------------------------
/apps/api/src/common/typesense.ts:
--------------------------------------------------------------------------------
 1 | import Typesense from "typesense";
 2 | import { getPostParagraphs } from "./split.js";
 3 | import type { Post, PostType } from "@repo/core/generated/prisma-client";
 4 | import { env } from "process";
 5 | 
 6 | export const typesense = new Typesense.Client({
 7 |     nodes: [
 8 |         {
 9 |             host: env.TYPESENSE_URL!,
10 |             port: 443,
11 |             protocol: "https"
12 |         }
13 |     ],
14 |     apiKey: env.TYPESENSE_ADMIN_API_KEY!,
15 |     connectionTimeoutSeconds: 2
16 | });
17 | 
18 | // typesense.collections().create({
19 | //     name: "paragraphs",
20 | //     fields: [
21 | //         // { name: "id", type: "string" },
22 | //         { name: "domain", type: "string", facet: true },
23 | //         { name: "type", type: "string", facet: true },
24 | //         { name: "url", type: "string" },
25 | //         { name: "content", type: "string" },
26 | //         { name: "updatedAt", type: "int64" }, // epoch milliseconds to enable sorting,
27 | //         {
28 | //             name: "embedding",
29 | //             type: "float[]",
30 | //             embed: {
31 | //                 from: ["content"],
32 | //                 model_config: {
33 | //                     model_name: "openai/text-embedding-3-small",
34 | //                     api_key: env.OPENAI_API_KEY!
35 | //                 }
36 | //             }
37 | //         }
38 | //     ],
39 | //     default_sorting_field: "updatedAt"
40 | // });
41 | 
42 | /**
43 |  * Re-indexes one post in the "paragraphs" collection.
44 |  *
45 |  * The post content is split with getPostParagraphs(), every document previously stored
46 |  * for the post's domain and type is deleted, and one document per paragraph is imported.
47 |  * Failures are passed to `logger` instead of being thrown.
48 |  *
49 |  * @param post - Post to index; url, domain, type, content and updatedAt are read.
50 |  * @param logger - Receives the error message when indexing fails (defaults to console.log).
51 |  */
52 | export async function indexPost(post: Post, logger = console.log) {
53 |     try {
54 |         const paragraphs = await getPostParagraphs(post.content);
55 | 
56 |         // Always clear the old paragraphs first: their number might have changed, and
57 |         // a post that no longer yields any paragraph must not keep stale documents
58 |         await unIndexPost(post.domain, post.type);
59 | 
60 |         if (paragraphs.length === 0) {
61 |             return;
62 |         }
63 | 
64 |         // Epoch milliseconds, shared by all paragraphs of the post
65 |         const updatedAt = post.updatedAt.getTime();
66 |         const documents = paragraphs.map((content) => ({
67 |             url: post.url,
68 |             domain: post.domain,
69 |             type: post.type,
70 |             content,
71 |             updatedAt
72 |         }));
73 |         await typesense
74 |             .collections("paragraphs")
75 |             .documents()
76 |             .import(documents);
77 |     } catch (e) {
78 |         logger(`Error indexing post ${post.url}: ${e}`);
79 |     }
80 | }
81 | 
82 | /** Deletes every document of the "paragraphs" collection matching the domain and post type. */
83 | export async function unIndexPost(domain: string, postType: PostType) {
84 |     const filter_by = `domain:${domain} && type:${postType}`;
85 |     const documents = typesense.collections("paragraphs").documents();
86 |     await documents.delete({ filter_by });
87 | }
88 | 


--------------------------------------------------------------------------------
/apps/api/src/crawler/main.ts:
--------------------------------------------------------------------------------
  1 | // For more information, see https://crawlee.dev/
  2 | import "dotenv/config";
  3 | import {
  4 |     CheerioCrawler,
  5 |     RequestOptions,
  6 |     ProxyConfiguration,
  7 |     purgeDefaultStorages,
  8 |     Configuration
  9 | } from "crawlee";
 10 | import { MemoryStorage } from "@crawlee/memory-storage";
 11 | import { router } from "./routes.js";
 12 | import normalizeUrl from "normalize-url";
 13 | import { db } from "../common/db.js";
 14 | import { getDomain } from "../common/meta.js";
 15 | import { ScrapeStatus } from "@repo/core/generated/prisma-client";
 16 | import { getPostType } from "../common/postType.js";
 17 | 
 18 | /**
 19 |  * Crawls the given seed urls with a CheerioCrawler that keeps its storage in memory.
 20 |  *
 21 |  * Successful responses are processed by the labelled handlers of `router`. A request that
 22 |  * ultimately fails is logged and, when its path maps to a post type, stored as an
 23 |  * UNAVAILABLE scrape state. A failed ABOUT page other than "/" is retried on the domain root.
 24 |  *
 25 |  * @param directoryUrls - Urls enqueued with the "directory" label.
 26 |  * @param documentUrls - Urls enqueued with the "document" label.
 27 |  * @returns A promise that settles when `crawler.run()` has finished.
 28 |  */
 29 | export async function runCrawler(directoryUrls: string[], documentUrls: string[]) {
 30 |     // Seed requests: every url carries the label of the handler that should process it
 31 |     const withLabel = (label: string) => (url: string) => ({ url, label });
 32 |     const seedRequests: RequestOptions[] = [
 33 |         ...directoryUrls.map(withLabel("directory")),
 34 |         ...documentUrls.map(withLabel("document"))
 35 |     ];
 36 | 
 37 |     // Proxies are disabled for now: the configuration is created without proxy urls
 38 |     // and is not handed to the crawler below
 39 |     const proxyConfiguration = new ProxyConfiguration({});
 40 |     await purgeDefaultStorages();
 41 | 
 42 |     // Storage settings: in-memory client with persistence and metadata writes turned off
 43 |     const configuration = new Configuration({
 44 |         persistStateIntervalMillis: 10_000,
 45 |         storageClient: new MemoryStorage({
 46 |             persistStorage: false,
 47 |             writeMetadata: false
 48 |         })
 49 |     });
 50 | 
 51 |     const crawler = new CheerioCrawler(
 52 |         {
 53 |             // Options kept for reference while proxies and sessions are disabled
 54 |             // proxyConfiguration,
 55 |             // useSessionPool: true,
 56 |             // persistCookiesPerSession: true,
 57 |             // additionalHttpErrorStatusCodes: [403, 444, 503],
 58 | 
 59 |             minConcurrency: 1,
 60 |             maxConcurrency: 10,
 61 |             retryOnBlocked: false,
 62 |             maxRequestRetries: 1,
 63 |             maxRequestsPerMinute: 120,
 64 |             sameDomainDelaySecs: 0,
 65 | 
 66 |             requestHandler: router,
 67 |             failedRequestHandler: async ({ request, log, enqueueLinks }) => {
 68 |                 const failedUrl = normalizeUrl(request.url);
 69 |                 const domain = getDomain(failedUrl);
 70 |                 const { pathname } = new URL(failedUrl);
 71 |                 const postType = getPostType(pathname);
 72 | 
 73 |                 log.info(`Failed to crawl ${failedUrl}`);
 74 | 
 75 |                 // Remember that this post type could not be fetched for the domain
 76 |                 if (postType) {
 77 |                     const unavailable = { status: ScrapeStatus.UNAVAILABLE, scapedAt: new Date() };
 78 |                     await db.scrapeState.upsert({
 79 |                         where: { domain_type: { domain, type: postType } },
 80 |                         create: { domain, type: postType, ...unavailable },
 81 |                         update: unavailable
 82 |                     });
 83 |                 }
 84 | 
 85 |                 // Fall back to the domain root when a dedicated about page failed
 86 |                 if (postType === "ABOUT" && pathname !== "/") {
 87 |                     log.info(`Trying / instead of /about for ${domain}`);
 88 |                     await enqueueLinks({
 89 |                         strategy: "all",
 90 |                         label: "document",
 91 |                         urls: [`https://${domain}/`]
 92 |                     });
 93 |                 }
 94 |             }
 95 |         },
 96 |         configuration
 97 |     );
 98 | 
 99 |     console.log(`Crawling ${seedRequests.length} urls...`);
100 |     await crawler.run(seedRequests);
101 | }
102 | 


--------------------------------------------------------------------------------
/apps/api/src/crawler/routes.ts:
--------------------------------------------------------------------------------
  1 | import { createCheerioRouter } from "crawlee";
  2 | import { getDomain, getMeta } from "../common/meta.js";
  3 | import normalizeUrl from "normalize-url";
  4 | import { getPageContent } from "../common/content.js";
  5 | import { PostType, ScrapeStatus } from "@repo/core/generated/prisma-client";
  6 | import { db, deletePost } from "../common/db.js";
  7 | import { isExcludedPage } from "../common/filter.js";
  8 | import { indexPost, unIndexPost } from "../common/typesense.js";
  9 | import { getPostType } from "../common/postType.js";
 10 | 
 11 | export const router = createCheerioRouter();
 12 | 
 13 | // Scrape a directory of links
 14 | /**
 15 |  * "directory" handler: collects the links of a directory page that point to other
 16 |  * domains and enqueues the /about, /now and /ideas pages of every linked domain
 17 |  * that has no scrape state yet.
 18 |  */
 19 | router.addHandler("directory", async ({ $, request, enqueueLinks, log }) => {
 20 |     const url = request.loadedUrl!;
 21 |     const domain = getDomain(url);
 22 |     log.info(`crawling directory: ${url}`);
 23 | 
 24 |     // Resolve every href against the page url, dropping unparsable and same-domain links
 25 |     const hrefs = $("a[href]").map((_, el) => $(el).attr("href")).get();
 26 |     const links = hrefs.flatMap((href) => {
 27 |         try {
 28 |             const target = new URL(href, url);
 29 |             return target.hostname === domain ? [] : [target.toString()];
 30 |         } catch {
 31 |             return [];
 32 |         }
 33 |     });
 34 |     const linkDomains = links.map(getDomain);
 35 | 
 36 |     // Exclude domains that already have a scrape state
 37 |     const scrapeStates = await db.scrapeState.findMany({
 38 |         where: { domain: { in: linkDomains } }
 39 |     });
 40 |     const checkedDomains = new Set(scrapeStates.map((s) => s.domain));
 41 |     const newDomains = linkDomains.filter((linkDomain) => !checkedDomains.has(linkDomain));
 42 |     log.info(`Found ${newDomains.length} new links`);
 43 | 
 44 |     // Visit every domain once, in an unbiased random order (Fisher-Yates shuffle),
 45 |     // to work around network errors
 46 |     const targets = [...new Set(newDomains)];
 47 |     for (let i = targets.length - 1; i > 0; i--) {
 48 |         const j = Math.floor(Math.random() * (i + 1));
 49 |         [targets[i], targets[j]] = [targets[j], targets[i]];
 50 |     }
 51 | 
 52 |     await enqueueLinks({
 53 |         strategy: "all",
 54 |         label: "document",
 55 |         urls: targets.flatMap((target) => [
 56 |             `https://${target}/about`, `https://${target}/now`, `https://${target}/ideas`
 57 |         ])
 58 |     });
 59 | });
 60 | 
 61 | // Scrape an individual page
 62 | /**
 63 |  * "document" handler: scrapes a single /about, /now or /ideas page.
 64 |  *
 65 |  * Records domain redirects, stores the extracted article as a post, indexes it for
 66 |  * search and keeps the scrape state of the domain and post type up to date.
 67 |  */
 68 | router.addHandler("document", async ({ $, request, log, enqueueLinks }) => {
 69 |     const url = normalizeUrl(request.loadedUrl || request.url);
 70 |     const domain = getDomain(url);
 71 |     const pathname = new URL(url).pathname;
 72 | 
 73 |     const originalUrl = normalizeUrl(request.url);
 74 |     const originalDomain = getDomain(originalUrl);
 75 |     const originalPathname = new URL(originalUrl).pathname;
 76 | 
 77 |     // Detect post type
 78 |     const postType = getPostType(pathname) || getPostType(originalPathname);
 79 |     if (!postType) {
 80 |         log.info(`${domain} ${pathname} skipped (not /about, /now, or /ideas)\n`);
 81 |         return;
 82 |     }
 83 | 
 84 |     // Store domain redirects
 85 |     if (domain !== originalDomain) {
 86 |         log.info(`Redirected from ${originalDomain} to ${domain}`);
 87 |         await db.scrapeState.upsert({
 88 |             where: { domain_type: { domain: originalDomain, type: postType } },
 89 |             create: {
 90 |                 domain: originalDomain,
 91 |                 type: postType,
 92 |                 status: ScrapeStatus.REDIRECTED,
 93 |                 scapedAt: new Date()
 94 |             },
 95 |             update: { status: ScrapeStatus.REDIRECTED, scapedAt: new Date() }
 96 |         });
 97 |     }
 98 | 
 99 |     const existingPost = await db.post.findFirst({ where: { domain, type: postType } });
100 | 
101 |     // Extract content
102 |     const title = $("title").text();
103 |     const html = $.html();
104 |     const { rawContent, articleContent } = await getPageContent(url, html);
105 |     const wordCount = articleContent?.split(/\s+/).length || 0;
106 | 
107 |     // Check if should exclude / delete post
108 |     if (
109 |         !articleContent ||
110 |         isExcludedPage(url, domain, title, pathname, rawContent, articleContent)
111 |     ) {
112 |         if (pathname === "/about") {
113 |             log.info(`Trying / instead of /about for ${domain}\n`);
114 |             // Await the enqueue so the fallback request is queued before this handler returns
115 |             await enqueueLinks({
116 |                 strategy: "all",
117 |                 label: "document",
118 |                 urls: [`https://${domain}/`]
119 |             });
120 |             return;
121 |         }
122 |         log.info(`excluding ${url} (title: ${title})\n`);
123 | 
124 |         // Update scrape time if exists, otherwise save as no content
125 |         await db.scrapeState.upsert({
126 |             where: { domain_type: { domain, type: postType } },
127 |             create: {
128 |                 domain,
129 |                 type: postType,
130 |                 status: ScrapeStatus.NO_CONTENT,
131 |                 scapedAt: new Date()
132 |             },
133 |             update: { scapedAt: new Date() }
134 |         });
135 | 
136 |         // Delete post if existed before
137 |         if (existingPost) {
138 |             await deletePost(domain, postType);
139 |         }
140 | 
141 |         return;
142 |     }
143 | 
144 |     // Check if content has changed
145 |     if (existingPost && existingPost.content === articleContent) {
146 |         log.info(`skipping ${url} (content unchanged)\n`);
147 | 
148 |         // Update scrape time
149 |         await db.scrapeState.upsert({
150 |             where: { domain_type: { domain, type: postType } },
151 |             create: {
152 |                 domain,
153 |                 type: postType,
154 |                 status: ScrapeStatus.SCRAPED,
155 |                 scapedAt: new Date()
156 |             },
157 |             update: { scapedAt: new Date() }
158 |         });
159 | 
160 |         return;
161 |     }
162 | 
163 |     // Use rawContent in case date is outside main text (e.g. on https://alexcarpenter.me/now)
164 |     const meta = await getMeta(url, html, rawContent, log.info.bind(log));
165 |     // Log debug stats
166 |     log.info(`scraped ${url}:`);
167 |     log.info(`\ttitle: ${title}`);
168 |     log.info(`\twords: ${wordCount}`);
169 |     log.info(`\tdate: ${meta.date?.toISOString().slice(0, 10)}`);
170 |     log.info(``);
171 | 
172 |     // Update post (a page without a detected date is stored with the 1970-01-01 placeholder)
173 |     const post = {
174 |         url,
175 |         domain,
176 |         type: postType,
177 |         content: articleContent,
178 |         updatedAt: meta.date || new Date("1970-01-01")
179 |     };
180 |     await db.post.upsert({
181 |         where: { url },
182 |         create: post,
183 |         update: post
184 |     });
185 | 
186 |     // Index for search async: not awaited, indexPost() logs its own failures
187 |     indexPost(post, log.info.bind(log));
188 | 
189 |     // Save scrape success
190 |     await db.scrapeState.upsert({
191 |         where: { domain_type: { domain, type: postType } },
192 |         create: {
193 |             domain,
194 |             type: postType,
195 |             status: ScrapeStatus.SCRAPED,
196 |             scapedAt: new Date()
197 |         },
198 |         update: {
199 |             status: ScrapeStatus.SCRAPED,
200 |             scapedAt: new Date()
201 |         }
202 |     });
203 | });
204 | 


--------------------------------------------------------------------------------
/apps/api/src/index.ts:
--------------------------------------------------------------------------------
 1 | import "dotenv/config";
 2 | import express, { Application } from "express";
 3 | import cors from "cors";
 4 | import { router } from "./routes/index.js";
 5 | 
 6 | // Create the Express application that serves the API
 7 | const app: Application = express();
 8 | 
 9 | // Middleware: enable CORS and parse JSON request bodies of up to 5mb
10 | app.use(cors());
11 | app.use(express.json({ limit: "5mb" }));
12 | 
13 | // Route every request through the router exported by ./routes/index.js
14 | app.use(router);
15 | 
16 | // Listen on the PORT env variable; 7101 is used when it does not parse to a non-zero integer
17 | const PORT: number = parseInt(process.env.PORT as string, 10) || 7101;
18 | app.listen(PORT, () => {
19 |     console.log(`API listening on port ${PORT}`);
20 | });
21 | 


--------------------------------------------------------------------------------
/apps/api/src/routes/add.ts:
--------------------------------------------------------------------------------
  1 | import type { Request, Response } from "express";
  2 | import { runCrawler } from "../crawler/main.js";
  3 | import { db } from "../common/db.js";
  4 | import { getDomain } from "../common/meta.js";
  5 | import { SubmittedDomain } from "@repo/core/generated/prisma-client";
  6 | import { unIndexPost } from "../common/typesense.js";
  7 | 
  8 | /**
  9 |  * Registers a directory site and starts crawling it in the background. Responds with 400
 10 |  * when the `url` query parameter is missing, otherwise with `{ message: "Pending" }`.
 11 |  */
 12 | export async function addDirectory(req: Request, res: Response) {
 13 |     const url = req.query.url as string | undefined;
 14 |     if (!url) {
 15 |         return res.status(400).json({ message: "Missing url" });
 16 |     }
 17 | 
 18 |     // Don't await, but make sure a rejected crawl is logged instead of staying unhandled
 19 |     runCrawler([url], []).catch((err) => console.error(`Crawl of ${url} failed:`, err));
 20 | 
 21 |     // Mark the domain as a directory; an existing state only gets a new scrape time
 22 |     const domain = getDomain(url);
 23 |     const scapedAt = new Date();
 24 |     await db.scrapeState.upsert({
 25 |         where: { domain_type: { domain, type: "ABOUT" } },
 26 |         create: { domain, domainType: "DIRECTORY", type: "ABOUT", status: "SCRAPED", scapedAt },
 27 |         update: { scapedAt }
 28 |     });
 29 | 
 30 |     return res.json({ message: "Pending" });
 31 | }
 32 | 
 33 | /**
 34 |  * Starts a background crawl of /about, /now and /ideas for every domain of the JSON body's
 35 |  * `domains` array that has no scrape state yet. Responds with 400 when the array is missing.
 36 |  */
 37 | export async function addBatchDomains(req: Request, res: Response) {
 38 |     if (!Array.isArray(req.body?.domains)) {
 39 |         return res.status(400).json({ message: "Missing domains" });
 40 |     }
 41 |     const domains = (req.body.domains as string[]).filter((domain) => domain.includes("."));
 42 |     const links = domains.flatMap((domain) => [
 43 |         `https://${domain}/about`, `https://${domain}/now`, `https://${domain}/ideas`
 44 |     ]);
 45 | 
 46 |     // Exclude existing domains
 47 |     const where = { domain: { in: links.map(getDomain) } };
 48 |     const scrapeStates = await db.scrapeState.findMany({ where });
 49 |     const excludedDomains = new Set(scrapeStates.map((s) => s.domain));
 50 |     const newLinks = links.filter((link) => !excludedDomains.has(getDomain(link)));
 51 |     console.log(`Found ${newLinks.length} new links`);
 52 |     // Crawl only the new links (the crawl is not awaited, so failures are logged here)
 53 |     runCrawler([], newLinks).catch((err) => console.error("Batch crawl failed:", err));
 54 |     return res.json({ message: "Pending" });
 55 | }
 56 | 
 57 | /**
 58 |  * Crawls the /about, /now and /ideas pages of a submitted website and returns its posts.
 59 |  *
 60 |  * Responds with 400 when the `url` query parameter is missing or invalid, with the
 61 |  * domain's posts (newest first) after a successful crawl, and with 500 otherwise.
 62 |  * The submission, with the optional `email` query parameter, is stored best-effort.
 63 |  */
 64 | export async function addDomain(req: Request, res: Response) {
 65 |     // Parse params
 66 |     const url = parseUrl(req);
 67 |     if (!url) {
 68 |         return res.status(400).json({ message: "Missing url" });
 69 |     }
 70 |     const domain = getDomain(url);
 71 |     const email = (req.query.email as string) || null;
 72 | 
 73 |     // Scrape website
 74 |     let success = true;
 75 |     try {
 76 |         await runCrawler(
 77 |             [],
 78 |             [`https://${domain}/about`, `https://${domain}/now`, `https://${domain}/ideas`]
 79 |         );
 80 |     } catch (err) {
 81 |         console.error(`Failed to crawl ${domain}:`, err);
 82 |         success = false;
 83 |     }
 84 | 
 85 |     // Save submitted info if not exists
 86 |     try {
 87 |         const data = { domain, email, success, submittedAt: new Date() };
 88 |         await db.submittedDomain.create({ data });
 89 |     } catch {
 90 |         // Deliberately ignored: presumably the domain was already submitted before
 91 |     }
 92 | 
 93 |     // Return results
 94 |     if (!success) {
 95 |         return res.status(500).json({ message: "Failed to scrape website :(" });
 96 |     }
 97 |     const posts = await db.post.findMany({ where: { domain }, orderBy: { updatedAt: "desc" } });
 98 |     return res.json(posts);
 99 | }
100 | 
101 | /**
102 |  * Reads the `url` query parameter and returns it as an absolute url, or undefined when
103 |  * it is missing or invalid. Input without an http(s):// scheme is prefixed with https://.
104 |  */
105 | function parseUrl(req: Request) {
106 |     const raw = req.query.url;
107 |     if (typeof raw !== "string" || !raw.includes(".")) {
108 |         return;
109 |     }
110 |     // Test for the full scheme: a bare "http" prefix also matches hosts like httpbin.org
111 |     const url = /^https?:\/\//i.test(raw) ? raw : `https://${raw}`;
112 | 
113 |     try {
114 |         new URL(url);
115 |     } catch {
116 |         return;
117 |     }
118 |     return url;
119 | }
120 | 


--------------------------------------------------------------------------------
/apps/api/src/routes/backfill.ts:
--------------------------------------------------------------------------------
 1 | import type { Request, Response } from "express";
 2 | import { db } from "../common/db.js";
 3 | import { indexPost } from "../common/typesense.js";
 4 | 
 5 | /**
 6 |  * Re-indexes stored posts for search, newest first.
 7 |  *
 8 |  * Query parameters: `start` (number of posts to skip, default 0) and `limit`
 9 |  * (maximum number of posts, default 10000). Responds once every post was processed.
10 |  */
11 | export async function runBackfill(req: Request, res: Response) {
12 |     const start = parseInt(req.query.start as string, 10) || 0;
13 |     const limit = parseInt(req.query.limit as string, 10) || 10000;
14 | 
15 |     const posts = await db.post.findMany({
16 |         orderBy: { updatedAt: "desc" },
17 |         take: limit,
18 |         skip: start
19 |     });
20 | 
21 |     const total = posts.length + start;
22 |     for (const [offset, post] of posts.entries()) {
23 |         console.log(`(${start + offset}/${total}) Backfilling post ${post.url}`);
24 |         // Awaited so that posts are indexed one at a time; indexPost() logs its own errors
25 |         await indexPost(post);
26 |         // Pause 300ms between posts (presumably to throttle the indexing requests)
27 |         await new Promise((resolve) => setTimeout(resolve, 300));
28 |     }
29 | 
30 |     console.log(`Backfill complete`);
31 |     return res.json({ success: true });
32 | }
33 | 


--------------------------------------------------------------------------------
/apps/api/src/routes/crawl.ts:
--------------------------------------------------------------------------------
 1 | import type { Request, Response } from "express";
 2 | import { runCrawler } from "../crawler/main.js";
 3 | import { db } from "../common/db.js";
 4 | 
 5 | /**
 6 |  * Starts a background re-crawl of already scraped pages.
 7 |  *
 8 |  * Query parameters:
 9 |  * - `directories`: when "true", the stored directory sites are crawled for new links too.
10 |  * - `limit`: maximum number of pages to re-scrape (default 10000); the pages that were
11 |  *   scraped least recently come first.
12 |  *
13 |  * Responds with `{ message: "Pending" }` without waiting for the crawl to finish.
14 |  * A failed crawl is only logged on the server.
15 |  */
16 | export async function periodicCrawl(req: Request, res: Response) {
17 |     const includeDirectories = req.query.directories === "true";
18 |     const limit = parseInt(req.query.limit as string, 10) || 10_000;
19 | 
20 |     // Check all directories for new links (only queried when requested)
21 |     let directories: string[] = [];
22 |     if (includeDirectories) {
23 |         const directoryScrapes = await db.scrapeState.findMany({
24 |             where: { domainType: "DIRECTORY" }
25 |         });
26 |         directories = directoryScrapes.map((s) => `https://${s.domain}`);
27 |     }
28 | 
29 |     // Re-scrape all indexed pages
30 |     const scrapeStates = await db.scrapeState.findMany({
31 |         where: { status: "SCRAPED", domainType: "INDIVIDUAL_SITE" },
32 |         orderBy: { scapedAt: "asc" },
33 |         take: limit
34 |     });
35 |     // The page url is rebuilt from the domain and the lower-cased post type
36 |     const documents = scrapeStates.map((s) => `https://${s.domain}/${s.type.toLowerCase()}`);
37 | 
38 |     // Don't await response, but log a failed crawl instead of leaving it unhandled
39 |     runCrawler(directories, documents).catch((err) => {
40 |         console.error("Periodic crawl failed:", err);
41 |     });
42 | 
43 |     return res.json({ message: "Pending" });
44 | }
45 | 


--------------------------------------------------------------------------------
/apps/api/src/routes/getIndexedNowPages.ts:
--------------------------------------------------------------------------------
 1 | import { Request, Response } from "express";
 2 | import { db } from "../common/db.js";
 3 | 
 4 | /**
 5 |  * Lists the domains with an indexed /now page: responds with the number of NOW posts,
 6 |  * the most recent NOW scrape time and a map of domain to update date (YYYY-MM-DD, or
 7 |  * null for posts carrying the 1970-01-01 placeholder date).
 8 |  */
 9 | export async function getIndexedNowPages(req: Request, res: Response) {
10 |     // List valid /now posts
11 |     const posts = await db.post.findMany({
12 |         where: {
13 |             type: "NOW"
14 |         },
15 |         select: {
16 |             domain: true,
17 |             updatedAt: true
18 |         },
19 |         orderBy: {
20 |             updatedAt: "desc"
21 |         }
22 |     });
23 | 
24 |     // Map format; built in one pass instead of copying the accumulator for every post
25 |     const websitesUpdatedAt = Object.fromEntries(
26 |         posts.map((post): [string, string | null] => {
27 |             const day = post.updatedAt.toISOString().slice(0, 10);
28 |             return [post.domain, day === "1970-01-01" ? null : day];
29 |         })
30 |     );
31 | 
32 |     const lastScrapeState = await db.scrapeState.findFirst({
33 |         where: {
34 |             type: "NOW"
35 |         },
36 |         select: {
37 |             scapedAt: true
38 |         },
39 |         orderBy: {
40 |             scapedAt: "desc"
41 |         }
42 |     });
43 | 
44 |     return res.json({
45 |         validWebsitesCount: posts.length,
46 |         lastScrapedAt: lastScrapeState?.scapedAt,
47 |         websitesUpdatedAt
48 |     });
49 | }
50 | 


--------------------------------------------------------------------------------
/apps/api/src/routes/hello.ts:
--------------------------------------------------------------------------------
1 | import type { Request, Response } from "express";
2 | /** Responds with a fixed JSON greeting. */
3 | export async function hello(req: Request, res: Response) {
4 |     return res.json({ message: "Hello World!!!" });
5 | }
6 | 


--------------------------------------------------------------------------------
/apps/api/src/routes/index.ts:
--------------------------------------------------------------------------------
 1 | import { Router } from "express";
 2 | import { hello } from "./hello.js";
 3 | import { periodicCrawl } from "./crawl.js";
 4 | import { addBatchDomains, addDirectory, addDomain } from "./add.js";
 5 | import { runBackfill } from "./backfill.js";
 6 | import { getIndexedNowPages } from "./getIndexedNowPages.js";
 7 | 
 8 | export const router = Router();
 9 | // Endpoints below are registered on this router, which ../index.ts mounts on the app
10 | router.get("/hello", hello);
11 | router.post("/periodic-crawl", periodicCrawl);
12 | router.post("/add-directory", addDirectory);
13 | router.post("/add-domain", addDomain);
14 | router.post("/add-batch-domains", addBatchDomains);
15 | router.post("/backfill", runBackfill);
16 | // Read-only listing of the indexed /now pages
17 | router.get("/indexed-now-pages", getIndexedNowPages);
18 | 


--------------------------------------------------------------------------------
/apps/api/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "compilerOptions": {
 3 |         "target": "ES2022",
 4 |         "module": "NodeNext",
 5 |         "outDir": "./dist",
 6 |         "rootDir": "./src",
 7 |         "strict": true,
 8 |         "strictNullChecks": true,
 9 |         "esModuleInterop": true,
10 |         "skipLibCheck": true,
11 |         "forceConsistentCasingInFileNames": true
12 |     },
13 |     "include": ["src"],
14 |     "exclude": ["node_modules", "dist", "openapi"],
15 |     "ts-node": {
16 |         "esm": true
17 |     }
18 | }
19 | 


--------------------------------------------------------------------------------
/apps/web/.env.example:
--------------------------------------------------------------------------------
 1 | ########### Public secrets ###########
 2 | 
 3 | PUBLIC_TYPESENSE_URL="o8k5amdihc3vegn7p-1.a1.typesense.net"
 4 | PUBLIC_TYPESENSE_SEARCH_API_KEY="IRHY5vFh26RNZxoYem0hFs6Gb6ilSOnv"
 5 | 
 6 | ########### Private secrets ###########
 7 | 
 8 | DATABASE_URL=""
 9 | INTERNAL_API_URL="https://api.aboutideasnow.com"
10 | 
11 | OPENAI_API_KEY=""
12 | COHERE_API_KEY=""
13 | 


--------------------------------------------------------------------------------
/apps/web/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | node_modules
 3 | /build
 4 | /.svelte-kit
 5 | /package
 6 | .env
 7 | .env.*
 8 | !.env.example
 9 | vite.config.js.timestamp-*
10 | vite.config.ts.timestamp-*
11 | 


--------------------------------------------------------------------------------
/apps/web/.prettierignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | node_modules
 3 | /build
 4 | /.svelte-kit
 5 | /package
 6 | .env
 7 | .env.*
 8 | !.env.example
 9 | 
10 | # Ignore files for PNPM, NPM and YARN
11 | pnpm-lock.yaml
12 | package-lock.json
13 | yarn.lock
14 | 


--------------------------------------------------------------------------------
/apps/web/README.md:
--------------------------------------------------------------------------------
 1 | # create-svelte
 2 | 
 3 | Everything you need to build a Svelte project, powered by [`create-svelte`](https://github.com/sveltejs/kit/tree/master/packages/create-svelte).
 4 | 
 5 | ## Creating a project
 6 | 
 7 | If you're seeing this, you've probably already done this step. Congrats!
 8 | 
 9 | ```bash
10 | # create a new project in the current directory
11 | npm create svelte@latest
12 | 
13 | # create a new project in my-app
14 | npm create svelte@latest my-app
15 | ```
16 | 
17 | ## Developing
18 | 
19 | Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:
20 | 
21 | ```bash
22 | npm run dev
23 | 
24 | # or start the server and open the app in a new browser tab
25 | npm run dev -- --open
26 | ```
27 | 
28 | ## Building
29 | 
30 | To create a production version of your app:
31 | 
32 | ```bash
33 | npm run build
34 | ```
35 | 
36 | You can preview the production build with `npm run preview`.
37 | 
38 | > To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment.
39 | 


--------------------------------------------------------------------------------
/apps/web/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "@repo/web",
 3 |     "version": "1.0.0",
 4 |     "private": true,
 5 |     "scripts": {
 6 |         "dev": "vite dev --port 7100",
 7 |         "build": "vite build",
 8 |         "preview": "vite preview",
 9 |         "test": "playwright test",
10 |         "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
11 |         "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
12 |         "lint": "prettier --plugin-search-dir . --check .",
13 |         "format": "prettier --plugin-search-dir . --write ."
14 |     },
15 |     "dependencies": {
16 |         "@langchain/community": "^0.0.21",
17 |         "@langchain/openai": "^0.0.13",
18 |         "@repo/core": "workspace:*",
19 |         "@supabase/supabase-js": "^2.39.3",
20 |         "@vercel/analytics": "^1.2.2",
21 |         "axios": "^1.6.7",
22 |         "clsx": "^2.1.0",
23 |         "cohere-ai": "^7.7.3",
24 |         "lodash": "^4.17.21",
25 |         "loglevel": "^1.9.1",
26 |         "openai": "^4.26.0",
27 |         "posthog-js": "^1.108.3",
28 |         "typesense": "^1.7.2"
29 |     },
30 |     "devDependencies": {
31 |         "@csstools/postcss-oklab-function": "^3.0.3",
32 |         "@playwright/test": "^1.28.1",
33 |         "@sveltejs/adapter-vercel": "^4.0.4",
34 |         "@sveltejs/kit": "^2.0.0",
35 |         "@sveltejs/vite-plugin-svelte": "^3.0.0",
36 |         "@types/lodash": "^4.14.202",
37 |         "autoprefixer": "^10.4.15",
38 |         "postcss": "^8.4.29",
39 |         "prettier": "^3.0.3",
40 |         "prettier-plugin-svelte": "^2.10.1",
41 |         "prettier-plugin-tailwindcss": "^0.5.4",
42 |         "svelte": "^4.0.5",
43 |         "svelte-check": "^3.4.3",
44 |         "tailwindcss": "^3.3.3",
45 |         "tslib": "^2.4.1",
46 |         "typescript": "^5.0.0",
47 |         "vite": "^5.0.0"
48 |     },
49 |     "type": "module"
50 | }


--------------------------------------------------------------------------------
/apps/web/postcss.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 |     plugins: {
3 |         tailwindcss: {},
4 |         "@csstools/postcss-oklab-function": { preserve: true },
5 |         autoprefixer: {}
6 |     }
7 | };
8 | 


--------------------------------------------------------------------------------
/apps/web/src/app.css:
--------------------------------------------------------------------------------
  1 | @tailwind base;
  2 | @tailwind components;
  3 | @tailwind utilities;
  4 | /* Base-layer defaults for bare headings, paragraphs, links and lists */
  5 | @layer base {
  6 |     h2 {
  7 |         @apply mb-4 text-3xl font-bold text-center font-title;
  8 |     }
  9 |     p {
 10 |         @apply mb-2;
 11 |     }
 12 |     a {
 13 |         @apply font-bold transition-opacity hover:opacity-50;
 14 |     }
 15 |     ul,
 16 |     ol {
 17 |         @apply pr-5 ml-5 list-outside w-max;
 18 |     }
 19 |     ul {
 20 |         @apply list-disc;
 21 |     }
 22 |     ol {
 23 |         @apply list-decimal;
 24 |     }
 25 |     li {
 26 |         @apply mb-2;
 27 |     }
 28 | }
 29 | /* From 768px up, lists drop the right padding and use a wider left margin */
 30 | @media (min-width: 768px) {
 31 |     ul,
 32 |     ol {
 33 |         @apply pr-0 ml-10;
 34 |     }
 35 | }
 36 | /* Emphasis for search matches (class added to <mark>/<span> markup by searchPosts) */
 37 | .highlight {
 38 |     @apply font-bold bg-yellow-200;
 39 | }
 40 | /* 20px circular spinner: the box rotates while the border on ::before is clipped by prixClipFix */
 41 | .loader {
 42 |     width: 20px;
 43 |     height: 20px;
 44 |     border-radius: 50%;
 45 |     position: relative;
 46 |     animation: rotate 1s linear infinite;
 47 | }
 48 | .loader::before {
 49 |     content: "";
 50 |     box-sizing: border-box;
 51 |     position: absolute;
 52 |     inset: 0px;
 53 |     border-radius: 50%;
 54 |     border: 2px solid black;
 55 |     animation: prixClipFix 2s linear infinite;
 56 | }
 57 | /* One full 360 degree turn per animation cycle */
 58 | @keyframes rotate {
 59 |     100% {
 60 |         transform: rotate(360deg);
 61 |     }
 62 | }
 63 | /* Extends the clip polygon one corner at a time, so the clipped area grows around the box */
 64 | @keyframes prixClipFix {
 65 |     0% {
 66 |         clip-path: polygon(50% 50%, 0 0, 0 0, 0 0, 0 0, 0 0);
 67 |     }
 68 |     25% {
 69 |         clip-path: polygon(50% 50%, 0 0, 100% 0, 100% 0, 100% 0, 100% 0);
 70 |     }
 71 |     50% {
 72 |         clip-path: polygon(50% 50%, 0 0, 100% 0, 100% 100%, 100% 100%, 100% 100%);
 73 |     }
 74 |     75% {
 75 |         clip-path: polygon(50% 50%, 0 0, 100% 0, 100% 100%, 0 100%, 0 100%);
 76 |     }
 77 |     100% {
 78 |         clip-path: polygon(50% 50%, 0 0, 100% 0, 100% 100%, 0 100%, 0 0);
 79 |     }
 80 | }
 81 | /* Fonts loaded from /fonts; DM Sans and PT Mono list a local() source before the file */
 82 | @font-face {
 83 |     font-family: "Reckless";
 84 |     font-weight: 100 1000;
 85 |     font-display: block;
 86 |     src: url(/fonts/Reckless-VariableFont.woff2) format("woff2");
 87 | }
 88 | 
 89 | @font-face {
 90 |     font-family: "DM Sans";
 91 |     font-weight: 100 1000;
 92 |     font-display: block;
 93 |     src:
 94 |         local("DM Sans"),
 95 |         url(/fonts/DMSans-VariableFont.ttf) format("truetype");
 96 | }
 97 | 
 98 | @font-face {
 99 |     font-family: "PT Mono";
100 |     font-weight: 400;
101 |     font-display: block;
102 |     src:
103 |         local("PT Mono"),
104 |         url(/fonts/PTMono-Regular.ttf) format("truetype");
105 | }
106 | 


--------------------------------------------------------------------------------
/apps/web/src/app.d.ts:
--------------------------------------------------------------------------------
 1 | // See https://kit.svelte.dev/docs/types#app
 2 | // for information about these interfaces
 3 | declare global {
 4 |     namespace App {
 5 |         // interface Error {}
 6 |         // interface Locals {}
 7 |         // interface PageData {}
 8 |         // interface Platform {}
 9 |     }
10 | }
11 | 
12 | export {};
13 | 


--------------------------------------------------------------------------------
/apps/web/src/app.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en" class="bg-background font-text text-text">
 3 |     <head>
 4 |         <!-- Exactly one character-encoding declaration is allowed per document -->
 5 |         <meta charset="utf-8" />
 6 |         <link rel="icon" href="%sveltekit.assets%/favicon.png" />
 7 |         <meta name="viewport" content="width=device-width" />
 8 |         <title>About Ideas Now | Search 1000s of personal sites</title>
 9 |         <meta name="title" content="About Ideas Now | Search 1000s of personal sites" />
10 |         <!-- A single description; the og/twitter variants below repeat the same text -->
11 |         <meta
12 |             name="description"
13 |             content="Find people to collaborate with by searching through the /about, /ideas and /now pages of 1000s of personal sites"
14 |         />
15 |         <meta name="keywords" content="personal, directory, sites, about, ideas, now" />
16 |         <meta name="robots" content="index, follow" />
17 |         <meta name="language" content="English" />
18 |         <meta property="og:title" content="About Ideas Now | Search 1000s of personal sites" />
19 |         <meta property="og:type" content="website" />
20 |         <meta
21 |             property="og:description"
22 |             content="Find people to collaborate with by searching through the /about, /ideas and /now pages of 1000s of personal sites"
23 |         />
24 |         <meta property="og:url" content="https://aboutideasnow.com" />
25 |         <!-- Link-preview crawlers need absolute image URLs -->
26 |         <meta property="og:image" content="https://aboutideasnow.com/metaimage.png" />
27 |         <meta name="twitter:title" content="About Ideas Now | Search 1000s of personal sites" />
28 |         <meta
29 |             name="twitter:description"
30 |             content="Find people to collaborate with by searching through the /about, /ideas and /now pages of 1000s of personal sites"
31 |         />
32 |         <meta name="twitter:image" content="https://aboutideasnow.com/metaimage.png" />
33 |         <meta name="twitter:card" content="summary_large_image" />
34 |         %sveltekit.head%
35 |     </head>
36 |     <body data-sveltekit-preload-data="hover" style="overflow-y: scroll">
37 |         %sveltekit.body%
38 |     </body>
39 | </html>
40 | 


--------------------------------------------------------------------------------
/apps/web/src/common/cohere.ts:
--------------------------------------------------------------------------------
1 | import { COHERE_API_KEY } from "$env/static/private";
2 | import { CohereClient } from "cohere-ai";
3 | 
4 | // Cohere SDK client, authenticated with the private COHERE_API_KEY
5 | const clientOptions = { token: COHERE_API_KEY };
6 | export const cohere = new CohereClient(clientOptions);
7 | 


--------------------------------------------------------------------------------
/apps/web/src/common/constants.ts:
--------------------------------------------------------------------------------
 1 | // Shared colour palette; components pick entries by index
 2 | export const colorPalette = ["#ce7e8f", "#ebbe6b", "#6eaecf", "#F9B09A", "#8e79af", "#479A70"];
 3 | 
 4 | // Example searches, each a search term with an emoji; commented-out entries are disabled
 5 | export const exampleSearchQueries = [
 6 |     { term: "building open source", emoji: "📖" },
 7 |     { term: "starting a community", emoji: "👥" },
 8 |     // { term: "looking for a cofounder", emoji: "🤝" },
 9 |     { term: "founding a company", emoji: "🏢" },
10 |     { term: "writing a book", emoji: "🖊️" },
11 |     { term: "building an app", emoji: "📱" },
12 |     { term: "quitting social media", emoji: "📵" },
13 |     // { term: "taking a break", emoji: "🏖️" },
14 |     { term: "traveling the world", emoji: "🌎" },
15 |     { term: "taking photos", emoji: "📸" },
16 |     // { term: "spending time with my kids", emoji: "👨‍👩‍👧‍👦" },
17 |     // { term: "learning to code", emoji: "👨‍💻" },
18 |     { term: "making music", emoji: "🎵" },
19 |     { term: "volunteering", emoji: "🤲" }
20 | ];
21 | 


--------------------------------------------------------------------------------
/apps/web/src/common/formActions.ts:
--------------------------------------------------------------------------------
 1 | // Import necessary types and constants
 2 | import { INTERNAL_API_URL } from "$env/static/private";
 3 | import type { Post } from "@repo/core/generated/prisma-client";
 4 | 
 5 | /**
 6 |  * Shared form action: asks the internal API to index the submitted domain.
 7 |  *
 8 |  * @param request - Form POST carrying a `domain` field and an optional `email` field.
 9 |  * @returns `addedDomain: false` when the domain is missing or the API call fails,
10 |  *          otherwise `addedDomain: true` with the posts the API returned.
11 |  */
12 | export async function handleSubmit(
13 |     request: Request
14 | ): Promise<{ addedDomain: boolean; scrapedPosts?: Post[] }> {
15 |     const data = await request.formData();
16 |     const domain = data.get("domain");
17 |     const email = data.get("email");
18 |     if (typeof domain !== "string" || domain.trim() === "") {
19 |         return { addedDomain: false };
20 |     }
21 | 
22 |     // Encode the values so characters such as "&", "+" or "#" cannot corrupt the query string
23 |     const params = new URLSearchParams({
24 |         url: domain.trim(),
25 |         email: typeof email === "string" ? email : ""
26 |     });
27 | 
28 |     try {
29 |         const response = await fetch(`${INTERNAL_API_URL}/add-domain?${params}`, {
30 |             method: "POST"
31 |         });
32 |         if (!response.ok) {
33 |             return { addedDomain: false };
34 |         }
35 |         const scrapedPosts: Post[] = await response.json();
36 |         return { addedDomain: true, scrapedPosts };
37 |     } catch (err) {
38 |         // Network or JSON failure: report it like any other failed indexing attempt
39 |         console.error(`handleSubmit() failed: ${err}`);
40 |         return { addedDomain: false };
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/apps/web/src/common/openai.ts:
--------------------------------------------------------------------------------
 1 | import { OPENAI_API_KEY } from "$env/static/private";
 2 | import OpenAI from "openai";
 3 | 
 4 | // OpenAI SDK client, authenticated with the private OPENAI_API_KEY
 5 | const clientOptions = { apiKey: OPENAI_API_KEY };
 6 | export const openai = new OpenAI(clientOptions);
 7 | 
 8 | /**
 9 |  * Requests a 512-dimension "text-embedding-3-small" embedding for `text`
10 |  * and returns the vector of the first result.
11 |  */
12 | export async function generateEmbedding(text: string) {
13 |     const { data } = await openai.embeddings.create({
14 |         input: text,
15 |         model: "text-embedding-3-small",
16 |         dimensions: 512
17 |     });
18 | 
19 |     return data[0].embedding;
20 | }
21 | 


--------------------------------------------------------------------------------
/apps/web/src/common/typesense.ts:
--------------------------------------------------------------------------------
 1 | import { PUBLIC_TYPESENSE_SEARCH_API_KEY, PUBLIC_TYPESENSE_URL } from "$env/static/public";
 2 | import type { Post, PostType } from "@repo/core/generated/prisma-client";
 3 | import Typesense from "typesense";
 4 | 
 5 | // Typesense client built from the public env values: one HTTPS node on port 443
 6 | const searchNode = {
 7 |     host: PUBLIC_TYPESENSE_URL,
 8 |     port: 443,
 9 |     protocol: "https"
10 | };
11 | export const typesense = new Typesense.Client({
12 |     nodes: [searchNode],
13 |     apiKey: PUBLIC_TYPESENSE_SEARCH_API_KEY,
14 |     connectionTimeoutSeconds: 2
15 | });
16 | 
17 | /** A `Post` document from the search index, which additionally carries a numeric `id`. */
18 | export type SearchedPost = Post & { id: number };
19 | 
20 | 
21 | /** Wraps the first occurrence of each token (longer than 3 chars) in a highlight span. */
22 | function highlightTokens(text: string, tokens: unknown[]): string {
23 |     const words = tokens.filter((t): t is string => typeof t === "string" && t.length > 3);
24 |     if (words.length === 0) {
25 |         return text;
26 |     }
27 |     // One pass over the original text, so inserted markup is never re-matched
28 |     // (a token like "span" or "class" would otherwise corrupt earlier highlights).
29 |     const escaped = words.map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
30 |     const seen = new Set<string>();
31 |     return text.replace(new RegExp(escaped.join("|"), "g"), (match) => {
32 |         if (seen.has(match)) {
33 |             return match;
34 |         }
35 |         seen.add(match);
36 |         return `<span class="highlight">${match}</span>`;
37 |     });
38 | }
39 | 
40 | /**
41 |  * Keyword-searches the Typesense "paragraphs" collection and returns the best
42 |  * paragraph per domain and post type, with matches wrapped in highlight markup.
43 |  *
44 |  * NOTE(review): `content` is later rendered as raw HTML (`{@html}` in IdeaCard) and
45 |  * is not escaped here; confirm the indexed text is sanitized upstream.
46 |  *
47 |  * @param query - Search text.
48 |  * @param postType - Optional post type to restrict results to.
49 |  */
50 | export async function searchPosts(query: string, postType?: PostType): Promise<SearchedPost[]> {
51 |     const searchResults = await typesense
52 |         .collections<Post & { id: number }>("paragraphs")
53 |         .documents()
54 |         .search({
55 |             q: query,
56 |             // Keyword search; switch to `query_by: "embedding"` for embedding search
57 |             query_by: "content,domain",
58 |             filter_by: postType ? `type:${postType}` : undefined,
59 |             // Leave the embedding field out of the returned documents
60 |             exclude_fields: "embedding",
61 |             prefix: false,
62 |             // Group by domain and type to return only the best paragraph of each
63 |             group_by: "domain,type",
64 |             group_limit: 1,
65 |             limit: 50
66 |         });
67 | 
68 |     // Take the first hit of every group and skip groups that came back empty
69 |     const hits = (searchResults.grouped_hits ?? []).flatMap((group) => group.hits.slice(0, 1));
70 | 
71 |     return hits.map((hit) => {
72 |         let htmlContent = hit.document.content;
73 |         for (const highlight of hit.highlights ?? []) {
74 |             if (highlight.field !== "content") {
75 |                 continue;
76 |             }
77 |             if (highlight.snippet) {
78 |                 // Use the TypeSense snippet to avoid highlighting small words in random
79 |                 // places. However this shrinks the displayed paragraph :(
80 |                 htmlContent = highlight.snippet.replaceAll("<mark>", '<mark class="highlight">');
81 |             } else {
82 |                 htmlContent = highlightTokens(htmlContent, highlight.matched_tokens ?? []);
83 |             }
84 |         }
85 | 
86 |         return {
87 |             ...hit.document,
88 |             updatedAt: new Date(hit.document.updatedAt),
89 |             content: htmlContent
90 |         };
91 |     });
92 | }
93 | 


--------------------------------------------------------------------------------
/apps/web/src/common/util.ts:
--------------------------------------------------------------------------------
 1 | // Update timestamps are saved at UTC in the database.
 2 | // Interpreting the dates in the client timezone may change the date, so parse date explicitly.
 3 | // See https://github.com/lindylearn/aboutideasnow/issues/8
 4 | export function getUTCDate(date: Date) {
 5 |     // A date-only ISO string (YYYY-MM-DD) is parsed as midnight UTC
 6 |     const isoDay = date.toISOString().substring(0, 10);
 7 |     const formatter = new Intl.DateTimeFormat("en-US", {
 8 |         timeZone: "UTC",
 9 |         year: "numeric",
10 |         month: "long",
11 |         day: "numeric"
12 |     });
13 | 
14 |     return formatter.format(new Date(isoDay));
15 | }
16 | 


--------------------------------------------------------------------------------
/apps/web/src/components/Form.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |     import { enhance } from "$app/forms";
 3 |     import { colorPalette } from "../common/constants";
 4 |     import { getUTCDate } from "../common/util";
 5 |     import type { ActionData } from "../routes/about/$types";
 6 |     import posthog from "posthog-js";
 7 | 
 8 |     // Result of the last submission, if any
 9 |     export let form: ActionData;
10 |     // True while a submission is in flight; set by this component on submit
11 |     export let isAddingDomain: boolean;
12 |     // NOTE(review): both branches of the button colour below resolve to colorPalette[0],
13 |     // so this prop currently has no visible effect; confirm the intended colours.
14 |     export let isClearBg: boolean;
15 | </script>
16 | 
17 | <div class="flex flex-col mt-4 text-left">
18 |     {#if form?.addedDomain === false}
19 |         Error indexing your domain :(<br />We will take a look and add your site as soon as
20 |         possible!
21 |     {:else if form?.scrapedPosts}
22 |         {#if form?.scrapedPosts.length === 0}
23 |             We didn't find a /now, /about, or /ideas page on your website. Add one and try again!
24 |         {:else}
25 |             Indexed your website successfully! Found posts:
26 |             <ul class="list-disc">
27 |                 {#each form.scrapedPosts as post}
28 |                     {@const postUrl = new URL(post.url)}
29 |                     <li>
30 |                         <span class="font-bold">
31 |                             {postUrl.hostname}{postUrl.pathname}
32 |                         </span>
33 |                         {#if post.updatedAt && new Date(post.updatedAt).getFullYear() > 1970}
34 |                             last updated at {getUTCDate(new Date(post.updatedAt))}
35 |                         {:else}
36 |                             without update time
37 |                         {/if}
38 |                     </li>
39 |                 {/each}
40 |             </ul>
41 |         {/if}
42 |     {:else if isAddingDomain}
43 |         Indexing your domain...
44 |     {:else}
45 |         <form
46 |             class="flex flex-col items-center w-full gap-4 md:w-full"
47 |             method="POST"
48 |             use:enhance={() => {
49 |                 // Show loading state until page data is reloaded
50 |                 isAddingDomain = true;
51 | 
52 |                 posthog.capture("siteAdded");
53 |             }}
54 |         >
55 |             <div class="flex flex-col w-full gap-4 md:flex-row">
56 |                 <input
57 |                     class="w-full px-3 py-2 text-lg bg-white border rounded-lg shadow-lg outline-none placeholder:text-text/30 border-border"
58 |                     placeholder="yourwebsite.com"
59 |                     name="domain"
60 |                     required
61 |                 />
62 |                 <!-- Optional field; type="email" lets the browser reject malformed addresses -->
63 |                 <input
64 |                     class="w-full px-3 py-2 text-lg bg-white border rounded-lg shadow-lg outline-none placeholder:text-text/30 border-border"
65 |                     placeholder="your@email.com"
66 |                     name="email"
67 |                     type="email"
68 |                 />
69 |             </div>
70 |             <button
71 |                 class="w-full px-3 py-2 font-bold text-white border rounded-lg shadow-lg border-border"
72 |                 style:background-color={isClearBg ? colorPalette[0] : colorPalette[0]}
73 |             >
74 |                 Add my site
75 |             </button>
76 |         </form>
77 |     {/if}
78 | </div>
79 | 


--------------------------------------------------------------------------------
/apps/web/src/components/Header.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |     import clsx from "clsx";
 3 |     import { colorPalette } from "../common/constants";
 4 |     import type { PostType } from "@repo/core/generated/prisma-client";
 5 | 
 6 |     // Invoked when the logo button is clicked; does nothing unless a handler is passed
 7 |     export let onClick: () => void = () => {};
 8 |     // Post type whose word stays fully opaque; when set, the other words are dimmed
 9 |     export let activeTag: PostType | undefined = undefined;
10 | 
11 |     const words = ["ABOUT", "IDEAS", "NOW"];
12 | 
13 |     // Classes for the word at `index`: rounded outer corners, dimmed when another tag is active
14 |     function segmentClass(word: string, index: number, active: PostType | undefined) {
15 |         return clsx(
16 |             "bg-light h-full py-1 md:py-3 px-3 md:px-4 border border-border",
17 |             index === 0 && "rounded-l-2xl",
18 |             index === 2 && "rounded-r-2xl",
19 |             active && word !== active && "opacity-25"
20 |         );
21 |     }
22 | </script>
23 | 
24 | <button
25 |     class="flex text-3xl font-bold leading-relaxed text-center transition-all shadow md:text-6xl font-title rounded-2xl hover:shadow-md"
26 |     on:click={onClick}
27 | >
28 |     {#each words as word, i}
29 |         <div class={segmentClass(word, i, activeTag)} style:background-color={colorPalette[i]}>
30 |             {word.toLowerCase()}
31 |         </div>
32 |     {/each}
33 | </button>
34 | 


--------------------------------------------------------------------------------
/apps/web/src/components/IdeaCard.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |     import type { Post } from "@repo/core/generated/prisma-client";
 3 |     import posthog from "posthog-js";
 4 |     import { colorPalette } from "../common/constants";
 5 |     import { getUTCDate } from "../common/util";
 6 | 
 7 |     export let post: Post;
 8 |     // Accent colour per post type, taken from the shared palette
 9 |     const typeColors = {
10 |         IDEAS: colorPalette[1],
11 |         ABOUT: colorPalette[0],
12 |         NOW: colorPalette[2]
13 |     };
14 |     // The card header strip and the type badge both use this colour
15 |     const color = typeColors[post.type];
16 |     // const color = colorPalette[listIndex % colorPalette.length];
17 |     const date = post.updatedAt ? new Date(post.updatedAt) : undefined;
18 | </script>
19 | 
20 | <a
21 |     href={post.url}
22 |     class="relative flex flex-col w-full overflow-hidden transition-all border shadow-sm post h-max rounded-xl border-border bg-light animate-cardFadein will-change-transform hover:shadow md:hover:rotate-1 md:hover:opacity-75"
23 |     target="_blank"
24 |     style:animation-delay={`${Math.random() * 200}ms`}
25 |     on:click={() => {
26 |         posthog.capture("postClicked");
27 |     }}
28 | >
29 |     <div class="flex gap-2 p-2" style:background-color={color}>
30 |         <div class="flex flex-1 gap-2 px-2 rounded-lg bg-light">
31 |             <img
32 |                 alt={post.domain}
33 |                 class="w-5 h-5 my-2"
34 |                 src="https://www.google.com/s2/favicons?sz=256&domain_url={post.domain}"
35 |             />
36 |             <div class="flex flex-row items-center gap-2 overflow-hidden grow shrink">
37 |                 <h3 class="overflow-hidden text-xl font-bold font-title shrink text-ellipsis">
38 |                     {post.domain}
39 |                 </h3>
40 |                 <div
41 |                     class="w-auto px-2 py-1 font-mono text-sm rounded-lg"
42 |                     style:background-color={color}
43 |                 >
44 |                     /{post.type.toLowerCase()}
45 |                 </div>
46 |             </div>
47 |         </div>
48 |     </div>
49 |     <div class="p-4">
50 |         <div
51 |             class="text-sm whitespace-pre-wrap overflow-hidden line-clamp-[10] font-mono font-normal"
52 |         >
53 |             {#if date && date.getFullYear() > 1970}
54 |                 Updated {getUTCDate(date)}
55 |                 <!-- Use HTML to easily style search highlights -->
56 |                 <br /><br />{/if}{@html post.content}
57 |             <!-- <div
58 |                 id="bottom-fade"
59 |                 class="absolute left-0 -bottom-2 w-full h-8 pointer-events-none bg-gradient-to-t from-red-200 via-white via-50%"
60 |             /> -->
61 |         </div>
62 |     </div>
63 | </a>
64 | 


--------------------------------------------------------------------------------
/apps/web/src/components/icons/x.svelte:
--------------------------------------------------------------------------------
 1 | <svg
 2 |     xmlns="http://www.w3.org/2000/svg"
 3 |     class="w-6 h-6"
 4 |     viewBox="0 0 24 24"
 5 |     fill="none"
 6 |     stroke="currentColor"
 7 |     stroke-width="2"
 8 |     stroke-linecap="round"
 9 |     stroke-linejoin="round"
10 |     ><line x1="18" y1="6" x2="6" y2="18"></line><line x1="6" y1="6" x2="18" y2="18"></line></svg
11 | >
12 | 


--------------------------------------------------------------------------------
/apps/web/src/routes/+layout.svelte:
--------------------------------------------------------------------------------
  1 | <script>
  2 |     import { dev } from "$app/environment";
  3 |     import clsx from "clsx";
  4 |     import { colorPalette } from "../common/constants";
  5 |     import { page } from "$app/stores";
  6 |     import { inject } from "@vercel/analytics";
  7 |     import posthog from "posthog-js";
  8 |     import "../app.css";
  9 |     import { onMount } from "svelte";
 10 |     // Start PostHog once the layout has mounted, using the eu.posthog.com API host
 11 |     onMount(() => {
 12 |         posthog.init("phc_5EmEbNdMFGqalcMQIfLDl9m2CqOIOAwUffQCWyN40ty", {
 13 |             api_host: "https://eu.posthog.com"
 14 |         });
 15 |     });
 16 |     // href is "/about" while on the home page and "/" on every other path;
 17 |     let href = "/";
 18 |     // the footer in the markup below is only rendered when href is not "/"
 19 |     $: {
 20 |         const currentPath = $page.url.pathname;
 21 |         href = currentPath === "/" ? "/about" : "/";
 22 |     }
 23 |     // Register Vercel Analytics, in "development" mode when `dev` is true
 24 |     inject({ mode: dev ? "development" : "production" });
 25 | </script>
 26 | 
 27 | <!-- Very ugly piece of code here for GitHub corner, but it's quick and dirty -->
 28 | <a
 29 |     href="https://github.com/lindylearn/aboutideasnow"
 30 |     class="github-corner"
 31 |     target="_blank"
 32 |     aria-label="View source on GitHub"
 33 |     ><svg
 34 |         class="w-16 h-16 md:w-20 md:h-20"
 35 |         viewBox="0 0 250 250"
 36 |         style="fill:{colorPalette[1]}; color:#fff; position: absolute; top: 0; border: 0; right: 0;"
 37 |         aria-hidden="true"
 38 |         ><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path
 39 |             d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2"
 40 |             fill="currentColor"
 41 |             style="transform-origin: 130px 106px;"
 42 |             class="octo-arm"
 43 |         ></path><path
 44 |             d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z"
 45 |             fill="currentColor"
 46 |             class="octo-body"
 47 |         ></path></svg
 48 |     ></a
 49 | >
 50 | 
 51 | <div
 52 |     class="flex flex-col items-center min-h-screen gap-5 p-5 overflow-hidden md:mt-5 md:gap-10 bg-background font-text text-text animate-fadein"
 53 | >
 54 |     <slot />
 55 | 
 56 |     {#if href !== "/"}
 57 |         <footer class="flex flex-col items-center text-center">
 58 |             <p class="mb-0">
 59 |                 Made by <a class="font-bold" href="https://lindylearn.io" target="_blank"
 60 |                     >Peter Hagen</a
 61 |                 >, <a class="font-bold" href="https://louis.work" target="_blank">Louis Barclay</a>,
 62 |                 and <a class="font-bold" href="/about#authors">others</a>.
 63 |             </p>
 64 |             <p class="mb-0">
 65 |                 We're open source &#8212;
 66 |                 <a
 67 |                     class="font-bold"
 68 |                     href="https://github.com/lindylearn/aboutideasnow"
 69 |                     target="_blank"
 70 |                 >
 71 |                     contribute on GitHub
 72 |                 </a>
 73 |                 to add your name here!
 74 |             </p>
 75 |         </footer>
 76 |     {/if}
 77 | </div>
 78 | 
 79 | <style>
 80 |     .github-corner:hover .octo-arm {
 81 |         animation: octocat-wave 560ms ease-in-out;
 82 |     }
 83 |     @keyframes octocat-wave {
 84 |         0%,
 85 |         100% {
 86 |             transform: rotate(0);
 87 |         }
 88 |         20%,
 89 |         60% {
 90 |             transform: rotate(-25deg);
 91 |         }
 92 |         40%,
 93 |         80% {
 94 |             transform: rotate(10deg);
 95 |         }
 96 |     }
 97 |     @media (max-width: 500px) {
 98 |         .github-corner:hover .octo-arm {
 99 |             animation: none;
100 |         }
101 |         .github-corner .octo-arm {
102 |             animation: octocat-wave 560ms ease-in-out;
103 |         }
104 |     }
105 | </style>
106 | 


--------------------------------------------------------------------------------
/apps/web/src/routes/+page.server.ts:
--------------------------------------------------------------------------------
 1 | import { getDatabaseClient } from "@repo/core/dist";
 2 | import { type Post, PostType, PrismaClient } from "@repo/core/generated/prisma-client";
 3 | import { handleSubmit } from "../common/formActions.js";
 4 | 
 5 | /** Loads the home page data: the scraped-site count and the default posts, optionally filtered by ?filter=. */
 6 | export async function load({ url, setHeaders }): Promise<{
 7 |     websiteCount: number;
 8 |     defaultPosts: Post[];
 9 | }> {
10 |     try {
11 |         const db = getDatabaseClient();
12 |         // Only accept ?filter= values that are actual PostType members
13 |         const rawFilter = url.searchParams.get("filter")?.toUpperCase() as PostType;
14 |         const postTypeFilter = Object.values(PostType).includes(rawFilter) ? rawFilter : undefined;
15 |         const defaultPosts = await getRepresentativePosts(postTypeFilter, db);
16 |         const websiteCount = await db.scrapeState.count({
17 |             where: {
18 |                 status: "SCRAPED",
19 |                 type: "ABOUT"
20 |             }
21 |         });
22 |         // Fire-and-forget, but never leave a rejection unhandled
23 |         db.$disconnect().catch((err) => console.error(`db.$disconnect() failed: ${err}`));
24 | 
25 |         // Cache for 1 hour in shared caches; the directive is spelled "s-maxage"
26 |         setHeaders({
27 |             "Cache-Control": "max-age=0, s-maxage=3600"
28 |         });
29 | 
30 |         return { websiteCount, defaultPosts };
31 |     } catch (err) {
32 |         console.error(`load() function failed: ${err}`);
33 |         // Fall back to a hard-coded site count and no posts
34 |         return { websiteCount: 7591, defaultPosts: [] };
35 |     }
36 | }
37 | 
38 | /** Returns posts of the given type, or an equal share of each type sorted by domain. */
39 | async function getRepresentativePosts(
40 |     postTypeFilter: PostType | undefined,
41 |     db: PrismaClient,
42 |     limit = 12
43 | ) {
44 |     if (!postTypeFilter) {
45 |         // No filter: query each post type separately so all three are represented
46 |         const perType = Math.floor(limit / 3);
47 |         const [about, ideas, now] = await Promise.all([
48 |             getPosts(PostType.ABOUT, db, perType),
49 |             getPosts(PostType.IDEAS, db, perType),
50 |             getPosts(PostType.NOW, db, perType)
51 |         ]);
52 |         return [...about, ...ideas, ...now].sort((a, b) => a.domain.localeCompare(b.domain));
53 |     }
54 |     return await getPosts(postTypeFilter, db, limit);
55 | }
56 | 
57 | /** Fetches up to `limit` posts of one type, most recently updated first. */
58 | async function getPosts(postTypeFilter: PostType, db: PrismaClient, limit = 12) {
59 |     const where = { type: postTypeFilter };
60 |     const newestFirst = { updatedAt: "desc" as const };
61 | 
62 |     return await db.post.findMany({ where, orderBy: newestFirst, take: limit });
63 | }
64 | 
65 | // Default form action: hands the submitted request to the shared handler
66 | export const actions = {
67 |     async default({ request }) {
68 |         const result = await handleSubmit(request);
69 |         return result;
70 |     }
71 | };
72 | 


--------------------------------------------------------------------------------
/apps/web/src/routes/+page.svelte:
--------------------------------------------------------------------------------
  1 | <script lang="ts">
  2 |     import { page, navigating } from "$app/stores";
  3 |     import { goto } from "$app/navigation";
  4 |     import { colorPalette, exampleSearchQueries } from "../common/constants";
  5 |     import { searchPosts } from "../common/typesense";
  6 |     import IdeaCard from "../components/IdeaCard.svelte";
  7 |     import type { PageData } from "./$types";
  8 |     import debounce from "lodash/debounce";
  9 |     import XIcon from "../components/icons/x.svelte";
 10 |     import type { Post, PostType } from "@repo/core/generated/prisma-client";
 11 |     import clsx from "clsx";
 12 |     import Form from "../components/Form.svelte";
 13 |     import type { ActionData } from "./about/$types";
 14 |     import posthog from "posthog-js";
 15 |     import Header from "../components/Header.svelte";
 16 | 
 17 |     export let data: PageData;
 18 |     export let form: ActionData;
 19 | 
 20 |     let showFilter = false;
 21 |     let isAddingDomain = false;
 22 | 
 23 |     let searchQuery = $page.url.searchParams.get("q") || "";
 24 |     let postTypeFilter: PostType | undefined = $page.url.searchParams
 25 |         .get("filter")
 26 |         ?.toUpperCase() as PostType;
 27 | 
 28 |     function toggleFilter() {
 29 |         if (postTypeFilter === undefined) {
 30 |             postTypeFilter = "ABOUT";
 31 |         } else if (postTypeFilter === "ABOUT") {
 32 |             postTypeFilter = "IDEAS";
 33 |         } else if (postTypeFilter === "IDEAS") {
 34 |             postTypeFilter = "NOW";
 35 |         } else {
 36 |             postTypeFilter = undefined;
 37 |         }
 38 | 
 39 |         runSearch();
 40 |     }
 41 | 
 42 |     let searchedPosts: Post[] = [];
 43 |     let isSearching = false;
 44 |     $: if (!searchQuery) {
 45 |         searchedPosts = [];
 46 |         isSearching = false;
 47 |     }
 48 | 
 49 |     /**
 50 |      * Syncs the URL with the current query/filter, then either reloads the default
 51 |      * posts (empty query) or fetches results for the query from the browser.
 52 |      */
 53 |     async function runSearch() {
 54 |         if (searchQuery) postTypeFilter = undefined; // a text search clears the type filter
 55 | 
 56 |         // Update URL params
 57 |         const searchParams = new URLSearchParams($page.url.searchParams.toString());
 58 |         if (postTypeFilter) {
 59 |             searchParams.set("filter", postTypeFilter.toLowerCase());
 60 |         } else {
 61 |             searchParams.delete("filter");
 62 |         }
 63 |         if (!searchQuery) {
 64 |             // Show most recent posts per type by reloading the page
 65 |             searchParams.delete("q");
 66 |             goto(`?${searchParams.toString()}`);
 67 |             return;
 68 |         }
 69 |         // Update URL without reload
 70 |         searchParams.set("q", searchQuery);
 71 |         window.history.replaceState(history.state, "", `?${searchParams.toString()}`);
 72 |         const query = searchQuery; // snapshot, to detect edits made while the request is in flight
 73 |         isSearching = true;
 74 |         showFilter = false;
 75 |         try {
 76 |             const results = await searchPosts(query, postTypeFilter); // Call TypeSense directly from the browser
 77 |             // Ignore stale responses: a late reply must not overwrite a newer (or cleared) query
 78 |             if (query === searchQuery) searchedPosts = results;
 79 |         } finally {
 80 |             // Clear the spinner even when the request throws; a newer search owns it otherwise
 81 |             if (query === searchQuery) isSearching = false;
 82 |         }
 83 |         posthog.capture("search");
 84 |     }
 85 |     const runSearchDebounced = debounce(runSearch, 200);
 86 | 
 87 |     // Run search on page load
 88 |     if (searchQuery && typeof window !== "undefined") {
 89 |         runSearch();
 90 |     }
 91 | </script>
 92 | 
 93 | <Header activeTag={postTypeFilter} onClick={toggleFilter} />
 94 | 
 95 | <main class="flex flex-col items-center max-w-xl gap-0 text-center md:text-lg">
 96 |     <p>
 97 |         Find people to talk to or collaborate with by searching across the /about, /ideas and /now
 98 |         pages of {data.websiteCount}
 99 |         personal websites.
100 |     </p>
101 |     <a class="underline transition-opacity md:text-lg hover:opacity-50" href="/about"
102 |         >Read the manifesto</a
103 |     >
104 | </main>
105 | 
106 | <div class="flex items-center w-full max-w-4xl gap-4">
107 |     <div
108 |         id="search-container"
109 |         class="flex items-stretch self-stretch overflow-hidden bg-white border shadow-md md:text-lg grow rounded-xl border-border"
110 |     >
111 |         <!-- svelte-ignore a11y-autofocus -->
112 |         <input
113 |             id="search-bar"
114 |             class="px-3 py-2 text-center outline-none grow placeholder:text-text/30"
115 |             placeholder="Search for anything that interests you"
116 |             autocapitalize="off"
117 |             spellcheck="false"
118 |             autofocus
119 |             bind:value={searchQuery}
120 |             on:input={runSearchDebounced}
121 |         />
122 |         <div class="relative flex items-center w-0">
123 |             {#if isSearching || $navigating}
124 |                 <div class="loader -ml-9 animate-fadein" />
125 |             {:else if searchQuery}
126 |                 <button
127 |                     class="p-1 -ml-10 font-normal rounded-full animate-fadein"
128 |                     on:click={() => {
129 |                         searchQuery = "";
130 |                         runSearch();
131 |                         // document.getElementById("search-bar")?.focus();
132 |                     }}
133 |                 >
134 |                     <XIcon />
135 |                 </button>
136 |             {/if}
137 |         </div>
138 |     </div>
139 | 
140 |     <!-- <button
141 |         class={clsx(
142 |             "hidden md:block transition-opacity text-text hover:opacity-100",
143 |             showFilter ? "opacity-100" : "opacity-30"
144 |         )}
145 |         on:click={() => {
146 |             showFilter = !showFilter;
147 |         }}
148 |     >
149 |         <svg
150 |             xmlns="http://www.w3.org/2000/svg"
151 |             fill="none"
152 |             viewBox="0 0 24 24"
153 |             stroke-width="1.5"
154 |             stroke="currentColor"
155 |             class="w-6 h-6"
156 |         >
157 |             <path
158 |                 stroke-linecap="round"
159 |                 stroke-linejoin="round"
160 |                 d="M10.5 6h9.75M10.5 6a1.5 1.5 0 1 1-3 0m3 0a1.5 1.5 0 1 0-3 0M3.75 6H7.5m3 12h9.75m-9.75 0a1.5 1.5 0 0 1-3 0m3 0a1.5 1.5 0 0 0-3 0m-3.75 0H7.5m9-6h3.75m-3.75 0a1.5 1.5 0 0 1-3 0m3 0a1.5 1.5 0 0 0-3 0m-9.75 0h9.75"
161 |             />
162 |         </svg>
163 |     </button> -->
164 | </div>
165 | 
166 | <!-- {#if showFilter}
167 |     <div class="flex gap-2 overflow-hidden font-title border-border md:-mt-5">
168 |         {#each ["ABOUT", "IDEAS", "NOW"] as word, i}
169 |             <button
170 |                 class={clsx(
171 |                     "rounded-lg px-2 text-text font-mono",
172 |                     postTypeFilter !== word &&
173 |                         "transition-opacity opacity-30 h-full px-2 py-1 hover:opacity-100"
174 |                 )}
175 |                 style:background-color={postTypeFilter === word ? colorPalette[i] : undefined}
176 |                 on:click={() => {
177 |                     // @ts-ignore
178 |                     postTypeFilter = word;
179 |                     searchQuery = "";
180 |                     runSearch();
181 |                 }}
182 |             >
183 |                 /{word.toLowerCase()}
184 |             </button>
185 |         {/each}
186 |     </div>
187 | {/if} -->
188 | 
189 | <div
190 |     id="example-searches"
191 |     class="flex flex-wrap justify-center max-w-4xl gap-1 -mx-5 -mt-2 text-sm md:text-base md:mx-0 md:justify-center md:-mt-5"
192 | >
193 |     {#each exampleSearchQueries as exampleSearchQuery}
194 |         <button
195 |             class="px-2 py-1 font-mono transition-colors hover:text-text/50"
196 |             on:click={() => {
197 |                 searchQuery = exampleSearchQuery.term;
198 |                 // trigger search without debounce
199 |                 runSearch();
200 | 
201 |                 document.getElementById("search-bar")?.focus();
202 |             }}
203 |         >
204 |             {exampleSearchQuery.emoji + " " + exampleSearchQuery.term}
205 |         </button>
206 |     {/each}
207 | </div>
208 | 
209 | <div
210 |     id="search-results"
211 |     class="flex flex-col items-start justify-around w-full gap-8 mt-2 mb-5 md:mt-5 md:grid md:grid-cols-2 xl:grid-cols-3 2xl:grid-cols-4"
212 | >
213 |     {#each searchedPosts.length ? searchedPosts : data.defaultPosts as post, index (post.url)}
214 |         <IdeaCard {post}></IdeaCard>
215 |     {/each}
216 | </div>
217 | 
218 | <section class="flex flex-col items-center max-w-xl gap-0 text-center md:text-lg">
219 |     <p>
220 |         Find more posts by searching for things you're interested in!<br />Or click the
221 |         AboutIdeasNow logo to filter by a specific post type.
222 |     </p>
223 | </section>
224 | 
225 | <div
226 |     class="flex flex-col items-center max-w-lg p-4 bg-white border shadow-md rounded-xl border-border"
227 |     style:background-color={colorPalette[1]}
228 | >
229 |     <h1 class="mb-2 text-2xl font-bold font-title">Add your site here!</h1>
230 |     <p class="text-center">
231 |         Help other people find you by adding your website to aboutideasnow.com.
232 |         <a class="font-bold" href="/about#submit">Learn more</a>
233 |     </p>
234 |     <Form {form} isClearBg={false} {isAddingDomain} />
235 | </div>
236 | 


--------------------------------------------------------------------------------
/apps/web/src/routes/about/+page.server.ts:
--------------------------------------------------------------------------------
1 | import { handleSubmit } from "../../common/formActions.js";
2 | 
3 | /** Form actions for the /about page: the default action hands the request to handleSubmit. */
4 | export const actions = {
5 |     // Kept async so the action always yields a promise of handleSubmit's result
6 |     default: async ({ request }) => handleSubmit(request)
7 | };
8 | 


--------------------------------------------------------------------------------
/apps/web/src/routes/about/+page.svelte:
--------------------------------------------------------------------------------
  1 | <script lang="ts">
  2 |     import type { ActionData } from "./$types";
  3 |     import Form from "../../components/Form.svelte";
  4 |     import Header from "../../components/Header.svelte";
  5 | 
  6 |     export let form: ActionData;
  7 | 
  8 |     let isAddingDomain = false;
  9 | </script>
 10 | 
 11 | <Header
 12 |     onClick={() => {
 13 |         window.location.href = "/";
 14 |     }}
 15 | />
 16 | 
 17 | <main class="flex flex-col w-full max-w-xl gap-5 mb-10 text-lg md:gap-10 items-left">
 18 |     <section id="intro">
 19 |         <p>
 20 |             <span class="font-bold">aboutideasnow.com</span> exists to help you find your people.
 21 |         </p>
 22 |         <p>
 23 |             1000s of personal websites exist on the internet, outside of social media &#8212;
 24 |             created by creators, thinkers, and doers of all sorts.
 25 |         </p>
 26 |         <p>
 27 |             We index the /about, /ideas, and /now pages of these independent sites to give you a
 28 |             handy way of searching through them.
 29 |         </p>
 30 |     </section>
 31 | 
 32 |     <section id="manifesto">
 33 |         <h2>What's /ideas?</h2>
 34 |         <p>
 35 |             The way we see it, the /about, /ideas, and /now pages on websites each have a different
 36 |             purpose. And all three should be there.
 37 |         </p>
 38 |         <ul class="w-full">
 39 |             <li>
 40 |                 <span class="font-bold">/about</span> pages are about the past: how people see themselves
 41 |                 and what brought them there. That's useful as a general overview.
 42 |             </li>
 43 |             <li>
 44 |                 <span class="font-bold">/now</span> is a more personal look at what people are doing
 45 |                 right now, what they care about, and what they could use help with.
 46 |             </li>
 47 |             <li>
 48 |                 <span class="font-bold">/ideas</span> should be about the future: the crazy things people
 49 |                 always wanted to make, concepts they're mulling over, or planned projects.
 50 |             </li>
 51 |         </ul>
 52 |         <p>
 53 |             If you create an <span class="font-bold">/ideas</span> page on your website, people who are
 54 |             looking for collaborators are much more likely to find you.
 55 |         </p>
 56 |     </section>
 57 | 
 58 |     <section id="submit">
 59 |         <h2>Add your website</h2>
 60 |         <p>
 61 |             Create any of the above pages on your website. Mention when you last updated each page
 62 |             so people know it's fresh.
 63 |         </p>
 64 |         <p>
 65 |             Then submit your website here to add it to
 66 |             <span class="font-bold">aboutideasnow.com</span>.
 67 |         </p>
 68 |         <p>
 69 |             If you enter your email address, we'll let you know if someone mentions similar ideas to
 70 |             you on their website.
 71 |         </p>
 72 |         <Form {form} isClearBg={true} {isAddingDomain} />
 73 |     </section>
 74 | 
 75 |     <section id="authors" class="mt-4">
 76 |         <h2>Who built this?</h2>
 77 |         <p>
 78 |             aboutideasnow.com is a decentralized solution on the increasingly centralized web. We
 79 |             exist only to direct you to people's personal websites. You take it from there.
 80 |         </p>
 81 |         <p>
 82 |             <a class="font-bold" href="https://lindylearn.io" target="_blank">Peter Hagen</a>
 83 |             and
 84 |             <a class="font-bold" href="https://louis.work" target="_blank">Louis Barclay</a> built this
 85 |             website in February 2024.
 86 |         </p>
 87 |         <p>
 88 |             <a class="font-bold" href="https://agentcooper.io/about" target="_blank">Artem Tyurin</a
 89 |             > came up with the notion of /ideas pages.
 90 |         </p>
 91 |         <p>
 92 |             <a href="https://sive.rs" target="_blank"> Derek Sivers </a>started the /now page
 93 |             movement without which this all wouldn't be possible.
 94 |         </p>
 95 |         <p>
 96 |             Contribute
 97 |             <a href="https://github.com/lindylearn/aboutideasnow" target="_blank"> on GitHub</a> to add
 98 |             your name here!
 99 |         </p>
100 |     </section>
101 | </main>
102 | 


--------------------------------------------------------------------------------
/apps/web/static/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/favicon.png


--------------------------------------------------------------------------------
/apps/web/static/fonts/DMSans-VariableFont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/fonts/DMSans-VariableFont.ttf


--------------------------------------------------------------------------------
/apps/web/static/fonts/PTMono-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/fonts/PTMono-Regular.ttf


--------------------------------------------------------------------------------
/apps/web/static/fonts/Reckless-VariableFont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/fonts/Reckless-VariableFont.woff2


--------------------------------------------------------------------------------
/apps/web/static/logo.svg:
--------------------------------------------------------------------------------
 1 | <svg width="960" height="960" viewBox="0 0 960 960" fill="none" xmlns="http://www.w3.org/2000/svg">
 2 | <g clip-path="url(#clip0_154_2)">
 3 | <path d="M411.656 41.5L174.469 922.688H42.5781L279.766 41.5H411.656Z" fill="#404040"/>
 4 | <rect x="471" y="50" width="424" height="248" rx="60" fill="#CE7E8F"/>
 5 | <rect x="390" y="358" width="505" height="248" rx="60" fill="#EBBE6B"/>
 6 | <rect x="294" y="667" width="601" height="248" rx="60" fill="#6EAECF"/>
 7 | </g>
 8 | <defs>
 9 | <clipPath id="clip0_154_2">
10 | <rect width="960" height="960" rx="80" fill="white"/>
11 | </clipPath>
12 | </defs>
13 | </svg>
14 | 


--------------------------------------------------------------------------------
/apps/web/static/metaimage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/metaimage.png


--------------------------------------------------------------------------------
/apps/web/static/telephone.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/telephone.png


--------------------------------------------------------------------------------
/apps/web/static/typewriter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lindylearn/aboutideasnow/a2a99de4390dfa744022c8a7c1098df10ada2e06/apps/web/static/typewriter.png


--------------------------------------------------------------------------------
/apps/web/svelte.config.js:
--------------------------------------------------------------------------------
 1 | import adapter from "@sveltejs/adapter-vercel";
 2 | import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
 3 | 
 4 | /** @type {import('@sveltejs/kit').Config} */
 5 | const config = {
 6 |     // Consult https://kit.svelte.dev/docs/integrations#preprocessors
 7 |     // for more information about preprocessors
 8 |     preprocess: vitePreprocess(),
 9 | 
10 |     kit: {
11 |         // Uses @sveltejs/adapter-vercel (imported at the top of this file as `adapter`), not adapter-auto.
12 |         // If the deployment target changes, switch out the adapter.
13 |         // See https://kit.svelte.dev/docs/adapters for more information about adapters.
14 |         adapter: adapter()
15 |     }
16 | };
17 | 
18 | export default config;
19 | 


--------------------------------------------------------------------------------
/apps/web/tailwind.config.js:
--------------------------------------------------------------------------------
 1 | /** @type {import('tailwindcss').Config} */
 2 | export default {
 3 |     content: ["./src/**/*.{html,js,svelte,ts}"], // source files Tailwind scans for class names
 4 | 
 5 |     plugins: [],
 6 |     theme: {
 7 |         extend: { // extends Tailwind's default theme rather than replacing it
 8 |             colors: { // project color names, used in classes such as `text-text` and `border-border`
 9 |                 background: "#fafafa",
10 |                 light: "rgba(255, 255, 255, 0.32)",
11 |                 border: "rgba(33, 35, 36, 0.04)",
12 |                 bold: "rgb(213, 199, 162)",
13 |                 text: "#2c2c2c"
14 |             },
15 |             fontFamily: {
16 |                 title: ["Reckless", "serif"],
17 |                 text: ["DM Sans", "sans-serif"],
18 |                 mono: ["PT Mono", "monospace"]
19 |             },
20 |             keyframes: { // referenced by name from the `animation` entries below
21 |                 fadein: {
22 |                     "0%": { opacity: "0" },
23 |                     "100%": { opacity: "1" }
24 |                 },
25 |                 cardFadein: {
26 |                     "0%": { opacity: "0", transform: "translateY(5px)" },
27 |                     "100%": { opacity: "1", transform: "translateY(0)" }
28 |                 }
29 |             },
30 |             animation: { // exposed as `animate-<name>` classes; "backwards" applies the 0% keyframe before the animation starts
31 |                 fadein: "fadein 200ms ease-out backwards",
32 |                 cardFadein: "cardFadein 600ms cubic-bezier(0.16, 1, 0.3, 1) backwards"
33 |             }
34 |         }
35 |     }
36 | };
37 | 


--------------------------------------------------------------------------------
/apps/web/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "extends": "./.svelte-kit/tsconfig.json",
 3 |     "compilerOptions": {
 4 |         "allowJs": true,
 5 |         "checkJs": true,
 6 |         "esModuleInterop": true,
 7 |         "forceConsistentCasingInFileNames": true,
 8 |         "resolveJsonModule": true,
 9 |         "skipLibCheck": true,
10 |         "sourceMap": true,
11 |         "strict": true,
12 |         "strictNullChecks": true
13 |     }
14 |     // Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias
15 |     //
16 |     // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes
17 |     // from the referenced tsconfig.json - TypeScript does not merge them in
18 | }
19 | 


--------------------------------------------------------------------------------
/apps/web/vercel.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "redirects": [
 3 |         {
 4 |             "source": "/now",
 5 |             "destination": "https://github.com/lindylearn/aboutideasnow/pulls",
 6 |             "permanent": true
 7 |         },
 8 |         {
 9 |             "source": "/ideas",
10 |             "destination": "https://github.com/lindylearn/aboutideasnow/issues",
11 |             "permanent": true
12 |         }
13 |     ]
14 | }
15 | 


--------------------------------------------------------------------------------
/apps/web/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { sveltekit } from "@sveltejs/kit/vite";
 2 | import { defineConfig } from "vite";
 3 | 
 4 | export default defineConfig({
 5 |     plugins: [sveltekit()],
 6 |     resolve: {
 7 |         preserveSymlinks: true // identify files by their symlink path instead of the real path; NOTE(review): presumably needed for the pnpm workspace packages — confirm
 8 |     }
 9 | });
10 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "project-template",
 3 |     "private": true,
 4 |     "scripts": {
 5 |         "build": "turbo build",
 6 |         "dev": "turbo dev",
 7 |         "lint": "turbo lint",
 8 |         "format": "prettier --write \"**/*.{ts,tsx,md}\""
 9 |     },
10 |     "devDependencies": {
11 |         "@repo/eslint-config": "workspace:*",
12 |         "@repo/typescript-config": "workspace:*",
13 |         "prettier": "^3.1.1",
14 |         "turbo": "latest"
15 |     },
16 |     "packageManager": "pnpm@8.9.0",
17 |     "engines": {
18 |         "node": ">=18"
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/packages/core/.env.example:
--------------------------------------------------------------------------------
1 | # Supabase database URL
2 | DATABASE_URL=""
3 | 


--------------------------------------------------------------------------------
/packages/core/.gitignore:
--------------------------------------------------------------------------------
1 | generated
2 | 


--------------------------------------------------------------------------------
/packages/core/README.md:
--------------------------------------------------------------------------------
1 | # Core package
2 | 
3 | Common util functions and the database config.
4 | 
5 | ## Development
6 | 
7 | After changing `schema.prisma`, run `pnpm generate` and `pnpm migrate $migration-name`.
8 | 


--------------------------------------------------------------------------------
/packages/core/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "@repo/core",
 3 |     "version": "1.0.0",
 4 |     "description": "",
 5 |     "exports": {
 6 |         "./dist": "./dist/index.js",
 7 |         "./generated/prisma-client": "./generated/prisma-client/index.js"
 8 |     },
 9 |     "scripts": {
10 |         "postinstall": "pnpm generate",
11 |         "dev": "tsc -w",
12 |         "build": "tsc",
13 |         "generate": "prisma generate",
14 |         "migrate": "prisma migrate dev --name"
15 |     },
16 |     "keywords": [],
17 |     "author": "",
18 |     "license": "ISC",
19 |     "dependencies": {
20 |         "@prisma/client": "^5.8.0",
21 |         "extract-date": "^2.8.2",
22 |         "prisma": "^5.8.0"
23 |     },
24 |     "devDependencies": {
25 |         "@repo/typescript-config": "workspace:^",
26 |         "typescript": "^5.3.3"
27 |     }
28 | }
29 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240111115033_add_post/migration.sql:
--------------------------------------------------------------------------------
 1 | -- CreateTable
 2 | CREATE TABLE "Post" (
 3 |     "domain" TEXT NOT NULL,
 4 |     "url" TEXT NOT NULL,
 5 |     "content" TEXT NOT NULL,
 6 |     "updatedAt" TIMESTAMP(3),
 7 |     "authorName" TEXT,
 8 | 
 9 |     CONSTRAINT "Post_pkey" PRIMARY KEY ("domain")
10 | );
11 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240111121259_add_scrape_state/migration.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Warnings:
 3 | 
 4 |   - Added the required column `type` to the `Post` table without a default value. This is not possible if the table is not empty.
 5 | 
 6 | */
 7 | -- CreateEnum
 8 | CREATE TYPE "PostType" AS ENUM ('NOW', 'IDEAS');
 9 | 
10 | -- CreateEnum
11 | CREATE TYPE "ScrapeStatus" AS ENUM ('UNAVAILABLE', 'NO_CONTENT', 'SCRAPED');
12 | 
13 | -- AlterTable
14 | ALTER TABLE "Post" ADD COLUMN     "type" "PostType" NOT NULL;
15 | 
16 | -- CreateTable
17 | CREATE TABLE "ScrapeState" (
18 |     "domain" TEXT NOT NULL,
19 |     "status" "ScrapeStatus" NOT NULL,
20 |     "scapedAt" TIMESTAMP(3),
21 | 
22 |     CONSTRAINT "ScrapeState_pkey" PRIMARY KEY ("domain")
23 | );
24 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240111152613_convert_post_date/migration.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Warnings:
 3 | 
 4 |   - Made the column `updatedAt` on table `Post` required. This step will fail if there are existing NULL values in that column.
 5 | 
 6 | */
 7 | -- AlterTable
 8 | ALTER TABLE "Post" ALTER COLUMN "updatedAt" SET NOT NULL,
 9 | ALTER COLUMN "updatedAt" SET DEFAULT '1970-01-01',
10 | ALTER COLUMN "updatedAt" SET DATA TYPE TEXT;
11 | 
12 | -- AlterTable
13 | ALTER TABLE "ScrapeState" ADD COLUMN     "url" TEXT;
14 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240111153141_convert_post_date_back/migration.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Warnings:
 3 | 
 4 |   - Changed the type of `updatedAt` on the `Post` table. No cast exists, the column would be dropped and recreated, which cannot be done if there is data, since the column is required.
 5 | 
 6 | */
 7 | -- AlterTable
 8 | ALTER TABLE "Post" DROP COLUMN "updatedAt",
 9 | ADD COLUMN     "updatedAt" TIMESTAMP(3) NOT NULL;
10 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240111155745_add_redirect_status/migration.sql:
--------------------------------------------------------------------------------
1 | -- AlterEnum
2 | ALTER TYPE "ScrapeStatus" ADD VALUE 'REDIRECTED';
3 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240126145714_add_directory_scrape_state/migration.sql:
--------------------------------------------------------------------------------
1 | -- CreateEnum
2 | CREATE TYPE "DomainType" AS ENUM ('INDIVIDUAL_SITE', 'DIRECTORY');
3 | 
4 | -- AlterTable
5 | ALTER TABLE "ScrapeState" ADD COLUMN     "domainType" "DomainType" NOT NULL DEFAULT 'INDIVIDUAL_SITE';
6 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240128184424_add_vector_search/migration.sql:
--------------------------------------------------------------------------------
 1 | -- From https://js.langchain.com/docs/integrations/retrievers/supabase-hybrid/?ref=blog.langchain.dev#create-a-table-and-search-functions-in-your-database
 2 | 
 3 | -- Enable the pgvector extension to work with embedding vectors
 4 | create extension vector;
 5 | 
 6 | -- Create a table to store your documents
 7 | alter table "Post"
 8 | add column embedding vector(1024);
 9 | 
10 | -- Create a function to similarity search for documents
11 | create function match_posts (
12 |   query_embedding vector(1024),
13 |   match_count int DEFAULT null,
14 |   filter jsonb DEFAULT '{}'
15 | ) returns table (
16 |   id bigint,
17 |   content text,
18 |   similarity float
19 | )
20 | language plpgsql
21 | as $$
22 | #variable_conflict use_column
23 | begin
24 |   return query
25 |   select
26 |     id,
27 |     content,
28 |     1 - (documents.embedding <=> query_embedding) as similarity
29 |   from documents
30 |   order by documents.embedding <=> query_embedding
31 |   limit match_count;
32 | end;
33 | $$;
34 | 
35 | -- Create a function to keyword search for documents
36 | create function kw_match_posts(query_text text, match_count int)
37 | returns table (id bigint, content text, similarity real)
38 | as $$
39 | 
40 | begin
41 | return query execute
42 | format('select id, content, ts_rank(to_tsvector(content), plainto_tsquery($1)) as similarity
43 | from documents
44 | where to_tsvector(content) @@ plainto_tsquery($1)
45 | order by similarity desc
46 | limit $2')
47 | using query_text, match_count;
48 | end;
49 | $$ language plpgsql;
50 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240128193052_fix_search_functions/migration.sql:
--------------------------------------------------------------------------------
 1 | drop function if exists match_posts;
 2 | drop function if exists kw_match_posts;
 3 | 
 4 | -- Create a function to similarity search for "Post"
 5 | create or replace function match_posts (
 6 |   query_embedding vector(1024),
 7 |   match_count int DEFAULT null,
 8 |   filter jsonb DEFAULT '{}'
 9 | ) returns table (
10 |   "url" text,
11 |   "domain" text,
12 |   "type" "PostType",
13 |   "content" text,
14 |   "updatedAt" timestamp,
15 |   similarity float
16 | )
17 | language plpgsql
18 | as $$
19 | #variable_conflict use_column
20 | begin
21 |   return query
22 |   select
23 |     "url",
24 |     "domain",
25 |     "type",
26 |     "content",
27 |     "updatedAt",
28 |     1 - ("Post".embedding <=> query_embedding) as similarity
29 |   from "Post"
30 |   order by "Post".embedding <=> query_embedding
31 |   limit match_count;
32 | end;
33 | $$;
34 | 
35 | -- Create a function to keyword search for "Post"
36 | create or replace function kw_match_posts(query_text text, match_count int)
37 | returns table (
38 |   "url" text, 
39 |   "domain" text,
40 |   "type" "PostType",
41 |   "content" text,
42 |   "updatedAt" timestamp,
43 |   similarity real
44 | )
45 | as $$
46 | begin
47 | return query execute
48 | format('
49 | select 
50 |   "url", 
51 |   "domain",
52 |   "type",
53 |   "content",
54 |   "updatedAt",
55 |   ts_rank(to_tsvector(content), plainto_tsquery($1)) as similarity
56 | from "Post"
57 | where to_tsvector(content) @@ plainto_tsquery($1)
58 | order by similarity desc
59 | limit $2')
60 | using query_text, match_count;
61 | end;
62 | $$ language plpgsql;
63 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240128193504_change_post_id/migration.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Warnings:
 3 | 
 4 |   - The primary key for the `Post` table will be changed. If it partially fails, the table could be left without primary key constraint.
 5 | 
 6 | */
 7 | -- AlterTable
 8 | ALTER TABLE "Post" DROP CONSTRAINT "Post_pkey",
 9 | ADD CONSTRAINT "Post_pkey" PRIMARY KEY ("url");
10 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240128200314_add_post_id/migration.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Warnings:
 3 | 
 4 |   - The primary key for the `Post` table will be changed. If it partially fails, the table could be left without primary key constraint.
 5 | 
 6 | */
 7 | -- AlterTable
 8 | ALTER TABLE "Post" DROP CONSTRAINT "Post_pkey",
 9 | ADD COLUMN     "id" SERIAL NOT NULL,
10 | ADD CONSTRAINT "Post_pkey" PRIMARY KEY ("id");
11 | CREATE UNIQUE INDEX "Post_url_key" ON "Post"("url");
12 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240129141512_remove_post_id/migration.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Warnings:
 3 | 
 4 |   - The primary key for the `Post` table will be changed. If it partially fails, the table could be left without primary key constraint.
 5 |   - You are about to drop the column `authorName` on the `Post` table. All the data in the column will be lost.
 6 |   - You are about to drop the column `id` on the `Post` table. All the data in the column will be lost.
 7 | 
 8 | */
 9 | -- DropIndex
10 | DROP INDEX "Post_url_key";
11 | 
12 | -- AlterTable
13 | ALTER TABLE "Post" DROP CONSTRAINT "Post_pkey",
14 | DROP COLUMN "authorName",
15 | DROP COLUMN "id",
16 | ADD CONSTRAINT "Post_pkey" PRIMARY KEY ("url");
17 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240129141653_edit_embeddings/migration.sql:
--------------------------------------------------------------------------------
1 | alter table "Post"
2 | alter COLUMN embedding TYPE vector(512);
3 | 
4 | CREATE INDEX "Post_content_search" ON "Post" USING GIN (to_tsvector('english', content));
5 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240222114727_enable_domain_scrape/migration.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Warnings:
 3 | 
 4 |   - You are about to drop the column `url` on the `ScrapeState` table. All the data in the column will be lost.
 5 | 
 6 | */
 7 | -- AlterEnum
 8 | ALTER TYPE "PostType" ADD VALUE 'ABOUT';
 9 | 
10 | -- AlterTable
11 | ALTER TABLE "ScrapeState" DROP COLUMN "url";
12 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240222153334_add_submitted_domain_table/migration.sql:
--------------------------------------------------------------------------------
 1 | -- CreateTable
 2 | CREATE TABLE "SubmittedDomain" (
 3 |     "domain" TEXT NOT NULL,
 4 |     "email" TEXT,
 5 |     "success" BOOLEAN NOT NULL,
 6 |     "submittedAt" TIMESTAMP(3) NOT NULL,
 7 | 
 8 |     CONSTRAINT "SubmittedDomain_pkey" PRIMARY KEY ("domain")
 9 | );
10 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240223142705_per_post_scrape_state/migration.sql:
--------------------------------------------------------------------------------
1 | -- AlterTable
2 | ALTER TABLE "ScrapeState" ADD COLUMN     "type" "PostType" NOT NULL DEFAULT 'NOW';
3 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/20240223150159_change_scrapestate_id/migration.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Warnings:
 3 | 
 4 |   - The primary key for the `ScrapeState` table will be changed. If it partially fails, the table could be left without primary key constraint.
 5 | 
 6 | */
 7 | -- AlterTable
 8 | ALTER TABLE "ScrapeState" DROP CONSTRAINT "ScrapeState_pkey",
 9 | ALTER COLUMN "type" DROP DEFAULT,
10 | ADD CONSTRAINT "ScrapeState_pkey" PRIMARY KEY ("domain", "type");
11 | 


--------------------------------------------------------------------------------
/packages/core/prisma/migrations/migration_lock.toml:
--------------------------------------------------------------------------------
1 | # Please do not edit this file manually
2 | # It should be added in your version-control system (i.e. Git)
3 | provider = "postgresql"


--------------------------------------------------------------------------------
/packages/core/prisma/schema.prisma:
--------------------------------------------------------------------------------
 1 | generator client {
 2 |     provider = "prisma-client-js"
 3 |     output   = "../generated/prisma-client" // same directory that src/index.ts imports PrismaClient from
 4 | }
 5 | 
 6 | datasource db {
 7 |     provider = "postgresql"
 8 |     url      = env("DATABASE_URL")
 9 | }
10 | 
11 | model Post {
12 |     url    String   @id // the URL is the primary key; there is no separate id column
13 |     domain String
14 |     type   PostType
15 | 
16 |     content   String
17 |     updatedAt DateTime
18 | 
19 |     embedding Unsupported("vector(512)")? // optional; width matches the vector(512) set in the edit_embeddings migration
20 | }
21 | 
22 | enum PostType {
23 |     ABOUT
24 |     NOW
25 |     IDEAS
26 | }
27 | 
28 | // Separate scrape status from posts to track failures; one row per (domain, type) pair (see @@id)
29 | model ScrapeState {
30 |     domain String
31 |     type   PostType
32 | 
33 |     domainType DomainType   @default(INDIVIDUAL_SITE)
34 |     status     ScrapeStatus
35 |     scapedAt   DateTime? // NOTE(review): looks like a typo of "scrapedAt"; renaming needs a migration — confirm before changing
36 | 
37 |     @@id([domain, type])
38 | }
39 | 
40 | enum DomainType {
41 |     INDIVIDUAL_SITE
42 |     DIRECTORY
43 | }
44 | 
45 | enum ScrapeStatus {
46 |     UNAVAILABLE
47 |     NO_CONTENT
48 |     REDIRECTED
49 |     SCRAPED
50 | }
51 | 
52 | model SubmittedDomain {
53 |     domain String  @id
54 |     email  String?
55 | 
56 |     success     Boolean
57 |     submittedAt DateTime
58 | }
59 | 


--------------------------------------------------------------------------------
/packages/core/src/index.ts:
--------------------------------------------------------------------------------
1 | import { PrismaClient } from "../generated/prisma-client";
2 | 
3 | export function getDatabaseClient(): PrismaClient { // Returns a freshly constructed PrismaClient; each call creates a new instance
4 |     console.log("Initializing database client"); // emitted on every call, before the client is constructed
5 | 
6 |     return new PrismaClient();
7 | }
8 | 


--------------------------------------------------------------------------------
/packages/core/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |     "extends": "@repo/typescript-config/base.json",
3 |     "include": ["src/**/*.ts"],
4 |     "exclude": ["node_modules"],
5 |     "compilerOptions": {
6 |         "outDir": "dist"
7 |     }
8 | }
9 | 


--------------------------------------------------------------------------------
/packages/eslint-config/README.md:
--------------------------------------------------------------------------------
1 | # `@repo/eslint-config`
2 | 
3 | Collection of internal eslint configurations.
4 | 


--------------------------------------------------------------------------------
/packages/eslint-config/library.js:
--------------------------------------------------------------------------------
 1 | const { resolve } = require("node:path");
 2 | 
 3 | const project = resolve(process.cwd(), "tsconfig.json"); // tsconfig.json in the directory the process was started from
 4 | 
 5 | /** @type {import("eslint").Linter.Config} */
 6 | module.exports = { // Shared ESLint config: recommended + prettier + turbo presets, Node environment
 7 |     extends: ["eslint:recommended", "prettier", "eslint-config-turbo"],
 8 |     plugins: ["only-warn"], // NOTE(review): presumably makes rule violations report as warnings — confirm against eslint-plugin-only-warn
 9 |     globals: {
10 |         React: true, // React and JSX are declared as known globals
11 |         JSX: true
12 |     },
13 |     env: {
14 |         node: true // Node.js globals only; no browser environment in this config
15 |     },
16 |     settings: {
17 |         "import/resolver": {
18 |             typescript: {
19 |                 project // the tsconfig.json path resolved above
20 |             }
21 |         }
22 |     },
23 |     ignorePatterns: [
24 |         // Ignore dotfiles
25 |         ".*.js",
26 |         "node_modules/",
27 |         "dist/"
28 |     ],
29 |     overrides: [
30 |         {
31 |             files: ["*.js?(x)", "*.ts?(x)"] // matches .js/.jsx/.ts/.tsx; this override sets no rules of its own
32 |         }
33 |     ]
34 | };
35 | 


--------------------------------------------------------------------------------
/packages/eslint-config/next.js:
--------------------------------------------------------------------------------
 1 | const { resolve } = require("node:path");
 2 | 
 3 | const project = resolve(process.cwd(), "tsconfig.json"); // tsconfig.json in the directory the process was started from
 4 | 
 5 | /** @type {import("eslint").Linter.Config} */
 6 | module.exports = { // Shared ESLint config that adds the Vercel style guide's Next.js preset; Node and browser environments
 7 |     extends: [
 8 |         "eslint:recommended",
 9 |         "prettier",
10 |         require.resolve("@vercel/style-guide/eslint/next"), // resolved to a concrete module path when this file is loaded
11 |         "eslint-config-turbo"
12 |     ],
13 |     globals: {
14 |         React: true, // React and JSX are declared as known globals
15 |         JSX: true
16 |     },
17 |     env: {
18 |         node: true,
19 |         browser: true
20 |     },
21 |     plugins: ["only-warn"], // NOTE(review): presumably makes rule violations report as warnings — confirm against eslint-plugin-only-warn
22 |     settings: {
23 |         "import/resolver": {
24 |             typescript: {
25 |                 project // the tsconfig.json path resolved above
26 |             }
27 |         }
28 |     },
29 |     ignorePatterns: [
30 |         // Ignore dotfiles
31 |         ".*.js",
32 |         "node_modules/"
33 |     ],
34 |     overrides: [{ files: ["*.js?(x)", "*.ts?(x)"] }] // matches .js/.jsx/.ts/.tsx; this override sets no rules of its own
35 | };
36 | 


--------------------------------------------------------------------------------
/packages/eslint-config/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "@repo/eslint-config",
 3 |     "version": "0.0.0",
 4 |     "private": true,
 5 |     "files": [
 6 |         "library.js",
 7 |         "next.js",
 8 |         "react-internal.js"
 9 |     ],
10 |     "devDependencies": {
11 |         "@vercel/style-guide": "^5.1.0",
12 |         "eslint-config-turbo": "^1.11.3",
13 |         "eslint-config-prettier": "^9.1.0",
14 |         "eslint-plugin-only-warn": "^1.1.0",
15 |         "@typescript-eslint/parser": "^6.17.0",
16 |         "@typescript-eslint/eslint-plugin": "^6.17.0",
17 |         "typescript": "^5.3.3"
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/packages/eslint-config/react-internal.js:
--------------------------------------------------------------------------------
 1 | const { resolve } = require("node:path");
 2 | 
 3 | const project = resolve(process.cwd(), "tsconfig.json"); // tsconfig.json in the directory the process was started from
 4 | 
 5 | /*
 6 |  * This is a custom ESLint configuration for use with
 7 |  * internal (bundled by their consumer) libraries
 8 |  * that utilize React.
 9 |  *
10 |  * This config extends `eslint:recommended`, `prettier` and `eslint-config-turbo`.
11 |  * For the Vercel Engineering Style Guide, see https://github.com/vercel/style-guide
12 |  *
13 |  */
14 | 
15 | /** @type {import("eslint").Linter.Config} */
16 | module.exports = {
17 |     extends: ["eslint:recommended", "prettier", "eslint-config-turbo"],
18 |     plugins: ["only-warn"], // NOTE(review): presumably makes rule violations report as warnings — confirm against eslint-plugin-only-warn
19 |     globals: {
20 |         React: true, // React and JSX are declared as known globals
21 |         JSX: true
22 |     },
23 |     env: {
24 |         browser: true // browser globals only; the Node environment is not enabled in this config
25 |     },
26 |     settings: {
27 |         "import/resolver": {
28 |             typescript: {
29 |                 project // the tsconfig.json path resolved above
30 |             }
31 |         }
32 |     },
33 |     ignorePatterns: [
34 |         // Ignore dotfiles
35 |         ".*.js",
36 |         "node_modules/",
37 |         "dist/"
38 |     ],
39 |     overrides: [
40 |         // Force ESLint to detect .tsx files
41 |         { files: ["*.js?(x)", "*.ts?(x)"] }
42 |     ]
43 | };
44 | 


--------------------------------------------------------------------------------
/packages/typescript-config/base.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "$schema": "https://json.schemastore.org/tsconfig",
 3 |     "display": "Default",
 4 |     "compilerOptions": {
 5 |         "declaration": true,
 6 |         "declarationMap": true,
 7 |         "esModuleInterop": true,
 8 |         "incremental": false,
 9 |         "isolatedModules": true,
10 |         "lib": ["es2022", "DOM", "DOM.Iterable"],
11 |         "module": "NodeNext",
12 |         "moduleDetection": "force",
13 |         "moduleResolution": "NodeNext",
14 |         "noUncheckedIndexedAccess": true,
15 |         "resolveJsonModule": true,
16 |         "skipLibCheck": true,
17 |         "strict": true,
18 |         "strictNullChecks": true,
19 |         "target": "ES2022"
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/packages/typescript-config/nextjs.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "$schema": "https://json.schemastore.org/tsconfig",
 3 |     "display": "Next.js",
 4 |     "extends": "./base.json",
 5 |     "compilerOptions": {
 6 |         "plugins": [{ "name": "next" }],
 7 |         "module": "ESNext",
 8 |         "moduleResolution": "Bundler",
 9 |         "allowJs": true,
10 |         "jsx": "preserve",
11 |         "noEmit": true
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/packages/typescript-config/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "@repo/typescript-config",
 3 |     "version": "0.0.0",
 4 |     "private": true,
 5 |     "license": "MIT",
 6 |     "publishConfig": {
 7 |         "access": "public"
 8 |     }
 9 | }
10 | 


--------------------------------------------------------------------------------
/packages/typescript-config/react-library.json:
--------------------------------------------------------------------------------
1 | {
2 |     "$schema": "https://json.schemastore.org/tsconfig",
3 |     "display": "React Library",
4 |     "extends": "./base.json",
5 |     "compilerOptions": {
6 |         "jsx": "react-jsx"
7 |     }
8 | }
9 | 


--------------------------------------------------------------------------------
/pnpm-workspace.yaml:
--------------------------------------------------------------------------------
1 | packages:
2 |     - "apps/*"
3 |     - "packages/*"
4 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |     "extends": "@repo/typescript-config/base.json"
3 | }
4 | 


--------------------------------------------------------------------------------
/turbo.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "$schema": "https://turbo.build/schema.json",
 3 |     "globalDependencies": ["**/.env.*local"],
 4 |     "globalEnv": [
 5 |         "PUBLIC_TYPESENSE_URL",
 6 |         "PUBLIC_TYPESENSE_SEARCH_API_KEY",
 7 |         "SUPABASE_URL",
 8 |         "SUPABASE_PRIVATE_KEY",
 9 |         "DATABASE_URL",
10 |         "INTERNAL_API_URL",
11 |         "OPENAI_API_KEY",
12 |         "COHERE_API_KEY",
13 |         "PINECONE_API_KEY",
14 |         "TYPESENSE_URL",
15 |         "TYPESENSE_ADMIN_API_KEY"
16 |     ],
17 |     "tasks": {
18 |         "build": {
19 |             "dependsOn": ["^build"],
20 |             "outputs": ["dist/**", ".next/**", "!.next/cache/**", ".svelte-kit/**", ".svelte/**"]
21 |         },
22 |         "lint": {
23 |             "dependsOn": ["^lint"]
24 |         },
25 |         "dev": {
26 |             "cache": false,
27 |             "persistent": true
28 |         }
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------