├── Procfile ├── .eslintrc.json ├── babel.config.js ├── esbuild.config.js ├── jest.config.js ├── ecosystem.config.js ├── src ├── lib │ ├── logger.ts │ ├── schema.ts │ ├── crypto.ts │ ├── error.ts │ ├── contentType.ts │ ├── source.ts │ ├── services.ts │ ├── stream.ts │ ├── response.ts │ ├── mocks.ts │ ├── __tests__ │ │ ├── fetch.test.ts │ │ ├── rank.test.ts │ │ ├── metadata.test.ts │ │ ├── url.test.ts │ │ ├── manifest.test.ts │ │ └── favicon.test.ts │ ├── manifest.ts │ ├── types.ts │ ├── url.ts │ ├── rank.ts │ ├── metadata.ts │ ├── fetch.ts │ ├── cache.ts │ └── favicon.ts ├── app.ts └── api │ └── favicon.ts ├── tsconfig.json ├── .gitignore ├── .github └── workflows │ └── test.yml ├── LICENSE ├── README.md └── package.json /Procfile: -------------------------------------------------------------------------------- 1 | web: npm start 2 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: ['@babel/preset-env'] 3 | } 4 | -------------------------------------------------------------------------------- /esbuild.config.js: -------------------------------------------------------------------------------- 1 | require("esbuild").build({ 2 | bundle: true, 3 | entryPoints: [ 4 | "src/app.ts" 5 | ], 6 | outfile: "./dist/bundle.js", 7 | packages: "external", 8 | platform: "node", 9 | sourcemap: true, 10 | }); 11 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | moduleNameMapper: { 3 | '^@/(.*)$': '/src/$1' 4 | }, 5 | preset: 'ts-jest', 
6 | transform: { 7 | '^.+\\.(ts|tsx)?$': 'ts-jest', 8 | '^.+\\.(js|jsx)$': 'babel-jest', 9 | } 10 | }; 11 | -------------------------------------------------------------------------------- /ecosystem.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | apps : [{ 3 | name: 'raycast-favicons', 4 | script: './dist/bundle.js', 5 | instances: 1, 6 | max_memory_restart: '512M', 7 | env: { 8 | NODE_ENV: "production", 9 | }, 10 | env_production: { 11 | NODE_ENV: "production", 12 | } 13 | }], 14 | }; 15 | -------------------------------------------------------------------------------- /src/lib/logger.ts: -------------------------------------------------------------------------------- 1 | import winston from "winston"; 2 | 3 | const logger = winston.createLogger({ 4 | level: process.env.NODE_ENV === "production" ? "info" : "debug", 5 | transports: [ 6 | new winston.transports.Console({ 7 | format: winston.format.combine( 8 | winston.format.colorize(), 9 | winston.format.simple() 10 | ), 11 | }), 12 | ], 13 | }); 14 | 15 | export default logger; 16 | -------------------------------------------------------------------------------- /src/lib/schema.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | 3 | export const urlSchema = () => 4 | z 5 | .string() 6 | .refine( 7 | (data) => { 8 | try { 9 | new URL(data); 10 | return true; 11 | } catch { 12 | return false; 13 | } 14 | }, 15 | { 16 | message: "Invalid URL", 17 | } 18 | ) 19 | .transform((data) => new URL(data)); 20 | -------------------------------------------------------------------------------- /src/lib/crypto.ts: -------------------------------------------------------------------------------- 1 | import crypto from "crypto"; 2 | 3 | export function sha256(input: string) { 4 | return crypto.createHash("sha256").update(input).digest("hex"); 5 | } 6 | 7 | export async function blobDigest(blob: 
Blob) { 8 | const hash = crypto.createHash("sha256"); 9 | const arrayBuffer = await blob.arrayBuffer(); 10 | const buffer = Buffer.from(arrayBuffer); 11 | hash.update(buffer); 12 | return hash.digest("hex"); 13 | } 14 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es6", 4 | "moduleResolution": "node", 5 | "outFile": "./build/build.js", 6 | "allowJs": true, 7 | "skipLibCheck": true, 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "jsx": "preserve", 11 | "incremental": true, 12 | "paths": { 13 | "@/*": ["./src/*"] 14 | }, 15 | }, 16 | "include": ["src/**/*.ts"], 17 | "exclude": ["node_modules"] 18 | } 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
/** Machine-readable error codes an {@link APIError} can carry. */
export type ErrorType =
  | "missing_url"
  | "invalid_url"
  | "invalid_size"
  | "invalid_dpr"
  | "internal";

/**
 * Error that carries an HTTP status and a machine-readable code alongside the
 * human-readable message.
 */
export class APIError extends Error {
  /** HTTP status code associated with this error. */
  status: number;
  /** Machine-readable code; kept as `ErrorType` so consumers can switch on it. */
  code: ErrorType;

  /**
   * @param status HTTP status code.
   * @param code Machine-readable error code.
   * @param message Human-readable description (stored by `Error` itself).
   */
  constructor(status: number, code: ErrorType, message: string) {
    super(message);
    // Without this, stack traces and logs label the error as a plain "Error".
    this.name = "APIError";
    this.status = status;
    this.code = code;
  }
}

/** Builds the generic 500 "internal" error. */
export function makeInternalError(): APIError {
  return new APIError(500, "internal", "Internal error");
}
/** Size descriptor carried by link/manifest icons; absent when no size was declared. */
type ReferencedIconSize = ReferenceIconSource["size"];

/**
 * Type guard: true when the icon came from a `link` or `manifest` source,
 * i.e. it carries an `href`.
 */
export function isReferencedIcon(
  icon: IconSource
): icon is ReferenceIconSource {
  return icon.source === "link" || icon.source === "manifest";
}

/**
 * Structural equality for icon sizes.
 *
 * Sizes are plain objects, so `===` only succeeds when both operands are the
 * very same instance. Two icons built separately (for example one freshly
 * parsed and one deserialised) would never compare equal by reference, so the
 * fields are compared instead. For `multiple`, entries are compared in order.
 */
function isSameIconSize(a: ReferencedIconSize, b: ReferencedIconSize): boolean {
  if (a == null || b == null) {
    return a == null && b == null;
  }

  switch (a.type) {
    case "any":
      return b.type === "any";
    case "single":
      return (
        b.type === "single" && a.width === b.width && a.height === b.height
      );
    case "multiple": {
      if (b.type !== "multiple" || a.sizes.length !== b.sizes.length) {
        return false;
      }
      const otherSizes = b.sizes;
      return a.sizes.every(
        (size, index) =>
          size.width === otherSizes[index].width &&
          size.height === otherSizes[index].height
      );
    }
    default:
      return false;
  }
}

/**
 * Returns true when `icon` describes the same referenced icon as `target`.
 *
 * Link icons match on `href`, `type` and size; manifest icons match on `href`
 * and size. Icons from different sources, and non-referenced icons, never match.
 */
export function isSameReferencedIcon(
  target: ReferenceIconSource,
  icon: IconSource
): boolean {
  if (!isReferencedIcon(icon)) {
    return false;
  }

  if (target.source === "link" && icon.source === "link") {
    return (
      target.href === icon.href &&
      target.type === icon.type &&
      isSameIconSize(target.size, icon.size)
    );
  }

  if (target.source === "manifest" && icon.source === "manifest") {
    return (
      target.href === icon.href && isSameIconSize(target.size, icon.size)
    );
  }

  return false;
}
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
/**
 * Wraps a `ReadableStream` in an Observable that emits each chunk as it is
 * read and enforces a cumulative size limit.
 *
 * @param stream Stream to read; a reader lock is taken on subscription.
 * @param byteLimit Maximum total number of bytes that may be received.
 * @returns Observable that emits chunks, completes when the stream ends, and
 *   errors when the limit is exceeded or the underlying read fails.
 */
export function read$(
  stream: ReadableStream<Uint8Array>,
  byteLimit: number
): Observable<Uint8Array> {
  return new Observable<Uint8Array>((observer) => {
    const reader = stream.getReader();
    let bytesReceived = 0;
    let closed = false;

    const pump = async (): Promise<void> => {
      try {
        while (!closed) {
          const { done, value } = await reader.read();

          // The subscriber went away while the read was in flight.
          if (closed) {
            return;
          }

          if (done) {
            observer.complete();
            return;
          }

          bytesReceived += value.length;
          if (bytesReceived > byteLimit) {
            observer.error(
              new Error(`Maximum size limit ${byteLimit} bytes exceeded`)
            );
            return;
          }

          observer.next(value);
        }
      } catch (error) {
        // A failed read must reach the subscriber instead of becoming an
        // unhandled promise rejection. Failures after teardown are ignored.
        if (!closed) {
          observer.error(error);
        }
      }
    };

    void pump();

    return () => {
      closed = true;
      // Cancel rather than only releasing the lock: releasing with a read
      // still pending either throws or rejects that read (depending on the
      // runtime's Streams version), and an un-cancelled body can keep
      // delivering data after the limit was hit. Cancelling settles any
      // pending read, after which the lock can be released safely.
      const release = () => {
        try {
          reader.releaseLock();
        } catch {
          // Nothing further to clean up.
        }
      };
      reader.cancel().then(release, release);
    };
  });
}
const oneDay = 24 * 60 * 60 * 1000;

/**
 * Derives a cache expiry date from a response's caching headers.
 *
 * `Cache-Control` is consulted first (`s-maxage`, then `max-age`), followed by
 * `Expires`.
 *
 * @returns The expiry date, or `null` when no usable header is present.
 */
export function cacheExpiryFromResponse(response: Response): Date | null {
  const now = new Date();

  const cacheControl = response.headers.get("cache-control");
  if (cacheControl != null) {
    const directives = parse(cacheControl);
    // `??` rather than `||`: a max-age of 0 is a real value, not "missing".
    const maxAge = directives["s-maxage"] ?? directives["max-age"];
    if (maxAge != null && Number.isFinite(maxAge)) {
      return new Date(now.getTime() + maxAge * 1000);
    }
  }

  const expires = response.headers.get("expires");
  if (expires != null) {
    // `new Date(string)` never throws; an unparseable value yields an
    // Invalid Date, which has to be detected explicitly.
    const expiry = new Date(expires);
    if (!Number.isNaN(expiry.getTime())) {
      return expiry;
    }
  }

  return null;
}

/**
 * Clamps `date` so that it is at least one day in the future.
 * An Invalid Date is treated as "too early" and replaced by the minimum.
 */
export function minimumExpiryDate(date: Date): Date {
  const now = new Date();
  const minimumTimestamp = now.getTime() + oneDay;

  // Written as `>=` so a NaN timestamp falls through to the minimum.
  if (date.getTime() >= minimumTimestamp) {
    return date;
  }
  return new Date(minimumTimestamp);
}

/**
 * Builds the headers for an icon response.
 *
 * @param size Body length in bytes; `Content-Length` is omitted when absent.
 * @param expiry Moment the icon expires; converted to a non-negative `max-age`.
 */
export function responseHeaders({
  size,
  expiry,
}: {
  size?: number;
  expiry: Date;
}): OutgoingHttpHeaders {
  const now = new Date();
  const delta = Math.max(expiry.getTime() - now.getTime(), 0);
  const maxAge = Math.ceil(delta / 1000);

  return {
    "Content-Disposition": "inline",
    ...(size != null ? { "Content-Length": size.toFixed(0) } : {}),
    "Cache-Control": `public, max-age=${maxAge}`,
  };
}

/** Converts an `APIError` into a JSON payload plus the status to send. */
export function errorResponse(error: APIError) {
  return {
    json: { code: error.code, message: error.message },
    status: error.status,
  };
}
afterEach(() => {
  jest.restoreAllMocks();
});

describe("Image fetch", () => {
  test("successful fetch returns result", async () => {
    mockFetch(jest, {
      "https://example.com/image.png": () => ({
        headers: new Headers({ "content-type": "image/png" }),
        body: mockReadableStream(testImageData),
      }),
    });

    const image$ = fetchImage$(new URL("https://example.com/image.png"));
    const result = await firstValueFrom(image$);

    expect(result.source.toString()).toEqual("https://example.com/image.png");
    expect(result.blob.size).toEqual(5);
    expect(result.blob.type).toEqual("image/png");
  });

  test("fetch with invalid content type throws error", async () => {
    mockFetch(jest, {
      "https://example.com/image.png": () => ({
        headers: new Headers({ "content-type": "text/html" }),
        body: mockReadableStream(testImageData),
      }),
    });

    const image$ = fetchImage$(new URL("https://example.com/image.png"));
    // `.rejects` is asynchronous: without `await` the test returns before the
    // assertion settles and can never fail.
    await expect(firstValueFrom(image$)).rejects.toThrow();
  });

  test("fetch with failure throws error", async () => {
    mockFetch(jest, {
      "https://example.com/image.png": () => {
        throw new Error();
      },
    });

    const image$ = fetchImage$(new URL("https://example.com/image.png"));
    await expect(firstValueFrom(image$)).rejects.toThrow();
  });
});
/**
 * Fetches the web application manifest at `url` and emits the icons it declares.
 *
 * The body is read through `read$` with `pageByteLimit` as the size cap, decoded
 * as text and parsed as JSON. Relative icon paths are resolved against the
 * response URL when it parses, otherwise against `url`.
 *
 * Errors (missing body, size limit, invalid JSON or manifest shape) surface as
 * an error notification on the returned observable.
 */
export function iconsFromManifest$(url: URL) {
  return fetchManifest$(url).pipe(
    switchMap((response) => {
      if (response.body == null) {
        throw new Error("Missing response body");
      }

      return read$(response.body, pageByteLimit).pipe(
        reduce((acc, chunk) => [...acc, chunk], new Array<Uint8Array>()),
        map((chunks) => {
          // `decode()` accepts ONE buffer plus an options bag, so spreading
          // the chunk list would decode only the first chunk. Decode each
          // chunk in streaming mode (so multi-byte characters split across
          // chunk boundaries survive) and flush at the end.
          const decoder = new TextDecoder();
          const jsonString =
            chunks
              .map((chunk) => decoder.decode(chunk, { stream: true }))
              .join("") + decoder.decode();
          const baseURL = makeURLWithoutThrowing(response.url) || url;
          return [jsonString, baseURL] as const;
        })
      );
    }),
    map(([jsonString, baseURL]) => parseManifest(jsonString, baseURL))
  );
}

/**
 * Parses manifest JSON and returns the usable entries of its `icons` array.
 *
 * @throws Error("Invalid manifest") when the document is not an object or has
 *   no `icons` array; `JSON.parse` errors propagate unchanged.
 */
function parseManifest(jsonString: string, baseURL: URL): ManifestIconSource[] {
  const json: unknown = JSON.parse(jsonString);

  if (json == null) {
    throw new Error("Invalid manifest");
  }

  const icons = (json as { icons?: unknown }).icons;
  if (icons == null || !Array.isArray(icons)) {
    throw new Error("Invalid manifest");
  }

  return icons
    .map((icon) => parseIcon(icon, baseURL))
    .filter((icon): icon is ManifestIconSource => icon != null);
}

/**
 * Converts one manifest `icons` entry into a `ManifestIconSource`.
 *
 * @returns `null` for entries that are not objects, lack a string `src`, or
 *   whose `src` cannot be resolved against `baseURL`.
 */
function parseIcon(icon: unknown, baseURL: URL): ManifestIconSource | null {
  // `typeof null` is "object", so null must be excluded explicitly; otherwise
  // one null entry would throw and discard the whole manifest.
  if (typeof icon !== "object" || icon === null) {
    return null;
  }

  const { src, sizes } = icon as { src?: unknown; sizes?: unknown };
  if (typeof src !== "string") {
    return null;
  }

  const url = makeURLWithoutThrowing(src, baseURL);
  if (url == null) {
    return null;
  }

  const iconSize = typeof sizes === "string" ? parseIconSizes(sizes) : null;

  return {
    source: "manifest",
    href: src,
    url,
    size: iconSize != null ? iconSize : undefined,
  };
}
LinkIconSource = { 31 | source: "link", 32 | type: "icon", 33 | href: "/favicon.png", 34 | url: new URL("https://example.com/favicon32.png"), 35 | size: { type: "single", width: 64, height: 64 }, 36 | }; 37 | const icon = bestIcon([favicon, mediumIcon, largeIcon], 64); 38 | expect(icon).toEqual(largeIcon); 39 | }); 40 | 41 | test("icon with exact size takes precedence over larger icons", async () => { 42 | const favicon: LinkIconSource = { 43 | source: "link", 44 | type: "shortcut icon", 45 | href: "/favicon.ico", 46 | url: new URL("https://example.com/favicon.ico"), 47 | }; 48 | const largeIcon: LinkIconSource = { 49 | source: "link", 50 | type: "icon", 51 | href: "/favicon.png", 52 | url: new URL("https://example.com/favicon32.png"), 53 | size: { type: "single", width: 64, height: 64 }, 54 | }; 55 | const appleTouchIcon: LinkIconSource = { 56 | source: "link", 57 | type: "icon", 58 | href: "/apple-touch.png", 59 | url: new URL("https://example.com/apple-touch.png"), 60 | size: { type: "single", width: 180, height: 180 }, 61 | }; 62 | const icon = bestIcon([favicon, largeIcon, appleTouchIcon], 64); 63 | expect(icon).toEqual(largeIcon); 64 | }); 65 | }); 66 | -------------------------------------------------------------------------------- /src/lib/types.ts: -------------------------------------------------------------------------------- 1 | import { S3Client } from "@aws-sdk/client-s3"; 2 | import S3Legacy from "aws-sdk/clients/s3"; 3 | import Redis from "ioredis"; 4 | import { z } from "zod"; 5 | import { urlSchema } from "./schema"; 6 | 7 | export type Services = { 8 | redis: Redis; 9 | s3: S3Client; 10 | s3Legacy: S3Legacy; 11 | }; 12 | 13 | export const allSizes = ["favicon", "32", "64"] as const; 14 | export const SizeParam = z.enum(allSizes); 15 | export type SizeParam = z.infer; 16 | 17 | export const DevicePixelRatioParam = z.union([ 18 | z.literal(1), 19 | z.literal(2), 20 | z.literal(3), 21 | ]); 22 | export type DevicePixelRatioParam = z.infer; 23 | 24 | 
/** `rel` values of a `<link>` tag that are recognised as icon references. */
export const LinkIconType = z.union([
  z.literal("apple-touch-icon"),
  z.literal("apple-touch-icon-precomposed"),
  z.literal("shortcut icon"),
  z.literal("icon"),
]);
export type LinkIconType = z.infer<typeof LinkIconType>;

/**
 * Size information attached to an icon reference: the keyword `any`, exactly
 * one width/height pair, or a list of width/height pairs.
 */
export const IconSize = z.union([
  z.object({ type: z.literal("any") }),
  z.object({
    type: z.literal("single"),
    width: z.number(),
    height: z.number(),
  }),
  z.object({
    type: z.literal("multiple"),
    sizes: z.array(z.object({ width: z.number(), height: z.number() })),
  }),
]);
export type IconSize = z.infer<typeof IconSize>;

// The three places an icon can be discovered. `source` is the discriminant
// of the `IconSource` union below.
const FaviconIconSource = z.object({
  source: z.literal("favicon.ico"),
  url: urlSchema(),
});
const LinkIconSource = z.object({
  source: z.literal("link"),
  type: LinkIconType,
  href: z.string(),
  url: urlSchema(),
  data: z.boolean().optional(), // Is this an inline data reference?
  size: IconSize.optional(),
});
const ManifestIconSource = z.object({
  source: z.literal("manifest"),
  href: z.string(),
  url: urlSchema(),
  size: IconSize.optional(),
});
export type FaviconIconSource = z.infer<typeof FaviconIconSource>;
export type LinkIconSource = z.infer<typeof LinkIconSource>;
export type ManifestIconSource = z.infer<typeof ManifestIconSource>;

export const IconSource = z.union([
  FaviconIconSource,
  LinkIconSource,
  ManifestIconSource,
]);
export type IconSource = z.infer<typeof IconSource>;

/**
 * A resolved icon together with every icon source that was discovered.
 * `expiry` and `lastAccess` are accepted as strings and transformed into
 * `Date` objects when parsed.
 */
export const FoundIcon = z.object({
  image: z.object({
    originURL: z.string().url(),
    cacheURL: z.string().url(),
  }),
  foundIcons: z.array(IconSource),
  expiry: z.string().transform((str) => new Date(str)),
  lastAccess: z.string().transform((str) => new Date(str)),
});
export type FoundIcon = z.infer<typeof FoundIcon>;

// Cached icon metadata.
/**
 * `objectKey` identifies the stored image; `expiry` and `lastAccess` are
 * accepted as strings and transformed into `Date` objects when parsed.
 */
export const IconMetadata = z.object({
  objectKey: z.string(),
  expiry: z.string().transform((str) => new Date(str)),
  lastAccess: z.string().transform((str) => new Date(str)),
});
export type IconMetadata = z.infer<typeof IconMetadata>;

export const StatsResponse = z.object({
  metadata: z.nullable(FoundIcon),
});
export type StatsResponse = z.infer<typeof StatsResponse>;

/** Icon sources that reference an image from a page or manifest. */
export type ReferenceIconSource = LinkIconSource | ManifestIconSource;

/** A downloaded icon image, where it came from and when it expires. */
export interface IconImage {
  source: URL;
  blob: Blob;
  expiry: Date;
}

/** A downloaded image paired with the source that referenced it. */
export interface Icon {
  image: IconImage;
  source: IconSource;
}

// --------------------------------------------------------------------------------
// /src/lib/url.ts:
// --------------------------------------------------------------------------------
import net from "net";
import { ParsedDomain, ParseError, parse as parseHost } from "psl";

/**
 * Builds a URL from `urlString`, optionally resolved against `base`.
 *
 * @returns the parsed URL, or `null` when the input cannot be parsed.
 */
export function makeURLWithoutThrowing(urlString: string, base?: string | URL) {
  try {
    return new URL(urlString, base);
  } catch {
    return null;
  }
}

/**
 * Parses user input into an https URL. Input without a scheme gets
 * `https://` prepended and an `http://` scheme is upgraded to `https://`.
 *
 * @throws TypeError when the resulting string is not a valid URL.
 */
export function parseURL(urlString: string) {
  let candidate = urlString;

  // `[A-Za-z]` rather than `[A-z]`: the latter also matches `[ \ ] ^ _` and the backtick.
  if (!candidate.match(/^[A-Za-z]+:\/\//)) {
    candidate = `https://${candidate}`;
  }

  // Upgrade http to https (schemes are case-insensitive).
  candidate = candidate.replace(/^http:\/\//i, "https://");

  return new URL(candidate);
}

/**
 * Returns the URL as `protocol//host/path` with every path component longer
 * than six characters replaced by `***`. Query and fragment are dropped.
 */
export function obfuscateURL(url: URL) {
  const obfuscatedPath = url.pathname
    .split("/")
    .map((component) => (component.length <= 6 ? component : "***"))
    .join("/");
  return `${url.protocol}//${url.host}${obfuscatedPath}`;
}

/**
 * Decides whether a URL may be fetched: it must be https, must not point at
 * an IP address or localhost, and may only use port 443.
 */
export function isValidURL(url: URL) {
  if (url.protocol.toLowerCase() !== "https:") {
    return false;
  }

  // `hostname` never contains the port. IPv6 literals are serialised inside
  // brackets, so strip them before asking `net.isIP`; splitting `host` on ":"
  // would turn "[::1]" into "[" and let IPv6 addresses through. A single
  // trailing dot (fully-qualified form) is removed so "localhost." is caught.
  const host = url.hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, "")
    .replace(/\.$/, "");
  if (host.length === 0) {
    return false;
  }

  if (net.isIP(host) !== 0) {
    return false;
  }

  // Disallowing http should be enough but also block localhost to be safe.
  if (host === "localhost") {
    return false;
  }

  // If a port is specified only allow 443 for https.
  if (url.port.length > 0 && url.port !== "443") {
    return false;
  }

  return true;
}

/** True when the string starts with neither `scheme://` nor `//`. */
export function isRelativeURL(string: string) {
  return string.match(/^([A-Za-z]+:)?\/\//) == null;
}

/** True when the string starts like a base64 `data:` URL. */
export function isBase64DataURL(string: string) {
  // Use basic pattern for MIME type.
  return string.match(/^data:([a-zA-Z]+\/[a-zA-Z0-9\-+.]+);base64,/) != null;
}

/**
 * Splits a base64 `data:` URL into its MIME type and payload.
 *
 * @returns `{ base64, type }`, or `null` when the URL does not have exactly
 *   one comma or its header is not a base64 data header.
 */
export function parseBase64DataURL(url: URL) {
  const components = url.toString().split(",");
  if (components.length !== 2) {
    return null;
  }

  const match = components[0].match(
    /^data:([a-zA-Z]+\/[a-zA-Z0-9\-+.]+);base64/
  );
  const mimeType = (match || [])[1];
  if (match == null || mimeType == null || mimeType.length == 0) {
    return null;
  }

  return { base64: components[1], type: mimeType };
}

/**
 * Returns the `/favicon.ico` URL for the given base URL. The input is copied
 * so the caller's URL object is left untouched.
 */
export function faviconURL(baseURL: URL) {
  const url = new URL(baseURL);
  url.pathname = "/favicon.ico";
  return url;
}

function getTLD(host: string) {
  const parsed = parseHost(host);
  if (parsed == null || isParseError(parsed)) {
    return null;
  }

  return parsed.tld;
}

function isParseError(result: ParsedDomain | ParseError): result is ParseError {
  return (result as ParseError).error !== undefined;
}

/**
 * Lists candidate base URLs for `url`, starting with the full host and then
 * dropping the left-most label one at a time, limited to
 * `max(maximumSubdomains, 1)` entries. When no public suffix can be
 * determined, a single copy of `url` with an empty path is returned.
 */
export function baseURLs(url: URL, maximumSubdomains: number) {
  const host = url.host;
  const tld = getTLD(host);
  const suffix = tld != null ? `.${tld}` : null;

  if (suffix == null || !host.endsWith(suffix)) {
    // Work on a copy so the caller's URL keeps its path.
    const baseURL = new URL(url);
    baseURL.pathname = "";
    return [baseURL];
  }

  const domainComponents = host
    .slice(0, -suffix.length)
    .split(".")
    .filter((component) => component.length > 0);

  const urls: URL[] = [];

  for (let i = 0; i < domainComponents.length; i++) {
    const candidateHost = `${domainComponents.slice(i).join(".")}.${tld}`;
    const baseURL = makeURLWithoutThrowing(`${url.protocol}//${candidateHost}`);
    if (baseURL != null) {
      urls.push(baseURL);
    }
  }

  return urls.slice(0, Math.max(maximumSubdomains, 1));
}

/**
 * Resolves an href found on a page into one or more absolute URLs. A
 * path-relative href on a page that is not at the site root yields two
 * candidates: one relative to the page and one relative to the root.
 */
export function resolvedURLsFromRelative(url: string, baseURL: URL) {
  if (!isRelativeURL(url)) {
    // The base is ignored for absolute URLs but is required for
    // protocol-relative ones ("//host/path"), which otherwise fail to parse.
    return [new URL(url, baseURL)];
  }

  const pathComponents = baseURL.pathname.replace(/^\//, "").split("/");
  if (!url.startsWith("/") && pathComponents.length > 1) {
    return [new URL(url, baseURL), new URL(`/${url}`, baseURL)];
  }

  return [new URL(url, baseURL)];
}

// --------------------------------------------------------------------------------
// /src/lib/rank.ts:
// --------------------------------------------------------------------------------
import { IconLoadResult, smallestIconDimension } from "./favicon";
import {
  DevicePixelRatioParam,
  LinkIconSource,
  LinkIconType,
  ManifestIconSource,
  SizeParam,
} from "./types";

/**
 * Picks the best referenced icon for the requested size and device pixel
 * ratio. `"favicon"` uses the favicon-specific ordering; `"32"` and `"64"`
 * target that many points multiplied by `dpr`.
 */
export function bestReferencedIcon(
  sources: (LinkIconSource | ManifestIconSource)[],
  size: SizeParam,
  dpr: DevicePixelRatioParam
) {
  switch (size) {
    case "favicon":
      return bestFavicon(sources, dpr);
    case "32":
      return bestIcon(sources, 32 * dpr);
    case "64":
      return bestIcon(sources, 64 * dpr);
  }
}

/**
 * Picks the best small icon among `<link>` sources, trying each link type in
 * priority order and returning the first type that has any candidate.
 * Manifest sources are never selected here.
 */
export function bestFavicon(
  sources: (LinkIconSource | ManifestIconSource)[],
  dpr: DevicePixelRatioParam
): LinkIconSource | ManifestIconSource | null {
  const typesByPriority: LinkIconType[] = [
    "icon",
    "shortcut icon",
    "apple-touch-icon",
    "apple-touch-icon-precomposed",
  ];

  // Prioritise small icons for favicons since usually these are redrawn to look good at small sizes.
  const targetDimension = Math.min(16 * dpr, 32);

  for (const type of typesByPriority) {
    const candidates = sources.filter(
      (icon) => icon.source === "link" && icon.type === type
    );
    const icon = bestIcon(candidates, targetDimension);
    if (icon != null) {
      return icon;
    }
  }

  return null;
}

/**
 * Returns the source whose smallest dimension best fits `dimension`, or
 * `null` when `sources` is empty. On ties the earliest source wins.
 */
export function bestIcon(
  sources: (LinkIconSource | ManifestIconSource)[],
  dimension: number
) {
  // Where the delta (the amount the smallest dimension of the icon source is larger than our
  // target dimension) is positive, we want to prioritise values that are closest to but larger than
  // the target dimension. Where they are equal the rank will be +Infinity as we want to rank this
  // icon source the highest. The larger the source icons are than the target value, the more the
  // rank decreases (while staying positive), tending towards zero. y=1/x exhibits this behaviour.
  //
  // Where the delta is negative, we have icons that are smaller than the target dimension. We want
  // to rank these so that the closest (but smaller) icons have a smaller negative value than the
  // ones which are further away from the target value.
  //
  // From these two computations, larger icons get a rank > 0 and smaller icons get a rank < 0, and
  // within each group the icon closest to the target dimension ranks highest.
  //
  // Icons with no sizing information will be considered last because we don't know how big they are.
  const rank = (source: LinkIconSource | ManifestIconSource) => {
    const smallestDimension = smallestIconDimension(source);
    if (smallestDimension == null) {
      return -Infinity;
    }

    const delta = smallestDimension - dimension;
    return delta >= 0 ? 1 / delta : delta;
  };

  // Single pass keeping the first source with the highest rank. This avoids
  // subtracting infinities in a sort comparator (which yields NaN).
  let best: {
    source: LinkIconSource | ManifestIconSource;
    rank: number;
  } | null = null;
  for (const source of sources) {
    const candidateRank = rank(source);
    if (best == null || candidateRank > best.rank) {
      best = { source, rank: candidateRank };
    }
  }

  return best == null ? null : best.source;
}

type IconLoadResults = {
  favicon: IconLoadResult;
  page: IconLoadResult;
};

/**
 * Chooses between the `/favicon.ico` result and the page result, applying
 * host-specific rules first and the default preference otherwise.
 */
export function bestResult(url: URL, results: IconLoadResults): IconLoadResult {
  const exception = ruleBasedDecision(url, results);
  if (exception != null) {
    return exception;
  }

  return defaultDecision(url, results);
}

/** Host-specific overrides; returns `undefined` when no rule applies. */
function ruleBasedDecision(
  url: URL,
  results: IconLoadResults
): IconLoadResult | undefined {
  const { favicon, page } = results;
  const foundIcons = foundIconsFromResults(results);

  // Favicon for developer.apple.com looks better
  if (url.host.toLowerCase() === "developer.apple.com") {
    return {
      icon: favicon.icon || page.icon,
      foundIcons,
    };
  }

  return undefined;
}

function defaultDecision(url: URL, results: IconLoadResults): IconLoadResult {
  const { favicon, page } = results;

  // Always favour the results from loading the original HTML page since this gives us
  // richer information.
  return {
    icon: page.icon || favicon.icon,
    foundIcons: foundIconsFromResults(results),
  };
}

/** Concatenates the icons found via `/favicon.ico` and via the page, in that order. */
function foundIconsFromResults(results: IconLoadResults) {
  const { favicon, page } = results;
  return [...favicon.foundIcons, ...page.foundIcons];
}

// --------------------------------------------------------------------------------
// /src/lib/metadata.ts:
// --------------------------------------------------------------------------------
import * as htmlparser2 from "htmlparser2";
import { Observable, Subscription } from "rxjs";
// Import node's definition since we won't be using this on the client.
import { IconSize, LinkIconSource, LinkIconType } from "@/lib/types";
import { pageByteLimit } from "./fetch";
import { read$ } from "./stream";
import { isBase64DataURL, isValidURL, makeURLWithoutThrowing } from "./url";

/**
 * Type guard for icon `rel` values. The comparison is exact, so callers must
 * lower-case the value first; this keeps the narrowed type truthful.
 */
function isLinkIconType(type: string): type is LinkIconType {
  return [
    "apple-touch-icon",
    "apple-touch-icon-precomposed",
    "shortcut icon",
    "icon",
  ].includes(type);
}

/**
 * Streams an HTML document through a parser and collects the `<link>` icons
 * and manifest URL it declares.
 *
 * The observable emits exactly one value and completes, either as soon as
 * `</head>` has been seen or when the body stream ends. Errors from the body
 * stream are forwarded.
 *
 * @param body the response body to read (bounded by `pageByteLimit`).
 * @param baseURL URL that relative hrefs are resolved against.
 */
export function metadataFromHTMLPage$(
  body: ReadableStream,
  baseURL: URL
): Observable<{
  linkIcons: LinkIconSource[];
  manifestURL: URL | null;
}> {
  return new Observable((observer) => {
    const sources: LinkIconSource[] = [];
    // Initialised explicitly so the emitted value is `null`, not `undefined`,
    // when the page declares no manifest.
    let manifestURL: URL | null = null;
    let finished = false;

    const parser = new htmlparser2.Parser({
      onopentag(name, attributes) {
        if (name !== "link") {
          return;
        }

        const rel = attributes["rel"];
        const href = attributes["href"];
        const sizes = attributes["sizes"];

        if (typeof href !== "string" || typeof rel !== "string") {
          return;
        }

        const result = processLinkTag(rel, href, sizes, baseURL);
        if (result == null) {
          return;
        }

        switch (result.type) {
          case "icon":
            sources.push(result.source);
            break;
          case "manifest":
            // If there are multiple manifest tags specified the last one
            // will always take precedence.
            manifestURL = result.url;
            break;
        }
      },
      onclosetag(name) {
        if (name === "head") {
          finished = true;
        }
      },
      onend() {
        finished = true;
      },
    });

    const decoder = new TextDecoder();

    // Parent subscription: if it is closed before the source subscription is
    // added (synchronous emission), `add` tears the source down immediately.
    const subscription = new Subscription();
    let completed = false;

    const complete = () => {
      if (!completed) {
        completed = true;
        observer.next({ linkIcons: sources, manifestURL });
        observer.complete();
      }
    };

    subscription.add(
      read$(body, pageByteLimit).subscribe({
        next(chunk) {
          // `stream: true` keeps a multi-byte character that is split across
          // two chunks intact instead of decoding it as replacement characters.
          parser.write(decoder.decode(chunk, { stream: true }));
          if (finished) {
            complete();
            subscription.unsubscribe();
          }
        },
        error(err) {
          parser.end();
          observer.error(err);
        },
        complete() {
          // Flush any bytes the streaming decoder is still holding.
          parser.write(decoder.decode());
          parser.end();
          complete();
        },
      })
    );

    return () => subscription.unsubscribe();
  });
}

/**
 * Interprets one `<link>` tag.
 *
 * @returns an icon source, a manifest URL, or `null` when the tag is neither
 *   or its href is unusable.
 */
function processLinkTag(
  rel: string,
  href: string,
  sizes: string | undefined,
  baseURL: URL
):
  | { type: "icon"; source: LinkIconSource }
  | { type: "manifest"; url: URL }
  | null {
  // `rel` is case-insensitive in HTML; normalise once so the stored type
  // always equals one of the `LinkIconType` literals.
  const normalizedRel = rel.trim().toLowerCase();

  if (isLinkIconType(normalizedRel)) {
    const url = makeURLWithoutThrowing(href, baseURL);
    if (url == null) {
      return null;
    }

    const size = (sizes ? parseIconSizes(sizes) : null) || undefined;

    if (isBase64DataURL(href)) {
      return {
        type: "icon",
        source: {
          source: "link",
          type: normalizedRel,
          url,
          href: "", // Use empty href otherwise we will store the data twice
          data: true,
          size,
        },
      };
    }

    if (!isValidURL(url)) {
      return null;
    }

    return {
      type: "icon",
      source: {
        source: "link",
        type: normalizedRel,
        url,
        href,
        size,
      },
    };
  }

  if (normalizedRel === "manifest") {
    const url = makeURLWithoutThrowing(href, baseURL);
    // The manifest is fetched later, so apply the same URL policy as icons.
    if (url == null || !isValidURL(url)) {
      return null;
    }

    return { type: "manifest", url };
  }

  return null;
}

/**
 * Parses the value of a `sizes` attribute.
 *
 * @returns `{ type: "any" }` for the keyword `any`, a single or multiple size
 *   for `WxH` tokens, or `null` when no token contains a `WxH` pair.
 */
export function parseIconSizes(string: string): IconSize | null {
  const normalized = string.toLowerCase().trim();
  if (normalized === "any") {
    return { type: "any" };
  }

  const sizes = normalized
    // Tokens may be separated by any run of whitespace, not just one space.
    .split(/\s+/)
    .map((value) => {
      const match = value.match(/(\d+)x(\d+)/);
      if (match == null) {
        return null;
      }

      const width = parseInt(match[1], 10);
      const height = parseInt(match[2], 10);

      if (Number.isNaN(width) || Number.isNaN(height)) {
        return null;
      }

      return { width, height };
    })
    .filter(
      (value): value is { width: number; height: number } => value != null
    );

  if (sizes.length === 0) {
    return null;
  }

  if (sizes.length === 1) {
    return { type: "single", ...sizes[0] };
  }

  return { type: "multiple", sizes };
}

// --------------------------------------------------------------------------------
// /src/lib/fetch.ts:
// --------------------------------------------------------------------------------
import { Observable, of } from "rxjs";
import { fromFetch } from "rxjs/fetch";
import {
  catchError,
  filter,
  map,
  mergeMap,
  reduce,
  scan,
  switchMap,
  timeout,
} from "rxjs/operators";

import { isHTMLContentType, isImageContentType } from "@/lib/contentType";
import { cacheExpiryFromResponse, minimumExpiryDate } from "./response";
import { read$ } from "./stream";
import { IconImage } from "./types";

// Upper bounds, in bytes, passed to `read$` when reading response bodies.
export const imageByteLimit = 1024 * 1024;
export const pageByteLimit = 2 * 1024 * 1024;
const defaultTimeoutMs = 5000;

/**
 * Request timeout in milliseconds, read from `RAYCAST_FETCH_TIMEOUT_MS`.
 * Falls back to the default when the variable is missing, not a number, or
 * not positive (a zero timeout would fail every request immediately).
 */
function fetchTimeout() {
  const timeoutMs = parseInt(process.env["RAYCAST_FETCH_TIMEOUT_MS"] || "", 10);
  if (Number.isNaN(timeoutMs) || timeoutMs <= 0) {
    return defaultTimeoutMs;
  }

  return timeoutMs;
}

namespace BatchImageLoad {
  export type ImageResult = { type: "image"; url: URL; image: IconImage };
  export type ErrorResult = { type: "error"; error: Error };
  export type LoadingResult = { type: "loading" };
  export type EmptyResult = { type: "none" };

  // NOTE(review): element union inferred from how the state array is filled
  // and scanned by fetchFirstValidImage$ (loading / image / error) — confirm.
  export type FetchState = Array<ImageResult | ErrorResult | LoadingResult>;
}

// Load multiple images in order of precedence and return the first which returns
// a valid result.
43 | export function fetchFirstValidImage$( 44 | imageURLs: URL[] 45 | ): Observable<{ image: IconImage; url: URL }> { 46 | const urlsWithPrecedence = imageURLs.map((url, index) => ({ 47 | url, 48 | precedence: index, 49 | })); 50 | 51 | return of(...urlsWithPrecedence).pipe( 52 | // Fetch all of the image URLs in parallel 53 | mergeMap(({ url, precedence }) => { 54 | return fetchImage$(url).pipe( 55 | map( 56 | (image): BatchImageLoad.ImageResult => ({ type: "image", url, image }) 57 | ), 58 | catchError((error): Observable => { 59 | return of({ type: "error", error: error as Error }); 60 | }), 61 | map((result) => ({ result, precedence })) 62 | ); 63 | }), 64 | 65 | // Scan the results into an array of `imageURLs` length that contains null (no result), 66 | // the returned IconImage value, or the Error returned from loading the image. 67 | scan((acc, { result, precedence }) => { 68 | acc[precedence] = result; 69 | return acc; 70 | }, new Array(imageURLs.length).fill({ type: "loading" }) as BatchImageLoad.FetchState), 71 | 72 | // Find the first successful result 73 | map((results: BatchImageLoad.FetchState) => { 74 | for (const result of results) { 75 | switch (result.type) { 76 | // Ignore leading null values as we want to return the highest-precedence image. 77 | case "loading": 78 | return { type: "loading" }; 79 | case "image": 80 | const { url, image } = result; 81 | return { type: "image", url, image }; 82 | case "error": 83 | break; 84 | } 85 | } 86 | 87 | return { type: "none" }; 88 | }), 89 | 90 | // // Don't emit values while we're loading. 91 | filter( 92 | ( 93 | result 94 | ): result is BatchImageLoad.ImageResult | BatchImageLoad.EmptyResult => 95 | result.type !== "loading" 96 | ), 97 | 98 | // Return the final result. 
99 | map((result) => { 100 | switch (result.type) { 101 | case "image": 102 | return { image: result.image, url: result.url }; 103 | case "none": 104 | throw new Error( 105 | `No valid image found from ${JSON.stringify( 106 | urlsWithPrecedence.map(({ url }) => url) 107 | )}` 108 | ); 109 | } 110 | }) 111 | ); 112 | } 113 | 114 | export function fetchImage$(imageURL: URL): Observable { 115 | return fromFetch(imageURL.toString()).pipe( 116 | switchMap((response) => { 117 | const contentType = response.headers.get("content-type"); 118 | const expiry = minimumExpiryDate( 119 | cacheExpiryFromResponse(response) || new Date() 120 | ); 121 | 122 | if (contentType == null || !isImageContentType(contentType)) { 123 | throw new Error(`Invalid content type ${contentType}`); 124 | } 125 | 126 | if (response.body == null) { 127 | throw new Error("Missing response body"); 128 | } 129 | 130 | return read$(response.body, imageByteLimit).pipe( 131 | reduce((acc, chunk) => [...acc, chunk], new Array()), 132 | map((chunks) => ({ 133 | source: imageURL, 134 | blob: new Blob(chunks, { type: contentType }), 135 | expiry, 136 | })) 137 | ); 138 | }), 139 | timeout(fetchTimeout()) 140 | ); 141 | } 142 | 143 | export function fetchHTMLPage$(url: URL) { 144 | return of(url).pipe( 145 | switchMap((url) => 146 | fromFetch(url.toString(), { 147 | headers: { 148 | "User-Agent": 149 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15", 150 | }, 151 | }) 152 | ), 153 | map((response) => { 154 | const contentType = response.headers.get("content-type"); 155 | if (contentType == null || !isHTMLContentType(contentType)) { 156 | throw new Error(`Invalid content type ${contentType}`); 157 | } 158 | 159 | return response; 160 | }), 161 | timeout(fetchTimeout()) 162 | ); 163 | } 164 | 165 | export function fetchManifest$(url: URL) { 166 | return fromFetch(url.toString(), { 167 | headers: { 168 | "User-Agent": 169 | "Mozilla/5.0 
(Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15", 170 | }, 171 | }).pipe(timeout(fetchTimeout())); 172 | } 173 | -------------------------------------------------------------------------------- /src/lib/cache.ts: -------------------------------------------------------------------------------- 1 | import { 2 | DevicePixelRatioParam, 3 | IconMetadata, 4 | IconSource, 5 | Services, 6 | SizeParam, 7 | } from "@/lib/types"; 8 | import { GetObjectCommand } from "@aws-sdk/client-s3"; 9 | import Redis from "ioredis"; 10 | import { blobDigest, sha256 } from "./crypto"; 11 | import { Icon, IconImage } from "./types"; 12 | import { parseBase64DataURL } from "./url"; 13 | 14 | function redisCacheKey(key: CacheKey) { 15 | const { url, size, dpr } = key; 16 | let host = url.hostname; 17 | 18 | const sizeComponent = () => { 19 | switch (size) { 20 | case "favicon": 21 | return "favicon"; 22 | case "32": 23 | return `${32 * dpr}`; 24 | case "64": 25 | return `${64 * dpr}`; 26 | } 27 | }; 28 | 29 | // Use | as a separator because it's not a valid character in a URL. 
30 | const contents = [host, sizeComponent()] 31 | .filter((component) => component != null) 32 | .join("|"); 33 | 34 | return sha256(contents); 35 | } 36 | 37 | export async function getCachedImage( 38 | key: CacheKey, 39 | redis: Redis 40 | ): Promise { 41 | if (process.env["RAYCAST_IGNORE_CACHE"] === "true") { 42 | return null; 43 | } 44 | 45 | const metadata = await getMetadata(key, redis); 46 | if (metadata == null) { 47 | return null; 48 | } 49 | 50 | const now = new Date(); 51 | if (metadata.expiry < now) { 52 | const { url } = key; 53 | await removeFromCache(key, redis); 54 | return null; 55 | } 56 | 57 | return metadata; 58 | } 59 | 60 | export async function getMetadata( 61 | key: CacheKey, 62 | redis: Redis 63 | ): Promise { 64 | const redisKey = redisCacheKey(key); 65 | const payload = await redis.hgetall(redisKey); 66 | if (payload == null) { 67 | return null; 68 | } 69 | 70 | try { 71 | return await IconMetadata.parseAsync(payload); 72 | } catch (error) { 73 | await redis.del(redisKey); 74 | return null; 75 | } 76 | } 77 | 78 | export type CacheKey = { 79 | url: URL; 80 | size: SizeParam; 81 | dpr: DevicePixelRatioParam; 82 | }; 83 | 84 | export async function setMetadata( 85 | key: CacheKey, 86 | metadata: IconMetadata, 87 | redis: Redis 88 | ) { 89 | const redisKey = redisCacheKey(key); 90 | await redis.hset(redisKey, metadata); 91 | } 92 | 93 | export async function setMetadataPartial( 94 | key: CacheKey, 95 | partial: Partial, 96 | redis: Redis 97 | ) { 98 | const redisKey = redisCacheKey(key); 99 | await redis.hset(redisKey, partial); 100 | } 101 | 102 | export async function getStoredObject(key: string, services: Services) { 103 | const { s3 } = services; 104 | try { 105 | const bucket = process.env.RAYCAST_S3_BUCKET_NAME; 106 | if (bucket == null) { 107 | return null; 108 | } 109 | const { Body, ContentType, ContentLength } = await s3.send( 110 | new GetObjectCommand({ 111 | Bucket: bucket, 112 | Key: key, 113 | }) 114 | ); 115 | 116 | // 
Memory leak in S3 SDK if you don't consume the body; workaround based on: 117 | // https://github.com/aws/aws-sdk-js-v3/issues/5570#issuecomment-1977613960 118 | if (Body) { 119 | const _ = await Body.transformToString(); 120 | } 121 | 122 | return { type: ContentType, size: ContentLength }; 123 | } catch (error) { 124 | return null; 125 | } 126 | } 127 | 128 | export async function getOrStoreObject(blob: Blob, services: Services) { 129 | const key = await blobDigest(blob); 130 | 131 | const storedObject = await getStoredObject(key, services); 132 | if (storedObject != null) { 133 | return key; 134 | } 135 | 136 | const { s3Legacy } = services; 137 | try { 138 | let arrayBuffer: ArrayBuffer | null = await blob.arrayBuffer(); 139 | let buffer: Buffer | null = Buffer.from(arrayBuffer); 140 | await new Promise((resolve, reject) => { 141 | const bucket = process.env.RAYCAST_S3_BUCKET_NAME; 142 | if (buffer == null || bucket == null) { 143 | return; 144 | } 145 | s3Legacy.putObject( 146 | { 147 | Bucket: bucket, 148 | Key: key, 149 | ContentType: blob.type, 150 | Body: buffer, 151 | }, 152 | (err, data) => { 153 | arrayBuffer = null; 154 | buffer = null; 155 | if (err) { 156 | reject(err); 157 | } else { 158 | resolve(); 159 | } 160 | } 161 | ); 162 | }); 163 | } catch (error) { 164 | return null; 165 | } 166 | 167 | return key; 168 | } 169 | 170 | export async function cacheFavicon( 171 | key: CacheKey, 172 | icon: Icon, 173 | services: Services 174 | ) { 175 | const { image, source } = icon; 176 | const { expiry } = image; 177 | const cacheResult = await cacheImage(image, source, services); 178 | if (cacheResult == null) { 179 | return; 180 | } 181 | 182 | const { objectKey } = cacheResult; 183 | const { redis } = services; 184 | await setMetadata( 185 | key, 186 | { 187 | objectKey, 188 | expiry, 189 | lastAccess: new Date(), 190 | }, 191 | redis 192 | ); 193 | } 194 | 195 | async function cacheImage( 196 | image: IconImage, 197 | source: IconSource, 198 | services: 
Services 199 | ): Promise<{ objectKey: string } | null> { 200 | if (source.source === "link" && source.data) { 201 | const parsed = parseBase64DataURL(source.url); 202 | if (parsed == null) { 203 | return null; 204 | } 205 | 206 | const { base64, type } = parsed; 207 | const buffer = Buffer.from(base64, "base64"); 208 | const blob = new Blob([buffer], { type }); 209 | const objectKey = await getOrStoreObject(blob, services); 210 | if (objectKey == null) { 211 | return null; 212 | } 213 | return { objectKey }; 214 | } else { 215 | const { blob } = image; 216 | const objectKey = await getOrStoreObject(blob, services); 217 | if (objectKey == null) { 218 | return null; 219 | } 220 | return { objectKey }; 221 | } 222 | } 223 | 224 | export async function removeFromCache(key: CacheKey, redis: Redis) { 225 | // TODO: remove blob if no remaining references. 226 | const redisKey = redisCacheKey(key); 227 | await redis.del(redisKey); 228 | } 229 | -------------------------------------------------------------------------------- /src/lib/__tests__/metadata.test.ts: -------------------------------------------------------------------------------- 1 | import { mockReadableStream } from "@/lib/mocks"; 2 | import { firstValueFrom } from "rxjs"; 3 | import { metadataFromHTMLPage$ } from "../metadata"; 4 | 5 | describe("Metadata parsing", () => { 6 | test("finds simple link icon", async () => { 7 | const page = ` 8 | 9 | 10 | 11 | 12 | 13 | 14 | `; 15 | 16 | const metadata$ = metadataFromHTMLPage$( 17 | mockReadableStream(page), 18 | new URL("https://example.com") 19 | ); 20 | const { linkIcons } = await firstValueFrom(metadata$); 21 | expect(linkIcons).toEqual([ 22 | { 23 | source: "link", 24 | href: "/favicon.ico", 25 | type: "icon", 26 | url: new URL("https://example.com/favicon.ico"), 27 | }, 28 | ]); 29 | }); 30 | 31 | test("finds multiple link icons", async () => { 32 | const page = ` 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | `; 42 | 43 | const metadata$ = 
metadataFromHTMLPage$( 44 | mockReadableStream(page), 45 | new URL("https://example.com") 46 | ); 47 | const { linkIcons } = await firstValueFrom(metadata$); 48 | expect(linkIcons).toEqual([ 49 | { 50 | source: "link", 51 | href: "/favicon-16x16.png", 52 | type: "icon", 53 | url: new URL("https://example.com/favicon-16x16.png"), 54 | size: { type: "single", width: 16, height: 16 }, 55 | }, 56 | { 57 | source: "link", 58 | href: "/favicon-32x32.png", 59 | type: "icon", 60 | url: new URL("https://example.com/favicon-32x32.png"), 61 | size: { type: "single", width: 32, height: 32 }, 62 | }, 63 | { 64 | source: "link", 65 | href: "/favicon-64x64.png", 66 | type: "icon", 67 | url: new URL("https://example.com/favicon-64x64.png"), 68 | size: { type: "single", width: 64, height: 64 }, 69 | }, 70 | ]); 71 | }); 72 | 73 | test("finds shortcut icons", async () => { 74 | const page = ` 75 | 76 | 77 | 78 | 79 | 80 | 81 | `; 82 | 83 | const metadata$ = metadataFromHTMLPage$( 84 | mockReadableStream(page), 85 | new URL("https://example.com") 86 | ); 87 | const { linkIcons } = await firstValueFrom(metadata$); 88 | expect(linkIcons).toEqual([ 89 | { 90 | source: "link", 91 | href: "/shortcut.png", 92 | type: "shortcut icon", 93 | url: new URL("https://example.com/shortcut.png"), 94 | }, 95 | ]); 96 | }); 97 | 98 | test("finds apple touch icons", async () => { 99 | const page = ` 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | `; 108 | 109 | const metadata$ = metadataFromHTMLPage$( 110 | mockReadableStream(page), 111 | new URL("https://example.com") 112 | ); 113 | const { linkIcons } = await firstValueFrom(metadata$); 114 | expect(linkIcons).toEqual([ 115 | { 116 | source: "link", 117 | href: "apple-touch-icon.png", 118 | type: "apple-touch-icon", 119 | url: new URL("https://example.com/apple-touch-icon.png"), 120 | }, 121 | { 122 | source: "link", 123 | href: "apple-touch-icon-precomposed.png", 124 | type: "apple-touch-icon-precomposed", 125 | url: new 
URL("https://example.com/apple-touch-icon-precomposed.png"), 126 | }, 127 | ]); 128 | }); 129 | 130 | test("finds base64-encoded icons", async () => { 131 | const page = ` 132 | 133 | 134 | 135 | 136 | 137 | 138 | `; 139 | 140 | const metadata$ = metadataFromHTMLPage$( 141 | mockReadableStream(page), 142 | new URL("https://example.com") 143 | ); 144 | const { linkIcons } = await firstValueFrom(metadata$); 145 | expect(linkIcons).toEqual([ 146 | { 147 | source: "link", 148 | href: "", 149 | type: "icon", 150 | url: new URL( 151 | "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAABEVBMVEVHcEz///////////////////////////////////////////////////8AGziAjZtgcIKgqrT/JW+/xs3/BFmcC0vf4+b/DV//Dl//GWb/GWf/M3i+B08jGDyJDUh1EEZ1D0Zvf4+gqbSfqbSvuMD/3+r/v9X/oMD/IWz/MXb/U43/gq0gOFH/h7D/aZzv8fL6IWuufZr/GGfti68NGTo0Fj4TGTrwF2OYDEsjFz0MGjnAB1AIGjl+Dkc+FEBME0H/MnitCU2JDUl5D0b3LXLrIGidC0s1FT8zFj6uCk3/UY30SITfNXPlK22KDkn2OntgEUP/QYIVGTpgEkNCFUD/QoKWGVPRO3RfEUNBFT/kS4MXVO28AAAADXRSTlMAn2Cg78+Q3yAQMI9fslpvQQAAAUBJREFUOMuFk2dDwyAQhklCaGhL1IwO7XLvvWfV1r33+P8/REwiHCS270fugTvu3kNIiJqGjRnDtlGkKC2H8KCQ4WhhWmCaCur1HEspBx7JZ8Q5kdfu+6ODUj54g8bxARdqKHoj/k1cX8lNA3GlTpxxPAtgv0ksBjPUj9qbWxIgvAIGgIPXz+ZpuLMrAEyRCYGvl1YjPKntbyyUkmPzL0MEPD82379vHror68ui58gGwGXr/uP6acQbXhLdshEGwEUjvH3j8fKcADBiAKifX90de9VKMC0brgBup3bmVcvBDJiIksLdO9xeqwSzkwykgEVyrS7OT8GR2so3ZQeltdRGpYEiorgnwAdOegGGHHc2EHmK/A8QYLkE8BXbUmDaicirY9nG72f7/ovD6yB6nOgL7FggiomTsd/UtJL1N8HtHzDFNkJWpxq9AAAAAElFTkSuQmCC" 152 | ), 153 | data: true, 154 | size: { 155 | type: "single", 156 | width: 32, 157 | height: 32, 158 | }, 159 | }, 160 | ]); 161 | }); 162 | }); 163 | -------------------------------------------------------------------------------- /src/lib/__tests__/url.test.ts: -------------------------------------------------------------------------------- 1 | import { 2 | baseURLs, 3 | isRelativeURL, 4 | isValidURL, 5 | obfuscateURL, 6 | parseURL, 7 | resolvedURLsFromRelative, 8 | } from "@/lib/url"; 9 | import { describe, 
expect, test } from "@jest/globals"; 10 | 11 | describe("URL parsing", () => { 12 | test("URL with http is upgraded to https", () => { 13 | expect(parseURL("http://google.com")?.toString()).toBe( 14 | new URL("https://google.com").toString() 15 | ); 16 | }); 17 | 18 | test("URL without protocol is parsed to https", () => { 19 | expect(parseURL("google.com")?.toString()).toBe( 20 | new URL("https://google.com").toString() 21 | ); 22 | }); 23 | }); 24 | 25 | describe("URL obfuscation", () => { 26 | test("simple URLs are not obfuscated", async () => { 27 | const url = new URL("https://google.com"); 28 | expect(await obfuscateURL(url)).toBe("https://google.com/"); 29 | }); 30 | 31 | test("simple components aren't obfuscated", async () => { 32 | const url = new URL("https://google.com/a/b/c"); 33 | expect(await obfuscateURL(url)).toBe("https://google.com/a/b/c"); 34 | }); 35 | 36 | test("longer components are obfuscated", async () => { 37 | const url = new URL("https://google.com/something-sensitive"); 38 | expect(await obfuscateURL(url)).toBe("https://google.com/***"); 39 | }); 40 | }); 41 | 42 | describe("URL validation", () => { 43 | test("URL with http protocol is invalid", () => { 44 | const url = new URL("http://google.com"); 45 | expect(isValidURL(url)).toBe(false); 46 | }); 47 | 48 | test("https://localhost is invalid", () => { 49 | const url = new URL("https://localhost"); 50 | expect(isValidURL(url)).toBe(false); 51 | }); 52 | 53 | test("https://localhost:80 is invalid", () => { 54 | const url = new URL("https://localhost:80"); 55 | expect(isValidURL(url)).toBe(false); 56 | }); 57 | 58 | test("https://127.0.0.1 is invalid", () => { 59 | const url = new URL("https://127.0.0.1"); 60 | expect(isValidURL(url)).toBe(false); 61 | }); 62 | 63 | test("https://127.0.0.1:80 is invalid", () => { 64 | const url = new URL("https://127.0.0.1:80"); 65 | expect(isValidURL(url)).toBe(false); 66 | }); 67 | 68 | test("https://172.16.0.0 is invalid", () => { 69 | const url = new 
URL("https://172.16.0.0"); 70 | expect(isValidURL(url)).toBe(false); 71 | }); 72 | 73 | test("https://192.168.1.0 is invalid", () => { 74 | const url = new URL("https://192.168.1.0"); 75 | expect(isValidURL(url)).toBe(false); 76 | }); 77 | 78 | test("https://127.0.0.1 is invalid", () => { 79 | const url = new URL("https://127.0.0.1"); 80 | expect(isValidURL(url)).toBe(false); 81 | }); 82 | 83 | test("Non-443 port is invalid", () => { 84 | const url = new URL("https://1.2.3.4:22"); 85 | expect(isValidURL(url)).toBe(false); 86 | }); 87 | 88 | test("URL with port 443 specified is valid", () => { 89 | const url = new URL("https://google.com:443"); 90 | expect(isValidURL(url)).toBe(true); 91 | }); 92 | }); 93 | 94 | describe("relative URLs", () => { 95 | test("absolute URL is not relative", () => { 96 | expect(isRelativeURL("https://google.com")).toBe(false); 97 | }); 98 | 99 | test("scheme-relative URL is not relative", () => { 100 | expect(isRelativeURL("//google.com")).toBe(false); 101 | }); 102 | 103 | test("URL with trailing slash is relative", () => { 104 | expect(isRelativeURL("/image.png")).toBe(true); 105 | }); 106 | 107 | test("URL without trailing slash is relative", () => { 108 | expect(isRelativeURL("image.png")).toBe(true); 109 | }); 110 | 111 | test("resolved relative URL which is actually absolute", () => { 112 | const url = new URL("https://example.com/"); 113 | const urlStrings = resolvedURLsFromRelative( 114 | "https://example2.com/image.png", 115 | url 116 | ).map((url) => url.toString()); 117 | expect(urlStrings).toEqual(["https://example2.com/image.png"]); 118 | }); 119 | 120 | test("resolved relative URL with no preceding slash and top-level base", () => { 121 | const url = new URL("https://example.com/"); 122 | const urlStrings = resolvedURLsFromRelative("image.png", url).map((url) => 123 | url.toString() 124 | ); 125 | expect(urlStrings).toEqual(["https://example.com/image.png"]); 126 | }); 127 | 128 | test("resolved relative URL with no 
preceding slash and top-level base path", () => { 129 | const url = new URL("https://example.com/123"); 130 | const urlStrings = resolvedURLsFromRelative("image.png", url).map((url) => 131 | url.toString() 132 | ); 133 | expect(urlStrings).toEqual(["https://example.com/image.png"]); 134 | }); 135 | 136 | test("resolved relative URL with no preceding slash and subdirectory-level base", () => { 137 | const url = new URL("https://example.com/path/"); 138 | const urlStrings = resolvedURLsFromRelative("image.png", url).map((url) => 139 | url.toString() 140 | ); 141 | expect(urlStrings).toEqual([ 142 | "https://example.com/path/image.png", 143 | "https://example.com/image.png", 144 | ]); 145 | }); 146 | 147 | test("resolved relative URL with preceding slash and top-level base", () => { 148 | const url = new URL("https://example.com/"); 149 | const urlStrings = resolvedURLsFromRelative("/image.png", url).map((url) => 150 | url.toString() 151 | ); 152 | expect(urlStrings).toEqual(["https://example.com/image.png"]); 153 | }); 154 | 155 | test("resolved relative URL with preceding slash and subdirectory-level base", () => { 156 | const url = new URL("https://example.com/path/"); 157 | const urlStrings = resolvedURLsFromRelative("/image.png", url).map((url) => 158 | url.toString() 159 | ); 160 | expect(urlStrings).toEqual(["https://example.com/image.png"]); 161 | }); 162 | }); 163 | 164 | describe("URL utilities", () => { 165 | test("base URLs for single-level domain", () => { 166 | const url = new URL("https://google.com"); 167 | const urlStrings = baseURLs(url, 3).map((url) => url.toString()); 168 | expect(urlStrings).toEqual(["https://google.com/"]); 169 | }); 170 | 171 | test("base URLs for two-level domain", () => { 172 | const url = new URL("https://docs.google.com"); 173 | const urlStrings = baseURLs(url, 3).map((url) => url.toString()); 174 | expect(urlStrings).toEqual([ 175 | "https://docs.google.com/", 176 | "https://google.com/", 177 | ]); 178 | }); 179 | 180 | 
test("base URLs for N-level domain", () => { 181 | const url = new URL("https://a.b.example.com"); 182 | const urlStrings = baseURLs(url, 3).map((url) => url.toString()); 183 | expect(urlStrings).toEqual([ 184 | "https://a.b.example.com/", 185 | "https://b.example.com/", 186 | "https://example.com/", 187 | ]); 188 | }); 189 | 190 | test("base URLs are capped with parameter", () => { 191 | const url = new URL("https://a.b.example.com"); 192 | const urlStrings = baseURLs(url, 2).map((url) => url.toString()); 193 | expect(urlStrings).toEqual([ 194 | "https://a.b.example.com/", 195 | "https://b.example.com/", 196 | ]); 197 | }); 198 | }); 199 | -------------------------------------------------------------------------------- /src/lib/__tests__/manifest.test.ts: -------------------------------------------------------------------------------- 1 | import { mockFetch, mockReadableStream } from "@/lib/mocks"; 2 | import { firstValueFrom } from "rxjs"; 3 | import { iconsFromManifest$ } from "../manifest"; 4 | 5 | describe("Manifest parsing", () => { 6 | test("manifest with missing icons array throws error", async () => { 7 | const manifest = `{}`; 8 | 9 | mockFetch(jest, { 10 | "https://example.com/manifest.json": () => ({ 11 | headers: new Headers({ "content-type": "application/json" }), 12 | body: mockReadableStream(manifest), 13 | }), 14 | }); 15 | 16 | const icons$ = iconsFromManifest$( 17 | new URL("https://example.com/manifest.json") 18 | ); 19 | expect(async () => { 20 | await firstValueFrom(icons$); 21 | }).rejects.toThrow(); 22 | }); 23 | 24 | test("manifest with invalid icons key throws error", async () => { 25 | const manifest = `{ 26 | "icons": 5 27 | }`; 28 | 29 | mockFetch(jest, { 30 | "https://example.com/manifest.json": () => ({ 31 | headers: new Headers({ "content-type": "application/json" }), 32 | body: mockReadableStream(manifest), 33 | }), 34 | }); 35 | 36 | const icons$ = iconsFromManifest$( 37 | new URL("https://example.com/manifest.json") 38 | ); 39 | 
expect(async () => { 40 | await firstValueFrom(icons$); 41 | }).rejects.toThrow(); 42 | }); 43 | 44 | test("manifest with single icon is parsed correctly", async () => { 45 | const manifest = `{ 46 | "icons": [ 47 | { "src": "/android-chrome-192x192.png" } 48 | ] 49 | }`; 50 | 51 | mockFetch(jest, { 52 | "https://example.com/manifest.json": () => ({ 53 | headers: new Headers({ "content-type": "application/json" }), 54 | body: mockReadableStream(manifest), 55 | }), 56 | }); 57 | 58 | const icons$ = iconsFromManifest$( 59 | new URL("https://example.com/manifest.json") 60 | ); 61 | 62 | const icons = await firstValueFrom(icons$); 63 | expect(icons).toEqual([ 64 | { 65 | source: "manifest", 66 | href: "/android-chrome-192x192.png", 67 | url: new URL("https://example.com/android-chrome-192x192.png"), 68 | }, 69 | ]); 70 | }); 71 | 72 | test("manifest with multiple icons are parsed correctly", async () => { 73 | const manifest = `{ 74 | "icons": [ 75 | { "src": "/android-chrome-192x192.png" }, 76 | { "src": "/android-chrome-256x256.png" } 77 | ] 78 | }`; 79 | 80 | mockFetch(jest, { 81 | "https://example.com/manifest.json": () => ({ 82 | headers: new Headers({ "content-type": "application/json" }), 83 | body: mockReadableStream(manifest), 84 | }), 85 | }); 86 | 87 | const icons$ = iconsFromManifest$( 88 | new URL("https://example.com/manifest.json") 89 | ); 90 | 91 | const icons = await firstValueFrom(icons$); 92 | expect(icons).toEqual([ 93 | { 94 | source: "manifest", 95 | href: "/android-chrome-192x192.png", 96 | url: new URL("https://example.com/android-chrome-192x192.png"), 97 | }, 98 | { 99 | source: "manifest", 100 | href: "/android-chrome-256x256.png", 101 | url: new URL("https://example.com/android-chrome-256x256.png"), 102 | }, 103 | ]); 104 | }); 105 | 106 | test("manifest with icons with single sizes are parsed correctly", async () => { 107 | const manifest = `{ 108 | "icons": [ 109 | { "src": "/android-chrome-192x192.png", "sizes": "192x192" }, 110 | { "src": 
"/android-chrome-256x256.png", "sizes": "256x256" } 111 | ] 112 | }`; 113 | 114 | mockFetch(jest, { 115 | "https://example.com/manifest.json": () => ({ 116 | headers: new Headers({ "content-type": "application/json" }), 117 | body: mockReadableStream(manifest), 118 | }), 119 | }); 120 | 121 | const icons$ = iconsFromManifest$( 122 | new URL("https://example.com/manifest.json") 123 | ); 124 | 125 | const icons = await firstValueFrom(icons$); 126 | expect(icons).toEqual([ 127 | { 128 | source: "manifest", 129 | href: "/android-chrome-192x192.png", 130 | url: new URL("https://example.com/android-chrome-192x192.png"), 131 | size: { 132 | type: "single", 133 | width: 192, 134 | height: 192, 135 | }, 136 | }, 137 | { 138 | source: "manifest", 139 | href: "/android-chrome-256x256.png", 140 | url: new URL("https://example.com/android-chrome-256x256.png"), 141 | size: { 142 | type: "single", 143 | width: 256, 144 | height: 256, 145 | }, 146 | }, 147 | ]); 148 | }); 149 | 150 | test("manifest with icons with multiple sizes are parsed correctly", async () => { 151 | const manifest = `{ 152 | "icons": [ 153 | { "src": "/favicon.ico", "sizes": "16x16 32x32 64x64" }, 154 | { "src": "/android-chrome-256x256.png", "sizes": "256x256" } 155 | ] 156 | }`; 157 | 158 | mockFetch(jest, { 159 | "https://example.com/manifest.json": () => ({ 160 | headers: new Headers({ "content-type": "application/json" }), 161 | body: mockReadableStream(manifest), 162 | }), 163 | }); 164 | 165 | const icons$ = iconsFromManifest$( 166 | new URL("https://example.com/manifest.json") 167 | ); 168 | 169 | const icons = await firstValueFrom(icons$); 170 | expect(icons).toEqual([ 171 | { 172 | source: "manifest", 173 | href: "/favicon.ico", 174 | url: new URL("https://example.com/favicon.ico"), 175 | size: { 176 | type: "multiple", 177 | sizes: [ 178 | { width: 16, height: 16 }, 179 | { width: 32, height: 32 }, 180 | { width: 64, height: 64 }, 181 | ], 182 | }, 183 | }, 184 | { 185 | source: "manifest", 186 | 
href: "/android-chrome-256x256.png", 187 | url: new URL("https://example.com/android-chrome-256x256.png"), 188 | size: { 189 | type: "single", 190 | width: 256, 191 | height: 256, 192 | }, 193 | }, 194 | ]); 195 | }); 196 | 197 | test("manifest with individual icon type error doesn't affect parsing of other icons", async () => { 198 | // E.g. for invalid 'src' tag in first icon 199 | const manifest = `{ 200 | "icons": [ 201 | { "sr": "/favicon.ico", "sizes": "16x16 32x32 64x64" }, 202 | { "src": "/android-chrome-256x256.png", "sizes": "256x256" } 203 | ] 204 | }`; 205 | 206 | mockFetch(jest, { 207 | "https://example.com/manifest.json": () => ({ 208 | headers: new Headers({ "content-type": "application/json" }), 209 | body: mockReadableStream(manifest), 210 | }), 211 | }); 212 | 213 | const icons$ = iconsFromManifest$( 214 | new URL("https://example.com/manifest.json") 215 | ); 216 | 217 | const icons = await firstValueFrom(icons$); 218 | expect(icons).toEqual([ 219 | { 220 | source: "manifest", 221 | href: "/android-chrome-256x256.png", 222 | url: new URL("https://example.com/android-chrome-256x256.png"), 223 | size: { 224 | type: "single", 225 | width: 256, 226 | height: 256, 227 | }, 228 | }, 229 | ]); 230 | }); 231 | }); 232 | -------------------------------------------------------------------------------- /src/lib/favicon.ts: -------------------------------------------------------------------------------- 1 | import { combineLatest, Observable, of } from "rxjs"; 2 | import { catchError, map, share, switchMap } from "rxjs/operators"; 3 | // Import node's definition since we won't be using this on the client. 
/**
 * Loads `/favicon.ico` for a given URL and for its higher-level domains.
 *
 * Candidate URLs come from `baseURLs(baseURL, 3)`; e.g. for
 * https://docs.google.com the candidates are
 * https://docs.google.com/favicon.ico and https://google.com/favicon.ico.
 *
 * If favicons exist at multiple subdomains, precedence is given to the deepest
 * subdomain (in the example above, https://docs.google.com/favicon.ico takes
 * precedence over https://google.com/favicon.ico).
 *
 * @param baseURL URL of the page or site whose favicon is wanted.
 * @returns An observable of the load result. Failures are not surfaced as
 *   error notifications; they are reported as `{ icon: null, foundIcons: [] }`.
 */
export function loadFaviconIco$(baseURL: URL): Observable<IconLoadResult> {
  const candidateURLs = baseURLs(baseURL, 3).map(faviconURL);

  return of(candidateURLs).pipe(
    switchMap((urls) => fetchFirstValidImage$(urls)),
    map(({ image, url }): IconLoadResult => {
      return {
        icon: {
          image,
          source: { source: "favicon.ico", url },
        },
        // Every candidate is reported, not only the one that was fetched.
        foundIcons: candidateURLs.map((candidateURL) => ({
          source: "favicon.ico",
          url: candidateURL,
        })),
      };
    }),
    // Deliberate best-effort: a missing or invalid favicon.ico is an expected
    // outcome, so it becomes an empty result instead of failing the stream.
    catchError(() => of<IconLoadResult>({ icon: null, foundIcons: [] }))
  );
}
82 | const baseURL = new URL(response.url); 83 | 84 | return combineLatest([ 85 | metadataFromHTMLPage$(response.body, baseURL), 86 | of(baseURL), 87 | of(pageExpiry), 88 | ]); 89 | }), 90 | map(([metadata, baseURL, pageExpiry]) => ({ 91 | metadata, 92 | baseURL, 93 | pageURL: makeURLWithoutThrowing(response.url) || url, 94 | pageExpiry, 95 | })) 96 | ); 97 | }), 98 | share() 99 | ); 100 | 101 | const metadata$ = result$.pipe(map(({ metadata }) => metadata)); 102 | const manifestIcons$ = metadata$.pipe( 103 | switchMap(({ manifestURL }) => { 104 | if (manifestURL == null) { 105 | throw new Error("Missing manifest URL"); 106 | } 107 | 108 | return iconsFromManifest$(manifestURL); 109 | }), 110 | catchError((error: Error) => { 111 | return of([] as ManifestIconSource[]); 112 | }) 113 | ); 114 | 115 | return combineLatest([result$, manifestIcons$]).pipe( 116 | map(([result, manifestIcons]) => { 117 | const { metadata, baseURL, pageURL, pageExpiry } = result; 118 | const { linkIcons } = metadata; 119 | const icon = bestReferencedIcon( 120 | [...linkIcons, ...manifestIcons], 121 | size, 122 | dpr 123 | ); 124 | 125 | if (icon == null) { 126 | throw new Error(`No icon found for page ${obfuscateURL(url)}`); 127 | } else { 128 | return { 129 | icon, 130 | linkIcons, 131 | manifestIcons, 132 | baseURL, 133 | pageURL, 134 | pageExpiry, 135 | }; 136 | } 137 | }), 138 | switchMap( 139 | ({ icon, linkIcons, manifestIcons, baseURL, pageURL, pageExpiry }) => { 140 | if (icon.source === "link" && icon.data) { 141 | const parsed = parseBase64DataURL(icon.url); 142 | if (parsed == null) { 143 | throw new Error("Invalid icon data"); 144 | } 145 | 146 | const { base64, type } = parsed; 147 | const buffer = Buffer.from(base64, "base64"); 148 | const blob = new Blob([buffer], { type }); 149 | 150 | return of({ 151 | icon: { 152 | image: { 153 | source: pageURL, 154 | blob, 155 | expiry: pageExpiry, 156 | }, 157 | source: icon, 158 | }, 159 | foundIcons: [...linkIcons, ...manifestIcons], 
/**
 * Returns the smallest edge length declared by an icon's `size` metadata.
 *
 * @param source Icon reference whose optional `size` is inspected.
 * @returns
 *   - `null` when `size` is absent or its type is `"any"`;
 *   - `min(width, height)` for a `"single"` size;
 *   - the smallest `min(width, height)` across all entries for a `"multiple"`
 *     size, or `null` when the list of sizes is empty.
 */
export function smallestIconDimension(
  source: ReferenceIconSource
): number | null {
  const { size } = source;
  if (size == null) {
    return null;
  }

  switch (size.type) {
    case "any":
      return null;
    case "single":
      return Math.min(size.width, size.height);
    case "multiple": {
      // Braces keep the declaration scoped to this case only.
      const dimensions = size.sizes.map(({ width, height }) =>
        Math.min(width, height)
      );
      // Math.min() of nothing is Infinity, so an empty list must map to null.
      return dimensions.length > 0 ? Math.min(...dimensions) : null;
    }
  }
}
55 | dpr: dpr || 1, 56 | })) 57 | ); 58 | 59 | const cachedImage$ = params$.pipe( 60 | switchMap(({ url, size, dpr }) => { 61 | const key = { url, size, dpr }; 62 | return from(getCachedImage(key, redis)); 63 | }), 64 | share() 65 | ); 66 | 67 | const [cached$, uncached$] = partition( 68 | cachedImage$, 69 | (cachedImage): cachedImage is IconMetadata => cachedImage != null 70 | ); 71 | 72 | const response$ = merge( 73 | combineLatest([cached$, params$]).pipe( 74 | switchMap(([cachedImage, params]) => 75 | cachedFaviconResponse$(params, cachedImage, services, defer) 76 | ), 77 | tap(({ expiry, objectKey }) => { 78 | const faviconHost = process.env.RAYCAST_FAVICON_HOST; 79 | if (objectKey == null || faviconHost == null) { 80 | throw makeInternalError(); 81 | } else { 82 | res.set(responseHeaders({ expiry })); 83 | res.redirect(`https://${faviconHost}/${objectKey}`); 84 | } 85 | }) 86 | ), 87 | combineLatest([uncached$, params$]).pipe( 88 | switchMap(([_, params]) => 89 | combineLatest([ 90 | of(params), 91 | uncachedFaviconResponse$(params, services, defer), 92 | ]) 93 | ), 94 | tap(async ([{ size, dpr, url }, result]) => { 95 | if (result.found) { 96 | const { blob, expiry } = result; 97 | res.type(blob.type); 98 | const buffer = await blob.arrayBuffer(); 99 | res.set(responseHeaders({ size: blob.size, expiry })); 100 | res.send(Buffer.from(buffer)); 101 | } else { 102 | res.status(404).send("Not found"); 103 | } 104 | }) 105 | ) 106 | ); 107 | 108 | try { 109 | await firstValueFrom(response$); 110 | } catch (error) { 111 | if (error instanceof APIError) { 112 | const { status, json } = errorResponse(error); 113 | res.status(status).json(json); 114 | } else { 115 | const { status, json } = errorResponse(makeInternalError()); 116 | res.status(status).json(json); 117 | } 118 | } 119 | } 120 | 121 | function cachedFaviconResponse$( 122 | params: { url: URL; size: SizeParam; dpr: DevicePixelRatioParam }, 123 | icon: IconMetadata, 124 | services: Services, 125 | defer: 
/**
 * Loads the icon for a validated URL that had no cache entry and maps the load
 * result to what the HTTP handler needs.
 *
 * When an icon is found, `cacheFavicon` is started and its promise is handed to
 * `defer`; this function does not wait for it before emitting.
 *
 * @param params Validated request parameters; also used as the cache key.
 * @param services Passed through to `cacheFavicon`.
 * @param defer Receives the promise of the background cache write.
 * @returns An observable emitting `{ found: true, blob, expiry }` when an icon
 *   was loaded, or `{ found: false }` when the load result carries no icon.
 */
function uncachedFaviconResponse$(
  params: { url: URL; size: SizeParam; dpr: DevicePixelRatioParam },
  services: Services,
  defer: (work: Promise<unknown>) => void
): Observable<{ found: true; blob: Blob; expiry: Date } | { found: false }> {
  return of(params).pipe(
    // Kept inside the pipe so loading only starts on subscription.
    switchMap(({ url, size, dpr }) =>
      loadIconsForValidatedURL$(url, size, dpr)
    ),
    map(({ icon }) => {
      if (icon == null) {
        return { found: false } as const;
      }

      const { url, size, dpr } = params;
      defer(cacheFavicon({ url, size, dpr }, icon, services));

      const { blob, expiry } = icon.image;
      return { found: true, blob, expiry } as const;
    })
  );
}
/**
 * Reads the optional `dpr` (device pixel ratio) query parameter.
 *
 * The value is parsed with `parseFloat`, rounded to the nearest integer,
 * clamped to the range 1..3 and then passed through
 * `DevicePixelRatioParam.parse`.
 *
 * @param url Request URL whose search parameters are inspected.
 * @returns An observable emitting the parsed ratio, or `null` when the
 *   parameter is absent.
 * @throws APIError 400 `invalid_dpr` (as an error notification) when the
 *   parameter is present but not a number; the internal error from
 *   `makeInternalError()` for any other failure.
 */
export function getDevicePixelRatioParam$(url: URL) {
  return of(url).pipe(
    map((requestURL) => new URL(requestURL)),
    map((parsedURL) => {
      const param = parsedURL.searchParams.get("dpr");
      if (param == null) {
        return null;
      }

      const dpr = parseFloat(param);
      if (isNaN(dpr)) {
        throw new APIError(
          400,
          "invalid_dpr",
          `Invalid 'dpr' query parameter. This should be a number`
        );
      }

      // Round to the closest integer ratio.
      const rounded = Math.round(dpr);
      return DevicePixelRatioParam.parse(Math.min(Math.max(rounded, 1), 3));
    }),
    catchError((error: unknown) => {
      // Let the client-facing validation error through unchanged; previously
      // it was caught here too and replaced by the internal error, so callers
      // never saw `invalid_dpr`.
      if (error instanceof APIError) {
        throw error;
      }
      throw makeInternalError();
    })
  );
}
headers: new Headers({ "content-type": "text/html" }),
        body: mockReadableStream(testImageData),
      }),
    });

    const image$ = loadFaviconIco$(new URL("https://example.com"));
    const result = await firstValueFrom(image$);

    expect(result.icon).toBe(null);
  });

  // Only the parent domain (google.com) has a mocked favicon.ico, so the
  // lookup for docs.google.com is expected to end up with the parent's icon.
  // NOTE(review): presumably mockFetch fails any URL that is not listed —
  // confirm in lib/mocks.ts.
  test("favicon.ico fetch uses higher-level subdomain on failure", async () => {
    mockFetch(jest, {
      "https://google.com/favicon.ico": () => ({
        headers: new Headers({ "content-type": "image/x-icon" }),
        body: mockReadableStream(testImageData),
      }),
    });

    const image$ = loadFaviconIco$(new URL("https://docs.google.com"));
    const result = await firstValueFrom(image$);

    expect(result.icon).toBeTruthy();
    expect(result.icon!.image.source.toString()).toEqual(
      "https://google.com/favicon.ico"
    );
    expect(result.icon!.image.blob.size).toEqual(5);
    expect(result.icon!.image.blob.type).toEqual("image/x-icon");
  });

  // Both the requested host and its parent domain serve an icon; the icon from
  // the requested host itself (docs.google.com) is the one expected to win.
  test("favicon.ico fetch from multiple subdomains prefers outermost host", async () => {
    mockFetch(jest, {
      "https://docs.google.com/favicon.ico": () => ({
        headers: new Headers({ "content-type": "image/x-icon" }),
        body: mockReadableStream(testImageData),
      }),
      "https://google.com/favicon.ico": () => ({
        headers: new Headers({ "content-type": "image/x-icon" }),
        body: mockReadableStream(testImageData),
      }),
    });

    const image$ = loadFaviconIco$(new URL("https://docs.google.com"));
    const result = await firstValueFrom(image$);

    expect(result.icon).toBeTruthy();
    expect(result.icon!.image.source.toString()).toEqual(
      "https://docs.google.com/favicon.ico"
    );
    expect(result.icon!.image.blob.size).toEqual(5);
    expect(result.icon!.image.blob.type).toEqual("image/x-icon");
  });

  // Only d.com serves an icon. Starting from a.b.c.d.com, reaching d.com would
  // be the fourth host tried, so no icon is expected to be found.
  test("favicon.ico fetch only fetches up to 3 subdomains deep", async () => {
    const mockResponse = {
      headers: new Headers({ "content-type": "image/x-icon" }),
      body: mockReadableStream(testImageData),
    };

    mockFetch(jest, {
      "https://d.com/favicon.ico": () => mockResponse,
    });

    const image$ = loadFaviconIco$(new URL("https://a.b.c.d.com"));
    const result = await firstValueFrom(image$);

    expect(result.icon).toBeFalsy();
  });
});

describe("Loading favicons from HTML page", () => {
  test("finds single icon", async () => {
    // NOTE(review): the fixture markup was blank in this copy of the file. It is
    // reconstructed from the assertions below (one `icon` link pointing at
    // "/favicon.ico") — confirm against the upstream test.
    const page = `
      <!DOCTYPE html>
      <html>
        <head>
          <link rel="icon" href="/favicon.ico" />
        </head>
      </html>
    `;

    mockFetch(jest, {
      "https://example.com/": () => ({
        headers: new Headers({ "content-type": "text/html" }),
        body: mockReadableStream(page),
        url: "https://example.com/",
      }),
      "https://example.com/favicon.ico": () => ({
        headers: new Headers({ "content-type": "image/x-icon" }),
        body: mockReadableStream(testImageData),
      }),
    });

    const image$ = loadFaviconFromHTMLPage$(
      new URL("https://example.com"),
      "favicon",
      1
    );
    const result = await firstValueFrom(image$);

    expect(result.icon).toBeTruthy();
    expect(result.icon!.image.source).toEqual(
      new URL("https://example.com/favicon.ico")
    );
    expect(result.icon!.image.blob.size).toEqual(5);
    expect(result.icon!.source).toEqual({
      source: "link",
      type: "icon",
      href: "/favicon.ico",
      url: new URL("https://example.com/favicon.ico"),
    });
    expect(result.foundIcons).toEqual([
      {
        source: "link",
        type: "icon",
        href: "/favicon.ico",
        url: new URL("https://example.com/favicon.ico"),
      },
    ]);
  });

  test("finds multiple icon", async () => {
    // NOTE(review): the fixture markup was blank in this copy of the file. It is
    // reconstructed from the assertions below (an `icon` link followed by a
    // `shortcut icon` link, both pointing at "/favicon.ico") — confirm against
    // the upstream test.
    const page = `
      <!DOCTYPE html>
      <html>
        <head>
          <link rel="icon" href="/favicon.ico" />
          <link rel="shortcut icon" href="/favicon.ico" />
        </head>
      </html>
    `;

    mockFetch(jest, {
      "https://example.com/": () => ({
        headers: new Headers({ "content-type": "text/html" }),
        body: mockReadableStream(page),
        url: "https://example.com/",
      }),
      "https://example.com/favicon.ico": () => ({
        headers: new Headers({ "content-type": "image/x-icon" }),
        body: mockReadableStream(testImageData),
      }),
    });

    const image$ = loadFaviconFromHTMLPage$(
      new URL("https://example.com"),
      "favicon",
      1
    );
    const result = await firstValueFrom(image$);

    expect(result.icon).toBeTruthy();
    expect(result.icon!.image.source).toEqual(
      new URL("https://example.com/favicon.ico")
    );
    expect(result.icon!.image.blob.size).toEqual(5);
    expect(result.icon!.source).toEqual({
      source: "link",
      type: "icon",
      href: "/favicon.ico",
      url: new URL("https://example.com/favicon.ico"),
    });
    expect(result.foundIcons).toEqual([
      {
        source: "link",
        type: "icon",
        href: "/favicon.ico",
        url: new URL("https://example.com/favicon.ico"),
      },
      {
        source: "link",
        type: "shortcut icon",
        href: "/favicon.ico",
        url: new URL("https://example.com/favicon.ico"),
      },
    ]);
  });

  // Test case for https://www.thesaurus.com/browse/Word, which references its icon with an incorrect relative URL.
test("uses flexible relative URLs for icons referenced in HTML", async () => {
    // The page lives at /some/path and links its icon as "favicon.ico" (no
    // leading slash), but only the root-level /favicon.ico exists in the mock.
    // The loader is expected to still resolve the icon at the site root.
    // NOTE(review): the fixture markup was blank in this copy of the file. It is
    // reconstructed from the assertions below — confirm against the upstream test.
    const page = `
      <!DOCTYPE html>
      <html>
        <head>
          <link rel="icon" href="favicon.ico" />
        </head>
      </html>
    `;

    mockFetch(jest, {
      "https://example.com/some/path": () => ({
        headers: new Headers({ "content-type": "text/html" }),
        body: mockReadableStream(page),
        url: "https://example.com/some/path",
      }),
      "https://example.com/favicon.ico": () => ({
        headers: new Headers({ "content-type": "image/x-icon" }),
        body: mockReadableStream(testImageData),
      }),
    });

    const image$ = loadFaviconFromHTMLPage$(
      new URL("https://example.com/some/path"),
      "favicon",
      1
    );
    const result = await firstValueFrom(image$);

    expect(result.icon).toBeTruthy();
    expect(result.icon!.image.source).toEqual(
      new URL("https://example.com/favicon.ico")
    );
    expect(result.icon!.image.blob.size).toEqual(5);
    expect(result.icon!.source).toEqual({
      href: "favicon.ico",
      source: "link",
      type: "icon",
      url: new URL("https://example.com/favicon.ico"),
    });
    expect(result.foundIcons).toEqual([
      {
        source: "link",
        type: "icon",
        href: "favicon.ico",
        url: new URL("https://example.com/favicon.ico"),
      },
    ]);
  });

  // Test case for https://www.bt.com/sport/watch/live-now/bt-sport-1 which redirects.
test("uses correct base URL when HTML page redirects", async () => {
    // The mocked response for https://example.com/ reports its final URL as
    // https://redirected.example.com/, so the root-relative icon link must be
    // resolved against the redirected origin.
    // NOTE(review): the fixture markup was blank in this copy of the file. It is
    // reconstructed from the assertions below — confirm against the upstream test.
    const page = `
      <!DOCTYPE html>
      <html>
        <head>
          <link rel="icon" href="/favicon.ico" />
        </head>
      </html>
    `;

    mockFetch(jest, {
      "https://example.com/": () => ({
        headers: new Headers({ "content-type": "text/html" }),
        body: mockReadableStream(page),
        url: "https://redirected.example.com/",
      }),
      "https://redirected.example.com/favicon.ico": () => ({
        headers: new Headers({ "content-type": "image/x-icon" }),
        body: mockReadableStream(testImageData),
      }),
    });

    const image$ = loadFaviconFromHTMLPage$(
      new URL("https://example.com"),
      "favicon",
      1
    );
    const result = await firstValueFrom(image$);

    expect(result.icon).toBeTruthy();
    expect(result.icon!.image.source).toEqual(
      new URL("https://redirected.example.com/favicon.ico")
    );
    expect(result.icon!.image.blob.size).toEqual(5);
    expect(result.icon!.source).toEqual({
      source: "link",
      type: "icon",
      href: "/favicon.ico",
      url: new URL("https://redirected.example.com/favicon.ico"),
    });
    expect(result.foundIcons).toEqual([
      {
        source: "link",
        type: "icon",
        href: "/favicon.ico",
        url: new URL("https://redirected.example.com/favicon.ico"),
      },
    ]);
  });

  test("uses base64-encoded image referenced in page", async () => {
    // Single definition of the inline icon, shared by the fixture and by both
    // expectations so the three cannot drift apart.
    const iconDataUrl =
      "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAABEVBMVEVHcEz///////////////////////////////////////////////////8AGziAjZtgcIKgqrT/JW+/xs3/BFmcC0vf4+b/DV//Dl//GWb/GWf/M3i+B08jGDyJDUh1EEZ1D0Zvf4+gqbSfqbSvuMD/3+r/v9X/oMD/IWz/MXb/U43/gq0gOFH/h7D/aZzv8fL6IWuufZr/GGfti68NGTo0Fj4TGTrwF2OYDEsjFz0MGjnAB1AIGjl+Dkc+FEBME0H/MnitCU2JDUl5D0b3LXLrIGidC0s1FT8zFj6uCk3/UY30SITfNXPlK22KDkn2OntgEUP/QYIVGTpgEkNCFUD/QoKWGVPRO3RfEUNBFT/kS4MXVO28AAAADXRSTlMAn2Cg78+Q3yAQMI9fslpvQQAAAUBJREFUOMuFk2dDwyAQhklCaGhL1IwO7XLvvWfV1r33+P8/REwiHCS270fugTvu3kNIiJqGjRnDtlGkKC2H8KCQ4WhhWmCaCur1HEspBx7JZ8Q5kdfu+6ODUj54g8bxARdqKHoj/k1cX8lNA3GlTpxxPAtgv0ksBjPUj9qbWxIgvAIGgIPXz+ZpuLMrAEyRCYGvl1YjPKntbyyUkmPzL0MEPD82379vHror68ui58gGwGXr/uP6acQbXhLdshEGwEUjvH3j8fKcADBiAKifX90de9VKMC0brgBup3bmVcvBDJiIksLdO9xeqwSzkwykgEVyrS7OT8GR2so3ZQeltdRGpYEiorgnwAdOegGGHHc2EHmK/A8QYLkE8BXbUmDaicirY9nG72f7/ovD6yB6nOgL7FggiomTsd/UtJL1N8HtHzDFNkJWpxq9AAAAAElFTkSuQmCC";

    // NOTE(review): the fixture markup was blank in this copy of the file. It is
    // reconstructed from the assertions below (an `icon` link whose href is the
    // data URL, with a declared size of 32x32) — confirm against the upstream test.
    const page = `
      <!DOCTYPE html>
      <html>
        <head>
          <link rel="icon" sizes="32x32" href="${iconDataUrl}" />
        </head>
      </html>
    `;

    // Only the page itself is mocked: the icon is embedded in the markup, so
    // no second request should be needed to obtain the image.
    mockFetch(jest, {
      "https://example.com/": () => ({
        headers: new Headers({ "content-type": "text/html" }),
        body: mockReadableStream(page),
        url: "https://example.com/",
      }),
    });

    const image$ = loadFaviconFromHTMLPage$(
      new URL("https://example.com"),
      "32",
      1
    );
    const result = await firstValueFrom(image$);

    expect(result.icon).toBeTruthy();
    expect(result.icon!.image.source).toEqual(new URL("https://example.com"));
    expect(result.icon!.image.blob.size).toEqual(687);
    expect(result.icon!.source).toEqual({
      source: "link",
      type: "icon",
      href: "",
      data: true,
      url: new URL(iconDataUrl),
      size: {
        type: "single",
        width: 32,
        height: 32,
      },
    });
    expect(result.foundIcons).toEqual([
      {
        source: "link",
        type: "icon",
        href: "",
        data: true,
        url: new URL(iconDataUrl),
        size: {
          type: "single",
          width: 32,
          height: 32,
        },
      },
    ]);
  });
});

--------------------------------------------------------------------------------