├── .dockerignore ├── .github └── workflows │ └── main.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── Makefile ├── README.md ├── docker-compose.yml ├── otel-collector ├── Dockerfile └── otel-collector-config.yaml ├── pyproject.toml ├── render.yaml ├── src ├── __init__.py ├── __main__.py ├── common │ ├── __init__.py │ ├── db │ │ ├── __init__.py │ │ ├── github.py │ │ └── slack.py │ └── embeddings.py ├── tiling │ ├── __init__.py │ └── build_map.py ├── webui │ ├── __init__.py │ ├── llm.py │ ├── main.py │ ├── settings.py │ ├── shared.py │ ├── slack.py │ └── web_hooks.py └── worker │ ├── __init__.py │ ├── docs_embeddings.py │ ├── github_similar_content.py │ └── settings.py └── uv.lock /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | dist 3 | .venv 4 | .logfire 5 | .github 6 | 7 | **/node_modules/ 8 | **/__pycache__/ 9 | scratch 10 | 11 | **/target/ 12 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - '**' 9 | pull_request: {} 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Install uv 18 | uses: astral-sh/setup-uv@v3 19 | with: 20 | version: "0.4.30" 21 | enable-cache: true 22 | 23 | - name: Install dependencies 24 | run: uv sync --python 3.12 --frozen 25 | 26 | - run: make lint 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # python generated files 2 | __pycache__/ 3 | *.py[oc] 4 | build/ 5 | dist/ 6 | wheels/ 7 | *.egg-info 8 | 9 | # venv 10 | .venv 11 | 12 | # misc space for stuff not in VCS 13 | /scratch/ 14 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.0.1 4 | hooks: 5 | - id: no-commit-to-branch 6 | - id: check-yaml 7 | - id: check-toml 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | - id: check-added-large-files 11 | 12 | - repo: local 13 | hooks: 14 | - id: format 15 | name: Format 16 | entry: make format 17 | types: [python] 18 | language: system 19 | pass_filenames: false 20 | - id: lint 21 | name: Lint 22 | entry: make lint 23 | types: [python] 24 | language: system 25 | pass_filenames: false 26 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-alpine AS build 2 | 3 | WORKDIR /app 4 | 5 | # required for logfire[system-metrics], which in turn requires psutil 6 | RUN apk add --no-cache gcc musl-dev linux-headers && rm -rf /var/cache/apk/* 7 | 8 | RUN pip install uv 9 | 10 | COPY pyproject.toml uv.lock ./ 11 | 12 | ENV UV_COMPILE_BYTECODE=1 13 | 14 | RUN uv sync --locked --no-install-project --no-dev 15 | 16 | COPY ./src /app/src 17 | 18 | ARG LOGFIRE_TOKEN 19 | ENV LOGFIRE_TOKEN=$LOGFIRE_TOKEN 20 | 21 | FROM python:3.12-alpine AS main 22 | 23 | COPY --from=build --chown=app:app /app /app 24 | WORKDIR /app 25 | ENV PATH="/app/.venv/bin:$PATH" 26 | 27 | CMD ["python", "-m", "src"] 28 |
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := all 2 | 3 | .PHONY: install 4 | install: 5 | uv sync --frozen 6 | pre-commit install 7 | 8 | .PHONY: build-docker 9 | build-docker: 10 | docker compose build 11 | 12 | .PHONY: up 13 | up: 14 | docker compose up --build 15 | 16 | .PHONY: format 17 | format: 18 | uv run ruff check --fix-only src 19 | uv run ruff format src 20 | 21 | .PHONY: lint 22 | lint: 23 | uv run ruff check src 24 | uv run ruff format --check src 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # logfire-demo 2 | 3 | This project demonstrates [Pydantic Logfire](https://pydantic.dev/logfire). 4 | 5 | It's designed to be a simple app with enough functionality to show some of the things Logfire can do. 6 | 7 | ## Running the demo 8 | 9 | 1. Follow [these](https://docs.pydantic.dev/logfire/guides/first_steps/) instructions to get set up with Logfire; export your Logfire write token as `LOGFIRE_TOKEN` so it can be used by Docker Compose. 10 | 2. Create a GitHub app and set the `GITHUB_APP_ID`, `GITHUB_APP_INSTALLATION_ID` and `GITHUB_APP_PRIVATE_KEY` environment variables (this is used for the "GitHub similar issues suggestion" demo). 11 | 3. Create an OpenAI API key and set the `OPENAI_API_KEY` environment variable (this is used for the "LLM Query" demo). 12 | 4. Run `make up`. 13 | 14 | Now you can go to the [Logfire demo page](http://localhost:8000/) and try the app. 15 | 16 | You can find your project's `Dashboard` link at the end of the page. Click on the dashboard link 17 | to see the live logs from the demo project. 18 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | webui: 3 | build: . 4 | platform: linux/amd64 5 | ports: 6 | - 8000:8000 7 | environment: 8 | SERVICE: webui 9 | PG_DSN: postgres://postgres:postgres@postgres/logfire_demo 10 | REDIS_DSN: redis://redis:6379/0 11 | TILING_SERVER: "http://tiling:8000" 12 | # forward the OpenAI API key from the host environment 13 | OPENAI_API_KEY: $OPENAI_API_KEY 14 | LOGFIRE_TOKEN: $LOGFIRE_TOKEN 15 | LOGFIRE_BASE_URL: $LOGFIRE_BASE_URL 16 | GITHUB_WEBHOOK_SECRET: $GITHUB_WEBHOOK_SECRET 17 | SLACK_SIGNING_SECRET: $SLACK_SIGNING_SECRET 18 | SLACK_CHANNEL: ${SLACK_CHANNEL:-{}} 19 | healthcheck: 20 | test: python -c "import urllib.request as r; assert r.urlopen('http://localhost:8000/health').status == 200" 21 | depends_on: 22 | - postgres 23 | - redis 24 | 25 | tiling: 26 | build: . 27 | platform: linux/amd64 28 | environment: 29 | SERVICE: tiling 30 | LOGFIRE_TOKEN: $LOGFIRE_TOKEN 31 | LOGFIRE_BASE_URL: $LOGFIRE_BASE_URL 32 | healthcheck: 33 | test: python -c "import urllib.request as r; assert r.urlopen('http://localhost:8000/health').status == 200" 34 | 35 | worker: 36 | build: .
37 | platform: linux/amd64 38 | environment: 39 | SERVICE: worker 40 | PG_DSN: postgres://postgres:postgres@postgres/logfire_demo 41 | REDIS_DSN: redis://redis:6379/0 42 | OPENAI_API_KEY: $OPENAI_API_KEY 43 | LOGFIRE_TOKEN: $LOGFIRE_TOKEN 44 | LOGFIRE_BASE_URL: $LOGFIRE_BASE_URL 45 | GITHUB_APP_ID: ${GITHUB_APP_ID:-0} 46 | GITHUB_APP_INSTALLATION_ID: ${GITHUB_APP_INSTALLATION_ID:-0} 47 | GITHUB_APP_PRIVATE_KEY: $GITHUB_APP_PRIVATE_KEY 48 | VECTOR_DISTANCE_THRESHOLD: 0.4 49 | AI_SIMILARITY_THRESHOLD: 85 50 | healthcheck: 51 | test: arq src.worker.WorkerSettings --check 52 | 53 | postgres: 54 | image: ankane/pgvector:latest 55 | container_name: logfire-demo-postgres 56 | environment: 57 | POSTGRES_USER: postgres 58 | POSTGRES_PASSWORD: postgres 59 | POSTGRES_DB: logfire_demo 60 | ports: 61 | # to connect: `pgcli postgres://postgres:postgres@localhost:54320/logfire_demo` 62 | - 54320:5432 63 | volumes: 64 | - postgres-data:/var/lib/postgresql/data 65 | restart: unless-stopped 66 | healthcheck: 67 | test: ["CMD-SHELL", "pg_isready -U postgres -d postgres"] 68 | 69 | redis: 70 | image: redis:latest 71 | container_name: logfire-demo-redis 72 | volumes: 73 | - redis-data:/data 74 | ports: 75 | - "63790:6379" 76 | healthcheck: 77 | test: ["CMD", "redis-cli", "ping"] 78 | 79 | volumes: 80 | postgres-data: 81 | redis-data: 82 | 83 | networks: 84 | default: 85 | name: logfire-demo-dev 86 | -------------------------------------------------------------------------------- /otel-collector/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM otel/opentelemetry-collector-contrib:0.114.0 2 | 3 | # Copy configuration and entry script 4 | COPY otel-collector-config.yaml /etc/otel-collector-config.yaml 5 | 6 | CMD [ "--config=/etc/otel-collector-config.yaml" ] 7 | -------------------------------------------------------------------------------- /otel-collector/otel-collector-config.yaml: -------------------------------------------------------------------------------- 1 | receivers: 2 | otlp: 3 | protocols: 4 | http: 5 | endpoint: "0.0.0.0:4318" 6 | 7 | exporters: 8 | debug: 9 | otlphttp/us: 10 | endpoint: https://api-us.pydantic.dev 11 | compression: none 12 | tls: 13 | insecure: true 14 | headers: 15 | Authorization: ${env:US_WRITE_TOKEN} 16 | otlphttp/eu: 17 | endpoint: https://api-eu.pydantic.dev 18 | compression: none 19 | tls: 20 | insecure: true 21 | headers: 22 | Authorization: ${env:EU_WRITE_TOKEN} 23 | processors: 24 | batch: 25 | timeout: 1s 26 | send_batch_size: 32768 27 | 28 | extensions: 29 | health_check: 30 | endpoint: "0.0.0.0:13133" 31 | 32 | service: 33 | extensions: [health_check] 34 | pipelines: 35 | traces: 36 | receivers: [otlp] 37 | processors: [batch] 38 | exporters: [debug, otlphttp/us, otlphttp/eu] 39 | metrics: 40 | receivers: [otlp] 41 | processors: [batch] 42 | exporters: [debug, otlphttp/us, otlphttp/eu] 43 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "logfire-demo" 3 | version = "0.0.0" 4 | description = "Demonstrate what Pydantic Logfire can do." 
5 | authors = [ 6 | { name = "Samuel Colvin", email = "s@muelcolvin.com" } 7 | ] 8 | requires-python = ">= 3.12" 9 | dependencies = [ 10 | "arq>=0.26.1", 11 | "asyncpg>=0.29.0", 12 | "cryptography>=44.0.1", 13 | "fastapi>=0.115.0", 14 | "fastui>=0.7.0", 15 | "httpx>=0.27.2", 16 | "logfire[asyncpg,fastapi,httpx,system-metrics]>=3.6.2", 17 | "openai>=1.47.1", 18 | "pillow>=10.4.0", 19 | "pydantic>=2.9.2", 20 | "pydantic-ai>=0.0.35", 21 | "pydantic-settings>=2.5.2", 22 | "pyjwt>=2.10.1", 23 | "python-multipart>=0.0.10", 24 | "tiktoken>=0.7.0", 25 | "uvicorn[standard]>=0.30.6", 26 | ] 27 | 28 | [tool.uv] 29 | dev-dependencies = [ 30 | "asyncpg-stubs>=0.29.1", 31 | "devtools>=0.12.2", 32 | "ipython>=8.27.0", 33 | "pyright>=1.1.382", 34 | "ruff>=0.6.7", 35 | "watchfiles>=0.24.0", 36 | ] 37 | 38 | [tool.ruff] 39 | line-length = 120 40 | target-version = "py312" 41 | lint.extend-select = ["Q", "RUF100", "C90", "UP", "I"] 42 | lint.flake8-quotes = {inline-quotes = "single", multiline-quotes = "double"} 43 | lint.mccabe = { max-complexity = 14 } 44 | format.quote-style = "single" 45 | -------------------------------------------------------------------------------- /render.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | - type: web 3 | name: webui 4 | runtime: docker 5 | rootDir: . 6 | dockerfilePath: ./Dockerfile 7 | envVars: 8 | - key: SERVICE 9 | value: webui 10 | - key: LOGFIRE_TOKEN 11 | sync: false 12 | - key: OTEL_EXPORTER_OTLP_ENDPOINT 13 | sync: false 14 | - key: TILING_SERVER 15 | value: "https://tiling.onrender.com" 16 | - key: OPENAI_API_KEY 17 | sync: false 18 | - key: GITHUB_WEBHOOK_SECRET 19 | sync: false 20 | - key: SLACK_SIGNING_SECRET 21 | sync: false 22 | - key: SLACK_CHANNEL 23 | sync: false 24 | - key: CREATE_DATABASE 25 | value: "false" 26 | - key: PG_DSN 27 | fromDatabase: 28 | name: postgres 29 | property: connectionString 30 | - key: REDIS_DSN 31 | fromService: 32 | type: redis 33 | name: arq-redis 34 | property: connectionString 35 | - type: web 36 | name: tiling 37 | runtime: docker 38 | rootDir: . 39 | dockerfilePath: ./Dockerfile 40 | envVars: 41 | - key: SERVICE 42 | value: tiling 43 | - key: LOGFIRE_TOKEN 44 | sync: false 45 | - key: OTEL_EXPORTER_OTLP_ENDPOINT 46 | sync: false 47 | - type: worker 48 | name: arq-worker 49 | runtime: docker 50 | rootDir: . 
51 | dockerfilePath: ./Dockerfile 52 | envVars: 53 | - key: SERVICE 54 | value: worker 55 | - key: LOGFIRE_TOKEN 56 | sync: false 57 | - key: OTEL_EXPORTER_OTLP_ENDPOINT 58 | sync: false 59 | - key: OPENAI_API_KEY 60 | sync: false 61 | - key: PG_DSN 62 | fromDatabase: 63 | name: postgres 64 | property: connectionString 65 | - key: REDIS_DSN 66 | fromService: 67 | type: redis 68 | name: arq-redis 69 | property: connectionString 70 | - key: GITHUB_APP_ID 71 | sync: false 72 | - key: GITHUB_APP_INSTALLATION_ID 73 | sync: false 74 | - key: GITHUB_APP_PRIVATE_KEY 75 | sync: false 76 | - key: VECTOR_DISTANCE_THRESHOLD 77 | sync: false 78 | - key: AI_SIMILARITY_THRESHOLD 79 | sync: false 80 | - name: otel-collector 81 | type: pserv # Ensures it's not exposed to the internet 82 | runtime: docker 83 | dockerfilePath: ./otel-collector/Dockerfile 84 | envVars: 85 | - key: US_WRITE_TOKEN 86 | sync: false 87 | - key: EU_WRITE_TOKEN 88 | sync: false 89 | autoDeploy: true 90 | - type: redis 91 | name: arq-redis 92 | plan: starter 93 | ipAllowList: [] # only allow internal connections 94 | 95 | databases: 96 | - name: postgres 97 | plan: starter 98 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydantic/logfire-demo/f7ce538b77174866c750c8187935a311a2764896/src/__init__.py -------------------------------------------------------------------------------- /src/__main__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | import sys 4 | 5 | import logfire 6 | from pydantic_ai import Agent 7 | 8 | service = sys.argv[1] if len(sys.argv) == 2 else os.getenv('SERVICE') 9 | services = 'webui', 'tiling', 'worker' 10 | # min duration is 100ms 11 | logfire.install_auto_tracing(modules=[f'src.{s}' for s in services], min_duration=0.1) 12 | if service is None: 13 | print('service argument variable not provided', file=sys.stderr) 14 | print('Available services:', ', '.join(services), file=sys.stderr) 15 | elif service in services: 16 | 17 | def scrubbing_callback(match: logfire.ScrubMatch): 18 | if ( 19 | match.path 20 | in [ 21 | ['message', 'gh_data'], 22 | ['message', 'prompt'], 23 | ['attributes', 'prompt'], 24 | ['attributes', 'result', 'reason'], 25 | ] 26 | or match.path[:2] 27 | in [ 28 | ['attributes', 'all_messages'], 29 | ['attributes', 'gh_data'], 30 | ] 31 | or match.path[:3] 32 | in [ 33 | ['attributes', 'response', 'parts'], 34 | ] 35 | ): 36 | return match.value 37 | 38 | logfire.configure( 39 | service_name=service, 40 | code_source=logfire.CodeSource( 41 | repository='https://github.com/pydantic/logfire-demo', 42 | revision='main', 43 | ), 44 | scrubbing=logfire.ScrubbingOptions(callback=scrubbing_callback), 45 | distributed_tracing=True, 46 | ) 47 | logfire.instrument_system_metrics() 48 | logfire.instrument_asyncpg() 49 | Agent.instrument_all() 50 | 51 | module = importlib.import_module(f'.{service}', package='src') 52 | module.run() 53 | else: 54 | print(f'Unknown service: {service}', file=sys.stderr) 55 | print('Available services:', ', '.join(services), file=sys.stderr) 56 | -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- 1 | import urllib.parse 2 | from typing import Annotated, Any 3 | 4 | from arq import ArqRedis 5 | from 
fastapi import Depends, Request 6 | from httpx import AsyncClient 7 | from pydantic_settings import BaseSettings 8 | 9 | 10 | def _get_http_client(request: Request) -> AsyncClient: 11 | return request.app.state.httpx_client 12 | 13 | 14 | AsyncClientDep = Annotated[AsyncClient, Depends(_get_http_client)] 15 | 16 | 17 | def build_params(**params: Any) -> str: 18 | return urllib.parse.urlencode({k: str(v) for k, v in params.items()}) 19 | 20 | 21 | def _arq_redis(request: Request) -> ArqRedis: 22 | return request.app.state.arq_redis 23 | 24 | 25 | ArqRedisDep = Annotated[ArqRedis, Depends(_arq_redis)] 26 | 27 | 28 | class GeneralSettings(BaseSettings): 29 | pg_dsn: str = 'postgres://postgres:postgres@localhost/logfire_demo' 30 | redis_dsn: str = 'redis://localhost:6379/0' 31 | -------------------------------------------------------------------------------- /src/common/db/__init__.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections.abc import AsyncIterator 3 | from contextlib import asynccontextmanager 4 | from dataclasses import dataclass 5 | from typing import Annotated, Self 6 | from urllib.parse import urlparse 7 | 8 | import asyncpg 9 | import logfire 10 | from asyncpg.connection import Connection 11 | from fastapi import Depends, Request 12 | 13 | __all__ = ('Database',) 14 | 15 | 16 | @dataclass 17 | class _Database: 18 | """ 19 | Wrapper for asyncpg with some utilities and usable as a fastapi dependency. 20 | """ 21 | 22 | _pool: asyncpg.Pool 23 | 24 | @classmethod 25 | @asynccontextmanager 26 | async def create(cls, dsn: str, prepare_db: bool = False, create_database: bool = False) -> AsyncIterator[Self]: 27 | if prepare_db: 28 | with logfire.span('prepare DB'): 29 | await _prepare_db(dsn, create_database) 30 | pool = await asyncpg.create_pool(dsn) 31 | if not pool: 32 | raise ValueError('Failed to create pool') 33 | try: 34 | yield cls(_pool=pool) 35 | finally: 36 | await asyncio.wait_for(pool.close(), timeout=2.0) 37 | 38 | @asynccontextmanager 39 | async def acquire(self) -> AsyncIterator[Connection]: 40 | con = await self._pool.acquire() 41 | try: 42 | yield con 43 | finally: 44 | await self._pool.release(con) 45 | 46 | @asynccontextmanager 47 | async def acquire_trans(self) -> AsyncIterator[Connection]: 48 | async with self._pool.acquire() as conn: 49 | async with conn.transaction(): 50 | yield conn 51 | 52 | 53 | def _get_db(request: Request) -> _Database: 54 | return request.app.state.db 55 | 56 | 57 | Database = Annotated[_Database, Depends(_get_db)] 58 | 59 | 60 | async def _prepare_db(dsn: str, create_database: bool) -> None: 61 | if create_database: 62 | with logfire.span('check and create DB'): 63 | parse_result = urlparse(dsn) 64 | database = parse_result.path.lstrip('/') 65 | server_dsn = dsn[: dsn.rindex('/')] 66 | conn = await asyncpg.connect(server_dsn) 67 | try: 68 | db_exists = await conn.fetchval('SELECT 1 FROM pg_database WHERE datname = $1', database) 69 | if not db_exists: 70 | await conn.execute(f'CREATE DATABASE {database}') 71 | finally: 72 | await conn.close() 73 | 74 | with logfire.span('create schema'): 75 | conn = await asyncpg.connect(dsn) 76 | try: 77 | async with conn.transaction(): 78 | await _create_schema(conn) 79 | finally: 80 | await conn.close() 81 | 82 | 83 | async def _create_schema(conn: Connection) -> None: 84 | await conn.execute(""" 85 | CREATE TABLE IF NOT EXISTS chats ( 86 | id UUID PRIMARY KEY DEFAULT gen_random_uuid(), 87 | created_at TIMESTAMPTZ NOT NULL DEFAULT 
NOW() 88 | ); 89 | CREATE INDEX IF NOT EXISTS chats_created_at_idx ON chats (created_at desc); 90 | 91 | CREATE TABLE IF NOT EXISTS messages ( 92 | id UUID PRIMARY KEY DEFAULT gen_random_uuid(), 93 | chat_id UUID NOT NULL REFERENCES chats(id) ON DELETE CASCADE, 94 | created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), 95 | role TEXT NOT NULL, 96 | message TEXT NOT NULL, 97 | cost INT 98 | ); 99 | CREATE INDEX IF NOT EXISTS messages_chat_id_idx ON messages (chat_id); 100 | CREATE INDEX IF NOT EXISTS messages_created_at_idx ON messages (created_at); 101 | 102 | CREATE TABLE IF NOT EXISTS llm_results ( 103 | questions_hash TEXT PRIMARY KEY, 104 | created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), 105 | chunks JSON -- isn't filtered, so use JSON instead of JSONB 106 | ); 107 | 108 | CREATE EXTENSION IF NOT EXISTS vector; 109 | CREATE TABLE IF NOT EXISTS embeddings ( 110 | id SERIAL PRIMARY KEY, -- Unique ID for each entry 111 | source TEXT NOT NULL, -- "github_issue", "slack_message", "pydantic_docs", ... 112 | external_reference TEXT, -- GitHub link, Slack message ID 113 | parent TEXT, -- GitHub issue, Thread TS (for Slack threads) 114 | text TEXT NOT NULL, -- The actual text content 115 | hash TEXT UNIQUE NOT NULL, -- Hash of the text content 116 | author TEXT, -- Author of the message 117 | event_ts TIMESTAMPTZ DEFAULT NOW(), -- Timestamp of when the event occurred 118 | created_at TIMESTAMPTZ DEFAULT NOW(), -- Timestamp of when the entry was created 119 | embedding VECTOR(1536) -- For storing embeddings 120 | ); 121 | 122 | CREATE TABLE IF NOT EXISTS github_contents ( 123 | id SERIAL PRIMARY KEY, -- Unique ID for each entry 124 | project TEXT NOT NULL, -- "pydantic", "logfire" 125 | source TEXT NOT NULL, -- "issue" 126 | content_id BIGINT NOT NULL, -- GitHub content ID 127 | external_reference TEXT NOT NULL, -- GitHub link 128 | text TEXT NOT NULL, -- The actual text content 129 | event_ts TIMESTAMPTZ DEFAULT NOW(), -- Timestamp of when the event occurred 130 | created_at TIMESTAMPTZ DEFAULT NOW(), -- Timestamp of when the entry was created 131 | updated_at TIMESTAMPTZ DEFAULT NOW(), -- Timestamp of when the entry was last updated 132 | embedding VECTOR(1536), -- For storing embeddings 133 | similar_issues JSONB, -- Similar issues 134 | unique (project, source, content_id) -- Unique constraint 135 | ); 136 | 137 | CREATE TABLE IF NOT EXISTS slack_messages ( 138 | id SERIAL PRIMARY KEY, -- Unique ID for each entry 139 | channel TEXT NOT NULL, -- Slack channel 140 | author TEXT NOT NULL, -- Message author 141 | message_id TEXT NOT NULL, -- Slack message ID 142 | event_ts TEXT NOT NULL, -- Timestamp of when the event occurred (text) 143 | parent_event_ts TEXT, -- Slack message thread timestamp 144 | text TEXT NOT NULL, -- The actual text content 145 | ts TIMESTAMPTZ, -- Message timestamp 146 | created_at TIMESTAMPTZ DEFAULT NOW(), -- Timestamp of when the entry was created 147 | embedding VECTOR(1536) -- For storing embeddings 148 | ); 149 | """) 150 | -------------------------------------------------------------------------------- /src/common/db/github.py: -------------------------------------------------------------------------------- 1 | import json 2 | from datetime import datetime 3 | from typing import Any, Literal 4 | 5 | from asyncpg import Connection 6 | 7 | GithubContentProject = Literal['pydantic', 'logfire', 'pydantic-ai'] 8 | GithubContentSource = Literal['issue'] 9 | 10 | 11 | async def create_github_content( 12 | conn: Connection, 13 | project: GithubContentProject, 14 | source: 
GithubContentSource, 15 | content_id: int, 16 | external_reference: str, 17 | text: str, 18 | event_ts: datetime, 19 | embedding: list[float], 20 | ) -> None: 21 | """Save GitHub content to the database.""" 22 | embedding_str = '[' + ','.join(map(str, embedding)) + ']' 23 | await conn.execute( 24 | """ 25 | INSERT INTO github_contents (project, source, content_id, external_reference, text, event_ts, embedding) 26 | VALUES ($1, $2, $3, $4, $5, $6, $7) 27 | """, 28 | project, 29 | source, 30 | content_id, 31 | external_reference, 32 | text, 33 | event_ts, 34 | embedding_str, 35 | ) 36 | 37 | 38 | async def get_github_content( 39 | conn: Connection, 40 | project: GithubContentProject, 41 | source: GithubContentSource, 42 | content_id: int, 43 | ) -> dict[str, Any] | None: 44 | """Fetch GitHub content from the database by ID.""" 45 | return await conn.fetchrow( 46 | """ 47 | SELECT id, text, embedding FROM github_contents WHERE project=$1 AND source=$2 AND content_id=$3 48 | """, 49 | project, 50 | source, 51 | content_id, 52 | ) 53 | 54 | 55 | async def update_github_content( 56 | conn: Connection, 57 | project: GithubContentProject, 58 | source: GithubContentSource, 59 | content_id: int, 60 | text: str, 61 | embedding: list[float], 62 | ) -> None: 63 | """Update GitHub content in the database.""" 64 | embedding_str = '[' + ','.join(map(str, embedding)) + ']' 65 | await conn.execute( 66 | """ 67 | UPDATE github_contents SET text=$1, embedding=$2 WHERE project=$3 AND source=$4 AND content_id=$5 68 | """, 69 | text, 70 | embedding_str, 71 | project, 72 | source, 73 | content_id, 74 | ) 75 | 76 | 77 | async def fetch_issues_for_similarity_check(conn: Connection) -> list[dict[str, Any]]: 78 | """Fetch GitHub issues for similarity check.""" 79 | return await conn.fetch( 80 | """ 81 | SELECT 82 | id, 83 | project, 84 | text, 85 | external_reference 86 | FROM github_contents 87 | WHERE source='issue' AND similar_issues IS NULL 88 | """, 89 | ) 90 | 91 | 92 | async def find_similar_issues(conn: Connection, id: int, project: GithubContentProject) -> list[dict[str, Any]]: 93 | """Find similar GitHub issues by vector similarity.""" 94 | return await conn.fetch( 95 | """ 96 | SELECT 97 | text, 98 | external_reference, 99 | embedding <=> (SELECT embedding FROM github_contents WHERE id = $1) AS distance 100 | FROM github_contents 101 | WHERE source='issue' AND project=$2 AND id != $3 102 | ORDER BY distance 103 | LIMIT 3; 104 | """, 105 | id, 106 | project, 107 | id, 108 | ) 109 | 110 | 111 | async def update_similar_issues(conn: Connection, id: int, similar_issues_obj: list[dict[str, Any]]) -> None: 112 | await conn.execute( 113 | """ 114 | UPDATE github_contents SET similar_issues=$1 WHERE id=$2 115 | """, 116 | json.dumps(similar_issues_obj), 117 | id, 118 | ) 119 | -------------------------------------------------------------------------------- /src/common/db/slack.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Any 3 | 4 | from asyncpg import Connection 5 | 6 | 7 | async def create_slack_message( 8 | conn: Connection, 9 | channel: str, 10 | author: str, 11 | message_id: str, 12 | event_ts: str, 13 | parent_event_ts: str | None, 14 | text: str, 15 | ts: datetime, 16 | embedding: list[float], 17 | ) -> None: 18 | """Create a new slack message in the database""" 19 | embedding_str = '[' + ','.join(map(str, embedding)) + ']' 20 | await conn.execute( 21 | """ 22 | INSERT INTO slack_messages (channel, author, 
message_id, event_ts, parent_event_ts, text, ts, embedding) 23 | VALUES ($1, $2, $3, $4, $5, $6, $7, $8) 24 | """, 25 | channel, 26 | author, 27 | message_id, 28 | event_ts, 29 | parent_event_ts, 30 | text, 31 | ts, 32 | embedding_str, 33 | ) 34 | 35 | 36 | async def get_root_slack_messages(conn: Connection, channel_id: str, limit: int = 10) -> list[dict[str, Any]]: 37 | """Fetch the root slack messages from the database.""" 38 | return await conn.fetch( 39 | """ 40 | WITH messages AS ( 41 | SELECT s.id, s.author, s.text, s.ts, count(r.id) as replies_count 42 | FROM slack_messages s 43 | LEFT JOIN slack_messages r ON r.parent_event_ts = s.event_ts OR r.event_ts = s.event_ts 44 | WHERE s.parent_event_ts IS NULL AND s.channel = $1 45 | GROUP BY s.author, s.id, s.text, s.ts, s.event_ts 46 | ORDER BY s.ts DESC 47 | LIMIT $2 48 | ) 49 | SELECT * FROM messages ORDER BY ts 50 | """, 51 | channel_id, 52 | limit, 53 | ) 54 | 55 | 56 | async def get_slack_thread(conn: Connection, message_id: int) -> list[dict[str, Any]]: 57 | """Fetch a slack thread from the database.""" 58 | return await conn.fetch( 59 | """ 60 | SELECT author, text, ts 61 | FROM slack_messages WHERE parent_event_ts=(SELECT event_ts FROM slack_messages WHERE id = $1) 62 | ORDER BY ts 63 | """, 64 | message_id, 65 | ) 66 | -------------------------------------------------------------------------------- /src/common/embeddings.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from datetime import datetime 3 | from typing import Literal 4 | 5 | import logfire 6 | import tiktoken 7 | from asyncpg import Connection 8 | from openai import AsyncOpenAI 9 | 10 | TOKEN_LIMIT = 8192 # OpenAI embedding model token limit 11 | 12 | 13 | def count_tokens(text: str, model: str = 'text-embedding-ada-002') -> int: 14 | """Counts the number of tokens in a given text using OpenAI's tiktoken.""" 15 | encoding = tiktoken.encoding_for_model(model) 16 | return len(encoding.encode(text)) 17 | 18 | 19 | def truncate_text_to_token_limit(text: str, model: str = 'text-embedding-ada-002', max_tokens: int = 8192) -> str: 20 | """Truncate text to fit within the token limit for embeddings.""" 21 | encoding = tiktoken.encoding_for_model(model) 22 | tokens = encoding.encode(text) # Convert text to tokens 23 | 24 | if len(tokens) > max_tokens: 25 | tokens = tokens[:max_tokens] # Truncate to max tokens 26 | 27 | return encoding.decode(tokens) # Convert tokens back to text 28 | 29 | 30 | async def generate_embedding(openai_client: AsyncOpenAI, text: str) -> list[float]: 31 | with logfire.span('call openai'): 32 | response = await openai_client.embeddings.create(input=text, model='text-embedding-ada-002') 33 | return response.data[0].embedding 34 | 35 | 36 | def hash_text(text: str) -> str: 37 | return hashlib.md5(text.encode()).hexdigest() 38 | 39 | 40 | EmbeddingsSource = Literal['slack_message', 'github_issue', 'pydantic_docs', 'pydantic_ai_docs', 'logfire_docs'] 41 | 42 | 43 | async def get_stored_embeddings_hash_by_source(conn: Connection, source: EmbeddingsSource) -> set[str]: 44 | hashes = await conn.fetch('SELECT hash FROM embeddings WHERE source=$1', source) 45 | return {record['hash'] for record in hashes} 46 | 47 | 48 | async def create_embeddings( 49 | conn: Connection, 50 | source: EmbeddingsSource, 51 | text: str, 52 | text_hash: str, 53 | embedding: list[float], 54 | event_ts: datetime | None = None, 55 | external_reference: str | None = None, 56 | author: str | None = None, 57 | parent: str |
None = None, 58 | ) -> None: 59 | """Create a new embeddings in the database""" 60 | embedding_str = '[' + ','.join(map(str, embedding)) + ']' 61 | await conn.execute( 62 | """ 63 | INSERT INTO embeddings (source, external_reference, text, hash, author, event_ts, embedding, parent) 64 | VALUES ($1, $2, $3, $4, $5, $6, $7, $8) 65 | """, 66 | source, 67 | external_reference, 68 | text, 69 | text_hash, 70 | author, 71 | event_ts, 72 | embedding_str, 73 | parent, 74 | ) 75 | 76 | 77 | async def delete_embeddings_by_hash(conn: Connection, hashes: set[str], source: EmbeddingsSource) -> None: 78 | await conn.execute('DELETE FROM embeddings WHERE hash = ANY($1) AND source=$2', hashes, source) 79 | -------------------------------------------------------------------------------- /src/tiling/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations as _annotations 2 | 3 | from contextlib import AsyncExitStack, asynccontextmanager 4 | from typing import Annotated 5 | 6 | import logfire 7 | from annotated_types import Ge, Gt, Le, Lt 8 | from fastapi import FastAPI, Header, Response 9 | from fastapi.responses import PlainTextResponse 10 | from httpx import AsyncClient 11 | from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor 12 | 13 | from ..common import AsyncClientDep 14 | from .build_map import BuildMap 15 | 16 | 17 | @asynccontextmanager 18 | async def lifespan(app_: FastAPI): 19 | async with AsyncExitStack() as stack: 20 | app_.state.httpx_client = httpx_client = await stack.enter_async_context(AsyncClient()) 21 | HTTPXClientInstrumentor.instrument_client(httpx_client) 22 | yield 23 | 24 | 25 | app = FastAPI(lifespan=lifespan) 26 | logfire.instrument_fastapi(app, capture_headers=True) 27 | 28 | 29 | @app.get('/', response_class=PlainTextResponse) 30 | @app.head('/', include_in_schema=False) 31 | async def index() -> str: 32 | return 'Tiling service\n' 33 | 34 | 35 | @app.get('/robots.txt', response_class=PlainTextResponse) 36 | @app.head('/robots.txt', include_in_schema=False) 37 | async def robots_txt() -> str: 38 | return 'User-agent: *\nDisallow: /\n' 39 | 40 | 41 | @app.get('/health', response_class=PlainTextResponse) 42 | @app.head('/health', include_in_schema=False) 43 | async def health() -> str: 44 | return 'OK\n' 45 | 46 | 47 | @app.get('/favicon.ico', status_code=404, response_class=PlainTextResponse) 48 | @app.head('/favicon.ico', include_in_schema=False) 49 | async def favicon_ico() -> str: 50 | return 'page not found' 51 | 52 | 53 | @app.get('/map.jpg') 54 | async def get_map( 55 | http_client: AsyncClientDep, 56 | lat: Annotated[float, Ge(-85), Le(85)], 57 | lng: Annotated[float, Ge(-180), Le(180)], 58 | zoom: Annotated[int, Gt(0), Lt(20)] = 10, 59 | width: Annotated[int, Ge(95), Le(1000)] = 600, 60 | height: Annotated[int, Ge(60), Le(1000)] = 400, 61 | scale: Annotated[int, Ge(1), Le(2)] = 1, 62 | referer: Annotated[str | None, Header()] = None, 63 | ) -> Response: 64 | builder = BuildMap( 65 | http_client=http_client, referrer=referer, lat=lat, lng=lng, zoom=zoom, width=width, height=height, scale=scale 66 | ) 67 | image = await builder.run() 68 | return Response( 69 | content=image, 70 | media_type='image/jpeg', 71 | headers={'Cache-Control': 'max-age=1209600', 'X-Robots-Tag': 'noindex'}, # 1209600 is 14 days 72 | ) 73 | 74 | 75 | def run(): 76 | import uvicorn 77 | 78 | uvicorn.run(app, host='0.0.0.0', port=8000, log_level='info') 79 | 
-------------------------------------------------------------------------------- /src/tiling/build_map.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import io 3 | import math 4 | import random 5 | from asyncio import Semaphore 6 | from collections.abc import Awaitable, Iterator, Sequence 7 | from statistics import mean 8 | from time import time 9 | 10 | import logfire 11 | from httpx import AsyncClient 12 | from PIL import Image, ImageDraw 13 | 14 | __all__ = ('BuildMap',) 15 | 16 | SHARDS = 'a', 'b', 'c' 17 | TILE_SIZE = 256 18 | HEADERS = {'User-Agent': 'https://github.com/tutorcruncher/static-maps'} 19 | 20 | COPYRIGHT_MSG = '© OpenStreetMap contributors' 21 | 22 | OSM_ROOT = 'https://{shard}.tile.openstreetmap.org' 23 | 24 | URL_TEMPLATE = '{url_root}/{zoom:d}/{x:d}/{y:d}.png' 25 | OSM_SEMAPHORE = Semaphore(value=32) 26 | 27 | 28 | class BuildMap: 29 | __slots__ = 'http_client', 'lat', 'lng', 'zoom', 'w', 'h', 'no_tiles', 'tiles', 'times', 'headers', 'scale' 30 | 31 | def __init__( 32 | self, 33 | *, 34 | http_client: AsyncClient, 35 | referrer: str | None, 36 | lat: float, 37 | lng: float, 38 | zoom: int, 39 | width: int, 40 | height: int, 41 | scale: int, 42 | ): 43 | self.http_client = http_client 44 | self.lat = lat 45 | self.lng = lng 46 | self.zoom = zoom 47 | self.w = width * scale 48 | self.h = height * scale 49 | self.scale = scale 50 | self.no_tiles = 2**self.zoom 51 | 52 | self.tiles: set[tuple[bytes, int, int]] = set() 53 | self.times: list[float] = [] 54 | self.headers = HEADERS.copy() 55 | if referrer: 56 | self.headers['Referer'] = referrer 57 | 58 | async def run(self) -> bytes: 59 | # https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Implementations 60 | x_tile = self.no_tiles * (self.lng + 180) / 360 61 | 62 | lat_rad = math.radians(self.lat) 63 | y_tile = self.no_tiles * (1 - math.log(math.tan(lat_rad) + 1 / math.cos(lat_rad)) / math.pi) / 2 64 | 65 | x_range, x_correction = self.range_correction(x_tile, self.w) 66 | y_range, y_correction = self.range_correction(y_tile, self.h) 67 | 68 | await asyncio.gather(*self.get_tiles(x_range, x_correction, y_range, y_correction)) 69 | 70 | logfire.info( 71 | '{lat=:0.6f} {lng=:0.6f} {zoom=} {tiles=} {avg_download_time=:0.3f}s', 72 | lat=self.lat, 73 | lng=self.lng, 74 | zoom=self.zoom, 75 | tiles=len(self.times), 76 | avg_download_time=mean(self.times), 77 | times=self.times, 78 | ) 79 | 80 | return await asyncio.get_event_loop().run_in_executor(None, self.stitch_tiles) 81 | 82 | @staticmethod 83 | def range_correction(tile_no: float, size: int) -> tuple[Sequence[int], int]: 84 | half_t = size / 2 / TILE_SIZE # half the width/height in tiles 85 | min_, max_ = int(math.floor(tile_no - half_t)), int(math.ceil(tile_no + half_t)) 86 | correction = (tile_no - min_) * TILE_SIZE - size / 2 87 | return range(min_, max_), intr(correction) 88 | 89 | def get_tiles( 90 | self, x_range: Sequence[int], x_correction: int, y_range: Sequence[int], y_correction: int 91 | ) -> Iterator[Awaitable[None]]: 92 | for col, x in enumerate(x_range): 93 | for row, y in enumerate(y_range): 94 | yield self.get_tile(x, y, col * TILE_SIZE - x_correction, row * TILE_SIZE - y_correction) 95 | 96 | async def get_tile(self, osm_x: int, osm_y: int, image_x: int, image_y: int) -> None: 97 | if not 0 <= osm_y < self.no_tiles: 98 | return 99 | # wraps map around at edges 100 | osm_x = osm_x % self.no_tiles 101 | root = OSM_ROOT.format(shard=random.choice(SHARDS)) 102 | url = 
URL_TEMPLATE.format(url_root=root, zoom=self.zoom, x=osm_x, y=osm_y) 103 | # debug(url, osm_x, osm_y, image_x, image_y) 104 | 105 | start = time() 106 | async with OSM_SEMAPHORE: 107 | r = await self.http_client.get(url, headers=self.headers) 108 | self.times.append(time() - start) 109 | if r.status_code != 200: 110 | data = {'content': r.content, 'response_headers': dict(r.headers)} 111 | logfire.warn('unexpected {status=} from {url!r}', status=r.status_code, url=url, data=data) 112 | else: 113 | self.tiles.add((r.content, image_x, image_y)) 114 | 115 | @logfire.instrument('stitch tiles together') 116 | def stitch_tiles(self) -> bytes: 117 | # the minimum image width is set to 95px to fit copyright text 118 | box_size_w, box_size_h = 95, 8 119 | text_pos_x, text_pos_y = 94, 8 120 | if self.w >= 205: 121 | box_size_w, box_size_h = 205, 20 122 | text_pos_x, text_pos_y = 200, 20 123 | 124 | img_bg = Image.new('RGBA', (self.w, self.h), (255, 255, 255, 255)) 125 | 126 | for content, x, y in self.tiles: 127 | img_bg.paste(Image.open(io.BytesIO(content)), (x, y)) 128 | 129 | self.tiles = set() 130 | img_fg = Image.new('RGBA', img_bg.size, (0, 0, 0, 0)) 131 | rect_box = self.w - box_size_w * self.scale, self.h - box_size_h * self.scale, self.w, self.h 132 | ImageDraw.Draw(img_fg).rectangle(rect_box, fill=(255, 255, 255, 128)) 133 | text_pos: tuple[int, int] = self.w - text_pos_x * self.scale, self.h - text_pos_y * self.scale 134 | ImageDraw.Draw(img_fg).text(text_pos, COPYRIGHT_MSG, fill=(0, 0, 0)) # type: ignore 135 | 136 | bio = io.BytesIO() 137 | Image.alpha_composite(img_bg, img_fg).convert('RGB').save(bio, format='jpeg', quality=95, optimize=True) 138 | return bio.getvalue() 139 | 140 | 141 | def intr(v: float) -> int: 142 | return int(round(v)) 143 | -------------------------------------------------------------------------------- /src/webui/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations as _annotations 2 | 3 | import sys 4 | from contextlib import AsyncExitStack, asynccontextmanager 5 | from typing import Annotated, Any 6 | 7 | import arq 8 | import logfire 9 | from annotated_types import Ge, Gt, Le, Lt 10 | from arq.connections import RedisSettings 11 | from fastapi import FastAPI 12 | from fastapi.responses import HTMLResponse, PlainTextResponse 13 | from fastui import prebuilt_html 14 | from fastui.auth import fastapi_auth_exception_handling 15 | from fastui.dev import dev_fastapi_app 16 | from httpx import AsyncClient 17 | from openai import AsyncOpenAI 18 | from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor 19 | from starlette.responses import StreamingResponse 20 | 21 | from ..common import AsyncClientDep 22 | from ..common.db import Database 23 | from .llm import router as llm_router 24 | from .main import router as main_router 25 | from .settings import settings 26 | from .slack import router as slack_router 27 | from .web_hooks import router as web_hooks_router 28 | 29 | 30 | @asynccontextmanager 31 | async def lifespan(app_: FastAPI): 32 | async with AsyncExitStack() as stack: 33 | app_.state.httpx_client = httpx_client = await stack.enter_async_context(AsyncClient()) 34 | HTTPXClientInstrumentor.instrument_client(httpx_client) 35 | app_.state.db = await stack.enter_async_context( 36 | Database.create(settings.pg_dsn, True, settings.create_database) 37 | ) 38 | app_.state.arq_redis = await arq.create_pool(RedisSettings.from_dsn(settings.redis_dsn)) 39 | app_.state.settings = 
settings 40 | app_.state.openai_client = openai_client = AsyncOpenAI(http_client=httpx_client) 41 | logfire.instrument_openai(openai_client=openai_client) 42 | yield 43 | 44 | 45 | # This doesn't have any effect yet, needs https://github.com/pydantic/FastUI/issues/198 46 | frontend_reload = '--reload' in sys.argv 47 | if frontend_reload: 48 | # dev_fastapi_app reloads in the browser when the Python source changes 49 | app = dev_fastapi_app(lifespan=lifespan) 50 | else: 51 | app = FastAPI(lifespan=lifespan) 52 | 53 | logfire.instrument_fastapi(app, capture_headers=True) 54 | 55 | fastapi_auth_exception_handling(app) 56 | app.include_router(llm_router, prefix='/api/llm') 57 | app.include_router(slack_router, prefix='/api/slack') 58 | app.include_router(main_router, prefix='/api') 59 | app.include_router(web_hooks_router, prefix='/webhooks') 60 | 61 | 62 | @app.get('/robots.txt', response_class=PlainTextResponse) 63 | @app.head('/robots.txt', include_in_schema=False) 64 | async def robots_txt() -> str: 65 | return 'User-agent: *\nDisallow: /\n' 66 | 67 | 68 | @app.get('/health', response_class=PlainTextResponse) 69 | @app.head('/health', include_in_schema=False) 70 | async def health(db: Database) -> str: 71 | async with db.acquire() as con: 72 | version = await con.fetchval('SELECT version()') 73 | return f'pg version: {version}' 74 | 75 | 76 | @app.get('/favicon.ico', status_code=404, response_class=PlainTextResponse) 77 | async def favicon_ico() -> str: 78 | return 'page not found' 79 | 80 | 81 | @app.get('/map.jpg') 82 | async def map_jpg( 83 | http_client: AsyncClientDep, 84 | # Show a map of London by default 85 | lat: Annotated[float, Ge(-85), Le(85)] = 51.5074, 86 | lng: Annotated[float, Ge(-180), Le(180)] = -0.1, 87 | zoom: Annotated[int, Gt(0), Lt(20)] = 10, 88 | width: Annotated[int, Ge(95), Le(1000)] = 600, 89 | height: Annotated[int, Ge(60), Le(1000)] = 400, 90 | scale: Annotated[int, Ge(1), Le(2)] = 1, 91 | ) -> StreamingResponse: 92 | params: dict[str, Any] = {'lat': lat, 'lng': lng, 'zoom': zoom, 'width': width, 'height': height, 'scale': scale} 93 | r = await http_client.get(f'{settings.tiling_server}/map.jpg', params=params) 94 | return StreamingResponse(r.aiter_bytes(), media_type='image/jpeg') 95 | 96 | 97 | @app.get('/{path:path}') 98 | @app.head('/{path:path}', include_in_schema=False) 99 | async def html_landing() -> HTMLResponse: 100 | return HTMLResponse(prebuilt_html(title='Logfire Demo')) 101 | 102 | 103 | def run(): 104 | import uvicorn 105 | 106 | uvicorn.run(app, host='0.0.0.0', port=8000, log_level='info') 107 | -------------------------------------------------------------------------------- /src/webui/llm.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import hashlib 3 | import json 4 | from collections.abc import AsyncIterable 5 | from random import random 6 | from typing import Annotated 7 | from uuid import UUID 8 | 9 | import logfire 10 | import tiktoken 11 | from fastapi import APIRouter 12 | from fastui import AnyComponent, FastUI, events 13 | from fastui import components as c 14 | from fastui.forms import fastui_form 15 | from openai import AsyncOpenAI 16 | from pydantic import BaseModel, Field 17 | from starlette.responses import StreamingResponse 18 | 19 | from ..common import AsyncClientDep 20 | from ..common.db import Database 21 | from .shared import demo_page 22 | 23 | router = APIRouter() 24 | 25 | # This is a workaround for the compatibility issue with the new version of Pydantic 26 | # 
https://github.com/pydantic/FastUI/issues/369 27 | c.Link.model_rebuild() 28 | 29 | 30 | class PromptModel(BaseModel): 31 | prompt: str | None = Field(title='Prompt', description='Ask me (almost) anything', max_length=300) 32 | 33 | 34 | def form_comp(chat_id: UUID) -> c.ModelForm: 35 | return c.ModelForm( 36 | model=PromptModel, 37 | method='POST', 38 | submit_url=f'/api/llm/ask/{chat_id}', 39 | footer=[c.Div(components=[c.Button(text='Ask')], class_name='text-end')], 40 | ) 41 | 42 | 43 | @router.get('', response_model=FastUI, response_model_exclude_none=True) 44 | async def llm_page(db: Database) -> list[AnyComponent]: 45 | async with db.acquire() as conn: 46 | # create a new chat row 47 | chat_id = await conn.fetchval('insert into chats DEFAULT VALUES RETURNING id') 48 | 49 | return demo_page( 50 | c.Link(components=[c.Text(text='back')], on_click=events.BackEvent()), 51 | c.Div( 52 | components=[c.Div(components=[form_comp(chat_id)], class_name='col-md-6')], 53 | class_name='row justify-content-center', 54 | ), 55 | title='LLM Query', 56 | ) 57 | 58 | 59 | @router.post('/ask/{chat_id}', response_model=FastUI, response_model_exclude_none=True) 60 | async def llm_ask( 61 | db: Database, prompt: Annotated[PromptModel, fastui_form(PromptModel)], chat_id: UUID 62 | ) -> list[AnyComponent]: 63 | async with db.acquire() as conn: 64 | # create a new message row 65 | await conn.execute( 66 | """ 67 | insert into messages (chat_id, role, message) VALUES ($1, 'user', $2) 68 | """, 69 | chat_id, 70 | prompt.prompt, 71 | ) 72 | return [ 73 | c.Markdown(text=f'**You asked:** {prompt.prompt}'), 74 | c.ServerLoad(path=f'/llm/ask/stream/{chat_id}', sse=True), 75 | form_comp(chat_id), 76 | ] 77 | 78 | 79 | OPENAI_MODEL = 'gpt-4' 80 | 81 | 82 | @router.get('/ask/stream/{chat_id}') 83 | async def llm_stream(db: Database, http_client: AsyncClientDep, chat_id: UUID) -> StreamingResponse: 84 | async with db.acquire() as conn: 85 | # count tokens used today 86 | tokens_used = await conn.fetchval( 87 | 'select sum(cost) from messages where created_at > current_date and cost is not null' 88 | ) 89 | logfire.info('{cost_today=}', cost_today=tokens_used) 90 | 91 | if tokens_used is not None and tokens_used > 500_000: 92 | content = [_sse_message('**Limit Exceeded**:\n\nDaily token limit exceeded.')] 93 | return StreamingResponse(content, media_type='text/event-stream') 94 | 95 | # get messages from this chat 96 | chat_messages = await conn.fetch( 97 | 'select role, message as content from messages where chat_id = $1 order by created_at', 98 | chat_id, 99 | ) 100 | 101 | questions = '|'.join(m['content'].lower() for m in chat_messages if m['role'] == 'user') 102 | questions_hash = hashlib.md5(questions.encode()).hexdigest() 103 | 104 | opt_chunks = await conn.fetchval('select chunks from llm_results where questions_hash = $1', questions_hash) 105 | 106 | messages = [{'role': 'system', 'content': 'Please respond in markdown only.'}, *map(dict, chat_messages)] 107 | 108 | async def gen_saved(chunks_json: str) -> AsyncIterable[str]: 109 | """ 110 | Generate a result based on previously saved chunks.
111 | """ 112 | chunks = json.loads(chunks_json) 113 | output = '' 114 | try: 115 | await asyncio.sleep(0.5 + random() * 0.5) 116 | with logfire.span('saved result {messages=}', messages=messages) as logfire_span: 117 | for chunk in chunks: 118 | if chunk is not None: 119 | output += chunk 120 | yield _sse_message(f'**{OPENAI_MODEL.upper()}s**:\n\n{output}') 121 | 122 | # 0.12s delay is taken roughly from 123 | # https://github.com/pydantic/FastUI/blob/196414360b69b3dab7012576f852229831307883/demo/sse.py#L66C1-L388C2 124 | await asyncio.sleep(random() * 0.12) 125 | logfire_span.set_attribute('output', output) 126 | finally: 127 | async with db.acquire() as conn: 128 | await conn.execute( 129 | "insert into messages (chat_id, role, message, cost) VALUES ($1, 'system', $2, 0)", 130 | chat_id, 131 | output, 132 | ) 133 | 134 | async def gen_openai() -> AsyncIterable[str]: 135 | output = '' 136 | input_usage = sum(_count_usage(m['content']) for m in messages if m['role'] in ('system', 'user')) 137 | output_usage = 0 138 | output_chunks = [] 139 | try: 140 | openai_client = AsyncOpenAI(http_client=http_client) 141 | logfire.instrument_openai(openai_client=openai_client) 142 | with logfire.span('call openai'): 143 | chunks = await openai_client.chat.completions.create( 144 | model=OPENAI_MODEL, 145 | messages=messages, 146 | stream=True, 147 | stream_options={'include_usage': True}, 148 | ) 149 | 150 | async for chunk in chunks: 151 | if not chunk.choices: 152 | # Ignore the usage chunk at the end 153 | continue 154 | text = chunk.choices[0].delta.content 155 | output_chunks.append(text) 156 | if text is not None: 157 | output += text 158 | yield _sse_message(f'**{OPENAI_MODEL.upper()}**:\n\n{output}') 159 | output_usage = _count_usage(output) 160 | async with db.acquire() as conn: 161 | await conn.execute( 162 | 'insert into llm_results (questions_hash, chunks) VALUES ($1, $2) ON CONFLICT DO NOTHING', 163 | questions_hash, 164 | json.dumps(output_chunks), 165 | ) 166 | finally: 167 | async with db.acquire() as conn: 168 | await conn.execute( 169 | "insert into messages (chat_id, role, message, cost) VALUES ($1, 'system', $2, $3)", 170 | chat_id, 171 | output, 172 | input_usage + output_usage, 173 | ) 174 | 175 | if opt_chunks: 176 | gen = gen_saved(opt_chunks) 177 | else: 178 | gen = gen_openai() 179 | return StreamingResponse(gen, media_type='text/event-stream') 180 | 181 | 182 | TOKEN_ENCODER = tiktoken.encoding_for_model(OPENAI_MODEL) 183 | 184 | 185 | def _count_usage(message: str) -> int: 186 | return len(TOKEN_ENCODER.encode(message)) 187 | 188 | 189 | def _sse_message(markdown: str) -> str: 190 | m = FastUI(root=[c.Markdown(text=markdown)]) 191 | return f'data: {m.model_dump_json(by_alias=True, exclude_none=True)}\n\n' 192 | -------------------------------------------------------------------------------- /src/webui/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations as _annotations 2 | 3 | from time import time 4 | 5 | from fastapi import APIRouter 6 | from fastui import AnyComponent, FastUI 7 | from fastui import components as c 8 | from fastui.events import GoToEvent 9 | 10 | from .settings import settings 11 | from .shared import demo_page 12 | 13 | router = APIRouter() 14 | 15 | 16 | @router.get('/', response_model=FastUI, response_model_exclude_none=True) 17 | def api_index() -> list[AnyComponent]: 18 | # language=markdown 19 | markdown = """\ 20 | This site demonstrates [Pydantic 
Logfire](https://docs.logfire.dev). 21 | 22 | You can use the sections below to see how different tasks are recorded by Logfire. 23 | """ 24 | slack_links = [ 25 | f'* [{channel_name}](/slack/{channel_id})' for channel_id, channel_name in settings.slack_channel.items() 26 | ] 27 | return demo_page( 28 | c.Markdown(text=markdown), 29 | c.Div( 30 | components=[ 31 | c.Heading(text='LLM Query', level=2), 32 | c.Link(components=[c.Text(text='Simple LLM question and answer.')], on_click=GoToEvent(url='/llm')), 33 | ], 34 | class_name='border-top mt-3 pt-1', 35 | ), 36 | c.Div( 37 | components=[ 38 | c.Heading(text='Slack Messages Archive', level=2), 39 | c.Markdown(text='\n'.join(slack_links)), 40 | ], 41 | class_name='border-top mt-3 pt-1', 42 | ), 43 | c.Div( 44 | components=[ 45 | c.Heading(text='Distributed Tracing', level=2), 46 | c.Paragraph(text="Here's an image generated by a separate tiling service."), 47 | c.Image(src=f'/map.jpg?v={time()}', alt='Map', width=600, height=400), 48 | ], 49 | class_name='border-top mt-3 pt-1', 50 | ), 51 | ) 52 | 53 | 54 | @router.get('/{path:path}', status_code=404) 55 | async def api_404(): 56 | # so we don't fall through to the index page 57 | return {'message': 'Not Found'} 58 | -------------------------------------------------------------------------------- /src/webui/settings.py: -------------------------------------------------------------------------------- 1 | from pydantic import SecretStr 2 | 3 | from ..common import GeneralSettings 4 | 5 | 6 | class Settings(GeneralSettings): 7 | create_database: bool = True 8 | tiling_server: str = 'http://localhost:8001' 9 | github_webhook_secret: SecretStr = 'test-github-secret' 10 | slack_signing_secret: SecretStr = 'test-slack-signing-secret' 11 | slack_channel: dict[str, str] = {} # mapping between Slack channel IDs and names 12 | 13 | 14 | settings = Settings() # type: ignore 15 | -------------------------------------------------------------------------------- /src/webui/shared.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations as _annotations 2 | 3 | from fastui import AnyComponent 4 | from fastui import components as c 5 | from fastui.events import GoToEvent 6 | 7 | 8 | def demo_page(*components: AnyComponent, title: str | None = None) -> list[AnyComponent]: 9 | return [ 10 | c.PageTitle(text=f'Logfire Demo — {title}' if title else 'Logfire Demo'), 11 | c.Navbar( 12 | title='Logfire Demo', 13 | title_event=GoToEvent(url='/'), 14 | end_links=[ 15 | c.Link( 16 | components=[c.Text(text='Login')], 17 | on_click=GoToEvent(url='/auth/login/password'), 18 | active='startswith:/auth', 19 | ), 20 | ], 21 | ), 22 | c.Page( 23 | components=[ 24 | *((c.Heading(text=title),) if title else ()), 25 | *components, 26 | ], 27 | ), 28 | c.Footer( 29 | extra_text='Logfire Demo', 30 | links=[ 31 | c.Link(components=[c.Text(text='Docs')], on_click=GoToEvent(url='https://docs.logfire.dev')), 32 | c.Link(components=[c.Text(text='Dashboard')], on_click=GoToEvent(url='https://dash.logfire.dev')), 33 | c.Link(components=[c.Text(text='PyPI')], on_click=GoToEvent(url='https://pypi.org/project/logfire/')), 34 | ], 35 | ), 36 | ] 37 | -------------------------------------------------------------------------------- /src/webui/slack.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request 2 | from fastui import FastUI, events 3 | from fastui import components as c 4 | 5 | from ..common.db 
import Database 6 | from ..common.db.slack import get_root_slack_messages, get_slack_thread 7 | from .shared import demo_page 8 | 9 | router = APIRouter() 10 | 11 | 12 | @router.get('/{channel_id}', response_model=FastUI, response_model_exclude_none=True) 13 | async def read_messages(request: Request, db: Database, channel_id: str): 14 | async with db.acquire() as conn: 15 | messages = await get_root_slack_messages(conn, channel_id) 16 | 17 | text = '' 18 | for msg in messages: 19 | text += f'- **@{msg["author"]}** ({msg["ts"]}): _{msg["text"][:50]}_ - [View Thread ({msg["replies_count"]})](/slack/thread/{msg["id"]}) \n\n' 20 | 21 | return demo_page( 22 | c.Link(components=[c.Text(text='back')], on_click=events.BackEvent()), 23 | c.Div(components=[c.Markdown(text=text)]), 24 | title='Logfire Slack Messages', 25 | ) 26 | 27 | 28 | @router.get('/thread/{message_id}', response_model=FastUI, response_model_exclude_none=True) 29 | async def read_thread(request: Request, db: Database, message_id: int): 30 | async with db.acquire() as conn: 31 | messages = await get_slack_thread(conn, message_id) 32 | 33 | text = '' 34 | for i, msg in enumerate(messages): 35 | text += f'{i + 1}. **@{msg["author"]}** ({msg["ts"]}): _{msg["text"]}_ \n\n' 36 | return demo_page( 37 | c.Link(components=[c.Text(text='back')], on_click=events.BackEvent()), 38 | c.Div(components=[c.Markdown(text=text)], class_name='col-md-6'), 39 | title=f'Logfire Slack Messages Thread {message_id}', 40 | ) 41 | -------------------------------------------------------------------------------- /src/webui/web_hooks.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import hmac 3 | import json 4 | from datetime import UTC, datetime 5 | from typing import Annotated, Any 6 | 7 | import logfire 8 | from fastapi import APIRouter, Depends, Header, HTTPException, Request 9 | from openai import AsyncOpenAI 10 | 11 | from ..common.db import Database 12 | from ..common.db.github import create_github_content, get_github_content, update_github_content 13 | from ..common.db.slack import create_slack_message 14 | from ..common.embeddings import generate_embedding, truncate_text_to_token_limit 15 | from .settings import settings 16 | 17 | router = APIRouter() 18 | 19 | 20 | def _get_openai_client(request: Request) -> AsyncOpenAI: 21 | return request.app.state.openai_client 22 | 23 | 24 | AsyncOpenAIClientDep = Annotated[AsyncOpenAI, Depends(_get_openai_client)] 25 | 26 | 27 | async def generate_github_content_embedding(openai_client: AsyncOpenAI, text: str) -> list[float]: 28 | """Generate an embedding for GitHub content.""" 29 | truncated_text = truncate_text_to_token_limit(text) 30 | return await generate_embedding(openai_client, truncated_text) 31 | 32 | 33 | def extract_data(issue: dict[str, Any]) -> tuple[int, str, str, datetime]: 34 | """Extract relevant information from a GitHub issue or comment.""" 35 | issue_id = issue.get('id') 36 | title = issue.get('title') 37 | text = issue.get('body') 38 | if title: 39 | text = f'{title}\n\n{text}' 40 | external_reference = issue.get('html_url') 41 | event_ts = datetime.fromisoformat(issue['created_at'].replace('Z', '+00:00')) 42 | return issue_id, text, external_reference, event_ts 43 | 44 | 45 | def verify_github_signature(secret: str, payload: bytes, signature: str) -> bool: 46 | """Verify GitHub webhook signature (HMAC SHA-256)""" 47 | mac = hmac.new(secret.encode(), msg=payload, digestmod=hashlib.sha256) 48 | expected_signature = 
f'sha256={mac.hexdigest()}' 49 | return hmac.compare_digest(expected_signature, signature) 50 | 51 | 52 | @router.post('/github') 53 | async def github_webhook( 54 | request: Request, 55 | db: Database, 56 | openai_client: AsyncOpenAIClientDep, 57 | x_hub_signature_256: str = Header(None), # GitHub sends signature in headers 58 | ): 59 | """Handle GitHub webhook events""" 60 | payload = await request.body() 61 | 62 | # Verify signature for security 63 | if not verify_github_signature(settings.github_webhook_secret.get_secret_value(), payload, x_hub_signature_256): 64 | raise HTTPException(status_code=403, detail='Invalid signature') 65 | 66 | data = await request.json() # Convert request payload to JSON 67 | event_type = request.headers.get('X-GitHub-Event') # GitHub event type 68 | 69 | if event_type not in ['issues', 'issue_comment']: 70 | logfire.debug('Event not supported: {event_type}', event_type=event_type) 71 | return {'message': 'Event not supported'} 72 | 73 | if event_type == 'issues': 74 | logfire.info('Received GitHub issue event: {gh_data}', gh_data=data) 75 | if data.get('action') == 'opened': 76 | issue = data.get('issue') 77 | if not issue: 78 | logfire.error('Invalid GitHub issue: {gh_data}', gh_data=data) 79 | return {'message': 'Invalid GitHub issue'} 80 | 81 | i_id, i_text, i_external_reference, event_ts = extract_data(issue) 82 | project = data.get('repository', {}).get('name') 83 | embeddings = await generate_github_content_embedding(openai_client, i_text) 84 | async with db.acquire() as conn: 85 | await create_github_content( 86 | conn, project, 'issue', i_id, i_external_reference, i_text, event_ts, embeddings 87 | ) 88 | else: 89 | logfire.debug('Action not supported: {gh_data}', gh_data=data) 90 | return {'message': 'Action not supported'} 91 | elif event_type == 'issue_comment': 92 | logfire.info('Received GitHub comment event: {gh_data}', gh_data=data) 93 | if data.get('action') == 'created': 94 | issue = data.get('issue') 95 | comment = data.get('comment') 96 | if not issue or not comment: 97 | logfire.error('Invalid GitHub issue comment: {gh_data}', gh_data=data) 98 | return {'message': 'Invalid GitHub issue comment'} 99 | 100 | if 'pull_request' in issue: # Ignore pull requests comments 101 | logfire.error('Ignoring comment on GitHub pull request: {gh_data}', gh_data=data) 102 | return {'message': 'Ignoring comment on GitHub pull request'} 103 | 104 | # Comment has to be added to the issue text 105 | project = data.get('repository', {}).get('name') 106 | i_id, _, i_external_reference, _ = extract_data(issue) 107 | async with db.acquire() as conn: 108 | saved_issue = await get_github_content(conn, project, 'issue', i_id) 109 | if not saved_issue: 110 | logfire.error( 111 | 'GitHub issue not found: {external_reference}', external_reference=i_external_reference 112 | ) 113 | return {'message': 'GitHub issue not found'} 114 | 115 | _, c_text, _, _ = extract_data(comment) 116 | text = f'{saved_issue["text"]}\n\n{c_text}' 117 | embeddings = await generate_github_content_embedding(openai_client, text) 118 | await update_github_content(conn, project, 'issue', i_id, text, embeddings) 119 | logfire.info('Updated GitHub issue: {external_reference}', external_reference=i_external_reference) 120 | else: 121 | logfire.debug('Action not supported: {gh_data}', gh_data=data) 122 | return {'message': 'Action not supported'} 123 | 124 | return {'message': 'Webhook received successfully!'} 125 | 126 | 127 | def verify_slack_signature(request: Request, body: bytes, 
slack_signing_secret: str) -> bool: 128 | """Verify Slack request signature for security""" 129 | timestamp = request.headers.get('X-Slack-Request-Timestamp') 130 | slack_signature = request.headers.get('X-Slack-Signature') 131 | 132 | if not timestamp or not slack_signature: 133 | return False 134 | 135 | # Slack signature format: v0=HMAC_SHA256(secret, "v0:{timestamp}:{body}") 136 | basestring = f'v0:{timestamp}:{body.decode("utf-8")}' 137 | calculated_signature = ( 138 | 'v0=' + hmac.new(slack_signing_secret.encode(), basestring.encode(), hashlib.sha256).hexdigest() 139 | ) 140 | 141 | return hmac.compare_digest(calculated_signature, slack_signature) 142 | 143 | 144 | @router.post('/slack/events') 145 | async def slack_events(request: Request, db: Database, openai_client: AsyncOpenAIClientDep): 146 | """Receive Slack messages via webhook""" 147 | body = await request.body() 148 | if not verify_slack_signature(request, body, settings.slack_signing_secret.get_secret_value()): 149 | raise HTTPException(status_code=403, detail='Invalid signature') 150 | 151 | data = json.loads(body) 152 | if data.get('type') == 'url_verification': 153 | # Slack sends a challenge code for verification 154 | return {'challenge': data['challenge']} 155 | 156 | if data.get('type') == 'event_callback': 157 | event = data.get('event', {}) 158 | 159 | logfire.info('Received Slack event: {event}', event=event) 160 | 161 | # Only process messages from allowed channels 162 | if (channel := event.get('channel')) not in settings.slack_channel: 163 | logfire.info('Invalid Slack channel: {channel}', channel=channel) 164 | return {'message': 'Invalid Slack channel'} 165 | 166 | if event.get('type') == 'message' and event.get('subtype') is None: 167 | author = event.get('user') 168 | text = event.get('text') 169 | message_id = event.get('client_msg_id') 170 | ts = datetime.fromtimestamp(float(event.get('ts')), tz=UTC) 171 | event_ts = event.get('event_ts') 172 | parent_event_ts = event.get('thread_ts') 173 | if not author or not text or not message_id or not event_ts: 174 | logfire.error('Invalid Slack message: {event}', event=event) 175 | return {'message': 'Invalid Slack message'} 176 | 177 | embedding = await generate_embedding(openai_client, text) 178 | 179 | async with db.acquire_trans() as conn: 180 | await create_slack_message( 181 | conn, 182 | channel=channel, 183 | author=author, 184 | message_id=message_id, 185 | event_ts=event_ts, 186 | parent_event_ts=parent_event_ts, 187 | text=text, 188 | ts=ts, 189 | embedding=embedding, 190 | ) 191 | 192 | logfire.info('Saved Slack message: {message_id}', message_id=message_id) 193 | 194 | return {'message': 'Event received'} 195 | -------------------------------------------------------------------------------- /src/worker/__init__.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging.config 3 | import time 4 | 5 | import asyncpg 6 | import logfire 7 | from arq import cron 8 | from arq.connections import RedisSettings 9 | from arq.worker import run_worker 10 | from httpx import AsyncClient 11 | from openai import AsyncOpenAI 12 | from pydantic_ai import Agent 13 | 14 | from .docs_embeddings import update_docs_embeddings 15 | from .github_similar_content import similar_issue_agent, suggest_similar_issues 16 | from .settings import settings 17 | 18 | 19 | async def startup(ctx): 20 | openai_client = AsyncOpenAI() 21 | 22 | ai_agent = Agent( 23 | 'openai:gpt-4o', 24 | result_type=str, 25 | system_prompt='Be 
concise, reply with maximum 50 tokens.', 26 | ) 27 | 28 | client = AsyncClient() 29 | 30 | ctx.update( 31 | client=client, 32 | pg_pool=await asyncpg.create_pool(settings.pg_dsn), 33 | openai_client=openai_client, 34 | ai_agent=ai_agent, 35 | similar_issue_agent=similar_issue_agent, 36 | ) 37 | 38 | 39 | async def shutdown(ctx): 40 | await ctx['client'].aclose() 41 | await ctx['openai_client'].close() 42 | await asyncio.wait_for(ctx['pg_pool'].close(), timeout=2.0) 43 | 44 | 45 | async def pydantic_doc_embeddings(ctx) -> None: 46 | """Update the embeddings for the pydantic documentation.""" 47 | with logfire.span('update pydantic ai docs embeddings'): 48 | await update_docs_embeddings( 49 | ctx['client'], 50 | ctx['pg_pool'], 51 | ctx['openai_client'], 52 | 'https://docs.pydantic.dev/dev/llms.txt', 53 | 'pydantic_docs', 54 | ) 55 | 56 | 57 | async def pydantic_ai_doc_embeddings(ctx) -> None: 58 | """Update the embeddings for the pydantic ai documentation.""" 59 | with logfire.span('update pydantic ai docs embeddings'): 60 | await update_docs_embeddings( 61 | ctx['client'], ctx['pg_pool'], ctx['openai_client'], 'https://ai.pydantic.dev/llms.txt', 'pydantic_ai_docs' 62 | ) 63 | 64 | 65 | async def logfire_doc_embeddings(ctx) -> None: 66 | """Update the embeddings for the logfire documentation.""" 67 | with logfire.span('update logfire docs embeddings'): 68 | await update_docs_embeddings( 69 | ctx['client'], 70 | ctx['pg_pool'], 71 | ctx['openai_client'], 72 | 'https://logfire.pydantic.dev/docs/llms.txt', 73 | 'logfire_docs', 74 | ) 75 | 76 | 77 | QUESTIONS = [ 78 | 'What is Pydantic?', 79 | 'What is PydanticAI?', 80 | 'What is Pydantic Logfire?', 81 | 'What are the main features of PydanticAI?', 82 | 'What are the main features of Pydantic Logfire?', 83 | 'Where is the documentation for the Pydantic Logfire schema?', 84 | 'What database does Pydantic Logfire use?', 85 | 'Where can I find the Pydantic public slack contact details?', 86 | "What's the url for the Pydantic Logfire docs?", 87 | 'How do I invite my team members to Logfire?', 88 | ] 89 | 90 | 91 | async def llm_query(ctx) -> None: 92 | """Query the LLM model with some questions.""" 93 | with logfire.span('query llm'): 94 | question_index = int(time.time() // (5 * 60)) % len(QUESTIONS) # Divide time into 5-minute intervals 95 | question = QUESTIONS[question_index] 96 | response = await ctx['ai_agent'].run(question) 97 | logfire.info('Question: {question} Answer: {response}', question=question, response=response.data) 98 | 99 | 100 | async def check_new_created_issues(ctx) -> None: 101 | """Suggest similar issues for new issues and post them as comments.""" 102 | with logfire.span('check new issues for similarity'): 103 | await suggest_similar_issues( 104 | ctx['pg_pool'], 105 | ctx['similar_issue_agent'], 106 | ctx['client'], 107 | settings.vector_distance_threshold, 108 | settings.ai_similarity_threshold, 109 | ) 110 | 111 | 112 | class WorkerSettings: 113 | functions = [ 114 | pydantic_doc_embeddings, 115 | pydantic_ai_doc_embeddings, 116 | logfire_doc_embeddings, 117 | llm_query, 118 | check_new_created_issues, 119 | ] 120 | on_startup = startup 121 | on_shutdown = shutdown 122 | redis_settings = RedisSettings.from_dsn(settings.redis_dsn) 123 | cron_jobs = [ 124 | cron(pydantic_ai_doc_embeddings, hour={10, 22}, minute=0), 125 | cron(logfire_doc_embeddings, hour={1, 13}, minute=0), 126 | cron(pydantic_doc_embeddings, hour={2, 14}, minute=0), 127 | cron(llm_query, minute={0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55}), 128 | 
cron(check_new_created_issues, minute={0, 10, 20, 30, 40, 50}), 129 | ] 130 | 131 | 132 | def run(): 133 | logging.config.dictConfig( 134 | { 135 | 'version': 1, 136 | 'disable_existing_loggers': False, 137 | 'handlers': { 138 | 'logfire': {'level': 'INFO', 'class': 'logfire.integrations.logging.LogfireLoggingHandler'}, 139 | }, 140 | 'loggers': {'arq': {'handlers': ['logfire'], 'level': 'INFO'}}, 141 | } 142 | ) 143 | 144 | run_worker(WorkerSettings) # type: ignore 145 | -------------------------------------------------------------------------------- /src/worker/docs_embeddings.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import asyncpg 4 | import logfire 5 | from httpx import AsyncClient 6 | from openai import AsyncOpenAI 7 | 8 | from ..common.embeddings import ( 9 | TOKEN_LIMIT, 10 | EmbeddingsSource, 11 | count_tokens, 12 | create_embeddings, 13 | delete_embeddings_by_hash, 14 | generate_embedding, 15 | get_stored_embeddings_hash_by_source, 16 | hash_text, 17 | ) 18 | 19 | 20 | async def get_content(client: AsyncClient, url: str) -> str: 21 | with logfire.span('Reading from {url=}', url=url): 22 | r = await client.get(url) 23 | r.raise_for_status() 24 | return r.content.decode() 25 | 26 | 27 | def split_large_text(text: str, max_tokens: int = TOKEN_LIMIT) -> list[str]: 28 | """Splits text into smaller chunks by paragraph if it exceeds max_tokens.""" 29 | paragraphs = text.split('\n\n') # Split by double newlines (paragraphs) 30 | chunks = [] 31 | current_chunk = [] 32 | 33 | for paragraph in paragraphs: 34 | current_chunk.append(paragraph) 35 | chunk_text = '\n\n'.join(current_chunk) 36 | 37 | if count_tokens(chunk_text) > max_tokens: 38 | # Remove the last added paragraph and store the chunk 39 | current_chunk.pop() 40 | chunks.append('\n\n'.join(current_chunk)) 41 | current_chunk = [paragraph] # Start new chunk 42 | 43 | # Add remaining content 44 | if current_chunk: 45 | chunks.append('\n\n'.join(current_chunk)) 46 | 47 | return chunks 48 | 49 | 50 | def split_markdown_sections(content: str) -> list[dict[str, str]]: 51 | """Splits a Markdown file into sections based on headers, ensuring each section is <= 8192 tokens.""" 52 | pattern = r'^(#{1,6})\s+(.*)$' 53 | matches = re.finditer(pattern, content, re.MULTILINE) 54 | 55 | sections = [] 56 | last_index = 0 57 | 58 | for match in matches: 59 | header, title = match.groups() 60 | start = match.start() 61 | 62 | if sections: 63 | section_content = content[last_index:start].strip() 64 | # Split if content exceeds token limit 65 | if count_tokens(section_content) > TOKEN_LIMIT: 66 | section_chunks = split_large_text(section_content, TOKEN_LIMIT) 67 | for chunk in section_chunks: 68 | sections.append( 69 | { 70 | 'level': sections[-1]['level'], 71 | 'title': sections[-1]['title'], 72 | 'content': chunk, 73 | } 74 | ) 75 | else: 76 | sections[-1]['content'] = section_content 77 | 78 | sections.append({'level': len(header), 'title': title.strip(), 'content': ''}) 79 | last_index = start 80 | 81 | # Process the last section 82 | if sections: 83 | last_content = content[last_index:].strip() 84 | if count_tokens(last_content) > TOKEN_LIMIT: 85 | section_chunks = split_large_text(last_content, TOKEN_LIMIT) 86 | for chunk in section_chunks: 87 | sections.append( 88 | { 89 | 'level': sections[-1]['level'], 90 | 'title': sections[-1]['title'], 91 | 'content': chunk, 92 | } 93 | ) 94 | else: 95 | sections[-1]['content'] = last_content 96 | 97 | return sections 98 | 99 | 100 | async def 
update_docs_embeddings(
101 |     client: AsyncClient, pg_pool: asyncpg.Pool, openai_client: AsyncOpenAI, url: str, source: EmbeddingsSource
102 | ) -> None:
103 |     content = await get_content(client, url)
104 |     sections = split_markdown_sections(content)
105 | 
106 |     async with pg_pool.acquire() as conn:
107 |         hashes: set[str] = set()
108 |         stored_hashes = await get_stored_embeddings_hash_by_source(conn, source)
109 | 
110 |         for section in sections:
111 |             try:
112 |                 section_content = f'{section["title"]} {section["content"]}'
113 |                 text_hash = hash_text(section_content)
114 |                 hashes.add(text_hash)
115 |                 if text_hash in stored_hashes:
116 |                     logfire.info('Skipping {text_hash=}', text_hash=text_hash)
117 |                     continue
118 |                 embeddings = await generate_embedding(openai_client, section_content)
119 |                 await create_embeddings(
120 |                     conn,
121 |                     source=source,
122 |                     text=section_content,
123 |                     text_hash=text_hash,
124 |                     embedding=embeddings,
125 |                 )
126 |             except Exception as exc:
127 |                 logfire.error('Failed to update docs embeddings {exc!r}', exc=exc)
128 | 
129 |         # Remove old embeddings that are not in the new content
130 |         hashes_to_delete = stored_hashes - hashes
131 |         if hashes_to_delete:
132 |             await delete_embeddings_by_hash(conn, hashes_to_delete, source)
133 | 
--------------------------------------------------------------------------------
/src/worker/github_similar_content.py:
--------------------------------------------------------------------------------
1 | import time
2 | from typing import Any
3 | 
4 | import asyncpg
5 | import jwt
6 | import logfire
7 | from httpx import AsyncClient
8 | from pydantic import BaseModel, Field
9 | from pydantic_ai import Agent
10 | from pydantic_ai.models import ModelSettings
11 | 
12 | from ..common.db.github import (
13 |     GithubContentProject,
14 |     fetch_issues_for_similarity_check,
15 |     find_similar_issues,
16 |     update_similar_issues,
17 | )
18 | from .settings import settings
19 | 
20 | 
21 | class SimilarityResult(BaseModel):
22 |     percentage: int = Field(description='Similarity of the issues', ge=0, le=100)
23 |     reason: str = Field(description='Reason for the similarity')
24 | 
25 | 
26 | similar_issue_agent = Agent(
27 |     'openai:gpt-4o',
28 |     result_type=SimilarityResult,
29 |     model_settings=ModelSettings(temperature=0.1),
30 |     system_prompt=(
31 |         """
32 | Your task is to provide a detailed similarity analysis while maintaining strict output format requirements.
33 | 
34 | ANALYSIS CRITERIA:
35 | 1. Semantic Similarity (40% weight)
36 |    - Core problem or feature request
37 |    - Technical domain and scope
38 |    - Expected behavior and outcomes
39 | 
40 | 2. Implementation Details (30% weight)
41 |    - Technical approach suggested
42 |    - Dependencies mentioned
43 |    - Code snippets or examples
44 | 
45 | 3. Context & Requirements (30% weight)
46 |    - Project context and constraints
47 |    - User impact and priorities
48 |    - Environment and version details
49 | 
50 | SIMILARITY SCALE:
51 | 0-20%: Fundamentally different issues
52 | 21-40%: Slight overlaps but largely distinct
53 | 41-60%: Moderate similarity in some aspects
54 | 61-80%: Significant overlap in core aspects
55 | 81-100%: Nearly identical issues
56 | 
57 | RULES:
58 | - Ignore superficial similarities (writing style, formatting)
59 | - Consider partial matches in technical requirements
60 | - Account for implicit similarities in problem domain
61 | - Look for shared root causes in bug reports
62 | - Consider related feature requests as partial matches
63 | 
64 | OUTPUT FORMAT:
65 | 1. 
Provide a single integer similarity score (0-100) 66 | 2. The score must be divisible by 5 (e.g., 75 not 77) 67 | 3. No explanation unless explicitly requested 68 | 69 | EXAMPLE PAIRS AND SCORES: 70 | 71 | # High Similarity (80-100%) 72 | Issue 1: "Error: Connection timeout when processing large files >500MB" 73 | Issue 2: "Timeout occurred during batch processing of files >1GB" 74 | Score: 85 75 | Reason: Nearly identical core issue (timeout during large file processing), same technical domain, similar scope 76 | 77 | Issue 1: "Add dark mode support to dashboard UI" 78 | Issue 2: "Implement dark theme for main dashboard" 79 | Score: 90 80 | Reason: Same feature request, same component, identical scope 81 | 82 | # Moderate Similarity (40-79%) 83 | Issue 1: "Redis connection fails with timeout after 30 seconds" 84 | Issue 2: "MongoDB connection timeout in high-load scenarios" 85 | Score: 60 86 | Reason: Similar problem (database timeout) but different databases and contexts 87 | 88 | Issue 1: "Add user authentication via Google OAuth" 89 | Issue 2: "Implement SSO support for Google accounts" 90 | Score: 75 91 | Reason: Related authentication features with overlapping implementation 92 | 93 | # Low Similarity (0-39%) 94 | Issue 1: "Browser crashes when uploading large files" 95 | Issue 2: "Timeout during large file upload" 96 | Score: 35 97 | Reason: Different core issues (crash vs timeout) despite similar trigger 98 | 99 | Issue 1: "Add PDF export functionality" 100 | Issue 2: "Fix PDF rendering bug in preview" 101 | Score: 25 102 | Reason: Same component (PDF) but different types of issues (feature vs bug) 103 | 104 | # Zero Similarity 105 | Issue 1: "Update documentation for API endpoints" 106 | Issue 2: "Fix memory leak in image processing" 107 | Score: 0 108 | Reason: Completely different domains, types, and purposes 109 | """ 110 | ), 111 | ) 112 | 113 | 114 | def _generate_query(issue_1_text: str, issue_2_text: str) -> str: 115 | return f""" 116 | Are these two GitHub issues similar? 117 | **Issue 1:** 118 | "{issue_1_text}" 119 | 120 | **Issue 2:** 121 | "{issue_2_text}" 122 | """ 123 | 124 | 125 | async def _generate_github_app_access_token( 126 | client: AsyncClient, app_id: int, installation_id: int, private_key: str 127 | ) -> str: 128 | """Generate a GitHub App access token.""" 129 | # Generate a GitHub App JWT 130 | now = int(time.time()) 131 | payload = {'iat': now, 'exp': now + 600, 'iss': app_id} 132 | jwt_token = jwt.encode(payload, private_key, algorithm='RS256') 133 | 134 | # Get Installation Access Token 135 | url = f'https://api.github.com/app/installations/{installation_id}/access_tokens' 136 | headers = {'Authorization': f'Bearer {jwt_token}', 'Accept': 'application/vnd.github.v3+json'} 137 | response = await client.post(url, headers=headers) 138 | return response.json().get('token') 139 | 140 | 141 | async def _post_github_comment( 142 | client: AsyncClient, 143 | access_token: str, 144 | project: GithubContentProject, 145 | issue_link: str, 146 | similar_issues: list[dict[str, Any]], 147 | ) -> None: 148 | # Find the issue number from the issue link 149 | issue_number = issue_link.split('/')[-1] 150 | url = f'https://api.github.com/repos/pydantic/{project}/issues/{issue_number}/comments' 151 | 152 | # Generate the comment body 153 | issue_links = '\n'.join( 154 | [ 155 | f'{i + 1}. 
"{similar_issue["link"]}" ({similar_issue["ai_similarity"]}% similar)' 156 | for i, similar_issue in enumerate(similar_issues) 157 | ] 158 | ) 159 | body = f'PydanticAI Github Bot Found {len(similar_issues)} issues similar to this one: \n{issue_links}' 160 | 161 | response = await client.post( 162 | url, 163 | json={'body': body}, 164 | headers={'Authorization': f'Bearer {access_token}', 'Accept': 'application/vnd.github.v3+json'}, 165 | ) 166 | response.raise_for_status() 167 | 168 | 169 | async def suggest_similar_issues( 170 | pg_pool: asyncpg.Pool, 171 | similar_issue_agent: Agent, 172 | client: AsyncClient, 173 | vector_distance_threshold: float, 174 | ai_similarity_threshold: int, 175 | ) -> None: 176 | github_access_token = None 177 | 178 | async with pg_pool.acquire() as conn: 179 | # Fetch new issues for similarity check 180 | issues = await fetch_issues_for_similarity_check(conn) 181 | if not issues: 182 | logfire.info('No new issues found') 183 | return 184 | logfire.info(f'Found {len(issues)} new issues') 185 | 186 | for issue in issues: 187 | issue_link = issue['external_reference'] 188 | with logfire.span(f'Checking issue {issue_link}'): 189 | # Fetch similar issues by vector similarity 190 | similar_issues = await find_similar_issues(conn, issue['id'], issue['project']) 191 | logfire.info(f'Found {len(similar_issues)} similar issues for issue {issue_link}') 192 | 193 | similar_issues_obj: list[dict[str, Any]] = [] 194 | for similar_issue in similar_issues: 195 | similar_issue_link = similar_issue['external_reference'] 196 | distance = similar_issue['distance'] 197 | obj = { 198 | 'link': similar_issue_link, 199 | 'distance': distance, 200 | 'ai_similarity': None, 201 | 'post_comment': False, 202 | } 203 | # Skip similar issues with distance > vector_distance_threshold 204 | # It could be done in database level, but we did it here to see some 205 | # similar issues in logs. This help us to adjust the threshold 206 | if distance <= vector_distance_threshold: 207 | # Get similarity percentage from the AI agent 208 | logfire.info( 209 | f'Checking similarity between issue {issue_link} and similar issue {similar_issue_link}' 210 | ) 211 | similarity_result = await similar_issue_agent.run( 212 | _generate_query(issue['text'], similar_issue['text']) 213 | ) 214 | obj['ai_similarity'] = similarity_result.data.percentage 215 | if similarity_result.data.percentage > ai_similarity_threshold: 216 | obj['post_comment'] = True 217 | else: 218 | logfire.info(f'Skipping similar issue {similar_issue_link} due to distance {distance}') 219 | 220 | similar_issues_obj.append(obj) 221 | 222 | # Filter similar issues to post comments 223 | issues_to_comment = [issue for issue in similar_issues_obj if issue['post_comment']] 224 | if not issues_to_comment: 225 | logfire.info(f'No similar issues found for {issue_link}') 226 | else: 227 | # Github access token is valid for 10 minutes. We need to generate a new one 228 | # if we don't have it. As the task runs every 10 minutes, we need to generate 229 | # a new token every time the task runs. 
230 | if not github_access_token: 231 | github_access_token = await _generate_github_app_access_token( 232 | client, 233 | settings.github_app_id, 234 | settings.github_app_installation_id, 235 | settings.github_app_private_key, 236 | ) 237 | await _post_github_comment( 238 | client, github_access_token, issue['project'], issue_link, issues_to_comment 239 | ) 240 | logfire.info(f'Posted similar issues for {issue_link}') 241 | 242 | # Update the similar issues in the database 243 | await update_similar_issues(conn, issue['id'], similar_issues_obj) 244 | -------------------------------------------------------------------------------- /src/worker/settings.py: -------------------------------------------------------------------------------- 1 | from pydantic import Field 2 | 3 | from ..common import GeneralSettings 4 | 5 | 6 | class Settings(GeneralSettings): 7 | github_app_id: int 8 | github_app_installation_id: int 9 | github_app_private_key: str 10 | vector_distance_threshold: float = Field(0.4, ge=0.0, le=1.0) 11 | ai_similarity_threshold: int = Field(85, ge=0, le=100) 12 | 13 | 14 | settings = Settings() # type: ignore 15 | --------------------------------------------------------------------------------
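An illustrative sketch for exercising the GitHub webhook locally: the handler in src/webui/web_hooks.py only accepts requests whose raw body is signed with HMAC SHA-256 using GITHUB_WEBHOOK_SECRET and sent in the X-Hub-Signature-256 header. The snippet below shows one way to post a signed test event; it assumes the compose stack is running with the default `test-github-secret` from src/webui/settings.py and guesses a `/webhooks/github` mount point for the router (check src/webui/main.py for the real prefix), and the handler will go on to call OpenAI and Postgres, so those services need to be reachable.

    import hashlib
    import hmac
    import json

    import httpx

    # Hypothetical test data; the secret matches the webui default, not a real credential.
    secret = 'test-github-secret'
    payload = json.dumps(
        {
            'action': 'opened',
            'issue': {
                'id': 1,
                'title': 'Example issue',
                'body': 'Something looks broken.',
                'html_url': 'https://github.com/example/repo/issues/1',
                'created_at': '2024-01-01T00:00:00Z',
            },
            'repository': {'name': 'repo'},
        }
    ).encode()

    # Same scheme as verify_github_signature: 'sha256=' + hex HMAC of the raw request body.
    signature = 'sha256=' + hmac.new(secret.encode(), payload, hashlib.sha256).hexdigest()

    response = httpx.post(
        'http://localhost:8000/webhooks/github',  # assumed mount point
        content=payload,
        headers={
            'X-Hub-Signature-256': signature,
            'X-GitHub-Event': 'issues',
            'Content-Type': 'application/json',
        },
    )
    print(response.status_code, response.json())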