├── .github └── workflows │ ├── ci.yml │ └── issue-translator.yml ├── .gitignore ├── .golangci.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── README-en.md ├── README.md ├── docker-compose.yml ├── docs ├── cherry-studio-mcp.md ├── config-zh.md ├── config.md ├── crawl-zh.md ├── images │ ├── add-rss.png │ ├── arch.png │ ├── chat-with-feeds.png │ ├── cherry-studio-mcp-prompt.png │ ├── cherry-studio-mcp.png │ ├── crad.png │ ├── daily-brief.png │ ├── feed-list-with-web.png │ ├── folo-html.png │ ├── migrate-from-follow-1.png │ ├── migrate-from-follow-2.png │ ├── migrate-from-follow-3.png │ ├── migrate-from-follow-4.png │ ├── migrate-from-follow-5.png │ ├── monitoring.png │ ├── notification-with-web.png │ ├── sponsor.png │ ├── update-config-with-web.png │ ├── upgrade-from-v0.1.0-backup.png │ ├── web-add-source.png │ ├── web-reading-aggr.png │ └── wechat.png ├── mcp-client-prompt.md ├── migrate-from-follow.md ├── model-selection-zh.md ├── preview.md ├── query-api-zh.md ├── roadmap-zh.md ├── rss-api-zh.md ├── tech │ ├── hld-zh.md │ ├── rewrite-zh.md │ ├── testing-zh.md │ └── vector-zh.md ├── upgrade-from-v0.1.0.md ├── upgrade.md └── webhook-zh.md ├── go.mod ├── go.sum ├── main.go └── pkg ├── api ├── api.go ├── http │ └── http.go ├── mcp │ └── mcp.go └── rss │ └── rss.go ├── component └── component.go ├── config └── config.go ├── llm ├── embedding_spliter.go ├── embedding_spliter_test.go ├── llm.go ├── openai.go └── prompt │ └── prompt.go ├── model └── model.go ├── notify ├── channel │ ├── channel.go │ ├── email.go │ └── webhook.go ├── notify.go └── route │ ├── route.go │ └── route_test.go ├── rewrite ├── rewrite.go └── rewrite_test.go ├── schedule ├── rule │ ├── periodic.go │ ├── periodic_test.go │ ├── rule.go │ ├── watch.go │ └── watch_test.go └── schedule.go ├── scrape ├── manager.go ├── manager_test.go └── scraper │ ├── rss.go │ ├── rss_test.go │ ├── scraper.go │ ├── scraper_test.go │ └── source.go ├── storage ├── feed │ ├── block │ │ ├── block.go │ │ ├── block_test.go │ 
│ ├── chunk │ │ │ ├── chunk.go │ │ │ ├── chunk_benchmark_test.go │ │ │ ├── chunk_test.go │ │ │ └── encoding.go │ │ └── index │ │ │ ├── codec.go │ │ │ ├── inverted │ │ │ ├── inverted.go │ │ │ └── inverted_test.go │ │ │ ├── primary │ │ │ ├── primary.go │ │ │ └── primary_test.go │ │ │ └── vector │ │ │ ├── vector.go │ │ │ └── vector_test.go │ ├── feed.go │ └── feed_test.go └── kv │ └── kv.go ├── telemetry ├── log │ └── log.go ├── metric │ └── metric.go ├── model │ └── model.go ├── server │ └── server.go └── telemetry.go ├── test └── test.go └── util ├── binary ├── binary.go └── binary_test.go ├── buffer └── buffer.go ├── crawl └── crawl.go ├── hash └── hash.go ├── heap ├── heap.go └── heap_test.go ├── json_schema ├── json_schema.go └── json_schema_test.go ├── jsonrpc ├── jsonrpc.go └── jsonrpc_test.go ├── retry ├── retry.go └── retry_test.go ├── runtime ├── runtime.go └── runtime_test.go ├── text_convert ├── text_convert.go └── text_convert_test.go ├── time ├── time.go └── time_test.go └── vector ├── vector.go └── vector_test.go /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | release: 9 | types: [ published ] 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Set up Go 17 | uses: actions/setup-go@v5 18 | with: 19 | go-version: '1.23.4' 20 | - name: Golangci Lint 21 | uses: golangci/golangci-lint-action@v7 22 | with: 23 | version: v2.0 24 | - name: Run tests 25 | run: make test 26 | 27 | build-and-push: 28 | runs-on: ubuntu-latest 29 | needs: test 30 | if: github.event_name == 'release' 31 | steps: 32 | - uses: actions/checkout@v4 33 | - name: Set up Docker Buildx 34 | uses: docker/setup-buildx-action@v3 35 | - name: Login to Docker Hub 36 | uses: docker/login-action@v3 37 | with: 38 | username: ${{ secrets.DOCKERHUB_USERNAME }} 39 | password: ${{ 
secrets.DOCKERHUB_TOKEN }} 40 | - name: Build and push Docker images 41 | run: make push -------------------------------------------------------------------------------- /.github/workflows/issue-translator.yml: -------------------------------------------------------------------------------- 1 | name: 'issue-translator' 2 | on: 3 | issue_comment: 4 | types: [created] 5 | issues: 6 | types: [opened] 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: usthe/issues-translate-action@v2.7 13 | with: 14 | IS_MODIFY_TITLE: true -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.dll 2 | *.so 3 | *.dylib 4 | *.test 5 | *.out 6 | coverage.html 7 | vendor/ 8 | go.work 9 | .idea/ 10 | .vscode/ 11 | *.swp 12 | *.swo 13 | .DS_Store 14 | .aider* 15 | .cursorrules 16 | *.log 17 | local_docs/ 18 | .env 19 | .env.local 20 | __debug_bin 21 | config.yaml 22 | data/ 23 | *debug* 24 | .cursorrules 25 | .cursor/ -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | run: 3 | timeout: 5m 4 | 5 | # https://golangci-lint.run/usage/linters. 6 | linters: 7 | settings: 8 | gocognit: 9 | min-complexity: 15 10 | cyclop: 11 | max-complexity: 10 12 | 13 | enable: 14 | - errcheck 15 | - govet 16 | - ineffassign 17 | - staticcheck 18 | - unused 19 | - bodyclose 20 | - copyloopvar 21 | - cyclop 22 | - gocognit 23 | - errorlint 24 | - funlen 25 | - gocognit 26 | - goheader 27 | - iface 28 | - importas 29 | - inamedparam 30 | - intrange 31 | - maintidx 32 | - nestif 33 | - nlreturn 34 | - noctx 35 | - paralleltest 36 | - perfsprint 37 | - prealloc 38 | - promlinter 39 | - reassign 40 | exclusions: 41 | rules: 42 | - path: pkg/rewrite/rewrite.go 43 | linters: 44 | - lll # For prompt. 
45 | - path: pkg/config/config.go 46 | linters: 47 | - lll # For schema tag. 48 | - path: pkg/notify/channel/email.go 49 | linters: 50 | - lll # For HTML template. 51 | - path: main.go 52 | linters: 53 | - lll # For disclaimer. 54 | - cyclop 55 | paths: 56 | - ".*\\_test\\.go$" 57 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.23.4-alpine AS builder 2 | 3 | RUN apk add --no-cache git 4 | 5 | WORKDIR /app 6 | COPY . . 7 | 8 | ARG VERSION=dev 9 | RUN GOOS=linux go build -ldflags="-s -w -X main.version=${VERSION}" -o /app/zenfeed ./main.go 10 | 11 | FROM alpine:latest 12 | 13 | ARG VERSION=dev 14 | LABEL org.opencontainers.image.version=${VERSION} 15 | 16 | RUN apk add --no-cache ca-certificates tzdata && \ 17 | mkdir -p /app/data 18 | 19 | COPY --from=builder /app/zenfeed /app/ 20 | 21 | WORKDIR /app 22 | ENTRYPOINT ["/app/zenfeed"] 23 | CMD ["--config", "/app/config/config.yaml"] -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION ?= $(shell git describe --tags --always) 2 | IMAGE_NAME ?= zenfeed 3 | REGISTRY ?= glidea 4 | FULL_IMAGE_NAME = $(REGISTRY)/$(IMAGE_NAME) 5 | 6 | 7 | .PHONY: test push dev-push 8 | 9 | test: 10 | go test -race -v -coverprofile=coverage.out -coverpkg=./... ./... 11 | 12 | push: 13 | docker buildx create --use --name multi-platform-builder || true 14 | docker buildx build --platform linux/amd64,linux/arm64 \ 15 | --build-arg VERSION=$(VERSION) \ 16 | -t $(FULL_IMAGE_NAME):$(VERSION) \ 17 | -t $(FULL_IMAGE_NAME):latest \ 18 | --push . 
19 | 20 | dev-push: 21 | docker buildx create --use --name multi-platform-builder || true 22 | docker buildx build --platform linux/amd64,linux/arm64 \ 23 | --build-arg VERSION=$(VERSION) \ 24 | -t $(FULL_IMAGE_NAME):$(VERSION) \ 25 | --push . 26 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | zenfeed-web: 3 | image: glidea/zenfeed-web:latest 4 | ports: 5 | - "1400:1400" 6 | environment: 7 | - PUBLIC_DEFAULT_API_URL=http://zenfeed:1300 8 | depends_on: 9 | - zenfeed 10 | restart: unless-stopped 11 | 12 | zenfeed: 13 | image: glidea/zenfeed:latest 14 | entrypoint: > 15 | sh -c " 16 | if [ ! -f /app/config/config.yaml ]; then 17 | echo 'Config file not found in volume, initializing from init config...' 18 | cp /app/config.init.yaml /app/config/config.yaml; 19 | else 20 | echo 'Existing config file found in volume.' 21 | fi && 22 | echo 'Starting Zenfeed...' && 23 | exec /app/zenfeed --config /app/config/config.yaml 24 | " 25 | configs: 26 | - source: zenfeed_init_config 27 | target: /app/config.init.yaml 28 | volumes: 29 | - data:/app/data 30 | - config:/app/config 31 | ports: 32 | - "1300:1300" 33 | - "1301:1301" 34 | - "9090:9090" 35 | depends_on: 36 | - rsshub 37 | restart: unless-stopped 38 | 39 | rsshub: 40 | image: diygod/rsshub:2024-12-14 41 | ports: 42 | - "1200:1200" 43 | environment: 44 | - NODE_ENV=production 45 | restart: unless-stopped 46 | 47 | volumes: 48 | data: {} 49 | config: {} 50 | 51 | configs: 52 | zenfeed_init_config: # After installation, you must modify the configuration through zenfeed or config volume. 
53 | content: | 54 | timezone: ${TZ:-Asia/Shanghai} 55 | llms: 56 | - name: general 57 | default: true 58 | provider: siliconflow 59 | model: Qwen/Qwen3-8B 60 | api_key: ${API_KEY:-your-api-key} 61 | - name: embed 62 | provider: siliconflow 63 | embedding_model: Pro/BAAI/bge-m3 64 | api_key: ${API_KEY:-your-api-key} 65 | scrape: 66 | rsshub_endpoint: http://rsshub:1200 67 | storage: 68 | feed: 69 | rewrites: 70 | - transform: 71 | to_text: 72 | prompt: | 73 | {{ .summary_html_snippet_for_small_model }} Respond in ${LANGUAGE:-Chinese} 74 | label: summary_html_snippet 75 | embedding_llm: embed 76 | notify: 77 | channels: 78 | email: 79 | feed_html_snippet_template: | 80 | {{ .summary_html_snippet }} 81 | -------------------------------------------------------------------------------- /docs/cherry-studio-mcp.md: -------------------------------------------------------------------------------- 1 | **Configure MCP Server** 2 | 3 | Default URL: `http://localhost:1301/sse` 4 | 5 | Cherry Studio MCP 6 | 7 | **Configure Prompt (Optional but recommended for optimal results)** 8 | 9 | For complete prompt, see [mcp-client-prompt.md](mcp-client-prompt.md) 10 | 11 | Cherry Studio MCP Prompt 12 | 13 | **Usage Examples** 14 | 15 | [Doc](preview.md) 16 | 17 | Very powerful - you can even directly modify zenfeed configuration settings -------------------------------------------------------------------------------- /docs/crawl-zh.md: -------------------------------------------------------------------------------- 1 | # 使用 Zenfeed 爬虫功能 2 | 3 | Zenfeed 提供了将网页内容抓取并转换为 Markdown 格式的功能。这主要通过重写规则 (`rewrites` rule) 中的 `transform.to_text.type` 配置项实现。 4 | 5 | ## 如何使用 6 | 7 | 在你的配置文件中,找到 `storage.feed.rewrites` 部分。当你定义一条重写规则时,可以通过 `transform` 字段来启用爬虫功能。 8 | 9 | 具体配置如下: 10 | 11 | ```yaml 12 | storage: 13 | feed: 14 | rewrites: 15 | - if: ["source=xxx", ...] 
16 | source_label: "link" # 指定包含 URL 的标签,例如 feed 中的 'link' 标签 17 | transform: 18 | to_text: 19 | type: "crawl" # 或 "crawl_by_jina" 20 | # llm: "your-llm-name" # crawl 类型不需要 llm 21 | # prompt: "your-prompt" # crawl 类型不需要 prompt 22 | # match: ".*" # 可选:对抓取到的 Markdown 内容进行匹配 23 | action: "create_or_update_label" # 对抓取到的内容执行的动作 24 | label: "crawled_content" # 将抓取到的 Markdown 存储到这个新标签 25 | # ... 其他配置 ... 26 | jina: # 如果使用 crawl_by_jina,并且需要更高的速率限制(匿名ip: 20 RPM),请配置 Jina API Token 27 | token: "YOUR_JINA_AI_TOKEN" # 从 https://jina.ai/api-dashboard/ 获取 28 | ``` 29 | 30 | ### 转换类型 (`transform.to_text.type`) 31 | 32 | 你有以下几种选择: 33 | 34 | 1. **`crawl`**: 35 | * Zenfeed 将使用内置的本地爬虫尝试抓取 `source_label` 中指定的 URL。 36 | * 它会尝试遵循目标网站的 `robots.txt` 协议。 37 | * 适用于静态网页或结构相对简单的网站。 38 | 39 | 2. **`crawl_by_jina`**: 40 | * Zenfeed 将通过 [Jina AI Reader API](https://jina.ai/reader/) 来抓取和处理 `source_label` 中指定的 URL。 41 | * Jina AI 可能能更好地处理动态内容和复杂网站结构。 42 | * 同样遵循目标网站的 `robots.txt` 协议。 43 | * **依赖 Jina AI 服务**: 44 | * 建议在配置文件的顶层添加 `jina.token` (如上示例) 来提供你的 Jina AI API Token,以获得更高的服务速率限制。 45 | * 如果未提供 Token,将以匿名用户身份请求,速率限制较低。 46 | * 请查阅 Jina AI 的服务条款和隐私政策。 47 | 48 | ### 关键配置说明 49 | 50 | * `source_label`: 此标签的值**必须是一个有效的 URL**。例如,如果你的 RSS Feed 中的 `link` 标签指向的是一篇包含完整文章的网页,你可以将 `source_label` 设置为 `link`。 51 | * `action`: 通常设置为 `create_or_update_label`,将抓取并转换后的 Markdown 内容存入一个新的标签中(由 `label` 字段指定)。 52 | * `label`: 指定存储抓取到的 Markdown 内容的新标签名称。 53 | 54 | ## 使用场景 55 | 56 | **全文内容提取**: 57 | 很多 RSS 源只提供文章摘要和原文链接。使用爬虫功能可以将原文完整内容抓取下来,转换为 Markdown 格式,方便后续的 AI 处理(如总结、打标签、分类等)或直接阅读。 58 | 59 | ## 免责声明 60 | 61 | **在使用 Zenfeed 的爬虫功能(包括 `crawl` 和 `crawl_by_jina` 类型)前,请仔细阅读并理解以下声明。您的使用行为即表示您已接受本声明的所有条款。** 62 | 63 | 1. **用户责任与授权**: 64 | * 您将对使用爬虫功能的所有行为承担全部责任。 65 | * 您必须确保拥有访问、抓取和处理所提供 URL 内容的合法权利。 66 | * 请严格遵守目标网站的 `robots.txt` 协议、服务条款 (ToS)、版权政策以及所有相关的法律法规。 67 | * 不得使用本功能处理、存储或分发任何非法、侵权、诽谤、淫秽或其他令人反感的内容。 68 | 69 | 2. 
**内容准确性与完整性**: 70 | * 网页抓取和 Markdown 转换过程的结果可能不准确、不完整或存在偏差。这可能受到目标网站结构、反爬虫机制、动态内容渲染、网络问题等多种因素的影响。 71 | * Zenfeed 项目作者和贡献者不对抓取内容的准确性、完整性、及时性或质量作任何保证。 72 | 73 | 3. **第三方服务依赖 (`crawl_by_jina`)**: 74 | * `crawl_by_jina` 功能依赖于 Jina AI 提供的第三方服务。 75 | * Jina AI 服务的可用性、性能、数据处理政策、服务条款以及可能的费用(超出免费额度后)均由 Jina AI 自行决定。 76 | * 项目作者和贡献者不对 Jina AI 服务的任何方面负责。请在使用前查阅 [Jina AI 的相关条款](https://jina.ai/terms/) 和 [隐私政策](https://jina.ai/privacy/)。 77 | 78 | 4. **无间接或后果性损害赔偿**: 79 | * 在任何情况下,无论基于何种法律理论,项目作者和贡献者均不对因使用或无法使用爬虫功能而导致的任何直接、间接、偶然、特殊、惩戒性或后果性损害负责,包括但不限于利润损失、数据丢失、商誉损失或业务中断。 80 | 81 | 5. **法律与合规风险**: 82 | * 未经授权抓取、复制、存储、处理或传播受版权保护的内容,或违反网站服务条款的行为,可能违反相关法律法规,并可能导致法律纠纷或处罚。 83 | * 用户需自行承担因使用爬虫功能而产生的所有法律风险和责任。 84 | 85 | 6. **"按原样"提供**: 86 | * 爬虫功能按"现状"和"可用"的基础提供,不附带任何形式的明示或默示担保。 87 | 88 | **强烈建议您在启用和配置爬虫功能前,仔细评估相关风险,并确保您的使用行为完全合法合规。对于任何因用户滥用或不当使用本软件(包括爬虫功能)而引起的法律纠纷、损失或损害,Zenfeed 项目作者和贡献者不承担任何责任。** 89 | -------------------------------------------------------------------------------- /docs/images/add-rss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/add-rss.png -------------------------------------------------------------------------------- /docs/images/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/arch.png -------------------------------------------------------------------------------- /docs/images/chat-with-feeds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/chat-with-feeds.png -------------------------------------------------------------------------------- /docs/images/cherry-studio-mcp-prompt.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/cherry-studio-mcp-prompt.png -------------------------------------------------------------------------------- /docs/images/cherry-studio-mcp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/cherry-studio-mcp.png -------------------------------------------------------------------------------- /docs/images/crad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/crad.png -------------------------------------------------------------------------------- /docs/images/daily-brief.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/daily-brief.png -------------------------------------------------------------------------------- /docs/images/feed-list-with-web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/feed-list-with-web.png -------------------------------------------------------------------------------- /docs/images/folo-html.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/folo-html.png -------------------------------------------------------------------------------- /docs/images/migrate-from-follow-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/migrate-from-follow-1.png -------------------------------------------------------------------------------- /docs/images/migrate-from-follow-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/migrate-from-follow-2.png -------------------------------------------------------------------------------- /docs/images/migrate-from-follow-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/migrate-from-follow-3.png -------------------------------------------------------------------------------- /docs/images/migrate-from-follow-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/migrate-from-follow-4.png -------------------------------------------------------------------------------- /docs/images/migrate-from-follow-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/migrate-from-follow-5.png -------------------------------------------------------------------------------- /docs/images/monitoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/monitoring.png -------------------------------------------------------------------------------- /docs/images/notification-with-web.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/notification-with-web.png -------------------------------------------------------------------------------- /docs/images/sponsor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/sponsor.png -------------------------------------------------------------------------------- /docs/images/update-config-with-web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/update-config-with-web.png -------------------------------------------------------------------------------- /docs/images/upgrade-from-v0.1.0-backup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/upgrade-from-v0.1.0-backup.png -------------------------------------------------------------------------------- /docs/images/web-add-source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/web-add-source.png -------------------------------------------------------------------------------- /docs/images/web-reading-aggr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/web-reading-aggr.png -------------------------------------------------------------------------------- /docs/images/wechat.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/glidea/zenfeed/0fc6d73b046e81d76854ef272db7b25bffc39159/docs/images/wechat.png -------------------------------------------------------------------------------- /docs/migrate-from-follow.md: -------------------------------------------------------------------------------- 1 | ## Export OPML File from Follow 2 | 3 | 4 | 5 | 6 | 7 | > Note: Make sure to fill in http://rsshub:1200 8 | 9 | ## Import to zenfeed-web 10 | 11 | 12 | -------------------------------------------------------------------------------- /docs/model-selection-zh.md: -------------------------------------------------------------------------------- 1 | 如果无需使用 HTML 总结,模型可以随便选择 2 | 3 | ## 背景 & 原则 4 | * Token 使用会很多,你可以想象每篇 RSS 都总结一遍会有多少消耗。所以优先选择免费模型,或者按次计费 5 | * HTML 生成对模型有较高要求。所以你现在知道了为什么自部署的默认总结效果比不上 https://zenfeed.xyz 6 | * 那为什么不支持 Markdown 呢?web 还没精力支持,你可以先用邮件日报替代 7 | * 总结都是后台任务,且支持有状态重试,对模型速率限制 & 稳定性没有要求 8 | * 所以 “1. 质量”,“2. 低价”,“3. 稳定”。首选 1,兼顾 2,无需 3 9 | 10 | ## 如果你对默认的模型效果不满意,首选推荐 11 | * **不缺钱 or “有路子”**:Gemini 2.5 Pro 12 | * **再便宜点的**:Gemini 2.5 Flash 13 | 14 | --- 15 | 16 | v0.4.0 优化之后,免费 qwen3 的效果应该已经可以满足大部分需求 -------------------------------------------------------------------------------- /docs/preview.md: -------------------------------------------------------------------------------- 1 | ## 信息监控 2 | ```yaml 3 | rules: 4 | - name: US Tariff Impact 5 | query: The various impacts and developments of recent US tariff policies, different perspectives, especially their impact on China 6 | ``` 7 | Monitoring 8 | 9 | ## 每日简报 10 | ```yaml 11 | rules: 12 | - name: Evening News 13 | every_day: "06:30~18:00" 14 | ``` 15 | Daily Brief 16 | 17 | ## Chat with feeds 18 | 19 | Chat with feeds 20 | 21 | ## 添加 RSS 订阅源 22 | > 如果你是 RSS 老司机,直接丢 RSS 地址,或者 OPML 文件给 AI 即可 23 | 24 | Add RSS 25 | 26 | ## 配合 zenfeed-web 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /docs/query-api-zh.md: 
-------------------------------------------------------------------------------- 1 | # Zenfeed Query API 使用教程 2 | 3 | Zenfeed Query API 允许用户通过多种条件检索存储的 Feed 数据。本教程将详细介绍如何使用此 API。 4 | 5 | ## 接口说明 6 | 7 | ### 请求 8 | 9 | * **方法**: `POST` 10 | * **URL**: `/query` 11 | * **Content-Type**: `application/json` 12 | 13 | ### 请求体 (JSON) 14 | 15 | ```json 16 | { 17 | "query": "string", 18 | "threshold": 0.55, 19 | "label_filters": ["string"], 20 | "summarize": false, 21 | "limit": 10, 22 | "start": "2006-01-02T15:04:05Z07:00", 23 | "end": "2006-01-02T15:04:05Z07:00" 24 | } 25 | ``` 26 | 27 | **字段说明:** 28 | 29 | * `query` (string, 可选): 30 | * 用于语义搜索的查询字符串。 31 | * 如果提供,必须至少包含 5 个字符。 32 | * 如果为空或未提供,则不进行语义搜索,仅根据其他条件(如标签、时间)过滤。 33 | * `threshold` (float32, 可选, 默认值: `0.55`): 34 | * 语义搜索的相关性阈值。 35 | * 取值范围: `[0, 1]`。 36 | * 仅当 `query` 字段非空时有效。 37 | * `label_filters` ([]string, 可选): 38 | * 一个字符串数组,用于根据 Feed 的标签进行过滤。 39 | * 每个过滤器的格式为: 40 | * `"key=value"`: 匹配标签 `key` 的值为 `value` 的 Feed。 41 | * `"key!=value"`: 匹配标签 `key` 的值不为 `value` 的 Feed。 42 | * 常用的 `key` 包括: 43 | * `source`: Feed 来源 44 | * `title`: Feed 标题 45 | * `你在 rewrite 阶段自定义创建的`:比如 category 46 | * 可以指定多个过滤器,它们之间是 "AND" 关系。 47 | * `summarize` (bool, 可选, 默认值: `false`): 48 | * 是否对查询结果进行摘要。 49 | * 如果为 `true`,系统将调用配置的 LLM (Large Language Model) 对返回的 Feed 内容进行总结。 50 | * `limit` (int, 可选, 默认值: `10`): 51 | * 返回 Feed 结果的最大数量。 52 | * 取值范围: `[1, 500]`。 53 | * `start` (string, 可选, 默认值: 24小时前): 54 | * 查询的时间范围的开始时间(包含)。 55 | * 格式为 RFC3339 (例如: `"2023-10-26T10:00:00Z"`)。 56 | * `end` (string, 可选, 默认值: 当前时间): 57 | * 查询的时间范围的结束时间(不包含)。 58 | * 格式为 RFC3339 (例如: `"2023-10-27T10:00:00Z"`)。 59 | * `end` 时间必须晚于 `start` 时间。 60 | 61 | ### 响应体 (JSON) 62 | 63 | ```json 64 | { 65 | "summary": "string", 66 | "feeds": [ 67 | { 68 | "labels": { 69 | "type": "rss", 70 | "source": "Example News", 71 | "title": "Breaking News: AI Revolutionizes Everything", 72 | "link": "http://example.com/news/123", 73 | "pub_time": "2023-10-26T09:30:00Z", 74 | "content": 
"Detailed content of the news article..." 75 | }, 76 | "time": "2023-10-26T10:15:30+08:00", 77 | "score": 0.85 78 | } 79 | ], 80 | "count": 1 81 | } 82 | ``` 83 | 84 | **字段说明:** 85 | 86 | * `summary` (string, 可选): 87 | * 如果请求中的 `summarize` 为 `true` 且成功生成摘要,此字段将包含 LLM 生成的内容摘要。 88 | * 如果生成摘要失败,可能包含错误信息。 89 | * `feeds` ([]object, 必须): 90 | * 一个对象数组,每个对象代表一个符合查询条件的 Feed。 91 | * **Feed 对象结构**: 92 | * `labels` (object): Feed 的元数据标签,键值对形式。 93 | * `type` (string): Feed 类型。 94 | * `source` (string): Feed 来源。 95 | * `title` (string): Feed 标题。 96 | * `link` (string): Feed 原始链接。 97 | * `pub_time` (string): Feed 发布时间。 98 | * `content` (string): Feed 内容。 99 | * ... (其他自定义标签) 100 | * `time` (string): Feed 被系统记录或处理的时间戳 (RFC3339 格式,通常为服务器本地时区)。 101 | * `score` (float32, 可选): 102 | * 当请求中提供了 `query` (进行了语义搜索) 时,此字段表示该 Feed 与查询的相关性得分。 103 | * 得分越高,相关性越强。 104 | * `count` (int, 必须): 105 | * 返回的 `feeds` 数组中的 Feed 数量。 106 | 107 | ## `curl` 示例 108 | 109 | 以下示例假设 Zenfeed 服务运行在 `http://localhost:1300`。 110 | 111 | ### 1. 基本查询 (获取最近10条记录) 112 | 113 | 获取最近(默认24小时内)的最多10条 Feed。 114 | 115 | ```bash 116 | curl -X POST http://localhost:1300/query \ 117 | -H "Content-Type: application/json" \ 118 | -d '{}' 119 | ``` 120 | 121 | ### 2. 语义搜索 122 | 123 | 查询与 "人工智能最新进展" 相关的 Feed,并设置相关性阈值为 `0.7`。 124 | 125 | ```bash 126 | curl -X POST http://localhost:1300/query \ 127 | -H "Content-Type: application/json" \ 128 | -d '{ 129 | "query": "人工智能最新进展", 130 | "threshold": 0.7 131 | }' 132 | ``` 133 | 134 | ### 3. 带标签过滤的查询 135 | 136 | 查询类型为 "rss" 且来源不是 "SpecificSource" 的 Feed。 137 | 138 | ```bash 139 | curl -X POST http://localhost:1300/query \ 140 | -H "Content-Type: application/json" \ 141 | -d '{ 142 | "label_filters": [ 143 | "type=rss", 144 | "source!=SpecificSource" 145 | ] 146 | }' 147 | ``` 148 | 149 | ### 4. 
带时间范围的查询 150 | 151 | 查询 2023年10月25日 00:00:00 UTC 到 2023年10月26日 00:00:00 UTC 之间的 Feed。 152 | 153 | ```bash 154 | curl -X POST http://localhost:1300/query \ 155 | -H "Content-Type: application/json" \ 156 | -d '{ 157 | "start": "2023-10-25T00:00:00Z", 158 | "end": "2023-10-26T00:00:00Z" 159 | }' 160 | ``` 161 | 162 | ### 5. 组合查询示例 163 | 164 | 查询过去3天内,与 "开源项目" 相关的 Feed,来源为 "github_trending",并获取摘要,最多返回20条。 165 | 166 | ```bash 167 | # 假设今天是 2023-10-28 168 | curl -X POST http://localhost:1300/query \ 169 | -H "Content-Type: application/json" \ 170 | -d '{ 171 | "query": "最近的热门开源项目", # 尽可能详细,获得最佳搜索效果 172 | "threshold": 0.6, 173 | "label_filters": ["source=github_trending"], 174 | "summarize": true, 175 | "limit": 20, 176 | "start": "2023-10-25T00:00:00Z", # 手动计算或动态生成 177 | "end": "2023-10-28T00:00:00Z" # 手动计算或动态生成 178 | }' 179 | ``` 180 | -------------------------------------------------------------------------------- /docs/roadmap-zh.md: -------------------------------------------------------------------------------- 1 | ## 短期 2 | * 播客 3 | * NotebookLM 的播客效果让人惊艳 4 | * 技术上复刻一个并不难,难的是没有又便宜效果又好的 TTS API(只用得起小帅的声音😭) 5 | * TTS 音色进步也只是近几年的事情,长期需要等成本下降 6 | * 短期因为我个人很喜欢播客总结(应该也很适合大家通勤),会先本地部署模型,提供给 https://zenfeed.xyz 使用 7 | 8 | * epub2rss 9 | * 见过 rss2epub,但你绝没见过反着来的 10 | * 严格上这并不属于 zenfeed,顶多算生态项目吧 11 | * 抛开时效性,书比新闻更有价值。但当你立下 “坚持阅读” 的 flag,然后呢? 12 | * 这个子项目旨在实现:每日更新一章,作为 rss 暴露。在阅读新闻 RSS 时,“顺便” 把书给看了 13 | * 这里遵循《掌控习惯》的几个原理 14 | * 让它显而易见:在你的新闻阅读器里 15 | * 让它简便易行:配合 zenfeed 总结,更轻松地阅读要点(进一步了解原文逃不掉,但这时你已经被勾住了,相信这事已经没那么困难了) 16 | * 让你感觉到爽:zenfeed 阅读完后的木鱼声,嗯这算一个,确信 17 | 18 | * 提供更多玩法指导 19 | * zenfeed 定位是信息管理引擎,普通用户反而搞不清楚状况 20 | * 短期并不会考虑做一个没有使用心智成本的 “产品”,但我可以分享一些垂直的使用案例技巧 21 | > 灵光一现:最近喜欢上和豆包聊新闻了,或许可以分享下如何把 zenfeed 数据接入豆包 22 | 23 | ## 中长期 24 | * 更易用的 Web(但坦诚地讲目前优先级比较低,更鼓励调用后端 api,构建一个属于你的 web) 25 | * 主题研究报告 26 | * 屏蔽 or follow 相关新闻后续 27 | * 相关性聚合阅读 28 | ![](images/web-reading-aggr.png) 29 | > P.S. 相关功能已经实现,只不过没有下放到 Web 30 | 31 | --- 32 | 33 | 如果你觉得 zenfeed 很酷,并且有意愿贡献,请联系我! 
34 | -------------------------------------------------------------------------------- /docs/rss-api-zh.md: -------------------------------------------------------------------------------- 1 | # 托管源 2 | 3 | ## Folo 4 | 5 | 直接搜索 zenfeed 6 | 7 | ## Other 8 | 9 | ```bash 10 | https://zenfeed.xyz/rss?.... 参数用法见下方《自部署》 11 | 12 | https://zenfeed.xyz/rss?label_filter=source=知乎热榜 # 你在 zenfeed.xyz 中看到的源名称 13 | 14 | https://zenfeed.xyz/rss?query=AI # 语义搜索。请不要滥用,成本 cover 不住可能随时下线 15 | ``` 16 | 17 | # 自部署 18 | 19 | ## 1. 配置(可选) 20 | 21 | ```yaml 22 | api: 23 | rss: 24 | content_html_template: | # 可自由排版搭配(go template 语法);需要确保渲染后的内容是正确的 HTML 25 | {{ .summary_html_snippet }} # 默认值 26 | ``` 27 | 28 | ## 2. enjoy RSS address! 29 | 30 | ```bash 31 | your_zenfeed_address/rss?label_filter=label1=value1&label_filter=label2!=value2&query=xxx 32 | 33 | # e.g. 34 | 35 | ## Past 24h rss feed for GithubTrending 36 | http://localhost:1302/rss?label_filter=source=GithubTrending 37 | 38 | ## Past 24h rss feed for Tech category 39 | http://localhost:1302/rss?label_filter=category=Tech 40 | 41 | ## Past 24h rss feed for dynamic query 42 | http://localhost:1302/rss?query=特朗普最新消息 43 | ``` 44 | 45 | # FAQ 46 | 47 | ## 添加失败怎么办? 48 | 49 | 部分 RSS 阅读器通过服务端间接访问 RSS 地址,如果 zenfeed 部署到本地,将无法访问 50 | 51 | 你需要通过内网穿透,或者 VPS 暴露到公网上,注意仅暴露 1302 端口 52 | 53 | ## Folo 看起来只有纯文本? 54 | 55 | ![](images/folo-html.png) 56 | 57 | ## 暗黑模式显示有问题? 
58 | 59 | 嗯就是有问题,请使用白底背景,否则样式渲染会出现问题 60 | -------------------------------------------------------------------------------- /docs/tech/hld-zh.md: -------------------------------------------------------------------------------- 1 | > 适用版本:v0.2.2 2 | 3 | ```mermaid 4 | graph TD 5 | subgraph User_Interactions 6 | WebUI["Web UI (zenfeed-web)"] 7 | MCPClient["MCP Client"] 8 | end 9 | 10 | subgraph Zenfeed_Core_Services 11 | HTTPServer["HTTP Server (pkg/api/http)"] 12 | MCPServer["MCP Server (pkg/api/mcp)"] 13 | API["API Service (pkg/api)"] 14 | end 15 | 16 | subgraph Data_Processing_Storage_Main 17 | ScraperManager["Scraper Manager (pkg/scrape)"] 18 | Rewriter["Rewriter (pkg/rewrite)"] 19 | FeedStorage["Feed Storage (pkg/storage/feed)"] 20 | LLMFactory["LLM Factory (pkg/llm)"] 21 | KVStorage["KV Storage (pkg/storage/kv)"] 22 | end 23 | 24 | subgraph FeedStorage_Internals 25 | Block["Block (pkg/storage/feed/block)"] 26 | ChunkFile["ChunkFile (pkg/storage/feed/block/chunk)"] 27 | PrimaryIndex["Primary Index (pkg/storage/feed/block/index/primary)"] 28 | InvertedIndex["Inverted Index (pkg/storage/feed/block/index/inverted)"] 29 | VectorIndex["Vector Index (pkg/storage/feed/block/index/vector)"] 30 | end 31 | 32 | subgraph Scheduling_Notification 33 | Scheduler["Scheduler (pkg/schedule)"] 34 | Notifier["Notifier (pkg/notify)"] 35 | NotifyChan["(Go Channel for Results)"] 36 | EmailChannel["Email Channel (pkg/notify/channel)"] 37 | end 38 | 39 | ConfigManager["Config Manager (pkg/config)"] 40 | 41 | ExternalDataSources["External Data Sources (RSS Feeds, RSSHub)"] 42 | LLMProviders["LLM Providers (OpenAI, Gemini, etc.)"] 43 | EmailServiceProvider["Email Service Provider (SMTP)"] 44 | 45 | WebUI --> HTTPServer 46 | MCPClient --> MCPServer 47 | HTTPServer --> API 48 | MCPServer --> API 49 | 50 | API --> ConfigManager 51 | API --> FeedStorage 52 | API --> LLMFactory 53 | 54 | ScraperManager --> ExternalDataSources 55 | ScraperManager --> KVStorage 56 | ScraperManager --> 
FeedStorage 57 | 58 | FeedStorage --> Rewriter 59 | FeedStorage --> LLMFactory 60 | FeedStorage --> KVStorage 61 | FeedStorage --> Block 62 | 63 | Block --> ChunkFile 64 | Block --> PrimaryIndex 65 | Block --> InvertedIndex 66 | Block --> VectorIndex 67 | 68 | Rewriter --> LLMFactory 69 | 70 | Scheduler --> FeedStorage 71 | Scheduler --> NotifyChan 72 | Notifier --> NotifyChan 73 | Notifier --> LLMFactory 74 | Notifier --> EmailChannel 75 | Notifier --> KVStorage 76 | EmailChannel --> EmailServiceProvider 77 | 78 | ConfigManager --> HTTPServer 79 | ConfigManager --> MCPServer 80 | ConfigManager --> API 81 | ConfigManager --> ScraperManager 82 | ConfigManager --> Rewriter 83 | ConfigManager --> FeedStorage 84 | ConfigManager --> LLMFactory 85 | ConfigManager --> Scheduler 86 | ConfigManager --> Notifier 87 | 88 | LLMFactory --> LLMProviders 89 | LLMFactory --> KVStorage 90 | ``` 91 | 92 | ## 技术特点 93 | 94 | * 零外部依赖 95 | * Golang 资源占用少于采用 Python 的竞品 96 | * 采用模块化、面向服务的架构,各组件职责清晰 97 | * 系统配置集中管理,并支持热重载,实现动态调整 98 | * 提供灵活的内容重写管道,可自定义处理流程 99 | * Feed 数据按时间分块存储,支持高效索引与生命周期管理 100 | * 支持基于向量嵌入的语义搜索能力 101 | * 通过可配置的抓取器和 RSSHub 集成,支持多样化的数据源 102 | * 基于规则的调度引擎,实现灵活的事件监控与查询 103 | * 可定制的通知路由和多渠道通知发送机制 104 | * 实现 MCP (Model Context Protocol) 服务端,便于外部工具集成 105 | * 提供统一的 API 接口层,解耦核心业务与通信协议 106 | * 内置通用键值存储,用于缓存和持久化辅助状态 107 | 108 | ## 组件说明 109 | 110 | 1. **配置管理器 (ConfigManager - `pkg/config.Manager`)** 111 | * 负责加载、管理和热更新应用的整体配置 (通常存储在 `config.yaml` 中)。其他组件订阅配置变更,以便动态调整其行为。是系统动态性的基础。 112 | 113 | 2. **键值存储 (KVStorage - `pkg/storage/kv.Storage`)** 114 | * 提供一个通用的键值存储服务。用于存储临时状态、缓存(如 LLM 调用、RSSHub 响应)、小型元数据、以及一些组件的运行状态(如 Scraper 的最后抓取时间、Notifier 的通知发送记录)。 115 | 116 | 3. **大语言模型工厂 (LLMFactory - `pkg/llm.Factory`)** 117 | * 管理和提供大语言模型 (LLM) 的实例。它根据配置初始化不同的 LLM 客户端 (如 OpenAI, Gemini, SiliconFlow 等),并向上层组件 (如 `Rewriter`, `FeedStorage`, `Notifier`) 提供统一的 LLM 调用接口。这些接口用于文本生成、内容摘要、向量嵌入等 AI 处理任务。此外,LLM 配置可以动态切换或更新。 118 | 119 | 4.
**内容重写器 (Rewriter - `pkg/rewrite.Rewriter`)** 120 | * 根据用户在配置文件中定义的重写规则 (Rewrite Rules),对原始 Feed 内容进行管道式处理。每个规则可以针对 Feed 的特定标签 (如标题、正文),通过调用 `LLMFactory` 提供的模型执行操作 (如评分、分类、摘要、过滤、添加新标签等)。处理后的 Feed 用于存储或进一步的逻辑判断。 121 | 122 | 5. **Feed 存储 (FeedStorage - `pkg/storage/feed.Storage`)** 123 | * 负责持久化存储经过 `Rewriter` 处理后的 Feed 数据,并提供高效的查询接口。它管理着 Feed 数据的生命周期和存储结构。 124 | * **关键子组件**: 125 | * **Block (`pkg/storage/feed/block.Block`)**: `FeedStorage` 将数据按时间组织成多个 `Block`。每个 `Block` 代表一个时间段内的数据 (例如,过去 25 小时)。这种设计有助于数据的管理,如按时间归档、删除过期数据,并能独立处理冷热数据。 126 | * **ChunkFile (`pkg/storage/feed/block/chunk.File`)**: 在每个 `Block` 内部,实际的 Feed 内容(经过序列化,包含所有标签和时间戳)存储在 `ChunkFile` 中。这是一种紧凑的存储方式,支持高效的追加和按偏移读取。 127 | * **Primary Index (`pkg/storage/feed/block/index/primary.Index`)**: 为每个 `Block` 内的 Feed 提供主键索引。它将全局唯一的 Feed ID 映射到该 Feed 在对应 `ChunkFile` 中的具体位置(如偏移量),实现通过 ID 快速定位 Feed 数据。 128 | * **Inverted Index (`pkg/storage/feed/block/index/inverted.Index`)**: 为每个 `Block` 内的 Feed 标签建立倒排索引。它将标签的键值对映射到包含这些标签的 Feed ID 列表,从而能够根据标签条件快速过滤 Feed。 129 | * **Vector Index (`pkg/storage/feed/block/index/vector.Index`)**: 为每个 `Block` 内的 Feed(或其内容切片)存储由 `LLMFactory` 生成的向量嵌入。它支持高效的近似最近邻搜索,从而实现基于语义相似度的 Feed 查询。 130 | 131 | 6. **API 服务 (API - `pkg/api.API`)** 132 | * 提供核心的业务逻辑接口层,供上层服务 (如 `HTTPServer`, `MCPServer`) 调用,解耦核心业务逻辑与具体的通信协议。接口功能包括:应用配置的查询与动态应用、RSSHub 相关信息的查询、Feed 数据的写入与多维度查询等。此组件会响应配置变更,并将其传递给其依赖的下游组件。 133 | 134 | 7. **HTTP 服务 (HTTPServer - `pkg/api/http.Server`)** 135 | * 暴露一个 HTTP/JSON API 接口,主要供 Web 前端 (`zenfeed-web`) 或其他HTTP客户端使用。用户通过此接口进行如添加订阅源、配置监控规则、查看 Feed 列表、管理应用配置等操作。它依赖 `API` 组件来执行实际的业务逻辑。 136 | 137 | 8. **MCP 服务 (MCPServer - `pkg/api/mcp.Server`)** 138 | * 实现 Model Context Protocol (MCP) 服务端。这使得 Zenfeed 的数据可以作为上下文源被外部应用或 LLM 集成。 139 | 140 | 9. 
**抓取管理器 (ScraperManager - `pkg/scrape.Manager`)** 141 | * 负责管理和执行从各种外部数据源 (主要是 RSS Feed,支持通过 RSSHub 扩展源) 抓取内容的任务。它根据配置中定义的来源和抓取间隔,定期或按需从指定的 URL 或 RSSHub 路由抓取最新的 Feed 数据。抓取到的原始数据会提交给 `FeedStorage` 进行后续的重写处理和存储。 142 | * **关键子组件**: 143 | * **Scraper (`pkg/scrape/scraper.Scraper`)**: 每个配置的数据源会对应一个 `Scraper` 实例,负责该特定源的抓取逻辑和调度。 144 | * **Reader (`pkg/scrape/scraper/source.go#reader`)**: `Scraper` 内部使用不同类型的 `reader` (如针对标准 RSS URL 的 reader,针对 RSSHub 路径的 reader) 来实际获取数据。 145 | 146 | 10. **调度器 (Scheduler - `pkg/schedule.Scheduler`)** 147 | * 根据用户配置的调度规则 (Scheduls Rules) 定期执行查询任务。这些规则定义了特定的查询条件,如语义关键词 (基于向量搜索)、标签过滤、以及时间范围等。当 `FeedStorage` 中有符合规则条件的 Feed 数据时,调度器会将这些结果 (封装为 `rule.Result`) 通过一个内部 Go Channel (`notifyChan`) 发送给 `Notifier` 组件进行后续处理。 148 | * **关键子组件**: 149 | * **Rule (`pkg/schedule/rule.Rule`)**: 每个调度配置对应一个 `Rule` 实例,封装了该规则的查询逻辑和执行计划。 150 | 151 | 11. **通知器 (Notifier - `pkg/notify.Notifier`)** 152 | * 监听来自 `Scheduler` 的 `notifyChan`。接收到 `rule.Result` 后,它会根据通知路由 (NotifyRoute) 配置对 Feed 进行分组、聚合。为了生成更精炼的通知内容,它可能会再次调用 `LLMFactory` 进行摘要。最终,通过配置的通知渠道 (NotifyChannels) 将处理后的信息发送给指定的接收者 (NotifyReceivers)。其发送状态或去重逻辑可能利用 `KVStorage`。 153 | * **关键子组件**: 154 | * **Router (`pkg/notify/route.Router`)**: 根据配置的路由规则,将 `rule.Result` 中的 Feed 分配到不同的处理流程或目标接收者。 155 | * **Channel (`pkg/notify/channel.Channel`)**: 代表具体的通知发送方式,例如 `EmailChannel` 负责通过 SMTP 发送邮件。 156 | -------------------------------------------------------------------------------- /docs/tech/rewrite-zh.md: -------------------------------------------------------------------------------- 1 | > 适用版本:v0.2.2 2 | 3 | `rewrite` 组件是 zenfeed 中负责对信息流内容进行动态处理和转换的核心模块。它允许用户通过声明式的规则配置,利用大型语言模型 (LLM) 等工具,对内容的元数据(标签)进行修改、丰富、过滤,甚至决定是否丢弃某条信息。 4 | 5 | ## 1. 
设计理念与哲学 6 | 7 | * **Prometheus 的 `relabel_config`**: 借鉴其强大的标签重写能力。在 Prometheus 中,`relabel_config` 允许用户在采集指标前后动态地修改标签集,实现服务发现、指标过滤和路由等高级功能。`rewrite` 组件将此思想应用于信息流处理,将每一条信息(如一篇文章、一个帖子)视为一个标签集,通过规则来操作这些标签。 8 | * **管道 (Pipeline) 处理模式**: 信息的处理过程被设计成一个可配置的 ETL 管道。每个规则是管道中的一个处理阶段,信息流经这些规则,逐步被转换和打标。这种模式使得复杂的处理逻辑可以被分解为一系列简单、独立的步骤,易于理解和维护。 9 | * **AI 能力的模块化与按需应用**: 大型语言模型 (LLM) 被视为一种强大的"转换函数"。用户可以根据需求,在规则中指定使用哪个 LLM、配合什么样的提示词 (Prompt) 来处理特定的文本内容(例如,从文章正文生成摘要、分类、评分等)。这种设计使得 AI 能力可以灵活地嵌入到信息处理的任意环节。 10 | * **内容即标签 (Content as Labels)**: 这是 zenfeed 的一个核心抽象。原始信息(如标题、正文、链接、发布时间)和经过 AI 或规则处理后产生的衍生信息(如类别、标签、评分、摘要)都被统一表示为键值对形式的"标签"。这种统一表示简化了后续的查询、过滤、路由和展示逻辑。 11 | * **声明式配置优于命令式代码**: 用户通过 YAML 配置文件定义重写规则,而不是编写代码来实现处理逻辑。这降低了使用门槛,使得非程序员也能方便地定制自己的信息处理流程,同时也使得配置更易于管理和版本控制。 12 | 13 | > 简单说这是一条专门针对 Feed 处理的可配置工作流 14 | 15 | ## 2. 业务流程 16 | 17 | 内容重写组件的核心工作流程是接收一个代表信息单元的标签集 (`model.Labels`),然后按顺序应用预定义的重写规则 (`Rule`),最终输出一个经过修改的标签集,或者指示该信息单元应被丢弃。 18 | 19 | 其处理流程可以概括为: 20 | 21 | 1. **接收标签集**: 组件的入口是一个 `model.Labels` 对象,代表待处理的信息单元。 22 | 2. **顺序应用规则**: 系统会遍历用户配置的每一条 `Rule`。 23 | 3. **规则评估与执行**: 对于每一条规则,系统会: 24 | * **定位源文本**: 根据规则指定的 `source_label` (默认为 `content`),找到相应的文本内容。 25 | * **条件检查**: 检查源文本是否满足规则中声明的 `skip_too_short_threshold`(最小长度,默认为300字符)。若不满足,则跳过当前规则。 26 | * **文本转换 (可选)**: 若规则声明了 `transform` (如通过 `to_text` 使用 LLM 和特定 `Prompt` 进行处理),则源文本会被转换为新文本。此转换结果将用于后续的匹配。 27 | * **模式匹配**: 使用规则中声明的 `match` 正则表达式 (默认为 `.*`) 来匹配(可能已被转换过的)文本。若不匹配,则跳过当前规则。 28 | * **执行动作**: 若文本匹配成功,则执行规则声明的 `Action`: 29 | * `ActionDropFeed`: 指示应丢弃当前信息单元,处理流程终止。 30 | * `ActionCreateOrUpdateLabel`: 使用(可能已被转换过的)匹配文本,为规则中指定的 `Label` 创建或更新标签值。 31 | 4. **输出结果**: 32 | * 若所有规则处理完毕且未触发 `ActionDropFeed`,则返回最终修改并排序后的 `model.Labels`。 33 | * 若任一规则触发 `ActionDropFeed`,则返回 `nil`,表示丢弃。 34 | * 处理过程中若发生错误(如 LLM 调用失败),则会中止并返回错误。 35 | 36 | 37 | ## 3. 
使用示例 38 | 39 | 以下是一些如何使用 `rewrite` 规则的场景示例: 40 | 41 | ### 示例 1: 内容分类打标 42 | 43 | * **目标**: 根据文章内容,自动为其添加一个 `category` 标签,如 "Technology", "Finance" 等。 44 | * **规则配置 (概念性)**: 45 | ```yaml 46 | - source_label: "content" # 使用文章正文作为分析源 47 | transform: 48 | to_text: 49 | llm: "qwen-default" # 使用名为 "qwen-default" 的 LLM 配置 50 | prompt: "category" # 使用预设的 "category" prompt 模板 51 | match: ".+" # 匹配 LLM 返回的任何非空分类结果 52 | action: "create_or_update_label" 53 | label: "category" # 新标签的键为 "category" 54 | ``` 55 | * **效果**: 如果一篇文章内容是关于人工智能的,LLM 可能会返回 "Technology"。经过此规则处理后,文章的标签集会增加或更新一个标签,例如 `{"category", "Technology"}`。 56 | 57 | ### 示例 2: 基于 LLM 评分过滤低质量内容 58 | 59 | * **目标**: 让 LLM 对文章内容进行评分 (0-10),如果评分低于 4,则丢弃该文章。 60 | * **规则配置 (包含两条规则)**: 61 | 62 | * **规则 2.1: 内容评分** 63 | ```yaml 64 | - source_label: "content" 65 | transform: 66 | to_text: 67 | llm: "qwen-default" 68 | prompt: "score" # 使用预设的 "score" prompt 模板 69 | match: "^([0-9]|10)$" # 确保 LLM 返回的是 0-10 的数字 70 | action: "create_or_update_label" 71 | label: "ai_score" # 将评分结果存入 "ai_score" 标签 72 | ``` 73 | * **规则 2.2: 根据评分过滤** 74 | ```yaml 75 | - source_label: "ai_score" # 使用上一条规则生成的评分作为判断依据 76 | # 无需 Transform 77 | match: "^[0-3]$" # 匹配 0, 1, 2, 3 分 78 | action: "drop_feed" # 丢弃这些低分文章 79 | ``` 80 | * **效果**: 文章首先会被 LLM 评分并打上 `ai_score` 标签。如果该评分值在 0 到 3 之间,第二条规则会将其丢弃。 81 | 82 | ### 示例 3: 基于特定标签值添加新标签 83 | 84 | * **目标**: 如果文章的 `source` 标签值是 "Hacker News",则添加一个新标签 `source_type: "community"`。 85 | * **注意**: 当前 `ActionCreateOrUpdateLabel` 会将匹配成功的 `text` (即 `source_label` 的值或其转换结果)作为新标签的值。若要实现固定值标签,需要通过 LLM 转换。 86 | * **规则配置 (通过 LLM 实现映射)**: 87 | ```yaml 88 | - source_label: "source" # 源标签是 "source" 89 | transform: 90 | to_text: 91 | llm: "qwen-mini" 92 | # Prompt 需要精心设计,告诉 LLM 如何根据输入映射到输出 93 | # 例如,Prompt 可以包含类似 "If input is 'Hacker News', output 'community'. If input is 'GitHub Trending', output 'code'." 的逻辑 94 | prompt: | 95 | Analyze the input, which is a news source name. 96 | If the source is "Hacker News", output "community". 
97 | If the source is "GitHub Trending", output "code". 98 | If the source is "V2EX", output "community". 99 | Otherwise, output "unknown". 100 | Return ONLY the type, no other text. 101 | match: "^(community|code|unknown)$" # 确保 LLM 输出的是预期的类型 102 | action: "create_or_update_label" 103 | label: "source_type" # 新标签的键 104 | ``` 105 | * **效果**: 如果某文章的 `source` 标签值为 "Hacker News",经过 LLM 处理后(理想情况下)会输出 "community",然后 `source_type` 标签会被设置为 `{"source_type", "community"}`。 106 | 107 | 这些示例展示了 `rewrite` 组件的灵活性和强大功能,通过组合不同的源标签、转换、匹配条件和动作,可以实现复杂的内容处理和信息增强逻辑。 108 | 109 | 110 | -------------------------------------------------------------------------------- /docs/tech/testing-zh.md: -------------------------------------------------------------------------------- 1 | # Zenfeed 最新测试策略与风格 2 | > 适用版本:v0.2.2 3 | 4 | ## 1. 引言 5 | 6 | Zenfeed 的测试策略核心目标是: 7 | 8 | * **清晰性 (Clarity)**:测试本身应如文档般易于理解,清晰地表达被测功能的行为和预期。 9 | * **可信性 (Reliability)**:测试结果应准确反映代码的健康状况,确保每次提交的信心。 10 | * **可维护性 (Maintainability)**:测试代码应易于修改和扩展,以适应项目的持续演进。 11 | 12 | 本指南旨在详细介绍 Zenfeed 项目所遵循的测试理念、风格和具体实践。 13 | 14 | ## 2. 
核心测试理念与风格 15 | 16 | Zenfeed 的测试方法论深受行为驱动开发 (BDD) 的影响,并结合了表驱动测试等高效实践。 17 | 18 | ### 2.1 行为驱动开发 19 | 20 | 我们选择 BDD 作为核心的测试描述框架,主要基于以下原因(其理念也体现在 `pkg/test/test.go` 的 `Case` 结构设计中): 21 | 22 | * **提升可读性 (Enhanced Readability)**:BDD 强调使用自然语言描述软件的行为。每个测试用例读起来都像一个用户故事或一个功能说明,这使得测试本身就成为了一种精确的"活文档"。 23 | * **关注行为 (Focus on Behavior)**:测试不再仅仅是验证代码片段的输入输出,而是从模块、组件或用户交互的层面描述其应有的行为。这有助于确保我们构建的功能符合预期。 24 | * **需求驱动 (Requirement-Driven)**:测试直接对应需求描述,而非实现细节。这种自顶向下的方法确保了测试的稳定性,即使内部实现重构,只要行为不变,测试依然有效。 25 | 26 | BDD 通常使用 `Scenario`, `Given`, `When`, `Then` 的结构来组织测试: 27 | 28 | * **`Scenario` (场景)**:描述测试用例所针对的特性或功能点。 29 | * 例如:`"Query hot block with label filters"` (查询带标签过滤的热数据块) 30 | * **`Given` (给定)**:描述场景开始前的初始上下文或状态(**注意:这不是指方法的输入参数**)。 31 | * 例如:`"a hot block with indexed feeds"` (一个已索引了 Feed 的热数据块) 32 | * **`When` (当)**:描述触发场景的事件或操作(**这部分通常包含被测方法的输入参数**)。 33 | * 例如:`"querying with label filters"` (当使用标签过滤器进行查询时) 34 | * **`Then` (那么)**:描述场景结束后预期的结果或状态变化。 35 | * 例如:`"should return matching feeds"` (那么应该返回匹配的 Feed) 36 | 37 | 为了更好地在代码中实践 BDD,我们定义了 `pkg/test/test.go` 中的 `Case[GivenDetail, WhenDetail, ThenExpected]` 泛型结构。其中: 38 | 39 | * `GivenDetail`: 存储 `Given` 子句描述的初始状态的具体数据。 40 | * `WhenDetail`: 存储 `When` 子句描述的事件或方法调用的具体参数。 41 | * `ThenExpected`: 存储 `Then` 子句描述的预期结果。 42 | 43 | 这种结构化不仅增强了测试数据的类型安全,也使得测试用例的意图更加明确。对于需要模拟依赖项的组件,`GivenDetail` 通常会包含用于配置这些模拟行为的 `component.MockOption`,我们将在后续 Mocking 章节详细讨论。 44 | 45 | ### 2.2 表驱动测试 46 | 47 | 当一个功能或方法需要针对多种不同的输入组合、边界条件或状态进行测试时,表驱动测试是一种非常高效和整洁的组织方式。 48 | 49 | * **简洁性 (Conciseness)**:将所有测试用例的数据(输入、参数、预期输出)集中定义在一个表格(通常是切片)中,避免了为每个 case编写大量重复的测试逻辑。 50 | * **易扩展性 (Extensibility)**:添加新的测试场景变得非常简单,只需在表格中增加一条新记录即可。 51 | * **清晰性 (Clarity)**:所有相关的测试用例一目了然,便于快速理解被测功能的覆盖范围。 52 | 53 | **实践约定**: 54 | 在 Zenfeed 中,**当存在多个测试用例时,必须使用表驱动测试**。 55 | 56 | ### 2.3 测试结构约定 57 | 58 | 为了保持项目范围内测试代码的一致性和可读性,我们约定在测试文件中遵循以下组织结构: 59 | 60 | 1. **定义辅助类型 (Define Helper Types)**:在测试函数的开头部分,通常会为 `GivenDetail`, `WhenDetail`, `ThenExpected` 定义具体的结构体类型,以增强类型安全和表达力。 61 | 2. 
**定义测试用例表 (Define Test Case Table)**:将所有测试用例集中定义在一个 `[]test.Case` 类型的切片中。 62 | 3. **循环执行测试 (Loop Through Test Cases)**:使用 `for` 循环遍历测试用例表,并为每个用例运行 `t.Run(tt.Scenario, func(t *testing.T) { ... })`。 63 | 4. **清晰的 G/W/T 逻辑块 (Clear G/W/T Blocks)**:在每个 `t.Run` 的匿名函数内部,根据需要组织代码块,以对应 `Given`(准备初始状态,通常基于 `tt.GivenDetail`),`When`(执行被测操作,通常使用 `tt.WhenDetail`),和 `Then`(断言结果,通常对比 `tt.ThenExpected`)。 64 | 5. **描述性变量名 (Descriptive Variable Names)**:使用与 BDD 术语(如 `given`, `when`, `then`, `expected`, `actual`)相匹配或能清晰表达意图的变量名。 65 | 66 | ## 3. 依赖隔离:Mocking (Dependency Isolation: Mocking) 67 | 68 | 单元测试的核心原则之一是**隔离性 (Isolation)**,即被测试的代码单元(如一个函数或一个方法)应该与其依赖项隔离开来。Mocking (模拟) 是实现这种隔离的关键技术。 69 | 70 | 我们主要使用 `github.com/stretchr/testify/mock` 库来实现 Mocking。特别是对于实现了 `pkg/component/component.go` 中 `Component` 接口的组件,我们提供了一种标准的 Mocking 方式。 71 | 72 | 73 | ```go 74 | type givenDetail struct { 75 | // Example of another initial state field for the component being tested 76 | initialProcessingPrefix string 77 | // MockOption to set up the behavior of dependencyA 78 | dependencyAMockSetup component.MockOption 79 | // ... 80 | } 81 | 82 | type whenDetail struct { 83 | processDataInput string 84 | // ... 85 | } 86 | 87 | type thenExpected struct { 88 | expectedOutput string 89 | expectedError error 90 | // ... 91 | } 92 | 93 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 94 | { 95 | Scenario: "Component processes data successfully with mocked dependency", 96 | Given: "YourComponent with an initial prefix and dependencyA mocked to return 'related_data_value' for 'input_key'", 97 | When: "ProcessData is called with 'input_key'", 98 | Then: "Should return 'prefix1:input_key:related_data_value' and no error", 99 | GivenDetail: givenDetail{ 100 | initialProcessingPrefix: "prefix1", 101 | dependencyAMockSetup: func(m *mock.Mock) { 102 | // We expect DependencyA's FetchRelatedData to be called with "input_key" 103 | // and it should return "related_data_value" and no error.
104 | m.On("FetchRelatedData", "input_key"). 105 | Return("related_data_value", nil). 106 | Once() // Expect it to be called exactly once. 107 | }, 108 | }, 109 | WhenDetail: whenDetail{ 110 | processDataInput: "input_key", 111 | }, 112 | ThenExpected: thenExpected{ 113 | expectedOutput: "prefix1:input_key:related_data_value", 114 | expectedError: nil, 115 | }, 116 | }, 117 | // ...更多测试用例... 118 | } 119 | 120 | 121 | // 在 for _, tt := range tests { t.Run(tt.Scenario, func(t *testing.T) { ... }) } 循环内部 122 | 123 | // Given 阶段: Setup mocks and the component under test 124 | var mockHelperForDepA *mock.Mock 125 | defer func() { // 确保在每个子测试结束时断言 126 | if mockHelperForDepA != nil { 127 | mockHelperForDepA.AssertExpectations(t) 128 | } 129 | }() 130 | 131 | // 创建并配置 mockDependencyA 132 | // dependency_a_pkg.NewFactory 应该是一个返回 DependencyA 接口和 error 的工厂函数 133 | // 它接受 component.MockOption 来配置其内部的 mock.Mock 对象 134 | mockDependencyA, err := dependency_a_pkg.NewFactory( 135 | component.MockOption(func(m *mock.Mock) { 136 | mockHelperForDepA = m // 保存 mock.Mock 实例以供 AssertExpectations 使用 137 | if tt.GivenDetail.dependencyAMockSetup != nil { 138 | // 应用测试用例中定义的 specific mock setup 139 | tt.GivenDetail.dependencyAMockSetup(m) 140 | } 141 | }), 142 | ).New("mocked_dep_a_instance", nil /* config for dep A */, dependency_a_pkg.Dependencies{}) 143 | Expect(err).NotTo(HaveOccurred()) 144 | Expect(mockDependencyA).NotTo(BeNil()) 145 | 146 | // 假设 YourComponent 的构造函数如下: 147 | componentUnderTest := NewYourComponent(tt.GivenDetail.initialProcessingPrefix, mockDependencyA) 148 | 149 | // When 阶段: Execute the action being tested 150 | actualOutput, actualErr := componentUnderTest.ProcessData(context.Background(), tt.WhenDetail.processDataInput) 151 | 152 | // Then 阶段: Assert the outcomes 153 | if tt.ThenExpected.expectedError != nil { 154 | Expect(actualErr).To(HaveOccurred()) 155 | Expect(actualErr.Error()).To(Equal(tt.ThenExpected.expectedError.Error())) 156 | } else { 157 | 
Expect(actualErr).NotTo(HaveOccurred()) 158 | } 159 | Expect(actualOutput).To(Equal(tt.ThenExpected.expectedOutput)) 160 | ``` -------------------------------------------------------------------------------- /docs/tech/vector-zh.md: -------------------------------------------------------------------------------- 1 | > 适用版本:v0.2.2 2 | 3 | ## 1. 引言 4 | 5 | `vector.Index` 组件是 Zenfeed 系统中负责实现内容语义相似度检索的核心模块,与 `block.Block` 一一对应。它的主要目标是根据用户提供的查询向量,快速找到与之在语义上最相关的 Feed(通常是新闻资讯、文章等文本内容)。 6 | 7 | 该索引的核心设计理念是服务于**文档级别的召回 (Document-level Recall)**。与许多传统向量索引将每个文本块(chunk)视为独立节点不同,`vector.Index` 将**整个 Feed 文档作为图中的一个节点**。而 Feed 内容经过 `embedding_spliter` 切分后产生的多个文本块(chunks),它们各自的向量嵌入(embeddings)则作为该 Feed 节点的属性。 8 | 9 | 这种设计的独特性在于: 10 | 11 | * **搜索结果直接是 Feed ID**:用户搜索后直接获得相关 Feed 的标识符,而不是零散的文本片段。 12 | * **相似度聚焦于“任何部分相关即相关”**:如果一个 Feed 的任何一个 chunk 与查询向量高度相似,整个 Feed 就被认为是相关的。其最终得分为该 Feed 所有 chunks 与查询向量相似度中的最大值。 13 | * **为新闻资讯场景优化**:这种设计特别适合新闻资讯类应用,优先保证相关内容的召回率,确保用户不会错过重要信息,即使该信息仅是文章的一部分。 14 | 15 | `vector.Index` 底层采用 HNSW (Hierarchical Navigable Small World) 算法来组织和搜索这些 Feed 节点,以实现高效的近似最近邻查找。 16 | 17 | ## 2. 
核心概念 18 | 19 | 理解 `vector.Index` 的运作方式,需要熟悉以下核心概念: 20 | 21 | * **Feed (Node)**: 22 | * 在 `vector.Index` 的 HNSW 图中,每个**节点 (node)** 代表一个独立的 **Feed 文档** (例如一篇新闻报道)。 23 | * 每个 Feed 通过一个唯一的 `uint64` ID 来标识。 24 | * 节点存储了其对应的原始 Feed ID 以及与该 Feed 相关的多个向量。 25 | 26 | * **Chunk (Vector Represented by `[][]float32`)**: 27 | * 一个 Feed 的内容(尤其是其文本标签,如标题、正文)可能较长。如果直接将整个长文本生成单一的 embedding,可能会遇到以下问题: 28 | * **LLM 输入长度限制**: 许多 embedding 模型对输入文本的长度有限制。 29 | * **语义稀释 (Semantic Dilution)**: 对于包含多个主题或信息点的长文本,单一向量可能难以精确捕捉所有细微的语义,导致关键信息在整体平均化的向量表示中被“稀释”,降低了特定语义片段的表征能力。例如,一篇包含多个不同事件的综合报道,其单一向量可能无法很好地代表其中任何一个特定事件。 30 | * 通过 `embeddingSpliter`,一个 Feed 的文本内容可以被切分成一个或多个语义相对连贯的 **文本块 (Chunks)**。这种切分有助于每个 chunk 聚焦于更具体的主题或信息点。 31 | * 每个 Chunk 会被送入 LLM 生成一个 **向量嵌入 (vector embedding)**。 32 | * 因此,一个 Feed 节点在索引中会关联**一组向量 (vectors `[][]float32`)**,每个子向量代表其一个 Chunk 的语义。 33 | 34 | * **Embedding**: 35 | * Embedding 是一个由浮点数组成的向量,由大语言模型 (LLM) 生成。它能够捕捉文本片段的语义信息,使得语义上相似的文本在向量空间中距离更近。 36 | * `vector.Index` 存储和比较的就是这些 embeddings。 37 | 38 | * **HNSW (Hierarchical Navigable Small World)**: 39 | * `vector.Index` 使用 HNSW 作为其底层的近似最近邻 (ANN) 搜索算法。 40 | * HNSW 通过构建一个多层的图结构来实现高效搜索。上层图更稀疏,用于快速导航;下层图更密集,用于精确查找。 41 | * 这种结构使得索引在插入新节点和执行搜索时都能保持较好的性能。 42 | 43 | * **相似度计算 (Similarity Score)**: 44 | * **Feed 间相似度 (Inter-Feed Similarity)**: 45 | * 当评估 HNSW 图中两个 Feed 节点(例如,`nodeA` 和 `nodeB`)之间的相似度时,策略是计算 `nodeA` 的所有 Chunk 向量与 `nodeB` 的所有 Chunk 向量之间的两两余弦相似度。 46 | * 最终,这两个 Feed 节点间的相似度取所有这些两两 Chunk 相似度中的**最大值 (Maximal Local Similarity)**。 47 | * **选择此策略的原因**: 对于新闻资讯,只要两篇报道中存在任何一对高度相关的片段(例如,都报道了同一核心事件或引用了同一关键信息),就认为这两篇报道具有强关联性。这有助于最大化召回率,确保用户能发现所有可能相关的资讯,即使它们整体侧重点不同。 48 | * **潜在影响**: 这种策略对局部强相关非常敏感,但也可能因为次要内容的偶然相似而将整体主题差异较大的 Feed 判定为相关,需要在上层应用或通过重排序模型来进一步优化精度。 49 | * **查询与 Feed 相似度 (Query-Feed Similarity)**: 50 | * 当用户使用一个查询向量 `q` 进行搜索时,计算 `q` 与目标 Feed 的每一个 Chunk 向量的余弦相似度。 51 | * 该 Feed 最终与查询 `q` 的相似度分数,同样取这些计算结果中的**最大值**。 52 | * 这样做是为了确保只要 Feed 的任何一部分内容与用户查询高度匹配,该 Feed 就会被召回。 53 | 54 | ## 3. 
主要接口 55 | 56 | `vector.Index` 提供了一组清晰的接口,用于管理和查询基于 Feed 内容语义的向量索引。 57 | 58 | * **`Add(ctx context.Context, id uint64, vectors [][]float32) error`** 59 | * **业务目标**: 将一个新的 Feed 文档及其所有内容块(Chunks)的向量表示添加到索引中,使其能够被后续的相似度搜索发现。 60 | * **核心流程**: 61 | 1. **接收 Feed 数据**: 接收 Feed 的唯一 `id` 和代表其所有 Chunks 的 `vectors` 列表。 62 | 2. **确定插入策略**: 根据 HNSW 算法的层级构建原则,为该 Feed 节点随机确定一个在多层图结构中的最高插入层级。 63 | 3. **查找邻近节点**: 从选定的最高层级开始逐层向下,在每一层利用该层的图结构(和 `EfConstruct` 参数指导下的搜索范围)为新 Feed 节点找到一组最相似的已有 Feed 节点(邻居)。此处的“相似”基于我们定义的“最大局部相似性”——即比较两个 Feed 所有 Chunk 向量对,取其中相似度最高的一对作为这两个 Feed 的相似度。 64 | 4. **建立连接**: 如果新 Feed 节点被分配到当前层级,则将其与找到的邻居建立双向连接(朋友关系),并更新其在该层级的友邻列表。 65 | 5. **维护图结构**: 在添加连接后,可能会触发友邻剪枝逻辑,以确保每个节点的友邻数量符合配置(`M` 或 `2*M`),并尝试维护图的良好连接性,避免产生孤立节点或过度密集的区域。 66 | 67 | * **`Search(ctx context.Context, q []float32, threshold float32, limit int) (map[uint64]float32, error)`** 68 | * **业务目标**: 根据用户提供的查询向量 `q`,从索引中高效地检索出语义上最相似的 Feed 列表,并返回它们的 ID 及相似度得分。 69 | * **核心流程**: 70 | 1. **接收查询**: 接收查询向量 `q`、相似度阈值 `threshold` 和期望返回的最大结果数 `limit`。 71 | 2. **导航至目标区域**: 从 HNSW 图的顶层开始,利用稀疏的高层图结构快速定位到与查询向量 `q` 大致相关的区域,逐层向下,每层都找到与 `q` 更近的节点作为下一层的入口。 72 | 3. **在底层精确搜索**: 到达最底层的图(第 0 层,包含所有 Feed 节点)后,以上一步得到的入口点为起点,进行一次更细致的扩展搜索(受 `EfSearch` 参数指导的搜索范围)。此搜索旨在找到与查询向量 `q` 的“最大局部相似性”(即 `q` 与 Feed 的所有 Chunk 向量相似度中的最大值)满足 `threshold` 且排名前 `limit` 的 Feed。 73 | 4. **返回结果**: 将符合条件的 Feed ID 及其对应的最高相似度分数打包返回。 74 | 75 | * **`EncodeTo(ctx context.Context, w io.Writer) error` / `DecodeFrom(ctx context.Context, r io.Reader) error`** 76 | * **业务目标**: 提供索引的持久化能力,允许将内存中的索引状态完整地保存到外部存储(如文件),并在需要时恢复。 77 | * **核心流程 (`EncodeTo`)**: 78 | 1. **写入元数据**: 保存索引的配置参数(如 `M`, `Ml`, `EfConstruct`, `EfSearch`)和版本信息。 79 | 2. **写入节点数据**: 遍历所有 Feed 节点,依次保存每个节点的 ID、其所有 Chunk 向量(经过量化处理以压缩体积)、以及它在 HNSW 各层级上的友邻关系(友邻 ID 和相似度)。 80 | 3. **写入层级结构**: 保存每个层级所包含的节点 ID 列表。 81 | * **核心流程 (`DecodeFrom`)**: 82 | 1. **读取元数据**: 恢复索引配置。 83 | 2. **重建节点数据**: 读取并重建所有 Feed 节点,包括其 ID、反量化后的 Chunk 向量、以及友邻关系。 84 | 3. **重建层级结构**: 恢复 HNSW 的多层图。 85 | 86 | ## 4. 
内部实现细节补充 87 | 88 | ### 4.1 核心数据表示 89 | 90 | * **Feed 节点 (`node`)**: 每个 Feed 在内存中表示为一个 `node` 对象,它不仅存储了 Feed 的 ID 和其所有 Chunk 的向量 (`vectors [][]float32`),还关键地维护了它在 HNSW 图各个层级上的“友邻列表” (`friendsOnLayers`)。这个友邻列表是图连接性的基础。 91 | * **分层图 (`layers`)**: 索引内部维护一个 `layers` 列表,代表 HNSW 的多层结构。高层图节点更少、连接更稀疏,用于快速跳转;底层图(尤其是第0层)节点最多、连接最密集,用于精确搜索。 92 | * **全局节点池 (`m`)**: 一个从 Feed ID 到 `node` 对象的映射,方便快速访问任何已索引的 Feed。 93 | 94 | ### 4.2 索引构建的关键机制 95 | 96 | * **概率性分层 (`randomInsertLevel`)**: 新加入的 Feed 节点会被随机分配到一个最高层级。这种概率机制(受 `Ml` 参数影响)形成了 HNSW 的金字塔式层级结构。 97 | * **动态邻居选择 (`insertAndLinkAtLevel` 中的搜索逻辑)**: 当一个新 Feed 节点加入某一层时,它会基于“最大局部相似性”在该层搜索一定数量(受 `EfConstruct` 影响)的最近邻居。 98 | * **连接维护与剪枝 (`makeFriend`, `tryRemoveFriend`)**: 与邻居建立双向连接后,为保证图的性能和结构(避免节点拥有过多邻居),会有一套剪枝逻辑。这套逻辑不仅考虑移除相似度最低的连接,有时还会考虑被移除连接的另一端节点的连接状况,试图避免制造“孤岛”节点,甚至在必要时(通过 `tryRemakeFriend`)为连接数过少的节点尝试从“邻居的邻居”中寻找新的连接机会。 99 | 100 | ### 4.3 存储效率:向量量化 101 | 102 | * 为了显著减少索引在持久化存储时占用的空间,`float32` 类型的向量在写入磁盘前会通过 `vectorutil.Quantize` 被转换为 `int8` 类型,并记录下转换所需的最小值和缩放比例。读取时再通过 `vectorutil.Dequantize` 进行有损恢复。这是在存储成本和表示精度之间的一种实用权衡。 103 | -------------------------------------------------------------------------------- /docs/upgrade-from-v0.1.0.md: -------------------------------------------------------------------------------- 1 | ## 0. Check your current version 2 | ```bash 3 | # Mac/Linux 4 | docker inspect glidea/zenfeed:latest | grep version 5 | 6 | # Windows PowerShell 7 | docker inspect glidea/zenfeed:latest | Select-String -Pattern 'version' 8 | ``` 9 | 10 | If you **don't see any results**, it means you're using version v0.1.0. This is because the first version didn't include version information. Therefore, **this document applies to you.** 11 | 12 | ## 1. Move your data to the correct volume path 13 | ```bash 14 | docker-compose -p zenfeed exec zenfeed cp -a /data/. /app/data/ 15 | ``` 16 | 17 | ## 2. Backup your config 18 | Access: http://localhost:1400 19 | ![](images/upgrade-from-v0.1.0-backup.png) 20 | 21 | ## 3. 
Upgrade 22 | See [upgrade](./upgrade.md) 23 | 24 | ## 4. Resave your config 25 | Access: http://localhost:1400 26 | Resave your config. 27 | 28 | These tedious steps are due to an oversight in how the first version was deployed, and I apologize for that. Subsequent versions will not require these extra steps. 29 | -------------------------------------------------------------------------------- /docs/upgrade.md: -------------------------------------------------------------------------------- 1 | **NOTE:** If you are upgrading from v0.1.0, which is the first version, please refer to [upgrade-from-v0.1.0.md](./upgrade-from-v0.1.0.md) 2 | 3 | ```bash 4 | # Ensure the compose file is up to date. 5 | ## Mac/Linux 6 | curl -L -O https://raw.githubusercontent.com/glidea/zenfeed/main/docker-compose.yml 7 | ## Windows PowerShell 8 | Invoke-WebRequest -Uri "https://raw.githubusercontent.com/glidea/zenfeed/main/docker-compose.yml" -OutFile ([System.IO.Path]::GetFileName("https://raw.githubusercontent.com/glidea/zenfeed/main/docker-compose.yml")) 9 | 10 | 11 | # Ensure the images are up to date. 12 | docker-compose -p zenfeed pull 13 | 14 | 15 | # Upgrade without reconfiguring anything (e.g. API keys). 16 | docker-compose -p zenfeed up -d 17 | ``` 18 | 19 | Then all the feed data and configurations should be intact. 20 | -------------------------------------------------------------------------------- /docs/webhook-zh.md: -------------------------------------------------------------------------------- 1 | # Zenfeed Webhook 通知对接指南 2 | 3 | Zenfeed 支持通过 Webhook 将分组和总结后的 Feed 通知推送到您指定的 HTTP(S) 端点。这允许您将 Zenfeed 的通知集成到自定义的应用或工作流程中。 4 | 5 | ## 1. 配置方法 6 | 7 | 要在 Zenfeed 中配置 Webhook 通知,您需要在配置文件的 `notify.receivers` 部分定义一个或多个接收者,并为每个 Webhook 接收者指定其唯一的 `name` 和 `webhook` 配置块。 8 | 9 | **示例配置 (`config.yaml`):** 10 | 11 | ```yaml 12 | notify: 13 | # ... 其他通知配置 ...
14 | 15 | receivers: 16 | - name: my_awesome_webhook # 接收者的唯一名称,将在路由规则中引用 17 | webhook: 18 | url: "https://your-service.com/webhook-endpoint" # 您的 Webhook 接收端点 URL 19 | 20 | # 示例:路由规则中如何使用此接收者 21 | route: # or sub_routes.. 22 | receivers: 23 | - my_awesome_webhook # 引用上面定义的接收者名称 24 | # ... 其他路由配置 ... 25 | ``` 26 | 27 | 在上述示例中: 28 | - 我们定义了一个名为 `my_awesome_webhook` 的接收者。 29 | - `webhook.url` 字段指定了当有匹配此接收者的通知时,Zenfeed 将向哪个 URL 发送 POST 请求。 30 | 31 | ## 2. 数据格式详解 32 | 33 | 当 Zenfeed 向您的 Webhook 端点发送通知时,它会发送一个 `POST` 请求,请求体为 JSON 格式。 34 | 35 | 请求体结构如下: 36 | 37 | ```json 38 | { 39 | "group": "string", 40 | "labels": { 41 | "label_key1": "label_value1", 42 | "label_key2": "label_value2" 43 | }, 44 | "summary": "string", 45 | "feeds": [ 46 | { 47 | "labels": { 48 | "title": "Feed Title 1", 49 | "link": "http://example.com/feed1", 50 | "content": "Feed content snippet 1...", 51 | "source": "example_source", 52 | "pub_time": "2024-07-30T10:00:00Z" 53 | // ... 其他自定义或标准标签 54 | }, 55 | "time": "2024-07-30T10:00:00Z", 56 | "related": [ 57 | // 可选:与此 Feed 相关的其他 Feed 对象,结构同父 Feed 58 | ] 59 | } 60 | // ...更多 Feed 对象 61 | ] 62 | } 63 | ``` 64 | 65 | **字段说明:** 66 | 67 | - `group` (`string`): 68 | 当前通知所属的组名。这个名称是根据通知路由配置中 `group_by` 定义的标签值组合而成的。例如,如果 `group_by: ["source", "category"]`,且一个 Feed 组的 `source` 是 `github_trending`,`category` 是 `golang`,那么 `group` 可能类似于 `"github_trending/golang"`。 69 | 70 | - `labels` (`object`): 71 | 一个键值对对象,表示当前通知组的标签。这些标签是根据通知路由配置中 `group_by` 所指定的标签及其对应的值。 72 | 例如,如果 `group_by: ["source"]` 且当前组的 `source` 标签值为 `rsshub`,则 `labels` 会是 `{"source": "rsshub"}`。 73 | 74 | - `summary` (`string`): 75 | 由大语言模型 (LLM) 为当前这一组 Feed 生成的摘要文本。如果通知路由中没有配置 LLM 总结,此字段可能为空字符串或省略 (取决于具体的实现细节,但通常会是空字符串)。 76 | 77 | - `feeds` (`array` of `object`): 78 | 一个数组,包含了属于当前通知组的所有 Feed 对象。每个 Feed 对象包含以下字段: 79 | * `labels` (`object`): Feed 的元数据。这是一个键值对对象,包含了该 Feed 的所有标签,例如: 80 | * `title` (`string`): Feed 的标题。 81 | * `link` (`string`): Feed 的原始链接。 82 | * `content` (`string`): Feed 的内容摘要或全文 
(取决于抓取和重写规则)。 83 | * `source` (`string`): Feed 的来源标识。 84 | * `pub_time` (`string`): Feed 的发布时间 (RFC3339 格式的字符串,例如 `2025-01-01T00:00:00Z`)。 85 | * ...以及其他在抓取或重写过程中添加的自定义标签。 86 | * `time` (`string`): Feed 的时间戳,通常是其发布时间,采用 RFC3339 格式 (例如 `2025-01-01T00:00:00Z`)。此字段与 `labels.pub_time` 通常一致,但 `time` 是系统内部用于时间序列处理的主要时间字段。 87 | * `related` (`array` of `object`, 可选): 88 | 一个数组,包含了与当前 Feed 语义相关的其他 Feed 对象。这通常在通知路由中启用了 `compress_by_related_threshold` 选项时填充。每个相关的 Feed 对象结构与父 Feed 对象完全相同。如果未启用相关性压缩或没有相关的 Feed,此字段可能为空数组或不存在。 89 | 90 | ## 3. 请求示例 91 | 92 | 以下是一个发送到您的 Webhook 端点的 JSON 请求体示例: 93 | 94 | ```json 95 | { 96 | "group": "my_favorite_blogs", 97 | "labels": { 98 | "category": "tech_updates" 99 | }, 100 | "summary": "今天有多篇关于最新 AI 技术进展的文章,重点关注了大型语言模型在代码生成方面的应用,以及其对未来软件开发模式的潜在影响。", 101 | "feeds": [ 102 | { 103 | "labels": { 104 | "content": "AlphaCode X 展示了惊人的代码理解和生成能力,在多个编程竞赛中超越了人类平均水平...", 105 | "link": "https://example.blog/alphacode-x-details", 106 | "pub_time": "2024-07-30T14:35:10Z", 107 | "source": "Example Tech Blog", 108 | "title": "AlphaCode X: 下一代 AI 编码助手", 109 | "type": "blog_post" 110 | }, 111 | "time": "2024-07-30T14:35:10Z", 112 | "related": [] 113 | }, 114 | { 115 | "labels": { 116 | "content": "讨论了当前 LLM 在实际软件工程项目中落地所面临的挑战,包括成本、可控性和安全性问题。", 117 | "link": "https://another.blog/llm-in-swe-challenges", 118 | "pub_time": "2024-07-30T11:15:00Z", 119 | "source": "Another Tech Review", 120 | "title": "LLM 在软件工程中的应用:机遇与挑战", 121 | "type": "rss" 122 | }, 123 | "time": "2024-07-30T11:15:00Z", 124 | "related": [ 125 | { 126 | "labels": { 127 | "content": "一篇关于如何更经济有效地部署和微调大型语言模型的指南。", 128 | "link": "https://some.other.blog/cost-effective-llm", 129 | "pub_time": "2024-07-30T09:00:00Z", 130 | "source": "AI Infra Weekly", 131 | "title": "经济高效的 LLM 部署策略", 132 | "type": "rss" 133 | }, 134 | "time": "2024-07-30T09:00:00Z", 135 | "related": [] 136 | } 137 | ] 138 | } 139 | ] 140 | } 141 | ``` 142 | 143 | ## 4.
响应要求 144 | 145 | Zenfeed 期望您的 Webhook 端点在成功接收并处理通知后,返回 HTTP `200 OK` 状态码。 146 | 如果 Zenfeed 收到任何非 `200` 的状态码,它会将该次通知尝试标记为失败,并可能根据重试策略进行重试 (具体重试行为取决于 Zenfeed 的内部实现)。 147 | 148 | 请确保您的端点能够及时响应,以避免超时。 149 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/glidea/zenfeed 2 | 3 | go 1.23.4 4 | 5 | require ( 6 | github.com/JohannesKaufmann/html-to-markdown v1.6.0 7 | github.com/benbjohnson/clock v1.3.5 8 | github.com/chewxy/math32 v1.10.1 9 | github.com/edsrzf/mmap-go v1.2.0 10 | github.com/gorilla/feeds v1.2.0 11 | github.com/mark3labs/mcp-go v0.17.0 12 | github.com/mmcdole/gofeed v1.3.0 13 | github.com/nutsdb/nutsdb v1.0.4 14 | github.com/onsi/gomega v1.36.1 15 | github.com/pkg/errors v0.9.1 16 | github.com/prometheus/client_golang v1.21.1 17 | github.com/sashabaranov/go-openai v1.40.1 18 | github.com/stretchr/testify v1.10.0 19 | github.com/veqryn/slog-dedup v0.5.0 20 | github.com/yuin/goldmark v1.7.8 21 | gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df 22 | gopkg.in/yaml.v3 v3.0.1 23 | k8s.io/utils v0.0.0-20241210054802-24370beab758 24 | ) 25 | 26 | require ( 27 | github.com/PuerkitoBio/goquery v1.9.2 // indirect 28 | github.com/andybalholm/cascadia v1.3.2 // indirect 29 | github.com/antlabs/stl v0.0.1 // indirect 30 | github.com/antlabs/timer v0.0.11 // indirect 31 | github.com/beorn7/perks v1.0.1 // indirect 32 | github.com/bwmarrin/snowflake v0.3.0 // indirect 33 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 34 | github.com/davecgh/go-spew v1.1.1 // indirect 35 | github.com/gofrs/flock v0.8.1 // indirect 36 | github.com/google/go-cmp v0.7.0 // indirect 37 | github.com/google/uuid v1.6.0 // indirect 38 | github.com/json-iterator/go v1.1.12 // indirect 39 | github.com/klauspost/compress v1.18.0 // indirect 40 | github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 // indirect 41 | 
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 42 | github.com/modern-go/reflect2 v1.0.2 // indirect 43 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 44 | github.com/pmezard/go-difflib v1.0.0 // indirect 45 | github.com/prometheus/client_model v0.6.1 // indirect 46 | github.com/prometheus/common v0.62.0 // indirect 47 | github.com/prometheus/procfs v0.15.1 // indirect 48 | github.com/stretchr/objx v0.5.2 // indirect 49 | github.com/temoto/robotstxt v1.1.2 50 | github.com/tidwall/btree v1.6.0 // indirect 51 | github.com/xujiajun/mmap-go v1.0.1 // indirect 52 | github.com/xujiajun/utils v0.0.0-20220904132955-5f7c5b914235 // indirect 53 | github.com/yosida95/uritemplate/v3 v3.0.2 // indirect 54 | golang.org/x/net v0.38.0 // indirect 55 | golang.org/x/sys v0.31.0 // indirect 56 | golang.org/x/text v0.23.0 // indirect 57 | google.golang.org/protobuf v1.36.6 // indirect 58 | gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect 59 | modernc.org/b/v2 v2.1.0 // indirect 60 | ) 61 | -------------------------------------------------------------------------------- /pkg/api/http/http.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. 
If not, see . 15 | 16 | package http 17 | 18 | import ( 19 | "net" 20 | "net/http" 21 | 22 | "github.com/pkg/errors" 23 | 24 | "github.com/glidea/zenfeed/pkg/api" 25 | "github.com/glidea/zenfeed/pkg/component" 26 | "github.com/glidea/zenfeed/pkg/config" 27 | telemetry "github.com/glidea/zenfeed/pkg/telemetry" 28 | "github.com/glidea/zenfeed/pkg/telemetry/log" 29 | telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model" 30 | "github.com/glidea/zenfeed/pkg/util/jsonrpc" 31 | ) 32 | 33 | // --- Interface code block --- 34 | type Server interface { 35 | component.Component 36 | config.Watcher 37 | } 38 | 39 | type Config struct { 40 | Address string 41 | } 42 | 43 | func (c *Config) Validate() error { 44 | if c.Address == "" { 45 | c.Address = ":1300" 46 | } 47 | if _, _, err := net.SplitHostPort(c.Address); err != nil { 48 | return errors.Wrap(err, "invalid address") 49 | } 50 | 51 | return nil 52 | } 53 | 54 | func (c *Config) From(app *config.App) *Config { 55 | c.Address = app.API.HTTP.Address 56 | 57 | return c 58 | } 59 | 60 | type Dependencies struct { 61 | API api.API 62 | } 63 | 64 | // --- Factory code block --- 65 | type Factory component.Factory[Server, config.App, Dependencies] 66 | 67 | func NewFactory(mockOn ...component.MockOption) Factory { 68 | if len(mockOn) > 0 { 69 | return component.FactoryFunc[Server, config.App, Dependencies]( 70 | func(instance string, config *config.App, dependencies Dependencies) (Server, error) { 71 | m := &mockServer{} 72 | component.MockOptions(mockOn).Apply(&m.Mock) 73 | 74 | return m, nil 75 | }, 76 | ) 77 | } 78 | 79 | return component.FactoryFunc[Server, config.App, Dependencies](new) 80 | } 81 | 82 | func new(instance string, app *config.App, dependencies Dependencies) (Server, error) { 83 | config := &Config{} 84 | config.From(app) 85 | if err := config.Validate(); err != nil { 86 | return nil, errors.Wrap(err, "validate config") 87 | } 88 | 89 | router := http.NewServeMux() 90 | api := dependencies.API 91 | 
router.Handle("/write", jsonrpc.API(api.Write)) 92 | router.Handle("/query_config", jsonrpc.API(api.QueryAppConfig)) 93 | router.Handle("/apply_config", jsonrpc.API(api.ApplyAppConfig)) 94 | router.Handle("/query_config_schema", jsonrpc.API(api.QueryAppConfigSchema)) 95 | router.Handle("/query_rsshub_categories", jsonrpc.API(api.QueryRSSHubCategories)) 96 | router.Handle("/query_rsshub_websites", jsonrpc.API(api.QueryRSSHubWebsites)) 97 | router.Handle("/query_rsshub_routes", jsonrpc.API(api.QueryRSSHubRoutes)) 98 | router.Handle("/query", jsonrpc.API(api.Query)) 99 | httpServer := &http.Server{Addr: config.Address, Handler: router} 100 | 101 | return &server{ 102 | Base: component.New(&component.BaseConfig[Config, Dependencies]{ 103 | Name: "HTTPServer", 104 | Instance: instance, 105 | Config: config, 106 | Dependencies: dependencies, 107 | }), 108 | http: httpServer, 109 | }, nil 110 | } 111 | 112 | // --- Implementation code block --- 113 | type server struct { 114 | *component.Base[Config, Dependencies] 115 | http *http.Server 116 | } 117 | 118 | func (s *server) Run() (err error) { 119 | ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...) 
120 | defer func() { telemetry.End(ctx, err) }() 121 | 122 | serverErr := make(chan error, 1) 123 | go func() { 124 | serverErr <- s.http.ListenAndServe() 125 | }() 126 | 127 | s.MarkReady() 128 | select { 129 | case <-ctx.Done(): 130 | log.Info(ctx, "shutting down") 131 | 132 | return s.http.Shutdown(ctx) 133 | case err := <-serverErr: 134 | return errors.Wrap(err, "listen and serve") 135 | } 136 | } 137 | 138 | func (s *server) Reload(app *config.App) error { 139 | newConfig := &Config{} 140 | newConfig.From(app) 141 | if err := newConfig.Validate(); err != nil { 142 | return errors.Wrap(err, "validate config") 143 | } 144 | if s.Config().Address != newConfig.Address { 145 | return errors.New("address cannot be reloaded") 146 | } 147 | 148 | s.SetConfig(newConfig) 149 | 150 | return nil 151 | } 152 | 153 | type mockServer struct { 154 | component.Mock 155 | } 156 | 157 | func (m *mockServer) Reload(app *config.App) error { 158 | return m.Called(app).Error(0) 159 | } 160 | -------------------------------------------------------------------------------- /pkg/api/rss/rss.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package rss 17 | 18 | import ( 19 | "net" 20 | "net/http" 21 | "text/template" 22 | "time" 23 | 24 | "github.com/benbjohnson/clock" 25 | "github.com/gorilla/feeds" 26 | "github.com/pkg/errors" 27 | 28 | "github.com/glidea/zenfeed/pkg/api" 29 | "github.com/glidea/zenfeed/pkg/component" 30 | "github.com/glidea/zenfeed/pkg/config" 31 | "github.com/glidea/zenfeed/pkg/model" 32 | telemetry "github.com/glidea/zenfeed/pkg/telemetry" 33 | "github.com/glidea/zenfeed/pkg/telemetry/log" 34 | telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model" 35 | "github.com/glidea/zenfeed/pkg/util/buffer" 36 | ) 37 | 38 | var clk = clock.New() 39 | 40 | // --- Interface code block --- 41 | type Server interface { 42 | component.Component 43 | config.Watcher 44 | } 45 | 46 | type Config struct { 47 | Address string 48 | ContentHTMLTemplate string 49 | contentHTMLTemplate *template.Template 50 | } 51 | 52 | func (c *Config) Validate() error { 53 | if c.Address == "" { 54 | c.Address = ":1302" 55 | } 56 | if _, _, err := net.SplitHostPort(c.Address); err != nil { 57 | return errors.Wrap(err, "invalid address") 58 | } 59 | 60 | if c.ContentHTMLTemplate == "" { 61 | c.ContentHTMLTemplate = "{{ .summary_html_snippet }}" 62 | } 63 | t, err := template.New("").Parse(c.ContentHTMLTemplate) 64 | if err != nil { 65 | return errors.Wrap(err, "parse rss content template") 66 | } 67 | c.contentHTMLTemplate = t 68 | 69 | return nil 70 | } 71 | 72 | func (c *Config) From(app *config.App) *Config { 73 | c.Address = app.API.RSS.Address 74 | c.ContentHTMLTemplate = app.API.RSS.ContentHTMLTemplate 75 | 76 | return c 77 | } 78 | 79 | type Dependencies struct { 80 | API api.API 81 | } 82 | 83 | // --- Factory code block --- 84 | type Factory component.Factory[Server, config.App, Dependencies] 85 | 86 | func NewFactory(mockOn ...component.MockOption) Factory { 87 | if len(mockOn) > 0 { 88 | return component.FactoryFunc[Server, config.App, Dependencies]( 89 | func(instance string, config 
*config.App, dependencies Dependencies) (Server, error) { 90 | m := &mockServer{} 91 | component.MockOptions(mockOn).Apply(&m.Mock) 92 | 93 | return m, nil 94 | }, 95 | ) 96 | } 97 | 98 | return component.FactoryFunc[Server, config.App, Dependencies](new) 99 | } 100 | 101 | func new(instance string, app *config.App, dependencies Dependencies) (Server, error) { 102 | config := &Config{} 103 | config.From(app) 104 | if err := config.Validate(); err != nil { 105 | return nil, errors.Wrap(err, "validate config") 106 | } 107 | 108 | s := &server{ 109 | Base: component.New(&component.BaseConfig[Config, Dependencies]{ 110 | Name: "RSSServer", 111 | Instance: instance, 112 | Config: config, 113 | Dependencies: dependencies, 114 | }), 115 | } 116 | 117 | router := http.NewServeMux() 118 | router.Handle("/", http.HandlerFunc(s.rss)) 119 | 120 | s.http = &http.Server{Addr: config.Address, Handler: router} 121 | 122 | return s, nil 123 | } 124 | 125 | // --- Implementation code block --- 126 | type server struct { 127 | *component.Base[Config, Dependencies] 128 | http *http.Server 129 | } 130 | 131 | func (s *server) Run() (err error) { 132 | ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...) 
133 | defer func() { telemetry.End(ctx, err) }() 134 | 135 | serverErr := make(chan error, 1) 136 | go func() { 137 | serverErr <- s.http.ListenAndServe() 138 | }() 139 | 140 | s.MarkReady() 141 | select { 142 | case <-ctx.Done(): 143 | log.Info(ctx, "shutting down") 144 | 145 | return s.http.Shutdown(ctx) 146 | case err := <-serverErr: 147 | return errors.Wrap(err, "listen and serve") 148 | } 149 | } 150 | 151 | func (s *server) Reload(app *config.App) error { 152 | newConfig := &Config{} 153 | newConfig.From(app) 154 | if err := newConfig.Validate(); err != nil { 155 | return errors.Wrap(err, "validate config") 156 | } 157 | if s.Config().Address != newConfig.Address { 158 | return errors.New("address cannot be reloaded") 159 | } 160 | 161 | s.SetConfig(newConfig) 162 | 163 | return nil 164 | } 165 | 166 | func (s *server) rss(w http.ResponseWriter, r *http.Request) { 167 | var err error 168 | ctx := telemetry.StartWith(r.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "rss")...) 169 | defer telemetry.End(ctx, err) 170 | 171 | // Extract parameters. 172 | ps := r.URL.Query() 173 | labelFilters := ps["label_filter"] 174 | query := ps.Get("query") 175 | 176 | // Forward query request to API. 177 | now := clk.Now() 178 | queryResult, err := s.Dependencies().API.Query(ctx, &api.QueryRequest{ 179 | Query: query, 180 | LabelFilters: labelFilters, 181 | Start: now.Add(-24 * time.Hour), 182 | End: now, 183 | Limit: 100, 184 | }) 185 | if err != nil { 186 | http.Error(w, err.Error(), http.StatusBadRequest) // TODO: standardize error handling. 187 | 188 | return 189 | } 190 | 191 | // Render and convert to RSS. 192 | rssObj := &feeds.Feed{ 193 | Title: "Zenfeed RSS - " + ps.Encode(), 194 | Description: "Powered by Github Zenfeed - https://github.com/glidea/zenfeed. 
If you use Folo, please enable 'Appearance - Content - Render inline styles'", 195 | Items: make([]*feeds.Item, 0, len(queryResult.Feeds)), 196 | } 197 | 198 | buf := buffer.Get() 199 | defer buffer.Put(buf) 200 | 201 | for _, feed := range queryResult.Feeds { 202 | buf.Reset() 203 | 204 | if err = s.Config().contentHTMLTemplate.Execute(buf, feed.Labels.Map()); err != nil { 205 | http.Error(w, err.Error(), http.StatusInternalServerError) 206 | 207 | return 208 | } 209 | 210 | item := &feeds.Item{ 211 | Title: feed.Labels.Get(model.LabelTitle), 212 | Link: &feeds.Link{Href: feed.Labels.Get(model.LabelLink)}, 213 | Created: feed.Time, // NOTE: scrape time, not pub time. 214 | Content: buf.String(), 215 | } 216 | 217 | rssObj.Items = append(rssObj.Items, item) 218 | } 219 | 220 | if err = rssObj.WriteRss(w); err != nil { 221 | log.Error(ctx, errors.Wrap(err, "write rss response")) 222 | 223 | return 224 | } 225 | } 226 | 227 | type mockServer struct { 228 | component.Mock 229 | } 230 | 231 | func (m *mockServer) Reload(app *config.App) error { 232 | return m.Called(app).Error(0) 233 | } 234 | -------------------------------------------------------------------------------- /pkg/llm/embedding_spliter.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 
12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package llm 17 | 18 | import ( 19 | "math" 20 | "slices" 21 | 22 | "github.com/glidea/zenfeed/pkg/model" 23 | ) 24 | 25 | type embeddingSpliter interface { 26 | Split(ls model.Labels) ([]model.Labels, error) 27 | } 28 | 29 | func newEmbeddingSpliter(maxLabelValueTokens, overlapTokens int) embeddingSpliter { 30 | if maxLabelValueTokens <= 0 { 31 | maxLabelValueTokens = 1024 32 | } 33 | if overlapTokens <= 0 { 34 | overlapTokens = 64 35 | } 36 | if overlapTokens > maxLabelValueTokens { 37 | overlapTokens = maxLabelValueTokens / 10 38 | } 39 | 40 | return &embeddingSpliterImpl{maxLabelValueTokens: maxLabelValueTokens, overlapTokens: overlapTokens} 41 | } 42 | 43 | type embeddingSpliterImpl struct { 44 | maxLabelValueTokens int 45 | overlapTokens int 46 | } 47 | 48 | func (e *embeddingSpliterImpl) Split(ls model.Labels) ([]model.Labels, error) { 49 | var ( 50 | short = make(model.Labels, 0, len(ls)) 51 | long = make(model.Labels, 0, 1) 52 | longTokens = make([]int, 0, 1) 53 | ) 54 | for _, l := range ls { 55 | tokens := e.estimateTokens(l.Value) 56 | if tokens <= e.maxLabelValueTokens { 57 | short = append(short, l) 58 | } else { 59 | long = append(long, l) 60 | longTokens = append(longTokens, tokens) 61 | } 62 | } 63 | if len(long) == 0 { 64 | return []model.Labels{ls}, nil 65 | } 66 | 67 | var ( 68 | common = short 69 | splits = make([]model.Labels, 0, len(long)*2) 70 | ) 71 | for i := range long { 72 | parts := e.split(long[i].Value, longTokens[i]) 73 | for _, p := range parts { 74 | com := slices.Clone(common) 75 | s := append(com, model.Label{Key: long[i].Key, Value: p}) 76 | splits = append(splits, s) 77 | } 78 | } 79 | 80 | return splits, nil 81 | } 82 | 83 | func (e *embeddingSpliterImpl) split(value string, tokens int) []string { 84 | var ( 85 | results = make([]string, 0) 86 | chars = []rune(value) 87 | ) 88 | 89 | 
// Estimate the number of characters per token 90 | avgCharsPerToken := float64(len(chars)) / float64(tokens) 91 | // Calculate the approximate number of characters corresponding to maxLabelValueTokens tokens. 92 | charsPerSegment := int(float64(e.maxLabelValueTokens) * avgCharsPerToken) 93 | 94 | // The number of characters corresponding to a fixed overlap of 64 tokens. 95 | overlapChars := int(float64(e.overlapTokens) * avgCharsPerToken) 96 | 97 | // Actual step length = segment length - overlap. 98 | charStep := charsPerSegment - overlapChars 99 | 100 | for start := 0; start < len(chars); { 101 | end := min(start+charsPerSegment, len(chars)) 102 | 103 | segment := string(chars[start:end]) 104 | results = append(results, segment) 105 | 106 | if end == len(chars) { 107 | break 108 | } 109 | start += charStep 110 | } 111 | 112 | return results 113 | } 114 | 115 | func (e *embeddingSpliterImpl) estimateTokens(text string) int { 116 | latinChars := 0 117 | otherChars := 0 118 | 119 | for _, r := range text { 120 | if r <= 127 { 121 | latinChars++ 122 | } else { 123 | otherChars++ 124 | } 125 | } 126 | 127 | // Rough estimate: 128 | // - English and punctuation: about 0.25 tokens/char (4 characters ≈ 1 token). 129 | // - Chinese and other non-Latin characters: about 1.5 tokens/char. 130 | return int(math.Round(float64(latinChars)/4 + float64(otherChars)*3/2)) 131 | } 132 | -------------------------------------------------------------------------------- /pkg/llm/embedding_spliter_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 
// TestEmbeddingSpliter_Split is a table-driven test of embeddingSpliter.Split.
// Each case builds a spliter from the given token limits, splits a label set
// and compares the resulting splits (or the error text) with the expectation.
func TestEmbeddingSpliter_Split(t *testing.T) {
	RegisterTestingT(t)

	// Constructor arguments of the spliter under test.
	type givenDetail struct {
		maxLabelValueTokens int
		overlapTokens       int
	}
	// Input handed to Split.
	type whenDetail struct {
		labels model.Labels
	}
	// Expected splits, or a substring of the expected error message.
	type thenExpected struct {
		splits []model.Labels
		err    string
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Split labels with all short values",
			Given:    "an embedding spliter with max token limit",
			When:     "splitting labels with all values under token limit",
			Then:     "should return original labels as single split",
			GivenDetail: givenDetail{
				// overlapTokens is left at zero, so the constructor default applies.
				maxLabelValueTokens: 1024,
			},
			WhenDetail: whenDetail{
				labels: model.Labels{
					{Key: "title", Value: "Short title"},
					{Key: "description", Value: "Short description"},
				},
			},
			ThenExpected: thenExpected{
				splits: []model.Labels{
					{
						{Key: "title", Value: "Short title"},
						{Key: "description", Value: "Short description"},
					},
				},
			},
		},
		{
			Scenario: "Split labels with one long value",
			Given:    "an embedding spliter with max token limit",
			When:     "splitting labels with one value exceeding token limit",
			Then:     "should split the long value and combine with common labels",
			GivenDetail: givenDetail{
				maxLabelValueTokens: 10, // Small limit to force splitting.
				overlapTokens:       1,
			},
			WhenDetail: whenDetail{
				labels: model.Labels{
					{Key: "title", Value: "Short title"},
					// 93 ASCII characters are estimated at 23 tokens, which gives
					// 40-character segments that overlap by 4 characters.
					{Key: "content", Value: "This is a long content that exceeds the token limit and needs to be split into multiple parts"},
				},
			},
			ThenExpected: thenExpected{
				splits: []model.Labels{
					{
						{Key: "title", Value: "Short title"},
						{Key: "content", Value: "This is a long content that exceeds the "},
					},
					{
						{Key: "title", Value: "Short title"},
						{Key: "content", Value: "the token limit and needs to be split in"},
					},
					{
						{Key: "title", Value: "Short title"},
						{Key: "content", Value: "t into multiple parts"},
					},
				},
			},
		},
		{
			Scenario: "Handle non-Latin characters",
			Given:    "an embedding spliter with max token limit",
			When:     "splitting labels with non-Latin characters",
			Then:     "should correctly estimate tokens and split accordingly",
			GivenDetail: givenDetail{
				maxLabelValueTokens: 10, // Small limit to force splitting.
				overlapTokens:       2,
			},
			WhenDetail: whenDetail{
				labels: model.Labels{
					{Key: "title", Value: "Short title"},
					// 19 non-Latin runes are estimated at 29 tokens, which gives
					// 6-rune segments that overlap by 1 rune.
					{Key: "content", Value: "中文内容需要被分割因为它超过了令牌限制"}, // Chinese content that needs to be split.
				},
			},
			ThenExpected: thenExpected{
				splits: []model.Labels{
					{
						{Key: "title", Value: "Short title"},
						{Key: "content", Value: "中文内容需要"},
					},
					{
						{Key: "title", Value: "Short title"},
						{Key: "content", Value: "要被分割因为"},
					},
					{
						{Key: "title", Value: "Short title"},
						{Key: "content", Value: "为它超过了令"},
					},
					{
						{Key: "title", Value: "Short title"},
						{Key: "content", Value: "令牌限制"},
					},
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			spliter := newEmbeddingSpliter(tt.GivenDetail.maxLabelValueTokens, tt.GivenDetail.overlapTokens)

			// When.
			splits, err := spliter.Split(tt.WhenDetail.labels)

			// Then.
			if tt.ThenExpected.err != "" {
				Expect(err).NotTo(BeNil())
				Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
			} else {
				Expect(err).To(BeNil())
				// Compare the count first so the per-index comparison below
				// cannot go out of range.
				Expect(len(splits)).To(Equal(len(tt.ThenExpected.splits)))

				for i, expectedSplit := range tt.ThenExpected.splits {
					Expect(splits[i]).To(Equal(expectedSplit))
				}
			}
		})
	}
}
15 | 16 | package llm 17 | 18 | import ( 19 | "context" 20 | "encoding/json" 21 | 22 | "github.com/pkg/errors" 23 | oai "github.com/sashabaranov/go-openai" 24 | 25 | "github.com/glidea/zenfeed/pkg/component" 26 | "github.com/glidea/zenfeed/pkg/model" 27 | "github.com/glidea/zenfeed/pkg/telemetry" 28 | telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model" 29 | runtimeutil "github.com/glidea/zenfeed/pkg/util/runtime" 30 | ) 31 | 32 | type openai struct { 33 | *component.Base[Config, struct{}] 34 | 35 | client *oai.Client 36 | embeddingSpliter embeddingSpliter 37 | } 38 | 39 | func newOpenAI(c *Config) LLM { 40 | config := oai.DefaultConfig(c.APIKey) 41 | config.BaseURL = c.Endpoint 42 | client := oai.NewClientWithConfig(config) 43 | embeddingSpliter := newEmbeddingSpliter(1536, 64) 44 | 45 | return &openai{ 46 | Base: component.New(&component.BaseConfig[Config, struct{}]{ 47 | Name: "LLM/openai", 48 | Instance: c.Name, 49 | Config: c, 50 | }), 51 | client: client, 52 | embeddingSpliter: embeddingSpliter, 53 | } 54 | } 55 | 56 | func (o *openai) String(ctx context.Context, messages []string) (value string, err error) { 57 | ctx = telemetry.StartWith(ctx, append(o.TelemetryLabels(), telemetrymodel.KeyOperation, "String")...) 
58 | defer func() { telemetry.End(ctx, err) }() 59 | 60 | config := o.Config() 61 | if config.Model == "" { 62 | return "", errors.New("model is not set") 63 | } 64 | msgs := make([]oai.ChatCompletionMessage, 0, len(messages)) 65 | for _, m := range messages { 66 | msgs = append(msgs, oai.ChatCompletionMessage{ 67 | Role: oai.ChatMessageRoleUser, 68 | Content: m, 69 | }) 70 | } 71 | 72 | req := oai.ChatCompletionRequest{ 73 | Model: config.Model, 74 | Messages: msgs, 75 | Temperature: config.Temperature, 76 | } 77 | 78 | resp, err := o.client.CreateChatCompletion(ctx, req) 79 | if err != nil { 80 | return "", errors.Wrap(err, "create chat completion") 81 | } 82 | if len(resp.Choices) == 0 { 83 | return "", errors.New("no completion choices returned") 84 | } 85 | 86 | lvs := []string{o.Name(), o.Instance(), "String"} 87 | promptTokens.WithLabelValues(lvs...).Add(float64(resp.Usage.PromptTokens)) 88 | completionTokens.WithLabelValues(lvs...).Add(float64(resp.Usage.CompletionTokens)) 89 | totalTokens.WithLabelValues(lvs...).Add(float64(resp.Usage.TotalTokens)) 90 | 91 | return resp.Choices[0].Message.Content, nil 92 | } 93 | 94 | func (o *openai) EmbeddingLabels(ctx context.Context, labels model.Labels) (value [][]float32, err error) { 95 | ctx = telemetry.StartWith(ctx, append(o.TelemetryLabels(), telemetrymodel.KeyOperation, "EmbeddingLabels")...) 
96 | defer func() { telemetry.End(ctx, err) }() 97 | 98 | config := o.Config() 99 | if config.EmbeddingModel == "" { 100 | return nil, errors.New("embedding model is not set") 101 | } 102 | splits, err := o.embeddingSpliter.Split(labels) 103 | if err != nil { 104 | return nil, errors.Wrap(err, "split embedding") 105 | } 106 | 107 | vecs := make([][]float32, 0, len(splits)) 108 | for _, split := range splits { 109 | text := runtimeutil.Must1(json.Marshal(split)) 110 | vec, err := o.Embedding(ctx, string(text)) 111 | if err != nil { 112 | return nil, errors.Wrap(err, "embedding") 113 | } 114 | vecs = append(vecs, vec) 115 | } 116 | 117 | return vecs, nil 118 | } 119 | 120 | func (o *openai) Embedding(ctx context.Context, s string) (value []float32, err error) { 121 | ctx = telemetry.StartWith(ctx, append(o.TelemetryLabels(), telemetrymodel.KeyOperation, "Embedding")...) 122 | defer func() { telemetry.End(ctx, err) }() 123 | 124 | config := o.Config() 125 | if config.EmbeddingModel == "" { 126 | return nil, errors.New("embedding model is not set") 127 | } 128 | vec, err := o.client.CreateEmbeddings(ctx, oai.EmbeddingRequest{ 129 | Input: []string{s}, 130 | Model: oai.EmbeddingModel(config.EmbeddingModel), 131 | EncodingFormat: oai.EmbeddingEncodingFormatFloat, 132 | }) 133 | if err != nil { 134 | return nil, errors.Wrap(err, "create embeddings") 135 | } 136 | if len(vec.Data) == 0 { 137 | return nil, errors.New("no embedding data returned") 138 | } 139 | 140 | lvs := []string{o.Name(), o.Instance(), "Embedding"} 141 | promptTokens.WithLabelValues(lvs...).Add(float64(vec.Usage.PromptTokens)) 142 | completionTokens.WithLabelValues(lvs...).Add(float64(vec.Usage.CompletionTokens)) 143 | totalTokens.WithLabelValues(lvs...).Add(float64(vec.Usage.TotalTokens)) 144 | 145 | return vec.Data[0].Embedding, nil 146 | } 147 | -------------------------------------------------------------------------------- /pkg/notify/channel/channel.go: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package channel 17 | 18 | import ( 19 | "context" 20 | 21 | "github.com/pkg/errors" 22 | 23 | "github.com/glidea/zenfeed/pkg/component" 24 | "github.com/glidea/zenfeed/pkg/notify/route" 25 | "github.com/glidea/zenfeed/pkg/telemetry" 26 | telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model" 27 | ) 28 | 29 | // --- Interface code block --- 30 | type Channel interface { 31 | component.Component 32 | sender 33 | } 34 | 35 | type sender interface { 36 | Send(ctx context.Context, receiver Receiver, group *route.FeedGroup) error 37 | } 38 | 39 | type Config struct { 40 | Email *Email 41 | } 42 | 43 | func (c *Config) Validate() error { 44 | if c.Email.Enabled() { 45 | if err := c.Email.Validate(); err != nil { 46 | return errors.Wrap(err, "validate email") 47 | } 48 | } 49 | 50 | return nil 51 | } 52 | 53 | type Receiver struct { 54 | Email string 55 | Webhook *WebhookReceiver 56 | } 57 | 58 | func (r *Receiver) Validate() error { 59 | if r.Email != "" && r.Webhook != nil { 60 | return errors.New("email and webhook cannot both be set") 61 | } 62 | if r.Webhook != nil { 63 | if err := r.Webhook.Validate(); err != nil { 64 | return errors.Wrap(err, 
"validate webhook") 65 | } 66 | } 67 | 68 | return nil 69 | } 70 | 71 | type Dependencies struct{} 72 | 73 | // --- Factory code block --- 74 | type Factory component.Factory[Channel, Config, Dependencies] 75 | 76 | func NewFactory(mockOn ...component.MockOption) Factory { 77 | if len(mockOn) > 0 { 78 | return component.FactoryFunc[Channel, Config, Dependencies]( 79 | func(instance string, config *Config, dependencies Dependencies) (Channel, error) { 80 | m := &mockChannel{} 81 | component.MockOptions(mockOn).Apply(&m.Mock) 82 | 83 | return m, nil 84 | }, 85 | ) 86 | } 87 | 88 | return component.FactoryFunc[Channel, Config, Dependencies](new) 89 | } 90 | 91 | func new(instance string, config *Config, dependencies Dependencies) (Channel, error) { 92 | if err := config.Validate(); err != nil { 93 | return nil, errors.Wrap(err, "validate config") 94 | } 95 | 96 | var email sender 97 | if config.Email.Enabled() { 98 | var err error 99 | email, err = newEmail(config.Email, dependencies) 100 | if err != nil { 101 | return nil, errors.Wrap(err, "new email") 102 | } 103 | } 104 | 105 | return &aggrChannel{ 106 | Base: component.New(&component.BaseConfig[Config, Dependencies]{ 107 | Name: "NotifyChannel", 108 | Instance: instance, 109 | Config: config, 110 | Dependencies: dependencies, 111 | }), 112 | email: email, 113 | webhook: newWebhook(), 114 | }, nil 115 | } 116 | 117 | // --- Implementation code block --- 118 | type aggrChannel struct { 119 | *component.Base[Config, Dependencies] 120 | email, webhook sender 121 | } 122 | 123 | func (c *aggrChannel) Send(ctx context.Context, receiver Receiver, group *route.FeedGroup) error { 124 | if receiver.Email != "" && c.email != nil { 125 | return c.send(ctx, receiver, group, c.email, "email") 126 | } 127 | if receiver.Webhook != nil && c.webhook != nil { 128 | return c.send(ctx, receiver, group, c.webhook, "webhook") 129 | } 130 | 131 | return nil 132 | } 133 | 134 | func (c *aggrChannel) send( 135 | ctx context.Context, 136 | 
receiver Receiver, 137 | group *route.FeedGroup, 138 | sender sender, 139 | senderName string, 140 | ) (err error) { 141 | ctx = telemetry.StartWith(ctx, append(c.TelemetryLabels(), telemetrymodel.KeyOperation, "channel", senderName)...) 142 | defer func() { telemetry.End(ctx, err) }() 143 | if err := sender.Send(ctx, receiver, group); err != nil { 144 | return errors.Wrap(err, "send") 145 | } 146 | 147 | return nil 148 | } 149 | 150 | type mockChannel struct { 151 | component.Mock 152 | } 153 | 154 | func (m *mockChannel) Send(ctx context.Context, receiver Receiver, group *route.FeedGroup) error { 155 | args := m.Called(ctx, receiver, group) 156 | 157 | return args.Error(0) 158 | } 159 | -------------------------------------------------------------------------------- /pkg/notify/channel/webhook.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package channel 17 | 18 | import ( 19 | "bytes" 20 | "context" 21 | "encoding/json" 22 | "net/http" 23 | 24 | "github.com/pkg/errors" 25 | 26 | "github.com/glidea/zenfeed/pkg/model" 27 | "github.com/glidea/zenfeed/pkg/notify/route" 28 | runtimeutil "github.com/glidea/zenfeed/pkg/util/runtime" 29 | ) 30 | 31 | type WebhookReceiver struct { 32 | URL string `json:"url"` 33 | } 34 | 35 | func (r *WebhookReceiver) Validate() error { 36 | if r.URL == "" { 37 | return errors.New("webhook.url is required") 38 | } 39 | 40 | return nil 41 | } 42 | 43 | type webhookBody struct { 44 | Group string `json:"group"` 45 | Labels model.Labels `json:"labels"` 46 | Summary string `json:"summary"` 47 | Feeds []*route.Feed `json:"feeds"` 48 | } 49 | 50 | func newWebhook() sender { 51 | return &webhook{ 52 | httpClient: &http.Client{}, 53 | } 54 | } 55 | 56 | type webhook struct { 57 | httpClient *http.Client 58 | } 59 | 60 | func (w *webhook) Send(ctx context.Context, receiver Receiver, group *route.FeedGroup) error { 61 | // Prepare request. 62 | body := &webhookBody{ 63 | Group: group.Name, 64 | Labels: group.Labels, 65 | Summary: group.Summary, 66 | Feeds: group.Feeds, 67 | } 68 | b := runtimeutil.Must1(json.Marshal(body)) 69 | req, err := http.NewRequestWithContext(ctx, http.MethodPost, receiver.Webhook.URL, bytes.NewReader(b)) 70 | if err != nil { 71 | return errors.Wrap(err, "create request") 72 | } 73 | req.Header.Set("Content-Type", "application/json") 74 | 75 | // Send request. 76 | resp, err := w.httpClient.Do(req) 77 | if err != nil { 78 | return errors.Wrap(err, "send request") 79 | } 80 | defer func() { _ = resp.Body.Close() }() 81 | 82 | // Handle response. 
83 | if resp.StatusCode != http.StatusOK { 84 | return errors.New("send request") 85 | } 86 | 87 | return nil 88 | } 89 | -------------------------------------------------------------------------------- /pkg/schedule/rule/periodic.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package rule 17 | 18 | import ( 19 | "context" 20 | "time" 21 | 22 | "github.com/pkg/errors" 23 | 24 | "github.com/glidea/zenfeed/pkg/component" 25 | "github.com/glidea/zenfeed/pkg/storage/feed/block" 26 | "github.com/glidea/zenfeed/pkg/telemetry" 27 | "github.com/glidea/zenfeed/pkg/telemetry/log" 28 | telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model" 29 | timeutil "github.com/glidea/zenfeed/pkg/util/time" 30 | ) 31 | 32 | func newPeriodic(instance string, config *Config, dependencies Dependencies) (Rule, error) { 33 | return &periodic{ 34 | Base: component.New(&component.BaseConfig[Config, Dependencies]{ 35 | Name: "PeriodicRuler", 36 | Instance: instance, 37 | Config: config, 38 | Dependencies: dependencies, 39 | }), 40 | }, nil 41 | } 42 | 43 | type periodic struct { 44 | *component.Base[Config, Dependencies] 45 | } 46 | 47 | func (r *periodic) Run() (err error) { 48 | ctx := telemetry.StartWith(r.Context(), append(r.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...) 
49 | defer func() { telemetry.End(ctx, err) }() 50 | r.MarkReady() 51 | 52 | iter := func(now time.Time) { 53 | config := r.Config() 54 | today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location()) 55 | end := time.Date(today.Year(), today.Month(), today.Day(), 56 | config.end.Hour(), config.end.Minute(), 0, 0, today.Location()) 57 | 58 | buffer := 30 * time.Minute 59 | endPlusBuffer := end.Add(buffer) 60 | if now.Before(end) || now.After(endPlusBuffer) { 61 | return 62 | } 63 | if err := r.execute(ctx, now); err != nil { 64 | log.Warn(ctx, errors.Wrap(err, "execute, retry in next time")) 65 | } 66 | log.Debug(ctx, "rule executed", "now", now, "end", end) 67 | } 68 | 69 | offset := timeutil.Random(time.Minute) 70 | log.Debug(ctx, "computed watch offset", "offset", offset) 71 | 72 | tick := time.NewTimer(offset) 73 | defer tick.Stop() 74 | for { 75 | select { 76 | case <-ctx.Done(): 77 | return nil 78 | case now := <-tick.C: 79 | iter(now) 80 | tick.Reset(5 * time.Minute) 81 | } 82 | } 83 | } 84 | 85 | func (r *periodic) execute(ctx context.Context, now time.Time) error { 86 | // Determine the query interval based on now and config's start, end and crossDay. 
87 | config := r.Config() 88 | today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location()) 89 | var start, end time.Time 90 | if config.crossDay { 91 | yesterday := today.AddDate(0, 0, -1) 92 | start = time.Date(yesterday.Year(), yesterday.Month(), yesterday.Day(), 93 | config.start.Hour(), config.start.Minute(), 0, 0, yesterday.Location()) 94 | end = time.Date(today.Year(), today.Month(), today.Day(), 95 | config.end.Hour(), config.end.Minute(), 0, 0, today.Location()) 96 | } else { 97 | start = time.Date(today.Year(), today.Month(), today.Day(), 98 | config.start.Hour(), config.start.Minute(), 0, 0, today.Location()) 99 | end = time.Date(today.Year(), today.Month(), today.Day(), 100 | config.end.Hour(), config.end.Minute(), 0, 0, today.Location()) 101 | } 102 | 103 | // Query. 104 | ctx = log.With(ctx, "start", start, "end", end) 105 | feeds, err := r.Dependencies().FeedStorage.Query(ctx, block.QueryOptions{ 106 | Query: config.Query, 107 | Threshold: config.Threshold, 108 | LabelFilters: config.LabelFilters, 109 | Start: start, 110 | End: end, 111 | Limit: 500, 112 | }) 113 | if err != nil { 114 | return errors.Wrap(err, "query") 115 | } 116 | if len(feeds) == 0 { 117 | log.Debug(ctx, "no feeds found") 118 | 119 | return nil 120 | } 121 | 122 | // Attach labels to feeds. 123 | for _, feed := range feeds { 124 | feed.Labels = append(feed.Labels, config.labels...) 125 | feed.Labels.EnsureSorted() 126 | } 127 | 128 | // Notify. 
129 | r.Dependencies().Out <- &Result{ 130 | Rule: config.Name, 131 | Time: start, 132 | Feeds: feeds, 133 | } 134 | log.Debug(ctx, "rule notified", "feeds", len(feeds)) 135 | 136 | return nil 137 | } 138 | -------------------------------------------------------------------------------- /pkg/schedule/rule/rule.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package rule 17 | 18 | import ( 19 | "strings" 20 | "time" 21 | 22 | "github.com/pkg/errors" 23 | 24 | "github.com/glidea/zenfeed/pkg/component" 25 | "github.com/glidea/zenfeed/pkg/model" 26 | "github.com/glidea/zenfeed/pkg/storage/feed" 27 | "github.com/glidea/zenfeed/pkg/storage/feed/block" 28 | ) 29 | 30 | // --- Interface code block --- 31 | type Rule interface { 32 | component.Component 33 | Config() *Config 34 | } 35 | 36 | type Config struct { 37 | Name string 38 | Query string 39 | Threshold float32 40 | LabelFilters []string 41 | Labels map[string]string 42 | labels model.Labels 43 | 44 | // Periodic type. 45 | EveryDay string // e.g. "00:00~23:59", or "-22:00~7:00" (yesterday 22:00 to today 07:00) 46 | start, end time.Time 47 | crossDay bool 48 | 49 | // Watch type. 
50 | WatchInterval time.Duration 51 | } 52 | 53 | var ( 54 | timeSep = "~" 55 | timeYesterdayPrefix = "-" 56 | timeFmt = "15:04" 57 | ) 58 | 59 | func (c *Config) Validate() error { //nolint:cyclop,gocognit 60 | if c.Name == "" { 61 | return errors.New("name is required") 62 | } 63 | if c.Threshold == 0 { 64 | c.Threshold = 0.5 65 | } 66 | if c.Threshold < 0 || c.Threshold > 1 { 67 | return errors.New("threshold must be between 0 and 1") 68 | } 69 | if len(c.Labels) > 0 { 70 | c.labels.FromMap(c.Labels) 71 | } 72 | if c.EveryDay != "" && c.WatchInterval != 0 { 73 | return errors.New("every_day and watch_interval cannot both be set") 74 | } 75 | switch c.EveryDay { 76 | case "": 77 | if c.WatchInterval < 10*time.Minute { 78 | c.WatchInterval = 10 * time.Minute 79 | } 80 | default: 81 | times := strings.Split(c.EveryDay, timeSep) 82 | if len(times) != 2 { 83 | return errors.New("every_day must be in format 'start~end'") 84 | } 85 | 86 | start, end := strings.TrimSpace(times[0]), strings.TrimSpace(times[1]) 87 | isYesterday := strings.HasPrefix(start, timeYesterdayPrefix) 88 | if isYesterday { 89 | start = start[1:] // Remove the "-" prefix 90 | c.crossDay = true 91 | } 92 | 93 | // Parse start time. 94 | startTime, err := time.ParseInLocation(timeFmt, start, time.Local) 95 | if err != nil { 96 | return errors.Wrap(err, "parse start time") 97 | } 98 | 99 | // Parse end time. 100 | endTime, err := time.ParseInLocation(timeFmt, end, time.Local) 101 | if err != nil { 102 | return errors.Wrap(err, "parse end time") 103 | } 104 | 105 | // For non-yesterday time range, end time must be after start time. 
106 | if !isYesterday && endTime.Before(startTime) { 107 | return errors.New("end time must be after start time") 108 | } 109 | 110 | c.start, c.end = startTime, endTime 111 | } 112 | 113 | return nil 114 | } 115 | 116 | type Dependencies struct { 117 | FeedStorage feed.Storage 118 | Out chan<- *Result 119 | } 120 | 121 | type Result struct { 122 | Rule string 123 | Time time.Time 124 | Feeds []*block.FeedVO 125 | } 126 | 127 | // --- Factory code block --- 128 | 129 | type Factory component.Factory[Rule, Config, Dependencies] 130 | 131 | func NewFactory(mockOn ...component.MockOption) Factory { 132 | if len(mockOn) > 0 { 133 | return component.FactoryFunc[Rule, Config, Dependencies]( 134 | func(instance string, config *Config, dependencies Dependencies) (Rule, error) { 135 | m := &mockRule{} 136 | component.MockOptions(mockOn).Apply(&m.Mock) 137 | 138 | return m, nil 139 | }, 140 | ) 141 | } 142 | 143 | return component.FactoryFunc[Rule, Config, Dependencies](new) 144 | } 145 | 146 | func new(instance string, config *Config, dependencies Dependencies) (Rule, error) { 147 | if err := config.Validate(); err != nil { 148 | return nil, errors.Wrap(err, "validate config") 149 | } 150 | 151 | switch config.EveryDay { 152 | case "": 153 | return newWatch(instance, config, dependencies) 154 | default: 155 | return newPeriodic(instance, config, dependencies) 156 | } 157 | } 158 | 159 | // --- Implementation code block --- 160 | type mockRule struct { 161 | component.Mock 162 | } 163 | 164 | func (m *mockRule) Config() *Config { 165 | args := m.Called() 166 | 167 | return args.Get(0).(*Config) 168 | } 169 | -------------------------------------------------------------------------------- /pkg/schedule/rule/watch.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public 
License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package rule 17 | 18 | import ( 19 | "context" 20 | "time" 21 | 22 | "github.com/pkg/errors" 23 | 24 | "github.com/glidea/zenfeed/pkg/component" 25 | "github.com/glidea/zenfeed/pkg/storage/feed/block" 26 | "github.com/glidea/zenfeed/pkg/telemetry" 27 | "github.com/glidea/zenfeed/pkg/telemetry/log" 28 | telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model" 29 | timeutil "github.com/glidea/zenfeed/pkg/util/time" 30 | ) 31 | 32 | func newWatch(instance string, config *Config, dependencies Dependencies) (Rule, error) { 33 | return &watch{ 34 | Base: component.New(&component.BaseConfig[Config, Dependencies]{ 35 | Name: "WatchRuler", 36 | Instance: instance, 37 | Config: config, 38 | Dependencies: dependencies, 39 | }), 40 | }, nil 41 | } 42 | 43 | type watch struct { 44 | *component.Base[Config, Dependencies] 45 | } 46 | 47 | func (r *watch) Run() (err error) { 48 | ctx := telemetry.StartWith(r.Context(), append(r.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...) 49 | defer func() { telemetry.End(ctx, err) }() 50 | r.MarkReady() 51 | 52 | iter := func(now time.Time) { 53 | config := r.Config() 54 | end := time.Unix(now.Unix(), 0).Truncate(config.WatchInterval) 55 | // Interval 0, 1 are retry, to ensure success. 56 | // That means, one execution result at least send 3 times. 57 | // So the customer need to deduplicate the result by themselves. 
58 | start := end.Add(-3 * config.WatchInterval) 59 | 60 | if err := r.execute(ctx, start, end); err != nil { 61 | log.Warn(ctx, errors.Wrap(err, "execute, retry in next time")) 62 | } 63 | log.Debug(ctx, "watch rule executed", "start", start, "end", end) 64 | } 65 | 66 | offset := timeutil.Random(time.Minute) 67 | log.Debug(ctx, "computed watch offset", "offset", offset) 68 | 69 | tick := time.NewTimer(offset) 70 | defer tick.Stop() 71 | for { 72 | select { 73 | case <-r.Context().Done(): 74 | return nil 75 | case now := <-tick.C: 76 | iter(now) 77 | tick.Reset(r.Config().WatchInterval) 78 | } 79 | } 80 | } 81 | 82 | func (r *watch) execute(ctx context.Context, start, end time.Time) error { 83 | ctx = log.With(ctx, "start", start, "end", end) 84 | 85 | // Query. 86 | config := r.Config() 87 | feeds, err := r.Dependencies().FeedStorage.Query(ctx, block.QueryOptions{ 88 | Query: config.Query, 89 | Threshold: config.Threshold, 90 | LabelFilters: config.LabelFilters, 91 | Start: start, 92 | End: end, 93 | Limit: 500, 94 | }) 95 | if err != nil { 96 | return errors.Wrap(err, "query") 97 | } 98 | if len(feeds) == 0 { 99 | log.Debug(ctx, "no feeds found") 100 | 101 | return nil 102 | } 103 | 104 | // Attach labels to feeds. 105 | for _, feed := range feeds { 106 | feed.Labels = append(feed.Labels, config.labels...) 107 | feed.Labels.EnsureSorted() 108 | } 109 | 110 | // Split feeds by start time. 111 | feedsByStart := make(map[time.Time][]*block.FeedVO) // Start time -> feeds. 112 | for _, feed := range feeds { 113 | interval := time.Unix(feed.Time.Unix(), 0).Truncate(config.WatchInterval) 114 | feedsByStart[interval] = append(feedsByStart[interval], feed) 115 | } 116 | 117 | // Notify. 
118 | for start, feeds := range feedsByStart { 119 | r.Dependencies().Out <- &Result{ 120 | Rule: config.Name, 121 | Time: start, 122 | Feeds: feeds, 123 | } 124 | } 125 | log.Debug(ctx, "rule notified", "feeds", len(feedsByStart)) 126 | 127 | return nil 128 | } 129 | -------------------------------------------------------------------------------- /pkg/schedule/schedule.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package schedule 17 | 18 | import ( 19 | "reflect" 20 | "time" 21 | 22 | "github.com/pkg/errors" 23 | 24 | "github.com/glidea/zenfeed/pkg/component" 25 | "github.com/glidea/zenfeed/pkg/config" 26 | "github.com/glidea/zenfeed/pkg/schedule/rule" 27 | "github.com/glidea/zenfeed/pkg/storage/feed" 28 | "github.com/glidea/zenfeed/pkg/telemetry" 29 | "github.com/glidea/zenfeed/pkg/telemetry/log" 30 | telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model" 31 | ) 32 | 33 | // --- Interface code block --- 34 | type Scheduler interface { 35 | component.Component 36 | config.Watcher 37 | } 38 | 39 | type Config struct { 40 | Rules []rule.Config 41 | } 42 | 43 | func (c *Config) Validate() error { 44 | for _, rule := range c.Rules { 45 | if err := (&rule).Validate(); err != nil { 46 | return errors.Wrap(err, "validate rule") 47 | } 48 | } 49 | 50 | return nil 51 | } 52 | 53 | func (c *Config) From(app *config.App) *Config { 54 | c.Rules = make([]rule.Config, len(app.Scheduls.Rules)) 55 | for i, r := range app.Scheduls.Rules { 56 | c.Rules[i] = rule.Config{ 57 | Name: r.Name, 58 | Query: r.Query, 59 | Threshold: r.Threshold, 60 | LabelFilters: r.LabelFilters, 61 | Labels: r.Labels, 62 | EveryDay: r.EveryDay, 63 | WatchInterval: time.Duration(r.WatchInterval), 64 | } 65 | } 66 | 67 | return c 68 | } 69 | 70 | type Dependencies struct { 71 | RuleFactory rule.Factory 72 | FeedStorage feed.Storage 73 | Out chan<- *rule.Result 74 | } 75 | 76 | // --- Factory code block --- 77 | type Factory component.Factory[Scheduler, config.App, Dependencies] 78 | 79 | func NewFactory(mockOn ...component.MockOption) Factory { 80 | if len(mockOn) > 0 { 81 | return component.FactoryFunc[Scheduler, config.App, Dependencies]( 82 | func(instance string, app *config.App, dependencies Dependencies) (Scheduler, error) { 83 | m := &mockScheduler{} 84 | component.MockOptions(mockOn).Apply(&m.Mock) 85 | 86 | return m, nil 87 | }, 88 | ) 89 | } 90 | 91 | return 
component.FactoryFunc[Scheduler, config.App, Dependencies](new) 92 | } 93 | 94 | func new(instance string, app *config.App, dependencies Dependencies) (Scheduler, error) { 95 | config := &Config{} 96 | config.From(app) 97 | if err := config.Validate(); err != nil { 98 | return nil, errors.Wrap(err, "validate config") 99 | } 100 | 101 | s := &scheduler{ 102 | Base: component.New(&component.BaseConfig[Config, Dependencies]{ 103 | Name: instance, 104 | Instance: instance, 105 | Config: config, 106 | Dependencies: dependencies, 107 | }), 108 | rules: make(map[string]rule.Rule, len(config.Rules)), 109 | } 110 | 111 | for i := range config.Rules { 112 | r := &config.Rules[i] 113 | rule, err := s.newRule(r) 114 | if err != nil { 115 | return nil, errors.Wrapf(err, "create rule %s", r.Name) 116 | } 117 | s.rules[r.Name] = rule 118 | } 119 | 120 | return s, nil 121 | } 122 | 123 | // --- Implementation code block --- 124 | type scheduler struct { 125 | *component.Base[Config, Dependencies] 126 | 127 | rules map[string]rule.Rule 128 | } 129 | 130 | func (s *scheduler) Run() (err error) { 131 | ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...) 
132 | defer func() { telemetry.End(ctx, err) }() 133 | 134 | for _, r := range s.rules { 135 | if err := component.RunUntilReady(ctx, r, 10*time.Second); err != nil { 136 | return errors.Wrapf(err, "running rule %s", r.Config().Name) 137 | } 138 | } 139 | 140 | s.MarkReady() 141 | <-ctx.Done() 142 | 143 | return nil 144 | } 145 | 146 | func (s *scheduler) Reload(app *config.App) error { 147 | newConfig := &Config{} 148 | newConfig.From(app) 149 | if err := newConfig.Validate(); err != nil { 150 | return errors.Wrap(err, "validate config") 151 | } 152 | if reflect.DeepEqual(s.Config(), newConfig) { 153 | log.Debug(s.Context(), "no changes in schedule config") 154 | 155 | return nil 156 | } 157 | 158 | newRules := make(map[string]rule.Rule, len(newConfig.Rules)) 159 | 160 | if err := s.runOrRestartRules(newConfig, newRules); err != nil { 161 | return errors.Wrap(err, "run or restart rules") 162 | } 163 | if err := s.stopObsoleteRules(newRules); err != nil { 164 | return errors.Wrap(err, "stop obsolete rules") 165 | } 166 | 167 | s.rules = newRules 168 | s.SetConfig(newConfig) 169 | 170 | return nil 171 | } 172 | 173 | func (s *scheduler) Close() error { 174 | if err := s.Base.Close(); err != nil { 175 | return errors.Wrap(err, "close base") 176 | } 177 | 178 | // Stop all rules. 179 | for _, r := range s.rules { 180 | _ = r.Close() 181 | } 182 | 183 | return nil 184 | } 185 | 186 | func (s *scheduler) newRule(config *rule.Config) (rule.Rule, error) { 187 | return s.Dependencies().RuleFactory.New(config.Name, config, rule.Dependencies{ 188 | FeedStorage: s.Dependencies().FeedStorage, 189 | Out: s.Dependencies().Out, 190 | }) 191 | } 192 | 193 | func (s *scheduler) runOrRestartRules(config *Config, newRules map[string]rule.Rule) error { 194 | for _, r := range config.Rules { 195 | // Close or reuse existing rule. 
196 | if existing, exists := s.rules[r.Name]; exists { 197 | if reflect.DeepEqual(existing.Config(), r) { 198 | newRules[r.Name] = existing 199 | 200 | continue 201 | } 202 | 203 | if err := existing.Close(); err != nil { 204 | return errors.Wrap(err, "close existing rule") 205 | } 206 | } 207 | 208 | // Create & Run new/updated rule. 209 | newRule, err := s.newRule(&r) 210 | if err != nil { 211 | return errors.Wrap(err, "create rule") 212 | } 213 | newRules[r.Name] = newRule 214 | if err := component.RunUntilReady(s.Context(), newRule, 10*time.Second); err != nil { 215 | return errors.Wrapf(err, "running rule %s", r.Name) 216 | } 217 | } 218 | 219 | return nil 220 | } 221 | 222 | func (s *scheduler) stopObsoleteRules(newRules map[string]rule.Rule) error { 223 | var lastErr error 224 | for name, r := range s.rules { 225 | if _, exists := newRules[name]; !exists { 226 | if err := r.Close(); err != nil { 227 | lastErr = errors.Wrap(err, "close obsolete rule") 228 | } 229 | } 230 | } 231 | 232 | return lastErr 233 | } 234 | 235 | type mockScheduler struct { 236 | component.Mock 237 | } 238 | 239 | func (m *mockScheduler) Reload(app *config.App) error { 240 | args := m.Called(app) 241 | 242 | return args.Error(0) 243 | } 244 | -------------------------------------------------------------------------------- /pkg/scrape/scraper/rss.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package scraper 17 | 18 | import ( 19 | "context" 20 | "strings" 21 | "time" 22 | 23 | "github.com/mmcdole/gofeed" 24 | "github.com/pkg/errors" 25 | "github.com/stretchr/testify/mock" 26 | 27 | "github.com/glidea/zenfeed/pkg/model" 28 | textconvert "github.com/glidea/zenfeed/pkg/util/text_convert" 29 | ) 30 | 31 | // --- Interface code block --- 32 | type ScrapeSourceRSS struct { 33 | URL string 34 | RSSHubEndpoint string 35 | RSSHubRoutePath string 36 | } 37 | 38 | func (c *ScrapeSourceRSS) Validate() error { 39 | if c.URL == "" && c.RSSHubEndpoint == "" { 40 | return errors.New("URL or RSSHubEndpoint can not be empty at the same time") 41 | } 42 | if c.URL == "" { 43 | c.URL = strings.TrimSuffix(c.RSSHubEndpoint, "/") + "/" + strings.TrimPrefix(c.RSSHubRoutePath, "/") 44 | } 45 | if c.URL != "" && !strings.HasPrefix(c.URL, "http://") && !strings.HasPrefix(c.URL, "https://") { 46 | return errors.New("URL must be a valid HTTP/HTTPS URL") 47 | } 48 | 49 | return nil 50 | } 51 | 52 | // --- Factory code block --- 53 | func newRSSReader(config *ScrapeSourceRSS) (reader, error) { 54 | if err := config.Validate(); err != nil { 55 | return nil, errors.Wrapf(err, "invalid RSS config") 56 | } 57 | 58 | return &rssReader{ 59 | config: config, 60 | client: &gofeedClient{ 61 | url: config.URL, 62 | base: gofeed.NewParser(), 63 | }, 64 | }, nil 65 | } 66 | 67 | // --- Implementation code block --- 68 | type rssReader struct { 69 | config *ScrapeSourceRSS 70 | client client 71 | } 72 | 73 | func (r *rssReader) Read(ctx context.Context) ([]*model.Feed, error) { 74 | feed, err := r.client.Get(ctx) 75 | if err != nil { 76 | return nil, errors.Wrapf(err, "fetching RSS feed") 77 | } 78 | if len(feed.Items) == 0 { 79 | return []*model.Feed{}, nil 80 | } 81 | 82 | now := 
clk.Now() 83 | feeds := make([]*model.Feed, 0, len(feed.Items)) 84 | for _, fi := range feed.Items { 85 | item, err := r.toResultFeed(now, fi) 86 | if err != nil { 87 | return nil, errors.Wrapf(err, "converting feed item") 88 | } 89 | 90 | feeds = append(feeds, item) 91 | } 92 | 93 | return feeds, nil 94 | } 95 | 96 | func (r *rssReader) toResultFeed(now time.Time, feedFeed *gofeed.Item) (*model.Feed, error) { 97 | content := r.combineContent(feedFeed.Content, feedFeed.Description) 98 | 99 | // Ensure the content is markdown. 100 | mdContent, err := textconvert.HTMLToMarkdown([]byte(content)) 101 | if err != nil { 102 | return nil, errors.Wrapf(err, "converting content to markdown") 103 | } 104 | 105 | // Create the feed item. 106 | feed := &model.Feed{ 107 | Labels: model.Labels{ 108 | {Key: model.LabelType, Value: "rss"}, 109 | {Key: model.LabelTitle, Value: feedFeed.Title}, 110 | {Key: model.LabelLink, Value: feedFeed.Link}, 111 | {Key: model.LabelPubTime, Value: r.parseTime(feedFeed).Format(time.RFC3339)}, 112 | {Key: model.LabelContent, Value: string(mdContent)}, 113 | }, 114 | Time: now, 115 | } 116 | 117 | return feed, nil 118 | } 119 | 120 | // parseTime parses the publication time from the feed item. 121 | // If the feed item does not have a publication time, it returns the current time. 122 | func (r *rssReader) parseTime(feedFeed *gofeed.Item) time.Time { 123 | if feedFeed.PublishedParsed == nil { 124 | return clk.Now().In(time.Local) 125 | } 126 | 127 | return feedFeed.PublishedParsed.In(time.Local) 128 | } 129 | 130 | // combineContent combines Content and Description fields with proper formatting. 
131 | func (r *rssReader) combineContent(content, description string) string { 132 | switch { 133 | case content == "": 134 | return description 135 | case description == "": 136 | return content 137 | default: 138 | return strings.Join([]string{description, content}, "\n\n") 139 | } 140 | } 141 | 142 | type client interface { 143 | Get(ctx context.Context) (*gofeed.Feed, error) 144 | } 145 | 146 | type gofeedClient struct { 147 | url string 148 | base *gofeed.Parser 149 | } 150 | 151 | func (c *gofeedClient) Get(ctx context.Context) (*gofeed.Feed, error) { 152 | return c.base.ParseURLWithContext(c.url, ctx) 153 | } 154 | 155 | type mockClient struct { 156 | mock.Mock 157 | } 158 | 159 | func newMockClient() *mockClient { 160 | return &mockClient{} 161 | } 162 | 163 | func (c *mockClient) Get(ctx context.Context) (*gofeed.Feed, error) { 164 | args := c.Called(ctx) 165 | if args.Error(1) != nil { 166 | return nil, args.Error(1) 167 | } 168 | 169 | return args.Get(0).(*gofeed.Feed), nil 170 | } 171 | -------------------------------------------------------------------------------- /pkg/scrape/scraper/source.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package scraper 17 | 18 | import ( 19 | "context" 20 | "errors" 21 | 22 | "github.com/stretchr/testify/mock" 23 | 24 | "github.com/glidea/zenfeed/pkg/model" 25 | ) 26 | 27 | // --- Interface code block --- 28 | 29 | // reader defines interface for reading from different data sources. 30 | type reader interface { 31 | // Read fetches content from the data source. 32 | // Returns a slice of feeds and any error encountered. 33 | Read(ctx context.Context) ([]*model.Feed, error) 34 | } 35 | 36 | // --- Factory code block --- 37 | func newReader(config *Config) (reader, error) { 38 | if config.RSS != nil { 39 | return newRSSReader(config.RSS) 40 | } 41 | 42 | return nil, errors.New("source not supported") 43 | } 44 | 45 | // --- Implementation code block --- 46 | 47 | type mockReader struct { 48 | mock.Mock 49 | } 50 | 51 | func NewMock() *mockReader { 52 | return &mockReader{} 53 | } 54 | 55 | func (m *mockReader) Read(ctx context.Context) ([]*model.Feed, error) { 56 | args := m.Called(ctx) 57 | if feeds := args.Get(0); feeds != nil { 58 | return feeds.([]*model.Feed), args.Error(1) 59 | } 60 | 61 | return nil, args.Error(1) 62 | } 63 | -------------------------------------------------------------------------------- /pkg/storage/feed/block/index/codec.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "context" 5 | "io" 6 | ) 7 | 8 | // Codec defines interface for encoding and decoding index. 9 | type Codec interface { 10 | // EncodeTo encodes the index to the given writer. 11 | EncodeTo(ctx context.Context, w io.Writer) (err error) 12 | // DecodeFrom decodes the index from the given reader. 
13 | DecodeFrom(ctx context.Context, r io.Reader) (err error) 14 | } 15 | -------------------------------------------------------------------------------- /pkg/storage/feed/block/index/primary/primary_test.go: -------------------------------------------------------------------------------- 1 | package primary 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "testing" 7 | "time" 8 | 9 | . "github.com/onsi/gomega" 10 | 11 | "github.com/glidea/zenfeed/pkg/test" 12 | ) 13 | 14 | func TestAdd(t *testing.T) { 15 | RegisterTestingT(t) 16 | 17 | type givenDetail struct { 18 | existingItems map[uint64]FeedRef 19 | } 20 | type whenDetail struct { 21 | id uint64 22 | item FeedRef 23 | } 24 | type thenExpected struct { 25 | items map[uint64]FeedRef 26 | } 27 | 28 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 29 | { 30 | Scenario: "Add Single Feed", 31 | Given: "An index with existing item", 32 | When: "Adding a single item", 33 | Then: "Should store the item correctly", 34 | GivenDetail: givenDetail{ 35 | existingItems: map[uint64]FeedRef{ 36 | 0: {Chunk: 0, Offset: 0}, 37 | }, 38 | }, 39 | WhenDetail: whenDetail{ 40 | id: 1, 41 | item: FeedRef{Chunk: 1, Offset: 100}, 42 | }, 43 | ThenExpected: thenExpected{ 44 | items: map[uint64]FeedRef{ 45 | 0: {Chunk: 0, Offset: 0}, 46 | 1: {Chunk: 1, Offset: 100}, 47 | }, 48 | }, 49 | }, 50 | { 51 | Scenario: "Update Existing Feed", 52 | Given: "An index with existing item", 53 | When: "Adding item with same ID", 54 | Then: "Should update the item reference", 55 | GivenDetail: givenDetail{ 56 | existingItems: map[uint64]FeedRef{ 57 | 1: {Chunk: 1, Offset: 100}, 58 | }, 59 | }, 60 | WhenDetail: whenDetail{ 61 | id: 1, 62 | item: FeedRef{Chunk: 2, Offset: 200}, 63 | }, 64 | ThenExpected: thenExpected{ 65 | items: map[uint64]FeedRef{ 66 | 1: {Chunk: 2, Offset: 200}, 67 | }, 68 | }, 69 | }, 70 | } 71 | 72 | for _, tt := range tests { 73 | t.Run(tt.Scenario, func(t *testing.T) { 74 | // Given. 
75 | idx0, err := NewFactory().New("test", &Config{}, Dependencies{}) 76 | Expect(err).NotTo(HaveOccurred()) 77 | for id, item := range tt.GivenDetail.existingItems { 78 | idx0.Add(context.Background(), id, item) 79 | } 80 | 81 | // When. 82 | idx0.Add(context.Background(), tt.WhenDetail.id, tt.WhenDetail.item) 83 | 84 | // Then. 85 | primIdx := idx0.(*idx) 86 | for id, expected := range tt.ThenExpected.items { 87 | Expect(primIdx.m).To(HaveKey(id)) 88 | Expect(primIdx.m[id]).To(Equal(expected)) 89 | } 90 | }) 91 | } 92 | } 93 | 94 | func TestSearch(t *testing.T) { 95 | RegisterTestingT(t) 96 | 97 | type givenDetail struct { 98 | feeds map[uint64]FeedRef 99 | } 100 | type whenDetail struct { 101 | searchID uint64 102 | } 103 | type thenExpected struct { 104 | feedRef FeedRef 105 | found bool 106 | } 107 | 108 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 109 | { 110 | Scenario: "Search Existing Feed", 111 | Given: "An index with feeds", 112 | When: "Searching for existing ID", 113 | Then: "Should return correct FeedRef", 114 | GivenDetail: givenDetail{ 115 | feeds: map[uint64]FeedRef{ 116 | 1: {Chunk: 1, Offset: 100}, 117 | 2: {Chunk: 2, Offset: 200}, 118 | }, 119 | }, 120 | WhenDetail: whenDetail{ 121 | searchID: 1, 122 | }, 123 | ThenExpected: thenExpected{ 124 | feedRef: FeedRef{Chunk: 1, Offset: 100}, 125 | found: true, 126 | }, 127 | }, 128 | { 129 | Scenario: "Search Non-Existing Feed", 130 | Given: "An index with feeds", 131 | When: "Searching for non-existing ID", 132 | Then: "Should return empty FeedRef", 133 | GivenDetail: givenDetail{ 134 | feeds: map[uint64]FeedRef{ 135 | 1: {Chunk: 1, Offset: 100}, 136 | }, 137 | }, 138 | WhenDetail: whenDetail{ 139 | searchID: 2, 140 | }, 141 | ThenExpected: thenExpected{ 142 | feedRef: FeedRef{}, 143 | found: false, 144 | }, 145 | }, 146 | } 147 | 148 | for _, tt := range tests { 149 | t.Run(tt.Scenario, func(t *testing.T) { 150 | // Given. 
151 | idx, err := NewFactory().New("test", &Config{}, Dependencies{}) 152 | Expect(err).NotTo(HaveOccurred()) 153 | for id, item := range tt.GivenDetail.feeds { 154 | idx.Add(context.Background(), id, item) 155 | } 156 | 157 | // When. 158 | result, ok := idx.Search(context.Background(), tt.WhenDetail.searchID) 159 | 160 | // Then. 161 | Expect(result).To(Equal(tt.ThenExpected.feedRef)) 162 | Expect(ok).To(Equal(tt.ThenExpected.found)) 163 | }) 164 | } 165 | } 166 | 167 | func TestEncodeDecode(t *testing.T) { 168 | RegisterTestingT(t) 169 | 170 | type givenDetail struct { 171 | feeds map[uint64]FeedRef 172 | } 173 | type whenDetail struct{} 174 | type thenExpected struct { 175 | success bool 176 | } 177 | 178 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 179 | { 180 | Scenario: "Encode and Decode Index with Data", 181 | Given: "An index with feeds", 182 | When: "Encoding and decoding", 183 | Then: "Should restore all data correctly", 184 | GivenDetail: givenDetail{ 185 | feeds: map[uint64]FeedRef{ 186 | 1: {Chunk: 1, Offset: 100, Time: time.Now()}, 187 | 2: {Chunk: 2, Offset: 200, Time: time.Now()}, 188 | }, 189 | }, 190 | WhenDetail: whenDetail{}, 191 | ThenExpected: thenExpected{ 192 | success: true, 193 | }, 194 | }, 195 | } 196 | 197 | for _, tt := range tests { 198 | t.Run(tt.Scenario, func(t *testing.T) { 199 | // Given. 200 | original, err := NewFactory().New("test", &Config{}, Dependencies{}) 201 | Expect(err).NotTo(HaveOccurred()) 202 | for id, item := range tt.GivenDetail.feeds { 203 | original.Add(context.Background(), id, item) 204 | } 205 | 206 | // When. 207 | var buf bytes.Buffer 208 | err = original.EncodeTo(context.Background(), &buf) 209 | Expect(err).NotTo(HaveOccurred()) 210 | 211 | decoded, err := NewFactory().New("test", &Config{}, Dependencies{}) 212 | Expect(err).NotTo(HaveOccurred()) 213 | err = decoded.DecodeFrom(context.Background(), &buf) 214 | Expect(err).NotTo(HaveOccurred()) 215 | 216 | // Then. 
217 | origIdx := original.(*idx) 218 | decodedIdx := decoded.(*idx) 219 | Expect(decodedIdx.m).To(Equal(origIdx.m)) 220 | }) 221 | } 222 | } 223 | -------------------------------------------------------------------------------- /pkg/storage/kv/kv.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package kv 17 | 18 | import ( 19 | "context" 20 | "strings" 21 | "time" 22 | 23 | "github.com/nutsdb/nutsdb" 24 | "github.com/pkg/errors" 25 | 26 | "github.com/glidea/zenfeed/pkg/component" 27 | "github.com/glidea/zenfeed/pkg/config" 28 | "github.com/glidea/zenfeed/pkg/telemetry" 29 | telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model" 30 | ) 31 | 32 | // --- Interface code block --- 33 | type Storage interface { 34 | component.Component 35 | Get(ctx context.Context, key []byte) ([]byte, error) 36 | Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) error 37 | } 38 | 39 | var ErrNotFound = errors.New("not found") 40 | 41 | type Config struct { 42 | Dir string 43 | } 44 | 45 | const subDir = "kv" 46 | 47 | func (c *Config) Validate() error { 48 | if c.Dir == "" { 49 | c.Dir = "./data/" + subDir 50 | } 51 | 52 | return nil 53 | } 54 | 55 | func (c *Config) From(app *config.App) *Config { 56 | c.Dir = app.Storage.Dir 57 | 58 | return c 59 | } 60 | 61 | type Dependencies struct{} 62 | 63 | // --- Factory code block --- 64 | type Factory component.Factory[Storage, config.App, Dependencies] 65 | 66 | func NewFactory(mockOn ...component.MockOption) Factory { 67 | if len(mockOn) > 0 { 68 | return component.FactoryFunc[Storage, config.App, Dependencies]( 69 | func(instance string, config *config.App, dependencies Dependencies) (Storage, error) { 70 | m := &mockKV{} 71 | component.MockOptions(mockOn).Apply(&m.Mock) 72 | 73 | return m, nil 74 | }, 75 | ) 76 | } 77 | 78 | return component.FactoryFunc[Storage, config.App, Dependencies](new) 79 | } 80 | 81 | func new(instance string, app *config.App, dependencies Dependencies) (Storage, error) { 82 | config := &Config{} 83 | config.From(app) 84 | if err := config.Validate(); err != nil { 85 | return nil, errors.Wrap(err, "validate config") 86 | } 87 | 88 | return &kv{ 89 | Base: component.New(&component.BaseConfig[Config, Dependencies]{ 90 | Name: "KVStorage", 91 | Instance: instance, 92 | 
Config: config, 93 | Dependencies: dependencies, 94 | }), 95 | }, nil 96 | } 97 | 98 | // --- Implementation code block --- 99 | type kv struct { 100 | *component.Base[Config, Dependencies] 101 | db *nutsdb.DB 102 | } 103 | 104 | func (k *kv) Run() error { 105 | db, err := nutsdb.Open( 106 | nutsdb.DefaultOptions, 107 | nutsdb.WithDir(k.Config().Dir), 108 | nutsdb.WithSyncEnable(false), 109 | ) 110 | if err != nil { 111 | return errors.Wrap(err, "open db") 112 | } 113 | if err := db.Update(func(tx *nutsdb.Tx) error { 114 | if !tx.ExistBucket(nutsdb.DataStructureBTree, bucket) { 115 | return tx.NewBucket(nutsdb.DataStructureBTree, bucket) 116 | } 117 | 118 | return nil 119 | }); err != nil { 120 | return errors.Wrap(err, "create bucket") 121 | } 122 | k.db = db 123 | 124 | k.MarkReady() 125 | <-k.Context().Done() 126 | 127 | return nil 128 | } 129 | 130 | func (k *kv) Close() error { 131 | if err := k.Base.Close(); err != nil { 132 | return errors.Wrap(err, "close base") 133 | } 134 | 135 | return k.db.Close() 136 | } 137 | 138 | const bucket = "0" 139 | 140 | func (k *kv) Get(ctx context.Context, key []byte) (value []byte, err error) { 141 | ctx = telemetry.StartWith(ctx, append(k.TelemetryLabels(), telemetrymodel.KeyOperation, "Get")...) 
142 | defer func() { 143 | telemetry.End(ctx, func() error { 144 | if err != nil && !errors.Is(err, ErrNotFound) { 145 | return err 146 | } 147 | 148 | return nil 149 | }()) 150 | }() 151 | 152 | var b []byte 153 | err = k.db.View(func(tx *nutsdb.Tx) error { 154 | b, err = tx.Get(bucket, []byte(key)) 155 | 156 | return err 157 | }) 158 | switch { 159 | case err == nil: 160 | return b, nil 161 | case errors.Is(err, nutsdb.ErrNotFoundKey): 162 | return nil, ErrNotFound 163 | case strings.Contains(err.Error(), "key not found"): 164 | return nil, ErrNotFound 165 | default: 166 | return nil, err 167 | } 168 | } 169 | 170 | func (k *kv) Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) (err error) { 171 | ctx = telemetry.StartWith(ctx, append(k.TelemetryLabels(), telemetrymodel.KeyOperation, "Set")...) 172 | defer func() { telemetry.End(ctx, err) }() 173 | 174 | return k.db.Update(func(tx *nutsdb.Tx) error { 175 | return tx.Put(bucket, key, value, uint32(ttl.Seconds())) 176 | }) 177 | } 178 | 179 | type mockKV struct { 180 | component.Mock 181 | } 182 | 183 | func (m *mockKV) Get(ctx context.Context, key []byte) ([]byte, error) { 184 | args := m.Called(ctx, key) 185 | 186 | return args.Get(0).([]byte), args.Error(1) 187 | } 188 | 189 | func (m *mockKV) Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) error { 190 | args := m.Called(ctx, key, value, ttl) 191 | 192 | return args.Error(0) 193 | } 194 | -------------------------------------------------------------------------------- /pkg/telemetry/log/log.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 
7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package log 17 | 18 | import ( 19 | "context" 20 | "log/slog" 21 | "os" 22 | "runtime" 23 | "strconv" 24 | "strings" 25 | "sync" 26 | "sync/atomic" 27 | 28 | "github.com/pkg/errors" 29 | slogdedup "github.com/veqryn/slog-dedup" 30 | 31 | "github.com/glidea/zenfeed/pkg/model" 32 | ) 33 | 34 | type Level string 35 | 36 | const ( 37 | LevelDebug Level = "debug" 38 | LevelInfo Level = "info" 39 | LevelWarn Level = "warn" 40 | LevelError Level = "error" 41 | ) 42 | 43 | func SetLevel(level Level) error { 44 | if level == "" { 45 | level = LevelInfo 46 | } 47 | 48 | var logLevel slog.Level 49 | switch level { 50 | case LevelDebug: 51 | logLevel = slog.LevelDebug 52 | case LevelInfo: 53 | logLevel = slog.LevelInfo 54 | case LevelWarn: 55 | logLevel = slog.LevelWarn 56 | case LevelError: 57 | logLevel = slog.LevelError 58 | default: 59 | return errors.Errorf("invalid log level, valid values are: %v", []Level{LevelDebug, LevelInfo, LevelWarn, LevelError}) 60 | } 61 | 62 | newLogger := slog.New(slogdedup.NewOverwriteHandler( 63 | slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: logLevel}), 64 | nil, 65 | )) 66 | 67 | mu.Lock() 68 | defaultLogger = newLogger 69 | mu.Unlock() 70 | 71 | return nil 72 | } 73 | 74 | // With returns a new context with additional labels added to the logger. 75 | func With(ctx context.Context, keyvals ...any) context.Context { 76 | logger := from(ctx) 77 | 78 | return with(ctx, logger.With(keyvals...)) 79 | } 80 | 81 | // Debug logs a debug message with stack trace. 
82 | func Debug(ctx context.Context, msg string, args ...any) { 83 | logWithStack(ctx, slog.LevelDebug, msg, args...) 84 | } 85 | 86 | // Info logs an informational message with stack trace. 87 | func Info(ctx context.Context, msg string, args ...any) { 88 | logWithStack(ctx, slog.LevelInfo, msg, args...) 89 | } 90 | 91 | // Warn logs a warning message with stack trace. 92 | func Warn(ctx context.Context, err error, args ...any) { 93 | logWithStack(ctx, slog.LevelWarn, err.Error(), args...) 94 | } 95 | 96 | // Error logs an error message with call stack trace. 97 | func Error(ctx context.Context, err error, args ...any) { 98 | logWithStack(ctx, slog.LevelError, err.Error(), args...) 99 | } 100 | 101 | // Fatal logs a fatal message with call stack trace. 102 | // It will call os.Exit(1) after logging. 103 | func Fatal(ctx context.Context, err error, args ...any) { 104 | logWithStack(ctx, slog.LevelError, err.Error(), args...) 105 | os.Exit(1) 106 | } 107 | 108 | type ctxKey uint8 109 | 110 | var ( 111 | loggerCtxKey = ctxKey(0) 112 | defaultLogger = slog.New(slogdedup.NewOverwriteHandler(slog.NewTextHandler(os.Stdout, nil), nil)) 113 | mu sync.RWMutex 114 | // withStackLevel controls which log level and above will include stack traces. 115 | withStackLevel atomic.Int32 116 | ) 117 | 118 | func init() { 119 | // Default to include stack traces for Warn and above. 120 | SetWithStackLevel(slog.LevelWarn) 121 | } 122 | 123 | // SetWithStackLevel sets the minimum log level that will include stack traces. 124 | // It should not be called in init(). 125 | func SetWithStackLevel(level slog.Level) { 126 | withStackLevel.Store(int32(level)) 127 | } 128 | 129 | // with returns a new context with the given logger. 130 | func with(ctx context.Context, logger *slog.Logger) context.Context { 131 | return context.WithValue(ctx, loggerCtxKey, logger) 132 | } 133 | 134 | // from retrieves the logger from context. 135 | // Returns default logger if context has no logger. 
136 | func from(ctx context.Context) *slog.Logger { 137 | mu.RLock() 138 | defer mu.RUnlock() 139 | if ctx == nil { 140 | return defaultLogger 141 | } 142 | 143 | if logger, ok := ctx.Value(loggerCtxKey).(*slog.Logger); ok { 144 | return logger 145 | } 146 | 147 | return defaultLogger 148 | } 149 | 150 | const ( 151 | stackSkip = 2 // Skip ERROR../logWithStack. 152 | stackDepth = 5 // Maximum number of stack frames to capture. 153 | avgFrameLen = 64 154 | ) 155 | 156 | func logWithStack(ctx context.Context, level slog.Level, msg string, args ...any) { 157 | logger := from(ctx) 158 | if !logger.Enabled(ctx, level) { 159 | // avoid to get stack trace if logging is disabled for this level 160 | return 161 | } 162 | 163 | // Only include stack trace if level is >= withStackLevel 164 | newArgs := make([]any, 0, len(args)+2) 165 | newArgs = append(newArgs, args...) 166 | if level >= slog.Level(withStackLevel.Load()) { 167 | newArgs = append(newArgs, "stack", getStack(stackSkip, stackDepth)) 168 | } 169 | 170 | logger.Log(ctx, level, msg, newArgs...) 171 | } 172 | 173 | // getStack returns a formatted call stack trace. 174 | func getStack(skip, depth int) string { 175 | pc := make([]uintptr, depth) 176 | n := runtime.Callers(skip+2, pc) // skip itself and runtime.Callers 177 | if n == 0 { 178 | return "" 179 | } 180 | 181 | var b strings.Builder 182 | b.Grow(n * avgFrameLen) 183 | 184 | frames := runtime.CallersFrames(pc[:n]) 185 | first := true 186 | for frame, more := frames.Next(); more; frame, more = frames.Next() { 187 | if !first { 188 | b.WriteString(" <- ") 189 | } 190 | first = false 191 | 192 | fn := strings.TrimPrefix(frame.Function, model.Module) // no module prefix for zenfeed self. 
193 | b.WriteString(fn) 194 | b.WriteByte(':') 195 | b.WriteString(strconv.Itoa(frame.Line)) 196 | } 197 | 198 | return b.String() 199 | } 200 | -------------------------------------------------------------------------------- /pkg/telemetry/metric/metric.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package metric 17 | 18 | import ( 19 | "context" 20 | "net/http" 21 | "time" 22 | 23 | "github.com/prometheus/client_golang/prometheus" 24 | "github.com/prometheus/client_golang/prometheus/promauto" 25 | "github.com/prometheus/client_golang/prometheus/promhttp" 26 | 27 | "github.com/glidea/zenfeed/pkg/model" 28 | telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model" 29 | ) 30 | 31 | func Handler() http.Handler { 32 | return promhttp.Handler() 33 | } 34 | 35 | var ( 36 | operationInFlight = promauto.NewGaugeVec( 37 | prometheus.GaugeOpts{ 38 | Namespace: model.AppName, 39 | Name: "operation_in_flight", 40 | Help: "Number of operations in flight.", 41 | }, 42 | []string{ 43 | telemetrymodel.KeyComponent, 44 | telemetrymodel.KeyComponentInstance, 45 | telemetrymodel.KeyOperation, 46 | }, 47 | ) 48 | 49 | operationTotal = promauto.NewCounterVec( 50 | prometheus.CounterOpts{ 51 | Namespace: model.AppName, 52 | Name: "operation_total", 53 | Help: "Total number of operations.", 54 | }, 55 | []string{ 56 | telemetrymodel.KeyComponent, 57 | telemetrymodel.KeyComponentInstance, 58 | telemetrymodel.KeyOperation, 59 | telemetrymodel.KeyResult, 60 | }, 61 | ) 62 | 63 | operationDuration = promauto.NewHistogramVec( 64 | prometheus.HistogramOpts{ 65 | Namespace: model.AppName, 66 | Name: "operation_duration_seconds", 67 | Help: "Histogram of operation latencies in seconds.", 68 | Buckets: []float64{.001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 20}, 69 | }, 70 | []string{ 71 | telemetrymodel.KeyComponent, 72 | telemetrymodel.KeyComponentInstance, 73 | telemetrymodel.KeyOperation, 74 | telemetrymodel.KeyResult, 75 | }, 76 | ) 77 | ) 78 | 79 | type ctxKey uint8 80 | 81 | const ( 82 | ctxKeyComponent ctxKey = iota 83 | ctxKeyInstance 84 | ctxKeyOperation 85 | ctxKeyStartTime 86 | ) 87 | 88 | func StartWith(ctx context.Context, keyvals ...any) context.Context { 89 | // Extend from parent context. 
90 | component, instance, operation, _ := parseFrom(ctx) 91 | 92 | // Parse component and operation... from keyvals. 93 | for i := 0; i < len(keyvals); i += 2 { 94 | if i+1 < len(keyvals) { 95 | switch keyvals[i] { 96 | case telemetrymodel.KeyComponent: 97 | component = keyvals[i+1].(string) 98 | case telemetrymodel.KeyComponentInstance: 99 | instance = keyvals[i+1].(string) 100 | case telemetrymodel.KeyOperation: 101 | operation = keyvals[i+1].(string) 102 | } 103 | } 104 | } 105 | if component == "" || operation == "" { 106 | panic("missing required keyvals") 107 | } 108 | 109 | // Record operation in flight. 110 | operationInFlight.WithLabelValues(component, instance, operation).Inc() 111 | 112 | // Add to context. 113 | ctx = context.WithValue(ctx, ctxKeyComponent, component) 114 | ctx = context.WithValue(ctx, ctxKeyInstance, instance) 115 | ctx = context.WithValue(ctx, ctxKeyOperation, operation) 116 | ctx = context.WithValue(ctx, ctxKeyStartTime, time.Now()) 117 | 118 | return ctx 119 | } 120 | 121 | func RecordRED(ctx context.Context, err error) { 122 | // Parse component, instance, operation, and start time from context. 123 | component, instance, operation, startTime := parseFrom(ctx) 124 | duration := time.Since(startTime) 125 | 126 | // Determine result. 127 | result := telemetrymodel.ValResultSuccess 128 | if err != nil { 129 | result = telemetrymodel.ValResultError 130 | } 131 | 132 | // Record metrics. 
133 | operationTotal.WithLabelValues(component, instance, operation, result).Inc() 134 | operationDuration.WithLabelValues(component, instance, operation, result).Observe(duration.Seconds()) 135 | operationInFlight.WithLabelValues(component, instance, operation).Dec() 136 | } 137 | 138 | func Close(id prometheus.Labels) { 139 | operationInFlight.DeletePartialMatch(id) 140 | operationTotal.DeletePartialMatch(id) 141 | operationDuration.DeletePartialMatch(id) 142 | } 143 | 144 | func parseFrom(ctx context.Context) (component, instance, operation string, startTime time.Time) { 145 | if v := ctx.Value(ctxKeyComponent); v != nil { 146 | component = v.(string) 147 | } 148 | if v := ctx.Value(ctxKeyInstance); v != nil { 149 | instance = v.(string) 150 | } 151 | if v := ctx.Value(ctxKeyOperation); v != nil { 152 | operation = v.(string) 153 | } 154 | if v := ctx.Value(ctxKeyStartTime); v != nil { 155 | startTime = v.(time.Time) 156 | } 157 | 158 | return 159 | } 160 | -------------------------------------------------------------------------------- /pkg/telemetry/model/model.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package model 17 | 18 | const ( 19 | // KeyComponent is the label for the component name. 
20 | KeyComponent = "component" 21 | // KeyComponentInstance is the label for the component instance name. 22 | KeyComponentInstance = "component_instance" 23 | // KeyOperation is the label for the operation name. 24 | KeyOperation = "operation" 25 | // KeyResult is the label for the result of the operation. 26 | KeyResult = "result" 27 | ValResultSuccess = "success" 28 | ValResultError = "error" 29 | ) 30 | -------------------------------------------------------------------------------- /pkg/telemetry/server/server.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package http 17 | 18 | import ( 19 | "net" 20 | "net/http" 21 | "net/http/pprof" 22 | 23 | "github.com/pkg/errors" 24 | 25 | "github.com/glidea/zenfeed/pkg/component" 26 | "github.com/glidea/zenfeed/pkg/config" 27 | telemetry "github.com/glidea/zenfeed/pkg/telemetry" 28 | "github.com/glidea/zenfeed/pkg/telemetry/log" 29 | "github.com/glidea/zenfeed/pkg/telemetry/metric" 30 | telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model" 31 | ) 32 | 33 | // --- Interface code block --- 34 | type Server interface { 35 | component.Component 36 | } 37 | 38 | type Config struct { 39 | Address string 40 | } 41 | 42 | func (c *Config) Validate() error { 43 | if c.Address == "" { 44 | c.Address = ":9090" 45 | } 46 | if _, _, err := net.SplitHostPort(c.Address); err != nil { 47 | return errors.Wrap(err, "invalid address") 48 | } 49 | 50 | return nil 51 | } 52 | 53 | func (c *Config) From(app *config.App) *Config { 54 | c.Address = app.Telemetry.Address 55 | 56 | return c 57 | } 58 | 59 | type Dependencies struct { 60 | } 61 | 62 | // --- Factory code block --- 63 | type Factory component.Factory[Server, config.App, Dependencies] 64 | 65 | func NewFactory(mockOn ...component.MockOption) Factory { 66 | if len(mockOn) > 0 { 67 | return component.FactoryFunc[Server, config.App, Dependencies]( 68 | func(instance string, config *config.App, dependencies Dependencies) (Server, error) { 69 | m := &mockServer{} 70 | component.MockOptions(mockOn).Apply(&m.Mock) 71 | 72 | return m, nil 73 | }, 74 | ) 75 | } 76 | 77 | return component.FactoryFunc[Server, config.App, Dependencies](new) 78 | } 79 | 80 | func new(instance string, app *config.App, dependencies Dependencies) (Server, error) { 81 | config := &Config{} 82 | config.From(app) 83 | if err := config.Validate(); err != nil { 84 | return nil, errors.Wrap(err, "validate config") 85 | } 86 | 87 | router := http.NewServeMux() 88 | router.Handle("/health", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 89 | 
w.WriteHeader(200) 90 | })) 91 | router.Handle("/metrics", metric.Handler()) 92 | router.HandleFunc("/pprof", pprof.Index) 93 | router.HandleFunc("/pprof/cmdline", pprof.Cmdline) 94 | router.HandleFunc("/pprof/profile", pprof.Profile) 95 | router.HandleFunc("/pprof/symbol", pprof.Symbol) 96 | router.HandleFunc("/pprof/trace", pprof.Trace) 97 | 98 | return &server{ 99 | Base: component.New(&component.BaseConfig[Config, Dependencies]{ 100 | Name: "TelemetryServer", 101 | Instance: instance, 102 | Config: config, 103 | Dependencies: dependencies, 104 | }), 105 | http: &http.Server{Addr: config.Address, Handler: router}, 106 | }, nil 107 | } 108 | 109 | // --- Implementation code block --- 110 | type server struct { 111 | *component.Base[Config, Dependencies] 112 | http *http.Server 113 | } 114 | 115 | func (s *server) Run() (err error) { 116 | ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...) 117 | defer func() { telemetry.End(ctx, err) }() 118 | 119 | serverErr := make(chan error, 1) 120 | go func() { 121 | serverErr <- s.http.ListenAndServe() 122 | }() 123 | 124 | s.MarkReady() 125 | select { 126 | case <-ctx.Done(): 127 | log.Info(ctx, "shutting down") 128 | 129 | return s.http.Shutdown(ctx) 130 | case err := <-serverErr: 131 | return errors.Wrap(err, "listen and serve") 132 | } 133 | } 134 | 135 | type mockServer struct { 136 | component.Mock 137 | } 138 | -------------------------------------------------------------------------------- /pkg/telemetry/telemetry.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 
7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package telemetry 17 | 18 | import ( 19 | "context" 20 | 21 | "github.com/prometheus/client_golang/prometheus" 22 | 23 | "github.com/glidea/zenfeed/pkg/telemetry/log" 24 | "github.com/glidea/zenfeed/pkg/telemetry/metric" 25 | ) 26 | 27 | type Labels []any 28 | 29 | func (l Labels) Get(key any) any { 30 | for i := 0; i < len(l); i += 2 { 31 | if l[i] == key { 32 | return l[i+1] 33 | } 34 | } 35 | 36 | return nil 37 | } 38 | 39 | // StartWith starts a new operation with the given key-value pairs. 40 | // MUST call End() to finalize the operation. 41 | func StartWith(ctx context.Context, keyvals ...any) context.Context { 42 | ctx = log.With(ctx, keyvals...) 43 | ctx = metric.StartWith(ctx, keyvals...) 44 | 45 | return ctx 46 | } 47 | 48 | // End records and finalizes the operation. 49 | func End(ctx context.Context, err error) { 50 | metric.RecordRED(ctx, err) 51 | } 52 | 53 | // CloseMetrics closes the metrics for the given id. 54 | func CloseMetrics(id prometheus.Labels) { 55 | metric.Close(id) 56 | } 57 | -------------------------------------------------------------------------------- /pkg/test/test.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

package test

// Case is a BDD-style test case for a feature.
//
// Background: https://en.wikipedia.org/wiki/Behavior-driven_development.
// You do not need to understand BDD in depth: we only use Scenario, Given,
// When and Then to describe a test case, which has several advantages:
//  1. It is highly readable and easy to maintain.
//  2. It doubles as a requirement or use-case description, which helps in a TDD
//     workflow and lets AI generate code from it ("code as prompt").
//  3. It tests against requirement descriptions rather than implementation
//     details: top-down, with the requirement level above the details.
//
// Note that "requirement" is meant broadly here: it does not only refer to
// requirements from the product side, but also to the interface behavior
// defined by the module under test.
//
// TODO: Use this consistently.
type Case[T1 any, T2 any, T3 any] struct {
	// Scenario describes the feature exercised by the test case.
	// E.g. "Query hot block with label filters".
	Scenario string

	// Given is the initial context (context, NOT the parameters of the method)
	// at the beginning of the scenario, in one or more clauses.
	// E.g. "a hot block with indexed feeds".
	Given string
	// When is the event that triggers the scenario.
	// E.g. "querying with label filters".
	When string
	// Then is the expected outcome, in one or more clauses.
	// E.g. "should return matching feeds".
	Then string

	// GivenDetail is the detail of the given context.
	// Generally speaking, it describes the state the object under test should
	// have. E.g. for a "hot block": what does it look like, what are its member
	// variable values, and how are its external dependencies expected to behave?
	GivenDetail T1
	// WhenDetail is the detail of the when event.
	// Generally speaking, it describes the parameters of the method call.
	// E.g. what the query options look like.
	WhenDetail T2
	// ThenExpected is the expected outcome of the scenario.
	// Generally speaking, it describes the return value of the method call.
	// E.g. what the returned feeds look like.
	ThenExpected T3
}

--------------------------------------------------------------------------------
/pkg/util/binary/binary.go:
--------------------------------------------------------------------------------
// Copyright (C) 2025 wangyusong
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

package binary

import (
	"encoding/binary"
	"io"
	"math"
	"sync"

	"github.com/pkg/errors"

	"github.com/glidea/zenfeed/pkg/util/buffer"
)

// WriteString writes a string to a writer as a uint32 byte length followed by
// the string's bytes.
30 | func WriteString(w io.Writer, str string) error { 31 | len := len(str) 32 | if len > math.MaxUint32 { 33 | return errors.New("length exceeds maximum uint32") 34 | } 35 | 36 | if err := WriteUint32(w, uint32(len)); err != nil { 37 | return errors.Wrap(err, "write length") 38 | } 39 | if _, err := io.WriteString(w, str); err != nil { 40 | return errors.Wrap(err, "write data") 41 | } 42 | 43 | return nil 44 | } 45 | 46 | // ReadString reads a string from a reader. 47 | func ReadString(r io.Reader) (string, error) { 48 | len, err := ReadUint32(r) 49 | if err != nil { 50 | return "", errors.Wrap(err, "read length") 51 | } 52 | 53 | bb := buffer.Get() 54 | defer buffer.Put(bb) 55 | // bb.EnsureRemaining(int(len)) 56 | 57 | if _, err := io.CopyN(bb, r, int64(len)); err != nil { 58 | return "", errors.Wrap(err, "read data") 59 | } 60 | 61 | return bb.String(), nil 62 | } 63 | 64 | var smallBufPool = sync.Pool{ 65 | New: func() any { 66 | // 8 bytes is enough for uint64, uint32, float32. 67 | b := make([]byte, 8) 68 | 69 | return &b 70 | }, 71 | } 72 | 73 | // WriteUint64 writes a uint64 using a pooled buffer. 74 | func WriteUint64(w io.Writer, v uint64) error { 75 | bp := smallBufPool.Get().(*[]byte) 76 | defer smallBufPool.Put(bp) 77 | b := *bp 78 | 79 | binary.LittleEndian.PutUint64(b, v) 80 | _, err := w.Write(b[:8]) 81 | 82 | return err 83 | } 84 | 85 | // ReadUint64 reads a uint64 using a pooled buffer. 86 | func ReadUint64(r io.Reader) (uint64, error) { 87 | bp := smallBufPool.Get().(*[]byte) 88 | defer smallBufPool.Put(bp) 89 | b := (*bp)[:8] 90 | 91 | // Read exactly 8 bytes into the slice. 92 | if _, err := io.ReadFull(r, b); err != nil { 93 | return 0, errors.Wrap(err, "read uint64") 94 | } 95 | 96 | return binary.LittleEndian.Uint64(b), nil 97 | } 98 | 99 | // WriteUint32 writes a uint32 using a pooled buffer. 
100 | func WriteUint32(w io.Writer, v uint32) error { 101 | bp := smallBufPool.Get().(*[]byte) 102 | defer smallBufPool.Put(bp) 103 | b := *bp 104 | 105 | binary.LittleEndian.PutUint32(b, v) 106 | _, err := w.Write(b[:4]) 107 | 108 | return err 109 | } 110 | 111 | // ReadUint32 reads a uint32 using a pooled buffer. 112 | func ReadUint32(r io.Reader) (uint32, error) { 113 | bp := smallBufPool.Get().(*[]byte) 114 | defer smallBufPool.Put(bp) 115 | b := (*bp)[:4] 116 | 117 | // Read exactly 4 bytes into the slice. 118 | if _, err := io.ReadFull(r, b); err != nil { 119 | return 0, errors.Wrap(err, "read uint32") 120 | } 121 | 122 | return binary.LittleEndian.Uint32(b), nil 123 | } 124 | 125 | // WriteFloat32 writes a float32 using a pooled buffer. 126 | func WriteFloat32(w io.Writer, v float32) error { 127 | return WriteUint32(w, math.Float32bits(v)) 128 | } 129 | 130 | // ReadFloat32 reads a float32 using a pooled buffer. 131 | func ReadFloat32(r io.Reader) (float32, error) { 132 | // Read the uint32 bits first. 133 | bits, err := ReadUint32(r) 134 | if err != nil { 135 | return 0, err 136 | } 137 | 138 | // Convert bits to float32. 139 | return math.Float32frombits(bits), nil 140 | } 141 | -------------------------------------------------------------------------------- /pkg/util/binary/binary_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 
12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package binary 17 | 18 | import ( 19 | "bytes" 20 | "testing" 21 | 22 | . "github.com/onsi/gomega" 23 | 24 | "github.com/glidea/zenfeed/pkg/test" 25 | ) 26 | 27 | func TestWriteString(t *testing.T) { 28 | RegisterTestingT(t) 29 | 30 | type givenDetail struct{} 31 | type whenDetail struct { 32 | str string 33 | } 34 | type thenExpected struct{} 35 | 36 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 37 | { 38 | Scenario: "Write empty string", 39 | When: "writing an empty string to a buffer", 40 | Then: "should write successfully without error", 41 | WhenDetail: whenDetail{ 42 | str: "", 43 | }, 44 | ThenExpected: thenExpected{}, 45 | }, 46 | { 47 | Scenario: "Write normal string", 48 | When: "writing a normal string to a buffer", 49 | Then: "should write successfully without error", 50 | WhenDetail: whenDetail{ 51 | str: "hello world", 52 | }, 53 | ThenExpected: thenExpected{}, 54 | }, 55 | } 56 | 57 | for _, tt := range tests { 58 | t.Run(tt.Scenario, func(t *testing.T) { 59 | // When. 60 | buf := &bytes.Buffer{} 61 | err := WriteString(buf, tt.WhenDetail.str) 62 | 63 | // Then. 
64 | Expect(err).NotTo(HaveOccurred()) 65 | 66 | // Verify the written data by reading it back 67 | readStr, readErr := ReadString(bytes.NewReader(buf.Bytes())) 68 | Expect(readErr).NotTo(HaveOccurred()) 69 | Expect(readStr).To(Equal(tt.WhenDetail.str)) 70 | }) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /pkg/util/buffer/buffer.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package buffer 17 | 18 | import ( 19 | "sync" 20 | "unsafe" 21 | ) 22 | 23 | var pool = sync.Pool{ 24 | New: func() any { 25 | return &Bytes{B: make([]byte, 0, 1024)} 26 | }, 27 | } 28 | 29 | func Get() *Bytes { 30 | return pool.Get().(*Bytes) 31 | } 32 | 33 | func Put(b *Bytes) { 34 | if b.Len() > 512*1024 { // Avoid large buffer. 35 | b = nil 36 | 37 | return 38 | } 39 | 40 | b.Reset() 41 | pool.Put(b) 42 | } 43 | 44 | // Bytes is a simple buffer. 45 | // It is unsafe, SHOULD not modify existing bytes. 
46 | type Bytes struct { 47 | B []byte 48 | } 49 | 50 | func (bs *Bytes) Reset() { 51 | bs.B = bs.B[:0] 52 | } 53 | 54 | func (bs *Bytes) String() string { 55 | return string(bs.B) 56 | } 57 | 58 | func (bs *Bytes) Bytes() []byte { 59 | return bs.B 60 | } 61 | 62 | func (bs *Bytes) Write(p []byte) (n int, err error) { 63 | bs.B = append(bs.B, p...) 64 | 65 | return len(p), nil 66 | } 67 | 68 | // Unsafe!!! 69 | func (bs *Bytes) WriteString(s string) (n int, err error) { 70 | b := unsafe.Slice(unsafe.StringData(s), len(s)) 71 | 72 | return bs.Write(b) 73 | } 74 | 75 | // EnsureRemaining ensures the buffer has space for at least `atLeast` 76 | // additional bytes beyond the current length (i.e., remaining capacity). 77 | // It grows the buffer if necessary using an amortized growth strategy. 78 | func (bs *Bytes) EnsureRemaining(atLeast int) { 79 | if atLeast <= 0 { 80 | return 81 | } 82 | 83 | // Calculate the minimum total capacity required. 84 | // needCap = current_length + required_remaining_capacity 85 | needCap := len(bs.B) + atLeast 86 | if cap(bs.B) >= needCap { 87 | // Current capacity is already sufficient. 88 | return 89 | } 90 | 91 | // --- Need to grow --- 92 | 93 | // Determine the new capacity. 94 | // Strategy: Double the existing capacity, but make sure it's at least needCap. 95 | // This amortizes the cost of allocations over time. 96 | newCap := max(cap(bs.B)*2, needCap) 97 | 98 | // Allocate a new slice with the current length and the calculated new capacity. 99 | // Note: We create it with the *current length*, not zero length. 100 | newB := make([]byte, len(bs.B), newCap) 101 | 102 | // Copy the existing data from the old buffer to the new buffer. 103 | copy(newB, bs.B) // copy is efficient 104 | 105 | // Replace the buffer's internal slice with the new one. 
106 | bs.B = newB 107 | } 108 | 109 | func (bs *Bytes) Remaining() int { 110 | return cap(bs.B) - len(bs.B) 111 | } 112 | 113 | func (bs *Bytes) Len() int { 114 | return len(bs.B) 115 | } 116 | 117 | func (bs *Bytes) Cap() int { 118 | return cap(bs.B) 119 | } 120 | -------------------------------------------------------------------------------- /pkg/util/crawl/crawl.go: -------------------------------------------------------------------------------- 1 | package crawl 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "net/url" 9 | "sync" 10 | 11 | "github.com/pkg/errors" 12 | "github.com/temoto/robotstxt" 13 | 14 | "github.com/glidea/zenfeed/pkg/util/text_convert" 15 | ) 16 | 17 | type Crawler interface { 18 | Markdown(ctx context.Context, u string) ([]byte, error) 19 | } 20 | 21 | type local struct { 22 | hc *http.Client 23 | 24 | robotsDataCache sync.Map 25 | } 26 | 27 | func NewLocal() Crawler { 28 | return &local{ 29 | hc: &http.Client{}, 30 | } 31 | } 32 | 33 | func (c *local) Markdown(ctx context.Context, u string) ([]byte, error) { 34 | // Check if the page is allowed. 35 | if err := c.checkAllowed(ctx, u); err != nil { 36 | return nil, errors.Wrapf(err, "check robots.txt for %s", u) 37 | } 38 | 39 | // Prepare the request. 40 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) 41 | if err != nil { 42 | return nil, errors.Wrapf(err, "create request for %s", u) 43 | } 44 | req.Header.Set("User-Agent", userAgent) 45 | 46 | // Send the request. 47 | resp, err := c.hc.Do(req) 48 | if err != nil { 49 | return nil, errors.Wrapf(err, "fetch %s", u) 50 | } 51 | defer func() { _ = resp.Body.Close() }() 52 | 53 | // Parse the response. 
54 | if resp.StatusCode != http.StatusOK { 55 | return nil, errors.Errorf("received non-200 status code %d from %s", resp.StatusCode, u) 56 | } 57 | bodyBytes, err := io.ReadAll(resp.Body) 58 | if err != nil { 59 | return nil, errors.Wrapf(err, "read body from %s", u) 60 | } 61 | 62 | // Convert the body to markdown. 63 | mdBytes, err := textconvert.HTMLToMarkdown(bodyBytes) 64 | if err != nil { 65 | return nil, errors.Wrap(err, "convert html to markdown") 66 | } 67 | 68 | return mdBytes, nil 69 | } 70 | 71 | const userAgent = "ZenFeed" 72 | 73 | func (c *local) checkAllowed(ctx context.Context, u string) error { 74 | parsedURL, err := url.Parse(u) 75 | if err != nil { 76 | return errors.Wrapf(err, "parse url %s", u) 77 | } 78 | 79 | d, err := c.getRobotsData(ctx, parsedURL.Host) 80 | if err != nil { 81 | return errors.Wrapf(err, "check robots.txt for %s", parsedURL.Host) 82 | } 83 | if !d.TestAgent(parsedURL.Path, userAgent) { 84 | return errors.Errorf("disallowed by robots.txt for %s", u) 85 | } 86 | 87 | return nil 88 | } 89 | 90 | // getRobotsData fetches and parses robots.txt for a given host. 91 | func (c *local) getRobotsData(ctx context.Context, host string) (*robotstxt.RobotsData, error) { 92 | // Check the cache. 93 | if data, found := c.robotsDataCache.Load(host); found { 94 | return data.(*robotstxt.RobotsData), nil 95 | } 96 | 97 | // Prepare the request. 98 | robotsURL := fmt.Sprintf("https://%s/robots.txt", host) 99 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, robotsURL, nil) 100 | if err != nil { 101 | return nil, errors.Wrapf(err, "create request for %s", robotsURL) 102 | } 103 | req.Header.Set("User-Agent", userAgent) 104 | 105 | // Send the request. 106 | resp, err := c.hc.Do(req) 107 | if err != nil { 108 | return nil, errors.Wrapf(err, "fetch %s", robotsURL) 109 | } 110 | defer func() { _ = resp.Body.Close() }() 111 | 112 | // Parse the response. 
113 | switch resp.StatusCode { 114 | case http.StatusOK: 115 | data, err := robotstxt.FromResponse(resp) 116 | if err != nil { 117 | return nil, errors.Wrapf(err, "parse robots.txt from %s", robotsURL) 118 | } 119 | c.robotsDataCache.Store(host, data) 120 | 121 | return data, nil 122 | 123 | case http.StatusNotFound: 124 | data := &robotstxt.RobotsData{} 125 | c.robotsDataCache.Store(host, data) 126 | 127 | return data, nil 128 | 129 | case http.StatusUnauthorized, http.StatusForbidden: 130 | return nil, errors.Errorf("access to %s denied (status %d)", robotsURL, resp.StatusCode) 131 | default: 132 | return nil, errors.Errorf("unexpected status %d fetching %s", resp.StatusCode, robotsURL) 133 | } 134 | } 135 | 136 | type jina struct { 137 | hc *http.Client 138 | token string 139 | } 140 | 141 | func NewJina(token string) Crawler { 142 | return &jina{ 143 | hc: &http.Client{}, 144 | 145 | // If token is empty, will not affect to use, but rate limit will be lower. 146 | // See https://jina.ai/api-dashboard/rate-limit. 
147 | token: token, 148 | } 149 | } 150 | 151 | func (c *jina) Markdown(ctx context.Context, u string) ([]byte, error) { 152 | proxyURL := "https://r.jina.ai/" + u 153 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, proxyURL, nil) 154 | if err != nil { 155 | return nil, errors.Wrapf(err, "create request for %s", u) 156 | } 157 | 158 | req.Header.Set("X-Engine", "browser") 159 | req.Header.Set("X-Robots-Txt", userAgent) 160 | if c.token != "" { 161 | req.Header.Set("Authorization", "Bearer "+c.token) 162 | } 163 | 164 | resp, err := c.hc.Do(req) 165 | if err != nil { 166 | return nil, errors.Wrapf(err, "fetch %s", proxyURL) 167 | } 168 | defer func() { _ = resp.Body.Close() }() 169 | 170 | if resp.StatusCode != http.StatusOK { 171 | return nil, errors.Errorf("received non-200 status code %d from %s", resp.StatusCode, proxyURL) 172 | } 173 | 174 | mdBytes, err := io.ReadAll(resp.Body) 175 | if err != nil { 176 | return nil, errors.Wrapf(err, "read body from %s", proxyURL) 177 | } 178 | 179 | return mdBytes, nil 180 | } 181 | -------------------------------------------------------------------------------- /pkg/util/hash/hash.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
// Sum64 returns the 64-bit FNV-1a hash of s.
func Sum64(s string) uint64 {
	hasher := fnv.New64a()
	// A hash.Hash Write never returns an error.
	_, _ = hasher.Write([]byte(s))

	return hasher.Sum64()
}

// Sum64s returns the 64-bit FNV-1a hash of ss. Every element is followed by a
// zero byte before hashing, so element boundaries affect the result:
// {"a", "b"} and {"ab"} hash differently.
func Sum64s(ss []string) uint64 {
	hasher := fnv.New64a()
	terminator := []byte{0}
	for i := range ss {
		_, _ = hasher.Write([]byte(ss[i]))
		_, _ = hasher.Write(terminator)
	}

	return hasher.Sum64()
}
// Heap is a capacity-bounded heap ordered by a caller-supplied less function.
// The element that less ranks lowest sits at the root, so with TryEvictPush
// the heap retains the highest-ranked elements seen so far.
type Heap[T any] struct {
	inner *innerHeap[T]
	limit int
}

// New builds a Heap on top of data and heapifies it in place. The capacity of
// data (cap, not len) becomes the limit enforced by TryEvictPush.
func New[T any](data []T, less func(a, b T) bool) *Heap[T] {
	h := &Heap[T]{
		inner: newInnerHeap(data, less),
		limit: cap(data),
	}
	heap.Init(h.inner)

	return h
}

// TryEvictPush adds x while keeping at most Cap elements:
//   - below the limit, x is simply pushed;
//   - at the limit, x replaces the root only when the root ranks lower than x;
//   - otherwise x is discarded.
//
// A heap with a zero limit never accepts anything.
func (h *Heap[T]) TryEvictPush(x T) {
	switch {
	case h.limit <= 0:
		// Nothing can be stored; in particular there is no root to evict.
		return
	case h.Len() < h.limit:
	case h.inner.less(h.Peek(), x):
		h.Pop()
	default:
		return
	}

	h.Push(x)
}

// Push adds x unconditionally; it does not enforce the limit.
func (h *Heap[T]) Push(x T) {
	heap.Push(h.inner, x)
}

// Pop removes and returns the root element. Like Peek, it returns the zero
// value of T when the heap is empty.
func (h *Heap[T]) Pop() T {
	if h.Len() == 0 {
		var zero T

		return zero
	}

	return heap.Pop(h.inner).(T)
}

// PopLast removes and returns the element stored at the last index of the
// backing slice. It returns the zero value of T when the heap is empty.
func (h *Heap[T]) PopLast() T {
	if h.Len() == 0 {
		var zero T

		return zero
	}

	return heap.Remove(h.inner, h.Len()-1).(T)
}

// Peek returns the root element without removing it, or the zero value of T
// when the heap is empty.
func (h *Heap[T]) Peek() T {
	if h.Len() == 0 {
		var zero T

		return zero
	}

	return h.inner.data[0]
}

// Len reports the number of stored elements.
func (h *Heap[T]) Len() int {
	return h.inner.Len()
}

// Cap reports the limit enforced by TryEvictPush.
func (h *Heap[T]) Cap() int {
	return h.limit
}

// Slice returns the backing slice itself (not a copy), in its current order.
func (h *Heap[T]) Slice() []T {
	return h.inner.data
}

// DESCSort sorts the backing slice in place from highest- to lowest-ranked
// according to less. The slice is generally no longer heap-ordered afterwards,
// so call it once pushing and popping are finished.
func (h *Heap[T]) DESCSort() {
	data, less := h.inner.data, h.inner.less
	sort.Slice(data, func(i, j int) bool {
		// Swapping the arguments yields a strict "greater than"; negating
		// less would report true for equal elements.
		return less(data[j], data[i])
	})
}

// innerHeap adapts a slice plus a less function to container/heap.Interface.
type innerHeap[T any] struct {
	data []T
	less func(a, b T) bool
}

// newInnerHeap wraps data and less without copying data.
func newInnerHeap[T any](data []T, less func(a, b T) bool) *innerHeap[T] {
	return &innerHeap[T]{
		data: data,
		less: less,
	}
}

// Len implements sort.Interface.
func (h *innerHeap[T]) Len() int {
	return len(h.data)
}

// Less implements sort.Interface using the caller-supplied less function.
func (h *innerHeap[T]) Less(i, j int) bool {
	return h.less(h.data[i], h.data[j])
}

// Swap implements sort.Interface.
func (h *innerHeap[T]) Swap(i, j int) {
	h.data[i], h.data[j] = h.data[j], h.data[i]
}

// Push implements heap.Interface by appending x; container/heap restores the
// heap order afterwards.
func (h *innerHeap[T]) Push(x any) {
	h.data = append(h.data, x.(T))
}

// Pop implements heap.Interface by removing and returning the last element;
// container/heap moves the root there beforehand.
func (h *innerHeap[T]) Pop() any {
	n := len(h.data)
	x := h.data[n-1]
	h.data = h.data[:n-1]

	return x
}

// ForType generates a JSON Schema for the given reflect.Type.
// It supports struct fields with json tags and desc tags for metadata.
29 | func ForType(t reflect.Type) (map[string]any, error) { 30 | definitions := make(map[string]any) 31 | schema, err := forTypeInternal(t, "", make(map[reflect.Type]string), definitions) 32 | if err != nil { 33 | return nil, err 34 | } 35 | 36 | if len(definitions) == 0 { 37 | return schema, nil 38 | } 39 | 40 | result := map[string]any{ 41 | "$schema": "http://json-schema.org/draft-07/schema#", 42 | "definitions": definitions, 43 | } 44 | maps.Copy(result, schema) 45 | 46 | return result, nil 47 | } 48 | 49 | func forTypeInternal( 50 | t reflect.Type, 51 | fieldName string, 52 | visited map[reflect.Type]string, 53 | definitions map[string]any, 54 | ) (map[string]any, error) { 55 | if t == nil { 56 | return nil, errors.New("type cannot be nil") 57 | } 58 | 59 | // Dereference pointer types 60 | for t.Kind() == reflect.Ptr { 61 | t = t.Elem() 62 | } 63 | 64 | // Handle previously visited types 65 | if refName, ok := visited[t]; ok { 66 | return map[string]any{"$ref": "#/definitions/" + refName}, nil 67 | } 68 | 69 | switch t.Kind() { 70 | case reflect.Struct: 71 | return handleStructType(t, fieldName, visited, definitions) 72 | 73 | case reflect.Slice, reflect.Array: 74 | return handleArrayType(t, visited, definitions) 75 | 76 | case reflect.Map: 77 | return handleMapType(t, visited, definitions) 78 | 79 | default: 80 | return handlePrimitiveType(t) 81 | } 82 | } 83 | 84 | func handleStructType( 85 | t reflect.Type, 86 | fieldName string, 87 | visited map[reflect.Type]string, 88 | definitions map[string]any, 89 | ) (map[string]any, error) { 90 | // Handle special types. 91 | if t == reflect.TypeOf(time.Time{}) { 92 | return map[string]any{ 93 | "type": "string", 94 | "format": "date-time", 95 | }, nil 96 | } 97 | 98 | if t == reflect.TypeOf(time.Duration(0)) { 99 | return map[string]any{ 100 | "type": "string", 101 | "format": "duration", 102 | "pattern": "^([0-9]+(s|m|h))+$", 103 | }, nil 104 | } 105 | 106 | // Generate type name. 
107 | typeName := t.Name() 108 | if typeName == "" { 109 | typeName = "Anonymous" + fieldName 110 | } 111 | visited[t] = typeName 112 | 113 | // Process schema. 114 | schema := map[string]any{"type": "object"} 115 | 116 | properties, err := handleStructFields(t, visited, definitions) 117 | if err != nil { 118 | return nil, errors.Wrap(err, "handle struct fields") 119 | } 120 | if len(properties) > 0 { 121 | schema["properties"] = properties 122 | } 123 | 124 | definitions[typeName] = schema 125 | 126 | return map[string]any{"$ref": "#/definitions/" + typeName}, nil 127 | } 128 | 129 | func handleStructFields( 130 | t reflect.Type, 131 | visited map[reflect.Type]string, 132 | definitions map[string]any, 133 | ) (properties map[string]any, err error) { 134 | properties = make(map[string]any, t.NumField()) 135 | 136 | for i := range t.NumField() { 137 | field := t.Field(i) 138 | if !field.IsExported() { 139 | continue 140 | } 141 | 142 | propName := getPropertyName(field) 143 | if propName == "" { 144 | continue 145 | } 146 | 147 | if field.Anonymous { 148 | if err := handleEmbeddedStruct(field, visited, definitions, properties); err != nil { 149 | return nil, err 150 | } 151 | 152 | continue 153 | } 154 | 155 | fieldSchema, err := forTypeInternal(field.Type, field.Name, visited, definitions) 156 | if err != nil { 157 | return nil, errors.Wrapf(err, "generating schema for field %s", field.Name) 158 | } 159 | 160 | if desc := field.Tag.Get("desc"); desc != "" { 161 | fieldSchema["description"] = desc 162 | } 163 | 164 | properties[propName] = fieldSchema 165 | } 166 | 167 | return properties, nil 168 | } 169 | 170 | func handleArrayType( 171 | t reflect.Type, 172 | visited map[reflect.Type]string, 173 | definitions map[string]any, 174 | ) (map[string]any, error) { 175 | itemSchema, err := forTypeInternal(t.Elem(), "", visited, definitions) 176 | if err != nil { 177 | return nil, errors.Wrap(err, "generating array item schema") 178 | } 179 | 180 | return map[string]any{ 
181 | "type": "array", 182 | "items": itemSchema, 183 | }, nil 184 | } 185 | 186 | func handleMapType( 187 | t reflect.Type, 188 | visited map[reflect.Type]string, 189 | definitions map[string]any, 190 | ) (map[string]any, error) { 191 | if t.Key().Kind() != reflect.String { 192 | return nil, errors.Errorf("unsupported map key type: %s (must be string)", t.Key().Kind()) 193 | } 194 | 195 | valueSchema, err := forTypeInternal(t.Elem(), "", visited, definitions) 196 | if err != nil { 197 | return nil, errors.Wrap(err, "generating map value schema") 198 | } 199 | 200 | return map[string]any{ 201 | "type": "object", 202 | "additionalProperties": valueSchema, 203 | }, nil 204 | } 205 | 206 | func handlePrimitiveType(t reflect.Type) (map[string]any, error) { 207 | schema := make(map[string]any) 208 | 209 | switch t.Kind() { 210 | case reflect.String: 211 | schema["type"] = "string" 212 | 213 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 214 | if t == reflect.TypeOf(time.Duration(0)) { 215 | schema["type"] = "string" 216 | schema["format"] = "duration" 217 | schema["pattern"] = "^([0-9]+(s|m|h))+$" 218 | } else { 219 | schema["type"] = "integer" 220 | } 221 | 222 | case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: 223 | schema["type"] = "integer" 224 | schema["minimum"] = 0 225 | 226 | case reflect.Float32, reflect.Float64: 227 | schema["type"] = "number" 228 | 229 | case reflect.Bool: 230 | schema["type"] = "boolean" 231 | 232 | default: 233 | return nil, errors.Errorf("unsupported type: %s", t.Kind()) 234 | } 235 | 236 | return schema, nil 237 | } 238 | 239 | func getPropertyName(field reflect.StructField) string { 240 | jsonTag := field.Tag.Get("json") 241 | if jsonTag == "-" { 242 | return "" 243 | } 244 | 245 | if jsonTag != "" { 246 | parts := strings.Split(jsonTag, ",") 247 | 248 | return parts[0] 249 | } 250 | 251 | return field.Name 252 | } 253 | 254 | func handleEmbeddedStruct( 255 | field 
reflect.StructField, 256 | visited map[reflect.Type]string, 257 | definitions map[string]any, 258 | properties map[string]any, 259 | ) error { 260 | embeddedSchema, err := forTypeInternal(field.Type, "", visited, definitions) 261 | if err != nil { 262 | return errors.Wrapf(err, "generating schema for embedded field %s", field.Name) 263 | } 264 | 265 | if embeddedType, ok := embeddedSchema["$ref"]; ok { 266 | refType := embeddedType.(string) 267 | key := strings.TrimPrefix(refType, "#/definitions/") 268 | if def, ok := definitions[key]; ok { 269 | if embeddedProps, ok := def.(map[string]any)["properties"].(map[string]any); ok { 270 | maps.Copy(properties, embeddedProps) 271 | } 272 | 273 | delete(definitions, key) 274 | } 275 | } 276 | 277 | return nil 278 | } 279 | -------------------------------------------------------------------------------- /pkg/util/jsonrpc/jsonrpc.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package jsonrpc 17 | 18 | import ( 19 | "context" 20 | "encoding/json" 21 | "errors" 22 | "net/http" 23 | 24 | "github.com/glidea/zenfeed/pkg/api" 25 | ) 26 | 27 | type Handler[Request any, Response any] func(ctx context.Context, req *Request) (*Response, error) 28 | 29 | func API[Request any, Response any](handler Handler[Request, Response]) http.Handler { 30 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 31 | allowCORS(w) 32 | 33 | if r.Method == "OPTIONS" { 34 | return 35 | } 36 | 37 | var req Request 38 | if r.Body != http.NoBody { 39 | if err := json.NewDecoder(r.Body).Decode(&req); err != nil { 40 | http.Error(w, err.Error(), http.StatusBadRequest) 41 | 42 | return 43 | } 44 | } 45 | 46 | resp, err := handler(r.Context(), &req) 47 | if err != nil { 48 | var apiErr api.Error 49 | if errors.As(err, &apiErr) { 50 | w.Header().Set("Content-Type", "application/json") 51 | w.WriteHeader(apiErr.Code) 52 | _ = json.NewEncoder(w).Encode(apiErr) 53 | 54 | return 55 | } 56 | 57 | http.Error(w, err.Error(), http.StatusInternalServerError) 58 | 59 | return 60 | } 61 | 62 | w.Header().Set("Content-Type", "application/json") 63 | if err := json.NewEncoder(w).Encode(resp); err != nil { 64 | http.Error(w, err.Error(), http.StatusInternalServerError) 65 | 66 | return 67 | } 68 | }) 69 | } 70 | 71 | func allowCORS(w http.ResponseWriter) { 72 | w.Header().Set("Access-Control-Allow-Origin", "*") 73 | w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS, PUT, DELETE") 74 | w.Header().Set("Access-Control-Allow-Headers", 75 | "Accept, Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization", 76 | ) 77 | } 78 | -------------------------------------------------------------------------------- /pkg/util/jsonrpc/jsonrpc_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or 
modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package jsonrpc 17 | 18 | import ( 19 | "bytes" 20 | "context" 21 | "encoding/json" 22 | "errors" 23 | "io" 24 | "net/http" 25 | "net/http/httptest" 26 | "testing" 27 | 28 | . "github.com/onsi/gomega" 29 | 30 | "github.com/glidea/zenfeed/pkg/api" 31 | "github.com/glidea/zenfeed/pkg/test" 32 | ) 33 | 34 | func TestAPI(t *testing.T) { 35 | RegisterTestingT(t) 36 | 37 | type TestRequest struct { 38 | Name string `json:"name"` 39 | } 40 | 41 | type TestResponse struct { 42 | Greeting string `json:"greeting"` 43 | } 44 | 45 | type givenDetail struct { 46 | handler Handler[TestRequest, TestResponse] 47 | } 48 | type whenDetail struct { 49 | method string 50 | requestBody string 51 | } 52 | type thenExpected struct { 53 | statusCode int 54 | responseBody string 55 | } 56 | 57 | successHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) { 58 | return &TestResponse{Greeting: "Hello, " + req.Name}, nil 59 | } 60 | 61 | badRequestHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) { 62 | return nil, api.ErrBadRequest(errors.New("invalid request")) 63 | } 64 | 65 | notFoundHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) { 66 | return nil, api.ErrNotFound(errors.New("resource not found")) 67 | } 68 | 69 | internalErrorHandler := func(ctx context.Context, req *TestRequest) 
(*TestResponse, error) { 70 | return nil, api.ErrInternal(errors.New("server error")) 71 | } 72 | 73 | genericErrorHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) { 74 | return nil, errors.New("generic error") 75 | } 76 | 77 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 78 | { 79 | Scenario: "Successful request", 80 | Given: "a handler that returns a successful response", 81 | When: "making a valid request", 82 | Then: "should return 200 OK with the expected response", 83 | GivenDetail: givenDetail{ 84 | handler: successHandler, 85 | }, 86 | WhenDetail: whenDetail{ 87 | method: http.MethodPost, 88 | requestBody: `{"name":"World"}`, 89 | }, 90 | ThenExpected: thenExpected{ 91 | statusCode: http.StatusOK, 92 | responseBody: `{"greeting":"Hello, World"}`, 93 | }, 94 | }, 95 | { 96 | Scenario: "Empty request body", 97 | Given: "a handler that returns a successful response", 98 | When: "making a request with empty body", 99 | Then: "should return 200 OK with default values", 100 | GivenDetail: givenDetail{ 101 | handler: successHandler, 102 | }, 103 | WhenDetail: whenDetail{ 104 | method: http.MethodPost, 105 | requestBody: "", 106 | }, 107 | ThenExpected: thenExpected{ 108 | statusCode: http.StatusOK, 109 | responseBody: `{"greeting":"Hello, "}`, 110 | }, 111 | }, 112 | { 113 | Scenario: "Invalid JSON request", 114 | Given: "a handler that processes JSON", 115 | When: "making a request with invalid JSON", 116 | Then: "should return 400 Bad Request", 117 | GivenDetail: givenDetail{ 118 | handler: successHandler, 119 | }, 120 | WhenDetail: whenDetail{ 121 | method: http.MethodPost, 122 | requestBody: `{"name":`, 123 | }, 124 | ThenExpected: thenExpected{ 125 | statusCode: http.StatusBadRequest, 126 | }, 127 | }, 128 | { 129 | Scenario: "Bad request error", 130 | Given: "a handler that returns a bad request error", 131 | When: "making a request that triggers a bad request error", 132 | Then: "should return 400 Bad Request 
with error details", 133 | GivenDetail: givenDetail{ 134 | handler: badRequestHandler, 135 | }, 136 | WhenDetail: whenDetail{ 137 | method: http.MethodPost, 138 | requestBody: `{"name":"World"}`, 139 | }, 140 | ThenExpected: thenExpected{ 141 | statusCode: http.StatusBadRequest, 142 | responseBody: `{"code":400,"message":"invalid request"}`, 143 | }, 144 | }, 145 | { 146 | Scenario: "Not found error", 147 | Given: "a handler that returns a not found error", 148 | When: "making a request that triggers a not found error", 149 | Then: "should return 404 Not Found with error details", 150 | GivenDetail: givenDetail{ 151 | handler: notFoundHandler, 152 | }, 153 | WhenDetail: whenDetail{ 154 | method: http.MethodPost, 155 | requestBody: `{"name":"World"}`, 156 | }, 157 | ThenExpected: thenExpected{ 158 | statusCode: http.StatusNotFound, 159 | responseBody: `{"code":404,"message":"resource not found"}`, 160 | }, 161 | }, 162 | { 163 | Scenario: "Internal server error", 164 | Given: "a handler that returns an internal server error", 165 | When: "making a request that triggers an internal server error", 166 | Then: "should return 500 Internal Server Error with error details", 167 | GivenDetail: givenDetail{ 168 | handler: internalErrorHandler, 169 | }, 170 | WhenDetail: whenDetail{ 171 | method: http.MethodPost, 172 | requestBody: `{"name":"World"}`, 173 | }, 174 | ThenExpected: thenExpected{ 175 | statusCode: http.StatusInternalServerError, 176 | responseBody: `{"code":500,"message":"server error"}`, 177 | }, 178 | }, 179 | { 180 | Scenario: "Generic error", 181 | Given: "a handler that returns a generic error", 182 | When: "making a request that triggers a generic error", 183 | Then: "should return 500 Internal Server Error", 184 | GivenDetail: givenDetail{ 185 | handler: genericErrorHandler, 186 | }, 187 | WhenDetail: whenDetail{ 188 | method: http.MethodPost, 189 | requestBody: `{"name":"World"}`, 190 | }, 191 | ThenExpected: thenExpected{ 192 | statusCode: 
http.StatusInternalServerError, 193 | }, 194 | }, 195 | } 196 | 197 | for _, tt := range tests { 198 | t.Run(tt.Scenario, func(t *testing.T) { 199 | // Given. 200 | handler := API(tt.GivenDetail.handler) 201 | 202 | // When. 203 | var req *http.Request 204 | if tt.WhenDetail.requestBody == "" { 205 | req = httptest.NewRequest(tt.WhenDetail.method, "/test", nil) 206 | } else { 207 | req = httptest.NewRequest(tt.WhenDetail.method, "/test", bytes.NewBufferString(tt.WhenDetail.requestBody)) 208 | } 209 | rec := httptest.NewRecorder() 210 | handler.ServeHTTP(rec, req) 211 | 212 | // Then. 213 | Expect(rec.Code).To(Equal(tt.ThenExpected.statusCode)) 214 | 215 | if tt.ThenExpected.responseBody != "" { 216 | var expected, actual interface{} 217 | err := json.Unmarshal([]byte(tt.ThenExpected.responseBody), &expected) 218 | Expect(err).NotTo(HaveOccurred()) 219 | 220 | body, err := io.ReadAll(rec.Body) 221 | Expect(err).NotTo(HaveOccurred()) 222 | 223 | err = json.Unmarshal(body, &actual) 224 | Expect(err).NotTo(HaveOccurred()) 225 | 226 | Expect(actual).To(Equal(expected)) 227 | } 228 | }) 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /pkg/util/retry/retry.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 
12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package retry 17 | 18 | import ( 19 | "context" 20 | "time" 21 | 22 | "github.com/pkg/errors" 23 | "k8s.io/utils/ptr" 24 | 25 | "github.com/glidea/zenfeed/pkg/telemetry/log" 26 | ) 27 | 28 | type Options struct { 29 | MinInterval time.Duration 30 | MaxInterval time.Duration 31 | MaxAttempts *int 32 | } 33 | 34 | func (opts *Options) adjust() { 35 | if opts.MinInterval == 0 { 36 | opts.MinInterval = 100 * time.Millisecond 37 | } 38 | if opts.MaxInterval == 0 { 39 | opts.MaxInterval = 10 * time.Second 40 | } 41 | if opts.MaxInterval < opts.MinInterval { 42 | opts.MaxInterval = opts.MinInterval 43 | } 44 | if opts.MaxAttempts == nil { 45 | opts.MaxAttempts = ptr.To(3) 46 | } 47 | } 48 | 49 | var InfAttempts = ptr.To(-1) 50 | 51 | func Backoff(ctx context.Context, operation func() error, opts *Options) error { 52 | switch err := operation(); err { 53 | case nil: 54 | return nil // One time success. 55 | 56 | default: 57 | log.Error(ctx, err, "attempt", 1) 58 | } 59 | 60 | if opts == nil { 61 | opts = &Options{} 62 | } 63 | opts.adjust() 64 | 65 | interval := opts.MinInterval 66 | attempts := 2 // Start from 1. 
67 | 68 | for { 69 | select { 70 | case <-ctx.Done(): 71 | return ctx.Err() 72 | 73 | case <-time.After(interval): 74 | if err := operation(); err != nil { 75 | if reachedMaxAttempts(attempts, *opts.MaxAttempts) { 76 | return errors.Wrap(err, "max attempts reached") 77 | } 78 | log.Error(ctx, err, "attempt", attempts) 79 | 80 | interval = nextInterval(interval, opts.MaxInterval) 81 | attempts++ 82 | 83 | continue 84 | } 85 | 86 | return nil 87 | } 88 | } 89 | } 90 | 91 | func nextInterval(cur, max time.Duration) (next time.Duration) { 92 | return min(2*cur, max) 93 | } 94 | 95 | func reachedMaxAttempts(cur, max int) bool { 96 | if max == *InfAttempts { 97 | return false 98 | } 99 | 100 | return cur >= max 101 | } 102 | -------------------------------------------------------------------------------- /pkg/util/retry/retry_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package retry 17 | 18 | import ( 19 | "context" 20 | "testing" 21 | "time" 22 | 23 | . 
"github.com/onsi/gomega" 24 | "github.com/pkg/errors" 25 | "k8s.io/utils/ptr" 26 | 27 | "github.com/glidea/zenfeed/pkg/test" 28 | ) 29 | 30 | func TestBackoff(t *testing.T) { 31 | RegisterTestingT(t) 32 | 33 | type givenDetail struct{} 34 | type whenDetail struct { 35 | operation func() error 36 | opts *Options 37 | cancelAfter time.Duration 38 | } 39 | type thenExpected struct { 40 | shouldError bool 41 | errorContains string 42 | attemptsNeeded int 43 | } 44 | 45 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 46 | { 47 | Scenario: "Operation succeeds on first attempt", 48 | When: "calling Backoff with the operation that succeeds immediately", 49 | Then: "should return nil error", 50 | WhenDetail: whenDetail{ 51 | operation: func() error { 52 | return nil 53 | }, 54 | opts: nil, 55 | }, 56 | ThenExpected: thenExpected{ 57 | shouldError: false, 58 | attemptsNeeded: 1, 59 | }, 60 | }, 61 | { 62 | Scenario: "Operation succeeds after retries", 63 | When: "calling Backoff with the operation that fails initially but succeeds after retries", 64 | Then: "should return nil error after successful retry", 65 | WhenDetail: whenDetail{ 66 | operation: createFailingThenSucceedingOperation(2), 67 | opts: &Options{ 68 | MinInterval: 10 * time.Millisecond, 69 | MaxInterval: 50 * time.Millisecond, 70 | MaxAttempts: ptr.To(5), 71 | }, 72 | }, 73 | ThenExpected: thenExpected{ 74 | shouldError: false, 75 | attemptsNeeded: 3, 76 | }, 77 | }, 78 | { 79 | Scenario: "Operation fails all attempts", 80 | When: "calling Backoff with the operation that always fails", 81 | Then: "should return error after max attempts", 82 | WhenDetail: whenDetail{ 83 | operation: func() error { 84 | return errors.New("persistent error") 85 | }, 86 | opts: &Options{ 87 | MinInterval: 10 * time.Millisecond, 88 | MaxInterval: 50 * time.Millisecond, 89 | MaxAttempts: ptr.To(3), 90 | }, 91 | }, 92 | ThenExpected: thenExpected{ 93 | shouldError: true, 94 | errorContains: "max attempts reached", 95 | 
attemptsNeeded: 3, 96 | }, 97 | }, 98 | { 99 | Scenario: "Context cancellation", 100 | When: "calling Backoff with an operation that takes time", 101 | Then: "should return context error", 102 | WhenDetail: whenDetail{ 103 | operation: func() error { 104 | return errors.New("operation error") 105 | }, 106 | opts: &Options{ 107 | MinInterval: 100 * time.Millisecond, 108 | MaxInterval: 200 * time.Millisecond, 109 | MaxAttempts: ptr.To(10), 110 | }, 111 | cancelAfter: 50 * time.Millisecond, 112 | }, 113 | ThenExpected: thenExpected{ 114 | shouldError: true, 115 | errorContains: "context canceled", 116 | }, 117 | }, 118 | } 119 | 120 | for _, tt := range tests { 121 | t.Run(tt.Scenario, func(t *testing.T) { 122 | // When. 123 | ctx := context.Background() 124 | if tt.WhenDetail.cancelAfter > 0 { 125 | var cancel context.CancelFunc 126 | ctx, cancel = context.WithCancel(ctx) 127 | 128 | go func() { 129 | time.Sleep(tt.WhenDetail.cancelAfter) 130 | cancel() 131 | }() 132 | } 133 | err := Backoff(ctx, tt.WhenDetail.operation, tt.WhenDetail.opts) 134 | 135 | // Then. 136 | if tt.ThenExpected.shouldError { 137 | Expect(err).To(HaveOccurred()) 138 | if tt.ThenExpected.errorContains != "" { 139 | Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.errorContains)) 140 | } 141 | } else { 142 | Expect(err).NotTo(HaveOccurred()) 143 | } 144 | }) 145 | } 146 | } 147 | 148 | // createFailingThenSucceedingOperation returns an operation that fails for the specified 149 | // number of attempts and then succeeds. 
// createFailingThenSucceedingOperation builds an operation that reports
// "temporary error" for its first failCount calls and nil on every call after.
func createFailingThenSucceedingOperation(failCount int) func() error {
	remaining := failCount

	return func() error {
		if remaining <= 0 {
			return nil
		}
		remaining--

		return errors.New("temporary error")
	}
}

// Must panics with err when err is non-nil and does nothing otherwise.
// Intended for initialization code that cannot recover from the failure.
func Must(err error) {
	if err == nil {
		return
	}

	panic(err)
}

// Must1 hands back v when err is nil and panics with err otherwise.
// Intended for initialization code that needs the value and cannot recover.
func Must1[T any](v T, err error) T {
	if err == nil {
		return v
	}

	panic(err)
}
// Must2 hands back v1 and v2 when err is nil and panics with err otherwise.
// Intended for initialization code that needs both values and cannot recover.
func Must2[T1 any, T2 any](v1 T1, v2 T2, err error) (T1, T2) {
	if err == nil {
		return v1, v2
	}

	panic(err)
}
"github.com/onsi/gomega" 23 | 24 | "github.com/glidea/zenfeed/pkg/test" 25 | ) 26 | 27 | func TestMust(t *testing.T) { 28 | RegisterTestingT(t) 29 | 30 | type givenDetail struct{} 31 | type whenDetail struct { 32 | err error 33 | } 34 | type thenExpected struct { 35 | shouldPanic bool 36 | } 37 | 38 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 39 | { 40 | Scenario: "Must with nil error", 41 | When: "calling Must with nil error", 42 | Then: "should not panic", 43 | WhenDetail: whenDetail{ 44 | err: nil, 45 | }, 46 | ThenExpected: thenExpected{ 47 | shouldPanic: false, 48 | }, 49 | }, 50 | { 51 | Scenario: "Must with non-nil error", 52 | When: "calling Must with non-nil error", 53 | Then: "should panic", 54 | WhenDetail: whenDetail{ 55 | err: errors.New("test error"), 56 | }, 57 | ThenExpected: thenExpected{ 58 | shouldPanic: true, 59 | }, 60 | }, 61 | } 62 | 63 | for _, tt := range tests { 64 | t.Run(tt.Scenario, func(t *testing.T) { 65 | // When & Then. 66 | if tt.ThenExpected.shouldPanic { 67 | Expect(func() { Must(tt.WhenDetail.err) }).To(Panic()) 68 | } else { 69 | Expect(func() { Must(tt.WhenDetail.err) }).NotTo(Panic()) 70 | } 71 | }) 72 | } 73 | } 74 | 75 | func TestMust1(t *testing.T) { 76 | RegisterTestingT(t) 77 | 78 | type givenDetail struct{} 79 | type whenDetail struct { 80 | value string 81 | err error 82 | } 83 | type thenExpected struct { 84 | value string 85 | shouldPanic bool 86 | } 87 | 88 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 89 | { 90 | Scenario: "Must1 with nil error", 91 | When: "calling Must1 with a value and nil error", 92 | Then: "should return the value without panic", 93 | WhenDetail: whenDetail{ 94 | value: "test value", 95 | err: nil, 96 | }, 97 | ThenExpected: thenExpected{ 98 | value: "test value", 99 | shouldPanic: false, 100 | }, 101 | }, 102 | { 103 | Scenario: "Must1 with non-nil error", 104 | When: "calling Must1 with a value and non-nil error", 105 | Then: "should panic", 106 | 
WhenDetail: whenDetail{ 107 | value: "test value", 108 | err: errors.New("test error"), 109 | }, 110 | ThenExpected: thenExpected{ 111 | shouldPanic: true, 112 | }, 113 | }, 114 | } 115 | 116 | for _, tt := range tests { 117 | t.Run(tt.Scenario, func(t *testing.T) { 118 | // When & Then. 119 | if tt.ThenExpected.shouldPanic { 120 | Expect(func() { Must1(tt.WhenDetail.value, tt.WhenDetail.err) }).To(Panic()) 121 | } else { 122 | result := Must1(tt.WhenDetail.value, tt.WhenDetail.err) 123 | Expect(result).To(Equal(tt.ThenExpected.value)) 124 | } 125 | }) 126 | } 127 | } 128 | 129 | func TestMust2(t *testing.T) { 130 | RegisterTestingT(t) 131 | 132 | type givenDetail struct{} 133 | type whenDetail struct { 134 | value1 string 135 | value2 int 136 | err error 137 | } 138 | type thenExpected struct { 139 | value1 string 140 | value2 int 141 | shouldPanic bool 142 | } 143 | 144 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 145 | { 146 | Scenario: "Must2 with nil error", 147 | When: "calling Must2 with two values and nil error", 148 | Then: "should return both values without panic", 149 | WhenDetail: whenDetail{ 150 | value1: "test value", 151 | value2: 42, 152 | err: nil, 153 | }, 154 | ThenExpected: thenExpected{ 155 | value1: "test value", 156 | value2: 42, 157 | shouldPanic: false, 158 | }, 159 | }, 160 | { 161 | Scenario: "Must2 with non-nil error", 162 | When: "calling Must2 with two values and non-nil error", 163 | Then: "should panic", 164 | WhenDetail: whenDetail{ 165 | value1: "test value", 166 | value2: 42, 167 | err: errors.New("test error"), 168 | }, 169 | ThenExpected: thenExpected{ 170 | shouldPanic: true, 171 | }, 172 | }, 173 | } 174 | 175 | for _, tt := range tests { 176 | t.Run(tt.Scenario, func(t *testing.T) { 177 | // When & Then. 
178 | if tt.ThenExpected.shouldPanic { 179 | Expect(func() { 180 | Must2(tt.WhenDetail.value1, tt.WhenDetail.value2, tt.WhenDetail.err) 181 | }).To(Panic()) 182 | } else { 183 | result1, result2 := Must2(tt.WhenDetail.value1, tt.WhenDetail.value2, tt.WhenDetail.err) 184 | Expect(result1).To(Equal(tt.ThenExpected.value1)) 185 | Expect(result2).To(Equal(tt.ThenExpected.value2)) 186 | } 187 | }) 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /pkg/util/text_convert/text_convert.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package textconvert 17 | 18 | import ( 19 | md "github.com/JohannesKaufmann/html-to-markdown" 20 | "github.com/yuin/goldmark" 21 | "github.com/yuin/goldmark/extension" 22 | "github.com/yuin/goldmark/renderer/html" 23 | 24 | "github.com/glidea/zenfeed/pkg/util/buffer" 25 | ) 26 | 27 | var ( 28 | md2html goldmark.Markdown 29 | html2md *md.Converter 30 | ) 31 | 32 | func init() { 33 | md2html = goldmark.New( 34 | goldmark.WithExtensions( 35 | extension.GFM, 36 | ), 37 | goldmark.WithRendererOptions( 38 | html.WithHardWraps(), 39 | html.WithXHTML(), 40 | ), 41 | ) 42 | html2md = md.NewConverter("", true, nil) 43 | } 44 | 45 | func MarkdownToHTML(md []byte) ([]byte, error) { 46 | buf := buffer.Get() 47 | defer buffer.Put(buf) 48 | 49 | if err := md2html.Convert(md, buf); err != nil { 50 | return nil, err 51 | } 52 | 53 | return buf.Bytes(), nil 54 | } 55 | 56 | func HTMLToMarkdown(html []byte) ([]byte, error) { 57 | res, err := html2md.ConvertBytes(html) 58 | if err != nil { 59 | return nil, err 60 | } 61 | 62 | return res, nil 63 | } 64 | -------------------------------------------------------------------------------- /pkg/util/text_convert/text_convert_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | package textconvert 17 | 18 | import ( 19 | "testing" 20 | 21 | . "github.com/onsi/gomega" 22 | 23 | "github.com/glidea/zenfeed/pkg/test" 24 | ) 25 | 26 | func TestMarkdownToHTML(t *testing.T) { 27 | RegisterTestingT(t) 28 | 29 | type givenDetail struct{} 30 | type whenDetail struct { 31 | markdown []byte 32 | } 33 | type thenExpected struct { 34 | html []byte 35 | err string 36 | } 37 | 38 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 39 | { 40 | Scenario: "Convert simple markdown to HTML", 41 | When: "converting markdown to HTML", 42 | Then: "should return correct HTML", 43 | WhenDetail: whenDetail{ 44 | markdown: []byte("# Hello World"), 45 | }, 46 | ThenExpected: thenExpected{ 47 | html: []byte("

Hello World

\n"), 48 | }, 49 | }, 50 | { 51 | Scenario: "Convert markdown with formatting to HTML", 52 | When: "converting markdown text with formatting to HTML", 53 | Then: "should return HTML with proper formatting", 54 | WhenDetail: whenDetail{ 55 | markdown: []byte("**Bold** and *italic* text"), 56 | }, 57 | ThenExpected: thenExpected{ 58 | html: []byte("

Bold and italic text

\n"), 59 | }, 60 | }, 61 | { 62 | Scenario: "Convert markdown with links to HTML", 63 | When: "converting markdown text with links to HTML", 64 | Then: "should return HTML with proper links", 65 | WhenDetail: whenDetail{ 66 | markdown: []byte("[Link](https://example.com)"), 67 | }, 68 | ThenExpected: thenExpected{ 69 | html: []byte("

Link

\n"), 70 | }, 71 | }, 72 | } 73 | 74 | for _, tt := range tests { 75 | t.Run(tt.Scenario, func(_ *testing.T) { 76 | // When. 77 | html, err := MarkdownToHTML(tt.WhenDetail.markdown) 78 | 79 | // Then. 80 | if tt.ThenExpected.err != "" { 81 | Expect(err).NotTo(BeNil()) 82 | Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err)) 83 | } else { 84 | Expect(err).To(BeNil()) 85 | Expect(html).To(Equal(tt.ThenExpected.html)) 86 | } 87 | }) 88 | } 89 | } 90 | 91 | func TestHTMLToMarkdown(t *testing.T) { 92 | RegisterTestingT(t) 93 | 94 | type givenDetail struct{} 95 | type whenDetail struct { 96 | html []byte 97 | } 98 | type thenExpected struct { 99 | markdown []byte 100 | err string 101 | } 102 | 103 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 104 | { 105 | Scenario: "Convert simple HTML to markdown", 106 | When: "converting HTML text to markdown", 107 | Then: "should return correct markdown", 108 | WhenDetail: whenDetail{ 109 | html: []byte("

Hello World

"), 110 | }, 111 | ThenExpected: thenExpected{ 112 | markdown: []byte("# Hello World"), 113 | }, 114 | }, 115 | { 116 | Scenario: "Convert HTML with formatting to markdown", 117 | When: "converting HTML text with formatting to markdown", 118 | Then: "should return markdown with proper formatting", 119 | WhenDetail: whenDetail{ 120 | html: []byte("

Bold and italic text

"), 121 | }, 122 | ThenExpected: thenExpected{ 123 | markdown: []byte("**Bold** and _italic_ text"), 124 | }, 125 | }, 126 | { 127 | Scenario: "Convert HTML with links to markdown", 128 | When: "converting HTML text with links to markdown", 129 | Then: "should return markdown with proper links", 130 | WhenDetail: whenDetail{ 131 | html: []byte("

Link

"), 132 | }, 133 | ThenExpected: thenExpected{ 134 | markdown: []byte("[Link](https://example.com)"), 135 | }, 136 | }, 137 | } 138 | 139 | for _, tt := range tests { 140 | t.Run(tt.Scenario, func(_ *testing.T) { 141 | // When. 142 | markdown, err := HTMLToMarkdown(tt.WhenDetail.html) 143 | 144 | // Then. 145 | if tt.ThenExpected.err != "" { 146 | Expect(err).NotTo(BeNil()) 147 | Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err)) 148 | } else { 149 | Expect(err).To(BeNil()) 150 | Expect(markdown).To(Equal(tt.ThenExpected.markdown)) 151 | } 152 | }) 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /pkg/util/time/time.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2025 wangyusong 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | package time 17 | 18 | import ( 19 | "context" 20 | "encoding/json" 21 | "math/rand" 22 | "time" 23 | _ "time/tzdata" 24 | 25 | "github.com/pkg/errors" 26 | "gopkg.in/yaml.v3" 27 | 28 | runtimeutil "github.com/glidea/zenfeed/pkg/util/runtime" 29 | ) 30 | 31 | const ( 32 | Day = 24 * time.Hour 33 | Week = 7 * Day 34 | Month = 30 * Day 35 | Year = 365 * Day 36 | ) 37 | 38 | // SetLocation sets the location for the current application. 
39 | func SetLocation(name string) error { 40 | if name == "" { 41 | return nil 42 | } 43 | 44 | loc, err := time.LoadLocation(name) 45 | if err != nil { 46 | return errors.Wrap(err, "load location") 47 | } 48 | 49 | time.Local = loc 50 | 51 | return nil 52 | } 53 | 54 | func InRange(t time.Time, start, end time.Time) bool { 55 | return t.After(start) && t.Before(end) 56 | } 57 | 58 | func Format(t time.Time) string { 59 | return t.Format(time.RFC3339) 60 | } 61 | 62 | func Parse(s string) (time.Time, error) { 63 | return time.Parse(time.RFC3339, s) 64 | } 65 | 66 | func MustParse(s string) time.Time { 67 | return runtimeutil.Must1(Parse(s)) 68 | } 69 | 70 | func Tick(ctx context.Context, d time.Duration, f func() error) error { 71 | ticker := time.NewTicker(d) 72 | defer ticker.Stop() 73 | 74 | for { 75 | select { 76 | case <-ticker.C: 77 | if err := f(); err != nil { 78 | return err 79 | } 80 | case <-ctx.Done(): 81 | return nil 82 | } 83 | } 84 | } 85 | 86 | func Random(max time.Duration) time.Duration { 87 | return time.Duration(rand.Int63n(int64(max))) 88 | } 89 | 90 | type Duration time.Duration 91 | 92 | func (d Duration) String() string { 93 | return time.Duration(d).String() 94 | } 95 | 96 | func (d Duration) MarshalJSON() ([]byte, error) { 97 | return json.Marshal(d.String()) 98 | } 99 | 100 | func (d *Duration) UnmarshalJSON(b []byte) error { 101 | var v any 102 | if err := json.Unmarshal(b, &v); err != nil { 103 | return err 104 | } 105 | 106 | switch tv := v.(type) { 107 | case float64: 108 | *d = Duration(time.Duration(tv)) 109 | 110 | return nil 111 | 112 | case string: 113 | parsed, err := time.ParseDuration(tv) 114 | if err != nil { 115 | return err 116 | } 117 | *d = Duration(parsed) 118 | 119 | return nil 120 | 121 | default: 122 | return errors.Errorf("invalid duration: %v", tv) 123 | } 124 | } 125 | 126 | func (d Duration) MarshalYAML() (interface{}, error) { 127 | return d.String(), nil 128 | } 129 | 130 | func (d *Duration) 
// Quantize maps vec linearly onto the int8 range [-128, 127].
//
// It returns the quantized values together with the parameters needed to
// reverse the mapping: min is the smallest input value and scale is
// 255 / (largest - smallest), so that q = round((v-min)*scale - 128).
//
// Degenerate inputs get a finite, reversible result: when every value is
// equal (including a single-element vector) scale is 1 and all quantized
// values are -128; for an empty vector the result is an empty slice with
// min 0 and scale 1.
func Quantize(vec []float32) (quantized []int8, min, scale float32) {
	quantized = make([]int8, len(vec))
	if len(vec) == 0 {
		return quantized, 0, 1
	}

	// Find the minimum and maximum values.
	lo, hi := float32(math.MaxFloat32), float32(-math.MaxFloat32)
	for _, v := range vec {
		if v < lo {
			lo = v
		}
		if v > hi {
			hi = v
		}
	}

	// Calculate the quantization scale. A zero range would give an infinite
	// scale and NaN products, so fall back to 1; every value then maps to
	// -128 and dequantizes back to lo exactly.
	scale = 1
	if span := hi - lo; span != 0 {
		scale = float32(255) / span
	}

	// Quantize the data.
	for i, v := range vec {
		quantized[i] = int8(math.Round(float64((v-lo)*scale - 128)))
	}

	return quantized, lo, scale
}

// Dequantize reverses Quantize: each value becomes (q+128)/scale + min.
// min and scale must be the values returned by Quantize for this data.
func Dequantize(quantized []int8, min, scale float32) []float32 {
	vec := make([]float32, len(quantized))
	for i, v := range quantized {
		vec[i] = (float32(v)+128)/scale + min
	}

	return vec
}
"github.com/onsi/gomega" 22 | 23 | "github.com/glidea/zenfeed/pkg/test" 24 | ) 25 | 26 | func TestQuantizeDequantize(t *testing.T) { 27 | RegisterTestingT(t) 28 | 29 | type givenDetail struct{} 30 | type whenDetail struct { 31 | vector []float32 32 | } 33 | type thenExpected struct { 34 | maxError float32 35 | } 36 | 37 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{ 38 | { 39 | Scenario: "Quantize and dequantize unit vector", 40 | When: "quantizing and then dequantizing a vector with values between 0 and 1", 41 | Then: "should return vector close to the original with small error", 42 | WhenDetail: whenDetail{ 43 | vector: []float32{0.1, 0.5, 0.9, 0.3}, 44 | }, 45 | ThenExpected: thenExpected{ 46 | maxError: 0.01, 47 | }, 48 | }, 49 | { 50 | Scenario: "Quantize and dequantize vector with negative values", 51 | When: "quantizing and then dequantizing a vector with negative values", 52 | Then: "should return vector close to the original with small error", 53 | WhenDetail: whenDetail{ 54 | vector: []float32{-1.0, -0.5, 0.0, 0.5, 1.0}, 55 | }, 56 | ThenExpected: thenExpected{ 57 | maxError: 0.01, 58 | }, 59 | }, 60 | { 61 | Scenario: "Quantize and dequantize large range vector", 62 | When: "quantizing and then dequantizing a vector with large range of values", 63 | Then: "should return vector close to the original with acceptable error", 64 | WhenDetail: whenDetail{ 65 | vector: []float32{-100, -50, 0, 50, 100}, 66 | }, 67 | ThenExpected: thenExpected{ 68 | maxError: 1.5, 69 | }, 70 | }, 71 | } 72 | 73 | for _, tt := range tests { 74 | t.Run(tt.Scenario, func(t *testing.T) { 75 | // When. 76 | quantized, min, scale := Quantize(tt.WhenDetail.vector) 77 | dequantized := Dequantize(quantized, min, scale) 78 | 79 | // Then. 
80 | Expect(len(dequantized)).To(Equal(len(tt.WhenDetail.vector))) 81 | 82 | maxError := float32(0) 83 | 84 | for i := range tt.WhenDetail.vector { 85 | error := float32(0) 86 | if tt.WhenDetail.vector[i] > dequantized[i] { 87 | error = tt.WhenDetail.vector[i] - dequantized[i] 88 | } else { 89 | error = dequantized[i] - tt.WhenDetail.vector[i] 90 | } 91 | if error > maxError { 92 | maxError = error 93 | } 94 | } 95 | 96 | Expect(maxError).To(BeNumerically("<=", tt.ThenExpected.maxError)) 97 | }) 98 | } 99 | } 100 | --------------------------------------------------------------------------------