├── .github
    └── workflows
    │   ├── ci.yml
    │   └── issue-translator.yml
├── .gitignore
├── .golangci.yml
├── Dockerfile
├── LICENSE
├── Makefile
├── README-en.md
├── README.md
├── docker-compose.yml
├── docs
    ├── cherry-studio-mcp.md
    ├── config-zh.md
    ├── config.md
    ├── crawl-zh.md
    ├── images
    │   ├── add-rss.png
    │   ├── arch.png
    │   ├── chat-with-feeds.png
    │   ├── cherry-studio-mcp-prompt.png
    │   ├── cherry-studio-mcp.png
    │   ├── crad.png
    │   ├── daily-brief.png
    │   ├── feed-list-with-web.png
    │   ├── folo-html.png
    │   ├── migrate-from-follow-1.png
    │   ├── migrate-from-follow-2.png
    │   ├── migrate-from-follow-3.png
    │   ├── migrate-from-follow-4.png
    │   ├── migrate-from-follow-5.png
    │   ├── monitoring.png
    │   ├── notification-with-web.png
    │   ├── sponsor.png
    │   ├── update-config-with-web.png
    │   ├── upgrade-from-v0.1.0-backup.png
    │   ├── web-add-source.png
    │   ├── web-reading-aggr.png
    │   └── wechat.png
    ├── mcp-client-prompt.md
    ├── migrate-from-follow.md
    ├── model-selection-zh.md
    ├── preview.md
    ├── query-api-zh.md
    ├── roadmap-zh.md
    ├── rss-api-zh.md
    ├── tech
    │   ├── hld-zh.md
    │   ├── rewrite-zh.md
    │   ├── testing-zh.md
    │   └── vector-zh.md
    ├── upgrade-from-v0.1.0.md
    ├── upgrade.md
    └── webhook-zh.md
├── go.mod
├── go.sum
├── main.go
└── pkg
    ├── api
        ├── api.go
        ├── http
        │   └── http.go
        ├── mcp
        │   └── mcp.go
        └── rss
        │   └── rss.go
    ├── component
        └── component.go
    ├── config
        └── config.go
    ├── llm
        ├── embedding_spliter.go
        ├── embedding_spliter_test.go
        ├── llm.go
        ├── openai.go
        └── prompt
        │   └── prompt.go
    ├── model
        └── model.go
    ├── notify
        ├── channel
        │   ├── channel.go
        │   ├── email.go
        │   └── webhook.go
        ├── notify.go
        └── route
        │   ├── route.go
        │   └── route_test.go
    ├── rewrite
        ├── rewrite.go
        └── rewrite_test.go
    ├── schedule
        ├── rule
        │   ├── periodic.go
        │   ├── periodic_test.go
        │   ├── rule.go
        │   ├── watch.go
        │   └── watch_test.go
        └── schedule.go
    ├── scrape
        ├── manager.go
        ├── manager_test.go
        └── scraper
        │   ├── rss.go
        │   ├── rss_test.go
        │   ├── scraper.go
        │   ├── scraper_test.go
        │   └── source.go
    ├── storage
        ├── feed
        │   ├── block
        │   │   ├── block.go
        │   │   ├── block_test.go
        │   │   ├── chunk
        │   │   │   ├── chunk.go
        │   │   │   ├── chunk_benchmark_test.go
        │   │   │   ├── chunk_test.go
        │   │   │   └── encoding.go
        │   │   └── index
        │   │   │   ├── codec.go
        │   │   │   ├── inverted
        │   │   │       ├── inverted.go
        │   │   │       └── inverted_test.go
        │   │   │   ├── primary
        │   │   │       ├── primary.go
        │   │   │       └── primary_test.go
        │   │   │   └── vector
        │   │   │       ├── vector.go
        │   │   │       └── vector_test.go
        │   ├── feed.go
        │   └── feed_test.go
        └── kv
        │   └── kv.go
    ├── telemetry
        ├── log
        │   └── log.go
        ├── metric
        │   └── metric.go
        ├── model
        │   └── model.go
        ├── server
        │   └── server.go
        └── telemetry.go
    ├── test
        └── test.go
    └── util
        ├── binary
            ├── binary.go
            └── binary_test.go
        ├── buffer
            └── buffer.go
        ├── crawl
            └── crawl.go
        ├── hash
            └── hash.go
        ├── heap
            ├── heap.go
            └── heap_test.go
        ├── json_schema
            ├── json_schema.go
            └── json_schema_test.go
        ├── jsonrpc
            ├── jsonrpc.go
            └── jsonrpc_test.go
        ├── retry
            ├── retry.go
            └── retry_test.go
        ├── runtime
            ├── runtime.go
            └── runtime_test.go
        ├── text_convert
            ├── text_convert.go
            └── text_convert_test.go
        ├── time
            ├── time.go
            └── time_test.go
        └── vector
            ├── vector.go
            └── vector_test.go


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 |   release:
 9 |     types: [ published ]
10 | 
11 | jobs:
 12 |   test:  # Lint and unit-test gate; build-and-push declares `needs: test`.
 13 |     runs-on: ubuntu-latest
 14 |     steps:
 15 |       - uses: actions/checkout@v4
 16 |       - name: Set up Go
 17 |         uses: actions/setup-go@v5
 18 |         with:
 19 |           go-version: '1.23.4'  # Same Go release as the Dockerfile builder image (golang:1.23.4-alpine).
 20 |       - name: Golangci Lint
 21 |         uses: golangci/golangci-lint-action@v7
 22 |         with:
 23 |           version: v2.0  # golangci-lint v2 line; .golangci.yml declares `version: "2"`.
 24 |       - name: Run tests
 25 |         run: make test  # Makefile target: `go test -race` with a coverage profile over ./...
26 |         
 27 |   build-and-push:  # Publishes the Docker image; gated on the test job and on release events.
 28 |     runs-on: ubuntu-latest
 29 |     needs: test
 30 |     if: github.event_name == 'release'  # Skipped for the push and pull_request triggers.
 31 |     steps:
 32 |       - uses: actions/checkout@v4
 33 |       - name: Set up Docker Buildx
 34 |         uses: docker/setup-buildx-action@v3
 35 |       - name: Login to Docker Hub
 36 |         uses: docker/login-action@v3
 37 |         with:
 38 |           username: ${{ secrets.DOCKERHUB_USERNAME }}
 39 |           password: ${{ secrets.DOCKERHUB_TOKEN }}
 40 |       - name: Build and push Docker images
 41 |         run: make push  # Makefile target: buildx build for linux/amd64 and linux/arm64, tagged $(VERSION) and latest.


--------------------------------------------------------------------------------
/.github/workflows/issue-translator.yml:
--------------------------------------------------------------------------------
 1 | name: 'issue-translator'
 2 | on: 
 3 |   issue_comment: 
 4 |     types: [created]
 5 |   issues: 
 6 |     types: [opened]
 7 | 
 8 | jobs:
 9 |   build:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: usthe/issues-translate-action@v2.7
13 |         with:
14 |           IS_MODIFY_TITLE: true


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.dll
 2 | *.so
 3 | *.dylib
 4 | *.test
 5 | *.out
 6 | coverage.html
 7 | vendor/
 8 | go.work
 9 | .idea/
10 | .vscode/
11 | *.swp
12 | *.swo
13 | .DS_Store
14 | .aider*
15 | .cursorrules
16 | *.log
17 | local_docs/
18 | .env
19 | .env.local
20 | __debug_bin
21 | config.yaml
22 | data/
23 | *debug*
24 | .cursorrules
25 | .cursor/


--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
 1 | version: "2"
 2 | run:
 3 |   timeout: 5m
 4 | 
 5 | # https://golangci-lint.run/usage/linters.
 6 | linters:
 7 |   settings:
 8 |     gocognit:
 9 |       min-complexity: 15
10 |     cyclop:
11 |       max-complexity: 10
12 | 
13 |   enable:
14 |     - errcheck
15 |     - govet
16 |     - ineffassign
17 |     - staticcheck
18 |     - unused
19 |     - bodyclose
20 |     - copyloopvar
21 |     - cyclop
22 |     - gocognit
23 |     - errorlint
24 |     - funlen
25 |     - gocognit
26 |     - goheader
27 |     - iface
28 |     - importas
29 |     - inamedparam
30 |     - intrange
31 |     - maintidx
32 |     - nestif
33 |     - nlreturn
34 |     - noctx
35 |     - paralleltest
36 |     - perfsprint
37 |     - prealloc
38 |     - promlinter
39 |     - reassign
40 |   exclusions:
41 |     rules:
42 |       - path: pkg/rewrite/rewrite.go
43 |         linters:
44 |           - lll # For prompt.
45 |       - path: pkg/config/config.go
46 |         linters:
47 |           - lll # For schema tag.
48 |       - path: pkg/notify/channel/email.go
49 |         linters:
50 |           - lll # For HTML template.
51 |       - path: main.go
52 |         linters:
53 |           - lll # For disclaimer.
54 |           - cyclop
55 |     paths:
56 |       - ".*\\_test\\.go$"
57 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM golang:1.23.4-alpine AS builder
 2 | 
 3 | RUN apk add --no-cache git
 4 | 
 5 | WORKDIR /app
 6 | COPY . .
 7 | 
 8 | ARG VERSION=dev
 9 | RUN GOOS=linux go build -ldflags="-s -w -X main.version=${VERSION}" -o /app/zenfeed ./main.go
10 | 
11 | FROM alpine:latest
12 | 
13 | ARG VERSION=dev
14 | LABEL org.opencontainers.image.version=${VERSION}
15 | 
16 | RUN apk add --no-cache ca-certificates tzdata && \
17 |     mkdir -p /app/data
18 | 
19 | COPY --from=builder /app/zenfeed /app/
20 | 
21 | WORKDIR /app
22 | ENTRYPOINT ["/app/zenfeed"]
23 | CMD ["--config", "/app/config/config.yaml"]


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | VERSION ?= $(shell git describe --tags --always)
 2 | IMAGE_NAME ?= zenfeed
 3 | REGISTRY ?= glidea
 4 | FULL_IMAGE_NAME = $(REGISTRY)/$(IMAGE_NAME)
 5 | 
 6 | 
 7 | .PHONY: test push dev-push
 8 | 
 9 | test:
10 | 	go test -race -v -coverprofile=coverage.out -coverpkg=./... ./...
11 | 
12 | push:
13 | 	docker buildx create --use --name multi-platform-builder || true
14 | 	docker buildx build --platform linux/amd64,linux/arm64 \
15 | 		--build-arg VERSION=$(VERSION) \
16 | 		-t $(FULL_IMAGE_NAME):$(VERSION) \
17 | 		-t $(FULL_IMAGE_NAME):latest \
18 | 		--push .
19 | 
20 | dev-push:
21 | 	docker buildx create --use --name multi-platform-builder || true
22 | 	docker buildx build --platform linux/amd64,linux/arm64 \
23 | 		--build-arg VERSION=$(VERSION) \
24 | 		-t $(FULL_IMAGE_NAME):$(VERSION) \
25 | 		--push .
26 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   zenfeed-web:
 3 |     image: glidea/zenfeed-web:latest
 4 |     ports:
 5 |       - "1400:1400"
 6 |     environment:
 7 |       - PUBLIC_DEFAULT_API_URL=http://zenfeed:1300
 8 |     depends_on:
 9 |       - zenfeed
10 |     restart: unless-stopped
11 | 
12 |   zenfeed:
13 |     image: glidea/zenfeed:latest
14 |     entrypoint: >
15 |       sh -c "
16 |       if [ ! -f /app/config/config.yaml ]; then
17 |         echo 'Config file not found in volume, initializing from init config...'
18 |         cp /app/config.init.yaml /app/config/config.yaml;
19 |       else
20 |         echo 'Existing config file found in volume.'
21 |       fi &&
22 |       echo 'Starting Zenfeed...' &&
23 |       exec /app/zenfeed --config /app/config/config.yaml
24 |       "
25 |     configs:
26 |       - source: zenfeed_init_config
27 |         target: /app/config.init.yaml
28 |     volumes:
29 |       - data:/app/data
30 |       - config:/app/config
31 |     ports:
32 |       - "1300:1300"
33 |       - "1301:1301"
34 |       - "9090:9090"
35 |     depends_on:
36 |       - rsshub
37 |     restart: unless-stopped
38 |     
39 |   rsshub:
40 |     image: diygod/rsshub:2024-12-14
41 |     ports:
42 |       - "1200:1200"
43 |     environment:
44 |       - NODE_ENV=production
45 |     restart: unless-stopped
46 |       
47 | volumes:
48 |   data: {}
49 |   config: {}
50 | 
51 | configs:
52 |   zenfeed_init_config: # After installation, you must modify the configuration through zenfeed or config volume.
53 |     content: |
54 |       timezone: ${TZ:-Asia/Shanghai}
55 |       llms:
56 |         - name: general
57 |           default: true
58 |           provider: siliconflow
59 |           model: Qwen/Qwen3-8B
60 |           api_key: ${API_KEY:-your-api-key}
61 |         - name: embed
62 |           provider: siliconflow
63 |           embedding_model: Pro/BAAI/bge-m3
64 |           api_key: ${API_KEY:-your-api-key}
65 |       scrape:
66 |         rsshub_endpoint: http://rsshub:1200
67 |       storage:
68 |         feed:
69 |           rewrites:
70 |             - transform:
71 |                 to_text:
72 |                   prompt: |
73 |                     {{ .summary_html_snippet_for_small_model }} Respond in ${LANGUAGE:-Chinese}
74 |               label: summary_html_snippet
75 |           embedding_llm: embed
76 |       notify:
77 |         channels:
78 |           email:
79 |             feed_html_snippet_template: |
80 |               {{ .summary_html_snippet }}
81 | 


--------------------------------------------------------------------------------
/docs/cherry-studio-mcp.md:
--------------------------------------------------------------------------------
 1 | **Configure MCP Server**
 2 | 
 3 | Default URL: `http://localhost:1301/sse`
 4 | 
 5 | <img src="images/cherry-studio-mcp.png" alt="Cherry Studio MCP" width="500">
 6 | 
 7 | **Configure Prompt (Optional but recommended for optimal results)**
 8 | 
 9 | For complete prompt, see [mcp-client-prompt.md](mcp-client-prompt.md)
10 | 
11 | <img src="images/cherry-studio-mcp-prompt.png" alt="Cherry Studio MCP Prompt" width="500">
12 | 
13 | **Usage Examples**
14 | 
15 | [Doc](preview.md)
16 | 
17 | Very powerful - you can even directly modify zenfeed configuration settings


--------------------------------------------------------------------------------
/docs/crawl-zh.md:
--------------------------------------------------------------------------------
 1 | # 使用 Zenfeed 爬虫功能
 2 | 
 3 | Zenfeed 提供了将网页内容抓取并转换为 Markdown 格式的功能。这主要通过重写规则 (`rewrites` rule) 中的 `transform.to_text.type` 配置项实现。
 4 | 
 5 | ## 如何使用
 6 | 
 7 | 在你的配置文件中，找到 `storage.feed.rewrites` 部分。当你定义一条重写规则时，可以通过 `transform` 字段来启用爬虫功能。
 8 | 
 9 | 具体配置如下：
10 | 
11 | ```yaml
12 | storage:
13 |   feed:
14 |     rewrites:
15 |       - if: ["source=xxx", ...]
16 |         source_label: "link" # 指定包含 URL 的标签，例如 feed 中的 'link' 标签
17 |         transform:
18 |           to_text:
19 |             type: "crawl" # 或 "crawl_by_jina"
20 |             # llm: "your-llm-name" # crawl 类型不需要 llm
21 |             # prompt: "your-prompt" # crawl 类型不需要 prompt
22 |         # match: ".*" # 可选：对抓取到的 Markdown 内容进行匹配
23 |         action: "create_or_update_label" # 对抓取到的内容执行的动作
24 |         label: "crawled_content" # 将抓取到的 Markdown 存储到这个新标签
25 |     # ... 其他配置 ...
26 | jina: # 如果使用 crawl_by_jina，并且需要更高的速率限制（匿名ip: 20 RPM），请配置 Jina API Token
27 |   token: "YOUR_JINA_AI_TOKEN" # 从 https://jina.ai/api-dashboard/ 获取
28 | ```
29 | 
30 | ### 转换类型 (`transform.to_text.type`)
31 | 
32 | 你有以下几种选择：
33 | 
34 | 1.  **`crawl`**:
35 |     *   Zenfeed 将使用内置的本地爬虫尝试抓取 `source_label` 中指定的 URL。
36 |     *   它会尝试遵循目标网站的 `robots.txt` 协议。
37 |     *   适用于静态网页或结构相对简单的网站。
38 | 
39 | 2.  **`crawl_by_jina`**:
40 |     *   Zenfeed 将通过 [Jina AI Reader API](https://jina.ai/reader/) 来抓取和处理 `source_label` 中指定的 URL。
41 |     *   Jina AI 可能能更好地处理动态内容和复杂网站结构。
42 |     *   同样遵循目标网站的 `robots.txt` 协议。
43 |     *   **依赖 Jina AI 服务**：
44 |         *   建议在配置文件的顶层添加 `jina.token` (如上示例) 来提供你的 Jina AI API Token，以获得更高的服务速率限制。
45 |         *   如果未提供 Token，将以匿名用户身份请求，速率限制较低。
46 |         *   请查阅 Jina AI 的服务条款和隐私政策。
47 | 
48 | ### 关键配置说明
49 | 
50 | *   `source_label`: 此标签的值**必须是一个有效的 URL**。例如，如果你的 RSS Feed 中的 `link` 标签指向的是一篇包含完整文章的网页，你可以将 `source_label` 设置为 `link`。
51 | *   `action`: 通常设置为 `create_or_update_label`，将抓取并转换后的 Markdown 内容存入一个新的标签中（由 `label` 字段指定）。
52 | *   `label`: 指定存储抓取到的 Markdown 内容的新标签名称。
53 | 
54 | ## 使用场景
55 | 
56 | **全文内容提取**:
57 | 很多 RSS 源只提供文章摘要和原文链接。使用爬虫功能可以将原文完整内容抓取下来，转换为 Markdown 格式，方便后续的 AI 处理（如总结、打标签、分类等）或直接阅读。
58 | 
59 | ## 免责声明
60 | 
61 | **在使用 Zenfeed 的爬虫功能（包括 `crawl` 和 `crawl_by_jina` 类型）前，请仔细阅读并理解以下声明。您的使用行为即表示您已接受本声明的所有条款。**
62 | 
63 | 1.  **用户责任与授权**:
64 |     *   您将对使用爬虫功能的所有行为承担全部责任。
65 |     *   您必须确保拥有访问、抓取和处理所提供 URL 内容的合法权利。
66 |     *   请严格遵守目标网站的 `robots.txt` 协议、服务条款 (ToS)、版权政策以及所有相关的法律法规。
67 |     *   不得使用本功能处理、存储或分发任何非法、侵权、诽谤、淫秽或其他令人反感的内容。
68 | 
69 | 2.  **内容准确性与完整性**:
70 |     *   网页抓取和 Markdown 转换过程的结果可能不准确、不完整或存在偏差。这可能受到目标网站结构、反爬虫机制、动态内容渲染、网络问题等多种因素的影响。
71 |     *   Zenfeed 项目作者和贡献者不对抓取内容的准确性、完整性、及时性或质量作任何保证。
72 | 
73 | 3.  **第三方服务依赖 (`crawl_by_jina`)**:
74 |     *   `crawl_by_jina` 功能依赖于 Jina AI 提供的第三方服务。
75 |     *   Jina AI 服务的可用性、性能、数据处理政策、服务条款以及可能的费用（超出免费额度后）均由 Jina AI 自行决定。
76 |     *   项目作者和贡献者不对 Jina AI 服务的任何方面负责。请在使用前查阅 [Jina AI 的相关条款](https://jina.ai/terms/) 和 [隐私政策](https://jina.ai/privacy/)。
77 | 
78 | 4.  **无间接或后果性损害赔偿**:
79 |     *   在任何情况下，无论基于何种法律理论，项目作者和贡献者均不对因使用或无法使用爬虫功能而导致的任何直接、间接、偶然、特殊、惩戒性或后果性损害负责，包括但不限于利润损失、数据丢失、商誉损失或业务中断。
80 | 
81 | 5.  **法律与合规风险**:
82 |     *   未经授权抓取、复制、存储、处理或传播受版权保护的内容，或违反网站服务条款的行为，可能违反相关法律法规，并可能导致法律纠纷或处罚。
83 |     *   用户需自行承担因使用爬虫功能而产生的所有法律风险和责任。
84 | 
85 | 6.  **"按原样"提供**:
86 |     *   爬虫功能按"现状"和"可用"的基础提供，不附带任何形式的明示或默示担保。
87 | 
88 | **强烈建议您在启用和配置爬虫功能前，仔细评估相关风险，并确保您的使用行为完全合法合规。对于任何因用户滥用或不当使用本软件（包括爬虫功能）而引起的法律纠纷、损失或损害，Zenfeed 项目作者和贡献者不承担任何责任。**
89 | 


--------------------------------------------------------------------------------
/docs/images/add-rss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/add-rss.png


--------------------------------------------------------------------------------
/docs/images/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/arch.png


--------------------------------------------------------------------------------
/docs/images/chat-with-feeds.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/chat-with-feeds.png


--------------------------------------------------------------------------------
/docs/images/cherry-studio-mcp-prompt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/cherry-studio-mcp-prompt.png


--------------------------------------------------------------------------------
/docs/images/cherry-studio-mcp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/cherry-studio-mcp.png


--------------------------------------------------------------------------------
/docs/images/crad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/crad.png


--------------------------------------------------------------------------------
/docs/images/daily-brief.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/daily-brief.png


--------------------------------------------------------------------------------
/docs/images/feed-list-with-web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/feed-list-with-web.png


--------------------------------------------------------------------------------
/docs/images/folo-html.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/folo-html.png


--------------------------------------------------------------------------------
/docs/images/migrate-from-follow-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/migrate-from-follow-1.png


--------------------------------------------------------------------------------
/docs/images/migrate-from-follow-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/migrate-from-follow-2.png


--------------------------------------------------------------------------------
/docs/images/migrate-from-follow-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/migrate-from-follow-3.png


--------------------------------------------------------------------------------
/docs/images/migrate-from-follow-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/migrate-from-follow-4.png


--------------------------------------------------------------------------------
/docs/images/migrate-from-follow-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/migrate-from-follow-5.png


--------------------------------------------------------------------------------
/docs/images/monitoring.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/monitoring.png


--------------------------------------------------------------------------------
/docs/images/notification-with-web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/notification-with-web.png


--------------------------------------------------------------------------------
/docs/images/sponsor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/sponsor.png


--------------------------------------------------------------------------------
/docs/images/update-config-with-web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/update-config-with-web.png


--------------------------------------------------------------------------------
/docs/images/upgrade-from-v0.1.0-backup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/upgrade-from-v0.1.0-backup.png


--------------------------------------------------------------------------------
/docs/images/web-add-source.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/web-add-source.png


--------------------------------------------------------------------------------
/docs/images/web-reading-aggr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/web-reading-aggr.png


--------------------------------------------------------------------------------
/docs/images/wechat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glidea/zenfeed/4ac4667ce945d780c12d30f7f7662a4b84b07175/docs/images/wechat.png


--------------------------------------------------------------------------------
/docs/migrate-from-follow.md:
--------------------------------------------------------------------------------
 1 | ## Export OPML File from Follow
 2 | 
 3 | <img src="images/migrate-from-follow-1.png" alt="" width="300">
 4 | <img src="images/migrate-from-follow-2.png" alt="" width="500">
 5 | <img src="images/migrate-from-follow-3.png" alt="" width="500">
 6 | 
 7 | > Note: Make sure to fill in http://rsshub:1200
 8 | 
 9 | ## Import to zenfeed-web
10 | <img src="images/migrate-from-follow-4.png" alt="" width="500">
11 | <img src="images/migrate-from-follow-5.png" alt="" width="500">
12 | 


--------------------------------------------------------------------------------
/docs/model-selection-zh.md:
--------------------------------------------------------------------------------
 1 | 如果无需使用 HTML 总结，模型可以随便选择
 2 | 
 3 | ## 背景 & 原则
 4 | * Token 使用会很多，你可以想象每篇 RSS 都总结一遍会有多少消耗。所以优先选择免费模型，或者按次计费
 5 | * HTML 生成对模型有较高要求。所以你现在知道了为什么自部署的默认总结效果比不上 https://zenfeed.xyz
 6 | * 那为什么不支持 Markdown 呢？web 还没精力支持，你可以先用邮件日报替代
 7 | * 总结都是后台任务，且支持有状态重试，对模型速率限制 & 稳定性没有要求
 8 | * 所以 “1. 质量”，“2. 低价”，“3. 稳定”。首选 1，兼顾 2，无需 3
 9 | 
10 | ## 如果你对默认的模型效果不满意，首选推荐
11 | * **不缺钱 or “有路子”**：Gemini 2.5 Pro
12 | * **再便宜点的**：Gemini 2.5 Flash
13 | 
14 | ---
15 | 
16 | v0.4.0 优化之后，免费 qwen3 的效果应该已经可以满足大部分需求


--------------------------------------------------------------------------------
/docs/preview.md:
--------------------------------------------------------------------------------
 1 | ## 信息监控
 2 | ```yaml
 3 | rules:
 4 |   - name: US Tariff Impact
 5 |     query: The various impacts and developments of recent US tariff policies, different perspectives, especially their impact on China
 6 | ```
 7 | <img src="images/monitoring.png" alt="Monitoring" width="500">
 8 | 
 9 | ## 每日简报
10 | ```yaml
11 | rules:
12 |   - name: Evening News
13 |     every_day: "06:30~18:00"
14 | ```
15 | <img src="images/daily-brief.png" alt="Daily Brief" width="500">
16 | 
17 | ## Chat with feeds
18 | 
19 | <img src="images/chat-with-feeds.png" alt="Chat with feeds" width="500">
20 | 
21 | ## 添加 RSS 订阅源
22 | > 如果你是 RSS 老司机，直接丢 RSS 地址，或者 OPML 文件给 AI 即可
23 | 
24 | <img src="images/add-rss.png" alt="Add RSS" width="500">
25 | 
26 | ## 配合 zenfeed-web
27 | <img src="images/feed-list-with-web.png" alt="" width="500">
28 | 
29 | <img src="images/notification-with-web.png" alt="" width="500">
30 | 
31 | <img src="images/update-config-with-web.png" alt="" width="500">
32 | 
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/docs/query-api-zh.md:
--------------------------------------------------------------------------------
  1 | # Zenfeed Query API 使用教程
  2 | 
  3 | Zenfeed Query API 允许用户通过多种条件检索存储的 Feed 数据。本教程将详细介绍如何使用此 API。
  4 | 
  5 | ## 接口说明
  6 | 
  7 | ### 请求
  8 | 
  9 | *   **方法**: `POST`
 10 | *   **URL**: `/query`
 11 | *   **Content-Type**: `application/json`
 12 | 
 13 | ### 请求体 (JSON)
 14 | 
 15 | ```json
 16 | {
 17 |   "query": "string",
 18 |   "threshold": 0.55,
 19 |   "label_filters": ["string"],
 20 |   "summarize": false,
 21 |   "limit": 10,
 22 |   "start": "2006-01-02T15:04:05Z07:00",
 23 |   "end": "2006-01-02T15:04:05Z07:00"
 24 | }
 25 | ```
 26 | 
 27 | **字段说明:**
 28 | 
 29 | *   `query` (string, 可选):
 30 |     *   用于语义搜索的查询字符串。
 31 |     *   如果提供，必须至少包含 5 个字符。
 32 |     *   如果为空或未提供，则不进行语义搜索，仅根据其他条件（如标签、时间）过滤。
 33 | *   `threshold` (float32, 可选, 默认值: `0.55`):
 34 |     *   语义搜索的相关性阈值。
 35 |     *   取值范围: `[0, 1]`。
 36 |     *   仅当 `query` 字段非空时有效。
 37 | *   `label_filters` ([]string, 可选):
 38 |     *   一个字符串数组，用于根据 Feed 的标签进行过滤。
 39 |     *   每个过滤器的格式为:
 40 |         *   `"key=value"`: 匹配标签 `key` 的值为 `value` 的 Feed。
 41 |         *   `"key!=value"`: 匹配标签 `key` 的值不为 `value` 的 Feed。
 42 |     *   常用的 `key` 包括:
 43 |         *   `source`: Feed 来源
 44 |         *   `title`: Feed 标题
 45 |         *   `你在 rewrite 阶段自定义创建的`：比如 category
 46 |     *   可以指定多个过滤器，它们之间是 "AND" 关系。
 47 | *   `summarize` (bool, 可选, 默认值: `false`):
 48 |     *   是否对查询结果进行摘要。
 49 |     *   如果为 `true`，系统将调用配置的 LLM (Large Language Model) 对返回的 Feed 内容进行总结。
 50 | *   `limit` (int, 可选, 默认值: `10`):
 51 |     *   返回 Feed 结果的最大数量。
 52 |     *   取值范围: `[1, 500]`。
 53 | *   `start` (string, 可选, 默认值: 24小时前):
 54 |     *   查询的时间范围的开始时间（包含）。
 55 |     *   格式为 RFC3339 (例如: `"2023-10-26T10:00:00Z"`)。
 56 | *   `end` (string, 可选, 默认值: 当前时间):
 57 |     *   查询的时间范围的结束时间（不包含）。
 58 |     *   格式为 RFC3339 (例如: `"2023-10-27T10:00:00Z"`)。
 59 |     *   `end` 时间必须晚于 `start` 时间。
 60 | 
 61 | ### 响应体 (JSON)
 62 | 
 63 | ```json
 64 | {
 65 |   "summary": "string",
 66 |   "feeds": [
 67 |     {
 68 |       "labels": {
 69 |         "type": "rss",
 70 |         "source": "Example News",
 71 |         "title": "Breaking News: AI Revolutionizes Everything",
 72 |         "link": "http://example.com/news/123",
 73 |         "pub_time": "2023-10-26T09:30:00Z",
 74 |         "content": "Detailed content of the news article..."
 75 |       },
 76 |       "time": "2023-10-26T10:15:30+08:00",
 77 |       "score": 0.85
 78 |     }
 79 |   ],
 80 |   "count": 1
 81 | }
 82 | ```
 83 | 
 84 | **字段说明:**
 85 | 
 86 | *   `summary` (string, 可选):
 87 |     *   如果请求中的 `summarize` 为 `true` 且成功生成摘要，此字段将包含 LLM 生成的内容摘要。
 88 |     *   如果生成摘要失败，可能包含错误信息。
 89 | *   `feeds` ([]object, 必须):
 90 |     *   一个对象数组，每个对象代表一个符合查询条件的 Feed。
 91 |     *   **Feed 对象结构**:
 92 |         *   `labels` (object): Feed 的元数据标签，键值对形式。
 93 |             *   `type` (string): Feed 类型。
 94 |             *   `source` (string): Feed 来源。
 95 |             *   `title` (string): Feed 标题。
 96 |             *   `link` (string): Feed 原始链接。
 97 |             *   `pub_time` (string): Feed 发布时间。
 98 |             *   `content` (string): Feed 内容。
 99 |             *   ... (其他自定义标签)
100 |         *   `time` (string): Feed 被系统记录或处理的时间戳 (RFC3339 格式，通常为服务器本地时区)。
101 |         *   `score` (float32, 可选):
102 |             *   当请求中提供了 `query` (进行了语义搜索) 时，此字段表示该 Feed 与查询的相关性得分。
103 |             *   得分越高，相关性越强。
104 | *   `count` (int, 必须):
105 |     *   返回的 `feeds` 数组中的 Feed 数量。
106 | 
107 | ## `curl` 示例
108 | 
109 | 以下示例假设 Zenfeed 服务运行在 `http://localhost:1300`。
110 | 
111 | ### 1. 基本查询 (获取最近10条记录)
112 | 
113 | 获取最近（默认24小时内）的最多10条 Feed。
114 | 
115 | ```bash
116 | curl -X POST http://localhost:1300/query \
117 | -H "Content-Type: application/json" \
118 | -d '{}'
119 | ```
120 | 
121 | ### 2. 语义搜索
122 | 
123 | 查询与 "人工智能最新进展" 相关的 Feed，并设置相关性阈值为 `0.7`。
124 | 
125 | ```bash
126 | curl -X POST http://localhost:1300/query \
127 | -H "Content-Type: application/json" \
128 | -d '{
129 |   "query": "人工智能最新进展",
130 |   "threshold": 0.7
131 | }'
132 | ```
133 | 
134 | ### 3. 带标签过滤的查询
135 | 
136 | 查询类型为 "rss" 且来源不是 "SpecificSource" 的 Feed。
137 | 
138 | ```bash
139 | curl -X POST http://localhost:1300/query \
140 | -H "Content-Type: application/json" \
141 | -d '{
142 |   "label_filters": [
143 |     "type=rss",
144 |     "source!=SpecificSource"
145 |   ]
146 | }'
147 | ```
148 | 
149 | ### 4. 带时间范围的查询
150 | 
151 | 查询 2023年10月25日 00:00:00 UTC 到 2023年10月26日 00:00:00 UTC 之间的 Feed。
152 | 
153 | ```bash
154 | curl -X POST http://localhost:1300/query \
155 | -H "Content-Type: application/json" \
156 | -d '{
157 |   "start": "2023-10-25T00:00:00Z",
158 |   "end": "2023-10-26T00:00:00Z"
159 | }'
160 | ```
161 | 
162 | ### 5. 组合查询示例
163 | 
 164 | 查询过去3天内，与 "开源项目" 相关的 Feed，来源为 "github_trending"，并获取摘要，最多返回20条。
165 | 
166 | ```bash
 167 | # 假设今天是 2023-10-28；query 尽可能详细以获得最佳搜索效果，start / end 需手动计算或动态生成（JSON 请求体内不能写注释）
 168 | curl -X POST http://localhost:1300/query \
 169 | -H "Content-Type: application/json" \
 170 | -d '{
 171 |   "query": "最近的热门开源项目",
 172 |   "threshold": 0.6,
 173 |   "label_filters": ["source=github_trending"],
 174 |   "summarize": true,
 175 |   "limit": 20,
 176 |   "start": "2023-10-25T00:00:00Z",
 177 |   "end": "2023-10-28T00:00:00Z"
 178 | }'
179 | ```
180 | 


--------------------------------------------------------------------------------
/docs/roadmap-zh.md:
--------------------------------------------------------------------------------
 1 | ## 短期
 2 | * 播客
 3 |   * NotebookLM 的播客效果让人惊艳
 4 |   * 技术上复刻一个并不难，难的是没有又便宜效果又好的 TTS API（只用得起小帅的声音😭）
 5 |   * TTS 音色进步也只是近几年的事情，长期需要等成本下降
 6 |   * 短期因为我个人很喜欢播客总结（应该也很适合大家通勤），会先本地部署模型，提供给 https://zenfeed.xyz 使用
 7 | 
 8 | * epub2rss
 9 |   * 见过 rss2epub，但你绝没见过反着来的
10 |   * 严格上这并不属于 zenfeed，顶多算生态项目吧
11 |   * 抛开时效性，书比新闻更有价值。但当你立下 “坚持阅读” 的 flag，然后呢？
12 |   * 这个子项目旨在实现：每日更新一章，作为 rss 暴露。在阅读新闻 RSS 时，“顺便” 把书给看了
13 |   * 这里遵循《掌控习惯》的几个原理
14 |     * 让它显而易见：在你的新闻阅读器里
15 |     * 让它简便易行：配合 zenfeed 总结，更轻松地阅读要点（进一步了解原文逃不掉，但这时你已经被勾住了，相信这事已经没那么困难了）
16 |     * 让你感觉到爽：zenfeed 阅读完后的木鱼声，嗯这算一个，确信
17 | 
18 | * 提供更多玩法指导
19 |   * zenfeed 定位是信息管理引擎，普通用户反而搞不清楚状况
20 |   * 短期并不会考虑做一个没有使用心智成本的 “产品”，但我可以分享一些垂直的使用案例技巧
21 |     > 灵光一现：最近喜欢上和豆包聊新闻了，或许可以分享下如何把 zenfeed 数据接入豆包
22 | 
23 | ## 中长期
24 | * 更易用的 Web（但坦诚地讲目前优先级比较低，更鼓励调用后端 api，构建一个属于你的 web）
25 |   * 主题研究报告
26 |   * 屏蔽 or follow 相关新闻后续
27 |   * 相关性聚合阅读
28 |   ![](images/web-reading-aggr.png)
29 | > P.S. 相关功能已经实现，只不过没有下放到 Web
30 | 
31 | ---
32 | 
33 | 如果你觉得 zenfeed 很酷，并且有意愿贡献，请联系我！
34 | 


--------------------------------------------------------------------------------
/docs/rss-api-zh.md:
--------------------------------------------------------------------------------
 1 | # 托管源
 2 | 
 3 | ## Folo
 4 | 
 5 | 直接搜索 zenfeed
 6 | 
 7 | ## Other
 8 | 
 9 | ```bash
10 | https://zenfeed.xyz/rss?.... 参数用法见下方《自部署》
11 | 
12 | https://zenfeed.xyz/rss?label_filter=source=知乎热榜 # 你在 zenfeed.xyz 中看到的源名称
13 | 
14 | https://zenfeed.xyz/rss?query=AI # 语义搜索。请不要滥用，成本 cover 不住可能随时下线
15 | ```
16 | 
17 | # 自部署
18 | 
19 | ## 1. 配置（可选）
20 | 
21 | ```yaml
22 | api:
23 |   rss:
24 |     content_html_template: | # 可自由排版搭配（go template 语法）；需要确保渲染后的内容是正确的 HTML
25 |       {{ .summary_html_snippet }} # 默认值
26 | ```
27 | 
28 | ## 2. enjoy RSS address!
29 | 
30 | ```bash
31 | your_zenfeed_address/rss?label_filter=label1=value1&label_filter=label2!=value2&query=xxx
32 | 
33 | # e.g.
34 | 
35 | ## Past 24h rss feed for GithubTrending
36 | http://localhost:1302/rss?label_filter=source=GithubTrending
37 | 
38 | ## Past 24h rss feed for Tech category
39 | http://localhost:1302/rss?label_filter=category=Tech
40 | 
41 | ## Past 24h rss feed for dynamic query
42 | http://localhost:1302/rss?query=特朗普最新消息
43 | ```
44 | 
45 | # FAQ
46 | 
47 | ## 添加失败怎么办？
48 | 
49 | 部分 RSS 阅读器通过服务端间接访问 RSS 地址，如果 zenfeed 部署到本地，将无法访问
50 | 
51 | 你需要通过内网穿透，或者 VPS 暴露到公网上，注意仅暴露 1302 端口
52 | 
53 | ## Folo 看起来只有纯文本？
54 | 
55 | ![](images/folo-html.png)
56 | 
57 | ## 暗黑模式显示有问题？
58 | 
59 | 嗯就是有问题，请使用白底背景，否则样式渲染会出现问题
60 | 


--------------------------------------------------------------------------------
/docs/tech/hld-zh.md:
--------------------------------------------------------------------------------
  1 | > 适用版本：v0.2.2
  2 | 
  3 | ```mermaid
  4 | graph TD
  5 |     subgraph User_Interactions
  6 |         WebUI["Web UI (zenfeed-web)"]
  7 |         MCPClient["MCP Client"]
  8 |     end
  9 | 
 10 |     subgraph Zenfeed_Core_Services
 11 |         HTTPServer["HTTP Server (pkg/api/http)"]
 12 |         MCPServer["MCP Server (pkg/api/mcp)"]
 13 |         API["API Service (pkg/api)"]
 14 |     end
 15 | 
 16 |     subgraph Data_Processing_Storage_Main
 17 |         ScraperManager["Scraper Manager (pkg/scrape)"]
 18 |         Rewriter["Rewriter (pkg/rewrite)"]
 19 |         FeedStorage["Feed Storage (pkg/storage/feed)"]
 20 |         LLMFactory["LLM Factory (pkg/llm)"]
 21 |         KVStorage["KV Storage (pkg/storage/kv)"]
 22 |     end
 23 | 
 24 |     subgraph FeedStorage_Internals
 25 |         Block["Block (pkg/storage/feed/block)"]
 26 |         ChunkFile["ChunkFile (pkg/storage/feed/block/chunk)"]
 27 |         PrimaryIndex["Primary Index (pkg/storage/feed/block/index/primary)"]
 28 |         InvertedIndex["Inverted Index (pkg/storage/feed/block/index/inverted)"]
 29 |         VectorIndex["Vector Index (pkg/storage/feed/block/index/vector)"]
 30 |     end
 31 |     
 32 |     subgraph Scheduling_Notification
 33 |         Scheduler["Scheduler (pkg/schedule)"]
 34 |         Notifier["Notifier (pkg/notify)"]
 35 |         NotifyChan["(Go Channel for Results)"]
 36 |         EmailChannel["Email Channel (pkg/notify/channel)"]
 37 |     end
 38 | 
 39 |     ConfigManager["Config Manager (pkg/config)"]
 40 | 
 41 |     ExternalDataSources["External Data Sources (RSS Feeds, RSSHub)"]
 42 |     LLMProviders["LLM Providers (OpenAI, Gemini, etc.)"]
 43 |     EmailServiceProvider["Email Service Provider (SMTP)"]
 44 | 
 45 |     WebUI --> HTTPServer
 46 |     MCPClient --> MCPServer
 47 |     HTTPServer --> API
 48 |     MCPServer --> API
 49 | 
 50 |     API --> ConfigManager
 51 |     API --> FeedStorage
 52 |     API --> LLMFactory
 53 | 
 54 |     ScraperManager --> ExternalDataSources
 55 |     ScraperManager --> KVStorage
 56 |     ScraperManager --> FeedStorage
 57 | 
 58 |     FeedStorage --> Rewriter
 59 |     FeedStorage --> LLMFactory
 60 |     FeedStorage --> KVStorage
 61 |     FeedStorage --> Block
 62 | 
 63 |     Block --> ChunkFile
 64 |     Block --> PrimaryIndex
 65 |     Block --> InvertedIndex
 66 |     Block --> VectorIndex
 67 |     
 68 |     Rewriter --> LLMFactory
 69 | 
 70 |     Scheduler --> FeedStorage
 71 |     Scheduler --> NotifyChan
 72 |     Notifier --> NotifyChan
 73 |     Notifier --> LLMFactory
 74 |     Notifier --> EmailChannel
 75 |     Notifier --> KVStorage
 76 |     EmailChannel --> EmailServiceProvider
 77 | 
 78 |     ConfigManager --> HTTPServer
 79 |     ConfigManager --> MCPServer
 80 |     ConfigManager --> API
 81 |     ConfigManager --> ScraperManager
 82 |     ConfigManager --> Rewriter
 83 |     ConfigManager --> FeedStorage
 84 |     ConfigManager --> LLMFactory
 85 |     ConfigManager --> Scheduler
 86 |     ConfigManager --> Notifier
 87 |     
 88 |     LLMFactory --> LLMProviders
 89 |     LLMFactory --> KVStorage
 90 | ```
 91 | 
 92 | ## 技术特点
 93 | 
 94 | *   零外部依赖
 95 | *   Golang 资源占用少于采用 Python 的竞品
 96 | *   采用模块化、面向服务的架构，各组件职责清晰
 97 | *   系统配置集中管理，并支持热重载，实现动态调整
 98 | *   提供灵活的内容重写管道，可自定义处理流程
 99 | *   Feed 数据按时间分块存储，支持高效索引与生命周期管理
100 | *   支持基于向量嵌入的语义搜索能力
101 | *   通过可配置的抓取器和 RSSHub 集成，支持多样化的数据源
102 | *   基于规则的调度引擎，实现灵活的事件监控与查询
103 | *   可定制的通知路由和多渠道通知发送机制
104 | *   实现 MCP (Model Context Protocol) 服务端，便于外部工具集成
105 | *   提供统一的 API 接口层，解耦核心业务与通信协议
106 | *   内置通用键值存储，用于缓存和持久化辅助状态
107 | 
108 | ## 组件说明
109 | 
110 | 1.  **配置管理器 (ConfigManager - `pkg/config.Manager`)**
111 |     *   负责加载、管理和热更新应用的整体配置 (通常存储在 `config.yaml` 中)。其他组件订阅配置变更，以便动态调整其行为。是系统动态性的基础。
112 | 
113 | 2.  **键值存储 (KVStorage - `pkg/storage/kv.Storage`)**
114 |     *   提供一个通用的键值存储服务。用于存储临时状态、缓存（如 LLM 调用、RSSHub 响应）、小型元数据、以及一些组件的运行状态（如 Scraper 的最后抓取时间、Notifier 的通知发送记录）。
115 | 
116 | 3.  **大语言模型工厂 (LLMFactory - `pkg/llm.Factory`)**
117 |     *   管理和提供大语言模型 (LLM) 的实例。它根据配置初始化不同的 LLM 客户端 (如 OpenAI, Gemini, SiliconFlow 等)，并向上层组件 (如 `Rewriter`, `FeedStorage`, `Notifier`) 提供统一的 LLM 调用接口。这些接口用于文本生成、内容摘要、向量嵌入等 AI 处理任务。此外，可以动态切换或更新 LLM 配置。
118 | 
119 | 4.  **内容重写器 (Rewriter - `pkg/rewrite.Rewriter`)**
120 |     *   根据用户在配置文件中定义的重写规则 (Rewrite Rules)，对原始 Feed 内容进行管道式处理。每个规则可以针对 Feed 的特定标签 (如标题、正文)，通过调用 `LLMFactory` 提供的模型执行操作 (如评分、分类、摘要、过滤、添加新标签等)。处理后的 Feed 用于存储或进一步的逻辑判断。
121 | 
122 | 5.  **Feed 存储 (FeedStorage - `pkg/storage/feed.Storage`)**
123 |     *   负责持久化存储经过 `Rewriter` 处理后的 Feed 数据，并提供高效的查询接口。它管理着 Feed 数据的生命周期和存储结构。
124 |     *   **关键子组件**:
125 |         *   **Block (`pkg/storage/feed/block.Block`)**: `FeedStorage` 将数据按时间组织成多个 `Block`。每个 `Block` 代表一个时间段内的数据 (例如，过去 25 小时)。这种设计有助于数据的管理，如按时间归档、删除过期数据，并能独立处理冷热数据。
126 |         *   **ChunkFile (`pkg/storage/feed/block/chunk.File`)**: 在每个 `Block` 内部，实际的 Feed 内容（经过序列化，包含所有标签和时间戳）存储在 `ChunkFile` 中。这是一种紧凑的存储方式，支持高效的追加和按偏移读取。
127 |         *   **Primary Index (`pkg/storage/feed/block/index/primary.Index`)**: 为每个 `Block` 内的 Feed 提供主键索引。它将全局唯一的 Feed ID 映射到该 Feed 在对应 `ChunkFile` 中的具体位置（如偏移量），实现通过 ID 快速定位 Feed 数据。
128 |         *   **Inverted Index (`pkg/storage/feed/block/index/inverted.Index`)**: 为每个 `Block` 内的 Feed 标签建立倒排索引。它将标签的键值对映射到包含这些标签的 Feed ID 列表，从而能够根据标签条件快速过滤 Feed。
129 |         *   **Vector Index (`pkg/storage/feed/block/index/vector.Index`)**: 为每个 `Block` 内的 Feed（或其内容切片）存储由 `LLMFactory` 生成的向量嵌入。它支持高效的近似最近邻搜索，从而实现基于语义相似度的 Feed 查询。
130 | 
131 | 6.  **API 服务 (API - `pkg/api.API`)**
132 |     *   提供核心的业务逻辑接口层，供上层服务 (如 `HTTPServer`, `MCPServer`) 调用，解耦核心业务逻辑与具体的通信协议。接口功能包括：应用配置的查询与动态应用、RSSHub 相关信息的查询、Feed 数据的写入与多维度查询等。此组件会响应配置变更，并将其传递给其依赖的下游组件。
133 | 
134 | 7.  **HTTP 服务 (HTTPServer - `pkg/api/http.Server`)**
135 |     *   暴露一个 HTTP/JSON API 接口，主要供 Web 前端 (`zenfeed-web`) 或其他HTTP客户端使用。用户通过此接口进行如添加订阅源、配置监控规则、查看 Feed 列表、管理应用配置等操作。它依赖 `API` 组件来执行实际的业务逻辑。
136 | 
137 | 8.  **MCP 服务 (MCPServer - `pkg/api/mcp.Server`)**
138 |     *   实现 Model Context Protocol (MCP) 服务端。这使得 Zenfeed 的数据可以作为上下文源被外部应用或 LLM 集成。
139 | 
140 | 9.  **抓取管理器 (ScraperManager - `pkg/scrape.Manager`)**
141 |     *   负责管理和执行从各种外部数据源 (主要是 RSS Feed，支持通过 RSSHub 扩展源) 抓取内容的任务。它根据配置中定义的来源和抓取间隔，定期或按需从指定的 URL 或 RSSHub 路由抓取最新的 Feed 数据。抓取到的原始数据会提交给 `FeedStorage` 进行后续的重写处理和存储。
142 |     *   **关键子组件**:
143 |         *   **Scraper (`pkg/scrape/scraper.Scraper`)**: 每个配置的数据源会对应一个 `Scraper` 实例，负责该特定源的抓取逻辑和调度。
144 |         *   **Reader (`pkg/scrape/scraper/source.go#reader`)**: `Scraper` 内部使用不同类型的 `reader` (如针对标准 RSS URL 的 reader，针对 RSSHub 路径的 reader) 来实际获取数据。
145 | 
146 | 10. **调度器 (Scheduler - `pkg/schedule.Scheduler`)**
147 |     *   根据用户配置的调度规则 (Schedule Rules) 定期执行查询任务。这些规则定义了特定的查询条件，如语义关键词 (基于向量搜索)、标签过滤、以及时间范围等。当 `FeedStorage` 中有符合规则条件的 Feed 数据时，调度器会将这些结果 (封装为 `rule.Result`) 通过一个内部 Go Channel (`notifyChan`) 发送给 `Notifier` 组件进行后续处理。
148 |     *   **关键子组件**:
149 |         *   **Rule (`pkg/schedule/rule.Rule`)**: 每个调度配置对应一个 `Rule` 实例，封装了该规则的查询逻辑和执行计划。
150 | 
151 | 11. **通知器 (Notifier - `pkg/notify.Notifier`)**
152 |     *   监听来自 `Scheduler` 的 `notifyChan`。接收到 `rule.Result` 后，它会根据通知路由 (NotifyRoute) 配置对 Feed 进行分组、聚合。为了生成更精炼的通知内容，它可能会再次调用 `LLMFactory` 进行摘要。最终，通过配置的通知渠道 (NotifyChannels) 将处理后的信息发送给指定的接收者 (NotifyReceivers)。其发送状态或去重逻辑可能利用 `KVStorage`。
153 |     *   **关键子组件**:
154 |         *   **Router (`pkg/notify/route.Router`)**: 根据配置的路由规则，将 `rule.Result` 中的 Feed 分配到不同的处理流程或目标接收者。
155 |         *   **Channel (`pkg/notify/channel.Channel`)**: 代表具体的通知发送方式，例如 `EmailChannel` 负责通过 SMTP 发送邮件。
156 | 


--------------------------------------------------------------------------------
/docs/tech/rewrite-zh.md:
--------------------------------------------------------------------------------
  1 | > 适用版本：v0.2.2
  2 | 
  3 | `rewrite` 组件是 zenfeed 中负责对信息流内容进行动态处理和转换的核心模块。它允许用户通过声明式的规则配置，利用大型语言模型 (LLM) 等工具，对内容的元数据（标签）进行修改、丰富、过滤，甚至决定是否丢弃某条信息。
  4 | 
  5 | ## 1. 设计理念与哲学
  6 | 
  7 | *   **Prometheus 的 `relabel_config`**: 借鉴其强大的标签重写能力。在 Prometheus 中，`relabel_config` 允许用户在采集指标前后动态地修改标签集，实现服务发现、指标过滤和路由等高级功能。`rewrite` 组件将此思想应用于信息流处理，将每一条信息（如一篇文章、一个帖子）视为一个标签集，通过规则来操作这些标签。
  8 | *   **管道 (Pipeline) 处理模式**: 信息的处理过程被设计成一个可配置的 ETL 管道。每个规则是管道中的一个处理阶段，信息流经这些规则，逐步被转换和打标。这种模式使得复杂的处理逻辑可以被分解为一系列简单、独立的步骤，易于理解和维护。
  9 | *   **AI 能力的模块化与按需应用**: 大型语言模型 (LLM) 被视为一种强大的"转换函数"。用户可以根据需求，在规则中指定使用哪个 LLM、配合什么样的提示词 (Prompt) 来处理特定的文本内容（例如，从文章正文生成摘要、分类、评分等）。这种设计使得 AI 能力可以灵活地嵌入到信息处理的任意环节。
 10 | *   **内容即标签 (Content as Labels)**: 这是 zenfeed 的一个核心抽象。原始信息（如标题、正文、链接、发布时间）和经过 AI 或规则处理后产生的衍生信息（如类别、标签、评分、摘要）都被统一表示为键值对形式的"标签"。这种统一表示简化了后续的查询、过滤、路由和展示逻辑。
 11 | *   **声明式配置优于命令式代码**: 用户通过 YAML 配置文件定义重写规则，而不是编写代码来实现处理逻辑。这降低了使用门槛，使得非程序员也能方便地定制自己的信息处理流程，同时也使得配置更易于管理和版本控制。
 12 | 
 13 | > 简单说这是一条专门针对 Feed 处理的可配置工作流
 14 | 
 15 | ## 2. 业务流程
 16 | 
 17 | 内容重写组件的核心工作流程是接收一个代表信息单元的标签集 (`model.Labels`)，然后按顺序应用预定义的重写规则 (`Rule`)，最终输出一个经过修改的标签集，或者指示该信息单元应被丢弃。
 18 | 
 19 | 其处理流程可以概括为：
 20 | 
 21 | 1.  **接收标签集**: 组件的入口是一个 `model.Labels` 对象，代表待处理的信息单元。
 22 | 2.  **顺序应用规则**: 系统会遍历用户配置的每一条 `Rule`。
 23 | 3.  **规则评估与执行**: 对于每一条规则，系统会：
 24 |     *   **定位源文本**: 根据规则指定的 `source_label` (默认为 `content`)，找到相应的文本内容。
 25 |     *   **条件检查**: 检查源文本是否满足规则中声明的 `skip_too_short_threshold`（最小长度，默认为300字符）。若不满足，则跳过当前规则。
 26 |     *   **文本转换 (可选)**: 若规则声明了 `transform` (如通过 `to_text` 使用 LLM 和特定 `Prompt` 进行处理)，则源文本会被转换为新文本。此转换结果将用于后续的匹配。
 27 |     *   **模式匹配**: 使用规则中声明的 `match` 正则表达式 (默认为 `.*`) 来匹配（可能已被转换过的）文本。若不匹配，则跳过当前规则。
 28 |     *   **执行动作**: 若文本匹配成功，则执行规则声明的 `Action`：
 29 |         *   `ActionDropFeed`: 指示应丢弃当前信息单元，处理流程终止。
 30 |         *   `ActionCreateOrUpdateLabel`: 使用（可能已被转换过的）匹配文本，为规则中指定的 `Label` 创建或更新标签值。
 31 | 4.  **输出结果**:
 32 |     *   若所有规则处理完毕且未触发 `ActionDropFeed`，则返回最终修改并排序后的 `model.Labels`。
 33 |     *   若任一规则触发 `ActionDropFeed`，则返回 `nil`，表示丢弃。
 34 |     *   处理过程中若发生错误（如 LLM 调用失败），则会中止并返回错误。
 35 | 
 36 | 
 37 | ## 3. 使用示例
 38 | 
 39 | 以下是一些如何使用 `rewrite` 规则的场景示例：
 40 | 
 41 | ### 示例 1: 内容分类打标
 42 | 
 43 | *   **目标**: 根据文章内容，自动为其添加一个 `category` 标签，如 "Technology", "Finance" 等。
 44 | *   **规则配置 (概念性)**:
 45 |     ```yaml
 46 |     - source_label: "content" # 使用文章正文作为分析源
 47 |       transform:
 48 |         to_text:
 49 |           llm: "qwen-default" # 使用名为 "qwen-default" 的 LLM 配置
 50 |           prompt: "category"  # 使用预设的 "category" prompt 模板
 51 |       match: ".+"             # 匹配 LLM 返回的任何非空分类结果
 52 |       action: "create_or_update_label"
 53 |       label: "category"       # 新标签的键为 "category"
 54 |     ```
 55 | *   **效果**: 如果一篇文章内容是关于人工智能的，LLM 可能会返回 "Technology"。经过此规则处理后，文章的标签集会增加或更新一个标签，例如 `{"category", "Technology"}`。
 56 | 
 57 | ### 示例 2: 基于 LLM 评分过滤低质量内容
 58 | 
 59 | *   **目标**: 让 LLM 对文章内容进行评分 (0-10)，如果评分低于 4，则丢弃该文章。
 60 | *   **规则配置 (包含两条规则)**:
 61 | 
 62 |     *   **规则 2.1: 内容评分**
 63 |         ```yaml
 64 |         - source_label: "content"
 65 |           transform:
 66 |             to_text:
 67 |               llm: "qwen-default"
 68 |               prompt: "score" # 使用预设的 "score" prompt 模板
 69 |           match: "^([0-9]|10)$" # 确保 LLM 返回的是 0-10 的数字
 70 |           action: "create_or_update_label"
 71 |           label: "ai_score"  # 将评分结果存入 "ai_score" 标签
 72 |         ```
 73 |     *   **规则 2.2: 根据评分过滤**
 74 |         ```yaml
 75 |         - source_label: "ai_score" # 使用上一条规则生成的评分作为判断依据
 76 |           # 无需 Transform
 77 |           match: "^[0-3]$"       # 匹配 0, 1, 2, 3 分
 78 |           action: "drop_feed"     # 丢弃这些低分文章
 79 |         ```
 80 | *   **效果**: 文章首先会被 LLM 评分并打上 `ai_score` 标签。如果该评分值在 0 到 3 之间，第二条规则会将其丢弃。
 81 | 
 82 | ### 示例 3: 基于特定标签值添加新标签
 83 | 
 84 | *   **目标**: 如果文章的 `source` 标签值是 "Hacker News"，则添加一个新标签 `source_type: "community"`。
 85 |     *   **注意**: 当前 `ActionCreateOrUpdateLabel` 会将匹配成功的 `text` （即 `source_label` 的值或其转换结果）作为新标签的值。若要实现固定值标签，需要通过 LLM 转换。
 86 | *   **规则配置 (通过 LLM 实现映射)**:
 87 |     ```yaml
 88 |     - source_label: "source" # 源标签是 "source"
 89 |       transform:
 90 |         to_text:
 91 |           llm: "qwen-mini"
 92 |           # Prompt 需要精心设计，告诉 LLM 如何根据输入映射到输出
 93 |           # 例如，Prompt 可以包含类似 "If input is 'Hacker News', output 'community'. If input is 'GitHub Trending', output 'code'." 的逻辑
 94 |           prompt: |
 95 |             Analyze the input, which is a news source name.
 96 |             If the source is "Hacker News", output "community".
 97 |             If the source is "GitHub Trending", output "code".
 98 |             If the source is "V2EX", output "community".
 99 |             Otherwise, output "unknown".
100 |             Return ONLY the type, no other text.
101 |       match: "^(community|code|unknown)$" # 确保 LLM 输出的是预期的类型
102 |       action: "create_or_update_label"
103 |       label: "source_type" # 新标签的键
104 |     ```
105 | *   **效果**: 如果某文章的 `source` 标签值为 "Hacker News"，经过 LLM 处理后（理想情况下）会输出 "community"，然后 `source_type` 标签会被设置为 `{"source_type", "community"}`。
106 | 
107 | 这些示例展示了 `rewrite` 组件的灵活性和强大功能，通过组合不同的源标签、转换、匹配条件和动作，可以实现复杂的内容处理和信息增强逻辑。
108 | 
109 | 
110 | 


--------------------------------------------------------------------------------
/docs/tech/testing-zh.md:
--------------------------------------------------------------------------------
  1 | # Zenfeed 最新测试策略与风格
  2 | > 适用版本：v0.2.2
  3 | 
  4 | ## 1. 引言
  5 | 
  6 | Zenfeed 的测试策略核心目标是：
  7 | 
  8 | *   **清晰性 (Clarity)**：测试本身应如文档般易于理解，清晰地表达被测功能的行为和预期。
  9 | *   **可信性 (Reliability)**：测试结果应准确反映代码的健康状况，确保每次提交的信心。
 10 | *   **可维护性 (Maintainability)**：测试代码应易于修改和扩展，以适应项目的持续演进。
 11 | 
 12 | 本指南旨在详细介绍 Zenfeed 项目所遵循的测试理念、风格和具体实践。
 13 | 
 14 | ## 2. 核心测试理念与风格
 15 | 
 16 | Zenfeed 的测试方法论深受行为驱动开发 (BDD) 的影响，并结合了表驱动测试等高效实践。
 17 | 
 18 | ### 2.1 行为驱动开发
 19 | 
 20 | 我们选择 BDD 作为核心的测试描述框架，主要基于以下原因（其理念也体现在 `pkg/test/test.go` 的 `Case` 结构设计中）：
 21 | 
 22 | *   **提升可读性 (Enhanced Readability)**：BDD 强调使用自然语言描述软件的行为。每个测试用例读起来都像一个用户故事或一个功能说明，这使得测试本身就成为了一种精确的"活文档"。
 23 | *   **关注行为 (Focus on Behavior)**：测试不再仅仅是验证代码片段的输入输出，而是从模块、组件或用户交互的层面描述其应有的行为。这有助于确保我们构建的功能符合预期。
 24 | *   **需求驱动 (Requirement-Driven)**：测试直接对应需求描述，而非实现细节。这种自顶向下的方法确保了测试的稳定性，即使内部实现重构，只要行为不变，测试依然有效。
 25 | 
 26 | BDD 通常使用 `Scenario`, `Given`, `When`, `Then` 的结构来组织测试：
 27 | 
 28 | *   **`Scenario` (场景)**：描述测试用例所针对的特性或功能点。
 29 |     *   例如：`"Query hot block with label filters"` (查询带标签过滤的热数据块)
 30 | *   **`Given` (给定)**：描述场景开始前的初始上下文或状态（**注意：这不是指方法的输入参数**）。
 31 |     *   例如：`"a hot block with indexed feeds"` (一个已索引了 Feed 的热数据块)
 32 | *   **`When` (当)**：描述触发场景的事件或操作（**这部分通常包含被测方法的输入参数**）。
 33 |     *   例如：`"querying with label filters"` (当使用标签过滤器进行查询时)
 34 | *   **`Then` (那么)**：描述场景结束后预期的结果或状态变化。
 35 |     *   例如：`"should return matching feeds"` (那么应该返回匹配的 Feed)
 36 | 
 37 | 为了更好地在代码中实践 BDD，我们定义了 `pkg/test/test.go` 中的 `Case[GivenDetail, WhenDetail, ThenExpected]` 泛型结构。其中：
 38 | 
 39 | *   `GivenDetail`: 存储 `Given` 子句描述的初始状态的具体数据。
 40 | *   `WhenDetail`: 存储 `When` 子句描述的事件或方法调用的具体参数。
 41 | *   `ThenExpected`: 存储 `Then` 子句描述的预期结果。
 42 | 
 43 | 这种结构化不仅增强了测试数据的类型安全，也使得测试用例的意图更加明确。对于需要模拟依赖项的组件，`GivenDetail` 通常会包含用于配置这些模拟行为的 `component.MockOption`，我们将在后续 Mocking 章节详细讨论。
 44 | 
 45 | ### 2.2 表驱动测试
 46 | 
 47 | 当一个功能或方法需要针对多种不同的输入组合、边界条件或状态进行测试时，表驱动测试是一种非常高效和整洁的组织方式。
 48 | 
 49 | *   **简洁性 (Conciseness)**：将所有测试用例的数据（输入、参数、预期输出）集中定义在一个表格（通常是切片）中，避免了为每个 case 编写大量重复的测试逻辑。
 50 | *   **易扩展性 (Extensibility)**：添加新的测试场景变得非常简单，只需在表格中增加一条新记录即可。
 51 | *   **清晰性 (Clarity)**：所有相关的测试用例一目了然，便于快速理解被测功能的覆盖范围。
 52 | 
 53 | **实践约定**：
 54 | 在 Zenfeed 中，**当存在多个测试用例时，必须使用表驱动测试**。
 55 | 
 56 | ### 2.3 测试结构约定
 57 | 
 58 | 为了保持项目范围内测试代码的一致性和可读性，我们约定在测试文件中遵循以下组织结构：
 59 | 
 60 | 1.  **定义辅助类型 (Define Helper Types)**：在测试函数的开头部分，通常会为 `GivenDetail`, `WhenDetail`, `ThenExpected` 定义具体的结构体类型，以增强类型安全和表达力。
 61 | 2.  **定义测试用例表 (Define Test Case Table)**：将所有测试用例集中定义在一个 `[]test.Case` 类型的切片中。
 62 | 3.  **循环执行测试 (Loop Through Test Cases)**：使用 `for` 循环遍历测试用例表，并为每个用例运行 `t.Run(tt.Scenario, func(t *testing.T) { ... })`。
 63 | 4.  **清晰的 G/W/T 逻辑块 (Clear G/W/T Blocks)**：在每个 `t.Run` 的匿名函数内部，根据需要组织代码块，以对应 `Given`（准备初始状态，通常基于 `tt.GivenDetail`），`When`（执行被测操作，通常使用 `tt.WhenDetail`），和 `Then`（断言结果，通常对比 `tt.ThenExpected`）。
 64 | 5.  **描述性变量名 (Descriptive Variable Names)**：使用与 BDD 术语（如 `given`, `when`, `then`, `expected`, `actual`）相匹配或能清晰表达意图的变量名。
 65 | 
 66 | ## 3. 依赖隔离：Mocking (Dependency Isolation: Mocking)
 67 | 
 68 | 单元测试的核心原则之一是**隔离性 (Isolation)**，即被测试的代码单元（如一个函数或一个方法）应该与其依赖项隔离开来。Mocking (模拟) 是实现这种隔离的关键技术。
 69 | 
 70 | 我们主要使用 `github.com/stretchr/testify/mock` 库来实现 Mocking。特别是对于实现了 `pkg/component/component.go` 中 `Component` 接口的组件，我们提供了一种标准的 Mocking 方式。
 71 | 
 72 | 
 73 | ```go
 74 | type givenDetail struct {
 75 |     // Example of another initial state field for the component being tested
 76 |     initialProcessingPrefix string
 77 |     // MockOption to set up the behavior of dependencyA
 78 |     dependencyAMockSetup component.MockOption
 79 |     // ...
 80 | }
 81 | 
 82 | type whenDetail struct {
 83 |     processDataInput string
 84 |     // ...
 85 | }
 86 | 
 87 | type thenExpected struct {
 88 |     expectedOutput string
 89 |     expectedError error
 90 |     // ...
 91 | }
 92 | 
 93 | tests := []test.Case[givenDetail, whenDetail, thenExpected]{
 94 |     {
 95 |         Scenario: "Component processes data successfully with mocked dependency",
 96 |         Given:    "YourComponent with an initial prefix and dependencyA mocked to return 'related_data_value' for 'input_key'",
 97 |         When:     "ProcessData is called with 'input_key'",
 98 |         Then:     "Should return 'prefix1:input_key:related_data_value' and no error",
 99 |         GivenDetail: givenDetail{
100 |             initialProcessingPrefix: "prefix1",
101 |             dependencyAMockSetup: func(m *mock.Mock) {
102 |                 // We expect DependencyA's FetchRelatedData to be called with "input_key"
103 |                 // and it should return "related_data_value" and no error.
104 |                 m.On("FetchRelatedData", "input_key").
105 |                     Return("related_data_value", nil).
106 |                     Once() // Expect it to be called exactly once.
107 |             },
108 |         },
109 |         WhenDetail: whenDetail{
110 |             processDataInput: "input_key",
111 |         },
112 |         ThenExpected: thenExpected{
113 |             expectedOutput: "prefix1:input_key:related_data_value",
114 |             expectedError:  nil,
115 |         },
116 |     },
117 |     // ...更多测试用例...
118 | }
119 | 
120 | 
121 | // 在 for _, tt := range tests { t.Run(tt.Scenario, func(t *testing.T) { ... }) } 循环内部
122 | 
123 | // Given 阶段: Setup mocks and the component under test
124 | var mockHelperForDepA *mock.Mock
125 | defer func() { // 确保在每个子测试结束时断言
126 |     if mockHelperForDepA != nil {
127 |         mockHelperForDepA.AssertExpectations(t)
128 |     }
129 | }()
130 | 
131 | // 创建并配置 mockDependencyA
132 | // dependency_a_pkg.NewFactory 应该是一个返回 DependencyA 接口和 error 的工厂函数
133 | // 它接受 component.MockOption 来配置其内部的 mock.Mock 对象
134 | mockDependencyA, err := dependency_a_pkg.NewFactory(
135 |     component.MockOption(func(m *mock.Mock) {
136 |         mockHelperForDepA = m // 保存 mock.Mock 实例以供 AssertExpectations 使用
137 |         if tt.GivenDetail.dependencyAMockSetup != nil {
138 |             // 应用测试用例中定义的 specific mock setup
139 |             tt.GivenDetail.dependencyAMockSetup(m)
140 |         }
141 |     }),
142 | ).New("mocked_dep_a_instance", nil /* config for dep A */, dependency_a_pkg.Dependencies{})
143 | Expect(err).NotTo(HaveOccurred())
144 | Expect(mockDependencyA).NotTo(BeNil())
145 | 
146 | // 假设 YourComponent 的构造函数如下：
147 | componentUnderTest := NewYourComponent(tt.GivenDetail.initialProcessingPrefix, mockDependencyA)
148 | 
149 | // When 阶段: Execute the action being tested
150 | actualOutput, actualErr := componentUnderTest.ProcessData(context.Background(), tt.WhenDetail.processDataInput)
151 | 
152 | // Then 阶段: Assert the outcomes
153 | if tt.ThenExpected.expectedError != nil {
154 |     Expect(actualErr).To(HaveOccurred())
155 |     Expect(actualErr.Error()).To(Equal(tt.ThenExpected.expectedError.Error()))
156 | } else {
157 |     Expect(actualErr).NotTo(HaveOccurred())
158 | }
159 | Expect(actualOutput).To(Equal(tt.ThenExpected.expectedOutput))
160 | ```


--------------------------------------------------------------------------------
/docs/tech/vector-zh.md:
--------------------------------------------------------------------------------
  1 | > 适用版本：v0.2.2
  2 | 
  3 | ## 1. 引言
  4 | 
  5 | `vector.Index` 组件是 Zenfeed 系统中负责实现内容语义相似度检索的核心模块，与 `block.Block` 一一对应。它的主要目标是根据用户提供的查询向量，快速找到与之在语义上最相关的 Feed（通常是新闻资讯、文章等文本内容）。
  6 | 
  7 | 该索引的核心设计理念是服务于**文档级别的召回 (Document-level Recall)**。与许多传统向量索引将每个文本块（chunk）视为独立节点不同，`vector.Index` 将**整个 Feed 文档作为图中的一个节点**。而 Feed 内容经过 `embedding_spliter` 切分后产生的多个文本块（chunks），它们各自的向量嵌入（embeddings）则作为该 Feed 节点的属性。
  8 | 
  9 | 这种设计的独特性在于：
 10 | 
 11 | *   **搜索结果直接是 Feed ID**：用户搜索后直接获得相关 Feed 的标识符，而不是零散的文本片段。
 12 | *   **相似度聚焦于“任何部分相关即相关”**：如果一个 Feed 的任何一个 chunk 与查询向量高度相似，整个 Feed 就被认为是相关的。其最终得分为该 Feed 所有 chunks 与查询向量相似度中的最大值。
 13 | *   **为新闻资讯场景优化**：这种设计特别适合新闻资讯类应用，优先保证相关内容的召回率，确保用户不会错过重要信息，即使该信息仅是文章的一部分。
 14 | 
 15 | `vector.Index` 底层采用 HNSW (Hierarchical Navigable Small World) 算法来组织和搜索这些 Feed 节点，以实现高效的近似最近邻查找。
 16 | 
 17 | ## 2. 核心概念
 18 | 
 19 | 理解 `vector.Index` 的运作方式，需要熟悉以下核心概念：
 20 | 
 21 | *   **Feed (Node)**:
 22 |     *   在 `vector.Index` 的 HNSW 图中，每个**节点 (node)** 代表一个独立的 **Feed 文档** (例如一篇新闻报道)。
 23 |     *   每个 Feed 通过一个唯一的 `uint64` ID 来标识。
 24 |     *   节点存储了其对应的原始 Feed ID 以及与该 Feed 相关的多个向量。
 25 | 
 26 | *   **Chunk (Vector Represented by `[][]float32`)**:
 27 |     *   一个 Feed 的内容（尤其是其文本标签，如标题、正文）可能较长。如果直接将整个长文本生成单一的 embedding，可能会遇到以下问题：
 28 |         *   **LLM 输入长度限制**: 许多 embedding 模型对输入文本的长度有限制。
 29 |         *   **语义稀释 (Semantic Dilution)**: 对于包含多个主题或信息点的长文本，单一向量可能难以精确捕捉所有细微的语义，导致关键信息在整体平均化的向量表示中被“稀释”，降低了特定语义片段的表征能力。例如，一篇包含多个不同事件的综合报道，其单一向量可能无法很好地代表其中任何一个特定事件。
 30 |     *   通过 `embeddingSpliter`，一个 Feed 的文本内容可以被切分成一个或多个语义相对连贯的 **文本块 (Chunks)**。这种切分有助于每个 chunk 聚焦于更具体的主题或信息点。
 31 |     *   每个 Chunk 会被送入 LLM 生成一个 **向量嵌入 (vector embedding)**。
 32 |     *   因此，一个 Feed 节点在索引中会关联**一组向量 (vectors `[][]float32`)**，每个子向量代表其一个 Chunk 的语义。
 33 | 
 34 | *   **Embedding**:
 35 |     *   Embedding 是一个由浮点数组成的向量，由大语言模型 (LLM) 生成。它能够捕捉文本片段的语义信息，使得语义上相似的文本在向量空间中距离更近。
 36 |     *   `vector.Index` 存储和比较的就是这些 embeddings。
 37 | 
 38 | *   **HNSW (Hierarchical Navigable Small World)**:
 39 |     *   `vector.Index` 使用 HNSW 作为其底层的近似最近邻 (ANN) 搜索算法。
 40 |     *   HNSW 通过构建一个多层的图结构来实现高效搜索。上层图更稀疏，用于快速导航；下层图更密集，用于精确查找。
 41 |     *   这种结构使得索引在插入新节点和执行搜索时都能保持较好的性能。
 42 | 
 43 | *   **相似度计算 (Similarity Score)**:
 44 |     *   **Feed 间相似度 (Inter-Feed Similarity)**:
 45 |         *   当评估 HNSW 图中两个 Feed 节点（例如，`nodeA` 和 `nodeB`）之间的相似度时，策略是计算 `nodeA` 的所有 Chunk 向量与 `nodeB` 的所有 Chunk 向量之间的两两余弦相似度。
 46 |         *   最终，这两个 Feed 节点间的相似度取所有这些两两 Chunk 相似度中的**最大值 (Maximal Local Similarity)**。
 47 |         *   **选择此策略的原因**: 对于新闻资讯，只要两篇报道中存在任何一对高度相关的片段（例如，都报道了同一核心事件或引用了同一关键信息），就认为这两篇报道具有强关联性。这有助于最大化召回率，确保用户能发现所有可能相关的资讯，即使它们整体侧重点不同。
 48 |         *   **潜在影响**: 这种策略对局部强相关非常敏感，但也可能因为次要内容的偶然相似而将整体主题差异较大的 Feed 判定为相关，需要在上层应用或通过重排序模型来进一步优化精度。
 49 |     *   **查询与 Feed 相似度 (Query-Feed Similarity)**:
 50 |         *   当用户使用一个查询向量 `q` 进行搜索时，计算 `q` 与目标 Feed 的每一个 Chunk 向量的余弦相似度。
 51 |         *   该 Feed 最终与查询 `q` 的相似度分数，同样取这些计算结果中的**最大值**。
 52 |         *   这样做是为了确保只要 Feed 的任何一部分内容与用户查询高度匹配，该 Feed 就会被召回。
 53 | 
 54 | ## 3. 主要接口
 55 | 
 56 | `vector.Index` 提供了一组清晰的接口，用于管理和查询基于 Feed 内容语义的向量索引。
 57 | 
 58 | *   **`Add(ctx context.Context, id uint64, vectors [][]float32) error`**
 59 |     *   **业务目标**: 将一个新的 Feed 文档及其所有内容块（Chunks）的向量表示添加到索引中，使其能够被后续的相似度搜索发现。
 60 |     *   **核心流程**:
 61 |         1.  **接收 Feed 数据**: 接收 Feed 的唯一 `id` 和代表其所有 Chunks 的 `vectors` 列表。
 62 |         2.  **确定插入策略**: 根据 HNSW 算法的层级构建原则，为该 Feed 节点随机确定一个在多层图结构中的最高插入层级。
 63 |         3.  **查找邻近节点**: 从选定的最高层级开始逐层向下，在每一层利用该层的图结构（和 `EfConstruct` 参数指导下的搜索范围）为新 Feed 节点找到一组最相似的已有 Feed 节点（邻居）。此处的“相似”基于我们定义的“最大局部相似性”——即比较两个 Feed 所有 Chunk 向量对，取其中相似度最高的一对作为这两个 Feed 的相似度。
 64 |         4.  **建立连接**: 如果新 Feed 节点被分配到当前层级，则将其与找到的邻居建立双向连接（朋友关系），并更新其在该层级的友邻列表。
 65 |         5.  **维护图结构**: 在添加连接后，可能会触发友邻剪枝逻辑，以确保每个节点的友邻数量符合配置（`M` 或 `2*M`），并尝试维护图的良好连接性，避免产生孤立节点或过度密集的区域。
 66 | 
 67 | *   **`Search(ctx context.Context, q []float32, threshold float32, limit int) (map[uint64]float32, error)`**
 68 |     *   **业务目标**: 根据用户提供的查询向量 `q`，从索引中高效地检索出语义上最相似的 Feed 列表，并返回它们的 ID 及相似度得分。
 69 |     *   **核心流程**:
 70 |         1.  **接收查询**: 接收查询向量 `q`、相似度阈值 `threshold` 和期望返回的最大结果数 `limit`。
 71 |         2.  **导航至目标区域**: 从 HNSW 图的顶层开始，利用稀疏的高层图结构快速定位到与查询向量 `q` 大致相关的区域，逐层向下，每层都找到与 `q` 更近的节点作为下一层的入口。
 72 |         3.  **在底层精确搜索**: 到达最底层的图（第 0 层，包含所有 Feed 节点）后，以上一步得到的入口点为起点，进行一次更细致的扩展搜索（受 `EfSearch` 参数指导的搜索范围）。此搜索旨在找到与查询向量 `q` 的“最大局部相似性”（即 `q` 与 Feed 的所有 Chunk 向量相似度中的最大值）满足 `threshold` 且排名前 `limit` 的 Feed。
 73 |         4.  **返回结果**: 将符合条件的 Feed ID 及其对应的最高相似度分数打包返回。
 74 | 
 75 | *   **`EncodeTo(ctx context.Context, w io.Writer) error` / `DecodeFrom(ctx context.Context, r io.Reader) error`**
 76 |     *   **业务目标**: 提供索引的持久化能力，允许将内存中的索引状态完整地保存到外部存储（如文件），并在需要时恢复。
 77 |     *   **核心流程 (`EncodeTo`)**:
 78 |         1.  **写入元数据**: 保存索引的配置参数（如 `M`, `Ml`, `EfConstruct`, `EfSearch`）和版本信息。
 79 |         2.  **写入节点数据**: 遍历所有 Feed 节点，依次保存每个节点的 ID、其所有 Chunk 向量（经过量化处理以压缩体积）、以及它在 HNSW 各层级上的友邻关系（友邻 ID 和相似度）。
 80 |         3.  **写入层级结构**: 保存每个层级所包含的节点 ID 列表。
 81 |     *   **核心流程 (`DecodeFrom`)**:
 82 |         1.  **读取元数据**: 恢复索引配置。
 83 |         2.  **重建节点数据**: 读取并重建所有 Feed 节点，包括其 ID、反量化后的 Chunk 向量、以及友邻关系。
 84 |         3.  **重建层级结构**: 恢复 HNSW 的多层图。
 85 | 
 86 | ## 4. 内部实现细节补充
 87 | 
 88 | ### 4.1 核心数据表示
 89 | 
 90 | *   **Feed 节点 (`node`)**: 每个 Feed 在内存中表示为一个 `node` 对象，它不仅存储了 Feed 的 ID 和其所有 Chunk 的向量 (`vectors [][]float32`)，还关键地维护了它在 HNSW 图各个层级上的“友邻列表” (`friendsOnLayers`)。这个友邻列表是图连接性的基础。
 91 | *   **分层图 (`layers`)**: 索引内部维护一个 `layers` 列表，代表 HNSW 的多层结构。高层图节点更少、连接更稀疏，用于快速跳转；底层图（尤其是第0层）节点最多、连接最密集，用于精确搜索。
 92 | *   **全局节点池 (`m`)**: 一个从 Feed ID 到 `node` 对象的映射，方便快速访问任何已索引的 Feed。
 93 | 
 94 | ### 4.2 索引构建的关键机制
 95 | 
 96 | *   **概率性分层 (`randomInsertLevel`)**: 新加入的 Feed 节点会被随机分配到一个最高层级。这种概率机制（受 `Ml` 参数影响）形成了 HNSW 的金字塔式层级结构。
 97 | *   **动态邻居选择 (`insertAndLinkAtLevel` 中的搜索逻辑)**: 当一个新 Feed 节点加入某一层时，它会基于“最大局部相似性”在该层搜索一定数量（受 `EfConstruct` 影响）的最近邻居。
 98 | *   **连接维护与剪枝 (`makeFriend`, `tryRemoveFriend`)**: 与邻居建立双向连接后，为保证图的性能和结构（避免节点拥有过多邻居），会有一套剪枝逻辑。这套逻辑不仅考虑移除相似度最低的连接，有时还会考虑被移除连接的另一端节点的连接状况，试图避免制造“孤岛”节点，甚至在必要时（通过 `tryRemakeFriend`）为连接数过少的节点尝试从“邻居的邻居”中寻找新的连接机会。
 99 | 
100 | ### 4.3 存储效率：向量量化
101 | 
102 | *   为了显著减少索引在持久化存储时占用的空间，`float32` 类型的向量在写入磁盘前会通过 `vectorutil.Quantize` 被转换为 `int8` 类型，并记录下转换所需的最小值和缩放比例。读取时再通过 `vectorutil.Dequantize` 进行有损恢复。这是在存储成本和表示精度之间的一种实用权衡。
103 | 


--------------------------------------------------------------------------------
/docs/upgrade-from-v0.1.0.md:
--------------------------------------------------------------------------------
 1 | ## 0. Check your current version
 2 | ```bash
 3 | # Mac/Linux
 4 | docker inspect glidea/zenfeed:latest | grep version
 5 | 
 6 | # Windows PowerShell
 7 | docker inspect glidea/zenfeed:latest | Select-String -Pattern 'version'
 8 | ```
 9 | 
10 | If you **don't see any results**, it means you're using version v0.1.0. This is because the first version didn't include version information. Therefore, **this document applies to you.**
11 | 
12 | ## 1. Move your data to the correct volume path
13 | ```bash
14 | docker-compose -p zenfeed exec zenfeed cp -a /data/. /app/data/
15 | ```
16 | 
17 | ## 2. Backup your config
18 | Access: http://localhost:1400
19 | ![](images/upgrade-from-v0.1.0-backup.png)
20 | 
21 | ## 3. Upgrade
22 | See [upgrade](./upgrade.md)
23 | 
24 | ## 4. Resave your config
25 | Access: http://localhost:1400
26 | Resave your config.
27 | 
28 | These tedious steps are needed because of an oversight in how the first version was deployed, and I apologize for that. Subsequent versions will not require these extra steps.
29 | 


--------------------------------------------------------------------------------
/docs/upgrade.md:
--------------------------------------------------------------------------------
 1 | **NOTE:** If you are upgrading from v0.1.0, which is the first version, please refer to [upgrade-from-v0.1.0.md](./upgrade-from-v0.1.0.md)
 2 | 
 3 | ```bash
 4 | # Ensure compose yml up to date.
 5 | ## Mac/Linux
 6 | curl -L -O https://raw.githubusercontent.com/glidea/zenfeed/main/docker-compose.yml
 7 | ## Windows PowerShell
 8 | Invoke-WebRequest -Uri "https://raw.githubusercontent.com/glidea/zenfeed/main/docker-compose.yml" -OutFile ([System.IO.Path]::GetFileName("https://raw.githubusercontent.com/glidea/zenfeed/main/docker-compose.yml"))
 9 | 
10 | 
11 | # Ensure images up to date.
12 | docker-compose -p zenfeed pull
13 | 
14 | 
15 | # Upgrade without having to reconfigure anything (e.g. API keys).
16 | docker-compose -p zenfeed up -d
17 | ```
18 | 
19 | Then all the feed data and configurations should be intact.
20 | 


--------------------------------------------------------------------------------
/docs/webhook-zh.md:
--------------------------------------------------------------------------------
  1 | # Zenfeed Webhook 通知对接指南
  2 | 
  3 | Zenfeed 支持通过 Webhook 将分组和总结后的 Feed 通知推送到您指定的 HTTP(S) 端点。这允许您将 Zenfeed 的通知集成到自定义的应用或工作流程中。
  4 | 
  5 | ## 1. 配置方法
  6 | 
  7 | 要在 Zenfeed 中配置 Webhook 通知，您需要在配置文件的 `notify.receivers` 部分定义一个或多个接收者，并为每个 Webhook 接收者指定其唯一的 `name` 和 `webhook` 配置块。
  8 | 
  9 | **示例配置 (`config.yaml`):**
 10 | 
 11 | ```yaml
 12 | notify:
 13 |   # ... 其他通知配置 ...
 14 | 
 15 |   receivers:
 16 |     - name: my_awesome_webhook # 接收者的唯一名称，将在路由规则中引用
 17 |       webhook:
 18 |         url: "https://your-service.com/webhook-endpoint" # 您的 Webhook 接收端点 URL
 19 | 
 20 |   # 示例：路由规则中如何使用此接收者
 21 |   route: # or sub_routes..
 22 |     receivers:
 23 |       - my_awesome_webhook # 引用上面定义的接收者名称
 24 |     # ... 其他路由配置 ...
 25 | ```
 26 | 
 27 | 在上述示例中：
 28 | - 我们定义了一个名为 `my_awesome_webhook` 的接收者。
 29 | - `webhook.url` 字段指定了当有匹配此接收者的通知时，Zenfeed 将向哪个 URL 发送 POST 请求。
 30 | 
 31 | ## 2. 数据格式详解
 32 | 
 33 | 当 Zenfeed 向您的 Webhook 端点发送通知时，它会发送一个 `POST` 请求，请求体为 JSON 格式。
 34 | 
 35 | 请求体结构如下：
 36 | 
 37 | ```json
 38 | {
 39 |   "group": "string",
 40 |   "labels": {
 41 |     "label_key1": "label_value1",
 42 |     "label_key2": "label_value2"
 43 |   },
 44 |   "summary": "string",
 45 |   "feeds": [
 46 |     {
 47 |       "labels": {
 48 |         "title": "Feed Title 1",
 49 |         "link": "http://example.com/feed1",
 50 |         "content": "Feed content snippet 1...",
 51 |         "source": "example_source",
 52 |         "pub_time": "2024-07-30T10:00:00Z"
 53 |         // ... 其他自定义或标准标签
 54 |       },
 55 |       "time": "2024-07-30T10:00:00Z",
 56 |       "related": [
 57 |         // 可选：与此 Feed 相关的其他 Feed 对象，结构同父 Feed
 58 |       ]
 59 |     }
 60 |     // ...更多 Feed 对象
 61 |   ]
 62 | }
 63 | ```
 64 | 
 65 | **字段说明:**
 66 | 
 67 | -   `group` (`string`):
 68 |     当前通知所属的组名。这个名称是根据通知路由配置中 `group_by` 定义的标签值组合而成的。例如，如果 `group_by: ["source", "category"]`，且一个 Feed 组的 `source` 是 `github_trending`，`category` 是 `golang`，那么 `group` 可能类似于 `"github_trending/golang"`。
 69 | 
 70 | -   `labels` (`object`):
 71 |     一个键值对对象，表示当前通知组的标签。这些标签是根据通知路由配置中 `group_by` 所指定的标签及其对应的值。
 72 |     例如，如果 `group_by: ["source"]` 且当前组的 `source` 标签值为 `rsshub`，则 `labels` 会是 `{"source": "rsshub"}`。
 73 | 
 74 | -   `summary` (`string`):
 75 |     由大语言模型 (LLM) 为当前这一组 Feed 生成的摘要文本。如果通知路由中没有配置 LLM 总结，此字段可能为空字符串或省略 (取决于具体的实现细节，但通常会是空字符串)。
 76 | 
 77 | -   `feeds` (`array` of `object`):
 78 |     一个数组，包含了属于当前通知组的所有 Feed 对象。每个 Feed 对象包含以下字段：
 79 |     *   `labels` (`object`): Feed 的元数据。这是一个键值对对象，包含了该 Feed 的所有标签，例如：
 80 |         *   `title` (`string`): Feed 的标题。
 81 |         *   `link` (`string`): Feed 的原始链接。
 82 |         *   `content` (`string`): Feed 的内容摘要或全文 (取决于抓取和重写规则)。
 83 |         *   `source` (`string`): Feed 的来源标识。
 84 |         *   `pub_time` (`string`): Feed 的发布时间 (RFC3339 格式的字符串，例如 `2025-01-01T00:00:00Z`)。
 85 |         *   ...以及其他在抓取或重写过程中添加的自定义标签。
 86 |     *   `time` (`string`): Feed 的时间戳，通常是其发布时间，采用 RFC3339 格式 (例如 `2025-01-01T00:00:00Z`)。此字段与 `labels.pub_time` 通常一致，但 `time` 是系统内部用于时间序列处理的主要时间字段。
 87 |     *   `related` (`array` of `object`, 可选):
 88 |         一个数组，包含了与当前 Feed 语义相关的其他 Feed 对象。这通常在通知路由中启用了 `compress_by_related_threshold` 选项时填充。每个相关的 Feed 对象结构与父 Feed 对象完全相同。如果未启用相关性压缩或没有相关的 Feed，此字段可能为空数组或不存在。
 89 | 
 90 | ## 3. 请求示例
 91 | 
 92 | 以下是一个发送到您的 Webhook 端点的 JSON 请求体示例：
 93 | 
 94 | ```json
 95 | {
 96 |   "group": "my_favorite_blogs",
 97 |   "labels": {
 98 |     "category": "tech_updates"
 99 |   },
100 |   "summary": "今天有多篇关于最新 AI 技术进展的文章，重点关注了大型语言模型在代码生成方面的应用，以及其对未来软件开发模式的潜在影响。",
101 |   "feeds": [
102 |     {
103 |       "labels": {
104 |         "content": "AlphaCode X 展示了惊人的代码理解和生成能力，在多个编程竞赛中超越了人类平均水平...",
105 |         "link": "https://example.blog/alphacode-x-details",
106 |         "pub_time": "2024-07-30T14:35:10Z",
107 |         "source": "Example Tech Blog",
108 |         "title": "AlphaCode X: 下一代 AI 编码助手",
109 |         "type": "blog_post"
110 |       },
111 |       "time": "2024-07-30T14:35:10Z",
112 |       "related": []
113 |     },
114 |     {
115 |       "labels": {
116 |         "content": "讨论了当前 LLM 在实际软件工程项目中落地所面临的挑战，包括成本、可控性和安全性问题。",
117 |         "link": "https://another.blog/llm-in-swe-challenges",
118 |         "pub_time": "2024-07-30T11:15:00Z",
119 |         "source": "Another Tech Review",
120 |         "title": "LLM 在软件工程中的应用：机遇与挑战",
121 |         "type": "rss"
122 |       },
123 |       "time": "2024-07-30T11:15:00Z",
124 |       "related": [
125 |         {
126 |           "labels": {
127 |             "content": "一篇关于如何更经济有效地部署和微调大型语言模型的指南。",
128 |             "link": "https://some.other.blog/cost-effective-llm",
129 |             "pub_time": "2024-07-30T09:00:00Z",
130 |             "source": "AI Infra Weekly",
131 |             "title": "经济高效的 LLM 部署策略",
132 |             "type": "rss"
133 |           },
134 |           "time": "2024-07-30T09:00:00Z",
135 |           "related": []
136 |         }
137 |       ]
138 |     }
139 |   ]
140 | }
141 | ```
142 | 
143 | ## 4. 响应要求
144 | 
145 | Zenfeed 期望您的 Webhook 端点在成功接收并处理通知后，返回 HTTP `200 OK` 状态码。
146 | 如果 Zenfeed 收到任何非 `200` 的状态码，它会将该次通知尝试标记为失败，并可能根据重试策略进行重试 (具体重试行为取决于 Zenfeed 的内部实现)。
147 | 
148 | 请确保您的端点能够及时响应，以避免超时。
149 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/glidea/zenfeed
 2 | 
 3 | go 1.23.4
 4 | 
 5 | require (
 6 | 	github.com/JohannesKaufmann/html-to-markdown v1.6.0
 7 | 	github.com/benbjohnson/clock v1.3.5
 8 | 	github.com/chewxy/math32 v1.10.1
 9 | 	github.com/edsrzf/mmap-go v1.2.0
10 | 	github.com/gorilla/feeds v1.2.0
11 | 	github.com/mark3labs/mcp-go v0.17.0
12 | 	github.com/mmcdole/gofeed v1.3.0
13 | 	github.com/nutsdb/nutsdb v1.0.4
14 | 	github.com/onsi/gomega v1.36.1
15 | 	github.com/pkg/errors v0.9.1
16 | 	github.com/prometheus/client_golang v1.21.1
17 | 	github.com/sashabaranov/go-openai v1.40.1
18 | 	github.com/stretchr/testify v1.10.0
19 | 	github.com/veqryn/slog-dedup v0.5.0
20 | 	github.com/yuin/goldmark v1.7.8
21 | 	gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
22 | 	gopkg.in/yaml.v3 v3.0.1
23 | 	k8s.io/utils v0.0.0-20241210054802-24370beab758
24 | )
25 | 
26 | require (
27 | 	github.com/PuerkitoBio/goquery v1.9.2 // indirect
28 | 	github.com/andybalholm/cascadia v1.3.2 // indirect
29 | 	github.com/antlabs/stl v0.0.1 // indirect
30 | 	github.com/antlabs/timer v0.0.11 // indirect
31 | 	github.com/beorn7/perks v1.0.1 // indirect
32 | 	github.com/bwmarrin/snowflake v0.3.0 // indirect
33 | 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
34 | 	github.com/davecgh/go-spew v1.1.1 // indirect
35 | 	github.com/gofrs/flock v0.8.1 // indirect
36 | 	github.com/google/go-cmp v0.7.0 // indirect
37 | 	github.com/google/uuid v1.6.0 // indirect
38 | 	github.com/json-iterator/go v1.1.12 // indirect
39 | 	github.com/klauspost/compress v1.18.0 // indirect
40 | 	github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 // indirect
41 | 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
42 | 	github.com/modern-go/reflect2 v1.0.2 // indirect
43 | 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
44 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
45 | 	github.com/prometheus/client_model v0.6.1 // indirect
46 | 	github.com/prometheus/common v0.62.0 // indirect
47 | 	github.com/prometheus/procfs v0.15.1 // indirect
48 | 	github.com/stretchr/objx v0.5.2 // indirect
49 | 	github.com/temoto/robotstxt v1.1.2
50 | 	github.com/tidwall/btree v1.6.0 // indirect
51 | 	github.com/xujiajun/mmap-go v1.0.1 // indirect
52 | 	github.com/xujiajun/utils v0.0.0-20220904132955-5f7c5b914235 // indirect
53 | 	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
54 | 	golang.org/x/net v0.38.0 // indirect
55 | 	golang.org/x/sys v0.31.0 // indirect
56 | 	golang.org/x/text v0.23.0 // indirect
57 | 	google.golang.org/protobuf v1.36.6 // indirect
58 | 	gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect
59 | 	modernc.org/b/v2 v2.1.0 // indirect
60 | )
61 | 


--------------------------------------------------------------------------------
/pkg/api/http/http.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package http
 17 | 
import (
	"context"
	"net"
	"net/http"
	"time"

	"github.com/pkg/errors"

	"github.com/glidea/zenfeed/pkg/api"
	"github.com/glidea/zenfeed/pkg/component"
	"github.com/glidea/zenfeed/pkg/config"
	telemetry "github.com/glidea/zenfeed/pkg/telemetry"
	"github.com/glidea/zenfeed/pkg/telemetry/log"
	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
	"github.com/glidea/zenfeed/pkg/util/jsonrpc"
)
 32 | 
 33 | // --- Interface code block ---
// Server is the HTTP API server: a component.Component that is also a
// config.Watcher.
type Server interface {
	component.Component
	config.Watcher
}
 38 | 
// Config holds the HTTP API server settings.
type Config struct {
	// Address is the listen address in host:port form. Validate sets it to
	// ":1300" when it is empty.
	Address string
}
 42 | 
 43 | func (c *Config) Validate() error {
 44 | 	if c.Address == "" {
 45 | 		c.Address = ":1300"
 46 | 	}
 47 | 	if _, _, err := net.SplitHostPort(c.Address); err != nil {
 48 | 		return errors.Wrap(err, "invalid address")
 49 | 	}
 50 | 
 51 | 	return nil
 52 | }
 53 | 
 54 | func (c *Config) From(app *config.App) *Config {
 55 | 	c.Address = app.API.HTTP.Address
 56 | 
 57 | 	return c
 58 | }
 59 | 
// Dependencies lists what the server needs from the rest of the application.
type Dependencies struct {
	// API provides the methods that are exposed as HTTP endpoints.
	API api.API
}
 63 | 
 64 | // --- Factory code block ---
// Factory builds Server instances from the application config and Dependencies.
type Factory component.Factory[Server, config.App, Dependencies]
 66 | 
 67 | func NewFactory(mockOn ...component.MockOption) Factory {
 68 | 	if len(mockOn) > 0 {
 69 | 		return component.FactoryFunc[Server, config.App, Dependencies](
 70 | 			func(instance string, config *config.App, dependencies Dependencies) (Server, error) {
 71 | 				m := &mockServer{}
 72 | 				component.MockOptions(mockOn).Apply(&m.Mock)
 73 | 
 74 | 				return m, nil
 75 | 			},
 76 | 		)
 77 | 	}
 78 | 
 79 | 	return component.FactoryFunc[Server, config.App, Dependencies](new)
 80 | }
 81 | 
 82 | func new(instance string, app *config.App, dependencies Dependencies) (Server, error) {
 83 | 	config := &Config{}
 84 | 	config.From(app)
 85 | 	if err := config.Validate(); err != nil {
 86 | 		return nil, errors.Wrap(err, "validate config")
 87 | 	}
 88 | 
 89 | 	router := http.NewServeMux()
 90 | 	api := dependencies.API
 91 | 	router.Handle("/write", jsonrpc.API(api.Write))
 92 | 	router.Handle("/query_config", jsonrpc.API(api.QueryAppConfig))
 93 | 	router.Handle("/apply_config", jsonrpc.API(api.ApplyAppConfig))
 94 | 	router.Handle("/query_config_schema", jsonrpc.API(api.QueryAppConfigSchema))
 95 | 	router.Handle("/query_rsshub_categories", jsonrpc.API(api.QueryRSSHubCategories))
 96 | 	router.Handle("/query_rsshub_websites", jsonrpc.API(api.QueryRSSHubWebsites))
 97 | 	router.Handle("/query_rsshub_routes", jsonrpc.API(api.QueryRSSHubRoutes))
 98 | 	router.Handle("/query", jsonrpc.API(api.Query))
 99 | 	httpServer := &http.Server{Addr: config.Address, Handler: router}
100 | 
101 | 	return &server{
102 | 		Base: component.New(&component.BaseConfig[Config, Dependencies]{
103 | 			Name:         "HTTPServer",
104 | 			Instance:     instance,
105 | 			Config:       config,
106 | 			Dependencies: dependencies,
107 | 		}),
108 | 		http: httpServer,
109 | 	}, nil
110 | }
111 | 
112 | // --- Implementation code block ---
// server is the real Server implementation.
type server struct {
	*component.Base[Config, Dependencies]
	// http is the underlying HTTP server, created in new and started by Run.
	http *http.Server
}
117 | 
118 | func (s *server) Run() (err error) {
119 | 	ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
120 | 	defer func() { telemetry.End(ctx, err) }()
121 | 
122 | 	serverErr := make(chan error, 1)
123 | 	go func() {
124 | 		serverErr <- s.http.ListenAndServe()
125 | 	}()
126 | 
127 | 	s.MarkReady()
128 | 	select {
129 | 	case <-ctx.Done():
130 | 		log.Info(ctx, "shutting down")
131 | 
132 | 		return s.http.Shutdown(ctx)
133 | 	case err := <-serverErr:
134 | 		return errors.Wrap(err, "listen and serve")
135 | 	}
136 | }
137 | 
138 | func (s *server) Reload(app *config.App) error {
139 | 	newConfig := &Config{}
140 | 	newConfig.From(app)
141 | 	if err := newConfig.Validate(); err != nil {
142 | 		return errors.Wrap(err, "validate config")
143 | 	}
144 | 	if s.Config().Address != newConfig.Address {
145 | 		return errors.New("address cannot be reloaded")
146 | 	}
147 | 
148 | 	s.SetConfig(newConfig)
149 | 
150 | 	return nil
151 | }
152 | 
153 | type mockServer struct {
154 | 	component.Mock
155 | }
156 | 
157 | func (m *mockServer) Reload(app *config.App) error {
158 | 	return m.Called(app).Error(0)
159 | }
160 | 


--------------------------------------------------------------------------------
/pkg/api/rss/rss.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package rss
 17 | 
import (
	"context"
	"net"
	"net/http"
	"text/template"
	"time"

	"github.com/benbjohnson/clock"
	"github.com/gorilla/feeds"
	"github.com/pkg/errors"

	"github.com/glidea/zenfeed/pkg/api"
	"github.com/glidea/zenfeed/pkg/component"
	"github.com/glidea/zenfeed/pkg/config"
	"github.com/glidea/zenfeed/pkg/model"
	telemetry "github.com/glidea/zenfeed/pkg/telemetry"
	"github.com/glidea/zenfeed/pkg/telemetry/log"
	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
	"github.com/glidea/zenfeed/pkg/util/buffer"
)
 37 | 
// clk is the time source used by this package.
// NOTE(review): presumably a package variable so tests can swap in a fake clock — confirm.
var clk = clock.New()
 39 | 
 40 | // --- Interface code block ---
// Server is the RSS server: a component.Component that is also a
// config.Watcher.
type Server interface {
	component.Component
	config.Watcher
}
 45 | 
// Config holds the RSS server settings.
type Config struct {
	// Address is the listen address in host:port form. Validate sets it to
	// ":1302" when it is empty.
	Address             string
	// ContentHTMLTemplate is the template source used to render the content of
	// each RSS item. Validate sets it to "{{ .summary_html_snippet }}" when empty.
	ContentHTMLTemplate string
	// contentHTMLTemplate is the parsed ContentHTMLTemplate, filled in by Validate.
	contentHTMLTemplate *template.Template
}
 51 | 
 52 | func (c *Config) Validate() error {
 53 | 	if c.Address == "" {
 54 | 		c.Address = ":1302"
 55 | 	}
 56 | 	if _, _, err := net.SplitHostPort(c.Address); err != nil {
 57 | 		return errors.Wrap(err, "invalid address")
 58 | 	}
 59 | 
 60 | 	if c.ContentHTMLTemplate == "" {
 61 | 		c.ContentHTMLTemplate = "{{ .summary_html_snippet }}"
 62 | 	}
 63 | 	t, err := template.New("").Parse(c.ContentHTMLTemplate)
 64 | 	if err != nil {
 65 | 		return errors.Wrap(err, "parse rss content template")
 66 | 	}
 67 | 	c.contentHTMLTemplate = t
 68 | 
 69 | 	return nil
 70 | }
 71 | 
 72 | func (c *Config) From(app *config.App) *Config {
 73 | 	c.Address = app.API.RSS.Address
 74 | 	c.ContentHTMLTemplate = app.API.RSS.ContentHTMLTemplate
 75 | 
 76 | 	return c
 77 | }
 78 | 
// Dependencies lists what the server needs from the rest of the application.
type Dependencies struct {
	// API is used to run the feed query behind every RSS request.
	API api.API
}
 82 | 
 83 | // --- Factory code block ---
// Factory builds Server instances from the application config and Dependencies.
type Factory component.Factory[Server, config.App, Dependencies]
 85 | 
 86 | func NewFactory(mockOn ...component.MockOption) Factory {
 87 | 	if len(mockOn) > 0 {
 88 | 		return component.FactoryFunc[Server, config.App, Dependencies](
 89 | 			func(instance string, config *config.App, dependencies Dependencies) (Server, error) {
 90 | 				m := &mockServer{}
 91 | 				component.MockOptions(mockOn).Apply(&m.Mock)
 92 | 
 93 | 				return m, nil
 94 | 			},
 95 | 		)
 96 | 	}
 97 | 
 98 | 	return component.FactoryFunc[Server, config.App, Dependencies](new)
 99 | }
100 | 
101 | func new(instance string, app *config.App, dependencies Dependencies) (Server, error) {
102 | 	config := &Config{}
103 | 	config.From(app)
104 | 	if err := config.Validate(); err != nil {
105 | 		return nil, errors.Wrap(err, "validate config")
106 | 	}
107 | 
108 | 	s := &server{
109 | 		Base: component.New(&component.BaseConfig[Config, Dependencies]{
110 | 			Name:         "RSSServer",
111 | 			Instance:     instance,
112 | 			Config:       config,
113 | 			Dependencies: dependencies,
114 | 		}),
115 | 	}
116 | 
117 | 	router := http.NewServeMux()
118 | 	router.Handle("/", http.HandlerFunc(s.rss))
119 | 
120 | 	s.http = &http.Server{Addr: config.Address, Handler: router}
121 | 
122 | 	return s, nil
123 | }
124 | 
125 | // --- Implementation code block ---
// server is the real Server implementation.
type server struct {
	*component.Base[Config, Dependencies]
	// http is the underlying HTTP server, created in new and started by Run.
	http *http.Server
}
130 | 
131 | func (s *server) Run() (err error) {
132 | 	ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
133 | 	defer func() { telemetry.End(ctx, err) }()
134 | 
135 | 	serverErr := make(chan error, 1)
136 | 	go func() {
137 | 		serverErr <- s.http.ListenAndServe()
138 | 	}()
139 | 
140 | 	s.MarkReady()
141 | 	select {
142 | 	case <-ctx.Done():
143 | 		log.Info(ctx, "shutting down")
144 | 
145 | 		return s.http.Shutdown(ctx)
146 | 	case err := <-serverErr:
147 | 		return errors.Wrap(err, "listen and serve")
148 | 	}
149 | }
150 | 
151 | func (s *server) Reload(app *config.App) error {
152 | 	newConfig := &Config{}
153 | 	newConfig.From(app)
154 | 	if err := newConfig.Validate(); err != nil {
155 | 		return errors.Wrap(err, "validate config")
156 | 	}
157 | 	if s.Config().Address != newConfig.Address {
158 | 		return errors.New("address cannot be reloaded")
159 | 	}
160 | 
161 | 	s.SetConfig(newConfig)
162 | 
163 | 	return nil
164 | }
165 | 
166 | func (s *server) rss(w http.ResponseWriter, r *http.Request) {
167 | 	var err error
168 | 	ctx := telemetry.StartWith(r.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "rss")...)
169 | 	defer telemetry.End(ctx, err)
170 | 
171 | 	// Extract parameters.
172 | 	ps := r.URL.Query()
173 | 	labelFilters := ps["label_filter"]
174 | 	query := ps.Get("query")
175 | 
176 | 	// Forward query request to API.
177 | 	now := clk.Now()
178 | 	queryResult, err := s.Dependencies().API.Query(ctx, &api.QueryRequest{
179 | 		Query:        query,
180 | 		LabelFilters: labelFilters,
181 | 		Start:        now.Add(-24 * time.Hour),
182 | 		End:          now,
183 | 		Limit:        100,
184 | 	})
185 | 	if err != nil {
186 | 		http.Error(w, err.Error(), http.StatusBadRequest) // TODO: standardize error handling.
187 | 
188 | 		return
189 | 	}
190 | 
191 | 	// Render and convert to RSS.
192 | 	rssObj := &feeds.Feed{
193 | 		Title:       "Zenfeed RSS - " + ps.Encode(),
194 | 		Description: "Powered by Github Zenfeed - https://github.com/glidea/zenfeed. If you use Folo, please enable 'Appearance - Content - Render inline styles'",
195 | 		Items:       make([]*feeds.Item, 0, len(queryResult.Feeds)),
196 | 	}
197 | 
198 | 	buf := buffer.Get()
199 | 	defer buffer.Put(buf)
200 | 
201 | 	for _, feed := range queryResult.Feeds {
202 | 		buf.Reset()
203 | 
204 | 		if err = s.Config().contentHTMLTemplate.Execute(buf, feed.Labels.Map()); err != nil {
205 | 			http.Error(w, err.Error(), http.StatusInternalServerError)
206 | 
207 | 			return
208 | 		}
209 | 
210 | 		item := &feeds.Item{
211 | 			Title:   feed.Labels.Get(model.LabelTitle),
212 | 			Link:    &feeds.Link{Href: feed.Labels.Get(model.LabelLink)},
213 | 			Created: feed.Time, // NOTE: scrape time, not pub time.
214 | 			Content: buf.String(),
215 | 		}
216 | 
217 | 		rssObj.Items = append(rssObj.Items, item)
218 | 	}
219 | 
220 | 	if err = rssObj.WriteRss(w); err != nil {
221 | 		log.Error(ctx, errors.Wrap(err, "write rss response"))
222 | 
223 | 		return
224 | 	}
225 | }
226 | 
227 | type mockServer struct {
228 | 	component.Mock
229 | }
230 | 
231 | func (m *mockServer) Reload(app *config.App) error {
232 | 	return m.Called(app).Error(0)
233 | }
234 | 


--------------------------------------------------------------------------------
/pkg/llm/embedding_spliter.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package llm
 17 | 
 18 | import (
 19 | 	"math"
 20 | 	"slices"
 21 | 
 22 | 	"github.com/glidea/zenfeed/pkg/model"
 23 | )
 24 | 
// embeddingSpliter breaks one label set into one or more label sets whose
// values are small enough to be embedded.
type embeddingSpliter interface {
	// Split returns ls unchanged (as the only element) when no value is too
	// long; otherwise it returns one label set per chunk of each long value.
	Split(ls model.Labels) ([]model.Labels, error)
}
 28 | 
 29 | func newEmbeddingSpliter(maxLabelValueTokens, overlapTokens int) embeddingSpliter {
 30 | 	if maxLabelValueTokens <= 0 {
 31 | 		maxLabelValueTokens = 1024
 32 | 	}
 33 | 	if overlapTokens <= 0 {
 34 | 		overlapTokens = 64
 35 | 	}
 36 | 	if overlapTokens > maxLabelValueTokens {
 37 | 		overlapTokens = maxLabelValueTokens / 10
 38 | 	}
 39 | 
 40 | 	return &embeddingSpliterImpl{maxLabelValueTokens: maxLabelValueTokens, overlapTokens: overlapTokens}
 41 | }
 42 | 
// embeddingSpliterImpl is the embeddingSpliter created by newEmbeddingSpliter.
type embeddingSpliterImpl struct {
	// maxLabelValueTokens is the estimated token count above which a label
	// value is split, and the target size of each chunk.
	maxLabelValueTokens int
	// overlapTokens is the estimated number of tokens shared by two
	// consecutive chunks.
	overlapTokens       int
}
 47 | 
 48 | func (e *embeddingSpliterImpl) Split(ls model.Labels) ([]model.Labels, error) {
 49 | 	var (
 50 | 		short      = make(model.Labels, 0, len(ls))
 51 | 		long       = make(model.Labels, 0, 1)
 52 | 		longTokens = make([]int, 0, 1)
 53 | 	)
 54 | 	for _, l := range ls {
 55 | 		tokens := e.estimateTokens(l.Value)
 56 | 		if tokens <= e.maxLabelValueTokens {
 57 | 			short = append(short, l)
 58 | 		} else {
 59 | 			long = append(long, l)
 60 | 			longTokens = append(longTokens, tokens)
 61 | 		}
 62 | 	}
 63 | 	if len(long) == 0 {
 64 | 		return []model.Labels{ls}, nil
 65 | 	}
 66 | 
 67 | 	var (
 68 | 		common = short
 69 | 		splits = make([]model.Labels, 0, len(long)*2)
 70 | 	)
 71 | 	for i := range long {
 72 | 		parts := e.split(long[i].Value, longTokens[i])
 73 | 		for _, p := range parts {
 74 | 			com := slices.Clone(common)
 75 | 			s := append(com, model.Label{Key: long[i].Key, Value: p})
 76 | 			splits = append(splits, s)
 77 | 		}
 78 | 	}
 79 | 
 80 | 	return splits, nil
 81 | }
 82 | 
 83 | func (e *embeddingSpliterImpl) split(value string, tokens int) []string {
 84 | 	var (
 85 | 		results = make([]string, 0)
 86 | 		chars   = []rune(value)
 87 | 	)
 88 | 
 89 | 	// Estimate the number of characters per token
 90 | 	avgCharsPerToken := float64(len(chars)) / float64(tokens)
 91 | 	// Calculate the approximate number of characters corresponding to maxLabelValueTokens tokens.
 92 | 	charsPerSegment := int(float64(e.maxLabelValueTokens) * avgCharsPerToken)
 93 | 
 94 | 	// The number of characters corresponding to a fixed overlap of 64 tokens.
 95 | 	overlapChars := int(float64(e.overlapTokens) * avgCharsPerToken)
 96 | 
 97 | 	// Actual step length = segment length - overlap.
 98 | 	charStep := charsPerSegment - overlapChars
 99 | 
100 | 	for start := 0; start < len(chars); {
101 | 		end := min(start+charsPerSegment, len(chars))
102 | 
103 | 		segment := string(chars[start:end])
104 | 		results = append(results, segment)
105 | 
106 | 		if end == len(chars) {
107 | 			break
108 | 		}
109 | 		start += charStep
110 | 	}
111 | 
112 | 	return results
113 | }
114 | 
115 | func (e *embeddingSpliterImpl) estimateTokens(text string) int {
116 | 	latinChars := 0
117 | 	otherChars := 0
118 | 
119 | 	for _, r := range text {
120 | 		if r <= 127 {
121 | 			latinChars++
122 | 		} else {
123 | 			otherChars++
124 | 		}
125 | 	}
126 | 
127 | 	// Rough estimate:
128 | 	// - English and punctuation: about 0.25 tokens/char (4 characters ≈ 1 token).
129 | 	// - Chinese and other non-Latin characters: about 1.5 tokens/char.
130 | 	return int(math.Round(float64(latinChars)/4 + float64(otherChars)*3/2))
131 | }
132 | 


--------------------------------------------------------------------------------
/pkg/llm/embedding_spliter_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package llm
 17 | 
 18 | import (
 19 | 	"testing"
 20 | 
 21 | 	. "github.com/onsi/gomega"
 22 | 
 23 | 	"github.com/glidea/zenfeed/pkg/model"
 24 | 	"github.com/glidea/zenfeed/pkg/test"
 25 | )
 26 | 
 27 | func TestEmbeddingSpliter_Split(t *testing.T) {
 28 | 	RegisterTestingT(t)
 29 | 
 30 | 	type givenDetail struct {
 31 | 		maxLabelValueTokens int
 32 | 		overlapTokens       int
 33 | 	}
 34 | 	type whenDetail struct {
 35 | 		labels model.Labels
 36 | 	}
 37 | 	type thenExpected struct {
 38 | 		splits []model.Labels
 39 | 		err    string
 40 | 	}
 41 | 
 42 | 	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
 43 | 		{
 44 | 			Scenario: "Split labels with all short values",
 45 | 			Given:    "an embedding spliter with max token limit",
 46 | 			When:     "splitting labels with all values under token limit",
 47 | 			Then:     "should return original labels as single split",
 48 | 			GivenDetail: givenDetail{
 49 | 				maxLabelValueTokens: 1024,
 50 | 			},
 51 | 			WhenDetail: whenDetail{
 52 | 				labels: model.Labels{
 53 | 					{Key: "title", Value: "Short title"},
 54 | 					{Key: "description", Value: "Short description"},
 55 | 				},
 56 | 			},
 57 | 			ThenExpected: thenExpected{
 58 | 				splits: []model.Labels{
 59 | 					{
 60 | 						{Key: "title", Value: "Short title"},
 61 | 						{Key: "description", Value: "Short description"},
 62 | 					},
 63 | 				},
 64 | 			},
 65 | 		},
 66 | 		{
 67 | 			Scenario: "Split labels with one long value",
 68 | 			Given:    "an embedding spliter with max token limit",
 69 | 			When:     "splitting labels with one value exceeding token limit",
 70 | 			Then:     "should split the long value and combine with common labels",
 71 | 			GivenDetail: givenDetail{
 72 | 				maxLabelValueTokens: 10, // Small limit to force splitting.
 73 | 				overlapTokens:       1,
 74 | 			},
 75 | 			WhenDetail: whenDetail{
 76 | 				labels: model.Labels{
 77 | 					{Key: "title", Value: "Short title"},
 78 | 					{Key: "content", Value: "This is a long content that exceeds the token limit and needs to be split into multiple parts"},
 79 | 				},
 80 | 			},
 81 | 			ThenExpected: thenExpected{
 82 | 				splits: []model.Labels{
 83 | 					{
 84 | 						{Key: "title", Value: "Short title"},
 85 | 						{Key: "content", Value: "This is a long content that exceeds the "},
 86 | 					},
 87 | 					{
 88 | 						{Key: "title", Value: "Short title"},
 89 | 						{Key: "content", Value: "the token limit and needs to be split in"},
 90 | 					},
 91 | 					{
 92 | 						{Key: "title", Value: "Short title"},
 93 | 						{Key: "content", Value: "t into multiple parts"},
 94 | 					},
 95 | 				},
 96 | 			},
 97 | 		},
 98 | 		{
 99 | 			Scenario: "Handle non-Latin characters",
100 | 			Given:    "an embedding spliter with max token limit",
101 | 			When:     "splitting labels with non-Latin characters",
102 | 			Then:     "should correctly estimate tokens and split accordingly",
103 | 			GivenDetail: givenDetail{
104 | 				maxLabelValueTokens: 10, // Small limit to force splitting.
105 | 				overlapTokens:       2,
106 | 			},
107 | 			WhenDetail: whenDetail{
108 | 				labels: model.Labels{
109 | 					{Key: "title", Value: "Short title"},
110 | 					{Key: "content", Value: "中文内容需要被分割因为它超过了令牌限制"}, // Chinese content that needs to be split.
111 | 				},
112 | 			},
113 | 			ThenExpected: thenExpected{
114 | 				splits: []model.Labels{
115 | 					{
116 | 						{Key: "title", Value: "Short title"},
117 | 						{Key: "content", Value: "中文内容需要"},
118 | 					},
119 | 					{
120 | 						{Key: "title", Value: "Short title"},
121 | 						{Key: "content", Value: "要被分割因为"},
122 | 					},
123 | 					{
124 | 						{Key: "title", Value: "Short title"},
125 | 						{Key: "content", Value: "为它超过了令"},
126 | 					},
127 | 					{
128 | 						{Key: "title", Value: "Short title"},
129 | 						{Key: "content", Value: "令牌限制"},
130 | 					},
131 | 				},
132 | 			},
133 | 		},
134 | 	}
135 | 
136 | 	for _, tt := range tests {
137 | 		t.Run(tt.Scenario, func(t *testing.T) {
138 | 			// Given.
139 | 			spliter := newEmbeddingSpliter(tt.GivenDetail.maxLabelValueTokens, tt.GivenDetail.overlapTokens)
140 | 
141 | 			// When.
142 | 			splits, err := spliter.Split(tt.WhenDetail.labels)
143 | 
144 | 			// Then.
145 | 			if tt.ThenExpected.err != "" {
146 | 				Expect(err).NotTo(BeNil())
147 | 				Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
148 | 			} else {
149 | 				Expect(err).To(BeNil())
150 | 				Expect(len(splits)).To(Equal(len(tt.ThenExpected.splits)))
151 | 
152 | 				for i, expectedSplit := range tt.ThenExpected.splits {
153 | 					Expect(splits[i]).To(Equal(expectedSplit))
154 | 				}
155 | 			}
156 | 		})
157 | 	}
158 | }
159 | 


--------------------------------------------------------------------------------
/pkg/llm/openai.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package llm
 17 | 
 18 | import (
 19 | 	"context"
 20 | 	"encoding/json"
 21 | 
 22 | 	"github.com/pkg/errors"
 23 | 	oai "github.com/sashabaranov/go-openai"
 24 | 
 25 | 	"github.com/glidea/zenfeed/pkg/component"
 26 | 	"github.com/glidea/zenfeed/pkg/model"
 27 | 	"github.com/glidea/zenfeed/pkg/telemetry"
 28 | 	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
 29 | 	runtimeutil "github.com/glidea/zenfeed/pkg/util/runtime"
 30 | )
 31 | 
 32 | // openai is the LLM implementation that talks to an OpenAI-compatible API.
 33 | type openai struct {
 34 | 	*component.Base[Config, struct{}]
 35 | 
 36 | 	// client performs the chat completion and embedding requests.
 37 | 	client *oai.Client
 38 | 	// embeddingSpliter splits labels into chunks before they are embedded.
 39 | 	embeddingSpliter embeddingSpliter
 40 | }
 41 | 
 42 | // newOpenAI builds an OpenAI-backed LLM from c.
 43 | //
 44 | // The API key always comes from c.APIKey. c.Endpoint replaces the client's
 45 | // base URL only when it is non-empty, so an unset endpoint keeps the library
 46 | // default instead of sending requests to an empty base URL.
 47 | func newOpenAI(c *Config) LLM {
 48 | 	// Arguments of newEmbeddingSpliter: the token budget of a single label
 49 | 	// value and the token overlap between consecutive chunks.
 50 | 	const (
 51 | 		maxLabelValueTokens = 1536
 52 | 		overlapTokens       = 64
 53 | 	)
 54 | 
 55 | 	clientConfig := oai.DefaultConfig(c.APIKey)
 56 | 	if c.Endpoint != "" {
 57 | 		clientConfig.BaseURL = c.Endpoint
 58 | 	}
 59 | 	client := oai.NewClientWithConfig(clientConfig)
 60 | 	spliter := newEmbeddingSpliter(maxLabelValueTokens, overlapTokens)
 61 | 
 62 | 	return &openai{
 63 | 		Base: component.New(&component.BaseConfig[Config, struct{}]{
 64 | 			Name:     "LLM/openai",
 65 | 			Instance: c.Name,
 66 | 			Config:   c,
 67 | 		}),
 68 | 		client:           client,
 69 | 		embeddingSpliter: spliter,
 70 | 	}
 71 | }
 55 | 
 56 | // String sends messages to the configured chat model and returns the content
 57 | // of the first completion choice.
 58 | //
 59 | // Every message is submitted with the user role, in the given order. The call
 60 | // fails when no model is configured, when the request fails, or when the
 61 | // response carries no choices. Reported token usage is added to the metrics.
 62 | func (o *openai) String(ctx context.Context, messages []string) (value string, err error) {
 63 | 	ctx = telemetry.StartWith(ctx, append(o.TelemetryLabels(), telemetrymodel.KeyOperation, "String")...)
 64 | 	defer func() { telemetry.End(ctx, err) }()
 65 | 
 66 | 	cfg := o.Config()
 67 | 	if cfg.Model == "" {
 68 | 		return "", errors.New("model is not set")
 69 | 	}
 70 | 
 71 | 	// Wrap each plain string into a user-role chat message.
 72 | 	chat := make([]oai.ChatCompletionMessage, len(messages))
 73 | 	for i, content := range messages {
 74 | 		chat[i] = oai.ChatCompletionMessage{Role: oai.ChatMessageRoleUser, Content: content}
 75 | 	}
 76 | 
 77 | 	resp, err := o.client.CreateChatCompletion(ctx, oai.ChatCompletionRequest{
 78 | 		Model:       cfg.Model,
 79 | 		Messages:    chat,
 80 | 		Temperature: cfg.Temperature,
 81 | 	})
 82 | 	switch {
 83 | 	case err != nil:
 84 | 		return "", errors.Wrap(err, "create chat completion")
 85 | 	case len(resp.Choices) == 0:
 86 | 		return "", errors.New("no completion choices returned")
 87 | 	}
 88 | 
 89 | 	// Record the token usage reported for this request.
 90 | 	usage := resp.Usage
 91 | 	metricLabels := []string{o.Name(), o.Instance(), "String"}
 92 | 	promptTokens.WithLabelValues(metricLabels...).Add(float64(usage.PromptTokens))
 93 | 	completionTokens.WithLabelValues(metricLabels...).Add(float64(usage.CompletionTokens))
 94 | 	totalTokens.WithLabelValues(metricLabels...).Add(float64(usage.TotalTokens))
 95 | 
 96 | 	return resp.Choices[0].Message.Content, nil
 97 | }
 93 | 
 94 | // EmbeddingLabels splits labels with the embedding spliter and embeds each
 95 | // split, returning one vector per split in split order.
 96 | //
 97 | // Every split is JSON-encoded before it is embedded. The first failure aborts
 98 | // the call and no partial result is returned.
 99 | func (o *openai) EmbeddingLabels(ctx context.Context, labels model.Labels) (value [][]float32, err error) {
100 | 	ctx = telemetry.StartWith(ctx, append(o.TelemetryLabels(), telemetrymodel.KeyOperation, "EmbeddingLabels")...)
101 | 	defer func() { telemetry.End(ctx, err) }()
102 | 
103 | 	if o.Config().EmbeddingModel == "" {
104 | 		return nil, errors.New("embedding model is not set")
105 | 	}
106 | 
107 | 	parts, err := o.embeddingSpliter.Split(labels)
108 | 	if err != nil {
109 | 		return nil, errors.Wrap(err, "split embedding")
110 | 	}
111 | 
112 | 	vectors := make([][]float32, len(parts))
113 | 	for i, part := range parts {
114 | 		payload := runtimeutil.Must1(json.Marshal(part))
115 | 		if vectors[i], err = o.Embedding(ctx, string(payload)); err != nil {
116 | 			return nil, errors.Wrap(err, "embedding")
117 | 		}
118 | 	}
119 | 
120 | 	return vectors, nil
121 | }
119 | 
120 | // Embedding returns the embedding vector of s computed by the configured
121 | // embedding model.
122 | //
123 | // It fails when no embedding model is configured, when the request fails, or
124 | // when the response holds no data. Reported token usage is added to the
125 | // metrics.
126 | func (o *openai) Embedding(ctx context.Context, s string) (value []float32, err error) {
127 | 	ctx = telemetry.StartWith(ctx, append(o.TelemetryLabels(), telemetrymodel.KeyOperation, "Embedding")...)
128 | 	defer func() { telemetry.End(ctx, err) }()
129 | 
130 | 	embeddingModel := o.Config().EmbeddingModel
131 | 	if embeddingModel == "" {
132 | 		return nil, errors.New("embedding model is not set")
133 | 	}
134 | 
135 | 	req := oai.EmbeddingRequest{
136 | 		Input:          []string{s},
137 | 		Model:          oai.EmbeddingModel(embeddingModel),
138 | 		EncodingFormat: oai.EmbeddingEncodingFormatFloat,
139 | 	}
140 | 	resp, err := o.client.CreateEmbeddings(ctx, req)
141 | 	switch {
142 | 	case err != nil:
143 | 		return nil, errors.Wrap(err, "create embeddings")
144 | 	case len(resp.Data) == 0:
145 | 		return nil, errors.New("no embedding data returned")
146 | 	}
147 | 
148 | 	// Record the token usage reported for this request.
149 | 	usage := resp.Usage
150 | 	metricLabels := []string{o.Name(), o.Instance(), "Embedding"}
151 | 	promptTokens.WithLabelValues(metricLabels...).Add(float64(usage.PromptTokens))
152 | 	completionTokens.WithLabelValues(metricLabels...).Add(float64(usage.CompletionTokens))
153 | 	totalTokens.WithLabelValues(metricLabels...).Add(float64(usage.TotalTokens))
154 | 
155 | 	return resp.Data[0].Embedding, nil
156 | }
147 | 


--------------------------------------------------------------------------------
/pkg/notify/channel/channel.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package channel
 17 | 
 18 | import (
 19 | 	"context"
 20 | 
 21 | 	"github.com/pkg/errors"
 22 | 
 23 | 	"github.com/glidea/zenfeed/pkg/component"
 24 | 	"github.com/glidea/zenfeed/pkg/notify/route"
 25 | 	"github.com/glidea/zenfeed/pkg/telemetry"
 26 | 	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
 27 | )
 28 | 
 29 | // --- Interface code block ---
 30 | // Channel is a notification channel component: a component that can deliver a
 31 | // feed group to a receiver.
 32 | type Channel interface {
 33 | 	component.Component
 34 | 	sender
 35 | }
 36 | 
 37 | // sender delivers one feed group to one receiver.
 38 | type sender interface {
 39 | 	// Send delivers group to receiver and reports any delivery failure.
 40 | 	Send(ctx context.Context, receiver Receiver, group *route.FeedGroup) error
 41 | }
 38 | 
 39 | // Config holds the settings of the notification channels.
 40 | type Config struct {
 41 | 	// Email configures the email channel.
 42 | 	Email *Email
 43 | }
 44 | 
 45 | // Validate checks the email settings, but only when the email channel reports
 46 | // itself as enabled.
 47 | func (c *Config) Validate() error {
 48 | 	if !c.Email.Enabled() {
 49 | 		return nil
 50 | 	}
 51 | 
 52 | 	// errors.Wrap returns nil for a nil error, so a valid config yields nil.
 53 | 	return errors.Wrap(c.Email.Validate(), "validate email")
 54 | }
 52 | 
 53 | // Receiver identifies where a notification is delivered: an email address or
 54 | // a webhook target.
 55 | type Receiver struct {
 56 | 	Email   string
 57 | 	Webhook *WebhookReceiver
 58 | }
 59 | 
 60 | // Validate rejects a receiver that sets both an email address and a webhook,
 61 | // and validates the webhook target when one is present.
 62 | func (r *Receiver) Validate() error {
 63 | 	if r.Webhook == nil {
 64 | 		return nil
 65 | 	}
 66 | 	if r.Email != "" {
 67 | 		return errors.New("email and webhook cannot both be set")
 68 | 	}
 69 | 
 70 | 	// errors.Wrap returns nil for a nil error, so a valid webhook yields nil.
 71 | 	return errors.Wrap(r.Webhook.Validate(), "validate webhook")
 72 | }
 70 | 
 71 | // Dependencies lists what a channel needs from other components; it is empty.
 72 | type Dependencies struct{}
 73 | 
 74 | // --- Factory code block ---
 75 | 
 76 | // Factory creates Channel instances from a Config and Dependencies.
 77 | type Factory component.Factory[Channel, Config, Dependencies]
 78 | 
 79 | // NewFactory returns the channel factory.
 80 | //
 81 | // Without arguments it yields the real factory. When any mock option is
 82 | // passed, the returned factory ignores its inputs and produces mock channels
 83 | // with those options applied.
 84 | func NewFactory(mockOn ...component.MockOption) Factory {
 85 | 	if len(mockOn) == 0 {
 86 | 		return component.FactoryFunc[Channel, Config, Dependencies](new)
 87 | 	}
 88 | 
 89 | 	mockFactory := func(instance string, config *Config, dependencies Dependencies) (Channel, error) {
 90 | 		mocked := &mockChannel{}
 91 | 		component.MockOptions(mockOn).Apply(&mocked.Mock)
 92 | 
 93 | 		return mocked, nil
 94 | 	}
 95 | 
 96 | 	return component.FactoryFunc[Channel, Config, Dependencies](mockFactory)
 97 | }
 90 | 
 91 | // new validates config and assembles the aggregated channel.
 92 | //
 93 | // The email sender is created only when email is enabled in config; the
 94 | // webhook sender is always created.
 95 | func new(instance string, config *Config, dependencies Dependencies) (Channel, error) {
 96 | 	if err := config.Validate(); err != nil {
 97 | 		return nil, errors.Wrap(err, "validate config")
 98 | 	}
 99 | 
100 | 	// Stays nil when email is disabled; Send checks for that.
101 | 	var emailSender sender
102 | 	if config.Email.Enabled() {
103 | 		created, err := newEmail(config.Email, dependencies)
104 | 		if err != nil {
105 | 			return nil, errors.Wrap(err, "new email")
106 | 		}
107 | 		emailSender = created
108 | 	}
109 | 
110 | 	base := component.New(&component.BaseConfig[Config, Dependencies]{
111 | 		Name:         "NotifyChannel",
112 | 		Instance:     instance,
113 | 		Config:       config,
114 | 		Dependencies: dependencies,
115 | 	})
116 | 
117 | 	return &aggrChannel{Base: base, email: emailSender, webhook: newWebhook()}, nil
118 | }
116 | 
117 | // --- Implementation code block ---
118 | // aggrChannel routes a notification to the sender that matches the receiver.
119 | type aggrChannel struct {
120 | 	*component.Base[Config, Dependencies]
121 | 
122 | 	// email is nil when the email channel is not enabled.
123 | 	email sender
124 | 	// webhook delivers to webhook receivers.
125 | 	webhook sender
126 | }
127 | 
128 | // Send delivers group through the first sender matching receiver: email when
129 | // an address is set and the email sender exists, otherwise webhook when a
130 | // webhook target is set. A receiver matching neither is skipped without error.
131 | func (c *aggrChannel) Send(ctx context.Context, receiver Receiver, group *route.FeedGroup) error {
132 | 	switch {
133 | 	case receiver.Email != "" && c.email != nil:
134 | 		return c.send(ctx, receiver, group, c.email, "email")
135 | 	case receiver.Webhook != nil && c.webhook != nil:
136 | 		return c.send(ctx, receiver, group, c.webhook, "webhook")
137 | 	default:
138 | 		return nil
139 | 	}
140 | }
133 | 
134 | // send runs sender.Send inside a telemetry span and wraps its error.
135 | //
136 | // senderName ("email" or "webhook" at the call sites) is appended to the
137 | // telemetry labels.
138 | // NOTE(review): the values appended here are KeyOperation, "channel" and
139 | // senderName, three in total; confirm this is the pairing telemetry expects.
140 | func (c *aggrChannel) send(
141 | 	ctx context.Context,
142 | 	receiver Receiver,
143 | 	group *route.FeedGroup,
144 | 	sender sender,
145 | 	senderName string,
146 | ) (err error) {
147 | 	labels := append(c.TelemetryLabels(), telemetrymodel.KeyOperation, "channel", senderName)
148 | 	ctx = telemetry.StartWith(ctx, labels...)
149 | 	defer func() { telemetry.End(ctx, err) }()
150 | 
151 | 	// errors.Wrap returns nil for a nil error, so success passes through.
152 | 	return errors.Wrap(sender.Send(ctx, receiver, group), "send")
153 | }
149 | 
150 | // mockChannel is the test double produced by NewFactory when mock options are
151 | // supplied.
152 | type mockChannel struct {
153 | 	component.Mock
154 | }
155 | 
156 | // Send records the call on the mock and returns the error configured for it.
157 | func (m *mockChannel) Send(ctx context.Context, receiver Receiver, group *route.FeedGroup) error {
158 | 	return m.Called(ctx, receiver, group).Error(0)
159 | }
159 | 


--------------------------------------------------------------------------------
/pkg/notify/channel/webhook.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package channel
17 | 
18 | import (
19 | 	"bytes"
20 | 	"context"
21 | 	"encoding/json"
22 | 	"net/http"
23 | 
24 | 	"github.com/pkg/errors"
25 | 
26 | 	"github.com/glidea/zenfeed/pkg/model"
27 | 	"github.com/glidea/zenfeed/pkg/notify/route"
28 | 	runtimeutil "github.com/glidea/zenfeed/pkg/util/runtime"
29 | )
30 | 
31 | // WebhookReceiver is the webhook target of a notification.
32 | type WebhookReceiver struct {
33 | 	// URL is the endpoint the notification is posted to.
34 | 	URL string `json:"url"`
35 | }
36 | 
37 | // Validate requires the URL to be set.
38 | func (r *WebhookReceiver) Validate() error {
39 | 	if r.URL != "" {
40 | 		return nil
41 | 	}
42 | 
43 | 	return errors.New("webhook.url is required")
44 | }
42 | 
43 | // webhookBody is the JSON document posted to a webhook; its fields are copied
44 | // from the feed group being delivered.
45 | type webhookBody struct {
46 | 	Group   string        `json:"group"`
47 | 	Labels  model.Labels  `json:"labels"`
48 | 	Summary string        `json:"summary"`
49 | 	Feeds   []*route.Feed `json:"feeds"`
50 | }
51 | 
52 | // newWebhook returns a webhook sender backed by a zero-value HTTP client.
53 | func newWebhook() sender {
54 | 	client := &http.Client{}
55 | 
56 | 	return &webhook{httpClient: client}
57 | }
58 | 
59 | // webhook delivers feed groups by HTTP POST.
60 | type webhook struct {
61 | 	// httpClient executes the outgoing requests.
62 | 	httpClient *http.Client
63 | }
59 | 
60 | // Send posts group as a JSON document to the receiver's webhook URL.
61 | //
62 | // The body carries the group's name, labels, summary and feeds. Delivery
63 | // succeeds only when the endpoint answers 200 OK; any other status is
64 | // reported with its status line so it can be told apart from a transport
65 | // failure. A receiver without a webhook target or a nil group is rejected
66 | // with an error instead of panicking.
67 | func (w *webhook) Send(ctx context.Context, receiver Receiver, group *route.FeedGroup) error {
68 | 	// Guard against inputs that would otherwise be dereferenced below.
69 | 	if receiver.Webhook == nil {
70 | 		return errors.New("webhook receiver is not set")
71 | 	}
72 | 	if group == nil {
73 | 		return errors.New("feed group is nil")
74 | 	}
75 | 
76 | 	// Prepare request.
77 | 	body := &webhookBody{
78 | 		Group:   group.Name,
79 | 		Labels:  group.Labels,
80 | 		Summary: group.Summary,
81 | 		Feeds:   group.Feeds,
82 | 	}
83 | 	b := runtimeutil.Must1(json.Marshal(body))
84 | 	req, err := http.NewRequestWithContext(ctx, http.MethodPost, receiver.Webhook.URL, bytes.NewReader(b))
85 | 	if err != nil {
86 | 		return errors.Wrap(err, "create request")
87 | 	}
88 | 	req.Header.Set("Content-Type", "application/json")
89 | 
90 | 	// Send request.
91 | 	resp, err := w.httpClient.Do(req)
92 | 	if err != nil {
93 | 		return errors.Wrap(err, "send request")
94 | 	}
95 | 	defer func() { _ = resp.Body.Close() }()
96 | 
97 | 	// Handle response: include the status so the failure is diagnosable.
98 | 	if resp.StatusCode != http.StatusOK {
99 | 		return errors.Errorf("send request: unexpected response status %q", resp.Status)
100 | 	}
101 | 
102 | 	return nil
103 | }
89 | 


--------------------------------------------------------------------------------
/pkg/schedule/rule/periodic.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package rule
 17 | 
 18 | import (
 19 | 	"context"
 20 | 	"time"
 21 | 
 22 | 	"github.com/pkg/errors"
 23 | 
 24 | 	"github.com/glidea/zenfeed/pkg/component"
 25 | 	"github.com/glidea/zenfeed/pkg/storage/feed/block"
 26 | 	"github.com/glidea/zenfeed/pkg/telemetry"
 27 | 	"github.com/glidea/zenfeed/pkg/telemetry/log"
 28 | 	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
 29 | 	timeutil "github.com/glidea/zenfeed/pkg/util/time"
 30 | )
 31 | 
 32 | // newPeriodic creates the rule that runs once per configured daily window.
 33 | // It never returns an error.
 34 | func newPeriodic(instance string, config *Config, dependencies Dependencies) (Rule, error) {
 35 | 	base := component.New(&component.BaseConfig[Config, Dependencies]{
 36 | 		Name:         "PeriodicRuler",
 37 | 		Instance:     instance,
 38 | 		Config:       config,
 39 | 		Dependencies: dependencies,
 40 | 	})
 41 | 
 42 | 	return &periodic{Base: base}, nil
 43 | }
 44 | 
 45 | // periodic is the Rule implementation driven by the EveryDay time range.
 46 | type periodic struct {
 47 | 	*component.Base[Config, Dependencies]
 48 | }
 46 | 
 47 | func (r *periodic) Run() (err error) {
 48 | 	ctx := telemetry.StartWith(r.Context(), append(r.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
 49 | 	defer func() { telemetry.End(ctx, err) }()
 50 | 	r.MarkReady()
 51 | 
 52 | 	iter := func(now time.Time) {
 53 | 		config := r.Config()
 54 | 		today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())
 55 | 		end := time.Date(today.Year(), today.Month(), today.Day(),
 56 | 			config.end.Hour(), config.end.Minute(), 0, 0, today.Location())
 57 | 
 58 | 		buffer := 30 * time.Minute
 59 | 		endPlusBuffer := end.Add(buffer)
 60 | 		if now.Before(end) || now.After(endPlusBuffer) {
 61 | 			return
 62 | 		}
 63 | 		if err := r.execute(ctx, now); err != nil {
 64 | 			log.Warn(ctx, errors.Wrap(err, "execute, retry in next time"))
 65 | 		}
 66 | 		log.Debug(ctx, "rule executed", "now", now, "end", end)
 67 | 	}
 68 | 
 69 | 	offset := timeutil.Random(time.Minute)
 70 | 	log.Debug(ctx, "computed watch offset", "offset", offset)
 71 | 
 72 | 	tick := time.NewTimer(offset)
 73 | 	defer tick.Stop()
 74 | 	for {
 75 | 		select {
 76 | 		case <-ctx.Done():
 77 | 			return nil
 78 | 		case now := <-tick.C:
 79 | 			iter(now)
 80 | 			tick.Reset(5 * time.Minute)
 81 | 		}
 82 | 	}
 83 | }
 84 | 
 85 | func (r *periodic) execute(ctx context.Context, now time.Time) error {
 86 | 	// Determine the query interval based on now and config's start, end and crossDay.
 87 | 	config := r.Config()
 88 | 	today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())
 89 | 	var start, end time.Time
 90 | 	if config.crossDay {
 91 | 		yesterday := today.AddDate(0, 0, -1)
 92 | 		start = time.Date(yesterday.Year(), yesterday.Month(), yesterday.Day(),
 93 | 			config.start.Hour(), config.start.Minute(), 0, 0, yesterday.Location())
 94 | 		end = time.Date(today.Year(), today.Month(), today.Day(),
 95 | 			config.end.Hour(), config.end.Minute(), 0, 0, today.Location())
 96 | 	} else {
 97 | 		start = time.Date(today.Year(), today.Month(), today.Day(),
 98 | 			config.start.Hour(), config.start.Minute(), 0, 0, today.Location())
 99 | 		end = time.Date(today.Year(), today.Month(), today.Day(),
100 | 			config.end.Hour(), config.end.Minute(), 0, 0, today.Location())
101 | 	}
102 | 
103 | 	// Query.
104 | 	ctx = log.With(ctx, "start", start, "end", end)
105 | 	feeds, err := r.Dependencies().FeedStorage.Query(ctx, block.QueryOptions{
106 | 		Query:        config.Query,
107 | 		Threshold:    config.Threshold,
108 | 		LabelFilters: config.LabelFilters,
109 | 		Start:        start,
110 | 		End:          end,
111 | 		Limit:        500,
112 | 	})
113 | 	if err != nil {
114 | 		return errors.Wrap(err, "query")
115 | 	}
116 | 	if len(feeds) == 0 {
117 | 		log.Debug(ctx, "no feeds found")
118 | 
119 | 		return nil
120 | 	}
121 | 
122 | 	// Attach labels to feeds.
123 | 	for _, feed := range feeds {
124 | 		feed.Labels = append(feed.Labels, config.labels...)
125 | 		feed.Labels.EnsureSorted()
126 | 	}
127 | 
128 | 	// Notify.
129 | 	r.Dependencies().Out <- &Result{
130 | 		Rule:  config.Name,
131 | 		Time:  start,
132 | 		Feeds: feeds,
133 | 	}
134 | 	log.Debug(ctx, "rule notified", "feeds", len(feeds))
135 | 
136 | 	return nil
137 | }
138 | 


--------------------------------------------------------------------------------
/pkg/schedule/rule/rule.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package rule
 17 | 
 18 | import (
 19 | 	"strings"
 20 | 	"time"
 21 | 
 22 | 	"github.com/pkg/errors"
 23 | 
 24 | 	"github.com/glidea/zenfeed/pkg/component"
 25 | 	"github.com/glidea/zenfeed/pkg/model"
 26 | 	"github.com/glidea/zenfeed/pkg/storage/feed"
 27 | 	"github.com/glidea/zenfeed/pkg/storage/feed/block"
 28 | )
 29 | 
 30 | // --- Interface code block ---
 31 | // Rule is a scheduling rule component that exposes its configuration.
 32 | type Rule interface {
 33 | 	component.Component
 34 | 	Config() *Config
 35 | }
 36 | 
 37 | // Config describes one rule. A non-empty EveryDay selects the periodic rule;
 38 | // otherwise the rule is a watch rule driven by WatchInterval. Setting both is
 39 | // rejected by Validate.
 40 | type Config struct {
 41 | 	// Name identifies the rule and is required.
 42 | 	Name string
 43 | 	// Query, Threshold and LabelFilters are passed to the feed storage query.
 44 | 	// A zero Threshold is replaced by 0.5 in Validate.
 45 | 	Query        string
 46 | 	Threshold    float32
 47 | 	LabelFilters []string
 48 | 	// Labels are appended to every feed the rule emits; labels is their
 49 | 	// model.Labels form, filled in by Validate.
 50 | 	Labels map[string]string
 51 | 	labels model.Labels
 52 | 
 53 | 	// Periodic type.
 54 | 	EveryDay string // e.g. "00:00~23:59", or "-22:00~7:00" (yesterday 22:00 to today 07:00)
 55 | 	// start, end and crossDay are parsed from EveryDay by Validate; crossDay
 56 | 	// is set when the start time carries the yesterday prefix.
 57 | 	start, end time.Time
 58 | 	crossDay   bool
 59 | 
 60 | 	// Watch type.
 61 | 	// Validate raises WatchInterval to 10 minutes when it is smaller.
 62 | 	WatchInterval time.Duration
 63 | }
 64 | 
 65 | // Pieces of the EveryDay syntax: the separator between start and end, the
 66 | // prefix marking a start time on the previous day, and the clock layout.
 67 | var (
 68 | 	timeSep             = "~"
 69 | 	timeYesterdayPrefix = "-"
 70 | 	timeFmt             = "15:04"
 71 | )
 58 | 
 59 | func (c *Config) Validate() error { //nolint:cyclop,gocognit
 60 | 	if c.Name == "" {
 61 | 		return errors.New("name is required")
 62 | 	}
 63 | 	if c.Threshold == 0 {
 64 | 		c.Threshold = 0.5
 65 | 	}
 66 | 	if c.Threshold < 0 || c.Threshold > 1 {
 67 | 		return errors.New("threshold must be between 0 and 1")
 68 | 	}
 69 | 	if len(c.Labels) > 0 {
 70 | 		c.labels.FromMap(c.Labels)
 71 | 	}
 72 | 	if c.EveryDay != "" && c.WatchInterval != 0 {
 73 | 		return errors.New("every_day and watch_interval cannot both be set")
 74 | 	}
 75 | 	switch c.EveryDay {
 76 | 	case "":
 77 | 		if c.WatchInterval < 10*time.Minute {
 78 | 			c.WatchInterval = 10 * time.Minute
 79 | 		}
 80 | 	default:
 81 | 		times := strings.Split(c.EveryDay, timeSep)
 82 | 		if len(times) != 2 {
 83 | 			return errors.New("every_day must be in format 'start~end'")
 84 | 		}
 85 | 
 86 | 		start, end := strings.TrimSpace(times[0]), strings.TrimSpace(times[1])
 87 | 		isYesterday := strings.HasPrefix(start, timeYesterdayPrefix)
 88 | 		if isYesterday {
 89 | 			start = start[1:] // Remove the "-" prefix
 90 | 			c.crossDay = true
 91 | 		}
 92 | 
 93 | 		// Parse start time.
 94 | 		startTime, err := time.ParseInLocation(timeFmt, start, time.Local)
 95 | 		if err != nil {
 96 | 			return errors.Wrap(err, "parse start time")
 97 | 		}
 98 | 
 99 | 		// Parse end time.
100 | 		endTime, err := time.ParseInLocation(timeFmt, end, time.Local)
101 | 		if err != nil {
102 | 			return errors.Wrap(err, "parse end time")
103 | 		}
104 | 
105 | 		// For non-yesterday time range, end time must be after start time.
106 | 		if !isYesterday && endTime.Before(startTime) {
107 | 			return errors.New("end time must be after start time")
108 | 		}
109 | 
110 | 		c.start, c.end = startTime, endTime
111 | 	}
112 | 
113 | 	return nil
114 | }
115 | 
116 | // Dependencies are the collaborators a rule needs.
117 | type Dependencies struct {
118 | 	// FeedStorage is queried for the feeds matching the rule.
119 | 	FeedStorage feed.Storage
120 | 	// Out receives the results the rule produces.
121 | 	Out chan<- *Result
122 | }
123 | 
124 | // Result is one batch of feeds produced by a rule.
125 | type Result struct {
126 | 	// Rule is the name of the rule that produced the batch.
127 | 	Rule string
128 | 	// Time is the start of the time range the feeds belong to, as set by the
129 | 	// rule implementations in this package.
130 | 	Time  time.Time
131 | 	Feeds []*block.FeedVO
132 | }
126 | 
127 | // --- Factory code block ---
128 | 
129 | // Factory creates Rule instances from a Config and Dependencies.
130 | type Factory component.Factory[Rule, Config, Dependencies]
131 | 
132 | // NewFactory returns the rule factory.
133 | //
134 | // Without arguments it yields the real factory. When any mock option is
135 | // passed, the returned factory ignores its inputs and produces mock rules
136 | // with those options applied.
137 | func NewFactory(mockOn ...component.MockOption) Factory {
138 | 	if len(mockOn) == 0 {
139 | 		return component.FactoryFunc[Rule, Config, Dependencies](new)
140 | 	}
141 | 
142 | 	mockFactory := func(instance string, config *Config, dependencies Dependencies) (Rule, error) {
143 | 		mocked := &mockRule{}
144 | 		component.MockOptions(mockOn).Apply(&mocked.Mock)
145 | 
146 | 		return mocked, nil
147 | 	}
148 | 
149 | 	return component.FactoryFunc[Rule, Config, Dependencies](mockFactory)
150 | }
145 | 
146 | // new validates config and builds the matching rule: a watch rule when
147 | // EveryDay is empty, a periodic rule otherwise.
148 | func new(instance string, config *Config, dependencies Dependencies) (Rule, error) {
149 | 	if err := config.Validate(); err != nil {
150 | 		return nil, errors.Wrap(err, "validate config")
151 | 	}
152 | 
153 | 	if config.EveryDay == "" {
154 | 		return newWatch(instance, config, dependencies)
155 | 	}
156 | 
157 | 	return newPeriodic(instance, config, dependencies)
158 | }
158 | 
159 | // --- Implementation code block ---
160 | // mockRule is the test double produced by NewFactory when mock options are
161 | // supplied.
162 | type mockRule struct {
163 | 	component.Mock
164 | }
165 | 
166 | // Config records the call on the mock and returns its first configured return
167 | // value, which must be a *Config.
168 | func (m *mockRule) Config() *Config {
169 | 	configured := m.Called().Get(0)
170 | 
171 | 	return configured.(*Config)
172 | }
169 | 


--------------------------------------------------------------------------------
/pkg/schedule/rule/watch.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package rule
 17 | 
 18 | import (
 19 | 	"context"
 20 | 	"time"
 21 | 
 22 | 	"github.com/pkg/errors"
 23 | 
 24 | 	"github.com/glidea/zenfeed/pkg/component"
 25 | 	"github.com/glidea/zenfeed/pkg/storage/feed/block"
 26 | 	"github.com/glidea/zenfeed/pkg/telemetry"
 27 | 	"github.com/glidea/zenfeed/pkg/telemetry/log"
 28 | 	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
 29 | 	timeutil "github.com/glidea/zenfeed/pkg/util/time"
 30 | )
 31 | 
 32 | // newWatch creates the rule that re-queries recent feeds at every watch
 33 | // interval. It never returns an error.
 34 | func newWatch(instance string, config *Config, dependencies Dependencies) (Rule, error) {
 35 | 	base := component.New(&component.BaseConfig[Config, Dependencies]{
 36 | 		Name:         "WatchRuler",
 37 | 		Instance:     instance,
 38 | 		Config:       config,
 39 | 		Dependencies: dependencies,
 40 | 	})
 41 | 
 42 | 	return &watch{Base: base}, nil
 43 | }
 44 | 
 45 | // watch is the Rule implementation driven by WatchInterval.
 46 | type watch struct {
 47 | 	*component.Base[Config, Dependencies]
 48 | }
 46 | 
 47 | func (r *watch) Run() (err error) {
 48 | 	ctx := telemetry.StartWith(r.Context(), append(r.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
 49 | 	defer func() { telemetry.End(ctx, err) }()
 50 | 	r.MarkReady()
 51 | 
 52 | 	iter := func(now time.Time) {
 53 | 		config := r.Config()
 54 | 		end := time.Unix(now.Unix(), 0).Truncate(config.WatchInterval)
 55 | 		// Interval 0, 1 are retry, to ensure success.
 56 | 		// That means, one execution result at least send 3 times.
 57 | 		// So the customer need to deduplicate the result by themselves.
 58 | 		start := end.Add(-3 * config.WatchInterval)
 59 | 
 60 | 		if err := r.execute(ctx, start, end); err != nil {
 61 | 			log.Warn(ctx, errors.Wrap(err, "execute, retry in next time"))
 62 | 		}
 63 | 		log.Debug(ctx, "watch rule executed", "start", start, "end", end)
 64 | 	}
 65 | 
 66 | 	offset := timeutil.Random(time.Minute)
 67 | 	log.Debug(ctx, "computed watch offset", "offset", offset)
 68 | 
 69 | 	tick := time.NewTimer(offset)
 70 | 	defer tick.Stop()
 71 | 	for {
 72 | 		select {
 73 | 		case <-r.Context().Done():
 74 | 			return nil
 75 | 		case now := <-tick.C:
 76 | 			iter(now)
 77 | 			tick.Reset(r.Config().WatchInterval)
 78 | 		}
 79 | 	}
 80 | }
 81 | 
 82 | // execute queries the feeds between start and end and publishes them on the
 83 | // Out channel, one Result per watch interval.
 84 | //
 85 | // The rule's labels are appended to every feed found. Feeds are grouped by
 86 | // their own time truncated to WatchInterval, and each group is published with
 87 | // that truncated time as the Result's Time. Nothing is published when the
 88 | // query returns no feeds.
 89 | //
 90 | // A send on Out is abandoned with an error once ctx is done, so a consumer
 91 | // that stopped reading cannot block the rule forever.
 92 | func (r *watch) execute(ctx context.Context, start, end time.Time) error {
 93 | 	ctx = log.With(ctx, "start", start, "end", end)
 94 | 
 95 | 	// Query.
 96 | 	config := r.Config()
 97 | 	feeds, err := r.Dependencies().FeedStorage.Query(ctx, block.QueryOptions{
 98 | 		Query:        config.Query,
 99 | 		Threshold:    config.Threshold,
100 | 		LabelFilters: config.LabelFilters,
101 | 		Start:        start,
102 | 		End:          end,
103 | 		Limit:        500,
104 | 	})
105 | 	if err != nil {
106 | 		return errors.Wrap(err, "query")
107 | 	}
108 | 	if len(feeds) == 0 {
109 | 		log.Debug(ctx, "no feeds found")
110 | 
111 | 		return nil
112 | 	}
113 | 
114 | 	// Attach labels to feeds and split them by interval start in one pass.
115 | 	feedsByStart := make(map[time.Time][]*block.FeedVO) // Start time -> feeds.
116 | 	for _, feed := range feeds {
117 | 		feed.Labels = append(feed.Labels, config.labels...)
118 | 		feed.Labels.EnsureSorted()
119 | 
120 | 		interval := time.Unix(feed.Time.Unix(), 0).Truncate(config.WatchInterval)
121 | 		feedsByStart[interval] = append(feedsByStart[interval], feed)
122 | 	}
123 | 
124 | 	// Notify, but give up when the context ends before a result is taken.
125 | 	out := r.Dependencies().Out
126 | 	for intervalStart, grouped := range feedsByStart {
127 | 		result := &Result{
128 | 			Rule:  config.Name,
129 | 			Time:  intervalStart,
130 | 			Feeds: grouped,
131 | 		}
132 | 		select {
133 | 		case out <- result:
134 | 		case <-ctx.Done():
135 | 			return errors.Wrap(ctx.Err(), "send result")
136 | 		}
137 | 	}
138 | 	log.Debug(ctx, "rule notified", "feeds", len(feedsByStart))
139 | 
140 | 	return nil
141 | }
129 | 


--------------------------------------------------------------------------------
/pkg/schedule/schedule.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package schedule
 17 | 
 18 | import (
 19 | 	"reflect"
 20 | 	"time"
 21 | 
 22 | 	"github.com/pkg/errors"
 23 | 
 24 | 	"github.com/glidea/zenfeed/pkg/component"
 25 | 	"github.com/glidea/zenfeed/pkg/config"
 26 | 	"github.com/glidea/zenfeed/pkg/schedule/rule"
 27 | 	"github.com/glidea/zenfeed/pkg/storage/feed"
 28 | 	"github.com/glidea/zenfeed/pkg/telemetry"
 29 | 	"github.com/glidea/zenfeed/pkg/telemetry/log"
 30 | 	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
 31 | )
 32 | 
 33 | // --- Interface code block ---
 34 | // Scheduler is the component that runs the scheduling rules and reacts to
 35 | // application config changes as a config.Watcher.
 36 | type Scheduler interface {
 37 | 	component.Component
 38 | 	config.Watcher
 39 | }
 38 | 
 39 | // Config is the scheduler configuration: the rules to run.
 40 | type Config struct {
 41 | 	Rules []rule.Config
 42 | }
 43 | 
 44 | // Validate validates every rule in place.
 45 | //
 46 | // rule.Config.Validate also normalizes the rule (default threshold, minimum
 47 | // watch interval, parsed time range), so it has to run on the slice element
 48 | // itself. Validating a loop copy would discard those values, and this config
 49 | // would then differ from one whose rules were normalized through a pointer,
 50 | // which defeats the equality check done in Reload.
 51 | // NOTE(review): rules are validated again when they are created, so this
 52 | // relies on rule.Config.Validate being idempotent — confirm for Labels.FromMap.
 53 | func (c *Config) Validate() error {
 54 | 	for i := range c.Rules {
 55 | 		if err := c.Rules[i].Validate(); err != nil {
 56 | 			return errors.Wrap(err, "validate rule")
 57 | 		}
 58 | 	}
 59 | 
 60 | 	return nil
 61 | }
 52 | 
 53 | // From replaces c.Rules with the scheduling rules of the application config
 54 | // and returns c so calls can be chained.
 55 | func (c *Config) From(app *config.App) *Config {
 56 | 	source := app.Scheduls.Rules
 57 | 	converted := make([]rule.Config, 0, len(source))
 58 | 	for _, r := range source {
 59 | 		converted = append(converted, rule.Config{
 60 | 			Name:          r.Name,
 61 | 			Query:         r.Query,
 62 | 			Threshold:     r.Threshold,
 63 | 			LabelFilters:  r.LabelFilters,
 64 | 			Labels:        r.Labels,
 65 | 			EveryDay:      r.EveryDay,
 66 | 			WatchInterval: time.Duration(r.WatchInterval),
 67 | 		})
 68 | 	}
 69 | 	c.Rules = converted
 70 | 
 71 | 	return c
 72 | }
 69 | 
 70 | // Dependencies are the collaborators the scheduler needs.
 71 | type Dependencies struct {
 72 | 	// RuleFactory creates the individual rules.
 73 | 	RuleFactory rule.Factory
 74 | 	FeedStorage feed.Storage
 75 | 	// Out is the send-only channel for rule results.
 76 | 	Out chan<- *rule.Result
 77 | }
 78 | 
 79 | // --- Factory code block ---
 80 | 
 81 | // Factory creates Scheduler instances from the application config.
 82 | type Factory component.Factory[Scheduler, config.App, Dependencies]
 83 | 
 84 | // NewFactory returns the scheduler factory.
 85 | //
 86 | // Without arguments it yields the real factory. When any mock option is
 87 | // passed, the returned factory ignores its inputs and produces mock
 88 | // schedulers with those options applied.
 89 | func NewFactory(mockOn ...component.MockOption) Factory {
 90 | 	if len(mockOn) == 0 {
 91 | 		return component.FactoryFunc[Scheduler, config.App, Dependencies](new)
 92 | 	}
 93 | 
 94 | 	mockFactory := func(instance string, app *config.App, dependencies Dependencies) (Scheduler, error) {
 95 | 		mocked := &mockScheduler{}
 96 | 		component.MockOptions(mockOn).Apply(&mocked.Mock)
 97 | 
 98 | 		return mocked, nil
 99 | 	}
100 | 
101 | 	return component.FactoryFunc[Scheduler, config.App, Dependencies](mockFactory)
102 | }
 93 | 
 94 | // new builds a scheduler from the application config.
 95 | //
 96 | // The scheduling rules are extracted and validated, then one rule is created
 97 | // per entry and stored under its name. Each rule receives a pointer to its
 98 | // element of the config's Rules slice.
 99 | // NOTE(review): the component Name is set to the instance name here rather
100 | // than to a fixed component name — confirm this is intended.
101 | func new(instance string, app *config.App, dependencies Dependencies) (Scheduler, error) {
102 | 	cfg := (&Config{}).From(app)
103 | 	if err := cfg.Validate(); err != nil {
104 | 		return nil, errors.Wrap(err, "validate config")
105 | 	}
106 | 
107 | 	base := component.New(&component.BaseConfig[Config, Dependencies]{
108 | 		Name:         instance,
109 | 		Instance:     instance,
110 | 		Config:       cfg,
111 | 		Dependencies: dependencies,
112 | 	})
113 | 	s := &scheduler{
114 | 		Base:  base,
115 | 		rules: make(map[string]rule.Rule, len(cfg.Rules)),
116 | 	}
117 | 
118 | 	for i := range cfg.Rules {
119 | 		ruleConfig := &cfg.Rules[i]
120 | 		created, err := s.newRule(ruleConfig)
121 | 		if err != nil {
122 | 			return nil, errors.Wrapf(err, "create rule %s", ruleConfig.Name)
123 | 		}
124 | 		s.rules[ruleConfig.Name] = created
125 | 	}
126 | 
127 | 	return s, nil
128 | }
122 | 
123 | // --- Implementation code block ---
// scheduler is the Scheduler implementation: it owns one rule.Rule per
// configured rule and starts, reloads and closes them together.
type scheduler struct {
	*component.Base[Config, Dependencies]

	// rules holds the currently managed rules, keyed by rule name.
	rules map[string]rule.Rule
}
129 | 
130 | func (s *scheduler) Run() (err error) {
131 | 	ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
132 | 	defer func() { telemetry.End(ctx, err) }()
133 | 
134 | 	for _, r := range s.rules {
135 | 		if err := component.RunUntilReady(ctx, r, 10*time.Second); err != nil {
136 | 			return errors.Wrapf(err, "running rule %s", r.Config().Name)
137 | 		}
138 | 	}
139 | 
140 | 	s.MarkReady()
141 | 	<-ctx.Done()
142 | 
143 | 	return nil
144 | }
145 | 
146 | func (s *scheduler) Reload(app *config.App) error {
147 | 	newConfig := &Config{}
148 | 	newConfig.From(app)
149 | 	if err := newConfig.Validate(); err != nil {
150 | 		return errors.Wrap(err, "validate config")
151 | 	}
152 | 	if reflect.DeepEqual(s.Config(), newConfig) {
153 | 		log.Debug(s.Context(), "no changes in schedule config")
154 | 
155 | 		return nil
156 | 	}
157 | 
158 | 	newRules := make(map[string]rule.Rule, len(newConfig.Rules))
159 | 
160 | 	if err := s.runOrRestartRules(newConfig, newRules); err != nil {
161 | 		return errors.Wrap(err, "run or restart rules")
162 | 	}
163 | 	if err := s.stopObsoleteRules(newRules); err != nil {
164 | 		return errors.Wrap(err, "stop obsolete rules")
165 | 	}
166 | 
167 | 	s.rules = newRules
168 | 	s.SetConfig(newConfig)
169 | 
170 | 	return nil
171 | }
172 | 
173 | func (s *scheduler) Close() error {
174 | 	if err := s.Base.Close(); err != nil {
175 | 		return errors.Wrap(err, "close base")
176 | 	}
177 | 
178 | 	// Stop all rules.
179 | 	for _, r := range s.rules {
180 | 		_ = r.Close()
181 | 	}
182 | 
183 | 	return nil
184 | }
185 | 
186 | func (s *scheduler) newRule(config *rule.Config) (rule.Rule, error) {
187 | 	return s.Dependencies().RuleFactory.New(config.Name, config, rule.Dependencies{
188 | 		FeedStorage: s.Dependencies().FeedStorage,
189 | 		Out:         s.Dependencies().Out,
190 | 	})
191 | }
192 | 
193 | func (s *scheduler) runOrRestartRules(config *Config, newRules map[string]rule.Rule) error {
194 | 	for _, r := range config.Rules {
195 | 		// Close or reuse existing rule.
196 | 		if existing, exists := s.rules[r.Name]; exists {
197 | 			if reflect.DeepEqual(existing.Config(), r) {
198 | 				newRules[r.Name] = existing
199 | 
200 | 				continue
201 | 			}
202 | 
203 | 			if err := existing.Close(); err != nil {
204 | 				return errors.Wrap(err, "close existing rule")
205 | 			}
206 | 		}
207 | 
208 | 		// Create & Run new/updated rule.
209 | 		newRule, err := s.newRule(&r)
210 | 		if err != nil {
211 | 			return errors.Wrap(err, "create rule")
212 | 		}
213 | 		newRules[r.Name] = newRule
214 | 		if err := component.RunUntilReady(s.Context(), newRule, 10*time.Second); err != nil {
215 | 			return errors.Wrapf(err, "running rule %s", r.Name)
216 | 		}
217 | 	}
218 | 
219 | 	return nil
220 | }
221 | 
222 | func (s *scheduler) stopObsoleteRules(newRules map[string]rule.Rule) error {
223 | 	var lastErr error
224 | 	for name, r := range s.rules {
225 | 		if _, exists := newRules[name]; !exists {
226 | 			if err := r.Close(); err != nil {
227 | 				lastErr = errors.Wrap(err, "close obsolete rule")
228 | 			}
229 | 		}
230 | 	}
231 | 
232 | 	return lastErr
233 | }
234 | 
// mockScheduler is the test double produced by NewFactory when mock options
// are supplied; it embeds component.Mock.
type mockScheduler struct {
	component.Mock
}
238 | 
239 | func (m *mockScheduler) Reload(app *config.App) error {
240 | 	args := m.Called(app)
241 | 
242 | 	return args.Error(0)
243 | }
244 | 


--------------------------------------------------------------------------------
/pkg/scrape/scraper/rss.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package scraper
 17 | 
 18 | import (
 19 | 	"context"
 20 | 	"strings"
 21 | 	"time"
 22 | 
 23 | 	"github.com/mmcdole/gofeed"
 24 | 	"github.com/pkg/errors"
 25 | 	"github.com/stretchr/testify/mock"
 26 | 
 27 | 	"github.com/glidea/zenfeed/pkg/model"
 28 | 	textconvert "github.com/glidea/zenfeed/pkg/util/text_convert"
 29 | )
 30 | 
 31 | // --- Interface code block ---
 32 | type ScrapeSourceRSS struct {
 33 | 	URL             string
 34 | 	RSSHubEndpoint  string
 35 | 	RSSHubRoutePath string
 36 | }
 37 | 
 38 | func (c *ScrapeSourceRSS) Validate() error {
 39 | 	if c.URL == "" && c.RSSHubEndpoint == "" {
 40 | 		return errors.New("URL or RSSHubEndpoint can not be empty at the same time")
 41 | 	}
 42 | 	if c.URL == "" {
 43 | 		c.URL = strings.TrimSuffix(c.RSSHubEndpoint, "/") + "/" + strings.TrimPrefix(c.RSSHubRoutePath, "/")
 44 | 	}
 45 | 	if c.URL != "" && !strings.HasPrefix(c.URL, "http://") && !strings.HasPrefix(c.URL, "https://") {
 46 | 		return errors.New("URL must be a valid HTTP/HTTPS URL")
 47 | 	}
 48 | 
 49 | 	return nil
 50 | }
 51 | 
 52 | // --- Factory code block ---
 53 | func newRSSReader(config *ScrapeSourceRSS) (reader, error) {
 54 | 	if err := config.Validate(); err != nil {
 55 | 		return nil, errors.Wrapf(err, "invalid RSS config")
 56 | 	}
 57 | 
 58 | 	return &rssReader{
 59 | 		config: config,
 60 | 		client: &gofeedClient{
 61 | 			url:  config.URL,
 62 | 			base: gofeed.NewParser(),
 63 | 		},
 64 | 	}, nil
 65 | }
 66 | 
 67 | // --- Implementation code block ---
// rssReader is the reader implementation for RSS sources.
type rssReader struct {
	// config is the validated source description the reader was built from.
	config *ScrapeSourceRSS
	// client fetches and parses the remote feed.
	client client
}
 72 | 
 73 | func (r *rssReader) Read(ctx context.Context) ([]*model.Feed, error) {
 74 | 	feed, err := r.client.Get(ctx)
 75 | 	if err != nil {
 76 | 		return nil, errors.Wrapf(err, "fetching RSS feed")
 77 | 	}
 78 | 	if len(feed.Items) == 0 {
 79 | 		return []*model.Feed{}, nil
 80 | 	}
 81 | 
 82 | 	now := clk.Now()
 83 | 	feeds := make([]*model.Feed, 0, len(feed.Items))
 84 | 	for _, fi := range feed.Items {
 85 | 		item, err := r.toResultFeed(now, fi)
 86 | 		if err != nil {
 87 | 			return nil, errors.Wrapf(err, "converting feed item")
 88 | 		}
 89 | 
 90 | 		feeds = append(feeds, item)
 91 | 	}
 92 | 
 93 | 	return feeds, nil
 94 | }
 95 | 
 96 | func (r *rssReader) toResultFeed(now time.Time, feedFeed *gofeed.Item) (*model.Feed, error) {
 97 | 	content := r.combineContent(feedFeed.Content, feedFeed.Description)
 98 | 
 99 | 	// Ensure the content is markdown.
100 | 	mdContent, err := textconvert.HTMLToMarkdown([]byte(content))
101 | 	if err != nil {
102 | 		return nil, errors.Wrapf(err, "converting content to markdown")
103 | 	}
104 | 
105 | 	// Create the feed item.
106 | 	feed := &model.Feed{
107 | 		Labels: model.Labels{
108 | 			{Key: model.LabelType, Value: "rss"},
109 | 			{Key: model.LabelTitle, Value: feedFeed.Title},
110 | 			{Key: model.LabelLink, Value: feedFeed.Link},
111 | 			{Key: model.LabelPubTime, Value: r.parseTime(feedFeed).Format(time.RFC3339)},
112 | 			{Key: model.LabelContent, Value: string(mdContent)},
113 | 		},
114 | 		Time: now,
115 | 	}
116 | 
117 | 	return feed, nil
118 | }
119 | 
120 | // parseTime parses the publication time from the feed item.
121 | // If the feed item does not have a publication time, it returns the current time.
122 | func (r *rssReader) parseTime(feedFeed *gofeed.Item) time.Time {
123 | 	if feedFeed.PublishedParsed == nil {
124 | 		return clk.Now().In(time.Local)
125 | 	}
126 | 
127 | 	return feedFeed.PublishedParsed.In(time.Local)
128 | }
129 | 
130 | // combineContent combines Content and Description fields with proper formatting.
131 | func (r *rssReader) combineContent(content, description string) string {
132 | 	switch {
133 | 	case content == "":
134 | 		return description
135 | 	case description == "":
136 | 		return content
137 | 	default:
138 | 		return strings.Join([]string{description, content}, "\n\n")
139 | 	}
140 | }
141 | 
// client abstracts fetching a parsed feed so the reader can be tested
// without network access.
type client interface {
	// Get fetches and parses the feed, honouring ctx.
	Get(ctx context.Context) (*gofeed.Feed, error)
}
145 | 
// gofeedClient is the client implementation backed by a gofeed parser.
type gofeedClient struct {
	// url is the feed address to fetch.
	url  string
	// base is the parser used to download and parse url.
	base *gofeed.Parser
}
150 | 
// Get downloads and parses the configured URL with the underlying parser,
// passing ctx through to it.
func (c *gofeedClient) Get(ctx context.Context) (*gofeed.Feed, error) {
	return c.base.ParseURLWithContext(c.url, ctx)
}
154 | 
// mockClient is a testify-based test double for client.
type mockClient struct {
	mock.Mock
}
158 | 
159 | func newMockClient() *mockClient {
160 | 	return &mockClient{}
161 | }
162 | 
163 | func (c *mockClient) Get(ctx context.Context) (*gofeed.Feed, error) {
164 | 	args := c.Called(ctx)
165 | 	if args.Error(1) != nil {
166 | 		return nil, args.Error(1)
167 | 	}
168 | 
169 | 	return args.Get(0).(*gofeed.Feed), nil
170 | }
171 | 


--------------------------------------------------------------------------------
/pkg/scrape/scraper/source.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package scraper
17 | 
18 | import (
19 | 	"context"
20 | 	"errors"
21 | 
22 | 	"github.com/stretchr/testify/mock"
23 | 
24 | 	"github.com/glidea/zenfeed/pkg/model"
25 | )
26 | 
27 | // --- Interface code block ---
28 | 
// reader defines the interface for reading feeds from a data source.
type reader interface {
	// Read fetches content from the data source.
	// It returns the feeds that were read and any error encountered.
	Read(ctx context.Context) ([]*model.Feed, error)
}
35 | 
36 | // --- Factory code block ---
37 | func newReader(config *Config) (reader, error) {
38 | 	if config.RSS != nil {
39 | 		return newRSSReader(config.RSS)
40 | 	}
41 | 
42 | 	return nil, errors.New("source not supported")
43 | }
44 | 
45 | // --- Implementation code block ---
46 | 
// mockReader is a testify-based test double for reader.
type mockReader struct {
	mock.Mock
}
50 | 
51 | func NewMock() *mockReader {
52 | 	return &mockReader{}
53 | }
54 | 
55 | func (m *mockReader) Read(ctx context.Context) ([]*model.Feed, error) {
56 | 	args := m.Called(ctx)
57 | 	if feeds := args.Get(0); feeds != nil {
58 | 		return feeds.([]*model.Feed), args.Error(1)
59 | 	}
60 | 
61 | 	return nil, args.Error(1)
62 | }
63 | 


--------------------------------------------------------------------------------
/pkg/storage/feed/block/index/codec.go:
--------------------------------------------------------------------------------
 1 | package index
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"io"
 6 | )
 7 | 
// Codec defines the interface for serialising an index to a stream and
// restoring it from one.
type Codec interface {
	// EncodeTo encodes the index to the given writer.
	EncodeTo(ctx context.Context, w io.Writer) (err error)
	// DecodeFrom decodes the index from the given reader.
	DecodeFrom(ctx context.Context, r io.Reader) (err error)
}
15 | 


--------------------------------------------------------------------------------
/pkg/storage/feed/block/index/primary/primary_test.go:
--------------------------------------------------------------------------------
  1 | package primary
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"context"
  6 | 	"testing"
  7 | 	"time"
  8 | 
  9 | 	. "github.com/onsi/gomega"
 10 | 
 11 | 	"github.com/glidea/zenfeed/pkg/test"
 12 | )
 13 | 
// TestAdd checks that Add stores a new reference and overwrites the reference
// of an ID that is already present.
func TestAdd(t *testing.T) {
	RegisterTestingT(t)

	// givenDetail: entries added to the index before the action under test.
	type givenDetail struct {
		existingItems map[uint64]FeedRef
	}
	// whenDetail: the single Add call under test.
	type whenDetail struct {
		id   uint64
		item FeedRef
	}
	// thenExpected: entries that must be present afterwards.
	type thenExpected struct {
		items map[uint64]FeedRef
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Add Single Feed",
			Given:    "An index with existing item",
			When:     "Adding a single item",
			Then:     "Should store the item correctly",
			GivenDetail: givenDetail{
				existingItems: map[uint64]FeedRef{
					0: {Chunk: 0, Offset: 0},
				},
			},
			WhenDetail: whenDetail{
				id:   1,
				item: FeedRef{Chunk: 1, Offset: 100},
			},
			ThenExpected: thenExpected{
				items: map[uint64]FeedRef{
					0: {Chunk: 0, Offset: 0},
					1: {Chunk: 1, Offset: 100},
				},
			},
		},
		{
			Scenario: "Update Existing Feed",
			Given:    "An index with existing item",
			When:     "Adding item with same ID",
			Then:     "Should update the item reference",
			GivenDetail: givenDetail{
				existingItems: map[uint64]FeedRef{
					1: {Chunk: 1, Offset: 100},
				},
			},
			WhenDetail: whenDetail{
				id:   1,
				item: FeedRef{Chunk: 2, Offset: 200},
			},
			ThenExpected: thenExpected{
				items: map[uint64]FeedRef{
					1: {Chunk: 2, Offset: 200},
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			idx0, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			for id, item := range tt.GivenDetail.existingItems {
				idx0.Add(context.Background(), id, item)
			}

			// When.
			idx0.Add(context.Background(), tt.WhenDetail.id, tt.WhenDetail.item)

			// Then.
			// Inspect the concrete index's internal map; only the expected
			// entries are checked, extra entries would go unnoticed.
			primIdx := idx0.(*idx)
			for id, expected := range tt.ThenExpected.items {
				Expect(primIdx.m).To(HaveKey(id))
				Expect(primIdx.m[id]).To(Equal(expected))
			}
		})
	}
}
 93 | 
// TestSearch checks that Search returns the stored reference for a known ID
// and a zero FeedRef with found == false for an unknown one.
func TestSearch(t *testing.T) {
	RegisterTestingT(t)

	// givenDetail: entries added to the index before searching.
	type givenDetail struct {
		feeds map[uint64]FeedRef
	}
	// whenDetail: the ID passed to Search.
	type whenDetail struct {
		searchID uint64
	}
	// thenExpected: the reference and found flag Search must return.
	type thenExpected struct {
		feedRef FeedRef
		found   bool
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Search Existing Feed",
			Given:    "An index with feeds",
			When:     "Searching for existing ID",
			Then:     "Should return correct FeedRef",
			GivenDetail: givenDetail{
				feeds: map[uint64]FeedRef{
					1: {Chunk: 1, Offset: 100},
					2: {Chunk: 2, Offset: 200},
				},
			},
			WhenDetail: whenDetail{
				searchID: 1,
			},
			ThenExpected: thenExpected{
				feedRef: FeedRef{Chunk: 1, Offset: 100},
				found:   true,
			},
		},
		{
			Scenario: "Search Non-Existing Feed",
			Given:    "An index with feeds",
			When:     "Searching for non-existing ID",
			Then:     "Should return empty FeedRef",
			GivenDetail: givenDetail{
				feeds: map[uint64]FeedRef{
					1: {Chunk: 1, Offset: 100},
				},
			},
			WhenDetail: whenDetail{
				searchID: 2,
			},
			ThenExpected: thenExpected{
				feedRef: FeedRef{},
				found:   false,
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			idx, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			for id, item := range tt.GivenDetail.feeds {
				idx.Add(context.Background(), id, item)
			}

			// When.
			result, ok := idx.Search(context.Background(), tt.WhenDetail.searchID)

			// Then.
			Expect(result).To(Equal(tt.ThenExpected.feedRef))
			Expect(ok).To(Equal(tt.ThenExpected.found))
		})
	}
}
166 | 
// TestEncodeDecode checks that an index written with EncodeTo and read back
// with DecodeFrom into a fresh index holds the same entries.
func TestEncodeDecode(t *testing.T) {
	RegisterTestingT(t)

	// givenDetail: entries added to the index before encoding.
	type givenDetail struct {
		feeds map[uint64]FeedRef
	}
	type whenDetail struct{}
	// thenExpected.success is declared per case but not read by the
	// assertions below.
	type thenExpected struct {
		success bool
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Encode and Decode Index with Data",
			Given:    "An index with feeds",
			When:     "Encoding and decoding",
			Then:     "Should restore all data correctly",
			GivenDetail: givenDetail{
				feeds: map[uint64]FeedRef{
					1: {Chunk: 1, Offset: 100, Time: time.Now()},
					2: {Chunk: 2, Offset: 200, Time: time.Now()},
				},
			},
			WhenDetail: whenDetail{},
			ThenExpected: thenExpected{
				success: true,
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			original, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			for id, item := range tt.GivenDetail.feeds {
				original.Add(context.Background(), id, item)
			}

			// When.
			// Round-trip through an in-memory buffer into a second, empty index.
			var buf bytes.Buffer
			err = original.EncodeTo(context.Background(), &buf)
			Expect(err).NotTo(HaveOccurred())

			decoded, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			err = decoded.DecodeFrom(context.Background(), &buf)
			Expect(err).NotTo(HaveOccurred())

			// Then.
			// Compare the internal maps of both concrete indexes.
			origIdx := original.(*idx)
			decodedIdx := decoded.(*idx)
			Expect(decodedIdx.m).To(Equal(origIdx.m))
		})
	}
}
223 | 


--------------------------------------------------------------------------------
/pkg/storage/kv/kv.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package kv
 17 | 
 18 | import (
 19 | 	"context"
 20 | 	"strings"
 21 | 	"time"
 22 | 
 23 | 	"github.com/nutsdb/nutsdb"
 24 | 	"github.com/pkg/errors"
 25 | 
 26 | 	"github.com/glidea/zenfeed/pkg/component"
 27 | 	"github.com/glidea/zenfeed/pkg/config"
 28 | 	"github.com/glidea/zenfeed/pkg/telemetry"
 29 | 	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
 30 | )
 31 | 
 32 | // --- Interface code block ---
// Storage is a key-value store component with per-entry TTL.
type Storage interface {
	component.Component
	// Get returns the value stored under key, or ErrNotFound if there is none.
	Get(ctx context.Context, key []byte) ([]byte, error)
	// Set stores value under key with the given time-to-live.
	Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) error
}
 38 | 
// ErrNotFound is returned by Get when no value is stored under the key.
var ErrNotFound = errors.New("not found")
 40 | 
 41 | type Config struct {
 42 | 	Dir string
 43 | }
 44 | 
 45 | const subDir = "kv"
 46 | 
 47 | func (c *Config) Validate() error {
 48 | 	if c.Dir == "" {
 49 | 		c.Dir = "./data/" + subDir
 50 | 	}
 51 | 
 52 | 	return nil
 53 | }
 54 | 
// From copies the storage directory from the application config into c and
// returns c so the call can be chained.
func (c *Config) From(app *config.App) *Config {
	c.Dir = app.Storage.Dir

	return c
}
 60 | 
// Dependencies is empty: the key-value storage needs no collaborators.
type Dependencies struct{}
 62 | 
 63 | // --- Factory code block ---
// Factory builds Storage instances from the application config and Dependencies.
type Factory component.Factory[Storage, config.App, Dependencies]
 65 | 
 66 | func NewFactory(mockOn ...component.MockOption) Factory {
 67 | 	if len(mockOn) > 0 {
 68 | 		return component.FactoryFunc[Storage, config.App, Dependencies](
 69 | 			func(instance string, config *config.App, dependencies Dependencies) (Storage, error) {
 70 | 				m := &mockKV{}
 71 | 				component.MockOptions(mockOn).Apply(&m.Mock)
 72 | 
 73 | 				return m, nil
 74 | 			},
 75 | 		)
 76 | 	}
 77 | 
 78 | 	return component.FactoryFunc[Storage, config.App, Dependencies](new)
 79 | }
 80 | 
 81 | func new(instance string, app *config.App, dependencies Dependencies) (Storage, error) {
 82 | 	config := &Config{}
 83 | 	config.From(app)
 84 | 	if err := config.Validate(); err != nil {
 85 | 		return nil, errors.Wrap(err, "validate config")
 86 | 	}
 87 | 
 88 | 	return &kv{
 89 | 		Base: component.New(&component.BaseConfig[Config, Dependencies]{
 90 | 			Name:         "KVStorage",
 91 | 			Instance:     instance,
 92 | 			Config:       config,
 93 | 			Dependencies: dependencies,
 94 | 		}),
 95 | 	}, nil
 96 | }
 97 | 
 98 | // --- Implementation code block ---
// kv is the Storage implementation backed by nutsdb.
type kv struct {
	*component.Base[Config, Dependencies]
	// db is the open database; it is nil until Run has opened it.
	db *nutsdb.DB
}
103 | 
104 | func (k *kv) Run() error {
105 | 	db, err := nutsdb.Open(
106 | 		nutsdb.DefaultOptions,
107 | 		nutsdb.WithDir(k.Config().Dir),
108 | 		nutsdb.WithSyncEnable(false),
109 | 	)
110 | 	if err != nil {
111 | 		return errors.Wrap(err, "open db")
112 | 	}
113 | 	if err := db.Update(func(tx *nutsdb.Tx) error {
114 | 		if !tx.ExistBucket(nutsdb.DataStructureBTree, bucket) {
115 | 			return tx.NewBucket(nutsdb.DataStructureBTree, bucket)
116 | 		}
117 | 
118 | 		return nil
119 | 	}); err != nil {
120 | 		return errors.Wrap(err, "create bucket")
121 | 	}
122 | 	k.db = db
123 | 
124 | 	k.MarkReady()
125 | 	<-k.Context().Done()
126 | 
127 | 	return nil
128 | }
129 | 
130 | func (k *kv) Close() error {
131 | 	if err := k.Base.Close(); err != nil {
132 | 		return errors.Wrap(err, "close base")
133 | 	}
134 | 
135 | 	return k.db.Close()
136 | }
137 | 
// bucket is the name of the single B-tree bucket all entries are stored in.
const bucket = "0"
139 | 
140 | func (k *kv) Get(ctx context.Context, key []byte) (value []byte, err error) {
141 | 	ctx = telemetry.StartWith(ctx, append(k.TelemetryLabels(), telemetrymodel.KeyOperation, "Get")...)
142 | 	defer func() {
143 | 		telemetry.End(ctx, func() error {
144 | 			if err != nil && !errors.Is(err, ErrNotFound) {
145 | 				return err
146 | 			}
147 | 
148 | 			return nil
149 | 		}())
150 | 	}()
151 | 
152 | 	var b []byte
153 | 	err = k.db.View(func(tx *nutsdb.Tx) error {
154 | 		b, err = tx.Get(bucket, []byte(key))
155 | 
156 | 		return err
157 | 	})
158 | 	switch {
159 | 	case err == nil:
160 | 		return b, nil
161 | 	case errors.Is(err, nutsdb.ErrNotFoundKey):
162 | 		return nil, ErrNotFound
163 | 	case strings.Contains(err.Error(), "key not found"):
164 | 		return nil, ErrNotFound
165 | 	default:
166 | 		return nil, err
167 | 	}
168 | }
169 | 
170 | func (k *kv) Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) (err error) {
171 | 	ctx = telemetry.StartWith(ctx, append(k.TelemetryLabels(), telemetrymodel.KeyOperation, "Set")...)
172 | 	defer func() { telemetry.End(ctx, err) }()
173 | 
174 | 	return k.db.Update(func(tx *nutsdb.Tx) error {
175 | 		return tx.Put(bucket, key, value, uint32(ttl.Seconds()))
176 | 	})
177 | }
178 | 
// mockKV is the test double produced by NewFactory when mock options are
// supplied; it embeds component.Mock.
type mockKV struct {
	component.Mock
}
182 | 
183 | func (m *mockKV) Get(ctx context.Context, key []byte) ([]byte, error) {
184 | 	args := m.Called(ctx, key)
185 | 
186 | 	return args.Get(0).([]byte), args.Error(1)
187 | }
188 | 
189 | func (m *mockKV) Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) error {
190 | 	args := m.Called(ctx, key, value, ttl)
191 | 
192 | 	return args.Error(0)
193 | }
194 | 


--------------------------------------------------------------------------------
/pkg/telemetry/log/log.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package log
 17 | 
 18 | import (
 19 | 	"context"
 20 | 	"log/slog"
 21 | 	"os"
 22 | 	"runtime"
 23 | 	"strconv"
 24 | 	"strings"
 25 | 	"sync"
 26 | 	"sync/atomic"
 27 | 
 28 | 	"github.com/pkg/errors"
 29 | 	slogdedup "github.com/veqryn/slog-dedup"
 30 | 
 31 | 	"github.com/glidea/zenfeed/pkg/model"
 32 | )
 33 | 
// Level is the textual log level accepted by SetLevel.
type Level string

// Supported log levels, from most to least verbose.
const (
	LevelDebug Level = "debug"
	LevelInfo  Level = "info"
	LevelWarn  Level = "warn"
	LevelError Level = "error"
)
 42 | 
 43 | func SetLevel(level Level) error {
 44 | 	if level == "" {
 45 | 		level = LevelInfo
 46 | 	}
 47 | 
 48 | 	var logLevel slog.Level
 49 | 	switch level {
 50 | 	case LevelDebug:
 51 | 		logLevel = slog.LevelDebug
 52 | 	case LevelInfo:
 53 | 		logLevel = slog.LevelInfo
 54 | 	case LevelWarn:
 55 | 		logLevel = slog.LevelWarn
 56 | 	case LevelError:
 57 | 		logLevel = slog.LevelError
 58 | 	default:
 59 | 		return errors.Errorf("invalid log level, valid values are: %v", []Level{LevelDebug, LevelInfo, LevelWarn, LevelError})
 60 | 	}
 61 | 
 62 | 	newLogger := slog.New(slogdedup.NewOverwriteHandler(
 63 | 		slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: logLevel}),
 64 | 		nil,
 65 | 	))
 66 | 
 67 | 	mu.Lock()
 68 | 	defaultLogger = newLogger
 69 | 	mu.Unlock()
 70 | 
 71 | 	return nil
 72 | }
 73 | 
 74 | // With returns a new context with additional labels added to the logger.
 75 | func With(ctx context.Context, keyvals ...any) context.Context {
 76 | 	logger := from(ctx)
 77 | 
 78 | 	return with(ctx, logger.With(keyvals...))
 79 | }
 80 | 
// Debug logs msg at debug level using the logger of ctx (or the default
// logger). A "stack" attribute is added only when debug is at or above the
// level set with SetWithStackLevel.
func Debug(ctx context.Context, msg string, args ...any) {
	logWithStack(ctx, slog.LevelDebug, msg, args...)
}
 85 | 
// Info logs msg at info level using the logger of ctx (or the default
// logger). A "stack" attribute is added only when info is at or above the
// level set with SetWithStackLevel.
func Info(ctx context.Context, msg string, args ...any) {
	logWithStack(ctx, slog.LevelInfo, msg, args...)
}
 90 | 
// Warn logs err's message at warn level using the logger of ctx (or the
// default logger). A "stack" attribute is added when warn is at or above the
// level set with SetWithStackLevel. err must not be nil: its Error method is
// called unconditionally.
func Warn(ctx context.Context, err error, args ...any) {
	logWithStack(ctx, slog.LevelWarn, err.Error(), args...)
}
 95 | 
// Error logs err's message at error level using the logger of ctx (or the
// default logger). A "stack" attribute is added when error is at or above the
// level set with SetWithStackLevel. err must not be nil: its Error method is
// called unconditionally.
func Error(ctx context.Context, err error, args ...any) {
	logWithStack(ctx, slog.LevelError, err.Error(), args...)
}
100 | 
// Fatal logs err's message at error level (there is no separate fatal level)
// and then terminates the process with os.Exit(1).
// err must not be nil: its Error method is called unconditionally.
func Fatal(ctx context.Context, err error, args ...any) {
	logWithStack(ctx, slog.LevelError, err.Error(), args...)
	os.Exit(1)
}
107 | 
// ctxKey is the private type of the context keys used by this package.
type ctxKey uint8
109 | 
var (
	// loggerCtxKey is the context key under which a *slog.Logger is stored.
	loggerCtxKey  = ctxKey(0)
	// defaultLogger is used when a context carries no logger; SetLevel replaces it.
	defaultLogger = slog.New(slogdedup.NewOverwriteHandler(slog.NewTextHandler(os.Stdout, nil), nil))
	// mu guards defaultLogger.
	mu            sync.RWMutex
	// withStackLevel controls which log level and above will include stack traces.
	withStackLevel atomic.Int32
)
117 | 
// init installs the package default for stack traces.
func init() {
	// Default to include stack traces for Warn and above.
	SetWithStackLevel(slog.LevelWarn)
}
122 | 
// SetWithStackLevel sets the minimum log level that will include stack traces.
// It should not be called in init().
// NOTE(review): this package's own init() does call it; presumably the
// restriction is aimed at init functions of other packages — confirm.
func SetWithStackLevel(level slog.Level) {
	withStackLevel.Store(int32(level))
}
128 | 
// with returns a copy of ctx that carries logger under loggerCtxKey.
func with(ctx context.Context, logger *slog.Logger) context.Context {
	return context.WithValue(ctx, loggerCtxKey, logger)
}
133 | 
134 | // from retrieves the logger from context.
135 | // Returns default logger if context has no logger.
136 | func from(ctx context.Context) *slog.Logger {
137 | 	mu.RLock()
138 | 	defer mu.RUnlock()
139 | 	if ctx == nil {
140 | 		return defaultLogger
141 | 	}
142 | 
143 | 	if logger, ok := ctx.Value(loggerCtxKey).(*slog.Logger); ok {
144 | 		return logger
145 | 	}
146 | 
147 | 	return defaultLogger
148 | }
149 | 
// Parameters of the stack capture performed by logWithStack.
const (
	stackSkip   = 2 // Frames skipped above getStack: logWithStack and its exported caller (Error, Warn, ...).
	stackDepth  = 5 // Maximum number of stack frames to capture.
	avgFrameLen = 64 // Estimated bytes per formatted frame, used to pre-size the builder.
)
155 | 
156 | func logWithStack(ctx context.Context, level slog.Level, msg string, args ...any) {
157 | 	logger := from(ctx)
158 | 	if !logger.Enabled(ctx, level) {
159 | 		// avoid to get stack trace if logging is disabled for this level
160 | 		return
161 | 	}
162 | 
163 | 	// Only include stack trace if level is >= withStackLevel
164 | 	newArgs := make([]any, 0, len(args)+2)
165 | 	newArgs = append(newArgs, args...)
166 | 	if level >= slog.Level(withStackLevel.Load()) {
167 | 		newArgs = append(newArgs, "stack", getStack(stackSkip, stackDepth))
168 | 	}
169 | 
170 | 	logger.Log(ctx, level, msg, newArgs...)
171 | }
172 | 
173 | // getStack returns a formatted call stack trace.
174 | func getStack(skip, depth int) string {
175 | 	pc := make([]uintptr, depth)
176 | 	n := runtime.Callers(skip+2, pc) // skip itself and runtime.Callers
177 | 	if n == 0 {
178 | 		return ""
179 | 	}
180 | 
181 | 	var b strings.Builder
182 | 	b.Grow(n * avgFrameLen)
183 | 
184 | 	frames := runtime.CallersFrames(pc[:n])
185 | 	first := true
186 | 	for frame, more := frames.Next(); more; frame, more = frames.Next() {
187 | 		if !first {
188 | 			b.WriteString(" <- ")
189 | 		}
190 | 		first = false
191 | 
192 | 		fn := strings.TrimPrefix(frame.Function, model.Module) // no module prefix for zenfeed self.
193 | 		b.WriteString(fn)
194 | 		b.WriteByte(':')
195 | 		b.WriteString(strconv.Itoa(frame.Line))
196 | 	}
197 | 
198 | 	return b.String()
199 | }
200 | 


--------------------------------------------------------------------------------
/pkg/telemetry/metric/metric.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package metric
 17 | 
 18 | import (
 19 | 	"context"
 20 | 	"net/http"
 21 | 	"time"
 22 | 
 23 | 	"github.com/prometheus/client_golang/prometheus"
 24 | 	"github.com/prometheus/client_golang/prometheus/promauto"
 25 | 	"github.com/prometheus/client_golang/prometheus/promhttp"
 26 | 
 27 | 	"github.com/glidea/zenfeed/pkg/model"
 28 | 	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
 29 | )
 30 | 
 31 | func Handler() http.Handler {
 32 | 	return promhttp.Handler()
 33 | }
 34 | 
 35 | var (
 36 | 	operationInFlight = promauto.NewGaugeVec(
 37 | 		prometheus.GaugeOpts{
 38 | 			Namespace: model.AppName,
 39 | 			Name:      "operation_in_flight",
 40 | 			Help:      "Number of operations in flight.",
 41 | 		},
 42 | 		[]string{
 43 | 			telemetrymodel.KeyComponent,
 44 | 			telemetrymodel.KeyComponentInstance,
 45 | 			telemetrymodel.KeyOperation,
 46 | 		},
 47 | 	)
 48 | 
 49 | 	operationTotal = promauto.NewCounterVec(
 50 | 		prometheus.CounterOpts{
 51 | 			Namespace: model.AppName,
 52 | 			Name:      "operation_total",
 53 | 			Help:      "Total number of operations.",
 54 | 		},
 55 | 		[]string{
 56 | 			telemetrymodel.KeyComponent,
 57 | 			telemetrymodel.KeyComponentInstance,
 58 | 			telemetrymodel.KeyOperation,
 59 | 			telemetrymodel.KeyResult,
 60 | 		},
 61 | 	)
 62 | 
 63 | 	operationDuration = promauto.NewHistogramVec(
 64 | 		prometheus.HistogramOpts{
 65 | 			Namespace: model.AppName,
 66 | 			Name:      "operation_duration_seconds",
 67 | 			Help:      "Histogram of operation latencies in seconds.",
 68 | 			Buckets:   []float64{.001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 20},
 69 | 		},
 70 | 		[]string{
 71 | 			telemetrymodel.KeyComponent,
 72 | 			telemetrymodel.KeyComponentInstance,
 73 | 			telemetrymodel.KeyOperation,
 74 | 			telemetrymodel.KeyResult,
 75 | 		},
 76 | 	)
 77 | )
 78 | 
 79 | type ctxKey uint8
 80 | 
 81 | const (
 82 | 	ctxKeyComponent ctxKey = iota
 83 | 	ctxKeyInstance
 84 | 	ctxKeyOperation
 85 | 	ctxKeyStartTime
 86 | )
 87 | 
 88 | func StartWith(ctx context.Context, keyvals ...any) context.Context {
 89 | 	// Extend from parent context.
 90 | 	component, instance, operation, _ := parseFrom(ctx)
 91 | 
 92 | 	// Parse component and operation... from keyvals.
 93 | 	for i := 0; i < len(keyvals); i += 2 {
 94 | 		if i+1 < len(keyvals) {
 95 | 			switch keyvals[i] {
 96 | 			case telemetrymodel.KeyComponent:
 97 | 				component = keyvals[i+1].(string)
 98 | 			case telemetrymodel.KeyComponentInstance:
 99 | 				instance = keyvals[i+1].(string)
100 | 			case telemetrymodel.KeyOperation:
101 | 				operation = keyvals[i+1].(string)
102 | 			}
103 | 		}
104 | 	}
105 | 	if component == "" || operation == "" {
106 | 		panic("missing required keyvals")
107 | 	}
108 | 
109 | 	// Record operation in flight.
110 | 	operationInFlight.WithLabelValues(component, instance, operation).Inc()
111 | 
112 | 	// Add to context.
113 | 	ctx = context.WithValue(ctx, ctxKeyComponent, component)
114 | 	ctx = context.WithValue(ctx, ctxKeyInstance, instance)
115 | 	ctx = context.WithValue(ctx, ctxKeyOperation, operation)
116 | 	ctx = context.WithValue(ctx, ctxKeyStartTime, time.Now())
117 | 
118 | 	return ctx
119 | }
120 | 
121 | func RecordRED(ctx context.Context, err error) {
122 | 	// Parse component, instance, operation, and start time from context.
123 | 	component, instance, operation, startTime := parseFrom(ctx)
124 | 	duration := time.Since(startTime)
125 | 
126 | 	// Determine result.
127 | 	result := telemetrymodel.ValResultSuccess
128 | 	if err != nil {
129 | 		result = telemetrymodel.ValResultError
130 | 	}
131 | 
132 | 	// Record metrics.
133 | 	operationTotal.WithLabelValues(component, instance, operation, result).Inc()
134 | 	operationDuration.WithLabelValues(component, instance, operation, result).Observe(duration.Seconds())
135 | 	operationInFlight.WithLabelValues(component, instance, operation).Dec()
136 | }
137 | 
138 | func Close(id prometheus.Labels) {
139 | 	operationInFlight.DeletePartialMatch(id)
140 | 	operationTotal.DeletePartialMatch(id)
141 | 	operationDuration.DeletePartialMatch(id)
142 | }
143 | 
144 | func parseFrom(ctx context.Context) (component, instance, operation string, startTime time.Time) {
145 | 	if v := ctx.Value(ctxKeyComponent); v != nil {
146 | 		component = v.(string)
147 | 	}
148 | 	if v := ctx.Value(ctxKeyInstance); v != nil {
149 | 		instance = v.(string)
150 | 	}
151 | 	if v := ctx.Value(ctxKeyOperation); v != nil {
152 | 		operation = v.(string)
153 | 	}
154 | 	if v := ctx.Value(ctxKeyStartTime); v != nil {
155 | 		startTime = v.(time.Time)
156 | 	}
157 | 
158 | 	return
159 | }
160 | 


--------------------------------------------------------------------------------
/pkg/telemetry/model/model.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package model
17 | 
// Label keys shared by logs and metrics.
const (
	// KeyComponent is the label for the component name.
	KeyComponent = "component"
	// KeyComponentInstance is the label for the component instance name.
	KeyComponentInstance = "component_instance"
	// KeyOperation is the label for the operation name.
	KeyOperation = "operation"
	// KeyResult is the label for the result of the operation.
	KeyResult = "result"
)

// Values of the KeyResult label.
const (
	// ValResultSuccess marks an operation that finished without error.
	ValResultSuccess = "success"
	// ValResultError marks an operation that finished with an error.
	ValResultError = "error"
)
30 | 


--------------------------------------------------------------------------------
/pkg/telemetry/server/server.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package http
 17 | 
import (
	"context"
	"net"
	"net/http"
	"net/http/pprof"
	"time"

	"github.com/pkg/errors"

	"github.com/glidea/zenfeed/pkg/component"
	"github.com/glidea/zenfeed/pkg/config"
	telemetry "github.com/glidea/zenfeed/pkg/telemetry"
	"github.com/glidea/zenfeed/pkg/telemetry/log"
	"github.com/glidea/zenfeed/pkg/telemetry/metric"
	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
)
 32 | 
 33 | // --- Interface code block ---
 34 | type Server interface {
 35 | 	component.Component
 36 | }
 37 | 
 38 | type Config struct {
 39 | 	Address string
 40 | }
 41 | 
 42 | func (c *Config) Validate() error {
 43 | 	if c.Address == "" {
 44 | 		c.Address = ":9090"
 45 | 	}
 46 | 	if _, _, err := net.SplitHostPort(c.Address); err != nil {
 47 | 		return errors.Wrap(err, "invalid address")
 48 | 	}
 49 | 
 50 | 	return nil
 51 | }
 52 | 
 53 | func (c *Config) From(app *config.App) *Config {
 54 | 	c.Address = app.Telemetry.Address
 55 | 
 56 | 	return c
 57 | }
 58 | 
 59 | type Dependencies struct {
 60 | }
 61 | 
 62 | // --- Factory code block ---
 63 | type Factory component.Factory[Server, config.App, Dependencies]
 64 | 
 65 | func NewFactory(mockOn ...component.MockOption) Factory {
 66 | 	if len(mockOn) > 0 {
 67 | 		return component.FactoryFunc[Server, config.App, Dependencies](
 68 | 			func(instance string, config *config.App, dependencies Dependencies) (Server, error) {
 69 | 				m := &mockServer{}
 70 | 				component.MockOptions(mockOn).Apply(&m.Mock)
 71 | 
 72 | 				return m, nil
 73 | 			},
 74 | 		)
 75 | 	}
 76 | 
 77 | 	return component.FactoryFunc[Server, config.App, Dependencies](new)
 78 | }
 79 | 
 80 | func new(instance string, app *config.App, dependencies Dependencies) (Server, error) {
 81 | 	config := &Config{}
 82 | 	config.From(app)
 83 | 	if err := config.Validate(); err != nil {
 84 | 		return nil, errors.Wrap(err, "validate config")
 85 | 	}
 86 | 
 87 | 	router := http.NewServeMux()
 88 | 	router.Handle("/health", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 89 | 		w.WriteHeader(200)
 90 | 	}))
 91 | 	router.Handle("/metrics", metric.Handler())
 92 | 	router.HandleFunc("/pprof", pprof.Index)
 93 | 	router.HandleFunc("/pprof/cmdline", pprof.Cmdline)
 94 | 	router.HandleFunc("/pprof/profile", pprof.Profile)
 95 | 	router.HandleFunc("/pprof/symbol", pprof.Symbol)
 96 | 	router.HandleFunc("/pprof/trace", pprof.Trace)
 97 | 
 98 | 	return &server{
 99 | 		Base: component.New(&component.BaseConfig[Config, Dependencies]{
100 | 			Name:         "TelemetryServer",
101 | 			Instance:     instance,
102 | 			Config:       config,
103 | 			Dependencies: dependencies,
104 | 		}),
105 | 		http: &http.Server{Addr: config.Address, Handler: router},
106 | 	}, nil
107 | }
108 | 
109 | // --- Implementation code block ---
110 | type server struct {
111 | 	*component.Base[Config, Dependencies]
112 | 	http *http.Server
113 | }
114 | 
115 | func (s *server) Run() (err error) {
116 | 	ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
117 | 	defer func() { telemetry.End(ctx, err) }()
118 | 
119 | 	serverErr := make(chan error, 1)
120 | 	go func() {
121 | 		serverErr <- s.http.ListenAndServe()
122 | 	}()
123 | 
124 | 	s.MarkReady()
125 | 	select {
126 | 	case <-ctx.Done():
127 | 		log.Info(ctx, "shutting down")
128 | 
129 | 		return s.http.Shutdown(ctx)
130 | 	case err := <-serverErr:
131 | 		return errors.Wrap(err, "listen and serve")
132 | 	}
133 | }
134 | 
135 | type mockServer struct {
136 | 	component.Mock
137 | }
138 | 


--------------------------------------------------------------------------------
/pkg/telemetry/telemetry.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package telemetry
17 | 
18 | import (
19 | 	"context"
20 | 
21 | 	"github.com/prometheus/client_golang/prometheus"
22 | 
23 | 	"github.com/glidea/zenfeed/pkg/telemetry/log"
24 | 	"github.com/glidea/zenfeed/pkg/telemetry/metric"
25 | )
26 | 
// Labels is a flat list of alternating keys and values:
// key0, value0, key1, value1, ...
type Labels []any

// Get returns the value paired with the first key equal to key, or nil when
// there is no such key. Only complete pairs are considered, so a trailing key
// without a value never matches (and no longer causes an index-out-of-range
// panic).
func (l Labels) Get(key any) any {
	for i := 0; i+1 < len(l); i += 2 {
		if l[i] == key {
			return l[i+1]
		}
	}

	return nil
}
38 | 
39 | // StartWith starts a new operation with the given key-value pairs.
40 | // MUST call End() to finalize the operation.
41 | func StartWith(ctx context.Context, keyvals ...any) context.Context {
42 | 	ctx = log.With(ctx, keyvals...)
43 | 	ctx = metric.StartWith(ctx, keyvals...)
44 | 
45 | 	return ctx
46 | }
47 | 
48 | // End records and finalizes the operation.
49 | func End(ctx context.Context, err error) {
50 | 	metric.RecordRED(ctx, err)
51 | }
52 | 
53 | // CloseMetrics closes the metrics for the given id.
54 | func CloseMetrics(id prometheus.Labels) {
55 | 	metric.Close(id)
56 | }
57 | 


--------------------------------------------------------------------------------
/pkg/test/test.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package test
17 | 
// Case is a BDD-style test case for a feature.
//
// Background: https://en.wikipedia.org/wiki/Behavior-driven_development.
// You do not need to understand BDD in depth: we simply use Scenario, Given,
// When and Then to describe a test case, which has several advantages:
//  1. It is highly readable and easy to maintain.
//  2. It doubles as a requirement or use-case description, which helps in a
//     TDD workflow and lets AI generate code from it ("code as prompt").
//  3. It tests against requirement descriptions rather than implementation
//     details: top-down, with requirements ranking above details.
//
// Note that "requirement" is meant broadly here: it covers not only
// product-side requirements but also the interface behavior defined by the
// module under test.
//
// TODO: Use this consistently.
type Case[T1 any, T2 any, T3 any] struct {
	// Scenario names the feature exercised by the test case.
	// E.g. "Query hot block with label filters".
	Scenario string

	// Given is the initial context at the start of the scenario, in one or
	// more clauses. Context is NOT the same thing as the method parameters.
	// E.g. "a hot block with indexed feeds".
	Given string
	// When is the event that triggers the scenario.
	// E.g. "querying with label filters".
	When string
	// Then is the expected outcome, in one or more clauses.
	// E.g. "should return matching feeds".
	Then string

	// GivenDetail spells out the given context.
	// Generally it describes the state the object under test should be in.
	// E.g. for a "hot block": what does it look like, what are its member
	// values, and how are its external dependencies expected to behave?
	GivenDetail T1
	// WhenDetail spells out the triggering event.
	// Generally it describes the parameters of the method call.
	// E.g. what the query options look like.
	WhenDetail T2
	// ThenExpected is the expected outcome of the scenario.
	// Generally it describes the return value of the method call.
	// E.g. what the returned feeds look like.
	ThenExpected T3
}
63 | 


--------------------------------------------------------------------------------
/pkg/util/binary/binary.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package binary
 17 | 
 18 | import (
 19 | 	"encoding/binary"
 20 | 	"io"
 21 | 	"math"
 22 | 	"sync"
 23 | 
 24 | 	"github.com/pkg/errors"
 25 | 
 26 | 	"github.com/glidea/zenfeed/pkg/util/buffer"
 27 | )
 28 | 
 29 | // WriteString writes a string to a writer.
 30 | func WriteString(w io.Writer, str string) error {
 31 | 	len := len(str)
 32 | 	if len > math.MaxUint32 {
 33 | 		return errors.New("length exceeds maximum uint32")
 34 | 	}
 35 | 
 36 | 	if err := WriteUint32(w, uint32(len)); err != nil {
 37 | 		return errors.Wrap(err, "write length")
 38 | 	}
 39 | 	if _, err := io.WriteString(w, str); err != nil {
 40 | 		return errors.Wrap(err, "write data")
 41 | 	}
 42 | 
 43 | 	return nil
 44 | }
 45 | 
 46 | // ReadString reads a string from a reader.
 47 | func ReadString(r io.Reader) (string, error) {
 48 | 	len, err := ReadUint32(r)
 49 | 	if err != nil {
 50 | 		return "", errors.Wrap(err, "read length")
 51 | 	}
 52 | 
 53 | 	bb := buffer.Get()
 54 | 	defer buffer.Put(bb)
 55 | 	// bb.EnsureRemaining(int(len))
 56 | 
 57 | 	if _, err := io.CopyN(bb, r, int64(len)); err != nil {
 58 | 		return "", errors.Wrap(err, "read data")
 59 | 	}
 60 | 
 61 | 	return bb.String(), nil
 62 | }
 63 | 
 64 | var smallBufPool = sync.Pool{
 65 | 	New: func() any {
 66 | 		// 8 bytes is enough for uint64, uint32, float32.
 67 | 		b := make([]byte, 8)
 68 | 
 69 | 		return &b
 70 | 	},
 71 | }
 72 | 
 73 | // WriteUint64 writes a uint64 using a pooled buffer.
 74 | func WriteUint64(w io.Writer, v uint64) error {
 75 | 	bp := smallBufPool.Get().(*[]byte)
 76 | 	defer smallBufPool.Put(bp)
 77 | 	b := *bp
 78 | 
 79 | 	binary.LittleEndian.PutUint64(b, v)
 80 | 	_, err := w.Write(b[:8])
 81 | 
 82 | 	return err
 83 | }
 84 | 
 85 | // ReadUint64 reads a uint64 using a pooled buffer.
 86 | func ReadUint64(r io.Reader) (uint64, error) {
 87 | 	bp := smallBufPool.Get().(*[]byte)
 88 | 	defer smallBufPool.Put(bp)
 89 | 	b := (*bp)[:8]
 90 | 
 91 | 	// Read exactly 8 bytes into the slice.
 92 | 	if _, err := io.ReadFull(r, b); err != nil {
 93 | 		return 0, errors.Wrap(err, "read uint64")
 94 | 	}
 95 | 
 96 | 	return binary.LittleEndian.Uint64(b), nil
 97 | }
 98 | 
 99 | // WriteUint32 writes a uint32 using a pooled buffer.
100 | func WriteUint32(w io.Writer, v uint32) error {
101 | 	bp := smallBufPool.Get().(*[]byte)
102 | 	defer smallBufPool.Put(bp)
103 | 	b := *bp
104 | 
105 | 	binary.LittleEndian.PutUint32(b, v)
106 | 	_, err := w.Write(b[:4])
107 | 
108 | 	return err
109 | }
110 | 
111 | // ReadUint32 reads a uint32 using a pooled buffer.
112 | func ReadUint32(r io.Reader) (uint32, error) {
113 | 	bp := smallBufPool.Get().(*[]byte)
114 | 	defer smallBufPool.Put(bp)
115 | 	b := (*bp)[:4]
116 | 
117 | 	// Read exactly 4 bytes into the slice.
118 | 	if _, err := io.ReadFull(r, b); err != nil {
119 | 		return 0, errors.Wrap(err, "read uint32")
120 | 	}
121 | 
122 | 	return binary.LittleEndian.Uint32(b), nil
123 | }
124 | 
125 | // WriteFloat32 writes a float32 using a pooled buffer.
126 | func WriteFloat32(w io.Writer, v float32) error {
127 | 	return WriteUint32(w, math.Float32bits(v))
128 | }
129 | 
130 | // ReadFloat32 reads a float32 using a pooled buffer.
131 | func ReadFloat32(r io.Reader) (float32, error) {
132 | 	// Read the uint32 bits first.
133 | 	bits, err := ReadUint32(r)
134 | 	if err != nil {
135 | 		return 0, err
136 | 	}
137 | 
138 | 	// Convert bits to float32.
139 | 	return math.Float32frombits(bits), nil
140 | }
141 | 


--------------------------------------------------------------------------------
/pkg/util/binary/binary_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package binary
17 | 
18 | import (
19 | 	"bytes"
20 | 	"testing"
21 | 
22 | 	. "github.com/onsi/gomega"
23 | 
24 | 	"github.com/glidea/zenfeed/pkg/test"
25 | )
26 | 
27 | func TestWriteString(t *testing.T) {
28 | 	RegisterTestingT(t)
29 | 
30 | 	type givenDetail struct{}
31 | 	type whenDetail struct {
32 | 		str string
33 | 	}
34 | 	type thenExpected struct{}
35 | 
36 | 	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
37 | 		{
38 | 			Scenario: "Write empty string",
39 | 			When:     "writing an empty string to a buffer",
40 | 			Then:     "should write successfully without error",
41 | 			WhenDetail: whenDetail{
42 | 				str: "",
43 | 			},
44 | 			ThenExpected: thenExpected{},
45 | 		},
46 | 		{
47 | 			Scenario: "Write normal string",
48 | 			When:     "writing a normal string to a buffer",
49 | 			Then:     "should write successfully without error",
50 | 			WhenDetail: whenDetail{
51 | 				str: "hello world",
52 | 			},
53 | 			ThenExpected: thenExpected{},
54 | 		},
55 | 	}
56 | 
57 | 	for _, tt := range tests {
58 | 		t.Run(tt.Scenario, func(t *testing.T) {
59 | 			// When.
60 | 			buf := &bytes.Buffer{}
61 | 			err := WriteString(buf, tt.WhenDetail.str)
62 | 
63 | 			// Then.
64 | 			Expect(err).NotTo(HaveOccurred())
65 | 
66 | 			// Verify the written data by reading it back
67 | 			readStr, readErr := ReadString(bytes.NewReader(buf.Bytes()))
68 | 			Expect(readErr).NotTo(HaveOccurred())
69 | 			Expect(readStr).To(Equal(tt.WhenDetail.str))
70 | 		})
71 | 	}
72 | }
73 | 


--------------------------------------------------------------------------------
/pkg/util/buffer/buffer.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package buffer
 17 | 
 18 | import (
 19 | 	"sync"
 20 | 	"unsafe"
 21 | )
 22 | 
 23 | var pool = sync.Pool{
 24 | 	New: func() any {
 25 | 		return &Bytes{B: make([]byte, 0, 1024)}
 26 | 	},
 27 | }
 28 | 
 29 | func Get() *Bytes {
 30 | 	return pool.Get().(*Bytes)
 31 | }
 32 | 
 33 | func Put(b *Bytes) {
 34 | 	if b.Len() > 512*1024 { // Avoid large buffer.
 35 | 		b = nil
 36 | 
 37 | 		return
 38 | 	}
 39 | 
 40 | 	b.Reset()
 41 | 	pool.Put(b)
 42 | }
 43 | 
 44 | // Bytes is a simple buffer.
 45 | // It is unsafe, SHOULD not modify existing bytes.
 46 | type Bytes struct {
 47 | 	B []byte
 48 | }
 49 | 
 50 | func (bs *Bytes) Reset() {
 51 | 	bs.B = bs.B[:0]
 52 | }
 53 | 
 54 | func (bs *Bytes) String() string {
 55 | 	return string(bs.B)
 56 | }
 57 | 
 58 | func (bs *Bytes) Bytes() []byte {
 59 | 	return bs.B
 60 | }
 61 | 
 62 | func (bs *Bytes) Write(p []byte) (n int, err error) {
 63 | 	bs.B = append(bs.B, p...)
 64 | 
 65 | 	return len(p), nil
 66 | }
 67 | 
 68 | // Unsafe!!!
 69 | func (bs *Bytes) WriteString(s string) (n int, err error) {
 70 | 	b := unsafe.Slice(unsafe.StringData(s), len(s))
 71 | 
 72 | 	return bs.Write(b)
 73 | }
 74 | 
 75 | // EnsureRemaining ensures the buffer has space for at least `atLeast`
 76 | // additional bytes beyond the current length (i.e., remaining capacity).
 77 | // It grows the buffer if necessary using an amortized growth strategy.
 78 | func (bs *Bytes) EnsureRemaining(atLeast int) {
 79 | 	if atLeast <= 0 {
 80 | 		return
 81 | 	}
 82 | 
 83 | 	// Calculate the minimum total capacity required.
 84 | 	// needCap = current_length + required_remaining_capacity
 85 | 	needCap := len(bs.B) + atLeast
 86 | 	if cap(bs.B) >= needCap {
 87 | 		// Current capacity is already sufficient.
 88 | 		return
 89 | 	}
 90 | 
 91 | 	// --- Need to grow ---
 92 | 
 93 | 	// Determine the new capacity.
 94 | 	// Strategy: Double the existing capacity, but make sure it's at least needCap.
 95 | 	// This amortizes the cost of allocations over time.
 96 | 	newCap := max(cap(bs.B)*2, needCap)
 97 | 
 98 | 	// Allocate a new slice with the current length and the calculated new capacity.
 99 | 	// Note: We create it with the *current length*, not zero length.
100 | 	newB := make([]byte, len(bs.B), newCap)
101 | 
102 | 	// Copy the existing data from the old buffer to the new buffer.
103 | 	copy(newB, bs.B) // copy is efficient
104 | 
105 | 	// Replace the buffer's internal slice with the new one.
106 | 	bs.B = newB
107 | }
108 | 
109 | func (bs *Bytes) Remaining() int {
110 | 	return cap(bs.B) - len(bs.B)
111 | }
112 | 
113 | func (bs *Bytes) Len() int {
114 | 	return len(bs.B)
115 | }
116 | 
117 | func (bs *Bytes) Cap() int {
118 | 	return cap(bs.B)
119 | }
120 | 


--------------------------------------------------------------------------------
/pkg/util/crawl/crawl.go:
--------------------------------------------------------------------------------
  1 | package crawl
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"fmt"
  6 | 	"io"
  7 | 	"net/http"
  8 | 	"net/url"
  9 | 	"sync"
 10 | 
 11 | 	"github.com/pkg/errors"
 12 | 	"github.com/temoto/robotstxt"
 13 | 
 14 | 	"github.com/glidea/zenfeed/pkg/util/text_convert"
 15 | )
 16 | 
 17 | type Crawler interface {
 18 | 	Markdown(ctx context.Context, u string) ([]byte, error)
 19 | }
 20 | 
 21 | type local struct {
 22 | 	hc *http.Client
 23 | 
 24 | 	robotsDataCache sync.Map
 25 | }
 26 | 
 27 | func NewLocal() Crawler {
 28 | 	return &local{
 29 | 		hc: &http.Client{},
 30 | 	}
 31 | }
 32 | 
 33 | func (c *local) Markdown(ctx context.Context, u string) ([]byte, error) {
 34 | 	// Check if the page is allowed.
 35 | 	if err := c.checkAllowed(ctx, u); err != nil {
 36 | 		return nil, errors.Wrapf(err, "check robots.txt for %s", u)
 37 | 	}
 38 | 
 39 | 	// Prepare the request.
 40 | 	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
 41 | 	if err != nil {
 42 | 		return nil, errors.Wrapf(err, "create request for %s", u)
 43 | 	}
 44 | 	req.Header.Set("User-Agent", userAgent)
 45 | 
 46 | 	// Send the request.
 47 | 	resp, err := c.hc.Do(req)
 48 | 	if err != nil {
 49 | 		return nil, errors.Wrapf(err, "fetch %s", u)
 50 | 	}
 51 | 	defer func() { _ = resp.Body.Close() }()
 52 | 
 53 | 	// Parse the response.
 54 | 	if resp.StatusCode != http.StatusOK {
 55 | 		return nil, errors.Errorf("received non-200 status code %d from %s", resp.StatusCode, u)
 56 | 	}
 57 | 	bodyBytes, err := io.ReadAll(resp.Body)
 58 | 	if err != nil {
 59 | 		return nil, errors.Wrapf(err, "read body from %s", u)
 60 | 	}
 61 | 
 62 | 	// Convert the body to markdown.
 63 | 	mdBytes, err := textconvert.HTMLToMarkdown(bodyBytes)
 64 | 	if err != nil {
 65 | 		return nil, errors.Wrap(err, "convert html to markdown")
 66 | 	}
 67 | 
 68 | 	return mdBytes, nil
 69 | }
 70 | 
 71 | const userAgent = "ZenFeed"
 72 | 
 73 | func (c *local) checkAllowed(ctx context.Context, u string) error {
 74 | 	parsedURL, err := url.Parse(u)
 75 | 	if err != nil {
 76 | 		return errors.Wrapf(err, "parse url %s", u)
 77 | 	}
 78 | 
 79 | 	d, err := c.getRobotsData(ctx, parsedURL.Host)
 80 | 	if err != nil {
 81 | 		return errors.Wrapf(err, "check robots.txt for %s", parsedURL.Host)
 82 | 	}
 83 | 	if !d.TestAgent(parsedURL.Path, userAgent) {
 84 | 		return errors.Errorf("disallowed by robots.txt for %s", u)
 85 | 	}
 86 | 
 87 | 	return nil
 88 | }
 89 | 
 90 | // getRobotsData fetches and parses robots.txt for a given host.
 91 | func (c *local) getRobotsData(ctx context.Context, host string) (*robotstxt.RobotsData, error) {
 92 | 	// Check the cache.
 93 | 	if data, found := c.robotsDataCache.Load(host); found {
 94 | 		return data.(*robotstxt.RobotsData), nil
 95 | 	}
 96 | 
 97 | 	// Prepare the request.
 98 | 	robotsURL := fmt.Sprintf("https://%s/robots.txt", host)
 99 | 	req, err := http.NewRequestWithContext(ctx, http.MethodGet, robotsURL, nil)
100 | 	if err != nil {
101 | 		return nil, errors.Wrapf(err, "create request for %s", robotsURL)
102 | 	}
103 | 	req.Header.Set("User-Agent", userAgent)
104 | 
105 | 	// Send the request.
106 | 	resp, err := c.hc.Do(req)
107 | 	if err != nil {
108 | 		return nil, errors.Wrapf(err, "fetch %s", robotsURL)
109 | 	}
110 | 	defer func() { _ = resp.Body.Close() }()
111 | 
112 | 	// Parse the response.
113 | 	switch resp.StatusCode {
114 | 	case http.StatusOK:
115 | 		data, err := robotstxt.FromResponse(resp)
116 | 		if err != nil {
117 | 			return nil, errors.Wrapf(err, "parse robots.txt from %s", robotsURL)
118 | 		}
119 | 		c.robotsDataCache.Store(host, data)
120 | 
121 | 		return data, nil
122 | 
123 | 	case http.StatusNotFound:
124 | 		data := &robotstxt.RobotsData{}
125 | 		c.robotsDataCache.Store(host, data)
126 | 
127 | 		return data, nil
128 | 
129 | 	case http.StatusUnauthorized, http.StatusForbidden:
130 | 		return nil, errors.Errorf("access to %s denied (status %d)", robotsURL, resp.StatusCode)
131 | 	default:
132 | 		return nil, errors.Errorf("unexpected status %d fetching %s", resp.StatusCode, robotsURL)
133 | 	}
134 | }
135 | 
136 | // jina is a Crawler that delegates page fetching to https://r.jina.ai/.
137 | type jina struct {
138 | 	hc    *http.Client // HTTP client used for every request.
139 | 	token string       // Optional API token, sent as a Bearer credential.
140 | }
141 | 
142 | // NewJina returns a Crawler that retrieves pages via https://r.jina.ai/.
143 | //
144 | // An empty token still works, but requests are then subject to a lower rate
145 | // limit. See https://jina.ai/api-dashboard/rate-limit.
146 | func NewJina(token string) Crawler {
147 | 	return &jina{hc: &http.Client{}, token: token}
148 | }
149 | 
150 | // Markdown requests u through the proxy URL and returns the response body.
151 | // It fails when the request cannot be built or sent, when the status is not
152 | // 200, or when the body cannot be read.
153 | func (c *jina) Markdown(ctx context.Context, u string) ([]byte, error) {
154 | 	target := "https://r.jina.ai/" + u
155 | 
156 | 	request, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
157 | 	if err != nil {
158 | 		return nil, errors.Wrapf(err, "create request for %s", u)
159 | 	}
160 | 
161 | 	// The Authorization header is only attached when a token was configured.
162 | 	headers := request.Header
163 | 	headers.Set("X-Engine", "browser")
164 | 	headers.Set("X-Robots-Txt", userAgent)
165 | 	if len(c.token) > 0 {
166 | 		headers.Set("Authorization", "Bearer "+c.token)
167 | 	}
168 | 
169 | 	response, err := c.hc.Do(request)
170 | 	if err != nil {
171 | 		return nil, errors.Wrapf(err, "fetch %s", target)
172 | 	}
173 | 	defer func() { _ = response.Body.Close() }()
174 | 
175 | 	if status := response.StatusCode; status != http.StatusOK {
176 | 		return nil, errors.Errorf("received non-200 status code %d from %s", status, target)
177 | 	}
178 | 
179 | 	body, err := io.ReadAll(response.Body)
180 | 	if err != nil {
181 | 		return nil, errors.Wrapf(err, "read body from %s", target)
182 | 	}
183 | 
184 | 	return body, nil
185 | }
181 | 


--------------------------------------------------------------------------------
/pkg/util/hash/hash.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package hash
17 | 
18 | import "hash/fnv"
19 | 
20 | // Sum64 returns the 64-bit FNV-1a hash of s.
21 | func Sum64(s string) uint64 {
22 | 	hasher := fnv.New64a()
23 | 	_, _ = hasher.Write([]byte(s)) // hash.Hash.Write never returns an error.
24 | 
25 | 	return hasher.Sum64()
26 | }
26 | 
27 | // Sum64s returns the 64-bit FNV-1a hash of ss. A zero byte is written after
28 | // every element, so element boundaries contribute to the hash.
29 | func Sum64s(ss []string) uint64 {
30 | 	hasher := fnv.New64a()
31 | 	terminator := []byte{0}
32 | 	for i := range ss {
33 | 		_, _ = hasher.Write([]byte(ss[i]))
34 | 		_, _ = hasher.Write(terminator)
35 | 	}
36 | 
37 | 	return hasher.Sum64()
38 | }
36 | 


--------------------------------------------------------------------------------
/pkg/util/heap/heap.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package heap
 17 | 
 18 | import (
 19 | 	"container/heap"
 20 | 	"sort"
 21 | )
 22 | 
 23 | // Heap is a generic binary heap ordered by a caller-supplied less function.
 24 | // Through TryEvictPush it can also be used as a size-bounded container.
 25 | type Heap[T any] struct {
 26 | 	inner *innerHeap[T] // container/heap adapter that owns the elements.
 27 | 	limit int           // Size bound honored by TryEvictPush.
 28 | }
 29 | 
 30 | // New heapifies data in place according to less and returns the Heap that
 31 | // wraps it. cap(data) becomes the limit honored by TryEvictPush.
 32 | func New[T any](data []T, less func(a, b T) bool) *Heap[T] {
 33 | 	h := &Heap[T]{
 34 | 		inner: newInnerHeap(data, less),
 35 | 		limit: cap(data),
 36 | 	}
 37 | 	heap.Init(h.inner)
 38 | 
 39 | 	return h
 40 | }
 41 | 
 42 | // TryEvictPush inserts x while respecting the limit. Below the limit x is
 43 | // always pushed. At the limit the root is evicted to make room only when
 44 | // less(root, x) holds; otherwise x is dropped.
 45 | func (h *Heap[T]) TryEvictPush(x T) {
 46 | 	switch {
 47 | 	case h.Len() < h.limit:
 48 | 	case h.Len() == 0:
 49 | 		// Only reachable with a non-positive limit: there is nothing to evict,
 50 | 		// and popping an empty heap would panic, so x is dropped.
 51 | 		return
 52 | 	case h.inner.less(h.Peek(), x):
 53 | 		h.Pop()
 54 | 	default:
 55 | 		return
 56 | 	}
 57 | 
 58 | 	h.Push(x)
 59 | }
 60 | 
 61 | // Push adds x to the heap without consulting the limit.
 62 | func (h *Heap[T]) Push(x T) {
 63 | 	heap.Push(h.inner, x)
 64 | }
 65 | 
 66 | // Pop removes and returns the root element. It panics on an empty heap.
 67 | func (h *Heap[T]) Pop() T {
 68 | 	return heap.Pop(h.inner).(T)
 69 | }
 70 | 
 71 | // PopLast removes and returns the element stored at the last index of the
 72 | // underlying slice. It panics on an empty heap.
 73 | func (h *Heap[T]) PopLast() T {
 74 | 	return heap.Remove(h.inner, h.Len()-1).(T)
 75 | }
 76 | 
 77 | // Peek returns the root element without removing it, or the zero value of T
 78 | // when the heap is empty.
 79 | func (h *Heap[T]) Peek() T {
 80 | 	if h.Len() == 0 {
 81 | 		var zero T
 82 | 
 83 | 		return zero
 84 | 	}
 85 | 
 86 | 	return h.inner.data[0]
 87 | }
 88 | 
 89 | // Len returns the number of elements currently stored.
 90 | func (h *Heap[T]) Len() int {
 91 | 	return h.inner.Len()
 92 | }
 93 | 
 94 | // Cap returns the limit honored by TryEvictPush.
 95 | func (h *Heap[T]) Cap() int {
 96 | 	return h.limit
 97 | }
 98 | 
 99 | // Slice returns the underlying slice without copying it.
100 | func (h *Heap[T]) Slice() []T {
101 | 	return h.inner.data
102 | }
103 | 
104 | // DESCSort sorts the underlying slice in descending order with respect to
105 | // less. A descending slice generally no longer satisfies the heap order, so
106 | // treat the heap as a plain slice (see Slice) afterwards.
107 | func (h *Heap[T]) DESCSort() {
108 | 	data, less := h.inner.data, h.inner.less
109 | 	sort.Slice(data, func(i, j int) bool {
110 | 		// i sorts before j iff data[j] < data[i]. This is a strict ordering,
111 | 		// unlike !less(data[i], data[j]), which holds both ways for equal
112 | 		// elements and so breaks sort.Slice's comparator contract.
113 | 		return less(data[j], data[i])
114 | 	})
115 | }
116 | 
117 | // innerHeap adapts a slice and a less function to container/heap.Interface.
118 | type innerHeap[T any] struct {
119 | 	data []T
120 | 	less func(a, b T) bool
121 | }
122 | 
123 | // newInnerHeap wraps data and less without copying or reordering data.
124 | func newInnerHeap[T any](data []T, less func(a, b T) bool) *innerHeap[T] {
125 | 	return &innerHeap[T]{
126 | 		data: data,
127 | 		less: less,
128 | 	}
129 | }
130 | 
131 | // Len implements sort.Interface.
132 | func (h *innerHeap[T]) Len() int {
133 | 	return len(h.data)
134 | }
135 | 
136 | // Less implements sort.Interface by delegating to the less function.
137 | func (h *innerHeap[T]) Less(i, j int) bool {
138 | 	return h.less(h.data[i], h.data[j])
139 | }
140 | 
141 | // Swap implements sort.Interface.
142 | func (h *innerHeap[T]) Swap(i, j int) {
143 | 	h.data[i], h.data[j] = h.data[j], h.data[i]
144 | }
145 | 
146 | // Push implements heap.Interface: it appends x, which must be of type T.
147 | func (h *innerHeap[T]) Push(x any) {
148 | 	h.data = append(h.data, x.(T))
149 | }
150 | 
151 | // Pop implements heap.Interface: it removes and returns the last element.
152 | func (h *innerHeap[T]) Pop() any {
153 | 	last := len(h.data) - 1
154 | 	x := h.data[last]
155 | 	h.data = h.data[:last]
156 | 
157 | 	return x
158 | }
125 | 


--------------------------------------------------------------------------------
/pkg/util/json_schema/json_schema.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package jsonschema
 17 | 
 18 | import (
 19 | 	"maps"
 20 | 	"reflect"
 21 | 	"strings"
 22 | 	"time"
 23 | 
 24 | 	"github.com/pkg/errors"
 25 | )
 26 | 
 27 | // ForType generates a JSON Schema for the given reflect.Type.
 28 | // It supports struct fields with json tags and desc tags for metadata.
 29 | func ForType(t reflect.Type) (map[string]any, error) {
 30 | 	definitions := make(map[string]any)
 31 | 	schema, err := forTypeInternal(t, "", make(map[reflect.Type]string), definitions)
 32 | 	if err != nil {
 33 | 		return nil, err
 34 | 	}
 35 | 
 36 | 	if len(definitions) == 0 {
 37 | 		return schema, nil
 38 | 	}
 39 | 
 40 | 	result := map[string]any{
 41 | 		"$schema":     "http://json-schema.org/draft-07/schema#",
 42 | 		"definitions": definitions,
 43 | 	}
 44 | 	maps.Copy(result, schema)
 45 | 
 46 | 	return result, nil
 47 | }
 48 | 
 49 | // forTypeInternal produces the schema for t by dispatching on its kind.
 50 | // Pointers are dereferenced first, and a type already present in visited is
 51 | // emitted as a "$ref" to its definition instead of being expanded again.
 52 | // fieldName is only forwarded to struct handling. A nil t is an error.
 53 | func forTypeInternal(
 54 | 	t reflect.Type,
 55 | 	fieldName string,
 56 | 	visited map[reflect.Type]string,
 57 | 	definitions map[string]any,
 58 | ) (map[string]any, error) {
 59 | 	if t == nil {
 60 | 		return nil, errors.New("type cannot be nil")
 61 | 	}
 62 | 
 63 | 	// Strip every level of pointer indirection.
 64 | 	for t.Kind() == reflect.Ptr {
 65 | 		t = t.Elem()
 66 | 	}
 67 | 
 68 | 	// A type that already has a definition name becomes a reference.
 69 | 	if name, seen := visited[t]; seen {
 70 | 		return map[string]any{"$ref": "#/definitions/" + name}, nil
 71 | 	}
 72 | 
 73 | 	kind := t.Kind()
 74 | 	if kind == reflect.Struct {
 75 | 		return handleStructType(t, fieldName, visited, definitions)
 76 | 	}
 77 | 	if kind == reflect.Slice || kind == reflect.Array {
 78 | 		return handleArrayType(t, visited, definitions)
 79 | 	}
 80 | 	if kind == reflect.Map {
 81 | 		return handleMapType(t, visited, definitions)
 82 | 	}
 83 | 
 84 | 	return handlePrimitiveType(t)
 85 | }
 83 | 
 84 | // handleStructType returns the schema for struct type t.
 85 | //
 86 | // time.Time maps to an inline date-time string. Any other struct is recorded
 87 | // in definitions under its type name ("Anonymous"+fieldName for unnamed
 88 | // structs) and returned as a "$ref". The type is marked visited before its
 89 | // fields are processed, so a field of the same type resolves to a reference.
 90 | func handleStructType(
 91 | 	t reflect.Type,
 92 | 	fieldName string,
 93 | 	visited map[reflect.Type]string,
 94 | 	definitions map[string]any,
 95 | ) (map[string]any, error) {
 96 | 	// time.Time is the only struct with a dedicated representation.
 97 | 	// time.Duration is an integer kind, never reaches this function, and is
 98 | 	// handled by handlePrimitiveType; the former check here was dead code.
 99 | 	if t == reflect.TypeOf(time.Time{}) {
100 | 		return map[string]any{
101 | 			"type":   "string",
102 | 			"format": "date-time",
103 | 		}, nil
104 | 	}
105 | 
106 | 	// Generate type name.
107 | 	typeName := t.Name()
108 | 	if typeName == "" {
109 | 		typeName = "Anonymous" + fieldName
110 | 	}
111 | 	visited[t] = typeName
112 | 
113 | 	// Process schema. "properties" is omitted when no field produced one.
114 | 	schema := map[string]any{"type": "object"}
115 | 
116 | 	properties, err := handleStructFields(t, visited, definitions)
117 | 	if err != nil {
118 | 		return nil, errors.Wrap(err, "handle struct fields")
119 | 	}
120 | 	if len(properties) > 0 {
121 | 		schema["properties"] = properties
122 | 	}
123 | 
124 | 	definitions[typeName] = schema
125 | 
126 | 	return map[string]any{"$ref": "#/definitions/" + typeName}, nil
127 | }
128 | 
129 | // handleStructFields builds the "properties" map for struct type t.
130 | //
131 | // Unexported fields and fields without a property name are skipped. Embedded
132 | // fields are flattened into the result by handleEmbeddedStruct. Every other
133 | // field gets its own schema, plus a "description" when it has a desc tag.
134 | func handleStructFields(
135 | 	t reflect.Type,
136 | 	visited map[reflect.Type]string,
137 | 	definitions map[string]any,
138 | ) (properties map[string]any, err error) {
139 | 	fieldCount := t.NumField()
140 | 	properties = make(map[string]any, fieldCount)
141 | 
142 | 	for idx := 0; idx < fieldCount; idx++ {
143 | 		field := t.Field(idx)
144 | 		if !field.IsExported() {
145 | 			continue
146 | 		}
147 | 
148 | 		name := getPropertyName(field)
149 | 		switch {
150 | 		case name == "":
151 | 			continue
152 | 		case field.Anonymous:
153 | 			if err := handleEmbeddedStruct(field, visited, definitions, properties); err != nil {
154 | 				return nil, err
155 | 			}
156 | 
157 | 			continue
158 | 		}
159 | 
160 | 		schema, err := forTypeInternal(field.Type, field.Name, visited, definitions)
161 | 		if err != nil {
162 | 			return nil, errors.Wrapf(err, "generating schema for field %s", field.Name)
163 | 		}
164 | 
165 | 		description := field.Tag.Get("desc")
166 | 		if description != "" {
167 | 			schema["description"] = description
168 | 		}
169 | 
170 | 		properties[name] = schema
171 | 	}
172 | 
173 | 	return properties, nil
174 | }
169 | 
170 | // handleArrayType returns an "array" schema for slice or array type t whose
171 | // "items" entry is the schema of t's element type.
172 | func handleArrayType(
173 | 	t reflect.Type,
174 | 	visited map[reflect.Type]string,
175 | 	definitions map[string]any,
176 | ) (map[string]any, error) {
177 | 	items, err := forTypeInternal(t.Elem(), "", visited, definitions)
178 | 	if err != nil {
179 | 		return nil, errors.Wrap(err, "generating array item schema")
180 | 	}
181 | 
182 | 	schema := make(map[string]any, 2)
183 | 	schema["type"] = "array"
184 | 	schema["items"] = items
185 | 
186 | 	return schema, nil
187 | }
185 | 
186 | // handleMapType returns an "object" schema for map type t whose
187 | // "additionalProperties" entry is the schema of t's value type. Maps whose key
188 | // kind is not string are rejected.
189 | func handleMapType(
190 | 	t reflect.Type,
191 | 	visited map[reflect.Type]string,
192 | 	definitions map[string]any,
193 | ) (map[string]any, error) {
194 | 	if keyKind := t.Key().Kind(); keyKind != reflect.String {
195 | 		return nil, errors.Errorf("unsupported map key type: %s (must be string)", keyKind)
196 | 	}
197 | 
198 | 	values, err := forTypeInternal(t.Elem(), "", visited, definitions)
199 | 	if err != nil {
200 | 		return nil, errors.Wrap(err, "generating map value schema")
201 | 	}
202 | 
203 | 	schema := make(map[string]any, 2)
204 | 	schema["type"] = "object"
205 | 	schema["additionalProperties"] = values
206 | 
207 | 	return schema, nil
208 | }
205 | 
206 | // handlePrimitiveType returns the schema for a string, integer, float or
207 | // boolean type. time.Duration is described as a duration string; unsigned
208 | // integers carry a minimum of 0. Any other kind is an error.
209 | func handlePrimitiveType(t reflect.Type) (map[string]any, error) {
210 | 	// time.Duration has an integer kind but is represented as a string.
211 | 	if t == reflect.TypeOf(time.Duration(0)) {
212 | 		return map[string]any{
213 | 			"type":    "string",
214 | 			"format":  "duration",
215 | 			"pattern": "^([0-9]+(s|m|h))+$",
216 | 		}, nil
217 | 	}
218 | 
219 | 	switch t.Kind() {
220 | 	case reflect.String:
221 | 		return map[string]any{"type": "string"}, nil
222 | 
223 | 	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
224 | 		return map[string]any{"type": "integer"}, nil
225 | 
226 | 	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
227 | 		return map[string]any{"type": "integer", "minimum": 0}, nil
228 | 
229 | 	case reflect.Float32, reflect.Float64:
230 | 		return map[string]any{"type": "number"}, nil
231 | 
232 | 	case reflect.Bool:
233 | 		return map[string]any{"type": "boolean"}, nil
234 | 	}
235 | 
236 | 	return nil, errors.Errorf("unsupported type: %s", t.Kind())
237 | }
238 | 
239 | // getPropertyName returns the JSON property name of field, following the
240 | // encoding/json tag rules:
241 | //   - a tag of exactly "-" excludes the field and yields "";
242 | //   - otherwise the text before the first comma is the name;
243 | //   - a missing tag or an empty name (e.g. `json:",omitempty"`) falls back to
244 | //     the Go field name instead of dropping the field.
245 | func getPropertyName(field reflect.StructField) string {
246 | 	tag := field.Tag.Get("json")
247 | 	if tag == "-" {
248 | 		return ""
249 | 	}
250 | 
251 | 	// Everything after the first comma is an option list, not part of the name.
252 | 	name, _, _ := strings.Cut(tag, ",")
253 | 	if name == "" {
254 | 		return field.Name
255 | 	}
256 | 
257 | 	return name
258 | }
253 | 
254 | // handleEmbeddedStruct flattens an embedded field into properties.
255 | //
256 | // The embedded type's schema is generated first. If that schema is a "$ref"
257 | // to an entry in definitions, the entry's properties are copied into
258 | // properties and the entry is removed from definitions. An embedded type
259 | // whose schema is not a "$ref" contributes nothing.
260 | //
261 | // NOTE(review): the type stays in visited after its definition is deleted, so
262 | // a later non-embedded use of the same type presumably yields a "$ref" to a
263 | // missing definition; confirm whether callers can hit this.
264 | func handleEmbeddedStruct(
265 | 	field reflect.StructField,
266 | 	visited map[reflect.Type]string,
267 | 	definitions map[string]any,
268 | 	properties map[string]any,
269 | ) error {
270 | 	embeddedSchema, err := forTypeInternal(field.Type, "", visited, definitions)
271 | 	if err != nil {
272 | 		return errors.Wrapf(err, "generating schema for embedded field %s", field.Name)
273 | 	}
274 | 
275 | 	ref, hasRef := embeddedSchema["$ref"]
276 | 	if !hasRef {
277 | 		return nil
278 | 	}
279 | 
280 | 	key := strings.TrimPrefix(ref.(string), "#/definitions/")
281 | 	def, found := definitions[key]
282 | 	if !found {
283 | 		return nil
284 | 	}
285 | 
286 | 	if props, ok := def.(map[string]any)["properties"].(map[string]any); ok {
287 | 		maps.Copy(properties, props)
288 | 	}
289 | 	delete(definitions, key)
290 | 
291 | 	return nil
292 | }
279 | 


--------------------------------------------------------------------------------
/pkg/util/jsonrpc/jsonrpc.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package jsonrpc
17 | 
18 | import (
19 | 	"context"
20 | 	"encoding/json"
21 | 	"errors"
22 | 	"net/http"
23 | 
24 | 	"github.com/glidea/zenfeed/pkg/api"
25 | )
26 | 
27 | // Handler is the typed callback wrapped by API: it receives the decoded
28 | // request and returns the response to encode, or an error.
29 | type Handler[Request any, Response any] func(ctx context.Context, req *Request) (*Response, error)
30 | 
31 | // API adapts handler to an http.Handler.
32 | //
33 | // CORS headers are always set and OPTIONS requests end there. The body, unless
34 | // it is http.NoBody, is decoded as JSON into Request (400 on failure). An
35 | // api.Error from handler is written as JSON with its own Code; any other error
36 | // becomes a plain 500. A successful response is JSON-encoded.
37 | func API[Request any, Response any](handler Handler[Request, Response]) http.Handler {
38 | 	serve := func(w http.ResponseWriter, r *http.Request) {
39 | 		allowCORS(w)
40 | 
41 | 		if r.Method == "OPTIONS" {
42 | 			return
43 | 		}
44 | 
45 | 		var req Request
46 | 		if r.Body != http.NoBody {
47 | 			decodeErr := json.NewDecoder(r.Body).Decode(&req)
48 | 			if decodeErr != nil {
49 | 				http.Error(w, decodeErr.Error(), http.StatusBadRequest)
50 | 
51 | 				return
52 | 			}
53 | 		}
54 | 
55 | 		resp, err := handler(r.Context(), &req)
56 | 		if err == nil {
57 | 			w.Header().Set("Content-Type", "application/json")
58 | 			if encodeErr := json.NewEncoder(w).Encode(resp); encodeErr != nil {
59 | 				http.Error(w, encodeErr.Error(), http.StatusInternalServerError)
60 | 			}
61 | 
62 | 			return
63 | 		}
64 | 
65 | 		// Errors that are not an api.Error are reported as a plain-text 500.
66 | 		var apiErr api.Error
67 | 		if !errors.As(err, &apiErr) {
68 | 			http.Error(w, err.Error(), http.StatusInternalServerError)
69 | 
70 | 			return
71 | 		}
72 | 
73 | 		w.Header().Set("Content-Type", "application/json")
74 | 		w.WriteHeader(apiErr.Code)
75 | 		_ = json.NewEncoder(w).Encode(apiErr)
76 | 	}
77 | 
78 | 	return http.HandlerFunc(serve)
79 | }
70 | 
71 | func allowCORS(w http.ResponseWriter) {
72 | 	w.Header().Set("Access-Control-Allow-Origin", "*")
73 | 	w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS, PUT, DELETE")
74 | 	w.Header().Set("Access-Control-Allow-Headers",
75 | 		"Accept, Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization",
76 | 	)
77 | }
78 | 


--------------------------------------------------------------------------------
/pkg/util/jsonrpc/jsonrpc_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package jsonrpc
 17 | 
 18 | import (
 19 | 	"bytes"
 20 | 	"context"
 21 | 	"encoding/json"
 22 | 	"errors"
 23 | 	"io"
 24 | 	"net/http"
 25 | 	"net/http/httptest"
 26 | 	"testing"
 27 | 
 28 | 	. "github.com/onsi/gomega"
 29 | 
 30 | 	"github.com/glidea/zenfeed/pkg/api"
 31 | 	"github.com/glidea/zenfeed/pkg/test"
 32 | )
 33 | 
 34 | func TestAPI(t *testing.T) { // Table-driven test of API: decoding, success and error mapping.
 35 | 	RegisterTestingT(t)
 36 | 
 37 | 	type TestRequest struct {
 38 | 		Name string `json:"name"`
 39 | 	}
 40 | 
 41 | 	type TestResponse struct {
 42 | 		Greeting string `json:"greeting"`
 43 | 	}
 44 | 
 45 | 	type givenDetail struct {
 46 | 		handler Handler[TestRequest, TestResponse]
 47 | 	}
 48 | 	type whenDetail struct {
 49 | 		method      string
 50 | 		requestBody string
 51 | 	}
 52 | 	type thenExpected struct {
 53 | 		statusCode   int
 54 | 		responseBody string
 55 | 	}
 56 | 
 57 | 	successHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
 58 | 		return &TestResponse{Greeting: "Hello, " + req.Name}, nil
 59 | 	}
 60 | 
 61 | 	badRequestHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
 62 | 		return nil, api.ErrBadRequest(errors.New("invalid request"))
 63 | 	}
 64 | 
 65 | 	notFoundHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
 66 | 		return nil, api.ErrNotFound(errors.New("resource not found"))
 67 | 	}
 68 | 
 69 | 	internalErrorHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
 70 | 		return nil, api.ErrInternal(errors.New("server error"))
 71 | 	}
 72 | 
 73 | 	genericErrorHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
 74 | 		return nil, errors.New("generic error")
 75 | 	}
 76 | 
 77 | 	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
 78 | 		{
 79 | 			Scenario: "Successful request",
 80 | 			Given:    "a handler that returns a successful response",
 81 | 			When:     "making a valid request",
 82 | 			Then:     "should return 200 OK with the expected response",
 83 | 			GivenDetail: givenDetail{
 84 | 				handler: successHandler,
 85 | 			},
 86 | 			WhenDetail: whenDetail{
 87 | 				method:      http.MethodPost,
 88 | 				requestBody: `{"name":"World"}`,
 89 | 			},
 90 | 			ThenExpected: thenExpected{
 91 | 				statusCode:   http.StatusOK,
 92 | 				responseBody: `{"greeting":"Hello, World"}`,
 93 | 			},
 94 | 		},
 95 | 		{
 96 | 			Scenario: "Empty request body",
 97 | 			Given:    "a handler that returns a successful response",
 98 | 			When:     "making a request with empty body",
 99 | 			Then:     "should return 200 OK with default values",
100 | 			GivenDetail: givenDetail{
101 | 				handler: successHandler,
102 | 			},
103 | 			WhenDetail: whenDetail{
104 | 				method:      http.MethodPost,
105 | 				requestBody: "",
106 | 			},
107 | 			ThenExpected: thenExpected{
108 | 				statusCode:   http.StatusOK,
109 | 				responseBody: `{"greeting":"Hello, "}`,
110 | 			},
111 | 		},
112 | 		{
113 | 			Scenario: "Invalid JSON request",
114 | 			Given:    "a handler that processes JSON",
115 | 			When:     "making a request with invalid JSON",
116 | 			Then:     "should return 400 Bad Request",
117 | 			GivenDetail: givenDetail{
118 | 				handler: successHandler,
119 | 			},
120 | 			WhenDetail: whenDetail{
121 | 				method:      http.MethodPost,
122 | 				requestBody: `{"name":`,
123 | 			},
124 | 			ThenExpected: thenExpected{
125 | 				statusCode: http.StatusBadRequest,
126 | 			},
127 | 		},
128 | 		{
129 | 			Scenario: "Bad request error",
130 | 			Given:    "a handler that returns a bad request error",
131 | 			When:     "making a request that triggers a bad request error",
132 | 			Then:     "should return 400 Bad Request with error details",
133 | 			GivenDetail: givenDetail{
134 | 				handler: badRequestHandler,
135 | 			},
136 | 			WhenDetail: whenDetail{
137 | 				method:      http.MethodPost,
138 | 				requestBody: `{"name":"World"}`,
139 | 			},
140 | 			ThenExpected: thenExpected{
141 | 				statusCode:   http.StatusBadRequest,
142 | 				responseBody: `{"code":400,"message":"invalid request"}`,
143 | 			},
144 | 		},
145 | 		{
146 | 			Scenario: "Not found error",
147 | 			Given:    "a handler that returns a not found error",
148 | 			When:     "making a request that triggers a not found error",
149 | 			Then:     "should return 404 Not Found with error details",
150 | 			GivenDetail: givenDetail{
151 | 				handler: notFoundHandler,
152 | 			},
153 | 			WhenDetail: whenDetail{
154 | 				method:      http.MethodPost,
155 | 				requestBody: `{"name":"World"}`,
156 | 			},
157 | 			ThenExpected: thenExpected{
158 | 				statusCode:   http.StatusNotFound,
159 | 				responseBody: `{"code":404,"message":"resource not found"}`,
160 | 			},
161 | 		},
162 | 		{
163 | 			Scenario: "Internal server error",
164 | 			Given:    "a handler that returns an internal server error",
165 | 			When:     "making a request that triggers an internal server error",
166 | 			Then:     "should return 500 Internal Server Error with error details",
167 | 			GivenDetail: givenDetail{
168 | 				handler: internalErrorHandler,
169 | 			},
170 | 			WhenDetail: whenDetail{
171 | 				method:      http.MethodPost,
172 | 				requestBody: `{"name":"World"}`,
173 | 			},
174 | 			ThenExpected: thenExpected{
175 | 				statusCode:   http.StatusInternalServerError,
176 | 				responseBody: `{"code":500,"message":"server error"}`,
177 | 			},
178 | 		},
179 | 		{
180 | 			Scenario: "Generic error",
181 | 			Given:    "a handler that returns a generic error",
182 | 			When:     "making a request that triggers a generic error",
183 | 			Then:     "should return 500 Internal Server Error",
184 | 			GivenDetail: givenDetail{
185 | 				handler: genericErrorHandler,
186 | 			},
187 | 			WhenDetail: whenDetail{
188 | 				method:      http.MethodPost,
189 | 				requestBody: `{"name":"World"}`,
190 | 			},
191 | 			ThenExpected: thenExpected{
192 | 				statusCode: http.StatusInternalServerError,
193 | 			},
194 | 		},
195 | 	}
196 | 
197 | 	for _, tt := range tests {
198 | 		t.Run(tt.Scenario, func(t *testing.T) {
199 | 			// Given: the case's handler wrapped by API.
200 | 			handler := API(tt.GivenDetail.handler)
201 | 
202 | 			// When: the request is served; an empty requestBody is sent as a nil body.
203 | 			var req *http.Request
204 | 			if tt.WhenDetail.requestBody == "" {
205 | 				req = httptest.NewRequest(tt.WhenDetail.method, "/test", nil)
206 | 			} else {
207 | 				req = httptest.NewRequest(tt.WhenDetail.method, "/test", bytes.NewBufferString(tt.WhenDetail.requestBody))
208 | 			}
209 | 			rec := httptest.NewRecorder()
210 | 			handler.ServeHTTP(rec, req)
211 | 
212 | 			// Then: the status always matches; bodies are compared as decoded JSON.
213 | 			Expect(rec.Code).To(Equal(tt.ThenExpected.statusCode))
214 | 
215 | 			if tt.ThenExpected.responseBody != "" { // Cases without an expected body only check the status.
216 | 				var expected, actual interface{}
217 | 				err := json.Unmarshal([]byte(tt.ThenExpected.responseBody), &expected)
218 | 				Expect(err).NotTo(HaveOccurred())
219 | 
220 | 				body, err := io.ReadAll(rec.Body)
221 | 				Expect(err).NotTo(HaveOccurred())
222 | 
223 | 				err = json.Unmarshal(body, &actual)
224 | 				Expect(err).NotTo(HaveOccurred())
225 | 
226 | 				Expect(actual).To(Equal(expected))
227 | 			}
228 | 		})
229 | 	}
230 | }
231 | 


--------------------------------------------------------------------------------
/pkg/util/retry/retry.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package retry
 17 | 
 18 | import (
 19 | 	"context"
 20 | 	"time"
 21 | 
 22 | 	"github.com/pkg/errors"
 23 | 	"k8s.io/utils/ptr"
 24 | 
 25 | 	"github.com/glidea/zenfeed/pkg/telemetry/log"
 26 | )
 27 | 
 28 | // Options tunes Backoff. Unset fields are replaced with defaults by adjust.
 29 | type Options struct {
 30 | 	MinInterval time.Duration // Initial retry delay; 0 defaults to 100ms.
 31 | 	MaxInterval time.Duration // Upper bound of the delay; 0 defaults to 10s.
 32 | 	MaxAttempts *int          // Attempt limit; nil defaults to 3.
 33 | }
 34 | 
 35 | // adjust fills unset fields with their defaults and raises MaxInterval to
 36 | // MinInterval when it is smaller.
 37 | func (opts *Options) adjust() {
 38 | 	const (
 39 | 		defaultMinInterval = 100 * time.Millisecond
 40 | 		defaultMaxInterval = 10 * time.Second
 41 | 		defaultMaxAttempts = 3
 42 | 	)
 43 | 
 44 | 	if opts.MinInterval == 0 {
 45 | 		opts.MinInterval = defaultMinInterval
 46 | 	}
 47 | 	if opts.MaxInterval == 0 {
 48 | 		opts.MaxInterval = defaultMaxInterval
 49 | 	}
 50 | 	opts.MaxInterval = max(opts.MaxInterval, opts.MinInterval)
 51 | 	if opts.MaxAttempts == nil {
 52 | 		opts.MaxAttempts = ptr.To(defaultMaxAttempts)
 53 | 	}
 54 | }
 48 | 
 49 | // InfAttempts, used as Options.MaxAttempts, makes Backoff retry without an
 50 | // attempt limit.
 51 | var InfAttempts = ptr.To(-1)
 52 | 
 53 | // Backoff runs operation until it succeeds, ctx is done, or the attempt limit
 54 | // is reached. Between attempts it waits for an interval that starts at
 55 | // MinInterval and doubles up to MaxInterval.
 56 | //
 57 | // It returns nil on success, ctx.Err() when ctx ends while waiting, or the
 58 | // last operation error wrapped with "max attempts reached". A nil opts selects
 59 | // the defaults. The limit counts every call, including the first one, so a
 60 | // MaxAttempts of 1 means no retry. The caller's Options value is not modified.
 61 | func Backoff(ctx context.Context, operation func() error, opts *Options) error {
 62 | 	err := operation()
 63 | 	if err == nil {
 64 | 		return nil // One time success.
 65 | 	}
 66 | 
 67 | 	// Work on a copy so defaults are not written back into the caller's value.
 68 | 	var cfg Options
 69 | 	if opts != nil {
 70 | 		cfg = *opts
 71 | 	}
 72 | 	cfg.adjust()
 73 | 
 74 | 	interval := cfg.MinInterval
 75 | 
 76 | 	// attempts is the number of calls made so far; err is the latest failure.
 77 | 	for attempts := 1; ; attempts++ {
 78 | 		if reachedMaxAttempts(attempts, *cfg.MaxAttempts) {
 79 | 			return errors.Wrap(err, "max attempts reached")
 80 | 		}
 81 | 		log.Error(ctx, err, "attempt", attempts)
 82 | 
 83 | 		// An explicit timer can be stopped as soon as ctx ends.
 84 | 		timer := time.NewTimer(interval)
 85 | 		select {
 86 | 		case <-ctx.Done():
 87 | 			timer.Stop()
 88 | 
 89 | 			return ctx.Err()
 90 | 
 91 | 		case <-timer.C:
 92 | 		}
 93 | 
 94 | 		if err = operation(); err == nil {
 95 | 			return nil
 96 | 		}
 97 | 		interval = nextInterval(interval, cfg.MaxInterval)
 98 | 	}
 99 | }
 90 | 
 91 | // nextInterval doubles cur, capping the result at max.
 92 | func nextInterval(cur, max time.Duration) (next time.Duration) {
 93 | 	next = 2 * cur
 94 | 	if next > max {
 95 | 		next = max
 96 | 	}
 97 | 
 98 | 	return next
 99 | }
 94 | 
 95 | // reachedMaxAttempts reports whether cur has reached the limit max. A max
 96 | // equal to *InfAttempts means there is no limit, so it always reports false.
 97 | func reachedMaxAttempts(cur, max int) bool {
 98 | 	unlimited := max == *InfAttempts
 99 | 
100 | 	return !unlimited && cur >= max
101 | }
102 | 


--------------------------------------------------------------------------------
/pkg/util/retry/retry_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package retry
 17 | 
 18 | import (
 19 | 	"context"
 20 | 	"testing"
 21 | 	"time"
 22 | 
 23 | 	. "github.com/onsi/gomega"
 24 | 	"github.com/pkg/errors"
 25 | 	"k8s.io/utils/ptr"
 26 | 
 27 | 	"github.com/glidea/zenfeed/pkg/test"
 28 | )
 29 | 
 30 | // TestBackoff covers Backoff's outcomes: immediate success, success after
 31 | // retries, giving up at the attempt limit, and context cancellation. Besides
 32 | // the returned error it verifies how many times the operation was invoked.
 33 | func TestBackoff(t *testing.T) {
 34 | 	RegisterTestingT(t)
 35 | 
 36 | 	type givenDetail struct{}
 37 | 	type whenDetail struct {
 38 | 		operation   func() error
 39 | 		opts        *Options
 40 | 		cancelAfter time.Duration
 41 | 	}
 42 | 	type thenExpected struct {
 43 | 		shouldError   bool
 44 | 		errorContains string
 45 | 		// attemptsNeeded is the exact number of operation calls expected.
 46 | 		// Zero skips the check (used where the count depends on timing).
 47 | 		attemptsNeeded int
 48 | 	}
 49 | 
 50 | 	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
 51 | 		{
 52 | 			Scenario: "Operation succeeds on first attempt",
 53 | 			When:     "calling Backoff with the operation that succeeds immediately",
 54 | 			Then:     "should return nil error",
 55 | 			WhenDetail: whenDetail{
 56 | 				operation: func() error {
 57 | 					return nil
 58 | 				},
 59 | 				opts: nil,
 60 | 			},
 61 | 			ThenExpected: thenExpected{
 62 | 				shouldError:    false,
 63 | 				attemptsNeeded: 1,
 64 | 			},
 65 | 		},
 66 | 		{
 67 | 			Scenario: "Operation succeeds after retries",
 68 | 			When:     "calling Backoff with the operation that fails initially but succeeds after retries",
 69 | 			Then:     "should return nil error after successful retry",
 70 | 			WhenDetail: whenDetail{
 71 | 				operation: createFailingThenSucceedingOperation(2),
 72 | 				opts: &Options{
 73 | 					MinInterval: 10 * time.Millisecond,
 74 | 					MaxInterval: 50 * time.Millisecond,
 75 | 					MaxAttempts: ptr.To(5),
 76 | 				},
 77 | 			},
 78 | 			ThenExpected: thenExpected{
 79 | 				shouldError:    false,
 80 | 				attemptsNeeded: 3,
 81 | 			},
 82 | 		},
 83 | 		{
 84 | 			Scenario: "Operation fails all attempts",
 85 | 			When:     "calling Backoff with the operation that always fails",
 86 | 			Then:     "should return error after max attempts",
 87 | 			WhenDetail: whenDetail{
 88 | 				operation: func() error {
 89 | 					return errors.New("persistent error")
 90 | 				},
 91 | 				opts: &Options{
 92 | 					MinInterval: 10 * time.Millisecond,
 93 | 					MaxInterval: 50 * time.Millisecond,
 94 | 					MaxAttempts: ptr.To(3),
 95 | 				},
 96 | 			},
 97 | 			ThenExpected: thenExpected{
 98 | 				shouldError:    true,
 99 | 				errorContains:  "max attempts reached",
100 | 				attemptsNeeded: 3,
101 | 			},
102 | 		},
103 | 		{
104 | 			Scenario: "Context cancellation",
105 | 			When:     "calling Backoff with an operation that takes time",
106 | 			Then:     "should return context error",
107 | 			WhenDetail: whenDetail{
108 | 				operation: func() error {
109 | 					return errors.New("operation error")
110 | 				},
111 | 				opts: &Options{
112 | 					MinInterval: 100 * time.Millisecond,
113 | 					MaxInterval: 200 * time.Millisecond,
114 | 					MaxAttempts: ptr.To(10),
115 | 				},
116 | 				cancelAfter: 50 * time.Millisecond,
117 | 			},
118 | 			ThenExpected: thenExpected{
119 | 				shouldError:   true,
120 | 				errorContains: "context canceled",
121 | 			},
122 | 		},
123 | 	}
124 | 
125 | 	for _, tt := range tests {
126 | 		t.Run(tt.Scenario, func(t *testing.T) {
127 | 			// Given: the operation wrapped so that its invocations are counted.
128 | 			calls := 0
129 | 			counted := func() error {
130 | 				calls++
131 | 
132 | 				return tt.WhenDetail.operation()
133 | 			}
134 | 
135 | 			// When.
136 | 			ctx := context.Background()
137 | 			if tt.WhenDetail.cancelAfter > 0 {
138 | 				var cancel context.CancelFunc
139 | 				ctx, cancel = context.WithCancel(ctx)
140 | 				defer cancel() // Release the context even if the timer goroutine lags.
141 | 
142 | 				go func() {
143 | 					time.Sleep(tt.WhenDetail.cancelAfter)
144 | 					cancel()
145 | 				}()
146 | 			}
147 | 			err := Backoff(ctx, counted, tt.WhenDetail.opts)
148 | 
149 | 			// Then.
150 | 			if tt.ThenExpected.shouldError {
151 | 				Expect(err).To(HaveOccurred())
152 | 				if tt.ThenExpected.errorContains != "" {
153 | 					Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.errorContains))
154 | 				}
155 | 			} else {
156 | 				Expect(err).NotTo(HaveOccurred())
157 | 			}
158 | 			if tt.ThenExpected.attemptsNeeded > 0 {
159 | 				Expect(calls).To(Equal(tt.ThenExpected.attemptsNeeded))
160 | 			}
161 | 		})
162 | 	}
163 | }
147 | 
// createFailingThenSucceedingOperation builds an operation that returns a
// "temporary error" on each of its first failCount invocations and nil on every
// invocation after that. The failure counter is captured by the returned
// closure, so each operation created here tracks its own calls.
func createFailingThenSucceedingOperation(failCount int) func() error {
	failed := 0

	return func() error {
		// Once the failure budget is used up, every further call succeeds.
		if failed >= failCount {
			return nil
		}
		failed++

		return errors.New("temporary error")
	}
}
160 | 


--------------------------------------------------------------------------------
/pkg/util/runtime/runtime.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package runtime
17 | 
// Must panics with err when err is non-nil and does nothing otherwise.
// It is meant for initialization code where an error cannot be recovered from.
func Must(err error) {
	if err == nil {
		return
	}

	panic(err)
}
25 | 
// Must1 is the single-value variant of Must: it hands back v unchanged when err
// is nil and panics with err otherwise.
// It is meant for initialization code where an error cannot be recovered from
// and the accompanying value is needed.
func Must1[T any](v T, err error) T {
	if err == nil {
		return v
	}

	panic(err)
}
36 | 
// Must2 is the two-value variant of Must: it hands back v1 and v2 unchanged when
// err is nil and panics with err otherwise.
// It is meant for initialization code where an error cannot be recovered from
// and both accompanying values are needed.
func Must2[T1 any, T2 any](v1 T1, v2 T2, err error) (T1, T2) {
	if err == nil {
		return v1, v2
	}

	panic(err)
}
47 | 


--------------------------------------------------------------------------------
/pkg/util/runtime/runtime_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package runtime
 17 | 
 18 | import (
 19 | 	"errors"
 20 | 	"testing"
 21 | 
 22 | 	. "github.com/onsi/gomega"
 23 | 
 24 | 	"github.com/glidea/zenfeed/pkg/test"
 25 | )
 26 | 
 27 | func TestMust(t *testing.T) {
 28 | 	RegisterTestingT(t)
 29 | 
 30 | 	type givenDetail struct{}
 31 | 	type whenDetail struct {
 32 | 		err error
 33 | 	}
 34 | 	type thenExpected struct {
 35 | 		shouldPanic bool
 36 | 	}
 37 | 
 38 | 	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
 39 | 		{
 40 | 			Scenario: "Must with nil error",
 41 | 			When:     "calling Must with nil error",
 42 | 			Then:     "should not panic",
 43 | 			WhenDetail: whenDetail{
 44 | 				err: nil,
 45 | 			},
 46 | 			ThenExpected: thenExpected{
 47 | 				shouldPanic: false,
 48 | 			},
 49 | 		},
 50 | 		{
 51 | 			Scenario: "Must with non-nil error",
 52 | 			When:     "calling Must with non-nil error",
 53 | 			Then:     "should panic",
 54 | 			WhenDetail: whenDetail{
 55 | 				err: errors.New("test error"),
 56 | 			},
 57 | 			ThenExpected: thenExpected{
 58 | 				shouldPanic: true,
 59 | 			},
 60 | 		},
 61 | 	}
 62 | 
 63 | 	for _, tt := range tests {
 64 | 		t.Run(tt.Scenario, func(t *testing.T) {
 65 | 			// When & Then.
 66 | 			if tt.ThenExpected.shouldPanic {
 67 | 				Expect(func() { Must(tt.WhenDetail.err) }).To(Panic())
 68 | 			} else {
 69 | 				Expect(func() { Must(tt.WhenDetail.err) }).NotTo(Panic())
 70 | 			}
 71 | 		})
 72 | 	}
 73 | }
 74 | 
 75 | func TestMust1(t *testing.T) {
 76 | 	RegisterTestingT(t)
 77 | 
 78 | 	type givenDetail struct{}
 79 | 	type whenDetail struct {
 80 | 		value string
 81 | 		err   error
 82 | 	}
 83 | 	type thenExpected struct {
 84 | 		value       string
 85 | 		shouldPanic bool
 86 | 	}
 87 | 
 88 | 	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
 89 | 		{
 90 | 			Scenario: "Must1 with nil error",
 91 | 			When:     "calling Must1 with a value and nil error",
 92 | 			Then:     "should return the value without panic",
 93 | 			WhenDetail: whenDetail{
 94 | 				value: "test value",
 95 | 				err:   nil,
 96 | 			},
 97 | 			ThenExpected: thenExpected{
 98 | 				value:       "test value",
 99 | 				shouldPanic: false,
100 | 			},
101 | 		},
102 | 		{
103 | 			Scenario: "Must1 with non-nil error",
104 | 			When:     "calling Must1 with a value and non-nil error",
105 | 			Then:     "should panic",
106 | 			WhenDetail: whenDetail{
107 | 				value: "test value",
108 | 				err:   errors.New("test error"),
109 | 			},
110 | 			ThenExpected: thenExpected{
111 | 				shouldPanic: true,
112 | 			},
113 | 		},
114 | 	}
115 | 
116 | 	for _, tt := range tests {
117 | 		t.Run(tt.Scenario, func(t *testing.T) {
118 | 			// When & Then.
119 | 			if tt.ThenExpected.shouldPanic {
120 | 				Expect(func() { Must1(tt.WhenDetail.value, tt.WhenDetail.err) }).To(Panic())
121 | 			} else {
122 | 				result := Must1(tt.WhenDetail.value, tt.WhenDetail.err)
123 | 				Expect(result).To(Equal(tt.ThenExpected.value))
124 | 			}
125 | 		})
126 | 	}
127 | }
128 | 
129 | func TestMust2(t *testing.T) {
130 | 	RegisterTestingT(t)
131 | 
132 | 	type givenDetail struct{}
133 | 	type whenDetail struct {
134 | 		value1 string
135 | 		value2 int
136 | 		err    error
137 | 	}
138 | 	type thenExpected struct {
139 | 		value1      string
140 | 		value2      int
141 | 		shouldPanic bool
142 | 	}
143 | 
144 | 	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
145 | 		{
146 | 			Scenario: "Must2 with nil error",
147 | 			When:     "calling Must2 with two values and nil error",
148 | 			Then:     "should return both values without panic",
149 | 			WhenDetail: whenDetail{
150 | 				value1: "test value",
151 | 				value2: 42,
152 | 				err:    nil,
153 | 			},
154 | 			ThenExpected: thenExpected{
155 | 				value1:      "test value",
156 | 				value2:      42,
157 | 				shouldPanic: false,
158 | 			},
159 | 		},
160 | 		{
161 | 			Scenario: "Must2 with non-nil error",
162 | 			When:     "calling Must2 with two values and non-nil error",
163 | 			Then:     "should panic",
164 | 			WhenDetail: whenDetail{
165 | 				value1: "test value",
166 | 				value2: 42,
167 | 				err:    errors.New("test error"),
168 | 			},
169 | 			ThenExpected: thenExpected{
170 | 				shouldPanic: true,
171 | 			},
172 | 		},
173 | 	}
174 | 
175 | 	for _, tt := range tests {
176 | 		t.Run(tt.Scenario, func(t *testing.T) {
177 | 			// When & Then.
178 | 			if tt.ThenExpected.shouldPanic {
179 | 				Expect(func() {
180 | 					Must2(tt.WhenDetail.value1, tt.WhenDetail.value2, tt.WhenDetail.err)
181 | 				}).To(Panic())
182 | 			} else {
183 | 				result1, result2 := Must2(tt.WhenDetail.value1, tt.WhenDetail.value2, tt.WhenDetail.err)
184 | 				Expect(result1).To(Equal(tt.ThenExpected.value1))
185 | 				Expect(result2).To(Equal(tt.ThenExpected.value2))
186 | 			}
187 | 		})
188 | 	}
189 | }
190 | 


--------------------------------------------------------------------------------
/pkg/util/text_convert/text_convert.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package textconvert
17 | 
18 | import (
19 | 	md "github.com/JohannesKaufmann/html-to-markdown"
20 | 	"github.com/yuin/goldmark"
21 | 	"github.com/yuin/goldmark/extension"
22 | 	"github.com/yuin/goldmark/renderer/html"
23 | 
24 | 	"github.com/glidea/zenfeed/pkg/util/buffer"
25 | )
26 | 
27 | var (
28 | 	md2html goldmark.Markdown
29 | 	html2md *md.Converter
30 | )
31 | 
32 | func init() {
33 | 	md2html = goldmark.New(
34 | 		goldmark.WithExtensions(
35 | 			extension.GFM,
36 | 		),
37 | 		goldmark.WithRendererOptions(
38 | 			html.WithHardWraps(),
39 | 			html.WithXHTML(),
40 | 		),
41 | 	)
42 | 	html2md = md.NewConverter("", true, nil)
43 | }
44 | 
45 | func MarkdownToHTML(md []byte) ([]byte, error) {
46 | 	buf := buffer.Get()
47 | 	defer buffer.Put(buf)
48 | 
49 | 	if err := md2html.Convert(md, buf); err != nil {
50 | 		return nil, err
51 | 	}
52 | 
53 | 	return buf.Bytes(), nil
54 | }
55 | 
56 | func HTMLToMarkdown(html []byte) ([]byte, error) {
57 | 	res, err := html2md.ConvertBytes(html)
58 | 	if err != nil {
59 | 		return nil, err
60 | 	}
61 | 
62 | 	return res, nil
63 | }
64 | 


--------------------------------------------------------------------------------
/pkg/util/text_convert/text_convert_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package textconvert
 17 | 
 18 | import (
 19 | 	"testing"
 20 | 
 21 | 	. "github.com/onsi/gomega"
 22 | 
 23 | 	"github.com/glidea/zenfeed/pkg/test"
 24 | )
 25 | 
 26 | func TestMarkdownToHTML(t *testing.T) {
 27 | 	RegisterTestingT(t)
 28 | 
 29 | 	type givenDetail struct{}
 30 | 	type whenDetail struct {
 31 | 		markdown []byte
 32 | 	}
 33 | 	type thenExpected struct {
 34 | 		html []byte
 35 | 		err  string
 36 | 	}
 37 | 
 38 | 	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
 39 | 		{
 40 | 			Scenario: "Convert simple markdown to HTML",
 41 | 			When:     "converting markdown to HTML",
 42 | 			Then:     "should return correct HTML",
 43 | 			WhenDetail: whenDetail{
 44 | 				markdown: []byte("# Hello World"),
 45 | 			},
 46 | 			ThenExpected: thenExpected{
 47 | 				html: []byte("<h1>Hello World</h1>\n"),
 48 | 			},
 49 | 		},
 50 | 		{
 51 | 			Scenario: "Convert markdown with formatting to HTML",
 52 | 			When:     "converting markdown text with formatting to HTML",
 53 | 			Then:     "should return HTML with proper formatting",
 54 | 			WhenDetail: whenDetail{
 55 | 				markdown: []byte("**Bold** and *italic* text"),
 56 | 			},
 57 | 			ThenExpected: thenExpected{
 58 | 				html: []byte("<p><strong>Bold</strong> and <em>italic</em> text</p>\n"),
 59 | 			},
 60 | 		},
 61 | 		{
 62 | 			Scenario: "Convert markdown with links to HTML",
 63 | 			When:     "converting markdown text with links to HTML",
 64 | 			Then:     "should return HTML with proper links",
 65 | 			WhenDetail: whenDetail{
 66 | 				markdown: []byte("[Link](https://example.com)"),
 67 | 			},
 68 | 			ThenExpected: thenExpected{
 69 | 				html: []byte("<p><a href=\"https://example.com\">Link</a></p>\n"),
 70 | 			},
 71 | 		},
 72 | 	}
 73 | 
 74 | 	for _, tt := range tests {
 75 | 		t.Run(tt.Scenario, func(_ *testing.T) {
 76 | 			// When.
 77 | 			html, err := MarkdownToHTML(tt.WhenDetail.markdown)
 78 | 
 79 | 			// Then.
 80 | 			if tt.ThenExpected.err != "" {
 81 | 				Expect(err).NotTo(BeNil())
 82 | 				Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
 83 | 			} else {
 84 | 				Expect(err).To(BeNil())
 85 | 				Expect(html).To(Equal(tt.ThenExpected.html))
 86 | 			}
 87 | 		})
 88 | 	}
 89 | }
 90 | 
 91 | func TestHTMLToMarkdown(t *testing.T) {
 92 | 	RegisterTestingT(t)
 93 | 
 94 | 	type givenDetail struct{}
 95 | 	type whenDetail struct {
 96 | 		html []byte
 97 | 	}
 98 | 	type thenExpected struct {
 99 | 		markdown []byte
100 | 		err      string
101 | 	}
102 | 
103 | 	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
104 | 		{
105 | 			Scenario: "Convert simple HTML to markdown",
106 | 			When:     "converting HTML text to markdown",
107 | 			Then:     "should return correct markdown",
108 | 			WhenDetail: whenDetail{
109 | 				html: []byte("<h1>Hello World</h1>"),
110 | 			},
111 | 			ThenExpected: thenExpected{
112 | 				markdown: []byte("# Hello World"),
113 | 			},
114 | 		},
115 | 		{
116 | 			Scenario: "Convert HTML with formatting to markdown",
117 | 			When:     "converting HTML text with formatting to markdown",
118 | 			Then:     "should return markdown with proper formatting",
119 | 			WhenDetail: whenDetail{
120 | 				html: []byte("<p><strong>Bold</strong> and <em>italic</em> text</p>"),
121 | 			},
122 | 			ThenExpected: thenExpected{
123 | 				markdown: []byte("**Bold** and _italic_ text"),
124 | 			},
125 | 		},
126 | 		{
127 | 			Scenario: "Convert HTML with links to markdown",
128 | 			When:     "converting HTML text with links to markdown",
129 | 			Then:     "should return markdown with proper links",
130 | 			WhenDetail: whenDetail{
131 | 				html: []byte("<p><a href=\"https://example.com\">Link</a></p>"),
132 | 			},
133 | 			ThenExpected: thenExpected{
134 | 				markdown: []byte("[Link](https://example.com)"),
135 | 			},
136 | 		},
137 | 	}
138 | 
139 | 	for _, tt := range tests {
140 | 		t.Run(tt.Scenario, func(_ *testing.T) {
141 | 			// When.
142 | 			markdown, err := HTMLToMarkdown(tt.WhenDetail.html)
143 | 
144 | 			// Then.
145 | 			if tt.ThenExpected.err != "" {
146 | 				Expect(err).NotTo(BeNil())
147 | 				Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
148 | 			} else {
149 | 				Expect(err).To(BeNil())
150 | 				Expect(markdown).To(Equal(tt.ThenExpected.markdown))
151 | 			}
152 | 		})
153 | 	}
154 | }
155 | 


--------------------------------------------------------------------------------
/pkg/util/time/time.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package time
 17 | 
 18 | import (
 19 | 	"context"
 20 | 	"encoding/json"
 21 | 	"math/rand"
 22 | 	"time"
 23 | 	_ "time/tzdata"
 24 | 
 25 | 	"github.com/pkg/errors"
 26 | 	"gopkg.in/yaml.v3"
 27 | 
 28 | 	runtimeutil "github.com/glidea/zenfeed/pkg/util/runtime"
 29 | )
 30 | 
 31 | const (
 32 | 	Day   = 24 * time.Hour
 33 | 	Week  = 7 * Day
 34 | 	Month = 30 * Day
 35 | 	Year  = 365 * Day
 36 | )
 37 | 
 38 | // SetLocation sets the location for the current application.
 39 | func SetLocation(name string) error {
 40 | 	if name == "" {
 41 | 		return nil
 42 | 	}
 43 | 
 44 | 	loc, err := time.LoadLocation(name)
 45 | 	if err != nil {
 46 | 		return errors.Wrap(err, "load location")
 47 | 	}
 48 | 
 49 | 	time.Local = loc
 50 | 
 51 | 	return nil
 52 | }
 53 | 
 54 | func InRange(t time.Time, start, end time.Time) bool {
 55 | 	return t.After(start) && t.Before(end)
 56 | }
 57 | 
 58 | func Format(t time.Time) string {
 59 | 	return t.Format(time.RFC3339)
 60 | }
 61 | 
 62 | func Parse(s string) (time.Time, error) {
 63 | 	return time.Parse(time.RFC3339, s)
 64 | }
 65 | 
 66 | func MustParse(s string) time.Time {
 67 | 	return runtimeutil.Must1(Parse(s))
 68 | }
 69 | 
 70 | func Tick(ctx context.Context, d time.Duration, f func() error) error {
 71 | 	ticker := time.NewTicker(d)
 72 | 	defer ticker.Stop()
 73 | 
 74 | 	for {
 75 | 		select {
 76 | 		case <-ticker.C:
 77 | 			if err := f(); err != nil {
 78 | 				return err
 79 | 			}
 80 | 		case <-ctx.Done():
 81 | 			return nil
 82 | 		}
 83 | 	}
 84 | }
 85 | 
 86 | func Random(max time.Duration) time.Duration {
 87 | 	return time.Duration(rand.Int63n(int64(max)))
 88 | }
 89 | 
 90 | type Duration time.Duration
 91 | 
 92 | func (d Duration) String() string {
 93 | 	return time.Duration(d).String()
 94 | }
 95 | 
 96 | func (d Duration) MarshalJSON() ([]byte, error) {
 97 | 	return json.Marshal(d.String())
 98 | }
 99 | 
100 | func (d *Duration) UnmarshalJSON(b []byte) error {
101 | 	var v any
102 | 	if err := json.Unmarshal(b, &v); err != nil {
103 | 		return err
104 | 	}
105 | 
106 | 	switch tv := v.(type) {
107 | 	case float64:
108 | 		*d = Duration(time.Duration(tv))
109 | 
110 | 		return nil
111 | 
112 | 	case string:
113 | 		parsed, err := time.ParseDuration(tv)
114 | 		if err != nil {
115 | 			return err
116 | 		}
117 | 		*d = Duration(parsed)
118 | 
119 | 		return nil
120 | 
121 | 	default:
122 | 		return errors.Errorf("invalid duration: %v", tv)
123 | 	}
124 | }
125 | 
126 | func (d Duration) MarshalYAML() (interface{}, error) {
127 | 	return d.String(), nil
128 | }
129 | 
130 | func (d *Duration) UnmarshalYAML(value *yaml.Node) error {
131 | 	if value.Kind != yaml.ScalarNode {
132 | 		return errors.Errorf("invalid duration: expected a scalar node, got %v", value.Kind)
133 | 	}
134 | 
135 | 	s := value.Value
136 | 
137 | 	parsed, err := time.ParseDuration(s)
138 | 	if err != nil {
139 | 		return errors.Errorf("failed to parse duration string '%s' from YAML: %s", s, err.Error())
140 | 	}
141 | 
142 | 	*d = Duration(parsed)
143 | 
144 | 	return nil
145 | }
146 | 


--------------------------------------------------------------------------------
/pkg/util/vector/vector.go:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 wangyusong
 2 | //
 3 | // This program is free software: you can redistribute it and/or modify
 4 | // it under the terms of the GNU Affero General Public License as published by
 5 | // the Free Software Foundation, either version 3 of the License, or
 6 | // (at your option) any later version.
 7 | //
 8 | // This program is distributed in the hope that it will be useful,
 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 | 
16 | package vector
17 | 
18 | import (
19 | 	"math"
20 | )
21 | 
// Quantize compresses vec to one int8 per element by mapping the vector's value
// range [min, max] linearly onto [-128, 127].
//
// It returns the quantized values together with the min and scale needed by
// Dequantize to approximately restore the original values.
//
// Degenerate inputs are handled explicitly so that scale is always usable as a
// divisor in Dequantize:
//   - an empty vec yields an empty (non-nil) slice with min 0 and scale 1;
//   - a vec whose elements are all equal has a zero-width range, so scale is 1
//     and every element is quantized to -128, which dequantizes back to min.
func Quantize(vec []float32) (quantized []int8, min, scale float32) {
	quantized = make([]int8, len(vec))
	if len(vec) == 0 {
		return quantized, 0, 1
	}

	// Find the minimum and maximum values.
	min = float32(math.MaxFloat32)
	hi := float32(-math.MaxFloat32)
	for _, v := range vec {
		if v < min {
			min = v
		}
		if v > hi {
			hi = v
		}
	}

	// Calculate the quantization scale. A zero-width range would divide by
	// zero and turn every quantized value into NaN, so fall back to 1.
	span := hi - min
	if span == 0 {
		scale = 1
	} else {
		scale = float32(255) / span
	}

	// Quantize the data: shift into [0, 255], then offset into int8 range.
	for i, v := range vec {
		quantized[i] = int8(math.Round(float64((v-min)*scale - 128)))
	}

	return quantized, min, scale
}
45 | 
// Dequantize maps each int8 value back to a float32 by shifting it by 128,
// dividing by scale and adding min. The result has the same length as
// quantized.
func Dequantize(quantized []int8, min, scale float32) []float32 {
	restored := make([]float32, 0, len(quantized))
	for _, q := range quantized {
		restored = append(restored, (float32(q)+128)/scale+min)
	}

	return restored
}
54 | 


--------------------------------------------------------------------------------
/pkg/util/vector/vector_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2025 wangyusong
  2 | //
  3 | // This program is free software: you can redistribute it and/or modify
  4 | // it under the terms of the GNU Affero General Public License as published by
  5 | // the Free Software Foundation, either version 3 of the License, or
  6 | // (at your option) any later version.
  7 | //
  8 | // This program is distributed in the hope that it will be useful,
  9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 11 | // GNU Affero General Public License for more details.
 12 | //
 13 | // You should have received a copy of the GNU Affero General Public License
 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | package vector
 17 | 
 18 | import (
 19 | 	"testing"
 20 | 
 21 | 	. "github.com/onsi/gomega"
 22 | 
 23 | 	"github.com/glidea/zenfeed/pkg/test"
 24 | )
 25 | 
 26 | func TestQuantizeDequantize(t *testing.T) {
 27 | 	RegisterTestingT(t)
 28 | 
 29 | 	type givenDetail struct{}
 30 | 	type whenDetail struct {
 31 | 		vector []float32
 32 | 	}
 33 | 	type thenExpected struct {
 34 | 		maxError float32
 35 | 	}
 36 | 
 37 | 	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
 38 | 		{
 39 | 			Scenario: "Quantize and dequantize unit vector",
 40 | 			When:     "quantizing and then dequantizing a vector with values between 0 and 1",
 41 | 			Then:     "should return vector close to the original with small error",
 42 | 			WhenDetail: whenDetail{
 43 | 				vector: []float32{0.1, 0.5, 0.9, 0.3},
 44 | 			},
 45 | 			ThenExpected: thenExpected{
 46 | 				maxError: 0.01,
 47 | 			},
 48 | 		},
 49 | 		{
 50 | 			Scenario: "Quantize and dequantize vector with negative values",
 51 | 			When:     "quantizing and then dequantizing a vector with negative values",
 52 | 			Then:     "should return vector close to the original with small error",
 53 | 			WhenDetail: whenDetail{
 54 | 				vector: []float32{-1.0, -0.5, 0.0, 0.5, 1.0},
 55 | 			},
 56 | 			ThenExpected: thenExpected{
 57 | 				maxError: 0.01,
 58 | 			},
 59 | 		},
 60 | 		{
 61 | 			Scenario: "Quantize and dequantize large range vector",
 62 | 			When:     "quantizing and then dequantizing a vector with large range of values",
 63 | 			Then:     "should return vector close to the original with acceptable error",
 64 | 			WhenDetail: whenDetail{
 65 | 				vector: []float32{-100, -50, 0, 50, 100},
 66 | 			},
 67 | 			ThenExpected: thenExpected{
 68 | 				maxError: 1.5,
 69 | 			},
 70 | 		},
 71 | 	}
 72 | 
 73 | 	for _, tt := range tests {
 74 | 		t.Run(tt.Scenario, func(t *testing.T) {
 75 | 			// When.
 76 | 			quantized, min, scale := Quantize(tt.WhenDetail.vector)
 77 | 			dequantized := Dequantize(quantized, min, scale)
 78 | 
 79 | 			// Then.
 80 | 			Expect(len(dequantized)).To(Equal(len(tt.WhenDetail.vector)))
 81 | 
 82 | 			maxError := float32(0)
 83 | 
 84 | 			for i := range tt.WhenDetail.vector {
 85 | 				error := float32(0)
 86 | 				if tt.WhenDetail.vector[i] > dequantized[i] {
 87 | 					error = tt.WhenDetail.vector[i] - dequantized[i]
 88 | 				} else {
 89 | 					error = dequantized[i] - tt.WhenDetail.vector[i]
 90 | 				}
 91 | 				if error > maxError {
 92 | 					maxError = error
 93 | 				}
 94 | 			}
 95 | 
 96 | 			Expect(maxError).To(BeNumerically("<=", tt.ThenExpected.maxError))
 97 | 		})
 98 | 	}
 99 | }
100 | 


--------------------------------------------------------------------------------