├── .github └── workflows │ └── docker-build.yml ├── .gitignore ├── Dockerfile ├── README.md ├── app ├── core │ ├── agent.py │ └── scm.py ├── main.py ├── models │ └── config.py ├── routers │ ├── config_ui.py │ └── webhooks.py ├── services │ └── llm_service.py ├── static │ └── .gitkeep └── templates │ └── config.html └── requirements.txt /.github/workflows/docker-build.yml: -------------------------------------------------------------------------------- 1 | name: Docker Build and Push 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build-and-push: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v3 18 | 19 | - name: Login to Docker Hub 20 | uses: docker/login-action@v3 21 | with: 22 | username: ${{ secrets.DOCKERHUB_USERNAME }} 23 | password: ${{ secrets.DOCKERHUB_TOKEN }} 24 | 25 | - name: Build and push 26 | uses: docker/build-push-action@v5 27 | with: 28 | context: . 29 | push: true 30 | tags: highkay/codereview-agent:latest 31 | platforms: linux/amd64,linux/arm64 32 | cache-from: type=registry,ref=highkay/codereview-agent:latest 33 | cache-to: type=inline -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.json 2 | __pycache__/ 3 | config.yaml 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用 Python 3.9 作为基础镜像 2 | FROM python:3.12-slim 3 | 4 | # 设置工作目录 5 | WORKDIR /app 6 | 7 | # 设置环境变量 8 | ENV PYTHONUNBUFFERED=1 9 | # 复制项目文件 10 | COPY . . 11 | 12 | # 安装项目依赖 13 | RUN pip install --no-cache-dir -r requirements.txt && pip install --no-cache-dir --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple pydantic 14 | 15 | # 暴露端口 16 | EXPOSE 8000 17 | 18 | # 启动命令 19 | CMD ["python", "app/main.py"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 代码评审Agent 2 | 3 | 这是一个基于LLM的自动代码评审系统,可以自动对Gitea的Pull Request进行代码评审。 4 | 5 | ## 主要功能 6 | 7 | 1. 自动监听Gitea的Pull Request事件 8 | 2. 获取PR中的代码变更并进行分析 9 | 3. 使用LLM进行代码评审,包括: 10 | - 安全性评估 11 | - 性能分析 12 | - 代码可读性 13 | - 最佳实践检查 14 | 4. 自动生成评审评论 15 | 5. 支持根据评分自动合并PR 16 | 17 | ## 技术栈 18 | 19 | - Python 3.10+ 20 | - FastAPI 21 | - HTMX 22 | - TailwindCSS 23 | - LiteLLM 24 | 25 | ## 安装 26 | 27 | 1. 克隆仓库: 28 | ```bash 29 | git clone https://github.com/highkay/codereview-agent.git 30 | cd codereview-agent 31 | ``` 32 | 33 | 2. 安装依赖: 34 | ```bash 35 | pip install -r requirements.txt 36 | ``` 37 | 38 | 3. 配置系统: 39 | - 访问 `http://localhost:8000/config` 40 | - 填写必要的配置信息: 41 | - Gitea服务器URL和API Token 42 | - LLM提供商配置 43 | - 评审规则和阈值 44 | 45 | ## 运行 46 | 47 | ```bash 48 | python app/main.py 49 | ``` 50 | 51 | 服务将在 `http://localhost:8000` 启动。 52 | 53 | ## Gitea配置 54 | 55 | 1. 在Gitea中添加Webhook: 56 | - 进入仓库设置 -> Webhooks -> 添加Webhook 57 | - URL设置为:`http://your-server:8000/webhook/gitea` 58 | - 选择事件:Pull Request 59 | 60 | ## 评审规则 61 | 62 | 系统使用以下维度进行代码评审: 63 | 64 | 1. 安全性(30%): 65 | - SQL注入 66 | - XSS漏洞 67 | - 敏感信息泄露 68 | - 权限控制 69 | 70 | 2. 性能(20%): 71 | - 算法复杂度 72 | - 资源使用效率 73 | - 并发处理 74 | 75 | 3. 可读性(20%): 76 | - 代码格式 77 | - 命名规范 78 | - 注释完整性 79 | 80 | 4. 
最佳实践(30%): 81 | - 设计模式 82 | - 单元测试 83 | - 类型提示 84 | - SOLID原则 85 | 86 | ## 评分规则 87 | 88 | - 安全问题: 89 | - 高危:-3分/个 90 | - 中危:-1分/个 91 | - 性能问题:-2分/个 92 | - 可读性问题:-0.5分/个 93 | - 最佳实践: 94 | - 缺少单元测试:-2分 95 | - 无类型提示:-1分 96 | 97 | ## 配置文件 98 | 99 | 系统使用YAML格式的配置文件(`config.yaml`): 100 | 101 | ```yaml 102 | scm: 103 | type: gitea 104 | url: https://git.example.com 105 | token: xxxx-xxxx 106 | context_window: 5 107 | 108 | llm: 109 | provider: openai 110 | model: gpt-4-turbo 111 | api_key: sk-xxxx 112 | max_tokens: 4096 113 | 114 | review: 115 | quality_threshold: 8.5 116 | ignore_patterns: 117 | - "*.md" 118 | - "**/test_*.py" 119 | scoring_rules: 120 | security: 0.3 121 | performance: 0.2 122 | readability: 0.2 123 | best_practice: 0.3 124 | ``` 125 | 126 | ## 开发 127 | 128 | 1. 代码结构: 129 | ``` 130 | app/ 131 | ├── core/ # 核心功能 132 | │ ├── agent.py # 评审Agent 133 | │ └── scm.py # SCM抽象层 134 | ├── models/ # 数据模型 135 | │ └── config.py # 配置模型 136 | ├── routers/ # API路由 137 | │ ├── webhooks.py # Webhook处理 138 | │ └── config_ui.py# 配置界面 139 | ├── services/ # 服务 140 | │ └── llm_service.py # LLM服务 141 | ├── static/ # 静态资源 142 | ├── templates/ # 模板 143 | │ └── config.html # 配置页面 144 | └── main.py # 应用入口 145 | ``` 146 | 147 | 2. 添加新功能: 148 | - 在相应模块中添加代码 149 | - 确保添加适当的测试 150 | - 遵循项目的代码风格 151 | 152 | ## 贡献 153 | 154 | 欢迎提交Pull Request!在提交之前,请确保: 155 | 156 | 1. 代码通过所有测试 157 | 2. 添加了必要的文档 158 | 3. 遵循项目的代码规范 159 | 160 | ## 许可证 161 | 162 | MIT License -------------------------------------------------------------------------------- /app/core/agent.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Any, Optional 2 | from app.models.config import AppConfig 3 | from app.services.llm_service import LLMService, CodeContext, ReviewResult, QualityMetrics 4 | from app.core.scm import SCMProvider, CommitDiff, ReviewComment 5 | from loguru import logger 6 | import fnmatch 7 | import os 8 | 9 | class CodeReviewAgent: 10 | def __init__(self, config: AppConfig, scm: SCMProvider, llm: LLMService) -> None: 11 | self.config = config 12 | self.scm = scm 13 | self.llm = llm 14 | logger.info("CodeReviewAgent initialized with model: {} and config: {}", config.llm.model, config.dict()) 15 | 16 | def _filter_files(self, files: List[dict]) -> List[dict]: 17 | """过滤不需要评审的文件""" 18 | filtered = [] 19 | for file in files: 20 | should_ignore = any( 21 | fnmatch.fnmatch(file["filename"], pattern) 22 | for pattern in self.config.review.ignore_patterns 23 | ) 24 | if not should_ignore: 25 | filtered.append(file) 26 | return filtered 27 | 28 | async def _collect_context(self, owner: str, repo: str, commit_diff: CommitDiff) -> Optional[CodeContext]: 29 | # 过滤文件 30 | filtered_files = self._filter_files(commit_diff.files) 31 | if not filtered_files: 32 | logger.info("No files to review after filtering for commit {} (changed files: {})", 33 | commit_diff.commit_id[:8], len(commit_diff.files)) 34 | return None 35 | 36 | # 收集所有文件的上下文 37 | files_context = [] 38 | window_size = self.config.scm.context_window 39 | 40 | for file in filtered_files: 41 | if "filename" not in file: 42 | logger.error("Invalid file data - missing filename in commit: {}", commit_diff.commit_id[:8]) 43 | continue 44 | 45 | file_path = file["filename"] 46 | file_type = os.path.splitext(file_path)[1][1:] if os.path.splitext(file_path)[1] else "unknown" 47 | 48 | try: 49 | # 获取文件上下文 50 | context = await self.scm.get_file_context( 51 | owner, 52 | repo, 53 | file_path, 54 | commit_diff.commit_id, 55 | 1, 56 | 
window_size * 2 57 | ) 58 | 59 | if not context: 60 | logger.warning("No context returned for file: {} in commit: {}", file_path, commit_diff.commit_id[:8]) 61 | continue 62 | 63 | files_context.append({ 64 | "file_path": file_path, 65 | "file_type": file_type, 66 | "context": context 67 | }) 68 | except Exception as e: 69 | logger.error("Error getting context for file {} in commit {}: {}", 70 | file_path, commit_diff.commit_id[:8], str(e)) 71 | continue 72 | 73 | if not files_context: 74 | logger.warning("No valid file contexts collected for commit {} (total files: {})", 75 | commit_diff.commit_id[:8] if commit_diff and commit_diff.commit_id else "unknown", 76 | len(commit_diff.files) if commit_diff and hasattr(commit_diff, 'files') else 0) 77 | return None 78 | 79 | # 创建并验证上下文对象 80 | try: 81 | context = CodeContext( 82 | diff=commit_diff.diff_content, 83 | files_context=files_context, 84 | metadata={ 85 | "commit_id": commit_diff.commit_id, 86 | "commit_message": commit_diff.commit_message 87 | } 88 | ) 89 | return context 90 | except Exception as e: 91 | logger.error("Error creating CodeContext for commit {}: {}", 92 | commit_diff.commit_id[:8] if commit_diff and commit_diff.commit_id else "unknown", 93 | str(e)) 94 | return None 95 | 96 | async def _analyze_code(self, context: CodeContext) -> ReviewResult: 97 | """分析整个commit的代码变更""" 98 | logger.debug("Analyzing commit: {} - {} (files: {})", 99 | context.metadata["commit_id"][:8], 100 | context.metadata["commit_message"].split('\n')[0][:50], 101 | len(context.files_context)) 102 | 103 | try: 104 | result = await self.llm.analyze_code(context) 105 | 106 | # 记录评审结果 107 | logger.info("Code analysis completed for commit {} with scores and {} files:", 108 | context.metadata["commit_id"][:8], len(context.files_context)) 109 | logger.info("- Overall Score: {}/10 (weight: {})", result.score, self.config.review.quality_threshold) 110 | logger.info("- Security: {}/10 (weight: {})", result.quality_metrics.security_score, self.config.review.scoring_rules["security"]) 111 | logger.info("- Performance: {}/10 (weight: {})", result.quality_metrics.performance_score, self.config.review.scoring_rules["performance"]) 112 | logger.info("- Readability: {}/10 (weight: {})", result.quality_metrics.readability_score, self.config.review.scoring_rules["readability"]) 113 | logger.info("- Best Practices: {}/10 (weight: {})", result.quality_metrics.best_practice_score, self.config.review.scoring_rules["best_practice"]) 114 | 115 | if result.security_issues: 116 | logger.warning("Found {} security issues in commit {} (threshold: {})", 117 | len(result.security_issues), context.metadata["commit_id"][:8], self.config.review.max_security_issues) 118 | 119 | return result 120 | except Exception as e: 121 | logger.error("Error analyzing code for commit {}: {}\nFull error: {}", 122 | context.metadata.get("commit_id", "unknown")[:8], 123 | str(e), repr(e)) 124 | # 返回一个默认的评审结果 125 | return ReviewResult( 126 | score=0, 127 | comments=["代码评审过程中发生错误"], 128 | suggestions=[], 129 | issues=[], 130 | security_issues=[], 131 | quality_metrics=QualityMetrics( 132 | security_score=0, 133 | performance_score=0, 134 | readability_score=0, 135 | best_practice_score=0 136 | ) 137 | ) 138 | 139 | def _generate_comments(self, result: ReviewResult, context: CodeContext) -> List[ReviewComment]: 140 | """生成评审评论""" 141 | logger.debug("Generating comments for commit: {} with {} issues", 142 | context.metadata["commit_id"][:8], len(result.issues)) 143 | comments = [] 144 | 145 | # 添加总体评分评论 146 
| overall_comment = [ 147 | "# 🔍 代码评审报告", 148 | "", 149 | f"## 📊 评分概览 ({result.score:.1f}/10)", 150 | "", 151 | "| 评审维度 | 得分 | 权重 |", 152 | "|---------|------|------|", 153 | f"| 🛡️ 安全性 | {result.quality_metrics.security_score:.1f}/10 | {self.config.review.scoring_rules['security']:.0f} |", 154 | f"| ⚡ 性能 | {result.quality_metrics.performance_score:.1f}/10 | {self.config.review.scoring_rules['performance']:.0f} |", 155 | f"| 📖 可读性 | {result.quality_metrics.readability_score:.1f}/10 | {self.config.review.scoring_rules['readability']:.0f} |", 156 | f"| ✨ 最佳实践 | {result.quality_metrics.best_practice_score:.1f}/10 | {self.config.review.scoring_rules['best_practice']:.0f} |", 157 | "" 158 | ] 159 | 160 | if result.issues: 161 | overall_comment.extend([ 162 | "## 💡 需要改进的地方", 163 | "" 164 | ]) 165 | for issue in result.issues: 166 | overall_comment.extend([ 167 | f"### {issue.file_path}", 168 | f"- 位置:第{issue.start_line}行" + (f"-{issue.end_line}行" if issue.end_line else ""), 169 | f"- 问题:{issue.description}", 170 | f"- 建议:{issue.suggestion}", 171 | "" 172 | ]) 173 | 174 | if result.security_issues: 175 | overall_comment.extend([ 176 | "## ⚠️ 安全问题", 177 | "" 178 | ]) 179 | for issue in result.security_issues: 180 | severity_icon = "🔴" if issue.severity.lower() == "high" else "🟡" 181 | overall_comment.extend([ 182 | f"### {severity_icon} {issue.file_path}", 183 | f"- 严重程度:{issue.severity}", 184 | f"- 位置:第{issue.start_line}行" + (f"-{issue.end_line}行" if issue.end_line else ""), 185 | f"- 问题:{issue.description}", 186 | f"- 建议:{issue.suggestion}", 187 | "" 188 | ]) 189 | 190 | comments.append(ReviewComment( 191 | path=context.metadata["commit_message"], 192 | line=1, 193 | body="\n".join(overall_comment), 194 | commit_id=context.metadata["commit_id"] 195 | )) 196 | 197 | logger.info("Generated {} review comments for commit {}", 198 | len(comments), context.metadata["commit_id"][:8]) 199 | return comments 200 | 201 | async def review_pr(self, owner: str, repo: str, pr_id: str) -> bool: 202 | """执行PR评审的主流程""" 203 | logger.info("Starting PR review for {}/{} #{}", 204 | owner, repo, pr_id) 205 | try: 206 | # 获取PR的所有commits及其diff 207 | commit_diffs = await self.scm.get_diff(owner, repo, pr_id) 208 | logger.info("Found {} commits to review in PR {}/{} #{}", 209 | len(commit_diffs), owner, repo, pr_id) 210 | 211 | all_results = [] 212 | for commit_diff in commit_diffs: 213 | try: 214 | logger.info("Reviewing commit: {} - {} (files: {})", 215 | commit_diff.commit_id[:8], 216 | commit_diff.commit_message.split('\n')[0][:50], 217 | len(commit_diff.files)) 218 | 219 | # 收集整个commit的上下文 220 | context = await self._collect_context(owner, repo, commit_diff) 221 | if not context: 222 | logger.warning("Skipping commit {} due to no reviewable files (total files: {})", 223 | commit_diff.commit_id[:8], len(commit_diff.files)) 224 | continue 225 | 226 | # 分析整个commit的代码 227 | result = await self._analyze_code(context) 228 | all_results.append((result, context)) 229 | 230 | # 生成并发送评论 231 | comments = self._generate_comments(result, context) 232 | 233 | await self.scm.post_comment(owner, repo, pr_id, comments) 234 | logger.info("Posted {} review comments for commit {}", 235 | len(comments), commit_diff.commit_id[:8]) 236 | except Exception as commit_error: 237 | logger.error("Error processing commit {} with {} files: {}\nFull error: {}", 238 | commit_diff.commit_id[:8], len(commit_diff.files), str(commit_error), repr(commit_error)) 239 | continue 240 | 241 | # 处理评审结果 - 使用最低分作为最终分数 242 | if all_results: 243 | min_score = 
min(r.score for r, _ in all_results) 244 | logger.info("PR review completed with minimum score: {} (threshold: {})", 245 | min_score, self.config.review.quality_threshold) 246 | if min_score >= self.config.review.quality_threshold: 247 | logger.info("PR quality meets threshold ({} >= {}), attempting to approve and merge", 248 | min_score, self.config.review.quality_threshold) 249 | # 先批准PR 250 | await self.scm.approve_pr(owner, repo, pr_id) 251 | # 再合并PR 252 | await self.scm.merge_pr(owner, repo, pr_id) 253 | else: 254 | logger.info("PR quality below threshold ({} < {}), skipping approval", 255 | min_score, self.config.review.quality_threshold) 256 | 257 | return True 258 | 259 | except Exception as e: 260 | logger.error("Error reviewing PR {}/{} #{}: {}\nFull error: {}", 261 | owner, repo, pr_id, str(e), repr(e)) 262 | return False -------------------------------------------------------------------------------- /app/core/scm.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Optional 3 | from pydantic import BaseModel 4 | import aiohttp 5 | from loguru import logger 6 | from app.models.config import GiteaConfig 7 | 8 | class CommitDiff(BaseModel): 9 | commit_id: str 10 | commit_message: str 11 | files: List[dict] # 包含每个文件的变更信息 12 | diff_content: str # commit 的完整 diff 内容 13 | 14 | class ReviewComment(BaseModel): 15 | path: str 16 | line: int 17 | body: str 18 | commit_id: str 19 | 20 | class SCMProvider(ABC): 21 | @abstractmethod 22 | async def get_diff(self, owner: str, repo: str, pr_id: str) -> List[CommitDiff]: 23 | pass 24 | 25 | @abstractmethod 26 | async def post_comment(self, owner: str, repo: str, pr_id: str, comments: List[ReviewComment]): 27 | pass 28 | 29 | @abstractmethod 30 | async def approve_pr(self, owner: str, repo: str, pr_id: str): 31 | """批准PR""" 32 | pass 33 | 34 | @abstractmethod 35 | async def merge_pr(self, owner: str, repo: str, pr_id: str): 36 | pass 37 | 38 | @abstractmethod 39 | async def get_file_context(self, owner: str, repo: str, file_path: str, commit_id: str, line_start: int, line_count: int) -> str: 40 | pass 41 | 42 | class GiteaClient(SCMProvider): 43 | def __init__(self, config: GiteaConfig): 44 | self.config = config 45 | self.headers = { 46 | "Authorization": f"token {config.token}", 47 | "Content-Type": "application/json" 48 | } 49 | logger.info("GiteaClient initialized with URL: {} and token length: {}", 50 | config.url if config and hasattr(config, 'url') else "unknown", 51 | len(config.token) if config and hasattr(config, 'token') and config.token else 0) 52 | 53 | async def _make_request(self, method: str, path: str, **kwargs) -> dict: 54 | url = f"{self.config.url}/api/v1/{path}" 55 | logger.debug("Making {} request to {} with params: {}", 56 | method if method else "unknown", 57 | url if url else "unknown", 58 | kwargs if kwargs else "none") 59 | try: 60 | async with aiohttp.ClientSession() as session: 61 | async with session.request(method, url, headers=self.headers, **kwargs) as response: 62 | response.raise_for_status() 63 | data = await response.json() 64 | logger.debug("Request successful: {} {} with status code: {}", 65 | method if method else "unknown", 66 | url if url else "unknown", 67 | response.status if response and hasattr(response, 'status') else "unknown") 68 | return data 69 | except aiohttp.ClientError as e: 70 | status = getattr(getattr(e, 'response', None), 'status', 'N/A') 71 | logger.error("Request failed: {} {} - Status: 
{} - Error: {}", 72 | method if method else "unknown", 73 | url if url else "unknown", 74 | status if status else "N/A", 75 | str(e)) 76 | raise 77 | except Exception as e: 78 | logger.error("Unexpected error in request: {} {} - Error: {}", 79 | method if method else "unknown", 80 | url if url else "unknown", 81 | str(e)) 82 | raise 83 | 84 | async def get_diff(self, owner: str, repo: str, pr_id: str) -> List[CommitDiff]: 85 | logger.info("Getting diff for PR {}/{} #{}", owner, repo, pr_id) 86 | try: 87 | # 获取PR的所有commits 88 | commits = await self._make_request( 89 | "GET", 90 | f"repos/{owner}/{repo}/pulls/{pr_id}/commits" 91 | ) 92 | 93 | diffs = [] 94 | for commit in commits: 95 | commit_id = commit["sha"] 96 | 97 | # 获取这个commit的完整diff 98 | async with aiohttp.ClientSession() as session: 99 | url = f"{self.config.url}/api/v1/repos/{owner}/{repo}/git/commits/{commit_id}.diff" 100 | async with session.get(url, headers=self.headers) as response: 101 | response.raise_for_status() 102 | diff_content = await response.text() 103 | 104 | # 获取这个commit变更的文件列表 105 | files = await self._make_request( 106 | "GET", 107 | f"repos/{owner}/{repo}/git/commits/{commit_id}" 108 | ) 109 | 110 | diffs.append(CommitDiff( 111 | commit_id=commit_id, 112 | commit_message=commit["commit"]["message"], 113 | files=files.get("files", []), 114 | diff_content=diff_content 115 | )) 116 | 117 | logger.info("Found {} commits in PR {}/{} #{}", 118 | len(commits), owner, repo, pr_id) 119 | return diffs 120 | except Exception as e: 121 | logger.error("Failed to get diff for PR {}/{} #{}: {}", 122 | owner, repo, pr_id, str(e)) 123 | raise 124 | 125 | async def post_comment(self, owner: str, repo: str, pr_id: str, comments: List[ReviewComment]): 126 | logger.info("Posting {} comments to PR {}/{} #{} for commit: {}", 127 | len(comments) if comments else 0, 128 | owner if owner else "unknown", 129 | repo if repo else "unknown", 130 | pr_id if pr_id else "unknown", 131 | comments[0].commit_id[:8] if comments and len(comments) > 0 and hasattr(comments[0], 'commit_id') else "unknown") 132 | if not comments: 133 | return 134 | 135 | review_comments = [] 136 | for comment in comments: 137 | review_comments.append({ 138 | "path": comment.path, 139 | "body": comment.body, 140 | "new_position": comment.line, 141 | "commit_id": comment.commit_id 142 | }) 143 | 144 | try: 145 | await self._make_request( 146 | "POST", 147 | f"repos/{owner}/{repo}/pulls/{pr_id}/reviews", 148 | json={ 149 | "commit_id": comments[0].commit_id, 150 | "body": "Code Review Comments", 151 | "comments": review_comments, 152 | "event": "comment" 153 | } 154 | ) 155 | logger.info("Successfully posted {} comments to PR {}/{} #{}", 156 | len(comments) if comments else 0, 157 | owner if owner else "unknown", 158 | repo if repo else "unknown", 159 | pr_id if pr_id else "unknown") 160 | except Exception as e: 161 | logger.error("Failed to post comments to PR {}/{} #{}: {}", 162 | owner if owner else "unknown", 163 | repo if repo else "unknown", 164 | pr_id if pr_id else "unknown", 165 | str(e)) 166 | raise 167 | 168 | async def approve_pr(self, owner: str, repo: str, pr_id: str): 169 | """批准PR""" 170 | logger.info("Approving PR {}/{} #{}", owner, repo, pr_id) 171 | try: 172 | await self._make_request( 173 | "POST", 174 | f"repos/{owner}/{repo}/pulls/{pr_id}/reviews", 175 | json={ 176 | "body": "LGTM! 
代码评审通过。", 177 | "event": "APPROVE" 178 | } 179 | ) 180 | logger.info("Successfully approved PR {}/{} #{}", owner, repo, pr_id) 181 | except Exception as e: 182 | logger.error("Failed to approve PR {}/{} #{}: {}", 183 | owner, repo, pr_id, str(e)) 184 | raise 185 | 186 | async def merge_pr(self, owner: str, repo: str, pr_id: str): 187 | logger.info("Attempting to merge PR {}/{} #{}", owner, repo, pr_id) 188 | try: 189 | response = await self._make_request( 190 | "POST", 191 | f"repos/{owner}/{repo}/pulls/{pr_id}/merge", 192 | json={ 193 | "style": "merge", # 合并方式:merge, rebase, rebase-merge, squash 194 | "message": "", # 可选的合并信息 195 | "title": "" # 可选的合并标题 196 | } 197 | ) 198 | logger.info("Successfully merged PR {}/{} #{}", owner, repo, pr_id) 199 | except Exception as e: 200 | logger.error("Failed to merge PR {}/{} #{}: {}", 201 | owner, repo, pr_id, str(e)) 202 | raise 203 | 204 | async def get_file_context(self, owner: str, repo: str, file_path: str, commit_id: str, line_start: int, line_count: int) -> str: 205 | logger.debug("Getting file context for {}/{} {} @ {} with lines: {}-{}", 206 | owner, repo, file_path, commit_id[:8], line_start or 'start', line_count or 'end') 207 | try: 208 | # 使用raw内容API直接获取文件内容 209 | async with aiohttp.ClientSession() as session: 210 | async with session.get( 211 | f"{self.config.url}/api/v1/repos/{owner}/{repo}/raw/{file_path}?ref={commit_id}", 212 | headers=self.headers 213 | ) as response: 214 | response.raise_for_status() 215 | content = await response.text() 216 | 217 | lines = content.splitlines() 218 | start = max(0, line_start - line_count) 219 | end = min(len(lines), line_start + line_count) 220 | 221 | context = "\n".join(lines[start:end]) 222 | logger.debug("Got {} lines of context for {} (size: {} bytes)", 223 | len(context.splitlines()), file_path, len(context)) 224 | return context 225 | except Exception as e: 226 | logger.error("Failed to get file context for {}: {}", 227 | file_path, str(e)) 228 | return "" # 如果获取上下文失败,返回空字符串 -------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from pathlib import Path 4 | 5 | # 将项目根目录添加到 Python 路径 6 | ROOT_DIR = Path(__file__).resolve().parent.parent 7 | sys.path.append(str(ROOT_DIR)) 8 | 9 | from fastapi import FastAPI 10 | from fastapi.staticfiles import StaticFiles 11 | from fastapi.templating import Jinja2Templates 12 | from app.routers import webhooks, config_ui 13 | import uvicorn 14 | 15 | app = FastAPI( 16 | title="代码评审Agent", 17 | description="自动代码评审系统", 18 | version="1.0.0" 19 | ) 20 | 21 | # 挂载静态文件 22 | app.mount("/static", StaticFiles(directory="app/static"), name="static") 23 | 24 | # 注册路由 25 | app.include_router(webhooks.router, tags=["webhooks"]) 26 | app.include_router(config_ui.router, tags=["config"]) 27 | 28 | @app.get("/") 29 | async def root(): 30 | """重定向到配置页面""" 31 | return {"message": "Welcome to Code Review Agent"} 32 | 33 | if __name__ == "__main__": 34 | uvicorn.run( 35 | "app.main:app", 36 | host="0.0.0.0", 37 | port=8000, 38 | reload=False 39 | ) -------------------------------------------------------------------------------- /app/models/config.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field, ConfigDict 2 | from typing import Dict, List, Optional 3 | 4 | class GiteaConfig(BaseModel): 5 | model_config = ConfigDict(title="Gitea配置") 6 | url: str = 
Field(description="Gitea服务器URL") 7 | token: str = Field(description="Gitea API访问令牌") 8 | context_window: int = Field(10, description="代码上下文窗口大小") 9 | 10 | class LLMConfig(BaseModel): 11 | model_config = ConfigDict(title="LLM配置") 12 | model: str = Field("deepseek/deepseek-chat", description="模型名称") 13 | api_key: str = Field(description="API密钥") 14 | max_tokens: int = Field(60000, description="最大token数") 15 | 16 | class ReviewConfig(BaseModel): 17 | model_config = ConfigDict(title="评审配置") 18 | quality_threshold: float = Field(8.5, description="质量阈值分数") 19 | max_security_issues: int = Field(5, description="最大安全问题数量") 20 | ignore_patterns: List[str] = Field( 21 | default=[ 22 | '**/node_modules/', '**/vendor/', '**/venv/', '**/.venv/', 23 | '**/bower_components/', '**/jspm_packages/', '**/packages/', 24 | '**/deps/', '**/dist/', '**/build/', '**/out/', '**/target/', 25 | '**/bin/', '**/obj/', '**/*.exe', '**/*.dll', '**/*.so', 26 | '**/*.a', '**/*.jar', '**/*.class', '**/*.pyc', 27 | '**/__pycache__/', '**/*.egg-info/', '**/.DS_Store', 28 | '**/Thumbs.db', '**/Desktop.ini', '**/.idea/', '**/.vscode/', 29 | '**/.vs/', '**/*.suo', '**/*.user', '**/*.sublime-project', 30 | '**/*.sublime-workspace', '**/*.log', '**/logs/', '**/tmp/', 31 | '**/*.tmp', '**/*.swp', '**/*.swo', '**/.sass-cache/', 32 | '**/coverage/', '**/.nyc_output/', '**/junit.xml', 33 | '**/test-results/', '**/*.min.js', '**/*.min.css', '**/*.map', 34 | '**/public/static/', '**/compiled/', '**/generated/', '**/.env', 35 | '**/.env.local', '**/.env.*.local', '**/docker-compose.override.yml', 36 | '**/*.key', '**/*.pem', '**/*.crt', '**/docs/_build/', 37 | '**/site/', '**/.vuepress/dist/', '**/package-lock.json', 38 | '**/yarn.lock', '**/Gemfile.lock', '**/Podfile.lock' 39 | ], 40 | description="忽略的文件模式" 41 | ) 42 | scoring_rules: Dict[str, float] = Field( 43 | default={ 44 | "security": 0.3, 45 | "performance": 0.2, 46 | "readability": 0.2, 47 | "best_practice": 0.3 48 | }, 49 | description="评分规则权重" 50 | ) 51 | 52 | class AppConfig(BaseModel): 53 | model_config = ConfigDict(title="应用配置") 54 | scm: GiteaConfig 55 | llm: LLMConfig 56 | review: ReviewConfig -------------------------------------------------------------------------------- /app/routers/config_ui.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request, Form 2 | from fastapi.responses import HTMLResponse 3 | from fastapi.templating import Jinja2Templates 4 | import yaml 5 | from pathlib import Path 6 | from typing import List 7 | from app.models.config import AppConfig, GiteaConfig, LLMConfig, ReviewConfig 8 | 9 | router = APIRouter() 10 | templates = Jinja2Templates(directory="app/templates") 11 | 12 | CONFIG_FILE = "config.yaml" 13 | 14 | def load_config() -> AppConfig: 15 | """加载配置文件""" 16 | if Path(CONFIG_FILE).exists(): 17 | with open(CONFIG_FILE, "r", encoding="utf-8") as f: 18 | config_data = yaml.safe_load(f) 19 | return AppConfig.parse_obj(config_data) 20 | return AppConfig( 21 | scm=GiteaConfig( 22 | url="", 23 | token="", 24 | context_window=10 25 | ), 26 | llm=LLMConfig( 27 | model="deepseek/deepseek-chat", 28 | api_key="", 29 | max_tokens=60000 30 | ), 31 | review=ReviewConfig() # 使用config.py中的默认值 32 | ) 33 | 34 | def save_config(config: AppConfig): 35 | """保存配置文件""" 36 | config_dict = config.dict() 37 | with open(CONFIG_FILE, "w", encoding="utf-8") as f: 38 | yaml.safe_dump(config_dict, f, allow_unicode=True) 39 | 40 | @router.get("/config", response_class=HTMLResponse) 41 | async def 
get_config(request: Request): 42 | """显示配置页面""" 43 | config = load_config() 44 | return templates.TemplateResponse( 45 | "config.html", 46 | {"request": request, "config": config} 47 | ) 48 | 49 | @router.post("/config", response_class=HTMLResponse) 50 | async def save_config_handler( 51 | request: Request, 52 | scm_url: str = Form(alias="scm.url"), 53 | scm_token: str = Form(alias="scm.token"), 54 | scm_context_window: int = Form(alias="scm.context_window"), 55 | llm_model: str = Form(alias="llm.model"), 56 | llm_api_key: str = Form(alias="llm.api_key"), 57 | llm_max_tokens: int = Form(alias="llm.max_tokens"), 58 | review_quality_threshold: float = Form(alias="review.quality_threshold"), 59 | review_ignore_patterns: str = Form(alias="review.ignore_patterns"), 60 | review_scoring_rules_security: float = Form(alias="review.scoring_rules.security"), 61 | review_scoring_rules_performance: float = Form(alias="review.scoring_rules.performance"), 62 | review_scoring_rules_readability: float = Form(alias="review.scoring_rules.readability"), 63 | review_scoring_rules_best_practice: float = Form(alias="review.scoring_rules.best_practice") 64 | ): 65 | """保存配置""" 66 | config = AppConfig( 67 | scm=GiteaConfig( 68 | url=scm_url, 69 | token=scm_token, 70 | context_window=scm_context_window 71 | ), 72 | llm=LLMConfig( 73 | model=llm_model, 74 | api_key=llm_api_key, 75 | max_tokens=llm_max_tokens 76 | ), 77 | review=ReviewConfig( 78 | quality_threshold=review_quality_threshold, 79 | ignore_patterns=[p.strip() for p in review_ignore_patterns.split("\n") if p.strip()], 80 | scoring_rules={ 81 | "security": review_scoring_rules_security, 82 | "performance": review_scoring_rules_performance, 83 | "readability": review_scoring_rules_readability, 84 | "best_practice": review_scoring_rules_best_practice 85 | } 86 | ) 87 | ) 88 | 89 | save_config(config) 90 | 91 | return templates.TemplateResponse( 92 | "config.html", 93 | { 94 | "request": request, 95 | "config": config, 96 | "message": "配置已保存" 97 | }, 98 | headers={"HX-Trigger": "configSaved"} 99 | ) -------------------------------------------------------------------------------- /app/routers/webhooks.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, BackgroundTasks, HTTPException 2 | from pydantic import BaseModel 3 | from typing import Optional, Dict, Any 4 | from app.core.agent import CodeReviewAgent 5 | from app.core.scm import GiteaClient 6 | from app.services.llm_service import LLMService 7 | from app.models.config import AppConfig 8 | from app.routers.config_ui import load_config 9 | import logging 10 | 11 | router = APIRouter() 12 | 13 | class PRWebhook(BaseModel): 14 | action: str 15 | number: int 16 | pull_request: Dict[str, Any] 17 | repository: Dict[str, Any] 18 | sender: Dict[str, Any] 19 | 20 | async def process_pr(owner: str, repo: str, pr_id: str): 21 | """处理PR的后台任务""" 22 | try: 23 | # 加载配置 24 | config = load_config() 25 | 26 | # 初始化服务 27 | scm = GiteaClient(config.scm) 28 | llm = LLMService(config.llm) 29 | agent = CodeReviewAgent(config, scm, llm) 30 | 31 | # 执行评审 32 | await agent.review_pr(owner, repo, pr_id) 33 | except Exception as e: 34 | logger.error("Error processing PR {}/{} #{}: {}", 35 | owner if owner else "unknown", 36 | repo if repo else "unknown", 37 | pr_id if pr_id else "unknown", 38 | str(e)) 39 | 40 | @router.post("/webhook/gitea") 41 | async def handle_webhook( 42 | webhook: PRWebhook, 43 | background_tasks: BackgroundTasks 44 | ): 45 | """处理Gitea 
webhook""" 46 | # 只处理PR相关事件 47 | if webhook.action not in ["opened", "reopened", "synchronize"]: 48 | return {"status": "ignored"} 49 | 50 | # 从仓库信息中获取owner和repo 51 | owner = webhook.repository["owner"]["username"] 52 | repo = webhook.repository["name"] 53 | 54 | # 添加后台任务 55 | background_tasks.add_task( 56 | process_pr, 57 | owner, 58 | repo, 59 | str(webhook.number) 60 | ) 61 | 62 | return {"status": "processing"} -------------------------------------------------------------------------------- /app/services/llm_service.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional 2 | import tiktoken 3 | from pydantic import BaseModel 4 | import litellm 5 | from loguru import logger 6 | from app.models.config import LLMConfig, ReviewConfig 7 | 8 | class CodeIssue(BaseModel): 9 | file_path: str 10 | start_line: int 11 | end_line: Optional[int] 12 | description: str 13 | suggestion: str 14 | 15 | class SecurityIssue(BaseModel): 16 | severity: str 17 | file_path: str 18 | start_line: int 19 | end_line: Optional[int] 20 | description: str 21 | suggestion: str 22 | 23 | class QualityMetrics(BaseModel): 24 | security_score: float 25 | performance_score: float 26 | readability_score: float 27 | best_practice_score: float 28 | 29 | class ReviewResult(BaseModel): 30 | score: float 31 | issues: List[CodeIssue] 32 | security_issues: List[SecurityIssue] 33 | quality_metrics: QualityMetrics 34 | 35 | class CodeContext(BaseModel): 36 | diff: str # commit 的完整 diff 内容 37 | files_context: List[dict] # 包含每个文件的上下文信息 38 | metadata: Dict[str, str] # commit 相关的元数据 39 | 40 | def __len__(self) -> int: 41 | """返回文件上下文的数量""" 42 | return len(self.files_context) 43 | 44 | REVIEW_PROMPT = """你是一个专业的代码评审专家,请根据以下代码变更内容进行评审。评审时请特别注意以下几点: 45 | 46 | 1. 安全性(占比30%): 47 | - 检查SQL注入、XSS等安全漏洞 48 | - 检查敏感信息泄露 49 | - 检查权限控制问题 50 | 51 | 2. 性能(占比20%): 52 | - 检查算法复杂度 53 | - 检查资源使用效率 54 | - 检查并发处理方式 55 | 56 | 3. 可读性(占比20%): 57 | - 代码格式是否规范 58 | - 命名是否清晰 59 | - 注释是否充分 60 | 61 | 4. 最佳实践(占比30%): 62 | - 是否遵循设计模式 63 | - 是否有单元测试 64 | - 是否有类型提示 65 | - 是否符合SOLID原则 66 | 67 | 评分规则: 68 | - 安全问题:高危-3分/个,中危-1分/个 69 | - 性能问题:-2分/个 70 | - 可读性问题:-0.5分/个 71 | - 最佳实践:缺少单元测试-2分,无类型提示-1分 72 | 73 | Commit信息: 74 | {commit_message} 75 | 76 | 代码变更: 77 | {diff} 78 | 79 | 相关文件上下文: 80 | {files_context} 81 | 82 | 请提供详细的评审结果,包括: 83 | 1. 总体评分(满分10分) 84 | 2. 具体问题列表(包含文件路径和代码位置) 85 | 3. 安全问题清单(包含文件路径和代码位置) 86 | 4. 各维度的具体评分 87 | 88 | 请以JSON格式返回结果,格式如下: 89 | {{ 90 | "score": float, 91 | "issues": [ 92 | {{ 93 | "file_path": string, 94 | "start_line": int, 95 | "end_line": int | null, 96 | "description": string, 97 | "suggestion": string 98 | }} 99 | ], 100 | "security_issues": [ 101 | {{ 102 | "severity": string, 103 | "file_path": string, 104 | "start_line": int, 105 | "end_line": int | null, 106 | "description": string, 107 | "suggestion": string 108 | }} 109 | ], 110 | "quality_metrics": {{ 111 | "security_score": float, 112 | "performance_score": float, 113 | "readability_score": float, 114 | "best_practice_score": float 115 | }} 116 | }} 117 | 118 | 注意: 119 | 1. 每个问题必须指明具体的文件路径和代码位置(行号) 120 | 2. 如果问题涉及多行代码,请提供start_line和end_line 121 | 3. 如果问题只涉及单行代码,end_line可以为null 122 | 4. 
所有的行号必须是实际的代码行号 123 | """ 124 | 125 | class LLMService: 126 | def __init__(self, config: LLMConfig): 127 | self.config = config 128 | self.model_name = config.model 129 | self.api_key = config.api_key 130 | 131 | if not self.model_name: 132 | raise ValueError("Model name is required") 133 | if not self.api_key: 134 | raise ValueError("API key is required") 135 | 136 | logger.info("Setting up LLMService with model: {} and max_tokens: {}", 137 | self.model_name if hasattr(self, 'model_name') else "unknown", 138 | self.config.max_tokens if hasattr(self, 'config') and hasattr(self.config, 'max_tokens') else 0) 139 | litellm.api_key = self.api_key 140 | litellm.set_verbose = False 141 | 142 | try: 143 | self.tokenizer = tiktoken.encoding_for_model("gpt-4") 144 | logger.info("Tokenizer initialized successfully for model: {}", 145 | self.model_name if hasattr(self, 'model_name') else "unknown") 146 | except Exception as e: 147 | logger.error("Error initializing tokenizer for model {}: {}", 148 | self.model_name if hasattr(self, 'model_name') else "unknown", 149 | str(e)) 150 | raise 151 | 152 | logger.info("LLMService initialized successfully with model: {} and chunk_size: {}", 153 | self.model_name if hasattr(self, 'model_name') else "unknown", 154 | self.config.max_tokens if hasattr(self, 'config') and hasattr(self.config, 'max_tokens') else 0) 155 | 156 | def _split_code_chunks(self, context: CodeContext) -> List[CodeContext]: 157 | max_tokens = self.config.max_tokens - 1000 # 预留空间给prompt和response 158 | 159 | def count_tokens(text: str) -> int: 160 | return len(self.tokenizer.encode(text)) 161 | 162 | chunks = [] 163 | current_chunk = CodeContext( 164 | diff="", 165 | files_context=[], 166 | metadata=context.metadata 167 | ) 168 | current_tokens = 0 169 | 170 | # 按文件分割diff内容 171 | diff_content = context.diff 172 | file_diffs = diff_content.split("diff --git ") 173 | if file_diffs[0] == "": # 移除空的第一个元素 174 | file_diffs = file_diffs[1:] 175 | 176 | current_files = [] # 当前chunk包含的文件路径 177 | 178 | for file_diff in file_diffs: 179 | if not file_diff: 180 | continue 181 | 182 | file_diff = "diff --git " + file_diff 183 | file_tokens = count_tokens(file_diff) 184 | 185 | # 从diff中提取文件路径 186 | import re 187 | file_path_match = re.search(r'a/(.*?) 
b/', file_diff) 188 | if not file_path_match: 189 | continue 190 | file_path = file_path_match.group(1) 191 | 192 | if current_tokens + file_tokens > max_tokens and current_chunk.diff: 193 | # 添加相关的文件上下文 194 | current_chunk.files_context = [ 195 | f for f in context.files_context 196 | if f["file_path"] in current_files 197 | ] 198 | chunks.append(current_chunk) 199 | 200 | # 重置当前chunk 201 | current_chunk = CodeContext( 202 | diff="", 203 | files_context=[], 204 | metadata=context.metadata 205 | ) 206 | current_tokens = 0 207 | current_files = [] 208 | 209 | # 添加文件diff到当前chunk 210 | current_chunk.diff += file_diff 211 | current_tokens += file_tokens 212 | current_files.append(file_path) 213 | 214 | # 处理最后一个chunk 215 | if current_chunk.diff: 216 | current_chunk.files_context = [ 217 | f for f in context.files_context 218 | if f["file_path"] in current_files 219 | ] 220 | chunks.append(current_chunk) 221 | 222 | logger.info("Split code into {} chunks with total size: {} characters", 223 | len(chunks) if chunks else 0, 224 | sum(len(chunk.diff) if hasattr(chunk, 'diff') else 0 for chunk in chunks) if chunks else 0) 225 | return chunks 226 | 227 | async def analyze_code(self, context: CodeContext) -> ReviewResult: 228 | chunks = self._split_code_chunks(context) 229 | results = [] 230 | 231 | for i, chunk in enumerate(chunks): 232 | logger.info("Analyzing chunk {}/{} with size: {} characters", 233 | i + 1, len(chunks) if chunks else 0, 234 | len(chunk.diff) if hasattr(chunk, 'diff') else 0) 235 | 236 | # 格式化文件上下文 237 | files_context_str = "\n\n".join( 238 | f"文件: {f['file_path']} ({f['file_type']})\n{f['context']}" 239 | for f in chunk.files_context 240 | ) if chunk.files_context else "无文件上下文" 241 | 242 | # 验证必需的参数 243 | if not context.metadata.get("commit_message"): 244 | logger.error("Missing commit message in metadata for commit: {}", 245 | context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown") 246 | raise ValueError("Missing commit message in metadata") 247 | 248 | if not chunk.diff: 249 | logger.error("Missing diff content for commit: {}", 250 | context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown") 251 | raise ValueError("Missing diff content") 252 | 253 | try: 254 | prompt = REVIEW_PROMPT.format( 255 | commit_message=context.metadata["commit_message"], 256 | diff=chunk.diff, 257 | files_context=files_context_str 258 | ) 259 | except KeyError as ke: 260 | logger.error("Error formatting prompt - missing key: {}", ke) 261 | raise ValueError(f"Missing required field for prompt formatting: {ke}") 262 | except Exception as e: 263 | logger.error("Error formatting prompt: {}", str(e)) 264 | raise ValueError(f"Failed to format prompt: {str(e)}") 265 | 266 | if not prompt: 267 | logger.error("Empty prompt after formatting") 268 | raise ValueError("Empty prompt after formatting") 269 | 270 | try: 271 | logger.info("Sending request to LLM model: {} with prompt size: {} characters", 272 | self.model_name if hasattr(self, 'model_name') else "unknown", 273 | len(prompt) if prompt else 0) 274 | 275 | try: 276 | response = await litellm.acompletion( 277 | model=self.model_name, 278 | messages=[{"role": "user", "content": prompt}], 279 | temperature=0.2 280 | ) 281 | logger.info("Received response from LLM with size: {} characters", 282 | len(str(response)) if response else 0) 283 | 284 | if not response or not hasattr(response, 'choices') or not response.choices: 285 | logger.error("Invalid 
response format from LLM. Expected JSON, got: {}", 286 | type(response).__name__ if response else "None") 287 | raise ValueError("Invalid response format from LLM") 288 | 289 | response_text = response.choices[0].message.content 290 | except Exception as llm_error: 291 | logger.error("Error calling LLM model {}: {}", 292 | self.model_name if hasattr(self, 'model_name') else "unknown", 293 | str(llm_error)) 294 | raise 295 | 296 | response_text = response_text.strip() 297 | 298 | # 查找JSON内容的开始和结束位置 299 | json_start = response_text.find("{") 300 | json_end = response_text.rfind("}") + 1 301 | 302 | if json_start == -1 or json_end <= json_start: 303 | logger.error("No valid JSON found in response of size: {} characters", 304 | len(str(response)) if response else 0) 305 | # 返回默认结果而不是抛出异常 306 | return ReviewResult( 307 | score=0, 308 | issues=[], 309 | security_issues=[], 310 | quality_metrics=QualityMetrics( 311 | security_score=0, 312 | performance_score=0, 313 | readability_score=0, 314 | best_practice_score=0 315 | ) 316 | ) 317 | 318 | response_text = response_text[json_start:json_end] 319 | 320 | try: 321 | import json 322 | # 尝试清理和格式化 JSON 字符串 323 | response_text = response_text.replace('\n', ' ').replace('\r', '') 324 | # 处理可能的 markdown 代码块 325 | if '```json' in response_text: 326 | response_text = response_text.split('```json')[-1].split('```')[0] 327 | elif '```' in response_text: 328 | response_text = response_text.split('```')[-2] 329 | 330 | # 先尝试解析JSON 331 | json_obj = json.loads(response_text) 332 | 333 | # 确保所有必需的字段都存在并且类型正确 334 | required_fields = { 335 | "score": float, 336 | "issues": list, 337 | "security_issues": list, 338 | "quality_metrics": dict 339 | } 340 | 341 | for field, field_type in required_fields.items(): 342 | if field not in json_obj: 343 | logger.warning("Missing required field: {} in response for commit: {}, adding default value", 344 | field, context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown") 345 | if field == "issues": 346 | json_obj["issues"] = [] 347 | elif field == "security_issues": 348 | json_obj["security_issues"] = [] 349 | elif field == "quality_metrics": 350 | json_obj["quality_metrics"] = { 351 | "security_score": 0.0, 352 | "performance_score": 0.0, 353 | "readability_score": 0.0, 354 | "best_practice_score": 0.0 355 | } 356 | elif field == "score": 357 | json_obj["score"] = 0.0 358 | elif not isinstance(json_obj[field], field_type): 359 | logger.warning("Field {} has wrong type. Expected {}, got {}. 
Converting to default value.", 360 | field, field_type.__name__ if hasattr(field_type, '__name__') else str(field_type), 361 | type(json_obj[field]).__name__ if json_obj and field in json_obj else "unknown") 362 | if field == "issues": 363 | json_obj["issues"] = [] 364 | elif field == "security_issues": 365 | json_obj["security_issues"] = [] 366 | elif field == "quality_metrics": 367 | json_obj["quality_metrics"] = { 368 | "security_score": 0.0, 369 | "performance_score": 0.0, 370 | "readability_score": 0.0, 371 | "best_practice_score": 0.0 372 | } 373 | elif field == "score": 374 | json_obj["score"] = 0.0 375 | 376 | # 检查 quality_metrics 的字段 377 | required_metrics = { 378 | "security_score": float, 379 | "performance_score": float, 380 | "readability_score": float, 381 | "best_practice_score": float 382 | } 383 | 384 | if "quality_metrics" in json_obj: 385 | for metric, metric_type in required_metrics.items(): 386 | if metric not in json_obj["quality_metrics"]: 387 | logger.warning("Missing required metric: {} in quality metrics for commit: {}, adding default value", 388 | metric, context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown") 389 | json_obj["quality_metrics"][metric] = 0.0 390 | elif not isinstance(json_obj["quality_metrics"][metric], metric_type): 391 | json_obj["quality_metrics"][metric] = float(json_obj["quality_metrics"][metric]) 392 | 393 | result = ReviewResult.parse_obj(json_obj) 394 | results.append(result) 395 | logger.info("Successfully analyzed chunk {}/{} for commit: {}", 396 | i + 1, len(chunks) if chunks else 0, 397 | context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown") 398 | except json.JSONDecodeError as json_error: 399 | logger.error("JSON parsing error at position {} in response of size {}: {}", 400 | getattr(json_error, 'pos', 0), 401 | len(str(response)) if response else 0, 402 | str(json_error)) 403 | raise 404 | except Exception as parse_error: 405 | logger.error("Error parsing LLM response of size {}: {}", 406 | len(str(response)) if response else 0, 407 | str(parse_error)) 408 | # 返回一个默认的评审结果 409 | results.append(ReviewResult( 410 | score=0, 411 | issues=[], 412 | security_issues=[], 413 | quality_metrics=QualityMetrics( 414 | security_score=0, 415 | performance_score=0, 416 | readability_score=0, 417 | best_practice_score=0 418 | ) 419 | )) 420 | except Exception as e: 421 | logger.error("Error getting LLM response for model {}: {}", 422 | self.model_name if hasattr(self, 'model_name') else "unknown", 423 | str(e)) 424 | raise 425 | 426 | # 合并所有chunk的结果 427 | if not results: 428 | logger.warning("No valid results for commit: {} in model: {}", 429 | context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown", 430 | self.model_name if hasattr(self, 'model_name') else "unknown") 431 | return ReviewResult( 432 | score=0, 433 | issues=[], 434 | security_issues=[], 435 | quality_metrics=QualityMetrics( 436 | security_score=0, 437 | performance_score=0, 438 | readability_score=0, 439 | best_practice_score=0 440 | ) 441 | ) 442 | 443 | # 使用最低分作为最终分数 444 | final_result = ReviewResult( 445 | score=min(r.score for r in results), 446 | issues=[i for r in results for i in r.issues], 447 | security_issues=[i for r in results for i in r.security_issues], 448 | quality_metrics=QualityMetrics( 449 | security_score=min(r.quality_metrics.security_score for r in results), 450 | 
performance_score=min(r.quality_metrics.performance_score for r in results), 451 | readability_score=min(r.quality_metrics.readability_score for r in results), 452 | best_practice_score=min(r.quality_metrics.best_practice_score for r in results) 453 | ) 454 | ) 455 | 456 | logger.info("Analysis completed for commit {} with final score: {}", 457 | context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown", 458 | final_result.score if final_result and hasattr(final_result, 'score') else 0.0) 459 | return final_result -------------------------------------------------------------------------------- /app/static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/highkay/codereview-agent/6a324047dda2ec11056ad3e9bdde848082e97c05/app/static/.gitkeep -------------------------------------------------------------------------------- /app/templates/config.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 代码评审Agent配置 7 | 8 | 9 | 10 | 11 |
[The HTML markup of config.html (original lines 12-134) was lost in this extraction. Recoverable structure: a single-page configuration form titled "代码评审Agent配置", built with HTMX and TailwindCSS, with sections for "Gitea配置", "LLM配置", "评审配置" and "评分规则权重". The input names correspond to the form aliases handled in app/routers/config_ui.py (scm.url, scm.token, scm.context_window, llm.model, llm.api_key, llm.max_tokens, review.quality_threshold, review.ignore_patterns and the four review.scoring_rules.* weights), and the form submits back to POST /config.]
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -i https://pypi.tuna.tsinghua.edu.cn/simple
2 | fastapi
3 | uvicorn
4 | pydantic
5 | aiohttp
6 | python-multipart
7 | jinja2
8 | tiktoken
9 | htmx
10 | loguru
11 | litellm
--------------------------------------------------------------------------------
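A minimal sketch for smoke-testing the webhook endpoint locally, assuming the service runs on localhost:8000 as in the README; the payload covers only the required fields of the `PRWebhook` model in `app/routers/webhooks.py`, and the owner, repository name and PR number are placeholder values (a real Gitea event carries many more fields):

```python
import asyncio

import aiohttp

# Minimal payload covering only the fields required by the PRWebhook model;
# all repository/user values below are placeholders for a local smoke test.
PAYLOAD = {
    "action": "opened",  # only "opened", "reopened" and "synchronize" are processed
    "number": 1,
    "pull_request": {"title": "demo PR"},
    "repository": {"owner": {"username": "demo-owner"}, "name": "demo-repo"},
    "sender": {"login": "demo-user"},
}


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        # Assumes the agent is listening on localhost:8000 as described in the README.
        async with session.post("http://localhost:8000/webhook/gitea", json=PAYLOAD) as resp:
            print(resp.status, await resp.json())  # expected: 200 {'status': 'processing'}


if __name__ == "__main__":
    asyncio.run(main())
```

A `{"status": "processing"}` response only confirms routing and payload validation; the review itself runs as a background task and still requires a valid `config.yaml` and a reachable Gitea server.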