├── .github └── workflows │ └── docker-build.yml ├── .gitignore ├── Dockerfile ├── README.md ├── app ├── core │ ├── agent.py │ └── scm.py ├── main.py ├── models │ └── config.py ├── routers │ ├── config_ui.py │ └── webhooks.py ├── services │ └── llm_service.py ├── static │ └── .gitkeep └── templates │ └── config.html └── requirements.txt /.github/workflows/docker-build.yml: -------------------------------------------------------------------------------- 1 | name: Docker Build and Push 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build-and-push: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v3 18 | 19 | - name: Login to Docker Hub 20 | uses: docker/login-action@v3 21 | with: 22 | username: ${{ secrets.DOCKERHUB_USERNAME }} 23 | password: ${{ secrets.DOCKERHUB_TOKEN }} 24 | 25 | - name: Build and push 26 | uses: docker/build-push-action@v5 27 | with: 28 | context: . 29 | push: true 30 | tags: highkay/codereview-agent:latest 31 | platforms: linux/amd64,linux/arm64 32 | cache-from: type=registry,ref=highkay/codereview-agent:latest 33 | cache-to: type=inline -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.json 2 | __pycache__/ 3 | config.yaml 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用 Python 3.9 作为基础镜像 2 | FROM python:3.12-slim 3 | 4 | # 设置工作目录 5 | WORKDIR /app 6 | 7 | # 设置环境变量 8 | ENV PYTHONUNBUFFERED=1 9 | # 复制项目文件 10 | COPY . . 11 | 12 | # 安装项目依赖 13 | RUN pip install --no-cache-dir -r requirements.txt && pip install --no-cache-dir --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple pydantic 14 | 15 | # 暴露端口 16 | EXPOSE 8000 17 | 18 | # 启动命令 19 | CMD ["python", "app/main.py"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 代码评审Agent 2 | 3 | 这是一个基于LLM的自动代码评审系统,可以自动对Gitea的Pull Request进行代码评审。 4 | 5 | ## 主要功能 6 | 7 | 1. 自动监听Gitea的Pull Request事件 8 | 2. 获取PR中的代码变更并进行分析 9 | 3. 使用LLM进行代码评审,包括: 10 | - 安全性评估 11 | - 性能分析 12 | - 代码可读性 13 | - 最佳实践检查 14 | 4. 自动生成评审评论 15 | 5. 支持根据评分自动合并PR 16 | 17 | ## 技术栈 18 | 19 | - Python 3.10+ 20 | - FastAPI 21 | - HTMX 22 | - TailwindCSS 23 | - LiteLLM 24 | 25 | ## 安装 26 | 27 | 1. 克隆仓库: 28 | ```bash 29 | git clone https://github.com/highkay/codereview-agent.git 30 | cd codereview-agent 31 | ``` 32 | 33 | 2. 安装依赖: 34 | ```bash 35 | pip install -r requirements.txt 36 | ``` 37 | 38 | 3. 配置系统: 39 | - 访问 `http://localhost:8000/config` 40 | - 填写必要的配置信息: 41 | - Gitea服务器URL和API Token 42 | - LLM提供商配置 43 | - 评审规则和阈值 44 | 45 | ## 运行 46 | 47 | ```bash 48 | python app/main.py 49 | ``` 50 | 51 | 服务将在 `http://localhost:8000` 启动。 52 | 53 | ## Gitea配置 54 | 55 | 1. 在Gitea中添加Webhook: 56 | - 进入仓库设置 -> Webhooks -> 添加Webhook 57 | - URL设置为:`http://your-server:8000/webhook/gitea` 58 | - 选择事件:Pull Request 59 | 60 | ## 评审规则 61 | 62 | 系统使用以下维度进行代码评审: 63 | 64 | 1. 安全性(30%): 65 | - SQL注入 66 | - XSS漏洞 67 | - 敏感信息泄露 68 | - 权限控制 69 | 70 | 2. 性能(20%): 71 | - 算法复杂度 72 | - 资源使用效率 73 | - 并发处理 74 | 75 | 3. 可读性(20%): 76 | - 代码格式 77 | - 命名规范 78 | - 注释完整性 79 | 80 | 4. 
最佳实践(30%): 81 | - 设计模式 82 | - 单元测试 83 | - 类型提示 84 | - SOLID原则 85 | 86 | ## 评分规则 87 | 88 | - 安全问题: 89 | - 高危:-3分/个 90 | - 中危:-1分/个 91 | - 性能问题:-2分/个 92 | - 可读性问题:-0.5分/个 93 | - 最佳实践: 94 | - 缺少单元测试:-2分 95 | - 无类型提示:-1分 96 | 97 | ## 配置文件 98 | 99 | 系统使用YAML格式的配置文件(`config.yaml`): 100 | 101 | ```yaml 102 | scm: 103 | type: gitea 104 | url: https://git.example.com 105 | token: xxxx-xxxx 106 | context_window: 5 107 | 108 | llm: 109 | provider: openai 110 | model: gpt-4-turbo 111 | api_key: sk-xxxx 112 | max_tokens: 4096 113 | 114 | review: 115 | quality_threshold: 8.5 116 | ignore_patterns: 117 | - "*.md" 118 | - "**/test_*.py" 119 | scoring_rules: 120 | security: 0.3 121 | performance: 0.2 122 | readability: 0.2 123 | best_practice: 0.3 124 | ``` 125 | 126 | ## 开发 127 | 128 | 1. 代码结构: 129 | ``` 130 | app/ 131 | ├── core/ # 核心功能 132 | │ ├── agent.py # 评审Agent 133 | │ └── scm.py # SCM抽象层 134 | ├── models/ # 数据模型 135 | │ └── config.py # 配置模型 136 | ├── routers/ # API路由 137 | │ ├── webhooks.py # Webhook处理 138 | │ └── config_ui.py# 配置界面 139 | ├── services/ # 服务 140 | │ └── llm_service.py # LLM服务 141 | ├── static/ # 静态资源 142 | ├── templates/ # 模板 143 | │ └── config.html # 配置页面 144 | └── main.py # 应用入口 145 | ``` 146 | 147 | 2. 添加新功能: 148 | - 在相应模块中添加代码 149 | - 确保添加适当的测试 150 | - 遵循项目的代码风格 151 | 152 | ## 贡献 153 | 154 | 欢迎提交Pull Request!在提交之前,请确保: 155 | 156 | 1. 代码通过所有测试 157 | 2. 添加了必要的文档 158 | 3. 遵循项目的代码规范 159 | 160 | ## 许可证 161 | 162 | MIT License -------------------------------------------------------------------------------- /app/core/agent.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Any, Optional 2 | from app.models.config import AppConfig 3 | from app.services.llm_service import LLMService, CodeContext, ReviewResult, QualityMetrics 4 | from app.core.scm import SCMProvider, CommitDiff, ReviewComment 5 | from loguru import logger 6 | import fnmatch 7 | import os 8 | 9 | class CodeReviewAgent: 10 | def __init__(self, config: AppConfig, scm: SCMProvider, llm: LLMService) -> None: 11 | self.config = config 12 | self.scm = scm 13 | self.llm = llm 14 | logger.info("CodeReviewAgent initialized with model: {} and config: {}", config.llm.model, config.dict()) 15 | 16 | def _filter_files(self, files: List[dict]) -> List[dict]: 17 | """过滤不需要评审的文件""" 18 | filtered = [] 19 | for file in files: 20 | should_ignore = any( 21 | fnmatch.fnmatch(file["filename"], pattern) 22 | for pattern in self.config.review.ignore_patterns 23 | ) 24 | if not should_ignore: 25 | filtered.append(file) 26 | return filtered 27 | 28 | async def _collect_context(self, owner: str, repo: str, commit_diff: CommitDiff) -> Optional[CodeContext]: 29 | # 过滤文件 30 | filtered_files = self._filter_files(commit_diff.files) 31 | if not filtered_files: 32 | logger.info("No files to review after filtering for commit {} (changed files: {})", 33 | commit_diff.commit_id[:8], len(commit_diff.files)) 34 | return None 35 | 36 | # 收集所有文件的上下文 37 | files_context = [] 38 | window_size = self.config.scm.context_window 39 | 40 | for file in filtered_files: 41 | if "filename" not in file: 42 | logger.error("Invalid file data - missing filename in commit: {}", commit_diff.commit_id[:8]) 43 | continue 44 | 45 | file_path = file["filename"] 46 | file_type = os.path.splitext(file_path)[1][1:] if os.path.splitext(file_path)[1] else "unknown" 47 | 48 | try: 49 | # 获取文件上下文 50 | context = await self.scm.get_file_context( 51 | owner, 52 | repo, 53 | file_path, 54 | commit_diff.commit_id, 55 | 1, 56 | 
window_size * 2 57 | ) 58 | 59 | if not context: 60 | logger.warning("No context returned for file: {} in commit: {}", file_path, commit_diff.commit_id[:8]) 61 | continue 62 | 63 | files_context.append({ 64 | "file_path": file_path, 65 | "file_type": file_type, 66 | "context": context 67 | }) 68 | except Exception as e: 69 | logger.error("Error getting context for file {} in commit {}: {}", 70 | file_path, commit_diff.commit_id[:8], str(e)) 71 | continue 72 | 73 | if not files_context: 74 | logger.warning("No valid file contexts collected for commit {} (total files: {})", 75 | commit_diff.commit_id[:8] if commit_diff and commit_diff.commit_id else "unknown", 76 | len(commit_diff.files) if commit_diff and hasattr(commit_diff, 'files') else 0) 77 | return None 78 | 79 | # 创建并验证上下文对象 80 | try: 81 | context = CodeContext( 82 | diff=commit_diff.diff_content, 83 | files_context=files_context, 84 | metadata={ 85 | "commit_id": commit_diff.commit_id, 86 | "commit_message": commit_diff.commit_message 87 | } 88 | ) 89 | return context 90 | except Exception as e: 91 | logger.error("Error creating CodeContext for commit {}: {}", 92 | commit_diff.commit_id[:8] if commit_diff and commit_diff.commit_id else "unknown", 93 | str(e)) 94 | return None 95 | 96 | async def _analyze_code(self, context: CodeContext) -> ReviewResult: 97 | """分析整个commit的代码变更""" 98 | logger.debug("Analyzing commit: {} - {} (files: {})", 99 | context.metadata["commit_id"][:8], 100 | context.metadata["commit_message"].split('\n')[0][:50], 101 | len(context.files_context)) 102 | 103 | try: 104 | result = await self.llm.analyze_code(context) 105 | 106 | # 记录评审结果 107 | logger.info("Code analysis completed for commit {} with scores and {} files:", 108 | context.metadata["commit_id"][:8], len(context.files_context)) 109 | logger.info("- Overall Score: {}/10 (weight: {})", result.score, self.config.review.quality_threshold) 110 | logger.info("- Security: {}/10 (weight: {})", result.quality_metrics.security_score, self.config.review.scoring_rules["security"]) 111 | logger.info("- Performance: {}/10 (weight: {})", result.quality_metrics.performance_score, self.config.review.scoring_rules["performance"]) 112 | logger.info("- Readability: {}/10 (weight: {})", result.quality_metrics.readability_score, self.config.review.scoring_rules["readability"]) 113 | logger.info("- Best Practices: {}/10 (weight: {})", result.quality_metrics.best_practice_score, self.config.review.scoring_rules["best_practice"]) 114 | 115 | if result.security_issues: 116 | logger.warning("Found {} security issues in commit {} (threshold: {})", 117 | len(result.security_issues), context.metadata["commit_id"][:8], self.config.review.max_security_issues) 118 | 119 | return result 120 | except Exception as e: 121 | logger.error("Error analyzing code for commit {}: {}\nFull error: {}", 122 | context.metadata.get("commit_id", "unknown")[:8], 123 | str(e), repr(e)) 124 | # 返回一个默认的评审结果 125 | return ReviewResult( 126 | score=0, 127 | comments=["代码评审过程中发生错误"], 128 | suggestions=[], 129 | issues=[], 130 | security_issues=[], 131 | quality_metrics=QualityMetrics( 132 | security_score=0, 133 | performance_score=0, 134 | readability_score=0, 135 | best_practice_score=0 136 | ) 137 | ) 138 | 139 | def _generate_comments(self, result: ReviewResult, context: CodeContext) -> List[ReviewComment]: 140 | """生成评审评论""" 141 | logger.debug("Generating comments for commit: {} with {} issues", 142 | context.metadata["commit_id"][:8], len(result.issues)) 143 | comments = [] 144 | 145 | # 添加总体评分评论 146 
| overall_comment = [ 147 | "# 🔍 代码评审报告", 148 | "", 149 | f"## 📊 评分概览 ({result.score:.1f}/10)", 150 | "", 151 | "| 评审维度 | 得分 | 权重 |", 152 | "|---------|------|------|", 153 | f"| 🛡️ 安全性 | {result.quality_metrics.security_score:.1f}/10 | {self.config.review.scoring_rules['security']:.0f} |", 154 | f"| ⚡ 性能 | {result.quality_metrics.performance_score:.1f}/10 | {self.config.review.scoring_rules['performance']:.0f} |", 155 | f"| 📖 可读性 | {result.quality_metrics.readability_score:.1f}/10 | {self.config.review.scoring_rules['readability']:.0f} |", 156 | f"| ✨ 最佳实践 | {result.quality_metrics.best_practice_score:.1f}/10 | {self.config.review.scoring_rules['best_practice']:.0f} |", 157 | "" 158 | ] 159 | 160 | if result.issues: 161 | overall_comment.extend([ 162 | "## 💡 需要改进的地方", 163 | "" 164 | ]) 165 | for issue in result.issues: 166 | overall_comment.extend([ 167 | f"### {issue.file_path}", 168 | f"- 位置:第{issue.start_line}行" + (f"-{issue.end_line}行" if issue.end_line else ""), 169 | f"- 问题:{issue.description}", 170 | f"- 建议:{issue.suggestion}", 171 | "" 172 | ]) 173 | 174 | if result.security_issues: 175 | overall_comment.extend([ 176 | "## ⚠️ 安全问题", 177 | "" 178 | ]) 179 | for issue in result.security_issues: 180 | severity_icon = "🔴" if issue.severity.lower() == "high" else "🟡" 181 | overall_comment.extend([ 182 | f"### {severity_icon} {issue.file_path}", 183 | f"- 严重程度:{issue.severity}", 184 | f"- 位置:第{issue.start_line}行" + (f"-{issue.end_line}行" if issue.end_line else ""), 185 | f"- 问题:{issue.description}", 186 | f"- 建议:{issue.suggestion}", 187 | "" 188 | ]) 189 | 190 | comments.append(ReviewComment( 191 | path=context.metadata["commit_message"], 192 | line=1, 193 | body="\n".join(overall_comment), 194 | commit_id=context.metadata["commit_id"] 195 | )) 196 | 197 | logger.info("Generated {} review comments for commit {}", 198 | len(comments), context.metadata["commit_id"][:8]) 199 | return comments 200 | 201 | async def review_pr(self, owner: str, repo: str, pr_id: str) -> bool: 202 | """执行PR评审的主流程""" 203 | logger.info("Starting PR review for {}/{} #{}", 204 | owner, repo, pr_id) 205 | try: 206 | # 获取PR的所有commits及其diff 207 | commit_diffs = await self.scm.get_diff(owner, repo, pr_id) 208 | logger.info("Found {} commits to review in PR {}/{} #{}", 209 | len(commit_diffs), owner, repo, pr_id) 210 | 211 | all_results = [] 212 | for commit_diff in commit_diffs: 213 | try: 214 | logger.info("Reviewing commit: {} - {} (files: {})", 215 | commit_diff.commit_id[:8], 216 | commit_diff.commit_message.split('\n')[0][:50], 217 | len(commit_diff.files)) 218 | 219 | # 收集整个commit的上下文 220 | context = await self._collect_context(owner, repo, commit_diff) 221 | if not context: 222 | logger.warning("Skipping commit {} due to no reviewable files (total files: {})", 223 | commit_diff.commit_id[:8], len(commit_diff.files)) 224 | continue 225 | 226 | # 分析整个commit的代码 227 | result = await self._analyze_code(context) 228 | all_results.append((result, context)) 229 | 230 | # 生成并发送评论 231 | comments = self._generate_comments(result, context) 232 | 233 | await self.scm.post_comment(owner, repo, pr_id, comments) 234 | logger.info("Posted {} review comments for commit {}", 235 | len(comments), commit_diff.commit_id[:8]) 236 | except Exception as commit_error: 237 | logger.error("Error processing commit {} with {} files: {}\nFull error: {}", 238 | commit_diff.commit_id[:8], len(commit_diff.files), str(commit_error), repr(commit_error)) 239 | continue 240 | 241 | # 处理评审结果 - 使用最低分作为最终分数 242 | if all_results: 243 | min_score = 
min(r.score for r, _ in all_results) 244 | logger.info("PR review completed with minimum score: {} (threshold: {})", 245 | min_score, self.config.review.quality_threshold) 246 | if min_score >= self.config.review.quality_threshold: 247 | logger.info("PR quality meets threshold ({} >= {}), attempting to approve and merge", 248 | min_score, self.config.review.quality_threshold) 249 | # 先批准PR 250 | await self.scm.approve_pr(owner, repo, pr_id) 251 | # 再合并PR 252 | await self.scm.merge_pr(owner, repo, pr_id) 253 | else: 254 | logger.info("PR quality below threshold ({} < {}), skipping approval", 255 | min_score, self.config.review.quality_threshold) 256 | 257 | return True 258 | 259 | except Exception as e: 260 | logger.error("Error reviewing PR {}/{} #{}: {}\nFull error: {}", 261 | owner, repo, pr_id, str(e), repr(e)) 262 | return False -------------------------------------------------------------------------------- /app/core/scm.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Optional 3 | from pydantic import BaseModel 4 | import aiohttp 5 | from loguru import logger 6 | from app.models.config import GiteaConfig 7 | 8 | class CommitDiff(BaseModel): 9 | commit_id: str 10 | commit_message: str 11 | files: List[dict] # 包含每个文件的变更信息 12 | diff_content: str # commit 的完整 diff 内容 13 | 14 | class ReviewComment(BaseModel): 15 | path: str 16 | line: int 17 | body: str 18 | commit_id: str 19 | 20 | class SCMProvider(ABC): 21 | @abstractmethod 22 | async def get_diff(self, owner: str, repo: str, pr_id: str) -> List[CommitDiff]: 23 | pass 24 | 25 | @abstractmethod 26 | async def post_comment(self, owner: str, repo: str, pr_id: str, comments: List[ReviewComment]): 27 | pass 28 | 29 | @abstractmethod 30 | async def approve_pr(self, owner: str, repo: str, pr_id: str): 31 | """批准PR""" 32 | pass 33 | 34 | @abstractmethod 35 | async def merge_pr(self, owner: str, repo: str, pr_id: str): 36 | pass 37 | 38 | @abstractmethod 39 | async def get_file_context(self, owner: str, repo: str, file_path: str, commit_id: str, line_start: int, line_count: int) -> str: 40 | pass 41 | 42 | class GiteaClient(SCMProvider): 43 | def __init__(self, config: GiteaConfig): 44 | self.config = config 45 | self.headers = { 46 | "Authorization": f"token {config.token}", 47 | "Content-Type": "application/json" 48 | } 49 | logger.info("GiteaClient initialized with URL: {} and token length: {}", 50 | config.url if config and hasattr(config, 'url') else "unknown", 51 | len(config.token) if config and hasattr(config, 'token') and config.token else 0) 52 | 53 | async def _make_request(self, method: str, path: str, **kwargs) -> dict: 54 | url = f"{self.config.url}/api/v1/{path}" 55 | logger.debug("Making {} request to {} with params: {}", 56 | method if method else "unknown", 57 | url if url else "unknown", 58 | kwargs if kwargs else "none") 59 | try: 60 | async with aiohttp.ClientSession() as session: 61 | async with session.request(method, url, headers=self.headers, **kwargs) as response: 62 | response.raise_for_status() 63 | data = await response.json() 64 | logger.debug("Request successful: {} {} with status code: {}", 65 | method if method else "unknown", 66 | url if url else "unknown", 67 | response.status if response and hasattr(response, 'status') else "unknown") 68 | return data 69 | except aiohttp.ClientError as e: 70 | status = getattr(getattr(e, 'response', None), 'status', 'N/A') 71 | logger.error("Request failed: {} {} - Status: 
{} - Error: {}", 72 | method if method else "unknown", 73 | url if url else "unknown", 74 | status if status else "N/A", 75 | str(e)) 76 | raise 77 | except Exception as e: 78 | logger.error("Unexpected error in request: {} {} - Error: {}", 79 | method if method else "unknown", 80 | url if url else "unknown", 81 | str(e)) 82 | raise 83 | 84 | async def get_diff(self, owner: str, repo: str, pr_id: str) -> List[CommitDiff]: 85 | logger.info("Getting diff for PR {}/{} #{}", owner, repo, pr_id) 86 | try: 87 | # 获取PR的所有commits 88 | commits = await self._make_request( 89 | "GET", 90 | f"repos/{owner}/{repo}/pulls/{pr_id}/commits" 91 | ) 92 | 93 | diffs = [] 94 | for commit in commits: 95 | commit_id = commit["sha"] 96 | 97 | # 获取这个commit的完整diff 98 | async with aiohttp.ClientSession() as session: 99 | url = f"{self.config.url}/api/v1/repos/{owner}/{repo}/git/commits/{commit_id}.diff" 100 | async with session.get(url, headers=self.headers) as response: 101 | response.raise_for_status() 102 | diff_content = await response.text() 103 | 104 | # 获取这个commit变更的文件列表 105 | files = await self._make_request( 106 | "GET", 107 | f"repos/{owner}/{repo}/git/commits/{commit_id}" 108 | ) 109 | 110 | diffs.append(CommitDiff( 111 | commit_id=commit_id, 112 | commit_message=commit["commit"]["message"], 113 | files=files.get("files", []), 114 | diff_content=diff_content 115 | )) 116 | 117 | logger.info("Found {} commits in PR {}/{} #{}", 118 | len(commits), owner, repo, pr_id) 119 | return diffs 120 | except Exception as e: 121 | logger.error("Failed to get diff for PR {}/{} #{}: {}", 122 | owner, repo, pr_id, str(e)) 123 | raise 124 | 125 | async def post_comment(self, owner: str, repo: str, pr_id: str, comments: List[ReviewComment]): 126 | logger.info("Posting {} comments to PR {}/{} #{} for commit: {}", 127 | len(comments) if comments else 0, 128 | owner if owner else "unknown", 129 | repo if repo else "unknown", 130 | pr_id if pr_id else "unknown", 131 | comments[0].commit_id[:8] if comments and len(comments) > 0 and hasattr(comments[0], 'commit_id') else "unknown") 132 | if not comments: 133 | return 134 | 135 | review_comments = [] 136 | for comment in comments: 137 | review_comments.append({ 138 | "path": comment.path, 139 | "body": comment.body, 140 | "new_position": comment.line, 141 | "commit_id": comment.commit_id 142 | }) 143 | 144 | try: 145 | await self._make_request( 146 | "POST", 147 | f"repos/{owner}/{repo}/pulls/{pr_id}/reviews", 148 | json={ 149 | "commit_id": comments[0].commit_id, 150 | "body": "Code Review Comments", 151 | "comments": review_comments, 152 | "event": "comment" 153 | } 154 | ) 155 | logger.info("Successfully posted {} comments to PR {}/{} #{}", 156 | len(comments) if comments else 0, 157 | owner if owner else "unknown", 158 | repo if repo else "unknown", 159 | pr_id if pr_id else "unknown") 160 | except Exception as e: 161 | logger.error("Failed to post comments to PR {}/{} #{}: {}", 162 | owner if owner else "unknown", 163 | repo if repo else "unknown", 164 | pr_id if pr_id else "unknown", 165 | str(e)) 166 | raise 167 | 168 | async def approve_pr(self, owner: str, repo: str, pr_id: str): 169 | """批准PR""" 170 | logger.info("Approving PR {}/{} #{}", owner, repo, pr_id) 171 | try: 172 | await self._make_request( 173 | "POST", 174 | f"repos/{owner}/{repo}/pulls/{pr_id}/reviews", 175 | json={ 176 | "body": "LGTM! 
代码评审通过。", 177 | "event": "APPROVE" 178 | } 179 | ) 180 | logger.info("Successfully approved PR {}/{} #{}", owner, repo, pr_id) 181 | except Exception as e: 182 | logger.error("Failed to approve PR {}/{} #{}: {}", 183 | owner, repo, pr_id, str(e)) 184 | raise 185 | 186 | async def merge_pr(self, owner: str, repo: str, pr_id: str): 187 | logger.info("Attempting to merge PR {}/{} #{}", owner, repo, pr_id) 188 | try: 189 | response = await self._make_request( 190 | "POST", 191 | f"repos/{owner}/{repo}/pulls/{pr_id}/merge", 192 | json={ 193 | "style": "merge", # 合并方式:merge, rebase, rebase-merge, squash 194 | "message": "", # 可选的合并信息 195 | "title": "" # 可选的合并标题 196 | } 197 | ) 198 | logger.info("Successfully merged PR {}/{} #{}", owner, repo, pr_id) 199 | except Exception as e: 200 | logger.error("Failed to merge PR {}/{} #{}: {}", 201 | owner, repo, pr_id, str(e)) 202 | raise 203 | 204 | async def get_file_context(self, owner: str, repo: str, file_path: str, commit_id: str, line_start: int, line_count: int) -> str: 205 | logger.debug("Getting file context for {}/{} {} @ {} with lines: {}-{}", 206 | owner, repo, file_path, commit_id[:8], line_start or 'start', line_count or 'end') 207 | try: 208 | # 使用raw内容API直接获取文件内容 209 | async with aiohttp.ClientSession() as session: 210 | async with session.get( 211 | f"{self.config.url}/api/v1/repos/{owner}/{repo}/raw/{file_path}?ref={commit_id}", 212 | headers=self.headers 213 | ) as response: 214 | response.raise_for_status() 215 | content = await response.text() 216 | 217 | lines = content.splitlines() 218 | start = max(0, line_start - line_count) 219 | end = min(len(lines), line_start + line_count) 220 | 221 | context = "\n".join(lines[start:end]) 222 | logger.debug("Got {} lines of context for {} (size: {} bytes)", 223 | len(context.splitlines()), file_path, len(context)) 224 | return context 225 | except Exception as e: 226 | logger.error("Failed to get file context for {}: {}", 227 | file_path, str(e)) 228 | return "" # 如果获取上下文失败,返回空字符串 -------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from pathlib import Path 4 | 5 | # 将项目根目录添加到 Python 路径 6 | ROOT_DIR = Path(__file__).resolve().parent.parent 7 | sys.path.append(str(ROOT_DIR)) 8 | 9 | from fastapi import FastAPI 10 | from fastapi.staticfiles import StaticFiles 11 | from fastapi.templating import Jinja2Templates 12 | from app.routers import webhooks, config_ui 13 | import uvicorn 14 | 15 | app = FastAPI( 16 | title="代码评审Agent", 17 | description="自动代码评审系统", 18 | version="1.0.0" 19 | ) 20 | 21 | # 挂载静态文件 22 | app.mount("/static", StaticFiles(directory="app/static"), name="static") 23 | 24 | # 注册路由 25 | app.include_router(webhooks.router, tags=["webhooks"]) 26 | app.include_router(config_ui.router, tags=["config"]) 27 | 28 | @app.get("/") 29 | async def root(): 30 | """重定向到配置页面""" 31 | return {"message": "Welcome to Code Review Agent"} 32 | 33 | if __name__ == "__main__": 34 | uvicorn.run( 35 | "app.main:app", 36 | host="0.0.0.0", 37 | port=8000, 38 | reload=False 39 | ) -------------------------------------------------------------------------------- /app/models/config.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field, ConfigDict 2 | from typing import Dict, List, Optional 3 | 4 | class GiteaConfig(BaseModel): 5 | model_config = ConfigDict(title="Gitea配置") 6 | url: str = 
Field(description="Gitea服务器URL") 7 | token: str = Field(description="Gitea API访问令牌") 8 | context_window: int = Field(10, description="代码上下文窗口大小") 9 | 10 | class LLMConfig(BaseModel): 11 | model_config = ConfigDict(title="LLM配置") 12 | model: str = Field("deepseek/deepseek-chat", description="模型名称") 13 | api_key: str = Field(description="API密钥") 14 | max_tokens: int = Field(60000, description="最大token数") 15 | 16 | class ReviewConfig(BaseModel): 17 | model_config = ConfigDict(title="评审配置") 18 | quality_threshold: float = Field(8.5, description="质量阈值分数") 19 | max_security_issues: int = Field(5, description="最大安全问题数量") 20 | ignore_patterns: List[str] = Field( 21 | default=[ 22 | '**/node_modules/', '**/vendor/', '**/venv/', '**/.venv/', 23 | '**/bower_components/', '**/jspm_packages/', '**/packages/', 24 | '**/deps/', '**/dist/', '**/build/', '**/out/', '**/target/', 25 | '**/bin/', '**/obj/', '**/*.exe', '**/*.dll', '**/*.so', 26 | '**/*.a', '**/*.jar', '**/*.class', '**/*.pyc', 27 | '**/__pycache__/', '**/*.egg-info/', '**/.DS_Store', 28 | '**/Thumbs.db', '**/Desktop.ini', '**/.idea/', '**/.vscode/', 29 | '**/.vs/', '**/*.suo', '**/*.user', '**/*.sublime-project', 30 | '**/*.sublime-workspace', '**/*.log', '**/logs/', '**/tmp/', 31 | '**/*.tmp', '**/*.swp', '**/*.swo', '**/.sass-cache/', 32 | '**/coverage/', '**/.nyc_output/', '**/junit.xml', 33 | '**/test-results/', '**/*.min.js', '**/*.min.css', '**/*.map', 34 | '**/public/static/', '**/compiled/', '**/generated/', '**/.env', 35 | '**/.env.local', '**/.env.*.local', '**/docker-compose.override.yml', 36 | '**/*.key', '**/*.pem', '**/*.crt', '**/docs/_build/', 37 | '**/site/', '**/.vuepress/dist/', '**/package-lock.json', 38 | '**/yarn.lock', '**/Gemfile.lock', '**/Podfile.lock' 39 | ], 40 | description="忽略的文件模式" 41 | ) 42 | scoring_rules: Dict[str, float] = Field( 43 | default={ 44 | "security": 0.3, 45 | "performance": 0.2, 46 | "readability": 0.2, 47 | "best_practice": 0.3 48 | }, 49 | description="评分规则权重" 50 | ) 51 | 52 | class AppConfig(BaseModel): 53 | model_config = ConfigDict(title="应用配置") 54 | scm: GiteaConfig 55 | llm: LLMConfig 56 | review: ReviewConfig -------------------------------------------------------------------------------- /app/routers/config_ui.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request, Form 2 | from fastapi.responses import HTMLResponse 3 | from fastapi.templating import Jinja2Templates 4 | import yaml 5 | from pathlib import Path 6 | from typing import List 7 | from app.models.config import AppConfig, GiteaConfig, LLMConfig, ReviewConfig 8 | 9 | router = APIRouter() 10 | templates = Jinja2Templates(directory="app/templates") 11 | 12 | CONFIG_FILE = "config.yaml" 13 | 14 | def load_config() -> AppConfig: 15 | """加载配置文件""" 16 | if Path(CONFIG_FILE).exists(): 17 | with open(CONFIG_FILE, "r", encoding="utf-8") as f: 18 | config_data = yaml.safe_load(f) 19 | return AppConfig.parse_obj(config_data) 20 | return AppConfig( 21 | scm=GiteaConfig( 22 | url="", 23 | token="", 24 | context_window=10 25 | ), 26 | llm=LLMConfig( 27 | model="deepseek/deepseek-chat", 28 | api_key="", 29 | max_tokens=60000 30 | ), 31 | review=ReviewConfig() # 使用config.py中的默认值 32 | ) 33 | 34 | def save_config(config: AppConfig): 35 | """保存配置文件""" 36 | config_dict = config.dict() 37 | with open(CONFIG_FILE, "w", encoding="utf-8") as f: 38 | yaml.safe_dump(config_dict, f, allow_unicode=True) 39 | 40 | @router.get("/config", response_class=HTMLResponse) 41 | async def 
get_config(request: Request): 42 | """显示配置页面""" 43 | config = load_config() 44 | return templates.TemplateResponse( 45 | "config.html", 46 | {"request": request, "config": config} 47 | ) 48 | 49 | @router.post("/config", response_class=HTMLResponse) 50 | async def save_config_handler( 51 | request: Request, 52 | scm_url: str = Form(alias="scm.url"), 53 | scm_token: str = Form(alias="scm.token"), 54 | scm_context_window: int = Form(alias="scm.context_window"), 55 | llm_model: str = Form(alias="llm.model"), 56 | llm_api_key: str = Form(alias="llm.api_key"), 57 | llm_max_tokens: int = Form(alias="llm.max_tokens"), 58 | review_quality_threshold: float = Form(alias="review.quality_threshold"), 59 | review_ignore_patterns: str = Form(alias="review.ignore_patterns"), 60 | review_scoring_rules_security: float = Form(alias="review.scoring_rules.security"), 61 | review_scoring_rules_performance: float = Form(alias="review.scoring_rules.performance"), 62 | review_scoring_rules_readability: float = Form(alias="review.scoring_rules.readability"), 63 | review_scoring_rules_best_practice: float = Form(alias="review.scoring_rules.best_practice") 64 | ): 65 | """保存配置""" 66 | config = AppConfig( 67 | scm=GiteaConfig( 68 | url=scm_url, 69 | token=scm_token, 70 | context_window=scm_context_window 71 | ), 72 | llm=LLMConfig( 73 | model=llm_model, 74 | api_key=llm_api_key, 75 | max_tokens=llm_max_tokens 76 | ), 77 | review=ReviewConfig( 78 | quality_threshold=review_quality_threshold, 79 | ignore_patterns=[p.strip() for p in review_ignore_patterns.split("\n") if p.strip()], 80 | scoring_rules={ 81 | "security": review_scoring_rules_security, 82 | "performance": review_scoring_rules_performance, 83 | "readability": review_scoring_rules_readability, 84 | "best_practice": review_scoring_rules_best_practice 85 | } 86 | ) 87 | ) 88 | 89 | save_config(config) 90 | 91 | return templates.TemplateResponse( 92 | "config.html", 93 | { 94 | "request": request, 95 | "config": config, 96 | "message": "配置已保存" 97 | }, 98 | headers={"HX-Trigger": "configSaved"} 99 | ) -------------------------------------------------------------------------------- /app/routers/webhooks.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, BackgroundTasks, HTTPException 2 | from pydantic import BaseModel 3 | from typing import Optional, Dict, Any 4 | from app.core.agent import CodeReviewAgent 5 | from app.core.scm import GiteaClient 6 | from app.services.llm_service import LLMService 7 | from app.models.config import AppConfig 8 | from app.routers.config_ui import load_config 9 | import logging 10 | 11 | router = APIRouter() 12 | 13 | class PRWebhook(BaseModel): 14 | action: str 15 | number: int 16 | pull_request: Dict[str, Any] 17 | repository: Dict[str, Any] 18 | sender: Dict[str, Any] 19 | 20 | async def process_pr(owner: str, repo: str, pr_id: str): 21 | """处理PR的后台任务""" 22 | try: 23 | # 加载配置 24 | config = load_config() 25 | 26 | # 初始化服务 27 | scm = GiteaClient(config.scm) 28 | llm = LLMService(config.llm) 29 | agent = CodeReviewAgent(config, scm, llm) 30 | 31 | # 执行评审 32 | await agent.review_pr(owner, repo, pr_id) 33 | except Exception as e: 34 | logger.error("Error processing PR {}/{} #{}: {}", 35 | owner if owner else "unknown", 36 | repo if repo else "unknown", 37 | pr_id if pr_id else "unknown", 38 | str(e)) 39 | 40 | @router.post("/webhook/gitea") 41 | async def handle_webhook( 42 | webhook: PRWebhook, 43 | background_tasks: BackgroundTasks 44 | ): 45 | """处理Gitea 
webhook""" 46 | # 只处理PR相关事件 47 | if webhook.action not in ["opened", "reopened", "synchronize"]: 48 | return {"status": "ignored"} 49 | 50 | # 从仓库信息中获取owner和repo 51 | owner = webhook.repository["owner"]["username"] 52 | repo = webhook.repository["name"] 53 | 54 | # 添加后台任务 55 | background_tasks.add_task( 56 | process_pr, 57 | owner, 58 | repo, 59 | str(webhook.number) 60 | ) 61 | 62 | return {"status": "processing"} -------------------------------------------------------------------------------- /app/services/llm_service.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional 2 | import tiktoken 3 | from pydantic import BaseModel 4 | import litellm 5 | from loguru import logger 6 | from app.models.config import LLMConfig, ReviewConfig 7 | 8 | class CodeIssue(BaseModel): 9 | file_path: str 10 | start_line: int 11 | end_line: Optional[int] 12 | description: str 13 | suggestion: str 14 | 15 | class SecurityIssue(BaseModel): 16 | severity: str 17 | file_path: str 18 | start_line: int 19 | end_line: Optional[int] 20 | description: str 21 | suggestion: str 22 | 23 | class QualityMetrics(BaseModel): 24 | security_score: float 25 | performance_score: float 26 | readability_score: float 27 | best_practice_score: float 28 | 29 | class ReviewResult(BaseModel): 30 | score: float 31 | issues: List[CodeIssue] 32 | security_issues: List[SecurityIssue] 33 | quality_metrics: QualityMetrics 34 | 35 | class CodeContext(BaseModel): 36 | diff: str # commit 的完整 diff 内容 37 | files_context: List[dict] # 包含每个文件的上下文信息 38 | metadata: Dict[str, str] # commit 相关的元数据 39 | 40 | def __len__(self) -> int: 41 | """返回文件上下文的数量""" 42 | return len(self.files_context) 43 | 44 | REVIEW_PROMPT = """你是一个专业的代码评审专家,请根据以下代码变更内容进行评审。评审时请特别注意以下几点: 45 | 46 | 1. 安全性(占比30%): 47 | - 检查SQL注入、XSS等安全漏洞 48 | - 检查敏感信息泄露 49 | - 检查权限控制问题 50 | 51 | 2. 性能(占比20%): 52 | - 检查算法复杂度 53 | - 检查资源使用效率 54 | - 检查并发处理方式 55 | 56 | 3. 可读性(占比20%): 57 | - 代码格式是否规范 58 | - 命名是否清晰 59 | - 注释是否充分 60 | 61 | 4. 最佳实践(占比30%): 62 | - 是否遵循设计模式 63 | - 是否有单元测试 64 | - 是否有类型提示 65 | - 是否符合SOLID原则 66 | 67 | 评分规则: 68 | - 安全问题:高危-3分/个,中危-1分/个 69 | - 性能问题:-2分/个 70 | - 可读性问题:-0.5分/个 71 | - 最佳实践:缺少单元测试-2分,无类型提示-1分 72 | 73 | Commit信息: 74 | {commit_message} 75 | 76 | 代码变更: 77 | {diff} 78 | 79 | 相关文件上下文: 80 | {files_context} 81 | 82 | 请提供详细的评审结果,包括: 83 | 1. 总体评分(满分10分) 84 | 2. 具体问题列表(包含文件路径和代码位置) 85 | 3. 安全问题清单(包含文件路径和代码位置) 86 | 4. 各维度的具体评分 87 | 88 | 请以JSON格式返回结果,格式如下: 89 | {{ 90 | "score": float, 91 | "issues": [ 92 | {{ 93 | "file_path": string, 94 | "start_line": int, 95 | "end_line": int | null, 96 | "description": string, 97 | "suggestion": string 98 | }} 99 | ], 100 | "security_issues": [ 101 | {{ 102 | "severity": string, 103 | "file_path": string, 104 | "start_line": int, 105 | "end_line": int | null, 106 | "description": string, 107 | "suggestion": string 108 | }} 109 | ], 110 | "quality_metrics": {{ 111 | "security_score": float, 112 | "performance_score": float, 113 | "readability_score": float, 114 | "best_practice_score": float 115 | }} 116 | }} 117 | 118 | 注意: 119 | 1. 每个问题必须指明具体的文件路径和代码位置(行号) 120 | 2. 如果问题涉及多行代码,请提供start_line和end_line 121 | 3. 如果问题只涉及单行代码,end_line可以为null 122 | 4. 
所有的行号必须是实际的代码行号 123 | """ 124 | 125 | class LLMService: 126 | def __init__(self, config: LLMConfig): 127 | self.config = config 128 | self.model_name = config.model 129 | self.api_key = config.api_key 130 | 131 | if not self.model_name: 132 | raise ValueError("Model name is required") 133 | if not self.api_key: 134 | raise ValueError("API key is required") 135 | 136 | logger.info("Setting up LLMService with model: {} and max_tokens: {}", 137 | self.model_name if hasattr(self, 'model_name') else "unknown", 138 | self.config.max_tokens if hasattr(self, 'config') and hasattr(self.config, 'max_tokens') else 0) 139 | litellm.api_key = self.api_key 140 | litellm.set_verbose = False 141 | 142 | try: 143 | self.tokenizer = tiktoken.encoding_for_model("gpt-4") 144 | logger.info("Tokenizer initialized successfully for model: {}", 145 | self.model_name if hasattr(self, 'model_name') else "unknown") 146 | except Exception as e: 147 | logger.error("Error initializing tokenizer for model {}: {}", 148 | self.model_name if hasattr(self, 'model_name') else "unknown", 149 | str(e)) 150 | raise 151 | 152 | logger.info("LLMService initialized successfully with model: {} and chunk_size: {}", 153 | self.model_name if hasattr(self, 'model_name') else "unknown", 154 | self.config.max_tokens if hasattr(self, 'config') and hasattr(self.config, 'max_tokens') else 0) 155 | 156 | def _split_code_chunks(self, context: CodeContext) -> List[CodeContext]: 157 | max_tokens = self.config.max_tokens - 1000 # 预留空间给prompt和response 158 | 159 | def count_tokens(text: str) -> int: 160 | return len(self.tokenizer.encode(text)) 161 | 162 | chunks = [] 163 | current_chunk = CodeContext( 164 | diff="", 165 | files_context=[], 166 | metadata=context.metadata 167 | ) 168 | current_tokens = 0 169 | 170 | # 按文件分割diff内容 171 | diff_content = context.diff 172 | file_diffs = diff_content.split("diff --git ") 173 | if file_diffs[0] == "": # 移除空的第一个元素 174 | file_diffs = file_diffs[1:] 175 | 176 | current_files = [] # 当前chunk包含的文件路径 177 | 178 | for file_diff in file_diffs: 179 | if not file_diff: 180 | continue 181 | 182 | file_diff = "diff --git " + file_diff 183 | file_tokens = count_tokens(file_diff) 184 | 185 | # 从diff中提取文件路径 186 | import re 187 | file_path_match = re.search(r'a/(.*?) 
b/', file_diff) 188 | if not file_path_match: 189 | continue 190 | file_path = file_path_match.group(1) 191 | 192 | if current_tokens + file_tokens > max_tokens and current_chunk.diff: 193 | # 添加相关的文件上下文 194 | current_chunk.files_context = [ 195 | f for f in context.files_context 196 | if f["file_path"] in current_files 197 | ] 198 | chunks.append(current_chunk) 199 | 200 | # 重置当前chunk 201 | current_chunk = CodeContext( 202 | diff="", 203 | files_context=[], 204 | metadata=context.metadata 205 | ) 206 | current_tokens = 0 207 | current_files = [] 208 | 209 | # 添加文件diff到当前chunk 210 | current_chunk.diff += file_diff 211 | current_tokens += file_tokens 212 | current_files.append(file_path) 213 | 214 | # 处理最后一个chunk 215 | if current_chunk.diff: 216 | current_chunk.files_context = [ 217 | f for f in context.files_context 218 | if f["file_path"] in current_files 219 | ] 220 | chunks.append(current_chunk) 221 | 222 | logger.info("Split code into {} chunks with total size: {} characters", 223 | len(chunks) if chunks else 0, 224 | sum(len(chunk.diff) if hasattr(chunk, 'diff') else 0 for chunk in chunks) if chunks else 0) 225 | return chunks 226 | 227 | async def analyze_code(self, context: CodeContext) -> ReviewResult: 228 | chunks = self._split_code_chunks(context) 229 | results = [] 230 | 231 | for i, chunk in enumerate(chunks): 232 | logger.info("Analyzing chunk {}/{} with size: {} characters", 233 | i + 1, len(chunks) if chunks else 0, 234 | len(chunk.diff) if hasattr(chunk, 'diff') else 0) 235 | 236 | # 格式化文件上下文 237 | files_context_str = "\n\n".join( 238 | f"文件: {f['file_path']} ({f['file_type']})\n{f['context']}" 239 | for f in chunk.files_context 240 | ) if chunk.files_context else "无文件上下文" 241 | 242 | # 验证必需的参数 243 | if not context.metadata.get("commit_message"): 244 | logger.error("Missing commit message in metadata for commit: {}", 245 | context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown") 246 | raise ValueError("Missing commit message in metadata") 247 | 248 | if not chunk.diff: 249 | logger.error("Missing diff content for commit: {}", 250 | context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown") 251 | raise ValueError("Missing diff content") 252 | 253 | try: 254 | prompt = REVIEW_PROMPT.format( 255 | commit_message=context.metadata["commit_message"], 256 | diff=chunk.diff, 257 | files_context=files_context_str 258 | ) 259 | except KeyError as ke: 260 | logger.error("Error formatting prompt - missing key: {}", ke) 261 | raise ValueError(f"Missing required field for prompt formatting: {ke}") 262 | except Exception as e: 263 | logger.error("Error formatting prompt: {}", str(e)) 264 | raise ValueError(f"Failed to format prompt: {str(e)}") 265 | 266 | if not prompt: 267 | logger.error("Empty prompt after formatting") 268 | raise ValueError("Empty prompt after formatting") 269 | 270 | try: 271 | logger.info("Sending request to LLM model: {} with prompt size: {} characters", 272 | self.model_name if hasattr(self, 'model_name') else "unknown", 273 | len(prompt) if prompt else 0) 274 | 275 | try: 276 | response = await litellm.acompletion( 277 | model=self.model_name, 278 | messages=[{"role": "user", "content": prompt}], 279 | temperature=0.2 280 | ) 281 | logger.info("Received response from LLM with size: {} characters", 282 | len(str(response)) if response else 0) 283 | 284 | if not response or not hasattr(response, 'choices') or not response.choices: 285 | logger.error("Invalid 
response format from LLM. Expected JSON, got: {}", 286 | type(response).__name__ if response else "None") 287 | raise ValueError("Invalid response format from LLM") 288 | 289 | response_text = response.choices[0].message.content 290 | except Exception as llm_error: 291 | logger.error("Error calling LLM model {}: {}", 292 | self.model_name if hasattr(self, 'model_name') else "unknown", 293 | str(llm_error)) 294 | raise 295 | 296 | response_text = response_text.strip() 297 | 298 | # 查找JSON内容的开始和结束位置 299 | json_start = response_text.find("{") 300 | json_end = response_text.rfind("}") + 1 301 | 302 | if json_start == -1 or json_end <= json_start: 303 | logger.error("No valid JSON found in response of size: {} characters", 304 | len(str(response)) if response else 0) 305 | # 返回默认结果而不是抛出异常 306 | return ReviewResult( 307 | score=0, 308 | issues=[], 309 | security_issues=[], 310 | quality_metrics=QualityMetrics( 311 | security_score=0, 312 | performance_score=0, 313 | readability_score=0, 314 | best_practice_score=0 315 | ) 316 | ) 317 | 318 | response_text = response_text[json_start:json_end] 319 | 320 | try: 321 | import json 322 | # 尝试清理和格式化 JSON 字符串 323 | response_text = response_text.replace('\n', ' ').replace('\r', '') 324 | # 处理可能的 markdown 代码块 325 | if '```json' in response_text: 326 | response_text = response_text.split('```json')[-1].split('```')[0] 327 | elif '```' in response_text: 328 | response_text = response_text.split('```')[-2] 329 | 330 | # 先尝试解析JSON 331 | json_obj = json.loads(response_text) 332 | 333 | # 确保所有必需的字段都存在并且类型正确 334 | required_fields = { 335 | "score": float, 336 | "issues": list, 337 | "security_issues": list, 338 | "quality_metrics": dict 339 | } 340 | 341 | for field, field_type in required_fields.items(): 342 | if field not in json_obj: 343 | logger.warning("Missing required field: {} in response for commit: {}, adding default value", 344 | field, context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown") 345 | if field == "issues": 346 | json_obj["issues"] = [] 347 | elif field == "security_issues": 348 | json_obj["security_issues"] = [] 349 | elif field == "quality_metrics": 350 | json_obj["quality_metrics"] = { 351 | "security_score": 0.0, 352 | "performance_score": 0.0, 353 | "readability_score": 0.0, 354 | "best_practice_score": 0.0 355 | } 356 | elif field == "score": 357 | json_obj["score"] = 0.0 358 | elif not isinstance(json_obj[field], field_type): 359 | logger.warning("Field {} has wrong type. Expected {}, got {}. 
Converting to default value.", 360 | field, field_type.__name__ if hasattr(field_type, '__name__') else str(field_type), 361 | type(json_obj[field]).__name__ if json_obj and field in json_obj else "unknown") 362 | if field == "issues": 363 | json_obj["issues"] = [] 364 | elif field == "security_issues": 365 | json_obj["security_issues"] = [] 366 | elif field == "quality_metrics": 367 | json_obj["quality_metrics"] = { 368 | "security_score": 0.0, 369 | "performance_score": 0.0, 370 | "readability_score": 0.0, 371 | "best_practice_score": 0.0 372 | } 373 | elif field == "score": 374 | json_obj["score"] = 0.0 375 | 376 | # 检查 quality_metrics 的字段 377 | required_metrics = { 378 | "security_score": float, 379 | "performance_score": float, 380 | "readability_score": float, 381 | "best_practice_score": float 382 | } 383 | 384 | if "quality_metrics" in json_obj: 385 | for metric, metric_type in required_metrics.items(): 386 | if metric not in json_obj["quality_metrics"]: 387 | logger.warning("Missing required metric: {} in quality metrics for commit: {}, adding default value", 388 | metric, context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown") 389 | json_obj["quality_metrics"][metric] = 0.0 390 | elif not isinstance(json_obj["quality_metrics"][metric], metric_type): 391 | json_obj["quality_metrics"][metric] = float(json_obj["quality_metrics"][metric]) 392 | 393 | result = ReviewResult.parse_obj(json_obj) 394 | results.append(result) 395 | logger.info("Successfully analyzed chunk {}/{} for commit: {}", 396 | i + 1, len(chunks) if chunks else 0, 397 | context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown") 398 | except json.JSONDecodeError as json_error: 399 | logger.error("JSON parsing error at position {} in response of size {}: {}", 400 | getattr(json_error, 'pos', 0), 401 | len(str(response)) if response else 0, 402 | str(json_error)) 403 | raise 404 | except Exception as parse_error: 405 | logger.error("Error parsing LLM response of size {}: {}", 406 | len(str(response)) if response else 0, 407 | str(parse_error)) 408 | # 返回一个默认的评审结果 409 | results.append(ReviewResult( 410 | score=0, 411 | issues=[], 412 | security_issues=[], 413 | quality_metrics=QualityMetrics( 414 | security_score=0, 415 | performance_score=0, 416 | readability_score=0, 417 | best_practice_score=0 418 | ) 419 | )) 420 | except Exception as e: 421 | logger.error("Error getting LLM response for model {}: {}", 422 | self.model_name if hasattr(self, 'model_name') else "unknown", 423 | str(e)) 424 | raise 425 | 426 | # 合并所有chunk的结果 427 | if not results: 428 | logger.warning("No valid results for commit: {} in model: {}", 429 | context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown", 430 | self.model_name if hasattr(self, 'model_name') else "unknown") 431 | return ReviewResult( 432 | score=0, 433 | issues=[], 434 | security_issues=[], 435 | quality_metrics=QualityMetrics( 436 | security_score=0, 437 | performance_score=0, 438 | readability_score=0, 439 | best_practice_score=0 440 | ) 441 | ) 442 | 443 | # 使用最低分作为最终分数 444 | final_result = ReviewResult( 445 | score=min(r.score for r in results), 446 | issues=[i for r in results for i in r.issues], 447 | security_issues=[i for r in results for i in r.security_issues], 448 | quality_metrics=QualityMetrics( 449 | security_score=min(r.quality_metrics.security_score for r in results), 450 | 
performance_score=min(r.quality_metrics.performance_score for r in results), 451 | readability_score=min(r.quality_metrics.readability_score for r in results), 452 | best_practice_score=min(r.quality_metrics.best_practice_score for r in results) 453 | ) 454 | ) 455 | 456 | logger.info("Analysis completed for commit {} with final score: {}", 457 | context.metadata.get("commit_id", "unknown")[:8] if context and hasattr(context, 'metadata') else "unknown", 458 | final_result.score if final_result and hasattr(final_result, 'score') else 0.0) 459 | return final_result -------------------------------------------------------------------------------- /app/static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/highkay/codereview-agent/6a324047dda2ec11056ad3e9bdde848082e97c05/app/static/.gitkeep -------------------------------------------------------------------------------- /app/templates/config.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 代码评审Agent配置 7 | 8 | 9 | 10 | 11 |
[The HTML markup of config.html (original lines 12-134) was lost in this extraction. Recoverable structure: a single-page configuration form titled "代码评审Agent配置", built with HTMX and TailwindCSS, with sections for "Gitea配置", "LLM配置", "评审配置" and "评分规则权重". The input names correspond to the form aliases handled in app/routers/config_ui.py (scm.url, scm.token, scm.context_window, llm.model, llm.api_key, llm.max_tokens, review.quality_threshold, review.ignore_patterns and the four review.scoring_rules.* weights), and the form submits back to POST /config.]
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -i https://pypi.tuna.tsinghua.edu.cn/simple
2 | fastapi
3 | uvicorn
4 | pydantic
5 | aiohttp
6 | python-multipart
7 | jinja2
8 | tiktoken
9 | htmx
10 | loguru
11 | litellm
--------------------------------------------------------------------------------
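A minimal sketch for smoke-testing the webhook endpoint locally, assuming the service runs on localhost:8000 as in the README; the payload covers only the required fields of the `PRWebhook` model in `app/routers/webhooks.py`, and the owner, repository name and PR number are placeholder values (a real Gitea event carries many more fields):

```python
import asyncio

import aiohttp

# Minimal payload covering only the fields required by the PRWebhook model;
# all repository/user values below are placeholders for a local smoke test.
PAYLOAD = {
    "action": "opened",  # only "opened", "reopened" and "synchronize" are processed
    "number": 1,
    "pull_request": {"title": "demo PR"},
    "repository": {"owner": {"username": "demo-owner"}, "name": "demo-repo"},
    "sender": {"login": "demo-user"},
}


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        # Assumes the agent is listening on localhost:8000 as described in the README.
        async with session.post("http://localhost:8000/webhook/gitea", json=PAYLOAD) as resp:
            print(resp.status, await resp.json())  # expected: 200 {'status': 'processing'}


if __name__ == "__main__":
    asyncio.run(main())
```

A `{"status": "processing"}` response only confirms routing and payload validation; the review itself runs as a background task and still requires a valid `config.yaml` and a reachable Gitea server.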