├── .env.sample
├── .flake8
├── .gitattributes
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── actions
│ └── poetry_setup
│ │ └── action.yml
├── dependabot.yml.disable
└── workflows
│ ├── flake8.yml
│ ├── publish.yml
│ ├── test.yml
│ └── version.yml
├── .gitignore
├── ARCHITECTURE.md
├── CHANGELOG.md
├── LICENSE
├── README.md
├── UPDATES.md
├── codedog
├── __init__.py
├── actors
│ ├── __init__.py
│ ├── base.py
│ └── reporters
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── code_review.py
│ │ ├── pr_summary.py
│ │ └── pull_request.py
├── analysis_results_20250424_095117.json
├── analyze_code.py
├── chains
│ ├── __init__.py
│ ├── code_review
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── prompts.py
│ │ └── translate_code_review_chain.py
│ ├── pr_summary
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── prompts.py
│ │ └── translate_pr_summary_chain.py
│ └── prompts.py
├── localization.py
├── models
│ ├── __init__.py
│ ├── blob.py
│ ├── change_file.py
│ ├── change_summary.py
│ ├── code_review.py
│ ├── commit.py
│ ├── diff.py
│ ├── issue.py
│ ├── pr_summary.py
│ ├── pull_request.py
│ └── repository.py
├── processors
│ ├── __init__.py
│ └── pull_request_processor.py
├── retrievers
│ ├── __init__.py
│ ├── base.py
│ ├── github_retriever.py
│ └── gitlab_retriever.py
├── templates
│ ├── __init__.py
│ ├── grimoire_cn.py
│ ├── grimoire_en.py
│ ├── optimized_code_review_prompt.py
│ ├── template_cn.py
│ └── template_en.py
├── utils
│ ├── __init__.py
│ ├── code_evaluator.py
│ ├── diff_utils.py
│ ├── email_utils.py
│ ├── git_hooks.py
│ ├── git_log_analyzer.py
│ ├── langchain_utils.py
│ └── remote_repository_analyzer.py
└── version.py
├── docs
├── api
│ ├── codedog.html
│ ├── codedog
│ │ ├── actors.html
│ │ ├── actors
│ │ │ ├── base.html
│ │ │ ├── reporters.html
│ │ │ └── reporters
│ │ │ │ ├── base.html
│ │ │ │ ├── code_review.html
│ │ │ │ ├── pr_summary.html
│ │ │ │ └── pull_request.html
│ │ ├── chains.html
│ │ ├── localization.html
│ │ ├── models.html
│ │ ├── processors.html
│ │ ├── retrievers.html
│ │ ├── templates.html
│ │ ├── templates
│ │ │ ├── grimoire_cn.html
│ │ │ ├── grimoire_en.html
│ │ │ ├── template_cn.html
│ │ │ └── template_en.html
│ │ ├── utils.html
│ │ ├── utils
│ │ │ ├── diff_utils.html
│ │ │ └── langchain_utils.html
│ │ └── version.html
│ ├── index.html
│ └── search.js
├── assets
│ ├── favicon.ico
│ └── logo.png
├── commit_review.md
├── email_setup.md
└── models.md
├── examples
├── deepseek_r1_example.py
├── github_review.py
├── github_server.py
├── gitlab_review.py
├── gitlab_server.py
└── translation.py
├── fetch_samples_mcp.py
├── poetry.lock
├── poetry.toml
├── product.md
├── pyproject.toml
├── requirements.txt
├── review_recent_commit.py
├── run_codedog.py
├── runtests.py
└── tests
├── __init__.py
├── codedog
├── actors
│ └── reports
│ │ ├── test_code_review.py
│ │ ├── test_pr_summary.py
│ │ └── test_pull_request_review.py
├── pr_summary
│ └── test_pr_summary_rocessor.py
└── retrievers
│ └── test_github_retriever.py
├── conftest.py
├── integration
├── __init__.py
└── test_end_to_end.py
├── test_email.py
└── unit
├── __init__.py
├── actors
├── __init__.py
└── reporters
│ ├── __init__.py
│ └── test_pull_request_reporter.py
├── chains
├── __init__.py
└── test_pr_summary_chain.py
├── processors
├── __init__.py
└── test_pull_request_processor.py
├── retrievers
├── __init__.py
└── test_github_retriever.py
└── utils
├── __init__.py
├── test_diff_utils.py
└── test_langchain_utils.py
/.env.sample:
--------------------------------------------------------------------------------
1 | # CodeDog 环境变量示例文件
2 | # 复制此文件为 .env 并填入您的实际配置值
3 |
4 | # ===== 平台配置 =====
5 | # 选择一个平台: GitHub 或 GitLab
6 |
7 | # GitHub 配置
8 | GITHUB_TOKEN="your_github_personal_access_token"
9 |
10 | # GitLab 配置
11 | # 如果使用 GitLab 而不是 GitHub
12 | # GITLAB_TOKEN="your_gitlab_personal_access_token"
13 | # 对于自托管实例,修改为您的 GitLab URL
14 | # GITLAB_URL="https://gitlab.com"
15 |
16 | # ===== LLM 配置 =====
17 | # 选择一种配置方式: OpenAI, Azure OpenAI, DeepSeek 或 MindConnect
18 |
19 | # OpenAI 配置
20 | # 标准 OpenAI API
21 | OPENAI_API_KEY="your_openai_api_key"
22 |
23 | # Azure OpenAI 配置
24 | # 如果使用 Azure 的 OpenAI 服务
25 | # AZURE_OPENAI="true"
26 | # AZURE_OPENAI_API_KEY="your_azure_openai_api_key"
27 | # AZURE_OPENAI_API_BASE="https://your-instance.openai.azure.com/"
28 | # 可选,默认会使用一个较新的版本
29 | # AZURE_OPENAI_API_VERSION="2023-05-15"
30 | # 用于代码摘要和评审的 GPT-3.5 部署
31 | # AZURE_OPENAI_DEPLOYMENT_ID="your_gpt35_deployment_name"
32 | # 用于 PR 摘要的 GPT-4 部署
33 | # AZURE_OPENAI_GPT4_DEPLOYMENT_ID="your_gpt4_deployment_name"
34 |
35 | # DeepSeek 配置
36 | # 如果使用 DeepSeek 模型
37 | # DEEPSEEK_API_KEY="your_deepseek_api_key"
38 | # DeepSeek 模型名称
39 | DEEPSEEK_MODEL="deepseek-chat"
40 | # DeepSeek API 基础 URL
41 | DEEPSEEK_API_BASE="https://api.deepseek.com"
42 | # DeepSeek 温度参数
43 | DEEPSEEK_TEMPERATURE="0"
44 | # DeepSeek 最大token数
45 | DEEPSEEK_MAX_TOKENS="4096"
46 | # DeepSeek top_p参数
47 | DEEPSEEK_TOP_P="0.95"
48 | # DeepSeek 超时时间(秒)
49 | DEEPSEEK_TIMEOUT="60"
50 | # DeepSeek R1 特定配置
51 | DEEPSEEK_R1_API_BASE="https://api.deepseek.com"
52 | DEEPSEEK_R1_MODEL="deepseek-reasoner"
53 |
54 | # ===== 模型选择配置 =====
55 | # 可选值: "gpt-3.5", "gpt-4", "gpt-4o", "deepseek", "deepseek-r1" 或任何 OpenAI 模型名称
56 | CODE_SUMMARY_MODEL="gpt-3.5"
57 | PR_SUMMARY_MODEL="gpt-3.5"
58 | CODE_REVIEW_MODEL="gpt-3.5"
59 |
60 | # 特定模型版本配置
61 | # GPT-3.5 模型名称,默认为 "gpt-3.5-turbo"
62 | # GPT35_MODEL="gpt-3.5-turbo-16k"
63 | # GPT-4 模型名称,默认为 "gpt-4"
64 | # GPT4_MODEL="gpt-4-turbo"
65 | # GPT-4o 模型名称,默认为 "gpt-4o"
66 | # GPT4O_MODEL="gpt-4o-mini"
67 |
68 | # ===== 电子邮件通知配置 =====
69 | # 启用电子邮件通知
70 | EMAIL_ENABLED="false"
71 | # 接收通知的邮箱,多个邮箱用逗号分隔
72 | NOTIFICATION_EMAILS="your_email@example.com"
73 |
74 | # SMTP 服务器配置
75 | # 用于发送电子邮件通知
76 | # Gmail SMTP 配置说明:
77 | # 1. 必须在 Google 账户开启两步验证: https://myaccount.google.com/security
78 | # 2. 创建应用专用密码: https://myaccount.google.com/apppasswords
79 | # 3. 使用应用专用密码而非您的常规Gmail密码
80 | # Gmail SMTP 服务器地址
81 | SMTP_SERVER="smtp.gmail.com"
82 | # Gmail SMTP 服务器端口
83 | SMTP_PORT="587"
84 | # 发送邮件的 Gmail 账户
85 | SMTP_USERNAME="your_email@gmail.com"
86 | # SMTP_PASSWORD 应该是应用专用密码,不是您的 Gmail 登录密码
87 | SMTP_PASSWORD="your_app_specific_password"
88 |
89 | # ===== 开发者评价配置 =====
90 | # 默认包含的文件类型
91 | DEV_EVAL_DEFAULT_INCLUDE=".py,.js,.java,.ts,.tsx,.jsx,.c,.cpp,.h,.hpp"
92 | # 默认排除的文件类型
93 | DEV_EVAL_DEFAULT_EXCLUDE=".md,.txt,.json,.lock,.gitignore"
94 |
95 | # ===== 其他可选配置 =====
96 | # 日志级别,可以是 DEBUG, INFO, WARNING, ERROR
97 | LOG_LEVEL="INFO"
98 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 120
3 | exclude=
4 | .venv
5 | __pycache__
6 | tmp/
7 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto eol=lf
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Additional context**
27 | Add any other context about the problem here.
28 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/actions/poetry_setup/action.yml:
--------------------------------------------------------------------------------
1 | # An action for setting up poetry install with caching.
2 | # Using a custom action since the default action does not
3 | # take poetry install groups into account.
4 | # Action code from langchain from:
5 | # https://github.com/actions/setup-python/issues/505#issuecomment-1273013236
6 | name: poetry-install-with-caching
7 | description: Poetry install with support for caching of dependency groups.
8 |
9 | inputs:
10 | python-version:
11 | description: Python version, supporting MAJOR.MINOR only
12 | required: true
13 |
14 | poetry-version:
15 | description: Poetry version
16 | required: true
17 |
18 | install-command:
19 | description: Command run for installing dependencies
20 | required: false
21 | default: poetry install
22 |
23 | runs:
24 | using: composite
25 | steps:
26 | - uses: actions/setup-python@v4
27 | name: Setup python ${{ inputs.python-version }}
28 | with:
29 | python-version: ${{ inputs.python-version }}
30 |
31 | - uses: actions/cache@v3
32 | id: cache-pip
33 | name: Cache Pip ${{ inputs.python-version }}
34 | env:
35 | SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
36 | with:
37 | path: |
38 | ~/.cache/pip
39 | key: pip-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}
40 |
41 | - run: pipx install poetry==${{ inputs.poetry-version }} --python python${{ inputs.python-version }}
42 | shell: bash
43 |
44 | - name: Check Poetry File
45 | shell: bash
46 | run: |
47 | poetry check
48 |
49 | - name: Check lock file
50 | shell: bash
51 | run: |
52 | poetry lock --check
53 |
54 | - uses: actions/cache@v3
55 | id: cache-poetry
56 | env:
57 | SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
58 | with:
59 | path: |
60 | ~/.cache/pypoetry/virtualenvs
61 | ~/.cache/pypoetry/cache
62 | ~/.cache/pypoetry/artifacts
63 | key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ hashFiles('poetry.lock') }}
64 |
65 | - run: ${{ inputs.install-command }}
66 | shell: bash
67 |
--------------------------------------------------------------------------------
/.github/dependabot.yml.disable:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "pip" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "weekly"
12 |
--------------------------------------------------------------------------------
/.github/workflows/flake8.yml:
--------------------------------------------------------------------------------
1 | name: Checkstyle
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches:
7 | - master
8 |
9 | jobs:
10 | checkstyle:
11 | name: Checkstyle with Flake8
12 | runs-on: ubuntu-latest
13 | strategy:
14 | matrix:
15 | python-version:
16 | - "3.10"
17 |
18 | steps:
19 | -
20 | name: Checkout
21 | uses: actions/checkout@v3
22 | - name: Set up Python ${{ matrix.python-version }}
23 | uses: actions/setup-python@v4
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 | - name: Install dependencies
27 | run: |
28 | python -m pip install --upgrade pip
29 | pip install flake8
30 | - name: Lint the code with flake8
31 | run: |
32 | flake8 .
33 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish new version
2 |
3 | on:
4 | push:
5 | tags:
6 | - v*
7 |
8 | jobs:
9 | build_and_publish_to_pypi:
10 | name: Build and Publish Package to PyPI
11 | runs-on: ubuntu-latest
12 |
13 | steps:
14 | - uses: actions/checkout@v1
15 | with:
16 | fetch-depth: 1
17 |
18 | - name: Build and publish to pypi
19 | uses: JRubics/poetry-publish@v1.17
20 | with:
21 | python_version: "3.10.10"
22 | poetry_version: "==1.5.1" # (PIP version specifier syntax)
23 | pypi_token: ${{ secrets.PYPI_TOKEN }}
24 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 | on:
3 | pull_request:
4 | branches:
5 | - master
6 | push:
7 | branches:
8 | - master
9 |
10 | permissions:
11 | contents: write
12 | pull-requests: write
13 |
14 | jobs:
15 | test:
16 | name: Test with Pytest
17 | runs-on: ubuntu-latest
18 |
19 | steps:
20 | - name: Checkout
21 | uses: actions/checkout@v1
22 | with:
23 | fetch-depth: 1
24 |
25 | - name: Set Up Python 3.10
26 | uses: "./.github/actions/poetry_setup"
27 | with:
28 | python-version: "3.10"
29 | poetry-version: "1.5.1"
30 | install-command: |
31 | echo "Installing dependencies with poetry..."
32 | poetry install --with test
33 |
34 | - name: Run Test
35 | run:
36 | poetry run pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=codedog tests/ | tee pytest-coverage.txt
37 |
38 | - name: Pytest Coverage Comment
39 | id: coverageComment
40 | uses: MishaKav/pytest-coverage-comment@main
41 | with:
42 | pytest-coverage-path: ./pytest-coverage.txt
43 | junitxml-path: ./pytest.xml
44 | default-branch: master
45 |
46 | - name: Create Badge
47 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
48 | uses: schneegans/dynamic-badges-action@v1.6.0
49 | with:
50 | auth: ${{ secrets.GH_TOKEN }}
51 | gistID: ce38dae58995aeffef42065093fcfe84
52 | filename: codedog_master.json
53 | label: Coverage
54 | message: ${{ steps.coverageComment.outputs.coverage }}
55 | color: ${{ steps.coverageComment.outputs.color }}
56 | namedLogo: python
57 |
--------------------------------------------------------------------------------
/.github/workflows/version.yml:
--------------------------------------------------------------------------------
1 | name: Semantic Release
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | update_doc:
8 | name: Generate API Reference Documents.
9 | runs-on: ubuntu-latest
10 |
11 | steps:
12 | - uses: actions/checkout@v1
13 | with:
14 | fetch-depth: 0
15 |
16 | - name: Set up Python 3.10
17 | uses: "./.github/actions/poetry_setup"
18 | with:
19 | python-version: "3.10"
20 | poetry-version: "1.5.1"
21 | install-command: |
22 | echo "Installing dependencies with poetry..."
23 | poetry install --with doc
24 |
25 | - name: Generate docs
26 | run: |
27 | rm -rf docs/api
28 | poetry run pdoc codedog \
29 | -o ./docs/api \
30 | -e codedog=https://github.com/codedog-ai/codedog/blob/master/codedog/ \
31 | --favicon https://raw.githubusercontent.com/codedog-ai/codedog/master/docs/assets/favicon.ico \
32 | --logo https://raw.githubusercontent.com/codedog-ai/codedog/master/docs/assets/logo.png \
33 | --logo-link https://codedog.ai \
34 |
35 | - name: Commit & Push changes
36 | uses: actions-js/push@master
37 | with:
38 | github_token: ${{ secrets.GH_TOKEN }}
39 | message : "chore: Update docs"
40 | branch : "master"
41 |
42 | release:
43 | name: Release New Version.
44 | runs-on: ubuntu-latest
45 | concurrency: release
46 | permissions:
47 | id-token: write
48 | contents: write
49 |
50 | steps:
51 | - uses: actions/checkout@v3
52 | with:
53 | fetch-depth: 0
54 |
55 | - name: Set up Python 3.10
56 | uses: "./.github/actions/poetry_setup"
57 | with:
58 | python-version: "3.10"
59 | poetry-version: "1.5.1"
60 | install-command: |
61 | echo "Installing dependencies with poetry..."
62 | poetry install --with dev
63 |
64 | - name: Python Semantic Release
65 | run: |
66 | git config --global user.name "github-actions"
67 | git config --global user.email "action@github.com"
68 | poetry run semantic-release version --changelog --no-commit --no-push --skip-build
69 |
70 | - name: Commit & Push changes
71 | uses: actions-js/push@master
72 | with:
73 | github_token: ${{ secrets.GH_TOKEN }}
74 | message : "chore: release"
75 | branch : "master"
76 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # .vscode files
31 | .vscode/
32 |
33 | # Pycharm
34 | .idea/
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .nox/
50 | .coverage
51 | .coverage.*
52 | .cache
53 | nosetests.xml
54 | coverage.xml
55 | *.cover
56 | *.py,cover
57 | .hypothesis/
58 | .pytest_cache/
59 |
60 | # Translations
61 | *.mo
62 | *.pot
63 |
64 | # Django stuff:
65 | *.log
66 | local_settings.py
67 | db.sqlite3
68 | db.sqlite3-journal
69 |
70 | # Flask stuff:
71 | instance/
72 | .webassets-cache
73 |
74 | # Scrapy stuff:
75 | .scrapy
76 |
77 | # Sphinx documentation
78 | docs/_build/
79 |
80 | # PyBuilder
81 | target/
82 |
83 | # Jupyter Notebook
84 | .ipynb_checkpoints
85 |
86 | # IPython
87 | profile_default/
88 | ipython_config.py
89 |
90 | # pyenv
91 | .python-version
92 |
93 | # pipenv
94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
97 | # install all needed dependencies.
98 | #Pipfile.lock
99 |
100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
101 | __pypackages__/
102 |
103 | # Celery stuff
104 | celerybeat-schedule
105 | celerybeat.pid
106 |
107 | # SageMath parsed files
108 | *.sage.py
109 |
110 | # Environments
111 | .env
112 | .env.bat
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 | myvenv/
120 | tmp/
121 |
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 |
126 | # Rope project settings
127 | .ropeproject
128 |
129 | # mkdocs documentation
130 | /site
131 |
132 | # mypy
133 | .mypy_cache/
134 | .dmypy.json
135 | dmypy.json
136 |
137 | # Pyre type checker
138 | .pyre/
139 |
140 | # macOS .DS_Store files
141 | .DS_Store
142 |
143 | # Generated context prompt file
144 | project_context.prompt
145 |
146 | # Helper script to generate context
147 | _create_context_prompt.py
148 |
149 | # Generated report files
150 | codedog_commit_*.md
151 | codedog_eval_*.md
152 | codedog_pr_*.md
153 | fix.py
154 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Codedog.ai
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/UPDATES.md:
--------------------------------------------------------------------------------
1 | # CodeDog Project Updates
2 |
3 | ## Latest Updates
4 |
5 | ### 1. Improved Scoring System
6 | - Enhanced the scoring system to provide more accurate and comprehensive code evaluations
7 | - Added detailed scoring criteria for each dimension
8 | - Implemented weighted scoring for different aspects of code quality
9 |
10 | ### 2. Evaluation Dimensions
11 | The evaluation now covers the following dimensions:
12 | - Readability: Code clarity and understandability
13 | - Efficiency & Performance: Code execution speed and resource usage
14 | - Security: Code security practices and vulnerability prevention
15 | - Structure & Design: Code organization and architectural design
16 | - Error Handling: Robustness in handling errors and edge cases
17 | - Documentation & Comments: Code documentation quality and completeness
18 | - Code Style: Adherence to coding standards and best practices
19 |
20 | ### 3. Enhanced Error Handling
21 | - Improved timeout handling for API requests
22 | - Added detailed error logging
23 | - Implemented better error recovery mechanisms
24 |
25 | ### 4. Performance Optimizations
26 | - Reduced API call latency
27 | - Optimized memory usage
28 | - Improved concurrent request handling
29 |
30 | ### 5. Documentation Updates
31 | - Added comprehensive API documentation
32 | - Updated user guides
33 | - Improved code examples and tutorials
34 |
35 | ## Running the Project
36 |
37 | ### Environment Setup
38 |
39 | 1. Ensure the .env file is properly configured, especially:
40 | - Platform tokens (GitHub or GitLab)
41 | - LLM API keys (OpenAI, DeepSeek, etc.)
42 | - SMTP server settings (if email notifications are enabled)
43 |
44 | 2. If using Gmail for email notifications:
45 | - Enable two-factor authentication for your Google account
46 | - Generate an app-specific password (https://myaccount.google.com/apppasswords)
47 | - Use the app password in your .env file
48 |
49 | ### Running Commands
50 |
51 | 1. **Evaluate Developer Code**:
52 | ```bash
53 | python run_codedog.py eval "developer_name" --start-date YYYY-MM-DD --end-date YYYY-MM-DD
54 | ```
55 |
56 | 2. **Review PR/MR**:
57 | ```bash
58 | # GitHub PR review
59 | python run_codedog.py pr "repository_name" PR_number
60 |
61 | # GitLab MR review
62 | python run_codedog.py pr "repository_name" MR_number --platform gitlab
63 |
64 | # Self-hosted GitLab instance
65 | python run_codedog.py pr "repository_name" MR_number --platform gitlab --gitlab-url "https://your.gitlab.instance.com"
66 | ```
67 |
68 | 3. **Set up Git Hooks**:
69 | ```bash
70 | python run_codedog.py setup-hooks
71 | ```
72 |
73 | ### Important Notes
74 |
75 | - For large code diffs, you may encounter context length limits. In such cases, consider using `gpt-4-32k` or other models with larger context windows.
76 | - DeepSeek models have specific message format requirements; please make sure you follow the fixes mentioned above.
77 |
78 | ## Future Improvements
79 |
80 | 1. Implement better text chunking and processing for handling large code diffs
81 | 2. Develop more specialized scoring criteria for different file types
82 | 3. Further improve report presentation with visual charts
83 | 4. Deeper integration with CI/CD systems
--------------------------------------------------------------------------------
/codedog/__init__.py:
--------------------------------------------------------------------------------
1 | r"""
2 |
3 | Review your Github/Gitlab PR with ChatGPT
4 |
5 | ## What is codedog?
6 |
7 | Codedog is a code review automation tool that leverages the power of LLMs (Large Language Models) to help developers
8 | review code faster and more accurately.
9 |
10 | Codedog is based on OpenAI API and Langchain.
11 |
12 | ## Quickstart
13 |
14 | As an example, we will use codedog to review a pull request on Github.
15 |
16 | 0. Install codedog
17 |
18 | ```bash
19 | pip install codedog
20 | ```
21 |
22 | codedog currently only supports python 3.10.
23 |
24 | 1. Get a github pull request
25 | ```python
26 | from github import Github
27 |
28 | github_token="YOUR GITHUB TOKEN"
29 | repository = "codedog-ai/codedog"
30 | pull_request_number = 2
31 |
32 | github = Github(github_token)
33 | retriever = GithubRetriever(github, repository, pull_request_number)
34 | ```
35 |
36 |
37 | 2. Summarize the pull request
38 |
39 | Since `PRSummaryChain` uses langchain's output parser, we suggest using GPT-4 to improve formatting accuracy.
40 |
41 | ```python
42 | from codedog.chains import PRSummaryChain
43 |
44 | openai_api_key = "YOUR OPENAI API KEY WITH GPT4"
45 |
46 | # PR Summary uses output parser
47 | llm35 = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-3.5-turbo")
48 |
49 | llm4 = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4")
50 |
51 | summary_chain = PRSummaryChain.from_llm(code_summary_llm=llm35, pr_summary_llm=llm4, verbose=True)
52 |
53 | summary = summary_chain({"pull_request": retriever.pull_request}, include_run_info=True)
54 |
55 | print(summary)
56 | ```
57 |
58 | 3. Review each changed code file in the pull request
59 |
60 | ```python
61 | review_chain = CodeReviewChain.from_llm(llm=llm35, verbose=True)
62 |
63 | reviews = review_chain({"pull_request": retriever.pull_request}, include_run_info=True)
64 |
65 | print(reviews)
66 | ```
67 |
68 | 4. Format review result
69 |
70 | Format review result to a markdown report.
71 |
72 | ```python
73 | from codedog.actors.reporters.pull_request import PullRequestReporter
74 |
75 | reporter = PullRequestReporter(
76 | pr_summary=summary["pr_summary"],
77 | code_summaries=summary["code_summaries"],
78 | pull_request=retriever.pull_request,
79 | code_reviews=reviews["code_reviews"],
80 | )
81 |
82 | md_report = reporter.report()
83 |
84 | print(md_report)
85 | ```
86 |
87 | ## Deployment
88 |
89 | We have a simple server demo to deploy codedog as a service with fastapi and handle Github webhook.
90 | Basically, you can also use it with a workflow or a Github Application.
91 |
92 | see `examples/github_server.py` (or `examples/gitlab_server.py` for Gitlab)
93 |
94 | Note that codedog does not declare fastapi and uvicorn as dependencies; you need to install them manually.
95 | ## Configuration
96 |
97 | Codedog currently loads config from environment variables.
98 |
99 | settings:
100 |
101 | | Config Name | Required | Default | Description |
102 | | ------------------------------ | -------- | ----------------- | --------------------------------------- |
103 | | OPENAI_API_KEY | No | | Api Key for calling openai gpt api |
104 | | AZURE_OPENAI | No | | Use azure openai if not blank |
105 | | AZURE_OPENAI_API_KEY | No | | Azure openai api key |
106 | | AZURE_OPENAI_API_BASE | No | | Azure openai api base |
107 | | AZURE_OPENAI_DEPLOYMENT_ID | No | | Azure openai deployment id for gpt 3.5 |
108 | | AZURE_OPENAI_GPT4_DEPLOYMENT_ID| No | | Azure openai deployment id for gpt 4 |
109 |
110 | """
111 | # flake8: noqa
112 | from codedog.actors.reporters.pull_request import PullRequestReporter
113 | from codedog.chains.code_review.base import CodeReviewChain
114 | from codedog.chains.pr_summary.base import PRSummaryChain
115 | from codedog.version import VERSION
116 |
117 | __version__ = VERSION
118 |
--------------------------------------------------------------------------------
/codedog/actors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codedog-ai/codedog/a1702a4ab933c7c071318e688a5a3b6948b423d0/codedog/actors/__init__.py
--------------------------------------------------------------------------------
/codedog/actors/base.py:
--------------------------------------------------------------------------------
class Actor:
    """Marker base class for codedog actors; it defines no behavior of its own."""
3 |
--------------------------------------------------------------------------------
/codedog/actors/reporters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codedog-ai/codedog/a1702a4ab933c7c071318e688a5a3b6948b423d0/codedog/actors/reporters/__init__.py
--------------------------------------------------------------------------------
/codedog/actors/reporters/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from codedog.actors.base import Actor
4 |
5 |
class Reporter(Actor, ABC):
    """Abstract actor whose concrete subclasses must implement ``report``."""

    @abstractmethod
    def report(self) -> str:
        """Return the text content of the report."""
10 |
--------------------------------------------------------------------------------
/codedog/actors/reporters/pr_summary.py:
--------------------------------------------------------------------------------
1 | from codedog.actors.reporters.base import Reporter
2 | from codedog.localization import Localization
3 | from codedog.models import ChangeSummary, PRSummary, PullRequest
4 | from codedog.processors import PullRequestProcessor
5 | from codedog.templates import template_en
6 |
7 |
class PRSummaryMarkdownReporter(Reporter, Localization):
    """Build the markdown PR-summary section of a review report.

    The section is assembled from the PR-level summary, a change overview
    produced by ``PullRequestProcessor`` and the per-file change summaries.
    The text is generated on the first ``report()`` call and reused afterwards.
    """

    # One processor instance, created at class-definition time and shared by all reporters.
    pr_processor = PullRequestProcessor()

    def __init__(
        self,
        pr_summary: PRSummary,
        code_summaries: list[ChangeSummary],
        pull_request: PullRequest,
        language: str = "en",
    ):
        """Store the report inputs and initialize localization.

        Args:
            pr_summary: PR-level summary; its ``pr_type``, ``overview`` and
                ``major_files`` attributes are read when rendering.
            code_summaries: Per-file summaries, indexed here by ``full_name``.
            pull_request: Pull request whose ``change_files`` drive the report.
            language: Language code forwarded to the parent initializer
                (presumably ``Localization`` -- confirm against that class).
        """
        self._pr_summary: PRSummary = pr_summary
        # Index summaries by full file name so each changed file is a dict lookup.
        self._code_summaries: dict[str, ChangeSummary] = {
            summary.full_name: summary for summary in code_summaries
        }
        self._pull_request: PullRequest = pull_request
        # Cache for the rendered report; an empty string means "not generated yet".
        self._markdown: str = ""

        super().__init__(language=language)

    def report(self) -> str:
        """Return the markdown report, generating it on first use."""
        if not self._markdown:
            self._markdown = self._generate_markdown()

        return self._markdown

    def _generate_markdown(self) -> str:
        """Fill the localized ``REPORT_PR_SUMMARY`` template with its three sections."""
        return self.template.REPORT_PR_SUMMARY.format(
            overview=self._generate_pr_overview(),
            change_overview=self._generate_change_overivew(),
            file_changes=self._generate_file_changes(),
        )

    def _generate_pr_overview(self) -> str:
        """Format the overview section from the PR type description and overview text."""
        # Use the template of the selected language so this section is localized
        # like the rest of the report. Fall back to the English template when the
        # active template module does not define this entry.
        overview_template = getattr(
            self.template,
            "REPORT_PR_SUMMARY_OVERVIEW",
            template_en.REPORT_PR_SUMMARY_OVERVIEW,
        )
        return overview_template.format(
            type_desc=self.template.REPORT_PR_TYPE_DESC_MAPPING[
                self._pr_summary.pr_type
            ],
            overview=self._pr_summary.overview,
        )

    # NOTE: the method name is misspelled ("overivew"); it is kept as-is because
    # code outside this class may reference it by this name.
    def _generate_change_overivew(self) -> str:
        """Delegate the change overview to ``pr_processor.gen_material_change_files``."""
        return self.pr_processor.gen_material_change_files(
            self._pull_request.change_files
        )

    def _generate_file_changes(self) -> str:
        """Render per-file summaries, split into major and secondary changes.

        Files without a matching code summary are skipped. A group with no
        entries contributes an empty string instead of its template.
        """
        major_changes: list[str] = []
        secondary_changes: list[str] = []

        major_files = set(self._pr_summary.major_files)
        for change_file in self._pull_request.change_files:
            code_summary = self._code_summaries.get(change_file.full_name)
            if code_summary is None:
                continue

            curr_report = self.template.REPORT_CHANGE_OVERVIEW.format(
                name=change_file.name,
                url=change_file.diff_url,
                full_name=change_file.full_name,
                # Markdown table content must be a single line.
                content=code_summary.summary.replace("\n", "\t"),
            )

            if change_file.full_name in major_files:
                major_changes.append(curr_report)
            else:
                secondary_changes.append(curr_report)

        major_change_report = (
            self.template.REPORT_FILE_CHANGES_MAJOR.format(
                major_changes="\n".join(major_changes)
            )
            if major_changes
            else ""
        )
        secondary_change_report = (
            self.template.REPORT_FILE_CHANGES.format(
                changes="\n".join(secondary_changes)
            )
            if secondary_changes
            else ""
        )

        return f"{major_change_report}\n{secondary_change_report}\n"
96 |
--------------------------------------------------------------------------------
/codedog/actors/reporters/pull_request.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from typing import Any, Dict, List, Optional
3 |
4 | from codedog.actors.reporters.base import Reporter
5 | from codedog.actors.reporters.code_review import CodeReviewMarkdownReporter
6 | from codedog.actors.reporters.pr_summary import PRSummaryMarkdownReporter
7 | from codedog.localization import Localization
8 | from codedog.models import ChangeSummary, CodeReview, PRSummary, PullRequest
9 | from codedog.version import PROJECT, VERSION
10 |
11 |
class PullRequestReporter(Reporter, Localization):
    """Compose the full pull request report.

    The report combines an optional telemetry section, the PR summary
    report and the code review report into the ``REPORT_PR_REVIEW``
    template of the selected language.
    """

    def __init__(
        self,
        pr_summary: PRSummary,
        code_summaries: list[ChangeSummary],
        pull_request: PullRequest,
        code_reviews: List[CodeReview],
        telemetry: Optional[Dict[str, Any]] = None,
        language="en",
    ):
        """Keep the report inputs; a missing or empty telemetry becomes ``{}``."""
        self._pr_summary = pr_summary
        self._code_summaries = code_summaries
        self._pull_request = pull_request
        self._code_reviews = code_reviews
        self._telemetry = telemetry or {}
        super().__init__(language=language)

    def report(self) -> str:
        """Return the rendered pull request report text."""
        # The telemetry section is only rendered when telemetry data exists.
        telemetry_section = ""
        if self._telemetry:
            usage = self._telemetry
            telemetry_section = self.template.REPORT_TELEMETRY.format(
                start_time=datetime.datetime.fromtimestamp(
                    usage["start_time"]
                ).strftime("%Y-%m-%d %H:%M:%S"),
                time_usage=usage["time_usage"],
                cost=usage["cost"],
                tokens=usage["tokens"],
            )

        summary_reporter = PRSummaryMarkdownReporter(
            pr_summary=self._pr_summary,
            code_summaries=self._code_summaries,
            pull_request=self._pull_request,
            language=self.language,
        )
        summary_markdown = summary_reporter.report()

        review_reporter = CodeReviewMarkdownReporter(self._code_reviews, self.language)
        review_markdown = review_reporter.report()

        pull_request = self._pull_request
        return self.template.REPORT_PR_REVIEW.format(
            repo_name=pull_request.repository_name,
            pr_number=pull_request.pull_request_number,
            pr_name=pull_request.title,
            url=pull_request.url,
            project=PROJECT,
            version=VERSION,
            telemetry=telemetry_section,
            pr_report=summary_markdown,
            cr_report=review_markdown,
        )
59 |
--------------------------------------------------------------------------------
/codedog/analysis_results_20250424_095117.json:
--------------------------------------------------------------------------------
1 | {
2 | "summary": {
3 | "total_commits": 0,
4 | "total_files": 0,
5 | "total_additions": 0,
6 | "total_deletions": 0,
7 | "files_changed": []
8 | },
9 | "commits": [],
10 | "file_diffs": {}
11 | }
--------------------------------------------------------------------------------
/codedog/analyze_code.py:
--------------------------------------------------------------------------------
1 | """
2 | Code analysis module for GitHub and GitLab repositories.
3 | Provides functionality to analyze code changes and generate reports.
4 | """
5 |
6 | from datetime import datetime, timedelta
7 | import json
8 | from pathlib import Path
9 | from utils.remote_repository_analyzer import RemoteRepositoryAnalyzer
10 |
def format_commit_for_json(commit):
    """Return a JSON-serializable dict describing ``commit``.

    All fields are copied from the commit's attributes of the same name,
    except ``date``, which is converted with ``isoformat()``.
    """
    field_names = (
        'hash',
        'author',
        'date',
        'message',
        'files',
        'added_lines',
        'deleted_lines',
        'effective_lines',
    )
    record = {field: getattr(commit, field) for field in field_names}
    # Date objects are not JSON serializable; store the ISO string instead.
    record['date'] = record['date'].isoformat()
    return record
23 |
def save_analysis_results(output_path, commits, file_diffs, stats, show_diffs=False):
    """
    Write the analysis results to ``output_path`` as indented UTF-8 JSON.

    Missing parent directories of ``output_path`` are created.

    Args:
        output_path: Path where to save the JSON file
        commits: List of commit objects
        file_diffs: Dictionary of file diffs
        stats: Dictionary containing analysis statistics
        show_diffs: Whether to include file diffs in the output
    """
    summary = {
        'total_commits': stats['total_commits'],
        'total_files': len(stats['files_changed']),
        'total_additions': stats['total_additions'],
        'total_deletions': stats['total_deletions'],
        'files_changed': sorted(stats['files_changed']),
    }
    results = {
        'summary': summary,
        'commits': [format_commit_for_json(commit) for commit in commits],
    }
    if show_diffs:
        results['file_diffs'] = file_diffs

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    with destination.open('w', encoding='utf-8') as handle:
        handle.write(json.dumps(results, indent=2, ensure_ascii=False))
53 |
def analyze_repository(repo_url, author, days=7, include=None, exclude=None, token=None):
    """
    Collect an author's file diffs from a repository over the last ``days`` days.

    The time window ends at the current local time and starts ``days`` days
    earlier; the query itself is delegated to ``RemoteRepositoryAnalyzer``.

    Args:
        repo_url: URL of the repository to analyze
        author: Author name or email to filter commits
        days: Number of days to look back (default: 7)
        include: List of file extensions to include
        exclude: List of file extensions to exclude
        token: GitHub/GitLab access token

    Returns:
        Tuple of (commits, file_diffs, stats)
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(days=days)

    return RemoteRepositoryAnalyzer(repo_url, token).get_file_diffs_by_timeframe(
        author=author,
        start_date=window_start,
        end_date=window_end,
        include_extensions=include,
        exclude_extensions=exclude,
    )
--------------------------------------------------------------------------------
/codedog/chains/__init__.py:
--------------------------------------------------------------------------------
1 | from codedog.chains.code_review.base import CodeReviewChain
2 | from codedog.chains.pr_summary.base import PRSummaryChain
3 | from codedog.chains.pr_summary.translate_pr_summary_chain import TranslatePRSummaryChain
4 |
5 | __all__ = ["PRSummaryChain", "CodeReviewChain", "TranslatePRSummaryChain"]
6 |
--------------------------------------------------------------------------------
/codedog/chains/code_review/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codedog-ai/codedog/a1702a4ab933c7c071318e688a5a3b6948b423d0/codedog/chains/code_review/__init__.py
--------------------------------------------------------------------------------
/codedog/chains/code_review/base.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from itertools import zip_longest
4 | from typing import Any, Dict, List, Optional
5 |
6 | from langchain_core.language_models import BaseLanguageModel
7 | from langchain_core.callbacks.manager import (
8 | AsyncCallbackManagerForChainRun,
9 | CallbackManagerForChainRun,
10 | )
11 | from langchain.chains import LLMChain
12 | from langchain.chains.base import Chain
13 | from langchain_core.prompts import BasePromptTemplate
14 | from pydantic import Field
15 |
16 | from codedog.chains.code_review.prompts import CODE_REVIEW_PROMPT
17 | from codedog.models import ChangeFile, CodeReview, PullRequest
18 | from codedog.processors import PullRequestProcessor
19 | from codedog.processors.pull_request_processor import SUFFIX_LANGUAGE_MAPPING
20 |
21 |
class CodeReviewChain(Chain):
    """Review every changed code file of a pull request with an LLM.

    Inputs are:
    - pull_request(PullRequest): a pull request object

    Outputs are:
    - code_reviews(List[CodeReview]): one review per reviewed code file.
    """

    chain: LLMChain = Field(exclude=True)
    """Chain to use to review code change."""
    processor: PullRequestProcessor = Field(
        exclude=True, default_factory=PullRequestProcessor.build
    )
    """PR data process."""
    _input_keys: List[str] = ["pull_request"]
    _output_keys: List[str] = ["code_reviews"]

    @property
    def _chain_type(self) -> str:
        return "pull_request_code_review_chain"

    @property
    def input_keys(self) -> List[str]:
        """Will be whatever keys the prompt expects.

        :meta private:
        """
        return self._input_keys

    @property
    def output_keys(self) -> List[str]:
        """Will always return text key.

        :meta private:
        """
        return self._output_keys

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Review the pull request in ``inputs["pull_request"]`` synchronously."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        _run_manager.on_text(inputs["pull_request"].json() + "\n")

        pr: PullRequest = inputs["pull_request"]
        code_files: List[ChangeFile] = self.processor.get_diff_code_files(pr)

        code_review_inputs = self._process_code_review_inputs(code_files)
        # Skip the LLM call entirely when there is nothing to review.
        code_review_outputs = (
            self.chain.apply(
                code_review_inputs, callbacks=_run_manager.get_child(tag="CodeReview")
            )
            if code_review_inputs
            else []
        )

        return self._process_result(code_files, code_review_outputs)

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Review the pull request in ``inputs["pull_request"]`` asynchronously."""
        _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
        await _run_manager.on_text(inputs["pull_request"].json() + "\n")

        pr: PullRequest = inputs["pull_request"]
        code_files: List[ChangeFile] = self.processor.get_diff_code_files(pr)

        code_review_inputs = self._process_code_review_inputs(code_files)
        # Skip the LLM call entirely when there is nothing to review.
        code_review_outputs = (
            await self.chain.aapply(
                code_review_inputs, callbacks=_run_manager.get_child(tag="CodeReview")
            )
            if code_review_inputs
            else []
        )

        return await self._aprocess_result(code_files, code_review_outputs)

    def _process_code_review_inputs(
        self,
        code_files: List[ChangeFile],
    ) -> List[Dict[str, str]]:
        """Build one prompt input (content, name, language) per code file."""
        input_data = []
        for code_file in code_files:
            input_item = {
                "content": code_file.diff_content.content[
                    :4000
                ],  # TODO: handle long diff with summarize chain
                "name": code_file.full_name,
                "language": SUFFIX_LANGUAGE_MAPPING.get(code_file.suffix, ""),
            }
            input_data.append(input_item)

        return input_data

    @staticmethod
    def _build_code_reviews(
        code_files: List[ChangeFile], code_review_outputs: List
    ) -> List[CodeReview]:
        """Pair each file with its LLM output.

        Raises:
            ValueError: if the number of files and outputs differ. Without
                this check the ``None`` padding of ``zip_longest`` would be
                subscripted and fail with an opaque ``TypeError``.
        """
        code_reviews = []
        for code_file, output in zip_longest(code_files, code_review_outputs):
            if code_file is None or output is None:
                raise ValueError(
                    "Code review outputs do not match code files: "
                    f"{len(code_files)} files, {len(code_review_outputs)} outputs."
                )
            code_reviews.append(CodeReview(file=code_file, review=output["text"]))
        return code_reviews

    def _process_result(self, code_files: List[ChangeFile], code_review_outputs: List):
        """Wrap the LLM outputs into the chain's ``code_reviews`` output."""
        return {
            "code_reviews": self._build_code_reviews(code_files, code_review_outputs)
        }

    async def _aprocess_result(
        self, code_files: List[ChangeFile], code_review_outputs: List
    ):
        """Async counterpart of ``_process_result``."""
        return {
            "code_reviews": self._build_code_reviews(code_files, code_review_outputs)
        }

    @classmethod
    def from_llm(
        cls,
        *,
        llm: BaseLanguageModel,
        prompt: BasePromptTemplate = CODE_REVIEW_PROMPT,
        **kwargs,
    ) -> CodeReviewChain:
        """Build the chain from an LLM; ``kwargs`` are forwarded to ``LLMChain``."""
        return cls(
            chain=LLMChain(llm=llm, prompt=prompt, **kwargs),
            processor=PullRequestProcessor(),
        )
139 |
--------------------------------------------------------------------------------
/codedog/chains/code_review/prompts.py:
--------------------------------------------------------------------------------
1 | # TODO: Localization
2 | from langchain_core.prompts import PromptTemplate
3 |
4 | from codedog.templates import grimoire_en
5 |
6 | CODE_REVIEW_PROMPT = PromptTemplate(
7 | template=grimoire_en.CODE_SUGGESTION,
8 | input_variables=["name", "language", "content"],
9 | )
10 |
--------------------------------------------------------------------------------
/codedog/chains/code_review/translate_code_review_chain.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from itertools import zip_longest
4 | from typing import List
5 |
6 | from langchain_core.language_models import BaseLanguageModel
7 | from langchain.chains import LLMChain
8 | from langchain_core.prompts import BasePromptTemplate
9 | from pydantic import Field
10 |
11 | from codedog.chains.code_review.base import CodeReviewChain
12 | from codedog.chains.code_review.prompts import CODE_REVIEW_PROMPT
13 | from codedog.chains.prompts import TRANSLATE_PROMPT
14 | from codedog.models import ChangeFile, CodeReview
15 | from codedog.processors.pull_request_processor import PullRequestProcessor
16 |
17 |
class TranslateCodeReviewChain(CodeReviewChain):
    """Code review chain that translates every non-empty review afterwards."""

    # TODO: use multiple parent classes to avoid code duplication. Not sure how to do this with pydantic.

    language: str = Field()
    """The language you want to translate into.

    Note that default review result is usually in English. If language is set to english it will also call llm
    """
    translate_chain: LLMChain = Field(exclude=True)
    """Chain to use to translate code review result."""

    @classmethod
    def from_llm(
        cls,
        *,
        language: str,
        llm: BaseLanguageModel,
        translate_llm: BaseLanguageModel,
        prompt: BasePromptTemplate = CODE_REVIEW_PROMPT,
        translate_prompt: BasePromptTemplate = TRANSLATE_PROMPT,
        **kwargs,
    ) -> CodeReviewChain:
        """Build the chain; ``kwargs`` are forwarded to both ``LLMChain`` objects."""
        return cls(
            language=language,
            chain=LLMChain(llm=llm, prompt=prompt, **kwargs),
            translate_chain=LLMChain(
                llm=translate_llm, prompt=translate_prompt, **kwargs
            ),
            processor=PullRequestProcessor(),
        )

    def _process_result(self, code_files: List[ChangeFile], code_review_outputs: List):
        """Build the code reviews as the parent does, then translate them."""
        code_reviews = super()._process_result(code_files, code_review_outputs)[
            "code_reviews"
        ]
        return {"code_reviews": self._translate(code_reviews)}

    async def _aprocess_result(
        self, code_files: List[ChangeFile], code_review_outputs: List
    ):
        """Async counterpart of ``_process_result``."""
        base_result = await super()._aprocess_result(code_files, code_review_outputs)
        code_reviews = await self._atranslate(base_result["code_reviews"])
        return {"code_reviews": code_reviews}

    def _translation_inputs(self, code_reviews: List[CodeReview]) -> List[dict]:
        """Build one translate-prompt input per given review."""
        return [
            {
                "language": self.language,
                "description": "Suggestion for a changed file",
                "content": cr.review,
            }
            for cr in code_reviews
        ]

    @staticmethod
    def _apply_translations(code_reviews: List[CodeReview], response: List) -> None:
        """Write each translated text back to the review it was requested for."""
        for cr, r in zip_longest(code_reviews, response):
            if not cr or not r:
                break

            cr.review = r["text"]

    def _translate(self, code_reviews: List[CodeReview]) -> List[CodeReview]:
        """Translate all non-empty reviews in place and return the full list."""
        # Only non-empty reviews are sent to the LLM, so responses must be
        # paired with that same filtered list; pairing them with the full
        # list would attach translations to the wrong files.
        pending = [cr for cr in code_reviews if cr.review != ""]
        response = (
            self.translate_chain.apply(self._translation_inputs(pending))
            if pending
            else []
        )
        self._apply_translations(pending, response)
        return code_reviews

    async def _atranslate(self, code_reviews: List[CodeReview]) -> List[CodeReview]:
        """Async counterpart of ``_translate``."""
        # See ``_translate``: responses belong to the filtered list only.
        pending = [cr for cr in code_reviews if cr.review != ""]
        response = (
            await self.translate_chain.aapply(self._translation_inputs(pending))
            if pending
            else []
        )
        self._apply_translations(pending, response)
        return code_reviews
104 |
--------------------------------------------------------------------------------
/codedog/chains/pr_summary/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codedog-ai/codedog/a1702a4ab933c7c071318e688a5a3b6948b423d0/codedog/chains/pr_summary/__init__.py
--------------------------------------------------------------------------------
/codedog/chains/pr_summary/base.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Any, Dict, List, Optional
4 | import logging
5 |
6 | from langchain_core.language_models import BaseLanguageModel
7 | from langchain_core.callbacks.manager import (
8 | AsyncCallbackManagerForChainRun,
9 | CallbackManagerForChainRun,
10 | )
11 | from langchain.chains import LLMChain
12 | from langchain.chains.base import Chain
13 | from langchain.output_parsers import OutputFixingParser, PydanticOutputParser
14 | from langchain_core.output_parsers import BaseOutputParser
15 | from langchain_core.prompts import BasePromptTemplate
16 | from pydantic import Field, BaseModel, ConfigDict
17 |
18 | from codedog.chains.pr_summary.prompts import CODE_SUMMARY_PROMPT, PR_SUMMARY_PROMPT
19 | from codedog.models import ChangeSummary, PRSummary, PullRequest
20 | from codedog.processors.pull_request_processor import (
21 | SUFFIX_LANGUAGE_MAPPING,
22 | PullRequestProcessor,
23 | )
24 |
25 | processor = PullRequestProcessor.build()
26 |
27 |
class PRSummaryChain(Chain):
    """Summarize a pull request.

    Inputs are:
    - pull_request(PullRequest): a pull request object

    Outputs are:
    - pr_summary(PRSummary): summary of pull request.
    - code_summaries(List[ChangeSummary]): changed code file summarizations.
    """

    code_summary_chain: LLMChain = Field(exclude=True)
    """Chain to use to summarize code change."""
    pr_summary_chain: LLMChain = Field(exclude=True)
    """Chain to use to summarize PR."""

    parser: BaseOutputParser = Field(exclude=True)
    """Parse pr summarized result to PRSummary object."""

    _input_keys: List[str] = ["pull_request"]
    _output_keys: List[str] = ["pr_summary", "code_summaries"]

    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)

    @property
    def _chain_type(self) -> str:
        return "pull_request_summary_chain"

    @property
    def input_keys(self) -> List[str]:
        """Will be whatever keys the prompt expects.

        :meta private:
        """
        return self._input_keys

    @property
    def output_keys(self) -> List[str]:
        """Will always return text key.

        :meta private:
        """
        return self._output_keys

    def review(self, inputs, _run_manager) -> Dict[str, Any]:
        """Summarize each code file, then the whole pull request."""
        pr: PullRequest = inputs["pull_request"]

        code_summary_inputs = self._process_code_summary_inputs(pr)
        # Skip the LLM call entirely when there is no code file to summarize.
        code_summary_outputs = (
            self.code_summary_chain.apply(
                code_summary_inputs, callbacks=_run_manager.get_child(tag="CodeSummary")
            )
            if code_summary_inputs
            else []
        )

        code_summaries = processor.build_change_summaries(
            code_summary_inputs, code_summary_outputs
        )

        pr_summary_input = self._process_pr_summary_input(pr, code_summaries)
        pr_summary_output = self.pr_summary_chain(
            pr_summary_input, callbacks=_run_manager.get_child(tag="PRSummary")
        )

        return self._process_result(pr_summary_output, code_summaries)

    async def areview(self, inputs, _run_manager) -> Dict[str, Any]:
        """Async counterpart of ``review``."""
        pr: PullRequest = inputs["pull_request"]

        code_summary_inputs = self._process_code_summary_inputs(pr)
        # Child callbacks carry the same tags as in the sync path.
        code_summary_outputs = (
            await self.code_summary_chain.aapply(
                code_summary_inputs, callbacks=_run_manager.get_child(tag="CodeSummary")
            )
            if code_summary_inputs
            else []
        )

        code_summaries = processor.build_change_summaries(
            code_summary_inputs, code_summary_outputs
        )

        pr_summary_input = self._process_pr_summary_input(pr, code_summaries)
        # ``ainvoke`` takes callbacks through its config; a bare ``callbacks``
        # keyword argument is not forwarded to the run.
        pr_summary_output = await self.pr_summary_chain.ainvoke(
            pr_summary_input,
            config={"callbacks": _run_manager.get_child(tag="PRSummary")},
        )

        return await self._aprocess_result(pr_summary_output, code_summaries)

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Sync chain entry point; logs the pull request and runs ``review``."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        _run_manager.on_text(inputs["pull_request"].json() + "\n")

        return self.review(inputs, _run_manager)

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Async chain entry point; logs the pull request and runs ``areview``."""
        # The fallback must be the async no-op manager: ``on_text`` is awaited
        # below, and the sync manager's ``on_text`` is not awaitable.
        _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
        await _run_manager.on_text(inputs["pull_request"].json() + "\n")

        return await self.areview(inputs, _run_manager)

    def _process_code_summary_inputs(self, pr: PullRequest) -> List[Dict[str, str]]:
        """Build one prompt input (content, name, language) per code file."""
        input_data = []
        code_files = processor.get_diff_code_files(pr)
        for code_file in code_files:
            input_item = {
                "content": code_file.diff_content.content[
                    :2000
                ],  # TODO: handle long diff
                "name": code_file.full_name,
                "language": SUFFIX_LANGUAGE_MAPPING.get(code_file.suffix, ""),
            }
            input_data.append(input_item)

        return input_data

    def _process_pr_summary_input(
        self, pr: PullRequest, code_summaries: List[ChangeSummary]
    ) -> Dict[str, str]:
        """Render the materials the PR summary prompt expects."""
        change_files_material: str = processor.gen_material_change_files(
            pr.change_files
        )
        code_summaries_material = processor.gen_material_code_summaries(code_summaries)
        pr_metadata_material = processor.gen_material_pr_metadata(pr)
        return {
            "change_files": change_files_material,
            "code_summaries": code_summaries_material,
            "metadata": pr_metadata_material,
        }

    def _process_result(
        self, pr_summary_output: Dict[str, Any], code_summaries: List[ChangeSummary]
    ) -> Dict[str, Any]:
        """Map the PR summary chain output to this chain's output keys."""
        return {
            "pr_summary": pr_summary_output["text"],
            "code_summaries": code_summaries,
        }

    async def _aprocess_result(
        self, pr_summary_output: Dict[str, Any], code_summaries: List[ChangeSummary]
    ) -> Dict[str, Any]:
        """Async counterpart of ``_process_result``.

        NOTE(review): unlike ``_process_result``, this falls back to a
        placeholder string when "text" is missing and logs the raw output at
        warning level -- confirm that this difference is intended.
        """
        raw_output_text = pr_summary_output.get("text", "[No text found in output]")
        logging.warning(f"Raw LLM output for PR Summary: {raw_output_text}")
        return {
            "pr_summary": raw_output_text,
            "code_summaries": code_summaries,
        }

    @classmethod
    def from_llm(
        cls,
        code_summary_llm: BaseLanguageModel,
        pr_summary_llm: BaseLanguageModel,
        code_summary_prompt: BasePromptTemplate = CODE_SUMMARY_PROMPT,
        pr_summary_prompt: BasePromptTemplate = PR_SUMMARY_PROMPT,
        **kwargs,
    ) -> PRSummaryChain:
        """Build the chain from the two LLMs; ``kwargs`` go to the constructor."""
        parser = OutputFixingParser.from_llm(
            llm=pr_summary_llm, parser=PydanticOutputParser(pydantic_object=PRSummary)
        )
        code_summary_chain = LLMChain(llm=code_summary_llm, prompt=code_summary_prompt)
        pr_summary_chain = LLMChain(
            llm=pr_summary_llm, prompt=pr_summary_prompt, output_parser=parser
        )
        return cls(
            code_summary_chain=code_summary_chain,
            pr_summary_chain=pr_summary_chain,
            parser=parser,
            **kwargs,
        )
207 |
--------------------------------------------------------------------------------
/codedog/chains/pr_summary/prompts.py:
--------------------------------------------------------------------------------
1 | from langchain.output_parsers import PydanticOutputParser
2 | from langchain_core.prompts import PromptTemplate
3 |
4 | from codedog.models import PRSummary
5 | from codedog.templates import grimoire_en
6 |
7 | parser = PydanticOutputParser(pydantic_object=PRSummary)
8 |
9 | PR_SUMMARY_PROMPT = PromptTemplate(
10 | template=grimoire_en.PR_SUMMARY,
11 | input_variables=["metadata", "change_files", "code_summaries"],
12 | partial_variables={"format_instructions": parser.get_format_instructions()},
13 | )
14 | CODE_SUMMARY_PROMPT = PromptTemplate(
15 | template=grimoire_en.CODE_SUMMARY, input_variables=["name", "language", "content"]
16 | )
17 |
--------------------------------------------------------------------------------
/codedog/chains/pr_summary/translate_pr_summary_chain.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from itertools import zip_longest
4 | from typing import Any, Dict, List
5 |
6 | from langchain_core.language_models import BaseLanguageModel
7 | from langchain.chains import LLMChain
8 | from langchain.output_parsers import OutputFixingParser, PydanticOutputParser
9 | from langchain_core.prompts import BasePromptTemplate
10 | from pydantic import Field
11 |
12 | from codedog.chains.pr_summary.base import PRSummaryChain
13 | from codedog.chains.pr_summary.prompts import CODE_SUMMARY_PROMPT, PR_SUMMARY_PROMPT
14 | from codedog.chains.prompts import TRANSLATE_PROMPT
15 | from codedog.models import ChangeSummary, PRSummary
16 |
17 |
class TranslatePRSummaryChain(PRSummaryChain):
    """PR summary chain that translates the overview and the code summaries."""

    language: str = Field()
    """The language you want to translate into.

    Note that default review result is usually in English. If language is set to english it will also call llm
    """

    translate_chain: LLMChain = Field(exclude=True)
    """Chain to use to translate summary result."""

    @classmethod
    def from_llm(
        cls,
        language: str,
        code_summary_llm: BaseLanguageModel,
        pr_summary_llm: BaseLanguageModel,
        translate_llm: BaseLanguageModel,
        code_summary_prompt: BasePromptTemplate = CODE_SUMMARY_PROMPT,
        pr_summary_prompt: BasePromptTemplate = PR_SUMMARY_PROMPT,
        translate_prompt: BasePromptTemplate = TRANSLATE_PROMPT,
        **kwargs,
    ) -> PRSummaryChain:
        """Build the chain from the three LLMs; ``kwargs`` go to the constructor."""
        parser = OutputFixingParser.from_llm(
            llm=pr_summary_llm, parser=PydanticOutputParser(pydantic_object=PRSummary)
        )
        code_summary_chain = LLMChain(llm=code_summary_llm, prompt=code_summary_prompt)
        pr_summary_chain = LLMChain(
            llm=pr_summary_llm, prompt=pr_summary_prompt, output_parser=parser
        )
        translate_chain = LLMChain(llm=translate_llm, prompt=translate_prompt)

        return cls(
            language=language,
            code_summary_chain=code_summary_chain,
            pr_summary_chain=pr_summary_chain,
            translate_chain=translate_chain,
            parser=parser,
            **kwargs,
        )

    def _process_result(
        self, pr_summary_output: Dict[str, Any], code_summaries: List[ChangeSummary]
    ) -> Dict[str, Any]:
        """Translate the summary and code summaries when a language is set."""
        summary: PRSummary = pr_summary_output["text"]

        if self.language:
            summary = self._translate_summary(summary=summary)
            code_summaries = self._translate_code_summaries(
                code_summaries=code_summaries
            )

        return {
            "pr_summary": summary,
            "code_summaries": code_summaries,
        }

    async def _aprocess_result(
        self, pr_summary_output: Dict[str, Any], code_summaries: List[ChangeSummary]
    ) -> Dict[str, Any]:
        """Async counterpart of ``_process_result``."""
        summary: PRSummary = pr_summary_output["text"]

        if self.language:
            summary = await self._atranslate_summary(summary=summary)
            code_summaries = await self._atranslate_code_summaries(
                code_summaries=code_summaries
            )

        return {
            "pr_summary": summary,
            "code_summaries": code_summaries,
        }

    def _summary_translation_input(self, summary: PRSummary) -> Dict[str, str]:
        """Build the translate-prompt input for the PR overview."""
        return {
            "language": self.language,
            "description": "",
            "content": summary.overview,
        }

    def _code_summary_translation_inputs(
        self, code_summaries: List[ChangeSummary]
    ) -> List[Dict[str, str]]:
        """Build one translate-prompt input per given code summary."""
        return [
            {
                "language": self.language,
                "description": "Changed file brief summary (must in single line!).",
                "content": cs.summary,
            }
            for cs in code_summaries
        ]

    @staticmethod
    def _apply_translations(code_summaries: List[ChangeSummary], response: List) -> None:
        """Write each translated text back to the summary it was requested for."""
        for cs, r in zip_longest(code_summaries, response):
            if not cs or not r:
                break

            cs.summary = r["text"]

    def _translate_summary(self, summary: PRSummary) -> PRSummary:
        """Translate the PR overview in place and return the summary."""
        response = self.translate_chain(self._summary_translation_input(summary))
        summary.overview = response["text"]

        return summary

    def _translate_code_summaries(
        self, code_summaries: List[ChangeSummary]
    ) -> List[ChangeSummary]:
        """Translate all non-empty code summaries in place; return the full list."""
        # Only non-empty summaries are sent to the LLM, so responses must be
        # paired with that same filtered list; pairing them with the full
        # list would attach translations to the wrong files.
        pending = [cs for cs in code_summaries if cs.summary != ""]
        response = (
            self.translate_chain.apply(self._code_summary_translation_inputs(pending))
            if pending
            else []
        )
        self._apply_translations(pending, response)
        return code_summaries

    async def _atranslate_summary(self, summary: PRSummary) -> PRSummary:
        """Async counterpart of ``_translate_summary``."""
        # Uses the same (empty) description as the sync path; the per-file
        # "single line" description does not apply to the PR overview.
        response = await self.translate_chain.ainvoke(
            self._summary_translation_input(summary)
        )

        summary.overview = response["text"]

        return summary

    async def _atranslate_code_summaries(
        self, code_summaries: List[ChangeSummary]
    ) -> List[ChangeSummary]:
        """Async counterpart of ``_translate_code_summaries``."""
        # See ``_translate_code_summaries``: responses belong to the filtered
        # list only.
        pending = [cs for cs in code_summaries if cs.summary != ""]
        response = (
            await self.translate_chain.aapply(
                self._code_summary_translation_inputs(pending)
            )
            if pending
            else []
        )
        self._apply_translations(pending, response)
        return code_summaries
152 |
--------------------------------------------------------------------------------
/codedog/chains/prompts.py:
--------------------------------------------------------------------------------
1 | from langchain_core.prompts import PromptTemplate
2 |
3 | from codedog.templates import grimoire_en
4 |
5 | TRANSLATE_PROMPT = PromptTemplate(
6 | template=grimoire_en.TRANSLATE_PR_REVIEW,
7 | input_variables=["language", "description", "content"],
8 | )
9 |
--------------------------------------------------------------------------------
/codedog/localization.py:
--------------------------------------------------------------------------------
1 | from codedog.templates import grimoire_en, grimoire_cn, template_cn, template_en
2 |
3 |
class Localization:
    """Select report templates and prompt grimoires by language key."""

    # Report templates per supported language key.
    templates = {"en": template_en, "cn": template_cn}

    # Prompt grimoires per supported language key.
    grimoires = {"en": grimoire_en, "cn": grimoire_cn}

    def __init__(self, language="en"):
        """Remember ``language``.

        Raises:
            ValueError: if ``language`` is missing from ``templates`` or
                from ``grimoires``.
        """
        supported = language in self.templates and language in self.grimoires
        if not supported:
            raise ValueError(f"Unsupported Language: {language}")
        self._language = language

    @property
    def language(self):
        """Language key given at construction time."""
        return self._language

    @property
    def template(self):
        """Template module registered for the current language."""
        current = self.language
        return self.templates[current]

    @property
    def grimoire(self):
        """Grimoire module registered for the current language."""
        current = self.language
        return self.grimoires[current]
31 |
--------------------------------------------------------------------------------
/codedog/models/__init__.py:
--------------------------------------------------------------------------------
1 | from codedog.models.blob import Blob
2 | from codedog.models.change_file import ChangeFile, ChangeStatus
3 | from codedog.models.change_summary import ChangeSummary
4 | from codedog.models.code_review import CodeReview
5 | from codedog.models.commit import Commit
6 | from codedog.models.diff import DiffContent, DiffSegment
7 | from codedog.models.issue import Issue
8 | from codedog.models.pr_summary import PRSummary, PRType
9 | from codedog.models.pull_request import PullRequest
10 | from codedog.models.repository import Repository
11 |
12 | __all__ = [
13 | "Blob",
14 | "ChangeFile",
15 | "ChangeStatus",
16 | "ChangeSummary",
17 | "CodeReview",
18 | "Commit",
19 | "DiffContent",
20 | "DiffSegment",
21 | "Issue",
22 | "PRSummary",
23 | "PRType",
24 | "PullRequest",
25 | "Repository",
26 | ]
27 |
--------------------------------------------------------------------------------
/codedog/models/blob.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel, Field
2 |
3 |
class Blob(BaseModel):
    """Git blob object."""

    # Every field is required (no default value).
    blob_id: int = Field(...)
    """Numeric blob id, converted from the sha."""
    sha: str = Field(...)
    """Sha of the blob."""
    content: str = Field(...)
    """Content of the blob."""
    encoding: str = Field(...)
    """Encoding of the blob content."""
    size: int = Field(...)
    """Size of the blob content."""
    url: str = Field(...)
    """Url of the blob."""
19 |
--------------------------------------------------------------------------------
/codedog/models/change_file.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import Optional
3 |
4 | from pydantic import BaseModel, Field
5 |
6 | from codedog.models.diff import DiffContent
7 |
8 |
class ChangeStatus(str, Enum):
    """Git file change mode. https://git-scm.com/docs/diff-format"""

    # Addition of a file
    addition = "A"
    # Copy of a file into a new one
    copy = "C"
    # Deletion of a file
    deletion = "D"
    # Modification of the contents or mode of a file
    modified = "M"
    # Renaming of a file
    renaming = "R"
    # Change in the type of the file (regular file, symbolic link or submodule)
    type_change = "T"
    # File is unmerged (you must complete the merge before it can be committed)
    unmerged = "U"
    # Unknown change type (most probably a bug, please report it)
    unknown = "X"
28 |
29 |
class ChangeFile(BaseModel):
    """A changed file between two commits."""

    blob_id: int = Field()
    """Blob id. Converted from sha."""
    sha: str = Field()
    """Blob sha."""
    full_name: str = Field()
    """File name and path."""
    source_full_name: str = Field()
    """File name and path in source repository."""
    status: ChangeStatus = Field()
    """Change status. See ChangeStatus for the possible values."""
    pull_request_id: int = Field()
    """Id of the pull request this change belongs to."""
    start_commit_id: int = Field()
    """Start commit id."""
    end_commit_id: int = Field()
    """End commit id."""

    name: str = Field()
    """File name."""
    suffix: str = Field()
    """File suffix."""
    diff_url: str = Field(default="")
    """Url of this change file in the pull request."""
    blob_url: str = Field(default="")
    """Url of this change file blob in the end commit.

    Defaults to an empty string when no url is provided.
    """

    # The previous default was "" (a str), which does not match the declared
    # type; None is the honest "no diff available" value and is still falsy.
    diff_content: Optional[DiffContent] = Field(default=None, exclude=True)
    """The diff content of this file, or None when it was not provided."""

    raw: Optional[object] = Field(default=None, exclude=True)
    """Raw object generated by the client api for this change file."""
67 |
--------------------------------------------------------------------------------
/codedog/models/change_summary.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel, Field
2 |
3 |
class ChangeSummary(BaseModel):
    """Summary text attached to one changed file."""

    # Both fields are required.
    full_name: str = Field(...)
    """Full name of the file."""

    summary: str = Field(...)
    """Summarization of the change made to the file."""
10 |
--------------------------------------------------------------------------------
/codedog/models/code_review.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 |
3 | from codedog.models.change_file import ChangeFile
4 |
5 |
class CodeReview(BaseModel):
    """Review text paired with the changed file it refers to."""

    # The changed file under review (required).
    file: ChangeFile
    # The review text for that file (required).
    review: str
9 |
--------------------------------------------------------------------------------
/codedog/models/commit.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from pydantic import BaseModel, Field
4 |
5 |
class Commit(BaseModel):
    """Git commit."""

    commit_id: int = Field()
    """Commit id converted from sha."""
    sha: str = Field()
    """Commit sha."""

    url: str = Field(default="")
    """Commit html url."""
    message: str = Field(default="")
    """Commit message."""

    raw: object = Field(default=None, exclude=True)
    """Raw commit object from the client api; excluded from serialization."""
20 |
--------------------------------------------------------------------------------
/codedog/models/diff.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pydantic import BaseModel, ConfigDict, Field
4 | from unidiff import PatchedFile
5 |
6 |
class DiffSegment(BaseModel):
    """One segment of a file diff, with its line counts and positions."""

    # Every field is required (no default value).
    add_count: int = Field(...)
    """Number of lines added in this segment."""
    remove_count: int = Field(...)
    """Number of lines removed in this segment."""
    content: str = Field(...)
    """Diff text of this segment."""
    source_start_line_number: int = Field(...)
    """Line number in the source file where this segment starts."""
    source_length: int = Field(...)
    """Length of this segment in the source file."""
    target_start_line_number: int = Field(...)
    """Line number in the target file where this segment starts."""
    target_length: int = Field(...)
    """Length of this segment in the target file."""
22 |
23 |
class DiffContent(BaseModel):
    """Diff of a single file: line counts, raw text and parsed segments."""

    # Needed so the non-pydantic PatchedFile type is accepted as a field type.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    add_count: int = Field(...)
    """Number of added lines."""
    remove_count: int = Field(...)
    """Number of removed lines."""
    content: str = Field(...)
    """Diff text."""
    diff_segments: list[DiffSegment] = Field(exclude=True, default_factory=list)
    """Segments of the diff; excluded from serialization."""
    patched_file: Optional[PatchedFile] = Field(exclude=True, default=None)
    """Unidiff patched file object; excluded from serialization."""
37 |
--------------------------------------------------------------------------------
/codedog/models/issue.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from pydantic import BaseModel, Field
4 |
5 |
class Issue(BaseModel):
    """Issue related to a pull request."""

    issue_id: int = Field(...)
    """Id of the issue (required)."""

    title: str = ""
    """Title of the issue."""
    description: str = ""
    """Description of the issue."""
    url: str = ""
    """Url of the issue."""

    raw: object = Field(exclude=True, default=None)
    """Raw issue object from the client api; excluded from serialization."""
19 |
--------------------------------------------------------------------------------
/codedog/models/pr_summary.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 | from pydantic import BaseModel, Field
4 |
5 |
class PRType(str, Enum):
    """Pull request type: feature, fix, refactor, perf, test, doc, ci, style, chore."""

    # Each member's value is its own name.
    feature = "feature"
    fix = "fix"
    refactor = "refactor"
    perf = "perf"
    test = "test"
    doc = "doc"
    ci = "ci"
    style = "style"
    chore = "chore"

    # Default used by PRSummary when no type has been set.
    unknown = "unknown"
19 |
20 |
class PRSummary(BaseModel):
    """Summary of a pull request: overview text, type and major files."""

    overview: str = Field(default="")
    """Summarization of the pull request."""

    pr_type: PRType = Field(default=PRType.unknown)
    """Type of the pull request."""

    major_files: list[str] = Field(default_factory=list)
    """Files with major logical changes. Empty if pr_type is not feature."""
30 |
--------------------------------------------------------------------------------
/codedog/models/pull_request.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from pydantic import BaseModel, Field
4 |
5 | from codedog.models.change_file import ChangeFile
6 | from codedog.models.issue import Issue
7 | from codedog.models.repository import Repository
8 |
9 |
class PullRequest(BaseModel):
    """Pull request data together with its related issues and changed files."""

    pull_request_id: int = Field(...)
    """Pull Request id (Global id. Not number/iid)"""
    repository_id: int = Field(...)
    """Id of the repository this pull request belongs to."""
    pull_request_number: int = 0
    """Pull Request number (as opposed to the global id above)."""

    title: str = ""
    """Title of the pull request."""
    body: str = ""
    """Description of the pull request."""
    url: str = ""
    """Url of the pull request."""
    repository_name: str = ""
    """Name of the repository this pull request belongs to."""

    # The fields below are excluded from serialization.
    related_issues: list[Issue] = Field(exclude=True, default_factory=list)
    """Issues related to the pull request."""
    change_files: list[ChangeFile] = Field(exclude=True, default_factory=list)
    """Files changed by the pull request."""
    repository: Repository = Field(exclude=True, default=None)
    """Target repository of the pull request (None when not set)."""
    source_repository: Repository = Field(exclude=True, default=None)
    """Source repository of the pull request (None when not set)."""
    raw: object = Field(exclude=True, default=None)
    """Raw pull request object from the client api."""
36 |
--------------------------------------------------------------------------------
/codedog/models/repository.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from pydantic import BaseModel, Field
4 |
5 |
class Repository(BaseModel):
    """Git repository."""

    repository_id: int = Field(...)
    """Id of the repository (required)."""

    repository_name: str = ""
    """Name of the repository."""
    repository_full_name: str = ""
    """Full name of the repository."""
    repository_url: str = ""
    """Url of the repository."""

    raw: object = Field(exclude=True, default=None)
    """Raw repository object from the client api; excluded from serialization."""
19 |
--------------------------------------------------------------------------------
/codedog/processors/__init__.py:
--------------------------------------------------------------------------------
1 | from codedog.processors.pull_request_processor import PullRequestProcessor
2 |
3 | __all__ = ["PullRequestProcessor"]
4 |
--------------------------------------------------------------------------------
/codedog/processors/pull_request_processor.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import itertools
4 | from functools import lru_cache
5 | from typing import Callable, Dict, List
6 |
7 | from codedog.localization import Localization
8 | from codedog.models import ChangeFile, ChangeStatus, ChangeSummary, PullRequest
9 |
# Change statuses selected by PullRequestProcessor.get_diff_code_files.
CONTENT_CHANGE_STATUS = [ChangeStatus.addition, ChangeStatus.modified]

# File suffixes that PullRequestProcessor.is_code_file treats as code.
SUPPORT_CODE_FILE_SUFFIX = {"py", "java", "go", "js", "ts", "php", "c", "cpp", "h", "cs", "rs"}

# File suffix -> language name.
SUFFIX_LANGUAGE_MAPPING = dict(
    py="python",
    java="java",
    go="go",
    js="javascript",
    ts="typescript",
    php="php",
    c="c",
    cpp="cpp",
    h="c",
    cs="csharp",
    rs="rust",
)
27 |
28 |
class PullRequestProcessor(Localization):
    """Turn pull request models into text materials using the localized templates."""

    def __init__(self):
        # Lazily filled by the ``status_template_functions`` property.
        self._status_template_functions = None

        super().__init__()

    def is_code_file(self, change_file: ChangeFile):
        """Return True when the file suffix is in SUPPORT_CODE_FILE_SUFFIX."""
        return change_file.suffix in SUPPORT_CODE_FILE_SUFFIX

    def get_diff_code_files(self, pr: PullRequest) -> list[ChangeFile]:
        """Return the changed code files whose status is in CONTENT_CHANGE_STATUS."""
        return [
            change_file
            for change_file in pr.change_files
            if change_file.status in CONTENT_CHANGE_STATUS and self.is_code_file(change_file)
        ]

    def gen_material_change_files(self, change_files: list[ChangeFile]) -> str:
        """Render the changed files grouped by status, one section per status."""
        # groupby only merges adjacent items, so sort by the same key first.
        files_by_status = itertools.groupby(sorted(change_files, key=lambda x: x.status), lambda x: x.status)
        summary_by_status = []

        for status, files in files_by_status:
            # NOTE(review): the fallback header is the ChangeStatus.unknown member
            # itself, not a header string - confirm this is the intended text.
            header = self.template.MATERIAL_STATUS_HEADER_MAPPING.get(status, ChangeStatus.unknown)
            render = self.status_template_functions.get(status, self._build_status_template_default)
            summary_by_status.append(f"{header}\n" + "\n".join(render(file) for file in files) + "\n")

        return "\n".join(summary_by_status)

    def gen_material_code_summaries(self, code_summaries: list[ChangeSummary]) -> str:
        """Render every code summary with MATERIAL_CODE_SUMMARY, separated by blank lines."""
        return (
            "\n\n".join(
                self.template.MATERIAL_CODE_SUMMARY.format(summary=code_summary.summary, name=code_summary.full_name)
                for code_summary in code_summaries
            )
            + "\n"
        )

    def gen_material_pr_metadata(self, pr: PullRequest) -> str:
        """Render the PR title, body and related issue titles with MATERIAL_PR_METADATA."""
        return self.template.MATERIAL_PR_METADATA.format(
            pr_title=pr.title,
            pr_body=pr.body,
            issues="\n".join(f"- {issue.title}" for issue in pr.related_issues),
        )

    def build_change_summaries(
        self, summaries_input: List[Dict[str, str]], summaries_output: List[Dict[str, str]]
    ) -> List[ChangeSummary]:
        """Pair each input (its ``name``) with the output at the same index (its ``text``).

        Args:
            summaries_input: Dicts providing the ``name`` key.
            summaries_output: Dicts providing the ``text`` key, in the same order.

        Returns:
            One ChangeSummary per input/output pair.

        Raises:
            ValueError: If the two lists differ in length. The previous
                zip_longest padding produced None and failed with an
                unrelated-looking TypeError instead.
        """
        if len(summaries_input) != len(summaries_output):
            raise ValueError(
                "summaries_input and summaries_output must have the same length "
                f"(got {len(summaries_input)} and {len(summaries_output)})"
            )

        return [
            ChangeSummary(full_name=i["name"], summary=o["text"])
            for i, o in zip(summaries_input, summaries_output)
        ]

    def _build_status_template_default(self, change_file: ChangeFile):
        """Render a list item containing only the file's full name."""
        return f"- {change_file.full_name}"

    def _build_status_template_copy(self, change_file: ChangeFile):
        """Render a list item that also names the file it was copied from."""
        return f"- {change_file.full_name} (copied from {change_file.source_full_name})"

    def _build_status_template_rename(self, change_file: ChangeFile):
        """Render a list item that also names the file it was renamed from."""
        return f"- {change_file.full_name} (renamed from {change_file.source_full_name})"

    @property
    def status_template_functions(self) -> dict[ChangeStatus, Callable]:
        """Status -> renderer mapping, built on first access.

        Statuses missing from the mapping are rendered by the caller with
        ``_build_status_template_default``.
        """
        if not self._status_template_functions:
            self._status_template_functions = {
                ChangeStatus.copy: self._build_status_template_copy,
                ChangeStatus.renaming: self._build_status_template_rename,
            }
        return self._status_template_functions

    @classmethod
    @lru_cache(maxsize=1)
    def build(cls) -> PullRequestProcessor:
        """Return a processor instance; the cache makes repeated calls reuse it."""
        return cls()
109 |
--------------------------------------------------------------------------------
/codedog/retrievers/__init__.py:
--------------------------------------------------------------------------------
1 | from codedog.retrievers.github_retriever import GithubRetriever
2 | from codedog.retrievers.gitlab_retriever import GitlabRetriever
3 |
4 | __all__ = ["GithubRetriever", "GitlabRetriever"]
5 |
--------------------------------------------------------------------------------
/codedog/retrievers/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from codedog.models import Blob, ChangeFile, Commit, PullRequest, Repository
4 |
5 |
class Retriever(ABC):
    """Base class for git repository pull request retrievers.

    Retrievers are responsible for retrieving PR-related commits, branches,
    issues and code data from Github, Gitlab, Bitbucket etc. This class
    defines the interface codedog uses to retrieve data from a repository,
    wrapping the different client APIs of each platform.
    """

    @property
    @abstractmethod
    def retriever_type(self) -> str:
        """Return the retriever type."""

    @property
    @abstractmethod
    def pull_request(self) -> PullRequest:
        """Return the pull request object."""

    @property
    @abstractmethod
    def repository(self) -> Repository:
        """Return the pull request target repository object."""

    @property
    @abstractmethod
    def source_repository(self) -> Repository:
        """Return the pull request source repository object."""

    @property
    @abstractmethod
    def changed_files(self) -> list[ChangeFile]:
        """Return the changed file list between end commit and start commit."""

    # The annotations below were written as ``str or id``, which Python
    # evaluates to plain ``str`` (``id`` being the builtin function). They are
    # quoted so the union is never evaluated at definition time.
    @abstractmethod
    def get_blob(self, blob_sha: "str | int") -> Blob:
        """Get blob by sha or id."""

    @abstractmethod
    def get_commit(self, commit_sha: "str | int") -> Commit:
        """Get commit by sha or id."""
46 |
--------------------------------------------------------------------------------
/codedog/retrievers/github_retriever.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import itertools
4 | import re
5 |
6 | from github import Github
7 | from github.Commit import Commit as GithubCommit
8 | from github.File import File as GithubFile
9 | from github.GitBlob import GitBlob as GithubBlob
10 | from github.Issue import Issue as GithubIssue
11 | from github.PullRequest import PullRequest as GHPullRequest
12 | from github.Repository import Repository as GHRepo
13 | from unidiff import Hunk, PatchedFile
14 |
15 | from codedog.models import (
16 | Blob,
17 | ChangeFile,
18 | ChangeStatus,
19 | Commit,
20 | DiffContent,
21 | Issue,
22 | PullRequest,
23 | Repository,
24 | )
25 | from codedog.models.diff import DiffSegment
26 | from codedog.retrievers.base import Retriever
27 | from codedog.utils.diff_utils import parse_patch_file
28 |
29 |
class GithubRetriever(Retriever):
    """Retriever that loads pull request data through a PyGithub client."""

    # GitHub file status string -> ChangeStatus value.
    GITHUB_STATUS_MAPPING = {
        "added": "A",
        "copied": "C",
        "removed": "D",
        "modified": "M",
        "renamed": "R",
        "type_change": "T",
    }

    # Matches "#<digits>" references in the pull request title and body.
    ISSUE_PATTERN = r"#\d+"

    def __init__(
        self,
        client: Github,
        repository_name_or_id: str | int,
        pull_request_number: int,
    ):
        """Connect to github remote server and retrieve pull request data.

        Args:
            client (github.Github): github client from pyGithub
            repository_name_or_id (str | int): repository name or id
            pull_request_number (int): pull request number (not global id)
        """

        # --- github model ---
        self._git_repository: GHRepo = client.get_repo(repository_name_or_id)
        self._git_pull_request: GHPullRequest = self._git_repository.get_pull(
            pull_request_number
        )

        # --- codedog model ---
        self._repository: Repository = self._build_repository(self._git_repository)

        # ``head`` is the branch being merged (the source); ``base`` is the
        # target. Fall back to the target repository when the head repository
        # is not available.
        head_repo = self._git_pull_request.head.repo
        if head_repo is None:
            head_repo = self._git_repository
        self._source_repository: Repository = self._build_repository(head_repo)

        self._pull_request: PullRequest = self._build_pull_request(
            self._git_pull_request
        )

    @property
    def retriever_type(self) -> str:
        """Return the retriever type label."""
        return "Github Retriever"

    @property
    def repository(self) -> Repository:
        """Return the repository the pull request was fetched from."""
        return self._repository

    @property
    def pull_request(self) -> PullRequest:
        """Return the pull request model built at construction time."""
        return self._pull_request

    @property
    def source_repository(self) -> Repository:
        """Return the repository of the pull request head (source) branch."""
        return self._source_repository

    @property
    def changed_files(self) -> list[ChangeFile]:
        """Return the changed files of the pull request."""
        return self._pull_request.change_files

    def get_blob(self, blob_id: str) -> Blob:
        """Fetch a git blob from the repository and convert it to a Blob."""
        git_blob = self._git_repository.get_git_blob(blob_id)
        return self._build_blob(git_blob)

    def get_commit(self, commit_sha: str) -> Commit:
        """Fetch a commit from the repository and convert it to a Commit."""
        git_commit = self._git_repository.get_commit(commit_sha)
        return self._build_commit(git_commit)

    def _build_repository(self, git_repo: GHRepo) -> Repository:
        """Convert a PyGithub repository into a Repository model."""
        return Repository(
            repository_id=git_repo.id,
            repository_name=git_repo.name,
            repository_full_name=git_repo.full_name,
            repository_url=git_repo.html_url,
            raw=git_repo,
        )

    def _build_pull_request(self, git_pr: GHPullRequest) -> PullRequest:
        """Convert a PyGithub pull request, its issues and its files into a PullRequest."""
        related_issues = self._parse_and_build_related_issues(git_pr)
        change_files = self._build_change_file_list(git_pr)

        # The pull request (and its number) belongs to the base repository,
        # not to the head repository it is merged from.
        base_repo = git_pr.base.repo

        return PullRequest(
            pull_request_id=git_pr.id,
            repository_id=base_repo.id,
            pull_request_number=git_pr.number,
            title=git_pr.title,
            body=git_pr.body if git_pr.body is not None else "",
            url=git_pr.html_url,
            repository_name=base_repo.full_name,
            related_issues=related_issues,
            change_files=change_files,
            repository=self.repository,
            source_repository=self.source_repository,
            raw=git_pr,
        )

    def _parse_and_build_related_issues(self, git_pr: GHPullRequest) -> list[Issue]:
        """Fetch every issue referenced in the pull request title or body."""
        issue_numbers = self._parse_issue_numbers(git_pr.title, git_pr.body)
        return [
            self._get_and_build_issue(issue_number) for issue_number in issue_numbers
        ]

    def _parse_issue_numbers(self, title, body) -> list[int]:
        """Return the distinct issue numbers referenced in body, then title.

        Each number appears once, in order of first appearance, so an issue
        mentioned several times is fetched only once.
        """
        pattern = GithubRetriever.ISSUE_PATTERN
        body_matches = re.finditer(pattern, body) if body else []
        title_matches = re.finditer(pattern, title) if title else []
        numbers = (
            int(match.group(0).lstrip("#"))
            for match in itertools.chain(body_matches, title_matches)
        )
        # dict.fromkeys removes duplicates while keeping insertion order.
        return list(dict.fromkeys(numbers))

    def _get_and_build_issue(self, issue_number):
        """Fetch one issue by number and convert it to an Issue."""
        git_issue = self._git_repository.get_issue(issue_number)
        return self._build_issue(git_issue)

    def _build_issue(self, git_issue: GithubIssue) -> Issue:
        """Convert a PyGithub issue into an Issue model."""
        return Issue(
            issue_id=git_issue.number,
            title=git_issue.title,
            description=git_issue.body if git_issue.body else "",
            url=git_issue.html_url,
            raw=git_issue,
        )

    def _build_change_file_list(self, git_pr: GHPullRequest) -> list[ChangeFile]:
        """Convert every file of the pull request into a ChangeFile."""
        return [self._build_change_file(file, git_pr) for file in git_pr.get_files()]

    def _build_change_file(
        self, git_file: GithubFile, git_pr: GHPullRequest
    ) -> ChangeFile:
        """Convert one PyGithub file of the pull request into a ChangeFile."""
        full_name = git_file.filename
        name = full_name.split("/")[-1]
        # Text after the last dot; equals the whole name when there is no dot.
        suffix = name.split(".")[-1]
        source_full_name = (
            git_file.previous_filename if git_file.previous_filename else full_name
        )

        return ChangeFile(
            blob_id=int(git_file.sha, 16),
            sha=git_file.sha,
            full_name=full_name,
            source_full_name=source_full_name,
            name=name,
            suffix=suffix,
            status=self._convert_status(git_file.status),
            pull_request_id=git_pr.id,
            start_commit_id=int(git_pr.base.sha, 16),
            end_commit_id=int(git_pr.head.sha, 16),
            diff_url=self._build_change_file_diff_url(git_file, git_pr),
            blob_url=git_file.blob_url,
            diff_content=self._parse_and_build_diff_content(git_file),
            raw=git_file,
        )

    def _convert_status(self, git_status: str) -> ChangeStatus:
        """Map a GitHub status string to a ChangeStatus ("X"/unknown if unmapped)."""
        return ChangeStatus(GithubRetriever.GITHUB_STATUS_MAPPING.get(git_status, "X"))

    def _build_change_file_diff_url(
        self, git_file: GithubFile, git_pr: GHPullRequest
    ) -> str:
        """Build the url of the file's diff inside the pull request page."""
        return f"{git_pr.html_url}/files#diff-{git_file.sha}"

    def _parse_and_build_diff_content(self, git_file: GithubFile) -> DiffContent:
        """Parse the file patch and convert it into a DiffContent."""
        patched_file: PatchedFile = self._build_patched_file(git_file)
        patched_segs: list[DiffSegment] = self._build_patched_file_segs(patched_file)

        # TODO: retrieve long content from blob.
        return DiffContent(
            add_count=patched_file.added,
            remove_count=patched_file.removed,
            content=git_file.patch if git_file.patch else "",
            diff_segments=patched_segs,
        )

    def _build_patched_file(self, git_file: GithubFile) -> PatchedFile:
        """Parse the file patch into a unidiff PatchedFile."""
        prev_name = (
            git_file.previous_filename
            if git_file.previous_filename
            else git_file.filename
        )
        return parse_patch_file(git_file.patch, prev_name, git_file.filename)

    def _build_patched_file_segs(self, patched_file: PatchedFile) -> list[DiffSegment]:
        """Convert every hunk of the patched file into a DiffSegment."""
        return [self._build_patch_segment(hunk) for hunk in patched_file]

    def _build_patch_segment(self, patched_hunk: Hunk) -> DiffSegment:
        """Convert one unidiff hunk into a DiffSegment."""
        return DiffSegment(
            add_count=patched_hunk.added or 0,
            remove_count=patched_hunk.removed or 0,
            content=str(patched_hunk),
            source_start_line_number=patched_hunk.source_start,
            source_length=patched_hunk.source_length,
            target_start_line_number=patched_hunk.target_start,
            target_length=patched_hunk.target_length,
        )

    def _build_blob(self, git_blob: GithubBlob) -> Blob:
        """Convert a PyGithub git blob into a Blob model."""
        return Blob(
            blob_id=int(git_blob.sha, 16),
            sha=git_blob.sha,
            content=git_blob.content,
            encoding=git_blob.encoding,
            size=git_blob.size,
            url=git_blob.url,
        )

    def _build_commit(self, git_commit: GithubCommit) -> Commit:
        """Convert a PyGithub commit into a Commit model."""
        return Commit(
            commit_id=int(git_commit.sha, 16),
            sha=git_commit.sha,
            # Commit.url is documented as the html url, not the API url.
            url=git_commit.html_url,
            message=git_commit.commit.message,
            raw=git_commit,
        )
258 |
--------------------------------------------------------------------------------
/codedog/templates/__init__.py:
--------------------------------------------------------------------------------
1 | from codedog.templates.grimoire_cn import *
2 | from codedog.templates.grimoire_en import *
3 | from codedog.templates.template_cn import *
4 | from codedog.templates.template_en import *
5 |
6 | __all__ = [
7 | "grimoire_cn",
8 | "grimoire_en",
9 | "template_cn",
10 | "template_en",
11 | ]
12 |
--------------------------------------------------------------------------------
/codedog/templates/grimoire_cn.py:
--------------------------------------------------------------------------------
1 | """
2 | Chinese prompt templates for code review.
3 | """
4 |
5 | from typing import Any, Dict
6 |
7 | class GrimoireCn:
8 | SYSTEM_PROMPT = '''你是 CodeDog,一个由先进语言模型驱动的专业代码审查专家。你的目标是通过全面且建设性的代码审查来帮助开发者改进他们的代码。
9 |
10 | ====
11 |
12 | 能力说明
13 |
14 | 1. 代码分析
15 | - 深入理解多种编程语言和框架
16 | - 识别代码模式、反模式和最佳实践
17 | - 检测安全漏洞
18 | - 识别性能优化机会
19 | - 检查代码风格和一致性
20 |
21 | 2. 审查生成
22 | - 详细的逐行代码审查
23 | - 高层架构反馈
24 | - 安全建议
25 | - 性能改进建议
26 | - 文档改进
27 |
28 | 3. 上下文理解
29 | - 代码仓库结构分析
30 | - Pull Request 上下文理解
31 | - 编码标准合规性检查
32 | - 依赖和需求分析
33 |
34 | ====
35 |
36 | 规则说明
37 |
38 | 1. 审查格式
39 | - 始终提供建设性反馈
40 | - 使用 markdown 格式以提高可读性
41 | - 在建议改进时包含代码示例
42 | - 讨论问题时引用具体行号
43 | - 按严重程度分类反馈(严重、主要、次要、建议)
44 |
45 | 2. 沟通风格
46 | - 保持专业和尊重
47 | - 关注代码而非开发者
48 | - 解释每个建议背后的原因
49 | - 提供可执行的反馈
50 | - 使用清晰简洁的语言
51 |
52 | 3. 审查流程
53 | - 首先分析整体上下文
54 | - 然后审查具体更改
55 | - 考虑技术和可维护性方面
56 | - 关注安全影响
57 | - 检查性能影响
58 |
59 | 4. 代码标准
60 | - 如果有项目特定的编码标准则遵循
61 | - 默认遵循语言特定的最佳实践
62 | - 考虑可维护性和可读性
63 | - 检查适当的错误处理
64 | - 验证测试覆盖率
65 |
66 | ====
67 |
68 | 模板
69 |
70 | {templates}
71 |
72 | ====
73 |
74 | 目标
75 |
76 | 你的任务是提供全面的代码审查,以帮助提高代码质量和可维护性。对于每次审查:
77 |
78 | 1. 分析上下文
79 | - 理解更改的目的
80 | - 审查受影响的组件
81 | - 考虑对系统的影响
82 |
83 | 2. 评估更改
84 | - 检查代码正确性
85 | - 验证错误处理
86 | - 评估性能影响
87 | - 寻找安全漏洞
88 | - 审查文档完整性
89 |
90 | 3. 生成反馈
91 | - 提供具体、可执行的反馈
92 | - 包含改进的代码示例
93 | - 解释建议背后的原因
94 | - 按重要性优先排序反馈
95 |
96 | 4. 总结发现
97 | - 提供高层次概述
98 | - 列出关键建议
99 | - 突出关键问题
100 | - 建议下一步行动
101 |
102 | 记住:你的目标是在保持建设性和专业态度的同时帮助改进代码。
103 | '''
104 |
105 | PR_SUMMARY_SYSTEM_PROMPT = '''你是一个正在分析 Pull Request 的专业代码审查员。你的任务是:
106 | 1. 理解整体更改及其目的
107 | 2. 识别潜在风险和影响
108 | 3. 提供清晰简洁的总结
109 | 4. 突出需要注意的区域
110 |
111 | 重点关注:
112 | - 主要更改及其目的
113 | - 潜在风险或关注点
114 | - 需要仔细审查的区域
115 | - 对代码库的影响
116 | '''
117 |
118 | CODE_REVIEW_SYSTEM_PROMPT = '''你是一个正在检查具体代码更改的专业代码审查员。你的任务是:
119 | 1. 详细分析代码修改
120 | 2. 识别潜在问题或改进
121 | 3. 提供具体、可执行的反馈
122 | 4. 考虑安全和性能影响
123 |
124 | 重点关注:
125 | - 代码正确性和质量
126 | - 安全漏洞
127 | - 性能影响
128 | - 可维护性问题
129 | - 测试覆盖率
130 | '''
131 |
132 | # Other templates...
133 | # (Keep the existing templates, but organize them with clear comments and grouping.)
134 |
--------------------------------------------------------------------------------
/codedog/templates/template_cn.py:
--------------------------------------------------------------------------------
1 | # --- PR Markdown Report ------------------------------------------------------
2 | REPORT_PR_REVIEW = """# [{repo_name} #{pr_number} - {pr_name}]({url}) Pull Request 分析报告
3 |
4 | *powered by GPT and {project} {version}*
5 |
6 | {telemetry}
7 |
8 |
9 | {pr_report}
10 |
11 |
12 | {cr_report}
13 |
14 | """
15 |
16 |
17 | REPORT_TELEMETRY = """## 执行信息
18 | - 开始时间: {start_time}
19 | - 执行耗时: {time_usage:.2f}s
20 | - Openai Token 使用数量: {tokens}
21 | - Openai Api 调用成本: ${cost:.4f}
22 | """
23 |
24 | # --- PR Summary Markdown Report ----------------------------------------------
25 |
26 | REPORT_PR_SUMMARY = """
27 | ## PR 概要
28 |
29 | ### PR 总结
30 | {overview}
31 |
32 | ### 变动文件说明
33 | {file_changes}
34 |
35 |
37 |
38 | {change_overview}
39 |
40 | 改动列表
37 |
38 | {change_overview}
39 |
40 | Change File List
{markdown_content}" 92 | html_part = MIMEText(html_content, "html") 93 | 94 | msg.attach(text_part) 95 | msg.attach(html_part) 96 | 97 | try: 98 | # Create a secure SSL context 99 | context = ssl.create_default_context() if self.use_tls else None 100 | 101 | with smtplib.SMTP(self.smtp_server, self.smtp_port) as server: 102 | if self.use_tls: 103 | server.starttls(context=context) 104 | 105 | server.login(self.smtp_username, self.smtp_password) 106 | server.sendmail( 107 | self.smtp_username, all_recipients, msg.as_string() 108 | ) 109 | 110 | return True 111 | except Exception as e: 112 | print(f"Failed to send email: {str(e)}") 113 | return False 114 | 115 | 116 | def send_report_email( 117 | to_emails: List[str], 118 | subject: str, 119 | markdown_content: str, 120 | cc_emails: Optional[List[str]] = None, 121 | ) -> bool: 122 | """Helper function to send code review report via email. 123 | 124 | Args: 125 | to_emails: List of recipient email addresses 126 | subject: Email subject 127 | markdown_content: Report content in markdown format 128 | cc_emails: List of CC email addresses 129 | 130 | Returns: 131 | bool: True if email was sent successfully, False otherwise 132 | """ 133 | # Check if email notification is enabled 134 | if not env.get("EMAIL_ENABLED", "").lower() in ("true", "1", "yes"): 135 | print("Email notifications are disabled. Set EMAIL_ENABLED=true to enable.") 136 | return False 137 | 138 | try: 139 | notifier = EmailNotifier() 140 | return notifier.send_report( 141 | to_emails=to_emails, 142 | subject=subject, 143 | markdown_content=markdown_content, 144 | cc_emails=cc_emails, 145 | ) 146 | except ValueError as e: 147 | print(f"Email configuration error: {str(e)}") 148 | return False 149 | except smtplib.SMTPAuthenticationError: 150 | print("SMTP Authentication Error: Invalid username or password.") 151 | print("If using Gmail, make sure to:") 152 | print("1. Enable 2-step verification for your Google account") 153 | print("2. 
import os
import shlex
import subprocess
import sys
import zlib
from pathlib import Path
from typing import List, Optional

# Marker comment written into every hook generated by install_git_hooks().
# It lets a re-install tell a CodeDog-generated hook from a user-authored one.
_HOOK_MARKER = "# CodeDog post-commit hook for triggering code reviews"


def _run_git(args: List[str], cwd: str) -> str:
    """Run ``git <args>`` in ``cwd`` and return its stdout as text.

    Args:
        args: Arguments passed to the ``git`` executable.
        cwd: Directory in which git is executed.

    Returns:
        str: Captured standard output, decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
        OSError: If git cannot be started (not installed, ``cwd`` missing).
    """
    completed = subprocess.run(
        ["git", *args],
        capture_output=True,
        # Decode explicitly instead of relying on the locale so commit
        # messages and paths survive on non-UTF-8 consoles.
        encoding="utf-8",
        errors="replace",
        cwd=cwd,
        check=True,
    )
    return completed.stdout


def _stable_int(text: str, modulus: int = 10 ** 8) -> int:
    """Map ``text`` to an integer that is identical across interpreter runs.

    The builtin ``hash()`` is salted per process for strings, so it cannot be
    used for identifiers that must stay the same between two hook invocations.
    """
    return zlib.crc32(text.encode("utf-8")) % modulus


def _commit_numeric_id(*refs: Optional[str]) -> int:
    """Derive an integer id from the first ref whose first 8 chars are hex.

    Falls back to a CRC32 of the first ref when none of them is hexadecimal
    (for example ``HEAD`` or a branch name), so the function never raises.
    """
    for ref in refs:
        try:
            return int(ref[:8], 16)
        except (TypeError, ValueError):
            continue
    first = refs[0] if refs and refs[0] else ""
    return zlib.crc32(first.encode("utf-8"))


def install_git_hooks(repo_path: str) -> bool:
    """Install git hooks to trigger code reviews on commits.

    Writes an executable ``post-commit`` hook into ``<repo_path>/.git/hooks``.
    A pre-existing hook that was not generated by CodeDog is preserved as
    ``post-commit.bak`` instead of being silently overwritten.

    Args:
        repo_path: Path to the git repository

    Returns:
        bool: True if hooks were installed successfully, False otherwise
    """
    hooks_dir = os.path.join(repo_path, ".git", "hooks")

    if not os.path.isdir(hooks_dir):
        print(f"Git hooks directory not found: {hooks_dir}")
        return False

    post_commit_path = os.path.join(hooks_dir, "post-commit")

    # Absolute path of the directory two levels above this module, where
    # run_codedog_commit.py is expected to live.
    codedog_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
    # Quote the path: it is interpolated into a shell script and may contain
    # spaces or other shell metacharacters.
    review_script = shlex.quote(f"{codedog_path}/run_codedog_commit.py")

    hook_content = f"""#!/bin/sh
{_HOOK_MARKER}

# Get the latest commit hash
COMMIT_HASH=$(git rev-parse HEAD)

# Run the review script with the commit hash
# Enable verbose mode to see progress and set EMAIL_ENABLED=true to ensure emails are sent
export EMAIL_ENABLED=true
python {review_script} --commit "$COMMIT_HASH" --verbose
"""

    # Keep a copy of a hook the user wrote themselves.
    if os.path.isfile(post_commit_path):
        with open(post_commit_path, "r", encoding="utf-8", errors="replace") as f:
            existing = f.read()
        if _HOOK_MARKER not in existing:
            backup_path = post_commit_path + ".bak"
            os.replace(post_commit_path, backup_path)
            print(f"Existing post-commit hook backed up to: {backup_path}")

    # Force LF line endings: a CRLF shebang line breaks /bin/sh.
    with open(post_commit_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(hook_content)

    # Make hook executable
    os.chmod(post_commit_path, 0o755)

    print(f"Git post-commit hook installed successfully: {post_commit_path}")
    return True


def get_commit_files(commit_hash: str, repo_path: Optional[str] = None) -> List[str]:
    """Get list of files changed in a specific commit.

    Args:
        commit_hash: The commit hash to check
        repo_path: Path to git repository (defaults to current directory)

    Returns:
        List[str]: List of changed file paths; empty if git fails or cannot
            be executed.
    """
    cwd = repo_path or os.getcwd()

    try:
        output = _run_git(
            [
                "diff-tree",
                "--no-commit-id",
                "--name-only",
                "-r",
                "--root",  # without this a root commit reports no files
                "-z",  # NUL-separated, unquoted paths (safe for non-ASCII names)
                commit_hash,
            ],
            cwd,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error getting files from commit {commit_hash}: {e}")
        print(f"Error output: {e.stderr}")
        return []
    except OSError as e:
        # git is not installed or the working directory does not exist.
        print(f"Error getting files from commit {commit_hash}: {e}")
        return []

    return [path for path in output.split("\0") if path.strip()]


def create_commit_pr_data(commit_hash: str, repo_path: Optional[str] = None) -> dict:
    """Create PR-like data structure from a commit for code review.

    Args:
        commit_hash: The commit hash to check
        repo_path: Path to git repository (defaults to current directory)

    Returns:
        dict: PR-like data structure with commit info and files. When the
            commit cannot be read, a placeholder structure with an empty
            ``files`` list and the error text in ``body`` is returned.
    """
    cwd = repo_path or os.getcwd()

    # Computed before any git call so the error path below can use it too.
    repo_name = os.path.basename(os.path.abspath(cwd))
    repository_id = _stable_int(repo_name)

    try:
        # One call, no patch (-s): full hash, author, subject and body,
        # separated by NUL bytes so multi-line bodies cannot be confused
        # with field boundaries.
        raw = _run_git(
            ["show", "-s", "--pretty=format:%H%x00%an <%ae>%x00%s%x00%b", commit_hash],
            cwd,
        )
        fields = (raw.split("\0", 3) + [""] * 4)[:4]
        resolved_hash, author, subject, body = (field.strip() for field in fields)

        files = get_commit_files(commit_hash, repo_path)

        return {
            # First 8 hex chars as an integer; falls back to the resolved hash
            # when the caller passed a symbolic ref such as "HEAD".
            "pull_request_id": _commit_numeric_id(commit_hash, resolved_hash),
            "repository_id": repository_id,
            "number": commit_hash[:8],  # Use shortened commit hash as "PR number"
            "title": subject or "Unknown commit",
            "body": body,
            "author": author,
            "commit_hash": commit_hash,
            "files": files,
            "is_commit_review": True,  # Flag: this is a commit review, not a real PR
        }

    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Error creating PR data from commit {commit_hash}: {e}")
        stderr = getattr(e, "stderr", None)
        if stderr:
            print(f"Error output: {stderr}")
        return {
            "pull_request_id": _commit_numeric_id(commit_hash),
            "repository_id": repository_id,
            "number": commit_hash[:8] if commit_hash else "unknown",
            "title": "Error retrieving commit data",
            "body": str(e),
            "author": "Unknown",
            "commit_hash": commit_hash,
            "files": [],
            "is_commit_review": True,
        }
**Install Git Hooks** 8 | 9 | Run the following command to set up the git hooks that will trigger automatic code reviews when you make commits: 10 | 11 | ```bash 12 | python run_codedog.py setup-hooks 13 | ``` 14 | 15 | This will install a post-commit hook in your repository's `.git/hooks` directory. 16 | 17 | 2. **Configure Email Notifications** 18 | 19 | To receive email notifications with the review results, you need to configure email settings. You have two options: 20 | 21 | a) **Using Environment Variables**: 22 | 23 | Add the following to your `.env` file: 24 | 25 | ``` 26 | # Email notification settings 27 | EMAIL_ENABLED="true" 28 | NOTIFICATION_EMAILS="your.email@example.com" # Can be comma-separated for multiple recipients 29 | 30 | # SMTP server settings 31 | SMTP_SERVER="smtp.gmail.com" # Use your email provider's SMTP server 32 | SMTP_PORT="587" # Common port for TLS connections 33 | SMTP_USERNAME="your.email@gmail.com" # The email that will send notifications 34 | SMTP_PASSWORD="your_app_password" # See Gmail-specific instructions in docs/email_setup.md 35 | ``` 36 | 37 | b) **Default Email**: 38 | 39 | If you don't configure any email settings, the system will automatically send review results to `kratosxie@gmail.com`. 40 | 41 | 3. **Configure LLM Models** 42 | 43 | You can specify which models to use for different parts of the review process: 44 | 45 | ``` 46 | # Model selection (optional) 47 | CODE_SUMMARY_MODEL="gpt-3.5" 48 | PR_SUMMARY_MODEL="gpt-4" 49 | CODE_REVIEW_MODEL="gpt-3.5" 50 | ``` 51 | 52 | ## How It Works 53 | 54 | 1. When you make a commit, the post-commit hook automatically runs. 55 | 2. The hook executes `run_codedog_commit.py` with your commit hash. 56 | 3. The script: 57 | - Retrieves information about your commit 58 | - Analyzes the code changes 59 | - Generates a summary and review 60 | - Saves the review to a file named `codedog_commit_