├── .gitignore ├── .pdm-python ├── LICENSE ├── README.md ├── data └── .keep ├── pdm.lock ├── pyproject.toml └── src └── collab_dev ├── __init__.py ├── app.py ├── collect.py ├── components └── charts │ ├── __init__.py │ ├── approval_time │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── bot_analysis │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── chart.html │ ├── chart_renderer.py │ ├── contribution │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── merge_time │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── metric.html │ ├── review_coverage │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── review_funnel │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── review_turnaround │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── utils.py │ └── workflow │ ├── __init__.py │ ├── data.py │ └── template.html ├── fetcher ├── __init__.py ├── api_client.py ├── fetch.py ├── github_utils.py └── store.py ├── loader ├── __init__.py └── load.py ├── templates ├── index.html └── repository.html └── theme.py /.gitignore: -------------------------------------------------------------------------------- 1 | # PDM project specific 2 | .pdm.toml 3 | __pypackages__/ 4 | .pdm-python 5 | .pdm-build/ 6 | 7 | # Python 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | *.so 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # Virtual Environments 30 | venv/ 31 | env/ 32 | ENV/ 33 | .venv/ 34 | .env/ 35 | 36 | # IDE specific files 37 | .idea/ 38 | .vscode/ 39 | *.swp 40 | *.swo 41 | .DS_Store 42 | 43 | # Local development settings 44 | .env 45 | .env.local 46 | .env.development.local 47 | .env.test.local 48 | .env.production.local 49 | 50 | # Testing 51 | .coverage 52 | htmlcov/ 53 | .pytest_cache/ 54 | .tox/ 55 | /data/ 56 | -------------------------------------------------------------------------------- /.pdm-python: -------------------------------------------------------------------------------- 1 | /Users/zak/pullflow/collab-dev/.venv/bin/python -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 PullFlow 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🍩 collab.dev 2 | 3 | ## Open Source Collaboration Metrics for Code Reviews 4 | 5 | Cloud edition: 6 | 7 | **collab-dev** is an open-source tool that generates collaboration metrics and insights from GitHub pull request data. Use it to analyze collaboration patterns, review workflows, process efficiency, and more. 8 | 9 | ## Features 10 | 11 | - **Data Collection:** Fetches pull request data from any public or private GitHub repository (requires a GitHub token). 12 | - **Visualization:** Generates interactive charts using Plotly. 13 | - **Command Line Interface:** Runs the analysis with a single command. 14 | - **Portable & Minimal:** Designed to work with CSV data to keep things simple. 15 | - **Extensible:** Add new charts by adding them to the chart modules list. 16 | 17 | --- 18 | 19 | ## Getting Started 20 | 21 | ### Prerequisites 22 | 23 | - Python 3.12+ 24 | - Python Dependency Manager (`pdm`) - [Installation Instructions](https://pdm.fming.dev/latest/#installation) 25 | - A GitHub Personal Access Token with repository read permissions 26 | 27 | ### Installation 28 | 29 | 1. Clone the repository: 30 | 31 | ```bash 32 | git clone https://github.com/pullflow/collab-dev.git 33 | cd collab-dev 34 | ``` 35 | 36 | 2. Install dependencies: 37 | 38 | ```bash 39 | pdm install 40 | ``` 41 | 42 | 3. Set up your GitHub API token as an environment variable: 43 | 44 | ```bash 45 | export GITHUB_TOKEN=your_token_here 46 | ``` 47 | 48 | --- 49 | 50 | ## Usage 51 | 52 | ### Fetch Pull Request Data 53 | 54 | To download data from a GitHub repository, run: 55 | 56 | ```bash 57 | pdm collect owner/repo_name 58 | ``` 59 | 60 | This will generate CSV files with pull request data in the `data/` directory. 61 | 62 | You can specify the number of PRs to fetch using the `-n` flag: 63 | 64 | ```bash 65 | pdm collect -n 100 owner/repo_name 66 | ``` 67 | 68 | For example, to collect 100 PRs from the React repository using your GitHub token: 69 | 70 | ```bash 71 | GITHUB_TOKEN=your_token pdm run collect -n 100 facebook/react 72 | ``` 73 | 74 | Alternatively, you can save your GitHub token in a `.env` file. 75 | 76 | ### View Metrics & Insights 77 | 78 | To analyze the data and view the results: 79 | 80 | 1. Start the Flask application: 81 | 82 | ```bash 83 | pdm serve 84 | ``` 85 | 86 | 2. Open your browser and navigate to: 87 | 88 | <http://127.0.0.1:8700> 89 | 90 | 3. You'll see a list of repositories you've collected data for using the collect script. 91 | 92 | 4. Click on any repository to view its detailed metrics and visualizations at `/report/owner/repo`. 93 | 94 | --- 95 | 96 | ## Data Structure 97 | 98 | collab-dev organizes collected data in a hierarchical file structure: 99 | 100 | ``` 101 | ./data/ 102 | ├── {owner}/ 103 | │ ├── {repo_name}/ 104 | │ │ ├── repository.csv # Repository metadata 105 | │ │ ├── pull_requests.csv # All PR data for this repo 106 | │ │ ├── all_events.csv # Consolidated events from all PRs 107 | │ │ ├── pr_{number}/ 108 | │ │ │ └── events.csv # Events for specific PR 109 | │ │ ├── pr_{number}/ 110 | │ │ │ └── events.csv 111 | │ │ └── ...
112 | ``` 113 | 114 | ### Data Files 115 | 116 | - **repository.csv**: Contains metadata about the GitHub repository 117 | - **pull_requests.csv**: Stores information about all pull requests collected from the repository 118 | - **all_events.csv**: Consolidates timeline events from all PRs for easier analysis 119 | - **events.csv**: In each PR subdirectory, stores the timeline events for that specific PR 120 | 121 | This structure allows for efficient data collection, storage, and analysis while maintaining a clear organization based on GitHub's repository hierarchy. 122 | 123 | --- 124 | 125 | ## Customization 126 | 127 | Charts are defined in the `CHART_MODULES` list in `src/collab_dev/components/charts/chart_renderer.py`. To add a custom chart: 128 | 129 | 1. Create a new module in `src/collab_dev/components/charts/` 130 | 2. Implement a `render(repo_df)` function in your module 131 | 3. Add your module to the `CHART_MODULES` list in `chart_renderer.py` 132 | 133 | Existing chart types include: 134 | 135 | - Workflow (Sankey diagram) 136 | - Contributor distribution patterns 137 | - Bot contribution analysis 138 | - Review coverage metrics 139 | - Review funnel analysis 140 | - Review turnaround time 141 | - Request Approval time analysis 142 | - Merge time distribution 143 | 144 | --- 145 | 146 | ## Development 147 | 148 | ### Code Style 149 | 150 | We use `ruff` for code formatting and linting: 151 | 152 | ```bash 153 | # Run linter 154 | pdm lint 155 | 156 | # Format code 157 | pdm format 158 | 159 | # Fix auto-fixable issues 160 | pdm lint-fix 161 | ``` 162 | 163 | --- 164 | 165 | ## Contributing 166 | 167 | We're looking for help in the following areas: 168 | 169 | - **Validate and improve data and calculations:** Help ensure our metrics are accurate and meaningful. 170 | - **Improve current charts and other visualizations:** Enhance the clarity and usefulness of existing visualizations. 171 | - **Add new charts that help measure collaboration:** Develop new metrics and visualizations that provide insights into team collaboration patterns. 172 | 173 | To contribute: 174 | 175 | 1. Fork the repository 176 | 2. Create your branch: 177 | 178 | ```bash 179 | git checkout -b feature/my-new-feature 180 | ``` 181 | 182 | 3. Commit your changes: 183 | 184 | ```bash 185 | git commit -m "Add some feature" 186 | ``` 187 | 188 | 4. Push to the branch: 189 | 190 | ```bash 191 | git push origin feature/my-new-feature 192 | ``` 193 | 194 | 5. Open a Pull Request 195 | 196 | --- 197 | 198 | ## License 199 | 200 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. 201 | 202 | --- 203 | 204 | ## Support 205 | 206 | For issues and feature requests, please use the [GitHub Issues](https://github.com/pullflow/collab-dev/issues) page. 207 | -------------------------------------------------------------------------------- /data/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pullflow/collab-dev/adcaa2efb3418c1a8aebb2ad98bf46b3a99aa9b2/data/.keep -------------------------------------------------------------------------------- /pdm.lock: -------------------------------------------------------------------------------- 1 | # This file is @generated by PDM. 2 | # It is not intended for manual editing. 
3 | 4 | [metadata] 5 | groups = ["default", "dev"] 6 | strategy = ["inherit_metadata"] 7 | lock_version = "4.5.0" 8 | content_hash = "sha256:d14c62504a543769bcab6e91ca10d4ef6b32618842721d20192bc07182f558a4" 9 | 10 | [[metadata.targets]] 11 | requires_python = "==3.12.*" 12 | 13 | [[package]] 14 | name = "blinker" 15 | version = "1.9.0" 16 | requires_python = ">=3.9" 17 | summary = "Fast, simple object-to-object and broadcast signaling" 18 | groups = ["default"] 19 | files = [ 20 | {file = "blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc"}, 21 | {file = "blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf"}, 22 | ] 23 | 24 | [[package]] 25 | name = "certifi" 26 | version = "2025.1.31" 27 | requires_python = ">=3.6" 28 | summary = "Python package for providing Mozilla's CA Bundle." 29 | groups = ["default"] 30 | files = [ 31 | {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, 32 | {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, 33 | ] 34 | 35 | [[package]] 36 | name = "charset-normalizer" 37 | version = "3.4.1" 38 | requires_python = ">=3.7" 39 | summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 40 | groups = ["default"] 41 | files = [ 42 | {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"}, 43 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"}, 44 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"}, 45 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"}, 46 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"}, 47 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"}, 48 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"}, 49 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"}, 50 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"}, 51 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"}, 52 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"}, 53 | {file = "charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"}, 54 | 
{file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"}, 55 | {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, 56 | {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, 57 | ] 58 | 59 | [[package]] 60 | name = "click" 61 | version = "8.1.8" 62 | requires_python = ">=3.7" 63 | summary = "Composable command line interface toolkit" 64 | groups = ["default"] 65 | dependencies = [ 66 | "colorama; platform_system == \"Windows\"", 67 | "importlib-metadata; python_version < \"3.8\"", 68 | ] 69 | files = [ 70 | {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, 71 | {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, 72 | ] 73 | 74 | [[package]] 75 | name = "colorama" 76 | version = "0.4.6" 77 | requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 78 | summary = "Cross-platform colored terminal text." 79 | groups = ["default"] 80 | marker = "platform_system == \"Windows\"" 81 | files = [ 82 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 83 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 84 | ] 85 | 86 | [[package]] 87 | name = "dotenv" 88 | version = "0.9.9" 89 | summary = "Deprecated package" 90 | groups = ["default"] 91 | dependencies = [ 92 | "python-dotenv", 93 | ] 94 | files = [ 95 | {file = "dotenv-0.9.9-py2.py3-none-any.whl", hash = "sha256:29cf74a087b31dafdb5a446b6d7e11cbce8ed2741540e2339c69fbef92c94ce9"}, 96 | ] 97 | 98 | [[package]] 99 | name = "flask" 100 | version = "3.1.0" 101 | requires_python = ">=3.9" 102 | summary = "A simple framework for building complex web applications." 103 | groups = ["default"] 104 | dependencies = [ 105 | "Jinja2>=3.1.2", 106 | "Werkzeug>=3.1", 107 | "blinker>=1.9", 108 | "click>=8.1.3", 109 | "importlib-metadata>=3.6; python_version < \"3.10\"", 110 | "itsdangerous>=2.2", 111 | ] 112 | files = [ 113 | {file = "flask-3.1.0-py3-none-any.whl", hash = "sha256:d667207822eb83f1c4b50949b1623c8fc8d51f2341d65f72e1a1815397551136"}, 114 | {file = "flask-3.1.0.tar.gz", hash = "sha256:5f873c5184c897c8d9d1b05df1e3d01b14910ce69607a117bd3277098a5836ac"}, 115 | ] 116 | 117 | [[package]] 118 | name = "idna" 119 | version = "3.10" 120 | requires_python = ">=3.6" 121 | summary = "Internationalized Domain Names in Applications (IDNA)" 122 | groups = ["default"] 123 | files = [ 124 | {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, 125 | {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, 126 | ] 127 | 128 | [[package]] 129 | name = "itsdangerous" 130 | version = "2.2.0" 131 | requires_python = ">=3.8" 132 | summary = "Safely pass data to untrusted environments and back." 
133 | groups = ["default"] 134 | files = [ 135 | {file = "itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef"}, 136 | {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"}, 137 | ] 138 | 139 | [[package]] 140 | name = "jinja2" 141 | version = "3.1.6" 142 | requires_python = ">=3.7" 143 | summary = "A very fast and expressive template engine." 144 | groups = ["default"] 145 | dependencies = [ 146 | "MarkupSafe>=2.0", 147 | ] 148 | files = [ 149 | {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, 150 | {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, 151 | ] 152 | 153 | [[package]] 154 | name = "markupsafe" 155 | version = "3.0.2" 156 | requires_python = ">=3.9" 157 | summary = "Safely add untrusted strings to HTML/XML markup." 158 | groups = ["default"] 159 | files = [ 160 | {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf"}, 161 | {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225"}, 162 | {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028"}, 163 | {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8"}, 164 | {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c"}, 165 | {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557"}, 166 | {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22"}, 167 | {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48"}, 168 | {file = "MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30"}, 169 | {file = "MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87"}, 170 | {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, 171 | ] 172 | 173 | [[package]] 174 | name = "narwhals" 175 | version = "1.31.0" 176 | requires_python = ">=3.8" 177 | summary = "Extremely lightweight compatibility layer between dataframe libraries" 178 | groups = ["default"] 179 | files = [ 180 | {file = "narwhals-1.31.0-py3-none-any.whl", hash = "sha256:2a7b79bb5f511055c4c0142121fc0d4171ea171458e12d44dbd9c8fc6488e997"}, 181 | {file = "narwhals-1.31.0.tar.gz", hash = "sha256:333472e2562343dfdd27407ec9b5114a07c81d0416794e4ac6b703dd925c6a1a"}, 182 | ] 183 | 184 | [[package]] 185 | name = "numpy" 186 | version = "2.2.4" 187 | requires_python = ">=3.10" 188 | summary = "Fundamental package for array computing in Python" 189 | groups = ["default"] 190 | files = [ 191 | {file = 
"numpy-2.2.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a7b9084668aa0f64e64bd00d27ba5146ef1c3a8835f3bd912e7a9e01326804c4"}, 192 | {file = "numpy-2.2.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dbe512c511956b893d2dacd007d955a3f03d555ae05cfa3ff1c1ff6df8851854"}, 193 | {file = "numpy-2.2.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:bb649f8b207ab07caebba230d851b579a3c8711a851d29efe15008e31bb4de24"}, 194 | {file = "numpy-2.2.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:f34dc300df798742b3d06515aa2a0aee20941c13579d7a2f2e10af01ae4901ee"}, 195 | {file = "numpy-2.2.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3f7ac96b16955634e223b579a3e5798df59007ca43e8d451a0e6a50f6bfdfba"}, 196 | {file = "numpy-2.2.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f92084defa704deadd4e0a5ab1dc52d8ac9e8a8ef617f3fbb853e79b0ea3592"}, 197 | {file = "numpy-2.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7a4e84a6283b36632e2a5b56e121961f6542ab886bc9e12f8f9818b3c266bfbb"}, 198 | {file = "numpy-2.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:11c43995255eb4127115956495f43e9343736edb7fcdb0d973defd9de14cd84f"}, 199 | {file = "numpy-2.2.4-cp312-cp312-win32.whl", hash = "sha256:65ef3468b53269eb5fdb3a5c09508c032b793da03251d5f8722b1194f1790c00"}, 200 | {file = "numpy-2.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:2aad3c17ed2ff455b8eaafe06bcdae0062a1db77cb99f4b9cbb5f4ecb13c5146"}, 201 | {file = "numpy-2.2.4.tar.gz", hash = "sha256:9ba03692a45d3eef66559efe1d1096c4b9b75c0986b5dff5530c378fb8331d4f"}, 202 | ] 203 | 204 | [[package]] 205 | name = "packaging" 206 | version = "24.2" 207 | requires_python = ">=3.8" 208 | summary = "Core utilities for Python packages" 209 | groups = ["default"] 210 | files = [ 211 | {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, 212 | {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, 213 | ] 214 | 215 | [[package]] 216 | name = "pandas" 217 | version = "2.2.3" 218 | requires_python = ">=3.9" 219 | summary = "Powerful data structures for data analysis, time series, and statistics" 220 | groups = ["default"] 221 | dependencies = [ 222 | "numpy>=1.22.4; python_version < \"3.11\"", 223 | "numpy>=1.23.2; python_version == \"3.11\"", 224 | "numpy>=1.26.0; python_version >= \"3.12\"", 225 | "python-dateutil>=2.8.2", 226 | "pytz>=2020.1", 227 | "tzdata>=2022.7", 228 | ] 229 | files = [ 230 | {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, 231 | {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, 232 | {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, 233 | {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, 234 | {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, 235 | {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, 236 | {file = 
"pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, 237 | {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, 238 | ] 239 | 240 | [[package]] 241 | name = "plotly" 242 | version = "6.0.1" 243 | requires_python = ">=3.8" 244 | summary = "An open-source interactive data visualization library for Python" 245 | groups = ["default"] 246 | dependencies = [ 247 | "narwhals>=1.15.1", 248 | "packaging", 249 | ] 250 | files = [ 251 | {file = "plotly-6.0.1-py3-none-any.whl", hash = "sha256:4714db20fea57a435692c548a4eb4fae454f7daddf15f8d8ba7e1045681d7768"}, 252 | {file = "plotly-6.0.1.tar.gz", hash = "sha256:dd8400229872b6e3c964b099be699f8d00c489a974f2cfccfad5e8240873366b"}, 253 | ] 254 | 255 | [[package]] 256 | name = "python-dateutil" 257 | version = "2.9.0.post0" 258 | requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 259 | summary = "Extensions to the standard Python datetime module" 260 | groups = ["default"] 261 | dependencies = [ 262 | "six>=1.5", 263 | ] 264 | files = [ 265 | {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, 266 | {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, 267 | ] 268 | 269 | [[package]] 270 | name = "python-dotenv" 271 | version = "1.0.1" 272 | requires_python = ">=3.8" 273 | summary = "Read key-value pairs from a .env file and set them as environment variables" 274 | groups = ["default"] 275 | files = [ 276 | {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, 277 | {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, 278 | ] 279 | 280 | [[package]] 281 | name = "pytz" 282 | version = "2025.1" 283 | summary = "World timezone definitions, modern and historical" 284 | groups = ["default"] 285 | files = [ 286 | {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"}, 287 | {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"}, 288 | ] 289 | 290 | [[package]] 291 | name = "requests" 292 | version = "2.32.3" 293 | requires_python = ">=3.8" 294 | summary = "Python HTTP for Humans." 295 | groups = ["default"] 296 | dependencies = [ 297 | "certifi>=2017.4.17", 298 | "charset-normalizer<4,>=2", 299 | "idna<4,>=2.5", 300 | "urllib3<3,>=1.21.1", 301 | ] 302 | files = [ 303 | {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, 304 | {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, 305 | ] 306 | 307 | [[package]] 308 | name = "ruff" 309 | version = "0.11.2" 310 | requires_python = ">=3.7" 311 | summary = "An extremely fast Python linter and code formatter, written in Rust." 
312 | groups = ["dev"] 313 | files = [ 314 | {file = "ruff-0.11.2-py3-none-linux_armv6l.whl", hash = "sha256:c69e20ea49e973f3afec2c06376eb56045709f0212615c1adb0eda35e8a4e477"}, 315 | {file = "ruff-0.11.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2c5424cc1c4eb1d8ecabe6d4f1b70470b4f24a0c0171356290b1953ad8f0e272"}, 316 | {file = "ruff-0.11.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:ecf20854cc73f42171eedb66f006a43d0a21bfb98a2523a809931cda569552d9"}, 317 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c543bf65d5d27240321604cee0633a70c6c25c9a2f2492efa9f6d4b8e4199bb"}, 318 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20967168cc21195db5830b9224be0e964cc9c8ecf3b5a9e3ce19876e8d3a96e3"}, 319 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:955a9ce63483999d9f0b8f0b4a3ad669e53484232853054cc8b9d51ab4c5de74"}, 320 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:86b3a27c38b8fce73bcd262b0de32e9a6801b76d52cdb3ae4c914515f0cef608"}, 321 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a3b66a03b248c9fcd9d64d445bafdf1589326bee6fc5c8e92d7562e58883e30f"}, 322 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0397c2672db015be5aa3d4dac54c69aa012429097ff219392c018e21f5085147"}, 323 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:869bcf3f9abf6457fbe39b5a37333aa4eecc52a3b99c98827ccc371a8e5b6f1b"}, 324 | {file = "ruff-0.11.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2a2b50ca35457ba785cd8c93ebbe529467594087b527a08d487cf0ee7b3087e9"}, 325 | {file = "ruff-0.11.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:7c69c74bf53ddcfbc22e6eb2f31211df7f65054bfc1f72288fc71e5f82db3eab"}, 326 | {file = "ruff-0.11.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6e8fb75e14560f7cf53b15bbc55baf5ecbe373dd5f3aab96ff7aa7777edd7630"}, 327 | {file = "ruff-0.11.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:842a472d7b4d6f5924e9297aa38149e5dcb1e628773b70e6387ae2c97a63c58f"}, 328 | {file = "ruff-0.11.2-py3-none-win32.whl", hash = "sha256:aca01ccd0eb5eb7156b324cfaa088586f06a86d9e5314b0eb330cb48415097cc"}, 329 | {file = "ruff-0.11.2-py3-none-win_amd64.whl", hash = "sha256:3170150172a8f994136c0c66f494edf199a0bbea7a409f649e4bc8f4d7084080"}, 330 | {file = "ruff-0.11.2-py3-none-win_arm64.whl", hash = "sha256:52933095158ff328f4c77af3d74f0379e34fd52f175144cefc1b192e7ccd32b4"}, 331 | {file = "ruff-0.11.2.tar.gz", hash = "sha256:ec47591497d5a1050175bdf4e1a4e6272cddff7da88a2ad595e1e326041d8d94"}, 332 | ] 333 | 334 | [[package]] 335 | name = "six" 336 | version = "1.17.0" 337 | requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 338 | summary = "Python 2 and 3 compatibility utilities" 339 | groups = ["default"] 340 | files = [ 341 | {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, 342 | {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, 343 | ] 344 | 345 | [[package]] 346 | name = "tzdata" 347 | version = "2025.1" 348 | requires_python = ">=2" 349 | summary = "Provider of IANA time zone data" 350 | groups = ["default"] 351 | files = [ 352 | {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = 
"sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, 353 | {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, 354 | ] 355 | 356 | [[package]] 357 | name = "urllib3" 358 | version = "2.3.0" 359 | requires_python = ">=3.9" 360 | summary = "HTTP library with thread-safe connection pooling, file post, and more." 361 | groups = ["default"] 362 | files = [ 363 | {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, 364 | {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, 365 | ] 366 | 367 | [[package]] 368 | name = "werkzeug" 369 | version = "3.1.3" 370 | requires_python = ">=3.9" 371 | summary = "The comprehensive WSGI web application library." 372 | groups = ["default"] 373 | dependencies = [ 374 | "MarkupSafe>=2.1.1", 375 | ] 376 | files = [ 377 | {file = "werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e"}, 378 | {file = "werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746"}, 379 | ] 380 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "collab-dev" 3 | version = "0.1.0" 4 | description = "Default template for PDM package" 5 | authors = [ 6 | { name = "Amna Anwar", email = "amna@pullflow.com" }, 7 | { name = "Alissa Vuillier", email = "alissa@pullflow.com" }, 8 | { name = "Zak Mandhro", email = "zak@pullflow.com" }, 9 | ] 10 | dependencies = [ 11 | "flask>=3.1.0", 12 | "pandas>=2.2.3", 13 | "numpy>=1.26.0", 14 | "plotly>=5.18.0", 15 | "dotenv>=0.9.9", 16 | "requests>=2.31.0", 17 | ] 18 | requires-python = "==3.12.*" 19 | readme = "README.md" 20 | license = { text = "MIT" } 21 | 22 | 23 | [tool.pdm] 24 | distribution = false 25 | 26 | [tool.pdm.scripts] 27 | serve = "python src/collab_dev/app.py" 28 | collect = "python src/collab_dev/collect.py" 29 | lint = "ruff check src/" 30 | format = "ruff format src/" 31 | lint-fix = "ruff check --fix src/" 32 | 33 | [tool.pdm.dev-dependencies] 34 | dev = ["ruff>=0.11.2"] 35 | 36 | [tool.ruff] 37 | line-length = 120 38 | target-version = "py312" 39 | 40 | [tool.ruff.lint] 41 | select = ["E", "F", "I", "W", "B"] 42 | ignore = [] 43 | 44 | # Per-file ignores for visualization-related files with long template strings 45 | [tool.ruff.lint.per-file-ignores] 46 | "src/collab_dev/charts/pr_sankey/__init__.py" = ["E501"] 47 | "src/collab_dev/charts/review_coverage/__init__.py" = ["E501"] 48 | 49 | [tool.ruff.format] 50 | quote-style = "double" 51 | indent-style = "space" 52 | line-ending = "auto" 53 | -------------------------------------------------------------------------------- /src/collab_dev/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pullflow/collab-dev/adcaa2efb3418c1a8aebb2ad98bf46b3a99aa9b2/src/collab_dev/__init__.py -------------------------------------------------------------------------------- /src/collab_dev/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | collab.dev - Flask application for collaboration metrics 3 | """ 4 | 5 | from components.charts.chart_renderer import render_charts 6 | from fetcher.store import 
get_all_repositories 7 | from flask import Flask, render_template 8 | from loader.load import load 9 | 10 | app = Flask(__name__, template_folder=".", static_folder="./static") 11 | 12 | 13 | @app.route("/") 14 | def index(): 15 | """Render the index page listing all collected repositories""" 16 | # Get list of all repositories 17 | repositories = get_all_repositories() 18 | 19 | return render_template("templates/index.html", repositories=repositories) 20 | 21 | 22 | @app.route("/report/<path:repo_path>") 23 | def repository_report(repo_path): 24 | """Show report for a specific repository""" 25 | # Split the repo path into owner and name 26 | parts = repo_path.split("/") 27 | if len(parts) != 2: 28 | return "Invalid repository path", 400 29 | 30 | owner, name = parts 31 | df = load(owner, name) 32 | charts = render_charts(df) 33 | return render_template( 34 | "templates/repository.html", 35 | df=df, 36 | repo=repo_path, 37 | charts=charts, 38 | ) 39 | 40 | 41 | if __name__ == "__main__": 42 | app.run(host="127.0.0.1", port=8700, debug=True) 43 | -------------------------------------------------------------------------------- /src/collab_dev/collect.py: -------------------------------------------------------------------------------- 1 | """ 2 | GitHub repository data collector for collab.dev 3 | 4 | This module validates a GitHub repository URL provided as a command line argument, 5 | extracts the owner and repository name, and collects data from the repository. 6 | """ 7 | 8 | import argparse 9 | import re 10 | import sys 11 | from typing import Optional, Tuple 12 | 13 | from fetcher.fetch import process_repository 14 | 15 | 16 | def parse_github_repo_url(url: str) -> Optional[Tuple[str, str]]: 17 | """ 18 | Parse and validate a GitHub repository URL. 19 | 20 | Args: 21 | url: A string representing a GitHub repository URL in one of these formats: 22 | - owner/repo_name 23 | - https://github.com/owner/repo_name 24 | 25 | Returns: 26 | A tuple of (owner, repo_name) if valid, None otherwise 27 | """ 28 | # Pattern for simple format: owner/repo_name 29 | simple_pattern = r"^([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)$" 30 | 31 | # Pattern for https format: https://github.com/owner/repo_name 32 | https_pattern = r"^https?://github\.com/([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)/?$" 33 | 34 | # Try to match each pattern 35 | for pattern in [simple_pattern, https_pattern]: 36 | match = re.match(pattern, url) 37 | if match: 38 | return match.group(1), match.group(2) 39 | 40 | return None 41 | 42 | 43 | def main(): 44 | """ 45 | Main function that validates the GitHub repository URL from command line arguments 46 | and collects data from the specified repository. 47 | 48 | Parses command line arguments to get the repository URL and the number of PRs to fetch, 49 | validates the URL, and then processes the repository to collect and save data.
50 | """ 51 | parser = argparse.ArgumentParser(description="Collect data from a GitHub repository") 52 | parser.add_argument("repo_url", help="GitHub repository URL (owner/repo_name)") 53 | parser.add_argument( 54 | "-n", 55 | "--num-prs", 56 | type=int, 57 | default=100, 58 | help="Number of PRs to fetch (default: 100)", 59 | ) 60 | 61 | args = parser.parse_args() 62 | 63 | # Validate the repository URL 64 | result = parse_github_repo_url(args.repo_url) 65 | 66 | if result: 67 | owner, repo_name = result 68 | print(f"Fetching data from GitHub repository: {owner}/{repo_name}") 69 | try: 70 | # Process the repository to fetch and save all data 71 | result = process_repository(owner, repo_name, args.num_prs) 72 | print(f"Successfully collected data from {owner}/{repo_name}") 73 | print(f"Data saved to {result.get('path', 'output directory')}") 74 | print( 75 | f"You can view the report by running `pdm serve` and navigating to http://127.0.0.1:5000/{owner}/{repo_name}" 76 | ) 77 | except Exception as e: 78 | print(f"Error fetching repository data: {e}") 79 | sys.exit(1) 80 | else: 81 | print(f"Error: '{args.repo_url}' is not a valid GitHub repository URL") 82 | print("Valid formats include: owner/repo_name, https://github.com/owner/repo_name") 83 | sys.exit(1) 84 | 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Charts package initialization. 3 | """ 4 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/approval_time/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pandas as pd 4 | import plotly 5 | import plotly.graph_objects as go 6 | from components.charts.utils import ( 7 | apply_theme_to_figure, 8 | get_plotly_config, 9 | get_theme_colors, 10 | humanize_time, 11 | ) 12 | from flask import render_template 13 | 14 | from .data import get_approval_time_data 15 | 16 | 17 | def create_approval_time_plot(size_stats) -> go.Figure: 18 | """Create visualization for approval time by PR size""" 19 | 20 | logging.debug("Processing size stats for approval time plot") 21 | logging.debug(f"Input size_stats:\n{size_stats}") 22 | 23 | # Define the desired order of size categories 24 | size_order = [ 25 | "XS (<10 lines)", 26 | "S (10-99 lines)", 27 | "M (100-499 lines)", 28 | "L (500-999 lines)", 29 | "XL (1000+ lines)", 30 | ] 31 | 32 | # Sort the DataFrame by our custom order 33 | size_stats = size_stats.set_index("size_category").reindex(size_order).reset_index() 34 | 35 | # Extract data from size_stats DataFrame 36 | categories = size_stats["size_category"].tolist() 37 | 38 | # Fix: Replace NaN values with 0 in median_hours and pr_count 39 | median_hours = [0 if pd.isna(val) else val for val in size_stats["median_hours"].tolist()] 40 | pr_counts = [0 if pd.isna(val) else int(val) for val in size_stats["pr_count"].tolist()] 41 | 42 | # Create hover text with humanized times 43 | hover_text = [ 44 | f"Median: {humanize_time(hours)}
Count: {count} PR{'s' if count != 1 else ''}" 45 | for hours, count in zip(median_hours, pr_counts, strict=False) 46 | ] 47 | 48 | logging.debug(f"Categories: {categories}") 49 | logging.debug(f"Median hours: {median_hours}") 50 | logging.debug(f"PR counts: {pr_counts}") 51 | 52 | # Calculate percentage of PRs in each category 53 | total_prs = sum(pr_counts) 54 | logging.debug(f"Total PRs: {total_prs}") 55 | 56 | # Create fraction text for each bar with simple dash 57 | bar_text = [f"{count}" for count in pr_counts] 58 | logging.debug(f"Bar text fractions: {bar_text}") 59 | 60 | # Calculate percentages based on PR counts 61 | percentages = [count / total_prs * 100 if total_prs > 0 else 0 for count in pr_counts] 62 | logging.debug(f"Calculated percentages: {percentages}") 63 | 64 | # Get theme colors 65 | colors = get_theme_colors(len(categories)) 66 | 67 | # Create figure using plotly graph objects 68 | fig = go.Figure( 69 | data=[ 70 | go.Bar( 71 | x=categories, 72 | y=median_hours, 73 | text=bar_text, 74 | textposition="outside", 75 | marker_color=colors, 76 | marker_line_width=0, # Remove border lines from bars 77 | hoverinfo="text", 78 | hovertext=hover_text, 79 | ) 80 | ] 81 | ) 82 | 83 | # Update layout 84 | fig.update_layout( 85 | xaxis_title="PR Size", 86 | yaxis_title="Median Hours to Approval", 87 | showlegend=False, 88 | margin={"t": 40, "l": 50, "r": 50, "b": 50}, 89 | height=400, 90 | paper_bgcolor="white", 91 | plot_bgcolor="white", 92 | ) 93 | 94 | # Apply theme to the figure 95 | fig = apply_theme_to_figure(fig) 96 | 97 | return fig 98 | 99 | 100 | def render(repo_df): 101 | """Render the approval time chart component""" 102 | 103 | try: 104 | # Get approval time statistics 105 | approval_data = get_approval_time_data(repo_df) 106 | 107 | if not approval_data: 108 | return render_template("components/charts/approval_time/template.html", approval_data=None) 109 | 110 | # Create plot figure 111 | fig = create_approval_time_plot(approval_data["size_stats"]) 112 | 113 | # Get plotly config from theme 114 | config = get_plotly_config() 115 | 116 | # Convert the figure to HTML 117 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 118 | 119 | # Prepare data for template 120 | template_data = { 121 | "overall_median": approval_data["overall_median"], 122 | "plot_html": plot_html, 123 | } 124 | 125 | return render_template("components/charts/approval_time/template.html", approval_data=template_data) 126 | 127 | except Exception: 128 | return render_template("components/charts/approval_time/template.html", approval_data=None) 129 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/approval_time/data.py: -------------------------------------------------------------------------------- 1 | import logging # Add this at the top 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def calculate_pat(repo_df: pd.DataFrame) -> float: 8 | """Calculate overall median PAT""" 9 | try: 10 | if repo_df.empty: 11 | return None 12 | 13 | # Convert time to datetime 14 | repo_df["time"] = pd.to_datetime(repo_df["time"]) 15 | 16 | # Get review request and approval times for each PR 17 | review_requests = repo_df[repo_df["event_type"] == "review_requested"].groupby("pr_number")["time"].first() 18 | approvals = repo_df[repo_df["event_type"] == "review_approved"].groupby("pr_number")["time"].first() 19 | 20 | # Match PRs that have both request and approval 21 | matched_prs = 
pd.DataFrame({"request_time": review_requests, "approval_time": approvals}).dropna() 22 | 23 | if matched_prs.empty: 24 | return None 25 | 26 | # Calculate time difference in hours 27 | matched_prs["approval_time_hours"] = ( 28 | matched_prs["approval_time"] - matched_prs["request_time"] 29 | ).dt.total_seconds() / 3600 30 | 31 | # Return median time 32 | return matched_prs["approval_time_hours"].median() 33 | 34 | except Exception as e: 35 | logging.error(f"Error calculating PAT: {e}") 36 | return None 37 | 38 | 39 | def get_pr_size_category(total_lines_changed: int) -> str: 40 | """ 41 | Categorize PR size based on total lines changed 42 | 43 | These categories provide a more accurate representation of PR complexity: 44 | - XS: <10 lines (minimal changes, very quick to review) 45 | - S: 10-99 lines (small changes, quick to review) 46 | - M: 100-499 lines (moderate changes, reasonable review time) 47 | - L: 500-999 lines (large changes, significant review time) 48 | - XL: 1000+ lines (extensive changes, challenging to review effectively) 49 | """ 50 | if total_lines_changed < 10: 51 | return "XS (<10 lines)" 52 | elif total_lines_changed < 100: 53 | return "S (10-99 lines)" 54 | elif total_lines_changed < 500: 55 | return "M (100-499 lines)" 56 | elif total_lines_changed < 1000: 57 | return "L (500-999 lines)" 58 | else: 59 | return "XL (1000+ lines)" 60 | 61 | 62 | def calculate_total_lines_changed(repo_df: pd.DataFrame) -> pd.DataFrame: 63 | """ 64 | Calculate total lines changed (added + deleted) for each PR 65 | """ 66 | try: 67 | if repo_df.empty: 68 | return pd.DataFrame() 69 | 70 | # Group by PR number and calculate total lines changed 71 | pr_lines = ( 72 | repo_df.groupby("pr_number") 73 | .agg( 74 | { 75 | "lines_added": "max", # Take the max value as it should be consistent for a PR 76 | "lines_deleted": "max", 77 | } 78 | ) 79 | .reset_index() 80 | ) 81 | 82 | # Calculate total lines changed 83 | pr_lines["total_lines_changed"] = pr_lines["lines_added"] + pr_lines["lines_deleted"] 84 | 85 | return pr_lines[["pr_number", "total_lines_changed"]] 86 | 87 | except Exception: 88 | return pd.DataFrame() 89 | 90 | 91 | def analyze_pr_size_distribution(repo_df: pd.DataFrame) -> dict: 92 | """ 93 | Analyze the distribution of PR sizes based on line changes 94 | 95 | Returns a dictionary with: 96 | - percentiles: key percentiles of the distribution 97 | - histogram: counts of PRs in different line change ranges 98 | - category_counts: counts of PRs in each standardized category 99 | """ 100 | try: 101 | if repo_df.empty: 102 | return {"percentiles": {}, "histogram": {}, "category_counts": {}} 103 | 104 | # Calculate total lines changed for each PR 105 | pr_lines = calculate_total_lines_changed(repo_df) 106 | 107 | if pr_lines.empty: 108 | return {"percentiles": {}, "histogram": {}, "category_counts": {}} 109 | 110 | # Get the total lines changed values 111 | total_lines = pr_lines["total_lines_changed"].dropna() 112 | 113 | if len(total_lines) == 0: 114 | return {"percentiles": {}, "histogram": {}, "category_counts": {}} 115 | 116 | # Calculate percentiles 117 | percentiles = { 118 | "min": total_lines.min(), 119 | "p10": total_lines.quantile(0.1), 120 | "p25": total_lines.quantile(0.25), 121 | "p50": total_lines.quantile(0.5), # median 122 | "p75": total_lines.quantile(0.75), 123 | "p90": total_lines.quantile(0.9), 124 | "p95": total_lines.quantile(0.95), 125 | "p99": total_lines.quantile(0.99), 126 | "max": total_lines.max(), 127 | } 128 | 129 | # Create histogram with bins based on 
data range 130 | bins = [0, 10, 100, 500] 131 | if (total_lines >= 500).any(): 132 | bins.append(1000) 133 | if (total_lines >= 1000).any(): 134 | bins.append(int(total_lines.max()) + 1) 135 | 136 | # Create histogram 137 | hist_values, hist_bins = np.histogram(total_lines, bins=bins) 138 | 139 | histogram = { 140 | f"{int(hist_bins[i])}-{int(hist_bins[i + 1])}": int(hist_values[i]) for i in range(len(hist_values)) 141 | } 142 | 143 | # Count PRs in each standardized category 144 | category_counts = { 145 | "XS (<10 lines)": len(total_lines[total_lines < 10]), 146 | "S (10-99 lines)": len(total_lines[(total_lines >= 10) & (total_lines < 100)]), 147 | "M (100-499 lines)": len(total_lines[(total_lines >= 100) & (total_lines < 500)]), 148 | "L (500-999 lines)": len(total_lines[(total_lines >= 500) & (total_lines < 1000)]), 149 | "XL (1000+ lines)": len(total_lines[total_lines >= 1000]), 150 | } 151 | 152 | return { 153 | "percentiles": {k: round(float(v), 1) for k, v in percentiles.items()}, 154 | "histogram": histogram, 155 | "category_counts": category_counts, 156 | } 157 | 158 | except Exception: 159 | return {"percentiles": {}, "histogram": {}, "category_counts": {}} 160 | 161 | 162 | def calculate_pat_by_size(repo_df: pd.DataFrame) -> pd.DataFrame: 163 | """Calculate PR Approval Time (PAT) broken down by PR size based on line changes""" 164 | try: 165 | if repo_df.empty: 166 | return pd.DataFrame() 167 | 168 | # Convert time to datetime 169 | repo_df["time"] = pd.to_datetime(repo_df["time"]) 170 | 171 | # Calculate total lines changed for each PR 172 | pr_lines = calculate_total_lines_changed(repo_df) 173 | 174 | if pr_lines.empty: 175 | return pd.DataFrame() 176 | 177 | # Get review request and approval times for each PR 178 | review_requests = repo_df[repo_df["event_type"] == "review_requested"].groupby("pr_number")["time"].first() 179 | approvals = repo_df[repo_df["event_type"] == "review_approved"].groupby("pr_number")["time"].first() 180 | 181 | # Match PRs that have both request and approval 182 | matched_prs = pd.DataFrame({"request_time": review_requests, "approval_time": approvals}).dropna() 183 | 184 | if matched_prs.empty: 185 | return pd.DataFrame() 186 | 187 | # Add total lines changed information 188 | matched_prs = matched_prs.reset_index().merge(pr_lines, on="pr_number", how="left").set_index("pr_number") 189 | 190 | # Calculate time difference in hours 191 | matched_prs["approval_time_hours"] = ( 192 | matched_prs["approval_time"] - matched_prs["request_time"] 193 | ).dt.total_seconds() / 3600 194 | 195 | # Add size category 196 | matched_prs["size_category"] = matched_prs["total_lines_changed"].apply(get_pr_size_category) 197 | 198 | # Calculate stats by size category 199 | size_stats = ( 200 | matched_prs.groupby("size_category") 201 | .agg({"approval_time_hours": ["median", "mean", "count"], "total_lines_changed": "mean"}) 202 | .round(1) 203 | ) 204 | 205 | # Flatten column names 206 | size_stats.columns = ["median_hours", "mean_hours", "pr_count", "avg_lines"] 207 | 208 | # Sort by size category in a logical order 209 | size_order = { 210 | "XS (<10 lines)": 0, 211 | "S (10-99 lines)": 1, 212 | "M (100-499 lines)": 2, 213 | "L (500-999 lines)": 3, 214 | "XL (1000+ lines)": 4, 215 | } 216 | 217 | return size_stats.reset_index().sort_values( 218 | by="size_category", key=lambda x: x.map(lambda cat: size_order.get(cat, 99)) 219 | ) 220 | 221 | except Exception: 222 | return pd.DataFrame() 223 | 224 | 225 | def get_approval_time_data(repo_df): 226 | """Process raw data 
into approval time metrics""" 227 | logging.debug("Starting get_approval_time_data processing...") 228 | 229 | if repo_df.empty: 230 | logging.debug("Empty repository dataframe") 231 | return None 232 | 233 | # Get overall median approval time 234 | pat_hours = calculate_pat(repo_df) 235 | logging.debug(f"Overall PAT hours: {pat_hours}") 236 | 237 | if pat_hours is None: 238 | logging.debug("No PAT hours calculated") 239 | return None 240 | 241 | # Calculate PR size stats 242 | pr_lines = calculate_total_lines_changed(repo_df) 243 | 244 | if pr_lines.empty: 245 | logging.debug("No PR lines data") 246 | return None 247 | 248 | # Get review request and approval times for each PR 249 | review_requests = repo_df[repo_df["event_type"] == "review_requested"].groupby("pr_number")["time"].first() 250 | approvals = repo_df[repo_df["event_type"] == "review_approved"].groupby("pr_number")["time"].first() 251 | 252 | # Match PRs that have both request and approval 253 | matched_prs = pd.DataFrame({"request_time": review_requests, "approval_time": approvals}).dropna() 254 | 255 | if matched_prs.empty: 256 | logging.debug("No matched PRs with both request and approval") 257 | return None 258 | 259 | # Add size information 260 | matched_prs = matched_prs.reset_index().merge(pr_lines, on="pr_number", how="left") 261 | 262 | # Calculate approval time in hours 263 | matched_prs["approval_time_hours"] = ( 264 | matched_prs["approval_time"] - matched_prs["request_time"] 265 | ).dt.total_seconds() / 3600 266 | 267 | # Add size categories 268 | matched_prs["size_category"] = matched_prs["total_lines_changed"].apply(get_pr_size_category) 269 | 270 | # Calculate stats by size category 271 | size_stats = ( 272 | matched_prs.groupby("size_category") 273 | .agg({"approval_time_hours": ["median", "count"], "total_lines_changed": "mean"}) 274 | .round(1) 275 | ) 276 | 277 | # Flatten column names 278 | size_stats.columns = ["median_hours", "pr_count", "avg_lines"] 279 | size_stats = size_stats.reset_index() 280 | 281 | return {"overall_median": pat_hours, "size_stats": size_stats} 282 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/approval_time/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Request Approval Time{% endblock %} 4 | {% block metrics %} 5 | {% if approval_data %} 6 | {{ metric(label="Overall Median Approval Time", value="%.1f"|format(approval_data.overall_median) ~ " hours", 7 | tip="Median time between review request and approval") }} 8 | {% endif %} 9 | {% endblock %} 10 | {% block chart %} 11 | {% if approval_data %} 12 |
13 | {% if approval_data.plot_html %} 14 | {{ approval_data.plot_html | safe }} 15 | {% endif %} 16 | 17 | {% else %} 18 | No approval time data available because there are no reviews in the data.
19 | {% endif %} 20 | {% endblock %} 21 | {% block caption %} 22 | Shows the median time between review request and approval for pull requests by size. 23 | Only pull requests with reviews are included. 24 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/bot_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objects as go 3 | from components.charts.utils import ( 4 | apply_theme_to_figure, 5 | get_plotly_config, 6 | get_theme_colors, 7 | ) 8 | from flask import render_template 9 | 10 | 11 | def render(repo_df): 12 | """Render the bot analysis visualization""" 13 | from .data import analyze_bot_activity 14 | 15 | stats = analyze_bot_activity(repo_df) 16 | if not stats: 17 | return render_template("components/charts/bot_analysis/template.html") 18 | 19 | # Check if there's bot breakdown data to display 20 | if not stats["bot_breakdown"]: 21 | return render_template("components/charts/bot_analysis/template.html", stats=stats) 22 | 23 | # Get theme colors 24 | colors = get_theme_colors(len(stats["bot_breakdown"])) 25 | 26 | # Create a Plotly figure 27 | fig = go.Figure( 28 | data=[ 29 | go.Bar( 30 | x=[item["pr_number"] for item in stats["bot_breakdown"]], 31 | y=[item["actor"] for item in stats["bot_breakdown"]], 32 | orientation="h", 33 | marker=dict(color=colors), 34 | customdata=[ 35 | [pr_num, "PR" if pr_num == 1 else "PRs"] 36 | for pr_num in [item["pr_number"] for item in stats["bot_breakdown"]] 37 | ], 38 | hovertemplate="%{customdata[0]} %{customdata[1]}", 39 | ) 40 | ] 41 | ) 42 | 43 | # Update layout 44 | fig.update_layout( 45 | margin=dict(t=30, l=200, r=30, b=50), 46 | height=max(300, len(stats["bot_breakdown"]) * 40), 47 | xaxis=dict(title="Number of PRs"), 48 | yaxis=dict(automargin=True, tickfont=dict(size=12)), 49 | ) 50 | 51 | # Apply theme to the figure 52 | fig = apply_theme_to_figure(fig) 53 | 54 | # Get plotly config from theme 55 | config = get_plotly_config() 56 | 57 | # Convert the figure to HTML 58 | bot_breakdown_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 59 | 60 | return render_template( 61 | "components/charts/bot_analysis/template.html", 62 | stats=stats, 63 | bot_breakdown_html=bot_breakdown_html, 64 | ) 65 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/bot_analysis/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def analyze_bot_activity(repo_df): 5 | """ 6 | Analyze PR activity by bots vs humans. 
7 | 8 | Args: 9 | repo_df: DataFrame containing repository data 10 | 11 | Returns: 12 | dict: Statistics about bot vs human PR activity 13 | """ 14 | if repo_df is None or repo_df.empty: 15 | return None 16 | 17 | # Get unique PRs and their first events to determine PR type 18 | pr_data = repo_df[repo_df["event_type"] == "pr_created"].drop_duplicates("pr_number") 19 | 20 | if pr_data.empty: 21 | return None 22 | 23 | # Create a list of PR authors 24 | pr_authors = [] 25 | for _, row in pr_data.iterrows(): 26 | author = row.get("actor", "") 27 | if author: 28 | pr_authors.append( 29 | {"actor": author, "pr_number": row.get("pr_number", 0), "is_bot": row.get("is_bot", False)} 30 | ) 31 | 32 | if not pr_authors: 33 | return None 34 | 35 | # Convert to DataFrame 36 | pr_df = pd.DataFrame(pr_authors) 37 | 38 | # Group by actor to count PRs 39 | author_counts = pr_df.groupby(["actor", "is_bot"]).size().reset_index(name="pr_count") 40 | 41 | # Calculate statistics 42 | total_prs = len(pr_authors) 43 | bot_prs = pr_df[pr_df["is_bot"]].shape[0] 44 | human_prs = pr_df[~pr_df["is_bot"]].shape[0] 45 | 46 | # Get bot breakdown 47 | bot_breakdown = author_counts[author_counts["is_bot"]].sort_values("pr_count", ascending=False) 48 | 49 | # Rename column for consistency with the expected output 50 | bot_breakdown = bot_breakdown.rename(columns={"pr_count": "pr_number"}) 51 | 52 | return { 53 | "total_prs": total_prs, 54 | "bot_prs": bot_prs, 55 | "human_prs": human_prs, 56 | "bot_count": bot_prs, 57 | "human_count": human_prs, 58 | "bot_percentage": round((bot_prs / total_prs * 100) if total_prs > 0 else 0, 1), 59 | "human_percentage": round((human_prs / total_prs * 100) if total_prs > 0 else 0, 1), 60 | "bot_breakdown": bot_breakdown.to_dict("records"), 61 | } 62 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/bot_analysis/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Bot Contribution{% endblock %} 4 | {% block metrics %} 5 | {% if stats %} 6 | {{ metric(label="Bot PRs", value=stats.bot_percentage ~ "%", tip="Percentage of PRs created by bots") }} 7 | {{ metric(label="Human PRs", value=stats.human_percentage ~ "%", tip="Percentage of PRs created by humans") }} 8 | {% endif %} 9 | {% endblock %} 10 | {% block chart %} 11 | {% if stats %} 12 | {% if stats.bot_breakdown|length == 0 %} 13 |

No bot activity detected in this repository.

14 | 15 | {% elif stats.bot_breakdown|length == 1 %} 16 |

17 | The bot PRs for this repo are all coming from 18 | {{ stats.bot_breakdown[0].actor }}. 19 |

20 | 21 | {% else %} 22 | 23 |
24 | {% if bot_breakdown_html %} {{ bot_breakdown_html | safe }} {% endif %} 25 |
26 | {% endif %} 27 | {% else %} 28 |
No PR data available
29 | {% endif %} 30 | {% endblock %} 31 | {% block caption %} 32 | Shows the proportion of pull requests created by bots, CI/CD, AI agents and other automations. 33 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/chart.html: -------------------------------------------------------------------------------- 1 |
2 |

{% block title %}Chart Title{% endblock %}

3 |
4 | {% block metrics %} 5 | {% endblock %} 6 |
7 |
8 | {% block chart %} 9 | [ chart goes here ] 10 | {% endblock %} 11 |
12 |
13 | {% block caption %}Description of chart{% endblock %} 14 |
15 |
-------------------------------------------------------------------------------- /src/collab_dev/components/charts/chart_renderer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Chart renderer module to execute all available charts against a data frame. 3 | """ 4 | 5 | import sys 6 | from typing import Any, Dict, List 7 | 8 | import components.charts.approval_time 9 | import components.charts.bot_analysis 10 | import components.charts.contribution 11 | import components.charts.merge_time 12 | import components.charts.review_coverage 13 | import components.charts.review_funnel 14 | import components.charts.review_turnaround 15 | import components.charts.workflow 16 | import pandas as pd 17 | 18 | # Ordered list of chart modules 19 | CHART_MODULES = [ 20 | components.charts.workflow, 21 | components.charts.contribution, 22 | components.charts.bot_analysis, 23 | components.charts.review_coverage, 24 | components.charts.review_funnel, 25 | components.charts.review_turnaround, 26 | components.charts.approval_time, 27 | components.charts.merge_time, 28 | ] 29 | 30 | 31 | def render_charts(data: pd.DataFrame) -> List[Dict[str, Any]]: 32 | """ 33 | Render all available charts with the provided DataFrame. Have new charts? 34 | Add them to the CHART_MODULES list and they'll be rendered automatically. 35 | """ 36 | chart_renders = [] 37 | 38 | # Iterate through the ordered list of chart modules and render them 39 | for chart in CHART_MODULES: 40 | try: 41 | chart_renders.append(chart.render(data)) 42 | except Exception as e: 43 | print(f"Error rendering chart {chart.__name__}: {e}", file=sys.stderr) 44 | chart_renders.append(f"Failed to render {chart.__name__}. Error: {e}.") 45 | 46 | return chart_renders 47 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/contribution/__init__.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objects as go 3 | from components.charts.utils import ( 4 | apply_theme_to_figure, 5 | get_plotly_config, 6 | get_theme_colors, 7 | ) 8 | from flask import render_template 9 | from theme import CHART_DIMENSIONS 10 | 11 | 12 | def create_contribution_plot(stats: dict) -> go.Figure: 13 | """Create visualization configuration for contribution donut chart""" 14 | 15 | # Get theme colors for the chart 16 | colors = get_theme_colors(3) 17 | 18 | fig = go.Figure( 19 | data=[ 20 | go.Pie( 21 | labels=["Core Team", "Bot", "Community"], 22 | values=[ 23 | stats["core_percentage"], 24 | stats["bot_percentage"], 25 | stats["community_percentage"], 26 | ], 27 | hole=0.4, 28 | marker_colors=colors, # Use theme colors 29 | domain={"x": [0.05, 0.95], "y": [0, 0.85]}, # Match other pie charts 30 | textposition="inside", 31 | textinfo="percent", 32 | hovertemplate="%{label}: %{customdata}", 33 | customdata=[ 34 | f"{stats['core_prs']} {'PR' if stats['core_prs'] == 1 else 'PRs'}", 35 | f"{stats['bot_prs']} {'PR' if stats['bot_prs'] == 1 else 'PRs'}", 36 | f"{stats['community_prs']} {'PR' if stats['community_prs'] == 1 else 'PRs'}", 37 | ], 38 | insidetextorientation="auto", 39 | ) 40 | ] 41 | ) 42 | 43 | fig.update_layout( 44 | showlegend=True, 45 | legend=dict( 46 | orientation="h", 47 | yanchor="bottom", 48 | y=0.92, 49 | xanchor="center", 50 | x=0.5, 51 | bgcolor="rgba(255,255,255,0.8)", 52 | ), 53 | margin=dict(t=25, b=20, l=10, r=10), 54 | height=CHART_DIMENSIONS["pie_chart_height"], 55 | 
autosize=True, 56 | width=None, 57 | paper_bgcolor="rgba(0,0,0,0)", 58 | plot_bgcolor="rgba(0,0,0,0)", 59 | ) 60 | 61 | # Apply theme to the figure 62 | fig = apply_theme_to_figure(fig) 63 | 64 | return fig 65 | 66 | 67 | def render(repo_df): 68 | """ 69 | Render the contribution chart component 70 | 71 | Args: 72 | repo_df: DataFrame containing repository data 73 | 74 | Returns: 75 | str: Rendered HTML for the contribution component 76 | """ 77 | from .data import get_contribution_stats 78 | 79 | # Get the stats from data module 80 | stats = get_contribution_stats(repo_df) 81 | 82 | if not stats: 83 | return render_template("components/charts/contribution/template.html", contribution_data=None) 84 | 85 | # Create plot figure 86 | fig = create_contribution_plot(stats) 87 | 88 | # Get plotly config from theme 89 | config = get_plotly_config() 90 | 91 | # Convert the figure to HTML 92 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 93 | 94 | # Prepare data for template 95 | contribution_data = { 96 | "plot_html": plot_html, 97 | "stats": { 98 | "core_team": stats["core_percentage"], 99 | "bot": stats["bot_percentage"], 100 | "community": stats["community_percentage"], 101 | }, 102 | "counts": { 103 | "total": stats["total_prs"], 104 | "core": stats["core_prs"], 105 | "bot": stats["bot_prs"], 106 | "community": stats["community_prs"], 107 | }, 108 | } 109 | 110 | # Pass the prepared data to the template 111 | return render_template( 112 | "components/charts/contribution/template.html", 113 | contribution_data=contribution_data, 114 | ) 115 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/contribution/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def get_contribution_stats(repo_df: pd.DataFrame) -> dict: 5 | """ 6 | Calculate contribution percentages and prepare statistics. 7 | TODO: This function is too slow. We may need to pre-calculate this data and store it in the database. 
8 | """ 9 | 10 | if repo_df.empty: 11 | return None 12 | 13 | # Get unique PRs and their first events to determine PR type 14 | pr_data = repo_df[repo_df["event_type"] == "pr_created"].drop_duplicates("pr_number") 15 | total_prs = len(pr_data) 16 | 17 | if total_prs == 0: 18 | return None 19 | 20 | # Count PRs by type using the database columns 21 | bot_prs = len(pr_data[pr_data["is_bot"]]) 22 | non_bot_data = pr_data[~pr_data["is_bot"]] 23 | core_prs = len(non_bot_data[non_bot_data["is_core_team"]]) 24 | community_prs = len(non_bot_data[~non_bot_data["is_core_team"]]) 25 | 26 | # Calculate all stats needed for display 27 | stats = { 28 | "total_prs": total_prs, 29 | "core_prs": core_prs, 30 | "community_prs": community_prs, 31 | "bot_prs": bot_prs, 32 | "core_percentage": round((core_prs / total_prs * 100), 1) if total_prs > 0 else 0, 33 | "community_percentage": round((community_prs / total_prs * 100), 1) if total_prs > 0 else 0, 34 | "bot_percentage": round((bot_prs / total_prs * 100), 1) if total_prs > 0 else 0, 35 | } 36 | 37 | return stats 38 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/contribution/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Contributor Distribution{% endblock %} 4 | {% block metrics %} 5 | {% if contribution_data and contribution_data.stats %} 6 | {{ metric(label="Core Team PRs", value=contribution_data.stats.core_team|round(1) ~ "%", tip="Percentage of PRs created 7 | by the core team") }} 8 | {{ metric(label="Community PRs", value=contribution_data.stats.community|round(1) ~ "%", tip="Percentage of PRs created 9 | by the community") }} 10 | {{ metric(label="Bot PRs", value=contribution_data.stats.bot|round(1) ~ "%", tip="Percentage of PRs created by bots") }} 11 | {% endif %} 12 | {% endblock %} 13 | {% block chart %} 14 | {% if contribution_data and contribution_data.stats %} 15 |
16 | {% if contribution_data.plot_html %} 17 | {{ contribution_data.plot_html | safe }} 18 | {% endif %} 19 |
20 | {% else %} 21 |

No contribution data available

22 | {% endif %} 23 | {% endblock %} 24 | {% block caption %} 25 | Shows the distribution of pull requests by the author's role. Core team includes all admins, members, and collaborators. 26 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/merge_time/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import plotly 3 | import plotly.graph_objects as go 4 | from components.charts.utils import ( 5 | apply_theme_to_figure, 6 | get_plotly_config, 7 | get_theme_colors, 8 | humanize_time, 9 | ) 10 | from flask import render_template 11 | 12 | from .data import calculate_pmt 13 | 14 | 15 | def create_pr_merge_time_chart(data): 16 | """Create PR Merge Time visualization""" 17 | 18 | # Handle both DataFrame and dictionary input 19 | repo_df = data.get("pr_data") if isinstance(data, dict) else data 20 | if repo_df is None: 21 | return None, None 22 | 23 | median_time, pr_times, percentile_values = calculate_pmt(repo_df) 24 | 25 | if median_time is None or pr_times is None: 26 | return None, None 27 | 28 | # Sort merge times for CDF 29 | sorted_times = np.sort(pr_times["merge_time"]) 30 | cumulative_prob = np.arange(1, len(sorted_times) + 1) / len(sorted_times) 31 | 32 | # Calculate 95th percentile for x-axis limit 33 | percentile_95 = np.percentile(sorted_times, 95) 34 | 35 | # Create CDF plot 36 | # Get theme colors 37 | colors = get_theme_colors(5) 38 | 39 | fig = go.Figure() 40 | 41 | # Filter data points up to 95th percentile 42 | mask_95 = sorted_times <= percentile_95 43 | fig.add_trace( 44 | { 45 | "type": "scatter", 46 | "x": sorted_times[mask_95].tolist(), # Convert numpy array to list 47 | "y": cumulative_prob[mask_95].tolist(), # Convert numpy array to list 48 | "mode": "lines", 49 | "line": {"color": colors[0]}, # Use theme color 50 | "customdata": [[humanize_time(x)] for x in sorted_times[mask_95].tolist()], 51 | "hovertemplate": "%{y:.0%}: %{customdata[0]}", 52 | } 53 | ) 54 | 55 | # Add reference lines at key percentiles 56 | percentiles = [0.25, 0.5, 0.75] 57 | 58 | for p, val in zip(percentiles, percentile_values, strict=False): 59 | # Add vertical line 60 | fig.add_vline(x=val, line_dash="dash", line_color=colors[1], opacity=0.3) 61 | 62 | # Add annotation 63 | fig.add_annotation( 64 | x=val, 65 | y=p, 66 | text=f"{int(p * 100)}%: {val:.1f}h", 67 | showarrow=True, 68 | arrowhead=2, 69 | arrowsize=1, 70 | arrowwidth=1, 71 | arrowcolor=colors[1], 72 | font={"size": 12}, 73 | bgcolor="white", 74 | bordercolor=colors[1], 75 | borderwidth=1, 76 | borderpad=4, 77 | ax=40, 78 | ay=0, 79 | ) 80 | 81 | fig.update_layout( 82 | xaxis=dict(title="Merge Time (hours)", range=[0, percentile_95]), 83 | yaxis=dict(title="Cumulative Proportion of PRs", tickformat=",.0%", range=[0, 1.05]), 84 | showlegend=False, 85 | height=450, 86 | width=None, 87 | autosize=True, 88 | paper_bgcolor="white", 89 | plot_bgcolor="white", 90 | margin=dict(t=10, b=50, l=50, r=10), 91 | ) 92 | 93 | # Apply theme to the figure 94 | fig = apply_theme_to_figure(fig) 95 | 96 | return fig, median_time 97 | 98 | 99 | def render(data): 100 | """Render the PR merge time chart component""" 101 | 102 | try: 103 | # Create plot figure 104 | fig, median_time = create_pr_merge_time_chart(data) 105 | 106 | if fig is None: 107 | return render_template("components/charts/merge_time/template.html", pr_merge_data=None) 108 | 109 | # Get plotly config from theme 110 | config = 
get_plotly_config() 111 | 112 | # Convert the figure to HTML 113 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 114 | 115 | # Prepare data for template 116 | pr_merge_data = {"median_time": median_time, "plot_html": plot_html} 117 | 118 | return render_template( 119 | "components/charts/merge_time/template.html", pr_merge_data=pr_merge_data, humanize_time=humanize_time 120 | ) 121 | 122 | except Exception: 123 | return render_template("components/charts/merge_time/template.html", pr_merge_data=None) 124 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/merge_time/data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def calculate_pmt(repo_df: pd.DataFrame) -> tuple: 8 | """ 9 | Calculate PR Merge Time (PMT) metrics 10 | 11 | Args: 12 | repo_df (pd.DataFrame): DataFrame containing PR events 13 | 14 | Returns: 15 | tuple: (median_time, pr_times DataFrame, percentile_values) 16 | """ 17 | try: 18 | if not isinstance(repo_df, pd.DataFrame): 19 | return None, None, None 20 | 21 | if repo_df.empty: 22 | return None, None, None 23 | 24 | # Ensure 'time' is in datetime format 25 | repo_df["time"] = pd.to_datetime(repo_df["time"]) 26 | 27 | # Filter for PR creation and merge events 28 | pr_created = repo_df[repo_df["event_type"] == "pr_created"][["pr_number", "time"]] 29 | pr_merged = repo_df[repo_df["event_type"] == "pr_merged"][["pr_number", "time"]] 30 | 31 | # Merge the two DataFrames on 'pr_number' 32 | pr_times = pd.merge(pr_created, pr_merged, on="pr_number", suffixes=("_created", "_merged")) 33 | 34 | if len(pr_times) == 0: 35 | return None, None, None 36 | 37 | # Calculate the time difference in hours 38 | pr_times["merge_time"] = (pr_times["time_merged"] - pr_times["time_created"]).dt.total_seconds() / 3600 39 | 40 | # Calculate metrics 41 | median_time = pr_times["merge_time"].median() 42 | percentile_values = np.percentile(pr_times["merge_time"], [25, 50, 75]) 43 | 44 | return median_time, pr_times, percentile_values 45 | 46 | except Exception as e: 47 | logging.error(f"Error calculating PMT: {e}") 48 | return None, None, None 49 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/merge_time/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}PR Merge Time{% endblock %} 4 | {% block metrics %} 5 | {% if pr_merge_data %} 6 | {{ metric(label="Overall Median Merge Time", value=humanize_time(pr_merge_data.median_time|default(0)), tip="Median time 7 | from PR creation to merge") }} 8 | {% endif %} 9 | {% endblock %} 10 | {% block chart %} 11 | {% if pr_merge_data %} 12 |
13 | {% if pr_merge_data.plot_html %} 14 | {{ pr_merge_data.plot_html | safe }} 15 | {% endif %} 16 |
17 | {% else %} 18 |

No PR merge time data available

19 | {% endif %} 20 | {% endblock %} 21 | {% block caption %} 22 | Shows the distribution of time taken for pull requests to merge from creation. 23 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/metric.html: -------------------------------------------------------------------------------- 1 | {% macro metric(label, value, tip=None) %} 2 |
3 |
4 | {{ label }} 5 | {% if tip %} 6 | 8 | 12 | {% endif %} 13 |
14 |
15 | {{ value }} 16 |
17 |
18 | {% endmacro %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_coverage/__init__.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objects as go 3 | from components.charts.utils import ( 4 | apply_theme_to_figure, 5 | get_plotly_config, 6 | get_theme_colors, 7 | ) 8 | from flask import render_template 9 | 10 | from .data import get_review_merge_data 11 | 12 | 13 | def create_coverage_donut_plot(coverage_data: dict) -> dict: 14 | """Create donut chart visualization for review coverage""" 15 | 16 | # Calculate values 17 | reviewed = coverage_data["reviewed_prs"] 18 | unreviewed = coverage_data["unreviewed_prs"] 19 | 20 | # Get theme colors for the chart 21 | colors = get_theme_colors(2) 22 | 23 | # Create figure 24 | fig = go.Figure( 25 | data=[ 26 | go.Pie( 27 | values=[reviewed, unreviewed], 28 | labels=["Merged With Review", "Merged Without Review"], 29 | hole=0.4, 30 | textinfo="percent", 31 | marker_colors=colors, # Use theme colors 32 | domain={ 33 | "x": [0.15, 0.85], 34 | "y": [0.05, 0.85], 35 | }, # Balanced whitespace around the chart 36 | textposition="inside", 37 | hovertemplate="%{label}: %{customdata}", 38 | customdata=[ 39 | f"{reviewed} PR{'s' if reviewed != 1 else ''}", 40 | f"{unreviewed} PR{'s' if unreviewed != 1 else ''}", 41 | ], 42 | insidetextorientation="auto", 43 | ) 44 | ] 45 | ) 46 | 47 | # Update layout 48 | fig.update_layout( 49 | showlegend=True, 50 | height=350, # Reduce height to remove extra whitespace 51 | margin=dict(t=10, b=10, l=10, r=10), # Balanced margins around the chart 52 | legend=dict( 53 | orientation="h", 54 | yanchor="top", 55 | y=1.0, # Position at the top of the chart 56 | xanchor="center", # Center the legend 57 | x=0.5, # Center position 58 | bgcolor="rgba(255,255,255,0.8)", 59 | ), 60 | autosize=True, 61 | width=None, 62 | paper_bgcolor="rgba(0,0,0,0)", 63 | plot_bgcolor="rgba(0,0,0,0)", 64 | ) 65 | 66 | # Apply theme to the figure 67 | fig = apply_theme_to_figure(fig) 68 | 69 | return fig 70 | 71 | 72 | def render(repo_df): 73 | """Render the review coverage chart component""" 74 | 75 | try: 76 | # Get coverage data 77 | coverage_data = get_review_merge_data(repo_df) 78 | 79 | if not coverage_data: 80 | return render_template("components/charts/review_coverage/template.html", coverage_data=None) 81 | 82 | # Create plot figure 83 | fig = create_coverage_donut_plot(coverage_data) 84 | 85 | # Get plotly config from theme 86 | config = get_plotly_config() 87 | 88 | # Convert the figure to HTML 89 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 90 | 91 | # Add plot to template data 92 | coverage_data["plot_html"] = plot_html 93 | 94 | return render_template( 95 | "components/charts/review_coverage/template.html", 96 | coverage_data=coverage_data, 97 | ) 98 | 99 | except Exception: 100 | return render_template("components/charts/review_coverage/template.html", coverage_data=None) 101 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_coverage/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def calculate_review_ratio_stats(repo_df: pd.DataFrame) -> dict: 5 | """Calculate PR review ratio statistics""" 6 | try: 7 | if repo_df.empty: 8 | return None 9 | 10 | # Group by PR number to get unique PRs and 
their events 11 | pr_summary = ( 12 | repo_df.groupby("pr_number") 13 | .agg( 14 | { 15 | "event_type": list, 16 | "time": "first", # Keep first timestamp for reference 17 | } 18 | ) 19 | .reset_index() 20 | ) 21 | 22 | # Count total PRs 23 | total_prs = len(pr_summary) 24 | 25 | # Count PRs that received a review (any type of review action) 26 | reviewed_prs = sum( 27 | any(event in events for event in ["review_commented", "review_approved", "review_changes_requested"]) 28 | for events in pr_summary["event_type"] 29 | ) 30 | 31 | # Calculate unreviewed PRs 32 | unreviewed_prs = total_prs - reviewed_prs 33 | 34 | # Calculate review percentage 35 | review_percentage = (reviewed_prs / total_prs * 100) if total_prs > 0 else 0 36 | 37 | return { 38 | "total_prs": total_prs, 39 | "reviewed_prs": reviewed_prs, 40 | "unreviewed_prs": unreviewed_prs, 41 | "review_percentage": review_percentage, 42 | } 43 | 44 | except Exception: 45 | return None 46 | 47 | 48 | def get_review_merge_data(repo_df: pd.DataFrame) -> dict: 49 | """Process raw data into review merge metrics""" 50 | 51 | stats = calculate_review_ratio_stats(repo_df) 52 | if not stats: 53 | return None 54 | 55 | return stats 56 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_coverage/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Review Coverage{% endblock %} 4 | {% block metrics %} 5 | {% if coverage_data %} 6 | {{ metric(label="Review-Merge Rate", value="%.1f"|format(coverage_data.review_percentage) ~ "%", tip="Percentage of 7 | merged PRs that received reviews") }} 8 | {% endif %} 9 | {% endblock %} 10 | {% block chart %} 11 | {% if coverage_data %} 12 | {% if coverage_data.plot_html %} 13 | {{ coverage_data.plot_html | safe }} 14 | {% endif %} 15 | {% else %} 16 |

No review coverage data available

17 | {% endif %} 18 | {% endblock %} 19 | {% block caption %} 20 | Shows the percentage of pull requests that received reviews before being merged. 21 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_funnel/__init__.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objects as go 3 | from components.charts.utils import ( 4 | apply_theme_to_figure, 5 | get_plotly_config, 6 | get_theme_colors, 7 | ) 8 | from flask import render_template 9 | 10 | from .data import get_review_funnel_data 11 | 12 | 13 | def create_review_funnel_plot(funnel_data: dict) -> go.Figure: 14 | """Create visualization for review funnel""" 15 | 16 | # Prepare data for funnel chart with counts in the labels 17 | values = [funnel_data["total_prs"], funnel_data["reviewed_prs"], funnel_data["approved_prs"]] 18 | 19 | # Calculate relative percentages (each step relative to previous) 20 | total = values[0] 21 | reviewed = values[1] 22 | approved = values[2] 23 | 24 | # Create stage labels without counts 25 | stages = ["Total PRs", "Reviewed", "Approved"] 26 | 27 | # Format text to show just the count 28 | text = [str(values[0]), str(values[1]), str(values[2])] 29 | 30 | # Create hover text with percentages 31 | hover_text = [ 32 | f"{values[0]} PR{'s' if values[0] != 1 else ''} Total", 33 | f"{values[1]} PR{'s' if values[1] != 1 else ''} Reviewed ({reviewed / total * 100:.0f}%)", 34 | f"{values[2]} PR{'s' if values[2] != 1 else ''} Approved ({approved / reviewed * 100:.0f}%)", 35 | ] 36 | 37 | # Get theme colors 38 | colors = get_theme_colors(3) 39 | 40 | # Create figure 41 | fig = go.Figure( 42 | data=[ 43 | go.Funnel( 44 | y=stages, 45 | x=values, 46 | textinfo="text", 47 | textposition="auto", # Automatically place text inside or outside based on space 48 | text=text, 49 | hovertemplate="%{customdata}", 50 | customdata=hover_text, 51 | marker={ 52 | "color": colors, 53 | "line": {"width": 0}, # Remove the line around funnel segments 54 | }, 55 | connector={"line": {"color": "rgba(0,0,0,0)", "width": 0}}, # Make connector lines invisible 56 | textfont={"size": 14}, # Match font size 57 | ) 58 | ] 59 | ) 60 | 61 | # Update layout 62 | fig.update_layout( 63 | showlegend=False, 64 | margin={"t": 20, "l": 150, "r": 100, "b": 20}, 65 | height=300, 66 | font={"size": 14}, 67 | # Hide all axis lines, ticks, and grid lines 68 | xaxis={"showgrid": False, "zeroline": False, "showline": False, "showticklabels": False, "ticks": ""}, 69 | yaxis={ 70 | "showgrid": False, 71 | "zeroline": False, 72 | "showline": False, 73 | "ticks": "", 74 | "tickmode": "array", 75 | "ticktext": stages, # Use the HTML formatted labels 76 | "tickfont": {"size": 14}, # Match the font size 77 | }, 78 | ) 79 | 80 | # Apply theme to the figure 81 | fig = apply_theme_to_figure(fig) 82 | 83 | return fig 84 | 85 | 86 | def render(repo_df): 87 | """Render the review funnel chart component""" 88 | 89 | try: 90 | # Get funnel data 91 | funnel_data = get_review_funnel_data(repo_df) 92 | 93 | if not funnel_data: 94 | return render_template("components/charts/review_funnel/template.html", review_data=None) 95 | 96 | # Calculate rates 97 | total_prs = funnel_data["total_prs"] 98 | reviewed_prs = funnel_data["reviewed_prs"] 99 | approved_prs = funnel_data["approved_prs"] 100 | 101 | review_rate = (reviewed_prs / total_prs * 100) if total_prs > 0 else 0 102 | approval_rate = (approved_prs / reviewed_prs * 
100) if reviewed_prs > 0 else 0 103 | 104 | # Create plot figure 105 | fig = create_review_funnel_plot(funnel_data) 106 | 107 | # Get plotly config from theme 108 | config = get_plotly_config() 109 | 110 | # Convert the figure to HTML 111 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 112 | 113 | # Prepare data for template 114 | template_data = { 115 | "total_prs": total_prs, 116 | "reviewed_prs": reviewed_prs, 117 | "approved_prs": approved_prs, 118 | "review_rate": review_rate, 119 | "approval_rate": approval_rate, 120 | "plot_html": plot_html, 121 | } 122 | 123 | return render_template("components/charts/review_funnel/template.html", review_data=template_data) 124 | 125 | except Exception: 126 | return render_template("components/charts/review_funnel/template.html", review_data=None) 127 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_funnel/data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pandas as pd 4 | 5 | 6 | def get_pr_review_stats(pr_summary: pd.DataFrame) -> dict: 7 | """ 8 | Calculate review flow statistics from PR summary DataFrame 9 | 10 | Args: 11 | pr_summary (pd.DataFrame): DataFrame from load_repository_prs 12 | 13 | Returns: 14 | dict: Review statistics including counts of different review states 15 | """ 16 | # Group by PR number to get unique PRs and their events 17 | pr_events = ( 18 | pr_summary.groupby("pr_number") 19 | .agg( 20 | { 21 | "event_type": list, 22 | "time": "first", # Keep first timestamp for reference 23 | } 24 | ) 25 | .reset_index() 26 | ) 27 | 28 | total_prs = len(pr_events) 29 | 30 | # Count different review states 31 | review_requested = sum("review_requested" in events for events in pr_events["event_type"]) 32 | review_completed = sum( 33 | any(event in events for event in ["review_commented", "review_changes_requested", "review_approved"]) 34 | and "review_requested" in events 35 | for events in pr_events["event_type"] 36 | ) 37 | review_approved = sum( 38 | "review_approved" in events and "review_requested" in events for events in pr_events["event_type"] 39 | ) 40 | approved_without_request = sum( 41 | "review_approved" in events and "review_requested" not in events for events in pr_events["event_type"] 42 | ) 43 | merged_without_review = sum( 44 | not any(event in events for event in ["review_approved", "review_commented", "review_changes_requested"]) 45 | for events in pr_events["event_type"] 46 | ) 47 | 48 | return { 49 | "total_prs": total_prs, 50 | "review_requested": review_requested, 51 | "review_completed": review_completed, 52 | "review_approved": review_approved, 53 | "approved_without_review_request": approved_without_request, 54 | "merged_without_review": merged_without_review, 55 | } 56 | 57 | 58 | def analyze_pr_review_flow(repo_df: pd.DataFrame) -> dict: 59 | """Analyze PR review flow metrics for a repository""" 60 | 61 | if repo_df.empty: 62 | return None 63 | 64 | return get_pr_review_stats(repo_df) 65 | 66 | 67 | def get_simplified_pr_flow_stats(pr_summary: pd.DataFrame) -> dict: 68 | """ 69 | Calculate simplified PR flow statistics with just created, reviewed, and approved stages 70 | 71 | Args: 72 | pr_summary (pd.DataFrame): DataFrame from load_repository_prs 73 | 74 | Returns: 75 | dict: Review statistics with basic flow stages 76 | """ 77 | total_prs = len(pr_summary) 78 | 79 | # Count PRs that received any type of review 
80 | reviewed_prs = sum( 81 | any(event in events for event in ["review_commented", "review_changes_requested", "review_approved"]) 82 | for events in pr_summary["event_type"] 83 | ) 84 | 85 | # Count PRs that were approved 86 | approved_prs = sum("review_approved" in events for events in pr_summary["event_type"]) 87 | 88 | return {"total_prs": total_prs, "reviewed_prs": reviewed_prs, "approved_prs": approved_prs} 89 | 90 | 91 | def analyze_simplified_pr_flow(repo_df: pd.DataFrame) -> dict: 92 | """Analyze simplified PR flow metrics for a repository""" 93 | 94 | if repo_df.empty: 95 | return None 96 | 97 | # Group by PR number to get unique PRs and their events 98 | pr_events = ( 99 | repo_df.groupby("pr_number") 100 | .agg( 101 | { 102 | "event_type": list, 103 | "time": "first", # Keep first timestamp for reference 104 | } 105 | ) 106 | .reset_index() 107 | ) 108 | 109 | return get_simplified_pr_flow_stats(pr_events) 110 | 111 | 112 | def get_review_funnel_data(repo_df: pd.DataFrame) -> dict: 113 | """Process raw data into review funnel metrics""" 114 | 115 | if repo_df.empty: 116 | logging.debug("Empty repository dataframe") 117 | return None 118 | 119 | # Group by PR number to get unique PRs and their events 120 | pr_events = repo_df.groupby("pr_number").agg({"event_type": list, "time": "first"}).reset_index() 121 | 122 | logging.debug(f"PR events shape: {pr_events.shape}") 123 | 124 | total_prs = len(pr_events) 125 | 126 | # Count PRs that received any type of review 127 | reviewed_prs = sum( 128 | any(event in events for event in ["review_commented", "review_changes_requested", "review_approved"]) 129 | for events in pr_events["event_type"] 130 | ) 131 | 132 | # Count PRs that were approved 133 | approved_prs = sum("review_approved" in events for events in pr_events["event_type"]) 134 | 135 | logging.debug(f"Total PRs: {total_prs}, Reviewed: {reviewed_prs}, Approved: {approved_prs}") 136 | 137 | if total_prs == 0: 138 | logging.debug("No PRs found") 139 | return None 140 | 141 | return {"total_prs": total_prs, "reviewed_prs": reviewed_prs, "approved_prs": approved_prs} 142 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_funnel/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Review Funnel{% endblock %} 4 | {% block metrics %} 5 | {% if review_data %} 6 | {{ metric(label="Review Rate", value="%.1f"|format(review_data.review_rate) ~ "%", tip="Percentage of PRs that received 7 | reviews") }} 8 | {{ metric(label="Approval Rate", value="%.1f"|format(review_data.approval_rate) ~ "%", tip="Percentage of reviewed PRs 9 | that were approved") }} 10 | {% endif %} 11 | {% endblock %} 12 | {% block chart %} 13 | {% if review_data %} 14 |
15 | {% if review_data.plot_html %} 16 | {{ review_data.plot_html | safe }} 17 | {% endif %} 18 |
19 | {% else %} 20 |

No review process data available

21 | {% endif %} 22 | {% endblock %} 23 | {% block caption %} 24 | Shows the progression of pull requests from creation through review to approval. 25 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_turnaround/__init__.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objects as go 3 | from components.charts.utils import ( 4 | apply_theme_to_figure, 5 | get_plotly_config, 6 | get_theme_colors, 7 | humanize_time, 8 | ) 9 | from flask import render_template 10 | 11 | from .data import get_review_turnaround_data 12 | 13 | 14 | def create_turnaround_distribution_plot(turnaround_data: dict) -> go.Figure: 15 | """Create visualization for review turnaround distribution""" 16 | 17 | # Calculate percentages for each segment 18 | within_1h = turnaround_data["within_1h"] 19 | within_4h = turnaround_data["within_4h"] - turnaround_data["within_1h"] 20 | within_24h = turnaround_data["within_24h"] - turnaround_data["within_4h"] 21 | over_24h = 100 - turnaround_data["within_24h"] 22 | 23 | # Calculate counts for hover text 24 | total_prs = turnaround_data["total_prs"] 25 | within_1h_count = int(within_1h * total_prs / 100) 26 | within_4h_count = int((turnaround_data["within_4h"] - turnaround_data["within_1h"]) * total_prs / 100) 27 | within_24h_count = int((turnaround_data["within_24h"] - turnaround_data["within_4h"]) * total_prs / 100) 28 | over_24h_count = total_prs - within_1h_count - within_4h_count - within_24h_count 29 | 30 | # Get theme colors for the chart 31 | colors = get_theme_colors(4) 32 | 33 | # Create figure 34 | fig = go.Figure() 35 | 36 | # Add each segment in the order they should appear in the chart 37 | fig.add_trace( 38 | go.Bar( 39 | y=[""], 40 | x=[within_1h], 41 | name="Within 1 hour", 42 | orientation="h", 43 | marker=dict(color=colors[0], line=dict(width=0)), 44 | hoverinfo="text", 45 | hovertext=[f"Within 1 hour: {within_1h_count} {'PR' if within_1h_count == 1 else 'PRs'}"], 46 | text=[f"{within_1h:.1f}%"], 47 | textposition="auto", 48 | insidetextanchor="middle", 49 | ) 50 | ) 51 | 52 | fig.add_trace( 53 | go.Bar( 54 | y=[""], 55 | x=[within_4h], 56 | name="Within 4 hours", 57 | orientation="h", 58 | marker=dict(color=colors[1], line=dict(width=0)), 59 | hoverinfo="text", 60 | hovertext=[f"Within 4 hours: {within_4h_count} {'PR' if within_4h_count == 1 else 'PRs'}"], 61 | text=[f"{within_4h:.1f}%"], 62 | textposition="auto", 63 | insidetextanchor="middle", 64 | ) 65 | ) 66 | 67 | fig.add_trace( 68 | go.Bar( 69 | y=[""], 70 | x=[within_24h], 71 | name="Within 24 hours", 72 | orientation="h", 73 | marker=dict(color=colors[2], line=dict(width=0)), 74 | hoverinfo="text", 75 | hovertext=[f"Within 24 hours: {within_24h_count} {'PR' if within_24h_count == 1 else 'PRs'}"], 76 | text=[f"{within_24h:.1f}%"], 77 | textposition="auto", 78 | insidetextanchor="middle", 79 | ) 80 | ) 81 | 82 | fig.add_trace( 83 | go.Bar( 84 | y=[""], 85 | x=[over_24h], 86 | name="Over 24 hours", 87 | orientation="h", 88 | marker=dict(color=colors[3], line=dict(width=0)), 89 | hoverinfo="text", 90 | hovertext=[f"Over 24 hours: {over_24h_count} {'PR' if over_24h_count == 1 else 'PRs'}"], 91 | text=[f"{over_24h:.1f}%"], 92 | textposition="auto", 93 | insidetextanchor="middle", 94 | ) 95 | ) 96 | 97 | # Add x-axis ticks 98 | tick_vals = [0, 20, 40, 60, 80, 100] 99 | 100 | # Update layout 101 | fig.update_layout( 102 | barmode="stack", 103 | 
showlegend=True, 104 | legend=dict( 105 | orientation="h", 106 | yanchor="bottom", 107 | y=1.1, 108 | xanchor="center", 109 | x=0.5, 110 | traceorder="normal", 111 | font=dict(size=10), 112 | ), 113 | margin=dict(t=30, l=0, r=0, b=20), 114 | height=150, 115 | uniformtext=dict(mode="hide", minsize=10), 116 | xaxis=dict( 117 | range=[0, 100], 118 | showgrid=True, 119 | tickvals=tick_vals, 120 | zeroline=False, 121 | fixedrange=True, 122 | ), 123 | yaxis=dict(showticklabels=False, showgrid=False, fixedrange=True), 124 | plot_bgcolor="white", 125 | paper_bgcolor="white", 126 | ) 127 | 128 | # Apply theme to figure 129 | fig = apply_theme_to_figure(fig) 130 | 131 | return fig 132 | 133 | 134 | def render(repo_df): 135 | """Render the review turnaround chart component""" 136 | 137 | try: 138 | # Get turnaround data 139 | turnaround_data = get_review_turnaround_data(repo_df) 140 | 141 | if not turnaround_data: 142 | return render_template( 143 | "components/charts/review_turnaround/template.html", 144 | turnaround_data=None, 145 | ) 146 | 147 | # Create plot figure 148 | fig = create_turnaround_distribution_plot(turnaround_data) 149 | 150 | # Get plotly config from theme 151 | config = get_plotly_config() 152 | 153 | # Convert the figure to HTML 154 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 155 | 156 | # Prepare data for template 157 | chart_data = { 158 | "plot_html": plot_html, 159 | "median_hours": turnaround_data["median_hours"], 160 | "total_prs": turnaround_data["total_prs"], 161 | "reviewed_prs": turnaround_data["reviewed_prs"], 162 | "review_rate": turnaround_data["review_rate"], 163 | "within_1h": turnaround_data["within_1h"], 164 | "within_4h": turnaround_data["within_4h"], 165 | "within_24h": turnaround_data["within_24h"], 166 | } 167 | 168 | # Pass the prepared data to the template 169 | return render_template( 170 | "components/charts/review_turnaround/template.html", 171 | turnaround_data=chart_data, 172 | humanize_time=humanize_time, 173 | ) 174 | 175 | except Exception: 176 | return render_template("components/charts/review_turnaround/template.html", turnaround_data=None) 177 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_turnaround/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def calculate_rtt_trends(repo_df: pd.DataFrame) -> pd.DataFrame: 5 | """ 6 | Calculate Review Turnaround Time (RTT) trends over time 7 | 8 | Args: 9 | repo_df (pd.DataFrame): DataFrame containing PR events 10 | 11 | Returns: 12 | pd.DataFrame: DataFrame with RTT trends 13 | """ 14 | try: 15 | if repo_df.empty: 16 | return pd.DataFrame() 17 | 18 | # Get PR creation and first review request times for each PR 19 | pr_created = ( 20 | repo_df[repo_df["event_type"] == "pr_created"] 21 | .groupby("pr_number") 22 | .agg( 23 | { 24 | "time": "first", 25 | "pr_title": "first", # Get PR title for hover info 26 | } 27 | ) 28 | ) 29 | review_requests = repo_df[repo_df["event_type"] == "review_requested"].groupby("pr_number")["time"].first() 30 | 31 | # Match PRs that have both creation and review request times 32 | matched_prs = pd.DataFrame( 33 | { 34 | "created_time": pr_created["time"], 35 | "pr_title": pr_created["pr_title"], 36 | "review_requested_time": review_requests, 37 | } 38 | ).dropna() 39 | 40 | if matched_prs.empty: 41 | return pd.DataFrame() 42 | 43 | # Calculate time difference in hours 44 | 
matched_prs["turnaround_hours"] = ( 45 | matched_prs["review_requested_time"] - matched_prs["created_time"] 46 | ).dt.total_seconds() / 3600 47 | 48 | # Sort by creation time 49 | matched_prs = matched_prs.sort_values("created_time") 50 | 51 | # Calculate rolling median (7 PRs window) 52 | matched_prs["rolling_median"] = matched_prs["turnaround_hours"].rolling(window=7, min_periods=1).median() 53 | 54 | return matched_prs 55 | 56 | except Exception: 57 | return pd.DataFrame() 58 | 59 | 60 | def calculate_rtt(repo_df: pd.DataFrame) -> float: 61 | """Calculate overall median RTT""" 62 | try: 63 | trends_df = calculate_rtt_trends(repo_df) 64 | if trends_df.empty: 65 | return None 66 | return trends_df["turnaround_hours"].median() 67 | except Exception: 68 | return None 69 | 70 | 71 | def calculate_rtt_stats(repo_df: pd.DataFrame) -> dict: 72 | """Calculate RTT statistics including thresholds and distribution""" 73 | try: 74 | if repo_df.empty: 75 | return None 76 | 77 | # Get all PRs created 78 | all_prs = repo_df[repo_df["event_type"] == "pr_created"]["pr_number"].nunique() 79 | 80 | # Initialize DataFrame to store turnaround times 81 | turnaround_times = [] 82 | 83 | # Process each PR 84 | for pr_number in repo_df[repo_df["event_type"] == "pr_created"]["pr_number"].unique(): 85 | pr_events = repo_df[repo_df["pr_number"] == pr_number].sort_values("time") 86 | 87 | # Get PR creation time 88 | pr_created_time = pr_events[pr_events["event_type"] == "pr_created"]["time"].iloc[0] 89 | 90 | # Check for review requests 91 | review_requests = pr_events[pr_events["event_type"] == "review_requested"] 92 | 93 | if not review_requests.empty: 94 | # For each review request, find the first review action from that reviewer 95 | for _, request in review_requests.iterrows(): 96 | request_time = request["time"] 97 | requested_reviewer = request.get("target_user") # Use get() to avoid KeyError 98 | 99 | if not requested_reviewer: 100 | continue 101 | 102 | # Find first review action from this reviewer 103 | review_actions = pr_events[ 104 | (pr_events["time"] > request_time) 105 | & (pr_events["actor"] == requested_reviewer) 106 | & ( 107 | pr_events["event_type"].isin( 108 | ["review_approved", "review_changes_requested", "review_commented"] 109 | ) 110 | ) 111 | ] 112 | 113 | if not review_actions.empty: 114 | first_review_time = review_actions["time"].iloc[0] 115 | turnaround_hours = (first_review_time - request_time).total_seconds() / 3600 116 | turnaround_times.append(turnaround_hours) 117 | break # Only consider the first successful review request 118 | else: 119 | # If no review request, measure from PR creation to first review action 120 | review_actions = pr_events[ 121 | pr_events["event_type"].isin(["review_approved", "review_changes_requested", "review_commented"]) 122 | ] 123 | 124 | if not review_actions.empty: 125 | first_review_time = review_actions["time"].iloc[0] 126 | turnaround_hours = (first_review_time - pr_created_time).total_seconds() / 3600 127 | turnaround_times.append(turnaround_hours) 128 | 129 | if not turnaround_times: 130 | return None 131 | 132 | turnaround_times = pd.Series(turnaround_times) 133 | 134 | # Calculate statistics 135 | stats = { 136 | "median_hours": turnaround_times.median(), 137 | "total_prs": all_prs, 138 | "reviewed_prs": len(turnaround_times), 139 | "review_rate": (len(turnaround_times) / all_prs) * 100, 140 | "within_1h": (turnaround_times <= 1).mean() * 100, 141 | "within_4h": (turnaround_times <= 4).mean() * 100, 142 | "within_24h": (turnaround_times <= 
24).mean() * 100, 143 | } 144 | 145 | return stats 146 | 147 | except Exception: 148 | return None 149 | 150 | 151 | def get_review_turnaround_data(repo_df: pd.DataFrame) -> dict: 152 | """Process raw data into review turnaround metrics""" 153 | try: 154 | # Validate input data 155 | if repo_df is None or not isinstance(repo_df, pd.DataFrame): 156 | return None 157 | 158 | if repo_df.empty: 159 | return None 160 | 161 | # Check for required columns 162 | required_columns = ["event_type", "pr_number", "time", "actor"] 163 | missing_columns = [col for col in required_columns if col not in repo_df.columns] 164 | if missing_columns: 165 | return None 166 | 167 | # Check for required event types 168 | pr_created_events = repo_df[repo_df["event_type"] == "pr_created"] 169 | if len(pr_created_events) == 0: 170 | return None 171 | 172 | stats = calculate_rtt_stats(repo_df) 173 | if not stats: 174 | return None 175 | 176 | # Convert numpy values to Python floats 177 | return { 178 | "median_hours": float(stats["median_hours"]), 179 | "total_prs": stats["total_prs"], 180 | "reviewed_prs": stats["reviewed_prs"], 181 | "review_rate": float(stats["review_rate"]), 182 | "within_1h": float(stats["within_1h"]), 183 | "within_4h": float(stats["within_4h"]), 184 | "within_24h": float(stats["within_24h"]), 185 | } 186 | 187 | except Exception: 188 | return None 189 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_turnaround/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Review Turnaround{% endblock %} 4 | {% block metrics %} 5 | {% if turnaround_data %} 6 | {{ metric(label="Review Turnaround Time", value=humanize_time(turnaround_data.median_hours), tip="Median time to first 7 | review") }} 8 | {{ metric(label="Reviewed in ≤1 Hour", value="%.1f"|format(turnaround_data.within_1h) ~ "%", tip="PRs that received a 9 | review within 1 hour") }} 10 | {% endif %} 11 | {% endblock %} 12 | {% block chart %} 13 | {% if turnaround_data %} 14 | 15 |
16 | {% if turnaround_data.plot_html %} 17 | {{ turnaround_data.plot_html | safe }} 18 | {% else %} 19 |

Chart data generated but plot HTML is missing

20 | {% endif %} 21 |
22 | {% else %} 23 |

No review turnaround data available

24 | {% endif %} 25 | {% endblock %} 26 | {% block caption %} 27 | Shows the time required to receive the first review on pull requests. 28 | {% endblock %} 29 | {% block extra_js %} 30 | 57 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for chart components to apply consistent theming 3 | """ 4 | 5 | import pandas as pd 6 | import plotly.graph_objects as go 7 | import theme as theme 8 | 9 | 10 | def apply_theme_to_figure(fig: go.Figure) -> go.Figure: 11 | """ 12 | Apply the application theme to a Plotly figure 13 | 14 | Args: 15 | fig: Plotly figure to apply theme to 16 | 17 | Returns: 18 | go.Figure: Themed Plotly figure 19 | """ 20 | # Get the plotly template from our theme module 21 | template = theme.get_plotly_template() 22 | 23 | # Apply layout settings from the template 24 | for key, value in template["layout"].items(): 25 | if key not in fig.layout or fig.layout[key] is None: 26 | fig.layout[key] = value 27 | 28 | # Apply font settings if they exist 29 | if "font" in template["layout"]: 30 | if "font" not in fig.layout: 31 | fig.layout.font = template["layout"]["font"] 32 | else: 33 | for font_key, font_value in template["layout"]["font"].items(): 34 | if font_key not in fig.layout.font or fig.layout.font[font_key] is None: 35 | fig.layout.font[font_key] = font_value 36 | 37 | # Apply axis settings if they exist 38 | for axis in ["xaxis", "yaxis"]: 39 | if axis in template["layout"]: 40 | if axis not in fig.layout: 41 | fig.layout[axis] = template["layout"][axis] 42 | else: 43 | for axis_key, axis_value in template["layout"][axis].items(): 44 | if axis_key not in fig.layout[axis] or fig.layout[axis][axis_key] is None: 45 | fig.layout[axis][axis_key] = axis_value 46 | 47 | return fig 48 | 49 | 50 | def get_theme_colors(num_colors: int = 5, palette: str = "primary") -> list: 51 | """ 52 | Get a list of colors from the theme for charts 53 | 54 | Args: 55 | num_colors: Number of colors needed 56 | palette: Which palette to use ('primary', 'secondary', 'mono', 'diverging') 57 | 58 | Returns: 59 | list: List of color hex codes 60 | """ 61 | return theme.get_chart_colors(num_colors, palette) 62 | 63 | 64 | def get_plotly_config() -> dict: 65 | """ 66 | Get a consistent Plotly config for all charts 67 | 68 | Returns: 69 | dict: Plotly config 70 | """ 71 | return { 72 | "displayModeBar": False, 73 | "responsive": True, 74 | "displaylogo": False, # Disable the Plotly logo/advertisement 75 | "modeBarButtonsToRemove": ["sendDataToCloud", "autoScale2d", "resetScale2d"], 76 | } 77 | 78 | 79 | def humanize_time(hours, precision=1): 80 | """ 81 | Convert a time duration (in hours) to a human-readable string. 82 | Automatically selects the most appropriate unit (seconds to years) for display. 83 | 84 | Args: 85 | hours: Number of hours (input is always in hours) 86 | precision: Number of decimal places for values 87 | 88 | Returns: 89 | str: Human-readable string with appropriate unit (e.g. 
"2.5 minutes", "3 days") 90 | """ 91 | if hours is None or pd.isna(hours): 92 | return "N/A" 93 | 94 | # Convert hours to seconds for easier unit conversion 95 | seconds = hours * 3600 96 | 97 | # Less than a minute 98 | if seconds < 60: 99 | return f"{int(seconds)} seconds" 100 | 101 | # Less than an hour 102 | if seconds < 3600: 103 | minutes = seconds / 60 104 | return f"{minutes:.1f} minutes" 105 | 106 | # Less than a day 107 | if seconds < 86400: 108 | return f"{hours:.1f} hours" 109 | 110 | # Days 111 | days = hours / 24 112 | if days < 7: 113 | return f"{days:.1f} days" 114 | 115 | # Weeks 116 | weeks = days / 7 117 | if weeks < 4: 118 | return f"{weeks:.1f} weeks" 119 | 120 | # Months (approximate) 121 | months = days / 30.44 122 | if months < 12: 123 | return f"{months:.1f} months" 124 | 125 | # Years 126 | years = days / 365.25 127 | return f"{years:.1f} years" 128 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | import plotly.graph_objects as go 4 | from components.charts.utils import ( 5 | apply_theme_to_figure, 6 | get_plotly_config, 7 | get_theme_colors, 8 | ) 9 | from flask import render_template 10 | 11 | from .data import prepare_sankey_data 12 | 13 | 14 | def create_pr_flow_chart(data) -> Optional[Dict]: 15 | """Creates a Sankey diagram showing PR flow through different stages""" 16 | if not data: 17 | return None 18 | 19 | # Process links into sources and targets arrays 20 | sources = [] 21 | targets = [] 22 | values = [] 23 | node_values = [0] * len(data["nodes"]) # Initialize array for node values 24 | 25 | for link in data["links"]: 26 | source_idx = data["nodes"].index(link["source"]) 27 | target_idx = data["nodes"].index(link["target"]) 28 | sources.append(source_idx) 29 | targets.append(target_idx) 30 | values.append(link["value"]) 31 | node_values[source_idx] = link["value"] # Store value for each node 32 | 33 | # Create the Plotly figure 34 | # Get theme colors 35 | colors = get_theme_colors(len(data["nodes"]), "primary") 36 | 37 | fig = go.Figure( 38 | data=[ 39 | go.Sankey( 40 | arrangement="snap", 41 | node=dict( 42 | pad=15, 43 | thickness=20, 44 | line=dict(color="rgba(0,0,0,0.3)", width=0.5), 45 | label=data["nodes"], 46 | color=colors, # Using theme colors 47 | hoverlabel=dict( 48 | bgcolor="rgba(100,100,100,0.8)", # Semi-transparent dark background 49 | bordercolor="rgba(0,0,0,0)", # Transparent border 50 | font=dict(size=16, color="white"), # White text 51 | ), 52 | customdata=[ 53 | [val, "PR" if val == 1 else "PRs"] for val in node_values 54 | ], # Use node_values instead of values 55 | hovertemplate="%{value:.0f} %{customdata[1]}", # Simple PR count for nodes 56 | ), 57 | link=dict( 58 | source=sources, 59 | target=targets, 60 | value=values, 61 | color=["rgba(229, 229, 229, 0.5)"] * len(sources), 62 | hoverlabel=dict( 63 | bgcolor="rgba(100,100,100,0.8)", # Semi-transparent dark background 64 | bordercolor="rgba(0,0,0,0)", # Transparent border 65 | font=dict(size=16, color="white"), # White text 66 | ), 67 | customdata=[[val, "PR" if val == 1 else "PRs"] for val in values], 68 | hovertemplate="%{value:.0f} %{customdata[1]}
" 69 | + "%{source.label} → %{target.label}", # Clean text format for links 70 | ), 71 | ) 72 | ] 73 | ) 74 | 75 | fig.update_layout( 76 | title=None, 77 | font={"size": 14}, 78 | height=400, 79 | margin={"t": 20, "l": 20, "r": 20, "b": 20}, 80 | ) 81 | 82 | # Apply theme to the figure 83 | fig = apply_theme_to_figure(fig) 84 | 85 | # Convert to HTML with the consistent Plotly config 86 | return fig.to_html(full_html=False, include_plotlyjs=False, config=get_plotly_config()) 87 | 88 | 89 | def render(repo_df) -> str: 90 | pr_flow_data = prepare_sankey_data(repo_df) 91 | chart_html = create_pr_flow_chart(pr_flow_data) 92 | pr_count = repo_df["pr_number"].nunique() 93 | event_count = len(repo_df) 94 | 95 | # Format numbers with comma separators and no decimals 96 | formatted_pr_count = f"{pr_count:,}" 97 | formatted_event_count = f"{event_count:,}" 98 | 99 | if not chart_html: 100 | return "
No data available
" 101 | 102 | return render_template( 103 | "components/charts/workflow/template.html", 104 | chart_content=chart_html, 105 | pr_count=formatted_pr_count, 106 | event_count=formatted_event_count, 107 | ) 108 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/workflow/data.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | import pandas as pd 4 | 5 | 6 | def prepare_sankey_data(df: pd.DataFrame) -> Optional[Dict]: 7 | """ 8 | Process PR events into a format suitable for a Sankey diagram. 9 | 10 | Args: 11 | df: DataFrame containing PR events 12 | 13 | Returns: 14 | Dictionary containing nodes and links for the Sankey diagram, or None if no data 15 | """ 16 | if df.empty: 17 | return None 18 | 19 | # Group events by PR number to analyze flow 20 | pr_events = ( 21 | df.groupby("pr_number") 22 | .agg( 23 | { 24 | "event_type": list, 25 | "time": "first", # Keep first timestamp for reference 26 | } 27 | ) 28 | .reset_index() 29 | ) 30 | 31 | # Initialize node lists and link counts 32 | nodes = ["PRs Created"] 33 | links = [] 34 | 35 | # Count initial PRs 36 | total_prs = len(pr_events) 37 | 38 | # Track PRs at each stage 39 | review_requested = sum("review_requested" in events for events in pr_events["event_type"]) 40 | direct_reviews = sum( 41 | any(event in ["review_commented", "review_changes_requested", "review_approved"] for event in events) 42 | and "review_requested" not in events 43 | for events in pr_events["event_type"] 44 | ) 45 | no_review = total_prs - review_requested - direct_reviews 46 | 47 | # Add review request flow 48 | nodes.extend(["Review Requested", "No Review", "Direct Review"]) 49 | 50 | links.extend( 51 | [ 52 | {"source": "PRs Created", "target": "Review Requested", "value": review_requested}, 53 | {"source": "PRs Created", "target": "No Review", "value": no_review}, 54 | {"source": "PRs Created", "target": "Direct Review", "value": direct_reviews}, 55 | ] 56 | ) 57 | 58 | # Track review outcomes 59 | nodes.extend(["Approved", "Commented"]) 60 | 61 | # Count PRs by their review outcome 62 | approved_prs = sum("review_approved" in events for events in pr_events["event_type"]) 63 | commented_prs = sum( 64 | "review_commented" in events and "review_approved" not in events for events in pr_events["event_type"] 65 | ) 66 | 67 | # Calculate how many PRs went from each review path to each outcome 68 | # For Review Requested path 69 | if review_requested > 0: 70 | approved_from_requested = sum( 71 | "review_approved" in events and "review_requested" in events for events in pr_events["event_type"] 72 | ) 73 | commented_from_requested = sum( 74 | "review_commented" in events and "review_requested" in events and "review_approved" not in events 75 | for events in pr_events["event_type"] 76 | ) 77 | 78 | # Add links for review outcomes 79 | if approved_from_requested > 0: 80 | links.append({"source": "Review Requested", "target": "Approved", "value": approved_from_requested}) 81 | if commented_from_requested > 0: 82 | links.append({"source": "Review Requested", "target": "Commented", "value": commented_from_requested}) 83 | 84 | # If there are remaining PRs with review requested but no outcome, add them to Approved 85 | remaining_requested = review_requested - approved_from_requested - commented_from_requested 86 | if remaining_requested > 0: 87 | links.append({"source": "Review Requested", "target": "Approved", "value": 
remaining_requested}) 88 | 89 | # For Direct Review path 90 | if direct_reviews > 0: 91 | approved_from_direct = sum( 92 | "review_approved" in events and "review_requested" not in events for events in pr_events["event_type"] 93 | ) 94 | commented_from_direct = sum( 95 | "review_commented" in events and "review_requested" not in events and "review_approved" not in events 96 | for events in pr_events["event_type"] 97 | ) 98 | 99 | # Add links for review outcomes 100 | if approved_from_direct > 0: 101 | links.append({"source": "Direct Review", "target": "Approved", "value": approved_from_direct}) 102 | if commented_from_direct > 0: 103 | links.append({"source": "Direct Review", "target": "Commented", "value": commented_from_direct}) 104 | 105 | # If there are remaining PRs with direct review but no outcome, add them to Commented 106 | remaining_direct = direct_reviews - approved_from_direct - commented_from_direct 107 | if remaining_direct > 0: 108 | links.append({"source": "Direct Review", "target": "Commented", "value": remaining_direct}) 109 | 110 | # Add final state - Merged 111 | nodes.append("Merged") 112 | 113 | # Use actual counts for merge paths instead of arbitrary allocation 114 | approved_to_merged = approved_prs 115 | comments_to_merged = commented_prs 116 | 117 | # Add links to Merged 118 | if approved_to_merged > 0: 119 | links.append({"source": "Approved", "target": "Merged", "value": approved_to_merged}) 120 | 121 | if comments_to_merged > 0: 122 | links.append({"source": "Commented", "target": "Merged", "value": comments_to_merged}) 123 | 124 | # Only use the original no_review count when connecting to Merged 125 | if no_review > 0: 126 | links.append({"source": "No Review", "target": "Merged", "value": no_review}) 127 | 128 | # Remove any links with zero value 129 | links = [link for link in links if link["value"] > 0] 130 | 131 | return {"nodes": nodes, "links": links} 132 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/workflow/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Code Review Workflow{% endblock %} 4 | {% block metrics %} 5 | {{ metric(label="Pull Requests", value=pr_count, tip="Number of pull requests") }} 6 | {{ metric(label="Events", value=event_count, tip="Number of events") }} 7 | {% endblock %} 8 | {% block chart %} 9 | {{ chart_content | safe }} 10 | {% endblock %} 11 | {% block caption %}Visualizes the review workflow of pull requests from creation to merge.{% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/fetcher/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pullflow/collab-dev/adcaa2efb3418c1a8aebb2ad98bf46b3a99aa9b2/src/collab_dev/fetcher/__init__.py -------------------------------------------------------------------------------- /src/collab_dev/fetcher/api_client.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import Any, Dict, Optional 4 | 5 | import requests 6 | from dotenv import load_dotenv 7 | 8 | # Load environment variables 9 | load_dotenv() 10 | 11 | # Configure logging 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger(__name__) 14 | 15 | # 
GitHub API base URL 16 | BASE_URL = "https://api.github.com" 17 | 18 | 19 | def get_api_token() -> Optional[str]: 20 | """Get GitHub API token from environment variable.""" 21 | token = os.getenv("GITHUB_TOKEN") 22 | if not token: 23 | logger.warning("GITHUB_TOKEN environment variable not set. API rate limits may apply.") 24 | return token 25 | 26 | 27 | def get( 28 | path: str, 29 | params: Optional[Dict[str, Any]] = None, 30 | headers: Optional[Dict[str, str]] = None, 31 | ) -> Dict: 32 | """ 33 | Make a GET request to GitHub API. 34 | 35 | Args: 36 | path: The API endpoint path (without the base URL) 37 | params: Optional query parameters 38 | headers: Optional additional headers 39 | 40 | Returns: 41 | The JSON response as a dictionary 42 | """ 43 | url = f"{BASE_URL}/{path.lstrip('/')}" 44 | 45 | # Initialize headers if None 46 | if headers is None: 47 | headers = {} 48 | 49 | # Use GitHub token if available 50 | token = get_api_token() 51 | if token: 52 | headers["Authorization"] = f"token {token}" 53 | 54 | # Make the request 55 | response = requests.get(url, params=params, headers=headers) 56 | response.raise_for_status() 57 | 58 | return response.json() 59 | -------------------------------------------------------------------------------- /src/collab_dev/fetcher/fetch.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | from typing import Dict, List, Optional, Tuple 5 | 6 | from dotenv import load_dotenv 7 | 8 | from . import store 9 | from .github_utils import ( 10 | github_graphql_get_merged_pull_requests, 11 | github_graphql_get_pull_request_events, 12 | github_graphql_get_repository, 13 | process_timeline_events, 14 | ) 15 | 16 | # Load environment variables 17 | load_dotenv() 18 | 19 | # Configure logging 20 | logging.basicConfig(level=logging.INFO) 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | def extract_repo_parts(repo_url: str) -> Tuple[str, str]: 25 | """Extract owner and name from a GitHub repository URL.""" 26 | # Match patterns like https://github.com/owner/repo or owner/repo 27 | pattern = r"(?:https?://github\.com/)?([^/]+)/([^/]+)" 28 | match = re.match(pattern, repo_url) 29 | 30 | if not match: 31 | raise ValueError(f"Invalid GitHub repository URL: {repo_url}") 32 | 33 | return match.group(1), match.group(2) 34 | 35 | 36 | def process_repository(owner: str, name: str, max_prs: Optional[int] = None, category: str = None) -> dict: 37 | """Process a repository - main entry point function 38 | 39 | Args: 40 | owner: GitHub repository owner 41 | name: GitHub repository name 42 | max_prs: Maximum number of pull requests to fetch (None for no limit) 43 | category: Optional category to classify the repository 44 | 45 | Returns: 46 | Dictionary with repository processing results 47 | """ 48 | return fetch_repository_info(owner, name, max_prs=max_prs, category=category) 49 | 50 | 51 | def error_handler(func): 52 | def wrapper(*args, **kwargs): 53 | try: 54 | return func(*args, **kwargs) 55 | except Exception as e: 56 | import traceback 57 | 58 | logger.error(f"Error in {func.__name__}: {e}") 59 | logger.error(f"Stack trace: {traceback.format_exc()}") 60 | raise e 61 | 62 | return wrapper 63 | 64 | 65 | def get_repository_info(owner: str, name: str) -> Dict: 66 | """Fetch repository information from GitHub using GraphQL.""" 67 | return github_graphql_get_repository(owner, name) 68 | 69 | 70 | def get_pull_requests(owner: str, name: str, max_prs: Optional[int] = None) -> List[Dict]: 
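As a quick illustration of the REST wrapper above: `get()` joins the path onto `BASE_URL` and attaches the token header when one is available, so a one-off call reads like this. The endpoint and response fields are standard GitHub REST API values; the repository is just an example.

```python
# Sketch: fetch repository metadata through the thin REST helper.
from collab_dev.fetcher.api_client import get

repo = get("repos/facebook/react")
print(repo["full_name"])         # "facebook/react"
print(repo["stargazers_count"])  # integer star count
```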
71 | """Fetch merged pull requests from GitHub using GraphQL API.""" 72 | pull_requests = github_graphql_get_merged_pull_requests(owner, name) 73 | 74 | if max_prs: 75 | pull_requests = pull_requests[:max_prs] 76 | 77 | return pull_requests 78 | 79 | 80 | def get_pull_request_events(owner: str, name: str, pr_number: int) -> List[Dict]: 81 | """Fetch timeline events for a pull request using GraphQL.""" 82 | timeline_data = github_graphql_get_pull_request_events(owner, name, pr_number) 83 | 84 | if not timeline_data: 85 | return [] 86 | 87 | repo_url = f"https://github.com/{owner}/{name}" 88 | repository_slug = f"{owner}/{name}" 89 | 90 | return process_timeline_events(timeline_data, repo_url, repository_slug) 91 | 92 | 93 | def check_existing_repository(owner: str, name: str) -> Optional[Dict]: 94 | """Check if repository already exists in the file system.""" 95 | repo_url = f"https://github.com/{owner}/{name}" 96 | repo_dir = store.get_repo_dir(owner, name) 97 | repo_file = os.path.join(repo_dir, "repository.csv") 98 | 99 | if os.path.exists(repo_file): 100 | # Simple representation of repository ID using owner/name 101 | return {"id": f"{owner}/{name}", "url": repo_url} 102 | 103 | return None 104 | 105 | 106 | @error_handler 107 | def fetch_repository_info(owner: str, name: str, max_prs: Optional[int] = None, category: str = None) -> dict: 108 | """Fetch repository information.""" 109 | # Check if repository already exists 110 | existing_repo = check_existing_repository(owner, name) 111 | 112 | if existing_repo: 113 | # Process pull requests for existing repository 114 | result = fetch_pull_requests(owner, name, max_prs=max_prs) 115 | return {"status": "success", "repository_id": existing_repo["id"], **result} 116 | 117 | # Fetch repository information using GraphQL 118 | repo_data = get_repository_info(owner, name) 119 | 120 | if not repo_data: 121 | raise ValueError(f"Could not fetch data for repository: {owner}/{name}") 122 | 123 | # Save repository information 124 | save_result = store.save_repository_info(owner, name, repo_data, category) 125 | 126 | # Next stage - fetch pull requests 127 | result = fetch_pull_requests(owner, name, max_prs=max_prs) 128 | 129 | return { 130 | "status": "success", 131 | "repository_id": f"{owner}/{name}", 132 | "repository": save_result.get("repository", {}), 133 | **result, 134 | } 135 | 136 | 137 | @error_handler 138 | def fetch_pull_requests(owner: str, name: str, max_prs: Optional[int] = None) -> dict: 139 | """Fetch pull requests.""" 140 | repository_slug = f"{owner}/{name}" 141 | 142 | # Get existing PR numbers 143 | existing_prs = store.get_existing_prs_map(owner, name) 144 | logger.info(f"Found {len(existing_prs)} existing pull requests for {repository_slug}") 145 | 146 | # Count PRs that already have events saved 147 | existing_prs_with_events = 0 148 | for pr_number in existing_prs: 149 | if store.has_pr_events(owner, name, pr_number): 150 | existing_prs_with_events += 1 151 | 152 | logger.info(f"Found {existing_prs_with_events} existing pull requests with events for {repository_slug}") 153 | 154 | # Adjust max_prs for new PRs to fetch based on what we already have 155 | remaining_prs_to_fetch = None 156 | if max_prs is not None: 157 | remaining_prs_to_fetch = max(0, max_prs - existing_prs_with_events) 158 | logger.info(f"Will fetch up to {remaining_prs_to_fetch} new pull requests to reach the limit of {max_prs}") 159 | 160 | # If we already have enough PRs with events, no need to fetch more 161 | if remaining_prs_to_fetch == 0: 162 | 
logger.info( 163 | f"Already have {existing_prs_with_events} PRs with events, " 164 | f"which meets or exceeds the requested {max_prs}" 165 | ) 166 | return { 167 | "status": "success", 168 | "prs_processed": 0, 169 | "new_prs": 0, 170 | "message": f"No new PRs needed, already have {existing_prs_with_events} PRs with events", 171 | } 172 | 173 | # Get merged pull requests from GitHub API using GraphQL 174 | pull_requests_data = get_pull_requests(owner, name, max_prs=remaining_prs_to_fetch) 175 | logger.info(f"Fetched {len(pull_requests_data)} pull requests from GitHub API for {repository_slug}") 176 | 177 | # Filter out PRs that we already have 178 | new_pull_requests = [pr for pr in pull_requests_data if pr["number"] not in existing_prs] 179 | logger.info(f"Found {len(new_pull_requests)} new pull requests for {repository_slug}") 180 | 181 | # Save new pull requests if we have any 182 | result = {"status": "success", "prs_processed": 0, "new_prs": 0} 183 | if new_pull_requests: 184 | # Transform the PRs to the format expected by the store module 185 | transformed_prs = [ 186 | { 187 | "repository_slug": repository_slug, 188 | "pr_number": pr["number"], 189 | "title": pr["title"], 190 | "url": pr["url"], 191 | "author_login": pr["author"]["login"] if pr["author"] else None, 192 | "created_at": pr["createdAt"], 193 | "merged_at": pr["mergedAt"], 194 | "additions": pr["additions"], 195 | "deletions": pr["deletions"], 196 | "files_changed": pr["changedFiles"], 197 | } 198 | for pr in new_pull_requests 199 | ] 200 | 201 | save_result = store.save_pull_requests(owner, name, transformed_prs) 202 | result["prs_processed"] = save_result.get("prs_processed", 0) 203 | result["new_prs"] = len(new_pull_requests) 204 | 205 | # Process events for new PRs 206 | for pr in new_pull_requests: 207 | fetch_pull_request_events(owner, name, pr["number"]) 208 | 209 | # Also check if we need to update events for existing PRs that don't have events yet 210 | missing_events_prs = [ 211 | pr_number for pr_number in existing_prs.keys() if not store.has_pr_events(owner, name, pr_number) 212 | ] 213 | 214 | if missing_events_prs: 215 | logger.info(f"Fetching events for {len(missing_events_prs)} existing pull requests that are missing events") 216 | for pr_number in missing_events_prs: 217 | fetch_pull_request_events(owner, name, pr_number) 218 | 219 | # Consolidate all events into a single file 220 | store.consolidate_all_events(owner, name) 221 | 222 | return result 223 | 224 | 225 | @error_handler 226 | def fetch_pull_request_events(owner: str, name: str, pr_number: int) -> dict: 227 | """Fetch pull request events.""" 228 | # Check if events already exist for this PR 229 | if store.has_pr_events(owner, name, pr_number): 230 | logger.info(f"Events for PR #{pr_number} already fetched, skipping") 231 | return {"status": "skipped", "events_processed": 0} 232 | 233 | # Fetch timeline events using GraphQL 234 | events_data = get_pull_request_events(owner, name, pr_number) 235 | 236 | if not events_data: 237 | logger.warning(f"No timeline events found for PR #{pr_number}") 238 | return {"status": "empty", "events_processed": 0} 239 | 240 | # Save events using store module 241 | return store.save_pr_events(owner, name, pr_number, events_data) 242 | -------------------------------------------------------------------------------- /src/collab_dev/fetcher/github_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | from typing import Any, Dict, List 4 | 5 | 
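Putting the fetch pipeline above together, a minimal driver looks like the sketch below. It assumes `GITHUB_TOKEN` is exported and that you run from the project root, since the store module refuses to start without a `./data` directory.

```python
# Sketch: drive the fetcher end to end for one repository.
from collab_dev.fetcher.fetch import extract_repo_parts, process_repository

owner, name = extract_repo_parts("https://github.com/facebook/react")
result = process_repository(owner, name, max_prs=50)

# result carries the status plus the counts from fetch_pull_requests
print(result["status"], result.get("new_prs", 0))
```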
import requests 6 | 7 | from .api_client import get_api_token 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | GITHUB_GRAPHQL_URL = "https://api.github.com/graphql" 12 | 13 | 14 | def get_github_headers() -> Dict: 15 | """Get GitHub API headers with authentication token""" 16 | token = get_api_token() 17 | 18 | if not token: 19 | logger.error("No GitHub token available") 20 | raise Exception("No GitHub token available") 21 | 22 | headers = { 23 | "Authorization": f"token {token}", 24 | "Accept": "application/vnd.github.v3+json", 25 | "Content-Type": "application/json", 26 | } 27 | 28 | return headers 29 | 30 | 31 | def github_request(method: str, url: str, **kwargs) -> Dict: 32 | """Make a GitHub API request""" 33 | # Get headers with a token 34 | headers = get_github_headers() 35 | kwargs_headers = kwargs.get("headers", {}) 36 | 37 | # Merge headers 38 | full_headers = {**kwargs_headers, **headers} 39 | kwargs["headers"] = full_headers 40 | 41 | # Make the request 42 | response = requests.request(method, url, **kwargs) 43 | 44 | # Raise exceptions for error status codes 45 | response.raise_for_status() 46 | 47 | return response.json() 48 | 49 | 50 | def github_graphql_request(query: str, variables: Dict, timeout=30) -> Dict: 51 | """Make a GitHub GraphQL API request""" 52 | url = "https://api.github.com/graphql" 53 | headers = get_github_headers() 54 | 55 | # Create the request payload 56 | payload = {"query": query, "variables": variables} 57 | 58 | # Make the request 59 | response = requests.post(url, headers=headers, json=payload, timeout=timeout) 60 | 61 | # Check for HTTP errors 62 | response.raise_for_status() 63 | 64 | # Get the response data 65 | result = response.json() 66 | 67 | # Check for GraphQL-specific errors 68 | if "errors" in result: 69 | logger.error(f"GraphQL errors: {result['errors']}") 70 | raise Exception(f"GraphQL errors: {result['errors']}") 71 | 72 | return result 73 | 74 | 75 | def make_graphql_request(query: str, variables: dict, oauth_token: str = None) -> dict: 76 | """Make a GraphQL request""" 77 | try: 78 | token = oauth_token or get_api_token() 79 | 80 | if not token: 81 | raise Exception("No GitHub token available") 82 | 83 | headers = { 84 | "Authorization": f"Bearer {token}", 85 | "Content-Type": "application/json", 86 | } 87 | 88 | response = requests.post(GITHUB_GRAPHQL_URL, headers=headers, json={"query": query, "variables": variables}) 89 | 90 | if response.status_code == 200: 91 | result = response.json() 92 | 93 | if "errors" in result: 94 | logger.error(f"GraphQL Errors: {result['errors']}") 95 | raise Exception(f"GraphQL errors: {result['errors']}") 96 | 97 | return result 98 | else: 99 | logger.error(f"GraphQL request failed with status {response.status_code}") 100 | raise Exception(f"GraphQL request failed: {response.text}") 101 | 102 | except Exception as e: 103 | logger.error(f"Error making GraphQL request: {str(e)}") 104 | raise 105 | 106 | 107 | def get_user_association(owner: str, repo: str, username: str, oauth_token: str = None) -> str: 108 | """ 109 | Get a user's association with a repository 110 | Returns: Role as string ('owner', 'member', 'collaborator', or 'none') 111 | """ 112 | if not username: 113 | return "none" 114 | 115 | token = oauth_token or get_api_token() 116 | 117 | if not token: 118 | return "none" 119 | 120 | query = """ 121 | query($owner: String!, $repo: String!) 
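For orientation, here is a hedged example of calling `github_graphql_request` on its own. `stargazerCount` is a real field on GitHub's GraphQL `Repository` type; the owner and name values are placeholders.

```python
# Sketch: issue a single-field GraphQL query through the helper.
from collab_dev.fetcher.github_utils import github_graphql_request

query = """
query($owner: String!, $name: String!) {
  repository(owner: $owner, name: $name) {
    stargazerCount
  }
}
"""

result = github_graphql_request(query, {"owner": "facebook", "name": "react"})
print(result["data"]["repository"]["stargazerCount"])
```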
{ 122 | repository(owner: $owner, name: $repo) { 123 | viewerPermission 124 | owner { 125 | login 126 | } 127 | } 128 | viewer { 129 | login 130 | } 131 | } 132 | """ 133 | 134 | try: 135 | result = make_graphql_request(query, {"owner": owner, "repo": repo}, oauth_token=token) 136 | logger.info(f"GitHub API response for user association: {result}") 137 | 138 | data = result.get("data", {}) 139 | viewer_login = data.get("viewer", {}).get("login") 140 | logger.info(f"Viewer login: {viewer_login}, checking against username: {username}") 141 | 142 | # If we're not checking the authenticated user, return none 143 | if viewer_login != username: 144 | logger.info(f"Username mismatch: viewer {viewer_login} != requested {username}") 145 | return "none" 146 | 147 | repository = data.get("repository", {}) 148 | logger.info(f"Repository data: {repository}") 149 | 150 | # Check if user is the repository owner 151 | repo_owner = repository.get("owner", {}).get("login") 152 | logger.info(f"Repository owner: {repo_owner}") 153 | if repo_owner == username: 154 | logger.info(f"User {username} is the repository owner") 155 | return "owner" 156 | 157 | # Map GitHub permissions to our roles 158 | permission = repository.get("viewerPermission") 159 | logger.info(f"User permission level: {permission}") 160 | if permission == "ADMIN": 161 | logger.info(f"User {username} has ADMIN permission -> collaborator role") 162 | return "collaborator" # Admin gets collaborator role 163 | elif permission == "MAINTAIN": 164 | logger.info(f"User {username} has MAINTAIN permission -> member role") 165 | return "member" # Maintain gets member role 166 | elif permission == "WRITE": 167 | logger.info(f"User {username} has WRITE permission -> collaborator role") 168 | return "collaborator" # Write access gets collaborator role 169 | 170 | logger.info(f"User {username} has insufficient permissions: {permission}") 171 | return "none" 172 | 173 | except Exception as e: 174 | logger.error(f"Error checking user association for {username}: {str(e)}") 175 | return "none" 176 | 177 | 178 | def is_bot_actor(actor_name: str) -> bool: 179 | """Check if an actor is a bot based on name patterns""" 180 | if not actor_name: 181 | return False 182 | 183 | actor_name = actor_name.lower() 184 | 185 | # Common bot suffixes and patterns 186 | bot_patterns = [r"bot$", r"\[bot\]$", r"app$", r"-bot$", r"bot-"] 187 | 188 | # Known bot names 189 | known_bots = { 190 | "dependabot", 191 | "renovate", 192 | "github-actions", 193 | "semantic-release", 194 | "codecov", 195 | "sonarcloud", 196 | "snyk-bot", 197 | "imgbot", 198 | "deepsource-autofix", 199 | "stale", 200 | "allcontributors", 201 | "prettier", 202 | "vercel", 203 | "mergify", 204 | "probot", 205 | "goreleaserbot", 206 | "greenkeeper", 207 | "lgtm-com", 208 | "circleci", 209 | "travis-ci", 210 | "gitter-badger", 211 | "whitesource-bolt-for-github", 212 | "dependabot-preview", 213 | "semantic-release-bot", 214 | } 215 | 216 | # Check if actor name contains any known bot name 217 | for bot_name in known_bots: 218 | if bot_name in actor_name: 219 | return True 220 | 221 | # Check if actor name matches any bot pattern 222 | for pattern in bot_patterns: 223 | if re.search(pattern, actor_name): 224 | return True 225 | 226 | return False 227 | 228 | 229 | def process_timeline_events(pr_data: Dict, repo_url: str, repo_name: str) -> list: 230 | """Convert GraphQL timeline data into database-compatible format""" 231 | owner, repo = repo_name.split("/") 232 | 233 | pr_author = pr_data["author"]["login"] if 
pr_data["author"] else None 234 | author_association = pr_data.get("authorAssociation", "") 235 | is_author_core = author_association in ["OWNER", "MEMBER", "COLLABORATOR"] 236 | 237 | events = [] 238 | 239 | # Add PR creation event 240 | events.append( 241 | { 242 | "time": pr_data["createdAt"], 243 | "pr_number": pr_data["number"], 244 | "repository_slug": repo_name, 245 | "pr_title": pr_data["title"], 246 | "pr_url": pr_data["url"], 247 | "event_type": "pr_created", 248 | "actor": pr_author, 249 | "target_user": None, 250 | "files_changed": pr_data["changedFiles"], 251 | "lines_added": pr_data["additions"], 252 | "lines_deleted": pr_data["deletions"], 253 | "is_core_team": is_author_core, 254 | "source_branch": pr_data["headRefName"], 255 | "target_branch": pr_data["baseRefName"], 256 | "was_draft": pr_data["isDraft"], 257 | "is_bot": is_bot_actor(pr_author), 258 | } 259 | ) 260 | 261 | # Process timeline items 262 | logger.info(f"Processing {len(pr_data['timelineItems']['nodes'])} timeline events for PR #{pr_data['number']}") 263 | 264 | for item in pr_data["timelineItems"]["nodes"]: 265 | if "__typename" not in item: 266 | continue 267 | 268 | base_event = { 269 | "pr_number": pr_data["number"], 270 | "pr_title": pr_data["title"], 271 | "repository_slug": repo_name, 272 | "pr_url": pr_data["url"], 273 | "files_changed": pr_data["changedFiles"], 274 | "lines_added": pr_data["additions"], 275 | "lines_deleted": pr_data["deletions"], 276 | "is_core_team": is_author_core, 277 | "source_branch": pr_data["headRefName"], 278 | "target_branch": pr_data["baseRefName"], 279 | "was_draft": pr_data["isDraft"], 280 | } 281 | 282 | if item["__typename"] == "PullRequestCommit": 283 | actor = item["commit"]["author"]["user"]["login"] if item["commit"]["author"]["user"] else None 284 | events.append( 285 | { 286 | **base_event, 287 | "time": item["commit"]["committedDate"], 288 | "event_type": "commit_pushed", 289 | "actor": actor, 290 | "target_user": None, 291 | "is_bot": is_bot_actor(actor), 292 | } 293 | ) 294 | 295 | elif item["__typename"] == "ReviewRequestedEvent": 296 | actor = item["actor"]["login"] if item["actor"] else None 297 | target_user = item["requestedReviewer"]["login"] if item["requestedReviewer"] else None 298 | events.append( 299 | { 300 | **base_event, 301 | "time": item["createdAt"], 302 | "event_type": "review_requested", 303 | "actor": actor, 304 | "target_user": target_user, 305 | "is_bot": is_bot_actor(actor), 306 | } 307 | ) 308 | 309 | elif item["__typename"] == "PullRequestReview": 310 | actor = item["author"]["login"] if item["author"] else None 311 | events.append( 312 | { 313 | **base_event, 314 | "time": item["createdAt"], 315 | "event_type": f"review_{item['state'].lower()}", 316 | "actor": actor, 317 | "target_user": None, 318 | "is_bot": is_bot_actor(actor), 319 | } 320 | ) 321 | 322 | elif item["__typename"] == "MergedEvent": 323 | actor = item["actor"]["login"] if item["actor"] else None 324 | events.append( 325 | { 326 | **base_event, 327 | "time": item["createdAt"], 328 | "event_type": "pr_merged", 329 | "actor": actor, 330 | "target_user": None, 331 | "is_bot": is_bot_actor(actor), 332 | } 333 | ) 334 | 335 | elif item["__typename"] == "IssueComment": 336 | actor = item["author"]["login"] if item["author"] else None 337 | events.append( 338 | { 339 | **base_event, 340 | "time": item["createdAt"], 341 | "event_type": "comment_added", 342 | "actor": actor, 343 | "target_user": None, 344 | "is_bot": is_bot_actor(actor), 345 | } 346 | ) 347 | 348 | 
logger.info(f"Processed {len(events)} total events for PR #{pr_data['number']}") 349 | return events 350 | 351 | 352 | PULL_REQUESTS_PER_PAGE = 100 353 | 354 | 355 | def github_graphql_get_merged_pull_requests(owner: str, name: str) -> List[Dict]: 356 | """Get merged pull requests using GraphQL API""" 357 | query = ( 358 | """ 359 | query($owner: String!, $name: String!) { 360 | repository(owner: $owner, name: $name) { 361 | pullRequests(first: %d, states: [MERGED], orderBy: {field: UPDATED_AT, direction: DESC}) { 362 | nodes { 363 | number 364 | title 365 | url 366 | createdAt 367 | mergedAt 368 | changedFiles 369 | additions 370 | deletions 371 | author { 372 | login 373 | } 374 | } 375 | } 376 | } 377 | } 378 | """ 379 | % PULL_REQUESTS_PER_PAGE 380 | ) 381 | 382 | try: 383 | result = github_graphql_request(query, {"owner": owner, "name": name}) 384 | if result.get("data") and result["data"].get("repository"): 385 | return result["data"]["repository"]["pullRequests"]["nodes"] 386 | return [] 387 | except Exception as e: 388 | logger.error(f"Error fetching pull requests: {str(e)}") 389 | raise 390 | 391 | 392 | def github_graphql_get_pull_request_events(owner: str, name: str, pr_number: int) -> Dict[str, Any]: 393 | """Get PR timeline data using GraphQL API""" 394 | query = """ 395 | query($owner: String!, $name: String!, $pr_number: Int!) { 396 | repository(owner: $owner, name: $name) { 397 | pullRequest(number: $pr_number) { 398 | number 399 | title 400 | url 401 | createdAt 402 | mergedAt 403 | changedFiles 404 | additions 405 | deletions 406 | headRefName 407 | baseRefName 408 | isDraft 409 | author { 410 | login 411 | } 412 | authorAssociation 413 | timelineItems(first: 100) { 414 | pageInfo { 415 | hasNextPage 416 | endCursor 417 | } 418 | nodes { 419 | __typename 420 | ... on PullRequestCommit { 421 | commit { 422 | committedDate 423 | author { 424 | user { 425 | login 426 | } 427 | } 428 | } 429 | } 430 | ... on ReviewRequestedEvent { 431 | createdAt 432 | actor { 433 | login 434 | } 435 | requestedReviewer { 436 | ... on User { 437 | login 438 | } 439 | } 440 | } 441 | ... on PullRequestReview { 442 | createdAt 443 | author { 444 | login 445 | } 446 | state 447 | } 448 | ... on MergedEvent { 449 | createdAt 450 | actor { 451 | login 452 | } 453 | } 454 | ... on IssueComment { 455 | createdAt 456 | author { 457 | login 458 | } 459 | } 460 | } 461 | } 462 | } 463 | } 464 | } 465 | """ 466 | 467 | try: 468 | result = github_graphql_request(query, {"owner": owner, "name": name, "pr_number": pr_number}) 469 | if result.get("data") and result["data"].get("repository"): 470 | return result["data"]["repository"]["pullRequest"] 471 | return None 472 | except Exception as e: 473 | logger.error(f"Error fetching PR timeline: {str(e)}") 474 | raise 475 | 476 | 477 | def github_graphql_get_repository(owner: str, name: str) -> Dict: 478 | """Get repository data using GraphQL""" 479 | query = """ 480 | query($owner: String!, $name: String!) { 481 | repository(owner: $owner, name: $name) { 482 | name 483 | description 484 | url 485 | owner { 486 | avatarUrl 487 | ... 
on Organization { 488 | avatarUrl 489 | } 490 | } 491 | } 492 | } 493 | """ 494 | 495 | try: 496 | result = github_graphql_request(query, {"owner": owner, "name": name}) 497 | 498 | if result.get("data") and result["data"].get("repository"): 499 | repo = result["data"]["repository"] 500 | # Return a flat dictionary with string values 501 | return { 502 | "url": f"https://github.com/{owner}/{name}", 503 | "name": repo["name"], 504 | "organization": owner, 505 | "description": repo["description"], 506 | "logo_url": repo["owner"]["avatarUrl"], 507 | "category": "Newly Added", 508 | "repository_slug": f"{owner}/{name}", 509 | "status": "updating", 510 | } 511 | return None 512 | except Exception as e: 513 | logger.error(f"Error fetching repository data: {str(e)}") 514 | raise 515 | -------------------------------------------------------------------------------- /src/collab_dev/fetcher/store.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import logging 3 | import os 4 | import sys 5 | from typing import Dict, List 6 | 7 | # Configure logging 8 | logger = logging.getLogger(__name__) 9 | 10 | # Get root directory - set to './data' by default 11 | DATA_DIR = "./data" 12 | 13 | # Check if data directory exists 14 | if not os.path.exists(DATA_DIR): 15 | logger.error(f"Data directory {DATA_DIR} does not exist. Please create it first.") 16 | sys.exit(1) 17 | 18 | 19 | def ensure_directory(path: str) -> str: 20 | """Ensure the directory exists, creating it if necessary.""" 21 | os.makedirs(path, exist_ok=True) 22 | return path 23 | 24 | 25 | def get_repo_dir(owner: str, name: str) -> str: 26 | """Get the repository directory path.""" 27 | return ensure_directory(os.path.join(DATA_DIR, owner, name)) 28 | 29 | 30 | def get_pr_dir(owner: str, name: str, pr_number: int) -> str: 31 | """Get the pull request directory path.""" 32 | repo_dir = get_repo_dir(owner, name) 33 | return ensure_directory(os.path.join(repo_dir, f"pr_{pr_number}")) 34 | 35 | 36 | def write_csv(filepath: str, data: List[Dict], headers: List[str]) -> None: 37 | """Write data to a CSV file.""" 38 | mode = "w" 39 | file_exists = os.path.exists(filepath) 40 | 41 | with open(filepath, mode, newline="", encoding="utf-8") as csvfile: 42 | writer = csv.DictWriter(csvfile, fieldnames=headers) 43 | if not file_exists or mode == "w": 44 | writer.writeheader() 45 | 46 | for row in data: 47 | # Filter the row to only include fields in headers 48 | filtered_row = {k: v for k, v in row.items() if k in headers} 49 | writer.writerow(filtered_row) 50 | 51 | logger.info(f"Data written to {filepath}") 52 | 53 | 54 | def save_repository_info(owner: str, name: str, repo_data: Dict, category=None) -> Dict: 55 | """Save repository information to CSV.""" 56 | repo_dir = get_repo_dir(owner, name) 57 | 58 | # Write repository info to CSV 59 | write_csv( 60 | os.path.join(repo_dir, "repository.csv"), 61 | [repo_data], 62 | list(repo_data.keys()), 63 | ) 64 | 65 | return { 66 | "status": "success", 67 | "repository_dir": repo_dir, 68 | "repository": repo_data, 69 | } 70 | 71 | 72 | def save_pull_requests(owner: str, name: str, pull_requests_data: List[Dict]) -> Dict: 73 | """Save pull requests to CSV.""" 74 | repo_dir = get_repo_dir(owner, name) 75 | 76 | write_csv( 77 | os.path.join(repo_dir, "pull_requests.csv"), 78 | pull_requests_data, 79 | list(pull_requests_data[0].keys()), 80 | ) 81 | 82 | return { 83 | "status": "success", 84 | "prs_processed": len(pull_requests_data), 85 | } 86 | 87 | 88 | def 
save_pr_events(owner: str, name: str, pr_number: int, events_data: List[Dict]) -> Dict: 89 | """Save pull request events to CSV.""" 90 | # Create directory for PR events 91 | pr_dir = get_pr_dir(owner, name, pr_number) 92 | 93 | if not events_data: 94 | logger.info(f"No timeline events found for PR #{pr_number}") 95 | return {"status": "success", "events_processed": 0} 96 | 97 | # Sanity check: warn if this PR is missing from pull_requests.csv 98 | repo_dir = get_repo_dir(owner, name) 99 | pr_csv_path = os.path.join(repo_dir, "pull_requests.csv") 100 | if os.path.exists(pr_csv_path): 101 | with open(pr_csv_path, "r", newline="", encoding="utf-8") as csvfile: 102 | reader = csv.DictReader(csvfile) 103 | if not any(int(pr["pr_number"]) == pr_number for pr in reader): 104 | logger.warning(f"PR #{pr_number} not found in pull_requests.csv") 105 | 106 | 107 | # Write to CSV 108 | write_csv( 109 | os.path.join(pr_dir, "events.csv"), 110 | events_data, 111 | list(events_data[0].keys()), 112 | ) 113 | 114 | return { 115 | "status": "success", 116 | "events_processed": len(events_data), 117 | } 118 | 119 | 120 | def get_pr_numbers_from_csv(owner: str, name: str) -> List[int]: 121 | """Read PR numbers from pull_requests.csv.""" 122 | repo_dir = get_repo_dir(owner, name) 123 | pr_csv_path = os.path.join(repo_dir, "pull_requests.csv") 124 | pr_numbers = [] 125 | 126 | if os.path.exists(pr_csv_path): 127 | with open(pr_csv_path, "r", newline="", encoding="utf-8") as csvfile: 128 | reader = csv.DictReader(csvfile) 129 | for pr in reader: 130 | pr_numbers.append(int(pr["pr_number"])) 131 | 132 | return pr_numbers 133 | 134 | 135 | def get_existing_prs_map(owner: str, name: str) -> Dict[int, Dict]: 136 | """ 137 | Get a dictionary of existing PRs from pull_requests.csv. 138 | 139 | Args: 140 | owner: GitHub repository owner 141 | name: GitHub repository name 142 | 143 | Returns: 144 | Dictionary mapping PR numbers to PR data 145 | """ 146 | repo_dir = get_repo_dir(owner, name) 147 | pr_csv_path = os.path.join(repo_dir, "pull_requests.csv") 148 | pr_map = {} 149 | 150 | if os.path.exists(pr_csv_path): 151 | with open(pr_csv_path, "r", newline="", encoding="utf-8") as csvfile: 152 | reader = csv.DictReader(csvfile) 153 | for pr in reader: 154 | pr_map[int(pr["pr_number"])] = pr 155 | 156 | return pr_map 157 | 158 | 159 | def has_pr_events(owner: str, name: str, pr_number: int) -> bool: 160 | """ 161 | Check if events for a specific PR have already been fetched. 162 | 163 | Args: 164 | owner: GitHub repository owner 165 | name: GitHub repository name 166 | pr_number: Pull request number 167 | 168 | Returns: 169 | True if events exist, False otherwise 170 | """ 171 | pr_dir = get_pr_dir(owner, name, pr_number) 172 | events_csv_path = os.path.join(pr_dir, "events.csv") 173 | 174 | return os.path.exists(events_csv_path) and os.path.getsize(events_csv_path) > 0 175 | 176 | 177 | def consolidate_all_events(owner: str, name: str) -> Dict: 178 | """ 179 | Consolidate all PR events into a single all_events.csv file in the repo directory. 
180 | 181 | Args: 182 | owner: GitHub repository owner 183 | name: GitHub repository name 184 | 185 | Returns: 186 | Dict with status and count of events consolidated 187 | """ 188 | repo_dir = get_repo_dir(owner, name) 189 | pr_numbers = get_pr_numbers_from_csv(owner, name) 190 | 191 | all_events = [] 192 | 193 | # Collect events from each PR 194 | for pr_number in pr_numbers: 195 | pr_dir = get_pr_dir(owner, name, pr_number) 196 | events_csv_path = os.path.join(pr_dir, "events.csv") 197 | 198 | if os.path.exists(events_csv_path) and os.path.getsize(events_csv_path) > 0: 199 | with open(events_csv_path, "r", newline="", encoding="utf-8") as csvfile: 200 | reader = csv.DictReader(csvfile) 201 | all_events.extend(list(reader)) 202 | 203 | # Write consolidated events to all_events.csv 204 | if all_events: 205 | all_events_path = os.path.join(repo_dir, "all_events.csv") 206 | write_csv(all_events_path, all_events, list(all_events[0].keys())) 207 | logger.info(f"Consolidated {len(all_events)} events into {all_events_path}") 208 | 209 | return { 210 | "status": "success", 211 | "events_consolidated": len(all_events), 212 | } 213 | 214 | 215 | def get_all_repositories() -> List[str]: 216 | """ 217 | Get a list of all repositories stored in the data directory. 218 | 219 | Returns: 220 | List of repositories in the format "owner/name" 221 | """ 222 | repositories = [] 223 | 224 | # Check if data directory exists 225 | if not os.path.exists(DATA_DIR): 226 | logger.warning(f"Data directory {DATA_DIR} does not exist.") 227 | return repositories 228 | 229 | # Walk through the data directory structure 230 | for owner in os.listdir(DATA_DIR): 231 | owner_path = os.path.join(DATA_DIR, owner) 232 | if os.path.isdir(owner_path): 233 | for repo in os.listdir(owner_path): 234 | repo_path = os.path.join(owner_path, repo) 235 | # Check if it's a directory and contains repository.csv 236 | if os.path.isdir(repo_path) and os.path.exists(os.path.join(repo_path, "repository.csv")): 237 | repositories.append(f"{owner}/{repo}") 238 | 239 | return repositories 240 | -------------------------------------------------------------------------------- /src/collab_dev/loader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pullflow/collab-dev/adcaa2efb3418c1a8aebb2ad98bf46b3a99aa9b2/src/collab_dev/loader/__init__.py -------------------------------------------------------------------------------- /src/collab_dev/loader/load.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import pandas as pd 5 | 6 | 7 | def load(org: str, repo: str) -> pd.DataFrame: 8 | """ 9 | Load all events data for a given org/repo into a pandas dataframe 10 | 11 | Args: 12 | org: GitHub organization name 13 | repo: GitHub repository name 14 | 15 | Returns: 16 | DataFrame containing all events data 17 | """ 18 | data_path = f"./data/{org}/{repo}/all_events.csv" 19 | 20 | if not os.path.exists(data_path): 21 | logging.warning(f"Data file not found: {data_path}") 22 | return pd.DataFrame() 23 | 24 | try: 25 | # Specify data types, particularly for the time column 26 | df = pd.read_csv( 27 | data_path, 28 | parse_dates=["time"], # Parse the time column as datetime 29 | dtype={ 30 | "pr_number": int, 31 | "event_type": str, 32 | "actor": str, 33 | "is_bot": bool, 34 | "is_core_team": bool, 35 | }, 36 | ) 37 | 38 | # Log the shape 39 | logging.info(f"Loaded dataframe with shape: {df.shape}") 40 | 41 | 
return df 42 | except Exception as e: 43 | logging.error(f"Error reading file {data_path}: {e}") 44 | return pd.DataFrame() 45 | -------------------------------------------------------------------------------- /src/collab_dev/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 🍩 collab.dev report 8 | 9 | 10 | 11 | 12 | 13 |
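Once a repository has been collected, the loader above returns the consolidated events as a typed DataFrame. A minimal sketch, assuming facebook/react data already exists under `./data`:

```python
# Sketch: load collected events and take a quick look at the mix.
from collab_dev.loader.load import load

df = load("facebook", "react")
if not df.empty:
    print(df["event_type"].value_counts())
    print(df.groupby("pr_number")["time"].agg(["min", "max"]).head())
```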
14 |

15 | 🍩 collab.dev report 16 |

17 | {% block content %} 18 |
19 |

Repository Reports

20 | {% if repositories %} 21 |
    22 | {% for repo in repositories %} 23 |
24 | {{ repo }} 25 |
26 | {% endfor %} 27 |
28 | {% else %} 29 |

No repositories found.

30 | {% endif %} 31 |
32 |
33 |

Run the following command from the root of the project to collect repository data: 34 |

35 |
pdm collect -n {number of PRs} {owner}/{repo}
36 |

37 | Note: You must set the GITHUB_TOKEN environment 38 | variable before running the command. 39 |

40 |

41 | For example, to collect 100 PRs from the facebook/react repository, 42 | run: 43 |

44 |
GITHUB_TOKEN=your_token pdm collect -n 100 facebook/react
45 |
46 | {% endblock %} 47 |
48 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /src/collab_dev/templates/repository.html: -------------------------------------------------------------------------------- 1 | {% extends "templates/index.html" %} 2 | 3 | {% block content %} 4 |
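The template above receives `repo` and a list of pre-rendered `charts` fragments, injected with `|safe`. The view that supplies them lives in app.py, which is not reproduced in this dump, so the following is only a hypothetical sketch of its likely shape; `render_workflow` stands in for whichever chart modules the app actually wires up, and the route follows the report URL scheme.

```python
# Hypothetical sketch only: app.py is not shown in this dump.
from flask import Flask, render_template

from collab_dev.loader.load import load

# Stand-in import: each chart module is assumed to expose a function that
# returns an HTML fragment (compare the workflow chart's render_template call).
from collab_dev.components.charts.workflow import render as render_workflow

app = Flask(__name__)


@app.route("/report/<owner>/<repo>")
def report(owner: str, repo: str):
    df = load(owner, repo)
    charts = [render_workflow(df)]  # pre-rendered fragments for {{ chart|safe }}
    return render_template(
        "templates/repository.html", repo=f"{owner}/{repo}", charts=charts
    )
```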
5 |

{{ repo }}

6 | Back to repositories 7 |
8 | {% for chart in charts %} 9 |
10 | {{ chart|safe }} 11 |
12 | {% endfor %} 13 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/theme.py: -------------------------------------------------------------------------------- 1 | """ 2 | Theme configuration module providing consistent color palettes and styling utilities. 3 | """ 4 | 5 | # Chart dimensions 6 | CHART_DIMENSIONS = { 7 | "pie_chart_height": 400, # Standard height for pie/donut charts 8 | "bar_chart_height": 300, # Standard height for bar charts 9 | "funnel_chart_height": 300, # Standard height for funnel charts 10 | } 11 | 12 | # Primary brand colors 13 | BRAND = { 14 | "primary": "#795DBD", # Slate blue - Main brand color 15 | "secondary": "#A592D3", # African Violet - Secondary brand color 16 | "accent": "#FF958C", # Coral pink - Accent color 17 | "highlight": "#ACE4AA", # Celadon - Highlight color 18 | "dark": "#6D1A36", # Claret - Dark accent 19 | } 20 | 21 | # Theme variations 22 | THEMES = { 23 | "default": { 24 | "primary_series": [ 25 | "#795DBD", # Slate blue 26 | "#A592D3", # African Violet 27 | "#FF958C", # Coral pink 28 | "#ACE4AA", # Celadon 29 | "#6D1A36", # Claret 30 | ], 31 | } 32 | } 33 | 34 | # Extended color palette for data visualizations 35 | VISUALIZATION = { 36 | # Main colors for primary data series - will be set by active theme 37 | "primary_series": THEMES["default"]["primary_series"], 38 | # Colors for secondary or supporting data 39 | "secondary_series": [ 40 | "#B3A1E0", # Lighter slate blue 41 | "#C4B6E3", # Lighter african violet 42 | "#FFB3AC", # Lighter coral pink 43 | "#C4ECC2", # Lighter celadon 44 | "#8F3854", # Lighter claret 45 | ], 46 | # Monochromatic scale of the primary color (Slate blue) 47 | "mono_scale": [ 48 | "#795DBD", # 100% 49 | "#8E76C7", # 80% 50 | "#A38FD1", # 60% 51 | "#B8A8DB", # 40% 52 | "#CDC1E5", # 20% 53 | ], 54 | # Diverging color scale for comparisons 55 | "diverging": [ 56 | "#FF958C", # negative (coral pink) 57 | "#FFB3AC", # slightly negative 58 | "#F5F5F5", # neutral 59 | "#ACE4AA", # slightly positive (celadon) 60 | "#8BC887", # positive (darker celadon) 61 | ], 62 | } 63 | 64 | # Semantic colors for status and feedback 65 | SEMANTIC = { 66 | "success": "#ACE4AA", # Celadon 67 | "warning": "#FFB3AC", # Light coral pink 68 | "error": "#FF958C", # Coral pink 69 | "info": "#A592D3", # African Violet 70 | } 71 | 72 | # Background and surface colors 73 | BACKGROUND = { 74 | "primary": "#FFFFFF", 75 | "secondary": "#F8F9FA", 76 | "tertiary": "#F1F3F5", 77 | "dark": "#6D1A36", # Claret for dark mode or accents 78 | } 79 | 80 | # Text colors 81 | TEXT = { 82 | "primary": "#212529", 83 | "secondary": "#6C757D", 84 | "muted": "#ADB5BD", 85 | "on_dark": "#F8F9FA", # For text on dark backgrounds 86 | } 87 | 88 | 89 | def get_chart_colors(num_colors: int, palette: str = "primary") -> list: 90 | """ 91 | Get a list of colors for charts and visualizations. 
92 | 93 | Args: 94 | num_colors (int): Number of colors needed 95 | palette (str): Which palette to use ('primary', 'secondary', 'mono', 'diverging') 96 | 97 | Returns: 98 | list: List of color hex codes 99 | """ 100 | if palette == "primary": 101 | # Extended primary colors with darker celadon for better contrast 102 | colors = [ 103 | "#795DBD", # Slate blue 104 | "#A592D3", # African Violet 105 | "#FF958C", # Coral pink 106 | "#ACE4AA", # Celadon 107 | "#6D1A36", # Claret 108 | "#8BC887", # Darker celadon 109 | "#FFB3AC", # Light coral pink 110 | ] 111 | elif palette == "secondary": 112 | colors = VISUALIZATION["secondary_series"] 113 | elif palette == "mono": 114 | colors = VISUALIZATION["mono_scale"] 115 | elif palette == "diverging": 116 | colors = VISUALIZATION["diverging"] 117 | else: 118 | colors = VISUALIZATION["primary_series"] 119 | 120 | # If we need more colors than available, cycle through the palette 121 | result = [] 122 | while len(result) < num_colors: 123 | result.extend(colors) 124 | return result[:num_colors] 125 | 126 | 127 | def get_plotly_template() -> dict: 128 | """ 129 | Get a consistent Plotly chart template using the theme colors. 130 | 131 | Returns: 132 | dict: Plotly layout template 133 | """ 134 | return { 135 | "layout": { 136 | "paper_bgcolor": BACKGROUND["primary"], 137 | "plot_bgcolor": BACKGROUND["primary"], 138 | "margin": {"l": 50, "r": 50, "t": 35, "b": 30, "pad": 4}, 139 | "font": {"color": TEXT["primary"], "family": "sans-serif"}, 140 | "title": {"font": {"color": TEXT["primary"], "size": 20}}, 141 | "legend": {"font": {"color": TEXT["secondary"]}}, 142 | "xaxis": { 143 | "gridcolor": BACKGROUND["tertiary"], 144 | "linecolor": TEXT["muted"], 145 | "title": {"font": {"color": TEXT["secondary"]}}, 146 | "tickfont": {"color": TEXT["secondary"]}, 147 | }, 148 | "yaxis": { 149 | "gridcolor": BACKGROUND["tertiary"], 150 | "linecolor": TEXT["muted"], 151 | "title": {"font": {"color": TEXT["secondary"]}}, 152 | "tickfont": {"color": TEXT["secondary"]}, 153 | }, 154 | } 155 | } 156 | 157 | 158 | def get_streamlit_theme() -> dict: 159 | """ 160 | Get theme configuration for Streamlit's config.toml 161 | 162 | Returns: 163 | dict: Streamlit theme configuration 164 | """ 165 | return { 166 | "primaryColor": BRAND["primary"], 167 | "backgroundColor": BACKGROUND["primary"], 168 | "secondaryBackgroundColor": BACKGROUND["secondary"], 169 | "textColor": TEXT["primary"], 170 | "font": "sans serif", 171 | } 172 | 173 | 174 | def get_template_data() -> dict: 175 | """ 176 | Get consistent theme data for template rendering. 177 | 178 | Returns: 179 | dict: Theme configuration for templates 180 | """ 181 | return { 182 | "theme": { 183 | "brand": BRAND, 184 | "colors": VISUALIZATION["primary_series"], 185 | "background": BACKGROUND, 186 | "text": TEXT, 187 | "semantic": SEMANTIC, 188 | } 189 | } 190 | 191 | 192 | def set_theme(theme_name: str = "default") -> None: 193 | """ 194 | Set the active theme for visualizations. 195 | 196 | Args: 197 | theme_name (str): Name of the theme to use (currently only 'default' is supported) 198 | 199 | Raises: 200 | ValueError: If the specified theme name is not found 201 | """ 202 | if theme_name not in THEMES: 203 | raise ValueError(f"Theme '{theme_name}' not found. Available themes: {list(THEMES.keys())}") 204 | 205 | # Update visualization colors based on theme 206 | VISUALIZATION["primary_series"] = THEMES[theme_name]["primary_series"] 207 | --------------------------------------------------------------------------------
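Finally, a short sketch of how the theme helpers combine with Plotly; the figure is a throwaway example, and passing the template as a plain dict is standard Plotly behavior.

```python
# Sketch: style an ad-hoc Plotly figure with the shared theme helpers.
import plotly.graph_objects as go

from collab_dev import theme

colors = theme.get_chart_colors(3, palette="mono")
fig = go.Figure(go.Bar(x=["a", "b", "c"], y=[1, 2, 3], marker_color=colors))

# Plotly accepts a dict as a layout template specification.
fig.update_layout(
    template=theme.get_plotly_template(),
    height=theme.CHART_DIMENSIONS["bar_chart_height"],
)
```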