├── .gitignore ├── .pdm-python ├── LICENSE ├── README.md ├── data └── .keep ├── pdm.lock ├── pyproject.toml └── src └── collab_dev ├── __init__.py ├── app.py ├── collect.py ├── components └── charts │ ├── __init__.py │ ├── approval_time │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── bot_analysis │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── chart.html │ ├── chart_renderer.py │ ├── contribution │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── merge_time │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── metric.html │ ├── review_coverage │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── review_funnel │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── review_turnaround │ ├── __init__.py │ ├── data.py │ └── template.html │ ├── utils.py │ └── workflow │ ├── __init__.py │ ├── data.py │ └── template.html ├── fetcher ├── __init__.py ├── api_client.py ├── fetch.py ├── github_utils.py └── store.py ├── loader ├── __init__.py └── load.py ├── templates ├── index.html └── repository.html └── theme.py /.gitignore: -------------------------------------------------------------------------------- 1 | # PDM project specific 2 | .pdm.toml 3 | __pypackages__/ 4 | .pdm-python 5 | .pdm-build/ 6 | 7 | # Python 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | *.so 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # Virtual Environments 30 | venv/ 31 | env/ 32 | ENV/ 33 | .venv/ 34 | .env/ 35 | 36 | # IDE specific files 37 | .idea/ 38 | .vscode/ 39 | *.swp 40 | *.swo 41 | .DS_Store 42 | 43 | # Local development settings 44 | .env 45 | .env.local 46 | .env.development.local 47 | .env.test.local 48 | .env.production.local 49 | 50 | # Testing 51 | .coverage 52 | htmlcov/ 53 | .pytest_cache/ 54 | .tox/ 55 | /data/ 56 | -------------------------------------------------------------------------------- /.pdm-python: -------------------------------------------------------------------------------- 1 | /Users/zak/pullflow/collab-dev/.venv/bin/python -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 PullFlow 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🍩 collab.dev 2 | 3 | ## Open Source Collaboration Metrics for Code Reviews 4 | 5 | Cloud edition: 6 | 7 | **collab-dev** is an open-source tool that generates collaboration metrics and insights from GitHub pull request data. Use it to analyze collaboration patterns, review workflows, process efficiency, and more. 8 | 9 | ## Features 10 | 11 | - **Data Collection:** Fetches pull request data from any public or private GitHub repository (requires a GitHub token). 12 | - **Visualization:** Generates interactive charts using Plotly. 13 | - **Command Line Interface:** Runs the analysis with a single command. 14 | - **Portable & Minimal:** Designed to work with CSV data to keep things simple. 15 | - **Extensible:** Add new charts by adding them to the chart modules list. 16 | 17 | --- 18 | 19 | ## Getting Started 20 | 21 | ### Prerequisites 22 | 23 | - Python 3.12+ 24 | - Python Dependency Manager (`pdm`) - [Installation Instructions](https://pdm.fming.dev/latest/#installation) 25 | - A GitHub Personal Access Token with repository read permissions 26 | 27 | ### Installation 28 | 29 | 1. Clone the repository: 30 | 31 | ```bash 32 | git clone https://github.com/pullflow/collab-dev.git 33 | cd collab-dev 34 | ``` 35 | 36 | 2. Install dependencies: 37 | 38 | ```bash 39 | pdm install 40 | ``` 41 | 42 | 3. Set up your GitHub API token as an environment variable: 43 | 44 | ```bash 45 | export GITHUB_TOKEN=your_token_here 46 | ``` 47 | 48 | --- 49 | 50 | ## Usage 51 | 52 | ### Fetch Pull Request Data 53 | 54 | To download data from a GitHub repository, run: 55 | 56 | ```bash 57 | pdm collect owner/repo_name 58 | ``` 59 | 60 | This will generate CSV files with pull request data in the `data/` directory. 61 | 62 | You can specify the number of PRs to fetch using the `-n` flag: 63 | 64 | ```bash 65 | pdm collect -n 100 owner/repo_name 66 | ``` 67 | 68 | For example, to collect 100 PRs from the React repository using your GitHub token: 69 | 70 | ```bash 71 | GITHUB_TOKEN=your_token pdm run collect -n 100 facebook/react 72 | ``` 73 | 74 | Alternatively, you can save your GitHub token in a `.env` file. 75 | 76 | ### View Metrics & Insights 77 | 78 | To analyze the data and view the results: 79 | 80 | 1. Start the Flask application: 81 | 82 | ```bash 83 | pdm serve 84 | ``` 85 | 86 | 2. Open your browser and navigate to: 87 | 88 | <http://127.0.0.1:8700> 89 | 90 | 3. You'll see a list of repositories you've collected data for using the collect script. 91 | 92 | 4. Click on any repository to view its detailed metrics and visualizations at `/report/owner/repo`. 93 | 94 | --- 95 | 96 | ## Data Structure 97 | 98 | collab-dev organizes collected data in a hierarchical file structure: 99 | 100 | ``` 101 | ./data/ 102 | ├── {owner}/ 103 | │ ├── {repo_name}/ 104 | │ │ ├── repository.csv # Repository metadata 105 | │ │ ├── pull_requests.csv # All PR data for this repo 106 | │ │ ├── all_events.csv # Consolidated events from all PRs 107 | │ │ ├── pr_{number}/ 108 | │ │ │ └── events.csv # Events for specific PR 109 | │ │ ├── pr_{number}/ 110 | │ │ │ └── events.csv 111 | │ │ └── ...
112 | ``` 113 | 114 | ### Data Files 115 | 116 | - **repository.csv**: Contains metadata about the GitHub repository 117 | - **pull_requests.csv**: Stores information about all pull requests collected from the repository 118 | - **all_events.csv**: Consolidates timeline events from all PRs for easier analysis 119 | - **events.csv**: In each PR subdirectory, stores the timeline events for that specific PR 120 | 121 | This structure allows for efficient data collection, storage, and analysis while maintaining a clear organization based on GitHub's repository hierarchy. 122 | 123 | --- 124 | 125 | ## Customization 126 | 127 | Charts are defined in the `CHART_MODULES` list in `src/collab_dev/components/charts/chart_renderer.py`. To add a custom chart: 128 | 129 | 1. Create a new module in `src/collab_dev/components/charts/` 130 | 2. Implement a `render(repo_df)` function in your module 131 | 3. Add your module to the `CHART_MODULES` list in `chart_renderer.py` 132 | 133 | Existing chart types include: 134 | 135 | - Workflow (Sankey diagram) 136 | - Contributor distribution patterns 137 | - Bot contribution analysis 138 | - Review coverage metrics 139 | - Review funnel analysis 140 | - Review turnaround time 141 | - Request Approval time analysis 142 | - Merge time distribution 143 | 144 | --- 145 | 146 | ## Development 147 | 148 | ### Code Style 149 | 150 | We use `ruff` for code formatting and linting: 151 | 152 | ```bash 153 | # Run linter 154 | pdm lint 155 | 156 | # Format code 157 | pdm format 158 | 159 | # Fix auto-fixable issues 160 | pdm lint-fix 161 | ``` 162 | 163 | --- 164 | 165 | ## Contributing 166 | 167 | We're looking for help in the following areas: 168 | 169 | - **Validate and improve data and calculations:** Help ensure our metrics are accurate and meaningful. 170 | - **Improve current charts and other visualizations:** Enhance the clarity and usefulness of existing visualizations. 171 | - **Add new charts that help measure collaboration:** Develop new metrics and visualizations that provide insights into team collaboration patterns. 172 | 173 | To contribute: 174 | 175 | 1. Fork the repository 176 | 2. Create your branch: 177 | 178 | ```bash 179 | git checkout -b feature/my-new-feature 180 | ``` 181 | 182 | 3. Commit your changes: 183 | 184 | ```bash 185 | git commit -m "Add some feature" 186 | ``` 187 | 188 | 4. Push to the branch: 189 | 190 | ```bash 191 | git push origin feature/my-new-feature 192 | ``` 193 | 194 | 5. Open a Pull Request 195 | 196 | --- 197 | 198 | ## License 199 | 200 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. 201 | 202 | --- 203 | 204 | ## Support 205 | 206 | For issues and feature requests, please use the [GitHub Issues](https://github.com/pullflow/collab-dev/issues) page. 207 | -------------------------------------------------------------------------------- /data/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pullflow/collab-dev/adcaa2efb3418c1a8aebb2ad98bf46b3a99aa9b2/data/.keep -------------------------------------------------------------------------------- /pdm.lock: -------------------------------------------------------------------------------- 1 | # This file is @generated by PDM. 2 | # It is not intended for manual editing. 
3 | 4 | [metadata] 5 | groups = ["default", "dev"] 6 | strategy = ["inherit_metadata"] 7 | lock_version = "4.5.0" 8 | content_hash = "sha256:d14c62504a543769bcab6e91ca10d4ef6b32618842721d20192bc07182f558a4" 9 | 10 | [[metadata.targets]] 11 | requires_python = "==3.12.*" 12 | 13 | [[package]] 14 | name = "blinker" 15 | version = "1.9.0" 16 | requires_python = ">=3.9" 17 | summary = "Fast, simple object-to-object and broadcast signaling" 18 | groups = ["default"] 19 | files = [ 20 | {file = "blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc"}, 21 | {file = "blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf"}, 22 | ] 23 | 24 | [[package]] 25 | name = "certifi" 26 | version = "2025.1.31" 27 | requires_python = ">=3.6" 28 | summary = "Python package for providing Mozilla's CA Bundle." 29 | groups = ["default"] 30 | files = [ 31 | {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, 32 | {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, 33 | ] 34 | 35 | [[package]] 36 | name = "charset-normalizer" 37 | version = "3.4.1" 38 | requires_python = ">=3.7" 39 | summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 40 | groups = ["default"] 41 | files = [ 42 | {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"}, 43 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"}, 44 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"}, 45 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"}, 46 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"}, 47 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"}, 48 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"}, 49 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"}, 50 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"}, 51 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"}, 52 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"}, 53 | {file = "charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"}, 54 | 
{file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"}, 55 | {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, 56 | {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, 57 | ] 58 | 59 | [[package]] 60 | name = "click" 61 | version = "8.1.8" 62 | requires_python = ">=3.7" 63 | summary = "Composable command line interface toolkit" 64 | groups = ["default"] 65 | dependencies = [ 66 | "colorama; platform_system == \"Windows\"", 67 | "importlib-metadata; python_version < \"3.8\"", 68 | ] 69 | files = [ 70 | {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, 71 | {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, 72 | ] 73 | 74 | [[package]] 75 | name = "colorama" 76 | version = "0.4.6" 77 | requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 78 | summary = "Cross-platform colored terminal text." 79 | groups = ["default"] 80 | marker = "platform_system == \"Windows\"" 81 | files = [ 82 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 83 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 84 | ] 85 | 86 | [[package]] 87 | name = "dotenv" 88 | version = "0.9.9" 89 | summary = "Deprecated package" 90 | groups = ["default"] 91 | dependencies = [ 92 | "python-dotenv", 93 | ] 94 | files = [ 95 | {file = "dotenv-0.9.9-py2.py3-none-any.whl", hash = "sha256:29cf74a087b31dafdb5a446b6d7e11cbce8ed2741540e2339c69fbef92c94ce9"}, 96 | ] 97 | 98 | [[package]] 99 | name = "flask" 100 | version = "3.1.0" 101 | requires_python = ">=3.9" 102 | summary = "A simple framework for building complex web applications." 103 | groups = ["default"] 104 | dependencies = [ 105 | "Jinja2>=3.1.2", 106 | "Werkzeug>=3.1", 107 | "blinker>=1.9", 108 | "click>=8.1.3", 109 | "importlib-metadata>=3.6; python_version < \"3.10\"", 110 | "itsdangerous>=2.2", 111 | ] 112 | files = [ 113 | {file = "flask-3.1.0-py3-none-any.whl", hash = "sha256:d667207822eb83f1c4b50949b1623c8fc8d51f2341d65f72e1a1815397551136"}, 114 | {file = "flask-3.1.0.tar.gz", hash = "sha256:5f873c5184c897c8d9d1b05df1e3d01b14910ce69607a117bd3277098a5836ac"}, 115 | ] 116 | 117 | [[package]] 118 | name = "idna" 119 | version = "3.10" 120 | requires_python = ">=3.6" 121 | summary = "Internationalized Domain Names in Applications (IDNA)" 122 | groups = ["default"] 123 | files = [ 124 | {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, 125 | {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, 126 | ] 127 | 128 | [[package]] 129 | name = "itsdangerous" 130 | version = "2.2.0" 131 | requires_python = ">=3.8" 132 | summary = "Safely pass data to untrusted environments and back." 
133 | groups = ["default"] 134 | files = [ 135 | {file = "itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef"}, 136 | {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"}, 137 | ] 138 | 139 | [[package]] 140 | name = "jinja2" 141 | version = "3.1.6" 142 | requires_python = ">=3.7" 143 | summary = "A very fast and expressive template engine." 144 | groups = ["default"] 145 | dependencies = [ 146 | "MarkupSafe>=2.0", 147 | ] 148 | files = [ 149 | {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, 150 | {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, 151 | ] 152 | 153 | [[package]] 154 | name = "markupsafe" 155 | version = "3.0.2" 156 | requires_python = ">=3.9" 157 | summary = "Safely add untrusted strings to HTML/XML markup." 158 | groups = ["default"] 159 | files = [ 160 | {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf"}, 161 | {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225"}, 162 | {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028"}, 163 | {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8"}, 164 | {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c"}, 165 | {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557"}, 166 | {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22"}, 167 | {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48"}, 168 | {file = "MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30"}, 169 | {file = "MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87"}, 170 | {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, 171 | ] 172 | 173 | [[package]] 174 | name = "narwhals" 175 | version = "1.31.0" 176 | requires_python = ">=3.8" 177 | summary = "Extremely lightweight compatibility layer between dataframe libraries" 178 | groups = ["default"] 179 | files = [ 180 | {file = "narwhals-1.31.0-py3-none-any.whl", hash = "sha256:2a7b79bb5f511055c4c0142121fc0d4171ea171458e12d44dbd9c8fc6488e997"}, 181 | {file = "narwhals-1.31.0.tar.gz", hash = "sha256:333472e2562343dfdd27407ec9b5114a07c81d0416794e4ac6b703dd925c6a1a"}, 182 | ] 183 | 184 | [[package]] 185 | name = "numpy" 186 | version = "2.2.4" 187 | requires_python = ">=3.10" 188 | summary = "Fundamental package for array computing in Python" 189 | groups = ["default"] 190 | files = [ 191 | {file = 
"numpy-2.2.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a7b9084668aa0f64e64bd00d27ba5146ef1c3a8835f3bd912e7a9e01326804c4"}, 192 | {file = "numpy-2.2.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dbe512c511956b893d2dacd007d955a3f03d555ae05cfa3ff1c1ff6df8851854"}, 193 | {file = "numpy-2.2.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:bb649f8b207ab07caebba230d851b579a3c8711a851d29efe15008e31bb4de24"}, 194 | {file = "numpy-2.2.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:f34dc300df798742b3d06515aa2a0aee20941c13579d7a2f2e10af01ae4901ee"}, 195 | {file = "numpy-2.2.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3f7ac96b16955634e223b579a3e5798df59007ca43e8d451a0e6a50f6bfdfba"}, 196 | {file = "numpy-2.2.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f92084defa704deadd4e0a5ab1dc52d8ac9e8a8ef617f3fbb853e79b0ea3592"}, 197 | {file = "numpy-2.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7a4e84a6283b36632e2a5b56e121961f6542ab886bc9e12f8f9818b3c266bfbb"}, 198 | {file = "numpy-2.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:11c43995255eb4127115956495f43e9343736edb7fcdb0d973defd9de14cd84f"}, 199 | {file = "numpy-2.2.4-cp312-cp312-win32.whl", hash = "sha256:65ef3468b53269eb5fdb3a5c09508c032b793da03251d5f8722b1194f1790c00"}, 200 | {file = "numpy-2.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:2aad3c17ed2ff455b8eaafe06bcdae0062a1db77cb99f4b9cbb5f4ecb13c5146"}, 201 | {file = "numpy-2.2.4.tar.gz", hash = "sha256:9ba03692a45d3eef66559efe1d1096c4b9b75c0986b5dff5530c378fb8331d4f"}, 202 | ] 203 | 204 | [[package]] 205 | name = "packaging" 206 | version = "24.2" 207 | requires_python = ">=3.8" 208 | summary = "Core utilities for Python packages" 209 | groups = ["default"] 210 | files = [ 211 | {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, 212 | {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, 213 | ] 214 | 215 | [[package]] 216 | name = "pandas" 217 | version = "2.2.3" 218 | requires_python = ">=3.9" 219 | summary = "Powerful data structures for data analysis, time series, and statistics" 220 | groups = ["default"] 221 | dependencies = [ 222 | "numpy>=1.22.4; python_version < \"3.11\"", 223 | "numpy>=1.23.2; python_version == \"3.11\"", 224 | "numpy>=1.26.0; python_version >= \"3.12\"", 225 | "python-dateutil>=2.8.2", 226 | "pytz>=2020.1", 227 | "tzdata>=2022.7", 228 | ] 229 | files = [ 230 | {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, 231 | {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, 232 | {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, 233 | {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, 234 | {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, 235 | {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, 236 | {file = 
"pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, 237 | {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, 238 | ] 239 | 240 | [[package]] 241 | name = "plotly" 242 | version = "6.0.1" 243 | requires_python = ">=3.8" 244 | summary = "An open-source interactive data visualization library for Python" 245 | groups = ["default"] 246 | dependencies = [ 247 | "narwhals>=1.15.1", 248 | "packaging", 249 | ] 250 | files = [ 251 | {file = "plotly-6.0.1-py3-none-any.whl", hash = "sha256:4714db20fea57a435692c548a4eb4fae454f7daddf15f8d8ba7e1045681d7768"}, 252 | {file = "plotly-6.0.1.tar.gz", hash = "sha256:dd8400229872b6e3c964b099be699f8d00c489a974f2cfccfad5e8240873366b"}, 253 | ] 254 | 255 | [[package]] 256 | name = "python-dateutil" 257 | version = "2.9.0.post0" 258 | requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 259 | summary = "Extensions to the standard Python datetime module" 260 | groups = ["default"] 261 | dependencies = [ 262 | "six>=1.5", 263 | ] 264 | files = [ 265 | {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, 266 | {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, 267 | ] 268 | 269 | [[package]] 270 | name = "python-dotenv" 271 | version = "1.0.1" 272 | requires_python = ">=3.8" 273 | summary = "Read key-value pairs from a .env file and set them as environment variables" 274 | groups = ["default"] 275 | files = [ 276 | {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, 277 | {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, 278 | ] 279 | 280 | [[package]] 281 | name = "pytz" 282 | version = "2025.1" 283 | summary = "World timezone definitions, modern and historical" 284 | groups = ["default"] 285 | files = [ 286 | {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"}, 287 | {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"}, 288 | ] 289 | 290 | [[package]] 291 | name = "requests" 292 | version = "2.32.3" 293 | requires_python = ">=3.8" 294 | summary = "Python HTTP for Humans." 295 | groups = ["default"] 296 | dependencies = [ 297 | "certifi>=2017.4.17", 298 | "charset-normalizer<4,>=2", 299 | "idna<4,>=2.5", 300 | "urllib3<3,>=1.21.1", 301 | ] 302 | files = [ 303 | {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, 304 | {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, 305 | ] 306 | 307 | [[package]] 308 | name = "ruff" 309 | version = "0.11.2" 310 | requires_python = ">=3.7" 311 | summary = "An extremely fast Python linter and code formatter, written in Rust." 
312 | groups = ["dev"] 313 | files = [ 314 | {file = "ruff-0.11.2-py3-none-linux_armv6l.whl", hash = "sha256:c69e20ea49e973f3afec2c06376eb56045709f0212615c1adb0eda35e8a4e477"}, 315 | {file = "ruff-0.11.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2c5424cc1c4eb1d8ecabe6d4f1b70470b4f24a0c0171356290b1953ad8f0e272"}, 316 | {file = "ruff-0.11.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:ecf20854cc73f42171eedb66f006a43d0a21bfb98a2523a809931cda569552d9"}, 317 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c543bf65d5d27240321604cee0633a70c6c25c9a2f2492efa9f6d4b8e4199bb"}, 318 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20967168cc21195db5830b9224be0e964cc9c8ecf3b5a9e3ce19876e8d3a96e3"}, 319 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:955a9ce63483999d9f0b8f0b4a3ad669e53484232853054cc8b9d51ab4c5de74"}, 320 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:86b3a27c38b8fce73bcd262b0de32e9a6801b76d52cdb3ae4c914515f0cef608"}, 321 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a3b66a03b248c9fcd9d64d445bafdf1589326bee6fc5c8e92d7562e58883e30f"}, 322 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0397c2672db015be5aa3d4dac54c69aa012429097ff219392c018e21f5085147"}, 323 | {file = "ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:869bcf3f9abf6457fbe39b5a37333aa4eecc52a3b99c98827ccc371a8e5b6f1b"}, 324 | {file = "ruff-0.11.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2a2b50ca35457ba785cd8c93ebbe529467594087b527a08d487cf0ee7b3087e9"}, 325 | {file = "ruff-0.11.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:7c69c74bf53ddcfbc22e6eb2f31211df7f65054bfc1f72288fc71e5f82db3eab"}, 326 | {file = "ruff-0.11.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6e8fb75e14560f7cf53b15bbc55baf5ecbe373dd5f3aab96ff7aa7777edd7630"}, 327 | {file = "ruff-0.11.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:842a472d7b4d6f5924e9297aa38149e5dcb1e628773b70e6387ae2c97a63c58f"}, 328 | {file = "ruff-0.11.2-py3-none-win32.whl", hash = "sha256:aca01ccd0eb5eb7156b324cfaa088586f06a86d9e5314b0eb330cb48415097cc"}, 329 | {file = "ruff-0.11.2-py3-none-win_amd64.whl", hash = "sha256:3170150172a8f994136c0c66f494edf199a0bbea7a409f649e4bc8f4d7084080"}, 330 | {file = "ruff-0.11.2-py3-none-win_arm64.whl", hash = "sha256:52933095158ff328f4c77af3d74f0379e34fd52f175144cefc1b192e7ccd32b4"}, 331 | {file = "ruff-0.11.2.tar.gz", hash = "sha256:ec47591497d5a1050175bdf4e1a4e6272cddff7da88a2ad595e1e326041d8d94"}, 332 | ] 333 | 334 | [[package]] 335 | name = "six" 336 | version = "1.17.0" 337 | requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 338 | summary = "Python 2 and 3 compatibility utilities" 339 | groups = ["default"] 340 | files = [ 341 | {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, 342 | {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, 343 | ] 344 | 345 | [[package]] 346 | name = "tzdata" 347 | version = "2025.1" 348 | requires_python = ">=2" 349 | summary = "Provider of IANA time zone data" 350 | groups = ["default"] 351 | files = [ 352 | {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = 
"sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, 353 | {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, 354 | ] 355 | 356 | [[package]] 357 | name = "urllib3" 358 | version = "2.3.0" 359 | requires_python = ">=3.9" 360 | summary = "HTTP library with thread-safe connection pooling, file post, and more." 361 | groups = ["default"] 362 | files = [ 363 | {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, 364 | {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, 365 | ] 366 | 367 | [[package]] 368 | name = "werkzeug" 369 | version = "3.1.3" 370 | requires_python = ">=3.9" 371 | summary = "The comprehensive WSGI web application library." 372 | groups = ["default"] 373 | dependencies = [ 374 | "MarkupSafe>=2.1.1", 375 | ] 376 | files = [ 377 | {file = "werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e"}, 378 | {file = "werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746"}, 379 | ] 380 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "collab-dev" 3 | version = "0.1.0" 4 | description = "Default template for PDM package" 5 | authors = [ 6 | { name = "Amna Anwar", email = "amna@pullflow.com" }, 7 | { name = "Alissa Vuillier", email = "alissa@pullflow.com" }, 8 | { name = "Zak Mandhro", email = "zak@pullflow.com" }, 9 | ] 10 | dependencies = [ 11 | "flask>=3.1.0", 12 | "pandas>=2.2.3", 13 | "numpy>=1.26.0", 14 | "plotly>=5.18.0", 15 | "dotenv>=0.9.9", 16 | "requests>=2.31.0", 17 | ] 18 | requires-python = "==3.12.*" 19 | readme = "README.md" 20 | license = { text = "MIT" } 21 | 22 | 23 | [tool.pdm] 24 | distribution = false 25 | 26 | [tool.pdm.scripts] 27 | serve = "python src/collab_dev/app.py" 28 | collect = "python src/collab_dev/collect.py" 29 | lint = "ruff check src/" 30 | format = "ruff format src/" 31 | lint-fix = "ruff check --fix src/" 32 | 33 | [tool.pdm.dev-dependencies] 34 | dev = ["ruff>=0.11.2"] 35 | 36 | [tool.ruff] 37 | line-length = 120 38 | target-version = "py312" 39 | 40 | [tool.ruff.lint] 41 | select = ["E", "F", "I", "W", "B"] 42 | ignore = [] 43 | 44 | # Per-file ignores for visualization-related files with long template strings 45 | [tool.ruff.lint.per-file-ignores] 46 | "src/collab_dev/charts/pr_sankey/__init__.py" = ["E501"] 47 | "src/collab_dev/charts/review_coverage/__init__.py" = ["E501"] 48 | 49 | [tool.ruff.format] 50 | quote-style = "double" 51 | indent-style = "space" 52 | line-ending = "auto" 53 | -------------------------------------------------------------------------------- /src/collab_dev/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pullflow/collab-dev/adcaa2efb3418c1a8aebb2ad98bf46b3a99aa9b2/src/collab_dev/__init__.py -------------------------------------------------------------------------------- /src/collab_dev/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | collab.dev - Flask application for collaboration metrics 3 | """ 4 | 5 | from components.charts.chart_renderer import render_charts 6 | from fetcher.store import 
get_all_repositories 7 | from flask import Flask, render_template 8 | from loader.load import load 9 | 10 | app = Flask(__name__, template_folder=".", static_folder="./static") 11 | 12 | 13 | @app.route("/") 14 | def index(): 15 | """Render the index page listing all collected repositories""" 16 | # Get list of all repositories 17 | repositories = get_all_repositories() 18 | 19 | return render_template("templates/index.html", repositories=repositories) 20 | 21 | 22 | @app.route("/report/<path:repo_path>") 23 | def repository_report(repo_path): 24 | """Show report for a specific repository""" 25 | # Split the repo path into owner and name 26 | parts = repo_path.split("/") 27 | if len(parts) != 2: 28 | return "Invalid repository path", 400 29 | 30 | owner, name = parts 31 | df = load(owner, name) 32 | charts = render_charts(df) 33 | return render_template( 34 | "templates/repository.html", 35 | df=df, 36 | repo=repo_path, 37 | charts=charts, 38 | ) 39 | 40 | 41 | if __name__ == "__main__": 42 | app.run(host="127.0.0.1", port=8700, debug=True) 43 | -------------------------------------------------------------------------------- /src/collab_dev/collect.py: -------------------------------------------------------------------------------- 1 | """ 2 | GitHub repository data collector for collab.dev 3 | 4 | This module validates a GitHub repository URL provided as a command line argument, 5 | extracts the owner and repository name, and collects data from the repository. 6 | """ 7 | 8 | import argparse 9 | import re 10 | import sys 11 | from typing import Optional, Tuple 12 | 13 | from fetcher.fetch import process_repository 14 | 15 | 16 | def parse_github_repo_url(url: str) -> Optional[Tuple[str, str]]: 17 | """ 18 | Parse and validate a GitHub repository URL. 19 | 20 | Args: 21 | url: A string representing a GitHub repository URL in one of these formats: 22 | - owner/repo_name 23 | - https://github.com/owner/repo_name 24 | 25 | Returns: 26 | A tuple of (owner, repo_name) if valid, None otherwise 27 | """ 28 | # Pattern for simple format: owner/repo_name 29 | simple_pattern = r"^([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)$" 30 | 31 | # Pattern for https format: https://github.com/owner/repo_name 32 | https_pattern = r"^https?://github\.com/([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)/?$" 33 | 34 | # Try to match each pattern 35 | for pattern in [simple_pattern, https_pattern]: 36 | match = re.match(pattern, url) 37 | if match: 38 | return match.group(1), match.group(2) 39 | 40 | return None 41 | 42 | 43 | def main(): 44 | """ 45 | Main function that validates the GitHub repository URL from command line arguments 46 | and collects data from the specified repository. 47 | 48 | Parses command line arguments to get the repository URL and the number of PRs to fetch, 49 | validates the URL, and then processes the repository to collect and save data.
50 | """ 51 | parser = argparse.ArgumentParser(description="Collect data from a GitHub repository") 52 | parser.add_argument("repo_url", help="GitHub repository URL (owner/repo_name)") 53 | parser.add_argument( 54 | "-n", 55 | "--num-prs", 56 | type=int, 57 | default=100, 58 | help="Number of PRs to fetch (default: 100)", 59 | ) 60 | 61 | args = parser.parse_args() 62 | 63 | # Validate the repository URL 64 | result = parse_github_repo_url(args.repo_url) 65 | 66 | if result: 67 | owner, repo_name = result 68 | print(f"Fetching data from GitHub repository: {owner}/{repo_name}") 69 | try: 70 | # Process the repository to fetch and save all data 71 | result = process_repository(owner, repo_name, args.num_prs) 72 | print(f"Successfully collected data from {owner}/{repo_name}") 73 | print(f"Data saved to {result.get('path', 'output directory')}") 74 | print( 75 | f"You can view the report by running `pdm serve` and navigating to http://127.0.0.1:5000/{owner}/{repo_name}" 76 | ) 77 | except Exception as e: 78 | print(f"Error fetching repository data: {e}") 79 | sys.exit(1) 80 | else: 81 | print(f"Error: '{args.repo_url}' is not a valid GitHub repository URL") 82 | print("Valid formats include: owner/repo_name, https://github.com/owner/repo_name") 83 | sys.exit(1) 84 | 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Charts package initialization. 3 | """ 4 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/approval_time/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pandas as pd 4 | import plotly 5 | import plotly.graph_objects as go 6 | from components.charts.utils import ( 7 | apply_theme_to_figure, 8 | get_plotly_config, 9 | get_theme_colors, 10 | humanize_time, 11 | ) 12 | from flask import render_template 13 | 14 | from .data import get_approval_time_data 15 | 16 | 17 | def create_approval_time_plot(size_stats) -> go.Figure: 18 | """Create visualization for approval time by PR size""" 19 | 20 | logging.debug("Processing size stats for approval time plot") 21 | logging.debug(f"Input size_stats:\n{size_stats}") 22 | 23 | # Define the desired order of size categories 24 | size_order = [ 25 | "XS (<10 lines)", 26 | "S (10-99 lines)", 27 | "M (100-499 lines)", 28 | "L (500-999 lines)", 29 | "XL (1000+ lines)", 30 | ] 31 | 32 | # Sort the DataFrame by our custom order 33 | size_stats = size_stats.set_index("size_category").reindex(size_order).reset_index() 34 | 35 | # Extract data from size_stats DataFrame 36 | categories = size_stats["size_category"].tolist() 37 | 38 | # Fix: Replace NaN values with 0 in median_hours and pr_count 39 | median_hours = [0 if pd.isna(val) else val for val in size_stats["median_hours"].tolist()] 40 | pr_counts = [0 if pd.isna(val) else int(val) for val in size_stats["pr_count"].tolist()] 41 | 42 | # Create hover text with humanized times 43 | hover_text = [ 44 | f"Median: {humanize_time(hours)}
Count: {count} PR{'s' if count != 1 else ''}" 45 | for hours, count in zip(median_hours, pr_counts, strict=False) 46 | ] 47 | 48 | logging.debug(f"Categories: {categories}") 49 | logging.debug(f"Median hours: {median_hours}") 50 | logging.debug(f"PR counts: {pr_counts}") 51 | 52 | # Calculate percentage of PRs in each category 53 | total_prs = sum(pr_counts) 54 | logging.debug(f"Total PRs: {total_prs}") 55 | 56 | # Create fraction text for each bar with simple dash 57 | bar_text = [f"{count}" for count in pr_counts] 58 | logging.debug(f"Bar text fractions: {bar_text}") 59 | 60 | # Calculate percentages based on PR counts 61 | percentages = [count / total_prs * 100 if total_prs > 0 else 0 for count in pr_counts] 62 | logging.debug(f"Calculated percentages: {percentages}") 63 | 64 | # Get theme colors 65 | colors = get_theme_colors(len(categories)) 66 | 67 | # Create figure using plotly graph objects 68 | fig = go.Figure( 69 | data=[ 70 | go.Bar( 71 | x=categories, 72 | y=median_hours, 73 | text=bar_text, 74 | textposition="outside", 75 | marker_color=colors, 76 | marker_line_width=0, # Remove border lines from bars 77 | hoverinfo="text", 78 | hovertext=hover_text, 79 | ) 80 | ] 81 | ) 82 | 83 | # Update layout 84 | fig.update_layout( 85 | xaxis_title="PR Size", 86 | yaxis_title="Median Hours to Approval", 87 | showlegend=False, 88 | margin={"t": 40, "l": 50, "r": 50, "b": 50}, 89 | height=400, 90 | paper_bgcolor="white", 91 | plot_bgcolor="white", 92 | ) 93 | 94 | # Apply theme to the figure 95 | fig = apply_theme_to_figure(fig) 96 | 97 | return fig 98 | 99 | 100 | def render(repo_df): 101 | """Render the approval time chart component""" 102 | 103 | try: 104 | # Get approval time statistics 105 | approval_data = get_approval_time_data(repo_df) 106 | 107 | if not approval_data: 108 | return render_template("components/charts/approval_time/template.html", approval_data=None) 109 | 110 | # Create plot figure 111 | fig = create_approval_time_plot(approval_data["size_stats"]) 112 | 113 | # Get plotly config from theme 114 | config = get_plotly_config() 115 | 116 | # Convert the figure to HTML 117 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 118 | 119 | # Prepare data for template 120 | template_data = { 121 | "overall_median": approval_data["overall_median"], 122 | "plot_html": plot_html, 123 | } 124 | 125 | return render_template("components/charts/approval_time/template.html", approval_data=template_data) 126 | 127 | except Exception: 128 | return render_template("components/charts/approval_time/template.html", approval_data=None) 129 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/approval_time/data.py: -------------------------------------------------------------------------------- 1 | import logging # Add this at the top 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def calculate_pat(repo_df: pd.DataFrame) -> float: 8 | """Calculate overall median PAT""" 9 | try: 10 | if repo_df.empty: 11 | return None 12 | 13 | # Convert time to datetime 14 | repo_df["time"] = pd.to_datetime(repo_df["time"]) 15 | 16 | # Get review request and approval times for each PR 17 | review_requests = repo_df[repo_df["event_type"] == "review_requested"].groupby("pr_number")["time"].first() 18 | approvals = repo_df[repo_df["event_type"] == "review_approved"].groupby("pr_number")["time"].first() 19 | 20 | # Match PRs that have both request and approval 21 | matched_prs = 
pd.DataFrame({"request_time": review_requests, "approval_time": approvals}).dropna() 22 | 23 | if matched_prs.empty: 24 | return None 25 | 26 | # Calculate time difference in hours 27 | matched_prs["approval_time_hours"] = ( 28 | matched_prs["approval_time"] - matched_prs["request_time"] 29 | ).dt.total_seconds() / 3600 30 | 31 | # Return median time 32 | return matched_prs["approval_time_hours"].median() 33 | 34 | except Exception as e: 35 | logging.error(f"Error calculating PAT: {e}") 36 | return None 37 | 38 | 39 | def get_pr_size_category(total_lines_changed: int) -> str: 40 | """ 41 | Categorize PR size based on total lines changed 42 | 43 | These categories provide a more accurate representation of PR complexity: 44 | - XS: <10 lines (minimal changes, very quick to review) 45 | - S: 10-99 lines (small changes, quick to review) 46 | - M: 100-499 lines (moderate changes, reasonable review time) 47 | - L: 500-999 lines (large changes, significant review time) 48 | - XL: 1000+ lines (extensive changes, challenging to review effectively) 49 | """ 50 | if total_lines_changed < 10: 51 | return "XS (<10 lines)" 52 | elif total_lines_changed < 100: 53 | return "S (10-99 lines)" 54 | elif total_lines_changed < 500: 55 | return "M (100-499 lines)" 56 | elif total_lines_changed < 1000: 57 | return "L (500-999 lines)" 58 | else: 59 | return "XL (1000+ lines)" 60 | 61 | 62 | def calculate_total_lines_changed(repo_df: pd.DataFrame) -> pd.DataFrame: 63 | """ 64 | Calculate total lines changed (added + deleted) for each PR 65 | """ 66 | try: 67 | if repo_df.empty: 68 | return pd.DataFrame() 69 | 70 | # Group by PR number and calculate total lines changed 71 | pr_lines = ( 72 | repo_df.groupby("pr_number") 73 | .agg( 74 | { 75 | "lines_added": "max", # Take the max value as it should be consistent for a PR 76 | "lines_deleted": "max", 77 | } 78 | ) 79 | .reset_index() 80 | ) 81 | 82 | # Calculate total lines changed 83 | pr_lines["total_lines_changed"] = pr_lines["lines_added"] + pr_lines["lines_deleted"] 84 | 85 | return pr_lines[["pr_number", "total_lines_changed"]] 86 | 87 | except Exception: 88 | return pd.DataFrame() 89 | 90 | 91 | def analyze_pr_size_distribution(repo_df: pd.DataFrame) -> dict: 92 | """ 93 | Analyze the distribution of PR sizes based on line changes 94 | 95 | Returns a dictionary with: 96 | - percentiles: key percentiles of the distribution 97 | - histogram: counts of PRs in different line change ranges 98 | - category_counts: counts of PRs in each standardized category 99 | """ 100 | try: 101 | if repo_df.empty: 102 | return {"percentiles": {}, "histogram": {}, "category_counts": {}} 103 | 104 | # Calculate total lines changed for each PR 105 | pr_lines = calculate_total_lines_changed(repo_df) 106 | 107 | if pr_lines.empty: 108 | return {"percentiles": {}, "histogram": {}, "category_counts": {}} 109 | 110 | # Get the total lines changed values 111 | total_lines = pr_lines["total_lines_changed"].dropna() 112 | 113 | if len(total_lines) == 0: 114 | return {"percentiles": {}, "histogram": {}, "category_counts": {}} 115 | 116 | # Calculate percentiles 117 | percentiles = { 118 | "min": total_lines.min(), 119 | "p10": total_lines.quantile(0.1), 120 | "p25": total_lines.quantile(0.25), 121 | "p50": total_lines.quantile(0.5), # median 122 | "p75": total_lines.quantile(0.75), 123 | "p90": total_lines.quantile(0.9), 124 | "p95": total_lines.quantile(0.95), 125 | "p99": total_lines.quantile(0.99), 126 | "max": total_lines.max(), 127 | } 128 | 129 | # Create histogram with bins based on 
data range 130 | bins = [0, 10, 100, 500] 131 | if (total_lines >= 500).any(): 132 | bins.append(1000) 133 | if (total_lines >= 1000).any(): 134 | bins.append(int(total_lines.max()) + 1) 135 | 136 | # Create histogram 137 | hist_values, hist_bins = np.histogram(total_lines, bins=bins) 138 | 139 | histogram = { 140 | f"{int(hist_bins[i])}-{int(hist_bins[i + 1])}": int(hist_values[i]) for i in range(len(hist_values)) 141 | } 142 | 143 | # Count PRs in each standardized category 144 | category_counts = { 145 | "XS (<10 lines)": len(total_lines[total_lines < 10]), 146 | "S (10-99 lines)": len(total_lines[(total_lines >= 10) & (total_lines < 100)]), 147 | "M (100-499 lines)": len(total_lines[(total_lines >= 100) & (total_lines < 500)]), 148 | "L (500-999 lines)": len(total_lines[(total_lines >= 500) & (total_lines < 1000)]), 149 | "XL (1000+ lines)": len(total_lines[total_lines >= 1000]), 150 | } 151 | 152 | return { 153 | "percentiles": {k: round(float(v), 1) for k, v in percentiles.items()}, 154 | "histogram": histogram, 155 | "category_counts": category_counts, 156 | } 157 | 158 | except Exception: 159 | return {"percentiles": {}, "histogram": {}, "category_counts": {}} 160 | 161 | 162 | def calculate_pat_by_size(repo_df: pd.DataFrame) -> pd.DataFrame: 163 | """Calculate PR Approval Time (PAT) broken down by PR size based on line changes""" 164 | try: 165 | if repo_df.empty: 166 | return pd.DataFrame() 167 | 168 | # Convert time to datetime 169 | repo_df["time"] = pd.to_datetime(repo_df["time"]) 170 | 171 | # Calculate total lines changed for each PR 172 | pr_lines = calculate_total_lines_changed(repo_df) 173 | 174 | if pr_lines.empty: 175 | return pd.DataFrame() 176 | 177 | # Get review request and approval times for each PR 178 | review_requests = repo_df[repo_df["event_type"] == "review_requested"].groupby("pr_number")["time"].first() 179 | approvals = repo_df[repo_df["event_type"] == "review_approved"].groupby("pr_number")["time"].first() 180 | 181 | # Match PRs that have both request and approval 182 | matched_prs = pd.DataFrame({"request_time": review_requests, "approval_time": approvals}).dropna() 183 | 184 | if matched_prs.empty: 185 | return pd.DataFrame() 186 | 187 | # Add total lines changed information 188 | matched_prs = matched_prs.reset_index().merge(pr_lines, on="pr_number", how="left").set_index("pr_number") 189 | 190 | # Calculate time difference in hours 191 | matched_prs["approval_time_hours"] = ( 192 | matched_prs["approval_time"] - matched_prs["request_time"] 193 | ).dt.total_seconds() / 3600 194 | 195 | # Add size category 196 | matched_prs["size_category"] = matched_prs["total_lines_changed"].apply(get_pr_size_category) 197 | 198 | # Calculate stats by size category 199 | size_stats = ( 200 | matched_prs.groupby("size_category") 201 | .agg({"approval_time_hours": ["median", "mean", "count"], "total_lines_changed": "mean"}) 202 | .round(1) 203 | ) 204 | 205 | # Flatten column names 206 | size_stats.columns = ["median_hours", "mean_hours", "pr_count", "avg_lines"] 207 | 208 | # Sort by size category in a logical order 209 | size_order = { 210 | "XS (<10 lines)": 0, 211 | "S (10-99 lines)": 1, 212 | "M (100-499 lines)": 2, 213 | "L (500-999 lines)": 3, 214 | "XL (1000+ lines)": 4, 215 | } 216 | 217 | return size_stats.reset_index().sort_values( 218 | by="size_category", key=lambda x: x.map(lambda cat: size_order.get(cat, 99)) 219 | ) 220 | 221 | except Exception: 222 | return pd.DataFrame() 223 | 224 | 225 | def get_approval_time_data(repo_df): 226 | """Process raw data 
into approval time metrics""" 227 | logging.debug("Starting get_approval_time_data processing...") 228 | 229 | if repo_df.empty: 230 | logging.debug("Empty repository dataframe") 231 | return None 232 | 233 | # Get overall median approval time 234 | pat_hours = calculate_pat(repo_df) 235 | logging.debug(f"Overall PAT hours: {pat_hours}") 236 | 237 | if pat_hours is None: 238 | logging.debug("No PAT hours calculated") 239 | return None 240 | 241 | # Calculate PR size stats 242 | pr_lines = calculate_total_lines_changed(repo_df) 243 | 244 | if pr_lines.empty: 245 | logging.debug("No PR lines data") 246 | return None 247 | 248 | # Get review request and approval times for each PR 249 | review_requests = repo_df[repo_df["event_type"] == "review_requested"].groupby("pr_number")["time"].first() 250 | approvals = repo_df[repo_df["event_type"] == "review_approved"].groupby("pr_number")["time"].first() 251 | 252 | # Match PRs that have both request and approval 253 | matched_prs = pd.DataFrame({"request_time": review_requests, "approval_time": approvals}).dropna() 254 | 255 | if matched_prs.empty: 256 | logging.debug("No matched PRs with both request and approval") 257 | return None 258 | 259 | # Add size information 260 | matched_prs = matched_prs.reset_index().merge(pr_lines, on="pr_number", how="left") 261 | 262 | # Calculate approval time in hours 263 | matched_prs["approval_time_hours"] = ( 264 | matched_prs["approval_time"] - matched_prs["request_time"] 265 | ).dt.total_seconds() / 3600 266 | 267 | # Add size categories 268 | matched_prs["size_category"] = matched_prs["total_lines_changed"].apply(get_pr_size_category) 269 | 270 | # Calculate stats by size category 271 | size_stats = ( 272 | matched_prs.groupby("size_category") 273 | .agg({"approval_time_hours": ["median", "count"], "total_lines_changed": "mean"}) 274 | .round(1) 275 | ) 276 | 277 | # Flatten column names 278 | size_stats.columns = ["median_hours", "pr_count", "avg_lines"] 279 | size_stats = size_stats.reset_index() 280 | 281 | return {"overall_median": pat_hours, "size_stats": size_stats} 282 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/approval_time/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Request Approval Time{% endblock %} 4 | {% block metrics %} 5 | {% if approval_data %} 6 | {{ metric(label="Overall Median Approval Time", value="%.1f"|format(approval_data.overall_median) ~ " hours", 7 | tip="Median time between review request and approval") }} 8 | {% endif %} 9 | {% endblock %} 10 | {% block chart %} 11 | {% if approval_data %} 12 |
13 | {% if approval_data.plot_html %} 14 | {{ approval_data.plot_html | safe }} 15 | {% endif %} 16 | 17 | {% else %} 18 | No approval time data available because there are no reviews in the data.
19 | {% endif %} 20 | {% endblock %} 21 | {% block caption %} 22 | Shows the median time between review request and approval for pull requests by size. 23 | Only pull requests with reviews are included. 24 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/bot_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objects as go 3 | from components.charts.utils import ( 4 | apply_theme_to_figure, 5 | get_plotly_config, 6 | get_theme_colors, 7 | ) 8 | from flask import render_template 9 | 10 | 11 | def render(repo_df): 12 | """Render the bot analysis visualization""" 13 | from .data import analyze_bot_activity 14 | 15 | stats = analyze_bot_activity(repo_df) 16 | if not stats: 17 | return render_template("components/charts/bot_analysis/template.html") 18 | 19 | # Check if there's bot breakdown data to display 20 | if not stats["bot_breakdown"]: 21 | return render_template("components/charts/bot_analysis/template.html", stats=stats) 22 | 23 | # Get theme colors 24 | colors = get_theme_colors(len(stats["bot_breakdown"])) 25 | 26 | # Create a Plotly figure 27 | fig = go.Figure( 28 | data=[ 29 | go.Bar( 30 | x=[item["pr_number"] for item in stats["bot_breakdown"]], 31 | y=[item["actor"] for item in stats["bot_breakdown"]], 32 | orientation="h", 33 | marker=dict(color=colors), 34 | customdata=[ 35 | [pr_num, "PR" if pr_num == 1 else "PRs"] 36 | for pr_num in [item["pr_number"] for item in stats["bot_breakdown"]] 37 | ], 38 | hovertemplate="%{customdata[0]} %{customdata[1]}", 39 | ) 40 | ] 41 | ) 42 | 43 | # Update layout 44 | fig.update_layout( 45 | margin=dict(t=30, l=200, r=30, b=50), 46 | height=max(300, len(stats["bot_breakdown"]) * 40), 47 | xaxis=dict(title="Number of PRs"), 48 | yaxis=dict(automargin=True, tickfont=dict(size=12)), 49 | ) 50 | 51 | # Apply theme to the figure 52 | fig = apply_theme_to_figure(fig) 53 | 54 | # Get plotly config from theme 55 | config = get_plotly_config() 56 | 57 | # Convert the figure to HTML 58 | bot_breakdown_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 59 | 60 | return render_template( 61 | "components/charts/bot_analysis/template.html", 62 | stats=stats, 63 | bot_breakdown_html=bot_breakdown_html, 64 | ) 65 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/bot_analysis/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def analyze_bot_activity(repo_df): 5 | """ 6 | Analyze PR activity by bots vs humans. 
7 | 8 | Args: 9 | repo_df: DataFrame containing repository data 10 | 11 | Returns: 12 | dict: Statistics about bot vs human PR activity 13 | """ 14 | if repo_df is None or repo_df.empty: 15 | return None 16 | 17 | # Get unique PRs and their first events to determine PR type 18 | pr_data = repo_df[repo_df["event_type"] == "pr_created"].drop_duplicates("pr_number") 19 | 20 | if pr_data.empty: 21 | return None 22 | 23 | # Create a list of PR authors 24 | pr_authors = [] 25 | for _, row in pr_data.iterrows(): 26 | author = row.get("actor", "") 27 | if author: 28 | pr_authors.append( 29 | {"actor": author, "pr_number": row.get("pr_number", 0), "is_bot": row.get("is_bot", False)} 30 | ) 31 | 32 | if not pr_authors: 33 | return None 34 | 35 | # Convert to DataFrame 36 | pr_df = pd.DataFrame(pr_authors) 37 | 38 | # Group by actor to count PRs 39 | author_counts = pr_df.groupby(["actor", "is_bot"]).size().reset_index(name="pr_count") 40 | 41 | # Calculate statistics 42 | total_prs = len(pr_authors) 43 | bot_prs = pr_df[pr_df["is_bot"]].shape[0] 44 | human_prs = pr_df[~pr_df["is_bot"]].shape[0] 45 | 46 | # Get bot breakdown 47 | bot_breakdown = author_counts[author_counts["is_bot"]].sort_values("pr_count", ascending=False) 48 | 49 | # Rename column for consistency with the expected output 50 | bot_breakdown = bot_breakdown.rename(columns={"pr_count": "pr_number"}) 51 | 52 | return { 53 | "total_prs": total_prs, 54 | "bot_prs": bot_prs, 55 | "human_prs": human_prs, 56 | "bot_count": bot_prs, 57 | "human_count": human_prs, 58 | "bot_percentage": round((bot_prs / total_prs * 100) if total_prs > 0 else 0, 1), 59 | "human_percentage": round((human_prs / total_prs * 100) if total_prs > 0 else 0, 1), 60 | "bot_breakdown": bot_breakdown.to_dict("records"), 61 | } 62 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/bot_analysis/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Bot Contribution{% endblock %} 4 | {% block metrics %} 5 | {% if stats %} 6 | {{ metric(label="Bot PRs", value=stats.bot_percentage ~ "%", tip="Percentage of PRs created by bots") }} 7 | {{ metric(label="Human PRs", value=stats.human_percentage ~ "%", tip="Percentage of PRs created by humans") }} 8 | {% endif %} 9 | {% endblock %} 10 | {% block chart %} 11 | {% if stats %} 12 | {% if stats.bot_breakdown|length == 0 %} 13 |

No bot activity detected in this repository.

14 | 15 | {% elif stats.bot_breakdown|length == 1 %} 16 |

17 | The bot PRs for this repo are all coming from 18 | {{ stats.bot_breakdown[0].actor }}. 19 |

20 | 21 | {% else %} 22 | 23 |
24 | {% if bot_breakdown_html %} {{ bot_breakdown_html | safe }} {% endif %} 25 |
26 | {% endif %} 27 | {% else %} 28 |
No PR data available
29 | {% endif %} 30 | {% endblock %} 31 | {% block caption %} 32 | Shows the proportion of pull requests created by bots, CI/CD, AI agents and other automations. 33 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/chart.html: -------------------------------------------------------------------------------- 1 |
2 |

{% block title %}Chart Title{% endblock %}

3 |
4 | {% block metrics %} 5 | {% endblock %} 6 |
7 |
8 | {% block chart %} 9 | [ chart goes here ] 10 | {% endblock %} 11 |
12 |
13 | {% block caption %}Description of chart{% endblock %} 14 |
15 |
-------------------------------------------------------------------------------- /src/collab_dev/components/charts/chart_renderer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Chart renderer module to execute all available charts against a data frame. 3 | """ 4 | 5 | import sys 6 | from typing import Any, Dict, List 7 | 8 | import components.charts.approval_time 9 | import components.charts.bot_analysis 10 | import components.charts.contribution 11 | import components.charts.merge_time 12 | import components.charts.review_coverage 13 | import components.charts.review_funnel 14 | import components.charts.review_turnaround 15 | import components.charts.workflow 16 | import pandas as pd 17 | 18 | # Ordered list of chart modules 19 | CHART_MODULES = [ 20 | components.charts.workflow, 21 | components.charts.contribution, 22 | components.charts.bot_analysis, 23 | components.charts.review_coverage, 24 | components.charts.review_funnel, 25 | components.charts.review_turnaround, 26 | components.charts.approval_time, 27 | components.charts.merge_time, 28 | ] 29 | 30 | 31 | def render_charts(data: pd.DataFrame) -> List[Dict[str, Any]]: 32 | """ 33 | Render all available charts with the provided DataFrame. Have new charts? 34 | Add them to the CHART_MODULES list and they'll be rendered automatically. 35 | """ 36 | chart_renders = [] 37 | 38 | # Iterate through the ordered list of chart modules and render them 39 | for chart in CHART_MODULES: 40 | try: 41 | chart_renders.append(chart.render(data)) 42 | except Exception as e: 43 | print(f"Error rendering chart {chart.__name__}: {e}", file=sys.stderr) 44 | chart_renders.append(f"Failed to render {chart.__name__}. Error: {e}.") 45 | 46 | return chart_renders 47 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/contribution/__init__.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objects as go 3 | from components.charts.utils import ( 4 | apply_theme_to_figure, 5 | get_plotly_config, 6 | get_theme_colors, 7 | ) 8 | from flask import render_template 9 | from theme import CHART_DIMENSIONS 10 | 11 | 12 | def create_contribution_plot(stats: dict) -> go.Figure: 13 | """Create visualization configuration for contribution donut chart""" 14 | 15 | # Get theme colors for the chart 16 | colors = get_theme_colors(3) 17 | 18 | fig = go.Figure( 19 | data=[ 20 | go.Pie( 21 | labels=["Core Team", "Bot", "Community"], 22 | values=[ 23 | stats["core_percentage"], 24 | stats["bot_percentage"], 25 | stats["community_percentage"], 26 | ], 27 | hole=0.4, 28 | marker_colors=colors, # Use theme colors 29 | domain={"x": [0.05, 0.95], "y": [0, 0.85]}, # Match other pie charts 30 | textposition="inside", 31 | textinfo="percent", 32 | hovertemplate="%{label}: %{customdata}", 33 | customdata=[ 34 | f"{stats['core_prs']} {'PR' if stats['core_prs'] == 1 else 'PRs'}", 35 | f"{stats['bot_prs']} {'PR' if stats['bot_prs'] == 1 else 'PRs'}", 36 | f"{stats['community_prs']} {'PR' if stats['community_prs'] == 1 else 'PRs'}", 37 | ], 38 | insidetextorientation="auto", 39 | ) 40 | ] 41 | ) 42 | 43 | fig.update_layout( 44 | showlegend=True, 45 | legend=dict( 46 | orientation="h", 47 | yanchor="bottom", 48 | y=0.92, 49 | xanchor="center", 50 | x=0.5, 51 | bgcolor="rgba(255,255,255,0.8)", 52 | ), 53 | margin=dict(t=25, b=20, l=10, r=10), 54 | height=CHART_DIMENSIONS["pie_chart_height"], 55 | 
autosize=True, 56 | width=None, 57 | paper_bgcolor="rgba(0,0,0,0)", 58 | plot_bgcolor="rgba(0,0,0,0)", 59 | ) 60 | 61 | # Apply theme to the figure 62 | fig = apply_theme_to_figure(fig) 63 | 64 | return fig 65 | 66 | 67 | def render(repo_df): 68 | """ 69 | Render the contribution chart component 70 | 71 | Args: 72 | repo_df: DataFrame containing repository data 73 | 74 | Returns: 75 | str: Rendered HTML for the contribution component 76 | """ 77 | from .data import get_contribution_stats 78 | 79 | # Get the stats from data module 80 | stats = get_contribution_stats(repo_df) 81 | 82 | if not stats: 83 | return render_template("components/charts/contribution/template.html", contribution_data=None) 84 | 85 | # Create plot figure 86 | fig = create_contribution_plot(stats) 87 | 88 | # Get plotly config from theme 89 | config = get_plotly_config() 90 | 91 | # Convert the figure to HTML 92 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 93 | 94 | # Prepare data for template 95 | contribution_data = { 96 | "plot_html": plot_html, 97 | "stats": { 98 | "core_team": stats["core_percentage"], 99 | "bot": stats["bot_percentage"], 100 | "community": stats["community_percentage"], 101 | }, 102 | "counts": { 103 | "total": stats["total_prs"], 104 | "core": stats["core_prs"], 105 | "bot": stats["bot_prs"], 106 | "community": stats["community_prs"], 107 | }, 108 | } 109 | 110 | # Pass the prepared data to the template 111 | return render_template( 112 | "components/charts/contribution/template.html", 113 | contribution_data=contribution_data, 114 | ) 115 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/contribution/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def get_contribution_stats(repo_df: pd.DataFrame) -> dict: 5 | """ 6 | Calculate contribution percentages and prepare statistics. 7 | TODO: This function is too slow. We may need to pre-calculate this data and store it in the database. 
8 | """ 9 | 10 | if repo_df.empty: 11 | return None 12 | 13 | # Get unique PRs and their first events to determine PR type 14 | pr_data = repo_df[repo_df["event_type"] == "pr_created"].drop_duplicates("pr_number") 15 | total_prs = len(pr_data) 16 | 17 | if total_prs == 0: 18 | return None 19 | 20 | # Count PRs by type using the database columns 21 | bot_prs = len(pr_data[pr_data["is_bot"]]) 22 | non_bot_data = pr_data[~pr_data["is_bot"]] 23 | core_prs = len(non_bot_data[non_bot_data["is_core_team"]]) 24 | community_prs = len(non_bot_data[~non_bot_data["is_core_team"]]) 25 | 26 | # Calculate all stats needed for display 27 | stats = { 28 | "total_prs": total_prs, 29 | "core_prs": core_prs, 30 | "community_prs": community_prs, 31 | "bot_prs": bot_prs, 32 | "core_percentage": round((core_prs / total_prs * 100), 1) if total_prs > 0 else 0, 33 | "community_percentage": round((community_prs / total_prs * 100), 1) if total_prs > 0 else 0, 34 | "bot_percentage": round((bot_prs / total_prs * 100), 1) if total_prs > 0 else 0, 35 | } 36 | 37 | return stats 38 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/contribution/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Contributor Distribution{% endblock %} 4 | {% block metrics %} 5 | {% if contribution_data and contribution_data.stats %} 6 | {{ metric(label="Core Team PRs", value=contribution_data.stats.core_team|round(1) ~ "%", tip="Percentage of PRs created 7 | by the core team") }} 8 | {{ metric(label="Community PRs", value=contribution_data.stats.community|round(1) ~ "%", tip="Percentage of PRs created 9 | by the community") }} 10 | {{ metric(label="Bot PRs", value=contribution_data.stats.bot|round(1) ~ "%", tip="Percentage of PRs created by bots") }} 11 | {% endif %} 12 | {% endblock %} 13 | {% block chart %} 14 | {% if contribution_data and contribution_data.stats %} 15 |
16 | {% if contribution_data.plot_html %} 17 | {{ contribution_data.plot_html | safe }} 18 | {% endif %} 19 |
20 | {% else %} 21 |

No contribution data available

22 | {% endif %} 23 | {% endblock %} 24 | {% block caption %} 25 | Shows the distribution of pull requests by the author's role. Core team includes all admins, members, and collaborators. 26 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/merge_time/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import plotly 3 | import plotly.graph_objects as go 4 | from components.charts.utils import ( 5 | apply_theme_to_figure, 6 | get_plotly_config, 7 | get_theme_colors, 8 | humanize_time, 9 | ) 10 | from flask import render_template 11 | 12 | from .data import calculate_pmt 13 | 14 | 15 | def create_pr_merge_time_chart(data): 16 | """Create PR Merge Time visualization""" 17 | 18 | # Handle both DataFrame and dictionary input 19 | repo_df = data.get("pr_data") if isinstance(data, dict) else data 20 | if repo_df is None: 21 | return None, None 22 | 23 | median_time, pr_times, percentile_values = calculate_pmt(repo_df) 24 | 25 | if median_time is None or pr_times is None: 26 | return None, None 27 | 28 | # Sort merge times for CDF 29 | sorted_times = np.sort(pr_times["merge_time"]) 30 | cumulative_prob = np.arange(1, len(sorted_times) + 1) / len(sorted_times) 31 | 32 | # Calculate 95th percentile for x-axis limit 33 | percentile_95 = np.percentile(sorted_times, 95) 34 | 35 | # Create CDF plot 36 | # Get theme colors 37 | colors = get_theme_colors(5) 38 | 39 | fig = go.Figure() 40 | 41 | # Filter data points up to 95th percentile 42 | mask_95 = sorted_times <= percentile_95 43 | fig.add_trace( 44 | { 45 | "type": "scatter", 46 | "x": sorted_times[mask_95].tolist(), # Convert numpy array to list 47 | "y": cumulative_prob[mask_95].tolist(), # Convert numpy array to list 48 | "mode": "lines", 49 | "line": {"color": colors[0]}, # Use theme color 50 | "customdata": [[humanize_time(x)] for x in sorted_times[mask_95].tolist()], 51 | "hovertemplate": "%{y:.0%}: %{customdata[0]}", 52 | } 53 | ) 54 | 55 | # Add reference lines at key percentiles 56 | percentiles = [0.25, 0.5, 0.75] 57 | 58 | for p, val in zip(percentiles, percentile_values, strict=False): 59 | # Add vertical line 60 | fig.add_vline(x=val, line_dash="dash", line_color=colors[1], opacity=0.3) 61 | 62 | # Add annotation 63 | fig.add_annotation( 64 | x=val, 65 | y=p, 66 | text=f"{int(p * 100)}%: {val:.1f}h", 67 | showarrow=True, 68 | arrowhead=2, 69 | arrowsize=1, 70 | arrowwidth=1, 71 | arrowcolor=colors[1], 72 | font={"size": 12}, 73 | bgcolor="white", 74 | bordercolor=colors[1], 75 | borderwidth=1, 76 | borderpad=4, 77 | ax=40, 78 | ay=0, 79 | ) 80 | 81 | fig.update_layout( 82 | xaxis=dict(title="Merge Time (hours)", range=[0, percentile_95]), 83 | yaxis=dict(title="Cumulative Proportion of PRs", tickformat=",.0%", range=[0, 1.05]), 84 | showlegend=False, 85 | height=450, 86 | width=None, 87 | autosize=True, 88 | paper_bgcolor="white", 89 | plot_bgcolor="white", 90 | margin=dict(t=10, b=50, l=50, r=10), 91 | ) 92 | 93 | # Apply theme to the figure 94 | fig = apply_theme_to_figure(fig) 95 | 96 | return fig, median_time 97 | 98 | 99 | def render(data): 100 | """Render the PR merge time chart component""" 101 | 102 | try: 103 | # Create plot figure 104 | fig, median_time = create_pr_merge_time_chart(data) 105 | 106 | if fig is None: 107 | return render_template("components/charts/merge_time/template.html", pr_merge_data=None) 108 | 109 | # Get plotly config from theme 110 | config = 
get_plotly_config() 111 | 112 | # Convert the figure to HTML 113 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 114 | 115 | # Prepare data for template 116 | pr_merge_data = {"median_time": median_time, "plot_html": plot_html} 117 | 118 | return render_template( 119 | "components/charts/merge_time/template.html", pr_merge_data=pr_merge_data, humanize_time=humanize_time 120 | ) 121 | 122 | except Exception: 123 | return render_template("components/charts/merge_time/template.html", pr_merge_data=None) 124 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/merge_time/data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def calculate_pmt(repo_df: pd.DataFrame) -> tuple: 8 | """ 9 | Calculate PR Merge Time (PMT) metrics 10 | 11 | Args: 12 | repo_df (pd.DataFrame): DataFrame containing PR events 13 | 14 | Returns: 15 | tuple: (median_time, pr_times DataFrame, percentile_values) 16 | """ 17 | try: 18 | if not isinstance(repo_df, pd.DataFrame): 19 | return None, None, None 20 | 21 | if repo_df.empty: 22 | return None, None, None 23 | 24 | # Ensure 'time' is in datetime format 25 | repo_df["time"] = pd.to_datetime(repo_df["time"]) 26 | 27 | # Filter for PR creation and merge events 28 | pr_created = repo_df[repo_df["event_type"] == "pr_created"][["pr_number", "time"]] 29 | pr_merged = repo_df[repo_df["event_type"] == "pr_merged"][["pr_number", "time"]] 30 | 31 | # Merge the two DataFrames on 'pr_number' 32 | pr_times = pd.merge(pr_created, pr_merged, on="pr_number", suffixes=("_created", "_merged")) 33 | 34 | if len(pr_times) == 0: 35 | return None, None, None 36 | 37 | # Calculate the time difference in hours 38 | pr_times["merge_time"] = (pr_times["time_merged"] - pr_times["time_created"]).dt.total_seconds() / 3600 39 | 40 | # Calculate metrics 41 | median_time = pr_times["merge_time"].median() 42 | percentile_values = np.percentile(pr_times["merge_time"], [25, 50, 75]) 43 | 44 | return median_time, pr_times, percentile_values 45 | 46 | except Exception as e: 47 | logging.error(f"Error calculating PMT: {e}") 48 | return None, None, None 49 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/merge_time/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}PR Merge Time{% endblock %} 4 | {% block metrics %} 5 | {% if pr_merge_data %} 6 | {{ metric(label="Overall Median Merge Time", value=humanize_time(pr_merge_data.median_time|default(0)), tip="Median time 7 | from PR creation to merge") }} 8 | {% endif %} 9 | {% endblock %} 10 | {% block chart %} 11 | {% if pr_merge_data %} 12 |
13 | {% if pr_merge_data.plot_html %} 14 | {{ pr_merge_data.plot_html | safe }} 15 | {% endif %} 16 |
17 | {% else %} 18 |

No PR merge time data available

19 | {% endif %} 20 | {% endblock %} 21 | {% block caption %} 22 | Shows the distribution of time taken for pull requests to merge from creation. 23 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/metric.html: -------------------------------------------------------------------------------- 1 | {% macro metric(label, value, tip=None) %} 2 |
3 |
4 | {{ label }} 5 | {% if tip %} 6 | 8 | 12 | {% endif %} 13 |
14 |
15 | {{ value }} 16 |
17 |
18 | {% endmacro %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_coverage/__init__.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objects as go 3 | from components.charts.utils import ( 4 | apply_theme_to_figure, 5 | get_plotly_config, 6 | get_theme_colors, 7 | ) 8 | from flask import render_template 9 | 10 | from .data import get_review_merge_data 11 | 12 | 13 | def create_coverage_donut_plot(coverage_data: dict) -> dict: 14 | """Create donut chart visualization for review coverage""" 15 | 16 | # Calculate values 17 | reviewed = coverage_data["reviewed_prs"] 18 | unreviewed = coverage_data["unreviewed_prs"] 19 | 20 | # Get theme colors for the chart 21 | colors = get_theme_colors(2) 22 | 23 | # Create figure 24 | fig = go.Figure( 25 | data=[ 26 | go.Pie( 27 | values=[reviewed, unreviewed], 28 | labels=["Merged With Review", "Merged Without Review"], 29 | hole=0.4, 30 | textinfo="percent", 31 | marker_colors=colors, # Use theme colors 32 | domain={ 33 | "x": [0.15, 0.85], 34 | "y": [0.05, 0.85], 35 | }, # Balanced whitespace around the chart 36 | textposition="inside", 37 | hovertemplate="%{label}: %{customdata}", 38 | customdata=[ 39 | f"{reviewed} PR{'s' if reviewed != 1 else ''}", 40 | f"{unreviewed} PR{'s' if unreviewed != 1 else ''}", 41 | ], 42 | insidetextorientation="auto", 43 | ) 44 | ] 45 | ) 46 | 47 | # Update layout 48 | fig.update_layout( 49 | showlegend=True, 50 | height=350, # Reduce height to remove extra whitespace 51 | margin=dict(t=10, b=10, l=10, r=10), # Balanced margins around the chart 52 | legend=dict( 53 | orientation="h", 54 | yanchor="top", 55 | y=1.0, # Position at the top of the chart 56 | xanchor="center", # Center the legend 57 | x=0.5, # Center position 58 | bgcolor="rgba(255,255,255,0.8)", 59 | ), 60 | autosize=True, 61 | width=None, 62 | paper_bgcolor="rgba(0,0,0,0)", 63 | plot_bgcolor="rgba(0,0,0,0)", 64 | ) 65 | 66 | # Apply theme to the figure 67 | fig = apply_theme_to_figure(fig) 68 | 69 | return fig 70 | 71 | 72 | def render(repo_df): 73 | """Render the review coverage chart component""" 74 | 75 | try: 76 | # Get coverage data 77 | coverage_data = get_review_merge_data(repo_df) 78 | 79 | if not coverage_data: 80 | return render_template("components/charts/review_coverage/template.html", coverage_data=None) 81 | 82 | # Create plot figure 83 | fig = create_coverage_donut_plot(coverage_data) 84 | 85 | # Get plotly config from theme 86 | config = get_plotly_config() 87 | 88 | # Convert the figure to HTML 89 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 90 | 91 | # Add plot to template data 92 | coverage_data["plot_html"] = plot_html 93 | 94 | return render_template( 95 | "components/charts/review_coverage/template.html", 96 | coverage_data=coverage_data, 97 | ) 98 | 99 | except Exception: 100 | return render_template("components/charts/review_coverage/template.html", coverage_data=None) 101 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_coverage/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def calculate_review_ratio_stats(repo_df: pd.DataFrame) -> dict: 5 | """Calculate PR review ratio statistics""" 6 | try: 7 | if repo_df.empty: 8 | return None 9 | 10 | # Group by PR number to get unique PRs and 
their events 11 | pr_summary = ( 12 | repo_df.groupby("pr_number") 13 | .agg( 14 | { 15 | "event_type": list, 16 | "time": "first", # Keep first timestamp for reference 17 | } 18 | ) 19 | .reset_index() 20 | ) 21 | 22 | # Count total PRs 23 | total_prs = len(pr_summary) 24 | 25 | # Count PRs that received a review (any type of review action) 26 | reviewed_prs = sum( 27 | any(event in events for event in ["review_commented", "review_approved", "review_changes_requested"]) 28 | for events in pr_summary["event_type"] 29 | ) 30 | 31 | # Calculate unreviewed PRs 32 | unreviewed_prs = total_prs - reviewed_prs 33 | 34 | # Calculate review percentage 35 | review_percentage = (reviewed_prs / total_prs * 100) if total_prs > 0 else 0 36 | 37 | return { 38 | "total_prs": total_prs, 39 | "reviewed_prs": reviewed_prs, 40 | "unreviewed_prs": unreviewed_prs, 41 | "review_percentage": review_percentage, 42 | } 43 | 44 | except Exception: 45 | return None 46 | 47 | 48 | def get_review_merge_data(repo_df: pd.DataFrame) -> dict: 49 | """Process raw data into review merge metrics""" 50 | 51 | stats = calculate_review_ratio_stats(repo_df) 52 | if not stats: 53 | return None 54 | 55 | return stats 56 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_coverage/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Review Coverage{% endblock %} 4 | {% block metrics %} 5 | {% if coverage_data %} 6 | {{ metric(label="Review-Merge Rate", value="%.1f"|format(coverage_data.review_percentage) ~ "%", tip="Percentage of 7 | merged PRs that received reviews") }} 8 | {% endif %} 9 | {% endblock %} 10 | {% block chart %} 11 | {% if coverage_data %} 12 | {% if coverage_data.plot_html %} 13 | {{ coverage_data.plot_html | safe }} 14 | {% endif %} 15 | {% else %} 16 |

No review coverage data available

17 | {% endif %} 18 | {% endblock %} 19 | {% block caption %} 20 | Shows the percentage of pull requests that received reviews before being merged. 21 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_funnel/__init__.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objects as go 3 | from components.charts.utils import ( 4 | apply_theme_to_figure, 5 | get_plotly_config, 6 | get_theme_colors, 7 | ) 8 | from flask import render_template 9 | 10 | from .data import get_review_funnel_data 11 | 12 | 13 | def create_review_funnel_plot(funnel_data: dict) -> go.Figure: 14 | """Create visualization for review funnel""" 15 | 16 | # Prepare data for funnel chart with counts in the labels 17 | values = [funnel_data["total_prs"], funnel_data["reviewed_prs"], funnel_data["approved_prs"]] 18 | 19 | # Calculate relative percentages (each step relative to previous) 20 | total = values[0] 21 | reviewed = values[1] 22 | approved = values[2] 23 | 24 | # Create stage labels without counts 25 | stages = ["Total PRs", "Reviewed", "Approved"] 26 | 27 | # Format text to show just the count 28 | text = [str(values[0]), str(values[1]), str(values[2])] 29 | 30 | # Create hover text with percentages 31 | hover_text = [ 32 | f"{values[0]} PR{'s' if values[0] != 1 else ''} Total", 33 | f"{values[1]} PR{'s' if values[1] != 1 else ''} Reviewed ({reviewed / total * 100:.0f}%)", 34 | f"{values[2]} PR{'s' if values[2] != 1 else ''} Approved ({approved / reviewed * 100:.0f}%)", 35 | ] 36 | 37 | # Get theme colors 38 | colors = get_theme_colors(3) 39 | 40 | # Create figure 41 | fig = go.Figure( 42 | data=[ 43 | go.Funnel( 44 | y=stages, 45 | x=values, 46 | textinfo="text", 47 | textposition="auto", # Automatically place text inside or outside based on space 48 | text=text, 49 | hovertemplate="%{customdata}", 50 | customdata=hover_text, 51 | marker={ 52 | "color": colors, 53 | "line": {"width": 0}, # Remove the line around funnel segments 54 | }, 55 | connector={"line": {"color": "rgba(0,0,0,0)", "width": 0}}, # Make connector lines invisible 56 | textfont={"size": 14}, # Match font size 57 | ) 58 | ] 59 | ) 60 | 61 | # Update layout 62 | fig.update_layout( 63 | showlegend=False, 64 | margin={"t": 20, "l": 150, "r": 100, "b": 20}, 65 | height=300, 66 | font={"size": 14}, 67 | # Hide all axis lines, ticks, and grid lines 68 | xaxis={"showgrid": False, "zeroline": False, "showline": False, "showticklabels": False, "ticks": ""}, 69 | yaxis={ 70 | "showgrid": False, 71 | "zeroline": False, 72 | "showline": False, 73 | "ticks": "", 74 | "tickmode": "array", 75 | "ticktext": stages, # Use the HTML formatted labels 76 | "tickfont": {"size": 14}, # Match the font size 77 | }, 78 | ) 79 | 80 | # Apply theme to the figure 81 | fig = apply_theme_to_figure(fig) 82 | 83 | return fig 84 | 85 | 86 | def render(repo_df): 87 | """Render the review funnel chart component""" 88 | 89 | try: 90 | # Get funnel data 91 | funnel_data = get_review_funnel_data(repo_df) 92 | 93 | if not funnel_data: 94 | return render_template("components/charts/review_funnel/template.html", review_data=None) 95 | 96 | # Calculate rates 97 | total_prs = funnel_data["total_prs"] 98 | reviewed_prs = funnel_data["reviewed_prs"] 99 | approved_prs = funnel_data["approved_prs"] 100 | 101 | review_rate = (reviewed_prs / total_prs * 100) if total_prs > 0 else 0 102 | approval_rate = (approved_prs / reviewed_prs * 
100) if reviewed_prs > 0 else 0 103 | 104 | # Create plot figure 105 | fig = create_review_funnel_plot(funnel_data) 106 | 107 | # Get plotly config from theme 108 | config = get_plotly_config() 109 | 110 | # Convert the figure to HTML 111 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 112 | 113 | # Prepare data for template 114 | template_data = { 115 | "total_prs": total_prs, 116 | "reviewed_prs": reviewed_prs, 117 | "approved_prs": approved_prs, 118 | "review_rate": review_rate, 119 | "approval_rate": approval_rate, 120 | "plot_html": plot_html, 121 | } 122 | 123 | return render_template("components/charts/review_funnel/template.html", review_data=template_data) 124 | 125 | except Exception: 126 | return render_template("components/charts/review_funnel/template.html", review_data=None) 127 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_funnel/data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pandas as pd 4 | 5 | 6 | def get_pr_review_stats(pr_summary: pd.DataFrame) -> dict: 7 | """ 8 | Calculate review flow statistics from PR summary DataFrame 9 | 10 | Args: 11 | pr_summary (pd.DataFrame): DataFrame from load_repository_prs 12 | 13 | Returns: 14 | dict: Review statistics including counts of different review states 15 | """ 16 | # Group by PR number to get unique PRs and their events 17 | pr_events = ( 18 | pr_summary.groupby("pr_number") 19 | .agg( 20 | { 21 | "event_type": list, 22 | "time": "first", # Keep first timestamp for reference 23 | } 24 | ) 25 | .reset_index() 26 | ) 27 | 28 | total_prs = len(pr_events) 29 | 30 | # Count different review states 31 | review_requested = sum("review_requested" in events for events in pr_events["event_type"]) 32 | review_completed = sum( 33 | any(event in events for event in ["review_commented", "review_changes_requested", "review_approved"]) 34 | and "review_requested" in events 35 | for events in pr_events["event_type"] 36 | ) 37 | review_approved = sum( 38 | "review_approved" in events and "review_requested" in events for events in pr_events["event_type"] 39 | ) 40 | approved_without_request = sum( 41 | "review_approved" in events and "review_requested" not in events for events in pr_events["event_type"] 42 | ) 43 | merged_without_review = sum( 44 | not any(event in events for event in ["review_approved", "review_commented", "review_changes_requested"]) 45 | for events in pr_events["event_type"] 46 | ) 47 | 48 | return { 49 | "total_prs": total_prs, 50 | "review_requested": review_requested, 51 | "review_completed": review_completed, 52 | "review_approved": review_approved, 53 | "approved_without_review_request": approved_without_request, 54 | "merged_without_review": merged_without_review, 55 | } 56 | 57 | 58 | def analyze_pr_review_flow(repo_df: pd.DataFrame) -> dict: 59 | """Analyze PR review flow metrics for a repository""" 60 | 61 | if repo_df.empty: 62 | return None 63 | 64 | return get_pr_review_stats(repo_df) 65 | 66 | 67 | def get_simplified_pr_flow_stats(pr_summary: pd.DataFrame) -> dict: 68 | """ 69 | Calculate simplified PR flow statistics with just created, reviewed, and approved stages 70 | 71 | Args: 72 | pr_summary (pd.DataFrame): DataFrame from load_repository_prs 73 | 74 | Returns: 75 | dict: Review statistics with basic flow stages 76 | """ 77 | total_prs = len(pr_summary) 78 | 79 | # Count PRs that received any type of review 
80 | reviewed_prs = sum( 81 | any(event in events for event in ["review_commented", "review_changes_requested", "review_approved"]) 82 | for events in pr_summary["event_type"] 83 | ) 84 | 85 | # Count PRs that were approved 86 | approved_prs = sum("review_approved" in events for events in pr_summary["event_type"]) 87 | 88 | return {"total_prs": total_prs, "reviewed_prs": reviewed_prs, "approved_prs": approved_prs} 89 | 90 | 91 | def analyze_simplified_pr_flow(repo_df: pd.DataFrame) -> dict: 92 | """Analyze simplified PR flow metrics for a repository""" 93 | 94 | if repo_df.empty: 95 | return None 96 | 97 | # Group by PR number to get unique PRs and their events 98 | pr_events = ( 99 | repo_df.groupby("pr_number") 100 | .agg( 101 | { 102 | "event_type": list, 103 | "time": "first", # Keep first timestamp for reference 104 | } 105 | ) 106 | .reset_index() 107 | ) 108 | 109 | return get_simplified_pr_flow_stats(pr_events) 110 | 111 | 112 | def get_review_funnel_data(repo_df: pd.DataFrame) -> dict: 113 | """Process raw data into review funnel metrics""" 114 | 115 | if repo_df.empty: 116 | logging.debug("Empty repository dataframe") 117 | return None 118 | 119 | # Group by PR number to get unique PRs and their events 120 | pr_events = repo_df.groupby("pr_number").agg({"event_type": list, "time": "first"}).reset_index() 121 | 122 | logging.debug(f"PR events shape: {pr_events.shape}") 123 | 124 | total_prs = len(pr_events) 125 | 126 | # Count PRs that received any type of review 127 | reviewed_prs = sum( 128 | any(event in events for event in ["review_commented", "review_changes_requested", "review_approved"]) 129 | for events in pr_events["event_type"] 130 | ) 131 | 132 | # Count PRs that were approved 133 | approved_prs = sum("review_approved" in events for events in pr_events["event_type"]) 134 | 135 | logging.debug(f"Total PRs: {total_prs}, Reviewed: {reviewed_prs}, Approved: {approved_prs}") 136 | 137 | if total_prs == 0: 138 | logging.debug("No PRs found") 139 | return None 140 | 141 | return {"total_prs": total_prs, "reviewed_prs": reviewed_prs, "approved_prs": approved_prs} 142 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_funnel/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Review Funnel{% endblock %} 4 | {% block metrics %} 5 | {% if review_data %} 6 | {{ metric(label="Review Rate", value="%.1f"|format(review_data.review_rate) ~ "%", tip="Percentage of PRs that received 7 | reviews") }} 8 | {{ metric(label="Approval Rate", value="%.1f"|format(review_data.approval_rate) ~ "%", tip="Percentage of reviewed PRs 9 | that were approved") }} 10 | {% endif %} 11 | {% endblock %} 12 | {% block chart %} 13 | {% if review_data %} 14 |
15 | {% if review_data.plot_html %} 16 | {{ review_data.plot_html | safe }} 17 | {% endif %} 18 |
19 | {% else %} 20 |

No review process data available

21 | {% endif %} 22 | {% endblock %} 23 | {% block caption %} 24 | Shows the progression of pull requests from creation through review to approval. 25 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_turnaround/__init__.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objects as go 3 | from components.charts.utils import ( 4 | apply_theme_to_figure, 5 | get_plotly_config, 6 | get_theme_colors, 7 | humanize_time, 8 | ) 9 | from flask import render_template 10 | 11 | from .data import get_review_turnaround_data 12 | 13 | 14 | def create_turnaround_distribution_plot(turnaround_data: dict) -> go.Figure: 15 | """Create visualization for review turnaround distribution""" 16 | 17 | # Calculate percentages for each segment 18 | within_1h = turnaround_data["within_1h"] 19 | within_4h = turnaround_data["within_4h"] - turnaround_data["within_1h"] 20 | within_24h = turnaround_data["within_24h"] - turnaround_data["within_4h"] 21 | over_24h = 100 - turnaround_data["within_24h"] 22 | 23 | # Calculate counts for hover text 24 | total_prs = turnaround_data["total_prs"] 25 | within_1h_count = int(within_1h * total_prs / 100) 26 | within_4h_count = int((turnaround_data["within_4h"] - turnaround_data["within_1h"]) * total_prs / 100) 27 | within_24h_count = int((turnaround_data["within_24h"] - turnaround_data["within_4h"]) * total_prs / 100) 28 | over_24h_count = total_prs - within_1h_count - within_4h_count - within_24h_count 29 | 30 | # Get theme colors for the chart 31 | colors = get_theme_colors(4) 32 | 33 | # Create figure 34 | fig = go.Figure() 35 | 36 | # Add each segment in the order they should appear in the chart 37 | fig.add_trace( 38 | go.Bar( 39 | y=[""], 40 | x=[within_1h], 41 | name="Within 1 hour", 42 | orientation="h", 43 | marker=dict(color=colors[0], line=dict(width=0)), 44 | hoverinfo="text", 45 | hovertext=[f"Within 1 hour: {within_1h_count} {'PR' if within_1h_count == 1 else 'PRs'}"], 46 | text=[f"{within_1h:.1f}%"], 47 | textposition="auto", 48 | insidetextanchor="middle", 49 | ) 50 | ) 51 | 52 | fig.add_trace( 53 | go.Bar( 54 | y=[""], 55 | x=[within_4h], 56 | name="Within 4 hours", 57 | orientation="h", 58 | marker=dict(color=colors[1], line=dict(width=0)), 59 | hoverinfo="text", 60 | hovertext=[f"Within 4 hours: {within_4h_count} {'PR' if within_4h_count == 1 else 'PRs'}"], 61 | text=[f"{within_4h:.1f}%"], 62 | textposition="auto", 63 | insidetextanchor="middle", 64 | ) 65 | ) 66 | 67 | fig.add_trace( 68 | go.Bar( 69 | y=[""], 70 | x=[within_24h], 71 | name="Within 24 hours", 72 | orientation="h", 73 | marker=dict(color=colors[2], line=dict(width=0)), 74 | hoverinfo="text", 75 | hovertext=[f"Within 24 hours: {within_24h_count} {'PR' if within_24h_count == 1 else 'PRs'}"], 76 | text=[f"{within_24h:.1f}%"], 77 | textposition="auto", 78 | insidetextanchor="middle", 79 | ) 80 | ) 81 | 82 | fig.add_trace( 83 | go.Bar( 84 | y=[""], 85 | x=[over_24h], 86 | name="Over 24 hours", 87 | orientation="h", 88 | marker=dict(color=colors[3], line=dict(width=0)), 89 | hoverinfo="text", 90 | hovertext=[f"Over 24 hours: {over_24h_count} {'PR' if over_24h_count == 1 else 'PRs'}"], 91 | text=[f"{over_24h:.1f}%"], 92 | textposition="auto", 93 | insidetextanchor="middle", 94 | ) 95 | ) 96 | 97 | # Add x-axis ticks 98 | tick_vals = [0, 20, 40, 60, 80, 100] 99 | 100 | # Update layout 101 | fig.update_layout( 102 | barmode="stack", 103 | 
showlegend=True, 104 | legend=dict( 105 | orientation="h", 106 | yanchor="bottom", 107 | y=1.1, 108 | xanchor="center", 109 | x=0.5, 110 | traceorder="normal", 111 | font=dict(size=10), 112 | ), 113 | margin=dict(t=30, l=0, r=0, b=20), 114 | height=150, 115 | uniformtext=dict(mode="hide", minsize=10), 116 | xaxis=dict( 117 | range=[0, 100], 118 | showgrid=True, 119 | tickvals=tick_vals, 120 | zeroline=False, 121 | fixedrange=True, 122 | ), 123 | yaxis=dict(showticklabels=False, showgrid=False, fixedrange=True), 124 | plot_bgcolor="white", 125 | paper_bgcolor="white", 126 | ) 127 | 128 | # Apply theme to figure 129 | fig = apply_theme_to_figure(fig) 130 | 131 | return fig 132 | 133 | 134 | def render(repo_df): 135 | """Render the review turnaround chart component""" 136 | 137 | try: 138 | # Get turnaround data 139 | turnaround_data = get_review_turnaround_data(repo_df) 140 | 141 | if not turnaround_data: 142 | return render_template( 143 | "components/charts/review_turnaround/template.html", 144 | turnaround_data=None, 145 | ) 146 | 147 | # Create plot figure 148 | fig = create_turnaround_distribution_plot(turnaround_data) 149 | 150 | # Get plotly config from theme 151 | config = get_plotly_config() 152 | 153 | # Convert the figure to HTML 154 | plot_html = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div", config=config) 155 | 156 | # Prepare data for template 157 | chart_data = { 158 | "plot_html": plot_html, 159 | "median_hours": turnaround_data["median_hours"], 160 | "total_prs": turnaround_data["total_prs"], 161 | "reviewed_prs": turnaround_data["reviewed_prs"], 162 | "review_rate": turnaround_data["review_rate"], 163 | "within_1h": turnaround_data["within_1h"], 164 | "within_4h": turnaround_data["within_4h"], 165 | "within_24h": turnaround_data["within_24h"], 166 | } 167 | 168 | # Pass the prepared data to the template 169 | return render_template( 170 | "components/charts/review_turnaround/template.html", 171 | turnaround_data=chart_data, 172 | humanize_time=humanize_time, 173 | ) 174 | 175 | except Exception: 176 | return render_template("components/charts/review_turnaround/template.html", turnaround_data=None) 177 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_turnaround/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def calculate_rtt_trends(repo_df: pd.DataFrame) -> pd.DataFrame: 5 | """ 6 | Calculate Review Turnaround Time (RTT) trends over time 7 | 8 | Args: 9 | repo_df (pd.DataFrame): DataFrame containing PR events 10 | 11 | Returns: 12 | pd.DataFrame: DataFrame with RTT trends 13 | """ 14 | try: 15 | if repo_df.empty: 16 | return pd.DataFrame() 17 | 18 | # Get PR creation and first review request times for each PR 19 | pr_created = ( 20 | repo_df[repo_df["event_type"] == "pr_created"] 21 | .groupby("pr_number") 22 | .agg( 23 | { 24 | "time": "first", 25 | "pr_title": "first", # Get PR title for hover info 26 | } 27 | ) 28 | ) 29 | review_requests = repo_df[repo_df["event_type"] == "review_requested"].groupby("pr_number")["time"].first() 30 | 31 | # Match PRs that have both creation and review request times 32 | matched_prs = pd.DataFrame( 33 | { 34 | "created_time": pr_created["time"], 35 | "pr_title": pr_created["pr_title"], 36 | "review_requested_time": review_requests, 37 | } 38 | ).dropna() 39 | 40 | if matched_prs.empty: 41 | return pd.DataFrame() 42 | 43 | # Calculate time difference in hours 44 | 
matched_prs["turnaround_hours"] = ( 45 | matched_prs["review_requested_time"] - matched_prs["created_time"] 46 | ).dt.total_seconds() / 3600 47 | 48 | # Sort by creation time 49 | matched_prs = matched_prs.sort_values("created_time") 50 | 51 | # Calculate rolling median (7 PRs window) 52 | matched_prs["rolling_median"] = matched_prs["turnaround_hours"].rolling(window=7, min_periods=1).median() 53 | 54 | return matched_prs 55 | 56 | except Exception: 57 | return pd.DataFrame() 58 | 59 | 60 | def calculate_rtt(repo_df: pd.DataFrame) -> float: 61 | """Calculate overall median RTT""" 62 | try: 63 | trends_df = calculate_rtt_trends(repo_df) 64 | if trends_df.empty: 65 | return None 66 | return trends_df["turnaround_hours"].median() 67 | except Exception: 68 | return None 69 | 70 | 71 | def calculate_rtt_stats(repo_df: pd.DataFrame) -> dict: 72 | """Calculate RTT statistics including thresholds and distribution""" 73 | try: 74 | if repo_df.empty: 75 | return None 76 | 77 | # Get all PRs created 78 | all_prs = repo_df[repo_df["event_type"] == "pr_created"]["pr_number"].nunique() 79 | 80 | # Initialize DataFrame to store turnaround times 81 | turnaround_times = [] 82 | 83 | # Process each PR 84 | for pr_number in repo_df[repo_df["event_type"] == "pr_created"]["pr_number"].unique(): 85 | pr_events = repo_df[repo_df["pr_number"] == pr_number].sort_values("time") 86 | 87 | # Get PR creation time 88 | pr_created_time = pr_events[pr_events["event_type"] == "pr_created"]["time"].iloc[0] 89 | 90 | # Check for review requests 91 | review_requests = pr_events[pr_events["event_type"] == "review_requested"] 92 | 93 | if not review_requests.empty: 94 | # For each review request, find the first review action from that reviewer 95 | for _, request in review_requests.iterrows(): 96 | request_time = request["time"] 97 | requested_reviewer = request.get("target_user") # Use get() to avoid KeyError 98 | 99 | if not requested_reviewer: 100 | continue 101 | 102 | # Find first review action from this reviewer 103 | review_actions = pr_events[ 104 | (pr_events["time"] > request_time) 105 | & (pr_events["actor"] == requested_reviewer) 106 | & ( 107 | pr_events["event_type"].isin( 108 | ["review_approved", "review_changes_requested", "review_commented"] 109 | ) 110 | ) 111 | ] 112 | 113 | if not review_actions.empty: 114 | first_review_time = review_actions["time"].iloc[0] 115 | turnaround_hours = (first_review_time - request_time).total_seconds() / 3600 116 | turnaround_times.append(turnaround_hours) 117 | break # Only consider the first successful review request 118 | else: 119 | # If no review request, measure from PR creation to first review action 120 | review_actions = pr_events[ 121 | pr_events["event_type"].isin(["review_approved", "review_changes_requested", "review_commented"]) 122 | ] 123 | 124 | if not review_actions.empty: 125 | first_review_time = review_actions["time"].iloc[0] 126 | turnaround_hours = (first_review_time - pr_created_time).total_seconds() / 3600 127 | turnaround_times.append(turnaround_hours) 128 | 129 | if not turnaround_times: 130 | return None 131 | 132 | turnaround_times = pd.Series(turnaround_times) 133 | 134 | # Calculate statistics 135 | stats = { 136 | "median_hours": turnaround_times.median(), 137 | "total_prs": all_prs, 138 | "reviewed_prs": len(turnaround_times), 139 | "review_rate": (len(turnaround_times) / all_prs) * 100, 140 | "within_1h": (turnaround_times <= 1).mean() * 100, 141 | "within_4h": (turnaround_times <= 4).mean() * 100, 142 | "within_24h": (turnaround_times <= 
24).mean() * 100, 143 | } 144 | 145 | return stats 146 | 147 | except Exception: 148 | return None 149 | 150 | 151 | def get_review_turnaround_data(repo_df: pd.DataFrame) -> dict: 152 | """Process raw data into review turnaround metrics""" 153 | try: 154 | # Validate input data 155 | if repo_df is None or not isinstance(repo_df, pd.DataFrame): 156 | return None 157 | 158 | if repo_df.empty: 159 | return None 160 | 161 | # Check for required columns 162 | required_columns = ["event_type", "pr_number", "time", "actor"] 163 | missing_columns = [col for col in required_columns if col not in repo_df.columns] 164 | if missing_columns: 165 | return None 166 | 167 | # Check for required event types 168 | pr_created_events = repo_df[repo_df["event_type"] == "pr_created"] 169 | if len(pr_created_events) == 0: 170 | return None 171 | 172 | stats = calculate_rtt_stats(repo_df) 173 | if not stats: 174 | return None 175 | 176 | # Convert numpy values to Python floats 177 | return { 178 | "median_hours": float(stats["median_hours"]), 179 | "total_prs": stats["total_prs"], 180 | "reviewed_prs": stats["reviewed_prs"], 181 | "review_rate": float(stats["review_rate"]), 182 | "within_1h": float(stats["within_1h"]), 183 | "within_4h": float(stats["within_4h"]), 184 | "within_24h": float(stats["within_24h"]), 185 | } 186 | 187 | except Exception: 188 | return None 189 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/review_turnaround/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Review Turnaround{% endblock %} 4 | {% block metrics %} 5 | {% if turnaround_data %} 6 | {{ metric(label="Review Turnaround Time", value=humanize_time(turnaround_data.median_hours), tip="Median time to first 7 | review") }} 8 | {{ metric(label="Reviewed in ≤1 Hour", value="%.1f"|format(turnaround_data.within_1h) ~ "%", tip="PRs that received a 9 | review within 1 hour") }} 10 | {% endif %} 11 | {% endblock %} 12 | {% block chart %} 13 | {% if turnaround_data %} 14 | 15 |
16 | {% if turnaround_data.plot_html %} 17 | {{ turnaround_data.plot_html | safe }} 18 | {% else %} 19 |

Chart data generated but plot HTML is missing

20 | {% endif %} 21 |
22 | {% else %} 23 |

No review turnaround data available

24 | {% endif %} 25 | {% endblock %} 26 | {% block caption %} 27 | Shows the time required to receive the first review on pull requests. 28 | {% endblock %} 29 | {% block extra_js %} 30 | 57 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/components/charts/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for chart components to apply consistent theming 3 | """ 4 | 5 | import pandas as pd 6 | import plotly.graph_objects as go 7 | import theme as theme 8 | 9 | 10 | def apply_theme_to_figure(fig: go.Figure) -> go.Figure: 11 | """ 12 | Apply the application theme to a Plotly figure 13 | 14 | Args: 15 | fig: Plotly figure to apply theme to 16 | 17 | Returns: 18 | go.Figure: Themed Plotly figure 19 | """ 20 | # Get the plotly template from our theme module 21 | template = theme.get_plotly_template() 22 | 23 | # Apply layout settings from the template 24 | for key, value in template["layout"].items(): 25 | if key not in fig.layout or fig.layout[key] is None: 26 | fig.layout[key] = value 27 | 28 | # Apply font settings if they exist 29 | if "font" in template["layout"]: 30 | if "font" not in fig.layout: 31 | fig.layout.font = template["layout"]["font"] 32 | else: 33 | for font_key, font_value in template["layout"]["font"].items(): 34 | if font_key not in fig.layout.font or fig.layout.font[font_key] is None: 35 | fig.layout.font[font_key] = font_value 36 | 37 | # Apply axis settings if they exist 38 | for axis in ["xaxis", "yaxis"]: 39 | if axis in template["layout"]: 40 | if axis not in fig.layout: 41 | fig.layout[axis] = template["layout"][axis] 42 | else: 43 | for axis_key, axis_value in template["layout"][axis].items(): 44 | if axis_key not in fig.layout[axis] or fig.layout[axis][axis_key] is None: 45 | fig.layout[axis][axis_key] = axis_value 46 | 47 | return fig 48 | 49 | 50 | def get_theme_colors(num_colors: int = 5, palette: str = "primary") -> list: 51 | """ 52 | Get a list of colors from the theme for charts 53 | 54 | Args: 55 | num_colors: Number of colors needed 56 | palette: Which palette to use ('primary', 'secondary', 'mono', 'diverging') 57 | 58 | Returns: 59 | list: List of color hex codes 60 | """ 61 | return theme.get_chart_colors(num_colors, palette) 62 | 63 | 64 | def get_plotly_config() -> dict: 65 | """ 66 | Get a consistent Plotly config for all charts 67 | 68 | Returns: 69 | dict: Plotly config 70 | """ 71 | return { 72 | "displayModeBar": False, 73 | "responsive": True, 74 | "displaylogo": False, # Disable the Plotly logo/advertisement 75 | "modeBarButtonsToRemove": ["sendDataToCloud", "autoScale2d", "resetScale2d"], 76 | } 77 | 78 | 79 | def humanize_time(hours, precision=1): 80 | """ 81 | Convert a time duration (in hours) to a human-readable string. 82 | Automatically selects the most appropriate unit (seconds to years) for display. 83 | 84 | Args: 85 | hours: Number of hours (input is always in hours) 86 | precision: Number of decimal places for values 87 | 88 | Returns: 89 | str: Human-readable string with appropriate unit (e.g. 
"2.5 minutes", "3 days") 90 | """ 91 | if hours is None or pd.isna(hours): 92 | return "N/A" 93 | 94 | # Convert hours to seconds for easier unit conversion 95 | seconds = hours * 3600 96 | 97 | # Less than a minute 98 | if seconds < 60: 99 | return f"{int(seconds)} seconds" 100 | 101 | # Less than an hour 102 | if seconds < 3600: 103 | minutes = seconds / 60 104 | return f"{minutes:.1f} minutes" 105 | 106 | # Less than a day 107 | if seconds < 86400: 108 | return f"{hours:.1f} hours" 109 | 110 | # Days 111 | days = hours / 24 112 | if days < 7: 113 | return f"{days:.1f} days" 114 | 115 | # Weeks 116 | weeks = days / 7 117 | if weeks < 4: 118 | return f"{weeks:.1f} weeks" 119 | 120 | # Months (approximate) 121 | months = days / 30.44 122 | if months < 12: 123 | return f"{months:.1f} months" 124 | 125 | # Years 126 | years = days / 365.25 127 | return f"{years:.1f} years" 128 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | import plotly.graph_objects as go 4 | from components.charts.utils import ( 5 | apply_theme_to_figure, 6 | get_plotly_config, 7 | get_theme_colors, 8 | ) 9 | from flask import render_template 10 | 11 | from .data import prepare_sankey_data 12 | 13 | 14 | def create_pr_flow_chart(data) -> Optional[Dict]: 15 | """Creates a Sankey diagram showing PR flow through different stages""" 16 | if not data: 17 | return None 18 | 19 | # Process links into sources and targets arrays 20 | sources = [] 21 | targets = [] 22 | values = [] 23 | node_values = [0] * len(data["nodes"]) # Initialize array for node values 24 | 25 | for link in data["links"]: 26 | source_idx = data["nodes"].index(link["source"]) 27 | target_idx = data["nodes"].index(link["target"]) 28 | sources.append(source_idx) 29 | targets.append(target_idx) 30 | values.append(link["value"]) 31 | node_values[source_idx] = link["value"] # Store value for each node 32 | 33 | # Create the Plotly figure 34 | # Get theme colors 35 | colors = get_theme_colors(len(data["nodes"]), "primary") 36 | 37 | fig = go.Figure( 38 | data=[ 39 | go.Sankey( 40 | arrangement="snap", 41 | node=dict( 42 | pad=15, 43 | thickness=20, 44 | line=dict(color="rgba(0,0,0,0.3)", width=0.5), 45 | label=data["nodes"], 46 | color=colors, # Using theme colors 47 | hoverlabel=dict( 48 | bgcolor="rgba(100,100,100,0.8)", # Semi-transparent dark background 49 | bordercolor="rgba(0,0,0,0)", # Transparent border 50 | font=dict(size=16, color="white"), # White text 51 | ), 52 | customdata=[ 53 | [val, "PR" if val == 1 else "PRs"] for val in node_values 54 | ], # Use node_values instead of values 55 | hovertemplate="%{value:.0f} %{customdata[1]}", # Simple PR count for nodes 56 | ), 57 | link=dict( 58 | source=sources, 59 | target=targets, 60 | value=values, 61 | color=["rgba(229, 229, 229, 0.5)"] * len(sources), 62 | hoverlabel=dict( 63 | bgcolor="rgba(100,100,100,0.8)", # Semi-transparent dark background 64 | bordercolor="rgba(0,0,0,0)", # Transparent border 65 | font=dict(size=16, color="white"), # White text 66 | ), 67 | customdata=[[val, "PR" if val == 1 else "PRs"] for val in values], 68 | hovertemplate="%{value:.0f} %{customdata[1]}
" 69 | + "%{source.label} → %{target.label}", # Clean text format for links 70 | ), 71 | ) 72 | ] 73 | ) 74 | 75 | fig.update_layout( 76 | title=None, 77 | font={"size": 14}, 78 | height=400, 79 | margin={"t": 20, "l": 20, "r": 20, "b": 20}, 80 | ) 81 | 82 | # Apply theme to the figure 83 | fig = apply_theme_to_figure(fig) 84 | 85 | # Convert to HTML with the consistent Plotly config 86 | return fig.to_html(full_html=False, include_plotlyjs=False, config=get_plotly_config()) 87 | 88 | 89 | def render(repo_df) -> str: 90 | pr_flow_data = prepare_sankey_data(repo_df) 91 | chart_html = create_pr_flow_chart(pr_flow_data) 92 | pr_count = repo_df["pr_number"].nunique() 93 | event_count = len(repo_df) 94 | 95 | # Format numbers with comma separators and no decimals 96 | formatted_pr_count = f"{pr_count:,}" 97 | formatted_event_count = f"{event_count:,}" 98 | 99 | if not chart_html: 100 | return "
No data available
" 101 | 102 | return render_template( 103 | "components/charts/workflow/template.html", 104 | chart_content=chart_html, 105 | pr_count=formatted_pr_count, 106 | event_count=formatted_event_count, 107 | ) 108 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/workflow/data.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | import pandas as pd 4 | 5 | 6 | def prepare_sankey_data(df: pd.DataFrame) -> Optional[Dict]: 7 | """ 8 | Process PR events into a format suitable for a Sankey diagram. 9 | 10 | Args: 11 | df: DataFrame containing PR events 12 | 13 | Returns: 14 | Dictionary containing nodes and links for the Sankey diagram, or None if no data 15 | """ 16 | if df.empty: 17 | return None 18 | 19 | # Group events by PR number to analyze flow 20 | pr_events = ( 21 | df.groupby("pr_number") 22 | .agg( 23 | { 24 | "event_type": list, 25 | "time": "first", # Keep first timestamp for reference 26 | } 27 | ) 28 | .reset_index() 29 | ) 30 | 31 | # Initialize node lists and link counts 32 | nodes = ["PRs Created"] 33 | links = [] 34 | 35 | # Count initial PRs 36 | total_prs = len(pr_events) 37 | 38 | # Track PRs at each stage 39 | review_requested = sum("review_requested" in events for events in pr_events["event_type"]) 40 | direct_reviews = sum( 41 | any(event in ["review_commented", "review_changes_requested", "review_approved"] for event in events) 42 | and "review_requested" not in events 43 | for events in pr_events["event_type"] 44 | ) 45 | no_review = total_prs - review_requested - direct_reviews 46 | 47 | # Add review request flow 48 | nodes.extend(["Review Requested", "No Review", "Direct Review"]) 49 | 50 | links.extend( 51 | [ 52 | {"source": "PRs Created", "target": "Review Requested", "value": review_requested}, 53 | {"source": "PRs Created", "target": "No Review", "value": no_review}, 54 | {"source": "PRs Created", "target": "Direct Review", "value": direct_reviews}, 55 | ] 56 | ) 57 | 58 | # Track review outcomes 59 | nodes.extend(["Approved", "Commented"]) 60 | 61 | # Count PRs by their review outcome 62 | approved_prs = sum("review_approved" in events for events in pr_events["event_type"]) 63 | commented_prs = sum( 64 | "review_commented" in events and "review_approved" not in events for events in pr_events["event_type"] 65 | ) 66 | 67 | # Calculate how many PRs went from each review path to each outcome 68 | # For Review Requested path 69 | if review_requested > 0: 70 | approved_from_requested = sum( 71 | "review_approved" in events and "review_requested" in events for events in pr_events["event_type"] 72 | ) 73 | commented_from_requested = sum( 74 | "review_commented" in events and "review_requested" in events and "review_approved" not in events 75 | for events in pr_events["event_type"] 76 | ) 77 | 78 | # Add links for review outcomes 79 | if approved_from_requested > 0: 80 | links.append({"source": "Review Requested", "target": "Approved", "value": approved_from_requested}) 81 | if commented_from_requested > 0: 82 | links.append({"source": "Review Requested", "target": "Commented", "value": commented_from_requested}) 83 | 84 | # If there are remaining PRs with review requested but no outcome, add them to Approved 85 | remaining_requested = review_requested - approved_from_requested - commented_from_requested 86 | if remaining_requested > 0: 87 | links.append({"source": "Review Requested", "target": "Approved", "value": 
remaining_requested}) 88 | 89 | # For Direct Review path 90 | if direct_reviews > 0: 91 | approved_from_direct = sum( 92 | "review_approved" in events and "review_requested" not in events for events in pr_events["event_type"] 93 | ) 94 | commented_from_direct = sum( 95 | "review_commented" in events and "review_requested" not in events and "review_approved" not in events 96 | for events in pr_events["event_type"] 97 | ) 98 | 99 | # Add links for review outcomes 100 | if approved_from_direct > 0: 101 | links.append({"source": "Direct Review", "target": "Approved", "value": approved_from_direct}) 102 | if commented_from_direct > 0: 103 | links.append({"source": "Direct Review", "target": "Commented", "value": commented_from_direct}) 104 | 105 | # If there are remaining PRs with direct review but no outcome, add them to Commented 106 | remaining_direct = direct_reviews - approved_from_direct - commented_from_direct 107 | if remaining_direct > 0: 108 | links.append({"source": "Direct Review", "target": "Commented", "value": remaining_direct}) 109 | 110 | # Add final state - Merged 111 | nodes.append("Merged") 112 | 113 | # Use actual counts for merge paths instead of arbitrary allocation 114 | approved_to_merged = approved_prs 115 | comments_to_merged = commented_prs 116 | 117 | # Add links to Merged 118 | if approved_to_merged > 0: 119 | links.append({"source": "Approved", "target": "Merged", "value": approved_to_merged}) 120 | 121 | if comments_to_merged > 0: 122 | links.append({"source": "Commented", "target": "Merged", "value": comments_to_merged}) 123 | 124 | # Only use the original no_review count when connecting to Merged 125 | if no_review > 0: 126 | links.append({"source": "No Review", "target": "Merged", "value": no_review}) 127 | 128 | # Remove any links with zero value 129 | links = [link for link in links if link["value"] > 0] 130 | 131 | return {"nodes": nodes, "links": links} 132 | -------------------------------------------------------------------------------- /src/collab_dev/components/charts/workflow/template.html: -------------------------------------------------------------------------------- 1 | {% extends "components/charts/chart.html" %} 2 | {% from "components/charts/metric.html" import metric %} 3 | {% block title %}Code Review Workflow{% endblock %} 4 | {% block metrics %} 5 | {{ metric(label="Pull Requests", value=pr_count, tip="Number of pull requests") }} 6 | {{ metric(label="Events", value=event_count, tip="Number of events") }} 7 | {% endblock %} 8 | {% block chart %} 9 | {{ chart_content | safe }} 10 | {% endblock %} 11 | {% block caption %}Visualizes the review workflow of pull requests from creation to merge.{% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/fetcher/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pullflow/collab-dev/adcaa2efb3418c1a8aebb2ad98bf46b3a99aa9b2/src/collab_dev/fetcher/__init__.py -------------------------------------------------------------------------------- /src/collab_dev/fetcher/api_client.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import Any, Dict, Optional 4 | 5 | import requests 6 | from dotenv import load_dotenv 7 | 8 | # Load environment variables 9 | load_dotenv() 10 | 11 | # Configure logging 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger(__name__) 14 | 15 | # 
GitHub API base URL 16 | BASE_URL = "https://api.github.com" 17 | 18 | 19 | def get_api_token() -> Optional[str]: 20 | """Get GitHub API token from environment variable.""" 21 | token = os.getenv("GITHUB_TOKEN") 22 | if not token: 23 | logger.warning("GITHUB_TOKEN environment variable not set. API rate limits may apply.") 24 | return token 25 | 26 | 27 | def get( 28 | path: str, 29 | params: Optional[Dict[str, Any]] = None, 30 | headers: Optional[Dict[str, str]] = None, 31 | ) -> Dict: 32 | """ 33 | Make a GET request to GitHub API. 34 | 35 | Args: 36 | path: The API endpoint path (without the base URL) 37 | params: Optional query parameters 38 | headers: Optional additional headers 39 | 40 | Returns: 41 | The JSON response as a dictionary 42 | """ 43 | url = f"{BASE_URL}/{path.lstrip('/')}" 44 | 45 | # Initialize headers if None 46 | if headers is None: 47 | headers = {} 48 | 49 | # Use GitHub token if available 50 | token = get_api_token() 51 | if token: 52 | headers["Authorization"] = f"token {token}" 53 | 54 | # Make the request 55 | response = requests.get(url, params=params, headers=headers) 56 | response.raise_for_status() 57 | 58 | return response.json() 59 | -------------------------------------------------------------------------------- /src/collab_dev/fetcher/fetch.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | from typing import Dict, List, Optional, Tuple 5 | 6 | from dotenv import load_dotenv 7 | 8 | from . import store 9 | from .github_utils import ( 10 | github_graphql_get_merged_pull_requests, 11 | github_graphql_get_pull_request_events, 12 | github_graphql_get_repository, 13 | process_timeline_events, 14 | ) 15 | 16 | # Load environment variables 17 | load_dotenv() 18 | 19 | # Configure logging 20 | logging.basicConfig(level=logging.INFO) 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | def extract_repo_parts(repo_url: str) -> Tuple[str, str]: 25 | """Extract owner and name from a GitHub repository URL.""" 26 | # Match patterns like https://github.com/owner/repo or owner/repo 27 | pattern = r"(?:https?://github\.com/)?([^/]+)/([^/]+)" 28 | match = re.match(pattern, repo_url) 29 | 30 | if not match: 31 | raise ValueError(f"Invalid GitHub repository URL: {repo_url}") 32 | 33 | return match.group(1), match.group(2) 34 | 35 | 36 | def process_repository(owner: str, name: str, max_prs: Optional[int] = None, category: str = None) -> dict: 37 | """Process a repository - main entry point function 38 | 39 | Args: 40 | owner: GitHub repository owner 41 | name: GitHub repository name 42 | max_prs: Maximum number of pull requests to fetch (None for no limit) 43 | category: Optional category to classify the repository 44 | 45 | Returns: 46 | Dictionary with repository processing results 47 | """ 48 | return fetch_repository_info(owner, name, max_prs=max_prs, category=category) 49 | 50 | 51 | def error_handler(func): 52 | def wrapper(*args, **kwargs): 53 | try: 54 | return func(*args, **kwargs) 55 | except Exception as e: 56 | import traceback 57 | 58 | logger.error(f"Error in {func.__name__}: {e}") 59 | logger.error(f"Stack trace: {traceback.format_exc()}") 60 | raise e 61 | 62 | return wrapper 63 | 64 | 65 | def get_repository_info(owner: str, name: str) -> Dict: 66 | """Fetch repository information from GitHub using GraphQL.""" 67 | return github_graphql_get_repository(owner, name) 68 | 69 | 70 | def get_pull_requests(owner: str, name: str, max_prs: Optional[int] = None) -> List[Dict]: 
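As a quick illustration of the REST wrapper above: `get()` joins the path onto `BASE_URL` and attaches the token header when one is available, so a one-off call reads like this. The endpoint and response fields are standard GitHub REST API values; the repository is just an example.

```python
# Sketch: fetch repository metadata through the thin REST helper.
from collab_dev.fetcher.api_client import get

repo = get("repos/facebook/react")
print(repo["full_name"])         # "facebook/react"
print(repo["stargazers_count"])  # integer star count
```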
71 | """Fetch merged pull requests from GitHub using GraphQL API.""" 72 | pull_requests = github_graphql_get_merged_pull_requests(owner, name) 73 | 74 | if max_prs: 75 | pull_requests = pull_requests[:max_prs] 76 | 77 | return pull_requests 78 | 79 | 80 | def get_pull_request_events(owner: str, name: str, pr_number: int) -> List[Dict]: 81 | """Fetch timeline events for a pull request using GraphQL.""" 82 | timeline_data = github_graphql_get_pull_request_events(owner, name, pr_number) 83 | 84 | if not timeline_data: 85 | return [] 86 | 87 | repo_url = f"https://github.com/{owner}/{name}" 88 | repository_slug = f"{owner}/{name}" 89 | 90 | return process_timeline_events(timeline_data, repo_url, repository_slug) 91 | 92 | 93 | def check_existing_repository(owner: str, name: str) -> Optional[Dict]: 94 | """Check if repository already exists in the file system.""" 95 | repo_url = f"https://github.com/{owner}/{name}" 96 | repo_dir = store.get_repo_dir(owner, name) 97 | repo_file = os.path.join(repo_dir, "repository.csv") 98 | 99 | if os.path.exists(repo_file): 100 | # Simple representation of repository ID using owner/name 101 | return {"id": f"{owner}/{name}", "url": repo_url} 102 | 103 | return None 104 | 105 | 106 | @error_handler 107 | def fetch_repository_info(owner: str, name: str, max_prs: Optional[int] = None, category: str = None) -> dict: 108 | """Fetch repository information.""" 109 | # Check if repository already exists 110 | existing_repo = check_existing_repository(owner, name) 111 | 112 | if existing_repo: 113 | # Process pull requests for existing repository 114 | result = fetch_pull_requests(owner, name, max_prs=max_prs) 115 | return {"status": "success", "repository_id": existing_repo["id"], **result} 116 | 117 | # Fetch repository information using GraphQL 118 | repo_data = get_repository_info(owner, name) 119 | 120 | if not repo_data: 121 | raise ValueError(f"Could not fetch data for repository: {owner}/{name}") 122 | 123 | # Save repository information 124 | save_result = store.save_repository_info(owner, name, repo_data, category) 125 | 126 | # Next stage - fetch pull requests 127 | result = fetch_pull_requests(owner, name, max_prs=max_prs) 128 | 129 | return { 130 | "status": "success", 131 | "repository_id": f"{owner}/{name}", 132 | "repository": save_result.get("repository", {}), 133 | **result, 134 | } 135 | 136 | 137 | @error_handler 138 | def fetch_pull_requests(owner: str, name: str, max_prs: Optional[int] = None) -> dict: 139 | """Fetch pull requests.""" 140 | repository_slug = f"{owner}/{name}" 141 | 142 | # Get existing PR numbers 143 | existing_prs = store.get_existing_prs_map(owner, name) 144 | logger.info(f"Found {len(existing_prs)} existing pull requests for {repository_slug}") 145 | 146 | # Count PRs that already have events saved 147 | existing_prs_with_events = 0 148 | for pr_number in existing_prs: 149 | if store.has_pr_events(owner, name, pr_number): 150 | existing_prs_with_events += 1 151 | 152 | logger.info(f"Found {existing_prs_with_events} existing pull requests with events for {repository_slug}") 153 | 154 | # Adjust max_prs for new PRs to fetch based on what we already have 155 | remaining_prs_to_fetch = None 156 | if max_prs is not None: 157 | remaining_prs_to_fetch = max(0, max_prs - existing_prs_with_events) 158 | logger.info(f"Will fetch up to {remaining_prs_to_fetch} new pull requests to reach the limit of {max_prs}") 159 | 160 | # If we already have enough PRs with events, no need to fetch more 161 | if remaining_prs_to_fetch == 0: 162 | 
logger.info( 163 | f"Already have {existing_prs_with_events} PRs with events, " 164 | f"which meets or exceeds the requested {max_prs}" 165 | ) 166 | return { 167 | "status": "success", 168 | "prs_processed": 0, 169 | "new_prs": 0, 170 | "message": f"No new PRs needed, already have {existing_prs_with_events} PRs with events", 171 | } 172 | 173 | # Get merged pull requests from GitHub API using GraphQL 174 | pull_requests_data = get_pull_requests(owner, name, max_prs=remaining_prs_to_fetch) 175 | logger.info(f"Fetched {len(pull_requests_data)} pull requests from GitHub API for {repository_slug}") 176 | 177 | # Filter out PRs that we already have 178 | new_pull_requests = [pr for pr in pull_requests_data if pr["number"] not in existing_prs] 179 | logger.info(f"Found {len(new_pull_requests)} new pull requests for {repository_slug}") 180 | 181 | # Save new pull requests if we have any 182 | result = {"status": "success", "prs_processed": 0, "new_prs": 0} 183 | if new_pull_requests: 184 | # Transform the PRs to the format expected by the store module 185 | transformed_prs = [ 186 | { 187 | "repository_slug": repository_slug, 188 | "pr_number": pr["number"], 189 | "title": pr["title"], 190 | "url": pr["url"], 191 | "author_login": pr["author"]["login"] if pr["author"] else None, 192 | "created_at": pr["createdAt"], 193 | "merged_at": pr["mergedAt"], 194 | "additions": pr["additions"], 195 | "deletions": pr["deletions"], 196 | "files_changed": pr["changedFiles"], 197 | } 198 | for pr in new_pull_requests 199 | ] 200 | 201 | save_result = store.save_pull_requests(owner, name, transformed_prs) 202 | result["prs_processed"] = save_result.get("prs_processed", 0) 203 | result["new_prs"] = len(new_pull_requests) 204 | 205 | # Process events for new PRs 206 | for pr in new_pull_requests: 207 | fetch_pull_request_events(owner, name, pr["number"]) 208 | 209 | # Also check if we need to update events for existing PRs that don't have events yet 210 | missing_events_prs = [ 211 | pr_number for pr_number in existing_prs.keys() if not store.has_pr_events(owner, name, pr_number) 212 | ] 213 | 214 | if missing_events_prs: 215 | logger.info(f"Fetching events for {len(missing_events_prs)} existing pull requests that are missing events") 216 | for pr_number in missing_events_prs: 217 | fetch_pull_request_events(owner, name, pr_number) 218 | 219 | # Consolidate all events into a single file 220 | store.consolidate_all_events(owner, name) 221 | 222 | return result 223 | 224 | 225 | @error_handler 226 | def fetch_pull_request_events(owner: str, name: str, pr_number: int) -> dict: 227 | """Fetch pull request events.""" 228 | # Check if events already exist for this PR 229 | if store.has_pr_events(owner, name, pr_number): 230 | logger.info(f"Events for PR #{pr_number} already fetched, skipping") 231 | return {"status": "skipped", "events_processed": 0} 232 | 233 | # Fetch timeline events using GraphQL 234 | events_data = get_pull_request_events(owner, name, pr_number) 235 | 236 | if not events_data: 237 | logger.warning(f"No timeline events found for PR #{pr_number}") 238 | return {"status": "empty", "events_processed": 0} 239 | 240 | # Save events using store module 241 | return store.save_pr_events(owner, name, pr_number, events_data) 242 | -------------------------------------------------------------------------------- /src/collab_dev/fetcher/github_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | from typing import Any, Dict, List 4 | 5 | 
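Putting the fetch pipeline above together, a minimal driver looks like the sketch below. It assumes `GITHUB_TOKEN` is exported and that you run from the project root, since the store module refuses to start without a `./data` directory.

```python
# Sketch: drive the fetcher end to end for one repository.
from collab_dev.fetcher.fetch import extract_repo_parts, process_repository

owner, name = extract_repo_parts("https://github.com/facebook/react")
result = process_repository(owner, name, max_prs=50)

# result carries the status plus the counts from fetch_pull_requests
print(result["status"], result.get("new_prs", 0))
```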
import requests 6 | 7 | from .api_client import get_api_token 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | GITHUB_GRAPHQL_URL = "https://api.github.com/graphql" 12 | 13 | 14 | def get_github_headers() -> Dict: 15 | """Get GitHub API headers with authentication token""" 16 | token = get_api_token() 17 | 18 | if not token: 19 | logger.error("No GitHub token available") 20 | raise Exception("No GitHub token available") 21 | 22 | headers = { 23 | "Authorization": f"token {token}", 24 | "Accept": "application/vnd.github.v3+json", 25 | "Content-Type": "application/json", 26 | } 27 | 28 | return headers 29 | 30 | 31 | def github_request(method: str, url: str, **kwargs) -> Dict: 32 | """Make a GitHub API request""" 33 | # Get headers with a token 34 | headers = get_github_headers() 35 | kwargs_headers = kwargs.get("headers", {}) 36 | 37 | # Merge headers 38 | full_headers = {**kwargs_headers, **headers} 39 | kwargs["headers"] = full_headers 40 | 41 | # Make the request 42 | response = requests.request(method, url, **kwargs) 43 | 44 | # Raise exceptions for error status codes 45 | response.raise_for_status() 46 | 47 | return response.json() 48 | 49 | 50 | def github_graphql_request(query: str, variables: Dict, timeout=30) -> Dict: 51 | """Make a GitHub GraphQL API request""" 52 | url = "https://api.github.com/graphql" 53 | headers = get_github_headers() 54 | 55 | # Create the request payload 56 | payload = {"query": query, "variables": variables} 57 | 58 | # Make the request 59 | response = requests.post(url, headers=headers, json=payload, timeout=timeout) 60 | 61 | # Check for HTTP errors 62 | response.raise_for_status() 63 | 64 | # Get the response data 65 | result = response.json() 66 | 67 | # Check for GraphQL-specific errors 68 | if "errors" in result: 69 | logger.error(f"GraphQL errors: {result['errors']}") 70 | raise Exception(f"GraphQL errors: {result['errors']}") 71 | 72 | return result 73 | 74 | 75 | def make_graphql_request(query: str, variables: dict, oauth_token: str = None) -> dict: 76 | """Make a GraphQL request""" 77 | try: 78 | token = oauth_token or get_api_token() 79 | 80 | if not token: 81 | raise Exception("No GitHub token available") 82 | 83 | headers = { 84 | "Authorization": f"Bearer {token}", 85 | "Content-Type": "application/json", 86 | } 87 | 88 | response = requests.post(GITHUB_GRAPHQL_URL, headers=headers, json={"query": query, "variables": variables}) 89 | 90 | if response.status_code == 200: 91 | result = response.json() 92 | 93 | if "errors" in result: 94 | logger.error(f"GraphQL Errors: {result['errors']}") 95 | raise Exception(f"GraphQL errors: {result['errors']}") 96 | 97 | return result 98 | else: 99 | logger.error(f"GraphQL request failed with status {response.status_code}") 100 | raise Exception(f"GraphQL request failed: {response.text}") 101 | 102 | except Exception as e: 103 | logger.error(f"Error making GraphQL request: {str(e)}") 104 | raise 105 | 106 | 107 | def get_user_association(owner: str, repo: str, username: str, oauth_token: str = None) -> str: 108 | """ 109 | Get a user's association with a repository 110 | Returns: Role as string ('owner', 'member', 'collaborator', or 'none') 111 | """ 112 | if not username: 113 | return "none" 114 | 115 | token = oauth_token or get_api_token() 116 | 117 | if not token: 118 | return "none" 119 | 120 | query = """ 121 | query($owner: String!, $repo: String!) 
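For orientation, here is a hedged example of calling `github_graphql_request` on its own. `stargazerCount` is a real field on GitHub's GraphQL `Repository` type; the owner and name values are placeholders.

```python
# Sketch: issue a single-field GraphQL query through the helper.
from collab_dev.fetcher.github_utils import github_graphql_request

query = """
query($owner: String!, $name: String!) {
  repository(owner: $owner, name: $name) {
    stargazerCount
  }
}
"""

result = github_graphql_request(query, {"owner": "facebook", "name": "react"})
print(result["data"]["repository"]["stargazerCount"])
```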
{ 122 | repository(owner: $owner, name: $repo) { 123 | viewerPermission 124 | owner { 125 | login 126 | } 127 | } 128 | viewer { 129 | login 130 | } 131 | } 132 | """ 133 | 134 | try: 135 | result = make_graphql_request(query, {"owner": owner, "repo": repo}, oauth_token=token) 136 | logger.info(f"GitHub API response for user association: {result}") 137 | 138 | data = result.get("data", {}) 139 | viewer_login = data.get("viewer", {}).get("login") 140 | logger.info(f"Viewer login: {viewer_login}, checking against username: {username}") 141 | 142 | # If we're not checking the authenticated user, return none 143 | if viewer_login != username: 144 | logger.info(f"Username mismatch: viewer {viewer_login} != requested {username}") 145 | return "none" 146 | 147 | repository = data.get("repository", {}) 148 | logger.info(f"Repository data: {repository}") 149 | 150 | # Check if user is the repository owner 151 | repo_owner = repository.get("owner", {}).get("login") 152 | logger.info(f"Repository owner: {repo_owner}") 153 | if repo_owner == username: 154 | logger.info(f"User {username} is the repository owner") 155 | return "owner" 156 | 157 | # Map GitHub permissions to our roles 158 | permission = repository.get("viewerPermission") 159 | logger.info(f"User permission level: {permission}") 160 | if permission == "ADMIN": 161 | logger.info(f"User {username} has ADMIN permission -> collaborator role") 162 | return "collaborator" # Admin gets collaborator role 163 | elif permission == "MAINTAIN": 164 | logger.info(f"User {username} has MAINTAIN permission -> member role") 165 | return "member" # Maintain gets member role 166 | elif permission == "WRITE": 167 | logger.info(f"User {username} has WRITE permission -> collaborator role") 168 | return "collaborator" # Write access gets collaborator role 169 | 170 | logger.info(f"User {username} has insufficient permissions: {permission}") 171 | return "none" 172 | 173 | except Exception as e: 174 | logger.error(f"Error checking user association for {username}: {str(e)}") 175 | return "none" 176 | 177 | 178 | def is_bot_actor(actor_name: str) -> bool: 179 | """Check if an actor is a bot based on name patterns""" 180 | if not actor_name: 181 | return False 182 | 183 | actor_name = actor_name.lower() 184 | 185 | # Common bot suffixes and patterns 186 | bot_patterns = [r"bot$", r"\[bot\]$", r"app$", r"-bot$", r"bot-"] 187 | 188 | # Known bot names 189 | known_bots = { 190 | "dependabot", 191 | "renovate", 192 | "github-actions", 193 | "semantic-release", 194 | "codecov", 195 | "sonarcloud", 196 | "snyk-bot", 197 | "imgbot", 198 | "deepsource-autofix", 199 | "stale", 200 | "allcontributors", 201 | "prettier", 202 | "vercel", 203 | "mergify", 204 | "probot", 205 | "goreleaserbot", 206 | "greenkeeper", 207 | "lgtm-com", 208 | "circleci", 209 | "travis-ci", 210 | "gitter-badger", 211 | "whitesource-bolt-for-github", 212 | "dependabot-preview", 213 | "semantic-release-bot", 214 | } 215 | 216 | # Check if actor name contains any known bot name 217 | for bot_name in known_bots: 218 | if bot_name in actor_name: 219 | return True 220 | 221 | # Check if actor name matches any bot pattern 222 | for pattern in bot_patterns: 223 | if re.search(pattern, actor_name): 224 | return True 225 | 226 | return False 227 | 228 | 229 | def process_timeline_events(pr_data: Dict, repo_url: str, repo_name: str) -> list: 230 | """Convert GraphQL timeline data into database-compatible format""" 231 | owner, repo = repo_name.split("/") 232 | 233 | pr_author = pr_data["author"]["login"] if 
pr_data["author"] else None 234 | author_association = pr_data.get("authorAssociation", "") 235 | is_author_core = author_association in ["OWNER", "MEMBER", "COLLABORATOR"] 236 | 237 | events = [] 238 | 239 | # Add PR creation event 240 | events.append( 241 | { 242 | "time": pr_data["createdAt"], 243 | "pr_number": pr_data["number"], 244 | "repository_slug": repo_name, 245 | "pr_title": pr_data["title"], 246 | "pr_url": pr_data["url"], 247 | "event_type": "pr_created", 248 | "actor": pr_author, 249 | "target_user": None, 250 | "files_changed": pr_data["changedFiles"], 251 | "lines_added": pr_data["additions"], 252 | "lines_deleted": pr_data["deletions"], 253 | "is_core_team": is_author_core, 254 | "source_branch": pr_data["headRefName"], 255 | "target_branch": pr_data["baseRefName"], 256 | "was_draft": pr_data["isDraft"], 257 | "is_bot": is_bot_actor(pr_author), 258 | } 259 | ) 260 | 261 | # Process timeline items 262 | logger.info(f"Processing {len(pr_data['timelineItems']['nodes'])} timeline events for PR #{pr_data['number']}") 263 | 264 | for item in pr_data["timelineItems"]["nodes"]: 265 | if "__typename" not in item: 266 | continue 267 | 268 | base_event = { 269 | "pr_number": pr_data["number"], 270 | "pr_title": pr_data["title"], 271 | "repository_slug": repo_name, 272 | "pr_url": pr_data["url"], 273 | "files_changed": pr_data["changedFiles"], 274 | "lines_added": pr_data["additions"], 275 | "lines_deleted": pr_data["deletions"], 276 | "is_core_team": is_author_core, 277 | "source_branch": pr_data["headRefName"], 278 | "target_branch": pr_data["baseRefName"], 279 | "was_draft": pr_data["isDraft"], 280 | } 281 | 282 | if item["__typename"] == "PullRequestCommit": 283 | actor = item["commit"]["author"]["user"]["login"] if item["commit"]["author"]["user"] else None 284 | events.append( 285 | { 286 | **base_event, 287 | "time": item["commit"]["committedDate"], 288 | "event_type": "commit_pushed", 289 | "actor": actor, 290 | "target_user": None, 291 | "is_bot": is_bot_actor(actor), 292 | } 293 | ) 294 | 295 | elif item["__typename"] == "ReviewRequestedEvent": 296 | actor = item["actor"]["login"] if item["actor"] else None 297 | target_user = item["requestedReviewer"]["login"] if item["requestedReviewer"] else None 298 | events.append( 299 | { 300 | **base_event, 301 | "time": item["createdAt"], 302 | "event_type": "review_requested", 303 | "actor": actor, 304 | "target_user": target_user, 305 | "is_bot": is_bot_actor(actor), 306 | } 307 | ) 308 | 309 | elif item["__typename"] == "PullRequestReview": 310 | actor = item["author"]["login"] if item["author"] else None 311 | events.append( 312 | { 313 | **base_event, 314 | "time": item["createdAt"], 315 | "event_type": f"review_{item['state'].lower()}", 316 | "actor": actor, 317 | "target_user": None, 318 | "is_bot": is_bot_actor(actor), 319 | } 320 | ) 321 | 322 | elif item["__typename"] == "MergedEvent": 323 | actor = item["actor"]["login"] if item["actor"] else None 324 | events.append( 325 | { 326 | **base_event, 327 | "time": item["createdAt"], 328 | "event_type": "pr_merged", 329 | "actor": actor, 330 | "target_user": None, 331 | "is_bot": is_bot_actor(actor), 332 | } 333 | ) 334 | 335 | elif item["__typename"] == "IssueComment": 336 | actor = item["author"]["login"] if item["author"] else None 337 | events.append( 338 | { 339 | **base_event, 340 | "time": item["createdAt"], 341 | "event_type": "comment_added", 342 | "actor": actor, 343 | "target_user": None, 344 | "is_bot": is_bot_actor(actor), 345 | } 346 | ) 347 | 348 | 
logger.info(f"Processed {len(events)} total events for PR #{pr_data['number']}") 349 | return events 350 | 351 | 352 | PULL_REQUESTS_PER_PAGE = 100 353 | 354 | 355 | def github_graphql_get_merged_pull_requests(owner: str, name: str) -> List[Dict]: 356 | """Get merged pull requests using GraphQL API""" 357 | query = ( 358 | """ 359 | query($owner: String!, $name: String!) { 360 | repository(owner: $owner, name: $name) { 361 | pullRequests(first: %d, states: [MERGED], orderBy: {field: UPDATED_AT, direction: DESC}) { 362 | nodes { 363 | number 364 | title 365 | url 366 | createdAt 367 | mergedAt 368 | changedFiles 369 | additions 370 | deletions 371 | author { 372 | login 373 | } 374 | } 375 | } 376 | } 377 | } 378 | """ 379 | % PULL_REQUESTS_PER_PAGE 380 | ) 381 | 382 | try: 383 | result = github_graphql_request(query, {"owner": owner, "name": name}) 384 | if result.get("data") and result["data"].get("repository"): 385 | return result["data"]["repository"]["pullRequests"]["nodes"] 386 | return [] 387 | except Exception as e: 388 | logger.error(f"Error fetching pull requests: {str(e)}") 389 | raise 390 | 391 | 392 | def github_graphql_get_pull_request_events(owner: str, name: str, pr_number: int) -> Dict[str, Any]: 393 | """Get PR timeline data using GraphQL API""" 394 | query = """ 395 | query($owner: String!, $name: String!, $pr_number: Int!) { 396 | repository(owner: $owner, name: $name) { 397 | pullRequest(number: $pr_number) { 398 | number 399 | title 400 | url 401 | createdAt 402 | mergedAt 403 | changedFiles 404 | additions 405 | deletions 406 | headRefName 407 | baseRefName 408 | isDraft 409 | author { 410 | login 411 | } 412 | authorAssociation 413 | timelineItems(first: 100) { 414 | pageInfo { 415 | hasNextPage 416 | endCursor 417 | } 418 | nodes { 419 | __typename 420 | ... on PullRequestCommit { 421 | commit { 422 | committedDate 423 | author { 424 | user { 425 | login 426 | } 427 | } 428 | } 429 | } 430 | ... on ReviewRequestedEvent { 431 | createdAt 432 | actor { 433 | login 434 | } 435 | requestedReviewer { 436 | ... on User { 437 | login 438 | } 439 | } 440 | } 441 | ... on PullRequestReview { 442 | createdAt 443 | author { 444 | login 445 | } 446 | state 447 | } 448 | ... on MergedEvent { 449 | createdAt 450 | actor { 451 | login 452 | } 453 | } 454 | ... on IssueComment { 455 | createdAt 456 | author { 457 | login 458 | } 459 | } 460 | } 461 | } 462 | } 463 | } 464 | } 465 | """ 466 | 467 | try: 468 | result = github_graphql_request(query, {"owner": owner, "name": name, "pr_number": pr_number}) 469 | if result.get("data") and result["data"].get("repository"): 470 | return result["data"]["repository"]["pullRequest"] 471 | return None 472 | except Exception as e: 473 | logger.error(f"Error fetching PR timeline: {str(e)}") 474 | raise 475 | 476 | 477 | def github_graphql_get_repository(owner: str, name: str) -> Dict: 478 | """Get repository data using GraphQL""" 479 | query = """ 480 | query($owner: String!, $name: String!) { 481 | repository(owner: $owner, name: $name) { 482 | name 483 | description 484 | url 485 | owner { 486 | avatarUrl 487 | ... 
on Organization { 488 | avatarUrl 489 | } 490 | } 491 | } 492 | } 493 | """ 494 | 495 | try: 496 | result = github_graphql_request(query, {"owner": owner, "name": name}) 497 | 498 | if result.get("data") and result["data"].get("repository"): 499 | repo = result["data"]["repository"] 500 | # Return a flat dictionary with string values 501 | return { 502 | "url": f"https://github.com/{owner}/{name}", 503 | "name": repo["name"], 504 | "organization": owner, 505 | "description": repo["description"], 506 | "logo_url": repo["owner"]["avatarUrl"], 507 | "category": "Newly Added", 508 | "repository_slug": f"{owner}/{name}", 509 | "status": "updating", 510 | } 511 | return None 512 | except Exception as e: 513 | logger.error(f"Error fetching repository data: {str(e)}") 514 | raise 515 | -------------------------------------------------------------------------------- /src/collab_dev/fetcher/store.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import logging 3 | import os 4 | import sys 5 | from typing import Dict, List 6 | 7 | # Configure logging 8 | logger = logging.getLogger(__name__) 9 | 10 | # Get root directory - set to './data' by default 11 | DATA_DIR = "./data" 12 | 13 | # Check if data directory exists 14 | if not os.path.exists(DATA_DIR): 15 | logger.error(f"Data directory {DATA_DIR} does not exist. Please create it first.") 16 | sys.exit(1) 17 | 18 | 19 | def ensure_directory(path: str) -> str: 20 | """Ensure the directory exists, creating it if necessary.""" 21 | os.makedirs(path, exist_ok=True) 22 | return path 23 | 24 | 25 | def get_repo_dir(owner: str, name: str) -> str: 26 | """Get the repository directory path.""" 27 | return ensure_directory(os.path.join(DATA_DIR, owner, name)) 28 | 29 | 30 | def get_pr_dir(owner: str, name: str, pr_number: int) -> str: 31 | """Get the pull request directory path.""" 32 | repo_dir = get_repo_dir(owner, name) 33 | return ensure_directory(os.path.join(repo_dir, f"pr_{pr_number}")) 34 | 35 | 36 | def write_csv(filepath: str, data: List[Dict], headers: List[str]) -> None: 37 | """Write data to a CSV file.""" 38 | mode = "w" 39 | file_exists = os.path.exists(filepath) 40 | 41 | with open(filepath, mode, newline="", encoding="utf-8") as csvfile: 42 | writer = csv.DictWriter(csvfile, fieldnames=headers) 43 | if not file_exists or mode == "w": 44 | writer.writeheader() 45 | 46 | for row in data: 47 | # Filter the row to only include fields in headers 48 | filtered_row = {k: v for k, v in row.items() if k in headers} 49 | writer.writerow(filtered_row) 50 | 51 | logger.info(f"Data written to {filepath}") 52 | 53 | 54 | def save_repository_info(owner: str, name: str, repo_data: Dict, category=None) -> Dict: 55 | """Save repository information to CSV.""" 56 | repo_dir = get_repo_dir(owner, name) 57 | 58 | # Write repository info to CSV 59 | write_csv( 60 | os.path.join(repo_dir, "repository.csv"), 61 | [repo_data], 62 | list(repo_data.keys()), 63 | ) 64 | 65 | return { 66 | "status": "success", 67 | "repository_dir": repo_dir, 68 | "repository": repo_data, 69 | } 70 | 71 | 72 | def save_pull_requests(owner: str, name: str, pull_requests_data: List[Dict]) -> Dict: 73 | """Save pull requests to CSV.""" 74 | repo_dir = get_repo_dir(owner, name) 75 | 76 | write_csv( 77 | os.path.join(repo_dir, "pull_requests.csv"), 78 | pull_requests_data, 79 | list(pull_requests_data[0].keys()), 80 | ) 81 | 82 | return { 83 | "status": "success", 84 | "prs_processed": len(pull_requests_data), 85 | } 86 | 87 | 88 | def 
save_pr_events(owner: str, name: str, pr_number: int, events_data: List[Dict]) -> Dict: 89 | """Save pull request events to CSV.""" 90 | # Create directory for PR events 91 | pr_dir = get_pr_dir(owner, name, pr_number) 92 | 93 | if not events_data: 94 | logger.info(f"No timeline events found for PR #{pr_number}") 95 | return {"status": "success", "events_processed": 0} 96 | 97 | # Sanity check: warn if this PR is missing from pull_requests.csv 98 | repo_dir = get_repo_dir(owner, name) 99 | pr_csv_path = os.path.join(repo_dir, "pull_requests.csv") 100 | if os.path.exists(pr_csv_path): 101 | with open(pr_csv_path, "r", newline="", encoding="utf-8") as csvfile: 102 | reader = csv.DictReader(csvfile) 103 | if not any(int(pr["pr_number"]) == pr_number for pr in reader): 104 | logger.warning(f"PR #{pr_number} not found in pull_requests.csv") 105 | 106 | 107 | # Write to CSV 108 | write_csv( 109 | os.path.join(pr_dir, "events.csv"), 110 | events_data, 111 | list(events_data[0].keys()), 112 | ) 113 | 114 | return { 115 | "status": "success", 116 | "events_processed": len(events_data), 117 | } 118 | 119 | 120 | def get_pr_numbers_from_csv(owner: str, name: str) -> List[int]: 121 | """Read PR numbers from pull_requests.csv.""" 122 | repo_dir = get_repo_dir(owner, name) 123 | pr_csv_path = os.path.join(repo_dir, "pull_requests.csv") 124 | pr_numbers = [] 125 | 126 | if os.path.exists(pr_csv_path): 127 | with open(pr_csv_path, "r", newline="", encoding="utf-8") as csvfile: 128 | reader = csv.DictReader(csvfile) 129 | for pr in reader: 130 | pr_numbers.append(int(pr["pr_number"])) 131 | 132 | return pr_numbers 133 | 134 | 135 | def get_existing_prs_map(owner: str, name: str) -> Dict[int, Dict]: 136 | """ 137 | Get a dictionary of existing PRs from pull_requests.csv. 138 | 139 | Args: 140 | owner: GitHub repository owner 141 | name: GitHub repository name 142 | 143 | Returns: 144 | Dictionary mapping PR numbers to PR data 145 | """ 146 | repo_dir = get_repo_dir(owner, name) 147 | pr_csv_path = os.path.join(repo_dir, "pull_requests.csv") 148 | pr_map = {} 149 | 150 | if os.path.exists(pr_csv_path): 151 | with open(pr_csv_path, "r", newline="", encoding="utf-8") as csvfile: 152 | reader = csv.DictReader(csvfile) 153 | for pr in reader: 154 | pr_map[int(pr["pr_number"])] = pr 155 | 156 | return pr_map 157 | 158 | 159 | def has_pr_events(owner: str, name: str, pr_number: int) -> bool: 160 | """ 161 | Check if events for a specific PR have already been fetched. 162 | 163 | Args: 164 | owner: GitHub repository owner 165 | name: GitHub repository name 166 | pr_number: Pull request number 167 | 168 | Returns: 169 | True if events exist, False otherwise 170 | """ 171 | pr_dir = get_pr_dir(owner, name, pr_number) 172 | events_csv_path = os.path.join(pr_dir, "events.csv") 173 | 174 | return os.path.exists(events_csv_path) and os.path.getsize(events_csv_path) > 0 175 | 176 | 177 | def consolidate_all_events(owner: str, name: str) -> Dict: 178 | """ 179 | Consolidate all PR events into a single all_events.csv file in the repo directory. 
180 | 181 | Args: 182 | owner: GitHub repository owner 183 | name: GitHub repository name 184 | 185 | Returns: 186 | Dict with status and count of events consolidated 187 | """ 188 | repo_dir = get_repo_dir(owner, name) 189 | pr_numbers = get_pr_numbers_from_csv(owner, name) 190 | 191 | all_events = [] 192 | 193 | # Collect events from each PR 194 | for pr_number in pr_numbers: 195 | pr_dir = get_pr_dir(owner, name, pr_number) 196 | events_csv_path = os.path.join(pr_dir, "events.csv") 197 | 198 | if os.path.exists(events_csv_path) and os.path.getsize(events_csv_path) > 0: 199 | with open(events_csv_path, "r", newline="", encoding="utf-8") as csvfile: 200 | reader = csv.DictReader(csvfile) 201 | all_events.extend(list(reader)) 202 | 203 | # Write consolidated events to all_events.csv 204 | if all_events: 205 | all_events_path = os.path.join(repo_dir, "all_events.csv") 206 | write_csv(all_events_path, all_events, list(all_events[0].keys())) 207 | logger.info(f"Consolidated {len(all_events)} events into {all_events_path}") 208 | 209 | return { 210 | "status": "success", 211 | "events_consolidated": len(all_events), 212 | } 213 | 214 | 215 | def get_all_repositories() -> List[str]: 216 | """ 217 | Get a list of all repositories stored in the data directory. 218 | 219 | Returns: 220 | List of repositories in the format "owner/name" 221 | """ 222 | repositories = [] 223 | 224 | # Check if data directory exists 225 | if not os.path.exists(DATA_DIR): 226 | logger.warning(f"Data directory {DATA_DIR} does not exist.") 227 | return repositories 228 | 229 | # Walk through the data directory structure 230 | for owner in os.listdir(DATA_DIR): 231 | owner_path = os.path.join(DATA_DIR, owner) 232 | if os.path.isdir(owner_path): 233 | for repo in os.listdir(owner_path): 234 | repo_path = os.path.join(owner_path, repo) 235 | # Check if it's a directory and contains repository.csv 236 | if os.path.isdir(repo_path) and os.path.exists(os.path.join(repo_path, "repository.csv")): 237 | repositories.append(f"{owner}/{repo}") 238 | 239 | return repositories 240 | -------------------------------------------------------------------------------- /src/collab_dev/loader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pullflow/collab-dev/adcaa2efb3418c1a8aebb2ad98bf46b3a99aa9b2/src/collab_dev/loader/__init__.py -------------------------------------------------------------------------------- /src/collab_dev/loader/load.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import pandas as pd 5 | 6 | 7 | def load(org: str, repo: str) -> pd.DataFrame: 8 | """ 9 | Load all events data for a given org/repo into a pandas dataframe 10 | 11 | Args: 12 | org: GitHub organization name 13 | repo: GitHub repository name 14 | 15 | Returns: 16 | DataFrame containing all events data 17 | """ 18 | data_path = f"./data/{org}/{repo}/all_events.csv" 19 | 20 | if not os.path.exists(data_path): 21 | logging.warning(f"Data file not found: {data_path}") 22 | return pd.DataFrame() 23 | 24 | try: 25 | # Specify data types, particularly for the time column 26 | df = pd.read_csv( 27 | data_path, 28 | parse_dates=["time"], # Parse the time column as datetime 29 | dtype={ 30 | "pr_number": int, 31 | "event_type": str, 32 | "actor": str, 33 | "is_bot": bool, 34 | "is_core_team": bool, 35 | }, 36 | ) 37 | 38 | # Log the shape 39 | logging.info(f"Loaded dataframe with shape: {df.shape}") 40 | 41 | 
return df 42 | except Exception as e: 43 | logging.error(f"Error reading file {data_path}: {e}") 44 | return pd.DataFrame() 45 | -------------------------------------------------------------------------------- /src/collab_dev/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 🍩 collab.dev report 8 | 9 | 10 | 11 | 12 | 13 |
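Once a repository has been collected, the loader above returns the consolidated events as a typed DataFrame. A minimal sketch, assuming facebook/react data already exists under `./data`:

```python
# Sketch: load collected events and take a quick look at the mix.
from collab_dev.loader.load import load

df = load("facebook", "react")
if not df.empty:
    print(df["event_type"].value_counts())
    print(df.groupby("pr_number")["time"].agg(["min", "max"]).head())
```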
14 |

15 | 🍩 collab.dev report 16 |

17 | {% block content %} 18 |
19 |

Repository Reports

20 | {% if repositories %} 21 |
    22 | {% for repo in repositories %} 23 |
24 | {{ repo }} 25 |
26 | {% endfor %} 27 |
28 | {% else %} 29 |

No repositories found.

30 | {% endif %} 31 |
32 |
33 |

Run the following command from the root of the project to collect repository data: 34 |

35 |
pdm collect -n {number of PRs} {owner}/{repo}
36 |

37 | Note: You must set the GITHUB_TOKEN environment 38 | variable before running the command. 39 |

40 |

41 | For example, to collect 100 PRs from the facebook/react repository, 42 | run: 43 |

44 |
GITHUB_TOKEN=your_token pdm collect -n 100 facebook/react
45 |
46 | {% endblock %} 47 |
48 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /src/collab_dev/templates/repository.html: -------------------------------------------------------------------------------- 1 | {% extends "templates/index.html" %} 2 | 3 | {% block content %} 4 |
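The template above receives `repo` and a list of pre-rendered `charts` fragments, injected with `|safe`. The view that supplies them lives in app.py, which is not reproduced in this dump, so the following is only a hypothetical sketch of its likely shape; `render_workflow` stands in for whichever chart modules the app actually wires up, and the route follows the report URL scheme.

```python
# Hypothetical sketch only: app.py is not shown in this dump.
from flask import Flask, render_template

from collab_dev.loader.load import load

# Stand-in import: each chart module is assumed to expose a function that
# returns an HTML fragment (compare the workflow chart's render_template call).
from collab_dev.components.charts.workflow import render as render_workflow

app = Flask(__name__)


@app.route("/report/<owner>/<repo>")
def report(owner: str, repo: str):
    df = load(owner, repo)
    charts = [render_workflow(df)]  # pre-rendered fragments for {{ chart|safe }}
    return render_template(
        "templates/repository.html", repo=f"{owner}/{repo}", charts=charts
    )
```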
5 |

{{ repo }}

6 | Back to repositories 7 |
8 | {% for chart in charts %} 9 |
10 | {{ chart|safe }} 11 |
12 | {% endfor %} 13 | {% endblock %} -------------------------------------------------------------------------------- /src/collab_dev/theme.py: -------------------------------------------------------------------------------- 1 | """ 2 | Theme configuration module providing consistent color palettes and styling utilities. 3 | """ 4 | 5 | # Chart dimensions 6 | CHART_DIMENSIONS = { 7 | "pie_chart_height": 400, # Standard height for pie/donut charts 8 | "bar_chart_height": 300, # Standard height for bar charts 9 | "funnel_chart_height": 300, # Standard height for funnel charts 10 | } 11 | 12 | # Primary brand colors 13 | BRAND = { 14 | "primary": "#795DBD", # Slate blue - Main brand color 15 | "secondary": "#A592D3", # African Violet - Secondary brand color 16 | "accent": "#FF958C", # Coral pink - Accent color 17 | "highlight": "#ACE4AA", # Celadon - Highlight color 18 | "dark": "#6D1A36", # Claret - Dark accent 19 | } 20 | 21 | # Theme variations 22 | THEMES = { 23 | "default": { 24 | "primary_series": [ 25 | "#795DBD", # Slate blue 26 | "#A592D3", # African Violet 27 | "#FF958C", # Coral pink 28 | "#ACE4AA", # Celadon 29 | "#6D1A36", # Claret 30 | ], 31 | } 32 | } 33 | 34 | # Extended color palette for data visualizations 35 | VISUALIZATION = { 36 | # Main colors for primary data series - will be set by active theme 37 | "primary_series": THEMES["default"]["primary_series"], 38 | # Colors for secondary or supporting data 39 | "secondary_series": [ 40 | "#B3A1E0", # Lighter slate blue 41 | "#C4B6E3", # Lighter african violet 42 | "#FFB3AC", # Lighter coral pink 43 | "#C4ECC2", # Lighter celadon 44 | "#8F3854", # Lighter claret 45 | ], 46 | # Monochromatic scale of the primary color (Slate blue) 47 | "mono_scale": [ 48 | "#795DBD", # 100% 49 | "#8E76C7", # 80% 50 | "#A38FD1", # 60% 51 | "#B8A8DB", # 40% 52 | "#CDC1E5", # 20% 53 | ], 54 | # Diverging color scale for comparisons 55 | "diverging": [ 56 | "#FF958C", # negative (coral pink) 57 | "#FFB3AC", # slightly negative 58 | "#F5F5F5", # neutral 59 | "#ACE4AA", # slightly positive (celadon) 60 | "#8BC887", # positive (darker celadon) 61 | ], 62 | } 63 | 64 | # Semantic colors for status and feedback 65 | SEMANTIC = { 66 | "success": "#ACE4AA", # Celadon 67 | "warning": "#FFB3AC", # Light coral pink 68 | "error": "#FF958C", # Coral pink 69 | "info": "#A592D3", # African Violet 70 | } 71 | 72 | # Background and surface colors 73 | BACKGROUND = { 74 | "primary": "#FFFFFF", 75 | "secondary": "#F8F9FA", 76 | "tertiary": "#F1F3F5", 77 | "dark": "#6D1A36", # Claret for dark mode or accents 78 | } 79 | 80 | # Text colors 81 | TEXT = { 82 | "primary": "#212529", 83 | "secondary": "#6C757D", 84 | "muted": "#ADB5BD", 85 | "on_dark": "#F8F9FA", # For text on dark backgrounds 86 | } 87 | 88 | 89 | def get_chart_colors(num_colors: int, palette: str = "primary") -> list: 90 | """ 91 | Get a list of colors for charts and visualizations. 
92 | 93 | Args: 94 | num_colors (int): Number of colors needed 95 | palette (str): Which palette to use ('primary', 'secondary', 'mono', 'diverging') 96 | 97 | Returns: 98 | list: List of color hex codes 99 | """ 100 | if palette == "primary": 101 | # Extended primary colors with darker celadon for better contrast 102 | colors = [ 103 | "#795DBD", # Slate blue 104 | "#A592D3", # African Violet 105 | "#FF958C", # Coral pink 106 | "#ACE4AA", # Celadon 107 | "#6D1A36", # Claret 108 | "#8BC887", # Darker celadon 109 | "#FFB3AC", # Light coral pink 110 | ] 111 | elif palette == "secondary": 112 | colors = VISUALIZATION["secondary_series"] 113 | elif palette == "mono": 114 | colors = VISUALIZATION["mono_scale"] 115 | elif palette == "diverging": 116 | colors = VISUALIZATION["diverging"] 117 | else: 118 | colors = VISUALIZATION["primary_series"] 119 | 120 | # If we need more colors than available, cycle through the palette 121 | result = [] 122 | while len(result) < num_colors: 123 | result.extend(colors) 124 | return result[:num_colors] 125 | 126 | 127 | def get_plotly_template() -> dict: 128 | """ 129 | Get a consistent Plotly chart template using the theme colors. 130 | 131 | Returns: 132 | dict: Plotly layout template 133 | """ 134 | return { 135 | "layout": { 136 | "paper_bgcolor": BACKGROUND["primary"], 137 | "plot_bgcolor": BACKGROUND["primary"], 138 | "margin": {"l": 50, "r": 50, "t": 35, "b": 30, "pad": 4}, 139 | "font": {"color": TEXT["primary"], "family": "sans-serif"}, 140 | "title": {"font": {"color": TEXT["primary"], "size": 20}}, 141 | "legend": {"font": {"color": TEXT["secondary"]}}, 142 | "xaxis": { 143 | "gridcolor": BACKGROUND["tertiary"], 144 | "linecolor": TEXT["muted"], 145 | "title": {"font": {"color": TEXT["secondary"]}}, 146 | "tickfont": {"color": TEXT["secondary"]}, 147 | }, 148 | "yaxis": { 149 | "gridcolor": BACKGROUND["tertiary"], 150 | "linecolor": TEXT["muted"], 151 | "title": {"font": {"color": TEXT["secondary"]}}, 152 | "tickfont": {"color": TEXT["secondary"]}, 153 | }, 154 | } 155 | } 156 | 157 | 158 | def get_streamlit_theme() -> dict: 159 | """ 160 | Get theme configuration for Streamlit's config.toml 161 | 162 | Returns: 163 | dict: Streamlit theme configuration 164 | """ 165 | return { 166 | "primaryColor": BRAND["primary"], 167 | "backgroundColor": BACKGROUND["primary"], 168 | "secondaryBackgroundColor": BACKGROUND["secondary"], 169 | "textColor": TEXT["primary"], 170 | "font": "sans serif", 171 | } 172 | 173 | 174 | def get_template_data() -> dict: 175 | """ 176 | Get consistent theme data for template rendering. 177 | 178 | Returns: 179 | dict: Theme configuration for templates 180 | """ 181 | return { 182 | "theme": { 183 | "brand": BRAND, 184 | "colors": VISUALIZATION["primary_series"], 185 | "background": BACKGROUND, 186 | "text": TEXT, 187 | "semantic": SEMANTIC, 188 | } 189 | } 190 | 191 | 192 | def set_theme(theme_name: str = "default") -> None: 193 | """ 194 | Set the active theme for visualizations. 195 | 196 | Args: 197 | theme_name (str): Name of the theme to use (currently only 'default' is supported) 198 | 199 | Raises: 200 | ValueError: If the specified theme name is not found 201 | """ 202 | if theme_name not in THEMES: 203 | raise ValueError(f"Theme '{theme_name}' not found. Available themes: {list(THEMES.keys())}") 204 | 205 | # Update visualization colors based on theme 206 | VISUALIZATION["primary_series"] = THEMES[theme_name]["primary_series"] 207 | --------------------------------------------------------------------------------
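Finally, a short sketch of how the theme helpers combine with Plotly; the figure is a throwaway example, and passing the template as a plain dict is standard Plotly behavior.

```python
# Sketch: style an ad-hoc Plotly figure with the shared theme helpers.
import plotly.graph_objects as go

from collab_dev import theme

colors = theme.get_chart_colors(3, palette="mono")
fig = go.Figure(go.Bar(x=["a", "b", "c"], y=[1, 2, 3], marker_color=colors))

# Plotly accepts a dict as a layout template specification.
fig.update_layout(
    template=theme.get_plotly_template(),
    height=theme.CHART_DIMENSIONS["bar_chart_height"],
)
```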