├── .editorconfig ├── .gitattributes ├── .dockerignore ├── readme-assets ├── architecture.png ├── dashboard-example.png ├── entity-relationship-model.png ├── architecture.puml └── entity-relationship-model.puml ├── metabase-data └── metabase.db │ └── metabase.db.mv.db ├── Dockerfile ├── requirements.txt ├── client_impl ├── __init__.py ├── github_client.py └── gitlab_client.py ├── LICENSE ├── main.py ├── docker-compose.yaml ├── Dockerfile-metabase ├── .env.example ├── .gitignore ├── abstractions.py ├── database_models.py ├── conf.py ├── README.md └── renovate_parser.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | 3 | *.bat eol=crlf 4 | *.ps1 eol=crlf 5 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .* 2 | Dockerfile 3 | docker-compose.yaml 4 | venv 5 | metabase-data 6 | experiment* 7 | **/*.md 8 | -------------------------------------------------------------------------------- /readme-assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MShekow/renovate-pr-visualization/HEAD/readme-assets/architecture.png -------------------------------------------------------------------------------- /readme-assets/dashboard-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MShekow/renovate-pr-visualization/HEAD/readme-assets/dashboard-example.png -------------------------------------------------------------------------------- /metabase-data/metabase.db/metabase.db.mv.db: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MShekow/renovate-pr-visualization/HEAD/metabase-data/metabase.db/metabase.db.mv.db -------------------------------------------------------------------------------- /readme-assets/entity-relationship-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MShekow/renovate-pr-visualization/HEAD/readme-assets/entity-relationship-model.png -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | WORKDIR /app 4 | COPY requirements.txt ./ 5 | RUN pip install --no-cache-dir -r requirements.txt 6 | COPY . . 7 | 8 | CMD ["python", "main.py"] 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | SQLAlchemy==2.0.30 2 | sqlalchemy-utils==0.41.2 3 | PyGithub==2.3.0 4 | python-gitlab==4.6.0 5 | psycopg2-binary==2.9.9 6 | marko==2.0.3 7 | packaging==24.0 8 | tqdm==4.66.4 9 | -------------------------------------------------------------------------------- /client_impl/__init__.py: -------------------------------------------------------------------------------- 1 | from enum import StrEnum 2 | from typing import Optional 3 | 4 | from abstractions import ScmClient 5 | from client_impl.github_client import GitHubClient 6 | from client_impl.gitlab_client import GitLabClient 7 | 8 | 9 | class ScmClientImpl(StrEnum): 10 | GitHub = "github" 11 | GitLab = "gitlab" 12 | 13 | 14 | def scm_client_factory(scm_provider: ScmClientImpl, pat: str, api_base_url: Optional[str] = None) -> ScmClient: 15 | if scm_provider == ScmClientImpl.GitHub: 16 | return GitHubClient(pat, api_base_url) 17 | elif scm_provider == ScmClientImpl.GitLab: 
18 | return GitLabClient(pat, api_base_url) 19 | else: 20 | raise ValueError(f"Unknown SCM provider: {scm_provider}") 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023-2024 Marius Shekow, and other contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script fills the provided PostgreSQL database with information about Renovate PRs and repository onboarding 3 | statuses. 
4 | """ 5 | 6 | from conf import load_and_verify_configuration 7 | from renovate_parser import get_renovate_prs, get_database_entities, get_repository_onboarding_status, \ 8 | save_database_entities 9 | 10 | if __name__ == '__main__': 11 | configuration = load_and_verify_configuration() 12 | print("Configuration check successful, starting to fetch Renovate PRs (this may take a few minutes) ...") 13 | renovate_prs = get_renovate_prs(configuration) 14 | print(f"Found {len(renovate_prs.dependency_prs)} Renovate PRs, converting them to them to database entities ...") 15 | database_dependency_prs = get_database_entities(renovate_prs.dependency_prs, configuration) 16 | print(f"Fetching repository onboarding statuses for {len(configuration.repos)} repos " 17 | f"(this may take several minutes) ...") 18 | database_onboarding_statuses = get_repository_onboarding_status(renovate_prs.onboarding_prs, configuration) 19 | print("Storing database entities in the database (deleting old entries) ...") 20 | save_database_entities(database_dependency_prs, database_onboarding_statuses, configuration) 21 | print("Finished importing data successfully") 22 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | # This docker-compose file defines a PostgreSQL server and a metabase service 2 | services: 3 | metabase: 4 | image: metabase/metabase:v0.48.0 5 | restart: always 6 | ports: 7 | - 3000:3000 8 | environment: 9 | MB_DB_FILE: "/metabase-data/metabase.db" 10 | volumes: 11 | - ./metabase-data:/metabase-data 12 | networks: 13 | - metabase 14 | depends_on: 15 | postgres: 16 | condition: service_healthy 17 | 18 | datascraper: 19 | image: renovate-datascraper:latest 20 | build: 21 | context: . 
22 | container_name: datascraper 23 | restart: no 24 | env_file: .env 25 | environment: 26 | PYTHONUNBUFFERED: "1" 27 | networks: 28 | - metabase 29 | depends_on: 30 | postgres: 31 | condition: service_healthy 32 | 33 | postgres: 34 | image: postgres:14.5 35 | restart: always 36 | environment: 37 | POSTGRES_PASSWORD: password 38 | POSTGRES_USER: metabase 39 | POSTGRES_DB: metabase 40 | ports: 41 | - 5432:5432 42 | volumes: 43 | - postgres-data:/var/lib/postgresql/data 44 | networks: 45 | - metabase 46 | healthcheck: 47 | test: [ "CMD", "pg_isready", "-U", "metabase" ] 48 | interval: 5s 49 | timeout: 5s 50 | retries: 5 51 | 52 | networks: 53 | metabase: 54 | 55 | volumes: 56 | postgres-data: 57 | -------------------------------------------------------------------------------- /Dockerfile-metabase: -------------------------------------------------------------------------------- 1 | # Helper image that you can build for ARM64 to get better performance 2 | ARG metabase_repo=metabase 3 | # Feed in version number from bash script 4 | ARG metabase_version 5 | #ARG metabase_version=latest 6 | FROM metabase/${metabase_repo}:${metabase_version} as metabase 7 | 8 | FROM ubuntu:22.04 9 | 10 | ENV FC_LANG en-US LC_CTYPE en_US.UTF-8 11 | 12 | # dependencies 13 | RUN apt-get update -y && apt-get upgrade -y && apt-get install -y --no-install-recommends bash fonts-dejavu-core fonts-dejavu-extra fontconfig curl openjdk-11-jre-headless && \ 14 | mkdir -p /app/certs && \ 15 | curl https://s3.amazonaws.com/rds-downloads/rds-combined-ca-bundle.pem -o /app/certs/rds-combined-ca-bundle.pem && \ 16 | keytool -noprompt -import -trustcacerts -alias aws-rds -file /app/certs/rds-combined-ca-bundle.pem -keystore /etc/ssl/certs/java/cacerts -keypass changeit -storepass changeit && \ 17 | curl https://cacerts.digicert.com/DigiCertGlobalRootG2.crt.pem -o /app/certs/DigiCertGlobalRootG2.crt.pem && \ 18 | keytool -noprompt -import -trustcacerts -alias azure-cert -file 
/app/certs/DigiCertGlobalRootG2.crt.pem -keystore /etc/ssl/certs/java/cacerts -keypass changeit -storepass changeit && \ 19 | mkdir -p /plugins && chmod a+rwx /plugins && \ 20 | useradd --shell /bin/bash metabase && \ 21 | apt-get purge -y curl && \ 22 | apt-get -y autoremove && \ 23 | apt-get -y clean && \ 24 | rm -rf /var/lib/{apt,dpkg,cache,log}/ 25 | 26 | 27 | 28 | WORKDIR /app 29 | 30 | # copy app from the official image 31 | COPY --from=metabase --chown=metabase /app /app 32 | RUN chown metabase /app 33 | 34 | USER metabase 35 | # expose our default runtime port 36 | EXPOSE 3000 37 | 38 | # run it 39 | ENTRYPOINT ["/app/run_metabase.sh"] 40 | -------------------------------------------------------------------------------- /readme-assets/architecture.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | !include 3 | !include 4 | !include 5 | !include 6 | !include 7 | !include 8 | 9 | title (C4 Model) Container diagram for Renovate PR visualization tool 10 | 11 | Person(user, User, "A developer or other kind of team member interested in statistics of Renovate PRs") 12 | 13 | System_Ext(github, "GitHub (Enterprise)", "<$github>\n\nStores all Git repos, including PRs from Renovate") 14 | 15 | System_Ext(gitlab, "GitLab", "<$gitlab>\n\nStores all Git repos, including PRs from Renovate") 16 | System_Boundary(tool, "Renovate PR visualization system") { 17 | Container(metabase, "Metabase", "BI tool", "<$metabase>\n\nRenders visual dashboard that visualizes the Renovate data") 18 | Container(datascraper, "Data Scraper", "Python script", "<$python>\n\n**main.py** scrapes PRs and repository onboarding statuses caused by Renovate, parses the dependencies from the Renovate PR bodies, and transforms this data to relational entities") 19 | ContainerDb(database, "Database", "RDBMS server", "<$postgresql>\n\nStores the transformed data in tables 'dependency_update', 'pull_request' and 'repository_onboarding_status'") 20 | } 
21 | 22 | Rel(datascraper, database, "Persists entities using SQLAlchemy ORM, replacing(!) all existing entities", "PostgreSQL message protocol") 23 | Rel_L(datascraper, github, "Calls GitHub APIs", "REST") 24 | Rel_L(datascraper, gitlab, "Calls GitLab APIs", "REST") 25 | github -[hidden]d- gitlab 26 | Rel(metabase, database, "Queries relational data for visualization", "PostgreSQL message protocol") 27 | Rel_D(user, metabase, "Uses", "Browser / HTTPS") 28 | Rel_D(user, datascraper, "Manually triggers (on demand) to update data", "Browser / HTTPS") 29 | 30 | @enduml 31 | -------------------------------------------------------------------------------- /readme-assets/entity-relationship-model.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | ' Based on: https://gist.github.com/QuantumGhost/0955a45383a0b6c0bc24f9654b3cb561 3 | 4 | ' uncomment the line below if you're using computer with a retina display 5 | ' skinparam dpi 300 6 | !define Table(name,desc) class name as "desc" << (T,#FFAAAA) >> 7 | ' we use bold for primary key 8 | ' green color for unique 9 | ' and underscore for not_null 10 | !define primary_key(x) x 11 | !define unique(x) x 12 | !define not_null(x) x 13 | hide methods 14 | hide stereotypes 15 | 16 | ' entities 17 | 18 | Table(dependency_update, "dependency_update") { 19 | primary_key(id) INTEGER 20 | not_null(dependency_name) VARCHAR[200] 21 | not_null(old_version) VARCHAR[50] 22 | not_null(new_version) VARCHAR[50] 23 | not_null(update_type) ENUM[digest, patch, minor, major, multiple-major, security] 24 | } 25 | 26 | Table(pull_request, "pull_request") { 27 | primary_key(id) INTEGER 28 | not_null(repo) VARCHAR[200] 29 | not_null(created_date) TIMESTAMP 30 | closed_date TIMESTAMP 31 | close_type ENUM[merge, close] 32 | not_null(number) INTEGER 33 | not_null(url) VARCHAR[200] 34 | } 35 | 36 | Table(repository_onboarding_status, "repository_onboarding_status") { 37 | primary_key(id) INTEGER 38 | 
not_null(repo) VARCHAR[200] 39 | not_null(sample_date) TIMESTAMP 40 | not_null(onboarded) ENUM[onboarded, in_progress, disabled] 41 | } 42 | 43 | note top of "dependency_update" 44 | **Legend:** 45 | 46 | primary_key(abc) means "primary key" 47 | not_null(abc) means "not null" 48 | abc means that the value may be null 49 | end note 50 | 51 | note left of "dependency_update" 52 | dependency_name is e.g. "vite" 53 | (or whatever the **Package** column in the table 54 | of the PR body states) 55 | end note 56 | 57 | note left of "pull_request" 58 | repo has the format "owner/repo" 59 | 60 | number is the PR number as shown in the URL 61 | 62 | url is the URL of the PR (not of the REST API, but of the HTML/web UI) 63 | end note 64 | 65 | dependency_update "1..*" --> "1" pull_request : "dependency_update.pr_id" points\nto the Pull Request from whose body the\ndependency was parsed 66 | 67 | @enduml 68 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # The Source Code Management (SCM) provider to use. Currently, only "github" and "gitlab" are supported 2 | SCM_PROVIDER=github 3 | 4 | # The PostgreSQL database into which the PRs are written, format: "<user>:<password>@<host>[:<port>]/<database>" 5 | # The following string uses the PostgreSQL database provided in the docker-compose.yaml file 6 | DATABASE_WITH_CREDS=metabase:password@postgres/metabase 7 | 8 | # The base URL of the SCM's API to use (optional, if omitted, the default is "https://api.github.com" if SCM_PROVIDER=github) 9 | API_BASE_URL= 10 | 11 | # The personal access token to use for the API of the SCM. In case of GitHub, it needs to have the "repo" scope, 12 | # for GitLab you need at least the "read_api" scope 13 | PAT=your-PAT 14 | 15 | # Comma-separated list of repositories to fetch Renovate PRs from. 16 | # Format: "<owner>/<repo>,<owner>/<repo>,<owner>/<repo>,...", where <owner> is e.g. a GitHub organization (e.g. 
"myorg"), 17 | # or a GitLab group or subgroup (e.g. "my-group" or "my-group/subgroup"). You may also specify just "<owner>" WITHOUT 18 | # a repo, in which case all repositories of the specified owner are fetched. Note: if <owner> is a USER (i.e., NOT a 19 | # GitHub organization or GitLab (sub)group), then please use the form "user:<username>" so that this tool uses the 20 | # correct API endpoint of the SCM 21 | REPOS=some-org,some-other-org/some-repo 22 | 23 | # A label that you configured in your renovate.json file to be assigned to any PR created by Renovate (optional) 24 | RENOVATE_PR_LABEL=dependencies 25 | 26 | # The label that Renovate uses to mark security PRs 27 | RENOVATE_PR_SECURITY_LABEL=security 28 | 29 | # A comma-separated list of PR labels that this tool ignores (optional). A possible use case is to ignore PRs that were 30 | # created by Renovate for a certain manager or programming language which you want to exclude. In the renovate.json 31 | # config, you would have used a line such as this: "labels": ["dependencies", "depManager:{{{manager}}}"] 32 | IGNORE_PR_LABELS=depManager:github-actions 33 | 34 | # The SCM username that Renovate uses to create PRs (optional, if omitted, Renovate PRs may have been created by ANY user) 35 | # Note that you must set at least one of RENOVATE_PR_LABEL or RENOVATE_USER so that the script can identify Renovate PRs 36 | # If you set both, the script considers only those PRs that have BOTH the specified label AND are created by the specified user 37 | RENOVATE_USER= 38 | 39 | # A regular expression that identifies the title of Renovate's Onboarding PR (optional, by default a regex is used 40 | # that accepts PR titles that start with "Configure Renovate") 41 | RENOVATE_ONBOARDING_PR_REGEX= 42 | 43 | # If set to true, those dependencies that the script detects in Renovate PRs that are of type "major" and have an 44 | # increment of MULTIPLE major versions (e.g. 
v4.1 -> v6.0), will be marked as "multiple-major" (instead of "major") 45 | # in the database 46 | RENOVATE_DETECT_MULTIPLE_MAJOR=true 47 | 48 | # How many weeks in the past (measured from <today>) should the script sample the onboarding status of the Git repositories? 49 | RENOVATE_ONBOARDING_STATUS_SAMPLING_INTERVAL_MAX_PAST_WEEKS=30 50 | 51 | # The sampling interval, specified in weeks 52 | # For instance, if today is Tuesday 2023-12-19, and the sampling interval is 2 weeks, then the script will sample the 53 | # onboarding status of the Git repositories on Monday 2023-12-18, Monday 2023-12-04, etc., until 54 | # the RENOVATE_ONBOARDING_STATUS_SAMPLING_INTERVAL_MAX_PAST_WEEKS limit is reached 55 | RENOVATE_ONBOARDING_STATUS_SAMPLING_INTERVAL_IN_WEEKS=2 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /abstractions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains abstract definitions for client classes related to fetching PRs from an SCM. 3 | """ 4 | 5 | from abc import ABC, abstractmethod 6 | from dataclasses import dataclass 7 | from datetime import datetime 8 | from typing import Optional 9 | 10 | 11 | class GitCommit(ABC): 12 | """ 13 | Represents a Git commit. 
14 | """ 15 | 16 | def __init__(self, sha: str, author_or_commit_date: datetime): 17 | self._sha = sha 18 | self._author_or_commit_date = author_or_commit_date 19 | 20 | @property 21 | def sha(self) -> str: 22 | return self._sha 23 | 24 | @property 25 | def author_or_commit_date(self) -> datetime: 26 | """ 27 | Returns the author or commit date of this commit, whichever is available and comes later. 28 | """ 29 | return self._author_or_commit_date 30 | 31 | @abstractmethod 32 | def get_tree(self) -> list[str]: 33 | """ 34 | Retrieves the list of relative file names at the root-level of this commit. 35 | The implementation should cache the results, as this method may be called multiple times. 36 | """ 37 | 38 | 39 | @dataclass 40 | class PullRequest: 41 | title: str 42 | description: str 43 | labels: list[str] 44 | created_date: datetime 45 | closed_date: Optional[datetime] 46 | merged_date: Optional[datetime] 47 | repo: "GitRepository" 48 | pr_number: int 49 | url: str 50 | 51 | 52 | class NoCommitsFoundError(Exception): 53 | pass 54 | 55 | 56 | class GitRepository(ABC): 57 | """ 58 | Represents a Git repository. 59 | """ 60 | 61 | def __init__(self, owner_and_name: str): 62 | self._owner_and_name = owner_and_name 63 | 64 | @property 65 | def owner_and_name(self) -> str: 66 | return self._owner_and_name 67 | 68 | @abstractmethod 69 | def get_commits(self, since: datetime) -> list[GitCommit]: 70 | """ 71 | Retrieves the list of commits for this repository in chronological order. 72 | May raise a NoCommitsFoundError if no commits are found. 73 | """ 74 | 75 | @abstractmethod 76 | def get_pull_requests(self, pr_author_username: Optional[str] = None, 77 | renovate_pr_label: Optional[str] = None, 78 | ignore_pr_labels: Optional[list[str]] = None) -> list[PullRequest]: 79 | """ 80 | Retrieves the list of ALL pull requests (including closed ones) for this repository. 81 | If pr_author_username is provided, only PRs created by that user are returned. 
82 | If renovate_pr_label is provided, only PRs with that label are returned. 83 | At least one of pr_author_username or renovate_pr_label must be provided. 84 | If ignore_pr_labels is provided, PRs with any of these labels are not returned. 85 | """ 86 | 87 | 88 | class ScmClient(ABC): 89 | """ 90 | A client for a Source Code Management (SCM) system, such as GitHub or GitLab. 91 | """ 92 | 93 | def __init__(self, pat: str, api_base_url: Optional[str] = None): 94 | self._pat = pat 95 | self._api_base_url = api_base_url 96 | 97 | @abstractmethod 98 | def get_username(self) -> str: 99 | """ 100 | Retrieves the username for the provided PAT, verifying that the PAT is valid. 101 | """ 102 | 103 | def is_group(self, owner_or_username: str) -> bool: 104 | """ 105 | Returns True if the provided owner_or_username refers to a group that contains multiple repositories (e.g. a 106 | GitLab group or a GitHub organization), False otherwise. 107 | """ 108 | 109 | @abstractmethod 110 | def get_repository(self, owner_and_name: str) -> GitRepository: 111 | """ 112 | Retrieves the repository for the provided owner_and_name. The owner_and_name must have the format "owner/name". 113 | """ 114 | 115 | @abstractmethod 116 | def get_repositories(self, owner_or_username: str) -> list[GitRepository]: 117 | """ 118 | Retrieves the list of repositories for the provided username. owner_or_username must either be an actual 119 | username (format: "user:") or a group/organization (format: "someorgname" or "groupname/subgroup"). 
120 | """ 121 | -------------------------------------------------------------------------------- /database_models.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from enum import Enum 3 | from typing import List 4 | from typing import Optional 5 | 6 | import sqlalchemy.types 7 | from sqlalchemy import ForeignKey, select 8 | from sqlalchemy import String 9 | from sqlalchemy.orm import DeclarativeBase 10 | from sqlalchemy.orm import Mapped 11 | from sqlalchemy.orm import mapped_column 12 | from sqlalchemy.orm import relationship 13 | from sqlalchemy_utils import create_view 14 | 15 | 16 | class Base(DeclarativeBase): 17 | pass 18 | 19 | 20 | class PrCloseType(Enum): 21 | merge = "merge" 22 | close = "close" 23 | 24 | 25 | class OnboardingType(Enum): 26 | onboarded = "onboarded" 27 | in_progress = "in_progress" 28 | disabled = "disabled" 29 | 30 | 31 | class DependencyUpdateType(Enum): 32 | digest = "digest" 33 | patch = "patch" 34 | minor = "minor" 35 | major = "major" 36 | multiple_major = "multiple-major" 37 | security = "security" 38 | 39 | 40 | class RepositoryOnboardingStatus(Base): 41 | __tablename__ = "repository_onboarding_status" 42 | id: Mapped[int] = mapped_column(primary_key=True) 43 | repo: Mapped[str] = mapped_column(String(200)) 44 | sample_date: Mapped[datetime] 45 | onboarded: Mapped[OnboardingType] 46 | 47 | def __repr__(self) -> str: 48 | return (f"RepositoryOnboardingStatus(id={self.id!r}, repo={self.repo!r}, sample_date={self.sample_date!r}, " 49 | f"onboarded={self.onboarded!r})") 50 | 51 | 52 | class PullRequest(Base): 53 | __tablename__ = "pull_request" 54 | id: Mapped[int] = mapped_column(primary_key=True) 55 | repo: Mapped[str] = mapped_column(String(200)) 56 | created_date: Mapped[datetime] 57 | closed_date: Mapped[Optional[datetime]] 58 | close_type: Mapped[Optional[PrCloseType]] 59 | number: Mapped[int] 60 | url: Mapped[str] = mapped_column(String(200)) 61 | 
dependency_updates: Mapped[List["DependencyUpdate"]] = relationship( 62 | back_populates="pull_request", cascade="all, delete-orphan" 63 | ) 64 | 65 | def __repr__(self) -> str: 66 | return f"PullRequest(id={self.id!r}, repo={self.repo!r}, number={self.number!r}," \ 67 | f"created_date={self.created_date!r}, " \ 68 | f"closed_date={self.closed_date!r}, close_type={self.close_type!r}, url={self.url!r})" 69 | 70 | 71 | class DependencyUpdate(Base): 72 | __tablename__ = "dependency_update" 73 | id: Mapped[int] = mapped_column(primary_key=True) 74 | pr_id: Mapped[int] = mapped_column(ForeignKey("pull_request.id")) 75 | pull_request: Mapped["PullRequest"] = relationship(back_populates="dependency_updates") 76 | dependency_name: Mapped[str] = mapped_column(String(200)) 77 | old_version: Mapped[str] = mapped_column(String(50)) 78 | new_version: Mapped[str] = mapped_column(String(50)) 79 | # Note: the values_callable is needed to use the values of the enum, not the names 80 | update_type: Mapped[DependencyUpdateType] = mapped_column( 81 | sqlalchemy.types.Enum(DependencyUpdateType, values_callable=lambda x: [i.value for i in x])) 82 | 83 | def __repr__(self) -> str: 84 | return f"DependencyUpdate(id={self.id!r}, pr={self.pull_request.number!r}," \ 85 | f"dependency_name={self.dependency_name!r}, " \ 86 | f"old_version={self.old_version!r}, new_version={self.new_version!r}, " \ 87 | f"update_type={self.update_type!r})" 88 | 89 | 90 | # Taken from 91 | # https://github.com/kvesteri/sqlalchemy-utils/blob/db32722aeb7439778cea9473fe00cddca6d2e302/tests/test_views.py#L58 92 | class DependenciesWithPullRequestsView(Base): 93 | __table__ = create_view( 94 | name='deps_with_prs_view', 95 | selectable=select( 96 | DependencyUpdate.id, 97 | DependencyUpdate.dependency_name, 98 | DependencyUpdate.old_version, 99 | DependencyUpdate.new_version, 100 | DependencyUpdate.update_type, 101 | PullRequest.id.label('pr_id'), 102 | PullRequest.created_date, 103 | PullRequest.closed_date, 104 | 
PullRequest.close_type, 105 | PullRequest.repo 106 | ).select_from( 107 | DependencyUpdate.__table__.join(PullRequest, DependencyUpdate.pr_id == PullRequest.id) 108 | ), metadata=Base.metadata 109 | ) 110 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass 3 | from typing import Optional 4 | 5 | from sqlalchemy import create_engine 6 | 7 | from abstractions import GitRepository 8 | from client_impl import ScmClientImpl, scm_client_factory 9 | 10 | 11 | @dataclass 12 | class Configuration: 13 | scm_client_impl: ScmClientImpl 14 | database_with_credentials: str 15 | api_base_url: Optional[str] 16 | pat: str 17 | repos: list[GitRepository] 18 | renovate_pr_label: str 19 | renovate_pr_security_label: str 20 | ignore_pr_labels: Optional[list[str]] 21 | renovate_scm_user: Optional[str] 22 | detect_multiple_major_updates: bool 23 | renovate_onboarding_pr_regex: Optional[str] 24 | renovate_onboarding_sampling_max_weeks: int 25 | renovate_onboarding_sampling_interval_weeks: int 26 | 27 | 28 | def load_and_verify_configuration() -> Configuration: 29 | """ 30 | Checks whether all required environment variables are present and checks the PostgreSQL database connection as 31 | well as the SCM API connection. 32 | 33 | See the .env.example file for the documentation of the environment variables. 
34 | """ 35 | configuration = Configuration( 36 | scm_client_impl=ScmClientImpl(os.getenv("SCM_PROVIDER")), 37 | database_with_credentials=os.getenv("DATABASE_WITH_CREDS"), 38 | api_base_url=os.getenv("API_BASE_URL", None), 39 | pat=os.getenv("PAT"), 40 | repos=[], # will be filled below 41 | renovate_pr_label=os.getenv("RENOVATE_PR_LABEL"), 42 | renovate_pr_security_label=os.getenv("RENOVATE_PR_SECURITY_LABEL"), 43 | ignore_pr_labels=None, 44 | renovate_scm_user=os.getenv("RENOVATE_USER"), 45 | detect_multiple_major_updates=os.getenv("RENOVATE_DETECT_MULTIPLE_MAJOR", "false") == "true", 46 | renovate_onboarding_pr_regex=os.getenv("RENOVATE_ONBOARDING_PR_REGEX") or r"^Configure Renovate", 47 | renovate_onboarding_sampling_max_weeks=int( 48 | os.getenv("RENOVATE_ONBOARDING_STATUS_SAMPLING_INTERVAL_MAX_PAST_WEEKS")), 49 | renovate_onboarding_sampling_interval_weeks=int( 50 | os.getenv("RENOVATE_ONBOARDING_STATUS_SAMPLING_INTERVAL_IN_WEEKS")), 51 | ) 52 | 53 | if ignore_pr_labels := os.getenv("IGNORE_PR_LABELS", ""): 54 | configuration.ignore_pr_labels = ignore_pr_labels.split(",") 55 | 56 | if not configuration.database_with_credentials: 57 | raise ValueError("Environment variable DATABASE_WITH_CREDS must be set " 58 | "to ':@[:]/'") 59 | if not configuration.pat: 60 | raise ValueError("Environment variable PAT must be set to a valid personal access token") 61 | repos_and_owners = os.getenv("REPOS").split(",") 62 | if not repos_and_owners: 63 | raise ValueError("Environment variable REPOS must be set to a comma-separated list of " 64 | "repositories/owners, where each entry has the form '/' or ''") 65 | if not configuration.renovate_pr_label and not configuration.renovate_scm_user: 66 | raise ValueError("At least one of the environment variables RENOVATE_PR_LABEL or RENOVATE_USER must be set") 67 | if not configuration.renovate_pr_security_label: 68 | raise ValueError("Environment variable RENOVATE_PR_SECURITY_LABEL must be set to the label that Renovate " 69 | "uses 
to mark security PRs (e.g. 'security')") 70 | if (configuration.renovate_onboarding_sampling_max_weeks <= 0 71 | or configuration.renovate_onboarding_sampling_max_weeks <= 0): 72 | raise ValueError("Environment variables RENOVATE_ONBOARDING_STATUS_SAMPLING_INTERVAL_MAX_PAST_WEEKS and " 73 | "RENOVATE_ONBOARDING_STATUS_SAMPLING_INTERVAL_IN_WEEKS must be set to positive numbers") 74 | 75 | # Check the PostgreSQL configuration 76 | engine = create_engine(f"postgresql+psycopg2://{configuration.database_with_credentials}") 77 | connection = engine.connect() 78 | connection.close() 79 | 80 | # Check the API configuration 81 | scm_client = scm_client_factory(configuration.scm_client_impl, configuration.pat, configuration.api_base_url) 82 | scm_client.get_username() # only called to verify that the PAT is valid, we don't actually need the username 83 | 84 | # Verify that all specified repositories exist, and also expand the repositories of organizations or users 85 | for owner_or_repo in repos_and_owners: 86 | if scm_client.is_group(owner_or_repo): 87 | try: 88 | configuration.repos.extend(scm_client.get_repositories(owner_or_repo)) 89 | except Exception as e: 90 | raise ValueError(f"Unable to find group/organization {owner_or_repo}, aborting: {e}") 91 | else: 92 | try: 93 | configuration.repos.append(scm_client.get_repository(owner_or_repo)) 94 | except Exception as e: 95 | raise ValueError(f"Unable to find repository {owner_or_repo}, aborting: {e}") 96 | 97 | # Verify that there are no duplicates in configuration.repos (could happen if the user provides both 98 | # "some-owner" AND "some-owner/some-repo" in the environment variable REPOS) 99 | if len(configuration.repos) != len(set(configuration.repos)): 100 | raise ValueError(f"There are duplicate repositories in the configuration, " 101 | f"aborting: {configuration.repos}") 102 | 103 | return configuration 104 | -------------------------------------------------------------------------------- 
/client_impl/github_client.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Optional 3 | 4 | import github 5 | import github.Repository 6 | import github.AuthenticatedUser 7 | from github.Consts import DEFAULT_BASE_URL 8 | 9 | from abstractions import ScmClient, GitRepository, GitCommit, PullRequest 10 | 11 | 12 | class GitHubClient(ScmClient): 13 | 14 | def __init__(self, pat: str, api_base_url: Optional[str] = None): 15 | super().__init__(pat, api_base_url) 16 | self._github_client = github.Github(auth=github.Auth.Token(pat), base_url=api_base_url or DEFAULT_BASE_URL) 17 | self._authenticated_user: Optional[github.AuthenticatedUser.AuthenticatedUser] = None 18 | 19 | def get_username(self) -> str: 20 | if not self._authenticated_user: 21 | self._authenticated_user = self._github_client.get_user() 22 | 23 | return self._authenticated_user.login 24 | 25 | def is_group(self, owner_or_username: str) -> bool: 26 | if owner_or_username.startswith("user:"): 27 | return True 28 | 29 | return '/' not in owner_or_username 30 | 31 | def get_repository(self, owner_and_name: str) -> GitRepository: 32 | gh_repo = self._github_client.get_repo(owner_and_name) 33 | return GitHubRepository(owner_and_name, self._github_client, gh_repo) 34 | 35 | def get_repositories(self, owner_or_username: str) -> list[GitRepository]: 36 | if owner_or_username.startswith("user:"): 37 | owner_or_username = owner_or_username[5:] 38 | if self.get_username().lower() == owner_or_username.lower(): 39 | sdk_function = self._authenticated_user.get_repos 40 | kwargs = {"type": "owner"} 41 | else: 42 | sdk_function = self._github_client.get_user(owner_or_username).get_repos 43 | kwargs = {} 44 | else: 45 | sdk_function = self._github_client.get_organization(owner_or_username).get_repos 46 | kwargs = {} 47 | 48 | repos: list[GitRepository] = [] 49 | 50 | for repo in sdk_function(**kwargs): 51 | 
repos.append(GitHubRepository(repo.full_name, self._github_client, repo)) 52 | 53 | return repos 54 | 55 | 56 | class GitHubCommit(GitCommit): 57 | 58 | def __init__(self, sha: str, author_or_commit_date: datetime, gh_repo: github.Repository.Repository): 59 | super().__init__(sha, author_or_commit_date) 60 | self._gh_repo = gh_repo 61 | self._cached_tree: Optional[list[str]] = None 62 | 63 | def get_tree(self) -> list[str]: 64 | if self._cached_tree is not None: 65 | return self._cached_tree 66 | 67 | self._cached_tree = [tree_entry.path for tree_entry in self._gh_repo.get_git_tree(self.sha).tree] 68 | return self._cached_tree 69 | 70 | 71 | class GitHubRepository(GitRepository): 72 | def __init__(self, owner_and_name: str, github_client: github.Github, gh_repo: github.Repository.Repository): 73 | super().__init__(owner_and_name) 74 | self._gh_repo = gh_repo 75 | self._github_client = github_client 76 | 77 | def get_commits(self, since: datetime) -> list[GitCommit]: 78 | gh_commits = [commit for commit in 79 | self._gh_repo.get_commits(sha=self._gh_repo.default_branch, since=since)] 80 | gh_commits.reverse() 81 | commits: list[GitHubCommit] = [] 82 | for commit in gh_commits: 83 | author_or_commit_date = commit.commit.author.date 84 | if commit.commit.committer: 85 | author_or_commit_date = commit.commit.committer.date 86 | commits.append( 87 | GitHubCommit(sha=commit.sha, author_or_commit_date=author_or_commit_date, gh_repo=self._gh_repo)) 88 | 89 | return commits 90 | 91 | def get_pull_requests(self, pr_author_username: Optional[str] = None, 92 | renovate_pr_label: Optional[str] = None, 93 | ignore_pr_labels: Optional[list[str]] = None) -> list[PullRequest]: 94 | prs: list[PullRequest] = [] 95 | 96 | # Note: the pulls API does not seem to be affected by GitHub rate limiting. While we could use the more 97 | # efficient issues search API, it takes approximately as long as using the pulls API, because the issues 98 | # search API is rate-limited. 
See Git commit df4f89062cfe7af5715beebcf20a4029836dc6c3 for the variant using 99 | # the search API. 100 | 101 | for pr in self._gh_repo.get_pulls(state="all"): 102 | if pr_author_username is None or pr.user.login == pr_author_username: 103 | if renovate_pr_label is None or any(label.name == renovate_pr_label for label in pr.labels): 104 | if ignore_pr_labels is None or not any(label.name in ignore_pr_labels for label in pr.labels): 105 | prs.append( 106 | PullRequest(title=pr.title, 107 | description=pr.body, 108 | labels=[label.name for label in pr.labels], 109 | created_date=pr.created_at, 110 | closed_date=pr.closed_at, 111 | merged_date=pr.merged_at, 112 | repo=self, 113 | pr_number=pr.number, 114 | url=pr.html_url) 115 | ) 116 | 117 | return prs 118 | -------------------------------------------------------------------------------- /client_impl/gitlab_client.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Optional, List 3 | 4 | import gitlab 5 | import gitlab.v4.objects.projects 6 | from gitlab.v4.objects import CurrentUser, ProjectCommit 7 | 8 | from abstractions import ScmClient, GitRepository, GitCommit, PullRequest 9 | 10 | 11 | class GitLabClient(ScmClient): 12 | 13 | def __init__(self, pat: str, api_base_url: Optional[str] = None): 14 | super().__init__(pat, api_base_url) 15 | self._gitlab_client = gitlab.Gitlab(api_base_url, private_token=pat) if api_base_url else gitlab.Gitlab( 16 | private_token=pat) 17 | self._authenticated_user: Optional[CurrentUser] = None 18 | 19 | def get_username(self) -> str: 20 | self._gitlab_client.auth() 21 | if not self._authenticated_user: 22 | self._authenticated_user = self._gitlab_client.user 23 | 24 | return self._authenticated_user.username 25 | 26 | def is_group(self, owner_or_username: str) -> bool: 27 | if owner_or_username.startswith("user:"): 28 | return True 29 | 30 | try: 31 | 
self._gitlab_client.groups.get(owner_or_username) 32 | return True 33 | except gitlab.exceptions.GitlabGetError as e: 34 | if e.response_code == 404: 35 | return False 36 | raise e 37 | 38 | def get_repository(self, owner_and_name: str) -> GitRepository: 39 | project = self._gitlab_client.projects.get(owner_and_name) 40 | return GitLabRepository(owner_and_name, self._gitlab_client, project) 41 | 42 | def get_repositories(self, owner_or_username: str) -> List[GitRepository]: 43 | if owner_or_username.startswith("user:"): 44 | owner_or_username = owner_or_username[5:] 45 | if self.get_username().lower() == owner_or_username.lower(): 46 | projects = self._gitlab_client.projects.list(owned=True, get_all=True) 47 | else: 48 | user = self._gitlab_client.users.list(username=owner_or_username, get_all=True)[0] 49 | projects = user.projects.list(owned=True, get_all=True) 50 | else: 51 | group = self._gitlab_client.groups.get(owner_or_username) 52 | projects = self._get_projects_from_groups_recursive(group) 53 | 54 | repos: List[GitRepository] = [GitLabRepository(project.id, self._gitlab_client, project) for project in 55 | projects] 56 | 57 | return repos 58 | 59 | def _get_projects_from_groups_recursive(self, group: gitlab.v4.objects.Group) -> List[gitlab.v4.objects.Project]: 60 | # Convert subproject objects (stored in group.projects) to "full" GitLab Project objects, which also have 61 | # the mergerequests attribute 62 | subprojects = group.projects.list(get_all=True) 63 | projects: List[gitlab.v4.objects.Project] = [] 64 | for sub_project in subprojects: 65 | projects.append(self._gitlab_client.projects.get(sub_project.id)) 66 | 67 | # Recursively get projects from subgroups 68 | subgroups: List[gitlab.v4.objects.GroupSubgroup] = group.subgroups.list(get_all=True) 69 | for subgroup in subgroups: 70 | full_subgroup = self._gitlab_client.groups.get(subgroup.id) 71 | projects.extend(self._get_projects_from_groups_recursive(full_subgroup)) 72 | 73 | return projects 74 | 75 

class GitLabCommit(GitCommit):
    """A single commit of a GitLab project, with lazy, cached access to its file tree."""

    def __init__(self, sha: str, author_or_commit_date: datetime, gl_project: gitlab.v4.objects.Project):
        super().__init__(sha, author_or_commit_date)
        self._gl_project = gl_project
        # Lazily-filled cache for get_tree(), so that repeated calls trigger only one API request
        self._cached_tree: Optional[List[str]] = None

    def get_tree(self) -> List[str]:
        """Returns the repository-relative paths of the tree entries at this commit (cached after first call)."""
        if self._cached_tree is not None:
            return self._cached_tree

        tree = self._gl_project.repository_tree(ref=self.sha, get_all=True)
        self._cached_tree = [entry['path'] for entry in tree]
        return self._cached_tree


class GitLabRepository(GitRepository):
    """GitLab implementation of the GitRepository abstraction (commits + merge requests)."""

    def __init__(self, owner_and_name: str, gitlab_client: gitlab.Gitlab, gl_project: gitlab.v4.objects.Project):
        super().__init__(owner_and_name)
        self._gl_project = gl_project
        self._gitlab_client = gitlab_client

    def get_commits(self, since: datetime) -> List[GitCommit]:
        """Returns the commits since the given date, oldest first (mirrors GitHubRepository.get_commits)."""
        gl_commits: List[ProjectCommit] = self._gl_project.commits.list(since=since.isoformat(), get_all=True)
        gl_commits.reverse()  # reversed to oldest-first, same as the GitHub client does
        commits: List[GitLabCommit] = [
            GitLabCommit(sha=commit.attributes['id'],
                         # committed_date is used (not authored_date), matching the GitHub client's
                         # preference for committer.date over author.date
                         author_or_commit_date=datetime.fromisoformat(commit.attributes['committed_date']),
                         gl_project=self._gl_project)
            for commit in gl_commits
        ]
        return commits

    def get_pull_requests(self, pr_author_username: Optional[str] = None,
                          renovate_pr_label: Optional[str] = None,
                          ignore_pr_labels: Optional[list[str]] = None) -> List[PullRequest]:
        """
        Returns all merge requests (any state), optionally filtered by author (server-side) and by
        required/excluding labels (client-side).
        """
        prs: List[PullRequest] = []
        mr_params = {'state': 'all', 'get_all': True}
        if pr_author_username:
            mr_params['author_username'] = pr_author_username

        for mr in self._gl_project.mergerequests.list(**mr_params):
            if renovate_pr_label is None or renovate_pr_label in [label for label in mr.labels]:
                if ignore_pr_labels is None or not any(label in ignore_pr_labels for label in mr.labels):
                    prs.append(
                        PullRequest(
title=mr.title, 123 | description=mr.description, 124 | labels=mr.labels, 125 | created_date=datetime.fromisoformat(mr.created_at), 126 | closed_date=datetime.fromisoformat(mr.closed_at) if mr.closed_at else None, 127 | merged_date=datetime.fromisoformat(mr.merged_at) if mr.merged_at else None, 128 | repo=self, 129 | pr_number=mr.iid, 130 | url=mr.web_url 131 | ) 132 | ) 133 | 134 | return prs 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Renovate PR visualization 2 | 3 | This is a turnkey solution that shows your [Renovate](https://docs.renovatebot.com/) bot Pull Requests. 4 | Now you can see the "technical debt" of your git repositories. 5 | For now, only GitHub and GitLab are supported. 6 | 7 | Provide a few configuration values and run `docker compose up` to get a dashboard that looks like this: 8 | 9 | ![Screenshot of the dashboard](./readme-assets/dashboard-example.png) 10 | 11 | The above screenshot shows the state of the [Cypress](https://github.com/cypress-io/cypress) repository as of 2023-12-19 (note: the dashboard comes with _filters_ for the start and end date, and for choosing a specific repository, the filters are not shown in the screenshot): 12 | - The **top** graph shows the number of open Renovate PRs over time, with one sample point per week (Monday at UTC midnight), grouped by the type of dependency update (e.g. 
`major`, `minor`, `patch`, `digest`, `security`) 13 | - The **left center** graph is meaningless for _individual_ repositories: it is meant to be used in case you instruct the tool to scrape _many_ repositories, and you want to know the Renovate onboarding status of the repositories, over time: 14 | - "onboarded": if there is a `renovate.json[5]` file in the root of the repo's default branch 15 | - "onboarding": if there is a "Configure Renovate" PR open 16 | - "disabled": if both "onboarded" and "onboarding" are false 17 | - The **right center** graph shows the average time it took to close a Renovate PR 18 | - The **bottom** graph shows a table overview of the database 19 | 20 | ## Why do I need this? 21 | 22 | Renovate creates Pull Requests for outdated dependencies, which are a form of technical debt. 23 | Knowing how much technical debt you have helps you when setting your priorities. 24 | 25 | When you use this tool you'll better understand: 26 | 27 | - how your technical debt _changes over time_: for instance, if the number (or severity) of the PRs keeps increasing, you may want to grant your development team a larger "budget" for updating outdated dependencies 28 | - how long does it take the development team to address (that is, close) Renovate PRs, on average 29 | - if you have _many_ repositories: how many repositories have been onboarded to Renovate, and how has this changed over time 30 | 31 | ## How it works 32 | 33 | ![Architecture diagram](./readme-assets/architecture.png) 34 | 35 | This tool comes with a pre-configured **Docker Compose** setup that uses [Metabase](https://www.metabase.com/) to draw a dashboard that visualizes your Renovate Pull Requests. 36 | But you may use another "business intelligence" tool, or replace the PostgreSQL database with another relational database. 37 | The most complex SQL query is the one that computes how many PRs are open at a given point in time: 38 | 39 |
40 | Example for SQL query 41 | 42 | ```sql 43 | WITH weekly_dates AS (SELECT generate_series( 44 | date_trunc('week', TIMESTAMP '2023-09-25'), 45 | date_trunc('week', CURRENT_DATE), 46 | '1 week'::interval 47 | ) AS week_start_date), 48 | update_types AS (SELECT DISTINCT update_type FROM dependency_update), 49 | week_priorities AS (SELECT week_start_date, update_type 50 | FROM weekly_dates CROSS JOIN update_types), 51 | open_prs AS (SELECT date_trunc('week', created_date) AS week_created, 52 | date_trunc('week', COALESCE(closed_date, CURRENT_DATE + INTERVAL '10 years')) AS week_closed, 53 | update_type, repo 54 | FROM deps_with_prs_view) 55 | SELECT wp.week_start_date, 56 | wp.update_type, 57 | COUNT(open_prs.week_created) 58 | FROM week_priorities wp 59 | LEFT JOIN open_prs 60 | ON wp.week_start_date BETWEEN open_prs.week_created AND open_prs.week_closed 61 | AND wp.update_type = open_prs.update_type AND open_prs.repo = 'owner/repo' 62 | GROUP BY wp.week_start_date, wp.update_type 63 | ORDER BY wp.week_start_date, wp.update_type; 64 | ``` 65 | Note that you must replace the timestamps in rows 2+ 3 and the `owner/repo` at the bottom. 66 |
67 | 68 | The database model looks as follows: 69 | 70 | ![Database model](./readme-assets/entity-relationship-model.png) 71 | 72 | ## Usage instructions 73 | 74 | > [!IMPORTANT] 75 | > Using the tool only makes sense if Renovate has been running in your repositories _for a while_ (e.g. several weeks, better months). Otherwise, the dashboard won't show much data. 76 | > 77 | > Your Renovate configuration should contain the following settings: 78 | > - The tool needs a clear way to identify the PRs created by Renovate 79 | > - The PRs could be created by a specific functional user (on GitHub **.com** this is`renovate[bot]`) 80 | > - You could assign a _label_ to the PRs (in your `renovate.json` file, set `labels` e.g. to `["dependencies"]`) 81 | > - The tool needs a clear way to identify **security** PRs, you could do this by putting this snippet in your `renovate.json` file: 82 | > ```json 83 | > "vulnerabilityAlerts": { 84 | > "labels": ["security", "dependencies"], 85 | > } 86 | > ``` 87 | 88 | Follow these steps to run the tool: 89 | 90 | 1. Create a copy of the `.env.example` file (name it `.env`), and change the configuration values, which are documented in the file 91 | 2. Run `docker compose up -d`, wait for the `datascraper` service/container to finish with exit code 0 (if exit code is 1, check the container logs for errors) 92 | 3. Open Metabase at http://localhost:3000, login with username `admin@site.org` and password `admin1!`, then navigate to the **Renovate dashboard**. This dashboard shows the data of _all_ Git repositories, but the _Repository_ filter at the top of the dashboard allows you to filter the entries down to a specific repository 93 | 4. 
Whenever you want to update the Renovate data, run `docker compose up datascraper` again 94 | 95 | > [!TIP] 96 | > If you run Docker Desktop on macOS with an **Apple Silicon** (ARM64) chip, the public `metabase/metabase:vXXX` image may perform poorly or even crash, because it is only built for AMD/Intel CPU architectures (and the QEMU-based emulation of such images can be slow). You can easily build and use your own ARM-based image as follows: 97 | > 1. Build the Metabase ARM64-based image locally: `docker build -t metabase:local --build-arg metabase_version=v0.48.0 -f Dockerfile-metabase .` 98 | > 2. Edit the `docker-compose.yaml`, updating the `image:` reference for the `metabase` service to `metabase:local` 99 | -------------------------------------------------------------------------------- /renovate_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script fills the provided PostgreSQL database with information about Renovate PRs and repository onboarding 3 | statuses. 
"""
import re
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional

from marko.ext.gfm import gfm
from marko.ext.gfm.elements import Table
from marko.inline import Link, InlineElement, CodeSpan, RawText
from packaging.version import Version, InvalidVersion
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from tqdm import tqdm

from abstractions import PullRequest, GitRepository, NoCommitsFoundError, GitCommit
from conf import Configuration
from database_models import PullRequest as PullRequestDb, PrCloseType, DependencyUpdate, Base, \
    RepositoryOnboardingStatus, OnboardingType, DependencyUpdateType

# Matches semantic-commit-style Renovate PR titles, e.g. "chore(deps): update dependency foo to v2"
PR_TITLE_REGEX = re.compile(r"^(chore|fix)\(deps\): (bump|update) ")


def has_relevant_pr_title(renovate_pr_title: str) -> bool:
    """
    For the given title of a PR created by Renovate, return whether this PR should really be considered.
    Returns False for those PRs that someone from the dev team manually "deleted" (without actually deleting it),
    which involved renaming the title.

    A Renovate PR is relevant if its title matches one of the following patterns (ignoring a possibly-existing
    " - autoclosed" postfix). NOTE(review): the angle-bracket placeholders below were reconstructed (they were
    lost in a text conversion) — confirm against Renovate's PR title templates:

    Update dependency <depName> to v<version>
        Used when there is only one dependency that is updated, e.g. "Update dependency @types/node to v14.14.3"
    Update <group name>
        Used when there are multiple dependencies with minor upgrades, e.g. one from 8.1->8.2 and one from 5.1->5.2
    Update <group name> (major)
        Used when there are multiple dependencies with major upgrades, e.g. one from 8->9 and one from 5->6
    Update <group name> to v<version>
        Used when there are multiple dependencies which ALL have the SAME updates (old -> new), on minor/patch level
    Update <group name> to v<version> (major)
        Used when there are multiple dependencies which ALL have the SAME updates (old -> new), on major level

    If the repository mainly used semantic commit messages, then the PR title may also have the following patterns:
    chore(deps): bump <depName> from <old version> to <new version>
    chore(deps): update <depName> to <new version>
    fix(deps): update <depName> to <new version>

    where <new version> is either a single number indicating major updates (e.g. "2" or "2023") or a full version
    number (e.g. "1.2.3" or "2023.1.2"). <depName> may also have a " [SECURITY]" postfix.
    """
    if renovate_pr_title.startswith("Update "):
        return True
    if PR_TITLE_REGEX.match(renovate_pr_title):
        return True
    return False


@dataclass
class RenovatePrs:
    # PRs that update one or more dependencies
    dependency_prs: list[PullRequest] = field(default_factory=list)
    # PRs that onboard a repository to Renovate (title matches the onboarding regex, e.g. "Configure Renovate")
    onboarding_prs: list[PullRequest] = field(default_factory=list)


def get_renovate_prs(config: Configuration) -> RenovatePrs:
    """
    Returns all those PRs from the specified repositories that were created by Renovate and that have a relevant title.
    """
    onboarding_title_regex = re.compile(config.renovate_onboarding_pr_regex)

    renovate_prs = RenovatePrs()

    iterator = tqdm(config.repos, ncols=80)
    for git_repo in iterator:
        for pr in git_repo.get_pull_requests(pr_author_username=config.renovate_scm_user,
                                             renovate_pr_label=config.renovate_pr_label,
                                             ignore_pr_labels=config.ignore_pr_labels):
            # Onboarding PRs are collected separately; they never count as dependency-update PRs
            if onboarding_title_regex.search(pr.title):
                renovate_prs.onboarding_prs.append(pr)
                continue

            if not config.renovate_pr_label or config.renovate_pr_label in pr.labels:
                if has_relevant_pr_title(pr.title):
                    renovate_prs.dependency_prs.append(pr)

    # Work around issue https://github.com/tqdm/tqdm/issues/771
    if Path("/.dockerenv").exists():
        print(iterator)

    return renovate_prs


# Extracts the leading "major[.minor[.patch]]" portion of a version string
MAJOR_MINOR_PATCH_REGEX = re.compile(r"\d+(?:\.\d+){0,2}")


def clean_version(version: str) -> str:
    """
    Cleans the given version string, removing any unexpected characters, so that Python's packaging.Version class can
    parse it successfully.

    Examples for patterns that can successfully be parsed:
    - "^1.2.3" -> "1.2.3"
    - "~1.2.3" -> "1.2.3"
    - "1.2.3-alpha.1" -> "1.2.3"
    - "stable-v1.2.3" -> "1.2.3"
    - "1.x" -> "1.0" (to handle version updates such as "1.x -> 2.x")

    Raises ValueError if no major[.minor[.patch]] pattern can be found at all.
    """
    version = version.replace("x", "0")
    if match := MAJOR_MINOR_PATCH_REGEX.search(version):
        version = match.group()
        return version
    else:
        raise ValueError(f"Unable to parse version {version!r}, regex for major/minor/patch did not find any matches")


# Lower-case hex string of 7-40 characters: a short/full Git SHA-1 or a truncated container-image digest
DIGEST_REGEX = re.compile(r"^[a-f0-9]{7,40}$")


def is_digest_version(version: str) -> bool:
    """
    Returns True if the given version looks like a digest, e.g. a (short) Git hash, or a (Docker) SHA256 hash.
    """
    return DIGEST_REGEX.match(version) is not None


def parse_dependency_updates(renovate_pr: PullRequest,
                             config: Configuration) -> list[DependencyUpdate]:
    """
    Parses the MarkDown body of the given Renovate PR and returns all dependency updates.

    Raises ValueError whenever the PR body deviates from the expected table structure, so that the caller can
    skip (and log a warning for) unparseable PRs.
    """
    dependency_updates: list[DependencyUpdate] = []
    # Note: use the GitHub-flavored Markdown parser, which supports parsing tables
    markdown_document = gfm.parse(renovate_pr.description)

    # The first table in the PR body is assumed to be the dependencies table
    dependencies_table: Optional[Table] = None
    for child in markdown_document.children:
        if isinstance(child, Table):
            dependencies_table = child
            break
    if not dependencies_table:
        raise ValueError(f"Could not find dependencies table in PR {renovate_pr.url}")

    # Determine the columns, because they are not always at the same position:
    # The first column is always the "Package" column which contains the package name.
    # The other relevant column is called "Change" and it may appear as second column, or e.g. as fourth column,
    # having the content "<old version> -> <new version>"
    # Verify that the table has the "Package" column
    if dependencies_table.head.children[0].children[0].children != "Package":
        raise ValueError(f"Package column is missing in dependencies table in PR {renovate_pr.url}")

    # Find the "Change" column that contains the old and new version
    change_column_index = -1
    for i, column in enumerate(dependencies_table.head.children):
        if column.children[0].children == "Change":
            change_column_index = i
            break

    if change_column_index == -1:
        raise ValueError(f"Change column is missing in dependencies table in PR {renovate_pr.url}")

    # Parse the dependency updates table
    for row in dependencies_table.children[1:]:  # note: row 0 is the header row
        # The cell content of the "Package" column may contain the package name directly (either as string, or
        # wrapped in a link)
        if len(row.children[0].children) == 1:
            if isinstance(row.children[0].children[0], Link):
                dependency_name = row.children[0].children[0].children[0].children
            else:
                dependency_name = row.children[0].children[0].children
        # Alternatively, the "Package" column contains a list of items, the first one being the dependency wrapped
        # in a Link, followed by other elements: "(", "<source>", ")"
        else:
            if not isinstance(row.children[0].children[0], Link):
                raise ValueError(f"Unable to parse dependency table: expected Link, but got {row.children[0]!r} "
                                 f"for PR {renovate_pr.url}")
            dependency_name = row.children[0].children[0].children[0].children

        if type(dependency_name) != str:
            raise ValueError(f"Unable to parse dependency table: dependency name is not a string: "
                             f"{dependency_name!r}")

        # The cell content of the "Change" column might contain a Link or not, so we either have
        # Link(CodeSpan(oldversion), RawText(" -> "), CodeSpan(newversion)), or
        # CodeSpan(oldversion), RawText(" -> "), CodeSpan(newversion)
        old_and_new_version_sequence: list[InlineElement] = row.children[change_column_index].children
        if isinstance(row.children[change_column_index].children[0], Link):
            old_and_new_version_sequence = row.children[change_column_index].children[0].children
        is_valid_version_sequence = len(old_and_new_version_sequence) == 3 \
            and isinstance(old_and_new_version_sequence[0], CodeSpan) \
            and isinstance(old_and_new_version_sequence[1], RawText) \
            and isinstance(old_and_new_version_sequence[2], CodeSpan) \
            and old_and_new_version_sequence[1].children == " -> "

        if not is_valid_version_sequence:
            raise ValueError(f"Unable to parse dependency table: expected "
                             f"Link(CodeSpan(oldversion), RawText(\" -> \"), CodeSpan(newversion)), or "
                             f"CodeSpan(oldversion), RawText(\" -> \"), CodeSpan(newversion), "
                             f"but got {old_and_new_version_sequence!r} for PR {renovate_pr.url}")
        old_version_str = old_and_new_version_sequence[0].children
        new_version_str = old_and_new_version_sequence[2].children

        if (type(old_version_str), type(new_version_str)) != (str, str):
            raise ValueError(f"Unable to parse dependency table: old/new versions are not strings: "
                             f"{old_version_str!r}, {new_version_str!r}")

        # Versions that look like digests cannot be compared semantically and get their own update type
        if is_digest_version(old_version_str):
            update_type = DependencyUpdateType.digest
        else:
            try:
                old_version = Version(clean_version(old_version_str))
                new_version = Version(clean_version(new_version_str))
            except (InvalidVersion, ValueError) as e:
                raise ValueError(f"Unable to parse old/new versions '{old_version_str}' / '{new_version_str}' for "
                                 f"dependency {dependency_name}: {e}") from None

            # The security label takes precedence over the semver-based classification below
            if config.renovate_pr_security_label in renovate_pr.labels:
                update_type = DependencyUpdateType.security
            elif old_version.major != new_version.major:
                update_type = DependencyUpdateType.major
                if config.detect_multiple_major_updates and (new_version.major - old_version.major) > 1:
                    update_type = DependencyUpdateType.multiple_major
            elif old_version.minor != new_version.minor:
                update_type = DependencyUpdateType.minor
            else:
                update_type = DependencyUpdateType.patch

        dependency_updates.append(DependencyUpdate(
            dependency_name=dependency_name,
            old_version=old_version_str,
            new_version=new_version_str,
            update_type=update_type))

    return dependency_updates


def get_database_entities(renovate_prs: list[PullRequest], config: Configuration) -> list[PullRequestDb]:
    """
    Creates the database entities (PullRequest and DependencyUpdate) by parsing the provided PRs.

    PRs whose body cannot be parsed are skipped, with a warning printed to stdout.
    """
    database_prs: list[PullRequestDb] = []
    for renovate_pr in renovate_prs:
        # For the database, "closed" means closed OR merged; when classifying, a merge takes precedence
        closed_date = renovate_pr.closed_date or renovate_pr.merged_date
        if renovate_pr.merged_date:
            close_type = PrCloseType.merge
        elif renovate_pr.closed_date:
            close_type = PrCloseType.close
        else:
            close_type = None  # the PR is still open

        database_pr = PullRequestDb(
            repo=renovate_pr.repo.owner_and_name,
            created_date=renovate_pr.created_date,
            closed_date=closed_date,
            close_type=close_type,
            number=renovate_pr.pr_number,
            url=renovate_pr.url,
        )

        try:
            dependency_updates = parse_dependency_updates(renovate_pr, config)
            database_pr.dependency_updates.extend(dependency_updates)
        except ValueError as e:
            print(f"Warning: skipping PR {renovate_pr.url}: {e}")
            continue

        database_prs.append(database_pr)

    return database_prs


def save_database_entities(database_prs: list[PullRequestDb],
                           database_onboarding_statuses: list[RepositoryOnboardingStatus],
                           configuration: Configuration) -> None:
    """
Saves the given database entities to the PostgreSQL database, clearing all old records. 280 | """ 281 | engine = create_engine(f"postgresql+psycopg2://{configuration.database_with_credentials}") 282 | Base.metadata.drop_all(engine) 283 | Base.metadata.create_all(engine) 284 | with Session(engine) as session: 285 | session.add_all(database_prs) 286 | session.add_all(database_onboarding_statuses) 287 | session.commit() 288 | 289 | 290 | class GitCommitHelper: 291 | def __init__(self, git_repository: GitRepository, cutoff_date: datetime): 292 | try: 293 | self._commits = git_repository.get_commits(since=cutoff_date) 294 | except NoCommitsFoundError: 295 | # Repository might still be empty, or the default branch might have been renamed 296 | self._commits: list[GitCommit] = [] 297 | 298 | def _get_closest_commit(self, sample_datetime: datetime) -> Optional[GitCommit]: 299 | """ 300 | Returns the closest commit to the given datetime. 301 | """ 302 | if not self._commits: 303 | return None 304 | closest_commit = self._commits[0] 305 | for commit in self._commits[1:]: 306 | if commit.author_or_commit_date <= sample_datetime: 307 | closest_commit = commit 308 | else: 309 | break 310 | 311 | return closest_commit 312 | 313 | def contains_renovate_json_file(self, sample_datetime: datetime) -> bool: 314 | closest_commit = self._get_closest_commit(sample_datetime) 315 | if not closest_commit: 316 | return False 317 | 318 | for tree_entry in closest_commit.get_tree(): 319 | if tree_entry in ["renovate.json", "renovate.json5"]: 320 | return True 321 | return False 322 | 323 | 324 | def has_renovate_onboarding_pr(onboarding_prs: list[PullRequest], sample_datetime: datetime, 325 | sampling_interval_weeks: int) -> bool: 326 | """ 327 | Returns True if there is an onboarding PR that has been created at/before sample_datetime and that was still 328 | open after the sampling interval. 
def has_renovate_onboarding_pr(onboarding_prs: list[PullRequest], sample_datetime: datetime,
                               sampling_interval_weeks: int) -> bool:
    """
    Returns True if there is an onboarding PR that has been created at/before sample_datetime and that was still
    open after the sampling interval.
    """
    for pr in onboarding_prs:
        if pr.created_date <= sample_datetime:
            # A merge also counts as closing the PR.
            closed_date = pr.closed_date or pr.merged_date
            # "Still open" means: never closed, or closed only after the sampling interval ended.
            if closed_date is None or closed_date > sample_datetime + timedelta(weeks=sampling_interval_weeks):
                return True
    return False


def get_sampling_dates(config: Configuration) -> list[datetime]:
    """
    Returns a list of Monday (8 AM UTC) datetimes in ascending order, spaced
    config.renovate_onboarding_sampling_interval_weeks apart, starting
    config.renovate_onboarding_sampling_max_weeks weeks in the past and ending with the
    Monday of the current week.
    """
    now = datetime.now(timezone.utc)
    # Monday of the current week at 08:00 UTC. weekday() is already in 0..6 (0 = Monday),
    # so no extra modulo is needed.
    monday = now - timedelta(days=now.weekday())
    monday_8am = monday.replace(hour=8, minute=0, second=0, microsecond=0)

    interval_weeks = config.renovate_onboarding_sampling_interval_weeks
    num_past_samples = config.renovate_onboarding_sampling_max_weeks // interval_weeks

    # Build from newest (the current week) to oldest, then reverse to ascending order.
    sampling_dates = [monday_8am - timedelta(weeks=i * interval_weeks)
                      for i in range(num_past_samples + 1)]
    sampling_dates.reverse()
    return sampling_dates
def get_repository_onboarding_status(onboarding_prs: list[PullRequest],
                                     config: Configuration) -> list[RepositoryOnboardingStatus]:
    """
    Extracts the RepositoryOnboardingStatus database entities for the provided repositories, sampling them in regular
    intervals.

    The onboarding status of a repository (at a specific point in time) is defined as follows:
    - "onboarded": if there is a `renovate.json[5]` file in the root of the repo's default branch
    - "onboarding": if there is a PR open that adds a `renovate.json[5]` file in the root of the repo's default branch
    - "disabled": if both "onboarded" and "onboarding" are false

    Note that for determining the onboarding status, only the DEFAULT Git branch is considered, and we assume that
    whatever is the default branch now, has also been the default branch at any point in the past.
    """
    sample_dates = get_sampling_dates(config)
    # Start fetching commits one week before the oldest sample date, to have some "leeway".
    cutoff_date = sample_dates[0] - timedelta(weeks=1)

    statuses: list[RepositoryOnboardingStatus] = []
    progress_bar = tqdm(config.repos, ncols=80)
    for repo in progress_bar:
        prs_of_repo = [pr for pr in onboarding_prs if pr.repo == repo]
        commit_helper = GitCommitHelper(repo, cutoff_date=cutoff_date)

        for sample_date in sample_dates:
            config_file_present = commit_helper.contains_renovate_json_file(sample_date)
            # A pending onboarding PR takes precedence over the config-file check.
            if has_renovate_onboarding_pr(prs_of_repo, sample_date,
                                          config.renovate_onboarding_sampling_interval_weeks):
                status = OnboardingType.in_progress
            elif config_file_present:
                status = OnboardingType.onboarded
            else:
                status = OnboardingType.disabled

            statuses.append(
                RepositoryOnboardingStatus(
                    repo=repo.owner_and_name,
                    onboarded=status,
                    sample_date=sample_date,
                )
            )

        # Work around issue https://github.com/tqdm/tqdm/issues/771
        if Path("/.dockerenv").exists():
            print(progress_bar)

    return statuses
--------------------------------------------------------------------------------