├── app ├── __init__.py ├── dashboard.py ├── repo.py ├── utils.py └── ui.py ├── app.yaml ├── static └── dashboard.gif ├── Dockerfile ├── .gitignore ├── LICENSE ├── requirements.txt └── README.md /app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app.yaml: -------------------------------------------------------------------------------- 1 | service: git-streamlit 2 | runtime: custom 3 | env: flex 4 | -------------------------------------------------------------------------------- /static/dashboard.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andodet/git-overview/HEAD/static/dashboard.gif -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim-buster 2 | 3 | RUN apt-get update && \ 4 | apt-get -y install git 5 | 6 | COPY . . 
7 | 8 | RUN pip install -r requirements.txt 9 | 10 | WORKDIR app/ 11 | 12 | EXPOSE 8080 13 | ENTRYPOINT ["streamlit","run"] 14 | CMD ["dashboard.py", "--server.port=8080", "--server.address=0.0.0.0"] 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Installer logs 10 | pip-log.txt 11 | pip-delete-this-directory.txt 12 | 13 | # Unit test / coverage reports 14 | htmlcov/ 15 | .tox/ 16 | .nox/ 17 | .coverage 18 | .coverage.* 19 | .cache 20 | nosetests.xml 21 | coverage.xml 22 | *.cover 23 | *.py,cover 24 | .hypothesis/ 25 | .pytest_cache/ 26 | cover/ 27 | 28 | # Environments 29 | .env 30 | .venv 31 | env/ 32 | venv/ 33 | ENV/ 34 | env.bak/ 35 | venv.bak/ 36 | 37 | 38 | # mkdocs documentation 39 | /site 40 | 41 | # mypy 42 | .mypy_cache/ 43 | .dmypy.json 44 | dmypy.json 45 | 46 | notebooks/ 47 | .vscode/ 48 | notes.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Andrea Dodet 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | altair==4.1.0 3 | argon2-cffi==20.1.0 4 | astor==0.8.1 5 | async-generator==1.10 6 | attrs==21.2.0 7 | backcall==0.2.0 8 | base58==2.1.0 9 | bleach==3.3.0 10 | blinker==1.4 11 | cachetools==4.2.2 12 | certifi==2021.5.30 13 | cffi==1.14.5 14 | chardet==4.0.0 15 | click==7.1.2 16 | debugpy==1.3.0 17 | decorator==5.0.9 18 | defusedxml==0.7.1 19 | entrypoints==0.3 20 | gitdb==4.0.7 21 | GitPython==3.1.18 22 | idna==2.10 23 | ipykernel==6.0.1 24 | ipython==7.25.0 25 | ipython-genutils==0.2.0 26 | ipywidgets==7.6.3 27 | jedi==0.18.0 28 | Jinja2==3.0.1 29 | jsonschema==3.2.0 30 | jupyter-client==6.1.12 31 | jupyter-core==4.7.1 32 | jupyterlab-pygments==0.1.2 33 | jupyterlab-widgets==1.0.0 34 | lizard==1.17.8 35 | MarkupSafe==2.0.1 36 | matplotlib-inline==0.1.2 37 | mistune==0.8.4 38 | nbclient==0.5.3 39 | nbconvert==6.1.0 40 | nbformat==5.1.3 41 | nest-asyncio==1.5.1 42 | notebook==6.4.0 43 | numpy==1.21.0 44 | packaging==21.0 45 | pandas==1.3.0 46 | pandocfilters==1.4.3 47 | parso==0.8.2 48 | pexpect==4.8.0 49 | pickleshare==0.7.5 50 | Pillow==8.3.1 51 | prometheus-client==0.11.0 52 | prompt-toolkit==3.0.19 53 | protobuf==3.17.3 54 | ptyprocess==0.7.0 55 | pycparser==2.20 56 | pydeck==0.6.2 57 | PyDriller==2.0 58 | Pygments==2.9.0 59 | pyparsing==2.4.7 60 | pyrsistent==0.18.0 61 | 
python-dateutil==2.8.1 62 | pytz==2021.1 63 | pyzmq==22.1.0 64 | requests==2.25.1 65 | Send2Trash==1.7.1 66 | six==1.16.0 67 | smmap==4.0.0 68 | streamlit==0.84.0 69 | terminado==0.10.1 70 | testpath==0.5.0 71 | toml==0.10.2 72 | toolz==0.11.1 73 | tornado==6.1 74 | tqdm==4.61.2 75 | traitlets==5.0.5 76 | tzlocal==2.1 77 | urllib3==1.26.6 78 | validators==0.18.2 79 | watchdog==2.1.3 80 | wcwidth==0.2.5 81 | webencodings==0.5.1 82 | widgetsnbextension==3.5.1 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Git repository activity dashboard 2 | 3 | This repo contains all the code produced for [this](https://anddt.com/post/streamlit-git-viz/) blog post. Through 4 | the streamlit dashboard it's possible to explore the following metrics on a target git repository: 5 | 6 | * Commits over time (waffle github-like activity chart) 7 | * Top contributors 8 | * Lines changed/added 9 | * Cumulative lines changed by contributor 10 | * Filter on specific time frame 11 | * Drill down on a specific contributor 12 | 13 | ![](/static/dashboard.gif) 14 | 15 | This dashboard is publicly available at [this url](https://share.streamlit.io/andodet/git-overview/app/dashboard.py). 16 | 17 | ## Usage 18 | 19 | Run the streamlit app: 20 | ```sh 21 | streamlit run app/dashboard.py 22 | ``` 23 | 24 | The dashboard can ingest data in two ways: 25 | 1. Provide a url for a remote repository (_disclaimer_: this solution might take a while to process long commit 26 | histories). Consider using 2 if dealing with long commit histories. 27 | 2. Upload a .json file exported using the `app/repo.py` utility. 
28 | `app/repo.py` can be used in the following way: 29 | ```sh 30 | $ python app/repo.py -h 31 | usage: repo.py [-h] [-f OUTPUT_FORMAT] [-o OUTPUT_PATH] [-s SINCE] [-t TO] repo_path 32 | 33 | Extract commit hisotry and information from a target repo 34 | 35 | positional arguments: 36 | repo_path The path of the repo. This can be a path on your machine or a link to 37 | a hosted service (e.g https://github.com/andodet/myrepo.git) 38 | 39 | optional arguments: 40 | -h, --help show this help message and exit 41 | -f OUTPUT_FORMAT, --output-format OUTPUT_FORMAT 42 | Format of the output file 43 | -o OUTPUT_PATH, --output-path OUTPUT_PATH 44 | Path of the output file 45 | -s SINCE, --since SINCE 46 | Start date 47 | -t TO, --to TO End date 48 | ``` 49 | 50 | ## Credits 51 | 52 | This dashboard and blog post has been heavily inspired by [this](https://news.ycombinator.com/item?id=27689664) 53 | hackernews post. 54 | 55 | -------------------------------------------------------------------------------- /app/dashboard.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | import ui 4 | import utils 5 | from pathlib import Path 6 | 7 | 8 | def body(commit_history): 9 | st.title("Git Repository Overview 👀") 10 | 11 | # Get filters 12 | start, end, contributor = ui.get_sidebar(commit_history) 13 | 14 | # Apply filters 15 | contributor_stats, q_contrib = utils.get_contributor_stats( 16 | commit_history, contributor 17 | ) 18 | commit_history = utils.filter_by_date(commit_history, start, end) 19 | commit_history = utils.filter_by_contributor(commit_history, contributor) 20 | # Top-level repository stats 21 | st.markdown( 22 | """ 23 | 28 | """, 29 | unsafe_allow_html=True, 30 | ) 31 | 32 | # Top-level repo stats 33 | repo_stats = utils.get_repo_stats(commit_history) 34 | p1, p2 = st.beta_columns([1, 1]) 35 | with st.beta_container(): 36 | with p1: 37 | st.subheader("Repository Snapshot") 38 | for stat in 
repo_stats.items(): 39 | st.markdown( 40 | f"

{stat[1][1]} {stat[0]}: {stat[1][0]}

", # noqa: E501 41 | unsafe_allow_html=True, 42 | ) 43 | 44 | with p2: 45 | try: 46 | st.subheader(f"Contributor: {contributor_stats.pop('Contributor')[0]}") 47 | for c_stat in contributor_stats.items(): 48 | st.markdown( 49 | f"

{c_stat[1][1]} {c_stat[0]}: {c_stat[1][0]}

", # noqa: E501 50 | unsafe_allow_html=True, 51 | ) 52 | st.write(ui.plot_quarterly_commits(q_contrib)) 53 | except (AttributeError, KeyError): 54 | pass 55 | 56 | st.markdown("---") 57 | st.write(ui.plot_commit_waffle(commit_history)) 58 | with st.beta_expander("Changes Overview", expanded=True): 59 | st.write(ui.plot_daily_contributions(commit_history)) 60 | st.write(ui.plot_inserts_deletions(commit_history)) 61 | 62 | with st.beta_expander("Contributors Overview", expanded=True): 63 | st.write(ui.plot_top_contributors(utils.get_top_contributors(commit_history))) 64 | st.write(ui.plot_cumulative_lines_by_contributor(commit_history, 30)) 65 | 66 | 67 | if __name__ == "__main__": 68 | st.set_page_config(layout="wide") 69 | 70 | repo_source = ui.get_repo_source() 71 | if not repo_source: 72 | # Use github readme as dashboard instructions 73 | st.markdown(Path(Path(__file__).parents[1], "README.md").read_text()) 74 | else: 75 | commit_history = utils.get_data(repo_source) 76 | body = body(commit_history) 77 | -------------------------------------------------------------------------------- /app/repo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import csv 3 | import json 4 | from datetime import datetime 5 | from urllib.parse import urlparse 6 | 7 | from pydriller import Repository 8 | from tqdm import tqdm 9 | 10 | 11 | def get_all_commits(path, since=None, to=None): 12 | """ 13 | Grabs all commits and related metadata from a target repository. 14 | 15 | Args: 16 | path (str): A path to a local repository or a link to an hosted one 17 | (of the format https://github.com/andodet/myrepo.git). 18 | since (str): A date string of the format (`%Y-%m-%d`). Defaults to None. 19 | to (str): A date string of the format (`%Y-%m-%d`). Defaults to None. 20 | Returns: 21 | list: A list of dictionaries of all commits and relative information. 
def write_dataset(dataset, path, format=None):
    """
    Exports a commit history to a .csv or .json file.

    Args:
        dataset (list): List of commit dictionaries, as returned by the
            `get_all_commits` function.
        path (str): Path for the output file.
        format (:obj:`str`, optional): Format for the output file (supports
            'json' and 'csv', case-insensitive). When omitted, the format is
            inferred from the extension of `path`.
    Raises:
        ValueError: If the format is neither 'csv' nor 'json'.
        OSError: If the output file cannot be written.
    """
    import os  # local import: only needed to infer the format from `path`

    if format is None:
        format = os.path.splitext(str(path))[1].lstrip(".")
    format = format.lower()

    # Validate before opening the file, so an unsupported format never leaves
    # an empty/truncated file behind.
    if format not in ("csv", "json"):
        raise ValueError(
            f"Unsupported output format {format!r}: expected 'csv' or 'json'"
        )

    if format == "csv":
        # newline="" lets the csv module control line endings (avoids blank
        # rows on Windows); utf-8 keeps non-ASCII author names/messages intact.
        with open(path, "w", newline="", encoding="utf-8") as f:
            if dataset:  # an empty history yields an empty file, not a crash
                writer = csv.DictWriter(f, fieldnames=list(dataset[0]))
                writer.writeheader()
                writer.writerows(dataset)
    else:
        with open(path, "w", encoding="utf-8") as f:
            json.dump(dataset, f)

    # Only reached when the write succeeded; I/O errors propagate to the caller.
    print(f"{path} exported 🥳")
def get_repo_stats(data):
    """
    Get some high level repository statistics

    Args:
        data (pd.DataFrame): A commit history with `hash`, `is_merge`, `author`,
            `lines_added` and `lines_deleted` columns.
    Returns:
        dict: Maps a metric label to a `(formatted value, emoji)` tuple.
    """
    # Count the rows flagged as merges. The previous `value_counts()[0]` picked
    # the count of non-merge commits (or the most frequent value) and raised on
    # an empty history.
    n_merges = int(data["is_merge"].astype(bool).sum())

    repo_stats = {
        "Commits": (f"{data['hash'].count():,}", "📃"),
        "Merges": (f"{n_merges:,}", "⛙"),
        "Contributors": (f"{data['author'].nunique():,}", "👨‍💻"),
        "Lines Added": (f"{data['lines_added'].sum():,}", "➕"),
        "Lines Deleted": (f"{data['lines_deleted'].sum():,}", "➖"),
    }

    return repo_stats
def filter_by_date(df, start, end):
    """
    Filter dataframe by date

    Args:
        df (pd.DataFrame): A commit history with a datetime `committed_on` column.
        start: Lower bound (inclusive). A date, datetime or date string.
        end: Upper bound (inclusive). When it carries no time of day (e.g. a
            `datetime.date`), the whole end day is included.
    Returns:
        pd.DataFrame: The rows committed between `start` and `end`.
    """
    committed = df["committed_on"]
    lower = pd.Timestamp(str(start))
    upper = pd.Timestamp(str(end))

    after_start = committed >= lower
    if upper == upper.normalize():
        # A bare date parses as midnight; comparing `<=` against it would drop
        # every commit made later on the end day, so use the next midnight as
        # an exclusive bound instead.
        before_end = committed < upper + pd.Timedelta(days=1)
    else:
        before_end = committed <= upper

    return df[after_start & before_end]
def get_repo_source():
    """
    Ask in the sidebar where the commit history comes from.

    Renders a radio button to pick the input type, then the matching widget
    (file uploader or text input), and returns that widget's value.
    """
    options = ("Local .json", "Repo Link")
    choice = st.sidebar.radio("Input type input (.json/repo link)", options)

    if choice == "Repo Link":
        source = st.sidebar.text_input("Add repo URL here", key="repo_url")
    elif choice == "Local .json":
        source = st.sidebar.file_uploader("Add your file here")

    return source
def plot_daily_contributions(data):
    """
    Build a bar chart with the number of commits per day.

    Args:
        data (pd.DataFrame): A commit history with `committed_on` and `hash`
            columns.
    Returns:
        The altair chart.
    """
    by_day = pd.Grouper(key="committed_on", freq="1D")
    commits_per_day = data.groupby(by_day)["hash"].count().reset_index()

    date_axis = alt.X("committed_on", title="Date")
    count_axis = alt.Y("hash", title="Commits", axis=alt.Axis(grid=False))
    hover = [
        alt.Tooltip("committed_on", title="Date"),
        alt.Tooltip("hash", title="Commits"),
    ]

    bars = alt.Chart(commits_per_day).mark_bar()
    bars = bars.encode(x=date_axis, y=count_axis, tooltip=hover)
    return bars.properties(height=170, width=850, title="Daily Changes")
def plot_commit_waffle(data):
    """
    Plots waffle-chart (github-like) with commits by dow/week

    Args:
        data (pd.DataFrame): A commit history with a datetime `committed_on`
            column and a `hash` column.
    Returns:
        The altair chart, with a dropdown to pick the displayed year.
    """
    daily_commits = (
        data.groupby(pd.Grouper(key="committed_on", freq="1D"))["hash"]
        .count()
        .to_frame()
    )

    # Pad to whole calendar years. `replace` keeps a history that starts on
    # Jan 1 (or ends on Dec 31) within its own year, whereas subtracting
    # YearBegin / adding YearEnd would roll over and add an entire empty year.
    first_day = daily_commits.index.min()
    last_day = daily_commits.index.max()
    idx = pd.date_range(
        first_day.replace(month=1, day=1), last_day.replace(month=12, day=31)
    )
    # Reindex by date
    daily_commits = daily_commits.reindex(idx, fill_value=0)
    daily_commits = daily_commits.rename_axis("committed_on").reset_index()

    # Add year and week to dataframe. The week number must belong to the same
    # calendar year used by the dropdown: ISO weeks put e.g. Jan 1 in week 53
    # and Dec 31 in week 1, which misplaces (or stacks) cells around New Year.
    # `%U` counts Sunday-based weeks within the calendar year (0-53).
    # NOTE(review): assumes the `day()` rows are ordered Sunday-first - confirm.
    daily_commits["week"] = daily_commits["committed_on"].dt.strftime("%U").astype(int)
    daily_commits["year"] = daily_commits["committed_on"].dt.year
    # Plain ints so the selection options/initial value serialise predictably
    max_year = int(daily_commits["year"].max())

    # Year dropdown
    years = [int(year) for year in daily_commits["year"].unique()]
    year_dropdown = alt.binding_select(options=years)
    selection = alt.selection_single(
        fields=["year"], bind=year_dropdown, name="Year", init={"year": max_year}
    )

    plot = (
        alt.Chart(daily_commits)
        .mark_rect()
        .encode(
            x=alt.X("week:O", title="Week"),
            y=alt.Y("day(committed_on):O", title=""),
            color=alt.Color(
                # FIXME: the colour scale is fixed here rather than recomputed
                # for the selected year on every interaction, which might lead
                # to problems in some cases.
                # (https://stackoverflow.com/questions/68329301/fix-scale-botttom-colour-on-0-in-altair) # noqa: E501
                "hash:Q",
                scale=alt.Scale(range=["transparent", "green"]),
                title="Commits",
            ),
            tooltip=[
                alt.Tooltip("committed_on", title="Date"),
                alt.Tooltip("day(committed_on)", title="Day"),
                alt.Tooltip("hash", title="Commits"),
            ],
        )
        .add_selection(selection)
        .transform_filter(selection)
        .properties(width=1000, height=200)
    )

    return plot
def plot_quarterly_commits(data):
    """
    Build an area chart of the number of commits per quarter.

    Args:
        data (pd.DataFrame): Frame with `committed_on` and `hash` columns, as
            returned by `utils.get_contributor_stats`.
    Returns:
        The altair chart.
    """
    hover = [
        alt.Tooltip("committed_on", title="Date"),
        alt.Tooltip("hash", format=",.0f", title="Commits"),
    ]
    area = alt.Chart(data).mark_area()
    area = area.encode(
        x=alt.X("committed_on", title=""),
        y=alt.Y("hash", title="Commits"),
        tooltip=hover,
    )
    return area.properties(width=500, height=130)