├── tests ├── __init__.py ├── fakegit.py ├── csv__test.py ├── test__csv.py ├── git_parsing__test.py ├── test__git_parsing.py ├── test__settings.py ├── expand_commits__test.py └── test__expand_commits.py ├── githistorydata ├── __init__.py ├── filechanges.py ├── logline.py ├── csv.py ├── codeline.py ├── rawgit.py ├── TODO.txt ├── commitdetail.py ├── dataline.py ├── main.py ├── expand_commits.py ├── settings.py └── git.py ├── .gitignore ├── settings.json ├── Makefile ├── git-history-data ├── reports ├── ghd-file-by-num-authors ├── ghd-author-by-num-commits ├── ghd-commit-by-num-files ├── ghd-files-by-num-commits-for-author └── ghd-author-by-mean-num-files-per-commit ├── legal ├── ZackYovel-DCO1.1.txt └── andybalaam-DCO1.1.txt ├── LICENSE └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /githistorydata/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ropeproject 2 | *.pyc 3 | -------------------------------------------------------------------------------- /settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "git_path": "/usr/bin/git" 3 | } -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: test 3 | 4 | test: 5 | nosetests 6 | -------------------------------------------------------------------------------- /git-history-data: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Prerequisites: 4 | # 5 | # sudo apt-get install git python3 
#!/bin/bash

# Report: files ranked by the number of distinct authors who changed them.
#
# Reads the CSV written by git-history-data on stdin. Fields are separated
# by ", " and arrive in this order:
#   $1 Commit, $2 Date, $3 Author, $4 Added, $5 Removed, $6 File
#
# Steps:
#   Skip the heading row
#   File and author
#   Unique
#   Group by file
#   Sort numerically

# "tail -n +1" passes every line through, so the heading row used to be
# counted as a file called "File". "+2" starts at the first data row.
tail -n +2 \
    | awk -F', ' '{print $6, ", ", $3}' \
    | sort | uniq \
    | awk -F', ' '{arr[$1]++}END{for (a in arr) print arr[a], a}' \
    | sort -n -r
class FileChanges( object ):
    """
    The number of lines added to and removed from a single file.
    """

    def __init__( self, added, removed, name ):
        """
        added   -- number of lines added (formatted with %d by __str__)
        removed -- number of lines removed (formatted with %d by __str__)
        name    -- name of the file that changed
        """
        self.added = added
        self.removed = removed
        self.name = name

    def __str__( self ):
        """
        Return the change formatted as "+<added> -<removed> <name>".
        """
        return "+%d -%d %s" % ( self.added, self.removed, self.name )

    def __eq__( self, other ):
        """
        Two FileChanges are equal when added, removed and name all match.
        """
        # Comparing against None or an unrelated type used to raise
        # AttributeError; NotImplemented lets Python answer "not equal".
        if not isinstance( other, FileChanges ):
            return NotImplemented
        return (
            self.added == other.added
            and self.removed == other.removed
            and self.name == other.name
        )
class Csv( object ):
    """
    Writes rows of values to a text stream, one row per line, with the
    values separated by ", ".

    The column headings are written as soon as the object is created.
    """

    def __init__( self, out, columns ):
        """
        out     -- stream with a write() method that accepts text
        columns -- sequence of column headings; also fixes the row length
        """
        self.out = out
        self.columns = columns
        self._write( columns )

    def line( self, items ):
        """
        Write one row. items must hold exactly one value per column.
        """
        assert len( items ) == len( self.columns )
        self._write( items )

    def _write( self, items ):
        """
        Format every item and write them as a single newline-ended line.
        """
        self.out.write(
            u", ".join( self._fmt( c ) for c in items ) )

        self.out.write( u"\n" )

    def _fmt( self, item ):
        """
        Return item as text: bare if float() accepts it, otherwise wrapped
        in double quotes.
        """
        try:
            float( item )
        except ( TypeError, ValueError, OverflowError ):
            # Not a number. Double any embedded quote so that the quoted
            # field stays well-formed when the value itself contains '"'.
            return '"%s"' % str( item ).replace( '"', '""' )
        return str( item )
class RawGit( object ):
    """
    Runs the git executable and hands back its output as a list of lines.
    """

    def __init__( self, git_path="/usr/bin/git" ):
        """
        git_path -- path of the git executable used for every command
        """
        self._git_path = git_path

    def git_log_pretty_tformat_H_ai_an( self ):
        """
        Return the lines of `git log` without merges, each formatted
        with "%H %ai %an".
        """
        log_args = ["log", "--no-merges", "--pretty=tformat:%H %ai %an"]
        return self._run_git( log_args )

    def git_show_numstat( self, commit_hash ):
        """
        Return the lines of `git show --pretty=oneline --numstat` for
        commit_hash.
        """
        show_args = ["show", "--pretty=oneline", "--numstat", commit_hash]
        return self._run_git( show_args )

    def _run_git( self, args ):
        """
        Run git with the list args and return its standard output as lines.

        The output is decoded as UTF-8 (undecodable bytes are replaced) and
        split on "\\n", so output that ends in a newline yields a final
        empty string. subprocess.CalledProcessError propagates when git
        exits with a non-zero status.
        """
        command = [self._git_path] + args
        raw_output = subprocess.check_output( command )
        text = raw_output.decode( encoding="UTF-8", errors="replace" )
        return text.split( "\n" )
Potential inspiration: 7 | https://accu.org/index.php/journals/1835 8 | 9 | - Release 0.3 10 | 11 | - Create charts using something like Plotly https://plot.ly/python/ 12 | - More reports - establish a "standard set" that covers 13 | most things you want to know at a high level 14 | 15 | - Release 0.2 16 | 17 | - Clean environment when running Git using: 18 | GIT_CONFIG_NOSYSTEM=true HOME=/dev/null XDG_CONFIG_HOME=/dev/null 19 | + Avoid hard-coding git executable location 20 | + Fix bug (#1) where git show command assumed --pretty=oneline 21 | 22 | + Initial release 23 | 24 | -------------------------------------------------------------------------------- /githistorydata/commitdetail.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class CommitDetail( object ): 4 | def __init__( self, commit_hash, date, author, file_changes ): 5 | self.commit_hash = commit_hash 6 | self.date = date 7 | self.author = author 8 | self.file_changes = file_changes 9 | 10 | def __str__( self ): 11 | ret = "%s %s %s" % ( 12 | self.commit_hash, 13 | self.date, 14 | self.author, 15 | ) 16 | ret += "\n" 17 | ret += "\n".join( ( " " + str(ch) ) for ch in self.file_changes ) 18 | return ret 19 | 20 | def __eq__( self, other ): 21 | return ( 22 | self.commit_hash == other.commit_hash 23 | and self.file_changes == other.file_changes 24 | ) 25 | -------------------------------------------------------------------------------- /githistorydata/dataline.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class DataLine( object ): 4 | def __init__( self, commit_hash, date, author, added, removed, filename ): 5 | self.commit_hash = commit_hash 6 | self.date = date 7 | self.author = author 8 | self.added = added 9 | self.removed = removed 10 | self.filename = filename 11 | 12 | def __str__( self ): 13 | return " ".join( 14 | ( 15 | self.commit_hash, 16 | self.date, 17 | self.author, 18 | str( self.added ), 19 | 
def main( argv, out, err ):
    """
    Write the history of the git repo in the current directory to out as
    CSV, one row per author and changed file of every commit.

    argv -- command line arguments (currently unused)
    out  -- text stream that receives the CSV; flushed before returning
    err  -- text stream that receives the message if running git fails

    Exits the process with status 1 if a git command fails.
    """
    settings = Settings()
    try:
        git = Git( RawGit( settings["git_path"] ) )
        csv = Csv(
            out,
            ( "Commit", "Date", "Author", "Added", "Removed", "File" )
        )
        for cod in expand_lines( git, expand_authors( git.log() ) ):
            csv.line( (
                cod.commit_hash,
                cod.date.date().isoformat(),
                cod.author,
                cod.added,
                cod.removed,
                cod.filename,
            ) )
    except subprocess.CalledProcessError as e:
        # Report on the error stream: printing to stdout would mix the
        # message into the CSV data that is being written there.
        print( str( e ), file=err )
        sys.exit( 1 )
    finally:
        out.flush()
def expand_lines( git, code_lines ):
    """
    Yield the data lines of every code line in code_lines.

    For each code line the commit is looked up with git.show() and the
    result is expanded by expand_detail() using the code line's weight.
    """
    for code_line in code_lines:
        detail = git.show(
            code_line.commit_hash,
            code_line.date,
            code_line.author
        )
        yield from expand_detail( detail, code_line.weight )
-------------------------------------------------------------------------------- 1 | Developer's Certificate of Origin 1.1 2 | 3 | By making a contribution to this project, I certify that: 4 | 5 | (a) The contribution was created in whole or in part by me and I 6 | have the right to submit it under the open source license 7 | indicated in the file; or 8 | 9 | (b) The contribution is based upon previous work that, to the best 10 | of my knowledge, is covered under an appropriate open source 11 | license and I have the right under that license to submit that 12 | work with modifications, whether created in whole or in part 13 | by me, under the same open source license (unless I am 14 | permitted to submit under a different license), as indicated 15 | in the file; or 16 | 17 | (c) The contribution was provided directly to me by some other 18 | person who certified (a), (b) or (c) and I have not modified 19 | it. 20 | 21 | (d) I understand and agree that this project and the contribution 22 | are public and that a record of the contribution (including all 23 | personal information I submit with it, including my sign-off) is 24 | maintained indefinitely and may be redistributed consistent with 25 | this project or the open source license(s) involved. 
26 | -------------------------------------------------------------------------------- /legal/andybalaam-DCO1.1.txt: -------------------------------------------------------------------------------- 1 | Developer's Certificate of Origin 1.1 2 | 3 | By making a contribution to this project, I certify that: 4 | 5 | (a) The contribution was created in whole or in part by me and I 6 | have the right to submit it under the open source license 7 | indicated in the file; or 8 | 9 | (b) The contribution is based upon previous work that, to the best 10 | of my knowledge, is covered under an appropriate open source 11 | license and I have the right under that license to submit that 12 | work with modifications, whether created in whole or in part 13 | by me, under the same open source license (unless I am 14 | permitted to submit under a different license), as indicated 15 | in the file; or 16 | 17 | (c) The contribution was provided directly to me by some other 18 | person who certified (a), (b) or (c) and I have not modified 19 | it. 20 | 21 | (d) I understand and agree that this project and the contribution 22 | are public and that a record of the contribution (including all 23 | personal information I submit with it, including my sign-off) is 24 | maintained indefinitely and may be redistributed consistent with 25 | this project or the open source license(s) involved. 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | (c) Copyright IBM Corporation 2015 and the git-history-data contributors. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 
9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class Settings:
    """ A settings object that is responsible for managing a single settings file """

    def __init__(self, settings_file_path="settings.json"):
        """ Initialize a Settings object and load it from settings_file_path """
        self._settings_file_path = settings_file_path
        self._settings_dict = {}
        self.load()

    def load(self):
        """ Load settings from file. Values from the file override in-memory
        values that have the same key.

        If the file cannot be read or does not hold a JSON object, all
        in-memory settings are replaced by the built-in defaults. """
        try:
            with open(self._settings_file_path, encoding="utf-8") as f:
                loaded = json.load(f)
            self._settings_dict = dict(self._settings_dict, **loaded)
        except (OSError, ValueError, TypeError):
            # OSError: missing or unreadable file; ValueError: invalid JSON;
            # TypeError: valid JSON that is not an object
            self._settings_dict = { "git_path": "/usr/bin/git" }

    def add(self, settings):
        """ Add one or more extra settings to this object without persisting them.
        settings must be a mapping object """
        self._settings_dict = dict(self._settings_dict, **settings)

    def save(self):
        """ Save this object to file as it is, possibly overwriting un-synced changes in the file """
        with open(self._settings_file_path, "w") as f:
            json.dump(self._settings_dict, f)

    def persist(self, settings=None):
        """ Potentially add settings to this object, then persist all the settings in it """
        # synchronize with the file first: load() lets file values win, so
        # doing it after add() would silently undo a change to an existing key
        self.load()

        # if new settings received, add them to this object
        if settings:
            self.add(settings)

        # persist all settings to file
        self.save()

    def __getitem__(self, key):
        """ Access Settings using square brackets. Raises KeyError for an unknown key """
        return self._settings_dict[key]

    def __setitem__(self, key, value):
        """ Set items using square brackets without persisting the change """
        self._settings_dict[key] = value
18 | """ 19 | return list( 20 | self._logline( ln.strip() ) 21 | for ln in self.raw_git.git_log_pretty_tformat_H_ai_an() 22 | if ln.strip() != "" 23 | ) 24 | 25 | def show( self, commit_hash, date, author ): 26 | show_lines = self.raw_git.git_show_numstat( commit_hash ) 27 | return CommitDetail( 28 | commit_hash, 29 | date, 30 | author, 31 | list( self._showline( l ) for l in show_lines[1:] if l != "") 32 | ) 33 | 34 | _logline_re = re.compile( 35 | r"([0-9a-f]{40}) (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} [-+]\d{4}) (.+)" 36 | ) 37 | 38 | def _logline( self, ln ): 39 | m = Git._logline_re.match( ln ) 40 | if not m: 41 | raise Exception( 42 | "Line from git log '%s' did not match expected format" 43 | % ln 44 | ) 45 | return LogLine( 46 | m.group( 1 ), 47 | dateutil.parser.parse( m.group( 2 ) ), 48 | m.group( 3 ) 49 | ) 50 | 51 | showline_re = re.compile( 52 | r"(-|\d+)\s+(-|\d+)\s+(.*)" 53 | ) 54 | 55 | @staticmethod 56 | def lines_changed( num ): 57 | if num == "-": 58 | return 0 59 | else: 60 | return int( num ) 61 | 62 | def _showline( self, ln ): 63 | m = Git.showline_re.match( ln ) 64 | if not m: 65 | raise Exception( 66 | "Line from git show '%s' did not match expected format" 67 | % ln 68 | ) 69 | return FileChanges( 70 | Git.lines_changed( m.group( 1 ) ), 71 | Git.lines_changed( m.group( 2 ) ), 72 | m.group( 3 ) 73 | ) 74 | -------------------------------------------------------------------------------- /tests/git_parsing__test.py: -------------------------------------------------------------------------------- 1 | 2 | from datetime import datetime 3 | import dateutil.parser 4 | 5 | from githistorydata.commitdetail import CommitDetail 6 | from githistorydata.filechanges import FileChanges 7 | from githistorydata.git import Git 8 | from githistorydata.logline import LogLine 9 | 10 | from tests.fakegit import FakeGit 11 | 12 | from nose.tools import assert_equal 13 | 14 | 15 | def Log_lines_are_parsed__test(): 16 | git = Git( FakeGit( """ 17 | 
64c5da790fb7edef4f99053497075839d11bb6d8 2015-07-09 12:00:00 +0100 Alban Tsui 18 | 2993dbf67c7e0659eba13987c98c5a03aade7099 2015-10-31 12:15:27 +0100 Lennart Tange 19 | c504bd352d5d9dd0ccec3cd601ac02b14f4982a8 2015-07-09 12:00:00 -0200 Alban Tsui 20 | """ ) ) 21 | 22 | off1 = dateutil.tz.tzoffset( "tz", 60*60 ) # +0100 23 | off2 = dateutil.tz.tzoffset( "tz", -2*60*60 ) # -0200 24 | 25 | assert_equal( 26 | [ 27 | LogLine( 28 | "64c5da790fb7edef4f99053497075839d11bb6d8", 29 | datetime( 2015, 7, 9, 12, 0, 0, 0, off1 ), 30 | "Alban Tsui" 31 | ), 32 | LogLine( 33 | "2993dbf67c7e0659eba13987c98c5a03aade7099", 34 | datetime( 2015, 10, 31, 12, 15, 27, 0, off1 ), 35 | "Lennart Tange" 36 | ), 37 | LogLine( 38 | "c504bd352d5d9dd0ccec3cd601ac02b14f4982a8", 39 | datetime( 2015, 7, 9, 12, 0, 0, 0, off2 ), 40 | "Alban Tsui" 41 | ) 42 | ], 43 | git.log() 44 | ) 45 | 46 | 47 | def FileChanges_to_string__test(): 48 | assert_equal( "+3 -2 foo.txt", str( FileChanges( 3, 2, "foo.txt" ) ) ) 49 | 50 | 51 | def CommitDetail_to_string__test(): 52 | assert_equal( 53 | """myhash dt auth 54 | +1 -0 x.cpp 55 | +3 -2 y.cpp""", 56 | str( CommitDetail( 57 | "myhash", 58 | "dt", 59 | "auth", 60 | [ 61 | FileChanges( 1, 0, "x.cpp" ), 62 | FileChanges( 3, 2, "y.cpp" ), 63 | ] 64 | ) ) 65 | ) 66 | 67 | 68 | def Numstat_lines_are_parsed__test(): 69 | git = Git( FakeGit( """2993dbf Lennart Tange "More generic dnd helper." 
70 | 71 0 scripts/drag_and_drop_helper.js 71 | 0 66 scripts/dragdrop_pin_to_assemble.js 72 | 23 16 src/step_definitions/StepDef.java 73 | """ ) ) 74 | 75 | assert_equal( 76 | str( CommitDetail( 77 | "2993bdfAAAAAAAAAAAA", 78 | "dt", 79 | "auth", 80 | [ 81 | FileChanges( 71, 0, "scripts/drag_and_drop_helper.js" ), 82 | FileChanges( 0, 66, "scripts/dragdrop_pin_to_assemble.js" ), 83 | FileChanges( 23, 16, "src/step_definitions/StepDef.java" ), 84 | ] 85 | ) ), 86 | str( git.show( "2993bdfAAAAAAAAAAAA", "dt", "auth" ) ) 87 | ) 88 | -------------------------------------------------------------------------------- /tests/test__git_parsing.py: -------------------------------------------------------------------------------- 1 | 2 | from datetime import datetime 3 | import dateutil.parser 4 | 5 | from githistorydata.commitdetail import CommitDetail 6 | from githistorydata.filechanges import FileChanges 7 | from githistorydata.git import Git 8 | from githistorydata.logline import LogLine 9 | 10 | from tests.fakegit import FakeGit 11 | 12 | import unittest 13 | 14 | 15 | class TestGitParsing(unittest.TestCase): 16 | def test__Log_lines_are_parsed(self): 17 | git = Git( FakeGit( """ 18 | 64c5da790fb7edef4f99053497075839d11bb6d8 2015-07-09 12:00:00 +0100 Alban Tsui 19 | 2993dbf67c7e0659eba13987c98c5a03aade7099 2015-10-31 12:15:27 +0100 Lennart Tange 20 | c504bd352d5d9dd0ccec3cd601ac02b14f4982a8 2015-07-09 12:00:00 -0200 Alban Tsui 21 | """ ) ) 22 | 23 | off1 = dateutil.tz.tzoffset( "tz", 60*60 ) # +0100 24 | off2 = dateutil.tz.tzoffset( "tz", -2*60*60 ) # -0200 25 | 26 | self.assertEqual( 27 | [ 28 | LogLine( 29 | "64c5da790fb7edef4f99053497075839d11bb6d8", 30 | datetime( 2015, 7, 9, 12, 0, 0, 0, off1 ), 31 | "Alban Tsui" 32 | ), 33 | LogLine( 34 | "2993dbf67c7e0659eba13987c98c5a03aade7099", 35 | datetime( 2015, 10, 31, 12, 15, 27, 0, off1 ), 36 | "Lennart Tange" 37 | ), 38 | LogLine( 39 | "c504bd352d5d9dd0ccec3cd601ac02b14f4982a8", 40 | datetime( 2015, 7, 9, 12, 0, 0, 0, off2 
), 41 | "Alban Tsui" 42 | ) 43 | ], 44 | git.log() 45 | ) 46 | 47 | 48 | def test__FileChanges_to_string(self): 49 | self.assertEqual( "+3 -2 foo.txt", str( FileChanges( 3, 2, "foo.txt" ) ) ) 50 | 51 | 52 | def test__CommitDetail_to_string(self): 53 | self.assertEqual( 54 | """myhash dt auth 55 | +1 -0 x.cpp 56 | +3 -2 y.cpp""", 57 | str( CommitDetail( 58 | "myhash", 59 | "dt", 60 | "auth", 61 | [ 62 | FileChanges( 1, 0, "x.cpp" ), 63 | FileChanges( 3, 2, "y.cpp" ), 64 | ] 65 | ) ) 66 | ) 67 | 68 | 69 | def test__Numstat_lines_are_parsed(self): 70 | git = Git( FakeGit( """2993dbf Lennart Tange "More generic dnd helper." 71 | 71 0 scripts/drag_and_drop_helper.js 72 | 0 66 scripts/dragdrop_pin_to_assemble.js 73 | 23 16 src/step_definitions/StepDef.java 74 | """ ) ) 75 | 76 | self.assertEqual( 77 | str( CommitDetail( 78 | "2993bdfAAAAAAAAAAAA", 79 | "dt", 80 | "auth", 81 | [ 82 | FileChanges( 71, 0, "scripts/drag_and_drop_helper.js" ), 83 | FileChanges( 0, 66, "scripts/dragdrop_pin_to_assemble.js" ), 84 | FileChanges( 23, 16, "src/step_definitions/StepDef.java" ), 85 | ] 86 | ) ), 87 | str( git.show( "2993bdfAAAAAAAAAAAA", "dt", "auth" ) ) 88 | ) 89 | -------------------------------------------------------------------------------- /tests/test__settings.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import json 3 | import os 4 | from githistorydata.settings import Settings 5 | 6 | """ Tests file for unittest/nose2 """ 7 | 8 | TEST_SETTINGS_FILE = "test_settings.json" 9 | 10 | 11 | def get_test_settings(): 12 | return Settings(TEST_SETTINGS_FILE) 13 | 14 | 15 | def persist_test_settings(settings): 16 | with open(TEST_SETTINGS_FILE, "w") as f: 17 | json.dump(settings, f) 18 | 19 | 20 | def update_test_settings(settings): 21 | """ Update the file 'manually' (not through the Settings API) 22 | and create a new Settings object """ 23 | persist_test_settings(settings) 24 | return get_test_settings() 25 | 26 | 27 | 
class TestSettings(unittest.TestCase): 28 | 29 | def setUp(self): 30 | settings = {} 31 | persist_test_settings(settings) 32 | 33 | def tearDown(self): 34 | os.remove(TEST_SETTINGS_FILE) 35 | 36 | def test_ctor(self): 37 | # test default ctor 38 | settings = Settings() 39 | self.assertEqual(settings["git_path"], "/usr/bin/git") 40 | 41 | # test ctor with settings_file_path argument 42 | settings = {"key1": "value1"} 43 | settings = update_test_settings(settings) 44 | self.assertEqual(settings["key1"], "value1") 45 | 46 | def test_load(self): 47 | settings1 = get_test_settings() 48 | # assert 'key1' is not in the settings 49 | with self.assertRaises(KeyError): 50 | s = settings1["key1"] 51 | # write the 'key1' setting to the test file 52 | # (not using the Settings API) 53 | settings2 = {"key1": "value1"} 54 | persist_test_settings(settings2) 55 | # perform 'load' 56 | settings1.load() 57 | # assert 'key1' is in settings 58 | self.assertEqual(settings1["key1"], "value1") 59 | 60 | def test_add(self): 61 | settings = get_test_settings() 62 | # assert 'key1' is not in the settings 63 | with self.assertRaises(KeyError): 64 | s = settings["key1"] 65 | # add 'key1' 66 | settings.add({"key1": "value1"}) 67 | # assert 'key1' is in settings 68 | self.assertEqual(settings["key1"], "value1") 69 | # assert 'key1' not persisted 70 | settings = get_test_settings() 71 | with self.assertRaises(KeyError): 72 | s = settings["key1"] 73 | 74 | def test_save(self): 75 | settings = get_test_settings() 76 | # assert 'key1' is not in the settings 77 | with self.assertRaises(KeyError): 78 | s = settings["key1"] 79 | # add 'key1' 80 | settings.add({"key1": "value1"}) 81 | # save settings 82 | settings.save() 83 | # assert 'key1' persisted 84 | settings = get_test_settings() 85 | self.assertEqual(settings["key1"], "value1") 86 | 87 | def test_persist(self): 88 | settings = get_test_settings() 89 | # assert 'key1' is not in the settings 90 | with self.assertRaises(KeyError): 91 | s = 
settings["key1"] 92 | # persist 'key1' 93 | settings.persist({"key1": "value1"}) 94 | # assert 'key1' persisted 95 | settings = get_test_settings() 96 | self.assertEqual(settings["key1"], "value1") 97 | 98 | def test___getitmem__(self): 99 | # test the [] accessors 100 | settings = Settings() 101 | self.assertEqual(settings["git_path"], "/usr/bin/git") 102 | 103 | def test___setitem__(self): 104 | settings = get_test_settings() 105 | # assert 'key1' is not in the settings 106 | with self.assertRaises(KeyError): 107 | s = settings["key1"] 108 | # set 'key1' 109 | settings["key1"] = "value1" 110 | # assert 'key1' is in settings 111 | self.assertEqual(settings["key1"], "value1") 112 | # assert 'key1' not persisted 113 | settings = get_test_settings() 114 | with self.assertRaises(KeyError): 115 | s = settings["key1"] 116 | 117 | -------------------------------------------------------------------------------- /tests/expand_commits__test.py: -------------------------------------------------------------------------------- 1 | 2 | from githistorydata.codeline import CodeLine 3 | from githistorydata.commitdetail import CommitDetail 4 | from githistorydata.dataline import DataLine 5 | from githistorydata.expand_commits import expand_authors 6 | from githistorydata.expand_commits import expand_detail 7 | from githistorydata.expand_commits import expand_lines 8 | from githistorydata.filechanges import FileChanges 9 | from githistorydata.git import Git 10 | from githistorydata.logline import LogLine 11 | 12 | from tests.fakegit import FakeGit 13 | 14 | from nose.tools import assert_equal 15 | 16 | 17 | def Normal_commits_are_not_expanded__test(): 18 | assert_equal( 19 | [ 20 | CodeLine( "h1", "dt1", "a1", 1.0 ), 21 | CodeLine( "h2", "dt2", "a2", 1.0 ), 22 | ], 23 | list( expand_authors( 24 | [ 25 | LogLine( "h1", "dt1", "a1" ), 26 | LogLine( "h2", "dt2", "a2" ), 27 | ] 28 | ) ) 29 | ) 30 | 31 | 32 | def Shared_commits_are_expanded__test(): 33 | assert_equal( 34 | [ 35 | 
CodeLine( "h1", "dt1", "a1", 0.5 ), 36 | CodeLine( "h1", "dt1", "a2", 0.5 ), 37 | ], 38 | list( expand_authors( 39 | [ 40 | LogLine( "h1", "dt1", "a1,a2" ), 41 | ] 42 | ) ) 43 | ) 44 | 45 | 46 | def Multiple_commits__test(): 47 | assert_equal( 48 | [ 49 | CodeLine( "h1", "dt1", "a1", 1.0 ), 50 | CodeLine( "h2", "dt2", "a1", 1.0/3 ), 51 | CodeLine( "h2", "dt2", "a2", 1.0/3 ), 52 | CodeLine( "h2", "dt2", "a3", 1.0/3 ), 53 | CodeLine( "h4", "dt4", "a4", 1.0 ), 54 | ], 55 | list( expand_authors( 56 | [ 57 | LogLine( "h1", "dt1", "a1" ), 58 | LogLine( "h2", "dt2", "a1,a2,a3" ), 59 | LogLine( "h4", "dt4", "a4" ), 60 | ] 61 | ) ) 62 | ) 63 | 64 | 65 | def Expand_detail_for_single_author__test(): 66 | assert_equal( 67 | [ 68 | DataLine( 69 | "myhash1", 70 | "mydate1", 71 | "Me", 72 | 32, 73 | 1, 74 | "foo.txt", 75 | ), 76 | DataLine( 77 | "myhash1", 78 | "mydate1", 79 | "Me", 80 | 0, 81 | 10, 82 | "bar.pl", 83 | ) 84 | ], 85 | list( expand_detail( 86 | CommitDetail( 87 | "myhash1", 88 | "mydate1", 89 | "Me", 90 | [ 91 | FileChanges( 32, 1, "foo.txt" ), 92 | FileChanges( 0, 10, "bar.pl" ), 93 | ] 94 | ), 95 | 1.0 96 | ) ) 97 | ) 98 | 99 | 100 | def Expand_detail_with_weights_on_lines__test(): 101 | assert_equal( 102 | [ 103 | DataLine( "h", "d", "Me", 9, 0, "foo.txt" ), 104 | DataLine( "h", "d", "Me", 0, 3, "bar.pl" ), 105 | ], 106 | list( expand_detail( 107 | CommitDetail( 108 | "h", 109 | "d", 110 | "Me", 111 | [ 112 | FileChanges( 32, 1, "foo.txt" ), 113 | FileChanges( 0, 10, "bar.pl" ), 114 | ] 115 | ), 116 | 0.3 117 | ) ) 118 | ) 119 | 120 | 121 | def Expand_lines_makes_one_line_for_each_modified_file(): 122 | git = Git( FakeGit( """2976 Andy Balaam "desc." 123 | 10 2 f.txt 124 | 1 0 g.txt 125 | """, """2976 Peter Broadbent "desc2." 
126 | 0 18 h.txt 127 | 4 14 i.txt 128 | 129 | """) ) 130 | assert_equal( 131 | [ 132 | DataLine( "h", "d", "a", 10, 2, "f.txt" ), 133 | DataLine( "h", "d", "a", 1, 0, "g.txt" ), 134 | DataLine( "j", "e", "b", 0, 9, "h.txt" ), 135 | DataLine( "j", "e", "b", 2, 7, "i.txt" ), 136 | ], 137 | expand_lines( 138 | git, 139 | CodeLine( "h", "d", "a", 1.0 ), 140 | CodeLine( "j", "j", "b", 0.5 ), 141 | ) 142 | ) 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # git-history-data 2 | 3 | git-history-data analyses a Git source code repository and dumps out data in a form that is easy to analyse. In its simplest form it prints out one line for every change to every file in history, and who made the change, like this: 4 | 5 | $ cd myrepo 6 | $ git-history-data 7 | "Commit", "Date", "Author", "Added", "Removed", "File" 8 | "e35a4f0", "2015-11-11", "Pete", 1, 1, "githistorydata/main.py" 9 | "5a6172d", "2015-07-15", "Andy", 1, 0, "githistorydata/codeline.py" 10 | "5a6172d", "2015-07-15", "Andy", 8, 2, "githistorydata/commitdetail.py" 11 | "5a6172d", "2015-07-15", "Andy", 32, 0, "githistorydata/dataline.py" 12 | "5a6172d", "2015-07-15", "Andy", 8, 13, "tests/git_parsing__test.py" 13 | "12f2881", "2015-07-13", "Pete", 4, 4, "githistorydata/git.py" 14 | "8fd2224", "2015-07-13", "Andy", 18, 0, "githistorydata/commitdetail.py" 15 | 16 | 17 | It is intended to be easy to analyse the results, and to be convenient to manipulate with standard Unix command-line tools. 
18 | 19 | For example, if you want to see very large changes to a specific file (in the Git project itself): 20 | 21 | $ cd git 22 | $ git-history-data > git-git-history.csv 23 | $ grep "diff.c" git-git-history.csv | awk -F',' '{print $4, $3, $1 }' | sort -n -r | head -5 24 | 4047 "Junio C Hamano" "3686aa1caf907d22fe318c28efe93f0e7870ba50" 25 | 1805 "Martin Langhoff" "e660e3997fbad830e5723336d61883f3a50dbc92" 26 | 1803 "Junio C Hamano" "c66b6c067e49c5ec80f1254daef79aa1c7f5ffce" 27 | 1795 "Junio C Hamano" "e9b5b75ca87f45292de8ecde5d4d0512ac9542cd" 28 | 1795 "Junio C Hamano" "b8ed7f0f40743dae6111c8950ba55051933298ca" 29 | 30 | Or the files with terrifying numbers of authors: 31 | 32 | $ awk -F', ' '{print $6, $3}' git-git-history.csv | sort | uniq | awk '{arr[$1]++}END{for (a in arr) print arr[a], a}' | sort -n -r | head -5 33 | 235 "Makefile" 34 | 198 "Documentation/config.txt" 35 | 137 "cache.h" 36 | 130 "git-svn.perl" 37 | 115 "diff.c" 38 | 39 | You can find more examples here: Learning about the Git codebase using git-history-data 40 | 41 | ## Prerequisites 42 | 43 | git-history-data requires Git, Python 3 and the Python DateUtil library. On Debian, Ubuntu and similar you can install these with: 44 | 45 | sudo apt-get install git python3 python3-dateutil 46 | 47 | ## Install 48 | 49 | Get the code: 50 | 51 | cd 52 | git clone https://github.com/andybalaam/git-history-data.git 53 | 54 | Now add a line to your PATH by doing something like this: 55 | 56 | echo 'export PATH="$PATH:${HOME}/git-history-data"' >> ~/.bashrc 57 | 58 | (Log out and back in again, and use `echo $PATH` to check your PATH has been updated.) 59 | 60 | ## Use 61 | 62 | `cd` into the working tree of a git repository, and then run `git-history-data`, redirecting the result to a file. 
For example: 63 | 64 | cd git 65 | git-history-data > hist.csv 66 | 67 | Now hist.csv contains one line per file per commit in the entire history of the project, showing the commit ID, timestamp, author and the number of lines added and removed in that file in that commit. 68 | 69 | That's it. 70 | 71 | ## Contributing 72 | 73 | We welcome contributions! 74 | 75 | Before we can accept your pull request you must confirm that all your contributions are written by you. To do this, create a pull request that adds a file called `<your-username>-DCO1.1.txt` to the `legal` directory; the new file should be a copy of one of the other files in that directory. Once that pull request has been accepted, we can accept other pull requests as normal. 76 | 77 | A good area to start contributing is to make a little script that analyses the data coming out of git-history-data and produces a little report (on the command line). We plan to make a library of scripts that report e.g. which developer creates the most commits, or which files are most frequently modified. 78 | 79 | ## Copyright 80 | 81 | git-history-data is written by Andy Balaam and the git-history-data contributors. 82 | 83 | (c) Copyright 2015-2020 IBM Corporation, Andy Balaam and the git-history-data contributors. Distributed under the [BSD 2-clause license](https://github.com/andybalaam/git-history-data/blob/master/LICENSE).
84 | -------------------------------------------------------------------------------- /tests/test__expand_commits.py: -------------------------------------------------------------------------------- 1 | 2 | from githistorydata.codeline import CodeLine 3 | from githistorydata.commitdetail import CommitDetail 4 | from githistorydata.dataline import DataLine 5 | from githistorydata.expand_commits import expand_authors 6 | from githistorydata.expand_commits import expand_detail 7 | from githistorydata.expand_commits import expand_lines 8 | from githistorydata.filechanges import FileChanges 9 | from githistorydata.git import Git 10 | from githistorydata.logline import LogLine 11 | 12 | from tests.fakegit import FakeGit 13 | 14 | import unittest 15 | 16 | 17 | class TestExpandCommits(unittest.TestCase): 18 | def test__Normal_commits_are_not_expanded(self): 19 | self.assertEqual( 20 | [ 21 | CodeLine( "h1", "dt1", "a1", 1.0 ), 22 | CodeLine( "h2", "dt2", "a2", 1.0 ), 23 | ], 24 | list( expand_authors( 25 | [ 26 | LogLine( "h1", "dt1", "a1" ), 27 | LogLine( "h2", "dt2", "a2" ), 28 | ] 29 | ) ) 30 | ) 31 | 32 | 33 | def test__Shared_commits_are_expanded(self): 34 | self.assertEqual( 35 | [ 36 | CodeLine( "h1", "dt1", "a1", 0.5 ), 37 | CodeLine( "h1", "dt1", "a2", 0.5 ), 38 | ], 39 | list( expand_authors( 40 | [ 41 | LogLine( "h1", "dt1", "a1,a2" ), 42 | ] 43 | ) ) 44 | ) 45 | 46 | 47 | def test__Multiple_commits(self): 48 | self.assertEqual( 49 | [ 50 | CodeLine( "h1", "dt1", "a1", 1.0 ), 51 | CodeLine( "h2", "dt2", "a1", 1.0/3 ), 52 | CodeLine( "h2", "dt2", "a2", 1.0/3 ), 53 | CodeLine( "h2", "dt2", "a3", 1.0/3 ), 54 | CodeLine( "h4", "dt4", "a4", 1.0 ), 55 | ], 56 | list( expand_authors( 57 | [ 58 | LogLine( "h1", "dt1", "a1" ), 59 | LogLine( "h2", "dt2", "a1,a2,a3" ), 60 | LogLine( "h4", "dt4", "a4" ), 61 | ] 62 | ) ) 63 | ) 64 | 65 | 66 | def test__Expand_detail_for_single_author(self): 67 | self.assertEqual( 68 | [ 69 | DataLine( 70 | "myhash1", 71 | "mydate1", 72 | 
"Me", 73 | 32, 74 | 1, 75 | "foo.txt", 76 | ), 77 | DataLine( 78 | "myhash1", 79 | "mydate1", 80 | "Me", 81 | 0, 82 | 10, 83 | "bar.pl", 84 | ) 85 | ], 86 | list( expand_detail( 87 | CommitDetail( 88 | "myhash1", 89 | "mydate1", 90 | "Me", 91 | [ 92 | FileChanges( 32, 1, "foo.txt" ), 93 | FileChanges( 0, 10, "bar.pl" ), 94 | ] 95 | ), 96 | 1.0 97 | ) ) 98 | ) 99 | 100 | 101 | def test__Expand_detail_with_weights_on_lines(self): 102 | self.assertEqual( 103 | [ 104 | DataLine( "h", "d", "Me", 9, 0, "foo.txt" ), 105 | DataLine( "h", "d", "Me", 0, 3, "bar.pl" ), 106 | ], 107 | list( expand_detail( 108 | CommitDetail( 109 | "h", 110 | "d", 111 | "Me", 112 | [ 113 | FileChanges( 32, 1, "foo.txt" ), 114 | FileChanges( 0, 10, "bar.pl" ), 115 | ] 116 | ), 117 | 0.3 118 | ) ) 119 | ) 120 | 121 | 122 | def Expand_lines_makes_one_line_for_each_modified_file(self): 123 | git = Git( FakeGit( """2976 Andy Balaam "desc." 124 | 10 2 f.txt 125 | 1 0 g.txt 126 | """, """2976 Peter Broadbent "desc2." 127 | 0 18 h.txt 128 | 4 14 i.txt 129 | 130 | """) ) 131 | self.assertEqual( 132 | [ 133 | DataLine( "h", "d", "a", 10, 2, "f.txt" ), 134 | DataLine( "h", "d", "a", 1, 0, "g.txt" ), 135 | DataLine( "j", "e", "b", 0, 9, "h.txt" ), 136 | DataLine( "j", "e", "b", 2, 7, "i.txt" ), 137 | ], 138 | expand_lines( 139 | git, 140 | CodeLine( "h", "d", "a", 1.0 ), 141 | CodeLine( "j", "j", "b", 0.5 ), 142 | ) 143 | ) 144 | --------------------------------------------------------------------------------