├── tests
├── __init__.py
├── fakegit.py
├── csv__test.py
├── test__csv.py
├── git_parsing__test.py
├── test__git_parsing.py
├── test__settings.py
├── expand_commits__test.py
└── test__expand_commits.py
├── githistorydata
├── __init__.py
├── filechanges.py
├── logline.py
├── csv.py
├── codeline.py
├── rawgit.py
├── TODO.txt
├── commitdetail.py
├── dataline.py
├── main.py
├── expand_commits.py
├── settings.py
└── git.py
├── .gitignore
├── settings.json
├── Makefile
├── git-history-data
├── reports
├── ghd-file-by-num-authors
├── ghd-author-by-num-commits
├── ghd-commit-by-num-files
├── ghd-files-by-num-commits-for-author
└── ghd-author-by-mean-num-files-per-commit
├── legal
├── ZackYovel-DCO1.1.txt
└── andybalaam-DCO1.1.txt
├── LICENSE
└── README.md
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/githistorydata/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .ropeproject
2 | *.pyc
3 |
--------------------------------------------------------------------------------
/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "git_path": "/usr/bin/git"
3 | }
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
# Neither target produces a file of its own name; declaring them phony keeps
# make from skipping them if a file or directory called "all" or "test"
# ever appears in the project root.
.PHONY: all test

# Default target: run the test suite.
all: test

# Run every test that nose discovers.
test:
	nosetests
6 |
--------------------------------------------------------------------------------
/git-history-data:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Prerequisites:
4 | #
5 | # sudo apt-get install git python3 python3-dateutil
6 |
7 | import sys
8 |
9 | from githistorydata.main import main
10 |
if __name__ == "__main__":
    # Hand the real process arguments and standard streams to the exporter.
    main(argv=sys.argv, out=sys.stdout, err=sys.stderr)
13 |
--------------------------------------------------------------------------------
/reports/ghd-file-by-num-authors:
--------------------------------------------------------------------------------
#!/bin/bash

# Rank files by the number of distinct authors who changed them.
#
# Reads the CSV written by git-history-data on stdin
# (columns: Commit, Date, Author, Added, Removed, File) and prints one
# "<count> <file>" line per file, largest count first.
#
#   1. Skip the CSV header row
#   2. Keep file ($6) and author ($3)
#   3. Unique (file, author) pairs
#   4. Group by file
#   5. Sort numerically

# "tail -n +2" starts at the second line; "+1" would pass the header row
# through and count it as a file.
tail -n +2 \
    | awk -F', ' '{print $6, ", ", $3}' \
    | sort | uniq \
    | awk -F', ' '{arr[$1]++}END{for (a in arr) print arr[a], a}' \
    | sort -n -r
13 |
14 |
--------------------------------------------------------------------------------
/reports/ghd-author-by-num-commits:
--------------------------------------------------------------------------------
#!/bin/bash

# Rank authors by the number of commits they made.
#
# Reads the CSV written by git-history-data on stdin
# (columns: Commit, Date, Author, Added, Removed, File) and prints one
# "<count> <author>" line per author, largest count first.
#
#   1. Skip the CSV header row
#   2. Keep author ($3) and commit id ($1)
#   3. Unique (author, commit) pairs
#   4. Group by author
#   5. Sort numerically

# "tail -n +2" starts at the second line; "+1" would pass the header row
# through and count it as an author.
tail -n +2 \
    | awk -F', ' '{print $3, ", ", $1}' \
    | sort | uniq \
    | awk -F', ' '{arr[$1]++}END{for (a in arr) print arr[a], a}' \
    | sort -n -r
13 |
14 |
--------------------------------------------------------------------------------
/reports/ghd-commit-by-num-files:
--------------------------------------------------------------------------------
#!/bin/bash

# Rank commits by the number of files they touched.
#
# Reads the CSV written by git-history-data on stdin
# (columns: Commit, Date, Author, Added, Removed, File) and prints one
# "<count> <commit> <author>" line per commit+author, largest count first.
#
#   1. Skip the CSV header row
#   2. Keep commit+author ($1 $3) and file ($6)
#   3. Unique (commit+author, file) pairs
#   4. Group by commit+author
#   5. Sort numerically

# "tail -n +2" starts at the second line; "+1" would pass the header row
# through and count it as a commit.
tail -n +2 \
    | awk -F', ' '{print $1, $3, ", ", $6}' \
    | sort | uniq \
    | awk -F', ' '{arr[$1]++}END{for (a in arr) print arr[a], a}' \
    | sort -n -r
13 |
14 |
--------------------------------------------------------------------------------
/tests/fakegit.py:
--------------------------------------------------------------------------------
1 |
class FakeGit(object):
    """Stand-in for RawGit that answers every query with the same canned text."""

    def __init__(self, ret_value):
        # The full text a git command would have printed, newlines included.
        self.ret_value = ret_value

    def _canned_lines(self):
        """Split the canned text into lines the same way RawGit does."""
        return self.ret_value.split("\n")

    def git_log_pretty_tformat_H_ai_an(self):
        return self._canned_lines()

    def git_show_numstat(self, commit_hash):
        # commit_hash is accepted for interface compatibility and ignored.
        return self._canned_lines()
12 |
--------------------------------------------------------------------------------
/reports/ghd-files-by-num-commits-for-author:
--------------------------------------------------------------------------------
#!/bin/bash

# Rank the files one author changed by the number of CSV rows naming them.
#
# Usage: ghd-files-by-num-commits-for-author AUTHOR < history.csv
#
# Reads the CSV written by git-history-data on stdin
# (columns: Commit, Date, Author, Added, Removed, File) and prints one
# "<count> <file>" line per file, largest count first.

AUTHOR="$1"

#   1. Skip the CSV header row
#   2. Filter by author (TODO: do properly - this matches AUTHOR as a
#      pattern anywhere in the row, not just in the Author column)
#   3. Keep file ($6)
#   4. Group by file
#   5. Sort numerically

# "tail -n +2" starts at the second line; "+1" would pass the header row
# through, and it would be counted whenever AUTHOR matches its text.
tail -n +2 \
    | grep "${AUTHOR}" \
    | awk -F', ' '{print $6}' \
    | awk -F', ' '{arr[$1]++}END{for (a in arr) print arr[a], a}' \
    | sort -n -r
17 |
18 |
--------------------------------------------------------------------------------
/githistorydata/filechanges.py:
--------------------------------------------------------------------------------
1 |
2 |
class FileChanges(object):
    """Line counts added to and removed from one file in a commit."""

    def __init__(self, added, removed, name):
        """Store the number of added and removed lines and the file name."""
        self.added = added
        self.removed = removed
        self.name = name

    def __str__(self):
        return "+%d -%d %s" % (self.added, self.removed, self.name)

    def __eq__(self, other):
        """Compare field by field; defer to the other operand for foreign types."""
        if not isinstance(other, FileChanges):
            # Returning NotImplemented makes `fc == None` evaluate to False
            # rather than raising AttributeError on the missing fields.
            return NotImplemented
        return (
            self.added == other.added
            and self.removed == other.removed
            and self.name == other.name
        )
18 |
--------------------------------------------------------------------------------
/githistorydata/logline.py:
--------------------------------------------------------------------------------
1 |
2 |
class LogLine(object):
    """One commit as listed by git log: its hash, date and author string."""

    def __init__(self, commit_hash, date, author):
        """Store the commit hash, commit date and author string."""
        self.commit_hash = commit_hash
        self.date = date
        self.author = author

    def __str__(self):
        return "%s %s %s" % (self.commit_hash, self.date, self.author)

    def __eq__(self, other):
        """Compare field by field; defer to the other operand for foreign types."""
        if not isinstance(other, LogLine):
            # Returning NotImplemented makes `line == None` evaluate to False
            # rather than raising AttributeError on the missing fields.
            return NotImplemented
        return (
            self.commit_hash == other.commit_hash
            and self.date == other.date
            and self.author == other.author
        )
18 |
--------------------------------------------------------------------------------
/reports/ghd-author-by-mean-num-files-per-commit:
--------------------------------------------------------------------------------
#!/bin/bash

# Rank authors by the mean number of files they change per commit.
#
# Reads the CSV written by git-history-data on stdin
# (columns: Commit, Date, Author, Added, Removed, File) and prints one
# "<mean> <author>" line per author, largest mean first.
#
#   1. Skip the CSV header row
#   2. Keep commit id ($1), author ($3) and file ($6)
#   3. Unique (commit, author, file) rows
#   4. Per commit: author-of-commit, num-files-in-commit
#   5. Per author: mean of num-files over that author's commits
#   6. Sort numerically

# "tail -n +2" starts at the second line; "+1" would pass the header row
# through and count it as a one-file commit by an author called "Author".
tail -n +2 \
    | awk -F', ' '{print $1, ", ", $3, ", ", $6}' \
    | sort | uniq \
    | awk -F', ' '{arr[$1]++; auth[$1]=$2}END{for (a in arr) print auth[a], ",", arr[a]}' \
    | awk -F', ' '{sum[$1]+=$2;count[$1]++}END{for (a in count) print sum[a]/count[a], a}' \
    | sort -n -r
16 |
17 |
--------------------------------------------------------------------------------
/githistorydata/csv.py:
--------------------------------------------------------------------------------
class Csv(object):
    """Minimal CSV writer: a header row on construction, then one row per line().

    Fields are separated by ", ". Values that float() accepts are written
    bare; everything else is written inside double quotes.
    """

    def __init__(self, out, columns):
        """Remember the output stream and column names, and write the header.

        out: text stream with a write() method.
        columns: sequence of column headings; its length fixes the row width.
        """
        self.out = out
        self.columns = columns
        self._write(columns)

    def line(self, items):
        """Write one data row; items must have one entry per column."""
        assert len(items) == len(self.columns)
        self._write(items)

    def _write(self, items):
        self.out.write(u", ".join(self._fmt(c) for c in items))
        self.out.write(u"\n")

    def _fmt(self, item):
        """Return item as a bare number or as a double-quoted field."""
        try:
            float(item)
        except (TypeError, ValueError):
            # Not numeric. Double any embedded quote so the field stays one
            # well-formed quoted value.
            return '"%s"' % str(item).replace('"', '""')
        return str(item)
23 |
--------------------------------------------------------------------------------
/githistorydata/codeline.py:
--------------------------------------------------------------------------------
class CodeLine(object):
    """One (commit, author) pair plus that author's share of the commit."""

    def __init__(self, commit_hash, date, author, weight):
        """Store the commit hash, date, a single author name and the weight."""
        self.commit_hash = commit_hash
        self.date = date
        self.author = author
        self.weight = weight

    def __eq__(self, other):
        """Compare field by field; defer to the other operand for foreign types."""
        if not isinstance(other, CodeLine):
            # Returning NotImplemented makes `line == None` evaluate to False
            # rather than raising AttributeError on the missing fields.
            return NotImplemented
        return (
            self.commit_hash == other.commit_hash
            and self.date == other.date
            and self.author == other.author
            and self.weight == other.weight
        )

    def __str__(self):
        return "%s %s %s %f" % (
            self.commit_hash, self.date, self.author, self.weight
        )
21 |
--------------------------------------------------------------------------------
/githistorydata/rawgit.py:
--------------------------------------------------------------------------------
1 |
2 | import subprocess
3 |
4 |
class RawGit(object):
    """Runs the git executable and returns its output as a list of lines."""

    def __init__(self, git_path="/usr/bin/git"):
        # Path of the git executable used for every command.
        self._git_path = git_path

    def git_log_pretty_tformat_H_ai_an(self):
        """Run git log, excluding merges, with the "%H %ai %an" line format."""
        log_args = ["log", "--no-merges", "--pretty=tformat:%H %ai %an"]
        return self._run_git(log_args)

    def git_show_numstat(self, commit_hash):
        """Run git show --numstat for one commit, with a one-line header."""
        show_args = ["show", "--pretty=oneline", "--numstat", commit_hash]
        return self._run_git(show_args)

    def _run_git(self, args):
        """Run git with args; raises CalledProcessError on a non-zero exit."""
        command = [self._git_path] + args
        raw_output = subprocess.check_output(command)
        # Undecodable bytes are replaced rather than aborting the run.
        text = raw_output.decode(encoding="UTF-8", errors="replace")
        return text.split("\n")
21 |
22 |
--------------------------------------------------------------------------------
/githistorydata/TODO.txt:
--------------------------------------------------------------------------------
1 | (Time flows upwards)
2 |
3 | - Release 1.0
4 |
5 | - Create a standard report (HTML or Jupyter) so I can run 2 commands
6 | to learn a lot about my codebase. Potential inspiration:
7 | https://accu.org/index.php/journals/1835
8 |
9 | - Release 0.3
10 |
11 | - Create charts using something like Plotly https://plot.ly/python/
12 | - More reports - establish a "standard set" that covers
13 | most things you want to know at a high level
14 |
15 | - Release 0.2
16 |
17 | - Clean environment when running Git using:
18 | GIT_CONFIG_NOSYSTEM=true HOME=/dev/null XDG_CONFIG_HOME=/dev/null
19 | + Avoid hard-coding git executable location
20 | + Fix bug (#1) where git show command assumed --pretty=oneline
21 |
22 | + Initial release
23 |
24 |
--------------------------------------------------------------------------------
/githistorydata/commitdetail.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | class CommitDetail( object ):
4 | def __init__( self, commit_hash, date, author, file_changes ):
5 | self.commit_hash = commit_hash
6 | self.date = date
7 | self.author = author
8 | self.file_changes = file_changes
9 |
10 | def __str__( self ):
11 | ret = "%s %s %s" % (
12 | self.commit_hash,
13 | self.date,
14 | self.author,
15 | )
16 | ret += "\n"
17 | ret += "\n".join( ( " " + str(ch) ) for ch in self.file_changes )
18 | return ret
19 |
20 | def __eq__( self, other ):
21 | return (
22 | self.commit_hash == other.commit_hash
23 | and self.file_changes == other.file_changes
24 | )
25 |
--------------------------------------------------------------------------------
/githistorydata/dataline.py:
--------------------------------------------------------------------------------
1 |
2 |
class DataLine(object):
    """One output row: a commit, one of its authors and one changed file."""

    def __init__(self, commit_hash, date, author, added, removed, filename):
        """Store the commit hash, date, author, line counts and file name."""
        self.commit_hash = commit_hash
        self.date = date
        self.author = author
        self.added = added
        self.removed = removed
        self.filename = filename

    def __str__(self):
        # Every field goes through str(): str.join() accepts only strings,
        # and date is not necessarily one (main() treats it as a datetime).
        return " ".join(
            str(field)
            for field in (
                self.commit_hash,
                self.date,
                self.author,
                self.added,
                self.removed,
                self.filename,
            )
        )

    def __eq__(self, other):
        """Compare field by field; defer to the other operand for foreign types."""
        if not isinstance(other, DataLine):
            # Returning NotImplemented makes `line == None` evaluate to False
            # rather than raising AttributeError on the missing fields.
            return NotImplemented
        return (
            self.commit_hash == other.commit_hash
            and self.date == other.date
            and self.author == other.author
            and self.added == other.added
            and self.removed == other.removed
            and self.filename == other.filename
        )
33 |
--------------------------------------------------------------------------------
/githistorydata/main.py:
--------------------------------------------------------------------------------
1 |
2 | import subprocess
3 | import sys
4 |
5 | from githistorydata.csv import Csv
6 | from githistorydata.expand_commits import expand_authors, expand_lines
7 | from githistorydata.git import Git
8 | from githistorydata.rawgit import RawGit
9 | from githistorydata.settings import Settings
10 |
11 |
def main(argv, out, err):
    """Write the history of the repo in the current directory to out as CSV.

    One row is written per commit, author and changed file.

    argv: command-line arguments (currently unused).
    out: text stream that receives the CSV; always flushed before returning.
    err: text stream that receives the message if a git command fails.

    Exits the process with status 1 when git returns a non-zero exit code.
    """
    settings = Settings()
    try:
        git = Git(RawGit(settings["git_path"]))
        csv = Csv(
            out,
            ("Commit", "Date", "Author", "Added", "Removed", "File")
        )
        for cod in expand_lines(git, expand_authors(git.log())):
            csv.line((
                cod.commit_hash,
                cod.date.date().isoformat(),
                cod.author,
                cod.added,
                cod.removed,
                cod.filename,
            ))
    except subprocess.CalledProcessError as e:
        # Report on the error stream: out carries the CSV, so a message
        # printed there would end up as a bogus row in the data.
        err.write(str(e) + "\n")
        sys.exit(1)
    finally:
        out.flush()
34 |
--------------------------------------------------------------------------------
/githistorydata/expand_commits.py:
--------------------------------------------------------------------------------
1 |
2 | from githistorydata.codeline import CodeLine
3 | from githistorydata.dataline import DataLine
4 |
5 |
def expand_authors(log_lines):
    """Yield one CodeLine per author named in each log line.

    The author field is split on commas; each resulting name is stripped of
    surrounding whitespace and given an equal share (1 / number of names)
    of the commit as its weight.
    """
    for entry in log_lines:
        names = entry.author.split(",")
        share = 1.0 / len(names)
        for name in names:
            yield CodeLine(entry.commit_hash, entry.date, name.strip(), share)
17 |
18 |
def expand_detail(commit_detail, weight):
    """Return a generator of DataLine, one per file changed in the commit.

    Added and removed line counts are multiplied by weight and truncated
    to integers.
    """
    return (
        DataLine(
            commit_detail.commit_hash,
            commit_detail.date,
            commit_detail.author,
            int(change.added * weight),
            int(change.removed * weight),
            change.name,
        )
        for change in commit_detail.file_changes
    )
31 |
32 |
def expand_lines(git, code_lines):
    """Yield the DataLines of every code line, in order.

    For each code line the commit is looked up with git.show() and its file
    changes are expanded with that code line's weight.
    """
    for code_line in code_lines:
        detail = git.show(
            code_line.commit_hash, code_line.date, code_line.author
        )
        yield from expand_detail(detail, code_line.weight)
38 |
--------------------------------------------------------------------------------
/tests/csv__test.py:
--------------------------------------------------------------------------------
1 |
2 | from io import StringIO
3 | from githistorydata.csv import Csv
4 |
5 | from nose.tools import assert_equal
6 |
7 |
def Headings_are_printed_quoted__test():
    # Constructing the writer is enough to emit the header row.
    buf = StringIO()
    Csv(buf, ("a", "B c"))
    assert_equal('"a", "B c"\n', buf.getvalue())
17 |
18 |
def String_lines_are_printed_in_quotes__test():
    # String values are wrapped in double quotes, like the headings.
    buf = StringIO()
    writer = Csv(buf, ("a", "b"))
    writer.line(("x", "y"))
    expected = '"a", "b"\n' + '"x", "y"\n'
    assert_equal(expected, buf.getvalue())
29 |
30 |
def Number_lines_are_printed_without_quotes__test():
    # Ints and floats are written bare; the string between them is quoted.
    buf = StringIO()
    writer = Csv(buf, ("a", "b", "c"))
    writer.line((2, "x", 3.5))
    expected = '"a", "b", "c"\n' + '2, "x", 3.5\n'
    assert_equal(expected, buf.getvalue())
41 |
42 |
def Multiple_lines_are_printed__test():
    # Rows appear in the order they were written, after the header.
    buf = StringIO()
    writer = Csv(buf, ("a", "b", "c"))
    for row in ((2, "x", 3.5), (4, "y", 5.5)):
        writer.line(row)
    expected = '"a", "b", "c"\n' + '2, "x", 3.5\n' + '4, "y", 5.5\n'
    assert_equal(expected, buf.getvalue())
55 |
--------------------------------------------------------------------------------
/legal/ZackYovel-DCO1.1.txt:
--------------------------------------------------------------------------------
1 | Developer's Certificate of Origin 1.1
2 |
3 | By making a contribution to this project, I certify that:
4 |
5 | (a) The contribution was created in whole or in part by me and I
6 | have the right to submit it under the open source license
7 | indicated in the file; or
8 |
9 | (b) The contribution is based upon previous work that, to the best
10 | of my knowledge, is covered under an appropriate open source
11 | license and I have the right under that license to submit that
12 | work with modifications, whether created in whole or in part
13 | by me, under the same open source license (unless I am
14 | permitted to submit under a different license), as indicated
15 | in the file; or
16 |
17 | (c) The contribution was provided directly to me by some other
18 | person who certified (a), (b) or (c) and I have not modified
19 | it.
20 |
21 | (d) I understand and agree that this project and the contribution
22 | are public and that a record of the contribution (including all
23 | personal information I submit with it, including my sign-off) is
24 | maintained indefinitely and may be redistributed consistent with
25 | this project or the open source license(s) involved.
26 |
--------------------------------------------------------------------------------
/legal/andybalaam-DCO1.1.txt:
--------------------------------------------------------------------------------
1 | Developer's Certificate of Origin 1.1
2 |
3 | By making a contribution to this project, I certify that:
4 |
5 | (a) The contribution was created in whole or in part by me and I
6 | have the right to submit it under the open source license
7 | indicated in the file; or
8 |
9 | (b) The contribution is based upon previous work that, to the best
10 | of my knowledge, is covered under an appropriate open source
11 | license and I have the right under that license to submit that
12 | work with modifications, whether created in whole or in part
13 | by me, under the same open source license (unless I am
14 | permitted to submit under a different license), as indicated
15 | in the file; or
16 |
17 | (c) The contribution was provided directly to me by some other
18 | person who certified (a), (b) or (c) and I have not modified
19 | it.
20 |
21 | (d) I understand and agree that this project and the contribution
22 | are public and that a record of the contribution (including all
23 | personal information I submit with it, including my sign-off) is
24 | maintained indefinitely and may be redistributed consistent with
25 | this project or the open source license(s) involved.
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | (c) Copyright IBM Corporation 2015 and the git-history-data contributors.
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 |
25 |
--------------------------------------------------------------------------------
/tests/test__csv.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from io import StringIO
3 | from githistorydata.csv import Csv
4 |
5 |
class TestCsv(unittest.TestCase):
    """Checks the text that Csv writes to its output stream."""

    def test__Headings_are_printed_quoted(self):
        # Constructing the writer is enough to emit the header row.
        buf = StringIO()
        Csv(buf, ("a", "B c"))
        self.assertEqual('"a", "B c"\n', buf.getvalue())

    def test__String_lines_are_printed_in_quotes(self):
        buf = StringIO()
        writer = Csv(buf, ("a", "b"))
        writer.line(("x", "y"))
        expected = '"a", "b"\n' + '"x", "y"\n'
        self.assertEqual(expected, buf.getvalue())

    def test__Number_lines_are_printed_without_quotes(self):
        buf = StringIO()
        writer = Csv(buf, ("a", "b", "c"))
        writer.line((2, "x", 3.5))
        expected = '"a", "b", "c"\n' + '2, "x", 3.5\n'
        self.assertEqual(expected, buf.getvalue())

    def test__Multiple_lines_are_printed(self):
        buf = StringIO()
        writer = Csv(buf, ("a", "b", "c"))
        for row in ((2, "x", 3.5), (4, "y", 5.5)):
            writer.line(row)
        expected = '"a", "b", "c"\n' + '2, "x", 3.5\n' + '4, "y", 5.5\n'
        self.assertEqual(expected, buf.getvalue())
54 |
--------------------------------------------------------------------------------
/githistorydata/settings.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 |
class Settings:
    """A settings object responsible for managing a single JSON settings file."""

    def __init__(self, settings_file_path="settings.json"):
        """Initialize a Settings object and load the file's contents."""
        self._settings_file_path = settings_file_path
        self._settings_dict = {}
        self.load()

    def load(self):
        """Load settings from file.

        Values from the file override values already held in memory. If the
        file cannot be read, or does not contain a JSON object, the settings
        are reset to the built-in default git_path.
        """
        try:
            with open(self._settings_file_path, encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # Missing or unreadable file, or malformed JSON.
            loaded = None

        if isinstance(loaded, dict):
            self._settings_dict = {**self._settings_dict, **loaded}
        else:
            self._settings_dict = {"git_path": "/usr/bin/git"}

    def add(self, settings):
        """Add one or more extra settings to this object without persisting
        them. settings must be a mapping object."""
        self._settings_dict = {**self._settings_dict, **settings}

    def save(self):
        """Save this object to file as it is, possibly overwriting un-synced
        changes in the file."""
        with open(self._settings_file_path, "w", encoding="utf-8") as f:
            json.dump(self._settings_dict, f)

    def persist(self, settings=None):
        """Potentially add settings to this object, then persist all the
        settings in it.

        Keys that exist only in the file are kept; for keys present in both,
        the value held by this object wins.
        """
        # if new settings received, add them to this object
        if settings:
            self.add(settings)

        # synchronize settings with file: load() lets file values win (and
        # resets everything if the file is missing), so the in-memory values
        # are set aside first and re-applied on top of what was loaded
        pending = dict(self._settings_dict)
        self.load()
        self._settings_dict.update(pending)

        # persist all settings to file
        self.save()

    def __getitem__(self, key):
        """Access Settings using square brackets."""
        return self._settings_dict[key]

    def __setitem__(self, key, value):
        """Set items using square brackets without persisting the change."""
        self._settings_dict[key] = value
50 |
--------------------------------------------------------------------------------
/githistorydata/git.py:
--------------------------------------------------------------------------------
1 |
2 | import re
3 | import dateutil.parser
4 |
5 | from githistorydata.commitdetail import CommitDetail
6 | from githistorydata.filechanges import FileChanges
7 | from githistorydata.logline import LogLine
8 |
9 |
class Git(object):
    """Turns the text lines produced by a raw git runner into objects."""

    # "<40-hex hash> <YYYY-MM-DD HH:MM:SS +ZZZZ> <author>"
    _logline_re = re.compile(
        r"([0-9a-f]{40}) (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} [-+]\d{4}) (.+)"
    )

    # "<added> <removed> <file>", where either count may be "-"
    showline_re = re.compile(
        r"(-|\d+)\s+(-|\d+)\s+(.*)"
    )

    def __init__(self, raw_git):
        self.raw_git = raw_git

    def log(self):
        """
        Return a list of LogLine for the repo in the current dir.
        """
        stripped = (
            raw.strip()
            for raw in self.raw_git.git_log_pretty_tformat_H_ai_an()
        )
        return [self._logline(text) for text in stripped if text != ""]

    def show(self, commit_hash, date, author):
        """Return the CommitDetail for one commit.

        The first line of the raw output is skipped and empty lines are
        ignored; every remaining line is parsed as a file change.
        """
        raw_lines = self.raw_git.git_show_numstat(commit_hash)
        changes = [self._showline(row) for row in raw_lines[1:] if row != ""]
        return CommitDetail(commit_hash, date, author, changes)

    def _logline(self, ln):
        """Parse one git log line into a LogLine, or raise if it is malformed."""
        found = Git._logline_re.match(ln)
        if found is None:
            raise Exception(
                "Line from git log '%s' did not match expected format"
                % ln
            )
        commit_hash, when, author = found.group(1, 2, 3)
        return LogLine(commit_hash, dateutil.parser.parse(when), author)

    @staticmethod
    def lines_changed(num):
        """Convert a line-count field to int, treating "-" as 0."""
        return 0 if num == "-" else int(num)

    def _showline(self, ln):
        """Parse one numstat line into a FileChanges, or raise if malformed."""
        found = Git.showline_re.match(ln)
        if found is None:
            raise Exception(
                "Line from git show '%s' did not match expected format"
                % ln
            )
        added, removed, name = found.group(1, 2, 3)
        return FileChanges(
            Git.lines_changed(added),
            Git.lines_changed(removed),
            name,
        )
74 |
--------------------------------------------------------------------------------
/tests/git_parsing__test.py:
--------------------------------------------------------------------------------
1 |
2 | from datetime import datetime
3 | import dateutil.parser
4 |
5 | from githistorydata.commitdetail import CommitDetail
6 | from githistorydata.filechanges import FileChanges
7 | from githistorydata.git import Git
8 | from githistorydata.logline import LogLine
9 |
10 | from tests.fakegit import FakeGit
11 |
12 | from nose.tools import assert_equal
13 |
14 |
def Log_lines_are_parsed__test():
    # Three commits; the blank first and last lines must be ignored.
    canned = "\n".join( [
        "",
        "64c5da790fb7edef4f99053497075839d11bb6d8 2015-07-09 12:00:00 +0100 Alban Tsui",
        "2993dbf67c7e0659eba13987c98c5a03aade7099 2015-10-31 12:15:27 +0100 Lennart Tange",
        "c504bd352d5d9dd0ccec3cd601ac02b14f4982a8 2015-07-09 12:00:00 -0200 Alban Tsui",
        "",
    ] )
    git = Git( FakeGit( canned ) )

    plus_one_hour = dateutil.tz.tzoffset( "tz", 60*60 )  # +0100
    minus_two_hours = dateutil.tz.tzoffset( "tz", -2*60*60 )  # -0200

    expected = [
        LogLine(
            "64c5da790fb7edef4f99053497075839d11bb6d8",
            datetime( 2015, 7, 9, 12, 0, 0, 0, plus_one_hour ),
            "Alban Tsui"
        ),
        LogLine(
            "2993dbf67c7e0659eba13987c98c5a03aade7099",
            datetime( 2015, 10, 31, 12, 15, 27, 0, plus_one_hour ),
            "Lennart Tange"
        ),
        LogLine(
            "c504bd352d5d9dd0ccec3cd601ac02b14f4982a8",
            datetime( 2015, 7, 9, 12, 0, 0, 0, minus_two_hours ),
            "Alban Tsui"
        ),
    ]

    assert_equal( expected, git.log() )
45 |
46 |
def FileChanges_to_string__test():
    # Added count, removed count, then the file name.
    changes = FileChanges( 3, 2, "foo.txt" )
    assert_equal( "+3 -2 foo.txt", str( changes ) )
49 |
50 |
51 | def CommitDetail_to_string__test():
52 | assert_equal(
53 | """myhash dt auth
54 | +1 -0 x.cpp
55 | +3 -2 y.cpp""",
56 | str( CommitDetail(
57 | "myhash",
58 | "dt",
59 | "auth",
60 | [
61 | FileChanges( 1, 0, "x.cpp" ),
62 | FileChanges( 3, 2, "y.cpp" ),
63 | ]
64 | ) )
65 | )
66 |
67 |
def Numstat_lines_are_parsed__test():
    # A one-line commit header followed by three numstat rows.
    canned = "\n".join( [
        '2993dbf Lennart Tange "More generic dnd helper."',
        "71 0 scripts/drag_and_drop_helper.js",
        "0 66 scripts/dragdrop_pin_to_assemble.js",
        "23 16 src/step_definitions/StepDef.java",
        "",
    ] )
    git = Git( FakeGit( canned ) )

    expected = CommitDetail(
        "2993bdfAAAAAAAAAAAA",
        "dt",
        "auth",
        [
            FileChanges( 71, 0, "scripts/drag_and_drop_helper.js" ),
            FileChanges( 0, 66, "scripts/dragdrop_pin_to_assemble.js" ),
            FileChanges( 23, 16, "src/step_definitions/StepDef.java" ),
        ]
    )
    actual = git.show( "2993bdfAAAAAAAAAAAA", "dt", "auth" )

    # Compared through str() so that the date and author take part too.
    assert_equal( str( expected ), str( actual ) )
88 |
--------------------------------------------------------------------------------
/tests/test__git_parsing.py:
--------------------------------------------------------------------------------
1 |
2 | from datetime import datetime
3 | import dateutil.parser
4 |
5 | from githistorydata.commitdetail import CommitDetail
6 | from githistorydata.filechanges import FileChanges
7 | from githistorydata.git import Git
8 | from githistorydata.logline import LogLine
9 |
10 | from tests.fakegit import FakeGit
11 |
12 | import unittest
13 |
14 |
15 | class TestGitParsing(unittest.TestCase):
def test__Log_lines_are_parsed(self):
    # Three commits; the blank first and last lines must be ignored.
    canned = "\n".join( [
        "",
        "64c5da790fb7edef4f99053497075839d11bb6d8 2015-07-09 12:00:00 +0100 Alban Tsui",
        "2993dbf67c7e0659eba13987c98c5a03aade7099 2015-10-31 12:15:27 +0100 Lennart Tange",
        "c504bd352d5d9dd0ccec3cd601ac02b14f4982a8 2015-07-09 12:00:00 -0200 Alban Tsui",
        "",
    ] )
    git = Git( FakeGit( canned ) )

    plus_one_hour = dateutil.tz.tzoffset( "tz", 60*60 )  # +0100
    minus_two_hours = dateutil.tz.tzoffset( "tz", -2*60*60 )  # -0200

    expected = [
        LogLine(
            "64c5da790fb7edef4f99053497075839d11bb6d8",
            datetime( 2015, 7, 9, 12, 0, 0, 0, plus_one_hour ),
            "Alban Tsui"
        ),
        LogLine(
            "2993dbf67c7e0659eba13987c98c5a03aade7099",
            datetime( 2015, 10, 31, 12, 15, 27, 0, plus_one_hour ),
            "Lennart Tange"
        ),
        LogLine(
            "c504bd352d5d9dd0ccec3cd601ac02b14f4982a8",
            datetime( 2015, 7, 9, 12, 0, 0, 0, minus_two_hours ),
            "Alban Tsui"
        ),
    ]

    self.assertEqual( expected, git.log() )
46 |
47 |
def test__FileChanges_to_string(self):
    # Added count, removed count, then the file name.
    changes = FileChanges( 3, 2, "foo.txt" )
    self.assertEqual( "+3 -2 foo.txt", str( changes ) )
50 |
51 |
52 | def test__CommitDetail_to_string(self):
53 | self.assertEqual(
54 | """myhash dt auth
55 | +1 -0 x.cpp
56 | +3 -2 y.cpp""",
57 | str( CommitDetail(
58 | "myhash",
59 | "dt",
60 | "auth",
61 | [
62 | FileChanges( 1, 0, "x.cpp" ),
63 | FileChanges( 3, 2, "y.cpp" ),
64 | ]
65 | ) )
66 | )
67 |
68 |
def test__Numstat_lines_are_parsed(self):
    # A one-line commit header followed by three numstat rows.
    canned = "\n".join( [
        '2993dbf Lennart Tange "More generic dnd helper."',
        "71 0 scripts/drag_and_drop_helper.js",
        "0 66 scripts/dragdrop_pin_to_assemble.js",
        "23 16 src/step_definitions/StepDef.java",
        "",
    ] )
    git = Git( FakeGit( canned ) )

    expected = CommitDetail(
        "2993bdfAAAAAAAAAAAA",
        "dt",
        "auth",
        [
            FileChanges( 71, 0, "scripts/drag_and_drop_helper.js" ),
            FileChanges( 0, 66, "scripts/dragdrop_pin_to_assemble.js" ),
            FileChanges( 23, 16, "src/step_definitions/StepDef.java" ),
        ]
    )
    actual = git.show( "2993bdfAAAAAAAAAAAA", "dt", "auth" )

    # Compared through str() so that the date and author take part too.
    self.assertEqual( str( expected ), str( actual ) )
89 |
--------------------------------------------------------------------------------
/tests/test__settings.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import json
3 | import os
4 | from githistorydata.settings import Settings
5 |
6 | """ Tests file for unittest/nose2 """
7 |
8 | TEST_SETTINGS_FILE = "test_settings.json"
9 |
10 |
def get_test_settings():
    """Return a fresh Settings object backed by the test settings file."""
    return Settings(settings_file_path=TEST_SETTINGS_FILE)
13 |
14 |
def persist_test_settings(settings):
    """Overwrite the test settings file with settings, bypassing Settings."""
    with open(TEST_SETTINGS_FILE, "w") as settings_file:
        settings_file.write(json.dumps(settings))
18 |
19 |
def update_test_settings(settings):
    """ Update the file 'manually' (not through the Settings API)
    and create a new Settings object """
    persist_test_settings(settings)
    reloaded = get_test_settings()
    return reloaded
25 |
26 |
class TestSettings(unittest.TestCase):
    """Exercises Settings against a scratch settings file.

    Every test starts from a file holding an empty JSON object; the file is
    deleted again afterwards.
    """

    def setUp(self):
        settings = {}
        persist_test_settings(settings)

    def tearDown(self):
        os.remove(TEST_SETTINGS_FILE)

    def test_ctor(self):
        # test default ctor
        settings = Settings()
        self.assertEqual(settings["git_path"], "/usr/bin/git")

        # test ctor with settings_file_path argument
        settings = {"key1": "value1"}
        settings = update_test_settings(settings)
        self.assertEqual(settings["key1"], "value1")

    def test_load(self):
        settings1 = get_test_settings()
        # assert 'key1' is not in the settings
        with self.assertRaises(KeyError):
            settings1["key1"]
        # write the 'key1' setting to the test file
        # (not using the Settings API)
        settings2 = {"key1": "value1"}
        persist_test_settings(settings2)
        # perform 'load'
        settings1.load()
        # assert 'key1' is in settings
        self.assertEqual(settings1["key1"], "value1")

    def test_add(self):
        settings = get_test_settings()
        # assert 'key1' is not in the settings
        with self.assertRaises(KeyError):
            settings["key1"]
        # add 'key1'
        settings.add({"key1": "value1"})
        # assert 'key1' is in settings
        self.assertEqual(settings["key1"], "value1")
        # assert 'key1' not persisted
        settings = get_test_settings()
        with self.assertRaises(KeyError):
            settings["key1"]

    def test_save(self):
        settings = get_test_settings()
        # assert 'key1' is not in the settings
        with self.assertRaises(KeyError):
            settings["key1"]
        # add 'key1'
        settings.add({"key1": "value1"})
        # save settings
        settings.save()
        # assert 'key1' persisted
        settings = get_test_settings()
        self.assertEqual(settings["key1"], "value1")

    def test_persist(self):
        settings = get_test_settings()
        # assert 'key1' is not in the settings
        with self.assertRaises(KeyError):
            settings["key1"]
        # persist 'key1'
        settings.persist({"key1": "value1"})
        # assert 'key1' persisted
        settings = get_test_settings()
        self.assertEqual(settings["key1"], "value1")

    def test___getitem__(self):
        # test the [] accessors
        settings = Settings()
        self.assertEqual(settings["git_path"], "/usr/bin/git")

    def test___setitem__(self):
        settings = get_test_settings()
        # assert 'key1' is not in the settings
        with self.assertRaises(KeyError):
            settings["key1"]
        # set 'key1'
        settings["key1"] = "value1"
        # assert 'key1' is in settings
        self.assertEqual(settings["key1"], "value1")
        # assert 'key1' not persisted
        settings = get_test_settings()
        with self.assertRaises(KeyError):
            settings["key1"]
116 |
117 |
--------------------------------------------------------------------------------
/tests/expand_commits__test.py:
--------------------------------------------------------------------------------
1 |
2 | from githistorydata.codeline import CodeLine
3 | from githistorydata.commitdetail import CommitDetail
4 | from githistorydata.dataline import DataLine
5 | from githistorydata.expand_commits import expand_authors
6 | from githistorydata.expand_commits import expand_detail
7 | from githistorydata.expand_commits import expand_lines
8 | from githistorydata.filechanges import FileChanges
9 | from githistorydata.git import Git
10 | from githistorydata.logline import LogLine
11 |
12 | from tests.fakegit import FakeGit
13 |
14 | from nose.tools import assert_equal
15 |
16 |
def Normal_commits_are_not_expanded__test():
    # Log lines naming a single author should each come back as exactly
    # one CodeLine whose last field is 1.0.
    expected = [
        CodeLine( "h1", "dt1", "a1", 1.0 ),
        CodeLine( "h2", "dt2", "a2", 1.0 ),
    ]
    log_lines = [
        LogLine( "h1", "dt1", "a1" ),
        LogLine( "h2", "dt2", "a2" ),
    ]
    assert_equal( expected, list( expand_authors( log_lines ) ) )
30 |
31 |
def Shared_commits_are_expanded__test():
    # A log line whose author field is "a1,a2" should produce one
    # CodeLine per author, each carrying 0.5.
    expected = [
        CodeLine( "h1", "dt1", "a1", 0.5 ),
        CodeLine( "h1", "dt1", "a2", 0.5 ),
    ]
    shared_log = [ LogLine( "h1", "dt1", "a1,a2" ) ]
    assert_equal( expected, list( expand_authors( shared_log ) ) )
44 |
45 |
def Multiple_commits__test():
    # Mix of single-author and three-author log lines: the shared one is
    # expected to yield three CodeLines carrying 1.0/3 each, in order.
    expected = [
        CodeLine( "h1", "dt1", "a1", 1.0 ),
        CodeLine( "h2", "dt2", "a1", 1.0/3 ),
        CodeLine( "h2", "dt2", "a2", 1.0/3 ),
        CodeLine( "h2", "dt2", "a3", 1.0/3 ),
        CodeLine( "h4", "dt4", "a4", 1.0 ),
    ]
    log_lines = [
        LogLine( "h1", "dt1", "a1" ),
        LogLine( "h2", "dt2", "a1,a2,a3" ),
        LogLine( "h4", "dt4", "a4" ),
    ]
    assert_equal( expected, list( expand_authors( log_lines ) ) )
63 |
64 |
def Expand_detail_for_single_author__test():
    # With a factor of 1.0 every FileChanges entry should come back as a
    # DataLine carrying the same two counts and file name.
    expected = [
        DataLine( "myhash1", "mydate1", "Me", 32, 1, "foo.txt" ),
        DataLine( "myhash1", "mydate1", "Me", 0, 10, "bar.pl" ),
    ]
    detail = CommitDetail(
        "myhash1",
        "mydate1",
        "Me",
        [
            FileChanges( 32, 1, "foo.txt" ),
            FileChanges( 0, 10, "bar.pl" ),
        ],
    )
    assert_equal( expected, list( expand_detail( detail, 1.0 ) ) )
98 |
99 |
def Expand_detail_with_weights_on_lines__test():
    # With a factor of 0.3 the expected counts are the originals scaled
    # down to whole numbers (32 -> 9, 1 -> 0, 10 -> 3).
    expected = [
        DataLine( "h", "d", "Me", 9, 0, "foo.txt" ),
        DataLine( "h", "d", "Me", 0, 3, "bar.pl" ),
    ]
    detail = CommitDetail(
        "h",
        "d",
        "Me",
        [
            FileChanges( 32, 1, "foo.txt" ),
            FileChanges( 0, 10, "bar.pl" ),
        ],
    )
    assert_equal( expected, list( expand_detail( detail, 0.3 ) ) )
119 |
120 |
# NOTE(review): unlike the other functions in this module, this name has no
# "test" token in it, so name-based test collection presumably skips it --
# confirm whether that is intentional.
def Expand_lines_makes_one_line_for_each_modified_file():
    # FakeGit is handed two canned text blocks, each a header line
    # followed by three-column lines (two numbers and a file name).
    git = Git( FakeGit( """2976 Andy Balaam "desc."
10 2 f.txt
1 0 g.txt
""", """2976 Peter Broadbent "desc2."
0 18 h.txt
4 14 i.txt

""") )
    # Expected rows for the second commit are half the canned numbers
    # (18 -> 9, 4 -> 2, 14 -> 7), matching the 0.5 on its CodeLine.
    # NOTE(review): the expected rows use "e" where the second CodeLine
    # has "j" as its second field -- possibly a typo; verify.
    # NOTE(review): the result of expand_lines is compared directly, not
    # wrapped in list() as the other tests in this module do, and the two
    # CodeLines are passed as separate positional arguments -- check this
    # against expand_lines' actual signature.
    assert_equal(
        [
            DataLine( "h", "d", "a", 10, 2, "f.txt" ),
            DataLine( "h", "d", "a", 1, 0, "g.txt" ),
            DataLine( "j", "e", "b", 0, 9, "h.txt" ),
            DataLine( "j", "e", "b", 2, 7, "i.txt" ),
        ],
        expand_lines(
            git,
            CodeLine( "h", "d", "a", 1.0 ),
            CodeLine( "j", "j", "b", 0.5 ),
        )
    )
143 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # git-history-data
2 |
3 | git-history-data analyses a Git source code repository and dumps out data in a form that is easy to analyse. In its simplest form it prints out one line for every change to every file in history, and who made the change, like this:
4 |
5 | $ cd myrepo
6 | $ git-history-data
7 | "Commit", "Date", "Author", "Added", "Removed", "File"
8 | "e35a4f0", "2015-11-11", "Pete", 1, 1, "githistorydata/main.py"
9 | "5a6172d", "2015-07-15", "Andy", 1, 0, "githistorydata/codeline.py"
10 | "5a6172d", "2015-07-15", "Andy", 8, 2, "githistorydata/commitdetail.py"
11 | "5a6172d", "2015-07-15", "Andy", 32, 0, "githistorydata/dataline.py"
12 | "5a6172d", "2015-07-15", "Andy", 8, 13, "tests/git_parsing__test.py"
13 | "12f2881", "2015-07-13", "Pete", 4, 4, "githistorydata/git.py"
14 | "8fd2224", "2015-07-13", "Andy", 18, 0, "githistorydata/commitdetail.py"
15 |
16 |
17 | It is intended to be easy to analyse the results, and to be convenient to manipulate with standard Unix command-line tools.
18 |
19 | For example, if you want to see very large changes to a specific file (in the Git project itself):
20 |
21 | $ cd git
22 | $ git-history-data > git-git-history.csv
23 | $ grep "diff.c" git-git-history.csv | awk -F',' '{print $4, $3, $1 }' | sort -n -r | head -5
24 | 4047 "Junio C Hamano" "3686aa1caf907d22fe318c28efe93f0e7870ba50"
25 | 1805 "Martin Langhoff" "e660e3997fbad830e5723336d61883f3a50dbc92"
26 | 1803 "Junio C Hamano" "c66b6c067e49c5ec80f1254daef79aa1c7f5ffce"
27 | 1795 "Junio C Hamano" "e9b5b75ca87f45292de8ecde5d4d0512ac9542cd"
28 | 1795 "Junio C Hamano" "b8ed7f0f40743dae6111c8950ba55051933298ca"
29 |
30 | Or the files with terrifying numbers of authors:
31 |
32 | $ awk -F', ' '{print $6, $3}' git-git-history.csv | sort | uniq | awk '{arr[$1]++}END{for (a in arr) print arr[a], a}' | sort -n -r | head -5
33 | 235 "Makefile"
34 | 198 "Documentation/config.txt"
35 | 137 "cache.h"
36 | 130 "git-svn.perl"
37 | 115 "diff.c"
38 |
39 | You can find more examples in the article "Learning about the Git codebase using git-history-data".
40 |
41 | ## Prerequisites
42 |
43 | git-history-data requires Git, Python 3 and the Python DateUtil library. On Debian, Ubuntu and similar you can install these with:
44 |
45 | sudo apt-get install git python3 python3-dateutil
46 |
47 | ## Install
48 |
49 | Get the code:
50 |
51 | cd
52 | git clone https://github.com/andybalaam/git-history-data.git
53 |
54 | Now add a line to your PATH by doing something like this:
55 |
56 | echo 'export PATH="$PATH:${HOME}/git-history-data"' >> ~/.bashrc
57 |
58 | (Log out and back in again, and use `echo $PATH` to check your PATH has been updated.)
59 |
60 | ## Use
61 |
62 | `cd` into the working tree of a git repository, and then run `git-history-data`, redirecting the result to a file. For example:
63 |
64 | cd git
65 | git-history-data > hist.csv
66 |
67 | Now hist.csv contains one line per file per commit in the entire history of the project, showing the commit ID, timestamp, author and the number of lines added and removed in that file in that commit.
68 |
69 | That's it.
70 |
71 | ## Contributing
72 |
73 | We welcome contributions!
74 |
75 | Before we can accept your pull request you must confirm that all your contributions are written by you. To do this, create a pull request that adds a file called `<your-username>-DCO1.1.txt` inside the `legal` directory; this file should be a copy of one of the other files in that directory. Once that pull request has been accepted, we can accept other pull requests as normal.
76 |
77 | A good area to start contributing is to make a little script that analyses the data coming out of git-history-data and produces a little report (on the command line). We plan to make a library of scripts that report e.g. which developer creates the most commits, which files are most frequently modified.
78 |
79 | ## Copyright
80 |
81 | git-history-data is written by Andy Balaam and the git-history-data contributors.
82 |
83 | (c) Copyright 2015-2020 IBM Corporation, Andy Balaam and the git-history-data contributors. Distributed under the [BSD 2-clause license](https://github.com/andybalaam/git-history-data/blob/master/LICENSE).
84 |
--------------------------------------------------------------------------------
/tests/test__expand_commits.py:
--------------------------------------------------------------------------------
1 |
2 | from githistorydata.codeline import CodeLine
3 | from githistorydata.commitdetail import CommitDetail
4 | from githistorydata.dataline import DataLine
5 | from githistorydata.expand_commits import expand_authors
6 | from githistorydata.expand_commits import expand_detail
7 | from githistorydata.expand_commits import expand_lines
8 | from githistorydata.filechanges import FileChanges
9 | from githistorydata.git import Git
10 | from githistorydata.logline import LogLine
11 |
12 | from tests.fakegit import FakeGit
13 |
14 | import unittest
15 |
16 |
class TestExpandCommits(unittest.TestCase):
    # Tests for expand_authors, expand_detail and expand_lines from
    # githistorydata.expand_commits.

    # Log lines naming a single author: one CodeLine each, last field 1.0.
    def test__Normal_commits_are_not_expanded(self):
        self.assertEqual(
            [
                CodeLine( "h1", "dt1", "a1", 1.0 ),
                CodeLine( "h2", "dt2", "a2", 1.0 ),
            ],
            list( expand_authors(
                [
                    LogLine( "h1", "dt1", "a1" ),
                    LogLine( "h2", "dt2", "a2" ),
                ]
            ) )
        )


    # Author field "a1,a2": one CodeLine per author, each carrying 0.5.
    def test__Shared_commits_are_expanded(self):
        self.assertEqual(
            [
                CodeLine( "h1", "dt1", "a1", 0.5 ),
                CodeLine( "h1", "dt1", "a2", 0.5 ),
            ],
            list( expand_authors(
                [
                    LogLine( "h1", "dt1", "a1,a2" ),
                ]
            ) )
        )


    # Single-author and three-author log lines mixed; the shared one is
    # expected to yield three CodeLines carrying 1.0/3 each.
    def test__Multiple_commits(self):
        self.assertEqual(
            [
                CodeLine( "h1", "dt1", "a1", 1.0 ),
                CodeLine( "h2", "dt2", "a1", 1.0/3 ),
                CodeLine( "h2", "dt2", "a2", 1.0/3 ),
                CodeLine( "h2", "dt2", "a3", 1.0/3 ),
                CodeLine( "h4", "dt4", "a4", 1.0 ),
            ],
            list( expand_authors(
                [
                    LogLine( "h1", "dt1", "a1" ),
                    LogLine( "h2", "dt2", "a1,a2,a3" ),
                    LogLine( "h4", "dt4", "a4" ),
                ]
            ) )
        )


    # Factor 1.0: each FileChanges entry comes back as a DataLine with
    # the same two counts and file name.
    def test__Expand_detail_for_single_author(self):
        self.assertEqual(
            [
                DataLine(
                    "myhash1",
                    "mydate1",
                    "Me",
                    32,
                    1,
                    "foo.txt",
                ),
                DataLine(
                    "myhash1",
                    "mydate1",
                    "Me",
                    0,
                    10,
                    "bar.pl",
                )
            ],
            list( expand_detail(
                CommitDetail(
                    "myhash1",
                    "mydate1",
                    "Me",
                    [
                        FileChanges( 32, 1, "foo.txt" ),
                        FileChanges( 0, 10, "bar.pl" ),
                    ]
                ),
                1.0
            ) )
        )


    # Factor 0.3: expected counts are the originals scaled down to whole
    # numbers (32 -> 9, 1 -> 0, 10 -> 3).
    def test__Expand_detail_with_weights_on_lines(self):
        self.assertEqual(
            [
                DataLine( "h", "d", "Me", 9, 0, "foo.txt" ),
                DataLine( "h", "d", "Me", 0, 3, "bar.pl" ),
            ],
            list( expand_detail(
                CommitDetail(
                    "h",
                    "d",
                    "Me",
                    [
                        FileChanges( 32, 1, "foo.txt" ),
                        FileChanges( 0, 10, "bar.pl" ),
                    ]
                ),
                0.3
            ) )
        )


    # NOTE(review): this method's name does not start with "test", so the
    # default unittest loader does not collect it -- confirm whether it is
    # disabled on purpose.
    # NOTE(review): the expected rows use "e" where the second CodeLine has
    # "j" as its second field, and the result of expand_lines is compared
    # without list() -- looks unfinished; verify against expand_lines.
    def Expand_lines_makes_one_line_for_each_modified_file(self):
        # FakeGit is handed two canned text blocks, each a header line
        # followed by three-column lines (two numbers and a file name).
        git = Git( FakeGit( """2976 Andy Balaam "desc."
10 2 f.txt
1 0 g.txt
""", """2976 Peter Broadbent "desc2."
0 18 h.txt
4 14 i.txt

""") )
        # Expected rows for the second commit are half the canned numbers
        # (18 -> 9, 4 -> 2, 14 -> 7), matching the 0.5 on its CodeLine.
        self.assertEqual(
            [
                DataLine( "h", "d", "a", 10, 2, "f.txt" ),
                DataLine( "h", "d", "a", 1, 0, "g.txt" ),
                DataLine( "j", "e", "b", 0, 9, "h.txt" ),
                DataLine( "j", "e", "b", 2, 7, "i.txt" ),
            ],
            expand_lines(
                git,
                CodeLine( "h", "d", "a", 1.0 ),
                CodeLine( "j", "j", "b", 0.5 ),
            )
        )
144 |
--------------------------------------------------------------------------------