├── .gitignore
├── .mailmap
├── .travis.yml
├── LICENSE
├── README.md
├── alembic.ini
├── alembic
    ├── README
    ├── env.py
    ├── script.py.mako
    └── versions
    │   └── d95efca6f334_add_start_time_to_repository_object.py
├── git-hammer-config.json
├── githammer
    ├── __init__.py
    ├── __main__.py
    ├── combinedcommit.py
    ├── config.py
    ├── countdict.py
    ├── dbtypes.py
    ├── frequency.py
    ├── hammer.py
    └── summary
    │   ├── __init__.py
    │   ├── graph.py
    │   └── table.py
├── requirements.in
├── requirements.txt
├── setup.py
└── tests
    ├── __init__.py
    ├── check_regression.py
    ├── data
        ├── .gitignore
        ├── repo-config.json
        ├── repository
        │   ├── HEAD
        │   ├── config
        │   ├── description
        │   ├── info
        │   │   ├── exclude
        │   │   └── refs
        │   ├── objects
        │   │   ├── info
        │   │   │   ├── commit-graph
        │   │   │   └── packs
        │   │   └── pack
        │   │   │   ├── pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.bitmap
        │   │   │   ├── pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.idx
        │   │   │   └── pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.pack
        │   ├── packed-refs
        │   └── refs
        │   │   └── .keep
        └── subrepository
        │   ├── HEAD
        │   ├── config
        │   ├── description
        │   ├── info
        │       ├── exclude
        │       └── refs
        │   ├── objects
        │       ├── info
        │       │   ├── commit-graph
        │       │   └── packs
        │       └── pack
        │       │   ├── pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.bitmap
        │       │   ├── pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.idx
        │       │   └── pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.pack
        │   ├── packed-refs
        │   └── refs
        │       └── .keep
    ├── hammer_test.py
    ├── test_frequency.py
    ├── test_init.py
    ├── test_limited_repository.py
    ├── test_multiple_projects.py
    ├── test_multiple_repositories.py
    ├── test_shallow_repository.py
    ├── test_single_repository.py
    ├── test_submodule.py
    └── test_update.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | ###
  2 | ### Python
  3 | ###
  4 | 
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | .hypothesis/
 54 | .pytest_cache/
 55 | 
 56 | # Translations
 57 | *.mo
 58 | *.pot
 59 | 
 60 | # Django stuff:
 61 | *.log
 62 | local_settings.py
 63 | db.sqlite3
 64 | 
 65 | # Flask stuff:
 66 | instance/
 67 | .webassets-cache
 68 | 
 69 | # Scrapy stuff:
 70 | .scrapy
 71 | 
 72 | # Sphinx documentation
 73 | docs/_build/
 74 | 
 75 | # PyBuilder
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | .python-version
 87 | 
 88 | # celery beat schedule file
 89 | celerybeat-schedule
 90 | 
 91 | # SageMath parsed files
 92 | *.sage.py
 93 | 
 94 | # Environments
 95 | .env
 96 | .venv
 97 | env/
 98 | venv*/
 99 | ENV/
100 | env.bak/
101 | venv.bak/
102 | 
103 | # Spyder project settings
104 | .spyderproject
105 | .spyproject
106 | 
107 | # Rope project settings
108 | .ropeproject
109 | 
110 | # mkdocs documentation
111 | /site
112 | 
113 | # mypy
114 | .mypy_cache/
115 | .dmypy.json
116 | dmypy.json
117 | 
118 | # Pyre type checker
119 | .pyre/
120 | 
121 | ###
122 | ### IDEs
123 | ###
124 | 
125 | .vscode/
126 | .idea/
127 | 
128 | ###
129 | ### Local
130 | ###
131 | 
132 | # To allow keeping the database in this directory
133 | *.sqlite
134 | 


--------------------------------------------------------------------------------
/.mailmap:
--------------------------------------------------------------------------------
1 | Jaakko Kangasharju <ashar@iki.fi> <jkan@futurice.com>
2 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - '3.7'
 4 |   - '3.9'
 5 | script:
 6 |   - coverage run --source githammer --omit 'githammer/__main__.py,githammer/summary/*.py' -m unittest tests
 7 | after_success:
 8 |   - codecov
 9 | deploy:
10 |   provider: pypi
11 |   user: __token__
12 |   password:
13 |     secure: BZgxmYYR84NSaOsZC9rKiRC+dA98uEHtOtmTg2iBivT/sVDeGOMEzFVdoITPziD5nv+BCeMArEdQfendVppI/dM6cWpRA9hoWMZhuOVQWytr/02mcl5HRpTObe4n1AXRDlhWiopsrKxz63NenlN8FichXUHPtYS5rKV0jwedh7SllzMJkqQITw+d70GjA5Y0HhJ/Eb3dW2CZCAksTv6Y8sjwlEy0Kqwg4jElARs1lVYFORS2waYwty/7W04S7v8Zd/qDjV9tf5IoFMAGw8Pk2PfC/AsWfFiZT1R3AOcK6H8yrtAYa0NMPNz/1y/Cb0o0vlJqpsblOO/LaXR4RtSlvKiXcQ/r6CknFhqL/rfFjfMfrseGHyJUmbwyPGnAs6X4Tb8WaDGhcGkcCRLOClPcvuBbrJIt8yEanRAG9NnKiOyg0iKpa87PDaqu12mYy7HmQl5aQIe+hoFpC5o/hUa0oRRxyPtm2FdiGwRXyOwbNq9BrxWjQfZGSBFLZZfqny09Qx3YQzZ8uPNhz8xHwgNz9bVH5tY3nqhJJbgcwN6jj6OU1ZtmCrqCoYhZiabdWYiUosDweAHFEG66FHD5XFhoawgIQFjb+BAajbRJ3ggrjraVgNe9NL1+01Ri05kpMO5ro6/QtNbQVJF6s+F+AaI5aU4FTkHHRcZGxk/GElBDvTo=
14 |   distributions: sdist bdist_wheel
15 |   skip_existing: true
16 |   skip_cleanup: true
17 |   on:
18 |     branch: master
19 |     tags: true
20 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Git Hammer
  2 | 
  3 | [![Build Status](https://travis-ci.com/asharov/git-hammer.svg?branch=master)](https://travis-ci.com/asharov/git-hammer)
  4 | [![codecov](https://codecov.io/gh/asharov/git-hammer/branch/master/graph/badge.svg)](https://codecov.io/gh/asharov/git-hammer)
  5 | [![PyPI](https://img.shields.io/pypi/v/git-hammer)](https://pypi.org/project/git-hammer/)
  6 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
  7 | 
  8 | Git Hammer is a statistics tool for projects in git repositories.
  9 | Its major feature is tracking the number of lines authored by
 10 | each person for every commit, but it currently includes some
 11 | other useful statistics as well, and the data that it collects
 12 | could be used in many new ways.
 13 | 
 14 | Git Hammer is under active maintenance. New features appear when
 15 | a need or desire for them exists. If Git Hammer lacks some
 16 | feature you would like, all kinds of contributions are welcome,
 17 | from simple feature suggestions to complete pull requests
 18 | implementing the feature.
 19 | 
 20 | ## Setup
 21 | 
 22 | By default, Git Hammer stores the historical information from
 23 | the repository in an SQLite database file in the current
 24 | directory. If you wish to change this default, set the
 25 | `DATABASE_URL` environment variable to a database URL
 26 | according to the [SQLAlchemy engine documentation](https://docs.sqlalchemy.org/en/latest/core/engines.html).
 27 | This database will be created if it does not already exist.
 28 | Note that if you wish to use a database other than SQLite,
 29 | you may need to install the appropriate Python module to
 30 | connect to the database.
 31 | 
 32 | You will need Python 3, at least version 3.5. It is a good
 33 | idea to set up a virtual environment, like this:
 34 | ```bash
 35 | python3 -m venv venv
 36 | source venv/bin/activate
 37 | ```
 38 | Run these commands wherever you want to run Git Hammer. If
 39 | you only want to use Git Hammer, you can install it with
 40 | `pip`:
 41 | ```bash
 42 | pip install git-hammer
 43 | ```
 44 | If you want to use the latest development version or
 45 | contribute to Git Hammer development, you need to clone
 46 | this repository and run
 47 | ```bash
 48 | pip install -r requirements.txt
 49 | ```
 50 | in the directory where you cloned Git Hammer (in this
 51 | case you should create the virtual environment above in
 52 | that directory as well). The rest of the commands below
 53 | assume that one of these has been done.
 54 | 
 55 | ## Creating a Project
 56 | 
 57 | Now pick some git repository to run Git Hammer on. The examples
 58 | below use a hypothetical project called "baffle". You should
 59 | replace the name with your own.
 60 | 
 61 | ```bash
 62 | python -m githammer init-project baffle ~/projects/baffle
 63 | ```
 64 | This will create the database containing the project baffle
 65 | from the repository directory (here `~/projects/baffle`;
 66 | replace that with the path to your repository). Git Hammer
 67 | will print out a progress report while it goes through all
 68 | the commits in the repository.
 69 | 
 70 | Usually, you want your main development branch to be checked
 71 | out in the repository, and not change the checked-out branch
 72 | when updating Git Hammer data. This makes the statistics more
 73 | relevant for the whole development team.
 74 | 
 75 | When the repository gets new development, first update the
 76 | code in the repository to the latest version, and then run
 77 | ```bash
 78 | python -m githammer update-project baffle
 79 | ```
 80 | This will process all the new commits that were not yet seen
 81 | into the database.
 82 | 
 83 | If the repository is very old, with much history, you might
 84 | not be interested in capturing all of it. `init-project`
 85 | has the option `--earliest-commit-date` that provides a date
 86 | so that commits prior to that date are not included. This
 87 | would be used like
 88 | ```bash
 89 | python -m githammer init-project baffle ~/projects/baffle --earliest-commit-date 2018-01-01
 90 | ```
 91 | It is currently not possible to later add commits that were
 92 | excluded by date when the repository was added.
 93 | 
 94 | ## Showing Statistics
 95 | 
 96 | After the project has been initialized and the repository added,
 97 | you can show some information on it. First try out
 98 | ```bash
 99 | python -m githammer summary baffle
100 | ```
101 | This will print out three tables: The number of commits for
102 | each person, the number of lines of code written by each
103 | person in the head version, and the number of tests written
104 | by each person in the head version. This last is only printed
105 | if the repository configuration includes test recognition (see
106 | below).
107 | 
108 | There are a few graphs that Git Hammer can display. To see the
109 | types of supported graphs, enter
110 | ```bash
111 | python -m githammer graph --help
112 | ```
113 | The graphs are
114 | 
115 | Type | Description
116 | -----|------------
117 | line-count    | Number of lines in the project over time
118 | line-author-count | Same as above, except split per author
119 | test-count | Number of tests in the project over time
120 | test-author-count | Same as above, except split per author
121 | day-of-week | A histogram showing the number of commits for each day of the week
122 | time-of-day | A histogram showing the number of commits for each hour of the day
123 | 
124 | ## Configuring Sources and Tests
125 | 
126 | By default, Git Hammer assumes that every file in the repository
127 | is a source file and that there are no tests. This can be
128 | modified by creating a configuration file. The configuration
129 | file is JSON having some predefined keys:
130 | ```json
131 | {
132 |   "sourceFiles": [
133 |     "Sources/**/*.py",
134 |     "Tests/**/*.py",
135 |     ...
136 |   ],
137 |   "excludedSourceFiles": [
138 |     "Sources/Contrib/**"
139 |   ],
140 |   "testFiles": [
141 |     "Tests/**/*.py"
142 |   ],
143 |   "testLineRegex": "def test_"
144 | }
145 | ```
146 | 
147 | Here, `sourceFiles` is a list of patterns that match the source
148 | files. Any file not matching one of these patterns is not
149 | considered by Git Hammer. If `sourceFiles` captures too many
150 | files, for instance autogenerated sources, `excludedSourceFiles`
151 | is a list of patterns that will not be considered source even
152 | if they match some `sourceFiles` pattern.
153 | 
154 | To include test counts, `testFiles` needs to be specified. This
155 | is, again, a list of patterns matching files that contain tests
156 | (it is up to you if you wish to define this to mean unit tests,
157 | integration tests, UI tests, etc.). Git Hammer will look inside
158 | each of the test files. Any line matching the Python regular
159 | expression `testLineRegex` is counted as one test. So
160 | `testLineRegex` should typically match whatever acts as the
161 | header of a test. Here, it is the definition of a function
162 | whose name starts with `test_`. Other projects, and especially
163 | other languages, will have different conventions.
164 | 
165 | All the file name patterns above (`sourceFiles`,
166 | `excludedSourceFiles`, `testFiles`) are glob patterns as
167 | defined by the
168 | [globber library](https://github.com/asharov/globber).
169 | 
170 | The configuration file can be given as an option to the
171 | `init-project` command:
172 | ```bash
173 | python -m githammer init-project baffle ~/projects/baffle --configuration ./baffle-config.json
174 | ```
175 | If the `--configuration` option is not given, but the repository
176 | contains a file named `git-hammer-config.json`, this file will
177 | be read as the configuration. This way you can keep the Git
178 | Hammer configuration for a repository in that repository.
179 | 
180 | Note: The configuration file path, as well as the repository
181 | path, will be stored in the database, so they should not be
182 | moved. If the configuration changes, data that was already
183 | in the database will not be reprocessed with the new
184 | configuration.
185 | 
186 | There is also a command to check the effects of a
187 | configuration. Run
188 | ```bash
189 | python -m githammer list-sources ~/projects/baffle --configuration ./baffle-config.json
190 | ```
191 | to print out a list of all files considered source or test files,
192 | and for each test file, the lines considered to be tests. A missing
193 | `--configuration` option is treated in the same way as with
194 | `init-project` above.
195 | 
196 | A partial output of the `list-sources` command on the Git Hammer
197 | repository looks like this:
198 | ```
199 | S: githammer/dbtypes.py
200 | S: githammer/frequency.py
201 | S: githammer/hammer.py
202 | T: tests/__init__.py
203 | T: tests/check_regression.py
204 | T: tests/hammer_test.py
205 | T: tests/test_init.py
206 | |---    def test_plain_init_does_not_create_database(self):
207 | |---    def test_update_fails_when_database_not_created(self):
208 | ```
209 | Source files are marked with `S`, test files with `T`, and after
210 | each test file, its test lines are printed indented with `|---`.
211 | 
212 | ## Multi-Repository Projects
213 | 
214 | Sometimes, a team works on multiple repositories that all still
215 | belong to the same project. For instance, it may be better to split
216 | a piece of functionality off into a library in an independent
217 | repository. Git Hammer supports such projects by not limiting
218 | the project data to a single repository.
219 | 
220 | To add another repository to an existing project, just use
221 | `add-repository`:
222 | ```bash
223 | python -m githammer add-repository baffle ~/projects/baffle-common
224 | ```
225 | This will process the new repository, adding it to the project
226 | database. After this, any summary information will include
227 | data from all repositories of the project. Like `init-project`,
228 | `add-repository` also accepts the `--configuration` and
229 | `--earliest-commit-date` options with the same semantics for
230 | the added repository.
231 | 
232 | ## Database Migrations
233 | 
234 | If you update Git Hammer, it is possible that the database
235 | schema is updated in the new version. This means that you will
236 | need to migrate any existing databases to the latest version.
237 | Migration is performed by running
238 | ```bash
239 | HAMMER_DATABASE_URL=<URL of database to migrate> alembic upgrade head
240 | ```
241 | (If you haven't installed Git Hammer with `pip`, run this command
242 | in the project directory and add `PYTHONPATH=.` at the beginning.)
243 | 
244 | It is safe to run this even if the database schema has not changed
245 | in the update, so there is no need to try and figure that out before
246 | running the migration.
247 | 
248 | ## License
249 | 
250 | Git Hammer is licensed under the Apache Software License,
251 | version 2.0. See the LICENSE file for precise license terms
252 | and conditions.
253 | 


--------------------------------------------------------------------------------
/alembic.ini:
--------------------------------------------------------------------------------
 1 | # A generic, single database configuration.
 2 | 
 3 | [alembic]
 4 | # path to migration scripts
 5 | script_location = alembic
 6 | 
 7 | # template used to generate migration files
 8 | # file_template = %%(rev)s_%%(slug)s
 9 | 
10 | # timezone to use when rendering the date
11 | # within the migration file as well as the filename.
12 | # string value is passed to dateutil.tz.gettz()
13 | # leave blank for localtime
14 | # timezone =
15 | 
16 | # max length of characters to apply to the
17 | # "slug" field
18 | # truncate_slug_length = 40
19 | 
20 | # set to 'true' to run the environment during
21 | # the 'revision' command, regardless of autogenerate
22 | # revision_environment = false
23 | 
24 | # set to 'true' to allow .pyc and .pyo files without
25 | # a source .py file to be detected as revisions in the
26 | # versions/ directory
27 | # sourceless = false
28 | 
29 | # version location specification; this defaults
30 | # to alembic/versions.  When using multiple version
31 | # directories, initial revisions must be specified with --version-path
32 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions
33 | 
34 | # the output encoding used when revision files
35 | # are written from script.py.mako
36 | # output_encoding = utf-8
37 | 
38 | # sqlalchemy.url = driver://user:pass@localhost/dbname
39 | 
40 | 
41 | [post_write_hooks]
42 | # post_write_hooks defines scripts or Python functions that are run
43 | # on newly generated revision scripts.  See the documentation for further
44 | # detail and examples
45 | 
46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint
47 | # hooks=black
48 | # black.type=console_scripts
49 | # black.entrypoint=black
50 | # black.options=-l 79
51 | 
52 | # Logging configuration
53 | [loggers]
54 | keys = root,sqlalchemy,alembic
55 | 
56 | [handlers]
57 | keys = console
58 | 
59 | [formatters]
60 | keys = generic
61 | 
62 | [logger_root]
63 | level = WARN
64 | handlers = console
65 | qualname =
66 | 
67 | [logger_sqlalchemy]
68 | level = WARN
69 | handlers =
70 | qualname = sqlalchemy.engine
71 | 
72 | [logger_alembic]
73 | level = INFO
74 | handlers =
75 | qualname = alembic
76 | 
77 | [handler_console]
78 | class = StreamHandler
79 | args = (sys.stderr,)
80 | level = NOTSET
81 | formatter = generic
82 | 
83 | [formatter_generic]
84 | format = %(levelname)-5.5s [%(name)s] %(message)s
85 | datefmt = %H:%M:%S
86 | 


--------------------------------------------------------------------------------
/alembic/README:
--------------------------------------------------------------------------------
1 | Generic single-database configuration.


--------------------------------------------------------------------------------
/alembic/env.py:
--------------------------------------------------------------------------------
 1 | from logging.config import fileConfig
 2 | 
 3 | from sqlalchemy import engine_from_config
 4 | from sqlalchemy import pool
 5 | 
 6 | from alembic import context
 7 | 
 8 | from githammer import dbtypes
 9 | 
10 | import os
11 | 
# The Alembic Config object; it gives access to the values of the
# .ini file in use.
config = context.config

# Configure Python logging from the logging sections of that .ini file.
fileConfig(config.config_file_name)

# Metadata of the Git Hammer model classes, handed to Alembic so that
# 'autogenerate' can compare it against the database schema.
target_metadata = dbtypes.Base.metadata

# Other values from the config, defined by the needs of env.py,
# can be acquired like this:
# my_important_option = config.get_main_option("my_important_option")
30 | 
31 | 
def run_migrations_offline():
    """Run the migrations without connecting to a database.

    The context is configured from a database URL alone instead of an
    Engine, so no DBAPI driver has to be available. Calls to
    context.execute() made by the migrations emit the given string to
    the script output.

    The URL is read from the HAMMER_DATABASE_URL environment variable;
    a KeyError is raised when that variable is not set.
    """
    offline_options = {
        'url': os.environ['HAMMER_DATABASE_URL'],
        'target_metadata': target_metadata,
        'literal_binds': True,
        'dialect_opts': {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
54 | 
55 | 
def run_migrations_online():
    """Run the migrations against a live database connection.

    An Engine is built from the main section of the Alembic .ini file,
    with the database URL taken from the HAMMER_DATABASE_URL environment
    variable (KeyError when unset). A connection from that Engine is then
    associated with the migration context.
    """
    engine_settings = config.get_section(config.config_ini_section)
    engine_settings['sqlalchemy.url'] = os.environ['HAMMER_DATABASE_URL']
    engine = engine_from_config(engine_settings, prefix="sqlalchemy.", poolclass=pool.NullPool)

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
78 | 
79 | 
# Run the migrations in whichever mode the Alembic context reports.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
84 | 


--------------------------------------------------------------------------------
/alembic/script.py.mako:
--------------------------------------------------------------------------------
 1 | """${message}
 2 | 
 3 | Revision ID: ${up_revision}
 4 | Revises: ${down_revision | comma,n}
 5 | Create Date: ${create_date}
 6 | 
 7 | """
 8 | from alembic import op
 9 | import sqlalchemy as sa
10 | ${imports if imports else ""}
11 | 
12 | # revision identifiers, used by Alembic.
13 | revision = ${repr(up_revision)}
14 | down_revision = ${repr(down_revision)}
15 | branch_labels = ${repr(branch_labels)}
16 | depends_on = ${repr(depends_on)}
17 | 
18 | 
19 | def upgrade():
20 |     ${upgrades if upgrades else "pass"}
21 | 
22 | 
23 | def downgrade():
24 |     ${downgrades if downgrades else "pass"}
25 | 


--------------------------------------------------------------------------------
/alembic/versions/d95efca6f334_add_start_time_to_repository_object.py:
--------------------------------------------------------------------------------
 1 | """Add start time to Repository object
 2 | 
 3 | Revision ID: d95efca6f334
 4 | Revises: 
 5 | Create Date: 2020-01-06 16:24:08.055349
 6 | 
 7 | """
 8 | from alembic import op
 9 | import sqlalchemy as sa
10 | 
11 | 
# Revision identifiers, used by Alembic.
# down_revision is None: this revision has no parent revision.
revision = 'd95efca6f334'
down_revision = None
branch_labels = None
depends_on = None
17 | 
18 | 
def upgrade():
    """Add the nullable start-time columns to the 'repositories' table."""
    new_columns = (
        sa.Column('start_time', sa.DateTime(), nullable=True),
        sa.Column('start_time_utc_offset', sa.Integer(), nullable=True),
    )
    for column in new_columns:
        op.add_column('repositories', column)
22 | 
23 | 
def downgrade():
    """Drop the start-time columns from the 'repositories' table again."""
    # Dropped in the reverse order of their addition in upgrade().
    for column_name in ('start_time_utc_offset', 'start_time'):
        op.drop_column('repositories', column_name)
27 | 


--------------------------------------------------------------------------------
/git-hammer-config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "sourceFiles": [
 3 |     "githammer/**/*.py",
 4 |     "tests/*.py"
 5 |   ],
 6 |   "testFiles": [
 7 |     "tests/*.py"
 8 |   ],
 9 |   "testLineRegex": "def test_"
10 | }
11 | 


--------------------------------------------------------------------------------
/githammer/__init__.py:
--------------------------------------------------------------------------------
1 | from .frequency import Frequency
2 | from .hammer import Hammer, DatabaseNotInitializedError, OldDatabaseSchemaError
3 | from .hammer import iter_all_project_names, iter_sources_and_tests
4 | 


--------------------------------------------------------------------------------
/githammer/__main__.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019 Jaakko Kangasharju
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #    http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import argparse
 16 | import datetime
 17 | import os
 18 | import sys
 19 | import matplotlib.pyplot as plt
 20 | 
 21 | from dateutil.parser import parse
 22 | 
 23 | from .hammer import Hammer, iter_all_project_names, iter_sources_and_tests
 24 | from .summary import *
 25 | 
 26 | 
 27 | def make_hammer(project):
 28 |     database_url = os.environ.get('DATABASE_URL')
 29 |     if database_url:
 30 |         return Hammer(project, database_url=database_url)
 31 |     else:
 32 |         return Hammer(project)
 33 | 
 34 | 
 35 | def update_project(options):
 36 |     hammer = make_hammer(options.project)
 37 |     hammer.update_data()
 38 | 
 39 | 
 40 | def add_repository(options):
 41 |     hammer = make_hammer(options.project)
 42 |     if options.earliest_commit_date:
 43 |         date = parse(options.earliest_commit_date)
 44 |         if date.tzinfo is None or date.tzinfo.utcoffset(date) is None:
 45 |             date = date.replace(tzinfo=datetime.timezone.utc)
 46 |         hammer.add_repository(options.repository, options.configuration, earliest_date=date)
 47 |     else:
 48 |         hammer.add_repository(options.repository, options.configuration)
 49 | 
 50 | 
 51 | def list_projects(_):
 52 |     database_url = os.environ.get('DATABASE_URL')
 53 |     if database_url:
 54 |         iterator = iter_all_project_names(database_url=database_url)
 55 |     else:
 56 |         iterator = iter_all_project_names()
 57 |     for name in iterator:
 58 |         print(name)
 59 | 
 60 | 
 61 | def list_sources(options):
 62 |     for item_type, item in iter_sources_and_tests(options.repository, options.configuration):
 63 |         if item_type == 'source-file':
 64 |             print('S: {}'.format(item))
 65 |         elif item_type == 'test-file':
 66 |             print('T: {}'.format(item))
 67 |         elif item_type == 'test-line':
 68 |             print('|---{}'.format(item))
 69 | 
 70 | 
 71 | def plot_graph(options):
 72 |     hammer = make_hammer(options.project)
 73 |     figure = None
 74 |     if options.type == 'line-count':
 75 |         figure = total_lines(hammer)
 76 |     elif options.type == 'line-author-count':
 77 |         figure = lines_per_author(hammer)
 78 |     elif options.type == 'test-count':
 79 |         figure = total_tests(hammer)
 80 |     elif options.type == 'test-author-count':
 81 |         figure = tests_per_author(hammer)
 82 |     elif options.type == 'day-of-week':
 83 |         figure = commits_per_weekday(hammer)
 84 |     elif options.type == 'time-of-day':
 85 |         figure = commits_per_hour(hammer)
 86 |     if figure:
 87 |         if options.output_file:
 88 |             figure.savefig(options.output_file)
 89 |         else:
 90 |             plt.show()
 91 | 
 92 | 
 93 | def print_summary(options):
 94 |     hammer = make_hammer(options.project)
 95 |     handle = open(options.output_file, 'w') if options.output_file else sys.stdout
 96 |     handle.write(str(commit_count_table(hammer)))
 97 |     handle.write('\n\n')
 98 |     handle.write(str(line_count_table(hammer)))
 99 |     test_counts = test_count_table(hammer)
100 |     if test_counts:
101 |         handle.write('\n\n')
102 |         handle.write(str(test_counts))
103 |     handle.write('\n')
104 |     if handle is not sys.stdout:
105 |         handle.close()
106 | 
107 | 
# Command line definition: one sub-parser per command, each of which
# stores its handler function in the 'func' default.
parser = argparse.ArgumentParser(prog='githammer',
                                 description='Extract statistics from Git repositories')
command_parsers = parser.add_subparsers()

init_parser = command_parsers.add_parser('init-project', help='Initialize a new project')
init_parser.add_argument('project', help='Name of the project to create')
init_parser.add_argument('repository', help='Git repository to create the project from')
init_parser.add_argument('-c', '--configuration', help='Path to the repository configuration file')
init_parser.add_argument('--earliest-commit-date', help='Ignore commits prior to this date')
init_parser.set_defaults(func=add_repository)

update_parser = command_parsers.add_parser('update-project', help='Update an existing project with new commits')
update_parser.add_argument('project', help='Name of the project to update')
update_parser.set_defaults(func=update_project)

add_parser = command_parsers.add_parser('add-repository', help='Add a repository to an existing project')
add_parser.add_argument('project', help='Project to add the repository to')
add_parser.add_argument('repository', help='Path to the git repository to add')
add_parser.add_argument('-c', '--configuration', help='Path to the repository configuration file')
add_parser.add_argument('--earliest-commit-date', help='Ignore commits prior to this date')
add_parser.set_defaults(func=add_repository)

project_list_parser = command_parsers.add_parser('list-projects', help='List names of existing projects')
project_list_parser.set_defaults(func=list_projects)

source_list_parser = command_parsers.add_parser('list-sources', help='List source files and test lines in repository')
source_list_parser.add_argument('repository', help='Git repository to examine')
source_list_parser.add_argument('-c', '--configuration', help='Path to the repository configuration file')
source_list_parser.set_defaults(func=list_sources)

graph_parser = command_parsers.add_parser('graph', help='Draw line count per committer graph')
graph_parser.add_argument('project', help='Name of the project to graph')
graph_parser.add_argument('type', help='The type of graph to make',
                          choices=['line-count', 'line-author-count', 'test-count', 'test-author-count', 'day-of-week',
                                   'time-of-day'])
graph_parser.add_argument('-o', '--output-file',
                          help='Name of the file to save the graph to. If omitted, graph is displayed on screen')
graph_parser.set_defaults(func=plot_graph)

summary_parser = command_parsers.add_parser('summary',
                                            help='Print summary information of the current state of the project')
summary_parser.add_argument('project', help='Name of the project to summarize')
summary_parser.add_argument('-o', '--output-file',
                            help='Name of the file to print the summary to. If omitted, summary is printed to standard output')
summary_parser.set_defaults(func=print_summary)

parsed_args = parser.parse_args()
# Sub-commands are optional for argparse, so without one the namespace has
# no 'func' attribute. Report a usage error instead of an AttributeError.
if not hasattr(parsed_args, 'func'):
    parser.error('a command is required')
parsed_args.func(parsed_args)
156 | 


--------------------------------------------------------------------------------
/githammer/combinedcommit.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 Jaakko Kangasharju
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #    http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from operator import attrgetter
16 | 
17 | from .countdict import add_count_dict
18 | 
19 | 
def _iter_combined_commits(iterators):
    """Merge several commit iterators into one stream of CombinedCommit objects.

    At every step the pending commit with the earliest commit_time (the
    first iterator wins a tie) becomes the latest commit of its iterator,
    and a CombinedCommit built from the latest commit of every iterator is
    yielded. Iterators that have not produced a commit yet contribute None.
    """
    count = len(iterators)
    latest = [None] * count
    pending = [None] * count
    exhausted = [False] * count

    def advance(position):
        # Fetch the next pending commit, or mark the iterator as exhausted.
        try:
            pending[position] = next(iterators[position])
        except StopIteration:
            exhausted[position] = True

    for position in range(count):
        advance(position)

    while not all(exhausted):
        chosen = None
        chosen_time = None
        for position, candidate in enumerate(pending):
            if exhausted[position] or not candidate:
                continue
            if not chosen_time or candidate.commit_time < chosen_time:
                chosen = position
                chosen_time = candidate.commit_time
        if chosen is None:
            return
        latest[chosen] = pending[chosen]
        yield CombinedCommit(latest)
        advance(chosen)
47 | 
48 | 
class CombinedCommit:
    """Aggregate of the latest commits of several repositories.

    The commit time and its UTC offset are taken from the commit with the
    greatest commit_time; line_counts and test_counts are the sums of the
    corresponding count dictionaries of all commits. None entries in
    `commits` are ignored.
    """

    def __init__(self, commits):
        present = [commit for commit in commits if commit is not None]
        newest = max(present, key=attrgetter('commit_time'))
        self.commit_time = newest.commit_time
        self.commit_time_utc_offset = newest.commit_time_utc_offset
        line_counts = {}
        test_counts = {}
        for commit in present:
            line_counts = add_count_dict(line_counts, commit.line_counts)
            test_counts = add_count_dict(test_counts, commit.test_counts)
        self.line_counts = line_counts
        self.test_counts = test_counts
62 | 


--------------------------------------------------------------------------------
/githammer/config.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import errno
 3 | import json
 4 | 
 5 | from globber import globber
 6 | 
 7 | 
 8 | def _matches_file_pattern(file, pattern):
 9 |     if type(pattern) is str:
10 |         return globber.match(pattern, file)
11 |     elif type(pattern) is list:
12 |         return any(_matches_file_pattern(file, p) for p in pattern)
13 |     else:
14 |         raise TypeError('Pattern {} not list or string'.format(pattern))
15 | 
16 | 
17 | class Configuration:
18 |     def __init__(self, file_path=None):
19 |         if file_path:
20 |             try:
21 |                 fp = open(file_path, 'r')
22 |             except OSError as error:
23 |                 if error.errno == errno.ENOENT:
24 |                     config_json = {}
25 |                 else:
26 |                     raise error
27 |             else:
28 |                 try:
29 |                     config_json = json.load(fp)
30 |                 finally:
31 |                     fp.close()
32 |         else:
33 |             config_json = {}
34 |         if 'sourceFiles' in config_json:
35 |             self.source_files = config_json['sourceFiles']
36 |         else:
37 |             self.source_files = None
38 |         if 'excludedSourceFiles' in config_json:
39 |             self.excluded_source_files = config_json['excludedSourceFiles']
40 |         else:
41 |             self.excluded_source_files = None
42 |         if 'testFiles' in config_json:
43 |             self.test_files = config_json['testFiles']
44 |         else:
45 |             self.test_files = None
46 |         if 'testLineRegex' in config_json:
47 |             self.test_line_regex = re.compile(config_json['testLineRegex'])
48 |         else:
49 |             self.test_line_regex = None
50 | 
51 |     def is_source_file(self, path):
52 |         is_included = self.source_files is None or _matches_file_pattern(path, self.source_files)
53 |         is_excluded = self.excluded_source_files is not None and _matches_file_pattern(path, self.excluded_source_files)
54 |         return is_included and not is_excluded
55 | 
56 |     def is_test_file(self, path):
57 |         if not self.is_source_file(path):
58 |             return False
59 |         return self.test_files is not None and _matches_file_pattern(path, self.test_files)
60 | 
61 |     def iter_test_lines(self, path, lines):
62 |         if not self.is_test_file(path):
63 |             return
64 |         for line in lines:
65 |             if self.test_line_regex.search(line):
66 |                 yield line
67 | 


--------------------------------------------------------------------------------
/githammer/countdict.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 Jaakko Kangasharju
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #    http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
def normalize_count_dict(count_dict):
    """Return a new dict with the entries of `count_dict` whose value is not 0."""
    normalized = {}
    for key, value in count_dict.items():
        if value != 0:
            normalized[key] = value
    return normalized
18 | 
19 | 
def subtract_count_dict(base_dict, dict_to_subtract):
    """Return `base_dict` minus `dict_to_subtract`, key by key.

    Keys missing from `base_dict` count as 0; entries whose result is 0
    are left out. Neither argument is modified.
    """
    remaining = base_dict.copy()
    for key in dict_to_subtract:
        remaining[key] = remaining.get(key, 0) - dict_to_subtract[key]
    return normalize_count_dict(remaining)
25 | 
26 | 
def add_count_dict(base_dict, dict_to_add):
    """Return the key-by-key sum of `base_dict` and `dict_to_add`.

    Keys missing from `base_dict` count as 0; entries whose result is 0
    are left out. Neither argument is modified.
    """
    total = base_dict.copy()
    for key in dict_to_add:
        total[key] = total.get(key, 0) + dict_to_add[key]
    return normalize_count_dict(total)
32 | 


--------------------------------------------------------------------------------
/githammer/dbtypes.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019 Jaakko Kangasharju
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #    http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import datetime
 16 | import re
 17 | 
 18 | import git
 19 | from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, orm
 20 | from sqlalchemy_utils import JSONType
 21 | from sqlalchemy.ext.declarative import declarative_base
 22 | from sqlalchemy.orm import relationship
 23 | from sqlalchemy.schema import MetaData
 24 | 
 25 | from .config import Configuration
 26 | 
 27 | 
 28 | def _time_offset_to_local_time(time, offset):
 29 |     timezone = datetime.timezone(datetime.timedelta(seconds=offset))
 30 |     return time.replace(tzinfo=datetime.timezone.utc).astimezone(timezone)
 31 | 
 32 | 
# Name templates for indexes ("ix"), unique ("uq"), check ("ck"),
# foreign key ("fk") and primary key ("pk") constraints, passed to the
# MetaData so that these objects get names derived from their table and
# columns.
_naming_convention = {
    "ix": 'ix_%(column_0_label)s',
    "uq": "uq_%(table_name)s_%(column_0_name)s",
    "ck": "ck_%(table_name)s_%(constraint_name)s",
    "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s",
    "pk": "pk_%(table_name)s"
}
_metadata = MetaData(naming_convention=_naming_convention)
# Declarative base class of all model classes in this module.
Base = declarative_base(metadata=_metadata)
 42 | 
 43 | 
class Project(Base):
    """A project, identified by its name (table 'projects')."""
    __tablename__ = 'projects'

    # The project name doubles as the primary key.
    project_name = Column(String, primary_key=True)
 48 | 
 49 | 
 50 | class Repository(Base):
 51 |     __tablename__ = 'repositories'
 52 | 
 53 |     id = Column(Integer, primary_key=True)
 54 |     repository_path = Column(String)
 55 |     configuration_file_path = Column(String)
 56 |     head_commit_id = Column(String, ForeignKey('commits.hexsha'))
 57 |     start_time = Column(DateTime())
 58 |     start_time_utc_offset = Column(Integer)
 59 | 
 60 |     head_commit = relationship('Commit', foreign_keys=[head_commit_id])
 61 | 
 62 |     def __init__(self, **kwargs):
 63 |         super(Repository, self).__init__(**kwargs)
 64 |         self._init_properties()
 65 | 
 66 |     @orm.reconstructor
 67 |     def _init_properties(self):
 68 |         self.configuration = Configuration(self.configuration_file_path)
 69 |         self.git_repository = git.Repo(self.repository_path)
 70 | 
 71 |     def start_time_tz(self):
 72 |         if self.start_time:
 73 |             return _time_offset_to_local_time(self.start_time, self.start_time_utc_offset)
 74 |         else:
 75 |             return None
 76 | 
 77 | 
class ProjectRepository(Base):
    """Association of a project with a repository (table 'projectrepository')."""
    __tablename__ = 'projectrepository'

    # Both columns together form the primary key.
    project_name = Column(String, ForeignKey('projects.project_name'), primary_key=True)
    # NOTE(review): declared as String although the referenced
    # repositories.id column is an Integer - confirm whether this is intended.
    repository_id = Column(String, ForeignKey('repositories.id'), primary_key=True)
 83 | 
 84 | 
 85 | class Author(Base):
 86 |     __tablename__ = 'authors'
 87 |     _name_regex = re.compile('^(.*)\\s+(<.*>)$')
 88 | 
 89 |     canonical_name = Column(String, primary_key=True)
 90 |     aliases = Column(JSONType)
 91 | 
 92 |     @property
 93 |     def name(self):
 94 |         match = Author._name_regex.match(self.canonical_name)
 95 |         if match:
 96 |             return match.group(1)
 97 |         else:
 98 |             return None
 99 | 
100 |     def __eq__(self, other):
101 |         return self.canonical_name == other.canonical_name and self.aliases == other.aliases
102 | 
103 |     def __hash__(self):
104 |         return hash(self.canonical_name)
105 | 
106 |     def __repr__(self):
107 |         return self.name
108 | 
109 | 
class Commit(Base):
    """A processed commit (table 'commits').

    Besides the mapped columns, every instance carries the unmapped
    dictionaries `line_counts` and `test_counts`, which start out empty.
    """
    __tablename__ = 'commits'

    hexsha = Column(String, primary_key=True)
    author_name = Column(String, ForeignKey('authors.canonical_name'), nullable=False)
    added_lines = Column(Integer)
    deleted_lines = Column(Integer)
    commit_time = Column(DateTime(), nullable=False)
    commit_time_utc_offset = Column(Integer, nullable=False)
    parent_ids = Column(JSONType)
    repository_id = Column(Integer, ForeignKey('repositories.id'))

    author = relationship('Author', back_populates='commits', lazy='joined')

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._init_properties()

    @orm.reconstructor
    def _init_properties(self):
        # Runs both for new objects and for objects loaded from the database.
        self.line_counts, self.test_counts = {}, {}

    def commit_time_tz(self):
        """Return commit_time in the commit's recorded UTC offset."""
        local_time = _time_offset_to_local_time(self.commit_time, self.commit_time_utc_offset)
        return local_time
135 | 
136 | 
# Attached after the Commit class body because the ordering refers to
# Commit.commit_time; pairs with Commit.author through back_populates.
Author.commits = relationship('Commit', order_by=Commit.commit_time, back_populates='author')
138 | 
139 | 
class AuthorCommitDetail(Base):
    """Line and test counts of one author at one commit (table 'authorcommit')."""
    __tablename__ = 'authorcommit'

    # The author and the commit together form the primary key.
    author_name = Column(String, ForeignKey('authors.canonical_name'), primary_key=True)
    commit_id = Column(String, ForeignKey('commits.hexsha'), primary_key=True)
    line_count = Column(Integer, nullable=False)
    # Unlike line_count, the test count may be NULL.
    test_count = Column(Integer)

    author = relationship('Author')
    commit = relationship('Commit')
150 | 


--------------------------------------------------------------------------------
/githammer/frequency.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 Jaakko Kangasharju
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #    http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from enum import Enum
16 | import datetime
17 | 
18 | from dateutil.relativedelta import relativedelta
19 | 
20 | 
class Frequency(Enum):
    """Length of a reporting interval: a day, a week, a month or a year."""
    daily = 1
    weekly = 2
    monthly = 3
    yearly = 4

    def next_instance(self, dt):
        """Return `dt` moved forward by one interval of this frequency."""
        unit_names = {
            Frequency.daily: 'days',
            Frequency.weekly: 'weeks',
            Frequency.monthly: 'months',
            Frequency.yearly: 'years',
        }
        return dt + relativedelta(**{unit_names[self]: 1})

    def start_of_interval(self, dt):
        """Return midnight at the start of the interval that contains `dt`.

        Weeks start on Monday. The tzinfo of `dt` is kept.
        """
        if self is Frequency.daily:
            midnight = datetime.time(tzinfo=dt.tzinfo)
            return datetime.datetime.combine(dt.date(), midnight)
        if self is Frequency.weekly:
            first_day = dt - datetime.timedelta(days=dt.weekday())
            return Frequency.daily.start_of_interval(first_day)
        if self is Frequency.monthly:
            return Frequency.daily.start_of_interval(dt.replace(day=1))
        if self is Frequency.yearly:
            return Frequency.daily.start_of_interval(dt.replace(month=1, day=1))
49 | 


--------------------------------------------------------------------------------
/githammer/hammer.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019 Jaakko Kangasharju
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #    http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import datetime
 16 | import io
 17 | import os
 18 | import re
 19 | from operator import itemgetter
 20 | 
 21 | import git
 22 | from sqlalchemy import create_engine
 23 | from sqlalchemy.exc import OperationalError
 24 | from sqlalchemy.orm import sessionmaker
 25 | from sqlalchemy_utils import create_database, database_exists
 26 | 
 27 | from .combinedcommit import _iter_combined_commits, CombinedCommit
 28 | from .config import Configuration
 29 | from .countdict import add_count_dict, subtract_count_dict, normalize_count_dict
 30 | from .dbtypes import Author, Base, Commit, AuthorCommitDetail, Repository, Project, ProjectRepository
 31 | 
# Matches a line of three tab-separated fields: two counts, each either
# digits or '-', followed by the rest of the line.
# NOTE(review): presumably a line of `git diff --numstat` output - confirm
# where the regex is used.
_diff_stat_regex = re.compile('^([0-9]+|-)\t([0-9]+|-)\t(.*)$')
# Database used when no database URL is given explicitly.
_default_database_url = 'sqlite:///git-hammer.sqlite'
 34 | 
 35 | 
 36 | def _time_to_utc_offset(time):
 37 |     utc_time = time.astimezone(datetime.timezone.utc)
 38 |     offset = int(time.utcoffset().total_seconds())
 39 |     return utc_time, offset
 40 | 
 41 | 
 42 | def _commit_exists(repository, hexsha):
 43 |     status, out, err = repository.git_repository.git.cat_file('-e', hexsha, with_extended_output=True,
 44 |                                                               with_exceptions=False)
 45 |     return status == 0
 46 | 
 47 | 
 48 | def _is_commit_in_range(repository, commit):
 49 |     if not repository.start_time:
 50 |         return True
 51 |     else:
 52 |         return commit.authored_datetime >= repository.start_time_tz()
 53 | 
 54 | 
 55 | def _print_line_counts(line_counts):
 56 |     for author, count in sorted(line_counts.items(), key=itemgetter(1), reverse=True):
 57 |         print('{:>10}  {}'.format(count, author.canonical_name))
 58 | 
 59 | 
 60 | def _author_line(commit):
 61 |     return '{} <{}>'.format(commit.author.name, commit.author.email)
 62 | 
 63 | 
 64 | def _fail_unless_database_exists(engine):
 65 |     if not database_exists(engine.url):
 66 |         raise DatabaseNotInitializedError('Database must be created for this operation')
 67 | 
 68 | 
 69 | def iter_all_project_names(database_url=_default_database_url):
 70 |     engine = create_engine(database_url)
 71 |     _fail_unless_database_exists(engine)
 72 |     Session = sessionmaker(bind=engine)
 73 |     session = Session()
 74 |     for project in session.query(Project):
 75 |         yield project.project_name
 76 |     session.close()
 77 | 
 78 | 
 79 | def iter_sources_and_tests(repository_path, configuration_file_path=None):
 80 |     if configuration_file_path is None:
 81 |         configuration_file_path = os.path.join(repository_path, 'git-hammer-config.json')
 82 |     configuration = Configuration(configuration_file_path)
 83 |     repository = git.Repo(repository_path)
 84 |     for git_object in repository.tree().traverse(visit_once=True):
 85 |         if git_object.type != 'blob':
 86 |             continue
 87 |         if configuration.is_source_file(git_object.path):
 88 |             if configuration.is_test_file(git_object.path):
 89 |                 yield 'test-file', git_object.path
 90 |                 lines = [line.decode('utf-8', 'ignore') for line in
 91 |                          io.BytesIO(git_object.data_stream.read()).readlines()]
 92 |                 for line in configuration.iter_test_lines(git_object.path, lines):
 93 |                     yield 'test-line', line.rstrip()
 94 |             else:
 95 |                 yield 'source-file', git_object.path
 96 | 
 97 | 
 98 | class DatabaseNotInitializedError(Exception):
 99 |     pass
100 | 
101 | 
class OldDatabaseSchemaError(Exception):
    """Raised when the database schema predates the one this code expects."""
104 | 
105 | 
class Hammer:
    """Collects and serves commit statistics for the repositories of a project.

    The statistics are stored through SQLAlchemy in the database named by
    ``database_url`` and mirrored in memory in three structures: the list of
    repositories of the project, a map from author lines (``Name <email>``)
    to ``Author`` objects, and a map from commit hashes to ``Commit`` objects
    carrying their per-author line and test counts.
    """

    def _ensure_project_exists(self):
        """Create the database, its tables and this project's row if missing."""
        if not database_exists(self._engine.url):
            create_database(self._engine.url)
            Base.metadata.create_all(self._engine)
        session = self._Session()
        try:
            if not session.query(Project).filter(Project.project_name == self.project_name).first():
                project = Project(project_name=self.project_name)
                session.add(project)
                session.commit()
        finally:
            session.close()

    def _init_properties(self):
        """Reset the in-memory repository list and lookup maps to empty."""
        self._repositories = []
        self._names_to_authors = {}
        self._shas_to_commits = {}

    def _commit_query(self, session):
        """Return a query selecting the commits that belong to this project."""
        return session.query(Commit).select_from(Commit).join(Repository, Commit.repository_id == Repository.id).join(
            ProjectRepository).filter(ProjectRepository.project_name == self.project_name)

    def _is_commit_processed(self, commit_id):
        """Tell whether the commit with the given hash is already known."""
        return commit_id in self._shas_to_commits

    def _build_repository_map(self, session):
        """Load the repositories of this project from the database.

        :raises OldDatabaseSchemaError: if the query fails with an
            OperationalError, which is taken to mean an outdated schema.
        """
        try:
            for dbrepo in session.query(Repository).join(ProjectRepository).filter(
                    ProjectRepository.project_name == self.project_name):
                self._repositories.append(dbrepo)
        except OperationalError as error:
            # Keep the database error attached as the cause for diagnosis
            raise OldDatabaseSchemaError('Database created with too-old version of Git Hammer') from error

    def _build_author_map(self, session):
        """Map every canonical author name and alias to its Author object."""
        for dbauthor in session.query(Author):
            self._names_to_authors[dbauthor.canonical_name] = dbauthor
            for alias in dbauthor.aliases:
                self._names_to_authors[alias] = dbauthor

    def _build_commit_map(self, session):
        """Load this project's commits and attach their per-author counts."""
        for dbcommit in self._commit_query(session):
            self._shas_to_commits[dbcommit.hexsha] = dbcommit
        commits = self._commit_query(session).subquery()
        for db_detail in session.query(AuthorCommitDetail).join(commits):
            self._shas_to_commits[db_detail.commit_id].line_counts[db_detail.author] = db_detail.line_count
            if db_detail.test_count:
                self._shas_to_commits[db_detail.commit_id].test_counts[db_detail.author] = db_detail.test_count

    def _process_lines_into_line_counts(self, repository, commit, path, lines, line_counts, test_counts):
        """Credit lines of the file at path to the author of commit.

        Adds the number of lines to ``line_counts`` and the number of test
        lines found by the repository configuration to ``test_counts``, both
        keyed by the Author object. Both dictionaries are modified in place.
        """
        author = self._names_to_authors[_author_line(commit)]
        line_counts[author] = line_counts.get(author, 0) + len(lines)
        test_counts[author] = test_counts.get(author, 0) + len(list(repository.configuration.iter_test_lines(path, lines)))

    def _blame_blob_into_line_counts(self, repository, commit_to_blame, path, line_counts, test_counts):
        """Blame the file at path and add its lines to the given counts.

        Files that the repository configuration does not consider source
        files are ignored. The blame is run with ``w=True``.
        """
        if not repository.configuration.is_source_file(path):
            return
        blame = repository.git_repository.blame(commit_to_blame, path, w=True)
        for commit, lines in blame:
            self._process_lines_into_line_counts(repository, commit, path, lines, line_counts, test_counts)

    def _make_full_commit_stats(self, repository, commit, need_full_blame=False):
        """Compute counts for commit by examining every source file in its tree.

        :param need_full_blame: when true, every file is blamed so that lines
            are credited to whoever wrote them; otherwise all lines of every
            file are credited to the author of ``commit``.
        :return: tuple of normalized ``(line_counts, test_counts)``.
        """
        stats_start_time = datetime.datetime.now()
        line_counts = {}
        test_counts = {}
        # Stop the traversal at submodule entries. The test must be an
        # isinstance check: comparing the entry to the Submodule class by
        # identity is never true, so nothing would be pruned.
        for git_object in commit.tree.traverse(prune=lambda item, depth: isinstance(item, git.Submodule)):
            if git_object.type != 'blob':
                continue
            if not repository.configuration.is_source_file(git_object.path):
                continue
            if need_full_blame:
                self._blame_blob_into_line_counts(repository, commit, git_object.path, line_counts, test_counts)
            else:
                lines = [line.decode('utf-8', 'ignore') for line in
                         io.BytesIO(git_object.data_stream.read()).readlines()]
                self._process_lines_into_line_counts(repository, commit, git_object.path, lines, line_counts,
                                                     test_counts)
        print('Commit {} stats time: {}'.format(commit.hexsha,
                                                datetime.datetime.now() - stats_start_time))
        return normalize_count_dict(line_counts), normalize_count_dict(test_counts)

    def _make_diffed_commit_stats(self, repository, commit, previous_commit, previous_commit_line_counts,
                                  previous_commit_test_counts):
        """Compute counts for commit incrementally from those of previous_commit.

        Only files that the diff between the two commits reports as added,
        deleted, renamed or modified are blamed. The counts of the previous
        versions are subtracted from, and those of the current versions added
        to, the counts of the previous commit.

        :return: tuple ``(line_counts, test_counts)`` for ``commit``.
        """
        diff_index = previous_commit.diff(commit, w=True, ignore_submodules=True)
        current_files = set()
        previous_files = set()
        for add_diff in diff_index.iter_change_type('A'):
            current_files.add(add_diff.b_path)
        for delete_diff in diff_index.iter_change_type('D'):
            previous_files.add(delete_diff.a_path)
        for rename_diff in diff_index.iter_change_type('R'):
            current_files.add(rename_diff.b_path)
            previous_files.add(rename_diff.a_path)
        for modify_diff in diff_index.iter_change_type('M'):
            current_files.add(modify_diff.b_path)
            previous_files.add(modify_diff.a_path)
        previous_line_counts = {}
        current_line_counts = {}
        previous_test_counts = {}
        current_test_counts = {}
        for current_file in current_files:
            self._blame_blob_into_line_counts(repository, commit, current_file, current_line_counts,
                                              current_test_counts)
        for previous_file in previous_files:
            self._blame_blob_into_line_counts(repository, previous_commit, previous_file, previous_line_counts,
                                              previous_test_counts)
        line_difference = subtract_count_dict(current_line_counts, previous_line_counts)
        line_counts = add_count_dict(previous_commit_line_counts, line_difference)
        test_difference = subtract_count_dict(current_test_counts, previous_test_counts)
        test_counts = add_count_dict(previous_commit_test_counts, test_difference)
        return line_counts, test_counts

    def _add_author_alias_if_needed(self, repository, commit):
        """Register the raw author line of commit as an alias if it is unknown.

        The canonical name is obtained from git with the ``%aN <%aE>`` format
        and must already be present in the author map.
        """
        author_line = _author_line(commit)
        if not self._names_to_authors.get(author_line):
            canonical_name = repository.git_repository.git.show(commit.hexsha, format='%aN <%aE>', no_patch=True)
            author = self._names_to_authors[canonical_name]
            author.aliases.append(author_line)
            self._names_to_authors[author_line] = author

    def _add_canonical_authors(self, repository, session):
        """Create Author objects for canonical names in the log not yet known."""
        author_lines = repository.git_repository.git.log(format='%aN <%aE>')
        for author_line in set(author_lines.splitlines()):
            if not self._names_to_authors.get(author_line):
                author = Author(canonical_name=author_line, aliases=[])
                self._names_to_authors[author_line] = author
                session.add(author)

    def _add_commit_object(self, repository, commit, session):
        """Create the Commit database object for commit and register it.

        Added and deleted line totals are computed only for commits with at
        most one parent, and only over files that the repository
        configuration considers source files.
        """
        self._add_author_alias_if_needed(repository, commit)
        author_line = _author_line(commit)
        author = self._names_to_authors[author_line]
        author = session.merge(author)
        commit_time, commit_time_utc_offset = _time_to_utc_offset(commit.authored_datetime)
        commit_object = Commit(hexsha=commit.hexsha, author=author,
                               commit_time=commit_time,
                               commit_time_utc_offset=commit_time_utc_offset,
                               parent_ids=[], repository_id=repository.id)
        if len(commit.parents) <= 1:
            if len(commit.parents) == 1 and _commit_exists(repository, commit.parents[0]):
                diff_stat = repository.git_repository.git.diff(
                    commit.parents[0], commit, numstat=True, ignore_submodules=True)
            else:
                # No parent available to diff against (root commit, or the
                # parent object is not present in this repository)
                diff_stat = repository.git_repository.git.show(commit, numstat=True, format='')
            added_lines = 0
            deleted_lines = 0
            for line in diff_stat.splitlines():
                match = re.fullmatch(_diff_stat_regex, line)
                if match:
                    # Lines with '-' in place of a count carry no numbers
                    if match.group(1) == '-' or match.group(2) == '-':
                        continue
                    if not repository.configuration.is_source_file(match.group(3)):
                        continue
                    added_lines += int(match.group(1))
                    deleted_lines += int(match.group(2))
            commit_object.added_lines = added_lines
            commit_object.deleted_lines = deleted_lines
        self._shas_to_commits[commit.hexsha] = commit_object
        session.add(commit_object)

    def _add_commit_line_counts(self, commit, line_counts, test_counts, session):
        """Attach the counts to the stored commit and persist them per author."""
        self._shas_to_commits[commit.hexsha].line_counts = line_counts
        self._shas_to_commits[commit.hexsha].test_counts = test_counts
        for author, count in line_counts.items():
            detail = AuthorCommitDetail(
                author_name=author.canonical_name, commit_id=commit.hexsha, line_count=count)
            if test_counts.get(author):
                detail.test_count = test_counts[author]
            session.add(detail)

    def _process_repository(self, repository, session):
        """Process every not yet processed commit of repository.

        For each commit the Commit object and its per-author counts are
        created. Counts are derived from the first parent's counts when that
        parent has been processed; otherwise they are computed from the full
        tree. The session is committed whenever at least five minutes have
        passed since the previous commit of the session; the final commit is
        left to the caller.
        """
        print('Repository {}'.format(repository.repository_path))
        repository = session.merge(repository, load=False)
        start_time = datetime.datetime.now()
        last_session_commit_time = start_time
        self._add_canonical_authors(repository, session)
        commit_count = 0
        for commit in self._iter_unprocessed_commits(repository):
            self._add_commit_object(repository, commit, session)
            if commit.parents:
                for parent in commit.parents:
                    self._shas_to_commits[commit.hexsha].parent_ids.append(parent.hexsha)
                parent_commit = self._shas_to_commits.get(commit.parents[0].hexsha)
                if parent_commit:
                    line_counts, test_counts = self._make_diffed_commit_stats(repository, commit, commit.parents[0],
                                                                              parent_commit.line_counts,
                                                                              parent_commit.test_counts)
                else:
                    # Blame is only possible when the parent object exists
                    need_full_blame = _commit_exists(repository, commit.parents[0].hexsha)
                    line_counts, test_counts = self._make_full_commit_stats(repository, commit,
                                                                            need_full_blame=need_full_blame)
            else:
                line_counts, test_counts = self._make_full_commit_stats(repository, commit)
            self._add_commit_line_counts(commit, line_counts, test_counts, session)
            repository.head_commit_id = commit.hexsha
            commit_count += 1
            if commit_count % 20 == 0:
                print('Commit {:>5}: {}'.format(commit_count, datetime.datetime.now() - start_time))
            if datetime.datetime.now() - last_session_commit_time >= datetime.timedelta(minutes=5):
                session_commit_start_time = datetime.datetime.now()
                session.commit()
                print('Commit {:>5}: Database commit time {}'.format(commit_count,
                                                                     datetime.datetime.now() - session_commit_start_time))
                last_session_commit_time = datetime.datetime.now()
        print('Commit processing time {}'.format(datetime.datetime.now() - start_time))

    def _iter_branch(self, repository):
        """Iterate the stored commits of repository from oldest to head.

        The chain is followed from the head commit through first parents and
        ends at the first commit that is unknown or has no parents.
        """
        commits = []
        commit_id = repository.head_commit_id
        while commit_id:
            commit = self._shas_to_commits.get(commit_id)
            if commit:
                commits.append(commit)
                commit_id = commit.parent_ids[0] if commit.parent_ids else None
            else:
                break
        return iter(reversed(commits))

    def _iter_unprocessed_commits(self, repository):
        """Yield git commits of repository that still need processing.

        Commits come in the order of ``git log --reverse --date-order``;
        commits already processed or outside the repository's time range are
        skipped.
        """
        for commit_id in repository.git_repository.git.log(reverse=True, date_order=True, format='%H').splitlines():
            if not self._is_commit_processed(commit_id):
                commit = repository.git_repository.commit(commit_id)
                if _is_commit_in_range(repository, commit):
                    yield commit

    def __init__(self, project_name, database_url=_default_database_url):
        """Open the project and load its stored data if the database exists.

        :param project_name: name of the project whose data is handled.
        :param database_url: SQLAlchemy URL of the database to use.
        :raises OldDatabaseSchemaError: if the existing database cannot be
            queried for the project's repositories.
        """
        start_time = datetime.datetime.now()
        self.project_name = project_name
        self._engine = create_engine(database_url)
        self._Session = sessionmaker(bind=self._engine)
        self._init_properties()
        if database_exists(self._engine.url):
            session = self._Session()
            try:
                self._build_repository_map(session)
                self._build_author_map(session)
                self._build_commit_map(session)
            finally:
                session.close()
        print('Init time {}'.format(datetime.datetime.now() - start_time))

    def add_repository(self, repository_path, configuration_file_path=None, **kwargs):
        """Add a repository to the project and process all of its commits.

        Nothing happens if a repository with the same absolute path already
        belongs to the project.

        :param repository_path: path of the git repository.
        :param configuration_file_path: configuration file to use; defaults
            to ``git-hammer-config.json`` inside the repository.
        :param kwargs: ``earliest_date`` may give a datetime; commits
            authored before it are not processed.
        """
        self._ensure_project_exists()
        repository_path = os.path.abspath(repository_path)
        if not next((repo for repo in self._repositories if repo.repository_path == repository_path), None):
            if not configuration_file_path:
                configuration_file_path = os.path.join(repository_path, 'git-hammer-config.json')
            else:
                configuration_file_path = os.path.abspath(configuration_file_path)
            # Objects stay in the in-memory maps after the commit, so they
            # must not be expired by it
            session = self._Session(expire_on_commit=False)
            try:
                dbrepo = Repository(repository_path=repository_path, configuration_file_path=configuration_file_path)
                if kwargs.get('earliest_date'):
                    start_time, start_time_utc_offset = _time_to_utc_offset(kwargs.get('earliest_date'))
                    dbrepo.start_time = start_time
                    dbrepo.start_time_utc_offset = start_time_utc_offset
                session.add(dbrepo)
                # Flush so that the repository gets its id before it is used
                session.flush()
                self._repositories.append(dbrepo)
                project_repo = ProjectRepository(project_name=self.project_name, repository_id=dbrepo.id)
                session.add(project_repo)
                session.flush()
                self._process_repository(dbrepo, session)
                session.commit()
            finally:
                # Releases the connection; on failure this also rolls back
                # whatever was left uncommitted
                session.close()

    def update_data(self):
        """Process the new commits of every repository of the project.

        :raises DatabaseNotInitializedError: if the database does not exist.
        """
        _fail_unless_database_exists(self._engine)
        session = self._Session(expire_on_commit=False)
        try:
            for repository in self._repositories:
                self._process_repository(repository, session)
            start_time = datetime.datetime.now()
            session.commit()
            print('Database commit time {}'.format(datetime.datetime.now() - start_time))
        finally:
            session.close()

    def head_commit(self):
        """Return the head commits of all repositories as one CombinedCommit.

        :raises DatabaseNotInitializedError: if the database does not exist.
        """
        _fail_unless_database_exists(self._engine)
        head_commit_ids = [repository.head_commit_id for repository in self._repositories]
        head_commits = [self._shas_to_commits[commit_id] for commit_id in head_commit_ids]
        return CombinedCommit(head_commits)

    def iter_authors(self):
        """Yield the distinct authors of the commits of this project.

        :raises DatabaseNotInitializedError: if the database does not exist.
            As this is a generator, the check runs when iteration starts.
        """
        _fail_unless_database_exists(self._engine)
        session = self._Session()
        try:
            for dbauthor in self._commit_query(session).join(Author).with_entities(Author).distinct():
                yield self._names_to_authors.get(dbauthor.canonical_name)
        finally:
            # Also reached when the consumer stops iterating early
            session.close()

    def iter_commits(self, **kwargs):
        """Yield combined commits over all repositories of the project.

        :param kwargs: ``frequency`` may give an object with
            ``start_of_interval`` and ``next_instance`` methods. When given,
            after each yielded commit the following commits are skipped
            until one is at or past the start of the next interval.
        :raises DatabaseNotInitializedError: if the database does not exist.
            As this is a generator, the check runs when iteration starts.
        """
        _fail_unless_database_exists(self._engine)
        iterators = [self._iter_branch(repository) for repository in self._repositories]
        commit_iterator = _iter_combined_commits(iterators)
        if not kwargs.get('frequency'):
            for commit in commit_iterator:
                yield commit
        else:
            next_commit_time = None
            frequency = kwargs['frequency']
            for commit in commit_iterator:
                if not next_commit_time or commit.commit_time >= next_commit_time:
                    yield commit
                    start = frequency.start_of_interval(commit.commit_time)
                    next_commit_time = frequency.next_instance(start)

    def iter_individual_commits(self):
        """Yield every stored commit of the project ordered by commit time.

        :raises DatabaseNotInitializedError: if the database does not exist.
            As this is a generator, the check runs when iteration starts.
        """
        _fail_unless_database_exists(self._engine)
        session = self._Session()
        try:
            for commit in self._commit_query(session).order_by(Commit.commit_time):
                yield self._shas_to_commits.get(commit.hexsha)
        finally:
            # Also reached when the consumer stops iterating early
            session.close()
411 | 


--------------------------------------------------------------------------------
/githammer/summary/__init__.py:
--------------------------------------------------------------------------------
1 | from .graph import total_lines, lines_per_author, total_tests, tests_per_author, commits_per_hour, commits_per_weekday
2 | from .table import commit_count_table, line_count_table, test_count_table
3 | 


--------------------------------------------------------------------------------
/githammer/summary/graph.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019 Jaakko Kangasharju
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #    http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from operator import attrgetter
 16 | 
 17 | import matplotlib.pyplot as mpplot
 18 | 
 19 | from githammer import Frequency
 20 | 
 21 | 
 22 | class NoDataForGraphError(Exception):
 23 |     pass
 24 | 
 25 | 
 26 | def _plot_totals(hammer, counts_property):
 27 |     date_array = []
 28 |     line_count_array = []
 29 |     for commit in hammer.iter_commits(frequency=Frequency.daily):
 30 |         date_array.append(commit.commit_time)
 31 |         line_count_array.append(sum(getattr(commit, counts_property).values()))
 32 |     figure = mpplot.figure()
 33 |     plot = figure.add_subplot(111)
 34 |     plot.plot(date_array, line_count_array, ls='-', marker='')
 35 |     figure.autofmt_xdate(rotation=45)
 36 |     figure.tight_layout()
 37 |     return figure
 38 | 
 39 | 
 40 | def _plot_totals_per_author(hammer, counts_property, min_count_per_author=0):
 41 |     selected_authors = set()
 42 |     for commit in hammer.iter_commits():
 43 |         for author, count in getattr(commit, counts_property).items():
 44 |             if count >= min_count_per_author:
 45 |                 selected_authors.add(author)
 46 |     if not selected_authors:
 47 |         raise NoDataForGraphError(
 48 |             'No authors were found having at least a count of {} in a single commit'.format(min_count_per_author))
 49 |     head_counts = getattr(hammer.head_commit(), counts_property)
 50 |     author_list = sorted(list(selected_authors), key=lambda a: head_counts.get(a, 0), reverse=True)
 51 |     author_labels = [author.name for author in author_list]
 52 |     date_array = []
 53 |     count_array = [[] for _ in range(len(author_list))]
 54 |     for commit in hammer.iter_commits(frequency=Frequency.daily):
 55 |         date_array.append(commit.commit_time)
 56 |         for index, author in enumerate(author_list):
 57 |             count_array[index].append(getattr(commit, counts_property).get(author, 0))
 58 |     figure = mpplot.figure(figsize=(12,7))
 59 |     figure.subplots_adjust(left=0.08, right=0.75, top=0.95, bottom=0.05)
 60 |     plot = figure.add_subplot(111)
 61 |     plot.stackplot(date_array, count_array, labels=author_labels)
 62 |     handles, labels = plot.get_legend_handles_labels()
 63 |     plot.legend(handles[:25], labels[:25], bbox_to_anchor=(1.0, 0.5), loc='center left')
 64 |     figure.autofmt_xdate(rotation=45)
 65 |     return figure
 66 | 
 67 | 
 68 | def total_lines(hammer):
 69 |     return _plot_totals(hammer, 'line_counts')
 70 | 
 71 | 
 72 | def total_tests(hammer):
 73 |     return _plot_totals(hammer, 'test_counts')
 74 | 
 75 | 
 76 | def lines_per_author(hammer):
 77 |     return _plot_totals_per_author(hammer, 'line_counts')
 78 | 
 79 | 
 80 | def tests_per_author(hammer):
 81 |     return _plot_totals_per_author(hammer, 'test_counts')
 82 | 
 83 | 
 84 | def commits_per_hour(hammer):
 85 |     count_array = [0] * 24
 86 |     for commit in hammer.iter_individual_commits():
 87 |         count_array[commit.commit_time_tz().hour] += 1
 88 |     figure = mpplot.figure()
 89 |     plot = figure.add_subplot(111)
 90 |     plot.bar(range(len(count_array)), count_array)
 91 |     figure.tight_layout()
 92 |     return figure
 93 | 
 94 | 
 95 | def commits_per_weekday(hammer):
 96 |     count_array = [0] * 7
 97 |     for commit in hammer.iter_individual_commits():
 98 |         count_array[commit.commit_time_tz().weekday()] += 1
 99 |     figure = mpplot.figure()
100 |     plot = figure.add_subplot(111)
101 |     plot.bar(range(len(count_array)), count_array)
102 |     figure.tight_layout()
103 |     mpplot.xticks(range(len(count_array)),
104 |                   ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])
105 |     return figure
106 | 


--------------------------------------------------------------------------------
/githammer/summary/table.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 Jaakko Kangasharju
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #    http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from beautifultable import BeautifulTable
16 | 
17 | 
def _make_table(columns):
    """Create an empty compact-style table with the given column headers.

    The column named 'Author' is left-aligned; all others are right-aligned.
    """
    table = BeautifulTable()
    table.set_style(BeautifulTable.STYLE_COMPACT)
    table.column_headers = columns
    for column in columns:
        alignment = BeautifulTable.ALIGN_LEFT if column == 'Author' else BeautifulTable.ALIGN_RIGHT
        table.column_alignments[column] = alignment
    return table
28 | 
29 | 
def commit_count_table(hammer):
    """Return a table of commit counts per author, highest count first."""
    commits_by_author = {}
    for commit in hammer.iter_individual_commits():
        previous = commits_by_author.get(commit.author, 0)
        commits_by_author[commit.author] = previous + 1
    table = _make_table(['Author', 'Commits'])
    for author in commits_by_author:
        table.append_row([author.name, commits_by_author[author]])
    table.sort('Commits', reverse=True)
    return table
39 | 
40 | 
def line_count_table(hammer):
    """Return a table of head-commit line counts per author, highest first."""
    counts_at_head = hammer.head_commit().line_counts
    table = _make_table(['Author', 'Lines'])
    for author, lines in counts_at_head.items():
        row = [author.name, lines]
        table.append_row(row)
    table.sort('Lines', reverse=True)
    return table
48 | 
49 | 
def test_count_table(hammer):
    """Return a table of head-commit test counts per author, highest first.

    :return: the table, or None when the head commit has no test counts.
    """
    head = hammer.head_commit()
    if not head.test_counts:
        return None
    table = _make_table(['Author', 'Tests'])
    for author, tests in head.test_counts.items():
        row = [author.name, tests]
        table.append_row(row)
    table.sort('Tests', reverse=True)
    return table
59 | 


--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
 1 | gitpython
 2 | sqlalchemy >=1.4.7, <2.0
 3 | sqlalchemy-utils >=0.37.0
 4 | matplotlib <3.1
 5 | python-dateutil
 6 | globber
 7 | beautifultable
 8 | alembic
 9 | coverage
10 | codecov
11 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is autogenerated by pip-compile
 3 | # To update, run:
 4 | #
 5 | #    pip-compile requirements.in
 6 | #
 7 | alembic==1.5.8
 8 |     # via -r requirements.in
 9 | beautifultable==1.0.1
10 |     # via -r requirements.in
11 | certifi==2020.12.5
12 |     # via requests
13 | chardet==4.0.0
14 |     # via requests
15 | codecov==2.1.11
16 |     # via -r requirements.in
17 | coverage==5.5
18 |     # via
19 |     #   -r requirements.in
20 |     #   codecov
21 | cycler==0.10.0
22 |     # via matplotlib
23 | gitdb==4.0.7
24 |     # via gitpython
25 | gitpython==3.1.14
26 |     # via -r requirements.in
27 | globber==0.2.1
28 |     # via -r requirements.in
29 | greenlet==1.0.0
30 |     # via sqlalchemy
31 | idna==2.10
32 |     # via requests
33 | kiwisolver==1.3.1
34 |     # via matplotlib
35 | mako==1.1.4
36 |     # via alembic
37 | markupsafe==1.1.1
38 |     # via mako
39 | matplotlib==3.0.3
40 |     # via -r requirements.in
41 | numpy==1.20.2
42 |     # via matplotlib
43 | pyparsing==2.4.7
44 |     # via matplotlib
45 | python-dateutil==2.8.1
46 |     # via
47 |     #   -r requirements.in
48 |     #   alembic
49 |     #   matplotlib
50 | python-editor==1.0.4
51 |     # via alembic
52 | requests==2.25.1
53 |     # via codecov
54 | six==1.15.0
55 |     # via
56 |     #   cycler
57 |     #   python-dateutil
58 |     #   sqlalchemy-utils
59 | smmap==4.0.0
60 |     # via gitdb
61 | sqlalchemy-utils==0.37.0
62 |     # via -r requirements.in
63 | sqlalchemy==1.4.7
64 |     # via
65 |     #   -r requirements.in
66 |     #   alembic
67 |     #   sqlalchemy-utils
68 | urllib3==1.26.4
69 |     # via requests
70 | wcwidth==0.2.5
71 |     # via beautifultable
72 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | 
 3 | with open('README.md', 'r') as file:
 4 |     long_description = file.read()
 5 | 
 6 | setuptools.setup(
 7 |     name='git-hammer',
 8 |     version='0.3.2',
 9 |     author='Jaakko Kangasharju',
10 |     author_email='ashar@iki.fi',
11 |     description='Statistics tool for git repositories',
12 |     long_description=long_description,
13 |     long_description_content_type='text/markdown',
14 |     url='https://github.com/asharov/git-hammer',
15 |     packages=setuptools.find_packages(exclude=['tests']),
16 |     classifiers=[
17 |         'Programming Language :: Python :: 3',
18 |         'License :: OSI Approved :: Apache Software License',
19 |         'Development Status :: 3 - Alpha',
20 |         'Operating System :: OS Independent'
21 |     ],
22 |     python_requires='>=3.7',
23 |     install_requires=[
24 |         'gitpython',
25 |         'sqlalchemy >=1.4.7, <2.0',
26 |         'sqlalchemy-utils >=0.37.0',
27 |         'matplotlib <3.1',
28 |         'python-dateutil',
29 |         'globber',
30 |         'beautifultable'
31 |     ]
32 | )
33 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | from .test_frequency import FrequencyTest
 2 | from .test_init import HammerInitTest
 3 | from .test_single_repository import HammerRepositoryTest
 4 | from .test_submodule import HammerSubmoduleTest
 5 | from .test_multiple_projects import HammerMultipleProjectsTest
 6 | from .test_update import HammerUpdateTest
 7 | from .test_shallow_repository import HammerShallowTest
 8 | from .test_multiple_repositories import HammerMultipleRepositoriesTest
 9 | from .test_limited_repository import HammerLimitedTest
10 | 


--------------------------------------------------------------------------------
/tests/check_regression.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import itertools
 3 | 
 4 | from githammer import Hammer
 5 | 
 6 | 
 7 | def check_commit(index, commit_old, commit_new, attr):
 8 |     old_attr = getattr(commit_old, attr)
 9 |     new_attr = getattr(commit_new, attr)
10 |     if old_attr != new_attr:
11 |         sys.exit('Error in commit {} ({}): Incorrect {} {} (expected {})'.
12 |                  format(index, commit_old.hexsha, attr, new_attr, old_attr))
13 | 
14 | 
# Script body: compare every individual commit of a known good project
# database against a freshly built one and stop at the first difference.
if len(sys.argv) < 5:
    sys.exit('Usage: {} <known good project> <good database URL> <new project> <new database URL>'.format(sys.argv[0]))

hammer_old = Hammer(sys.argv[1], database_url=sys.argv[2])
hammer_new = Hammer(sys.argv[3], database_url=sys.argv[4])

# Attributes compared for each commit pair, in this order. hexsha goes first
# so that a pairing of different commits is reported as such.
checked_attributes = (
    'hexsha',
    'author_name',
    'added_lines',
    'deleted_lines',
    'commit_time',
    'commit_time_utc_offset',
    'line_counts',
    'test_counts',
)

count = 0

# zip_longest keeps iterating when one project has more commits than the
# other; the shorter side is then padded with None.
for (index, (commit_old, commit_new)) in enumerate(itertools.zip_longest(hammer_old.iter_individual_commits(),
                                                                         hammer_new.iter_individual_commits())):
    for attr in checked_attributes:
        check_commit(index, commit_old, commit_new, attr)
    count += 1

print('OK, checked {} commits'.format(count))
36 | 


--------------------------------------------------------------------------------
/tests/data/.gitignore:
--------------------------------------------------------------------------------
1 | # When working on the test repository, clone it to a directory starting with
2 | # worktree here, so git will ignore it.
3 | worktree*
4 | 


--------------------------------------------------------------------------------
/tests/data/repo-config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "sourceFiles": [
 3 |     "*.txt",
 4 |     "*/**/*"
 5 |   ],
 6 |   "testFiles": [
 7 |     "tests/**/*.txt"
 8 |   ],
 9 |   "testLineRegex": "^T"
10 | }
11 | 


--------------------------------------------------------------------------------
/tests/data/repository/HEAD:
--------------------------------------------------------------------------------
1 | ref: refs/heads/master
2 | 


--------------------------------------------------------------------------------
/tests/data/repository/config:
--------------------------------------------------------------------------------
1 | [core]
2 | 	repositoryformatversion = 0
3 | 	filemode = true
4 | 	bare = true
5 | 	ignorecase = true
6 | 	precomposeunicode = true
7 | 


--------------------------------------------------------------------------------
/tests/data/repository/description:
--------------------------------------------------------------------------------
1 | Unnamed repository; edit this file 'description' to name the repository.
2 | 


--------------------------------------------------------------------------------
/tests/data/repository/info/exclude:
--------------------------------------------------------------------------------
1 | # git ls-files --others --exclude-from=.git/info/exclude
2 | # Lines that start with '#' are comments.
3 | # For a project mostly in C, the following would be a good set of
4 | # exclude patterns (uncomment them if you want to use them):
5 | # *.[oa]
6 | # *~
7 | 


--------------------------------------------------------------------------------
/tests/data/repository/info/refs:
--------------------------------------------------------------------------------
1 | c80ee8a32baaee8df8133b8afca26d63d857684e	refs/heads/december
2 | 10247c3a05e4bd35d827ed527a0aed39990338ea	refs/heads/feature
3 | 74e48c8686b26dc644951b55717e8828eb704587	refs/heads/master
4 | c153f2881f0f0025a9ff5754e74111333ce859cd	refs/heads/old-state
5 | 


--------------------------------------------------------------------------------
/tests/data/repository/objects/info/commit-graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asharov/git-hammer/cdad6799bbf964f0d96736e68d753dbc57744be9/tests/data/repository/objects/info/commit-graph


--------------------------------------------------------------------------------
/tests/data/repository/objects/info/packs:
--------------------------------------------------------------------------------
1 | P pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.pack
2 | 
3 | 


--------------------------------------------------------------------------------
/tests/data/repository/objects/pack/pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.bitmap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asharov/git-hammer/cdad6799bbf964f0d96736e68d753dbc57744be9/tests/data/repository/objects/pack/pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.bitmap


--------------------------------------------------------------------------------
/tests/data/repository/objects/pack/pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asharov/git-hammer/cdad6799bbf964f0d96736e68d753dbc57744be9/tests/data/repository/objects/pack/pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.idx


--------------------------------------------------------------------------------
/tests/data/repository/objects/pack/pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.pack:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asharov/git-hammer/cdad6799bbf964f0d96736e68d753dbc57744be9/tests/data/repository/objects/pack/pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.pack


--------------------------------------------------------------------------------
/tests/data/repository/packed-refs:
--------------------------------------------------------------------------------
1 | # pack-refs with: peeled fully-peeled sorted 
2 | c80ee8a32baaee8df8133b8afca26d63d857684e refs/heads/december
3 | 10247c3a05e4bd35d827ed527a0aed39990338ea refs/heads/feature
4 | 74e48c8686b26dc644951b55717e8828eb704587 refs/heads/master
5 | c153f2881f0f0025a9ff5754e74111333ce859cd refs/heads/old-state
6 | 


--------------------------------------------------------------------------------
/tests/data/repository/refs/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asharov/git-hammer/cdad6799bbf964f0d96736e68d753dbc57744be9/tests/data/repository/refs/.keep


--------------------------------------------------------------------------------
/tests/data/subrepository/HEAD:
--------------------------------------------------------------------------------
1 | ref: refs/heads/master
2 | 


--------------------------------------------------------------------------------
/tests/data/subrepository/config:
--------------------------------------------------------------------------------
1 | [core]
2 | 	repositoryformatversion = 0
3 | 	filemode = true
4 | 	bare = true
5 | 	ignorecase = true
6 | 	precomposeunicode = true
7 | 


--------------------------------------------------------------------------------
/tests/data/subrepository/description:
--------------------------------------------------------------------------------
1 | Unnamed repository; edit this file 'description' to name the repository.
2 | 


--------------------------------------------------------------------------------
/tests/data/subrepository/info/exclude:
--------------------------------------------------------------------------------
1 | # git ls-files --others --exclude-from=.git/info/exclude
2 | # Lines that start with '#' are comments.
3 | # For a project mostly in C, the following would be a good set of
4 | # exclude patterns (uncomment them if you want to use them):
5 | # *.[oa]
6 | # *~
7 | 


--------------------------------------------------------------------------------
/tests/data/subrepository/info/refs:
--------------------------------------------------------------------------------
1 | 303804d461da9cdfef86f6053d0ad2d0545adae1	refs/heads/master
2 | 


--------------------------------------------------------------------------------
/tests/data/subrepository/objects/info/commit-graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asharov/git-hammer/cdad6799bbf964f0d96736e68d753dbc57744be9/tests/data/subrepository/objects/info/commit-graph


--------------------------------------------------------------------------------
/tests/data/subrepository/objects/info/packs:
--------------------------------------------------------------------------------
1 | P pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.pack
2 | 
3 | 


--------------------------------------------------------------------------------
/tests/data/subrepository/objects/pack/pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.bitmap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asharov/git-hammer/cdad6799bbf964f0d96736e68d753dbc57744be9/tests/data/subrepository/objects/pack/pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.bitmap


--------------------------------------------------------------------------------
/tests/data/subrepository/objects/pack/pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asharov/git-hammer/cdad6799bbf964f0d96736e68d753dbc57744be9/tests/data/subrepository/objects/pack/pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.idx


--------------------------------------------------------------------------------
/tests/data/subrepository/objects/pack/pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.pack:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asharov/git-hammer/cdad6799bbf964f0d96736e68d753dbc57744be9/tests/data/subrepository/objects/pack/pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.pack


--------------------------------------------------------------------------------
/tests/data/subrepository/packed-refs:
--------------------------------------------------------------------------------
1 | # pack-refs with: peeled fully-peeled sorted 
2 | 303804d461da9cdfef86f6053d0ad2d0545adae1 refs/heads/master
3 | 


--------------------------------------------------------------------------------
/tests/data/subrepository/refs/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asharov/git-hammer/cdad6799bbf964f0d96736e68d753dbc57744be9/tests/data/subrepository/refs/.keep


--------------------------------------------------------------------------------
/tests/hammer_test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import tempfile
 3 | import unittest
 4 | 
 5 | from githammer import Hammer
 6 | 
 7 | 
 8 | class HammerTest(unittest.TestCase):
 9 | 
10 |     _main_repo_initial_commit_hexsha = 'c153f2881f0f0025a9ff5754e74111333ce859cd'
11 |     _main_repo_second_commit_hexsha = '5151985f7e3551c73ccb65cda2b021194b30b30a'
12 |     _main_repo_test_commit_hexsha = 'c80ee8a32baaee8df8133b8afca26d63d857684e'
13 |     _main_repo_head_commit_hexsha = '74e48c8686b26dc644951b55717e8828eb704587'
14 | 
15 |     def _fetch_commit(self, hexsha, hammer=None):
16 |         if hammer is None:
17 |             hammer = self.hammer
18 |         return next(c for c in hammer.iter_individual_commits() if c.hexsha == hexsha)
19 | 
20 |     def _make_hammer(self, project_name, database_url=None):
21 |         if not database_url:
22 |             database_url = self.database_url
23 |         return Hammer(project_name, database_url)
24 | 
25 |     def setUp(self):
26 |         print()
27 |         print(self.id())
28 |         self.current_directory = os.path.abspath(os.path.dirname(__file__))
29 |         self.working_directory = tempfile.TemporaryDirectory(prefix='git-hammer-')
30 |         self.database_url = 'sqlite:///' + self.working_directory.name + '/test.sqlite'
31 |         self.hammer = self._make_hammer('test')
32 | 
33 |     def tearDown(self):
34 |         self.working_directory.cleanup()
35 | 


--------------------------------------------------------------------------------
/tests/test_frequency.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import unittest
 3 | 
 4 | from githammer import Frequency
 5 | 
 6 | 
 7 | class FrequencyTest(unittest.TestCase):
 8 |     def setUp(self) -> None:
 9 |         print()
10 |         print(self.id())
11 |         self.initial_date = datetime.datetime(2019, 10, 10, 10, 10, 10, tzinfo=datetime.timezone.utc)
12 |         self.year_start_date = datetime.datetime(2019, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)
13 |         self.year_start_week_date = datetime.datetime(2019, 1, 7, 0, 0, 0, tzinfo=datetime.timezone.utc)
14 | 
15 |     def test_correct_start_of_interval(self):
16 |         self.assertEqual(Frequency.daily.start_of_interval(self.initial_date),
17 |                          datetime.datetime(2019, 10, 10, 0, 0, 0, tzinfo=datetime.timezone.utc))
18 |         self.assertEqual(Frequency.weekly.start_of_interval(self.initial_date),
19 |                          datetime.datetime(2019, 10, 7, 0, 0, 0, tzinfo=datetime.timezone.utc))
20 |         self.assertEqual(Frequency.monthly.start_of_interval(self.initial_date),
21 |                          datetime.datetime(2019, 10, 1, 0, 0, 0, tzinfo=datetime.timezone.utc))
22 |         self.assertEqual(Frequency.yearly.start_of_interval(self.initial_date),
23 |                          datetime.datetime(2019, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc))
24 | 
25 |     def test_correct_next_instance(self):
26 |         self.assertEqual(Frequency.daily.next_instance(self.year_start_date),
27 |                          datetime.datetime(2019, 1, 2, 0, 0, 0, tzinfo=datetime.timezone.utc))
28 |         self.assertEqual(Frequency.weekly.next_instance(self.year_start_week_date),
29 |                          datetime.datetime(2019, 1, 14, 0, 0, 0, tzinfo=datetime.timezone.utc))
30 |         self.assertEqual(Frequency.monthly.next_instance(self.year_start_date),
31 |                          datetime.datetime(2019, 2, 1, 0, 0, 0, tzinfo=datetime.timezone.utc))
32 |         self.assertEqual(Frequency.yearly.next_instance(self.year_start_date),
33 |                          datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc))
34 | 


--------------------------------------------------------------------------------
/tests/test_init.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from githammer import DatabaseNotInitializedError
 4 | from .hammer_test import HammerTest
 5 | 
 6 | 
 7 | class HammerInitTest(HammerTest):
 8 | 
 9 |     def test_plain_init_does_not_create_database(self):
10 |         self.assertFalse(os.listdir(self.working_directory.name))
11 | 
12 |     def test_update_fails_when_database_not_created(self):
13 |         with self.assertRaises(DatabaseNotInitializedError):
14 |             self.hammer.update_data()
15 | 


--------------------------------------------------------------------------------
/tests/test_limited_repository.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import datetime
 3 | 
 4 | import git
 5 | 
 6 | from .hammer_test import HammerTest
 7 | 
 8 | 
 9 | class HammerLimitedTest(HammerTest):
10 |     def setUp(self):
11 |         super().setUp()
12 |         self.start_date = datetime.datetime(2018, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)
13 |         self.hammer.add_repository(os.path.join(self.current_directory, 'data', 'repository'),
14 |                                    os.path.join(self.current_directory, 'data', 'repo-config.json'),
15 |                                    earliest_date=self.start_date)
16 | 
17 |     def test_limiting_by_date_includes_only_commits_after(self):
18 |         commits = list(self.hammer.iter_individual_commits())
19 |         self.assertEqual(len(commits), 3)
20 | 
21 |     def test_line_counts_are_correct_in_date_limited_repository(self):
22 |         authors = {author.name: author for author in self.hammer.iter_authors()}
23 |         self.assertEqual(self.hammer.head_commit().line_counts, {
24 |             authors['Author A']: 7,
25 |             authors['Author B']: 9,
26 |             authors['Author C']: 2
27 |         })
28 | 
29 |     def test_updating_project_does_not_add_new_commits(self):
30 |         self.hammer.update_data()
31 |         commits = list(self.hammer.iter_individual_commits())
32 |         self.assertEqual(len(commits), 3)
33 | 
34 |     def test_updating_brings_in_later_commits_but_not_excluded_ones(self):
35 |         other_hammer = self._make_hammer('otherTest',
36 |                                          database_url='sqlite:///' + self.working_directory.name + '/other.sqlite')
37 |         git_repository = git.Repo.clone_from(os.path.join(self.current_directory, 'data', 'repository'),
38 |                                              os.path.join(self.working_directory.name, 'worktree'),
39 |                                              branch='december', single_branch=True)
40 |         other_hammer.add_repository(os.path.join(self.working_directory.name, 'worktree'),
41 |                                     earliest_date=self.start_date)
42 |         initial_commits = list(other_hammer.iter_individual_commits())
43 |         self.assertEqual(len(initial_commits), 1)
44 |         git_repository.remote().fetch('+refs/heads/master:refs/remotes/origin/master')
45 |         git_repository.create_head('master', git_repository.remote().refs.master)
46 |         git_repository.heads.master.checkout()
47 |         other_hammer.update_data()
48 |         updated_commits = list(other_hammer.iter_individual_commits())
49 |         self.assertEqual(len(updated_commits), 3)
50 | 


--------------------------------------------------------------------------------
/tests/test_multiple_projects.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from githammer import iter_all_project_names
 4 | 
 5 | from .hammer_test import HammerTest
 6 | 
 7 | 
 8 | class HammerMultipleProjectsTest(HammerTest):
 9 | 
10 |     def _create_second_project(self):
11 |         self.otherHammer = self._make_hammer('otherTest')
12 |         self.otherHammer.add_repository(os.path.join(self.current_directory, 'data', 'subrepository'))
13 | 
14 |     def setUp(self):
15 |         super().setUp()
16 |         self.hammer.add_repository(os.path.join(self.current_directory, 'data', 'repository'))
17 | 
18 |     def test_second_project_is_created(self):
19 |         self._create_second_project()
20 |         self.assertEqual(self.otherHammer.project_name, 'otherTest')
21 | 
22 |     def test_projects_are_inserted_in_database(self):
23 |         self._create_second_project()
24 |         project_names = list(iter_all_project_names(self.database_url))
25 |         self.assertEqual(sorted(project_names), ['otherTest', 'test'])
26 | 
27 |     def test_commits_from_other_projects_are_not_included(self):
28 |         self._create_second_project()
29 |         with self.assertRaises(StopIteration):
30 |             self._fetch_commit(HammerMultipleProjectsTest._main_repo_initial_commit_hexsha, hammer=self.otherHammer)
31 | 
32 |     def test_authors_from_other_projects_are_not_included(self):
33 |         self._create_second_project()
34 |         authors = list(self.otherHammer.iter_authors())
35 |         self.assertEqual(len(authors), 1)
36 | 


--------------------------------------------------------------------------------
/tests/test_multiple_repositories.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import datetime
 3 | 
 4 | from githammer import Frequency
 5 | 
 6 | from .hammer_test import HammerTest
 7 | 
 8 | 
 9 | class HammerMultipleRepositoriesTest(HammerTest):
10 |     def setUp(self):
11 |         super().setUp()
12 |         self.hammer.add_repository(os.path.join(self.current_directory, 'data', 'repository'),
13 |                                    os.path.join(self.current_directory, 'data', 'repo-config.json'))
14 |         self.hammer.add_repository(os.path.join(self.current_directory, 'data', 'subrepository'))
15 |         self._expected_dates = [
16 |             datetime.datetime(2017, 11, 22, 7, 22, 33, tzinfo=datetime.timezone.utc),
17 |             datetime.datetime(2017, 12, 4, 7, 10, 11, tzinfo=datetime.timezone.utc),
18 |             datetime.datetime(2017, 12, 6, 3, 33, 44, tzinfo=datetime.timezone.utc),
19 |             datetime.datetime(2017, 12, 14, 10, 54, 55, tzinfo=datetime.timezone.utc)
20 |         ]
21 |         self._expected_offsets = [14400, 7200, -18000, 14400]
22 | 
23 |     def test_commits_are_combined_with_correct_dates(self):
24 |         initial_commits = list(self.hammer.iter_commits())[:4]
25 |         self.assertEqual([commit.commit_time for commit in initial_commits], self._expected_dates)
26 |         self.assertEqual([commit.commit_time_utc_offset for commit in initial_commits], self._expected_offsets)
27 | 
28 |     def test_combined_commits_are_produced_with_correct_frequency(self):
29 |         initial_commits = list(self.hammer.iter_commits(frequency=Frequency.weekly))[:3]
30 |         del self._expected_dates[2]
31 |         del self._expected_offsets[2]
32 |         self.assertEqual([commit.commit_time for commit in initial_commits], self._expected_dates)
33 |         self.assertEqual([commit.commit_time_utc_offset for commit in initial_commits], self._expected_offsets)
34 | 


--------------------------------------------------------------------------------
/tests/test_shallow_repository.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import git
 3 | 
 4 | from .hammer_test import HammerTest
 5 | 
 6 | 
 7 | class HammerShallowTest(HammerTest):
 8 |     def setUp(self):
 9 |         super().setUp()
10 |         self.git_repository = git.Repo.clone_from('file://' + os.path.join(self.current_directory, 'data', 'repository'),
11 |                                                   os.path.join(self.working_directory.name, 'worktree'),
12 |                                                   depth=1)
13 |         self.hammer.add_repository(os.path.join(self.working_directory.name, 'worktree'),
14 |                                    os.path.join(self.current_directory, 'data', 'repo-config.json'))
15 | 
16 |     def test_shallow_clone_has_only_one_commit(self):
17 |         commits = list(self.hammer.iter_individual_commits())
18 |         self.assertEqual(len(commits), 1)
19 | 
20 |     def test_shallow_clone_has_correct_counts(self):
21 |         commit = self._fetch_commit(HammerShallowTest._main_repo_head_commit_hexsha)
22 |         line_counts = commit.line_counts.values()
23 |         self.assertEqual(sorted(line_counts), [18])
24 | 


--------------------------------------------------------------------------------
/tests/test_single_repository.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from githammer import iter_sources_and_tests
 4 | 
 5 | from .hammer_test import HammerTest
 6 | 
 7 | 
 8 | class HammerRepositoryTest(HammerTest):
 9 | 
10 |     def setUp(self):
11 |         super().setUp()
12 |         self.hammer.add_repository(os.path.join(self.current_directory, 'data', 'repository'),
13 |                                    os.path.join(self.current_directory, 'data', 'repo-config.json'))
14 | 
15 |     def test_project_name_is_property_of_hammer_object(self):
16 |         self.assertEqual(self.hammer.project_name, 'test')
17 | 
18 |     def test_repository_is_processed_into_database_after_adding(self):
19 |         self.assertIsNotNone(self.hammer.head_commit())
20 | 
21 |     def test_commit_timestamps_have_correct_time(self):
22 |         initial_commit = self._fetch_commit(HammerRepositoryTest._main_repo_initial_commit_hexsha)
23 |         self.assertEqual(initial_commit.commit_time_tz().hour, 11)
24 | 
25 |     def test_initial_commit_line_counts_are_correct(self):
26 |         initial_commit = self._fetch_commit(HammerRepositoryTest._main_repo_initial_commit_hexsha)
27 |         author = initial_commit.author
28 |         self.assertEqual(initial_commit.line_counts[author], 14)
29 | 
30 |     def test_second_commit_line_counts_are_correct(self):
31 |         initial_commit = self._fetch_commit(HammerRepositoryTest._main_repo_initial_commit_hexsha)
32 |         second_commit = self._fetch_commit(HammerRepositoryTest._main_repo_second_commit_hexsha)
33 |         self.assertEqual(second_commit.line_counts[initial_commit.author], 10)
34 |         self.assertEqual(second_commit.line_counts[second_commit.author], 4)
35 | 
36 |     def test_sources_are_iterated_based_on_configuration(self):
37 |         repository_path = os.path.join(self.current_directory, 'data', 'repository')
38 |         configuration_path = os.path.join(self.current_directory, 'data', 'repo-config.json')
39 |         files = list(iter_sources_and_tests(repository_path, configuration_path))
40 |         file_names = [name for (file_type, name) in files]
41 |         self.assertIn(('source-file', 'file1.txt'), files)
42 |         self.assertNotIn('file.dat', file_names)
43 | 
44 |     def test_test_lines_are_counted_correctly(self):
45 |         test_commit = self._fetch_commit(HammerRepositoryTest._main_repo_test_commit_hexsha)
46 |         author = test_commit.author
47 |         self.assertEqual(self.hammer.head_commit().test_counts, {author: 1})
48 | 
49 |     def test_line_counts_are_correct_after_merge(self):
50 |         authors = {author.name: author for author in self.hammer.iter_authors()}
51 |         self.assertEqual(self.hammer.head_commit().line_counts, {
52 |             authors['Author A']: 7,
53 |             authors['Author B']: 9,
54 |             authors['Author C']: 2
55 |         })
56 | 


--------------------------------------------------------------------------------
/tests/test_submodule.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import git
 3 | 
 4 | from .hammer_test import HammerTest
 5 | 
 6 | 
 7 | class HammerSubmoduleTest(HammerTest):
 8 | 
 9 |     def setUp(self):
10 |         super().setUp()
11 |         git.Repo.clone_from(os.path.join(self.current_directory, 'data', 'repository'),
12 |                             os.path.join(self.working_directory.name, 'worktree'))
13 |         repository = git.Repo(os.path.join(self.working_directory.name, 'worktree'))
14 |         git.Submodule.add(repository, 'subrepo', 'subrepo',
15 |                           os.path.join(self.current_directory, 'data', 'subrepository'))
16 |         author = git.Actor('Author A', 'a@example.com')
17 |         repository.index.commit('Add subrepo', author=author)
18 | 
19 |     def test_repository_with_added_submodule_is_understood(self):
20 |         self.hammer.add_repository(os.path.join(self.working_directory.name, 'worktree'))
21 |         self.assertIsNotNone(self.hammer.head_commit())
22 | 
23 |     def test_submodule_in_initial_commit_is_understood(self):
24 |         submodule_repository = git.Repo.init(os.path.join(self.working_directory.name, 'initial_submodule'))
25 |         git.Submodule.add(submodule_repository, 'subrepo', 'subrepo',
26 |                           os.path.join(self.current_directory, 'data', 'subrepository'))
27 |         author = git.Actor('Author B', 'b@example.com')
28 |         submodule_repository.index.commit('Initial commit', author=author)
29 |         self.hammer.add_repository(os.path.join(self.working_directory.name, 'initial_submodule'))
30 |         commit = next(self.hammer.iter_individual_commits())
31 |         self.assertEqual(commit.line_counts, {commit.author: 3})
32 | 


--------------------------------------------------------------------------------
/tests/test_update.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import git
 3 | 
 4 | from .hammer_test import HammerTest
 5 | 
 6 | 
 7 | class HammerUpdateTest(HammerTest):
 8 | 
 9 |     def _update_from_old_state(self):
10 |         self.git_repository.remote().fetch('+refs/heads/master:refs/remotes/origin/master')
11 |         self.git_repository.create_head('master', self.git_repository.remote().refs.master)
12 |         self.git_repository.heads.master.checkout()
13 |         self.hammer.update_data()
14 | 
15 |     def setUp(self):
16 |         super().setUp()
17 |         self.git_repository = git.Repo.clone_from(os.path.join(self.current_directory, 'data', 'repository'),
18 |                                                   os.path.join(self.working_directory.name, 'worktree'),
19 |                                                   branch='old-state', single_branch=True)
20 |         self.hammer.add_repository(os.path.join(self.working_directory.name, 'worktree'))
21 | 
22 |     def test_clone_produced_expected_result(self):
23 |         commits = list(self.hammer.iter_individual_commits())
24 |         self.assertEqual(len(commits), 1)
25 |         self.assertEqual(commits[0].hexsha, HammerUpdateTest._main_repo_initial_commit_hexsha)
26 | 
27 |     def test_correct_statistics_were_computed_for_old_state(self):
28 |         line_counts = self.hammer.head_commit().line_counts
29 |         self.assertEqual(len(line_counts), 1)
30 |         initial_commit = self._fetch_commit(HammerUpdateTest._main_repo_initial_commit_hexsha)
31 |         self.assertIn(initial_commit.author, line_counts)
32 |         self.assertEqual(line_counts[initial_commit.author], 14)
33 | 
34 |     def test_update_after_repository_updated_brings_in_new_commits(self):
35 |         self._update_from_old_state()
36 |         commits = list(self.hammer.iter_individual_commits())
37 |         self.assertGreaterEqual(len(commits), 2)
38 |         self.assertEqual(commits[0].hexsha, HammerUpdateTest._main_repo_initial_commit_hexsha)
39 |         self.assertEqual(commits[1].hexsha, HammerUpdateTest._main_repo_second_commit_hexsha)
40 | 
41 |     def test_update_after_repository_updated_computes_correct_statistics(self):
42 |         self._update_from_old_state()
43 |         initial_commit = self._fetch_commit(HammerUpdateTest._main_repo_initial_commit_hexsha)
44 |         second_commit = self._fetch_commit(HammerUpdateTest._main_repo_second_commit_hexsha)
45 |         self.assertEqual(second_commit.line_counts[initial_commit.author], 10)
46 |         self.assertEqual(second_commit.line_counts[second_commit.author], 4)
47 | 


--------------------------------------------------------------------------------