├── .gitignore ├── .mailmap ├── .travis.yml ├── LICENSE ├── README.md ├── alembic.ini ├── alembic ├── README ├── env.py ├── script.py.mako └── versions │ └── d95efca6f334_add_start_time_to_repository_object.py ├── git-hammer-config.json ├── githammer ├── __init__.py ├── __main__.py ├── combinedcommit.py ├── config.py ├── countdict.py ├── dbtypes.py ├── frequency.py ├── hammer.py └── summary │ ├── __init__.py │ ├── graph.py │ └── table.py ├── requirements.in ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── check_regression.py ├── data ├── .gitignore ├── repo-config.json ├── repository │ ├── HEAD │ ├── config │ ├── description │ ├── info │ │ ├── exclude │ │ └── refs │ ├── objects │ │ ├── info │ │ │ ├── commit-graph │ │ │ └── packs │ │ └── pack │ │ │ ├── pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.bitmap │ │ │ ├── pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.idx │ │ │ └── pack-ee6956a7f3425f41f1defd4327f7f84516571ff8.pack │ ├── packed-refs │ └── refs │ │ └── .keep └── subrepository │ ├── HEAD │ ├── config │ ├── description │ ├── info │ ├── exclude │ └── refs │ ├── objects │ ├── info │ │ ├── commit-graph │ │ └── packs │ └── pack │ │ ├── pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.bitmap │ │ ├── pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.idx │ │ └── pack-7fdd2a1dca94173e188275a6bc315dbc34653b99.pack │ ├── packed-refs │ └── refs │ └── .keep ├── hammer_test.py ├── test_frequency.py ├── test_init.py ├── test_limited_repository.py ├── test_multiple_projects.py ├── test_multiple_repositories.py ├── test_shallow_repository.py ├── test_single_repository.py ├── test_submodule.py └── test_update.py /.gitignore: -------------------------------------------------------------------------------- 1 | ### 2 | ### Python 3 | ### 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 
| eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # SageMath parsed files 92 | *.sage.py 93 | 94 | # Environments 95 | .env 96 | .venv 97 | env/ 98 | venv*/ 99 | ENV/ 100 | env.bak/ 101 | venv.bak/ 102 | 103 | # Spyder project settings 104 | .spyderproject 105 | .spyproject 106 | 107 | # Rope project settings 108 | .ropeproject 109 | 110 | # mkdocs documentation 111 | /site 112 | 113 | # mypy 114 | .mypy_cache/ 115 | .dmypy.json 116 | dmypy.json 117 | 118 | # Pyre type checker 119 | .pyre/ 120 | 121 | ### 122 | ### IDEs 123 | ### 124 | 125 | .vscode/ 126 | .idea/ 127 | 128 | ### 129 | ### Local 130 | ### 131 | 132 | # To allow keeping the database in this directory 133 | *.sqlite 134 | -------------------------------------------------------------------------------- /.mailmap: 
-------------------------------------------------------------------------------- 1 | Jaakko Kangasharju 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - '3.7' 4 | - '3.9' 5 | script: 6 | - coverage run --source githammer --omit 'githammer/__main__.py,githammer/summary/*.py' -m unittest tests 7 | after_success: 8 | - codecov 9 | deploy: 10 | provider: pypi 11 | user: __token__ 12 | password: 13 | secure: BZgxmYYR84NSaOsZC9rKiRC+dA98uEHtOtmTg2iBivT/sVDeGOMEzFVdoITPziD5nv+BCeMArEdQfendVppI/dM6cWpRA9hoWMZhuOVQWytr/02mcl5HRpTObe4n1AXRDlhWiopsrKxz63NenlN8FichXUHPtYS5rKV0jwedh7SllzMJkqQITw+d70GjA5Y0HhJ/Eb3dW2CZCAksTv6Y8sjwlEy0Kqwg4jElARs1lVYFORS2waYwty/7W04S7v8Zd/qDjV9tf5IoFMAGw8Pk2PfC/AsWfFiZT1R3AOcK6H8yrtAYa0NMPNz/1y/Cb0o0vlJqpsblOO/LaXR4RtSlvKiXcQ/r6CknFhqL/rfFjfMfrseGHyJUmbwyPGnAs6X4Tb8WaDGhcGkcCRLOClPcvuBbrJIt8yEanRAG9NnKiOyg0iKpa87PDaqu12mYy7HmQl5aQIe+hoFpC5o/hUa0oRRxyPtm2FdiGwRXyOwbNq9BrxWjQfZGSBFLZZfqny09Qx3YQzZ8uPNhz8xHwgNz9bVH5tY3nqhJJbgcwN6jj6OU1ZtmCrqCoYhZiabdWYiUosDweAHFEG66FHD5XFhoawgIQFjb+BAajbRJ3ggrjraVgNe9NL1+01Ri05kpMO5ro6/QtNbQVJF6s+F+AaI5aU4FTkHHRcZGxk/GElBDvTo= 14 | distributions: sdist bdist_wheel 15 | skip_existing: true 16 | skip_cleanup: true 17 | on: 18 | branch: master 19 | tags: true 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 
12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Git Hammer 2 | 3 | [![Build Status](https://travis-ci.com/asharov/git-hammer.svg?branch=master)](https://travis-ci.com/asharov/git-hammer) 4 | [![codecov](https://codecov.io/gh/asharov/git-hammer/branch/master/graph/badge.svg)](https://codecov.io/gh/asharov/git-hammer) 5 | [![PyPI](https://img.shields.io/pypi/v/git-hammer)](https://pypi.org/project/git-hammer/) 6 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 7 | 8 | Git Hammer is a statistics tool for projects in git repositories. 9 | Its major feature is tracking the number of lines authored by 10 | each person for every commit, but it currently includes some 11 | other useful statistics as well, and the data that it collects 12 | could be used in multiple new ways as well. 13 | 14 | Git Hammer is under active maintenance. New features appear when 15 | a need or desire for them exists. 
If Git Hammer lacks some 16 | feature you would like, all kinds of contributions are welcome, 17 | from simple feature suggestions to complete pull requests 18 | implementing the feature. 19 | 20 | ## Setup 21 | 22 | By default, Git Hammer stores the historical information from 23 | the repository in an SQLite database file in the current 24 | directory. If you wish to change this default, set the 25 | `DATABASE_URL` environment variable to a database URL 26 | according to the [SQLAlchemy engine documentation](https://docs.sqlalchemy.org/en/latest/core/engines.html). 27 | This database will be created if it does not already exist. 28 | Note that if you wish to use a database other than SQLite, 29 | you may need to install the appropriate Python module to 30 | connect to the database. 31 | 32 | You will need Python 3, at least version 3.5. It is a good 33 | idea to set up a virtual environment, like this: 34 | ```bash 35 | python3 -m venv venv 36 | source venv/bin/activate 37 | ``` 38 | Run these commands wherever you want to run Git Hammer. If 39 | you only want to use Git Hammer, you can install it with 40 | `pip`: 41 | ```bash 42 | pip install git-hammer 43 | ``` 44 | If you want to use the latest development version or 45 | contribute to Git Hammer development, you need to clone 46 | this repository and run 47 | ```bash 48 | pip install -r requirements.txt 49 | ``` 50 | in the directory where you cloned Git Hammer (in this 51 | case you should create the virtual environment above in 52 | that directory as well). The rest of the commands below 53 | assume that one of these has been done. 54 | 55 | ## Creating a Project 56 | 57 | Now pick some git repository to run Git Hammer on. The examples 58 | below use a hypothetical project called "baffle". You should 59 | replace the name with your own. 
60 | 61 | ```bash 62 | python -m githammer init-project baffle ~/projects/baffle 63 | ``` 64 | This will create the database containing the project baffle 65 | from the repository directory (here `~/projects/baffle`; 66 | replace that with the path to your repository). Git Hammer 67 | will print out a progress report while it goes through all 68 | the commits in the repository. 69 | 70 | Usually, you want your main development branch to be checked 71 | out in the repository, and not change the checked-out branch 72 | when updating Git Hammer data. This makes the statistics more 73 | relevant for the whole development team. 74 | 75 | When the repository gets new development, first update the 76 | code in the repository to the latest version, and then run 77 | ```bash 78 | python -m githammer update-project baffle 79 | ``` 80 | This will process all the new commits that were not yet seen 81 | into the database. 82 | 83 | If the repository is very old, with much history, you might 84 | not be interested in capturing all of it. `init-project` 85 | has the option `--earliest-commit-date` that provides a date 86 | so that commits prior to that date are not included. This 87 | would be used like 88 | ```bash 89 | python -m githammer init-project baffle ~/projects/baffle --earliest-commit-date 2018-01-01 90 | ``` 91 | It is currently not possible to later add commits that were 92 | excluded by date when the repository was added. 93 | 94 | ## Showing Statistics 95 | 96 | After the project has been initialized and the repository added, 97 | you can show some information on it. First try out 98 | ```bash 99 | python -m githammer summary baffle 100 | ``` 101 | This will print out three tables: The number of commits for 102 | each person, the number of lines of code written by each 103 | person in the head version, and the number of tests written 104 | by each person in the head version. 
This last is only printed 105 | if the repository configuration includes test recognition (see 106 | below). 107 | 108 | There are a few graphs that Git Hammer can display. To see the 109 | types of supported graphs, enter 110 | ```bash 111 | python -m githammer graph --help 112 | ``` 113 | The graphs are 114 | 115 | Type | Description 116 | -----|------------ 117 | line-count | Number of lines in the project over time 118 | line-author-count | Same as above, except split per author 119 | test-count | Number of tests in the project over time 120 | test-author-count | Same as above, except split per author 121 | day-of-week | A histogram showing the number of commits for each day of the week 122 | time-of-day | A histogram showing the number of commits for each hour of the day 123 | 124 | ## Configuring Sources and Tests 125 | 126 | By default, Git Hammer assumes that every file in the repository 127 | is a source file and that there are no tests. This can be 128 | modified by creating a configuration file. The configuration 129 | file is JSON having some predefined keys: 130 | ```json 131 | { 132 | "sourceFiles": [ 133 | "Sources/**/*.py", 134 | "Tests/**/*.py", 135 | ... 136 | ], 137 | "excludedSourceFiles": [ 138 | "Sources/Contrib/**" 139 | ], 140 | "testFiles": [ 141 | "Tests/**/*.py" 142 | ], 143 | "testLineRegex": "def test_" 144 | } 145 | ``` 146 | 147 | Here, `sourceFiles` is a list of patterns that match the source 148 | files. Any file not matching one of these patterns is not 149 | considered by Git Hammer. If `sourceFiles` captures too many 150 | files, for instance autogenerated sources, `excludedSourceFiles` 151 | is a list of patterns that will not be considered source even 152 | if they match some `sourceFiles` pattern. 153 | 154 | To include test counts, `testFiles` needs to be specified. 
This 155 | is, again, a list of patterns matching files that contain tests 156 | (it is up to you if you wish to define this to mean unit tests, 157 | integration tests, UI tests, etc.). Git Hammer will look inside 158 | each of the test files. Any line matching the Python regular 159 | expression `testLineRegex` is counted as one test. So 160 | `testLineRegex` should typically match whatever acts as the 161 | header of a test. Here, it is the definition of a function 162 | whose name starts with `test_`. Other projects, and especially 163 | other languages, will have different conventions. 164 | 165 | All the file name patterns above (`sourceFiles`, 166 | `excludedSourceFiles`, `testFiles`) are glob patterns as 167 | defined by the 168 | [globber library](https://github.com/asharov/globber). 169 | 170 | The configuration file can be given as an option to the 171 | `init-project` command: 172 | ```bash 173 | python -m githammer init-project baffle ~/projects/baffle --configuration ./baffle-config.json 174 | ``` 175 | If the `--configuration` option is not given, but the repository 176 | contains a file named `git-hammer-config.json`, this file will 177 | be read as the configuration. This way you can keep the Git 178 | Hammer configuration for a repository in that repository. 179 | 180 | Note: The configuration file path, as well as the repository 181 | path, will be stored in the database, so they should not be 182 | moved. If the configuration changes, data that was already 183 | in the database will not be reprocessed with the new 184 | configuration. 185 | 186 | There is also a command to check what the effects of a 187 | configuration are. Run 188 | ```bash 189 | python -m githammer list-sources ~/projects/baffle --configuration ./baffle-config.json 190 | ``` 191 | to print out a list of all files considered source or test files, 192 | and for each test file, the lines considered to be tests.
A missing 193 | `--configuration` option is treated in the same way as with 194 | `init-project` above. 195 | 196 | A partial output of the `list-sources` command on the Git Hammer 197 | repository looks like this: 198 | ``` 199 | S: githammer/dbtypes.py 200 | S: githammer/frequency.py 201 | S: githammer/hammer.py 202 | T: tests/__init__.py 203 | T: tests/check_regression.py 204 | T: tests/hammer_test.py 205 | T: tests/test_init.py 206 | |--- def test_plain_init_does_not_create_database(self): 207 | |--- def test_update_fails_when_database_not_created(self): 208 | ``` 209 | Source files are marked with `S`, test files with `T`, and after 210 | each test file, its test lines are printed indented with `|---`. 211 | 212 | ## Multi-Repository Projects 213 | 214 | Sometimes, a team works on multiple repositories that all still 215 | belong to the same project. For instance, a piece of functionality 216 | may be better to split off into a library in an independent 217 | repository. Git Hammer supports such projects by not limiting 218 | the project data to a single repository. 219 | 220 | To add another repository to an existing project, just use 221 | `add-repository`: 222 | ```bash 223 | python -m githammer add-repository baffle ~/projects/baffle-common 224 | ``` 225 | This will process the new repository, adding it to the project 226 | database. After this, any summary information will include 227 | data from all repositories of the project. Like `init-project`, 228 | `add-repository` also accepts the `--configuration` and 229 | `--earliest-commit-date` options with the same semantics for 230 | the added repository. 231 | 232 | ## Database Migrations 233 | 234 | If you update Git Hammer, it is possible that the database 235 | schema is updated in the new version. This means that you will 236 | need to migrate any existing databases to the latest version. 
237 | Migration is performed by running the following command, replacing `YOUR_DATABASE_URL` with the SQLAlchemy URL of your database: 238 | ```bash 239 | HAMMER_DATABASE_URL=YOUR_DATABASE_URL alembic upgrade head 240 | ``` 241 | (If you haven't installed Git Hammer with `pip`, run this command 242 | in the project directory and add `PYTHONPATH=.` at the beginning.) 243 | 244 | It is safe to run this even if the database schema has not changed 245 | in the update, so there is no need to try and figure that out before 246 | running the migration. 247 | 248 | ## License 249 | 250 | Git Hammer is licensed under the Apache Software License, 251 | version 2.0. See the LICENSE file for precise license terms 252 | and conditions. 253 | -------------------------------------------------------------------------------- /alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to alembic/versions.
When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | # sqlalchemy.url = driver://user:pass@localhost/dbname 39 | 40 | 41 | [post_write_hooks] 42 | # post_write_hooks defines scripts or Python functions that are run 43 | # on newly generated revision scripts. See the documentation for further 44 | # detail and examples 45 | 46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 47 | # hooks=black 48 | # black.type=console_scripts 49 | # black.entrypoint=black 50 | # black.options=-l 79 51 | 52 | # Logging configuration 53 | [loggers] 54 | keys = root,sqlalchemy,alembic 55 | 56 | [handlers] 57 | keys = console 58 | 59 | [formatters] 60 | keys = generic 61 | 62 | [logger_root] 63 | level = WARN 64 | handlers = console 65 | qualname = 66 | 67 | [logger_sqlalchemy] 68 | level = WARN 69 | handlers = 70 | qualname = sqlalchemy.engine 71 | 72 | [logger_alembic] 73 | level = INFO 74 | handlers = 75 | qualname = alembic 76 | 77 | [handler_console] 78 | class = StreamHandler 79 | args = (sys.stderr,) 80 | level = NOTSET 81 | formatter = generic 82 | 83 | [formatter_generic] 84 | format = %(levelname)-5.5s [%(name)s] %(message)s 85 | datefmt = %H:%M:%S 86 | -------------------------------------------------------------------------------- /alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. 
-------------------------------------------------------------------------------- /alembic/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from sqlalchemy import engine_from_config 4 | from sqlalchemy import pool 5 | 6 | from alembic import context 7 | 8 | from githammer import dbtypes 9 | 10 | import os 11 | 12 | # this is the Alembic Config object, which provides 13 | # access to the values within the .ini file in use. 14 | config = context.config 15 | 16 | # Interpret the config file for Python logging. 17 | # This line sets up loggers basically. 18 | fileConfig(config.config_file_name) 19 | 20 | # add your model's MetaData object here 21 | # for 'autogenerate' support 22 | # from myapp import mymodel 23 | # target_metadata = mymodel.Base.metadata 24 | target_metadata = dbtypes.Base.metadata 25 | 26 | # other values from the config, defined by the needs of env.py, 27 | # can be acquired: 28 | # my_important_option = config.get_main_option("my_important_option") 29 | # ... etc. 30 | 31 | 32 | def run_migrations_offline(): 33 | """Run migrations in 'offline' mode. 34 | 35 | This configures the context with just a URL 36 | and not an Engine, though an Engine is acceptable 37 | here as well. By skipping the Engine creation 38 | we don't even need a DBAPI to be available. The URL is read from the HAMMER_DATABASE_URL environment variable; a KeyError is raised if it is not set. 39 | 40 | Calls to context.execute() here emit the given string to the 41 | script output. 42 | 43 | """ 44 | url = os.environ['HAMMER_DATABASE_URL'] 45 | context.configure( 46 | url=url, 47 | target_metadata=target_metadata, 48 | literal_binds=True, 49 | dialect_opts={"paramstyle": "named"}, 50 | ) 51 | 52 | with context.begin_transaction(): 53 | context.run_migrations() 54 | 55 | 56 | def run_migrations_online(): 57 | """Run migrations in 'online' mode. 58 | 59 | In this scenario we need to create an Engine 60 | and associate a connection with the context. The Engine URL is taken from the HAMMER_DATABASE_URL environment variable (KeyError if unset), overriding any sqlalchemy.url in the .ini section.
61 | 62 | """ 63 | configuration = config.get_section(config.config_ini_section) 64 | configuration['sqlalchemy.url'] = os.environ['HAMMER_DATABASE_URL'] 65 | connectable = engine_from_config( 66 | configuration, 67 | prefix="sqlalchemy.", 68 | poolclass=pool.NullPool, 69 | ) 70 | 71 | with connectable.connect() as connection: 72 | context.configure( 73 | connection=connection, target_metadata=target_metadata 74 | ) 75 | 76 | with context.begin_transaction(): 77 | context.run_migrations() 78 | 79 | 80 | if context.is_offline_mode(): 81 | run_migrations_offline() 82 | else: 83 | run_migrations_online() 84 | -------------------------------------------------------------------------------- /alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /alembic/versions/d95efca6f334_add_start_time_to_repository_object.py: -------------------------------------------------------------------------------- 1 | """Add start time to Repository object 2 | 3 | Revision ID: d95efca6f334 4 | Revises: 5 | Create Date: 2020-01-06 16:24:08.055349 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic.
13 | revision = 'd95efca6f334' 14 | down_revision = None 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | op.add_column('repositories', sa.Column('start_time', sa.DateTime(), nullable=True)) 21 | op.add_column('repositories', sa.Column('start_time_utc_offset', sa.Integer(), nullable=True)) 22 | 23 | 24 | def downgrade(): 25 | op.drop_column('repositories', 'start_time_utc_offset') 26 | op.drop_column('repositories', 'start_time') 27 | -------------------------------------------------------------------------------- /git-hammer-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "sourceFiles": [ 3 | "githammer/**/*.py", 4 | "tests/*.py" 5 | ], 6 | "testFiles": [ 7 | "tests/*.py" 8 | ], 9 | "testLineRegex": "def test_" 10 | } 11 | -------------------------------------------------------------------------------- /githammer/__init__.py: -------------------------------------------------------------------------------- 1 | from .frequency import Frequency 2 | from .hammer import Hammer, DatabaseNotInitializedError, OldDatabaseSchemaError 3 | from .hammer import iter_all_project_names, iter_sources_and_tests 4 | -------------------------------------------------------------------------------- /githammer/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Jaakko Kangasharju 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def add_repository(options):
    """Add a repository to the project named in *options*.

    Reads ``options.project``, ``options.repository``,
    ``options.configuration`` and ``options.earliest_commit_date``. When an
    earliest commit date is given it is parsed, and a date that carries no
    usable UTC offset is interpreted as UTC before being passed on.
    """
    hammer = make_hammer(options.project)
    if not options.earliest_commit_date:
        hammer.add_repository(options.repository, options.configuration)
        return

    earliest = parse(options.earliest_commit_date)
    lacks_offset = earliest.tzinfo is None or earliest.tzinfo.utcoffset(earliest) is None
    if lacks_offset:
        earliest = earliest.replace(tzinfo=datetime.timezone.utc)
    hammer.add_repository(options.repository, options.configuration, earliest_date=earliest)
# Command-line interface: every sub-command registers the function that
# implements it as the ``func`` default, which is invoked after parsing.
parser = argparse.ArgumentParser(prog='githammer',
                                 description='Extract statistics from Git repositories')
command_parsers = parser.add_subparsers()

init_parser = command_parsers.add_parser('init-project', help='Initialize a new project')
init_parser.add_argument('project', help='Name of the project to create')
init_parser.add_argument('repository', help='Git repository to create the project from')
init_parser.add_argument('-c', '--configuration', help='Path to the repository configuration file')
init_parser.add_argument('--earliest-commit-date', help='Ignore commits prior to this date')
init_parser.set_defaults(func=add_repository)

update_parser = command_parsers.add_parser('update-project', help='Update an existing project with new commits')
update_parser.add_argument('project', help='Name of the project to update')
update_parser.set_defaults(func=update_project)

add_parser = command_parsers.add_parser('add-repository', help='Add a repository to an existing project')
add_parser.add_argument('project', help='Project to add the repository to')
add_parser.add_argument('repository', help='Path to the git repository to add')
add_parser.add_argument('-c', '--configuration', help='Path to the repository configuration file')
add_parser.add_argument('--earliest-commit-date', help='Ignore commits prior to this date')
add_parser.set_defaults(func=add_repository)

project_list_parser = command_parsers.add_parser('list-projects', help='List names of existing projects')
project_list_parser.set_defaults(func=list_projects)

source_list_parser = command_parsers.add_parser('list-sources', help='List source files and test lines in repository')
source_list_parser.add_argument('repository', help='Git repository to examine')
source_list_parser.add_argument('-c', '--configuration', help='Path to the repository configuration file')
source_list_parser.set_defaults(func=list_sources)

graph_parser = command_parsers.add_parser('graph', help='Draw line count per committer graph')
graph_parser.add_argument('project', help='Name of the project to graph')
graph_parser.add_argument('type', help='The type of graph to make',
                          choices=['line-count', 'line-author-count', 'test-count', 'test-author-count', 'day-of-week',
                                   'time-of-day'])
graph_parser.add_argument('-o', '--output-file',
                          help='Name of the file to save the graph to. If omitted, graph is displayed on screen')
graph_parser.set_defaults(func=plot_graph)

summary_parser = command_parsers.add_parser('summary',
                                            help='Print summary information of the current state of the project')
summary_parser.add_argument('project', help='Name of the project to summarize')
summary_parser.add_argument('-o', '--output-file',
                            help='Name of the file to print the summary to. If omitted, summary is printed to standard output')
summary_parser.set_defaults(func=print_summary)

parsed_args = parser.parse_args()
# Sub-commands are optional to argparse here, so an invocation without one
# yields a namespace with no ``func``; report a usage error (exit status 2)
# instead of failing with an AttributeError traceback.
if not hasattr(parsed_args, 'func'):
    parser.error('a command is required')
parsed_args.func(parsed_args)
class CombinedCommit:
    """Aggregate view over the current commit of several repositories.

    The combined commit takes its time and UTC offset from the latest of the
    given commits, and its line and test counts are the per-author sums over
    all of them.
    """

    def __init__(self, commits):
        """Build the aggregate from *commits*; ``None`` entries are ignored.

        Raises:
            ValueError: if *commits* contains no actual commit.
        """
        present = [entry for entry in commits if entry is not None]
        # max() keeps the first of several equally late commits.
        latest = max(present, key=attrgetter('commit_time'))
        self.commit_time = latest.commit_time
        self.commit_time_utc_offset = latest.commit_time_utc_offset

        line_totals = {}
        test_totals = {}
        for entry in present:
            line_totals = add_count_dict(line_totals, entry.line_counts)
            test_totals = add_count_dict(test_totals, entry.test_counts)
        self.line_counts = line_totals
        self.test_counts = test_totals
pattern): 9 | if type(pattern) is str: 10 | return globber.match(pattern, file) 11 | elif type(pattern) is list: 12 | return any(_matches_file_pattern(file, p) for p in pattern) 13 | else: 14 | raise TypeError('Pattern {} not list or string'.format(pattern)) 15 | 16 | 17 | class Configuration: 18 | def __init__(self, file_path=None): 19 | if file_path: 20 | try: 21 | fp = open(file_path, 'r') 22 | except OSError as error: 23 | if error.errno == errno.ENOENT: 24 | config_json = {} 25 | else: 26 | raise error 27 | else: 28 | try: 29 | config_json = json.load(fp) 30 | finally: 31 | fp.close() 32 | else: 33 | config_json = {} 34 | if 'sourceFiles' in config_json: 35 | self.source_files = config_json['sourceFiles'] 36 | else: 37 | self.source_files = None 38 | if 'excludedSourceFiles' in config_json: 39 | self.excluded_source_files = config_json['excludedSourceFiles'] 40 | else: 41 | self.excluded_source_files = None 42 | if 'testFiles' in config_json: 43 | self.test_files = config_json['testFiles'] 44 | else: 45 | self.test_files = None 46 | if 'testLineRegex' in config_json: 47 | self.test_line_regex = re.compile(config_json['testLineRegex']) 48 | else: 49 | self.test_line_regex = None 50 | 51 | def is_source_file(self, path): 52 | is_included = self.source_files is None or _matches_file_pattern(path, self.source_files) 53 | is_excluded = self.excluded_source_files is not None and _matches_file_pattern(path, self.excluded_source_files) 54 | return is_included and not is_excluded 55 | 56 | def is_test_file(self, path): 57 | if not self.is_source_file(path): 58 | return False 59 | return self.test_files is not None and _matches_file_pattern(path, self.test_files) 60 | 61 | def iter_test_lines(self, path, lines): 62 | if not self.is_test_file(path): 63 | return 64 | for line in lines: 65 | if self.test_line_regex.search(line): 66 | yield line 67 | -------------------------------------------------------------------------------- /githammer/countdict.py: 
def normalize_count_dict(count_dict):
    """Return a new dict holding the entries of *count_dict* whose value is not 0."""
    normalized = {}
    for key, value in count_dict.items():
        if value != 0:
            normalized[key] = value
    return normalized


def subtract_count_dict(base_dict, dict_to_subtract):
    """Return *base_dict* minus *dict_to_subtract*, key by key.

    Keys missing from *base_dict* count as 0, and entries whose difference is
    0 are dropped. Neither argument is modified.
    """
    difference = base_dict.copy()
    for key in dict_to_subtract:
        difference[key] = difference.get(key, 0) - dict_to_subtract[key]
    return normalize_count_dict(difference)


def add_count_dict(base_dict, dict_to_add):
    """Return *base_dict* plus *dict_to_add*, key by key.

    Keys missing from *base_dict* count as 0, and entries whose sum is 0 are
    dropped. Neither argument is modified.
    """
    total = base_dict.copy()
    for key in dict_to_add:
        total[key] = total.get(key, 0) + dict_to_add[key]
    return normalize_count_dict(total)
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import datetime 16 | import re 17 | 18 | import git 19 | from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, orm 20 | from sqlalchemy_utils import JSONType 21 | from sqlalchemy.ext.declarative import declarative_base 22 | from sqlalchemy.orm import relationship 23 | from sqlalchemy.schema import MetaData 24 | 25 | from .config import Configuration 26 | 27 | 28 | def _time_offset_to_local_time(time, offset): 29 | timezone = datetime.timezone(datetime.timedelta(seconds=offset)) 30 | return time.replace(tzinfo=datetime.timezone.utc).astimezone(timezone) 31 | 32 | 33 | _naming_convention = { 34 | "ix": 'ix_%(column_0_label)s', 35 | "uq": "uq_%(table_name)s_%(column_0_name)s", 36 | "ck": "ck_%(table_name)s_%(constraint_name)s", 37 | "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", 38 | "pk": "pk_%(table_name)s" 39 | } 40 | _metadata = MetaData(naming_convention=_naming_convention) 41 | Base = declarative_base(metadata=_metadata) 42 | 43 | 44 | class Project(Base): 45 | __tablename__ = 'projects' 46 | 47 | project_name = Column(String, primary_key=True) 48 | 49 | 50 | class Repository(Base): 51 | __tablename__ = 'repositories' 52 | 53 | id = Column(Integer, primary_key=True) 54 | repository_path = Column(String) 55 | configuration_file_path = Column(String) 56 | head_commit_id = Column(String, ForeignKey('commits.hexsha')) 57 | start_time = Column(DateTime()) 58 | start_time_utc_offset = Column(Integer) 59 | 60 | head_commit = relationship('Commit', foreign_keys=[head_commit_id]) 61 | 
class Author(Base):
    """A commit author, keyed by canonical ``Name <email>`` string.

    ``aliases`` is a JSON column; the constructor calls visible in this
    project pass a list of alternative author strings.
    """
    __tablename__ = 'authors'
    # Splits "Name <email>" into the name part and the bracketed e-mail part.
    _name_regex = re.compile(r'^(.*)\s+(<.*>)$')

    canonical_name = Column(String, primary_key=True)
    aliases = Column(JSONType)

    @property
    def name(self):
        """The name part of ``canonical_name``, or None if it has no ``<email>`` suffix."""
        match = Author._name_regex.match(self.canonical_name)
        if match:
            return match.group(1)
        else:
            return None

    def __eq__(self, other):
        # Defer to the other operand instead of raising AttributeError when
        # compared with something that is not an Author.
        if not isinstance(other, Author):
            return NotImplemented
        return self.canonical_name == other.canonical_name and self.aliases == other.aliases

    def __hash__(self):
        return hash(self.canonical_name)

    def __repr__(self):
        # __repr__ must return a str; fall back to the full canonical name
        # when it does not have the "Name <email>" shape.
        name = self.name
        if name is not None:
            return name
        return str(self.canonical_name)
**kwargs): 125 | super(Commit, self).__init__(**kwargs) 126 | self._init_properties() 127 | 128 | @orm.reconstructor 129 | def _init_properties(self): 130 | self.line_counts = {} 131 | self.test_counts = {} 132 | 133 | def commit_time_tz(self): 134 | return _time_offset_to_local_time(self.commit_time, self.commit_time_utc_offset) 135 | 136 | 137 | Author.commits = relationship('Commit', order_by=Commit.commit_time, back_populates='author') 138 | 139 | 140 | class AuthorCommitDetail(Base): 141 | __tablename__ = 'authorcommit' 142 | 143 | author_name = Column(String, ForeignKey('authors.canonical_name'), primary_key=True) 144 | commit_id = Column(String, ForeignKey('commits.hexsha'), primary_key=True) 145 | line_count = Column(Integer, nullable=False) 146 | test_count = Column(Integer) 147 | 148 | author = relationship('Author') 149 | commit = relationship('Commit') 150 | -------------------------------------------------------------------------------- /githammer/frequency.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Jaakko Kangasharju 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class Frequency(Enum):
    """Length of a sampling interval: a day, a week, a month or a year."""
    daily = 1
    weekly = 2
    monthly = 3
    yearly = 4

    def next_instance(self, dt):
        """Return *dt* moved forward by one interval of this frequency."""
        step_units = {
            Frequency.daily: 'days',
            Frequency.weekly: 'weeks',
            Frequency.monthly: 'months',
            Frequency.yearly: 'years',
        }
        return dt + relativedelta(**{step_units[self]: 1})

    def start_of_interval(self, dt):
        """Return midnight at the start of the interval containing *dt*.

        Weeks start on Monday, months on the 1st and years on 1 January. The
        result carries the same tzinfo as *dt*.
        """
        first_day = dt.date()
        if self is Frequency.weekly:
            first_day -= datetime.timedelta(days=dt.weekday())
        elif self is Frequency.monthly:
            first_day = first_day.replace(day=1)
        elif self is Frequency.yearly:
            first_day = first_day.replace(month=1, day=1)
        return datetime.datetime.combine(first_day, datetime.time(tzinfo=dt.tzinfo))
def iter_all_project_names(database_url=_default_database_url):
    """Yield the name of every project stored in the database.

    This is a generator, so the database is only contacted once iteration
    starts.

    Args:
        database_url: URL of the database to read.

    Raises:
        DatabaseNotInitializedError: on first iteration, if the database does
            not exist.
    """
    engine = create_engine(database_url)
    _fail_unless_database_exists(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        for project in session.query(Project):
            yield project.project_name
    finally:
        # Also runs when the consumer stops iterating early or the query
        # fails, so the session is never left open.
        session.close()
def _process_lines_into_line_counts(self, repository, commit, path, lines, line_counts, test_counts):
    """Credit *lines* of *path* to the author of *commit*.

    Adds the number of lines to the author's entry in *line_counts*, and the
    number of those lines that the repository configuration reports as test
    lines to the author's entry in *test_counts*. Both dicts are updated in
    place.
    """
    author = self._names_to_authors[_author_line(commit)]
    line_counts[author] = line_counts.get(author, 0) + len(lines)
    test_line_total = sum(1 for _ in repository.configuration.iter_test_lines(path, lines))
    test_counts[author] = test_counts.get(author, 0) + test_line_total
def _make_full_commit_stats(self, repository, commit, need_full_blame=False):
    """Compute per-author line and test counts for the whole tree of *commit*.

    Args:
        repository: repository object whose configuration selects the source
            files to count.
        commit: the Git commit whose tree is walked.
        need_full_blame: when true, every source file is blamed so lines are
            credited to the authors who wrote them; otherwise all lines of
            every source file are credited to the author of *commit*.

    Returns:
        A ``(line_counts, test_counts)`` pair of dicts keyed by author, with
        zero entries removed.
    """
    stats_start_time = datetime.datetime.now()
    line_counts = {}
    test_counts = {}
    # Prune submodule entries from the walk. The previous predicate compared
    # the item to the Submodule class with ``is`` and so never matched.
    for git_object in commit.tree.traverse(prune=lambda item, depth: isinstance(item, git.Submodule)):
        if git_object.type != 'blob':
            continue
        if not repository.configuration.is_source_file(git_object.path):
            continue
        if need_full_blame:
            self._blame_blob_into_line_counts(repository, commit, git_object.path, line_counts, test_counts)
        else:
            lines = [line.decode('utf-8', 'ignore') for line in
                     io.BytesIO(git_object.data_stream.read()).readlines()]
            self._process_lines_into_line_counts(repository, commit, git_object.path, lines, line_counts,
                                                 test_counts)
    print('Commit {} stats time: {}'.format(commit.hexsha,
                                            datetime.datetime.now() - stats_start_time))
    return normalize_count_dict(line_counts), normalize_count_dict(test_counts)
def _add_author_alias_if_needed(self, repository, commit):
    """Register the commit's raw author line as an alias of a known author.

    Nothing happens when the author line is already present in
    ``self._names_to_authors``. Otherwise git is asked for the
    ``%aN <%aE>`` form of the commit's author, the author stored under that
    name is looked up, and the raw author line is recorded both in that
    author's ``aliases`` list and in the name map.

    Raises:
        KeyError: If the name reported by git is not in the name map.
    """
    raw_name = _author_line(commit)
    known_authors = self._names_to_authors
    if known_authors.get(raw_name):
        return
    resolved_name = repository.git_repository.git.show(
        commit.hexsha, format='%aN <%aE>', no_patch=True)
    canonical_author = known_authors[resolved_name]
    canonical_author.aliases.append(raw_name)
    known_authors[raw_name] = canonical_author
def _add_commit_line_counts(self, commit, line_counts, test_counts, session):
    """Attach computed counts to the stored commit and queue detail rows.

    The in-memory commit registered under ``commit.hexsha`` receives the two
    count dictionaries. One ``AuthorCommitDetail`` per author in
    ``line_counts`` is added to ``session``; its ``test_count`` is only set
    when the author has a truthy entry in ``test_counts``.
    """
    stored_commit = self._shas_to_commits[commit.hexsha]
    stored_commit.line_counts = line_counts
    stored_commit.test_counts = test_counts
    for author, line_total in line_counts.items():
        row = AuthorCommitDetail(
            author_name=author.canonical_name, commit_id=commit.hexsha, line_count=line_total)
        test_total = test_counts.get(author)
        if test_total:
            row.test_count = test_total
        session.add(row)
def _iter_branch(self, repository):
    """Return an iterator over a repository's stored history, oldest first.

    Starting at ``repository.head_commit_id``, the first entry of each
    commit's ``parent_ids`` is followed through ``self._shas_to_commits``.
    The walk stops at a commit without parents or at the first id that is
    not in the map.

    Args:
        repository: Object exposing ``head_commit_id``.

    Returns:
        An iterator yielding the collected commits from the oldest reached
        ancestor up to the head commit. It is empty when the head id is
        unset or unknown.
    """
    commits = []
    commit_id = repository.head_commit_id
    while commit_id:
        commit = self._shas_to_commits.get(commit_id)
        if not commit:
            break
        commits.append(commit)
        commit_id = commit.parent_ids[0] if commit.parent_ids else None
    # reversed() already returns an iterator; no explicit __iter__() call needed.
    return reversed(commits)
def __init__(self, project_name, database_url=_default_database_url):
    """Create a Hammer for a project and load its stored data if present.

    Args:
        project_name: Name of the project whose repositories are handled.
        database_url: Database URL passed to ``create_engine``.

    When the database already exists, the repository, author and commit
    maps are populated from it. The session used for loading is always
    closed, including when one of the loading steps raises.
    """
    start_time = datetime.datetime.now()
    self.project_name = project_name
    self._engine = create_engine(database_url)
    self._Session = sessionmaker(bind=self._engine)
    self._init_properties()
    if database_exists(self._engine.url):
        session = self._Session()
        try:
            self._build_repository_map(session)
            self._build_author_map(session)
            self._build_commit_map(session)
        finally:
            # Release the connection even if loading fails part-way.
            session.close()
    print('Init time {}'.format(datetime.datetime.now() - start_time))
def iter_individual_commits(self):
    """Yield every stored commit of the project ordered by commit time.

    The commit query is ordered by ``Commit.commit_time``; for each row the
    in-memory object registered under the same ``hexsha`` in
    ``self._shas_to_commits`` is yielded (``None`` if it is not registered).

    The session is closed in a ``finally`` clause so it is also released
    when the caller stops iterating early, for example after a single
    ``next()`` call.
    """
    _fail_unless_database_exists(self._engine)
    session = self._Session()
    try:
        for commit in self._commit_query(session).order_by(Commit.commit_time):
            yield self._shas_to_commits.get(commit.hexsha)
    finally:
        session.close()
/githammer/summary/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph import total_lines, lines_per_author, total_tests, tests_per_author, commits_per_hour, commits_per_weekday 2 | from .table import commit_count_table, line_count_table, test_count_table 3 | -------------------------------------------------------------------------------- /githammer/summary/graph.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Jaakko Kangasharju 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def _plot_totals(hammer, counts_property):
    """Plot the summed value of a per-author count attribute over time.

    One sample is taken per commit returned by
    ``hammer.iter_commits(frequency=Frequency.daily)``: the commit time and
    the sum of the values of the dictionary found at ``counts_property``.

    Returns:
        The matplotlib figure containing the line plot.
    """
    samples = [
        (commit.commit_time, sum(getattr(commit, counts_property).values()))
        for commit in hammer.iter_commits(frequency=Frequency.daily)
    ]
    dates = [when for when, _ in samples]
    totals = [total for _, total in samples]
    figure = mpplot.figure()
    axes = figure.add_subplot(111)
    axes.plot(dates, totals, ls='-', marker='')
    figure.autofmt_xdate(rotation=45)
    figure.tight_layout()
    return figure
def commits_per_weekday(hammer):
    """Draw a bar chart of the number of commits made on each weekday.

    Commits come from ``hammer.iter_individual_commits()`` and are bucketed
    by ``commit_time_tz().weekday()``, so index 0 is Monday.

    Returns:
        The matplotlib figure containing the bar chart.
    """
    day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    counts = [0] * 7
    for commit in hammer.iter_individual_commits():
        weekday = commit.commit_time_tz().weekday()
        counts[weekday] += 1
    positions = range(len(counts))
    figure = mpplot.figure()
    axes = figure.add_subplot(111)
    axes.bar(positions, counts)
    figure.tight_layout()
    mpplot.xticks(positions, day_names)
    return figure
def commit_count_table(hammer):
    """Build a table of commit counts per author, most active author first.

    Args:
        hammer: Object whose ``iter_individual_commits()`` yields commits
            exposing an ``author`` with a ``name`` attribute.

    Returns:
        The table created by ``_make_table`` with columns ``Author`` and
        ``Commits``, sorted by ``Commits`` in descending order.
    """
    # Imported locally so this function does not depend on a change to the
    # module's import block.
    from collections import Counter

    commit_counts = Counter(commit.author for commit in hammer.iter_individual_commits())
    table = _make_table(['Author', 'Commits'])
    for author, commit_count in commit_counts.items():
        table.append_row([author.name, commit_count])
    table.sort('Commits', reverse=True)
    return table
-------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | gitpython 2 | sqlalchemy >=1.4.7, <2.0 3 | sqlalchemy-utils >=0.37.0 4 | matplotlib <3.1 5 | python-dateutil 6 | globber 7 | beautifultable 8 | alembic 9 | coverage 10 | codecov 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile requirements.in 6 | # 7 | alembic==1.5.8 8 | # via -r requirements.in 9 | beautifultable==1.0.1 10 | # via -r requirements.in 11 | certifi==2020.12.5 12 | # via requests 13 | chardet==4.0.0 14 | # via requests 15 | codecov==2.1.11 16 | # via -r requirements.in 17 | coverage==5.5 18 | # via 19 | # -r requirements.in 20 | # codecov 21 | cycler==0.10.0 22 | # via matplotlib 23 | gitdb==4.0.7 24 | # via gitpython 25 | gitpython==3.1.14 26 | # via -r requirements.in 27 | globber==0.2.1 28 | # via -r requirements.in 29 | greenlet==1.0.0 30 | # via sqlalchemy 31 | idna==2.10 32 | # via requests 33 | kiwisolver==1.3.1 34 | # via matplotlib 35 | mako==1.1.4 36 | # via alembic 37 | markupsafe==1.1.1 38 | # via mako 39 | matplotlib==3.0.3 40 | # via -r requirements.in 41 | numpy==1.20.2 42 | # via matplotlib 43 | pyparsing==2.4.7 44 | # via matplotlib 45 | python-dateutil==2.8.1 46 | # via 47 | # -r requirements.in 48 | # alembic 49 | # matplotlib 50 | python-editor==1.0.4 51 | # via alembic 52 | requests==2.25.1 53 | # via codecov 54 | six==1.15.0 55 | # via 56 | # cycler 57 | # python-dateutil 58 | # sqlalchemy-utils 59 | smmap==4.0.0 60 | # via gitdb 61 | sqlalchemy-utils==0.37.0 62 | # via -r requirements.in 63 | sqlalchemy==1.4.7 64 | # via 65 | # -r requirements.in 66 | # alembic 67 | # sqlalchemy-utils 68 | urllib3==1.26.4 69 | # via 
import setuptools

# Read the README with an explicit encoding so the long description does not
# depend on the platform's default locale encoding.
with open('README.md', 'r', encoding='utf-8') as file:
    long_description = file.read()

setuptools.setup(
    name='git-hammer',
    version='0.3.2',
    author='Jaakko Kangasharju',
    author_email='ashar@iki.fi',
    description='Statistics tool for git repositories',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/asharov/git-hammer',
    packages=setuptools.find_packages(exclude=['tests']),
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: Apache Software License',
        'Development Status :: 3 - Alpha',
        'Operating System :: OS Independent'
    ],
    python_requires='>=3.7',
    install_requires=[
        'gitpython',
        'sqlalchemy >=1.4.7, <2.0',
        'sqlalchemy-utils >=0.37.0',
        'matplotlib <3.1',
        'python-dateutil',
        'globber',
        'beautifultable'
    ]
)
def check_commit(index, commit_old, commit_new, attr):
    """Exit with an error message if an attribute differs between two commits.

    Args:
        index: Position of the commit, used only in the message.
        commit_old: Reference commit; also supplies ``hexsha`` for the message.
        commit_new: Commit being checked.
        attr: Name of the attribute to compare on both commits.

    Raises:
        SystemExit: With a descriptive message when the values differ.
    """
    expected = getattr(commit_old, attr)
    actual = getattr(commit_new, attr)
    if expected != actual:
        template = 'Error in commit {} ({}): Incorrect {} {} (expected {})'
        message = template.format(index, commit_old.hexsha, attr, actual, expected)
        raise SystemExit(message)
class HammerInitTest(HammerTest):
    """Checks on a Hammer that has had no repository added.

    NOTE(review): relies on the ``HammerTest`` fixture providing
    ``self.hammer`` and ``self.working_directory`` -- confirm in hammer_test.py.
    """

    def test_plain_init_does_not_create_database(self):
        # The working directory must still be empty after plain construction.
        created_entries = os.listdir(self.working_directory.name)
        self.assertFalse(created_entries)

    def test_update_fails_when_database_not_created(self):
        self.assertRaises(DatabaseNotInitializedError, self.hammer.update_data)
test_updating_project_does_not_add_new_commits(self): 30 | self.hammer.update_data() 31 | commits = list(self.hammer.iter_individual_commits()) 32 | self.assertEqual(len(commits), 3) 33 | 34 | def test_updating_brings_in_later_commits_but_not_excluded_ones(self): 35 | other_hammer = self._make_hammer('otherTest', 36 | database_url='sqlite:///' + self.working_directory.name + '/other.sqlite') 37 | git_repository = git.Repo.clone_from(os.path.join(self.current_directory, 'data', 'repository'), 38 | os.path.join(self.working_directory.name, 'worktree'), 39 | branch='december', single_branch=True) 40 | other_hammer.add_repository(os.path.join(self.working_directory.name, 'worktree'), 41 | earliest_date=self.start_date) 42 | initial_commits = list(other_hammer.iter_individual_commits()) 43 | self.assertEqual(len(initial_commits), 1) 44 | git_repository.remote().fetch('+refs/heads/master:refs/remotes/origin/master') 45 | git_repository.create_head('master', git_repository.remote().refs.master) 46 | git_repository.heads.master.checkout() 47 | other_hammer.update_data() 48 | updated_commits = list(other_hammer.iter_individual_commits()) 49 | self.assertEqual(len(updated_commits), 3) 50 | -------------------------------------------------------------------------------- /tests/test_multiple_projects.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from githammer import iter_all_project_names 4 | 5 | from .hammer_test import HammerTest 6 | 7 | 8 | class HammerMultipleProjectsTest(HammerTest): 9 | 10 | def _create_second_project(self): 11 | self.otherHammer = self._make_hammer('otherTest') 12 | self.otherHammer.add_repository(os.path.join(self.current_directory, 'data', 'subrepository')) 13 | 14 | def setUp(self): 15 | super().setUp() 16 | self.hammer.add_repository(os.path.join(self.current_directory, 'data', 'repository')) 17 | 18 | def test_second_project_is_created(self): 19 | self._create_second_project() 20 | 
self.assertEqual(self.otherHammer.project_name, 'otherTest') 21 | 22 | def test_projects_are_inserted_in_database(self): 23 | self._create_second_project() 24 | project_names = list(iter_all_project_names(self.database_url)) 25 | self.assertEqual(sorted(project_names), ['otherTest', 'test']) 26 | 27 | def test_commits_from_other_projects_are_not_included(self): 28 | self._create_second_project() 29 | with self.assertRaises(StopIteration): 30 | self._fetch_commit(HammerMultipleProjectsTest._main_repo_initial_commit_hexsha, hammer=self.otherHammer) 31 | 32 | def test_authors_from_other_projects_are_not_included(self): 33 | self._create_second_project() 34 | authors = list(self.otherHammer.iter_authors()) 35 | self.assertEqual(len(authors), 1) 36 | -------------------------------------------------------------------------------- /tests/test_multiple_repositories.py: -------------------------------------------------------------------------------- 1 | import os 2 | import datetime 3 | 4 | from githammer import Frequency 5 | 6 | from .hammer_test import HammerTest 7 | 8 | 9 | class HammerMultipleRepositoriesTest(HammerTest): 10 | def setUp(self): 11 | super().setUp() 12 | self.hammer.add_repository(os.path.join(self.current_directory, 'data', 'repository'), 13 | os.path.join(self.current_directory, 'data', 'repo-config.json')) 14 | self.hammer.add_repository(os.path.join(self.current_directory, 'data', 'subrepository')) 15 | self._expected_dates = [ 16 | datetime.datetime(2017, 11, 22, 7, 22, 33, tzinfo=datetime.timezone.utc), 17 | datetime.datetime(2017, 12, 4, 7, 10, 11, tzinfo=datetime.timezone.utc), 18 | datetime.datetime(2017, 12, 6, 3, 33, 44, tzinfo=datetime.timezone.utc), 19 | datetime.datetime(2017, 12, 14, 10, 54, 55, tzinfo=datetime.timezone.utc) 20 | ] 21 | self._expected_offsets = [14400, 7200, -18000, 14400] 22 | 23 | def test_commits_are_combined_with_correct_dates(self): 24 | initial_commits = list(self.hammer.iter_commits())[:4] 25 | 
class HammerShallowTest(HammerTest):
    """Tests for a repository cloned with ``depth=1``."""

    def setUp(self):
        super().setUp()
        data_directory = os.path.join(self.current_directory, 'data')
        worktree_path = os.path.join(self.working_directory.name, 'worktree')
        # The source is addressed through a file:// URL rather than a plain path.
        source_url = 'file://' + os.path.join(data_directory, 'repository')
        self.git_repository = git.Repo.clone_from(source_url, worktree_path, depth=1)
        self.hammer.add_repository(worktree_path,
                                   os.path.join(data_directory, 'repo-config.json'))

    def test_shallow_clone_has_only_one_commit(self):
        commit_total = len(list(self.hammer.iter_individual_commits()))
        self.assertEqual(commit_total, 1)

    def test_shallow_clone_has_correct_counts(self):
        head_commit = self._fetch_commit(HammerShallowTest._main_repo_head_commit_hexsha)
        sorted_counts = sorted(head_commit.line_counts.values())
        self.assertEqual(sorted_counts, [18])
class HammerRepositoryTest(HammerTest):
    """Checks on the hammer after adding the single fixture repository."""

    def setUp(self):
        super().setUp()
        repository_path = os.path.join(self.current_directory, 'data', 'repository')
        configuration_path = os.path.join(self.current_directory, 'data', 'repo-config.json')
        self.hammer.add_repository(repository_path, configuration_path)

    def test_project_name_is_property_of_hammer_object(self):
        project_name = self.hammer.project_name
        self.assertEqual(project_name, 'test')

    def test_repository_is_processed_into_database_after_adding(self):
        head = self.hammer.head_commit()
        self.assertIsNotNone(head)

    def test_commit_timestamps_have_correct_time(self):
        first_commit = self._fetch_commit(HammerRepositoryTest._main_repo_initial_commit_hexsha)
        local_time = first_commit.commit_time_tz()
        self.assertEqual(local_time.hour, 11)

    def test_initial_commit_line_counts_are_correct(self):
        first_commit = self._fetch_commit(HammerRepositoryTest._main_repo_initial_commit_hexsha)
        first_author = first_commit.author
        self.assertEqual(first_commit.line_counts[first_author], 14)

    def test_second_commit_line_counts_are_correct(self):
        first_commit = self._fetch_commit(HammerRepositoryTest._main_repo_initial_commit_hexsha)
        following_commit = self._fetch_commit(HammerRepositoryTest._main_repo_second_commit_hexsha)
        # After the second commit the first author keeps 10 lines, the second owns 4.
        self.assertEqual(following_commit.line_counts[first_commit.author], 10)
        self.assertEqual(following_commit.line_counts[following_commit.author], 4)

    def test_sources_are_iterated_based_on_configuration(self):
        repository_path = os.path.join(self.current_directory, 'data', 'repository')
        configuration_path = os.path.join(self.current_directory, 'data', 'repo-config.json')
        entries = list(iter_sources_and_tests(repository_path, configuration_path))
        # Each entry is unpacked as a (file type, name) pair; only names are kept.
        names = []
        for _file_type, name in entries:
            names.append(name)
        self.assertIn(('source-file', 'file1.txt'), entries)
        self.assertNotIn('file.dat', names)

    def test_test_lines_are_counted_correctly(self):
        commit_with_tests = self._fetch_commit(HammerRepositoryTest._main_repo_test_commit_hexsha)
        test_author = commit_with_tests.author
        head_test_counts = self.hammer.head_commit().test_counts
        self.assertEqual(head_test_counts, {test_author: 1})

    def test_line_counts_are_correct_after_merge(self):
        # Index the known authors by name so the expectation reads naturally.
        authors_by_name = {}
        for author in self.hammer.iter_authors():
            authors_by_name[author.name] = author
        head_line_counts = self.hammer.head_commit().line_counts
        expected_line_counts = {
            authors_by_name['Author A']: 7,
            authors_by_name['Author B']: 9,
            authors_by_name['Author C']: 2,
        }
        self.assertEqual(head_line_counts, expected_line_counts)
class HammerUpdateTest(HammerTest):
    """Checks that updating picks up commits fetched after the initial processing."""

    def _update_from_old_state(self):
        # Fetch master into the single-branch clone, check it out locally,
        # then ask the hammer to refresh its data.
        repository = self.git_repository
        origin = repository.remote()
        origin.fetch('+refs/heads/master:refs/remotes/origin/master')
        repository.create_head('master', origin.refs.master)
        repository.heads.master.checkout()
        self.hammer.update_data()

    def setUp(self):
        super().setUp()
        source_path = os.path.join(self.current_directory, 'data', 'repository')
        worktree_path = os.path.join(self.working_directory.name, 'worktree')
        # Clone only the 'old-state' branch so master can be brought in later.
        self.git_repository = git.Repo.clone_from(source_path, worktree_path,
                                                  branch='old-state', single_branch=True)
        self.hammer.add_repository(worktree_path)

    def test_clone_produced_expected_result(self):
        hexshas = [commit.hexsha for commit in self.hammer.iter_individual_commits()]
        self.assertEqual(len(hexshas), 1)
        self.assertEqual(hexshas[0], HammerUpdateTest._main_repo_initial_commit_hexsha)

    def test_correct_statistics_were_computed_for_old_state(self):
        head_line_counts = self.hammer.head_commit().line_counts
        self.assertEqual(len(head_line_counts), 1)
        first_commit = self._fetch_commit(HammerUpdateTest._main_repo_initial_commit_hexsha)
        first_author = first_commit.author
        self.assertIn(first_author, head_line_counts)
        self.assertEqual(head_line_counts[first_author], 14)

    def test_update_after_repository_updated_brings_in_new_commits(self):
        self._update_from_old_state()
        hexshas = [commit.hexsha for commit in self.hammer.iter_individual_commits()]
        self.assertGreaterEqual(len(hexshas), 2)
        self.assertEqual(hexshas[0], HammerUpdateTest._main_repo_initial_commit_hexsha)
        self.assertEqual(hexshas[1], HammerUpdateTest._main_repo_second_commit_hexsha)

    def test_update_after_repository_updated_computes_correct_statistics(self):
        self._update_from_old_state()
        first_commit = self._fetch_commit(HammerUpdateTest._main_repo_initial_commit_hexsha)
        following_commit = self._fetch_commit(HammerUpdateTest._main_repo_second_commit_hexsha)
        # Same expectations as for a repository processed in one go.
        self.assertEqual(following_commit.line_counts[first_commit.author], 10)
        self.assertEqual(following_commit.line_counts[following_commit.author], 4)