The response has been limited to 50k tokens of the smallest files in the repo. You can remove this limitation by removing the max tokens filter.
├── .gitattributes
├── .github
    ├── dependabot.yml
    └── workflows
    │   └── test.yml
├── .gitignore
├── COPYING
├── COPYING.gpl
├── COPYING.mit
├── Documentation
    ├── Contributing.md
    ├── FAQ.md
    ├── converting-from-bfg-repo-cleaner.md
    ├── converting-from-filter-branch.md
    ├── examples-from-user-filed-issues.md
    └── git-filter-repo.txt
├── INSTALL.md
├── Makefile
├── README.md
├── contrib
    └── filter-repo-demos
    │   ├── README.md
    │   ├── barebones-example
    │   ├── bfg-ish
    │   ├── clean-ignore
    │   ├── convert-svnexternals
    │   ├── filter-branch-ish
    │   ├── filter-lamely
    │   ├── insert-beginning
    │   ├── lint-history
    │   └── signed-off-by
├── git-filter-repo
├── git_filter_repo.py
├── pyproject.toml
└── t
    ├── run_coverage
    ├── run_tests
    ├── t9390-filter-repo-basics.sh
    ├── t9390
        ├── basic
        ├── basic-filename
        ├── basic-mailmap
        ├── basic-message
        ├── basic-numbers
        ├── basic-replace
        ├── basic-ten
        ├── basic-twenty
        ├── degenerate
        ├── degenerate-evil-merge
        ├── degenerate-globme
        ├── degenerate-keepme
        ├── degenerate-keepme-noff
        ├── degenerate-moduleA
        ├── empty
        ├── empty-keepme
        ├── less-empty-keepme
        ├── more-empty-keepme
        ├── sample-mailmap
        ├── sample-message
        ├── sample-replace
        ├── unusual
        ├── unusual-filtered
        └── unusual-mailmap
    ├── t9391-filter-repo-lib-usage.sh
    ├── t9391
        ├── commit_info.py
        ├── create_fast_export_output.py
        ├── emoji-repo
        ├── erroneous.py
        ├── file_filter.py
        ├── print_progress.py
        ├── rename-master-to-develop.py
        ├── splice_repos.py
        ├── strip-cvs-keywords.py
        └── unusual.py
    ├── t9392-filter-repo-python-callback.sh
    ├── t9393-filter-repo-rerun.sh
    ├── t9393
        ├── lfs
        └── simple
    ├── t9394-filter-repo-sanity-checks-and-bigger-repo-setup.sh
    ├── t9394
        └── date-order
    ├── test-lib-functions.sh
    └── test-lib.sh


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.sh eol=lf
2 | *.py eol=lf
3 | /git-filter-repo eol=lf
4 | /contrib/filter-repo-demos/[a-z]* eol=lf
5 | /t/t9*/* eol=lf
6 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 2
3 | updates:
4 |   - package-ecosystem: "github-actions"
5 |     directory: "/"
6 |     schedule:
7 |       interval: "monthly"


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Run tests
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   run-tests:
 7 |     strategy:
 8 |       matrix:
 9 |         os: [ 'windows', 'ubuntu', 'macos' ]
10 |       fail-fast: false
11 |     runs-on: ${{ matrix.os }}-latest
12 |     steps:
13 |     - uses: actions/checkout@v4
14 |     - name: Setup python
15 |       uses: actions/setup-python@v5
16 |       with:
17 |         python-version: 3.x
18 |     - name: test
19 |       shell: bash
20 |       run: |
21 |         # setup-python puts `python` into the `PATH`, not `python3`, yet
22 |         # `git-filter-repo` expects `python3` in the `PATH`. Let's add
23 |         # a shim.
24 |         printf '#!/bin/sh\n\nexec python "$@"\n' >python3 &&
25 | 
26 |         export PATH=$PWD:$PATH &&
27 | 
28 |         if ! t/run_tests -q -v -x
29 |         then
30 |           mkdir failed &&
31 |           tar czf failed/failed.tar.gz t
32 |           exit 1
33 |         fi
34 |     - name: upload failed tests' directories
35 |       if: failure()
36 |       uses: actions/upload-artifact@v4
37 |       with:
38 |         name: failed-${{ matrix.os }}
39 |         path: failed
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /Documentation/html/
2 | /Documentation/man1/
3 | /t/test-results
4 | /t/trash directory*
5 | /__pycache__/
6 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
 1 | git-filter-repo itself and most of the files in this repository (exceptions
 2 | noted below) are provided under the MIT license (see COPYING.mit).
 3 | 
 4 | The usage of the MIT license probably makes filter-repo compatible with
 5 | everything, but just in case, these files can also be used under whatever
 6 | open source license[1] that git.git or libgit2 use now or in the future
 7 | (currently GPL[2] and GPL-with-linking-exception[3]).  Further, the
 8 | examples (in contrib/filter-repo-demos/ and t/t9391/) can also be used
 9 | under the same license that libgit2 provides their examples under (CC0,
10 | currently[4]).
11 | 
12 | Exceptions:
13 | 
14 |   - The test harness (t/test-lib.sh, t/test-lib-functions.sh) is a slightly
15 |     modified copy of git.git's test harness (the difference being that my
16 |     copy doesn't require a built version of 'git' to be present).  These
17 |     are thus GPL2 (see COPYING.gpl), and are individually marked as such.
18 | 
19 | 
20 | [1] ...as defined by the Open Source Initiative (https://opensource.org/)
21 | [2] https://git.kernel.org/pub/scm/git/git.git/tree/COPYING
22 | [3] https://github.com/libgit2/libgit2/blob/master/COPYING
23 | [4] https://github.com/libgit2/libgit2/blob/master/examples/COPYING
24 | 


--------------------------------------------------------------------------------
/COPYING.gpl:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   <signature of Ty Coon>, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/COPYING.mit:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2009, 2018-2019
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/Documentation/Contributing.md:
--------------------------------------------------------------------------------
 1 | Welcome to the community!
 2 | 
 3 | Contributions need to meet the bar for inclusion in git.git.  Although
 4 | filter-repo is not part of the git.git repository, I want to leave the
 5 | option open for it to be merged in the future.  As such, any
 6 | contributions need to follow the same [guidelines for contribution to
 7 | git.git](https://git.kernel.org/pub/scm/git/git.git/tree/Documentation/SubmittingPatches),
 8 | with a few exceptions:
 9 | 
10 |   * While I
11 |     [hate](https://public-inbox.org/git/CABPp-BG2SkH0GrRYpHLfp2Wey91ThwQoTgf9UmPa9f5Szn+v3Q@mail.gmail.com/)
12 |     [GitHub](https://public-inbox.org/git/CABPp-BEcpasV4vBTm0uxQ4Vzm88MQAX-ArDG4e9QU8tEoNsZWw@mail.gmail.com/)
13 |     [PRs](https://public-inbox.org/git/CABPp-BEHy8c3raHwf9aFXvXN0smf_WwCcNiYxQBwh7W6An60qQ@mail.gmail.com/)
14 |     (as others point out, [it's mind-boggling in a bad way that
15 |     web-based Git hosting and code review systems do such a poor
16 |     job](http://nhaehnle.blogspot.com/2020/06/they-want-to-be-small-they-want-to-be.html)),
17 |     git-format-patch and git-send-email can be a beast and I have not
18 |     yet found time to modify Dscho's excellent
19 |     [GitGitGadget](https://github.com/gitgitgadget/gitgitgadget) to
20 |     work with git-filter-repo.  As such:
21 |       * For very short single-commit changes, feel free to open GitHub PRs.
22 |       * For more involved changes, if format-patch or send-email give you
23 |         too much trouble, go ahead and open a GitHub PR and just mention
24 |         that email didn't work out.
25 |   * If emailing patches to the git list:
26 |     * Include "filter-repo" at the start of the subject,
27 |       e.g. "[filter-repo PATCH] Add packaging scripts for uploading to PyPI"
28 |       instead of just "[PATCH] Add packaging scripts for uploading to PyPI"
29 |     * CC me instead of the git maintainer
30 |   * Git's [CodingGuidelines for python
31 |     code](https://github.com/git/git/blob/v2.24.0/Documentation/CodingGuidelines#L482-L494)
32 |     are only partially applicable:
33 |     * python3 is a hard requirement; python2 is/was EOL at the end of
34 |       2019 and should not be used.  (Commit 4d0264ab723c
35 |       ("filter-repo: workaround python<2.7.9 exec bug", 2019-04-30)
36 |       was the last version of filter-repo that worked with python2).
37 |     * You can depend on anything in python 3.6 or earlier.  I may bump
38 |       this minimum version over time, but do want to generally work
39 |       with the python3 version found in current enterprise Linux
40 |       distributions.
41 |     * In filter-repo, it's not just OK to use bytestrings, you are
42 |       expected to use them a lot.  Using unicode strings results in
43 |       lots of ugly errors since input comes from filesystem names,
44 |       commit messages, file contents, etc., none of which are
45 |       guaranteed to be unicode.  (Plus unicode strings require lots of
46 |       effort to verify, encode, and decode -- slowing the filtering
47 |       process down).  I tried to work with unicode strings more
48 |       broadly in the code base multiple times; but it's just a bad
49 |       idea to use an abstraction that doesn't fit the data.
50 |     * I generally like [PEP
51 |       8](https://www.python.org/dev/peps/pep-0008/), but used
52 |       two-space indents for years before learning of it and have just
53 |       continued that habit.  For consistency, contributions should also
54 |       use two-space indents and otherwise generally follow PEP 8.
55 | 
56 | There are a few extra things I would like folks to keep in mind:
57 | 
58 |   * Please test line coverage if you add or modify code
59 | 
60 |     * `make test` will run the testsuite under
61 |       [coverage3](https://pypi.org/project/coverage/) (which you will
62 |       need to install), and report on line coverage.  Line coverage of
63 |       git-filter-repo needs to remain at 100%; line coverage of
64 |       contrib and test scripts can be ignored.
65 | 
66 |   * Please do not be intimidated by detailed feedback:
67 | 
68 |     * In the git community, I have been contributing for years and
69 |       have had hundreds of patches accepted but I still find that even
70 |       when I try to make patches perfect I am not surprised when I
71 |       have to spend as much or more time fixing up patches after
72 |       submitting them than I did figuring out the patches in the first
73 |       place.  git folks tend to do thorough reviews, which has taught
74 |       me a lot, and I try to do the same for filter-repo.  Plus, as
75 |       noted above, I want contributions from others to be acceptable
76 |       in git.git itself.
77 | 


--------------------------------------------------------------------------------
/Documentation/FAQ.md:
--------------------------------------------------------------------------------
  1 | # Frequently Answered Questions
  2 | 
  3 | ## Table of Contents
  4 | 
  5 |   * [Why did `git-filter-repo` rewrite commit hashes?](#why-did-git-filter-repo-rewrite-commit-hashes)
  6 |   * [Why did `git-filter-repo` rewrite more commit hashes than I expected?](#why-did-git-filter-repo-rewrite-more-commit-hashes-than-i-expected)
  7 |   * [Why did `git-filter-repo` rewrite other branches too?](#why-did-git-filter-repo-rewrite-other-branches-too)
  8 |   * [How should paths be specified?](#How-should-paths-be-specified)
  9 |   * [Help! Can I recover or undo the filtering?](#help-can-i-recover-or-undo-the-filtering)
 10 |   * [Can you change `git-filter-repo` to allow future folks to recover from `--force`'d rewrites?](#can-you-change-git-filter-repo-to-allow-future-folks-to-recover-from---forced-rewrites)
 11 |   * [Can I use `git-filter-repo` to fix a repository with corruption?](#Can-I-use-git-filter-repo-to-fix-a-repository-with-corruption)
 12 |   * [What kinds of problems does `git-filter-repo` not try to solve?](#What-kinds-of-problems-does-git-filter-repo-not-try-to-solve)
 13 |     * [Filtering history but magically keeping the same commit IDs](#Filtering-history-but-magically-keeping-the-same-commit-IDs)
 14 |     * [Bidirectional development between a filtered and unfiltered repository](#Bidirectional-development-between-a-filtered-and-unfiltered-repository)
 15 |     * [Removing specific commits, or filtering based on the difference (a.k.a. patch or change) between commits](#Removing-specific-commits-or-filtering-based-on-the-difference-aka-patch-or-change-between-commits)
 16 |     * [Filtering two different clones of the same repository and getting the same new commit IDs](#Filtering-two-different-clones-of-the-same-repository-and-getting-the-same-new-commit-IDs)
 17 | 
 18 | ## Why did `git-filter-repo` rewrite commit hashes?
 19 | 
 20 | This is fundamental to how Git operates.  In more detail...
 21 | 
 22 | Each commit in Git is a hash of its contents.  Those contents include
 23 | the commit message, the author (name, email, and time authored), the
 24 | committer (name, email and time committed), the toplevel tree hash,
 25 | and the parent(s) of the commit.  This means that if any of the commit
 26 | fields change, including the tree hash or the hash of the parent(s) of
 27 | the commit, then the hash for the commit will change.
 28 | 
 29 | (The same is true for files ("blobs") and trees stored in git as well;
 30 | each is a hash of its contents, so literally if anything changes, the
 31 | commit hash will change.)
 32 | 
 33 | If you attempt to write a commit (or tree or blob) object with an
 34 | incorrect hash, Git will reject it as corrupt.
 35 | 
 36 | ## Why did `git-filter-repo` rewrite more commit hashes than I expected?
 37 | 
 38 | There are two aspects to this, or two possible underlying questions users
 39 | might be asking here:
 40 |   * Why did commits newer than the ones I expected have their hash change?
 41 |   * Why did commits older than the ones I expected have their hash change?
 42 | 
 43 | For the first question, see [why filter-repo rewrites commit
 44 | hashes](#why-did-git-filter-repo-rewrite-commit-hashes), and note that
 45 | if you modify some old commit, perhaps to remove a file, then obviously
 46 | that commit's hash must change.  Further, since that commit will have a
 47 | new hash, any other commit with that commit as a parent will need to
 48 | have a new hash.  That will need to chain all the way to the most recent
 49 | commits in history.  This is fundamental to Git and there is nothing you
 50 | can do to change this.
 51 | 
 52 | For the second question, there are two causes: (1) the filter you
 53 | specified applies to the older commits too, or (2) git-fast-export and
 54 | git-fast-import (both of which git-filter-repo uses) canonicalize
 55 | history in various ways.  The second cause means that even if you have
 56 | no filter, these tools sometimes change commit hashes.  This can happen
 57 | in any of these cases:
 58 | 
 59 |   * If you have signed commits, the signatures will be stripped
 60 |   * If you have commits with extended headers, the extended headers will
 61 |     be stripped (signed commits are actually a special case of this)
 62 |   * If you have commits in an encoding other than UTF-8, they will by
 63 |     default be re-encoded into UTF-8
 64 |   * If you have a commit without an author, one will be added that
 65 |     matches the committer.
 66 |   * If you have trees that are not canonical (e.g. incorrect sorting
 67 |     order), they will be canonicalized
 68 | 
 69 | If this affects you and you really only want to rewrite newer commits in
 70 | history, you can use the `--refs` argument to git-filter-repo to specify
 71 | a range of history that you want rewritten.
 72 | 
 73 | (For those attempting to be clever and use `--refs` for the first
 74 | question: Note that if you attempt to only rewrite a few old commits,
 75 | then all you'll succeed in is adding new commits that won't be part of
 76 | any branch and will be subject to garbage collection.  The branches will
 77 | still hold on to the unrewritten versions of the commits.  Thus, you
 78 | have to rewrite all the way to the branch tip for the rewrite to be
 79 | meaningful.  Said another way, the `--refs` trick is only useful for
 80 | restricting the rewrite to newer commits, never for restricting the
 81 | rewrite to older commits.)
 82 | 
 83 | ## Why did `git-filter-repo` rewrite other branches too?
 84 | 
 85 | git-filter-repo's name is git-filter-**_repo_**.  Obviously it is going
 86 | to rewrite all branches by default.
 87 | 
 88 | `git-filter-repo` can restrict its rewriting to a subset of history,
 89 | such as a single branch, using the `--refs` option.  However, using that
 90 | comes with the risk that one branch now has a different version of some
 91 | commits than other branches do; usually, when you rewrite history, you
 92 | want all branches that depend on what you are rewriting to be updated.
 93 | 
 94 | ## How should paths be specified?
 95 | 
 96 | Arguments to `--path` should be paths as Git would report them, when run
 97 | from the toplevel of the git repository (explained more below after some
 98 | examples).
 99 | 
100 | **Good** path examples:
101 |   * `README.md`
102 |   * `Documentation/README.md`
103 |   * `src/modules/flux/capacitor.rs`
104 | 
105 | You can find examples of valid path names from your repository by
106 | running either `git diff --no-relative --name-only` or `git log
107 | --no-relative --name-only --format=""`.
108 | 
109 | The following are basic rules about paths the way that Git reports and uses
110 | them:
111 |   * do not use absolute paths
112 |   * always treat paths as relative to the toplevel of the repository
113 |     (do not add a leading slash, and do not specify paths relative to some
114 |      subdirectory of the repository even if that is your current working
115 |      directory)
116 |   * do not use the special directories `.` or `..` anywhere in your path
117 |   * do not use `\`, the Windows path separator, between directories and
118 |     files; always use `/` regardless of platform.
119 | 
120 | **Erroneous** path examples (do **_NOT_** use any of these styles):
121 |  * `/absolute/path/to/src/modules/program.c`
122 |  * `/src/modules/program.c`
123 |  * `src/docs/../modules/main.java`
124 |  * `scripts/config/./update.sh`
125 |  * `./tests/fixtures/image.jpg`
126 |  * `../src/main.rs`
127 |  * `C:\absolute\path\to\src\modules\program.c`
128 |  * `src\modules\program.c`
129 | 
130 | ## Help! Can I recover or undo the filtering?
131 | 
132 | Sure, _if_ you followed the instructions.  The instructions told you to
133 | make a fresh clone before running git-filter-repo.  If you did that (and
134 | didn't force push your rewritten history back over the original), you
135 | can just throw away your clone with the flubbed rewrite, and make a new
136 | clone.
137 | 
138 | If you didn't make a fresh clone, and you didn't run with `--force`, you
139 | would have seen the following warning:
140 | ```
141 | Aborting: Refusing to destructively overwrite repo history since
142 | this does not look like a fresh clone.
143 | [...]
144 | Please operate on a fresh clone instead.  If you want to proceed
145 | anyway, use --force.
146 | ```
147 | If you then added `--force`, well, you were warned.
148 | 
149 | If you didn't make a fresh clone, and you started with `--force`, and you
150 | didn't think to read the description of the `--force` option:
151 | ```
152 | 	Ignore fresh clone checks and rewrite history (an irreversible
153 | 	operation, especially since it by default ends with an
154 | 	immediate pruning of reflogs and old objects).
155 | ```
156 | and you didn't read even the beginning of the manual
157 | ```
158 | git-filter-repo destructively rewrites history
159 | ```
160 | and you think it's okay to run a command with `--force` in it on
161 | something you don't have a backup of, then now is the time to reassess
162 | your life choices.  `--force` should be a pretty clear warning sign.
163 | (If someone on the internet suggested `--force`, you can complain at
164 | _them_, but either way you should learn to carefully vet commands
165 | suggested by others on the internet.  Sadly, even sites like Stack
166 | Overflow where someone really ought to be able to correct bad guidance
167 | still unfortunately have a fair amount of this bad advice.)
168 | 
169 | See also the next question.
170 | 
171 | ## Can you change `git-filter-repo` to allow future folks to recover from --force'd rewrites?
172 | 
173 | This will never be supported.
174 | 
175 | * Providing an alternate method to restore would require storing both
176 |   the original history and the new history, meaning that those who are
177 |   trying to shrink their repository size instead see it grow and have to
178 |   figure out extra steps to expunge the old history to see the actual
179 |   size savings.  Experience with other tools showed that this was
180 |   frustrating and difficult to figure out for many users.
181 | 
182 | * Providing an alternate method to restore would mean that users who are
183 |   trying to purge sensitive data from their repository still find the
184 |   sensitive data after the rewrite because it hasn't actually been
185 |   purged. In order to actually purge it, they have to take extra steps.
186 |   Same as with the last bullet point, experience has shown that extra
187 |   steps to purge the extra information are difficult and error-prone.
188 |   This extra difficulty is particularly problematic when you're trying
189 |   to expunge sensitive data.
190 | 
191 | * Providing an alternate method to restore would also mean trying to
192 |   figure out what should be backed up and how. The obvious choices used
193 |   by previous tools only actually provided partial backups (reflogs
194 |   would be ignored for example, as would uncommitted changes whether
195 |   staged or not). The more you try to carefully back up everything, the
196 |   more difficult the restoration from backup will be.  The only backup
197 |   mechanism I've found that seems reasonable is making a separate
198 |   clone.  That's expensive to do automatically for the user (especially
199 |   if the filtering is done via multiple invocations of the tool).  Plus,
200 |   it's not clear where the clone should be stored, especially to avoid
201 |   the previous problems for size-reduction and sensitive-data-removal
202 |   folks.
203 | 
204 | * Providing an alternate method to restore would also mean providing
205 |   documentation on how to restore. Past methods by other tools in the
206 |   history rewriting space suggested that it was rather difficult for
207 |   users to figure out.  Difficult enough, in fact, that users simply
208 |   didn't ever use them.  They instead made a separate clone before
209 |   rewriting history and if they didn't like the rewrite, then they just
210 |   blew it away and made a new clone to work with.  Since that was
211 |   observed to be the easy restoration method, I simply enforced it with
212 |   this tool, requiring users who look like they might not be operating
213 |   on a fresh clone to use the --force flag.
214 | 
215 | But more than all that, if there were an alternate method to restore,
216 | why would you have needed to specify the --force flag? Doesn't its
217 | existence (and the wording of its documentation) make it pretty clear on
218 | its own that there isn't going to be a way to restore?
219 | 
220 | ## Can I use `git-filter-repo` to fix a repository with corruption?
221 | 
222 | Some kinds of corruption can be fixed, in conjunction with `git
223 | replace`.  If `git fsck` reports warnings/errors for certain objects,
224 | you can often [replace them and rewrite
225 | history](examples-from-user-filed-issues.md#Handling-repository-corruption).
226 | 
227 | ## What kinds of problems does `git-filter-repo` not try to solve?
228 | 
229 | This question is often asked in the form of "How do I..." or even
230 | written as a statement such as "I found a bug with `git-filter-repo`;
231 | the behavior I got was different than I expected..."  But if you're
232 | trying to do one of the things below, then `git-filter-repo` is behaving
233 | as designed and either there is no solution to your problem, or you need
234 | to use a different tool to solve your problem.  The following subsections
235 | address some of these common requests:
236 | 
237 | ### Filtering history but magically keeping the same commit IDs
238 | 
239 | This is impossible.  If you modify commits, or the files contained in
240 | them, then you change their commit IDs; this is [fundamental to
241 | Git](#why-did-git-filter-repo-rewrite-commit-hashes).
242 | 
243 | However, _if_ you don't need to modify commits, but just don't want to
244 | download everything, then look into one of the following:
245 |   * [partial clones](https://git-scm.com/docs/partial-clone)
246 |   * the ugly, retarded hack known as [shallow clones](https://git-scm.com/docs/shallow)
247 |   * a massive hack like [cheap fake
248 |     clones](https://github.com/newren/sequester-old-big-blobs) that at
249 |     least let you put your evil overlord laugh to use
250 | 
251 | ### Bidirectional development between a filtered and unfiltered repository
252 | 
253 | Some folks want to extract a subset of a repository, do development work
254 | on it, then bring those changes back to the original repository, and
255 | send further changes in both directions.  Such a tool can be written
256 | using fast-export and fast-import, but would need to make very different
257 | design decisions than `git-filter-repo` did.  Such a tool would be
258 | capable of supporting this kind of development, but would lose the ability
259 | ["to write arbitrary filters using a scripting
260 | language"](https://josh-project.github.io/josh/#concept) and other
261 | features that `git-filter-repo` has.
262 | 
263 | Such a tool exists; it's called [Josh](https://github.com/josh-project/josh).
264 | Use it if this is your use case.
265 | 
266 | ### Removing specific commits, or filtering based on the difference (a.k.a. patch or change) between commits
267 | 
268 | You are probably looking for `git rebase`.  `git rebase` operates on the
269 | difference between commits ("diff"), allowing you to e.g. drop or modify
270 | the diff, but then runs the risk of conflicts as it attempts to apply
271 | future diffs. If you tweak one diff in the middle, since it just applies
272 | more diffs for the remaining patches, you'll still see your changes at
273 | the end.
274 | 
275 | filter-repo, by contrast, uses fast-export and fast-import.  Those tools
276 | treat every commit not as a diff but as a "use the same versions of most
277 | files from the parent commit, but make these five files have these exact
278 | contents". Since you don't have either the diff or ready access to the
279 | version of files from the parent commit, that makes it hard to "undo"
280 | part of the changes to some file.  Further, if you attempt to drop an
281 | entire commit or tweak the contents of those new files in that commit,
282 | those changes will be reverted by the next commit in the stream that
283 | mentions that file because handling the next commit does not involve
284 | applying a diff but a "make this file have these exact contents". So,
285 | filter-repo works well for things like removing a file entirely, but if
286 | you want to make any tweaks to any files you have to make the exact same
287 | tweak over and over for every single commit that touches that file.
288 | 
289 | In short, `git rebase` is the tool you want for removing specific
290 | commits or otherwise operating on the diff between commits.
291 | 
292 | ### Filtering two different clones of the same repository and getting the same new commit IDs
293 | 
294 | Sometimes two co-workers have a clone of the same repository and they
295 | run the same `git-filter-repo` command, and they expect to get the same
296 | new commit IDs.  Often they do get the same new commit IDs, but
297 | sometimes they don't.
298 | 
299 | When people get the same commit IDs, it is only by luck, not by design.
300 | There are three reasons this is unsupported and will never be reliable:
301 | 
302 |   * Different Git versions used could cause differences in filtering
303 | 
304 |     Since `git fast-export` and `git fast-import` do various
305 |     canonicalizations of history, and these could change over time,
306 |     having different versions of Git installed can result in differences
307 |     in filtering.
308 | 
309 |   * Different git-filter-repo versions used could cause differences in
310 |     filtering
311 | 
312 |     Over time, `git-filter-repo` may include new filterings by default,
313 |     or fix existing filterings, or make any other number of changes.  As
314 |     such, having different versions of `git-filter-repo` installed can
315 |     result in differences in filtering.
316 | 
317 |   * Different amounts of the repository cloned or differences in
318 |     local-only commits can cause differences in filtering
319 | 
320 |     If the clones weren't made at the same time, one clone may have more
321 |     commits than the other.  Also, both may have made local commits the
322 |     other doesn't have.  These additional commits could cause history to
323 |     be traversed in a different order, and filtering rules are allowed
324 |     to have order-dependent rules for how they filter.  Further,
325 |     filtering rules are allowed to depend upon what history exists in
326 |     your clone.  As one example, filter-repo's default to update commit
327 |     messages which refer to other commits by abbreviated hash, may be
328 |     unable to find these other commits in your clone but find them in
329 |     your coworker's clone.  Relatedly, filter-repo's update of
330 |     abbreviated hashes in commit messages only works for commits that
331 |     have already been filtered, and thus depends on the order in which
332 |     fast-export traverses the history.
333 | 
334 | `git-filter-repo` is designed as a _one_-shot history rewriting tool.
335 | Once you have filtered one clone of the repository, you should not be
336 | using it to filter other clones.  All other clones of the repository
337 | should either be discarded and recloned, or [have all their history
338 | rebased on top of the rewritten
339 | history](https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#_make_sure_other_copies_are_cleaned_up_clones_of_colleagues).
340 | 
341 | <!--
342 | ## How do I see what was removed?
343 | 
344 | Run `git rev-list --objects --all` in both a separate fresh clone from
345 | before the rewrite and in the repo where the rewrite was done.  Then
346 | find the objects that exist in the old but not the new.
347 | 
348 | -->
349 | 


--------------------------------------------------------------------------------
/Documentation/converting-from-bfg-repo-cleaner.md:
--------------------------------------------------------------------------------
  1 | # Cheat Sheet: Converting from BFG Repo Cleaner
  2 | 
  3 | This document is aimed at folks who are familiar with BFG Repo Cleaner
  4 | and want to learn how to convert over to using filter-repo.
  5 | 
  6 | ## Table of Contents
  7 | 
  8 |   * [Half-hearted conversions](#half-hearted-conversions)
  9 |   * [Intention of "equivalent" commands](#intention-of-equivalent-commands)
 10 |   * [Basic Differences](#basic-differences)
 11 |   * [Cheat Sheet: Conversion of Examples from BFG](#cheat-sheet-conversion-of-examples-from-bfg)
 12 | 
 13 | ## Half-hearted conversions
 14 | 
 15 | You can switch most any BFG command to use filter-repo under the
 16 | covers by just replacing the `java -jar bfg.jar` part of the command
 17 | with [`bfg-ish`](../contrib/filter-repo-demos/bfg-ish).
 18 | 
 19 | bfg-ish is a reasonable tool, and provides a number of bug fixes and
 20 | features on top of bfg, but most of my focus is naturally on
 21 | filter-repo which has a number of capabilities lacking in bfg-ish.
 22 | 
 23 | ## Intention of "equivalent" commands
 24 | 
 25 | BFG and filter-repo have a few differences, highlighted in the Basic
 26 | Differences section below, that make it hard to get commands that
 27 | behave identically.  Rather than focusing on matching BFG output as
 28 | exactly as possible, I treat the BFG examples as idiomatic ways to
 29 | solve a certain type of problem with BFG, and express how one would
 30 | idiomatically solve the same problem in filter-repo.  Sometimes that
 31 | means the results are not identical, but they are largely the same in
 32 | each case.
 33 | 
 34 | ## Basic Differences
 35 | 
 36 | BFG operates directly on tree objects, which have no notion of their
 37 | leading path.  Thus, it has no way of differentiating between
 38 | 'README.md' at the toplevel versus in some subdirectory.  You simply
 39 | operate on the basename of files and directories.  This precludes
 40 | doing things like renaming files and directories or other bigger
 41 | restructures.  By directly operating on trees, it also runs into
 42 | problems with loose vs. packed objects, loose vs. packed refs, not
 43 | understanding replace refs or grafts, and not understanding the index
 44 | and working tree as another data source.
 45 | 
 46 | With `git filter-repo`, you are essentially given an editing tool to
 47 | operate on the [fast-export](https://git-scm.com/docs/git-fast-export)
 48 | serialization of a repo, which operates on filenames including their
 49 | full paths from the toplevel of the repo.  Directories are not
 50 | separately specified, so any directory-related filtering is done by
 51 | checking the leading path of each file.  Further, you aren't limited
 52 | to the pre-defined filtering types; python callbacks which operate on
 53 | the data structures from the fast-export stream can be provided to do
 54 | just about anything you want.  By leveraging fast-export and
 55 | fast-import, filter-repo gains automatic handling of objects and refs
 56 | whether they are packed or not, automatic handling of replace refs and
 57 | grafts, and future features that may appear.  It also tries hard to
 58 | provide a full rewrite solution, so it takes care of additional
 59 | important concerns such as updating the index and working tree and
 60 | running an automatic gc for the user afterwards.
 61 | 
 62 | The "protection" and "privacy" defaults in BFG are something I
 63 | fundamentally disagreed with for a variety of reasons; see the
 64 | comments at the top of the
 65 | [bfg-ish](../contrib/filter-repo-demos/bfg-ish) script if you want
 66 | details.  The bfg-ish script implemented these protection and privacy
 67 | options since it was designed to act like BFG, but still flipped the
 68 | default to the opposite of what BFG chose.  I left the "protection"
 69 | and "non-private" features out of filter-repo entirely.  This means a
 70 | number of things with filter-repo:
 71 |   * any filters you specify will also be applied to HEAD, so that you
 72 |     don't have a weird disconnect from your history transformations
 73 |     only being applied to most commits
 74 |   * `[formerly OLDHASH]` references are not munged into commit
 75 |     messages; the replace refs that filter-repo adds are a much
 76 |     cleaner way of looking up commits by old commit hashes.
 77 |   * `Former-commit-id:` footers are not added to commit messages; the
 78 |     replace refs that filter-repo adds are a much cleaner way of
 79 |     looking up commits by old commit hashes.
 80 |   * History is not littered with `<filename>.REMOVED.git-id` files.
 81 | 
 82 | BFG expects you to specify the repository to rewrite as its final
 83 | argument, whereas filter-repo expects you to cd into the repo and then
 84 | run filter-repo.
 85 | 
 86 | ## Cheat Sheet: Conversion of Examples from BFG
 87 | 
 88 | ### Stripping big blobs
 89 | 
 90 | ```shell
 91 |   java -jar bfg.jar --strip-blobs-bigger-than 100M some-big-repo.git
 92 | ```
 93 | 
 94 | becomes
 95 | 
 96 | ```shell
 97 |   git filter-repo --strip-blobs-bigger-than 100M
 98 | ```
 99 | 
100 | ### Deleting files
101 | 
102 | ```shell
103 |   java -jar bfg.jar --delete-files id_{dsa,rsa}  my-repo.git
104 | ```
105 | 
106 | becomes
107 | 
108 | ```shell
109 |   git filter-repo --use-base-name --path id_dsa --path id_rsa --invert-paths
110 | ```
111 | 
112 | ### Removing sensitive content
113 | 
114 | ```shell
115 |   java -jar bfg.jar --replace-text passwords.txt my-repo.git
116 | ```
117 | 
118 | becomes
119 | 
120 | ```shell
121 |   git filter-repo --replace-text passwords.txt
122 | ```
123 | 
124 | The `--replace-text` was a really clever idea that the BFG came up
125 | with and I just implemented mostly as-is within filter-repo.  Sadly,
126 | BFG didn't document the format of files passed to `--replace-text` very
127 | well, but I added more detail in the filter-repo documentation.
128 | 
129 | There is one small but important difference between the two tools: if
130 | you use both "regex:" and "==>" on a single line to specify a regex
131 | search and replace, then filter-repo will use "\1", "\2", "\3",
132 | etc. for replacement strings whereas BFG used "$1", "$2", "$3", etc.
133 | The reason for this difference is simply that python used backslashes
134 | in its regex format while scala used dollar signs, and both tools
135 | wanted to just pass along the strings unmodified to the underlying
136 | language.  (Since bfg-ish attempts to emulate the BFG, it accepts
137 | "$1", "$2" and so forth and translates them to "\1", "\2", etc. so
138 | that filter-repo/python will understand it.)
139 | 
140 | ### Removing files and folders with a certain name
141 | 
142 | ```shell
143 |   java -jar bfg.jar --delete-folders .git --delete-files .git --no-blob-protection  my-repo.git
144 | ```
145 | 
146 | becomes
147 | 
148 | ```shell
149 |   git filter-repo --invert-paths --path-glob '*/.git' --path .git
150 | ```
151 | 
152 | Yes, that glob will handle .git directories one or more directories
153 | deep; it's a git-style glob rather than a shell-style glob.  Also, the
154 | `--path .git` was added because `--path-glob '*/.git'` won't match a
155 | directory named .git in the toplevel directory since it has a '/'
156 | character in the glob expression (though I would hope the repository
157 | doesn't have a tracked .git toplevel directory in its history).
158 | 


--------------------------------------------------------------------------------
/Documentation/converting-from-filter-branch.md:
--------------------------------------------------------------------------------
  1 | # Cheat Sheet: Converting from filter-branch
  2 | 
  3 | This document is aimed at folks who are familiar with filter-branch and want
  4 | to learn how to convert over to using filter-repo.
  5 | 
  6 | ## Table of Contents
  7 | 
  8 |   * [Half-hearted conversions](#half-hearted-conversions)
  9 |   * [Intention of "equivalent" commands](#intention-of-equivalent-commands)
 10 |   * [Basic Differences](#basic-differences)
 11 |   * [Cheat Sheet: Conversion of Examples from the filter-branch manpage](#cheat-sheet-conversion-of-examples-from-the-filter-branch-manpage)
 12 |   * [Cheat Sheet: Additional conversion examples](#cheat-sheet-additional-conversion-examples)
 13 | 
 14 | ## Half-hearted conversions
 15 | 
 16 | You can switch nearly any `git filter-branch` command to use
 17 | filter-repo under the covers by just replacing the `git filter-branch`
 18 | part of the command with
 19 | [`filter-lamely`](../contrib/filter-repo-demos/filter-lamely).  The
 20 | git.git regression testsuite passes when I swap out the filter-branch
 21 | script with filter-lamely, for example.  (However, the filter-branch
 22 | tests are not very comprehensive, so don't rely on that too much.)
 23 | 
 24 | Doing a half-hearted conversion has nearly all of the drawbacks of
 25 | filter-branch and nearly none of the benefits of filter-repo, but it
 26 | will make your command run a few times faster and makes for a very
 27 | simple conversion.
 28 | 
 29 | You'll get a lot more performance, safety, and features by just
 30 | switching to direct filter-repo commands.
 31 | 
 32 | ## Intention of "equivalent" commands
 33 | 
 34 | filter-branch and filter-repo have different defaults, as highlighted
 35 | in the Basic Differences section below.  As such, getting a command
 36 | which behaves identically is not possible.  Also, sometimes the
 37 | filter-branch manpage lies, e.g. it says "suppose you want to...from
 38 | all commits" and then uses a command line like "git filter-branch
 39 | ... HEAD", which only operates on commits in the current branch rather
 40 | than on all commits.
 41 | 
 42 | Rather than focusing on matching filter-branch output as exactly as
 43 | possible, I treat the filter-branch examples as idiomatic ways to
 44 | solve a certain type of problem with filter-branch, and express how
 45 | one would idiomatically solve the same problem in filter-repo.
 46 | Sometimes that means the results are not identical, but they are
 47 | largely the same in each case.
 48 | 
 49 | ## Basic Differences
 50 | 
 51 | With `git filter-branch`, you have a git repository where every single
 52 | commit (within the branches or revisions you specify) is checked out
 53 | and then you run one or more shell commands to transform the working
 54 | copy into your desired end state.
 55 | 
 56 | With `git filter-repo`, you are essentially given an editing tool to
 57 | operate on the [fast-export](https://git-scm.com/docs/git-fast-export)
 58 | serialization of a repo.  That means there is an input stream of all
 59 | the contents of the repository, and rather than specifying filters in
 60 | the form of commands to run, you usually employ a number of common
 61 | pre-defined filters that provide various ways to slice, dice, or
 62 | modify the repo based on its components (such as pathnames, file
 63 | content, user names or emails, etc.)  That makes common operations
 64 | easier, even if it's not as versatile as shell callbacks.  For cases
 65 | where more complexity or special casing is needed, filter-repo
 66 | provides python callbacks that can operate on the data structures
 67 | populated from the fast-export stream to do just about anything you
 68 | want.
 69 | 
 70 | filter-branch defaults to working on a subset of the repository, and
 71 | requires you to specify a branch or branches, meaning you need to
 72 | specify `-- --all` to modify all commits.  filter-repo by contrast
 73 | defaults to rewriting everything, and you need to specify `--refs
 74 | <rev-list-args>` if you want to limit to just a certain set of
 75 | branches or range of commits.  (Though any `<rev-list-args>` that
 76 | begin with a hyphen are not accepted by filter-repo as they look like
 77 | the start of different options.)
 78 | 
 79 | filter-repo also takes care of additional concerns automatically, like
 80 | rewriting commit messages that reference old commit IDs to instead
 81 | reference the rewritten commit IDs, pruning commits which do not start
 82 | empty but become empty due to the specified filters, and automatically
 83 | shrinking and gc'ing the repo at the end of the filtering operation.
 84 | 
 85 | ## Cheat Sheet: Conversion of Examples from the filter-branch manpage
 86 | 
 87 | ### Removing a file
 88 | 
 89 | The filter-branch manual provided three different examples of removing
 90 | a single file, based on different levels of ease vs. carefulness and
 91 | performance:
 92 | 
 93 | ```shell
 94 |   git filter-branch --tree-filter 'rm filename' HEAD
 95 | ```
 96 | ```shell
 97 |   git filter-branch --tree-filter 'rm -f filename' HEAD
 98 | ```
 99 | ```shell
100 |   git filter-branch --index-filter 'git rm --cached --ignore-unmatch filename' HEAD
101 | ```
102 | 
103 | All of these just become
104 | 
105 | ```shell
106 |   git filter-repo --invert-paths --path filename
107 | ```
108 | 
109 | ### Extracting a subdirectory
110 | 
111 | Extracting a subdirectory via
112 | 
113 | ```shell
114 |   git filter-branch --subdirectory-filter foodir -- --all
115 | ```
116 | 
117 | is one of the easiest commands to convert; it just becomes
118 | 
119 | ```shell
120 |   git filter-repo --subdirectory-filter foodir
121 | ```
122 | 
123 | ### Moving the whole tree into a subdirectory
124 | 
125 | Keeping all files but placing them in a new subdirectory via
126 | 
127 | ```shell
128 |   git filter-branch --index-filter \
129 |       'git ls-files -s | sed "s-\t\"*-&newsubdir/-" |
130 |               GIT_INDEX_FILE=$GIT_INDEX_FILE.new \
131 |                       git update-index --index-info &&
132 |        mv "$GIT_INDEX_FILE.new" "$GIT_INDEX_FILE"' HEAD
133 | ```
134 | 
135 | (which happens to be GNU-specific and will fail with BSD userland in
136 | very subtle ways) becomes
137 | 
138 | ```shell
139 |   git filter-repo --to-subdirectory-filter newsubdir
140 | ```
141 | 
142 | (which works fine regardless of GNU vs BSD userland differences.)
143 | 
144 | ### Re-grafting history
145 | 
146 | The filter-branch manual provided one example with three different
147 | commands that could be used to achieve it, though the first of them
148 | had limited applicability (only when the repo had a single initial
149 | commit).  These three examples were:
150 | ```shell
151 |   git filter-branch --parent-filter 'sed "s/^\$/-p <graft-id>/"' HEAD
152 | ```
153 | ```shell
154 |   git filter-branch --parent-filter \
155 |       'test $GIT_COMMIT = <commit-id> && echo "-p <graft-id>" || cat' HEAD
156 | ```
157 | ```shell
158 |   git replace --graft $commit-id $graft-id
159 |   git filter-branch $graft-id..HEAD
160 | ```
161 | 
162 | git-replace did not exist when the original two examples were written,
163 | but it is clear that the last example is far easier to understand.  As
164 | such, filter-repo just uses the same mechanism:
165 | 
166 | ```shell
167 |   git replace --graft $commit-id $graft-id
168 |   git filter-repo --proceed
169 | ```
170 | 
171 | NOTE: --proceed is needed here because filter-repo errors out if no
172 | arguments are specified (doing so is usually an error).
173 | 
174 | ### Removing commits by a certain author
175 | 
176 | WARNING: This is a BAD example for BOTH filter-branch and filter-repo.
177 | It does not remove the changes the user made from the repo, it just
178 | removes the commit in question while smashing the changes from it into
179 | any subsequent commits as though the subsequent authors had been
180 | responsible for those changes as well.  `git rebase` is likely to be a
181 | better fit for what you really want if you are looking at this
182 | example.  (See also [this explanation of the differences between
183 | rebase and
184 | filter-repo](https://github.com/newren/git-filter-repo/issues/62#issuecomment-597725502))
185 | 
186 | This filter-branch example
187 | 
188 | ```shell
189 |   git filter-branch --commit-filter '
190 |       if [ "$GIT_AUTHOR_NAME" = "Darl McBribe" ];
191 |       then
192 |           skip_commit "$@";
193 |       else
194 |           git commit-tree "$@";
195 |       fi' HEAD
196 | ```
197 | 
198 | becomes
199 | 
200 | ```shell
201 |   git filter-repo --commit-callback '
202 |       if commit.author_name == b"Darl McBribe":
203 |           commit.skip()
204 |       '
205 | ```
206 | 
207 | ### Rewriting commit messages -- removing text
208 | 
209 | Removing git-svn-id: lines from commit messages via
210 | 
211 | ```shell
212 |   git filter-branch --msg-filter '
213 |       sed -e "/^git-svn-id:/d"
214 |       '
215 | ```
216 | 
217 | becomes
218 | 
219 | ```shell
220 |   git filter-repo --message-callback '
221 |       return re.sub(b"^git-svn-id:.*\n", b"", message, flags=re.MULTILINE)
222 |       '
223 | ```
224 | 
225 | ### Rewriting commit messages -- adding text
226 | 
227 | Adding Acked-by lines to the last ten commits via
228 | 
229 | ```shell
230 |   git filter-branch --msg-filter '
231 |           cat &&
232 |           echo "Acked-by: Bugs Bunny <bunny@bugzilla.org>"
233 |       ' master~10..master
234 | ```
235 | 
236 | becomes
237 | 
238 | ```shell
239 |   git filter-repo --message-callback '
240 |           return message + b"Acked-by: Bugs Bunny <bunny@bugzilla.org>\n"
241 |       ' --refs master~10..master
242 | ```
243 | 
244 | ### Changing author/committer(/tagger?) information
245 | 
246 | ```shell
247 |   git filter-branch --env-filter '
248 |       if test "$GIT_AUTHOR_EMAIL" = "root@localhost"
249 |       then
250 |               GIT_AUTHOR_EMAIL=john@example.com
251 |       fi
252 |       if test "$GIT_COMMITTER_EMAIL" = "root@localhost"
253 |       then
254 |               GIT_COMMITTER_EMAIL=john@example.com
255 |       fi
256 |       ' -- --all
257 | ```
258 | 
259 | becomes either
260 | 
261 | ```shell
262 |   # Ensure '<john@example.com> <root@localhost>' is a line in .mailmap, then:
263 |   git filter-repo --use-mailmap
264 | ```
265 | 
266 | or
267 | 
268 | ```shell
269 |   git filter-repo --email-callback '
270 |     return email if email != b"root@localhost" else b"john@example.com"
271 |     '
272 | ```
273 | 
274 | (and as a bonus both filter-repo alternatives will fix tagger emails
275 | too, unlike the filter-branch example)
276 | 
277 | 
278 | ### Restricting to a range
279 | 
280 | The partial examples
281 | 
282 | ```shell
283 |   git filter-branch ... C..H
284 | ```
285 | ```shell
286 |   git filter-branch ... C..H ^D
287 | ```
288 | ```shell
289 |   git filter-branch ... D..H ^C
290 | ```
291 | 
292 | become
293 | 
294 | ```shell
295 |   git filter-repo ... --refs C..H
296 | ```
297 | ```shell
298 |   git filter-repo ... --refs C..H ^D
299 | ```
300 | ```shell
301 |   git filter-repo ... --refs D..H ^C
302 | ```
303 | 
304 | Note that filter-branch accepts `--not` among the revision specifiers,
305 | but that appears to python to be a flag name which breaks parsing.
306 | So, instead of e.g. `--not C` as we might use with filter-branch, we
307 | can specify `^C` to filter-repo.
308 | 
309 | ## Cheat Sheet: Additional conversion examples
310 | 
311 | ### Running a code formatter or linter on each file with some extension
312 | 
313 | Running some program on a subset of files is relatively natural in
314 | filter-branch:
315 | 
316 | ```shell
317 |   git filter-branch --tree-filter '
318 |       git ls-files -z "*.c" \
319 |           | xargs -0 -n 1 clang-format -style=file -i
320 |       '
321 | ```
322 | 
323 | though it has the disadvantage of running on every c file for every
324 | commit in history, even if some commits do not modify any c files.  This
325 | means this kind of command can be excruciatingly slow.
326 | 
327 | The same functionality is slightly more involved in filter-repo for
328 | two reasons:
329 |   - fast-export and fast-import split file contents and file names into
330 |     completely different data structures that aren't normally available
331 |     together
332 |   - to run a program on a file, you'll need to write the contents to
333 |     a file, execute the program on that file, and then read the contents
334 |     of the file back in
335 | 
336 | ```shell
337 |   git filter-repo --file-info-callback '
338 |     if not filename.endswith(b".c"):
339 |       return (filename, mode, blob_id)  # no changes
340 | 
341 |     contents = value.get_contents_by_identifier(blob_id)
342 |     tmpfile = os.path.basename(filename)
343 |     with open(tmpfile, "wb") as f:
344 |       f.write(contents)
345 |     subprocess.check_call(["clang-format", "-style=file", "-i", tmpfile])
346 |     with open(tmpfile, "rb") as f:
347 |       contents = f.read()
348 |     new_blob_id = value.insert_file_with_contents(contents)
349 | 
350 |     return (filename, mode, new_blob_id)
351 |     '
352 | ```
353 | 
354 | However, one can write a script that uses filter-repo as a library to
355 | simplify this, while also gaining filter-repo's automatic handling of
356 | other concerns like rewriting commit IDs in commit messages or pruning
357 | commits that become empty.  In fact, one of the [contrib
358 | demos](../contrib/filter-repo-demos),
359 | [lint-history](../contrib/filter-repo-demos/lint-history), was
360 | specifically written to make this kind of case really easy:
361 | 
362 | ```shell
363 |   lint-history --relevant 'return filename.endswith(b".c")' \
364 |       clang-format -style=file -i
365 | ```
366 | 


--------------------------------------------------------------------------------
/Documentation/examples-from-user-filed-issues.md:
--------------------------------------------------------------------------------
  1 | # Examples from user-filed issues
  2 | 
  3 | Lots of people have filed issues against git-filter-repo, and many times their
  4 | issue boils down to questions of "How do I?" or "Why doesn't this work?"
  5 | 
  6 | Below is a collection of example repository filterings in answer to their
  7 | questions, which may be of interest to others.
  8 | 
  9 | ## Table of Contents
 10 | 
 11 |   * [Adding files to root commits](#adding-files-to-root-commits)
 12 |   * [Purge a large list of files](#purge-a-large-list-of-files)
 13 |   * [Extracting a libary from a repo](#Extracting-a-libary-from-a-repo)
 14 |   * [Replace words in all commit messages](#Replace-words-in-all-commit-messages)
 15 |   * [Only keep files from two branches](#Only-keep-files-from-two-branches)
 16 |   * [Renormalize end-of-line characters and add a .gitattributes](#Renormalize-end-of-line-characters-and-add-a-gitattributes)
 17 |   * [Remove spaces at the end of lines](#Remove-spaces-at-the-end-of-lines)
 18 |   * [Having both exclude and include rules for filenames](#Having-both-exclude-and-include-rules-for-filenames)
 19 |   * [Removing paths with a certain extension](#Removing-paths-with-a-certain-extension)
 20 |   * [Removing a directory](#Removing-a-directory)
 21 |   * [Convert from NFD filenames to NFC](#Convert-from-NFD-filenames-to-NFC)
 22 |   * [Set the committer of the last few commits to myself](#Set-the-committer-of-the-last-few-commits-to-myself)
 23 |   * [Handling special characters, e.g. accents and umlauts in names](#Handling-special-characters-eg-accents-and-umlauts-in-names)
 24 |   * [Handling repository corruption](#Handling-repository-corruption)
 25 |   * [Removing all files with a backslash in them](#Removing-all-files-with-a-backslash-in-them)
 26 |   * [Replace a binary blob in history](#Replace-a-binary-blob-in-history)
 27 |   * [Remove commits older than N days](#Remove-commits-older-than-N-days)
 28 |   * [Replacing pngs with compressed alternative](#Replacing-pngs-with-compressed-alternative)
 29 |   * [Updating submodule hashes](#Updating-submodule-hashes)
 30 |   * [Using multi-line strings in callbacks](#Using-multi-line-strings-in-callbacks)
 31 | 
 32 | 
 33 | ## Adding files to root commits
 34 | 
 35 | <!-- https://github.com/newren/git-filter-repo/issues/21 -->
 36 | 
 37 | Here's an example that will take `/path/to/existing/README.md` and
 38 | store it as `README.md` in the repository, and take
 39 | `/home/myusers/mymodule.gitignore` and store it as `src/.gitignore` in
 40 | the repository:
 41 | 
 42 | ```
 43 | git filter-repo --commit-callback "if not commit.parents: commit.file_changes += [
 44 |     FileChange(b'M', b'README.md', b'$(git hash-object -w '/path/to/existing/README.md')', b'100644'), 
 45 |     FileChange(b'M', b'src/.gitignore', b'$(git hash-object -w '/home/myusers/mymodule.gitignore')', b'100644')]"
 46 | ```
 47 | 
 48 | Alternatively, you could also use the [insert-beginning](../contrib/filter-repo-demos/insert-beginning) contrib script:
 49 | 
 50 | ```
 51 | mv /path/to/existing/README.md README.md
 52 | mv /home/myusers/mymodule.gitignore src/.gitignore
 53 | insert-beginning --file README.md
 54 | insert-beginning --file src/.gitignore
 55 | ```
 56 | 
 57 | ## Purge a large list of files
 58 | 
 59 | <!-- https://github.com/newren/git-filter-repo/issues/63 -->
 60 | 
 61 | Stick all the files in some file (one per line),
 62 | e.g. `../DELETED_FILENAMES.txt`, and then run
 63 | 
 64 | ```
 65 | git filter-repo --invert-paths --paths-from-file ../DELETED_FILENAMES.txt
 66 | ```
 67 | 
 68 | ## Extracting a libary from a repo
 69 | 
 70 | <!-- https://github.com/newren/git-filter-repo/issues/80 -->
 71 | 
 72 | If you want to pick out some subdirectory to keep
 73 | (e.g. `src/some-folder/some-feature/`), but don't want it moved to the
 74 | repository root (so that --subdirectory-filter isn't applicable) but
 75 | instead want it to become some other higher level directory
 76 | (e.g. `src/`):
 77 | 
 78 | ```
 79 | git filter-repo \
 80 |     --path src/some-folder/some-feature/ \
 81 |     --path-rename src/some-folder/some-feature/:src/
 82 | ```
 83 | 
 84 | ## Replace words in all commit messages
 85 | 
 86 | <!-- https://github.com/newren/git-filter-repo/issues/83 -->
 87 | 
 88 | Replace "stuff" in any commit message with "task".
 89 | 
 90 | ```
 91 | git filter-repo --message-callback 'return message.replace(b"stuff", b"task")'
 92 | ```
 93 | 
 94 | ## Only keep files from two branches
 95 | 
 96 | <!-- https://github.com/newren/git-filter-repo/issues/91 -->
 97 | 
 98 | Let's say you know that the files currently present on two branches
 99 | are the only files that matter.  Files that used to exist in either of
100 | these branches, or files that only exist on some other branch, should
101 | all be deleted from all versions of history.  This can be accomplished
102 | by getting a list of files from each branch, combining them, sorting
103 | the list and picking out just the unique entries, then passing the
104 | result to `--paths-from-file`:
105 | 
106 | ```
107 | git ls-tree -r --name-only ${BRANCH1} >../my-files
108 | git ls-tree -r --name-only ${BRANCH2} >>../my-files
109 | sort ../my-files | uniq >../my-relevant-files
110 | git filter-repo --paths-from-file ../my-relevant-files
111 | ```
112 | 
113 | ## Renormalize end-of-line characters and add a .gitattributes
114 | 
115 | <!-- https://github.com/newren/git-filter-repo/issues/122 -->
116 | 
117 | ```
118 | contrib/filter-repo-demos/lint-history dos2unix
119 | [edit .gitattributes]
120 | contrib/filter-repo-demos/insert-beginning --file .gitattributes
121 | ```
122 | 
123 | ## Remove spaces at the end of lines
124 | 
125 | <!-- https://github.com/newren/git-filter-repo/issues/145 -->
126 | 
127 | Removing all spaces at the end of lines of non-binary files, including
128 | converting CRLF to LF:
129 | 
130 | ```
131 | git filter-repo --replace-text <(echo 'regex:[\r\t ]+(\n|$)==>\n')
132 | ```
133 | 
134 | ## Having both exclude and include rules for filenames
135 | 
136 | <!-- https://github.com/newren/git-filter-repo/issues/230 -->
137 | 
138 | If you want to have rules to both include and exclude filenames, you
139 | can simply invoke `git filter-repo` multiple times.  Alternatively,
140 | you can do it in one run if you dispense with `--path` arguments and
141 | instead use the more generic `--filename-callback`.  For example to
142 | include all files under `src/` except for `src/README.md`:
143 | 
144 | ```
145 | git filter-repo --filename-callback '
146 |     if filename == b"src/README.md":
147 |         return None
148 |     if filename.startswith(b"src/"):
149 |         return filename
150 |     return None'
151 | ```
152 | 
153 | ## Removing paths with a certain extension
154 | 
155 | <!-- https://github.com/newren/git-filter-repo/issues/274 -->
156 | 
157 | ```
158 | git filter-repo --invert-paths --path-glob '*.xsa'
159 | ```
160 | 
161 | or
162 | 
163 | ```
164 | git filter-repo --filename-callback '
165 |     if filename.endswith(b".xsa"):
166 |         return None
167 |     return filename'
168 | ```
169 | 
170 | ## Removing a directory
171 | 
172 | <!-- https://github.com/newren/git-filter-repo/issues/278 -->
173 | 
174 | ```
175 | git filter-repo --path node_modules/electron/dist/ --invert-paths
176 | ```
177 | 
178 | ## Convert from NFD filenames to NFC
179 | 
180 | <!-- https://github.com/newren/git-filter-repo/issues/296 -->
181 | 
182 | Given that Mac does utf-8 normalization of filenames, and has
183 | historically switched which kind of normalization it does, users may
184 | have committed files with alternative normalizations to their
185 | repository.  If someone wants to convert filenames in NFD form to NFC,
186 | they could run
187 | 
188 | ```
189 | git filter-repo --filename-callback '
190 |     try: 
191 |         return subprocess.check_output("iconv -f utf-8-mac -t utf-8".split(),
192 |                                        input=filename)
193 |     except:
194 |         return filename
195 | '
196 | ```
197 | 
198 | or instead of relying on the system iconv utility and spawning separate
199 | processes, doing it within python:
200 | 
201 | ```
202 | git filter-repo --filename-callback '
203 |     import unicodedata
204 |     try:
205 |         return bytearray(unicodedata.normalize("NFC", filename.decode("utf-8")), "utf-8")
206 |     except:
207 |         return filename
208 | '
209 | ```
210 |   
211 | ## Set the committer of the last few commits to myself
212 | 
213 | <!-- https://github.com/newren/git-filter-repo/issues/379 -->
214 | 
215 | ```
216 | git filter-repo --refs main~5..main --commit-callback '
217 |     commit.committer_name = b"My Wonderful Self"
218 |     commit.committer_email = b"my@self.org"
219 | '
220 | ```
221 | 
222 | ## Handling special characters, e.g. accents and umlauts in names
223 | 
224 | <!-- https://github.com/newren/git-filter-repo/issues/383 -->
225 | 
226 | Since characters like ë and á are multi-byte characters and python
227 | won't allow you to directly place those in a bytestring
228 | (e.g. `b"Raphaël González"` would result in a `SyntaxError: bytes can
229 | only contain ASCII literal characters` error from Python), you just
230 | need to make a normal (UTF-8) string and then convert to a bytestring
231 | to handle these.  For example, changing the author name and email
232 | where the author email is currently `example@test.com`:
233 | 
234 | ```
235 | git filter-repo --refs main~5..main --commit-callback '
236 |     if commit.author_email == b"example@test.com":
237 |         commit.author_name = "Raphaël González".encode()
238 |         commit.author_email = b"rgonzalez@test.com"
239 | '
240 | ```
241 | 
242 | ## Handling repository corruption
243 | 
244 | <!-- https://github.com/newren/git-filter-repo/issues/420 -->
245 | 
246 | First, run fsck to get a list of the corrupt objects, e.g.:
247 | ```
248 | $ git fsck --full
249 | error in commit 166f57b3fbe31257100361ecaf735f305b533b21: missingSpaceBeforeDate: invalid author/committer line - missing space before date
250 | error in tree c15680eae81cc8539af7e7de766a8a7c13bd27df: duplicateEntries: contains duplicate file entries
251 | Checking object directories: 100% (256/256), done.
252 | ```
253 | 
254 | Odds are you'll only see one type of corruption, but if you see
255 | multiple, you can either do multiple filterings, or create replacement
256 | objects for all the corrupt objects (both commits and trees), and then
257 | do the filtering.  Since the method for handling corrupt commits and
258 | corrupt trees is slightly different, I'll give examples below for each.
259 | 
260 | ### Handling repository corruption -- commit objects
261 | 
262 | Print out the corrupt object literally to a temporary file:
263 | ```
264 | $ git cat-file -p 166f57b3fbe31257100361ecaf735f305b533b21 >tmp
265 | ```
266 | 
267 | Taking a look at the file would show, for example:
268 | ```
269 | $ cat tmp
270 | tree e1d871155fce791680ec899fe7869067f2b4ffd2
271 | author My Name <my@email.com>1673287380 -0800
272 | committer My Name <my@email.com> 1673287380 -0800
273 | 
274 | Initial
275 | ```
276 | 
277 | Edit that file to fix the error (in this case, the missing space
278 | between author email and author date).  In this case, it would look
279 | like this after editing:
280 | 
281 | ```
282 | tree e1d871155fce791680ec899fe7869067f2b4ffd2
283 | author My Name <my@email.com> 1673287380 -0800
284 | committer My Name <my@email.com> 1673287380 -0800
285 | 
286 | Initial
287 | ```
288 | 
289 | Save the updated file, then use `git replace` to make a replace reference
290 | for it.
291 | ```
292 | $ git replace -f 166f57b3fbe31257100361ecaf735f305b533b21 $(git hash-object -t commit -w tmp)
293 | ```
294 | 
295 | Then remove the temporary file `tmp` and run `filter-repo` to consume
296 | the replace reference and make it permanent:
297 | 
298 | ```
299 | $ rm tmp
300 | $ git filter-repo --proceed
301 | ```
302 | 
303 | Note that if you have multiple corrupt objects, you need to create
304 | replacements for all of them, and then run filter-repo.  Leaving any
305 | corrupt object without a replacement is likely to cause the filter-repo run
306 | to fail.
307 | 
308 | ### Handling repository corruption -- tree objects
309 | 
310 | <!-- GitHub customer example -->
311 | 
312 | Print out the corrupt object literally to a temporary file:
313 | ```
314 | $ git cat-file -p c15680eae81cc8539af7e7de766a8a7c13bd27df >tmp
315 | ```
316 | 
317 | Taking a look at the file would show, for example:
318 | ```
319 | $ cat tmp
320 | 100644 blob cd5ded43e86f80bfd384702e3f4cc7ce42de49f9	.gitignore
321 | 100644 blob 226febfcc91ec2c166a5a06834fb47c3553ec469	README.md
322 | 100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391	src
323 | 040000 tree df2b8fc99e1c1d4dbc0a854d9f72157f1d6ea078	src
324 | 040000 tree 99d732476808176bb9d73bcbfe2505e43d65cb4f	t
325 | ```
326 | 
327 | Edit that file to fix the error (in this case, removing either the `src`
328 | file (blob) or the `src` directory (tree)).  In this case, it might look
329 | like this after editing:
330 | 
331 | ```
332 | 100644 blob cd5ded43e86f80bfd384702e3f4cc7ce42de49f9	.gitignore
333 | 100644 blob 226febfcc91ec2c166a5a06834fb47c3553ec469	README.md
334 | 040000 tree df2b8fc99e1c1d4dbc0a854d9f72157f1d6ea078	src
335 | 040000 tree 99d732476808176bb9d73bcbfe2505e43d65cb4f	t
336 | ```
337 | 
338 | Save the updated file, then use `git mktree` to turn it into an actual
339 | tree object:
340 | ```
341 | $ git mktree <tmp
342 | ace04f50a5d13b43e94c12802d3d8a6c66a35b1d
343 | ```
344 | 
345 | Now use the output of that command to create a replacement object for
346 | the original corrupt object:
347 | ```
348 | git replace -f c15680eae81cc8539af7e7de766a8a7c13bd27df ace04f50a5d13b43e94c12802d3d8a6c66a35b1d
349 | ```
350 | 
351 | Then remove the temporary file `tmp` and run `filter-repo` to consume
352 | the replace reference and make it permanent:
353 | 
354 | ```
355 | $ rm tmp
356 | $ git filter-repo --proceed
357 | ```
358 | 
359 | As mentioned with corrupt commit objects, if you have multiple corrupt
360 | objects, as long as you create all the replacements for those objects
361 | first, you only need to run filter-repo once.
362 | 
363 | 
364 | ## Removing all files with a backslash in them
365 | 
366 | <!-- https://github.com/newren/git-filter-repo/issues/427 -->
367 | 
368 | ```
369 | git filter-repo --filename-callback 'return None if b"\\" in filename else filename'
370 | ```
371 | 
372 | ## Replace a binary blob in history
373 | 
374 | <!-- https://github.com/newren/git-filter-repo/issues/436 -->
375 | 
376 | Let's say you committed a binary blob, perhaps an image file, with
377 | sensitive data, and never modified it.  You want to replace it with
378 | the contents of some alternate file, currently found at
379 | `../alternative-file.jpg` (it can have a different filename than what
380 | is stored in the repository).  Let's also say the hash of the old file
381 | was `f4ede2e944868b9a08401dafeb2b944c7166fd0a`.  You can replace it
382 | with either
383 | 
384 | ```
385 | git filter-repo --blob-callback '
386 |     if blob.original_id == b"f4ede2e944868b9a08401dafeb2b944c7166fd0a":
387 |         blob.data = open("../alternative-file.jpg", "rb").read()
388 | '
389 | ```
390 | 
391 | or
392 | 
393 | ```
394 | git replace -f f4ede2e944868b9a08401dafeb2b944c7166fd0a $(git hash-object -w ../alternative-file.jpg)
395 | git filter-repo --proceed
396 | ```
397 | 
398 | ## Remove commits older than N days
399 | 
400 | <!-- https://github.com/newren/git-filter-repo/issues/300 -->
401 | 
402 | This is such a bad usecase.  I'm tempted to leave it out, but it has
403 | come up multiple times, and there are people who are totally fine with
404 | changing every commit hash in their repository and throwing away
405 | history periodically.  First, identify an ${OLD_COMMIT} that you want
406 | to be a new root commit, then run:
407 | 
408 | ```
409 | git replace --graft ${OLD_COMMIT}
410 | git filter-repo --proceed
411 | ```
412 | 
413 | (The trick here is that `git replace --graft` takes a commit to replace, and
414 | a list of new parents for the commit.  Since ${OLD_COMMIT} is the final
415 | positional argument, it means the list of new parents is an empty list, i.e.
416 | we are turning it into a new root commit.)
417 | 
418 | ## Replacing pngs with compressed alternative
419 | 
420 | <!-- https://github.com/newren/git-filter-repo/issues/492 -->
421 | 
422 | Let's say you committed thousands of pngs that were poorly compressed,
423 | but later aggressively recompressed the pngs and committed and pushed.
424 | Unfortunately, clones are slow because they still contain the poorly
425 | compressed pngs and you'd like to rewrite history to pretend that the
426 | aggressively compressed versions were used when the files were first
427 | introduced.
428 | 
429 | First, take a look at the commit that aggressively recompressed the pngs:
430 | 
431 | ```
432 | git log -1 --raw --no-abbrev ${COMMIT_WHERE_YOU_COMPRESSED_PNGS}
433 | ```
434 | 
435 | that will show output like
436 | ```
437 | :100755 100755 edf570fde099c0705432a389b96cb86489beda09 9cce52ae0806d695956dcf662cd74b497eaa7b12 M      resources/foo.png
438 | :100755 100755 644f7c55e1a88a29779dc86b9ff92f512bf9bc11 88b02e9e45c0a62db2f1751b6c065b0c2e538820 M      resources/bar.png
439 | ```
440 | 
441 | Use that to make a --file-info-callback to fix up the original versions:
442 | ```
443 | git filter-repo --file-info-callback '
444 |     if filename == b"resources/foo.png" and blob_id == b"edf570fde099c0705432a389b96cb86489beda09":
445 |         blob_id = b"9cce52ae0806d695956dcf662cd74b497eaa7b12"
446 |     if filename == b"resources/bar.png" and blob_id == b"644f7c55e1a88a29779dc86b9ff92f512bf9bc11":
447 |         blob_id = b"88b02e9e45c0a62db2f1751b6c065b0c2e538820"
448 |     return (filename, mode, blob_id)
449 | '
450 | ```
451 | 
452 | ## Updating submodule hashes
453 | 
454 | <!-- https://github.com/newren/git-filter-repo/issues/537 -->
455 | 
456 | Let's say you have a repo with a submodule at src/my-submodule, and
457 | that you feel the wrong commit-hashes of the submodule were committed
458 | within your project and you want them updated according to the
459 | following table:
460 | ```
461 | old                                      new
462 | edf570fde099c0705432a389b96cb86489beda09 9cce52ae0806d695956dcf662cd74b497eaa7b12
463 | 644f7c55e1a88a29779dc86b9ff92f512bf9bc11 88b02e9e45c0a62db2f1751b6c065b0c2e538820
464 | ```
465 | 
466 | You could do this as follows:
467 | ```
468 | git filter-repo --file-info-callback '
469 |     if filename == b"src/my-submodule" and blob_id == b"edf570fde099c0705432a389b96cb86489beda09":
470 |         blob_id = b"9cce52ae0806d695956dcf662cd74b497eaa7b12"
471 |     if filename == b"src/my-submodule" and blob_id == b"644f7c55e1a88a29779dc86b9ff92f512bf9bc11":
472 |         blob_id = b"88b02e9e45c0a62db2f1751b6c065b0c2e538820"
473 |     return (filename, mode, blob_id)'
474 | ```
475 | 
476 | Yes, `blob_id` is kind of a misnomer here since the file's hash
477 | actually refers to a commit from the sub-project.  But `blob_id` is
478 | the name of the parameter passed to the --file-info-callback, so that
479 | is what must be used.
480 | 
481 | ## Using multi-line strings in callbacks
482 | 
483 | <!-- https://lore.kernel.org/git/CABPp-BFqbiS8xsbLouNB41QTc5p0hEOy-EoV0Sjnp=xJEShkTw@mail.gmail.com/ -->
484 | 
485 | Since the text for callbacks has spaces inserted at the front of every
486 | line, multi-line strings are normally munged.  For example, the command
487 | 
488 | ```
489 | git filter-repo --blob-callback '
490 |   blob.data = bytes("""\
491 | This is the new
492 | file that I am
493 | replacing every blob
494 | with.  It is great.\n""", "utf-8")
495 | '
496 | ```
497 | 
498 | would result in a file with extra spaces at the front of every line:
499 | ```
500 |   This is the new
501 |   file that I am
502 |   replacing every blob
503 |   with.  It is great.
504 | ```
505 | 
506 | The two spaces at the beginning of every line were inserted into every
507 | line of the callback when trying to compile it as a function.
508 | However, you can use textwrap.dedent to fix this; in fact, using it
509 | will even allow you to add more leading space so that it looks nicely
510 | indented.  For example:
511 | 
512 | ```
513 | git filter-repo --blob-callback '
514 |   import textwrap
515 |   blob.data = bytes(textwrap.dedent("""\
516 |     This is the new
517 |     file that I am
518 |     replacing every blob
519 |     with.  It is great.\n"""), "utf-8")
520 | '
521 | ```
522 | 
523 | That will result in a file with contents
524 | ```
525 | This is the new
526 | file that I am
527 | replacing every blob
528 | with.  It is great.
529 | ```
530 | 
531 | which has no leading spaces on any lines.


--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
  1 | # Table of Contents
  2 | 
  3 |   * [Pre-requisites](#pre-requisites)
  4 |   * [Simple Installation](#simple-installation)
  5 |   * [Installation via Package Manager](#installation-via-package-manager)
  6 |   * [Detailed installation explanation for
  7 |      packagers](#detailed-installation-explanation-for-packagers)
  8 |   * [Installation as Python Package from
  9 |      PyPI](#installation-as-python-package-from-pypi)
 10 |   * [Installation via Makefile](#installation-via-makefile)
 11 |   * [Notes for Windows Users](#notes-for-windows-users)
 12 | 
 13 | # Pre-requisites
 14 | 
 15 | Instructions on this page assume you have already installed both
 16 | [Git](https://git-scm.com) and [Python](https://www.python.org/)
 17 | (though the [Notes for Windows Users](#notes-for-windows-users) has
 18 | some tips on Python).
 19 | 
 20 | # Simple Installation
 21 | 
 22 | All you need to do is download one file: the [git-filter-repo script
 23 | in this repository](git-filter-repo) ([direct link to raw
 24 | file](https://raw.githubusercontent.com/newren/git-filter-repo/main/git-filter-repo)),
 25 | making sure to preserve its name (`git-filter-repo`, with no
 26 | extension).  **That's it**.  You're done.
 27 | 
 28 | Then you can run any command you want, such as
 29 | 
 30 |     $ python3 git-filter-repo --analyze
 31 | 
 32 | If you place the git-filter-repo script in your $PATH, then you can
 33 | shorten commands by replacing `python3 git-filter-repo` with `git
 34 | filter-repo`; the manual assumes this but you can use the longer form.
 35 | 
 36 | Optionally, if you also want to use some of the contrib scripts, then
 37 | you need to make sure you have a `git_filter_repo.py` file which is
 38 | either a link to or copy of `git-filter-repo`, and you need to place
 39 | that git_filter_repo.py file in $PYTHONPATH.
 40 | 
 41 | If you prefer an "official" installation over the manual installation
 42 | explained above, the other sections may have useful tips.
 43 | 
 44 | # Installation via Package Manager
 45 | 
 46 | If you want to install via some [package
 47 | manager](https://alternativeto.net/software/yellowdog-updater-modified/?license=opensource),
 48 | you can run
 49 | 
 50 |     $ PACKAGE_TOOL install git-filter-repo
 51 | 
 52 | The following package managers have packaged git-filter-repo:
 53 | 
 54 | [![Packaging status](https://repology.org/badge/vertical-allrepos/git-filter-repo.svg)](https://repology.org/project/git-filter-repo/versions)
 55 | 
 56 | This list covers at least Windows (Scoop), Mac OS X (Homebrew), and
 57 | Linux (most of the rest).  Note that I do not curate this list (and have
 58 | no interest in doing so); https://repology.org tracks who packages
 59 | these versions.
 60 | 
 61 | # Detailed installation explanation for packagers
 62 | 
 63 | filter-repo only consists of a few files that need to be installed:
 64 | 
 65 |   * git-filter-repo
 66 | 
 67 |     This is the _only_ thing needed for basic use.
 68 | 
 69 |     This can be installed in the directory pointed to by `git --exec-path`,
 70 |     or placed anywhere in $PATH.
 71 | 
 72 |     If your python3 executable is named "python" instead of "python3"
 73 |     (this particularly appears to affect a number of Windows users),
 74 |     then you'll also need to modify the first line of git-filter-repo
 75 |     to replace "python3" with "python".
 76 | 
 77 |   * git_filter_repo.py
 78 | 
 79 |     This is needed if you want to make use of one of the scripts in
 80 |     contrib/filter-repo-demos/, or want to write your own script making use
 81 |     of filter-repo as a python library.
 82 | 
 83 |     You can create a symlink to (or copy of) git-filter-repo named
 84 |     git_filter_repo.py and place it in your python site packages; `python
 85 |     -c "import site; print(site.getsitepackages())"` may help you find the
 86 |     appropriate location for your system.  Alternatively, you can place
 87 |     this file anywhere within $PYTHONPATH.
 88 | 
 89 |   * git-filter-repo.1
 90 | 
 91 |     This is needed if you want `git filter-repo --help` to succeed in
 92 |     displaying the manpage, when help.format is "man" (the default on Linux
 93 |     and Mac).
 94 | 
 95 |     This can be installed in the directory pointed to by `$(git
 96 |     --man-path)/man1/`, or placed anywhere in $MANDIR/man1/ where $MANDIR
 97 |     is some entry from $MANPATH.
 98 | 
 99 |     Note that `git filter-repo -h` will show a more limited built-in set of
100 |     instructions regardless of whether the manpage is installed.
101 | 
102 |   * git-filter-repo.html
103 | 
104 |     This is needed if you want `git filter-repo --help` to succeed in
105 |     displaying the html version of the help, when help.format is set to
106 |     "html" (the default on Windows).
107 | 
108 |     This can be installed in the directory pointed to by `git --html-path`.
109 | 
110 |     Note that `git filter-repo -h` will show a more limited built-in set of
111 |     instructions regardless of whether the html version of help is
112 |     installed.
113 | 
114 | So, installation might look something like the following:
115 | 
116 | 1. If you don't have the necessary documentation files (because you
117 |    are installing from a clone of filter-repo instead of from a
118 |    tarball) then you can first run:
119 | 
120 |    `make snag_docs`
121 | 
122 |    (which just copies the generated documentation files from the
123 |    `docs` branch)
124 | 
125 | 2. Run the following
126 | 
127 |    ```
128 |    cp -a git-filter-repo $(git --exec-path)
129 |    cp -a git-filter-repo.1 $(git --man-path)/man1 && mandb
130 |    cp -a git-filter-repo.html $(git --html-path)
131 |    ln -s $(git --exec-path)/git-filter-repo \
132 |        $(python -c "import site; print(site.getsitepackages()[-1])")/git_filter_repo.py
133 |    ```
134 | 
135 | or you can use the provided Makefile, as noted below.
136 | 
137 | # Installation as Python Package from PyPI
138 | 
139 | `git-filter-repo` is also available as a
140 | [PyPI package](https://pypi.org/project/git-filter-repo/).
141 | 
142 | Therefore, it can be installed with [pipx](https://pypa.github.io/pipx/)
143 | or [uv tool](https://docs.astral.sh/uv/concepts/tools/).
144 | Command example for pipx:
145 | 
146 | `pipx install git-filter-repo`
147 | 
148 | # Installation via Makefile
149 | 
150 | Installing should be doable by hand, but a Makefile is provided for those
151 | who prefer it.  However, usage of the Makefile really requires overriding
152 | at least a couple of the directories with sane values, e.g.
153 | 
154 |     $ make prefix=/usr pythondir=/usr/lib64/python3.8/site-packages install
155 | 
156 | Also, the Makefile will not edit the shebang line (the first line) of
157 | git-filter-repo if your python executable is not named "python3";
158 | you'll still need to do that yourself.
159 | 
160 | # Notes for Windows Users
161 | 
162 | git-filter-repo can be installed with multiple tools, such as
163 | [pipx](https://pypa.github.io/pipx/) or a Windows-specific package manager
164 | like Scoop (both of which were covered above).
165 | 
166 | Sadly, Windows sometimes makes things difficult.  Common and historical issues:
167 | 
168 |   * **Non-functional Python stub**: Windows apparently ships with a
169 |     [non-functional
170 |     python](https://github.com/newren/git-filter-repo/issues/36#issuecomment-568933825).
171 |     This can even manifest as [the app
172 |     hanging](https://github.com/newren/git-filter-repo/issues/36) or
173 |     [the system appearing to
174 |     hang](https://github.com/newren/git-filter-repo/issues/312).  Try
175 |     installing
176 |     [Python](https://docs.microsoft.com/en-us/windows/python/beginners)
177 |     from the [Microsoft
178 |     Store](https://apps.microsoft.com/store/search?publisher=Python%20Software%20Foundation).
179 |   * **Modifying PATH, making the script executable**: If modifying your PATH
180 |     and/or making scripts executable is difficult for you, you can skip that
181 |     step by just using `python3 git-filter-repo` instead of `git filter-repo`
182 |     in your commands.
183 |   * **Different python executable name**:  Some users don't have
184 |     a `python3` executable but one named something else like `python`
185 |     or `python3.8` or whatever.  You may need to edit the first line
186 |     of the git-filter-repo script to specify the appropriate path.  Or
187 |     just don't bother and instead use the long form for executing
188 |     filter-repo commands.  Namely, replace the `git filter-repo` part
189 |     of commands with `PYTHON_EXECUTABLE git-filter-repo`. (Where
190 |     `PYTHON_EXECUTABLE` is something like `python` or `python3.8` or
191 |     `C:\PATH\TO\INSTALLATION\OF\python3.exe` or whatever).
192 |   * **Symlink issues**:  git_filter_repo.py is supposed to be a symlink to
193 |     git-filter-repo, so that it appears to have identical contents.
194 |     If your system messed up the symlink (usually meaning it looks like a
195 |     regular file with just one line), then delete git_filter_repo.py and
196 |     replace it with a copy of git-filter-repo.
197 |   * **Old GitBash limitations**: older versions of GitForWindows had an
198 |     unfortunate shebang length limitation (see [git-for-windows issue
199 |     #3165](https://github.com/git-for-windows/git/pull/3165)).  If
200 |     you're affected, just use the long form for invoking filter-repo
201 |     commands, i.e. replace the `git filter-repo` part of commands with
202 |     `python3 git-filter-repo`.
203 | 
204 | For additional historical context, see:
205 |   * [#371](https://github.com/newren/git-filter-repo/issues/371#issuecomment-1267116186)
206 |   * [#360](https://github.com/newren/git-filter-repo/issues/360#issuecomment-1276813596)
207 |   * [#312](https://github.com/newren/git-filter-repo/issues/312)
208 |   * [#307](https://github.com/newren/git-filter-repo/issues/307)
209 |   * [#225](https://github.com/newren/git-filter-repo/pull/225)
210 |   * [#231](https://github.com/newren/git-filter-repo/pull/231)
211 |   * [#124](https://github.com/newren/git-filter-repo/issues/124)
212 |   * [#36](https://github.com/newren/git-filter-repo/issues/36)
213 |   * [this git mailing list
214 |      thread](https://lore.kernel.org/git/nycvar.QRO.7.76.6.2004251610300.18039@tvgsbejvaqbjf.bet/)
215 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # Installation-related settings (mostly paths); override any on the make command line
  2 | DESTDIR = /
  3 | INSTALL = install
  4 | prefix = $(HOME)
  5 | bindir = $(prefix)/libexec/git-core
  6 | localedir = $(prefix)/share/locale
  7 | mandir = $(prefix)/share/man
  8 | htmldir = $(prefix)/share/doc/git-doc
  9 | pythondir = $(prefix)/lib64/python3.6/site-packages
 10 | 
 11 | default: build
 12 | 
 13 | build:
 14 | 	@echo Nothing to do: filter-repo is a script which needs no compilation.
 15 | 
 16 | test:
 17 | 	time t/run_coverage
 18 | 
 19 | # fixup_locale might matter once we actually have translations, but right now
 20 | # we don't.  It might not even matter then, because python has a fallback podir.
 21 | # (Use `-i -e`, not `-ie`: GNU sed reads `-ie` as "in-place, backup suffix e".)
 22 | fixup_locale:
 23 | 	sed -i -e 's%@@LOCALEDIR@@%$(localedir)%' git-filter-repo
 24 | # People installing from tarball will already have man1/git-filter-repo.1 and
 25 | # html/git-filter-repo.html.  For people installing from a git clone, snag a
 26 | # copy of the built docs recorded in the origin/docs branch.  If `git show`
 27 | # fails, remove the partial output so a rerun does not treat it as up to date.
 28 | snag_docs: Documentation/man1/git-filter-repo.1 Documentation/html/git-filter-repo.html
 29 | 
 30 | Documentation/man1/git-filter-repo.1:
 31 | 	mkdir -p Documentation/man1
 32 | 	git show origin/docs:man1/git-filter-repo.1 >$@ || { rm -f $@; exit 1; }
 33 | 
 34 | Documentation/html/git-filter-repo.html:
 35 | 	mkdir -p Documentation/html
 36 | 	git show origin/docs:html/git-filter-repo.html >$@ || { rm -f $@; exit 1; }
 37 | 
 38 | # Install the script, its python-module symlink, and the man/html docs.
 39 | install: snag_docs #fixup_locale
 40 | 	$(INSTALL) -Dm0755 git-filter-repo "$(DESTDIR)/$(bindir)/git-filter-repo"
 41 | 	$(INSTALL) -dm0755 "$(DESTDIR)/$(pythondir)"
 42 | 	ln -sf "$(bindir)/git-filter-repo" "$(DESTDIR)/$(pythondir)/git_filter_repo.py"
 43 | 	$(INSTALL) -Dm0644 Documentation/man1/git-filter-repo.1 "$(DESTDIR)/$(mandir)/man1/git-filter-repo.1"
 44 | 	$(INSTALL) -Dm0644 Documentation/html/git-filter-repo.html "$(DESTDIR)/$(htmldir)/git-filter-repo.html"
 45 | 	if command -v mandb >/dev/null 2>&1; then mandb; fi
 46 | 
 47 | #
 48 | # The remainder of the targets are meant for tasks for the maintainer; if they
 49 | # don't work for you, I don't care.  These tasks modify branches and upload
 50 | # releases and whatnot, and presume a directory layout I have locally.
 51 | #
 52 | update_docs:
 53 | 	# Set environment variables once (shell variables below use a doubled dollar sign so make leaves them for the shell)
 54 | 	export GIT_WORK_TREE=$(shell mktemp -d) \
 55 | 	export GIT_INDEX_FILE=$(shell mktemp) \
 56 | 	COMMIT=$(shell git rev-parse HEAD) \
 57 | 	&& \
 58 | 	# Sanity check; we'll build docs in a clone of a git repo \
 59 | 	test -d ../git && \
 60 | 	# Sanity check; docs == origin/docs \
 61 | 	test -z "$$(git rev-parse docs origin/docs | uniq -u)" && \
 62 | 	# Avoid spurious errors by forcing index to be well formatted, if empty \
 63 | 	git read-tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904 && # empty tree \
 64 | 	# Symlink git-filter-repo.txt documentation into git and build it \
 65 | 	ln -sf ../../git-filter-repo/Documentation/git-filter-repo.txt ../git/Documentation/ && \
 66 | 	make -C ../git/Documentation -j4 man html && \
 67 | 	# Take the built documentation and lay it out nicely \
 68 | 	mkdir $$GIT_WORK_TREE/html && \
 69 | 	mkdir $$GIT_WORK_TREE/man1 && \
 70 | 	cp -a ../git/Documentation/*.html $$GIT_WORK_TREE/html/ && \
 71 | 	cp -a ../git/Documentation/git-filter-repo.1 $$GIT_WORK_TREE/man1/ && \
 72 | 	dos2unix $$GIT_WORK_TREE/html/* && \
 73 | 	# Add new version of the documentation as a commit, if it differs \
 74 | 	git --work-tree $$GIT_WORK_TREE add . && \
 75 | 	git diff --quiet docs || git write-tree \
 76 | 		| xargs git commit-tree -p docs -m "Update docs to $$COMMIT" \
 77 | 		| xargs git update-ref refs/heads/docs && \
 78 | 	# Remove temporary files \
 79 | 	rm -rf $$GIT_WORK_TREE && \
 80 | 	rm $$GIT_INDEX_FILE && \
 81 | 	# Push the new documentation upstream \
 82 | 	git push origin docs && \
 83 | 	# Notify of completion \
 84 | 	echo && \
 85 | 	echo === filter-repo docs branch updated ===
 86 | 
 87 | # Call like this:
 88 | #   make GITHUB_COM_TOKEN=$KEY TAGNAME=v2.23.0 release
 89 | release: github_release pypi_release
 90 | 
 91 | # Call like this (shell variables in the recipe use a doubled dollar sign so make leaves them for the shell):
 92 | #   make GITHUB_COM_TOKEN=$KEY TAGNAME=v2.23.0 github_release
 93 | github_release: update_docs
 94 | 	FILEBASE=git-filter-repo-$(shell echo $(TAGNAME) | tail -c +2) \
 95 | 	TMP_INDEX_FILE=$(shell mktemp) \
 96 | 	COMMIT=$(shell git rev-parse HEAD) \
 97 | 	&& \
 98 | 	test -n "$(GITHUB_COM_TOKEN)" && \
 99 | 	test -n "$(TAGNAME)" && \
100 | 	test -n "$$COMMIT" && \
101 | 	# Make sure we don't have any staged or unstaged changes \
102 | 	git diff --quiet --staged HEAD && git diff --quiet HEAD && \
103 | 	# Make sure 'jq' is installed \
104 | 	type -p jq && \
105 | 	# Tag the release, push it to GitHub \
106 | 	git tag -a -m "filter-repo $(TAGNAME)" $(TAGNAME) $$COMMIT && \
107 | 	git push origin $(TAGNAME) && \
108 | 	# Create the tarball \
109 | 	GIT_INDEX_FILE=$$TMP_INDEX_FILE git read-tree $$COMMIT && \
110 | 	git ls-tree -r docs | grep filter-repo    \
111 | 		| sed -e 's%\t%\tDocumentation/%' \
112 | 		| GIT_INDEX_FILE=$$TMP_INDEX_FILE git update-index --index-info && \
113 | 	GIT_INDEX_FILE=$$TMP_INDEX_FILE git write-tree                                   \
114 | 		| xargs git archive --prefix=$$FILEBASE/ \
115 | 		| xz -c >$$FILEBASE.tar.xz && \
116 | 	rm $$TMP_INDEX_FILE && \
117 | 	# Make GitHub mark our new tag as an official release \
118 | 	curl -s -H "Authorization: token $(GITHUB_COM_TOKEN)" -X POST \
119 | 		https://api.github.com/repos/newren/git-filter-repo/releases \
120 | 		--data "{                                  \
121 | 		  \"tag_name\": \"$(TAGNAME)\",            \
122 | 		  \"target_commitish\": \"$$COMMIT\",     \
123 | 		  \"name\": \"$(TAGNAME)\",                \
124 | 		  \"body\": \"filter-repo $(TAGNAME)\"     \
125 | 		}" | jq -r .id >asset_id && \
126 | 	# Upload our tarball \
127 | 	cat asset_id | xargs -I ASSET_ID curl -s -H "Authorization: token $(GITHUB_COM_TOKEN)" -H "Content-Type: application/octet-stream" --data-binary @$$FILEBASE.tar.xz https://uploads.github.com/repos/newren/git-filter-repo/releases/ASSET_ID/assets?name=$$FILEBASE.tar.xz && \
128 | 	# Remove temporary file(s) \
129 | 	rm asset_id && \
130 | 	# Notify of completion \
131 | 	echo && \
132 | 	echo === filter-repo $(TAGNAME) created and uploaded to GitHub ===
133 | 
134 | pypi_release: # Not a declared prerequisite, but has an implicit dependency on github_release because...
135 | 	# ...the upload to PyPI automatically picks up the tag created by github_release
136 | 	python3 -m venv venv
137 | 	venv/bin/pip install --upgrade pip
138 | 	venv/bin/pip install build twine
139 | 	venv/bin/pyproject-build
140 | 	# Note: Retrieve "git-filter-repo releases" token; username is 'newren'
141 | 	venv/bin/twine upload dist/*
142 | 	# Remove temporary file(s)
143 | 	rm -rf dist/ venv/ git_filter_repo.egg-info/
144 | 
145 | # NOTE TO FUTURE SELF: If you accidentally push a bad release, you can remove
146 | # all but the git-filter-repo-$VERSION.tar.xz asset with
147 | #    git push --delete origin $TAGNAME
148 | # To remove the git-filter-repo-$VERSION.tar.xz asset as well:
149 | #    curl -s -H "Authorization: token $GITHUB_COM_TOKEN" -X GET \
150 | #        https://api.github.com/repos/newren/git-filter-repo/releases
151 | # and look for the "id", then run
152 | #    curl -s -H "Authorization: token $GITHUB_COM_TOKEN" -X DELETE \
153 | #        https://api.github.com/repos/newren/git-filter-repo/releases/$ID
154 | 


--------------------------------------------------------------------------------
/contrib/filter-repo-demos/README.md:
--------------------------------------------------------------------------------
 1 | ## Background
 2 | 
 3 | filter-repo is not merely a history rewriting tool, it also contains a
 4 | library that can be used to write new history rewriting tools.  This
 5 | directory contains several examples showing the breadth of different things
 6 | that could be done.
 7 | 
 8 | ## Quick overview
 9 | 
10 | Command&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; |Description
11 | -------|-----------
12 | barebones-example    |Simple example with no modifications to filter-repo behavior, just showing what to import and run.
13 | insert-beginning     |Add a new file (e.g. LICENSE/COPYING) to the beginning of history.
14 | signed-off-by        |Add a Signed-off-by tag to a range of commits.
15 | lint-history         |Run some lint command on all non-binary files in history.
16 | clean-ignore         |Delete files from history which match current gitignore rules.
17 | filter-lamely (or filter&#8209;branch&#8209;ish) |A nearly bug compatible re-implementation of filter-branch (the git testsuite passes using it instead of filter-branch), with some performance tricks to make it several times faster (though it's still glacially slow compared to filter-repo).
18 | bfg-ish              |A re-implementation of most of BFG Repo Cleaner, with new features and bug fixes.
19 | convert-svnexternals |Insert Git submodules according to SVN externals.
20 | 
21 | ## Purpose
22 | 
23 | Please note that the point of these examples is not to provide new complete
24 | tools, but simply to demonstrate that extremely varied history rewriting
25 | tools can be created which automatically inherit lots of useful base
26 | functionality: rewriting hashes in commit messages, pruning commits that
27 | become empty, handling filenames with funny characters, non-standard
28 | encodings, handling of replace refs, etc.  (Additional examples of using
29 | filter-repo as a library can also be found in [the
30 | testsuite](../../t/t9391/).)  My sincerest hope is that these examples
31 | provide lots of useful functionality, but that each is missing at least one
32 | critical piece for your usecase.  Go forth and extend and improve.
33 | 
34 | ## Usage
35 | 
36 | All the examples require a symlink to git-filter-repo in your PYTHONPATH
37 | named git_filter_repo.py in order to run; also, all have a --help flag to
38 | get a description of their usage and flags.
39 | 


--------------------------------------------------------------------------------
/contrib/filter-repo-demos/barebones-example:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | This is a simple program that behaves identically to git-filter-repo.  Its
 5 | entire purpose is just to show what to import and run to get the normal
 6 | git-filter-repo behavior, to serve as a starting point for you to figure
 7 | out what you want to modify.
 8 | """
 9 | 
10 | """
11 | Please see the
12 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
13 | near the top of git-filter-repo.
14 | """
15 | 
16 | import sys
17 | 
18 | try:
19 |   import git_filter_repo as fr
20 | except ImportError:
21 |   raise SystemExit("Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
22 | 
23 | args = fr.FilteringOptions.parse_args(sys.argv[1:])
24 | if args.analyze:
25 |   fr.RepoAnalyze.run(args)
26 | else:
27 |   filter = fr.RepoFilter(args)
28 |   filter.run()
29 | 


--------------------------------------------------------------------------------
/contrib/filter-repo-demos/clean-ignore:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | This is a simple program that will delete files from history which match
 5 | current gitignore rules, while also:
 6 |   1) pruning commits which become empty
 7 |   2) pruning merge commits which become degenerate and have no changes
 8 |      relative to its remaining relevant parent
 9 |   3) rewriting commit hashes in commit messages to reference new commit IDs.
10 | """
11 | 
12 | """
13 | Please see the
14 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
15 | near the top of git-filter-repo.
16 | """
17 | 
18 | import argparse
19 | import os
20 | import subprocess
21 | import sys
22 | try:
23 |   import git_filter_repo as fr
24 | except ImportError:
25 |   raise SystemExit("Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
26 | 
27 | class CheckIgnores:
28 |   def __init__(self):
29 |     self.ignored = set()
30 |     self.okay = set()
31 | 
32 |     cmd = 'git check-ignore --stdin --verbose --non-matching --no-index -z'
33 |     self.check_ignore_process = subprocess.Popen(cmd.split(),
34 |                                                  stdin=subprocess.PIPE,
35 |                                                  stdout=subprocess.PIPE)
36 | 
37 |   def __del__(self):
38 |     if self.check_ignore_process:
39 |       self.check_ignore_process.stdin.close()
40 | 
41 |   def get_ignored(self, filenames):
42 |     ignored = set()
43 |     for name in filenames:
44 |       if name in self.ignored:
45 |         ignored.add(name)
46 |       elif name in self.okay:
47 |         continue
48 |       else:
49 |         self.check_ignore_process.stdin.write(name+b'\0')
50 |         self.check_ignore_process.stdin.flush()
51 |         result = os.read(self.check_ignore_process.stdout.fileno(), 65535).rstrip(b'\0')
52 |         (source, linenum, pattern, pathname) = result.split(b"\0")
53 |         if name != pathname:
54 |           raise SystemExit("Error: Passed {} but got {}".format(name, pathname))
55 |         if not source and not linenum and not pattern:
56 |           self.okay.add(name)
57 |         else:
58 |           if pattern[0:1] == b"!":
59 |             self.okay.add(name)
60 |           else:
61 |             self.ignored.add(name)
62 |             ignored.add(name)
63 | 
64 |     return ignored
65 | 
66 |   def skip_ignores(self, commit, metadata):
67 |     filenames = [x.filename for x in commit.file_changes]
68 |     bad = self.get_ignored(filenames)
69 |     commit.file_changes = [x for x in commit.file_changes
70 |                            if x.filename not in bad]
71 | 
72 | 
73 | def main():
74 |   checker = CheckIgnores()
75 |   args = fr.FilteringOptions.parse_args(sys.argv[1:], error_on_empty = False)
76 |   filter = fr.RepoFilter(args, commit_callback=checker.skip_ignores)
77 |   filter.run()
78 |   
79 | 
80 | if __name__ == '__main__':
81 |   main()
82 | 
83 | 


--------------------------------------------------------------------------------
/contrib/filter-repo-demos/filter-branch-ish:
--------------------------------------------------------------------------------
1 | filter-lamely


--------------------------------------------------------------------------------
/contrib/filter-repo-demos/insert-beginning:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | This is a simple program that will insert some regular file into the root
 5 | commit(s) of history, e.g. adding a file named LICENSE or COPYING to the
 6 | first commit.  It also rewrites commit hashes in commit messages to update
 7 | them based on these changes.
 8 | """
 9 | 
10 | """
11 | Please see the
12 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
13 | near the top of git-filter-repo.
14 | """
15 | 
16 | # Technically, this program could be replaced by a one-liner:
17 | #    git filter-repo --commit-callback "if not commit.parents: commit.file_changes.append(FileChange(b'M', $RELATIVE_TO_PROJECT_ROOT_PATHNAME, b'$(git hash-object -w $FILENAME)', b'100644'))"
18 | # but let's do it as a full-fledged program that imports git_filter_repo
19 | # anyway...
20 | 
21 | import argparse
22 | import os
23 | import subprocess
24 | try:
25 |   import git_filter_repo as fr
26 | except ImportError:
27 |   raise SystemExit("Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
28 | 
29 | parser = argparse.ArgumentParser(
30 |           description='Add a file to the root commit(s) of history')
31 | parser.add_argument('--file', type=os.fsencode,
32 |         help=("Relative-path to file whose contents should be added to root commit(s)"))
33 | args = parser.parse_args()
34 | if not args.file:
35 |   raise SystemExit("Error: Need to specify the --file option")
36 | if any([x == b"." or x== b".." for x in args.file.split(b"/")]):
37 |   raise SystemExit(f"Error: Invalid path components in {fr.decode(args.file)}")
38 | if not os.path.isfile(args.file):
39 |   raise SystemExit(f"Error: {fr.decode(args.file)} not found")
40 | 
41 | fhash = subprocess.check_output(['git', 'hash-object', '-w', args.file]).strip()
42 | fmode = b'100755' if os.access(args.file, os.X_OK) else b'100644'
43 | # FIXME: I've assumed the file wasn't a directory or symlink...
44 | 
45 | def fixup_commits(commit, metadata):
46 |   if len(commit.parents) == 0:
47 |     commit.file_changes.append(fr.FileChange(b'M', args.file, fhash, fmode))
48 |   # FIXME: What if the history already had a file matching the given name,
49 |   # but which didn't exist until later in history?  Is the intent for the
50 |   # user to keep the other version that existed when it existed, or to
51 |   # overwrite the version for all of history with the specified file?  I
52 |   # don't know, but if it's the latter, we'd need to add an 'else' clause
53 |   # like the following:
54 |   #else:
55 |   #  commit.file_changes = [x for x in commit.file_changes
56 |   #                         if x.filename != args.file]
57 | 
58 | fr_args = fr.FilteringOptions.parse_args(['--preserve-commit-encoding',
59 |                                           '--force',
60 |                                           '--replace-refs', 'update-no-add'])
61 | filter = fr.RepoFilter(fr_args, commit_callback=fixup_commits)
62 | filter.run()
63 | 


--------------------------------------------------------------------------------
/contrib/filter-repo-demos/lint-history:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | """
  4 | This is a simple program that will run a linting program on all non-binary
  5 | files in history.  It also rewrites commit hashes in commit messages to
  6 | refer to the new commits with the rewritten files.  You call it like this:
  7 |    lint-history my-lint-command --arg whatever --another-arg
  8 | and it will repeatedly call
  9 |    my-lint-command --arg whatever --another-arg $TEMPORARY_FILE
 10 | with $TEMPORARY_FILE having contents of some file from history.
 11 | 
 12 | NOTE: Several people have taken and modified this script for a variety
 13 | of special cases (linting python files, linting jupyter notebooks, just
 14 | linting java files, etc.) and posted their modifications at
 15 |   https://github.com/newren/git-filter-repo/issues/45
 16 | Feel free to take a look and adopt some of their ideas.  Most of these
 17 | modifications are probably strictly unnecessary since you could just make
 18 | a lint-script that takes the filename, checks that it matches what you
 19 | want, and then calls the real linter.  But I guess folks don't like making
 20 | an intermediate script.  So I eventually added the --relevant flag for
 21 | picking out certain files providing yet another way to handle it.
 22 | """
 23 | 
 24 | """
 25 | Please see the
 26 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
 27 | near the top of git-filter-repo.
 28 | """
 29 | 
 30 | # Technically, if you are only running on all non-binary files and don't care
 31 | # about filenames, then this program could be replaced by a "one-liner"; e.g.
 32 | #    git filter-repo --blob-callback '
 33 | #      if not b"\0" in blob.data[0:8192]:
 34 | #        filename = ".git/info/tmpfile"
 35 | #        with open(filename, "wb") as f:
 36 | #          f.write(blob.data)
 37 | #        subprocess.check_call(["lint_program", "--some", "arg", filename])
 38 | #        with open(filename, "rb") as f:
 39 | #          blob.data = f.read()
 40 | #        os.remove(filename)
 41 | #      '
 42 | # but let's do it as a full-fledged program that imports git_filter_repo
 43 | # and show how to also do it with filename handling...
 44 | 
 45 | import argparse
 46 | import os
 47 | import subprocess
 48 | import tempfile
 49 | try:
 50 |   import git_filter_repo as fr
 51 | except ImportError:
 52 |   raise SystemExit("Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
 53 | 
 54 | example_text = '''CALLBACK
 55 | 
 56 |     When you pass --relevant 'BODY', the following style of function
 57 |     will be compiled and called:
 58 | 
 59 |         def is_relevant(filename):
 60 |             BODY
 61 | 
 62 |     Where filename is the full relative path from the toplevel of the
 63 |     repository.
 64 | 
 65 |     Thus, to only run on files with a ".txt" extension you would run
 66 |         lint-history --relevant 'return filename.endswith(b".txt")' ...
 67 | 
 68 | EXAMPLES
 69 | 
 70 |     To run dos2unix on all non-binary files in history:
 71 |         lint-history dos2unix
 72 | 
 73 |     To run eslint --fix on all .js files in history:
 74 |         lint-history --relevant 'return filename.endswith(b".js")' eslint --fix
 75 | 
 76 | INTERNALS
 77 | 
 78 |     Linting of files in history will be done by writing the files to a
 79 |     temporary directory before running the linting program; the
 80 |     location of this temporary directory can be controlled via the
 81 |     TMPDIR environment variable as per
 82 |     https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp.
 83 |     '''
 84 | 
 85 | parser = argparse.ArgumentParser(description='Run a program (e.g. code formatter or linter) on files in history',
 86 |                                  epilog = example_text,
 87 |                                  formatter_class=argparse.RawDescriptionHelpFormatter)
 88 | 
 89 | parser.add_argument('--relevant', metavar="FUNCTION_BODY",
 90 |         help=("Python code for determining whether to apply linter to a "
 91 |               "given filename.  Implies --filenames-important.  See CALLBACK "
 92 |               "below."))
 93 | parser.add_argument('--filenames-important', action='store_true',
 94 |         help=("By default, contents are written to a temporary file with a "
 95 |               "random name.  If the linting program needs to know the file "
 96 |               "basename to operate correctly (e.g. because it needs to know "
 97 |               "the file's extension), then pass this argument"))
 98 | parser.add_argument('--refs', nargs='+',
 99 |                     help=("Limit history rewriting to the specified refs. "
100 |                           "Implies --partial of git-filter-repo (and all its "
101 |                           "implications)."))
102 | parser.add_argument('command', nargs=argparse.REMAINDER,
103 |         help=("Lint command to run, other than the filename at the end"))
104 | lint_args = parser.parse_args()
105 | if not lint_args.command:
106 |   raise SystemExit("Error: Need to specify a lint command")
107 | if len(lint_args.command) > 1 and lint_args.command[0] == '--':
108 |   lint_args.command.pop(0)
109 | 
110 | tmpdir = None
111 | blobs_handled = {}
112 | cat_file_process = None
113 | def lint_with_real_filenames(commit, metadata):
114 |   for change in commit.file_changes:
115 |     if change.blob_id in blobs_handled:
116 |       change.blob_id = blobs_handled[change.blob_id]
117 |     elif change.type == b'D':
118 |       continue
119 |     elif not is_relevant(change.filename):
120 |       continue
121 |     else:
122 |       # Get the old blob contents
123 |       cat_file_process.stdin.write(change.blob_id + b'\n')
124 |       cat_file_process.stdin.flush()
125 |       objhash, objtype, objsize = cat_file_process.stdout.readline().split()
126 |       contents_plus_newline = cat_file_process.stdout.read(int(objsize)+1)
127 | 
128 |       # Write it out to a file with the same basename
129 |       filename = os.path.join(tmpdir, os.path.basename(change.filename))
130 |       with open(filename, "wb") as f:
131 |         f.write(contents_plus_newline[:-1])
132 | 
133 |       # Lint the file
134 |       subprocess.check_call(lint_args.command + [filename.decode('utf-8')])
135 | 
136 |       # Get the new contents
137 |       with open(filename, "rb") as f:
138 |         blob = fr.Blob(f.read())
139 | 
140 |       # Insert the new file into the filter's stream, and remove the tempfile
141 |       filter.insert(blob)
142 |       os.remove(filename)
143 | 
144 |       # Record our handling of the blob and use it for this change
145 |       blobs_handled[change.blob_id] = blob.id
146 |       change.blob_id = blob.id
147 | 
148 | def lint_non_binary_blobs(blob, metadata):
149 |   if not b"\0" in blob.data[0:8192]:
150 |     filename = '.git/info/tmpfile'
151 |     with open(filename, "wb") as f:
152 |       f.write(blob.data)
153 |     subprocess.check_call(lint_args.command + [filename])
154 |     with open(filename, "rb") as f:
155 |       blob.data = f.read()
156 |     os.remove(filename)
157 | 
158 | if lint_args.filenames_important and not lint_args.relevant:
159 |   lint_args.relevant = 'return True'
160 | if lint_args.relevant:
161 |   body = lint_args.relevant
162 |   exec('def is_relevant(filename):\n  '+'\n  '.join(body.splitlines()),
163 |        globals())
164 |   lint_args.filenames_important = True
165 | input_args = []
166 | if lint_args.refs:
167 |   input_args = ["--refs",] + lint_args.refs
168 | args = fr.FilteringOptions.parse_args(input_args, error_on_empty = False)
169 | args.force = True
170 | if lint_args.filenames_important:
171 |   tmpdir = tempfile.mkdtemp().encode()
172 |   cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],
173 |                                       stdin = subprocess.PIPE,
174 |                                       stdout = subprocess.PIPE)
175 |   filter = fr.RepoFilter(args, commit_callback=lint_with_real_filenames)
176 |   filter.run()
177 |   cat_file_process.stdin.close()
178 |   cat_file_process.wait()
179 | else:
180 |   if not os.path.exists('.git/info'):
181 |     os.makedirs('.git/info')
182 |   filter = fr.RepoFilter(args, blob_callback=lint_non_binary_blobs)
183 |   filter.run()
184 | 


--------------------------------------------------------------------------------
/contrib/filter-repo-demos/signed-off-by:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | This is a simple program that will add Signed-off-by: tags to a range of
 5 | commits.  Example usage, to add a signed-off-by trailer to every commit that
 6 | is not in next but is in any of master, develop, or maint:
 7 |   signed-off-by master develop maint ^next
 8 | More likely called as:
 9 |   signed-off-by master~4..master
10 | There's no real reason to use this script since `rebase --signoff` exists;
11 | it's mostly just a demonstration of what could be done.
12 | """
13 | 
14 | """
15 | Please see the
16 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
17 | near the top of git-filter-repo.
18 | """
19 | 
20 | import argparse
21 | import re
22 | import subprocess
23 | try:
24 |   import git_filter_repo as fr
25 | except ImportError:
26 |   raise SystemExit("Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
27 | 
28 | parser = argparse.ArgumentParser(
29 |           description="Add 'Signed-off-by:' tags to a range of commits")
30 | parser.add_argument('rev_list_args', metavar='rev-list args',
31 |                     nargs=argparse.REMAINDER,
32 |         help=("Range of commits (need to include ref tips) to work on"))
33 | myargs = parser.parse_args()
34 | 
35 | user_name = subprocess.check_output('git config user.name'.split()).rstrip()
36 | user_email = subprocess.check_output('git config user.email'.split()).rstrip()
37 | trailer = b'Signed-off-by: %s <%s>' % (user_name, user_email)
38 | 
39 | def add_signed_off_by_trailer(commit, metadata):
40 |   if trailer in commit.message:
41 |     return
42 | 
43 |   # We want to add the trailer, but we want it to be separated from any
44 |   # existing paragraphs by a blank line.  However, if the commit message
45 |   # already ends with trailers, then we want all trailers to be on adjacent
46 |   # lines.
47 |   if not commit.message.endswith(b'\n'):
48 |     commit.message += b'\n'
49 |   lastline = commit.message.splitlines()[-1]
50 |   if not re.match(b'[A-Za-z0-9-_]*: ', lastline):
51 |     commit.message += b'\n'
52 |   commit.message += trailer
53 | 
54 | # Doing a partial rewrite (which passing --refs implies) prevents:
55 | #   * remapping origin remote tracking branches to regular branches
56 | #   * deletion of the origin remote
57 | #   * nuking unused refs
58 | #   * nuking reflogs
59 | #   * repacking
60 | # so we pass --refs along with the user-supplied rev-list arguments.
61 | args = fr.FilteringOptions.parse_args(['--force',
62 |                                        '--refs'] + myargs.rev_list_args)
63 | args.refs = myargs.rev_list_args
64 | filter = fr.RepoFilter(args, commit_callback=add_signed_off_by_trailer)
65 | filter.run()
66 | 


--------------------------------------------------------------------------------
/git_filter_repo.py:
--------------------------------------------------------------------------------
1 | git-filter-repo


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "git-filter-repo"
 3 | description = "Quickly rewrite git repository history"
 4 | authors = [
 5 |     {name = "Elijah Newren", email = "newren@gmail.com"}
 6 | ]
 7 | readme = "README.md"
 8 | classifiers = [
 9 |     "Development Status :: 5 - Production/Stable",
10 |     "Operating System :: OS Independent",
11 |     "Programming Language :: Python",
12 |     "License :: OSI Approved :: MIT License",
13 |     "Programming Language :: Python :: 3.6",
14 |     "Programming Language :: Python :: 3.7",
15 |     "Programming Language :: Python :: 3.8",
16 |     "Programming Language :: Python :: 3.9",
17 |     "Programming Language :: Python :: 3.10",
18 |     "Programming Language :: Python :: 3.11",
19 |     "Programming Language :: Python :: 3.12",
20 |     "Programming Language :: Python :: 3",
21 |     "Programming Language :: Python :: Implementation :: CPython",
22 |     "Programming Language :: Python :: Implementation :: PyPy",
23 | ]
24 | license.text = "MIT"
25 | requires-python = ">= 3.6"
26 | dynamic = ["version"]
27 | 
28 | [project.urls]
29 | Homepage = "https://github.com/newren/git-filter-repo"
30 | Issues = "https://github.com/newren/git-filter-repo/issues/"
31 | Source = "https://github.com/newren/git-filter-repo"
32 | 
33 | [project.scripts]
34 | git-filter-repo = "git_filter_repo:main"
35 | 
36 | [build-system]
37 | requires = ["setuptools>=61", "setuptools_scm>=8.0", "wheel"]
38 | build-backend = "setuptools.build_meta"
39 | 
40 | [tool.setuptools]
41 | py-modules = ["git_filter_repo"]
42 | 
43 | [tool.setuptools_scm]
44 | # This section intentionally left blank
45 | 


--------------------------------------------------------------------------------
/t/run_coverage:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eu
 4 | 
 5 | orig_dir=$(cd $(dirname $0) && pwd -P)
 6 | tmpdir=$(mktemp -d)
 7 | 
 8 | cat <<EOF >$tmpdir/.coveragerc
 9 | [run]
10 | parallel=true
11 | data_file=$tmpdir/.coverage
12 | EOF
13 | 
14 | cat <<EOF >$tmpdir/sitecustomize.py
15 | import coverage
16 | coverage.process_startup()
17 | EOF
18 | 
19 | export COVERAGE_PROCESS_START=$tmpdir/.coveragerc
20 | export PYTHONPATH=$tmpdir:
21 | 
22 | # Produce a coverage report, even if the tests fail
23 | set +e
24 | $orig_dir/run_tests
25 | exitcode=$?
26 | set -e
27 | 
28 | cd $tmpdir
29 | coverage3 combine -q
30 | coverage3 html -d $orig_dir/report
31 | coverage3 report -m
32 | cd $orig_dir
33 | rm -rf $tmpdir
34 | 
35 | exit $exitcode
36 | 


--------------------------------------------------------------------------------
/t/run_tests:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -eu
 3 | 
 4 | cd $(dirname $0)
 5 | 
 6 | # Put git_filter_repo.py on the front of PYTHONPATH
 7 | export PYTHONPATH="$PWD/..${PYTHONPATH:+:$PYTHONPATH}"
 8 | 
 9 | # We pretend filenames are unicode for two reasons: (1) it exercises more
10 | # code, and (2) this setting will detect accidental use of unicode strings
11 | # for file/directory names, which should always be bytestrings.
12 | export PRETEND_UNICODE_ARGS=1
13 | 
14 | export TEST_SHELL_PATH=/bin/sh
15 | 
16 | failed=0
17 | 
18 | for t in t[0-9]*.sh
19 | do
20 |   printf '\n\n== %s ==\n' "$t"
21 |   bash $t "$@" || failed=$(($failed+1))
22 | done
23 | 
24 | if [ 0 -lt $failed ]
25 | then
26 |   exit 1
27 | fi
28 | 


--------------------------------------------------------------------------------
/t/t9390/basic:
--------------------------------------------------------------------------------
 1 | feature done
 2 | # Simple repo with three files, a merge where each side touches exactly one
 3 | # file, and a commit at the end touching all three.  Note that the original-oid
 4 | # directives are very fake, but that makes the original shas easy to
 5 | # recognize.
 6 | blob
 7 | mark :1
 8 | original-oid 0000000000000000000000000000000000000001
 9 | data 8
10 | initial
11 | 
12 | blob
13 | mark :2
14 | original-oid 0000000000000000000000000000000000000002
15 | data 8
16 | ten-mod
17 | 
18 | blob
19 | mark :3
20 | original-oid 0000000000000000000000000000000000000003
21 | data 11
22 | twenty-mod
23 | 
24 | blob
25 | mark :4
26 | original-oid 0000000000000000000000000000000000000004
27 | data 6
28 | final
29 | 
30 | reset refs/heads/master
31 | commit refs/heads/master
32 | mark :5
33 | original-oid 0000000000000000000000000000000000000009
34 | author Little O. Me <me@little.net> 1535228562 -0700
35 | committer Little O. Me <me@little.net> 1535228562 -0700
36 | data 8
37 | Initial
38 | M 100644 :1 filename
39 | M 100644 :1 ten
40 | M 100644 :1 twenty
41 | 
42 | commit refs/heads/B
43 | mark :6
44 | original-oid 000000000000000000000000000000000000000B
45 | author Little 'ol Me <me@laptop.(none)> 1535229544 -0700
46 | committer Little 'ol Me <me@laptop.(none)> 1535229544 -0700
47 | data 11
48 | add twenty
49 | from :5
50 | M 100644 :3 twenty
51 | 
52 | commit refs/heads/A
53 | mark :7
54 | original-oid 000000000000000000000000000000000000000A
55 | author Little O. Me <me@machine52.little.net> 1535229523 -0700
56 | committer Little O. Me <me@machine52.little.net> 1535229523 -0700
57 | data 8
58 | add ten
59 | from :5
60 | M 100644 :2 ten
61 | 
62 | commit refs/heads/master
63 | mark :8
64 | original-oid 000000000000000000000000000000000000000C
65 | author Lit.e Me <me@fire.com> 1535229559 -0700
66 | committer Lit.e Me <me@fire.com> 1535229580 -0700
67 | data 24
68 | Merge branch 'A' into B
69 | from :6
70 | merge :7
71 | M 100644 :2 ten
72 | 
73 | commit refs/heads/master
74 | mark :9
75 | original-oid 000000000000000000000000000000000000000D
76 | author Little Me <me@bigcompany.com> 1535229601 -0700
77 | committer Little Me <me@bigcompany.com> 1535229601 -0700
78 | data 9
79 | whatever
80 | from :8
81 | M 100644 :4 filename
82 | M 100644 :4 ten
83 | M 100644 :4 twenty
84 | 
85 | tag v1.0
86 | from :9
87 | original-oid 000000000000000000000000000000000000000E
88 | tagger Little John <second@merry.men> 1535229618 -0700
89 | data 5
90 | v1.0
91 | 
92 | reset refs/heads/master
93 | from :9
94 | 
95 | done
96 | 


--------------------------------------------------------------------------------
/t/t9390/basic-filename:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 8
 5 | initial
 6 | 
 7 | reset refs/heads/A
 8 | commit refs/heads/A
 9 | mark :2
10 | author Little O. Me <me@little.net> 1535228562 -0700
11 | committer Little O. Me <me@little.net> 1535228562 -0700
12 | data 8
13 | Initial
14 | M 100644 :1 filename
15 | 
16 | blob
17 | mark :3
18 | data 6
19 | final
20 | 
21 | commit refs/heads/master
22 | mark :4
23 | author Little Me <me@bigcompany.com> 1535229601 -0700
24 | committer Little Me <me@bigcompany.com> 1535229601 -0700
25 | data 9
26 | whatever
27 | from :2
28 | M 100644 :3 filename
29 | 
30 | reset refs/heads/B
31 | from :2
32 | 
33 | tag v1.0
34 | from :4
35 | tagger Little John <second@merry.men> 1535229618 -0700
36 | data 5
37 | v1.0
38 | 
39 | done
40 | 


--------------------------------------------------------------------------------
/t/t9390/basic-mailmap:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 8
 5 | initial
 6 | 
 7 | reset refs/heads/B
 8 | commit refs/heads/B
 9 | mark :2
10 | author Little 'ol Me <me@little.net> 1535228562 -0700
11 | committer Little 'ol Me <me@little.net> 1535228562 -0700
12 | data 8
13 | Initial
14 | M 100644 :1 filename
15 | M 100644 :1 ten
16 | M 100644 :1 twenty
17 | 
18 | blob
19 | mark :3
20 | data 11
21 | twenty-mod
22 | 
23 | commit refs/heads/B
24 | mark :4
25 | author Little 'ol Me <me@little.net> 1535229544 -0700
26 | committer Little 'ol Me <me@little.net> 1535229544 -0700
27 | data 11
28 | add twenty
29 | from :2
30 | M 100644 :3 twenty
31 | 
32 | blob
33 | mark :5
34 | data 8
35 | ten-mod
36 | 
37 | commit refs/heads/A
38 | mark :6
39 | author Little 'ol Me <me@little.net> 1535229523 -0700
40 | committer Little 'ol Me <me@little.net> 1535229523 -0700
41 | data 8
42 | add ten
43 | from :2
44 | M 100644 :5 ten
45 | 
46 | commit refs/heads/master
47 | mark :7
48 | author Little 'ol Me <me@little.net> 1535229559 -0700
49 | committer Little 'ol Me <me@little.net> 1535229580 -0700
50 | data 24
51 | Merge branch 'A' into B
52 | from :4
53 | merge :6
54 | M 100644 :5 ten
55 | 
56 | blob
57 | mark :8
58 | data 6
59 | final
60 | 
61 | commit refs/heads/master
62 | mark :9
63 | author Little 'ol Me <me@little.net> 1535229601 -0700
64 | committer Little 'ol Me <me@little.net> 1535229601 -0700
65 | data 9
66 | whatever
67 | from :7
68 | M 100644 :8 filename
69 | M 100644 :8 ten
70 | M 100644 :8 twenty
71 | 
72 | tag v1.0
73 | from :9
74 | tagger Little John <second@merry.men> 1535229618 -0700
75 | data 5
76 | v1.0
77 | 
78 | done
79 | 


--------------------------------------------------------------------------------
/t/t9390/basic-message:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 8
 5 | initial
 6 | 
 7 | reset refs/heads/B
 8 | commit refs/heads/B
 9 | mark :2
10 | author Little O. Me <me@little.net> 1535228562 -0700
11 | committer Little O. Me <me@little.net> 1535228562 -0700
12 | data 9
13 | Modified
14 | M 100644 :1 filename
15 | M 100644 :1 ten
16 | M 100644 :1 twenty
17 | 
18 | blob
19 | mark :3
20 | data 11
21 | twenty-mod
22 | 
23 | commit refs/heads/B
24 | mark :4
25 | author Little 'ol Me <me@laptop.(none)> 1535229544 -0700
26 | committer Little 'ol Me <me@laptop.(none)> 1535229544 -0700
27 | data 18
28 | add the number 20
29 | from :2
30 | M 100644 :3 twenty
31 | 
32 | blob
33 | mark :5
34 | data 8
35 | ten-mod
36 | 
37 | commit refs/heads/A
38 | mark :6
39 | author Little O. Me <me@machine52.little.net> 1535229523 -0700
40 | committer Little O. Me <me@machine52.little.net> 1535229523 -0700
41 | data 8
42 | add ten
43 | from :2
44 | M 100644 :5 ten
45 | 
46 | commit refs/heads/master
47 | mark :7
48 | author Lit.e Me <me@fire.com> 1535229559 -0700
49 | committer Lit.e Me <me@fire.com> 1535229580 -0700
50 | data 24
51 | Merge branch 'A' into B
52 | from :4
53 | merge :6
54 | M 100644 :5 ten
55 | 
56 | blob
57 | mark :8
58 | data 6
59 | final
60 | 
61 | commit refs/heads/master
62 | mark :9
63 | author Little Me <me@bigcompany.com> 1535229601 -0700
64 | committer Little Me <me@bigcompany.com> 1535229601 -0700
65 | data 9
66 | whatever
67 | from :7
68 | M 100644 :8 filename
69 | M 100644 :8 ten
70 | M 100644 :8 twenty
71 | 
72 | tag v1.0
73 | from :9
74 | tagger Little John <second@merry.men> 1535229618 -0700
75 | data 15
76 | version one :)
77 | 
78 | done
79 | 


--------------------------------------------------------------------------------
/t/t9390/basic-numbers:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 8
 5 | initial
 6 | 
 7 | reset refs/heads/B
 8 | commit refs/heads/B
 9 | mark :2
10 | author Little O. Me <me@little.net> 1535228562 -0700
11 | committer Little O. Me <me@little.net> 1535228562 -0700
12 | data 8
13 | Initial
14 | M 100644 :1 ten
15 | M 100644 :1 twenty
16 | 
17 | blob
18 | mark :3
19 | data 11
20 | twenty-mod
21 | 
22 | commit refs/heads/B
23 | mark :4
24 | author Little 'ol Me <me@laptop.(none)> 1535229544 -0700
25 | committer Little 'ol Me <me@laptop.(none)> 1535229544 -0700
26 | data 11
27 | add twenty
28 | from :2
29 | M 100644 :3 twenty
30 | 
31 | blob
32 | mark :5
33 | data 8
34 | ten-mod
35 | 
36 | commit refs/heads/A
37 | mark :6
38 | author Little O. Me <me@machine52.little.net> 1535229523 -0700
39 | committer Little O. Me <me@machine52.little.net> 1535229523 -0700
40 | data 8
41 | add ten
42 | from :2
43 | M 100644 :5 ten
44 | 
45 | commit refs/heads/master
46 | mark :7
47 | author Lit.e Me <me@fire.com> 1535229559 -0700
48 | committer Lit.e Me <me@fire.com> 1535229580 -0700
49 | data 24
50 | Merge branch 'A' into B
51 | from :4
52 | merge :6
53 | M 100644 :5 ten
54 | 
55 | blob
56 | mark :8
57 | data 6
58 | final
59 | 
60 | commit refs/heads/master
61 | mark :9
62 | author Little Me <me@bigcompany.com> 1535229601 -0700
63 | committer Little Me <me@bigcompany.com> 1535229601 -0700
64 | data 9
65 | whatever
66 | from :7
67 | M 100644 :8 ten
68 | M 100644 :8 twenty
69 | 
70 | tag v1.0
71 | from :9
72 | tagger Little John <second@merry.men> 1535229618 -0700
73 | data 5
74 | v1.0
75 | 
76 | done
77 | 


--------------------------------------------------------------------------------
/t/t9390/basic-replace:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 8
 5 | initial
 6 | 
 7 | reset refs/heads/B
 8 | commit refs/heads/B
 9 | mark :2
10 | author Little O. Me <me@little.net> 1535228562 -0700
11 | committer Little O. Me <me@little.net> 1535228562 -0700
12 | data 8
13 | Initial
14 | M 100644 :1 filename
15 | M 100644 :1 ten
16 | M 100644 :1 twenty
17 | 
18 | blob
19 | mark :3
20 | data 28
21 | twenty-modified-by-gremlins
22 | 
23 | commit refs/heads/B
24 | mark :4
25 | author Little 'ol Me <me@laptop.(none)> 1535229544 -0700
26 | committer Little 'ol Me <me@laptop.(none)> 1535229544 -0700
27 | data 11
28 | add twenty
29 | from :2
30 | M 100644 :3 twenty
31 | 
32 | blob
33 | mark :5
34 | data 25
35 | ten-modified-by-gremlins
36 | 
37 | commit refs/heads/A
38 | mark :6
39 | author Little O. Me <me@machine52.little.net> 1535229523 -0700
40 | committer Little O. Me <me@machine52.little.net> 1535229523 -0700
41 | data 8
42 | add ten
43 | from :2
44 | M 100644 :5 ten
45 | 
46 | commit refs/heads/master
47 | mark :7
48 | author Lit.e Me <me@fire.com> 1535229559 -0700
49 | committer Lit.e Me <me@fire.com> 1535229580 -0700
50 | data 24
51 | Merge branch 'A' into B
52 | from :4
53 | merge :6
54 | M 100644 :5 ten
55 | 
56 | blob
57 | mark :8
58 | data 6
59 | final
60 | 
61 | commit refs/heads/master
62 | mark :9
63 | author Little Me <me@bigcompany.com> 1535229601 -0700
64 | committer Little Me <me@bigcompany.com> 1535229601 -0700
65 | data 9
66 | whatever
67 | from :7
68 | M 100644 :8 filename
69 | M 100644 :8 ten
70 | M 100644 :8 twenty
71 | 
72 | tag v1.0
73 | from :9
74 | tagger Little John <second@merry.men> 1535229618 -0700
75 | data 5
76 | v1.0
77 | 
78 | done
79 | 


--------------------------------------------------------------------------------
/t/t9390/basic-ten:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 8
 5 | initial
 6 | 
 7 | reset refs/heads/B
 8 | commit refs/heads/B
 9 | mark :2
10 | author Little O. Me <me@little.net> 1535228562 -0700
11 | committer Little O. Me <me@little.net> 1535228562 -0700
12 | data 8
13 | Initial
14 | M 100644 :1 ten
15 | 
16 | blob
17 | mark :3
18 | data 8
19 | ten-mod
20 | 
21 | commit refs/heads/A
22 | mark :4
23 | author Little O. Me <me@machine52.little.net> 1535229523 -0700
24 | committer Little O. Me <me@machine52.little.net> 1535229523 -0700
25 | data 8
26 | add ten
27 | from :2
28 | M 100644 :3 ten
29 | 
30 | blob
31 | mark :5
32 | data 6
33 | final
34 | 
35 | commit refs/heads/master
36 | mark :6
37 | author Little Me <me@bigcompany.com> 1535229601 -0700
38 | committer Little Me <me@bigcompany.com> 1535229601 -0700
39 | data 9
40 | whatever
41 | from :4
42 | M 100644 :5 ten
43 | 
44 | tag v1.0
45 | from :6
46 | tagger Little John <second@merry.men> 1535229618 -0700
47 | data 5
48 | v1.0
49 | 
50 | done
51 | 


--------------------------------------------------------------------------------
/t/t9390/basic-twenty:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 8
 5 | initial
 6 | 
 7 | reset refs/heads/A
 8 | commit refs/heads/A
 9 | mark :2
10 | author Little O. Me <me@little.net> 1535228562 -0700
11 | committer Little O. Me <me@little.net> 1535228562 -0700
12 | data 8
13 | Initial
14 | M 100644 :1 twenty
15 | 
16 | blob
17 | mark :3
18 | data 11
19 | twenty-mod
20 | 
21 | commit refs/heads/B
22 | mark :4
23 | author Little 'ol Me <me@laptop.(none)> 1535229544 -0700
24 | committer Little 'ol Me <me@laptop.(none)> 1535229544 -0700
25 | data 11
26 | add twenty
27 | from :2
28 | M 100644 :3 twenty
29 | 
30 | blob
31 | mark :5
32 | data 6
33 | final
34 | 
35 | commit refs/heads/master
36 | mark :6
37 | author Little Me <me@bigcompany.com> 1535229601 -0700
38 | committer Little Me <me@bigcompany.com> 1535229601 -0700
39 | data 9
40 | whatever
41 | from :4
42 | M 100644 :5 twenty
43 | 
44 | tag v1.0
45 | from :6
46 | tagger Little John <second@merry.men> 1535229618 -0700
47 | data 5
48 | v1.0
49 | 
50 | done
51 | 


--------------------------------------------------------------------------------
/t/t9390/degenerate:
--------------------------------------------------------------------------------
  1 | feature done
  2 | # Simple repo with only three files, with a bunch of cases of dealing with
  3 | # topology changes possibly causing merge commits to need to be pruned.
  4 | #
  5 | # As in the "basic" test case, the original-oid directives are very fake,
  6 | # but if an error is hit that shows one of these, the fake value makes it
  7 | # really easy to know where it came from.
  8 | #
  9 | # Expressed with shorthand, log history in the format
 10 | #     Commit Name(Parent(s)): files changed
 11 | # for this case looks like the following:
 12 | #     W(V): moduleA/keepme
 13 | #     V(U,U): moduleB/nukeme
 14 | #     U(T): moduleA/sometimes
 15 | #     T(S): moduleA/keepme
 16 | #     S(R,R): moduleA/sometimes
 17 | #     R(R): moduleB/nukeme
 18 | #     Q(P): moduleA/keepme
 19 | #     P(N,M): moduleA/sometimes
 20 | #     O(M,N): moduleA/sometimes
 21 | #     N(C): moduleB/nukeme
 22 | #     M(L): moduleB/nukeme
 23 | #     L(K): moduleA/keepme
 24 | #     K(J): moduleB/nukeme
 25 | #     J(D,H): moduleA/sometimes
 26 | #     I(H,D): moduleA/sometimes  # backwards-ish merge
 27 | #     H(G): moduleB/nukeme
 28 | #     G(F): moduleA/keepme
 29 | #     F(D): moduleB/nukeme
 30 | #     D(B,C): moduleA/sometimes
 31 | #     C(A): moduleB/nukeme
 32 | #     B(A): moduleB/nukeme
 33 | #     A(): moduleA/keepme
 34 | #
 35 | # This involved case is intended to test the following:
 36 | #   * Merge becoming non-merge due to both parents becoming same commit
 37 | #     * Two sub-cases: it has changes of its own, or it doesn't
 38 | #   * Merge becoming merge of commit with its own ancestor
 39 | #     * Two cases: it has changes, or it doesn't have changes
 40 | #     * Two cases: first parent is the ancestor, second parent is the ancestor
 41 | #   * Merge starting as merge of commit with its own ancestor
 42 | #     * Two cases: has changes, doesn't have changes
 43 | #     * Two cases: first parent, or second parent
 44 | blob
 45 | mark :1
 46 | original-oid 0000000000000000000000000000000000000001
 47 | data 10
 48 | keepme v1
 49 | 
 50 | blob
 51 | mark :2
 52 | original-oid 0000000000000000000000000000000000000002
 53 | data 10
 54 | nukeme v1
 55 | 
 56 | blob
 57 | mark :3
 58 | original-oid 0000000000000000000000000000000000000003
 59 | data 10
 60 | nukeme v2
 61 | 
 62 | blob
 63 | mark :4
 64 | original-oid 0000000000000000000000000000000000000004
 65 | data 13
 66 | sometimes v1
 67 | 
 68 | blob
 69 | mark :5
 70 | original-oid 0000000000000000000000000000000000000005
 71 | data 10
 72 | nukeme v3
 73 | 
 74 | blob
 75 | mark :6
 76 | original-oid 0000000000000000000000000000000000000006
 77 | data 10
 78 | keepme v2
 79 | 
 80 | blob
 81 | mark :7
 82 | original-oid 0000000000000000000000000000000000000007
 83 | data 10
 84 | nukem4 v4
 85 | 
 86 | blob
 87 | mark :8
 88 | original-oid 0000000000000000000000000000000000000008
 89 | data 13
 90 | sometimes v2
 91 | 
 92 | blob
 93 | mark :9
 94 | original-oid 0000000000000000000000000000000000000009
 95 | data 13
 96 | sometimes v3
 97 | 
 98 | blob
 99 | mark :10
100 | original-oid 000000000000000000000000000000000000000A
101 | data 10
102 | nukeme v4
103 | 
104 | blob
105 | mark :11
106 | original-oid 000000000000000000000000000000000000000B
107 | data 10
108 | keepme v3
109 | 
110 | blob
111 | mark :12
112 | original-oid 000000000000000000000000000000000000000C
113 | data 10
114 | nukeme v5
115 | 
116 | blob
117 | mark :13
118 | original-oid 000000000000000000000000000000000000000D
119 | data 10
120 | nukeme v6
121 | 
122 | blob
123 | mark :14
124 | original-oid 000000000000000000000000000000000000000E
125 | data 13
126 | sometimes v4
127 | 
128 | blob
129 | mark :15
130 | original-oid 000000000000000000000000000000000000000F
131 | data 13
132 | sometimes v5
133 | 
134 | blob
135 | mark :16
136 | original-oid 0000000000000000000000000000000000000010
137 | data 10
138 | keepme v4
139 | 
140 | blob
141 | mark :17
142 | original-oid 0000000000000000000000000000000000000011
143 | data 10
144 | nukeme v7
145 | 
146 | blob
147 | mark :18
148 | original-oid 0000000000000000000000000000000000000012
149 | data 13
150 | sometimes v6
151 | 
152 | blob
153 | mark :19
154 | original-oid 0000000000000000000000000000000000000013
155 | data 10
156 | keepme v5
157 | 
158 | blob
159 | mark :20
160 | original-oid 0000000000000000000000000000000000000014
161 | data 13
162 | sometimes v7
163 | 
164 | blob
165 | mark :21
166 | original-oid 0000000000000000000000000000000000000015
167 | data 10
168 | nukeme v8
169 | 
170 | blob
171 | mark :22
172 | original-oid 0000000000000000000000000000000000000016
173 | data 10
174 | keepme v6
175 | 
176 | commit refs/heads/master
177 | mark :26
178 | original-oid 0000000000000000000000000000000000000020
179 | author Full Name <user@organization.tld> 2000000000 +0100
180 | committer Full Name <user@organization.tld> 2000000000 +0100
181 | data 2
182 | A
183 | M 100644 :1 moduleA/keepme
184 | 
185 | commit refs/heads/master
186 | mark :27
187 | original-oid 0000000000000000000000000000000000000021
188 | author Full Name <user@organization.tld> 2000010000 +0100
189 | committer Full Name <user@organization.tld> 2000010000 +0100
190 | data 2
191 | B
192 | from :26
193 | M 100644 :2 moduleB/nukeme
194 | 
195 | commit refs/heads/master
196 | mark :28
197 | original-oid 0000000000000000000000000000000000000022
198 | author Full Name <user@organization.tld> 2000020000 +0100
199 | committer Full Name <user@organization.tld> 2000020000 +0100
200 | data 2
201 | C
202 | from :26
203 | M 100644 :3 moduleB/nukeme
204 | 
205 | commit refs/heads/master
206 | mark :29
207 | original-oid 0000000000000000000000000000000000000023
208 | author Full Name <user@organization.tld> 2000030000 +0100
209 | committer Full Name <user@organization.tld> 2000030000 +0100
210 | data 29
211 | D: Merge commit 'C' into 'B'
212 | from :27
213 | merge :28
214 | M 100644 :4 moduleA/sometimes
215 | 
216 | commit refs/heads/master
217 | mark :30
218 | original-oid 0000000000000000000000000000000000000024
219 | author Full Name <user@organization.tld> 2000040000 +0100
220 | committer Full Name <user@organization.tld> 2000040000 +0100
221 | data 2
222 | F
223 | from :29
224 | M 100644 :5 moduleB/nukeme
225 | 
226 | commit refs/heads/master
227 | mark :31
228 | original-oid 0000000000000000000000000000000000000025
229 | author Full Name <user@organization.tld> 2000050000 +0100
230 | committer Full Name <user@organization.tld> 2000050000 +0100
231 | data 2
232 | G
233 | from :30
234 | M 100644 :6 moduleA/keepme
235 | 
236 | commit refs/heads/master
237 | mark :32
238 | original-oid 0000000000000000000000000000000000000026
239 | author Full Name <user@organization.tld> 2000060000 +0100
240 | committer Full Name <user@organization.tld> 2000060000 +0100
241 | data 2
242 | H
243 | from :31
244 | M 100644 :7 moduleB/nukeme
245 | 
246 | commit refs/heads/branchI
247 | mark :33
248 | original-oid 0000000000000000000000000000000000000027
249 | author Full Name <user@organization.tld> 2000070000 +0100
250 | committer Full Name <user@organization.tld> 2000070000 +0100
251 | data 29
252 | I: Merge commit 'D' into 'H'
253 | from :32
254 | merge :29
255 | M 100644 :8 moduleA/sometimes
256 | 
257 | commit refs/heads/master
258 | mark :34
259 | original-oid 0000000000000000000000000000000000000028
260 | author Full Name <user@organization.tld> 2000080000 +0100
261 | committer Full Name <user@organization.tld> 2000080000 +0100
262 | data 29
263 | J: Merge commit 'H' into 'D'
264 | from :29
265 | merge :32
266 | M 100644 :9 moduleA/sometimes
267 | 
268 | commit refs/heads/master
269 | mark :35
270 | original-oid 0000000000000000000000000000000000000029
271 | author Full Name <user@organization.tld> 2000090000 +0100
272 | committer Full Name <user@organization.tld> 2000090000 +0100
273 | data 2
274 | K
275 | from :34
276 | M 100644 :10 moduleB/nukeme
277 | 
278 | commit refs/heads/master
279 | mark :36
280 | original-oid 000000000000000000000000000000000000002A
281 | author Full Name <user@organization.tld> 2000092000 +0100
282 | committer Full Name <user@organization.tld> 2000092000 +0100
283 | data 2
284 | L
285 | from :35
286 | M 100644 :11 moduleA/keepme
287 | 
288 | commit refs/heads/master
289 | mark :37
290 | original-oid 000000000000000000000000000000000000002B
291 | author Full Name <user@organization.tld> 2000094000 +0100
292 | committer Full Name <user@organization.tld> 2000094000 +0100
293 | data 2
294 | M
295 | from :36
296 | M 100644 :12 moduleB/nukeme
297 | 
298 | commit refs/heads/master
299 | mark :38
300 | original-oid 000000000000000000000000000000000000002C
301 | author Full Name <user@organization.tld> 2000096000 +0100
302 | committer Full Name <user@organization.tld> 2000096000 +0100
303 | data 2
304 | N
305 | from :28
306 | M 100644 :13 moduleB/nukeme
307 | 
308 | commit refs/heads/branchO
309 | mark :39
310 | original-oid 000000000000000000000000000000000000002D
311 | author Full Name <user@organization.tld> 2000098000 +0100
312 | committer Full Name <user@organization.tld> 2000098000 +0100
313 | data 29
314 | O: Merge commit 'N' into 'M'
315 | from :37
316 | merge :38
317 | D moduleA/sometimes
318 | 
319 | commit refs/heads/master
320 | mark :40
321 | original-oid 000000000000000000000000000000000000002E
322 | author Full Name <user@organization.tld> 2000099000 +0100
323 | committer Full Name <user@organization.tld> 2000099000 +0100
324 | data 29
325 | P: Merge commit 'M' into 'N'
326 | from :38
327 | merge :37
328 | M 100644 :15 moduleA/sometimes
329 | 
330 | commit refs/heads/master
331 | mark :41
332 | original-oid 0000000000000000000000000000000000000030
333 | author Full Name <user@organization.tld> 3000000000 +0100
334 | committer Full Name <user@organization.tld> 3000000000 +0100
335 | data 2
336 | Q
337 | from :40
338 | M 100644 :16 moduleA/keepme
339 | 
340 | commit refs/heads/master
341 | mark :42
342 | original-oid 0000000000000000000000000000000000000031
343 | author Full Name <user@organization.tld> 3000010000 +0100
344 | committer Full Name <user@organization.tld> 3000010000 +0100
345 | data 2
346 | R
347 | from :41
348 | M 100644 :17 moduleB/nukeme
349 | 
350 | commit refs/heads/master
351 | mark :43
352 | original-oid 0000000000000000000000000000000000000032
353 | author Full Name <user@organization.tld> 3000020000 +0100
354 | committer Full Name <user@organization.tld> 3000020000 +0100
355 | data 29
356 | S: Merge commit 'R' into 'R'
357 | from :42
358 | merge :42
359 | M 100644 :18 moduleA/sometimes
360 | 
361 | commit refs/heads/master
362 | mark :44
363 | original-oid 0000000000000000000000000000000000000033
364 | author Full Name <user@organization.tld> 3000030000 +0100
365 | committer Full Name <user@organization.tld> 3000030000 +0100
366 | data 2
367 | T
368 | from :43
369 | M 100644 :19 moduleA/keepme
370 | 
371 | commit refs/heads/master
372 | mark :45
373 | original-oid 0000000000000000000000000000000000000034
374 | author Full Name <user@organization.tld> 3000040000 +0100
375 | committer Full Name <user@organization.tld> 3000040000 +0100
376 | data 2
377 | U
378 | from :44
379 | M 100644 :20 moduleA/sometimes
380 | 
381 | commit refs/heads/master
382 | mark :46
383 | original-oid 0000000000000000000000000000000000000035
384 | author Full Name <user@organization.tld> 3000050000 +0100
385 | committer Full Name <user@organization.tld> 3000050000 +0100
386 | data 29
387 | V: Merge commit 'U' into 'U'
388 | from :45
389 | merge :45
390 | M 100644 :21 moduleB/nukeme
391 | 
392 | commit refs/heads/master
393 | mark :47
394 | original-oid 0000000000000000000000000000000000000036
395 | author Full Name <user@organization.tld> 3000060000 +0100
396 | committer Full Name <user@organization.tld> 3000060000 +0100
397 | data 2
398 | W
399 | from :46
400 | M 100644 :22 moduleA/keepme
401 | 
402 | done
403 | 


--------------------------------------------------------------------------------
/t/t9390/degenerate-evil-merge:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 0
 5 | 
 6 | reset refs/heads/master
 7 | commit refs/heads/master
 8 | mark :2
 9 | author Full Name <user@organization.tld> 2000000000 +0100
10 | committer Full Name <user@organization.tld> 2000000000 +0100
11 | data 2
12 | A
13 | M 100644 :1 irrelevant
14 | M 100644 :1 module-of-interest/file1
15 | M 100644 :1 module-of-interest/file2
16 | M 100644 :1 module-of-interest/file3
17 | M 100644 :1 other-module/files
18 | M 100644 :1 other-module/are
19 | M 100644 :1 other-module/fun
20 | 
21 | commit refs/heads/master
22 | mark :3
23 | author Full Name <user@organization.tld> 2000030000 +0100
24 | committer Full Name <user@organization.tld> 2000030000 +0100
25 | data 2
26 | B
27 | from :2
28 | D irrelevant
29 | D module-of-interest/file1
30 | D module-of-interest/file2
31 | D module-of-interest/file3
32 | 
33 | blob
34 | mark :4
35 | data 8
36 | content
37 | 
38 | commit refs/heads/master
39 | mark :5
40 | author Full Name <user@organization.tld> 2000040000 +0100
41 | committer Full Name <user@organization.tld> 2000040000 +0100
42 | data 2
43 | D
44 | from :3
45 | M 100644 :4 other-module/fun
46 | 
47 | commit refs/heads/master
48 | mark :6
49 | author Full Name <user@organization.tld> 2000020000 +0100
50 | committer Full Name <user@organization.tld> 2000020000 +0100
51 | data 2
52 | C
53 | from :2
54 | M 100644 :4 irrelevant
55 | 
56 | commit refs/heads/master
57 | mark :7
58 | author Full Name <user@organization.tld> 2000050000 +0100
59 | committer Full Name <user@organization.tld> 2000050000 +0100
60 | data 31
61 | Merge and ignore the deletions
62 | from :6
63 | merge :5
64 | M 100644 :4 irrelevant
65 | M 100644 :4 other-module/fun
66 | 
67 | blob
68 | mark :8
69 | data 6
70 | final
71 | 
72 | commit refs/heads/master
73 | mark :7
74 | author Full Name <user@organization.tld> 2000060000 +0100
75 | committer Full Name <user@organization.tld> 2000060000 +0100
76 | data 13
77 | Final change
78 | from :7
79 | M 100644 :8 module-of-interest/file2
80 | 
81 | done
82 | 


--------------------------------------------------------------------------------
/t/t9390/degenerate-globme:
--------------------------------------------------------------------------------
  1 | feature done
  2 | blob
  3 | mark :1
  4 | data 10
  5 | keepme v1
  6 | 
  7 | reset refs/heads/master
  8 | commit refs/heads/master
  9 | mark :2
 10 | author Full Name <user@organization.tld> 2000000000 +0100
 11 | committer Full Name <user@organization.tld> 2000000000 +0100
 12 | data 2
 13 | A
 14 | M 100644 :1 moduleA/keepme
 15 | 
 16 | blob
 17 | mark :3
 18 | data 10
 19 | nukeme v1
 20 | 
 21 | commit refs/heads/master
 22 | mark :4
 23 | author Full Name <user@organization.tld> 2000010000 +0100
 24 | committer Full Name <user@organization.tld> 2000010000 +0100
 25 | data 2
 26 | B
 27 | from :2
 28 | M 100644 :3 moduleB/nukeme
 29 | 
 30 | blob
 31 | mark :5
 32 | data 10
 33 | nukeme v2
 34 | 
 35 | commit refs/heads/master
 36 | mark :6
 37 | author Full Name <user@organization.tld> 2000020000 +0100
 38 | committer Full Name <user@organization.tld> 2000020000 +0100
 39 | data 2
 40 | C
 41 | from :2
 42 | M 100644 :5 moduleB/nukeme
 43 | 
 44 | commit refs/heads/master
 45 | mark :7
 46 | author Full Name <user@organization.tld> 2000030000 +0100
 47 | committer Full Name <user@organization.tld> 2000030000 +0100
 48 | data 29
 49 | D: Merge commit 'C' into 'B'
 50 | from :4
 51 | merge :6
 52 | 
 53 | blob
 54 | mark :8
 55 | data 10
 56 | nukeme v3
 57 | 
 58 | commit refs/heads/master
 59 | mark :9
 60 | author Full Name <user@organization.tld> 2000040000 +0100
 61 | committer Full Name <user@organization.tld> 2000040000 +0100
 62 | data 2
 63 | F
 64 | from :7
 65 | M 100644 :8 moduleB/nukeme
 66 | 
 67 | blob
 68 | mark :10
 69 | data 10
 70 | keepme v2
 71 | 
 72 | commit refs/heads/master
 73 | mark :11
 74 | author Full Name <user@organization.tld> 2000050000 +0100
 75 | committer Full Name <user@organization.tld> 2000050000 +0100
 76 | data 2
 77 | G
 78 | from :9
 79 | M 100644 :10 moduleA/keepme
 80 | 
 81 | blob
 82 | mark :12
 83 | data 10
 84 | nukem4 v4
 85 | 
 86 | commit refs/heads/master
 87 | mark :13
 88 | author Full Name <user@organization.tld> 2000060000 +0100
 89 | committer Full Name <user@organization.tld> 2000060000 +0100
 90 | data 2
 91 | H
 92 | from :11
 93 | M 100644 :12 moduleB/nukeme
 94 | 
 95 | commit refs/heads/branchI
 96 | mark :14
 97 | author Full Name <user@organization.tld> 2000070000 +0100
 98 | committer Full Name <user@organization.tld> 2000070000 +0100
 99 | data 29
100 | I: Merge commit 'D' into 'H'
101 | from :13
102 | merge :7
103 | 
104 | commit refs/heads/master
105 | mark :15
106 | author Full Name <user@organization.tld> 2000080000 +0100
107 | committer Full Name <user@organization.tld> 2000080000 +0100
108 | data 29
109 | J: Merge commit 'H' into 'D'
110 | from :7
111 | merge :13
112 | 
113 | blob
114 | mark :16
115 | data 10
116 | nukeme v4
117 | 
118 | commit refs/heads/master
119 | mark :17
120 | author Full Name <user@organization.tld> 2000090000 +0100
121 | committer Full Name <user@organization.tld> 2000090000 +0100
122 | data 2
123 | K
124 | from :15
125 | M 100644 :16 moduleB/nukeme
126 | 
127 | blob
128 | mark :18
129 | data 10
130 | keepme v3
131 | 
132 | commit refs/heads/master
133 | mark :19
134 | author Full Name <user@organization.tld> 2000092000 +0100
135 | committer Full Name <user@organization.tld> 2000092000 +0100
136 | data 2
137 | L
138 | from :17
139 | M 100644 :18 moduleA/keepme
140 | 
141 | blob
142 | mark :20
143 | data 10
144 | nukeme v5
145 | 
146 | commit refs/heads/master
147 | mark :21
148 | author Full Name <user@organization.tld> 2000094000 +0100
149 | committer Full Name <user@organization.tld> 2000094000 +0100
150 | data 2
151 | M
152 | from :19
153 | M 100644 :20 moduleB/nukeme
154 | 
155 | blob
156 | mark :22
157 | data 10
158 | nukeme v6
159 | 
160 | commit refs/heads/master
161 | mark :23
162 | author Full Name <user@organization.tld> 2000096000 +0100
163 | committer Full Name <user@organization.tld> 2000096000 +0100
164 | data 2
165 | N
166 | from :6
167 | M 100644 :22 moduleB/nukeme
168 | 
169 | commit refs/heads/branchO
170 | mark :24
171 | author Full Name <user@organization.tld> 2000098000 +0100
172 | committer Full Name <user@organization.tld> 2000098000 +0100
173 | data 29
174 | O: Merge commit 'N' into 'M'
175 | from :21
176 | merge :23
177 | 
178 | commit refs/heads/master
179 | mark :25
180 | author Full Name <user@organization.tld> 2000099000 +0100
181 | committer Full Name <user@organization.tld> 2000099000 +0100
182 | data 29
183 | P: Merge commit 'M' into 'N'
184 | from :23
185 | merge :21
186 | 
187 | blob
188 | mark :26
189 | data 10
190 | keepme v4
191 | 
192 | commit refs/heads/master
193 | mark :27
194 | author Full Name <user@organization.tld> 3000000000 +0100
195 | committer Full Name <user@organization.tld> 3000000000 +0100
196 | data 2
197 | Q
198 | from :25
199 | M 100644 :26 moduleA/keepme
200 | 
201 | blob
202 | mark :28
203 | data 10
204 | nukeme v7
205 | 
206 | commit refs/heads/master
207 | mark :29
208 | author Full Name <user@organization.tld> 3000010000 +0100
209 | committer Full Name <user@organization.tld> 3000010000 +0100
210 | data 2
211 | R
212 | from :27
213 | M 100644 :28 moduleB/nukeme
214 | 
215 | commit refs/heads/master
216 | mark :30
217 | author Full Name <user@organization.tld> 3000020000 +0100
218 | committer Full Name <user@organization.tld> 3000020000 +0100
219 | data 29
220 | S: Merge commit 'R' into 'R'
221 | from :29
222 | merge :29
223 | 
224 | blob
225 | mark :31
226 | data 10
227 | keepme v5
228 | 
229 | commit refs/heads/master
230 | mark :32
231 | author Full Name <user@organization.tld> 3000030000 +0100
232 | committer Full Name <user@organization.tld> 3000030000 +0100
233 | data 2
234 | T
235 | from :30
236 | M 100644 :31 moduleA/keepme
237 | 
238 | blob
239 | mark :33
240 | data 10
241 | nukeme v8
242 | 
243 | commit refs/heads/master
244 | mark :34
245 | author Full Name <user@organization.tld> 3000050000 +0100
246 | committer Full Name <user@organization.tld> 3000050000 +0100
247 | data 29
248 | V: Merge commit 'U' into 'U'
249 | from :32
250 | merge :32
251 | M 100644 :33 moduleB/nukeme
252 | 
253 | blob
254 | mark :35
255 | data 10
256 | keepme v6
257 | 
258 | commit refs/heads/master
259 | mark :36
260 | author Full Name <user@organization.tld> 3000060000 +0100
261 | committer Full Name <user@organization.tld> 3000060000 +0100
262 | data 2
263 | W
264 | from :34
265 | M 100644 :35 moduleA/keepme
266 | 
267 | done
268 | 


--------------------------------------------------------------------------------
/t/t9390/degenerate-keepme:
--------------------------------------------------------------------------------
  1 | feature done
  2 | blob
  3 | mark :1
  4 | data 10
  5 | keepme v1
  6 | 
  7 | reset refs/heads/branchO
  8 | commit refs/heads/branchO
  9 | mark :2
 10 | author Full Name <user@organization.tld> 2000000000 +0100
 11 | committer Full Name <user@organization.tld> 2000000000 +0100
 12 | data 2
 13 | A
 14 | M 100644 :1 moduleA/keepme
 15 | 
 16 | blob
 17 | mark :3
 18 | data 10
 19 | keepme v2
 20 | 
 21 | commit refs/heads/branchO
 22 | mark :4
 23 | author Full Name <user@organization.tld> 2000050000 +0100
 24 | committer Full Name <user@organization.tld> 2000050000 +0100
 25 | data 2
 26 | G
 27 | from :2
 28 | M 100644 :3 moduleA/keepme
 29 | 
 30 | commit refs/heads/branchI
 31 | mark :5
 32 | author Full Name <user@organization.tld> 2000070000 +0100
 33 | committer Full Name <user@organization.tld> 2000070000 +0100
 34 | data 29
 35 | I: Merge commit 'D' into 'H'
 36 | from :4
 37 | merge :2
 38 | 
 39 | commit refs/heads/branchO
 40 | mark :6
 41 | author Full Name <user@organization.tld> 2000080000 +0100
 42 | committer Full Name <user@organization.tld> 2000080000 +0100
 43 | data 29
 44 | J: Merge commit 'H' into 'D'
 45 | from :2
 46 | merge :4
 47 | 
 48 | blob
 49 | mark :7
 50 | data 10
 51 | keepme v3
 52 | 
 53 | commit refs/heads/branchO
 54 | mark :8
 55 | author Full Name <user@organization.tld> 2000092000 +0100
 56 | committer Full Name <user@organization.tld> 2000092000 +0100
 57 | data 2
 58 | L
 59 | from :6
 60 | M 100644 :7 moduleA/keepme
 61 | 
 62 | blob
 63 | mark :9
 64 | data 10
 65 | keepme v4
 66 | 
 67 | commit refs/heads/master
 68 | mark :10
 69 | author Full Name <user@organization.tld> 3000000000 +0100
 70 | committer Full Name <user@organization.tld> 3000000000 +0100
 71 | data 2
 72 | Q
 73 | from :8
 74 | M 100644 :9 moduleA/keepme
 75 | 
 76 | blob
 77 | mark :11
 78 | data 10
 79 | keepme v5
 80 | 
 81 | commit refs/heads/master
 82 | mark :12
 83 | author Full Name <user@organization.tld> 3000030000 +0100
 84 | committer Full Name <user@organization.tld> 3000030000 +0100
 85 | data 2
 86 | T
 87 | from :10
 88 | M 100644 :11 moduleA/keepme
 89 | 
 90 | blob
 91 | mark :13
 92 | data 10
 93 | keepme v6
 94 | 
 95 | commit refs/heads/master
 96 | mark :14
 97 | author Full Name <user@organization.tld> 3000060000 +0100
 98 | committer Full Name <user@organization.tld> 3000060000 +0100
 99 | data 2
100 | W
101 | from :12
102 | M 100644 :13 moduleA/keepme
103 | 
104 | done
105 | 


--------------------------------------------------------------------------------
/t/t9390/degenerate-keepme-noff:
--------------------------------------------------------------------------------
  1 | feature done
  2 | blob
  3 | mark :1
  4 | data 10
  5 | keepme v1
  6 | 
  7 | reset refs/heads/master
  8 | commit refs/heads/master
  9 | mark :2
 10 | author Full Name <user@organization.tld> 2000000000 +0100
 11 | committer Full Name <user@organization.tld> 2000000000 +0100
 12 | data 2
 13 | A
 14 | M 100644 :1 moduleA/keepme
 15 | 
 16 | blob
 17 | mark :3
 18 | data 10
 19 | keepme v2
 20 | 
 21 | commit refs/heads/branchO
 22 | mark :4
 23 | author Full Name <user@organization.tld> 2000050000 +0100
 24 | committer Full Name <user@organization.tld> 2000050000 +0100
 25 | data 2
 26 | G
 27 | from :2
 28 | M 100644 :3 moduleA/keepme
 29 | 
 30 | commit refs/heads/branchI
 31 | mark :5
 32 | author Full Name <user@organization.tld> 2000070000 +0100
 33 | committer Full Name <user@organization.tld> 2000070000 +0100
 34 | data 29
 35 | I: Merge commit 'D' into 'H'
 36 | from :4
 37 | merge :2
 38 | 
 39 | commit refs/heads/branchO
 40 | mark :6
 41 | author Full Name <user@organization.tld> 2000080000 +0100
 42 | committer Full Name <user@organization.tld> 2000080000 +0100
 43 | data 29
 44 | J: Merge commit 'H' into 'D'
 45 | from :2
 46 | merge :4
 47 | 
 48 | blob
 49 | mark :7
 50 | data 10
 51 | keepme v3
 52 | 
 53 | commit refs/heads/branchO
 54 | mark :8
 55 | author Full Name <user@organization.tld> 2000092000 +0100
 56 | committer Full Name <user@organization.tld> 2000092000 +0100
 57 | data 2
 58 | L
 59 | from :6
 60 | M 100644 :7 moduleA/keepme
 61 | 
 62 | commit refs/heads/master
 63 | mark :9
 64 | author Full Name <user@organization.tld> 2000099000 +0100
 65 | committer Full Name <user@organization.tld> 2000099000 +0100
 66 | data 29
 67 | P: Merge commit 'M' into 'N'
 68 | from :2
 69 | merge :8
 70 | 
 71 | blob
 72 | mark :10
 73 | data 10
 74 | keepme v4
 75 | 
 76 | commit refs/heads/master
 77 | mark :11
 78 | author Full Name <user@organization.tld> 3000000000 +0100
 79 | committer Full Name <user@organization.tld> 3000000000 +0100
 80 | data 2
 81 | Q
 82 | from :9
 83 | M 100644 :10 moduleA/keepme
 84 | 
 85 | blob
 86 | mark :12
 87 | data 10
 88 | keepme v5
 89 | 
 90 | commit refs/heads/master
 91 | mark :13
 92 | author Full Name <user@organization.tld> 3000030000 +0100
 93 | committer Full Name <user@organization.tld> 3000030000 +0100
 94 | data 2
 95 | T
 96 | from :11
 97 | M 100644 :12 moduleA/keepme
 98 | 
 99 | blob
100 | mark :14
101 | data 10
102 | keepme v6
103 | 
104 | commit refs/heads/master
105 | mark :15
106 | author Full Name <user@organization.tld> 3000060000 +0100
107 | committer Full Name <user@organization.tld> 3000060000 +0100
108 | data 2
109 | W
110 | from :13
111 | M 100644 :14 moduleA/keepme
112 | 
113 | done
114 | 


--------------------------------------------------------------------------------
/t/t9390/degenerate-moduleA:
--------------------------------------------------------------------------------
  1 | feature done
  2 | blob
  3 | mark :1
  4 | data 10
  5 | keepme v1
  6 | 
  7 | reset refs/heads/master
  8 | commit refs/heads/master
  9 | mark :2
 10 | author Full Name <user@organization.tld> 2000000000 +0100
 11 | committer Full Name <user@organization.tld> 2000000000 +0100
 12 | data 2
 13 | A
 14 | M 100644 :1 moduleA/keepme
 15 | 
 16 | blob
 17 | mark :3
 18 | data 13
 19 | sometimes v1
 20 | 
 21 | commit refs/heads/master
 22 | mark :4
 23 | author Full Name <user@organization.tld> 2000030000 +0100
 24 | committer Full Name <user@organization.tld> 2000030000 +0100
 25 | data 29
 26 | D: Merge commit 'C' into 'B'
 27 | from :2
 28 | merge :2
 29 | M 100644 :3 moduleA/sometimes
 30 | 
 31 | blob
 32 | mark :5
 33 | data 10
 34 | keepme v2
 35 | 
 36 | commit refs/heads/master
 37 | mark :6
 38 | author Full Name <user@organization.tld> 2000050000 +0100
 39 | committer Full Name <user@organization.tld> 2000050000 +0100
 40 | data 2
 41 | G
 42 | from :4
 43 | M 100644 :5 moduleA/keepme
 44 | 
 45 | blob
 46 | mark :7
 47 | data 13
 48 | sometimes v2
 49 | 
 50 | commit refs/heads/branchI
 51 | mark :8
 52 | author Full Name <user@organization.tld> 2000070000 +0100
 53 | committer Full Name <user@organization.tld> 2000070000 +0100
 54 | data 29
 55 | I: Merge commit 'D' into 'H'
 56 | from :6
 57 | merge :4
 58 | M 100644 :7 moduleA/sometimes
 59 | 
 60 | blob
 61 | mark :9
 62 | data 13
 63 | sometimes v3
 64 | 
 65 | commit refs/heads/master
 66 | mark :10
 67 | author Full Name <user@organization.tld> 2000080000 +0100
 68 | committer Full Name <user@organization.tld> 2000080000 +0100
 69 | data 29
 70 | J: Merge commit 'H' into 'D'
 71 | from :4
 72 | merge :6
 73 | M 100644 :9 moduleA/sometimes
 74 | 
 75 | blob
 76 | mark :11
 77 | data 10
 78 | keepme v3
 79 | 
 80 | commit refs/heads/master
 81 | mark :12
 82 | author Full Name <user@organization.tld> 2000092000 +0100
 83 | committer Full Name <user@organization.tld> 2000092000 +0100
 84 | data 2
 85 | L
 86 | from :10
 87 | M 100644 :11 moduleA/keepme
 88 | 
 89 | commit refs/heads/branchO
 90 | mark :13
 91 | author Full Name <user@organization.tld> 2000098000 +0100
 92 | committer Full Name <user@organization.tld> 2000098000 +0100
 93 | data 29
 94 | O: Merge commit 'N' into 'M'
 95 | from :12
 96 | merge :2
 97 | D moduleA/sometimes
 98 | 
 99 | blob
100 | mark :14
101 | data 13
102 | sometimes v5
103 | 
104 | commit refs/heads/master
105 | mark :15
106 | author Full Name <user@organization.tld> 2000099000 +0100
107 | committer Full Name <user@organization.tld> 2000099000 +0100
108 | data 29
109 | P: Merge commit 'M' into 'N'
110 | from :2
111 | merge :12
112 | M 100644 :14 moduleA/sometimes
113 | 
114 | blob
115 | mark :16
116 | data 10
117 | keepme v4
118 | 
119 | commit refs/heads/master
120 | mark :17
121 | author Full Name <user@organization.tld> 3000000000 +0100
122 | committer Full Name <user@organization.tld> 3000000000 +0100
123 | data 2
124 | Q
125 | from :15
126 | M 100644 :16 moduleA/keepme
127 | 
128 | blob
129 | mark :18
130 | data 13
131 | sometimes v6
132 | 
133 | commit refs/heads/master
134 | mark :19
135 | author Full Name <user@organization.tld> 3000020000 +0100
136 | committer Full Name <user@organization.tld> 3000020000 +0100
137 | data 29
138 | S: Merge commit 'R' into 'R'
139 | from :17
140 | merge :17
141 | M 100644 :18 moduleA/sometimes
142 | 
143 | blob
144 | mark :20
145 | data 10
146 | keepme v5
147 | 
148 | commit refs/heads/master
149 | mark :21
150 | author Full Name <user@organization.tld> 3000030000 +0100
151 | committer Full Name <user@organization.tld> 3000030000 +0100
152 | data 2
153 | T
154 | from :19
155 | M 100644 :20 moduleA/keepme
156 | 
157 | blob
158 | mark :22
159 | data 13
160 | sometimes v7
161 | 
162 | commit refs/heads/master
163 | mark :23
164 | author Full Name <user@organization.tld> 3000040000 +0100
165 | committer Full Name <user@organization.tld> 3000040000 +0100
166 | data 2
167 | U
168 | from :21
169 | M 100644 :22 moduleA/sometimes
170 | 
171 | commit refs/heads/master
172 | mark :24
173 | author Full Name <user@organization.tld> 3000050000 +0100
174 | committer Full Name <user@organization.tld> 3000050000 +0100
175 | data 29
176 | V: Merge commit 'U' into 'U'
177 | from :23
178 | merge :23
179 | 
180 | blob
181 | mark :25
182 | data 10
183 | keepme v6
184 | 
185 | commit refs/heads/master
186 | mark :26
187 | author Full Name <user@organization.tld> 3000060000 +0100
188 | committer Full Name <user@organization.tld> 3000060000 +0100
189 | data 2
190 | W
191 | from :24
192 | M 100644 :25 moduleA/keepme
193 | 
194 | done
195 | 


--------------------------------------------------------------------------------
/t/t9390/empty:
--------------------------------------------------------------------------------
  1 | feature done
  2 | # Simple repo with only two files, with a whole bunch of cases dealing with
  3 | # empty pruning, particularly commits that start empty.
  4 | #
  5 | # As with case1, the original-oid directives are very fake, but if an error
  6 | # is hit that shows one of these, it makes it really easy to know where it
  7 | # came from.
  8 | blob
  9 | mark :1
 10 | original-oid 0000000000000000000000000000000000000001
 11 | data 10
 12 | nukeme v1
 13 | 
 14 | blob
 15 | mark :2
 16 | original-oid 0000000000000000000000000000000000000002
 17 | data 10
 18 | keepme v1
 19 | 
 20 | blob
 21 | mark :3
 22 | original-oid 0000000000000000000000000000000000000003
 23 | data 10
 24 | nukeme v2
 25 | 
 26 | blob
 27 | mark :4
 28 | original-oid 0000000000000000000000000000000000000004
 29 | data 10
 30 | keepme v2
 31 | 
 32 | commit refs/heads/master
 33 | mark :5
 34 | original-oid 0000000000000000000000000000000000000010
 35 | author Full Name <user@organization.tld> 1000000000 +0100
 36 | committer Full Name <user@organization.tld> 1000000000 +0100
 37 | data 2
 38 | A
 39 | 
 40 | commit refs/heads/master
 41 | mark :6
 42 | original-oid 0000000000000000000000000000000000000011
 43 | author Full Name <user@organization.tld> 1000010000 +0100
 44 | committer Full Name <user@organization.tld> 1000010000 +0100
 45 | data 2
 46 | B
 47 | from :5
 48 | 
 49 | reset refs/heads/master
 50 | commit refs/heads/master
 51 | mark :7
 52 | original-oid 0000000000000000000000000000000000000012
 53 | author Full Name <user@organization.tld> 1000020000 +0100
 54 | committer Full Name <user@organization.tld> 1000020000 +0100
 55 | data 2
 56 | C
 57 | M 100644 :1 nukeme
 58 | 
 59 | commit refs/heads/master
 60 | mark :8
 61 | original-oid 0000000000000000000000000000000000000013
 62 | author Full Name <user@organization.tld> 1000030000 +0100
 63 | committer Full Name <user@organization.tld> 1000030000 +0100
 64 | data 2
 65 | D
 66 | from :7
 67 | 
 68 | commit refs/heads/master
 69 | mark :9
 70 | original-oid 0000000000000000000000000000000000000014
 71 | author Full Name <user@organization.tld> 1000040000 +0100
 72 | committer Full Name <user@organization.tld> 1000040000 +0100
 73 | data 29
 74 | E: Merge commit 'D' into 'B'
 75 | from :6
 76 | merge :8
 77 | M 100644 :2 keepme
 78 | 
 79 | commit refs/heads/master
 80 | mark :10
 81 | original-oid 0000000000000000000000000000000000000015
 82 | author Full Name <user@organization.tld> 1000050000 +0100
 83 | committer Full Name <user@organization.tld> 1000050000 +0100
 84 | data 29
 85 | F: Merge commit 'D' into 'B'
 86 | from :6
 87 | merge :8
 88 | 
 89 | commit refs/heads/master
 90 | mark :11
 91 | original-oid 0000000000000000000000000000000000000016
 92 | author Full Name <user@organization.tld> 1000060000 +0100
 93 | committer Full Name <user@organization.tld> 1000060000 +0100
 94 | data 2
 95 | G
 96 | from :9
 97 | M 100644 :3 nukeme
 98 | 
 99 | commit refs/heads/master
100 | mark :12
101 | original-oid 0000000000000000000000000000000000000017
102 | author Full Name <user@organization.tld> 1000070000 +0100
103 | committer Full Name <user@organization.tld> 1000070000 +0100
104 | data 2
105 | H
106 | from :11
107 | 
108 | commit refs/heads/master
109 | mark :13
110 | original-oid 0000000000000000000000000000000000000018
111 | author Full Name <user@organization.tld> 1000080000 +0100
112 | committer Full Name <user@organization.tld> 1000080000 +0100
113 | data 2
114 | I
115 | from :10
116 | M 100644 :4 keepme
117 | 
118 | commit refs/heads/master
119 | mark :14
120 | original-oid 0000000000000000000000000000000000000019
121 | author Full Name <user@organization.tld> 1000090000 +0100
122 | committer Full Name <user@organization.tld> 1000090000 +0100
123 | data 29
124 | J: Merge commit 'I' into 'H'
125 | from :12
126 | merge :13
127 | 
128 | done
129 | 


--------------------------------------------------------------------------------
/t/t9390/empty-keepme:
--------------------------------------------------------------------------------
 1 | feature done
 2 | reset refs/heads/master
 3 | commit refs/heads/master
 4 | mark :1
 5 | author Full Name <user@organization.tld> 1000000000 +0100
 6 | committer Full Name <user@organization.tld> 1000000000 +0100
 7 | data 2
 8 | A
 9 | 
10 | commit refs/heads/master
11 | mark :2
12 | author Full Name <user@organization.tld> 1000010000 +0100
13 | committer Full Name <user@organization.tld> 1000010000 +0100
14 | data 2
15 | B
16 | from :1
17 | 
18 | blob
19 | mark :3
20 | data 10
21 | keepme v1
22 | 
23 | commit refs/heads/master
24 | mark :4
25 | author Full Name <user@organization.tld> 1000040000 +0100
26 | committer Full Name <user@organization.tld> 1000040000 +0100
27 | data 29
28 | E: Merge commit 'D' into 'B'
29 | from :2
30 | M 100644 :3 keepme
31 | 
32 | blob
33 | mark :5
34 | data 10
35 | keepme v2
36 | 
37 | commit refs/heads/master
38 | mark :6
39 | author Full Name <user@organization.tld> 1000080000 +0100
40 | committer Full Name <user@organization.tld> 1000080000 +0100
41 | data 2
42 | I
43 | from :2
44 | M 100644 :5 keepme
45 | 
46 | commit refs/heads/master
47 | mark :7
48 | author Full Name <user@organization.tld> 1000090000 +0100
49 | committer Full Name <user@organization.tld> 1000090000 +0100
50 | data 29
51 | J: Merge commit 'I' into 'H'
52 | from :4
53 | merge :6
54 | 
55 | done
56 | 


--------------------------------------------------------------------------------
/t/t9390/less-empty-keepme:
--------------------------------------------------------------------------------
 1 | feature done
 2 | reset refs/heads/master
 3 | commit refs/heads/master
 4 | mark :1
 5 | author Full Name <user@organization.tld> 1000000000 +0100
 6 | committer Full Name <user@organization.tld> 1000000000 +0100
 7 | data 2
 8 | A
 9 | 
10 | commit refs/heads/master
11 | mark :2
12 | author Full Name <user@organization.tld> 1000010000 +0100
13 | committer Full Name <user@organization.tld> 1000010000 +0100
14 | data 2
15 | B
16 | from :1
17 | 
18 | reset refs/heads/master
19 | commit refs/heads/master
20 | mark :3
21 | author Full Name <user@organization.tld> 1000020000 +0100
22 | committer Full Name <user@organization.tld> 1000020000 +0100
23 | data 2
24 | C
25 | 
26 | commit refs/heads/master
27 | mark :4
28 | author Full Name <user@organization.tld> 1000030000 +0100
29 | committer Full Name <user@organization.tld> 1000030000 +0100
30 | data 2
31 | D
32 | from :3
33 | 
34 | blob
35 | mark :5
36 | data 10
37 | keepme v1
38 | 
39 | commit refs/heads/master
40 | mark :6
41 | author Full Name <user@organization.tld> 1000040000 +0100
42 | committer Full Name <user@organization.tld> 1000040000 +0100
43 | data 29
44 | E: Merge commit 'D' into 'B'
45 | from :2
46 | merge :4
47 | M 100644 :5 keepme
48 | 
49 | commit refs/heads/master
50 | mark :7
51 | author Full Name <user@organization.tld> 1000060000 +0100
52 | committer Full Name <user@organization.tld> 1000060000 +0100
53 | data 2
54 | G
55 | from :6
56 | 
57 | commit refs/heads/master
58 | mark :8
59 | author Full Name <user@organization.tld> 1000070000 +0100
60 | committer Full Name <user@organization.tld> 1000070000 +0100
61 | data 2
62 | H
63 | from :7
64 | 
65 | commit refs/heads/master
66 | mark :9
67 | author Full Name <user@organization.tld> 1000050000 +0100
68 | committer Full Name <user@organization.tld> 1000050000 +0100
69 | data 29
70 | F: Merge commit 'D' into 'B'
71 | from :2
72 | merge :4
73 | 
74 | blob
75 | mark :10
76 | data 10
77 | keepme v2
78 | 
79 | commit refs/heads/master
80 | mark :11
81 | author Full Name <user@organization.tld> 1000080000 +0100
82 | committer Full Name <user@organization.tld> 1000080000 +0100
83 | data 2
84 | I
85 | from :9
86 | M 100644 :10 keepme
87 | 
88 | commit refs/heads/master
89 | mark :12
90 | author Full Name <user@organization.tld> 1000090000 +0100
91 | committer Full Name <user@organization.tld> 1000090000 +0100
92 | data 29
93 | J: Merge commit 'I' into 'H'
94 | from :8
95 | merge :11
96 | 
97 | done
98 | 


--------------------------------------------------------------------------------
/t/t9390/more-empty-keepme:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 10
 5 | keepme v1
 6 | 
 7 | reset refs/heads/master
 8 | commit refs/heads/master
 9 | mark :2
10 | author Full Name <user@organization.tld> 1000040000 +0100
11 | committer Full Name <user@organization.tld> 1000040000 +0100
12 | data 29
13 | E: Merge commit 'D' into 'B'
14 | M 100644 :1 keepme
15 | 
16 | blob
17 | mark :3
18 | data 10
19 | keepme v2
20 | 
21 | reset refs/heads/master
22 | commit refs/heads/master
23 | mark :4
24 | author Full Name <user@organization.tld> 1000080000 +0100
25 | committer Full Name <user@organization.tld> 1000080000 +0100
26 | data 2
27 | I
28 | M 100644 :3 keepme
29 | 
30 | commit refs/heads/master
31 | mark :5
32 | author Full Name <user@organization.tld> 1000090000 +0100
33 | committer Full Name <user@organization.tld> 1000090000 +0100
34 | data 29
35 | J: Merge commit 'I' into 'H'
36 | from :2
37 | merge :4
38 | 
39 | done
40 | 


--------------------------------------------------------------------------------
/t/t9390/sample-mailmap:
--------------------------------------------------------------------------------
1 | Little 'ol Me <me@little.net>
2 | <me@little.net> <me@laptop.(none)>
3 | # Here is a comment
4 | Little 'ol Me <me@little.net> Little O. Me
5 | Little 'ol Me <me@little.net> <me@fire.com>
6 | Little 'ol Me <me@little.net> Little Me <me@bigcompany.com>
7 | Little John <second@merry.men> little.john <>
8 | 


--------------------------------------------------------------------------------
/t/t9390/sample-message:
--------------------------------------------------------------------------------
1 | Initial==>Modified
2 | regex:tw.nty==>the number 20
3 | v1.0==>version one!
4 | regex:!$==> :)
5 | 


--------------------------------------------------------------------------------
/t/t9390/sample-replace:
--------------------------------------------------------------------------------
1 | mod==>modified-by-gremlins
2 | 


--------------------------------------------------------------------------------
/t/t9390/unusual:
--------------------------------------------------------------------------------
 1 | option git quiet
 2 | feature done
 3 | # Input in a format filter-repo isn't generally expected to receive (either
 4 | # because we don't pass certain flags to fast-export or repos don't have the
 5 | # weird features or whatever other reason), but which we want to test for
 6 | # completeness.
 7 | progress I am starting the import, yo.
 8 | 
 9 | checkpoint
10 | 
11 | blob
12 | mark :1
13 | original-oid 0000000000000000000000000000000000000001
14 | data 5
15 | hello
16 | 
17 | commit refs/heads/master
18 | mark :2
19 | original-oid 0000000000000000000000000000000000000002
20 | committer Srinivasa Ramanujan <fellow@cambridge.org> 1535228562 +051800
21 | data 8
22 | Initial
23 | M 100644 :1 greeting
24 | 
25 | reset refs/heads/develop
26 | from :2
27 | 
28 | tag v1.0
29 | from :2
30 | original-oid 0000000000000000000000000000000000000003
31 | tagger little.john <> 1535229618 -0700
32 | data 4
33 | v1.0
34 | 
35 | done
36 | 


--------------------------------------------------------------------------------
/t/t9390/unusual-filtered:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 5
 5 | hello
 6 | reset refs/heads/develop
 7 | commit refs/heads/develop
 8 | mark :2
 9 | author Srinivasa Ramanujan <fellow@cambridge.org> 1535228562 +051800
10 | committer Srinivasa Ramanujan <fellow@cambridge.org> 1535228562 +051800
11 | data 8
12 | Initial
13 | M 100644 :1 greeting
14 | 
15 | reset refs/heads/master
16 | from :2
17 | 
18 | tag v1.0
19 | from :2
20 | tagger little.john <> 1535229618 -0700
21 | data 4
22 | v1.0
23 | done
24 | 


--------------------------------------------------------------------------------
/t/t9390/unusual-mailmap:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 5
 5 | hello
 6 | reset refs/heads/develop
 7 | commit refs/heads/develop
 8 | mark :2
 9 | author Srinivasa Ramanujan <fellow@cambridge.org> 1535228562 +051800
10 | committer Srinivasa Ramanujan <fellow@cambridge.org> 1535228562 +051800
11 | data 8
12 | Initial
13 | M 100644 :1 greeting
14 | 
15 | reset refs/heads/master
16 | from :2
17 | 
18 | tag v1.0
19 | from :2
20 | tagger Little John <second@merry.men> 1535229618 -0700
21 | data 4
22 | v1.0
23 | done
24 | 


--------------------------------------------------------------------------------
/t/t9391-filter-repo-lib-usage.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | test_description='Usage of git-filter-repo as a library'
  4 | . ./test-lib.sh
  5 | 
  6 | # for git_filter_repo.py import
  7 | case "$(uname -s)" in
  8 | MINGW*|MSYS)
  9 | 	export PYTHONPATH=$(cygpath -am $TEST_DIRECTORY/..)\;$PYTHONPATH
 10 | 	;;
 11 | *)
 12 | 	export PYTHONPATH=$(dirname $TEST_DIRECTORY):$PYTHONPATH
 13 | 	;;
 14 | esac
 15 | # Avoid writing git_filter_repo.pyc file
 16 | export PYTHONDONTWRITEBYTECODE=1
 17 | export CONTRIB_DIR=$TEST_DIRECTORY/../contrib/filter-repo-demos
 18 | 
 19 | DATA="$TEST_DIRECTORY/t9391"
 20 | 
 21 | setup()
 22 | {
 23 | 	git init $1 &&
 24 | 	(
 25 | 		cd $1 &&
 26 | 		echo hello > world &&
 27 | 		git add world &&
 28 | 		test_tick &&
 29 | 		git commit -m initial &&
 30 | 		printf "The launch code is 1-2-3-4." > secret &&
 31 | 		git add secret &&
 32 | 		test_tick &&
 33 | 		git commit -m "Sssh.  Dont tell no one" &&
 34 | 		echo A file that you cant trust > file.doc &&
 35 | 		echo there >> world &&
 36 | 		git add file.doc world &&
 37 | 		test_tick &&
 38 | 		printf "Random useless changes\n\nLet us be like the marketing group.  Marketing is staffed with pansies" | git commit -F - &&
 39 | 		echo Do not use a preposition to end a setence with > advice &&
 40 | 		git add advice &&
 41 | 		test_tick &&
 42 | 		GIT_AUTHOR_NAME="Copy N. Paste" git commit -m "hypocrisy is fun" &&
 43 | 		echo Avoid cliches like the plague >> advice &&
 44 | 		test_tick &&
 45 | 		GIT_AUTHOR_EMAIL="foo@my.crp" git commit -m "it is still fun" advice &&
 46 | 		echo "  \$Id: A bunch of junk$" > foobar.c &&
 47 | 		git add foobar.c &&
 48 | 		test_tick &&
 49 | 		git commit -m "Brain damage"
 50 | 	)
 51 | }
 52 | 
 53 | test_expect_success 'commit_info.py' '
 54 | 	setup commit_info &&
 55 | 	(
 56 | 		cd commit_info &&
 57 | 		$TEST_DIRECTORY/t9391/commit_info.py &&
 58 | 		test 0e5a1029 = $(git rev-parse --short=8 --verify refs/heads/master)
 59 | 	)
 60 | '
 61 | 
 62 | test_expect_success 'file_filter.py' '
 63 | 	setup file_filter &&
 64 | 	(
 65 | 		cd file_filter &&
 66 | 		$TEST_DIRECTORY/t9391/file_filter.py &&
 67 | 		test ee59e2b4 = $(git rev-parse --short=8 --verify refs/heads/master)
 68 | 	)
 69 | '
 70 | 
 71 | test_expect_success 'print_progress.py' '
 72 | 	setup print_progress &&
 73 | 	(
 74 | 		cd print_progress &&
 75 | 		MASTER=$(git rev-parse --verify master) &&
 76 | 		$TEST_DIRECTORY/t9391/print_progress.py . new &&
 77 | 		test $MASTER = $(git rev-parse --verify refs/heads/master)
 78 | 	)
 79 | '
 80 | 
 81 | test_expect_success 'rename-master-to-develop.py' '
 82 | 	setup rename_master_to_develop &&
 83 | 	(
 84 | 		cd rename_master_to_develop &&
 85 | 		MASTER=$(git rev-parse --verify master) &&
 86 | 		$TEST_DIRECTORY/t9391/rename-master-to-develop.py &&
 87 | 		test $MASTER = $(git rev-parse --verify refs/heads/develop)
 88 | 	)
 89 | '
 90 | 
 91 | test_expect_success 'strip-cvs-keywords.py' '
 92 | 	setup strip_cvs_keywords &&
 93 | 	(
 94 | 		cd strip_cvs_keywords &&
 95 | 		$TEST_DIRECTORY/t9391/strip-cvs-keywords.py
 96 | 		test 2306fc7c = $(git rev-parse --short=8 --verify refs/heads/master)
 97 | 	)
 98 | '
 99 | 
100 | test_expect_success 'setup two extra repositories' '
101 | 	mkdir repo1 &&
102 | 	cd repo1 &&
103 | 	git init &&
104 | 	echo hello > world &&
105 | 	git add world &&
106 | 	test_tick &&
107 | 	git commit -m "Commit A" &&
108 | 	echo goodbye > world &&
109 | 	git add world &&
110 | 	test_tick &&
111 | 	git commit -m "Commit C" &&
112 | 	cd .. &&
113 | 	mkdir repo2 &&
114 | 	cd repo2 &&
115 | 	git init &&
116 | 	echo foo > bar &&
117 | 	git add bar &&
118 | 	test_tick &&
119 | 	git commit -m "Commit B" &&
120 | 	echo fooey > bar &&
121 | 	git add bar &&
122 | 	test_tick &&
123 | 	git commit -m "Commit D" &&
124 | 	cd ..
125 | '
126 | 
127 | test_expect_success 'splice_repos.py' '
128 | 	git init splice_repos &&
129 | 	$TEST_DIRECTORY/t9391/splice_repos.py repo1 repo2 splice_repos &&
130 | 	test 4 = $(git -C splice_repos rev-list master | wc -l)
131 | '
132 | 
133 | test_expect_success 'create_fast_export_output.py' '
134 | 	git init create_fast_export_output &&
135 | 	(cd create_fast_export_output &&
136 | 		$TEST_DIRECTORY/t9391/create_fast_export_output.py &&
137 | 		test e5e0569b = $(git rev-parse --short=8 --verify refs/heads/master) &&
138 | 		test 122ead00 = $(git rev-parse --short=8 --verify refs/heads/devel) &&
139 | 		test f36143f9 = $(git rev-parse --short=8 --verify refs/tags/v1.0))
140 | '
141 | 
142 | test_expect_success 'unusual.py' '
143 | 	setup unusual &&
144 | 	(
145 | 		cd unusual &&
146 | 		cat $TEST_DIRECTORY/t9390/unusual | \
147 | 			$TEST_DIRECTORY/t9391/unusual.py >output &&
148 | 
149 | 		grep "Decipher this: .oy ,tropmi eht gnitrats ma I" output &&
150 | 		grep "Found 2 blobs/commits and 4 other objects" output
151 | 	)
152 | '
153 | 
154 | test_expect_success 'erroneous.py' '
155 | 	setup erroneous &&
156 | 	(
157 | 		cd erroneous &&
158 | 		test_must_fail $TEST_DIRECTORY/t9391/erroneous.py 2>../err &&
159 | 
160 | 		test_i18ngrep "Error: Cannot pass a tag_callback to RepoFilter AND pass --tag-callback" ../err
161 | 	)
162 | '
163 | 
164 | test_expect_success 'other error cases' '
165 | 	GIT_CEILING_DIRECTORIES=$(pwd) &&
166 | 	export GIT_CEILING_DIRECTORIES &&
167 | 	(
168 | 		mkdir other &&
169 | 		cd other &&
170 | 
171 | 		! python3 -c "import git_filter_repo as fr; fr.GitUtils.get_commit_count(b\".\", [\"HEAD\"])" 2>err &&
172 | 		test_i18ngrep "\. does not appear to be a valid git repository" err
173 | 	)
174 | '
175 | 
176 | test_lazy_prereq DOS2UNIX '
177 | 	dos2unix -h
178 | 	test $? -ne 127
179 | '
180 | 
181 | test_expect_success 'lint-history' '
182 | 	test_create_repo lint-history &&
183 | 	(
184 | 		cd lint-history &&
185 | 		git config core.autocrlf false &&
186 | 		echo initial >content &&
187 | 		git add content &&
188 | 		git commit -m "initial" &&
189 | 
190 | 		printf "CRLF is stupid\r\n" >content &&
191 | 		git add content &&
192 | 		git commit -m "make a statement" &&
193 | 
194 | 		printf "CRLF is stupid\n" >content &&
195 | 		git add content &&
196 | 		git commit -m "oops, that was embarassing" &&
197 | 
198 | 		if test_have_prereq DOS2UNIX
199 | 		then
200 | 			$CONTRIB_DIR/lint-history --filenames-important dos2unix &&
201 | 			echo 2 >expect &&
202 | 			git rev-list --count HEAD >actual &&
203 | 			test_cmp expect actual
204 | 		fi
205 | 	)
206 | '
207 | 
208 | test_expect_success !WINDOWS 'lint-history --refs' '
209 | 	test_create_repo lint-history-only-some-refs &&
210 | 	(
211 | 		cd lint-history-only-some-refs &&
212 | 		test_commit a somefile bad &&
213 | 		test_commit b notherfile baaad &&
214 | 		test_commit c whatever baaaaaad &&
215 | 		git checkout -b mybranch HEAD~1 &&
216 | 		test_commit d somefile baaaaaaaad &&
217 | 		test_commit e whatever "baaaaaaaaaad to the bone" &&
218 | 
219 | 		cat <<-EOF >linter.sh &&
220 | 		#!/bin/bash
221 | 		cat \$1 | tr -d a >tmp
222 | 		mv tmp \$1
223 | 		EOF
224 | 		chmod u+x linter.sh &&
225 | 
226 | 		PATH=$PATH:. $CONTRIB_DIR/lint-history --refs master..mybranch  -- linter.sh &&
227 | 
228 | 		echo bd >expect &&
229 | 		echo bd to the bone >long-expect &&
230 | 
231 | 		# Verify master is untouched
232 | 		git checkout master &&
233 | 		! test_cmp somefile expect &&
234 | 		! test_cmp notherfile expect &&
235 | 		! test_cmp whatever expect &&
236 | 
237 | 		# Verify that files touched on the branch are tweaked
238 | 		git checkout mybranch &&
239 | 		test_cmp somefile expect &&
240 | 		! test_cmp notherfile expect &&
241 | 		test_cmp whatever long-expect
242 | 
243 | 	)
244 | '
245 | 
246 | test_expect_success 'clean-ignore with emoji in filenames' '
247 | 	test_create_repo clean-ignore &&
248 | 	(
249 | 		cd clean-ignore &&
250 | 		git fast-import --quiet <$DATA/emoji-repo &&
251 | 		git reset --hard &&
252 | 		$CONTRIB_DIR/clean-ignore --force &&
253 | 		printf ".gitignore\nfilename\n" >expect &&
254 | 		git ls-files >actual &&
255 | 		test_cmp expect actual
256 | 	)
257 | '
258 | 
259 | test_done
260 | 


--------------------------------------------------------------------------------
/t/t9391/commit_info.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | Please see the
 5 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
 6 | near the top of git-filter-repo
 7 | """
 8 | 
 9 | import re
10 | import datetime
11 | 
12 | import git_filter_repo as fr
13 | 
14 | def change_up_them_commits(commit, metadata):
15 |   # Change the commit author
16 |   if commit.author_name == b"Copy N. Paste":
17 |     commit.author_name = b"Ima L. Oser"
18 |     commit.author_email = b"aloser@my.corp"
19 | 
20 |   # Fix the author email
21 |   commit.author_email = re.sub(b"@my.crp", b"@my.corp", commit.author_email)
22 | 
23 |   # Fix the committer date (bad timezone conversion in initial import)
24 |   oldtime = fr.string_to_date(commit.committer_date)
25 |   newtime = oldtime + datetime.timedelta(hours=-5)
26 |   commit.committer_date = fr.date_to_string(newtime)
27 | 
28 |   # Fix the commit message
29 |   commit.message = re.sub(b"Marketing is staffed with pansies", b"",
30 |                           commit.message)
31 | 
32 | args = fr.FilteringOptions.parse_args(['--force'])
33 | filter = fr.RepoFilter(args, commit_callback = change_up_them_commits)
34 | filter.run()
35 | 


--------------------------------------------------------------------------------
/t/t9391/create_fast_export_output.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | """
  4 | Please see the
  5 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
  6 | near the top of git-filter-repo.
  7 | """
  8 | 
  9 | import git_filter_repo as fr
 10 | from git_filter_repo import Blob, Reset, FileChange, Commit, Tag, FixedTimeZone
 11 | from git_filter_repo import Progress, Checkpoint
 12 | 
 13 | from datetime import datetime, timedelta
 14 | 
 15 | args = fr.FilteringOptions.default_options()
 16 | out = fr.RepoFilter(args)
 17 | out.importer_only()
 18 | 
 19 | world = Blob(b"Hello")
 20 | out.insert(world)
 21 | 
 22 | bar = Blob(b"foo\n")
 23 | out.insert(bar)
 24 | 
 25 | master = Reset(b"refs/heads/master")
 26 | out.insert(master)
 27 | 
 28 | changes = [FileChange(b'M', b'world', world.id, mode=b"100644"),
 29 |            FileChange(b'M', b'bar',   bar.id,   mode=b"100644")]
 30 | when = datetime(year=2005, month=4, day=7,
 31 |                 hour=15, minute=16, second=10,
 32 |                 tzinfo=FixedTimeZone(b"-0700"))
 33 | when_string = fr.date_to_string(when)
 34 | commit1 = Commit(b"refs/heads/master",
 35 |                  b"A U Thor", b"au@thor.email", when_string,
 36 |                  b"Com M. Iter", b"comm@iter.email", when_string,
 37 |                  b"My first commit!  Wooot!\n\nLonger description",
 38 |                  changes,
 39 |                  parents = [])
 40 | out.insert(commit1)
 41 | 
 42 | world = Blob(b"Hello\nHi")
 43 | out.insert(world)
 44 | world_link = Blob(b"world")
 45 | out.insert(world_link)
 46 | 
 47 | changes = [FileChange(b'M', b'world',  world.id,      mode=b"100644"),
 48 |            FileChange(b'M', b'planet', world_link.id, mode=b"120000")]
 49 | when += timedelta(days=3, hours=4, minutes=6)
 50 | when_string = fr.date_to_string(when)
 51 | commit2 = Commit(b"refs/heads/master",
 52 |                  b"A U Thor", b"au@thor.email", when_string,
 53 |                  b"Com M. Iter", b"comm@iter.email", when_string,
 54 |                  b"Make a symlink to world called planet, modify world",
 55 |                  changes,
 56 |                  parents = [commit1.id])
 57 | out.insert(commit2)
 58 | 
 59 | script = Blob(b"#!/bin/sh\n\necho Hello")
 60 | out.insert(script)
 61 | changes = [FileChange(b'M', b'runme', script.id, mode=b"100755"),
 62 |            FileChange(b'D', b'bar')]
 63 | when_string = b"1234567890 -0700"
 64 | commit3 = Commit(b"refs/heads/master",
 65 |                  b"A U Thor", b"au@thor.email", when_string,
 66 |                  b"Com M. Iter", b"comm@iter.email", when_string,
 67 |                  b"Add runme script, remove bar",
 68 |                  changes,
 69 |                  parents = [commit2.id])
 70 | out.insert(commit3)
 71 | 
 72 | progress = Progress(b"Done with the master branch now...")
 73 | out.insert(progress)
 74 | checkpoint = Checkpoint()
 75 | out.insert(checkpoint)
 76 | 
 77 | devel = Reset(b"refs/heads/devel", commit1.id)
 78 | out.insert(devel)
 79 | 
 80 | world = Blob(b"Hello\nGoodbye")
 81 | out.insert(world)
 82 | 
 83 | changes = [FileChange(b'DELETEALL'),
 84 |            FileChange(b'M', b'world', world.id, mode=b"100644"),
 85 |            FileChange(b'M', b'bar',   bar.id,   mode=b"100644")]
 86 | when = datetime(2006, 8, 17, tzinfo=FixedTimeZone(b"+0200"))
 87 | when_string = fr.date_to_string(when)
 88 | commit4 = Commit(b"refs/heads/devel",
 89 |                  b"A U Thor", b"au@thor.email", when_string,
 90 |                  b"Com M. Iter", b"comm@iter.email", when_string,
 91 |                  b"Modify world",
 92 |                  changes,
 93 |                  parents = [commit1.id])
 94 | out.insert(commit4)
 95 | 
 96 | world = Blob(b"Hello\nHi\nGoodbye")
 97 | out.insert(world)
 98 | when = fr.string_to_date(commit3.author_date) + timedelta(days=47)
 99 | when_string = fr.date_to_string(when)
100 | # git fast-import requires file changes to be listed in terms of differences
101 | # to the first parent.  Thus, despite the fact that runme and planet have
102 | # not changed and bar was not modified in the devel side, we have to list them
103 | # all anyway.
104 | changes = [FileChange(b'M', b'world', world.id, mode=b"100644"),
105 |            FileChange(b'D', b'bar'),
106 |            FileChange(b'M', b'runme', script.id, mode=b"100755"),
107 |            FileChange(b'M', b'planet', world_link.id, mode=b"120000")]
108 | 
109 | commit5 = Commit(b"refs/heads/devel",
110 |                  b"A U Thor", b"au@thor.email", when_string,
111 |                  b"Com M. Iter", b"comm@iter.email", when_string,
112 |                  b"Merge branch 'master'\n",
113 |                  changes,
114 |                  parents = [commit4.id, commit3.id])
115 | out.insert(commit5)
116 | 
117 | 
118 | mytag = Tag(b"refs/tags/v1.0", commit5.id,
119 |             b"His R. Highness", b"royalty@my.kingdom", when_string,
120 |             b"I bequeath to my peons this royal software")
121 | out.insert(mytag)
122 | out.finish()
123 | 


--------------------------------------------------------------------------------
/t/t9391/emoji-repo:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 8
 5 | initial
 6 | 
 7 | blob
 8 | mark :2
 9 | data 5
10 | lock
11 | 
12 | blob
13 | mark :3
14 | data 11
15 | *.bak
16 | 🔒
17 | 
18 | reset refs/heads/master
19 | commit refs/heads/master
20 | mark :4
21 | author Little O. Me <me@little.net> 1535228562 -0700
22 | committer Little O. Me <me@little.net> 1535228562 -0700
23 | data 10
24 | My commit
25 | M 100644 :1 filename
26 | M 100644 :2 🔒
27 | M 100644 :3 .gitignore
28 | 
29 | done
30 | 


--------------------------------------------------------------------------------
/t/t9391/erroneous.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | Please see the
 5 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
 6 | near the top of git-filter-repo
 7 | """
 8 | 
 9 | import git_filter_repo as fr
10 | 
11 | def handle_tag(tag):
12 |   print("Tagger: "+''.join(tag.tagger_name))
13 | 
14 | args = fr.FilteringOptions.parse_args(['--force', '--tag-callback', 'pass'])
15 | filter = fr.RepoFilter(args, tag_callback = handle_tag)
16 | filter.run()
17 | 


--------------------------------------------------------------------------------
/t/t9391/file_filter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | Please see the
 5 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
 6 | near the top of git-filter-repo.
 7 | """
 8 | 
 9 | import sys
10 | import git_filter_repo as fr
11 | 
12 | def drop_file_by_contents(blob, metadata):
13 |   bad_file_contents = b'The launch code is 1-2-3-4.'
14 |   if blob.data == bad_file_contents:
15 |     blob.skip()
16 | 
17 | def drop_files_by_name(commit, metadata):
18 |   new_file_changes = []
19 |   for change in commit.file_changes:
20 |     if not change.filename.endswith(b'.doc'):
21 |       new_file_changes.append(change)
22 |   commit.file_changes = new_file_changes
23 | 
24 | sys.argv.append('--force')
25 | args = fr.FilteringOptions.parse_args(sys.argv[1:])
26 | 
27 | filter = fr.RepoFilter(args,
28 |                        blob_callback   = drop_file_by_contents,
29 |                        commit_callback = drop_files_by_name)
30 | filter.run()
31 | 


--------------------------------------------------------------------------------
/t/t9391/print_progress.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | Please see the
 5 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
 6 | near the top of git-filter-repo
 7 | """
 8 | 
 9 | import sys
10 | import git_filter_repo as fr
11 | 
12 | if len(sys.argv) != 3:
13 |   raise SystemExit("Syntax:\n  %s SOURCE_REPO TARGET_REPO")
14 | source_repo = sys.argv[1].encode()
15 | target_repo = sys.argv[2].encode()
16 | 
17 | total_objects = fr.GitUtils.get_total_objects(source_repo) # blobs+trees
18 | total_commits = fr.GitUtils.get_commit_count(source_repo)
19 | object_count = 0
20 | commit_count = 0
21 | 
22 | def print_progress():
23 |   global object_count, commit_count, total_objects, total_commits
24 |   print("\rRewriting commits... %d/%d  (%d objects)"
25 |         % (commit_count, total_commits, object_count), end='')
26 | 
27 | def my_blob_callback(blob, metadata):
28 |   global object_count
29 |   object_count += 1
30 |   print_progress()
31 | 
32 | def my_commit_callback(commit, metadata):
33 |   global commit_count
34 |   commit_count += 1
35 |   print_progress()
36 | 
37 | args = fr.FilteringOptions.parse_args(['--force', '--quiet'])
38 | filter = fr.RepoFilter(args,
39 |                        blob_callback   = my_blob_callback,
40 |                        commit_callback = my_commit_callback)
41 | filter.run()
42 | 


--------------------------------------------------------------------------------
/t/t9391/rename-master-to-develop.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | Please see the
 5 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
 6 | near the top of git-filter-repo.
 7 | """
 8 | 
 9 | import git_filter_repo as fr
10 | 
11 | def my_commit_callback(commit, metadata):
12 |   if commit.branch == b"refs/heads/master":
13 |     commit.branch = b"refs/heads/develop"
14 | 
15 | args = fr.FilteringOptions.default_options()
16 | args.force = True
17 | filter = fr.RepoFilter(args, commit_callback = my_commit_callback)
18 | filter.run()
19 | 


--------------------------------------------------------------------------------
/t/t9391/splice_repos.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | Please see the
 5 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
 6 | near the top of git-filter-repo.
 7 | 
 8 | Also, note that splicing repos may need some special care as fast-export
 9 | only shows the files that changed relative to the first parent, so there
10 | may be gotchas if you are to splice near merge commits; this example does
11 | not try to handle any such special cases.
12 | """
13 | 
14 | import re
15 | import sys
16 | import git_filter_repo as fr
17 | 
18 | class InterleaveRepositories:
19 |   def __init__(self, repo1, repo2, output_dir):
20 |     self.repo1 = repo1
21 |     self.repo2 = repo2
22 |     self.output_dir = output_dir
23 | 
24 |     self.commit_map = {}
25 |     self.last_commit = None
26 | 
27 |   def skip_reset(self, reset, metadata):
28 |     reset.skip()
29 | 
30 |   def hold_commit(self, commit, metadata):
31 |     commit.skip(new_id = commit.id)
32 |     letter = re.match(b'Commit (.)', commit.message).group(1)
33 |     self.commit_map[letter] = commit
34 | 
35 |   def weave_commit(self, commit, metadata):
36 |     letter = re.match(b'Commit (.)', commit.message).group(1)
37 |     prev_letter = bytes([ord(letter)-1])
38 | 
39 |     # Splice in any extra commits needed
40 |     if prev_letter in self.commit_map:
41 |       new_commit = self.commit_map[prev_letter]
42 |       new_commit.dumped = 0
43 |       new_commit.parents = [self.last_commit] if self.last_commit else []
44 |       # direct_insertion=True to avoid weave_commit being called recursively
45 |       # on the same commit
46 |       self.out.insert(new_commit, direct_insertion = True)
47 |       commit.parents = [new_commit.id]
48 | 
49 |     # Dump our commit now
50 |     self.out.insert(commit, direct_insertion = True)
51 | 
52 |     # Make sure that commits that depended on new_commit.id will now depend
53 |     # on commit.id
54 |     if prev_letter in self.commit_map:
55 |       self.last_commit = commit.id
56 |       fr.record_id_rename(new_commit.id, commit.id)
57 | 
58 |   def run(self):
59 |     blob = fr.Blob(b'public gpg key contents')
60 |     tag = fr.Tag(b'gpg-pubkey', blob.id,
61 |                  b'Ima Tagger', b'ima@tagg.er', b'1136199845 +0300',
62 |                  b'Very important explanation and stuff')
63 | 
64 |     args = fr.FilteringOptions.parse_args(['--target', self.output_dir])
65 |     out = fr.RepoFilter(args)
66 |     out.importer_only()
67 |     self.out = out
68 | 
69 |     i1args = fr.FilteringOptions.parse_args(['--source', self.repo1])
70 |     i1 = fr.RepoFilter(i1args,
71 |                        reset_callback  = self.skip_reset,
72 |                        commit_callback = self.hold_commit)
73 |     i1.set_output(out)
74 |     i1.run()
75 | 
76 |     i2args = fr.FilteringOptions.parse_args(['--source', self.repo2])
77 |     i2 = fr.RepoFilter(i2args,
78 |                        commit_callback = self.weave_commit)
79 |     i2.set_output(out)
80 |     i2.run()
81 | 
82 |     out.insert(blob)
83 |     out.insert(tag)
84 |     out.finish()
85 | 
86 | splicer = InterleaveRepositories(sys.argv[1], sys.argv[2], sys.argv[3])
87 | splicer.run()
88 | 


--------------------------------------------------------------------------------
/t/t9391/strip-cvs-keywords.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | Please see the
 5 |   ***** API BACKWARD COMPATIBILITY CAVEAT *****
 6 | near the top of git-filter-repo.
 7 | """
 8 | 
 9 | import re
10 | import git_filter_repo as fr
11 | 
12 | def strip_cvs_keywords(blob, metadata):
13 |   # FIXME: Should first check if blob is a text file to avoid ruining
14 |   # binaries.  Could use python.magic here, or just output blob.data to
15 |   # the unix 'file' command
 16 |   pattern = br'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$'
 17 |   replacement = br'$\1$'
18 |   blob.data = re.sub(pattern, replacement, blob.data)
19 | 
20 | args = fr.FilteringOptions.parse_args(['--force'])
21 | filter = fr.RepoFilter(args, blob_callback = strip_cvs_keywords)
22 | filter.run()
23 | 


--------------------------------------------------------------------------------
/t/t9391/unusual.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # Please: DO NOT USE THIS AS AN EXAMPLE.
  4 | #
  5 | # This file is NOT for demonstration of how to use git-filter-repo as a
  6 | # library; it exists to test corner cases or otherwise unusual inputs, and
  7 | # to verify some invariants that git-filter-repo currently aims to maintain
  8 | # (these invariants might be different in future versions of
  9 | # git-filter-repo).  As such, it reaches deep into the internals and does
 10 | # weird things that you should probably avoid in your usage of
 11 | # git-filter-repo.  Any code in this testcase is much more likely to have
 12 | # API breaks than other files in t9391.
 13 | 
 14 | import collections
 15 | import os
 16 | import random
 17 | import io
 18 | import sys
 19 | import textwrap
 20 | 
 21 | import git_filter_repo as fr
 22 | 
 23 | total_objects = {'common': 0, 'uncommon': 0}
 24 | def track_everything(obj, *_ignored):
 25 |   if type(obj) == fr.Blob or type(obj) == fr.Commit:
 26 |     total_objects['common'] += 1
 27 |   else:
 28 |     total_objects['uncommon'] += 1
 29 |   if type(obj) == fr.Reset:
 30 |     def assert_not_reached(x): raise SystemExit("should have been skipped!")
 31 |     obj.dump = assert_not_reached
 32 |     obj.skip()
 33 |   if hasattr(obj, 'id') and type(obj) != fr.Tag:
 34 |     # The creation of myblob should cause objects in stream to get their ids
 35 |     # increased by 1; this shouldn't be depended upon as API by external
 36 |     # projects, I'm just verifying an invariant of the current code.
 37 |     assert fr._IDS._reverse_translation[obj.id] == [obj.id - 1]
 38 | 
 39 | def handle_progress(progress):
 40 |   print(b"Decipher this: "+bytes(reversed(progress.message)))
 41 |   track_everything(progress)
 42 | 
 43 | def handle_checkpoint(checkpoint_object):
 44 |   # Flip a coin; see if we want to pass the checkpoint through.
 45 |   if random.randint(0,1) == 0:
 46 |     checkpoint_object.dump(parser._output)
 47 |   track_everything(checkpoint_object)
 48 | 
 49 | mystr = b'This is the contents of the blob'
 50 | compare = b"Blob:\n  blob\n  mark :1\n  data %d\n  %s" % (len(mystr), mystr)
 51 | # Next line's only purpose is testing code coverage of something that helps
 52 | # debugging git-filter-repo; it is NOT something external folks should depend
 53 | # upon.
 54 | myblob = fr.Blob(mystr)
 55 | assert bytes(myblob) == compare
 56 | # Everyone should be using RepoFilter objects, not FastExportParser.  But for
 57 | # testing purposes...
 58 | parser = fr.FastExportParser(blob_callback   = track_everything,
 59 |                              reset_callback  = track_everything,
 60 |                              commit_callback = track_everything,
 61 |                              tag_callback    = track_everything,
 62 |                              progress_callback = handle_progress,
 63 |                              checkpoint_callback = handle_checkpoint)
 64 | 
 65 | parser.run(input = sys.stdin.detach(),
 66 |            output = open(os.devnull, 'bw'))
 67 | # DO NOT depend upon or use _IDS directly you external script writers.  I'm
 68 | # only testing here for code coverage; the capacity exists to help debug
 69 | # git-filter-repo itself, not for external folks to use.
 70 | assert str(fr._IDS).startswith("Current count: 5")
 71 | print("Found {} blobs/commits and {} other objects"
 72 |       .format(total_objects['common'], total_objects['uncommon']))
 73 | 
 74 | 
 75 | stream = io.BytesIO(textwrap.dedent('''
 76 |   blob
 77 |   mark :1
 78 |   data 5
 79 |   hello
 80 | 
 81 |   commit refs/heads/A
 82 |   mark :2
 83 |   author Just Me <just@here.org> 1234567890 -0200
 84 |   committer Just Me <just@here.org> 1234567890 -0200
 85 |   data 2
 86 |   A
 87 | 
 88 |   commit refs/heads/B
 89 |   mark :3
 90 |   author Just Me <just@here.org> 1234567890 -0200
 91 |   committer Just Me <just@here.org> 1234567890 -0200
 92 |   data 2
 93 |   B
 94 |   from :2
 95 |   M 100644 :1 greeting
 96 | 
 97 |   reset refs/heads/B
 98 |   from :3
 99 | 
100 |   commit refs/heads/C
101 |   mark :4
102 |   author Just Me <just@here.org> 1234567890 -0200
103 |   committer Just Me <just@here.org> 1234567890 -0200
104 |   data 2
105 |   C
106 |   from :3
107 |   M 100644 :1 salutation
108 | 
109 |   '''[1:]).encode())
110 | 
111 | counts = collections.Counter()
112 | def look_for_reset(obj, metadata):
113 |   print("Processing {}".format(obj))
114 |   counts[type(obj)] += 1
115 |   if type(obj) == fr.Reset:
116 |     assert obj.ref == b'refs/heads/B'
117 | 
118 | # Use all kinds of internals that external scripts should NOT use and which
119 | # are likely to break in the future, just to verify a few invariants...
120 | args = fr.FilteringOptions.parse_args(['--stdin', '--dry-run',
121 |                                        '--path', 'salutation'])
122 | filter = fr.RepoFilter(args,
123 |                        blob_callback   = look_for_reset,
124 |                        reset_callback  = look_for_reset,
125 |                        commit_callback = look_for_reset,
126 |                        tag_callback    = look_for_reset)
127 | filter._input = stream
128 | filter._setup_output()
129 | filter._sanity_checks_handled = True
130 | filter.run()
131 | assert counts == collections.Counter({fr.Blob: 1, fr.Commit: 3, fr.Reset: 1})
132 | 


--------------------------------------------------------------------------------
/t/t9392-filter-repo-python-callback.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | test_description='Usage of git-filter-repo with python callbacks'
  4 | . ./test-lib.sh
  5 | 
  6 | export PATH=$(dirname $TEST_DIRECTORY):$PATH  # Put git-filter-repo in PATH
  7 | 
  8 | setup()
  9 | { # Build a sample repo in directory $1: six commits, tags v1.0 and v2.0, branch "testing"
 10 | 	git init "$1" &&
 11 | 	(
 12 | 		cd "$1" &&
 13 | 		echo hello > world &&
 14 | 		git add world &&
 15 | 		test_tick &&
 16 | 		git commit -m initial &&
 17 | 		printf "The launch code is 1-2-3-4." > secret &&
 18 | 		git add secret &&
 19 | 		test_tick &&
 20 | 		git commit -m "Sssh.  Dont tell no one" &&
 21 | 		echo A file that you cant trust > file.doc &&
 22 | 		echo there >> world &&
 23 | 		git add file.doc world &&
 24 | 		test_tick &&
 25 | 		printf "Random useless changes\n\nLet us be like the marketing group.  Marketing is staffed with pansies" | git commit -F - &&
 26 | 		echo Do not use a preposition to end a setence with > advice &&
 27 | 		git add advice &&
 28 | 		test_tick &&
 29 | 		GIT_AUTHOR_NAME="Copy N. Paste" git commit -m "hypocrisy is fun" &&
 30 | 		echo Avoid cliches like the plague >> advice &&
 31 | 		test_tick &&
 32 | 		GIT_AUTHOR_EMAIL="foo@my.crp" git commit -m "it is still fun" advice &&
 33 | 		echo "  \$Id: A bunch of junk$" > foobar.c &&
 34 | 		git add foobar.c &&
 35 | 		test_tick &&
 36 | 		git commit -m "Brain damage" &&
 37 | 
 38 |                 git tag v1.0 HEAD~3 &&
 39 |                 git tag -a -m 'Super duper snazzy release' v2.0 HEAD~1 &&
 40 |                 git branch testing master &&
 41 | 
 42 | 		# Make it look like a fresh clone (avoid need for --force)
 43 | 		git gc &&
 44 | 		git remote add origin . &&
 45 | 		git update-ref refs/remotes/origin/master refs/heads/master &&
 46 | 		git update-ref refs/remotes/origin/testing refs/heads/testing
 47 | 	)
 48 | }
 49 | 
 50 | test_expect_success '--filename-callback' '
 51 | 	setup filename-callback &&
 52 | 	(
 53 | 		cd filename-callback &&
 54 | 		git filter-repo --filename-callback "return None if filename.endswith(b\".doc\") else b\"src/\"+filename" &&
 55 | 		git log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&
 56 | 		! grep file.doc f &&
 57 | 		COMPARE=$(wc -l <f) &&
 58 | 		grep src/ f >filtered_f &&
 59 | 		test_line_count = $COMPARE filtered_f
 60 | 	)
 61 | '
 62 | 
 63 | test_expect_success '--file-info-callback acting like --filename-callback' '
 64 | 	setup fileinfo-as-filename-callback &&
 65 | 	(
 66 | 		cd fileinfo-as-filename-callback &&
 67 | 		git filter-repo --file-info-callback "return (None if filename.endswith(b\".doc\") else b\"src/\"+filename, mode, blob_id)" &&
 68 | 		git log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&
 69 | 		! grep file.doc f &&
 70 | 		COMPARE=$(wc -l <f) &&
 71 | 		grep src/ f >filtered_f &&
 72 | 		test_line_count = $COMPARE filtered_f
 73 | 	)
 74 | '
 75 | 
 76 | test_expect_success '--message-callback' '
 77 | 	setup message-callback &&
 78 | 	(
 79 | 		cd message-callback &&
 80 | 		git filter-repo --message-callback "return b\"TLDR: \"+message[0:5]" &&
 81 | 		git log --format=%s >log-messages &&
 82 | 		grep TLDR:...... log-messages >modified-messages &&
 83 | 		test_line_count = 6 modified-messages
 84 | 	)
 85 | '
 86 | 
 87 | test_expect_success '--name-callback' '
 88 | 	setup name-callback &&
 89 | 	(
 90 | 		cd name-callback &&
 91 | 		git filter-repo --name-callback "return name.replace(b\"N.\", b\"And\")" &&
 92 | 		git log --format=%an >log-person-names &&
 93 | 		grep Copy.And.Paste log-person-names
 94 | 	)
 95 | '
 96 | 
 97 | test_expect_success '--email-callback' '
 98 | 	setup email-callback &&
 99 | 	(
100 | 		cd email-callback &&
101 | 		git filter-repo --email-callback "return email.replace(b\".com\", b\".org\")" &&
102 | 		git log --format=%ae%n%ce >log-emails &&
103 | 		! grep .com log-emails &&
104 | 		grep .org log-emails
105 | 	)
106 | '
107 | 
108 | test_expect_success '--refname-callback' '
109 | 	setup refname-callback &&
110 | 	(
111 | 		cd refname-callback &&
112 | 		git filter-repo --refname-callback "
113 |                     dir,path = os.path.split(refname)
114 |                     return dir+b\"/prefix-\"+path" &&
115 | 		git show-ref | grep refs/heads/prefix-master &&
116 | 		git show-ref | grep refs/tags/prefix-v1.0 &&
117 | 		git show-ref | grep refs/tags/prefix-v2.0
118 | 	)
119 | '
120 | 
121 | test_expect_success '--refname-callback sanity check' '
122 | 	setup refname-sanity-check &&
123 | 	(
124 | 		cd refname-sanity-check &&
125 | 
126 | 		test_must_fail git filter-repo --refname-callback "return re.sub(b\"tags\", b\"other-tags\", refname)" 2>../err &&
127 | 		test_i18ngrep "fast-import requires tags to be in refs/tags/ namespace" ../err &&
128 | 		rm ../err
129 | 	)
130 | '
131 | 
132 | test_expect_success '--blob-callback' '
133 | 	setup blob-callback &&
134 | 	(
135 | 		cd blob-callback &&
136 | 		git log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&
137 | 		test_line_count = 5 f &&
138 | 		rm f &&
139 | 		git filter-repo --blob-callback "if len(blob.data) > 25: blob.skip()" &&
140 | 		git log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&
141 | 		test_line_count = 2 f
142 | 	)
143 | '
144 | 
145 | test_expect_success '--file-info-callback acting like --blob-callback' '
146 | 	setup fileinfo-as-blob-callback &&
147 | 	(
148 | 		cd fileinfo-as-blob-callback &&
149 | 		git log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&
150 | 		test_line_count = 5 f &&
151 | 		rm f &&
152 | 		git filter-repo --file-info-callback "
153 | 		    size = value.get_size_by_identifier(blob_id)
154 | 		    return (None if size > 25 else filename, mode, blob_id)" &&
155 | 		git log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&
156 | 		test_line_count = 2 f
157 | 	)
158 | '
159 | 
160 | test_expect_success '--commit-callback' '
161 | 	setup commit-callback &&
162 | 	(
163 | 		cd commit-callback &&
164 | 		git filter-repo --commit-callback "
165 |                     commit.committer_name  = commit.author_name
166 |                     commit.committer_email = commit.author_email
167 |                     commit.committer_date  = commit.author_date
168 |                     for change in commit.file_changes:
169 |                       change.mode = b\"100755\"
170 |                     " &&
171 | 		git log --format=%ae%n%ce >log-emails &&
172 | 		! grep committer@example.com log-emails &&
173 | 		git log --raw | grep ^: >file-changes &&
174 | 		! grep 100644 file-changes &&
175 | 		grep 100755 file-changes
176 | 	)
177 | '
178 | 
179 | test_expect_success '--tag-callback' '
180 | 	setup tag-callback &&
181 | 	(
182 | 		cd tag-callback &&
183 | 		git filter-repo --tag-callback "
184 |                     tag.tagger_name = b\"Dr. \"+tag.tagger_name
185 |                     tag.message = b\"Awesome sauce \"+tag.message
186 |                     " &&
187 | 		git cat-file -p v2.0 | grep ^tagger.Dr\\. &&
188 | 		git cat-file -p v2.0 | grep ^Awesome.sauce.Super
189 | 	)
190 | '
191 | 
192 | test_expect_success '--reset-callback' '
193 | 	setup reset-callback &&
194 | 	(
195 | 		cd reset-callback &&
196 | 		git filter-repo --reset-callback "reset.from_ref = 3" &&
197 | 		test $(git rev-parse testing) = $(git rev-parse master~3)
198 | 	)
199 | '
200 | 
201 | test_expect_success 'callback has return statement sanity check' '
202 | 	setup callback_return_sanity &&
203 | 	(
204 | 		cd callback_return_sanity &&
205 | 
206 | 		test_must_fail git filter-repo --filename-callback "filename + b\".txt\"" 2>../err&&
207 | 		test_i18ngrep "Error: --filename-callback should have a return statement" ../err &&
208 | 		rm ../err
209 | 	)
210 | '
211 | 
212 | test_expect_success 'Callback read from a file' '
213 | 	setup name-callback-from-file &&
214 | 	(
215 | 		cd name-callback-from-file &&
216 | 		echo "return name.replace(b\"N.\", b\"And\")" >../name-func &&
217 | 		git filter-repo --name-callback ../name-func &&
218 | 		git log --format=%an >log-person-names &&
219 | 		grep Copy.And.Paste log-person-names
220 | 	)
221 | '
222 | 
223 | test_expect_success 'Filtering a blob to make it match previous version' '
224 | 	test_create_repo remove_unique_bits_of_blob &&
225 | 	(
226 | 		cd remove_unique_bits_of_blob &&
227 | 
228 | 		test_write_lines foo baz >metasyntactic_names &&
229 | 		git add metasyntactic_names &&
230 | 		git commit -m init &&
231 | 
232 | 		test_write_lines foo bar baz >metasyntactic_names &&
233 | 		git add metasyntactic_names &&
234 | 		git commit -m second &&
235 | 
236 | 		git filter-repo --force --blob-callback "blob.data = blob.data.replace(b\"\\nbar\", b\"\")" &&
237 | 
238 | 		echo 1 >expect &&
239 | 		git rev-list --count HEAD >actual &&
240 | 		test_cmp expect actual
241 | 	)
242 | '
243 | 
244 | test_expect_success 'tweaking just a tag' '
245 | 	test_create_repo tweaking_just_a_tag &&
246 | 	(
247 | 		cd tweaking_just_a_tag &&
248 | 
249 | 		test_commit foo &&
250 | 		git tag -a -m "Here is a tag" mytag &&
251 | 
252 | 		git filter-repo --force --refs mytag ^mytag^{commit} --name-callback "return name.replace(b\"Mitter\", b\"L D\")" &&
253 | 
254 | 		git cat-file -p mytag | grep C.O.L.D
255 | 	)
256 | '
257 | 
258 | test_expect_success '--file-info-callback messing with history' '
259 | 	setup messing_with_files &&
260 | 	(
261 | 		cd messing_with_files &&
262 | 
263 | 		echo "1-2-3-4==>1-2-3-4-5" >replacement &&
264 | 		# Trying to count the levels of backslash escaping is not fun.
265 | 		echo "regex:\\\$[^\$]*\\\$==>cvs is lame" >>replacement &&
266 | 		git filter-repo --force --file-info-callback "
267 | 		    size = value.get_size_by_identifier(blob_id)
268 | 		    contents = value.get_contents_by_identifier(blob_id)
269 | 		    if not value.is_binary(contents):
270 | 		      contents = value.apply_replace_text(contents)
271 | 		    if contents[-1] != 10:
272 | 		      contents += bytes([10])
273 | 		    blob_id = value.insert_file_with_contents(contents)
274 | 		    newname = bytes(reversed(filename))
275 | 		    if size == 27 and len(contents) == 27:
276 | 		      newname = None
277 | 		    return (newname, mode, blob_id)
278 |                     " --replace-text replacement &&
279 | 
280 | 		cat <<-EOF >expect &&
281 | 		c.raboof
282 | 		dlrow
283 | 		ecivda
284 | 		terces
285 | 		EOF
286 | 
287 | 		git ls-files >actual &&
288 | 		test_cmp expect actual &&
289 | 
290 | 		echo "The launch code is 1-2-3-4-5." >expect &&
291 | 		test_cmp expect terces &&
292 | 
293 | 		echo "  cvs is lame" >expect &&
294 | 		test_cmp expect c.raboof
295 | 	)
296 | '
297 | 
298 | test_expect_success '--file-info-callback and deletes and drops' '
299 | 	setup file_info_deletes_drops &&
300 | 	(
301 | 		cd file_info_deletes_drops &&
302 | 
303 | 		git rm file.doc &&
304 | 		git commit -m "Nuke doc file" &&
305 | 
306 | 		git filter-repo --force --file-info-callback "
307 | 		    size = value.get_size_by_identifier(blob_id)
308 | 		    (newname, newmode) = (filename, mode)
309 | 		    if filename == b\"world\" and size == 12:
310 | 		      newname = None
311 | 		    if filename == b\"advice\" and size == 77:
312 | 		      newmode = None
313 | 		    return (newname, newmode, blob_id)
314 |                     " &&
315 | 
316 | 		cat <<-EOF >expect &&
317 | 		foobar.c
318 | 		secret
319 | 		world
320 | 		EOF
321 | 
322 | 		echo 1 >expect &&
323 | 		git rev-list --count HEAD -- world >actual &&
324 | 		test_cmp expect actual &&
325 | 
326 | 		echo 2 >expect &&
327 | 		git rev-list --count HEAD -- advice >actual &&
328 | 		test_cmp expect actual &&
329 | 
330 | 		echo hello >expect &&
331 | 		test_cmp expect world
332 | 	)
333 | '
334 | 
335 | test_lazy_prereq UNIX2DOS '
336 |         unix2dos -h
337 |         test $? -ne 127
338 | '
339 | 
340 | test_expect_success UNIX2DOS '--file-info-callback acting like lint-history' '
341 | 	setup lint_history_replacement &&
342 | 	(
343 | 		cd lint_history_replacement &&
344 | 		git ls-files -s | grep -v file.doc >expect &&
345 | 
346 | 		git filter-repo --force --file-info-callback "
347 | 		    if not filename.endswith(b\".doc\"):
348 | 		      return (filename, mode, blob_id)
349 | 
350 | 		    if blob_id in value.data:
351 | 		      return (filename, mode, value.data[blob_id])
352 | 
353 | 		    contents = value.get_contents_by_identifier(blob_id)
354 | 		    tmpfile = os.path.basename(filename)
355 | 		    with open(tmpfile, \"wb\") as f:
356 | 		      f.write(contents)
357 | 		    subprocess.check_call([\"unix2dos\", tmpfile])
358 | 		    with open(tmpfile, \"rb\") as f:
359 | 		      contents = f.read()
360 | 		    new_blob_id = value.insert_file_with_contents(contents)
361 | 
362 | 		    value.data[blob_id] = new_blob_id
363 | 		    return (filename, mode, new_blob_id)
364 |                     " &&
365 | 
366 | 		git ls-files -s | grep -v file.doc >actual &&
367 | 		test_cmp expect actual &&
368 | 
369 | 		printf "A file that you cant trust\r\n" >expect &&
370 | 		test_cmp expect file.doc
371 | 	)
372 | '
373 | 
374 | test_done
375 | 


--------------------------------------------------------------------------------
/t/t9393/lfs:
--------------------------------------------------------------------------------
  1 | feature done
  2 | # Simple repo with a few files, some of them lfs.  Note that the lfs object
  3 | # ids and the original-oid directives are very fake, but make it easy to
  4 | # track things.
  5 | blob
  6 | mark :1
  7 | original-oid 0000000000000000000000000000000000000001
  8 | data 39
  9 | L* filter=lfs diff=lfs merge=lfs -text
 10 | 
 11 | blob
 12 | mark :2
 13 | original-oid 0000000000000000000000000000000000000002
 14 | data 126
 15 | version https://git-lfs.github.com/spec/v1
 16 | oid sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 17 | size 1
 18 | 
 19 | blob
 20 | mark :3
 21 | original-oid 0000000000000000000000000000000000000003
 22 | data 126
 23 | version https://git-lfs.github.com/spec/v1
 24 | oid sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
 25 | size 2
 26 | 
 27 | blob
 28 | mark :4
 29 | original-oid 0000000000000000000000000000000000000004
 30 | data 126
 31 | version https://git-lfs.github.com/spec/v1
 32 | oid sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
 33 | size 3
 34 | 
 35 | blob
 36 | mark :5
 37 | original-oid 0000000000000000000000000000000000000005
 38 | data 126
 39 | version https://git-lfs.github.com/spec/v1
 40 | oid sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
 41 | size 4
 42 | 
 43 | blob
 44 | mark :6
 45 | original-oid 0000000000000000000000000000000000000006
 46 | data 6
 47 | stuff
 48 | 
 49 | blob
 50 | mark :7
 51 | original-oid 0000000000000000000000000000000000000007
 52 | data 1040
 53 | 0000000000000000000000000000000000000000000000000000000000000000
 54 | 1111111111111111111111111111111111111111111111111111111111111111
 55 | 2222222222222222222222222222222222222222222222222222222222222222
 56 | 3333333333333333333333333333333333333333333333333333333333333333
 57 | 4444444444444444444444444444444444444444444444444444444444444444
 58 | 5555555555555555555555555555555555555555555555555555555555555555
 59 | 6666666666666666666666666666666666666666666666666666666666666666
 60 | 7777777777777777777777777777777777777777777777777777777777777777
 61 | 8888888888888888888888888888888888888888888888888888888888888888
 62 | 9999999999999999999999999999999999999999999999999999999999999999
 63 | aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 64 | bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
 65 | cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
 66 | dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
 67 | eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
 68 | ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
 69 | 
 70 | reset refs/heads/main
 71 | commit refs/heads/main
 72 | mark :10
 73 | original-oid 0000000000000000000000000000000000000010
 74 | author Little O. Me <me@little.net> 1234567890 -0700
 75 | committer Little O. Me <me@little.net> 1234567890 -0700
 76 | data 13
 77 | First commit
 78 | M 100644 :1 .gitattributes
 79 | M 100644 :4 LB
 80 | 
 81 | commit refs/heads/main
 82 | mark :11
 83 | original-oid 0000000000000000000000000000000000000011
 84 | author Little O. Me <me@little.net> 1234567891 -0700
 85 | committer Little O. Me <me@little.net> 1234567891 -0700
 86 | data 14
 87 | Second commit
 88 | M 100644 :2 LA
 89 | 
 90 | commit refs/heads/main
 91 | mark :12
 92 | original-oid 0000000000000000000000000000000000000012
 93 | author Little O. Me <me@little.net> 1234567892 -0700
 94 | committer Little O. Me <me@little.net> 1234567892 -0700
 95 | data 13
 96 | Third commit
 97 | M 100644 :6 Z
 98 | 
 99 | commit refs/heads/main
100 | mark :13
101 | original-oid 0000000000000000000000000000000000000013
102 | author Little O. Me <me@little.net> 1234567893 -0700
103 | committer Little O. Me <me@little.net> 1234567893 -0700
104 | data 14
105 | Fourth commit
106 | M 100644 :4 LC
107 | 
108 | commit refs/heads/main
109 | mark :14
110 | original-oid 0000000000000000000000000000000000000014
111 | author Little O. Me <me@little.net> 1234567894 -0700
112 | committer Little O. Me <me@little.net> 1234567894 -0700
113 | data 13
114 | Fifth commit
115 | M 100644 :3 LA
116 | 
117 | commit refs/heads/main
118 | mark :15
119 | original-oid 0000000000000000000000000000000000000015
120 | author Little O. Me <me@little.net> 1234567895 -0700
121 | committer Little O. Me <me@little.net> 1234567895 -0700
122 | data 13
123 | Sixth commit
124 | M 100644 :7 Y
125 | D Z
126 | 
127 | commit refs/heads/main
128 | mark :16
129 | original-oid 0000000000000000000000000000000000000016
130 | author Little O. Me <me@little.net> 1234567896 -0700
131 | committer Little O. Me <me@little.net> 1234567896 -0700
132 | data 15
133 | Seventh commit
134 | M 100644 :5 LD
135 | 
136 | commit refs/heads/main
137 | mark :17
138 | original-oid 0000000000000000000000000000000000000017
139 | author Little O. Me <me@little.net> 1234567897 -0700
140 | committer Little O. Me <me@little.net> 1234567897 -0700
141 | data 14
142 | Eighth commit
143 | M 100644 :2 LA
144 | 
145 | done
146 | 


--------------------------------------------------------------------------------
/t/t9393/simple:
--------------------------------------------------------------------------------
 1 | feature done
 2 | # Simple repo with a few files, and two branches with no common history.
 3 | # Note that the original-oid directives are very fake, but make it easy to
 4 | # track things.
 5 | blob
 6 | mark :1
 7 | original-oid 0000000000000000000000000000000000000001
 8 | data 16
 9 | file 1 contents
10 | 
11 | blob
12 | mark :2
13 | original-oid 0000000000000000000000000000000000000002
14 | data 16
15 | file 2 contents
16 | 
17 | blob
18 | mark :3
19 | original-oid 0000000000000000000000000000000000000003
20 | data 16
21 | file 3 contents
22 | 
23 | blob
24 | mark :4
25 | original-oid 0000000000000000000000000000000000000004
26 | data 16
27 | file 4 contents
28 | 
29 | reset refs/heads/orphan-me
30 | commit refs/heads/orphan-me
31 | mark :5
32 | original-oid 0000000000000000000000000000000000000009
33 | author Little O. Me <me@little.net> 1535228562 -0700
34 | committer Little O. Me <me@little.net> 1535228562 -0700
35 | data 8
36 | Initial
37 | M 100644 :1 nuke-me
38 | 
39 | commit refs/heads/orphan-me
40 | mark :6
41 | original-oid 000000000000000000000000000000000000000A
42 | author Little 'ol Me <me@laptop.(none)> 1535229544 -0700
43 | committer Little 'ol Me <me@laptop.(none)> 1535229544 -0700
44 | data 9
45 | Tweak it
46 | from :5
47 | M 100644 :4 nuke-me
48 | 
49 | reset refs/heads/master
50 | commit refs/heads/master
51 | mark :7
52 | original-oid 000000000000000000000000000000000000000B
53 | author Little O. Me <me@machine52.little.net> 1535229523 -0700
54 | committer Little O. Me <me@machine52.little.net> 1535229523 -0700
55 | data 15
56 | Initial commit
57 | M 100644 :1 fileA
58 | 
59 | commit refs/heads/master
60 | mark :8
61 | original-oid 000000000000000000000000000000000000000C
62 | author Lit.e Me <me@fire.com> 1535229559 -0700
63 | committer Lit.e Me <me@fire.com> 1535229580 -0700
64 | data 10
65 | Add fileB
66 | from :7
67 | M 100644 :2 fileB
68 | 
69 | commit refs/heads/master
70 | mark :9
71 | original-oid 000000000000000000000000000000000000000D
72 | author Little Me <me@bigcompany.com> 1535229601 -0700
73 | committer Little Me <me@bigcompany.com> 1535229601 -0700
74 | data 10
75 | Add fileC
76 | from :8
77 | M 100644 :3 fileC
78 | 
79 | commit refs/heads/master
80 | mark :10
81 | original-oid 000000000000000000000000000000000000000E
82 | author Little Me <me@bigcompany.com> 1535229618 -0700
83 | committer Little Me <me@bigcompany.com> 1535229618 -0700
84 | data 10
85 | Add fileD
86 | from :9
87 | M 100644 :4 fileD
88 | 
89 | tag v1.0
90 | from :10
91 | original-oid 000000000000000000000000000000000000000F
92 | tagger Little John <second@merry.men> 1535229637 -0700
93 | data 5
94 | v1.0
95 | 
96 | done
97 | 


--------------------------------------------------------------------------------
/t/t9394/date-order:
--------------------------------------------------------------------------------
 1 | feature done
 2 | blob
 3 | mark :1
 4 | data 8
 5 | initial
 6 | 
 7 | reset refs/heads/master
 8 | commit refs/heads/master
 9 | mark :2
10 | author Little O. Me <me@little.net> 1535228562 -0700
11 | committer Little O. Me <me@little.net> 1535228562 -0700
12 | data 8
13 | Initial
14 | M 100644 :1 filename
15 | 
16 | commit refs/heads/master
17 | mark :3
18 | author Little Me <me@bigcompany.com> 1535229601 -0700
19 | committer Little Me <me@bigcompany.com> 1535229601 -0700
20 | data 2
21 | A
22 | from :2
23 | 
24 | commit refs/heads/master
25 | mark :4
26 | author Little Me <me@bigcompany.com> 1535229602 -0700
27 | committer Little Me <me@bigcompany.com> 1535229602 -0700
28 | data 2
29 | B
30 | from :2
31 | 
32 | commit refs/heads/master
33 | mark :5
34 | author Little Me <me@bigcompany.com> 1535229603 -0700
35 | committer Little Me <me@bigcompany.com> 1535229603 -0700
36 | data 2
37 | C
38 | from :3
39 | 
40 | commit refs/heads/master
41 | mark :6
42 | author Little Me <me@bigcompany.com> 1535229604 -0700
43 | committer Little Me <me@bigcompany.com> 1535229604 -0700
44 | data 2
45 | D
46 | from :4
47 | 
48 | commit refs/heads/master
49 | mark :7
50 | author Little Me <me@bigcompany.com> 1535229605 -0700
51 | committer Little Me <me@bigcompany.com> 1535229605 -0700
52 | data 6
53 | merge
54 | from :5
55 | merge :6
56 | 
57 | done
58 | 


--------------------------------------------------------------------------------