├── .github
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ ├── ci.yml
│ └── release.yml
├── .gitignore
├── .idea
├── copyright
│ ├── BFG_GPL_v3.xml
│ └── profiles_settings.xml
└── scopes
│ ├── Files_for_Copyright.xml
│ └── scope_settings.xml
├── .tool-versions
├── BUILD.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── backers.md
├── bfg-benchmark
├── build.sbt
├── resources
│ ├── jars
│ │ └── grabJars.sh
│ └── repos
│ │ ├── chromium-src
│ │ └── commands
│ │ │ └── issue-23
│ │ │ └── bfg.txt
│ │ ├── gcc
│ │ └── commands
│ │ │ └── delete-file
│ │ │ ├── bfg.txt
│ │ │ └── gfb.txt
│ │ ├── git
│ │ └── commands
│ │ │ └── delete-file
│ │ │ ├── bfg.txt
│ │ │ └── gfb.txt
│ │ ├── github-gem
│ │ └── commands
│ │ │ └── delete-file
│ │ │ ├── bfg.txt
│ │ │ └── gfb.txt
│ │ ├── intellij
│ │ └── commands
│ │ │ ├── delete-binary-resources
│ │ │ └── bfg.txt
│ │ │ ├── delete-file
│ │ │ ├── bfg.txt
│ │ │ └── gfb.txt
│ │ │ └── git-lfs-binary-resources
│ │ │ └── bfg.txt
│ │ ├── jgit
│ │ └── commands
│ │ │ ├── delete-file
│ │ │ ├── bfg.txt
│ │ │ └── gfb.txt
│ │ │ ├── replace-1-existing-string
│ │ │ ├── bfg.txt
│ │ │ └── passwords.1-existing-string.txt
│ │ │ ├── replace-20-existing-strings
│ │ │ ├── bfg.txt
│ │ │ └── passwords.20-existing-strings.txt
│ │ │ └── replace-500-existing-strings
│ │ │ ├── bfg.txt
│ │ │ └── passwords.500-existing-strings.txt
│ │ ├── linux
│ │ └── commands
│ │ │ └── delete-file
│ │ │ ├── bfg.txt
│ │ │ └── gfb.txt
│ │ ├── rails
│ │ └── commands
│ │ │ └── delete-file
│ │ │ ├── bfg.txt
│ │ │ └── gfb.txt
│ │ └── wine
│ │ └── commands
│ │ └── delete-file
│ │ ├── bfg.txt
│ │ └── gfb.txt
└── src
│ ├── main
│ └── scala
│ │ ├── Benchmark.scala
│ │ ├── BenchmarkConfig.scala
│ │ ├── JavaVersion.scala
│ │ ├── lib
│ │ ├── Repo.scala
│ │ └── Timing.scala
│ │ └── model
│ │ ├── BFGJar.scala
│ │ ├── InvocableEngine.scala
│ │ ├── InvocableEngineSet.scala
│ │ └── Java.scala
│ └── test
│ └── scala
│ └── JavaVersionSpec.scala
├── bfg-library
├── build.sbt
└── src
│ ├── main
│ └── scala
│ │ └── com
│ │ └── madgag
│ │ ├── collection
│ │ └── concurrent
│ │ │ ├── ConcurrentMultiMap.scala
│ │ │ └── ConcurrentSet.scala
│ │ ├── git
│ │ ├── LFS.scala
│ │ └── bfg
│ │ │ ├── GitUtil.scala
│ │ │ ├── cleaner
│ │ │ ├── BlobCharsetDetector.scala
│ │ │ ├── BlobTextModifier.scala
│ │ │ ├── LfsBlobConverter.scala
│ │ │ ├── ObjectIdCleaner.scala
│ │ │ ├── ObjectIdSubstitutor.scala
│ │ │ ├── RepoRewriter.scala
│ │ │ ├── Reporter.scala
│ │ │ ├── TreeBlobModifier.scala
│ │ │ ├── commits.scala
│ │ │ ├── kit
│ │ │ │ └── BlobInserter.scala
│ │ │ ├── package.scala
│ │ │ ├── protection
│ │ │ │ ├── ProtectedObjectCensus.scala
│ │ │ │ └── ProtectedObjectDirtReport.scala
│ │ │ └── treeblobs.scala
│ │ │ ├── memo.scala
│ │ │ ├── model
│ │ │ ├── Commit.scala
│ │ │ ├── Footer.scala
│ │ │ └── package.scala
│ │ │ └── timing.scala
│ │ ├── inclusion
│ │ └── inclusion.scala
│ │ └── text
│ │ ├── ByteSize.scala
│ │ ├── Tables.scala
│ │ └── text.scala
│ └── test
│ ├── resources
│ └── sample-repos
│ │ ├── deep-history.zip
│ │ ├── encodings.git.zip
│ │ ├── example.git.zip
│ │ ├── exampleWithInitialCleanHistory.git.zip
│ │ ├── folder-example.git.zip
│ │ ├── footers.git.zip
│ │ └── taleOfTwoBranches.git.zip
│ └── scala
│ └── com
│ └── madgag
│ ├── git
│ ├── LFSSpec.scala
│ └── bfg
│ │ ├── GitUtilSpec.scala
│ │ ├── MessageFooterSpec.scala
│ │ ├── TreeEntrySpec.scala
│ │ ├── cleaner
│ │ ├── LfsBlobConverterSpec.scala
│ │ ├── ObjectIdCleanerSpec.scala
│ │ ├── ObjectIdSubstitutorSpec.scala
│ │ ├── RepoRewriteSpec.scala
│ │ └── TreeBlobModifierSpec.scala
│ │ └── model
│ │ └── CommitSpec.scala
│ └── text
│ └── ByteSizeSpecs.scala
├── bfg-test
├── build.sbt
└── src
│ └── main
│ └── scala
│ └── com
│ └── madgag
│ └── git
│ └── bfg
│ └── test
│ └── unpackedRepo.scala
├── bfg
├── build.sbt
└── src
│ ├── main
│ └── scala
│ │ └── com
│ │ └── madgag
│ │ └── git
│ │ └── bfg
│ │ └── cli
│ │ ├── CLIConfig.scala
│ │ └── Main.scala
│ └── test
│ ├── resources
│ └── sample-repos
│ │ ├── annotatedTagExample.git.zip
│ │ ├── badEncoding.git.zip
│ │ ├── badRepoContainingDotGitFolder.git.zip
│ │ ├── branchNameWithASlash.git.zip
│ │ ├── corruptTreeDupFileName.git.zip
│ │ ├── example.git.zip
│ │ ├── exampleWithInitialCleanHistory.git.zip
│ │ ├── folder-example.git.zip
│ │ ├── huge10MBCommitMessage.git.zip
│ │ ├── moreThanOneBigBlobWithTheSameSize.git.zip
│ │ ├── repoWithBigBlobs.git.zip
│ │ ├── unwantedSubmodule.git.zip
│ │ └── usedToHaveASubmodule.git.zip
│ └── scala
│ └── com
│ └── madgag
│ └── git
│ └── bfg
│ └── cli
│ ├── CLIConfigSpecs.scala
│ ├── MainSpec.scala
│ ├── MassiveNonFileObjectsRequiresOwnJvmSpec.scala
│ └── test
│ └── unpackedRepo.scala
├── build.sbt
├── project
├── build.properties
├── dependencies.scala
└── plugins.sbt
└── version.sbt
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | I assert that this patch is my own work, and to [simplify the licensing of the BFG Repo-Cleaner](https://github.com/rtyley/bfg-repo-cleaner/blob/master/CONTRIBUTING.md#pull-requests):
2 |
3 | _(choose 1 of these 2 options)_
4 |
5 | - [ ] I assign the copyright on this contribution to Roberto Tyley
6 | - [ ] I disclaim copyright and thus place this contribution in the public domain
7 |
8 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 | on:
3 | workflow_dispatch:
4 | pull_request:
5 |
6 | # triggering CI on the default branch improves caching
7 | # see https://docs.github.com/en/free-pro-team@latest/actions/guides/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache
8 | push:
9 | branches:
10 | - main
11 |
12 | jobs:
13 | test:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/checkout@v4
17 | - uses: guardian/setup-scala@v1
18 | - name: Build and Test
19 | run: sbt -v test
20 | - name: Test Summary
21 | uses: test-summary/action@v2
22 | with:
23 | paths: "test-results/**/TEST-*.xml"
24 | if: always()
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | release:
8 | uses: guardian/gha-scala-library-release-workflow/.github/workflows/reusable-release.yml@v1
9 | permissions: { contents: write, pull-requests: write }
10 | with:
11 | GITHUB_APP_ID: 930725
12 | SONATYPE_PROFILE_NAME: 'com.madgag'
13 | SONATYPE_CREDENTIAL_HOST: 's01.oss.sonatype.org'
14 | secrets:
15 | SONATYPE_TOKEN: ${{ secrets.AUTOMATED_MAVEN_RELEASE_SONATYPE_TOKEN }}
16 | PGP_PRIVATE_KEY: ${{ secrets.AUTOMATED_MAVEN_RELEASE_PGP_SECRET }}
17 | GITHUB_APP_PRIVATE_KEY: ${{ secrets.AUTOMATED_MAVEN_RELEASE_GITHUB_APP_PRIVATE_KEY }}
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | *~
3 | .idea
4 | .idea_modules
5 | *.iml
6 | *.jar
7 | repo.git.zip
8 | **/.project
9 | **/.classpath
10 | **/.settings
11 | .bsp
12 |
13 | .DS_Store
14 | test-results/
15 |
--------------------------------------------------------------------------------
/.idea/copyright/BFG_GPL_v3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/scopes/Files_for_Copyright.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/.tool-versions:
--------------------------------------------------------------------------------
1 | java corretto-11.0.25.9.1
2 |
--------------------------------------------------------------------------------
/BUILD.md:
--------------------------------------------------------------------------------
1 | The BFG is written in Scala, a modern functional language that runs on the JVM - so it
2 | can run anywhere Java can.
3 |
4 | Here's a rough set of instructions for building the BFG, if you don't want to use the
5 | pre-built [downloads](http://rtyley.github.io/bfg-repo-cleaner/#download):
6 |
7 | * Install Java JDK 11 or above
8 | * Install [sbt](https://www.scala-sbt.org/1.x/docs/Setup.html)
9 | * `git clone git@github.com:rtyley/bfg-repo-cleaner.git`
10 | * `cd bfg-repo-cleaner`
11 | * `sbt` <- start the sbt console
12 | * `bfg/assembly` <- download dependencies, run the tests, build the jar
13 |
14 | To find the jar once it's built, just look at the last few lines of output from the
15 | `assembly` task - it'll say something like this:
16 |
17 | ```
18 | [info] Packaging /Users/roberto/development/bfg-repo-cleaner/bfg/target/bfg-1.11.9-SNAPSHOT-master-21d2115.jar ...
19 | [info] Done packaging.
20 | [success] Total time: 19 s, completed 26-Sep-2014 16:05:11
21 | ```
22 |
23 | If you're going to make changes to the Scala code, you may want to use IntelliJ and its Scala
24 | plugin to help with the Scala syntax...!
25 |
26 | If you use [Eclipse IDE](http://www.eclipse.org/), you can set-up your development environment by following these instructions:
27 |
28 | * Install `sbt` and build as-above
29 | * Install [Scala IDE for Eclipse](http://scala-ide.org/) into your Eclipse installation if not already installed
30 | * Add the `sbteclipse-plugin` to your set of local sbt plugins:
31 |
32 | ```
33 | mkdir -p ~/.sbt/1.0/plugins && tee ~/.sbt/1.0/plugins/plugins.sbt < Import -> Existing Projects into Workspace`, browse to your `bfg` working-copy, and ensure that you select `Search for nested projects`
42 | * You should now have the 4 `sbt` projects imported into your Eclipse workspace.
43 |
44 | I personally found Coursera's [online Scala course](https://www.coursera.org/course/progfun) very helpful in
45 | learning Scala, YMMV.
46 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | Issues and Questions
2 | --------------------
3 |
4 | If you've found what looks like a bug, or have a feature request for the BFG, please check
5 | [issues on GitHub](https://github.com/rtyley/bfg-repo-cleaner/issues), and create a new issue
6 | if necessary.
7 |
8 | If you just have a general question, or there's something you don't understand, ask on [stackoverflow.com](http://stackoverflow.com/questions/ask) (tag it with [`git-rewrite-history`](http://stackoverflow.com/questions/tagged/git-rewrite-history) and
9 | [`bfg-repo-cleaner`](http://stackoverflow.com/questions/tagged/bfg-repo-cleaner) so I see it) - there are
10 | many more people who can answer that sort of question on Stack Overflow, so you stand a good chance
11 | of getting your question answered more quickly!
12 |
13 | Pull Requests
14 | -------------
15 |
16 | BFG Repo-Cleaner is licensed under the [GPL v3](http://www.gnu.org/licenses/gpl.html), and to be in the best position to enforce the GPL the copyright status of BFG Repo Cleaner needs to be as simple as possible. To achieve this, contributors should only provide contributions which are **their own work**, and either:
17 |
18 | a) Assign the copyright on the contribution to myself, Roberto Tyley
19 |
20 | **or**
21 |
22 | b) Disclaim copyright on it and thus put it in the public domain
23 |
24 | **Please specify which option you want to use when creating your pull request.**
25 |
26 | See the [GNU FAQ](http://www.gnu.org/licenses/gpl-faq.html#AssignCopyright) for a fuller explanation of the need for this. If you still want to retain copyright on your contribution, let me know and I'll see if we can work something out.
27 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | BFG Repo-Cleaner
2 | ================
3 |
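4 | [![CI](https://github.com/rtyley/bfg-repo-cleaner/actions/workflows/ci.yml/badge.svg)](https://github.com/rtyley/bfg-repo-cleaner/actions/workflows/ci.yml)
5 | [![Release](https://github.com/rtyley/bfg-repo-cleaner/actions/workflows/release.yml/badge.svg)](https://github.com/rtyley/bfg-repo-cleaner/actions/workflows/release.yml)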
6 |
7 | _Removes large or troublesome blobs like git-filter-branch does, but faster - and written in Scala_ - [Fund the BFG](https://j.mp/fund-bfg)
8 |
9 | ```
10 | $ bfg --strip-blobs-bigger-than 1M --replace-text banned.txt repo.git
11 | ```
12 |
13 | The BFG is a simpler, faster ([10 - 720x](https://docs.google.com/spreadsheet/ccc?key=0AsR1d5Zpes8HdER3VGU1a3dOcmVHMmtzT2dsS2xNenc) faster)
14 | alternative to `git-filter-branch` for cleansing bad data out of your Git repository:
15 |
16 | * Removing **Crazy Big Files**
17 | * Removing **Passwords, Credentials** & other **Private data**
18 |
19 | Main documentation for The BFG is here: **https://rtyley.github.io/bfg-repo-cleaner/**
20 |
--------------------------------------------------------------------------------
/backers.md:
--------------------------------------------------------------------------------
1 | Many thanks to supporters of the BFG!
2 | -----
3 |
4 | Contribute towards the open-source development of the BFG on [**BountySource**](https://www.bountysource.com/teams/bfg-repo-cleaner)
5 |
6 | * [Thomas Ferris Nicolaisen](http://www.tfnico.com/) - host of the excellent [GitMinutes](http://www.gitminutes.com) podcast
7 | * [Alec Clews](https://alecthegeek.github.io/)
8 | * [ramtej](https://github.com/ramtej)
9 |
--------------------------------------------------------------------------------
/bfg-benchmark/build.sbt:
--------------------------------------------------------------------------------
1 | import Dependencies.*
2 |
3 | libraryDependencies ++= guava ++ Seq(
4 | madgagCompress,
5 | textmatching,
6 | scopt
7 | )
--------------------------------------------------------------------------------
/bfg-benchmark/resources/jars/grabJars.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | for i in 4.0 5.0 6.0 7.0 12.0 13.0 13.1 13.2
3 | do
4 | VERSION="1.$i"
5 | curl -O "https://repo1.maven.org/maven2/com/madgag/bfg/$VERSION/bfg-$VERSION.jar"
6 | done
7 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/chromium-src/commands/issue-23/bfg.txt:
--------------------------------------------------------------------------------
1 | --delete-files *.{52,50,crx,xib,png,pdf,jpg,zip,jar,pdb,psd,jpeg,dylib,dll,DLL,exe,EXE,vcproj,so,sln,scons,nib,graffle,yuv,webm}
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/gcc/commands/delete-file/bfg.txt:
--------------------------------------------------------------------------------
1 | -D README-fixinc
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/gcc/commands/delete-file/gfb.txt:
--------------------------------------------------------------------------------
1 | --index-filter
2 | git rm --cached --ignore-unmatch gcc/README-fixinc
3 | --prune-empty
4 | --tag-name-filter
5 | cat
6 | --
7 | --all
8 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/git/commands/delete-file/bfg.txt:
--------------------------------------------------------------------------------
1 | -D object.c
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/git/commands/delete-file/gfb.txt:
--------------------------------------------------------------------------------
1 | --index-filter
2 | git rm --cached --ignore-unmatch object.c
3 | --prune-empty
4 | --tag-name-filter
5 | cat
6 | --
7 | --all
8 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/github-gem/commands/delete-file/bfg.txt:
--------------------------------------------------------------------------------
1 | -D Rakefile
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/github-gem/commands/delete-file/gfb.txt:
--------------------------------------------------------------------------------
1 | --index-filter
2 | git rm --cached --ignore-unmatch Rakefile
3 | --prune-empty
4 | --tag-name-filter
5 | cat
6 | --
7 | --all
8 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/intellij/commands/delete-binary-resources/bfg.txt:
--------------------------------------------------------------------------------
1 | --delete-files *.{zip,jar} --no-blob-protection
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/intellij/commands/delete-file/bfg.txt:
--------------------------------------------------------------------------------
1 | -D breakgen.dll
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/intellij/commands/delete-file/gfb.txt:
--------------------------------------------------------------------------------
1 | --index-filter
2 | git rm --cached --ignore-unmatch bin/breakgen.dll
3 | --prune-empty
4 | --tag-name-filter
5 | cat
6 | --
7 | --all
8 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/intellij/commands/git-lfs-binary-resources/bfg.txt:
--------------------------------------------------------------------------------
1 | --convert-to-git-lfs *.{zip,jar,exe,dll} --no-blob-protection
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/jgit/commands/delete-file/bfg.txt:
--------------------------------------------------------------------------------
1 | -D make_jgit.sh
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/jgit/commands/delete-file/gfb.txt:
--------------------------------------------------------------------------------
1 | --index-filter
2 | git rm --cached --ignore-unmatch make_jgit.sh
3 | --prune-empty
4 | --tag-name-filter
5 | cat
6 | --
7 | --all
8 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/jgit/commands/replace-1-existing-string/bfg.txt:
--------------------------------------------------------------------------------
1 | --replace-text passwords.1-existing-string.txt
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/jgit/commands/replace-1-existing-string/passwords.1-existing-string.txt:
--------------------------------------------------------------------------------
1 | invalidAdvertisementOf
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/jgit/commands/replace-20-existing-strings/bfg.txt:
--------------------------------------------------------------------------------
1 | --replace-text passwords.20-existing-strings.txt
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/jgit/commands/replace-20-existing-strings/passwords.20-existing-strings.txt:
--------------------------------------------------------------------------------
1 | invalidAdvertisementOf
2 | abbreviationLengthMustBeNonNegative
3 | abortingRebase
4 | abortingRebaseFailed
5 | abortingRebaseFailedNoOrigHead
6 | advertisementCameBefore
7 | advertisementOfCameBefore
8 | amazonS3ActionFailed
9 | amazonS3ActionFailedGivingUp
10 | ambiguousObjectAbbreviation
11 | aNewObjectIdIsRequired
12 | anExceptionOccurredWhileTryingToAddTheIdOfHEAD
13 | anSSHSessionHasBeenAlreadyCreated
14 | applyingCommitnvalidType
15 | corruptObjectInvalidType2
16 | corruptObjectMalformedHeader
17 | dirCacheIsNotLocked
18 | DIRCChecksumMismatch
19 | enumValueNotSupported3
20 | errorDecodingFromFile
21 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/jgit/commands/replace-500-existing-strings/bfg.txt:
--------------------------------------------------------------------------------
1 | --replace-text passwords.500-existing-strings.txt
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/jgit/commands/replace-500-existing-strings/passwords.500-existing-strings.txt:
--------------------------------------------------------------------------------
1 | abbreviationLengthMustBeNonNegative
2 | abortingRebase
3 | abortingRebaseFailed
4 | abortingRebaseFailedNoOrigHead
5 | advertisementCameBefore
6 | advertisementOfCameBefore
7 | amazonS3ActionFailed
8 | amazonS3ActionFailedGivingUp
9 | ambiguousObjectAbbreviation
10 | aNewObjectIdIsRequired
11 | anExceptionOccurredWhileTryingToAddTheIdOfHEAD
12 | anSSHSessionHasBeenAlreadyCreated
13 | applyingCommit
14 | archiveFormatAlreadyAbsent
15 | archiveFormatAlreadyRegistered
16 | atLeastOnePathIsRequired
17 | atLeastOnePatternIsRequired
18 | atLeastTwoFiltersNeeded
19 | authenticationNotSupported
20 | badBase64InputCharacterAt
21 | badEntryDelimiter
22 | badEntryName
23 | badEscape
24 | badGroupHeader
25 | badObjectType
26 | badSectionEntry
27 | bareRepositoryNoWorkdirAndIndex
28 | base64InputNotProperlyPadded
29 | baseLengthIncorrect
30 | bitmapMissingObject
31 | bitmapsMustBePrepared
32 | blameNotCommittedYet
33 | blobNotFound
34 | blobNotFoundForPath
35 | branchNameInvalid
36 | buildingBitmaps
37 | cachedPacksPreventsIndexCreation
38 | cachedPacksPreventsListingObjects
39 | cannotBeCombined
40 | cannotBeRecursiveWhenTreesAreIncluded
41 | cannotCombineSquashWithNoff
42 | cannotCombineTreeFilterWithRevFilter
43 | cannotCommitOnARepoWithState
44 | cannotCommitWriteTo
45 | cannotConnectPipes
46 | cannotConvertScriptToText
47 | cannotCreateConfig
48 | cannotCreateDirectory
49 | cannotCreateHEAD
50 | cannotCreateIndexfile
51 | cannotDeleteCheckedOutBranch
52 | cannotDeleteFile
53 | cannotDeleteStaleTrackingRef
54 | cannotDeleteStaleTrackingRef2
55 | cannotDetermineProxyFor
56 | cannotDownload
57 | cannotExecute
58 | cannotGet
59 | cannotListRefs
60 | cannotLock
61 | cannotLockPackIn
62 | cannotMatchOnEmptyString
63 | cannotMoveIndexTo
64 | cannotMovePackTo
65 | cannotOpenService
66 | cannotParseDate
67 | cannotParseGitURIish
68 | cannotPullOnARepoWithState
69 | cannotRead
70 | cannotReadBlob
71 | cannotReadCommit
72 | cannotReadFile
73 | cannotReadHEAD
74 | cannotReadObject
75 | cannotReadTree
76 | cannotRebaseWithoutCurrentHead
77 | cannotResolveLocalTrackingRefForUpdating
78 | cannotStoreObjects
79 | cannotUnloadAModifiedTree
80 | cannotWorkWithOtherStagesThanZeroRightNow
81 | canOnlyCherryPickCommitsWithOneParent
82 | canOnlyRevertCommitsWithOneParent
83 | cantFindObjectInReversePackIndexForTheSpecifiedOffset
84 | cantPassMeATree
85 | channelMustBeInRange0_255
86 | characterClassIsNotSupported
87 | checkoutConflictWithFile
88 | checkoutConflictWithFiles
89 | checkoutUnexpectedResult
90 | classCastNotA
91 | cloneNonEmptyDirectory
92 | collisionOn
93 | commandWasCalledInTheWrongState
94 | commitAlreadyExists
95 | commitMessageNotSpecified
96 | commitOnRepoWithoutHEADCurrentlyNotSupported
97 | commitAmendOnInitialNotPossible
98 | compressingObjects
99 | connectionFailed
100 | connectionTimeOut
101 | contextMustBeNonNegative
102 | corruptionDetectedReReadingAt
103 | corruptObjectBadStream
104 | corruptObjectBadStreamCorruptHeader
105 | corruptObjectGarbageAfterSize
106 | corruptObjectIncorrectLength
107 | corruptObjectInvalidEntryMode
108 | corruptObjectInvalidMode
109 | corruptObjectInvalidMode2
110 | corruptObjectInvalidMode3
111 | corruptObjectInvalidType
112 | corruptObjectInvalidType2
113 | corruptObjectMalformedHeader
114 | corruptObjectNegativeSize
115 | corruptObjectNoAuthor
116 | corruptObjectNoCommitter
117 | corruptObjectNoHeader
118 | corruptObjectNoObject
119 | corruptObjectNoTaggerBadHeader
120 | corruptObjectNoTaggerHeader
121 | corruptObjectNoTagName
122 | corruptObjectNotree
123 | corruptObjectNoType
124 | corruptObjectPackfileChecksumIncorrect
125 | couldNotCheckOutBecauseOfConflicts
126 | couldNotDeleteLockFileShouldNotHappen
127 | couldNotDeleteTemporaryIndexFileShouldNotHappen
128 | couldNotGetAdvertisedRef
129 | couldNotGetRepoStatistics
130 | couldNotLockHEAD
131 | couldNotReadIndexInOneGo
132 | couldNotReadObjectWhileParsingCommit
133 | couldNotRenameDeleteOldIndex
134 | couldNotRenameTemporaryFile
135 | couldNotRenameTemporaryIndexFileToIndex
136 | couldNotURLEncodeToUTF8
137 | couldNotWriteFile
138 | countingObjects
139 | createBranchFailedUnknownReason
140 | createBranchUnexpectedResult
141 | createNewFileFailed
142 | credentialPassword
143 | credentialUsername
144 | daemonAlreadyRunning
145 | daysAgo
146 | deleteBranchUnexpectedResult
147 | deleteFileFailed
148 | deleteTagUnexpectedResult
149 | deletingNotSupported
150 | destinationIsNotAWildcard
151 | detachedHeadDetected
152 | dirCacheDoesNotHaveABackingFile
153 | dirCacheFileIsNotLocked
154 | dirCacheIsNotLocked
155 | DIRCChecksumMismatch
156 | DIRCExtensionIsTooLargeAt
157 | DIRCExtensionNotSupportedByThisVersion
158 | DIRCHasTooManyEntries
159 | DIRCUnrecognizedExtendedFlags
160 | dirtyFilesExist
161 | doesNotHandleMode
162 | downloadCancelled
163 | downloadCancelledDuringIndexing
164 | duplicateAdvertisementsOf
165 | duplicateRef
166 | duplicateRemoteRefUpdateIsIllegal
167 | duplicateStagesNotAllowed
168 | eitherGitDirOrWorkTreeRequired
169 | emptyCommit
170 | emptyPathNotPermitted
171 | encryptionError
172 | endOfFileInEscape
173 | entryNotFoundByPath
174 | enumValueNotSupported2
175 | enumValueNotSupported3
176 | enumValuesNotAvailable
177 | errorDecodingFromFile
178 | errorEncodingFromFile
179 | errorInBase64CodeReadingStream
180 | errorInPackedRefs
181 | errorInvalidProtocolWantedOldNewRef
182 | errorListing
183 | errorOccurredDuringUnpackingOnTheRemoteEnd
184 | errorReadingInfoRefs
185 | errorSymlinksNotSupported
186 | exceptionCaughtDuringExecutionOfAddCommand
187 | exceptionCaughtDuringExecutionOfArchiveCommand
188 | exceptionCaughtDuringExecutionOfCherryPickCommand
189 | exceptionCaughtDuringExecutionOfCommitCommand
190 | exceptionCaughtDuringExecutionOfFetchCommand
191 | exceptionCaughtDuringExecutionOfLsRemoteCommand
192 | exceptionCaughtDuringExecutionOfMergeCommand
193 | exceptionCaughtDuringExecutionOfPullCommand
194 | exceptionCaughtDuringExecutionOfPushCommand
195 | exceptionCaughtDuringExecutionOfResetCommand
196 | exceptionCaughtDuringExecutionOfRevertCommand
197 | exceptionCaughtDuringExecutionOfRmCommand
198 | exceptionCaughtDuringExecutionOfTagCommand
199 | exceptionOccurredDuringAddingOfOptionToALogCommand
200 | exceptionOccurredDuringReadingOfGIT_DIR
201 | expectedACKNAKFoundEOF
202 | expectedACKNAKGot
203 | expectedBooleanStringValue
204 | expectedCharacterEncodingGuesses
205 | expectedEOFReceived
206 | expectedGot
207 | expectedLessThanGot
208 | expectedPktLineWithService
209 | expectedReceivedContentType
210 | expectedReportForRefNotReceived
211 | failedUpdatingRefs
212 | failureDueToOneOfTheFollowing
213 | failureUpdatingFETCH_HEAD
214 | failureUpdatingTrackingRef
215 | fileCannotBeDeleted
216 | fileIsTooBigForThisConvenienceMethod
217 | fileIsTooLarge
218 | fileModeNotSetForPath
219 | flagIsDisposed
220 | flagNotFromThis
221 | flagsAlreadyCreated
222 | funnyRefname
223 | gcFailed
224 | gitmodulesNotFound
225 | headRequiredToStash
226 | hoursAgo
227 | hugeIndexesAreNotSupportedByJgitYet
228 | hunkBelongsToAnotherFile
229 | hunkDisconnectedFromFile
230 | hunkHeaderDoesNotMatchBodyLineCountOf
231 | illegalArgumentNotA
232 | illegalCombinationOfArguments
233 | illegalPackingPhase
234 | illegalStateExists
235 | improperlyPaddedBase64Input
236 | incorrectHashFor
237 | incorrectOBJECT_ID_LENGTH
238 | indexFileIsInUse
239 | indexFileIsTooLargeForJgit
240 | indexSignatureIsInvalid
241 | indexWriteException
242 | inMemoryBufferLimitExceeded
243 | inputStreamMustSupportMark
244 | integerValueOutOfRange
245 | internalRevisionError
246 | internalServerError
247 | interruptedWriting
248 | inTheFuture
249 | invalidAdvertisementOf
250 | invalidAncestryLength
251 | invalidBooleanValue
252 | invalidChannel
253 | invalidCharacterInBase64Data
254 | invalidCommitParentNumber
255 | invalidEncryption
256 | invalidGitdirRef
257 | invalidGitType
258 | invalidId
259 | invalidIdLength
260 | invalidIntegerValue
261 | invalidKey
262 | invalidLineInConfigFile
263 | invalidModeFor
264 | invalidModeForPath
265 | invalidObject
266 | invalidOldIdSent
267 | invalidPacketLineHeader
268 | invalidPath
269 | invalidReflogRevision
270 | invalidRefName
271 | invalidRemote
272 | invalidStageForPath
273 | invalidTagOption
274 | invalidTimeout
275 | invalidURL
276 | invalidWildcards
277 | invalidRefSpec
278 | invalidWindowSize
279 | isAStaticFlagAndHasNorevWalkInstance
280 | JRELacksMD5Implementation
281 | kNotInRange
282 | largeObjectExceedsByteArray
283 | largeObjectExceedsLimit
284 | largeObjectException
285 | largeObjectOutOfMemory
286 | lengthExceedsMaximumArraySize
287 | listingAlternates
288 | localObjectsIncomplete
289 | localRefIsMissingObjects
290 | lockCountMustBeGreaterOrEqual1
291 | lockError
292 | lockOnNotClosed
293 | lockOnNotHeld
294 | malformedpersonIdentString
295 | maxCountMustBeNonNegative
296 | mergeConflictOnNonNoteEntries
297 | mergeConflictOnNotes
298 | mergeStrategyAlreadyExistsAsDefault
299 | mergeStrategyDoesNotSupportHeads
300 | mergeUsingStrategyResultedInDescription
301 | mergeRecursiveReturnedNoCommit
302 | mergeRecursiveTooManyMergeBasesFor
303 | messageAndTaggerNotAllowedInUnannotatedTags
304 | minutesAgo
305 | missingAccesskey
306 | missingConfigurationForKey
307 | missingDeltaBase
308 | missingForwardImageInGITBinaryPatch
309 | missingObject
310 | missingPrerequisiteCommits
311 | missingRequiredParameter
312 | missingSecretkey
313 | mixedStagesNotAllowed
314 | mkDirFailed
315 | mkDirsFailed
316 | month
317 | months
318 | monthsAgo
319 | multipleMergeBasesFor
320 | need2Arguments
321 | needPackOut
322 | needsAtLeastOneEntry
323 | needsWorkdir
324 | newlineInQuotesNotAllowed
325 | noApplyInDelete
326 | noClosingBracket
327 | noHEADExistsAndNoExplicitStartingRevisionWasSpecified
328 | noHMACsupport
329 | noMergeBase
330 | noMergeHeadSpecified
331 | noSuchRef
332 | notABoolean
333 | notABundle
334 | notADIRCFile
335 | notAGitDirectory
336 | notAPACKFile
337 | notARef
338 | notASCIIString
339 | notAuthorized
340 | notAValidPack
341 | notFound
342 | nothingToFetch
343 | nothingToPush
344 | notMergedExceptionMessage
345 | noXMLParserAvailable
346 | objectAtHasBadZlibStream
347 | objectAtPathDoesNotHaveId
348 | objectIsCorrupt
349 | objectIsNotA
350 | objectNotFound
351 | objectNotFoundIn
352 | obtainingCommitsForCherryPick
353 | offsetWrittenDeltaBaseForObjectNotFoundInAPack
354 | onlyAlreadyUpToDateAndFastForwardMergesAreAvailable
355 | onlyOneFetchSupported
356 | onlyOneOperationCallPerConnectionIsSupported
357 | openFilesMustBeAtLeast1
358 | openingConnection
359 | operationCanceled
360 | outputHasAlreadyBeenStarted
361 | packChecksumMismatch
362 | packCorruptedWhileWritingToFilesystem
363 | packDoesNotMatchIndex
364 | packetSizeMustBeAtLeast
365 | packetSizeMustBeAtMost
366 | packfileCorruptionDetected
367 | packFileInvalid
368 | packfileIsTruncated
369 | packHasUnresolvedDeltas
370 | packingCancelledDuringObjectsWriting
371 | packObjectCountMismatch
372 | packRefs
373 | packTooLargeForIndexVersion1
374 | packWriterStatistics
375 | panicCantRenameIndexFile
376 | patchApplyException
377 | patchFormatException
378 | pathIsNotInWorkingDir
379 | pathNotConfigured
380 | peeledLineBeforeRef
381 | peerDidNotSupplyACompleteObjectGraph
382 | prefixRemote
383 | problemWithResolvingPushRefSpecsLocally
384 | progressMonUploading
385 | propertyIsAlreadyNonNull
386 | pruneLoosePackedObjects
387 | pruneLooseUnreferencedObjects
388 | pullOnRepoWithoutHEADCurrentlyNotSupported
389 | pullTaskName
390 | pushCancelled
391 | pushIsNotSupportedForBundleTransport
392 | pushNotPermitted
393 | rawLogMessageDoesNotParseAsLogEntry
394 | readingObjectsFromLocalRepositoryFailed
395 | readTimedOut
396 | receivePackObjectTooLarge1
397 | receivePackObjectTooLarge2
398 | receivingObjects
399 | refAlreadyExists
400 | refAlreadyExists1
401 | reflogEntryNotFound
402 | refNotResolved
403 | refUpdateReturnCodeWas
404 | remoteConfigHasNoURIAssociated
405 | remoteDoesNotHaveSpec
406 | remoteDoesNotSupportSmartHTTPPush
407 | remoteHungUpUnexpectedly
408 | remoteNameCantBeNull
409 | renameBranchFailedBecauseTag
410 | renameBranchFailedUnknownReason
411 | renameBranchUnexpectedResult
412 | renameFileFailed
413 | renamesAlreadyFound
414 | renamesBreakingModifies
415 | renamesFindingByContent
416 | renamesFindingExact
417 | renamesRejoiningModifies
418 | repositoryAlreadyExists
419 | repositoryConfigFileInvalid
420 | repositoryIsRequired
421 | repositoryNotFound
422 | repositoryState_applyMailbox
423 | repositoryState_bisecting
424 | repositoryState_conflicts
425 | repositoryState_merged
426 | repositoryState_normal
427 | repositoryState_rebase
428 | repositoryState_rebaseInteractive
429 | repositoryState_rebaseOrApplyMailbox
430 | repositoryState_rebaseWithMerge
431 | requiredHashFunctionNotAvailable
432 | resettingHead
433 | resolvingDeltas
434 | resultLengthIncorrect
435 | rewinding
436 | searchForReuse
437 | searchForSizes
438 | secondsAgo
439 | selectingCommits
440 | sequenceTooLargeForDiffAlgorithm
441 | serviceNotEnabledNoName
442 | serviceNotPermitted
443 | serviceNotPermittedNoName
444 | shallowCommitsAlreadyInitialized
445 | shortCompressedStreamAt
446 | shortReadOfBlock
447 | shortReadOfOptionalDIRCExtensionExpectedAnotherBytes
448 | shortSkipOfBlock
449 | signingNotSupportedOnTag
450 | similarityScoreMustBeWithinBounds
451 | sizeExceeds2GB
452 | skipMustBeNonNegative
453 | smartHTTPPushDisabled
454 | sourceDestinationMustMatch
455 | sourceIsNotAWildcard
456 | sourceRefDoesntResolveToAnyObject
457 | sourceRefNotSpecifiedForRefspec
458 | squashCommitNotUpdatingHEAD
459 | staleRevFlagsOn
460 | startingReadStageWithoutWrittenRequestDataPendingIsNotSupported
461 | stashApplyConflict
462 | stashApplyConflictInIndex
463 | stashApplyFailed
464 | stashApplyOnUnsafeRepository
465 | stashApplyWithoutHead
466 | stashCommitMissingTwoParents
467 | stashDropDeleteRefFailed
468 | stashDropFailed
469 | stashDropMissingReflog
470 | stashFailed
471 | stashResolveFailed
472 | statelessRPCRequiresOptionToBeEnabled
473 | submoduleExists
474 | submoduleParentRemoteUrlInvalid
475 | submodulesNotSupported
476 | symlinkCannotBeWrittenAsTheLinkTarget
477 | systemConfigFileInvalid
478 | tagAlreadyExists
479 | tagNameInvalid
480 | tagOnRepoWithoutHEADCurrentlyNotSupported
481 | theFactoryMustNotBeNull
482 | timerAlreadyTerminated
483 | topologicalSortRequired
484 | transportExceptionBadRef
485 | transportExceptionEmptyRef
486 | transportExceptionInvalid
487 | transportExceptionMissingAssumed
488 | transportExceptionReadRef
489 | transportNeedsRepository
490 | transportProtoAmazonS3
491 | transportProtoBundleFile
492 | transportProtoFTP
493 | transportProtoGitAnon
494 | transportProtoHTTP
495 | transportProtoLocal
496 | transportProtoSFTP
497 | transportProtoSSH
498 | treeEntryAlreadyExists
499 | treeFilterMarkerTooManyFilters
500 | treeIteratorDoesNotSupportRemove
501 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/linux/commands/delete-file/bfg.txt:
--------------------------------------------------------------------------------
1 | -D MAINTAINERS
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/linux/commands/delete-file/gfb.txt:
--------------------------------------------------------------------------------
1 | --index-filter
2 | git rm --cached --ignore-unmatch MAINTAINERS
3 | --prune-empty
4 | --tag-name-filter
5 | cat
6 | --
7 | --all
8 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/rails/commands/delete-file/bfg.txt:
--------------------------------------------------------------------------------
1 | -D pushgems.rb
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/rails/commands/delete-file/gfb.txt:
--------------------------------------------------------------------------------
1 | --index-filter
2 | git rm --cached --ignore-unmatch pushgems.rb
3 | --prune-empty
4 | --tag-name-filter
5 | cat
6 | --
7 | --all
8 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/wine/commands/delete-file/bfg.txt:
--------------------------------------------------------------------------------
1 | -D build-spec.txt
2 |
--------------------------------------------------------------------------------
/bfg-benchmark/resources/repos/wine/commands/delete-file/gfb.txt:
--------------------------------------------------------------------------------
1 | --index-filter
2 | git rm --cached --ignore-unmatch build-spec.txt
3 | --prune-empty
4 | --tag-name-filter
5 | cat
6 | --
7 | --all
8 |
--------------------------------------------------------------------------------
/bfg-benchmark/src/main/scala/Benchmark.scala:
--------------------------------------------------------------------------------
1 | import lib.Timing.measureTask
2 | import lib._
3 | import model._
4 |
5 | import java.nio.file.Files
6 | import java.nio.file.Files.isDirectory
7 | import scala.concurrent.ExecutionContext.Implicits.global
8 | import scala.concurrent._
9 | import scala.concurrent.duration.Duration
10 | import scala.jdk.StreamConverters._
11 | import scala.sys.process._
12 |
13 | /*
14 | * Vary BFG runs by:
15 | * Java version
16 | * BFG version (JGit version?)
17 | *
18 | */
19 | object Benchmark extends App {
20 |   // The body below runs only when the command line parsed; the parse result is simply mapped over.
21 |   BenchmarkConfig.parser.parse(args, BenchmarkConfig()) map {
22 |     config =>
23 |       println(s"Using resources dir : ${config.resourcesDir}")
24 |
25 |       require(Files.exists(config.resourcesDir), s"Resources dir not found : ${config.resourcesDir}")
26 |       require(Files.exists(config.jarsDir), s"Jars dir not found : ${config.jarsDir}")
27 |       require(Files.exists(config.reposDir), s"Repos dir not found : ${config.reposDir}")
28 |
29 |       val missingJars = config.bfgJars.filterNot(Files.exists(_))
30 |       require(missingJars.isEmpty, s"Missing BFG jars : ${missingJars.mkString(",")}")
31 |
32 |       val tasksFuture = for {
33 |         bfgInvocableEngineSet <- bfgInvocableEngineSet(config)
34 |       } yield {
35 |         val gfbInvocableEngineSetOpt =
36 |           Option.when(!config.onlyBfg)(InvocableEngineSet[GFBInvocation](GitFilterBranch, Seq(InvocableGitFilterBranch)))
37 |         boogaloo(config, new RepoExtractor(config.scratchDir), Seq(bfgInvocableEngineSet) ++ gfbInvocableEngineSetOpt.toSeq)
38 |       }
39 |
40 |       Await.result(tasksFuture, Duration.Inf)
41 |   }
42 |   // Probes every configured java command for its version, then pairs each java with each configured BFG jar.
43 |   def bfgInvocableEngineSet(config: BenchmarkConfig): Future[InvocableEngineSet[BFGInvocation]] = for {
44 |     javas <- Future.traverse(config.javaCmds)(jc => JavaVersion.version(jc).map(v => Java(jc, v)))
45 |   } yield {
46 |     val invocables = for {
47 |       java <- javas
48 |       bfgJar <- config.bfgJars
49 |     } yield InvocableBFG(java, BFGJar.from(bfgJar))
50 |
51 |     InvocableEngineSet[BFGInvocation](BFG, invocables)
52 |   }
53 |
54 |   /*
55 |    * A Task says "here is something you can do to a given repo, and here is how to do
56 |    * it with a BFG, and with git-filter-branch"
57 |    */
58 |   def boogaloo(config: BenchmarkConfig, repoExtractor: RepoExtractor, invocableEngineSets: Seq[InvocableEngineSet[_ <: EngineInvocation]]) = {
59 |     // Files.list returns a stream that has to be closed, so each listing is drained inside Using.resource.
60 |     for {
61 |       repoSpecDir <- config.repoSpecDirs
62 |       availableCommandDirs = scala.util.Using.resource(Files.list(repoSpecDir.resolve("commands")))(_.toScala(Seq)).filter(isDirectory(_))
63 |       // println(s"Available commands for $repoName : ${availableCommandDirs.map(_.name).mkString(", ")}")
64 |       commandDir <- availableCommandDirs.filter(p => config.commands(p.getFileName.toString))
65 |     } yield {
66 |       val commandName: String = commandDir.getFileName.toString
67 |
68 |       commandName -> (for {
69 |         invocableEngineSet <- invocableEngineSets
70 |       } yield for {
71 |         (invocable, processMaker) <- invocableEngineSet.invocationsFor(commandDir)
72 |       } yield {
73 |         val cleanRepoDir = repoExtractor.extractRepoFrom(repoSpecDir.resolve("repo.git.zip"))
74 |         scala.util.Using.resource(Files.list(commandDir))(_.toScala(Seq)).foreach(p => Files.copy(p, cleanRepoDir.resolve(p.getFileName)))
75 |         val process = processMaker(cleanRepoDir.toFile)
76 |
77 |         val duration = measureTask(s"$commandName - $invocable") {
78 |           process ! ProcessLogger(_ => ())
79 |         }
80 |
81 |         if (config.dieIfTaskTakesLongerThan.exists(_ < duration.toMillis)) {
82 |           throw new Exception("This took too long: "+duration)
83 |         }
84 |
85 |         invocable -> duration
86 |       })
87 |     }
88 |   }
89 |
90 |   println(s"\n...benchmark finished.")
91 | }
92 |
--------------------------------------------------------------------------------
/bfg-benchmark/src/main/scala/BenchmarkConfig.scala:
--------------------------------------------------------------------------------
1 | import java.io.File
2 | import com.madgag.textmatching.{Glob, TextMatcher}
3 | import scopt.OptionParser
4 |
5 | import java.nio.file.{Path, Paths}
6 |
7 | object BenchmarkConfig {
8 |   // Command-line definition: each option folds its parsed value into a copy of the config.
9 |   val parser = new OptionParser[BenchmarkConfig]("benchmark") {
10 |     opt[File]("resources-dir").text("benchmark resources folder - contains jars and repos")
11 |       .action((dir, cfg) => cfg.copy(resourcesDirOption = dir.toPath))
12 |     // Comma-separated list of java launcher commands.
13 |     opt[String]("java").text("Java command paths")
14 |       .action((csv, cfg) => cfg.copy(javaCmds = csv.split(',').toSeq))
15 |     // Comma-separated list of BFG versions.
16 |     opt[String]("versions").text("BFG versions to time - bfg-[version].jar - eg 1.4.0,1.5.0,1.6.0")
17 |       .action((csv, cfg) => cfg.copy(bfgVersions = csv.split(",").toSeq))
18 |     // Stored as a plain Int; Benchmark compares it against a task duration in milliseconds.
19 |     opt[Int]("die-if-longer-than").text("Useful for git-bisect")
20 |       .action((limit, cfg) => cfg.copy(dieIfTaskTakesLongerThan = Some(limit)))
21 |     opt[String]("repos").text("Sample repos to test, eg github-gems,jgit,git")
22 |       .action((csv, cfg) => cfg.copy(repoNames = csv.split(",").toSeq))
23 |     // The value is handed to TextMatcher with Glob as the default matcher type.
24 |     opt[String]("commands").valueName("").text("commands to exercise")
25 |       .action((pattern, cfg) => cfg.copy(commands = TextMatcher(pattern, defaultType = Glob)))
26 |     opt[File]("scratch-dir").text("Temp-dir for job runs - preferably ramdisk, eg tmpfs.")
27 |       .action((dir, cfg) => cfg.copy(scratchDir = dir.toPath))
28 |     // Flag without a value: its presence switches git-filter-branch runs off.
29 |     opt[Unit]("only-bfg").text("Don't benchmark git-filter-branch")
30 |       .action((_, cfg) => cfg.copy(onlyBfg = true))
31 |   }
32 | }
33 | case class BenchmarkConfig(resourcesDirOption: Path = Paths.get(System.getProperty("user.dir"), "bfg-benchmark", "resources"),
34 |                            scratchDir: Path = Paths.get("/dev/shm/"),
35 |                            javaCmds: Seq[String] = Seq("java"),
36 |                            bfgVersions: Seq[String] = Seq.empty,
37 |                            commands: TextMatcher = Glob("*"),
38 |                            onlyBfg: Boolean = false,
39 |                            dieIfTaskTakesLongerThan: Option[Int] = None,
40 |                            repoNames: Seq[String] = Seq.empty) {
41 |   // Everything below is derived lazily from the constructor values.
42 |   lazy val resourcesDir: Path = resourcesDirOption.toAbsolutePath
43 |   // Fixed layout beneath the resources dir: `jars` and `repos`.
44 |   lazy val jarsDir: Path = resourcesDir.resolve("jars")
45 |
46 |   lazy val reposDir: Path = resourcesDir.resolve("repos")
47 |   // Each requested version maps to jarsDir/bfg-<version>.jar.
48 |   lazy val bfgJars: Seq[Path] = for (version <- bfgVersions) yield jarsDir.resolve(s"bfg-$version.jar")
49 |   // One spec directory per requested repo name, directly beneath reposDir.
50 |   lazy val repoSpecDirs: Seq[Path] = repoNames.map(name => reposDir.resolve(name))
51 | }
52 |
--------------------------------------------------------------------------------
/bfg-benchmark/src/main/scala/JavaVersion.scala:
--------------------------------------------------------------------------------
1 | import scala.concurrent.ExecutionContext.Implicits.global
2 | import scala.concurrent._
3 | import scala.sys.process.{Process, ProcessLogger}
4 |
5 | object JavaVersion {
6 |   val VersionRegex = """(?:java|openjdk) version "(.*?)"""".r
7 |
8 |   /** Runs `javaCmd -version` asynchronously: completes with the first version string printed, else fails. */
9 |   def version(javaCmd: String): Future[String] = {
10 |     val resultPromise = Promise[String]()
11 |     val probe = Future {
12 |       // trySuccess rather than success: a second matching output line must not throw inside the logger callback.
13 |       val exitCode = Process(s"$javaCmd -version") ! ProcessLogger(
14 |         line => versionFrom(line).foreach(v => resultPromise.trySuccess(v))
15 |       )
16 |       resultPromise.tryFailure(new IllegalArgumentException(s"$javaCmd exited with code $exitCode, no Java version found"))
17 |     }
18 |     // If the process cannot even be started the Future above fails; forward that instead of never completing.
19 |     probe.failed.foreach(e => resultPromise.tryFailure(e))
20 |     resultPromise.future
21 |   }
22 |   /** Extracts the quoted version from one line of `java -version` output, if the line contains one. */
23 |   def versionFrom(javaVersionLine: String): Option[String] = VersionRegex.findFirstMatchIn(javaVersionLine).map(_.group(1))
24 | }
25 |
--------------------------------------------------------------------------------
/bfg-benchmark/src/main/scala/lib/Repo.scala:
--------------------------------------------------------------------------------
1 | package lib
2 |
3 | import com.google.common.io.MoreFiles
4 | import com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE
5 | import com.madgag.compress.CompressUtil._
6 |
7 | import java.nio.file.{Files, Path}
8 | import scala.util.Using
9 |
10 | class RepoExtractor(scratchDir: Path) {
11 |   // Every extraction reuses this one directory, so only a single extracted repo exists at a time.
12 |   val repoDir = scratchDir.resolve( "repo.git")
13 |
14 |   /** Replaces any existing `repoDir` with the unzipped contents of `zipPath`, and returns `repoDir`. */
15 |   def extractRepoFrom(zipPath: Path) = {
16 |     if (Files.exists(repoDir)) MoreFiles.deleteRecursively(repoDir, ALLOW_INSECURE)
17 |
18 |     Files.createDirectories(repoDir)
19 |
20 |     println(s"Extracting repo to ${repoDir.toAbsolutePath}")
21 |
22 |     // Using.resource rethrows failures; a bare Using(...) returns a Try that would be silently dropped here.
23 |     Using.resource(Files.newInputStream(zipPath)) { stream => unzip(stream, repoDir.toFile) }
24 |
25 |     repoDir
26 |   }
27 | }
28 |
--------------------------------------------------------------------------------
/bfg-benchmark/src/main/scala/lib/Timing.scala:
--------------------------------------------------------------------------------
1 | package lib
2 |
3 | import java.lang.System._
4 | import java.util.concurrent.TimeUnit._
5 |
6 | import scala.concurrent.duration.{Duration, FiniteDuration}
7 |
8 | object Timing {
9 |   /** Evaluates `block` once, prints "<description> completed in <n> ms." and returns the elapsed time. */
10 |   def measureTask[T](description: String)(block: => T): Duration = {
11 |     val start = nanoTime
12 |     block
13 |     val duration = FiniteDuration(nanoTime - start, NANOSECONDS)
14 |     println(f"$description completed in ${duration.toMillis}%,d ms.") // f-interpolation: a '%' in description stays literal
15 |     duration
16 |   }
17 | }
18 |
--------------------------------------------------------------------------------
/bfg-benchmark/src/main/scala/model/BFGJar.scala:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import java.nio.file.Path
4 |
5 | /** Factory for [[BFGJar]] values that carry no dependency-version information. */
6 | object BFGJar {
7 |   def from(path: Path): BFGJar = new BFGJar(path, mavenDependencyVersions = Map.empty)
8 | }
9 | case class BFGJar(path: Path, mavenDependencyVersions: Map[String, String])
10 |
--------------------------------------------------------------------------------
/bfg-benchmark/src/main/scala/model/InvocableEngine.scala:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import com.google.common.io.CharSource
4 | import com.google.common.io.Files.asCharSource
5 |
6 | import java.io.File
7 | import java.nio.charset.StandardCharsets.UTF_8
8 | import java.nio.file.{Files, Path}
9 | import scala.jdk.StreamConverters._
10 | import scala.sys.process.{Process, ProcessBuilder}
11 | // Marker type for the arguments handed to one cleaning engine.
12 | trait EngineInvocation
13 | // BFG arguments are kept as one raw string (the BFG engine type fills it with the whole argument file).
14 | case class BFGInvocation(args: String) extends EngineInvocation
15 | // git-filter-branch arguments are kept as a list (the GitFilterBranch engine type fills it with one entry per file line).
16 | case class GFBInvocation(args: Seq[String]) extends EngineInvocation
17 |
18 |
19 | trait InvocableEngine[InvocationArgs <: EngineInvocation] {
20 | // Curried: the invocation can be fixed first and the repo directory supplied later, yielding the process to run.
21 | def processFor(invocation: InvocationArgs)(repoPath: File): ProcessBuilder
22 | }
23 |
24 | case class InvocableBFG(java: Java, bfgJar: BFGJar) extends InvocableEngine[BFGInvocation] {
25 |   def processFor(invocation: BFGInvocation)(repoPath: File) = {
26 |     val commandLine = s"${java.javaCmd} -jar ${bfgJar.path} ${invocation.args}" // one string: java command, jar path, raw BFG args
27 |     Process(commandLine, repoPath)
28 |   }
29 | }
30 |
31 | object InvocableGitFilterBranch extends InvocableEngine[GFBInvocation] {
32 |   // Runs `git filter-branch` in repoPath, passing the invocation's arguments as separate list elements.
33 |   def processFor(invocation: GFBInvocation)(repoPath: File) =
34 |     Process("git" +: "filter-branch" +: invocation.args, repoPath)
35 | }
36 |
37 | /*
38 | We want to allow the user to vary:
39 | - BFGs (jars, javas)
40 | - Tasks (delete a file, replace text) in [selection of repos]
41 |
42 | Tasks will have a variety of different invocations for different engines
43 | */
44 |
45 | trait EngineType[InvocationType <: EngineInvocation] {
46 |   /** Base name (without ".txt") of the per-command file holding this engine's arguments. */
47 |   val configName: String
48 |   def argsFor(config: CharSource): InvocationType
49 |   /** The invocation described in `commandDir`, or None when that directory has no file for this engine. */
50 |   def argsOptsFor(commandDir: Path): Option[InvocationType] = {
51 |     val argsFile = commandDir.resolve(s"$configName.txt")
52 |     Option.when(Files.exists(argsFile))(argsFor(asCharSource(argsFile.toFile, UTF_8)))
53 |   }
54 | }
55 |
56 | case object BFG extends EngineType[BFGInvocation] {
57 |   // Arguments are looked up in `bfg.txt`.
58 |   val configName: String = "bfg"
59 |   // The entire file content becomes the argument string.
60 |   def argsFor(config: CharSource): BFGInvocation = BFGInvocation(config.read())
61 | }
62 |
63 | case object GitFilterBranch extends EngineType[GFBInvocation] {
64 |   // Arguments are looked up in `gfb.txt`, one argument per line.
65 |   val configName = "gfb"
66 |   // CharSource.lines() hands back a stream the caller must close, hence Using.resource around the read.
67 |   def argsFor(config: CharSource) = GFBInvocation(scala.util.Using.resource(config.lines())(_.toScala(Seq)))
68 | }
69 |
--------------------------------------------------------------------------------
/bfg-benchmark/src/main/scala/model/InvocableEngineSet.scala:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import java.io.File
4 | import java.nio.file.Path
5 |
6 | case class InvocableEngineSet[InvocationArgs <: EngineInvocation](
7 |   engineType: EngineType[InvocationArgs],
8 |   invocableEngines: Seq[InvocableEngine[InvocationArgs]]
9 | ) {
10 |   // Pairs every engine with a process factory for the command in `commandDir`; empty when the command has no args for this engine type.
11 |   def invocationsFor(commandDir: Path): Seq[(InvocableEngine[InvocationArgs], File => scala.sys.process.ProcessBuilder)] =
12 |     engineType.argsOptsFor(commandDir).toSeq.flatMap { args =>
13 |       invocableEngines.map { engine =>
14 |         (engine, engine.processFor(args) _)
15 |       }
16 |     }
17 | }
18 |
--------------------------------------------------------------------------------
/bfg-benchmark/src/main/scala/model/Java.scala:
--------------------------------------------------------------------------------
1 | package model
2 | // A java launcher command paired with a version string (Benchmark fills it from JavaVersion.version).
3 | case class Java(javaCmd: String, version: String)
4 |
--------------------------------------------------------------------------------
/bfg-benchmark/src/test/scala/JavaVersionSpec.scala:
--------------------------------------------------------------------------------
1 | import org.scalatest.OptionValues
2 | import org.scalatest.flatspec.AnyFlatSpec
3 | import org.scalatest.matchers.should.Matchers
4 | // Declared as a class, not an object: ScalaTest's sbt integration discovers suite classes, so an object suite is never run.
5 | class JavaVersionSpec extends AnyFlatSpec with OptionValues with Matchers {
6 |   "version" should "parse an example line" in {
7 |     JavaVersion.versionFrom("""java version "1.7.0_51"""").value shouldBe "1.7.0_51"
8 |   }
9 |
10 |   it should "parse openjdk weirdness" in {
11 |     JavaVersion.versionFrom("""openjdk version "1.8.0_40-internal"""").value shouldBe "1.8.0_40-internal"
12 |   }
13 | }
14 |
--------------------------------------------------------------------------------
/bfg-library/build.sbt:
--------------------------------------------------------------------------------
1 | import Dependencies.*
2 | // Dependencies of bfg-library: the `guava` group plus single modules named in `Dependencies`; `% Test` entries are test-only.
3 | libraryDependencies ++= guava ++ Seq(
4 | parCollections,
5 | scalaCollectionPlus,
6 | textmatching,
7 | scalaGit,
8 | jgit,
9 | slf4jSimple,
10 | lineSplitting,
11 | scalaGitTest % Test,
12 | "org.apache.commons" % "commons-text" % "1.13.0" % Test
13 | )
14 |
15 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentMultiMap.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.collection.concurrent
22 |
23 | import com.madgag.scala.collection.decorators._
24 |
25 | class ConcurrentMultiMap[A, B] {
26 |   // Each key maps to its own concurrent set of values; sets are created on first use of a key.
27 |   val m: collection.concurrent.Map[A, ConcurrentSet[B]] = collection.concurrent.TrieMap.empty
28 |   def addBinding(key: A, value: B): this.type = {
29 |     val bucket = m.get(key) match {
30 |       case Some(existing) => existing
31 |       case None =>
32 |         val created = new ConcurrentSet[B]
33 |         m.putIfAbsent(key, created).getOrElse(created) // if another thread won the race, use its set
34 |     }
35 |     bucket += value
36 |     this
37 |   }
38 |   def toMap: Map[A, Set[B]] = m.toMap.mapV(_.toSet) // immutable copy of the current contents
39 | }
40 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentSet.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.collection.concurrent
22 |
23 | import scala.collection.mutable.{AbstractSet, SetOps}
24 | import scala.collection.{IterableFactory, IterableFactoryDefaults, mutable}
25 |
26 | class ConcurrentSet[A]()
27 |   extends AbstractSet[A]
28 |   with SetOps[A, ConcurrentSet, ConcurrentSet[A]]
29 |   with IterableFactoryDefaults[A, ConcurrentSet]
30 | {
31 |   // Elements are the keys of a concurrent TrieMap; the Boolean values are always `true` and never read.
32 |   val m: collection.concurrent.Map[A, Boolean] = collection.concurrent.TrieMap.empty
33 |
34 |   override def iterableFactory: IterableFactory[ConcurrentSet] = ConcurrentSet
35 |
36 |   override def clear(): Unit = m.clear()
37 |
38 |   override def addOne(elem: A): this.type = {
39 |     m.update(elem, true)
40 |     this
41 |   }
42 |
43 |   override def subtractOne(elem: A): this.type = {
44 |     m -= elem
45 |     this
46 |   }
47 |
48 |   override def contains(elem: A): Boolean = m.contains(elem)
49 |
50 |   override def iterator: Iterator[A] = m.keysIterator
51 |
52 | }
53 |
54 | object ConcurrentSet extends IterableFactory[ConcurrentSet] {
55 |
56 |   /** A new, empty set on every call. ConcurrentSet is mutable, so one shared "empty" instance
57 |    * would let elements added through one reference show up in every other "empty" set.
58 |    */
59 |   def empty[A]: ConcurrentSet[A] = new ConcurrentSet[A]()
60 |
61 |   /** Copies the elements of `source` into a new set. The result never aliases `source`,
62 |    * even when `source` is itself a ConcurrentSet.
63 |    */
64 |   def from[A](source: collection.IterableOnce[A]): ConcurrentSet[A] =
65 |     (newBuilder[A] ++= source).result()
66 |
67 |   /** Builder that adds elements directly into a new, initially empty ConcurrentSet;
68 |    * `result()` returns that same set.
69 |    */
70 |   def newBuilder[A]: mutable.Builder[A, ConcurrentSet[A]] =
71 |     new mutable.GrowableBuilder[A, ConcurrentSet[A]](empty[A])
72 | }
73 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/LFS.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git
22 |
23 | import com.google.common.base.Splitter
24 | import com.madgag.git.bfg.model.FileName
25 | import org.apache.commons.codec.binary.Hex._
26 | import org.eclipse.jgit.lib.ObjectLoader
27 |
28 | import java.nio.charset.Charset
29 | import java.nio.charset.StandardCharsets.UTF_8
30 | import java.nio.file.{Files, Path}
31 | import java.security.{DigestOutputStream, MessageDigest}
32 | import scala.jdk.CollectionConverters._
33 | import scala.util.Using
34 |
35 | object LFS {
36 |   // Path segments `lfs`/`objects`. NOTE(review): presumably relative to the repository's git directory - confirm.
37 |   val ObjectsPath: Seq[String] = Seq("lfs" , "objects")
38 |
39 |   val PointerCharset: Charset = UTF_8
40 |
41 |   case class Pointer(shaHex: String, blobSize: Long) {
42 |     // Pointer-file text: a version line, the sha256 oid and the blob size, each terminated by a newline.
43 |     lazy val text: String = s"""|version https://git-lfs.github.com/spec/v1
44 |                                 |oid sha256:$shaHex
45 |                                 |size $blobSize
46 |                                 |""".stripMargin
47 |
48 |     lazy val bytes: Array[Byte] = text.getBytes(PointerCharset)
49 |     // Sharded location: first two hex characters, the next two, then the full hash.
50 |     lazy val path: Seq[String] = Seq(shaHex.substring(0, 2), shaHex.substring(2, 4), shaHex)
51 |   }
52 |
53 |   object Pointer {
54 |     // Splits pointer text into non-empty trimmed lines, and each line into key and value at a space.
55 |     val splitter = Splitter.on('\n').omitEmptyStrings().trimResults().withKeyValueSeparator(' ')
56 |     /** Parses pointer-file bytes; throws if the `size` or `oid` entry is absent or `size` is not a number. */
57 |     def parse(bytes: Array[Byte]) = {
58 |       val text = new String(bytes, PointerCharset)
59 |       val valuesByKey= splitter.split(text).asScala
60 |       val size = valuesByKey("size").toLong
61 |       val shaHex = valuesByKey("oid").stripPrefix("sha256:")
62 |       Pointer(shaHex, size)
63 |     }
64 |   }
65 |
66 |   val GitAttributesFileName = FileName(".gitattributes")
67 |   /** Copies the blob into `tmpFile` while hashing it with SHA-256; returns the pointer for that hash and the blob's size. */
68 |   def pointerFor(loader: ObjectLoader, tmpFile: Path) = {
69 |     val digest = MessageDigest.getInstance("SHA-256")
70 |     // Using.resource propagates copy failures; a discarded Using(...) Try would yield a pointer for a partial file.
71 |     Using.resource(Files.newOutputStream(tmpFile)) { outStream =>
72 |       loader.copyTo(new DigestOutputStream(outStream, digest))
73 |     }
74 |
75 |     Pointer(encodeHexString(digest.digest()), loader.getSize)
76 |   }
77 | }
78 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/GitUtil.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg
22 |
23 | import com.google.common.primitives.Ints
24 | import com.madgag.git.bfg.cleaner._
25 | import com.madgag.git.{SizedObject, _}
26 | import org.eclipse.jgit.internal.storage.file.ObjectDirectory
27 | import org.eclipse.jgit.lib.Constants.OBJ_BLOB
28 | import org.eclipse.jgit.lib.ObjectReader._
29 | import org.eclipse.jgit.lib._
30 | import org.eclipse.jgit.revwalk.RevWalk
31 | import org.eclipse.jgit.storage.file.WindowCacheConfig
32 |
33 | import scala.jdk.CollectionConverters._
34 | import scala.jdk.StreamConverters._
35 | import scala.language.implicitConversions
36 |
37 | trait CleaningMapper[V] extends Cleaner[V] {
38 |   /** True when cleaning `v` yields something that is not equal to `v`. */
39 |   def isDirty(v: V) = !(apply(v) == v)
40 |
41 |   /** `Some((oldId, cleaned))` when cleaning changes `oldId`, otherwise None. */
42 |   def substitution(oldId: V): Option[(V, V)] =
43 |     Some(apply(oldId)).filterNot(_ == oldId).map(cleaned => (oldId, cleaned))
44 |
45 |   /** `Some(cleaned)` when cleaning changes `oldId`, otherwise None. */
46 |   def replacement(oldId: V): Option[V] =
47 |     Some(apply(oldId)).filterNot(_ == oldId)
48 |
49 | }
50 |
51 | object GitUtil {
52 | // 1 MiB: the default stream-file threshold below, and the size above which biggestBlobs keeps an object without checking its type.
53 | val ProbablyNoNonFileObjectsOverSizeThreshold: Long = 1024 * 1024
54 | // Installs a JGit WindowCacheConfig whose stream-file threshold is `massiveNonFileObjects` (or the default above), capped to Int range.
55 | def tweakStaticJGitConfig(massiveNonFileObjects: Option[Long]): Unit = {
56 | val wcConfig: WindowCacheConfig = new WindowCacheConfig()
57 | wcConfig.setStreamFileThreshold(Ints.saturatedCast(massiveNonFileObjects.getOrElse(ProbablyNoNonFileObjectsOverSizeThreshold)))
58 | wcConfig.install()
59 | }
60 | // True when any ref points directly at a commit that carries a FormerCommitFooter.Key footer line.
61 | def hasBeenProcessedByBFGBefore(repo: Repository): Boolean = {
62 | // This method just checks the tips of all refs - a good-enough indicator for our purposes...
63 | implicit val revWalk = new RevWalk(repo)
64 | implicit val objectReader = revWalk.getObjectReader
65 |
66 | repo.getRefDatabase.getRefs().asScala.map(_.getObjectId).filter(_.open.getType == Constants.OBJ_COMMIT)
67 | .map(_.asRevCommit).exists(_.getFooterLines(FormerCommitFooter.Key).asScala.nonEmpty)
68 | }
69 | // Lets a plain Cleaner be used wherever a CleaningMapper is expected, by delegating `apply` to it.
70 | implicit def cleaner2CleaningMapper[V](f: Cleaner[V]): CleaningMapper[V] = new CleaningMapper[V] {
71 | def apply(v: V) = f(v)
72 | }
73 | // Sizes every packed object, sorts descending (SizedObject ordering, presumably by size), then lazily keeps those over the threshold or of blob type.
74 | def biggestBlobs(implicit objectDB: ObjectDirectory, progressMonitor: ProgressMonitor = NullProgressMonitor.INSTANCE): LazyList[SizedObject] = {
75 | Timing.measureTask("Scanning packfile for large blobs", ProgressMonitor.UNKNOWN) {
76 | val reader = objectDB.newReader
77 | objectDB.packedObjects.map {
78 | objectId =>
79 | progressMonitor update 1
80 | SizedObject(objectId, reader.getObjectSize(objectId, OBJ_ANY))
81 | }.toSeq.sorted.reverse.to(LazyList).filter { oid =>
82 | oid.size > ProbablyNoNonFileObjectsOverSizeThreshold || reader.open(oid.objectId).getType == OBJ_BLOB
83 | }
84 | }
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobCharsetDetector.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.google.common.io.ByteStreams
24 | import com.google.common.io.ByteStreams.toByteArray
25 | import com.madgag.git.bfg.model.TreeBlobEntry
26 | import org.eclipse.jgit.diff.RawText
27 | import org.eclipse.jgit.lib.ObjectLoader
28 |
29 | import java.nio.ByteBuffer
30 | import java.nio.charset.Charset
31 | import java.nio.charset.CodingErrorAction._
32 | import scala.util.{Try, Using}
33 |
34 |
/** Strategy for choosing the character set used to decode the content of a blob as text. */
trait BlobCharsetDetector {

  /**
   * @param entry        the tree entry of the blob being examined
   * @param objectLoader loader giving access to the content of that blob
   * @return the charset to decode the blob with, or None if this is a binary file that can not be
   *         converted to text
   */
  def charsetFor(entry: TreeBlobEntry, objectLoader: ObjectLoader): Option[Charset]
}
39 |
40 |
/**
 * Charset detection that only looks at a sample of the blob: at most its first 8000 bytes.
 */
object QuickBlobCharsetDetector extends BlobCharsetDetector {

  /** Candidate charsets in order of preference (duplicates removed); the first one that fits wins. */
  val CharSets: Seq[Charset] =
    Seq(Charset.forName("UTF-8"), Charset.defaultCharset(), Charset.forName("ISO-8859-1")).distinct

  /**
   * Picks the first charset of `CharSets` that decodes the sampled bytes without error.
   *
   * @return None if the sample can not be read, if `RawText.isBinary` judges it to be binary, or if
   *         none of the candidate charsets decodes it cleanly
   */
  def charsetFor(entry: TreeBlobEntry, objectLoader: ObjectLoader): Option[Charset] = {
    Using(ByteStreams.limit(objectLoader.openStream(), 8000))(toByteArray).toOption.filterNot(RawText.isBinary).flatMap {
      sampleBytes =>
        // Wrap the sample afresh for every candidate: decoding advances the buffer's position, so a
        // buffer shared between attempts would let later charsets validate only the bytes left over
        // after an earlier charset had failed part-way through.
        CharSets.find(cs => Try(decode(ByteBuffer.wrap(sampleBytes), cs)).isSuccess)
    }
  }

  /** Strictly decodes `b`, throwing on malformed or unmappable input rather than substituting. */
  private def decode(b: ByteBuffer, charset: Charset): Unit = {
    charset.newDecoder.onMalformedInput(REPORT).onUnmappableCharacter(REPORT).decode(b)
  }
}
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobTextModifier.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.git.ThreadLocalObjectDatabaseResources
24 | import com.madgag.git.bfg.model.TreeBlobEntry
25 | import com.madgag.linesplitting.LineBreakPreservingIterator
26 | import org.eclipse.jgit.lib.Constants.OBJ_BLOB
27 | import org.eclipse.jgit.lib.ObjectLoader
28 |
29 | import java.io.{ByteArrayOutputStream, InputStreamReader}
30 | import java.nio.charset.Charset
31 |
32 |
object BlobTextModifier {

  /** Default value for a modifier's `sizeThreshold`: 1024 * 1024 bytes. */
  val DefaultSizeThreshold: Long = 1024L * 1024L

}
38 |
/**
 * Tree-blob modifier that rewrites the text content of blobs line by line.
 *
 * For each entry, `lineCleanerFor` may supply a line-rewriting function. A blob is only rewritten
 * when the charset detector regards it as text, it is smaller than `sizeThreshold` and at least one
 * of its lines is actually changed by the cleaner; otherwise the original blob id is kept.
 */
trait BlobTextModifier extends TreeBlobModifier {

  val threadLocalObjectDBResources: ThreadLocalObjectDatabaseResources

  /** @return the function to apply to every line of `entry`, or None to leave the entry untouched */
  def lineCleanerFor(entry: TreeBlobEntry): Option[String => String]

  val charsetDetector: BlobCharsetDetector = QuickBlobCharsetDetector

  /** Blobs of this size (in bytes) or larger are never rewritten. */
  val sizeThreshold = BlobTextModifier.DefaultSizeThreshold

  override def fix(entry: TreeBlobEntry) = {

    def filterTextIn(e: TreeBlobEntry, lineCleaner: String => String): TreeBlobEntry = {
      def isDirty(line: String) = lineCleaner(line) != line

      val loader = threadLocalObjectDBResources.reader().open(e.objectId)
      val opt = for {
        charset <- charsetDetector.charsetFor(e, loader)
        // First pass: only detect whether anything would change, so clean blobs are never re-written
        if loader.getSize < sizeThreshold && withLinesFor(loader, charset)(_.exists(isDirty))
      } yield {
        // Second pass: re-read the blob and store the cleaned text as a new blob
        val b = new ByteArrayOutputStream(loader.getSize.toInt)
        withLinesFor(loader, charset)(_.map(lineCleaner).foreach(line => b.write(line.getBytes(charset))))
        val oid = threadLocalObjectDBResources.inserter().insert(OBJ_BLOB, b.toByteArray)
        e.copy(objectId = oid)
      }

      opt.getOrElse(e)
    }

    lineCleanerFor(entry) match {
      case Some(lineCleaner) => filterTextIn(entry, lineCleaner).withoutName
      case None => entry.withoutName
    }
  }

  /**
   * Runs `f` over the lines of the blob (line-breaks preserved) and always closes the underlying
   * object stream afterwards - even when `f` stops early or throws.
   */
  private def withLinesFor[T](loader: ObjectLoader, charset: Charset)(f: Iterator[String] => T): T = {
    val reader = new InputStreamReader(loader.openStream(), charset)
    try f(new LineBreakPreservingIterator(reader)) finally reader.close()
  }
}
78 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/LfsBlobConverter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.google.common.io.ByteSource
24 | import com.google.common.io.Files.createParentDirs
25 | import com.madgag.git.LFS._
26 | import com.madgag.git._
27 | import com.madgag.git.bfg.model._
28 | import com.madgag.git.bfg.{MemoFunc, MemoUtil}
29 | import com.madgag.textmatching.{Glob, TextMatcher}
30 | import org.eclipse.jgit.internal.storage.file.FileRepository
31 | import org.eclipse.jgit.lib.{ObjectId, ObjectReader}
32 |
33 | import java.nio.charset.{Charset, StandardCharsets}
34 | import java.nio.file.{Files, Path}
35 | import scala.jdk.StreamConverters._
36 | import scala.util.{Try, Using}
37 |
/**
 * Tree-blob modifier that swaps blobs whose file name matches a glob for Git LFS pointer blobs,
 * storing the real content under the repository's LFS objects directory, and makes sure the
 * `.gitattributes` of every tree it changed declares the glob as LFS-tracked.
 *
 * @param lfsGlobExpression glob matched against file names to select files for conversion
 * @param repo              repository being rewritten; LFS objects are stored under its git directory
 */
class LfsBlobConverter(
  lfsGlobExpression: String,
  repo: FileRepository
) extends TreeBlobModifier {

  val lfsObjectsDir: Path = repo.getDirectory.toPath.resolve(LFS.ObjectsPath)

  val threadLocalObjectDBResources = repo.getObjectDatabase.threadLocalResources

  val lfsGlob = TextMatcher(Glob, lfsGlobExpression)

  val lfsSuitableFiles: (FileName => Boolean) = f => lfsGlob(f.string)

  val gitAttributesLine = s"$lfsGlobExpression filter=lfs diff=lfs merge=lfs -text"

  implicit val UTF_8: Charset = StandardCharsets.UTF_8

  val lfsPointerMemo = MemoUtil.concurrentCleanerMemo[ObjectId]()

  /** Cleans the blobs and, only if that changed anything, ensures the git-attributes entry is present. */
  override def apply(dirtyBlobs: TreeBlobs) = {
    val cleanedBlobs = super.apply(dirtyBlobs)
    if (cleanedBlobs == dirtyBlobs) cleanedBlobs else ensureGitAttributesSetFor(cleanedBlobs)
  }

  /**
   * Returns `cleanedBlobs` with a git-attributes file containing `gitAttributesLine`: a new file is
   * created if there was none, the line is appended if it was missing, and an existing file that
   * already contains the line keeps its blob id.
   */
  def ensureGitAttributesSetFor(cleanedBlobs: TreeBlobs): TreeBlobs = {
    implicit lazy val inserter = threadLocalObjectDBResources.inserter()

    val newGitAttributesId = cleanedBlobs.entryMap.get(GitAttributesFileName).fold {
      storeBlob(gitAttributesLine)
    } {
      case (_, oldGitAttributesId) =>
        val objectLoader = threadLocalObjectDBResources.reader().open(oldGitAttributesId)
        Using(ByteSource.wrap(objectLoader.getCachedBytes).asCharSource(UTF_8).lines()) { oldAttributesStream =>
          val oldAttributes = oldAttributesStream.toScala(Seq)
          if (oldAttributes.contains(gitAttributesLine)) oldGitAttributesId else {
            storeBlob((oldAttributes :+ gitAttributesLine).mkString("\n"))
          }
        }.get
    }
    cleanedBlobs.copy(entryMap = cleanedBlobs.entryMap + (GitAttributesFileName -> (RegularFile, newGitAttributesId)))
  }

  /** Replaces the blob id of entries whose file name matches the glob; other entries are unchanged. */
  override def fix(entry: TreeBlobEntry) = {
    val cleanId = if (lfsSuitableFiles(entry.filename)) lfsPointerBlobIdForRealBlob(entry.objectId) else entry.objectId
    (entry.mode, cleanId)
  }

  /**
   * Memoised mapping from a real blob id to the id of its LFS pointer blob. Blobs of 512 bytes or
   * less, and blobs whose size lookup or LFS storage fails, map to themselves.
   */
  val lfsPointerBlobIdForRealBlob: MemoFunc[ObjectId, ObjectId] = lfsPointerMemo { blobId: ObjectId =>
    implicit val reader = threadLocalObjectDBResources.reader()
    implicit lazy val inserter = threadLocalObjectDBResources.inserter()

    (for {
      blobSize <- blobId.sizeTry if blobSize > 512
      pointer <- tryStoringLfsFileFor(blobId)
    } yield storeBlob(pointer.bytes)).getOrElse(blobId)
  }

  /**
   * Makes sure the LFS object file for `blobId` exists under `lfsObjectsDir`.
   *
   * The pointer is computed by `pointerFor` with the help of a temporary file (presumably the blob
   * content is written there - confirm against `LFS.pointerFor`), which is then moved into place
   * unless the target already exists. A failed move is tolerated when a file of the expected size
   * is found at the target path afterwards.
   *
   * @return the pointer on success, or the failure of the move
   */
  def tryStoringLfsFileFor(blobId: ObjectId)(implicit r: ObjectReader): Try[Pointer] = {
    val loader = blobId.open

    val tmpFile: Path = Files.createTempFile(s"bfg.git-lfs.conv-${blobId.name}","dat")

    try {
      val pointer = pointerFor(loader, tmpFile)

      val lfsPath = lfsObjectsDir.resolve(pointer.path)

      createParentDirs(lfsPath.toFile)

      val ensureLfsFile = Try(if (!Files.exists(lfsPath)) Files.move(tmpFile, lfsPath)).recover {
        case _ if Files.exists(lfsPath) && Files.size(lfsPath) == loader.getSize => ()
      }

      ensureLfsFile.map(_ => pointer)
    } finally {
      // Best-effort cleanup that now also runs when computing the pointer or creating the
      // directories throws, so a failed conversion no longer leaves its temp file behind.
      Try(Files.deleteIfExists(tmpFile))
    }
  }

}
116 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/ObjectIdCleaner.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.collection.concurrent.ConcurrentMultiMap
24 | import com.madgag.git._
25 | import com.madgag.git.bfg.GitUtil._
26 | import com.madgag.git.bfg.cleaner.protection.{ProtectedObjectCensus, ProtectedObjectDirtReport}
27 | import com.madgag.git.bfg.model.{Tree, TreeSubtrees, _}
28 | import com.madgag.git.bfg.{CleaningMapper, Memo, MemoFunc, MemoUtil}
29 | import org.eclipse.jgit.lib.Constants._
30 | import org.eclipse.jgit.lib._
31 | import org.eclipse.jgit.revwalk.{RevCommit, RevTag, RevWalk}
32 |
object ObjectIdCleaner {

  /**
   * Everything an `ObjectIdCleaner` needs to know: which objects are protected, how old ids in
   * messages are substituted, the cleaners to run at commit / tree-entry / blob / subtree level,
   * and an optional checker applied to objects before they are written.
   *
   * Each sequence of cleaners is collapsed into a single chained cleaner on first use.
   */
  case class Config(protectedObjectCensus: ProtectedObjectCensus,
                    objectIdSubstitutor: ObjectIdSubstitutor = ObjectIdSubstitutor.OldIdsPublic,
                    commitNodeCleaners: Seq[CommitNodeCleaner] = Seq.empty,
                    treeEntryListCleaners: Seq[Cleaner[Seq[Tree.Entry]]] = Seq.empty,
                    treeBlobsCleaners: Seq[Cleaner[TreeBlobs]] = Seq.empty,
                    treeSubtreesCleaners: Seq[Cleaner[TreeSubtrees]] = Seq.empty,
                    // messageCleaners? - covers both Tag and Commits
                    objectChecker: Option[ObjectChecker] = None) {

    /** All commit-node cleaners combined into one. */
    lazy val commitNodeCleaner = CommitNodeCleaner.chain(commitNodeCleaners)

    /** All tree-entry-list cleaners applied one after another. */
    lazy val treeEntryListCleaner = Function.chain(treeEntryListCleaners)

    /** All tree-blobs cleaners applied one after another. */
    lazy val treeBlobsCleaner = Function.chain(treeBlobsCleaners)

    /** All tree-subtrees cleaners applied one after another. */
    lazy val treeSubtreesCleaner:Cleaner[TreeSubtrees] = Function.chain(treeSubtreesCleaners)
  }

}
54 |
/**
 * Knows how to clean an object, and what objects are protected...
 *
 * Cleaning is memoised per object kind; the ids of protected objects taken from the census are
 * passed to the general and tree memos when they are created. Blob changes and deletions noticed
 * while cleaning trees are recorded by file name in `changesByFilename` / `deletionsByFilename`.
 */
class ObjectIdCleaner(config: ObjectIdCleaner.Config, objectDB: ObjectDatabase, implicit val revWalk: RevWalk) extends CleaningMapper[ObjectId] {

  import config._

  val threadLocalResources = objectDB.threadLocalResources

  val changesByFilename = new ConcurrentMultiMap[FileName, (ObjectId, ObjectId)]
  val deletionsByFilename = new ConcurrentMultiMap[FileName, ObjectId]

  // want to enforce that once any value is returned, it is 'good' and therefore an identity-mapped key as well
  val memo: Memo[ObjectId, ObjectId] = MemoUtil.concurrentCleanerMemo(protectedObjectCensus.fixedObjectIds)

  val commitMemo: Memo[ObjectId, ObjectId] = MemoUtil.concurrentCleanerMemo()
  val tagMemo: Memo[ObjectId, ObjectId] = MemoUtil.concurrentCleanerMemo()

  val treeMemo: Memo[ObjectId, ObjectId] = MemoUtil.concurrentCleanerMemo(protectedObjectCensus.treeIds.toSet[ObjectId])

  /** Cleans any kind of object, going through the general memo. */
  def apply(objectId: ObjectId): ObjectId = memoClean(objectId)

  val memoClean = memo {
    uncachedClean
  }

  /** Merges the contents of the general, commit, tag and tree memos into a single map. */
  def cleanedObjectMap(): Map[ObjectId, ObjectId] =
    Seq(memoClean, cleanCommit, cleanTag, cleanTree).map(_.asMap()).reduce(_ ++ _)

  /** Dispatches on the type of the stored object, bypassing the general memo. */
  def uncachedClean: (ObjectId) => ObjectId = {
    objectId =>
      threadLocalResources.reader().open(objectId).getType match {
        case OBJ_COMMIT => cleanCommit(objectId)
        case OBJ_TREE => cleanTree(objectId)
        case OBJ_TAG => cleanTag(objectId)
        case _ => objectId // we don't currently clean isolated blobs... only clean within a tree context
      }
  }

  // Look-ups through the shared RevWalk are serialised by synchronising on it
  def getCommit(commitId: AnyObjectId): RevCommit = revWalk.synchronized(commitId.asRevCommit)

  def getTag(tagId: AnyObjectId): RevTag = revWalk.synchronized(tagId.asRevTag)

  /**
   * Cleans a commit: its arcs are cleaned with this cleaner, then the commit-node cleaner is applied.
   * A new commit object is written only if something changed, and is checked by `objectChecker`
   * before being inserted.
   */
  val cleanCommit: MemoFunc[ObjectId, ObjectId] = commitMemo { commitId =>
    val originalRevCommit = getCommit(commitId)
    val originalCommit = Commit(originalRevCommit)

    val cleanedArcs = originalCommit.arcs cleanWith this
    val kit = new CommitNodeCleaner.Kit(threadLocalResources, originalRevCommit, originalCommit, cleanedArcs, apply)
    val updatedCommitNode = commitNodeCleaner.fixer(kit)(originalCommit.node)
    val updatedCommit = Commit(updatedCommitNode, cleanedArcs)

    if (updatedCommit != originalCommit) {
      val commitBytes = updatedCommit.toBytes
      objectChecker.foreach(_.checkCommit(commitBytes))
      threadLocalResources.inserter().insert(OBJ_COMMIT, commitBytes)
    } else {
      originalRevCommit
    }
  }

  val cleanBlob: Cleaner[ObjectId] = identity // Currently a NO-OP, we only clean at treeblob level

  /**
   * Cleans a tree: the entry list is cleaned first, then blobs and subtrees (subtrees recursively,
   * dropping those that end up empty). The original id is returned when nothing changed; otherwise
   * blob changes are recorded and the new tree is checked and inserted.
   */
  val cleanTree: MemoFunc[ObjectId, ObjectId] = treeMemo { originalObjectId =>
    val entries = Tree.entriesFor(originalObjectId)(threadLocalResources.reader())
    val cleanedTreeEntries = treeEntryListCleaner(entries)

    val tree = Tree(cleanedTreeEntries)

    val originalBlobs = tree.blobs
    val fixedTreeBlobs = treeBlobsCleaner(originalBlobs)
    val cleanedSubtrees = TreeSubtrees(treeSubtreesCleaner(tree.subtrees).entryMap.map {
      case (name, treeId) => (name, cleanTree(treeId))
    }).withoutEmptyTrees

    val treeBlobsChanged = fixedTreeBlobs != originalBlobs
    if (entries == cleanedTreeEntries && !treeBlobsChanged && cleanedSubtrees == tree.subtrees) originalObjectId else {
      if (treeBlobsChanged) recordChange(originalBlobs, fixedTreeBlobs)

      val updatedTree = tree copyWith(cleanedSubtrees, fixedTreeBlobs)

      val treeFormatter = updatedTree.formatter
      objectChecker.foreach(_.checkTree(treeFormatter.toByteArray))
      treeFormatter.insertTo(threadLocalResources.inserter())
    }
  }

  /**
   * Records, per file name, every original blob entry that is no longer present unchanged: as a
   * change if the file name still exists in the fixed blobs, as a deletion otherwise.
   */
  def recordChange(originalBlobs: TreeBlobs, fixedTreeBlobs: TreeBlobs): Unit = {
    val changedFiles: Set[TreeBlobEntry] = originalBlobs.entries.toSet -- fixedTreeBlobs.entries.toSet
    for (TreeBlobEntry(filename, _, oldId) <- changedFiles) {
      fixedTreeBlobs.objectId(filename) match {
        case Some(newId) => changesByFilename.addBinding(filename, (oldId, newId))
        case None => deletionsByFilename.addBinding(filename, oldId)
      }
    }
  }

  case class TreeBlobChange(oldId: ObjectId, newIdOpt: Option[ObjectId], filename: FileName)

  /**
   * Cleans an annotated tag: if the tagged object has a replacement, a new tag pointing at it is
   * written, with old object ids in the message substituted; otherwise the original tag is kept.
   */
  val cleanTag: MemoFunc[ObjectId, ObjectId] = tagMemo { id =>
    val originalTag = getTag(id)

    replacement(originalTag.getObject).map {
      cleanedObj =>
        val tb = new TagBuilder
        tb.setTag(originalTag.getTagName)
        tb.setObjectId(cleanedObj, originalTag.getObject.getType)
        tb.setTagger(originalTag.getTaggerIdent)
        tb.setMessage(objectIdSubstitutor.replaceOldIds(originalTag.getFullMessage, threadLocalResources.reader(), apply))
        // Check BEFORE inserting - as cleanCommit and cleanTree do - so that a tag rejected by the
        // checker is never handed to the inserter.
        objectChecker.foreach(_.checkTag(tb.build()))
        val cleanedTag: ObjectId = threadLocalResources.inserter().insert(tb)
        cleanedTag
    }.getOrElse(originalTag)
  }

  /** One dirt report per protected object, computed on first access using the un-memoised clean. */
  lazy val protectedDirt: Seq[ProtectedObjectDirtReport] = {
    protectedObjectCensus.protectorRevsByObject.map {
      case (protectedRevObj, refNames) =>
        val originalContentObject = treeOrBlobPointedToBy(protectedRevObj).merge
        val replacementTreeOrBlob = uncachedClean.replacement(originalContentObject)
        ProtectedObjectDirtReport(protectedRevObj, originalContentObject, replacementTreeOrBlob)
    }.toList
  }

  /** Statistics of each memoised function, keyed by a short label. */
  def stats() = Map("apply"->memoClean.stats(), "tree" -> cleanTree.stats(), "commit" -> cleanCommit.stats(), "tag" -> cleanTag.stats())

}
182 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/ObjectIdSubstitutor.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.git._
24 | import com.madgag.git.bfg.GitUtil._
25 | import com.madgag.git.bfg.cleaner.ObjectIdSubstitutor._
26 | import org.eclipse.jgit.lib.{AbbreviatedObjectId, ObjectId, ObjectReader}
27 |
/**
 * Commit-node cleaner that rewrites the object ids mentioned in a commit message, delegating the
 * actual substitution to the given `ObjectIdSubstitutor`.
 */
class CommitMessageObjectIdsUpdater(objectIdSubstitutor: ObjectIdSubstitutor) extends CommitNodeCleaner {

  override def fixer(kit: CommitNodeCleaner.Kit) = { commitNode =>
    val originalMessage = commitNode.message
    val updatedMessage = objectIdSubstitutor.replaceOldIds(originalMessage, kit.threadLocalResources.reader(), kit.mapper)
    commitNode.copy(message = updatedMessage)
  }

}
33 |
object ObjectIdSubstitutor {

  /** Matches stand-alone runs of 10 to 40 hexadecimal digits - the candidates for being object ids. */
  val hexRegex = """\b\p{XDigit}{10,40}\b""".r // choose minimum size based on size of project??

  /** Substitutor whose output mentions the old id next to the new one. */
  object OldIdsPublic extends ObjectIdSubstitutor {
    def format(oldIdText: String, newIdText: String) = s"$newIdText [formerly $oldIdText]"
  }

  /** Substitutor whose output contains the new id only. */
  object OldIdsPrivate extends ObjectIdSubstitutor {
    def format(oldIdText: String, newIdText: String) = newIdText
  }

}
47 |
/**
 * Replaces references to old object ids found in free text (such as commit or tag messages) with
 * text describing the corresponding cleaned ids.
 */
trait ObjectIdSubstitutor {

  /**
   * @param oldIdText the id exactly as it appeared in the message
   * @param newIdText the new id, abbreviated to the same length as `oldIdText`
   * @return the literal text that replaces `oldIdText` in the message
   */
  def format(oldIdText: String, newIdText: String): String

  /**
   * Rewrites every hex string in `message` that resolves to an existing, unique object for which
   * `mapper` yields a replacement. Everything else - including the whole message when nothing needs
   * substituting - is left as it is.
   */
  // slow!
  def replaceOldIds(message: String, reader: ObjectReader, mapper: Cleaner[ObjectId]): String = {
    val substitutionOpts = for {
      m: String <- hexRegex.findAllIn(message).toSet
      objectId <- reader.resolveExistingUniqueId(AbbreviatedObjectId.fromString(m)).toOption
    } yield mapper.replacement(objectId).map(newId => m -> format(m, reader.abbreviate(newId, m.length).name))

    val substitutions = substitutionOpts.flatten.toMap
    if (substitutions.isEmpty) message else hexRegex.replaceSomeIn(message, m =>
      // replaceSomeIn treats its replacement as a template in which '$' and '\' are special, so
      // quote the formatted text to have it inserted literally whatever `format` returns.
      substitutions.get(m.matched).map(text => scala.util.matching.Regex.quoteReplacement(text)))
  }
}
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/RepoRewriter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.git._
24 | import com.madgag.git.bfg.Timing
25 | import org.eclipse.jgit.lib.{ObjectId, ProgressMonitor, RefDatabase}
26 | import org.eclipse.jgit.revwalk.RevSort._
27 | import org.eclipse.jgit.revwalk.{RevCommit, RevWalk}
28 | import org.eclipse.jgit.transport.ReceiveCommand
29 |
30 | import scala.jdk.CollectionConverters._
31 | import scala.collection.parallel.CollectionConverters._
32 | import scala.concurrent.ExecutionContext.Implicits.global
33 | import scala.concurrent.Future
34 |
35 | /*
36 | Encountering a blob ->
37 | BIG-BLOB-DELETION : Either 'good' or 'delete' - or possibly replace, with a different filename (means tree-level)
38 | PASSWORD-REMOVAL : Either 'good' or 'replace'
39 |
40 | Encountering a tree ->
41 | BIG-BLOB-DELETION : Either 'good' or 'replace' - possibly adding with a different placeholder blob entry
42 | PASSWORD-REMOVAL : Either 'good' or 'replace' - replacing one blob entry with another
43 |
44 | So if we encounter a tree, we are unlikely to want to remove that tree entirely...
45 | SHOULD WE JUST DISALLOW THAT?
46 | Obviously, a Commit HAS to have a tree, so it's dangerous to allow a None response to tree transformation
47 |
48 | An objectId must be either GOOD or BAD, and we must never translate *either* kind of id into a BAD
49 |
50 | User-customisation interface: TreeBlobs => TreeBlobs
51 |
52 | User gets no say in adding, renaming, removing directories
53 |
54 | TWO MAIN USE CASES FOR HISTORY-CHANGING ARE:
55 | 1: GETTING RID OF BIG BLOBS
56 | 2: REMOVING PASSWORDS IN HISTORICAL FILES
57 |
58 | possible other use-case: fixing committer names - and possibly removing passwords from commits? (could possibly just be done with rebase)
59 |
60 | Why else would you want to rewrite HISTORY? Many other changes (ie putting a directory one down) need only be applied
61 | in a new commit, we don't care about history.
62 |
63 | When updating a Tree, the User has no right to muck with sub-trees. They can only alter the blob contents.
64 | */
65 |
object RepoRewriter {

  /**
   * Rewrites the history reachable from the refs of `repo` according to `objectIdCleanerConfig`.
   *
   * Commits are walked parents-first and cleaned, then every non-symbolic ref whose target was
   * changed is updated in a single batch. Progress and results are reported through a `CLIReporter`.
   *
   * @param repo                  the repository to rewrite; it must have at least one ref
   * @param objectIdCleanerConfig what to protect and which cleaners to apply
   * @return the contents of the cleaner's memos merged into one map of object ids
   */
  def rewrite(repo: org.eclipse.jgit.lib.Repository, objectIdCleanerConfig: ObjectIdCleaner.Config): Map[ObjectId, ObjectId] = {
    implicit val refDatabase: RefDatabase = repo.getRefDatabase

    assert(refDatabase.hasRefs, "Can't find any refs in repo at " + repo.getDirectory.getAbsolutePath)

    val reporter: Reporter = new CLIReporter(repo)
    implicit val progressMonitor: ProgressMonitor = reporter.progressMonitor

    val allRefs = refDatabase.getRefs().asScala

    def createRevWalk: RevWalk = {

      val revWalk = new RevWalk(repo)

      revWalk.sort(TOPO) // crucial to ensure we visit parents BEFORE children, otherwise blow stack
      revWalk.sort(REVERSE, true) // we want to start with the earliest commits and work our way up...

      val startCommits = allRefs.map(_.targetObjectId.asRevObject(revWalk)).collect { case c: RevCommit => c }

      revWalk.markStart(startCommits.asJavaCollection)
      revWalk
    }

    implicit val revWalk = createRevWalk
    try {
      implicit val reader = revWalk.getObjectReader

      reporter.reportRefsForScan(allRefs)

      reporter.reportObjectProtection(objectIdCleanerConfig)(repo.getObjectDatabase, revWalk)

      val objectIdCleaner = new ObjectIdCleaner(objectIdCleanerConfig, repo.getObjectDatabase, revWalk)

      val commits = revWalk.asScala.toSeq

      def clean(commits: Seq[RevCommit]): Unit = {
        reporter.reportCleaningStart(commits)

        Timing.measureTask("Cleaning commits", commits.size) {
          // Trees are cleaned in parallel in a background task that is not awaited...
          Future {
            commits.par.foreach {
              commit => objectIdCleaner(commit.getTree)
            }
          }

          // ...while the commits themselves are cleaned in walk order on this thread.
          commits.foreach {
            commit =>
              objectIdCleaner(commit)
              progressMonitor update 1
          }
        }
      }

      def updateRefsWithCleanedIds(): Unit = {
        val refUpdateCommands = for (ref <- repo.nonSymbolicRefs;
                                     (oldId, newId) <- objectIdCleaner.substitution(ref.getObjectId)
        ) yield new ReceiveCommand(oldId, newId, ref.getName)

        if (refUpdateCommands.isEmpty) {
          println("\nBFG aborting: No refs to update - no dirty commits found??\n")
        } else {
          reporter.reportRefUpdateStart(refUpdateCommands)

          Timing.measureTask("...Ref update", refUpdateCommands.size) {
            // Hack a fix for issue #23 : Short-cut the calculation that determines an update is NON-FF
            val quickMergeCalcRevWalk = new RevWalk(revWalk.getObjectReader) {
              override def isMergedInto(base: RevCommit, tip: RevCommit) =
                if (tip == objectIdCleaner(base)) false else super.isMergedInto(base, tip)
            }

            refDatabase.newBatchUpdate.setAllowNonFastForwards(true).addCommand(refUpdateCommands.asJavaCollection)
              .execute(quickMergeCalcRevWalk, progressMonitor)
          }

          reporter.reportResults(commits, objectIdCleaner)
        }
      }


      clean(commits)

      updateRefsWithCleanedIds()

      objectIdCleaner.stats()

      objectIdCleaner.cleanedObjectMap()
    } finally {
      // The walk was created from the repository and so owns its reader: release both once the
      // result map has been built (or the rewrite has failed).
      revWalk.close()
    }
  }

}
156 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/Reporter.scala:
--------------------------------------------------------------------------------
1 | package com.madgag.git.bfg.cleaner
2 |
3 | import com.google.common.io.Files.asCharSink
4 | import com.madgag.collection.concurrent.ConcurrentMultiMap
5 | import com.madgag.git._
6 | import com.madgag.git.bfg.cleaner.Reporter.dump
7 | import com.madgag.git.bfg.cleaner.protection.{ProtectedObjectCensus, ProtectedObjectDirtReport}
8 | import com.madgag.git.bfg.model.FileName
9 | import com.madgag.text.Text._
10 | import com.madgag.text.{ByteSize, Tables, Text}
11 | import org.eclipse.jgit.diff.DiffEntry.ChangeType._
12 | import org.eclipse.jgit.diff._
13 | import org.eclipse.jgit.lib.FileMode._
14 | import org.eclipse.jgit.lib._
15 | import org.eclipse.jgit.revwalk.{RevCommit, RevWalk}
16 | import org.eclipse.jgit.transport.ReceiveCommand
17 |
18 | import java.nio.charset.StandardCharsets.UTF_8
19 | import java.nio.file.Files.createDirectories
20 | import java.nio.file.Path
21 | import java.time.ZonedDateTime
22 | import java.time.format.DateTimeFormatter
23 | import scala.collection.immutable.SortedMap
24 | import scala.jdk.CollectionConverters._
25 |
26 |
object Reporter {

  /** Writes the strings of `iter` as lines of UTF-8 text to the file at `path`, using "\n" as the line separator. */
  def dump(path: Path, iter: Iterable[String]): Unit =
    asCharSink(path.toFile, UTF_8).writeLines(iter.asJava, "\n")
}
34 |
/**
 * Receives notifications about the stages of a repository rewrite, so that they can be presented
 * to the user.
 */
trait Reporter {

  /** Progress monitor handed to the long-running tasks of the rewrite. */
  val progressMonitor: ProgressMonitor

  /** Reports the refs whose history is going to be scanned. */
  def reportRefsForScan(allRefs: Iterable[Ref])(implicit objReader: ObjectReader): Unit

  /** Reports the ref updates that are about to be executed. */
  def reportRefUpdateStart(refUpdateCommands: Iterable[ReceiveCommand]): Unit

  /** Reports which objects are protected under the given cleaner configuration. */
  def reportObjectProtection(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk): Unit

  /** Reports that cleaning of the given commits is starting. */
  def reportCleaningStart(commits: Seq[RevCommit]): Unit

  /** Reports the outcome of the rewrite, based on the state accumulated by `objectIdCleaner`. */
  def reportResults(commits: Seq[RevCommit], objectIdCleaner: ObjectIdCleaner): Unit
}
49 |
50 | class CLIReporter(repo: Repository) extends Reporter {
51 |
52 | lazy val reportsDir: Path = {
53 | val now = ZonedDateTime.now()
54 |
55 | val topDirPath = repo.topDirectory.toPath.toAbsolutePath
56 |
57 | val reportsDir = topDirPath.resolveSibling(s"${topDirPath.getFileName}.bfg-report")
58 |
59 | val dateFormatter = DateTimeFormatter.ofPattern("uuuu-MM-dd")
60 | val timeFormatter = DateTimeFormatter.ofPattern("HH-mm-ss")
61 |
62 | val dir = reportsDir.resolve(now.format(dateFormatter)).resolve(now.format(timeFormatter))
63 |
64 | createDirectories(dir)
65 | dir
66 | }
67 |
68 | lazy val progressMonitor = new TextProgressMonitor
69 |
70 | def reportRefUpdateStart(refUpdateCommands: Iterable[ReceiveCommand]): Unit = {
71 | println(title("Updating " + plural(refUpdateCommands, "Ref")))
72 |
73 | val summaryTableCells = refUpdateCommands.map(update => (update.getRefName, update.getOldId.shortName, update.getNewId.shortName))
74 |
75 | Tables.formatTable(("Ref", "Before", "After"), summaryTableCells.toSeq).map("\t" + _).foreach(println)
76 |
77 | println()
78 | }
79 |
80 | def reportRefsForScan(allRefs: Iterable[Ref])(implicit objReader: ObjectReader): Unit = {
81 | val refsByObjType = allRefs.groupBy {
82 | ref => objReader.open(ref.getObjectId).getType
83 | } withDefault Seq.empty
84 |
85 | refsByObjType.foreach {
86 | case (typ, refs) => println("Found " + refs.size + " " + Constants.typeString(typ) + "-pointing refs : " + abbreviate(refs.map(_.getName).toSeq, "...", 4).mkString(", "))
87 | }
88 | }
89 |
90 |
91 | // abort due to Dirty Tips on Private run - user needs to manually clean
92 | // warn due to Dirty Tips on Public run - it's not so serious if users publicise dirty tips.
93 | // if no protection
94 | def reportObjectProtection(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk): Unit = {
95 | println(title("Protected commits"))
96 |
97 | if (objectIdCleanerConfig.protectedObjectCensus.isEmpty) {
98 | println("You're not protecting any commits, which means the BFG will modify the contents of even *current* commits.\n\n" +
99 | "This isn't recommended - ideally, if your current commits are dirty, you should fix up your working copy and " +
100 | "commit that, check that your build still works, and only then run the BFG to clean up your history.")
101 | } else {
102 | println("These are your protected commits, and so their contents will NOT be altered:\n")
103 |
104 | val unprotectedConfig = objectIdCleanerConfig.copy(protectedObjectCensus = ProtectedObjectCensus.None)
105 |
106 | reportProtectedCommitsAndTheirDirt(objectIdCleanerConfig)
107 | }
108 | }
109 |
  /**
   * Details of one side of a diff entry.
   *
   * @param id   object id of the entry on that side
   * @param path path of the entry on that side
   * @param mode file mode of the entry on that side
   * @param size size of the object, if available
   */
  case class DiffSideDetails(id: ObjectId, path: String, mode: FileMode, size: Option[Long])
111 |
  /**
   * Lists every protected object on the console and, for those whose content would
   * have been changed by cleaning, prints the affected files and writes a CSV file
   * describing them into the "protected-dirt" sub-directory of `reportsDir`.
   * If any protected object holds dirt, a closing WARNING block is printed.
   *
   * @param objectIdCleanerConfig cleaner configuration, including the protected-object census
   * @param objectDB              object database handed to `ProtectedObjectDirtReport.reportsFor`
   * @param revWalk               rev-walk supplying the object reader and used to compute diffs
   */
  def reportProtectedCommitsAndTheirDirt(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk): Unit = {
    // Made implicit for the helpers below - presumably required by `sizeOpt`; confirm against its definition.
    implicit val reader = revWalk.getObjectReader

    // Captures the OLD side of a diff entry, i.e. the content as it exists in the protected object.
    def diffDetails(d: DiffEntry) = {
      val side = DiffEntry.Side.OLD
      val id: ObjectId = d.getId(side).toObjectId
      DiffSideDetails(id, d.getPath(side), d.getMode(side), id.sizeOpt)
    }

    // Renders the path, followed by "(submodule)" for gitlinks or the formatted size when known.
    def fileInfo(d: DiffSideDetails) = {
      val extraInfo = (d.mode match {
        case GITLINK => Some("submodule")
        case _ => d.size.map(ByteSize.format(_))
      }).map(e => s"($e)")

      (d.path +: extraInfo.toSeq).mkString(" ")
    }

    val protectedDirtDir = reportsDir.resolve("protected-dirt")
    createDirectories(protectedDirtDir)

    val reports = ProtectedObjectDirtReport.reportsFor(objectIdCleanerConfig, objectDB)

    reports.foreach {
      report =>
        val protectorRevs = objectIdCleanerConfig.protectedObjectCensus.protectorRevsByObject(report.revObject)
        val objectTitle = s" * ${report.revObject.typeString} ${report.revObject.shortName} (protected by '${protectorRevs.mkString("', '")}')"

        report.dirt match {
          // No replacement was produced for this object, so only its title is printed.
          case None => println(objectTitle)
          case Some(diffEntries) =>
            if (diffEntries.isEmpty) {
              // A replacement exists but no non-ADD diff entries were found.
              println(objectTitle + " - dirty")
            } else {
              println(objectTitle + " - contains " + plural(diffEntries, "dirty file") + " : ")
              abbreviate(diffEntries.view.map(diffDetails).map(fileInfo), "...").foreach {
                dirtyFile => println("\t- " + dirtyFile)
              }

              // Rev names may contain the file-system separator (e.g. "refs/heads/x"), so it is
              // replaced before the names are used as part of a file name.
              val protectorRefsFileNameSafe: String = protectorRevs.mkString("_").replace(
                protectedDirtDir.getFileSystem.getSeparator,
                "-"
              )
              val diffFile = protectedDirtDir.resolve(s"${report.revObject.shortName}-$protectorRefsFileNameSafe.csv")

              // One comma-joined line per diff entry; NOTE(review): fields are not CSV-escaped.
              dump(diffFile, diffEntries.map {
                diffEntry =>
                  val de = diffDetails(diffEntry)

                  // Changed-line ranges are only recorded for MODIFY entries.
                  val modifiedLines = if (diffEntry.getChangeType == MODIFY) diffEntry.editList.map(changedLinesFor) else None

                  val elems = Seq(de.id.name, diffEntry.getChangeType.name, de.mode.name, de.path, de.size.getOrElse(""), modifiedLines.getOrElse(""))

                  elems.mkString(",")
              })
            }
        }
    }

    val dirtyReports = reports.filter(_.objectProtectsDirt)
    if (dirtyReports.nonEmpty) {

      println(s"""
        |WARNING: The dirty content above may be removed from other commits, but as
        |the *protected* commits still use it, it will STILL exist in your repository.
        |
        |Details of protected dirty content have been recorded here :
        |
        |${protectedDirtDir.toAbsolutePath.toString + protectedDirtDir.getFileSystem.getSeparator}
        |
        |If you *really* want this content gone, make a manual commit that removes it,
        |and then run the BFG on a fresh copy of your repo.
      """.stripMargin)
      // TODO would like to abort here if we are cleaning 'private' data.
    }
  }
188 |
189 | def changedLinesFor(edits: EditList): String = {
190 | edits.asScala.map {
191 | edit => Seq(edit.getBeginA + 1, edit.getEndA).distinct.mkString("-")
192 | }.mkString(";")
193 | }
194 |
195 | def reportCleaningStart(commits: Seq[RevCommit]): Unit = {
196 | println(title("Cleaning"))
197 | println("Found " + commits.size + " commits")
198 | }
199 |
  /**
   * Prints the end-of-run summary and writes the detailed report files into `reportsDir`:
   * a compact commit "tree-dirt" history, tables of changed and deleted files, the
   * old-to-new object id map and the cleaner's cache statistics.
   *
   * @param commits         the commits that were processed; the history display labels the
   *                        first as "Earliest" and the last as "Latest"
   * @param objectIdCleaner the cleaner that performed the run, queried for its results
   */
  def reportResults(commits: Seq[RevCommit], objectIdCleaner: ObjectIdCleaner): Unit = {
    // Prints a one-line character strip summarising which stretches of history were changed.
    def reportTreeDirtHistory(): Unit = {

      // The strip is at least 20 and at most 60 characters wide.
      val dirtHistoryElements = math.max(20, math.min(60, commits.size))
      // Splits xs into n consecutive slices of near-equal size (some may be empty when xs.size < n).
      def cut[A](xs: Seq[A], n: Int) = {
        val avgSize = xs.size.toFloat / n
        def startOf(unit: Int): Int = math.round(unit * avgSize)
        (0 until n).view.map(u => xs.slice(startOf(u), startOf(u + 1)))
      }
      // One character per slice: ' ' empty, 'D' some commit has a dirty tree,
      // 'm' some commit is itself dirty, '.' otherwise.
      val treeDirtHistory = cut(commits, dirtHistoryElements).map {
        case commits if commits.isEmpty => ' '
        case commits if (commits.exists(c => objectIdCleaner.isDirty(c.getTree))) => 'D'
        case commits if (commits.exists(objectIdCleaner.isDirty)) => 'm'
        case _ => '.'
      }.mkString
      // Pads between the markers so that together they span the width of the strip.
      def leftRight(markers: Seq[String]) = markers.mkString(" " * (treeDirtHistory.length - markers.map(_.size).sum))
      println(title("Commit Tree-Dirt History"))
      println("\t" + leftRight(Seq("Earliest", "Latest")))
      println("\t" + leftRight(Seq("|", "|")))
      println("\t" + treeDirtHistory)
      println("\n\tD = dirty commits (file tree fixed)")
      println("\tm = modified commits (commit message or parents changed)")
      println("\t. = clean commits (no changes to file tree)\n")

      val firstModifiedCommit = commits.find(objectIdCleaner.isDirty).map(_ -> "First modified commit")
      val lastDirtyCommit = commits.reverse.find(c => objectIdCleaner.isDirty(c.getTree)).map(_ -> "Last dirty commit")
      // Only commits for which the cleaner reports a substitution produce a table row.
      val items = for {
        (commit, desc) <- firstModifiedCommit ++ lastDirtyCommit
        (before, after) <- objectIdCleaner.substitution(commit)
      } yield (desc, before.shortName, after.shortName)
      Tables.formatTable(("", "Before", "After"), items.toSeq).map("\t" + _).foreach(println)
    }

    reportTreeDirtHistory()

    lazy val mapFile: Path = reportsDir.resolve("object-id-map.old-new.txt")
    lazy val cacheStatsFile: Path = reportsDir.resolve("cache-stats.txt")

    val changedIds = objectIdCleaner.cleanedObjectMap()

    // Prints a table of per-filename data and dumps the full details to "<actiontype>-files.txt".
    // `f` builds the table row for a filename; `fi` builds the leading columns of a dump line.
    // Nothing is printed or written when there is no data.
    def reportFiles[FI](
      fileData: ConcurrentMultiMap[FileName, FI],
      actionType: String,
      tableTitles: Product
    )(f: ((FileName,Set[FI])) => Product)(fi: FI => Seq[String]): Unit = {
      implicit val fileNameOrdering = Ordering[String].on[FileName](_.string)

      val dataByFilename = SortedMap[FileName, Set[FI]](fileData.toMap.toSeq: _*)
      if (dataByFilename.nonEmpty) {
        println(title(s"$actionType files"))
        Tables.formatTable(tableTitles, dataByFilename.map(f).toSeq).map("\t" + _).foreach(println)

        val actionFile = reportsDir.resolve(s"${actionType.toLowerCase}-files.txt")

        // Each dump line is the columns from `fi` followed by the filename, space-separated.
        dump(actionFile, dataByFilename.flatMap {
          case (filename, changes) => changes.map(fi.andThen(fid => (fid :+ filename).mkString(" ")))
        })
      }
    }

    reportFiles(objectIdCleaner.changesByFilename, "Changed", ("Filename", "Before & After")) {
      case (filename, changes) => (filename, Text.abbreviate(changes.map {case (oldId, newId) => oldId.shortName+" ⇒ "+newId.shortName}, "...").mkString(", "))
    } { case (oldId, newId) => Seq(oldId.name, newId.name) }

    // Brought into implicit scope for the `sizeOpt` calls below - presumably required by it; confirm.
    implicit val reader = objectIdCleaner.threadLocalResources.reader()

    reportFiles(objectIdCleaner.deletionsByFilename, "Deleted", ("Filename", "Git id")) {
      case (filename, oldIds) => (filename, Text.abbreviate(oldIds.map(oldId => oldId.shortName + oldId.sizeOpt.map(size => s" (${ByteSize.format(size)})").mkString), "...").mkString(", "))
    } { oldId => Seq(oldId.name, oldId.sizeOpt.mkString) }

    println(s"\n\nIn total, ${changedIds.size} object ids were changed. Full details are logged here:\n\n\t$reportsDir")

    // One "old new" pair of full object names per line, sorted by the old id.
    dump(mapFile,SortedMap[AnyObjectId, ObjectId](changedIds.toSeq: _*).view.map { case (o,n) => s"${o.name} ${n.name}"})

    dump(cacheStatsFile,objectIdCleaner.stats().map(_.toString()))

    println("\nBFG run is complete! When ready, run: git reflog expire --expire=now --all && git gc --prune=now --aggressive")

  }
279 |
280 | def title(text: String) = s"\n$text\n" + ("-" * text.size) + "\n"
281 | }
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/TreeBlobModifier.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.git.bfg.MemoUtil
24 | import com.madgag.git.bfg.model.{TreeBlobEntry, _}
25 | import org.eclipse.jgit.lib.ObjectId
26 |
/**
 * A cleaner of `TreeBlobs` that rewrites each entry's file mode and object id
 * through the abstract `fix` method while always keeping the entry's filename.
 * Results are memoised per entry by `memoisedCleaner`.
 */
trait TreeBlobModifier extends Cleaner[TreeBlobs] {

  /** Supplies the (possibly changed) file mode and object id for the given entry. */
  def fix(entry: TreeBlobEntry): (BlobFileMode, ObjectId) // implementing code can not safely know valid filename

  /** Memoised per-entry cleaner: applies `fix` and rebuilds the entry under its original filename. */
  val memoisedCleaner: Cleaner[TreeBlobEntry] = MemoUtil.concurrentCleanerMemo[TreeBlobEntry](Set.empty) { original =>
    fix(original) match {
      case (mode, objectId) => TreeBlobEntry(original.filename, mode, objectId)
    }
  }

  /** Runs every entry of the given tree-blobs through the memoised cleaner. */
  override def apply(treeBlobs: TreeBlobs) = treeBlobs.entries.map(memoisedCleaner)

}
40 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/commits.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.git.ThreadLocalObjectDatabaseResources
24 | import com.madgag.git.bfg.model._
25 | import org.eclipse.jgit.lib._
26 | import org.eclipse.jgit.revwalk.RevCommit
27 |
object CommitNodeCleaner {

  /**
   * The context handed to a `CommitNodeCleaner` when it builds its fixer: the
   * original commit (both as a `RevCommit` and as a model `Commit`), the commit's
   * updated arcs, an object-id mapper and thread-local object database resources.
   */
  class Kit(val threadLocalResources: ThreadLocalObjectDatabaseResources,
            val originalRevCommit: RevCommit,
            val originalCommit: Commit,
            val updatedArcs: CommitArcs,
            val mapper: Cleaner[ObjectId]) {

    /** True when the updated arcs differ from the original commit's arcs. */
    val arcsChanged = originalCommit.arcs != updatedArcs

    /** True when the arcs changed or the supplied node differs from the original commit's node. */
    def commitIsChanged(withThisNode: CommitNode) = {
      if (arcsChanged) true else originalCommit.node != withThisNode
    }
  }

  /** Combines several cleaners into one whose fixer applies each cleaner's fixer in sequence. */
  def chain(cleaners: Seq[CommitNodeCleaner]) = new CommitNodeCleaner {
    def fixer(kit: CommitNodeCleaner.Kit) = {
      val fixers = cleaners.map(cleaner => cleaner.fixer(kit))
      Function.chain(fixers)
    }
  }
}
45 |
/**
 * Something that can produce, for a particular commit, a function that
 * transforms that commit's `CommitNode`.
 */
trait CommitNodeCleaner {
  /** Builds the `CommitNode` transformation to use for the commit described by `kit`. */
  def fixer(kit: CommitNodeCleaner.Kit): Cleaner[CommitNode]
}
49 |
/**
 * Commit-node cleaner that appends a "Former-commit-id" footer, holding the
 * original commit's name, to any commit node the kit reports as changed.
 */
object FormerCommitFooter extends CommitNodeCleaner {
  val Key = "Former-commit-id"

  /** Returns `modifier(v)` when `predicate(v)` holds, and `v` itself otherwise. */
  def modifyIf[A](predicate: A => Boolean)(modifier: A => A): (A => A) = { value =>
    if (!predicate(value)) value else modifier(value)
  }

  override def fixer(kit: CommitNodeCleaner.Kit) = modifyIf(kit.commitIsChanged) { node =>
    node.add(Footer(Key, kit.originalRevCommit.name))
  }
}
59 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/kit/BlobInserter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner.kit
22 |
23 | import java.io.InputStream
24 |
25 | import org.eclipse.jgit.lib.Constants._
26 | import org.eclipse.jgit.lib.{ObjectId, ObjectInserter}
27 |
/** Thin wrapper around an `ObjectInserter` that always inserts objects of type blob. */
class BlobInserter(objectInserter: ObjectInserter) {

  /** Inserts the given bytes as a blob and returns the resulting object id. */
  def insert(data: Array[Byte]): ObjectId = {
    objectInserter.insert(OBJ_BLOB, data)
  }

  /** Inserts `length` bytes read from `in` as a blob and returns the resulting object id. */
  def insert(length: Long, in: InputStream): ObjectId = {
    objectInserter.insert(OBJ_BLOB, length, in)
  }
}
33 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg
22 |
package object cleaner {
  /** A cleaner is simply a function from a value to its (possibly unchanged) cleaned value. */
  type Cleaner[V] = V => V
}
26 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/protection/ProtectedObjectCensus.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner.protection
22 |
23 | import com.madgag.git._
24 | import com.madgag.scala.collection.decorators._
25 | import org.eclipse.jgit.lib.{ObjectId, Repository}
26 | import org.eclipse.jgit.revwalk._
27 |
28 | /**
29 | * PROTECTING TREES :
30 | * Want to leave the tree unchanged for all commits at the tips of refs the user thinks are important.
31 | * What if you think a Tag is important? Or a tree?
32 | *
33 | * If a tag points to a:
34 | * - commit - that commit may change, but its tree must stay the same
35 | * - tree - who the fuck tags tree anyway? if I've been asked to protect it, that suggests that it's supposed to be inviolate
36 | * - blob - that blob will continue to be referenced by the repo, not disappear, but not be cleaned either, as we currently clean at TreeBlob level
37 | *
38 | * We can take a shortcut here by just pushing all hallowed trees straight into the memo collection
39 | * This does mean that we will never notice, or be able to report, if somebody sets a rule that 'cleans' (alters) a hallowed tree
40 | * It might also have somewhat unexpected consequences if someone hallows a very 'simple' directory that occurs often
41 | *
42 | *
43 | * PROTECTING BLOBS :
44 | * If a user wants to protect the tip of a ref, all blobs will be retained. There is no space-saving or secrets-kept
45 | * by deleting, tampering with those blobs elsewhere. And if you have some big-old blob like a jar that you have
46 | * used consistently throughout the history of your project, it benefits no-one to remove it- in fact it's actively
47 | * harmful.
48 | *
49 | * We explicitly protect blobs (rather than just allowing them to fall under the protection given to Trees) precisely
50 | * because these blobs may historically have existed in other directories (trees) that did not appear in the
51 | * protected tips, and so would not be protected by Tree protection.
52 | *
53 | *
54 | * PROTECTING TAGS & COMMITS :
55 | * This just means protecting the Trees & Blobs under those Tags and Commits, as specified above. Changing other
56 | * state - such as the message, or author, or referenced commit Ids (and consequently the object Id of the target
57 | * object itself) is very much up for grabs. I gotta change your history, or I've no business being here.
58 | */
object ProtectedObjectCensus {

  /** The empty census: nothing is protected. */
  val None = ProtectedObjectCensus()

  /**
   * Builds a census from revision strings by resolving each one against the repository.
   *
   * @param revisions revision expressions naming the objects to protect
   * @param repo      repository used to resolve the revisions and read objects
   * @throws IllegalArgumentException if a revision can not be resolved in the repository
   */
  def apply(revisions: Set[String])(implicit repo: Repository): ProtectedObjectCensus = {

    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    // Group the revision strings by the object each one resolves to.
    val objectProtection = revisions.groupBy { revision =>
      Option(repo.resolve(revision)).getOrElse { throw new IllegalArgumentException(
        s"Couldn't find '$revision' in ${repo.topDirectory.getAbsolutePath} - are you sure that exists?"
      )}.asRevObject
    }

    // blobs come from direct blob references and tag references
    // trees come from direct tree references, commit & tag references

    val treeAndBlobProtection = objectProtection.keys.groupUp(treeOrBlobPointedToBy)(_.toSet) // use Either?

    // Left entries are blobs, keyed here by their id.
    val directBlobProtection = treeAndBlobProtection collect {
      case (Left(blob), p) => blob.getId -> p
    }
    // Right entries are trees.
    val treeProtection = treeAndBlobProtection collect {
      case (Right(tree), p) => tree -> p
    }
    // Every blob found under a protected tree, mapped to the set of protected trees containing it.
    val indirectBlobProtection = treeProtection.keys.flatMap(tree => allBlobsUnder(tree).map(_ -> tree)).groupUp(_._1)(_.map(_._2).toSet)

    ProtectedObjectCensus(objectProtection, treeProtection, directBlobProtection, indirectBlobProtection)
  }
}
89 |
/**
 * Record of what is protected and why.
 *
 * @param protectorRevsByObject  each protected object mapped to the revision strings that named it
 * @param treeProtection         each protected tree mapped to the objects protecting it
 * @param directBlobProtection   each directly protected blob id mapped to the objects protecting it
 * @param indirectBlobProtection each blob id found under a protected tree mapped to those trees
 */
case class ProtectedObjectCensus(protectorRevsByObject: Map[RevObject, Set[String]] = Map.empty,
                                 treeProtection: Map[RevTree, Set[RevObject]] = Map.empty,
                                 directBlobProtection: Map[ObjectId, Set[RevObject]] = Map.empty,
                                 indirectBlobProtection: Map[ObjectId, Set[RevTree]] = Map.empty) {

  /** True when no object at all is protected. */
  val isEmpty: Boolean = protectorRevsByObject.isEmpty

  /** Ids of the protected trees. */
  lazy val treeIds: Set[RevTree] = treeProtection.keySet

  /** Ids of all protected blobs, whether protected directly or via a protected tree. */
  lazy val blobIds: Set[ObjectId] = {
    val direct = directBlobProtection.keySet
    val indirect = indirectBlobProtection.keySet
    direct ++ indirect
  }

  // blobs only for completeness here
  lazy val fixedObjectIds: Set[ObjectId] = treeIds ++ blobIds
}
104 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/protection/ProtectedObjectDirtReport.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner.protection
22 |
23 | import com.madgag.git._
24 | import com.madgag.git.bfg.GitUtil._
25 | import com.madgag.git.bfg.cleaner.ObjectIdCleaner
26 | import org.eclipse.jgit.diff.DiffEntry
27 | import org.eclipse.jgit.diff.DiffEntry.ChangeType._
28 | import org.eclipse.jgit.lib.{ObjectDatabase, ObjectId}
29 | import org.eclipse.jgit.revwalk.{RevObject, RevWalk}
30 | import org.eclipse.jgit.treewalk.TreeWalk
31 | import org.eclipse.jgit.treewalk.filter.TreeFilter
32 |
33 | import scala.jdk.CollectionConverters._
34 |
object ProtectedObjectDirtReport {

  /**
   * Produces one report per protected object by running a cleaner configured
   * with *no* protection over the tree or blob that the object points to.
   *
   * @param objectIdCleanerConfig the real cleaner configuration, including the protection census
   * @param objectDB              object database for the temporary unprotected cleaner
   * @param revWalk               rev-walk for the temporary cleaner and for resolving objects
   */
  def reportsFor(objectIdCleanerConfig: ObjectIdCleaner.Config, objectDB: ObjectDatabase)(implicit revWalk: RevWalk) = {
    val unprotectedConfig = objectIdCleanerConfig.copy(protectedObjectCensus = ProtectedObjectCensus.None)
    val uncaringCleaner: ObjectIdCleaner = new ObjectIdCleaner(unprotectedConfig, objectDB, revWalk)

    val protectedObjects = objectIdCleanerConfig.protectedObjectCensus.protectorRevsByObject.keys

    protectedObjects.map { protectedRevObj =>
      val originalContentTreeOrBlob = treeOrBlobPointedToBy(protectedRevObj)
      val replacementTreeOrBlob = originalContentTreeOrBlob.fold(
        uncaringCleaner.cleanBlob.replacement,
        uncaringCleaner.cleanTree.replacement
      )
      ProtectedObjectDirtReport(protectedRevObj, originalContentTreeOrBlob.merge, replacementTreeOrBlob)
    }
  }
}
50 |
51 | /**
52 | * The function of the ProtectedObjectDirtReport is to tell the user that this is the stuff they've decided
53 | * to protect in their latest commits - it's the stuff The BFG /would/ remove if you hadn't told it to
54 | * hold back.
55 | *
56 | * @param revObject - the protected object (eg protected because it is the HEAD commit, or even by additional refs)
57 | * @param originalTreeOrBlob - the unmodified content-object referred to by the protected object (may be same object)
58 | * @param replacementTreeOrBlob - an option, populated if cleaning creates a replacement for the content-object
59 | */
case class ProtectedObjectDirtReport(revObject: RevObject, originalTreeOrBlob: RevObject, replacementTreeOrBlob: Option[ObjectId]) {

  /** True when cleaning would have produced a replacement for the protected content. */
  val objectProtectsDirt: Boolean = replacementTreeOrBlob.isDefined

  /**
   * When a replacement exists, the differences between the original content and the
   * replacement, excluding entries whose change type is ADD; `None` when there is no replacement.
   */
  def dirt(implicit revWalk: RevWalk): Option[Seq[DiffEntry]] = replacementTreeOrBlob.map { replacementId =>
    val treeWalk = new TreeWalk(revWalk.getObjectReader)
    treeWalk.setRecursive(true)
    treeWalk.reset

    // Original content first, replacement second.
    treeWalk.addTree(originalTreeOrBlob.asRevTree)
    treeWalk.addTree(replacementId.asRevTree)
    treeWalk.setFilter(TreeFilter.ANY_DIFF)

    val allEntries = DiffEntry.scan(treeWalk).asScala
    allEntries.filterNot(entry => entry.getChangeType == ADD).toSeq
  }
}
74 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/treeblobs.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.git.bfg.cleaner.kit.BlobInserter
24 | import com.madgag.git.bfg.model.FileName.ImplicitConversions._
25 | import com.madgag.git.bfg.model.{TreeBlobEntry, _}
26 | import com.madgag.textmatching.TextMatcher
27 | import org.eclipse.jgit.lib.ObjectId
28 |
/** Removes every tree-blob entry whose filename is matched by the supplied matcher. */
class FileDeleter(fileNameMatcher: TextMatcher) extends Cleaner[TreeBlobs] {
  override def apply(tbs: TreeBlobs) = tbs.entries.filterNot { entry =>
    fileNameMatcher(entry.filename)
  }
}
32 |
/** Removes every tree-blob entry whose object id is in the supplied set of blob ids. */
class BlobRemover(blobIds: Set[ObjectId]) extends Cleaner[TreeBlobs] {
  override def apply(treeBlobs: TreeBlobs) = treeBlobs.entries.filterNot { entry =>
    blobIds.contains(entry.objectId)
  }
}
36 |
/**
 * Replaces each entry whose blob id is in `badBlobs` with a regular-file entry named
 * "<original filename>.REMOVED.git-id" whose content is the removed blob's id in text form.
 * Other entries are passed through unchanged.
 *
 * @param badBlobs     ids of the blobs to replace
 * @param blobInserter by-name inserter used to store the placeholder content
 */
class BlobReplacer(badBlobs: Set[ObjectId], blobInserter: => BlobInserter) extends Cleaner[TreeBlobs] {
  override def apply(treeBlobs: TreeBlobs) = treeBlobs.entries.map {
    case e if badBlobs.contains(e.objectId) =>
      // Encode with an explicit charset so the stored placeholder bytes (and hence the
      // placeholder blob's id) never depend on the JVM's platform default encoding.
      val placeholderContent = e.objectId.name.getBytes(java.nio.charset.StandardCharsets.UTF_8)
      TreeBlobEntry(FileName(e.filename + ".REMOVED.git-id"), RegularFile, blobInserter.insert(placeholderContent))
    case e => e
  }
}
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/memo.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg
22 |
23 | import scala.jdk.CollectionConverters._
24 | import com.google.common.cache.{CacheBuilder, CacheLoader, CacheStats, LoadingCache}
25 | import com.madgag.git.bfg.cleaner._
26 |
/** A memoisation strategy: given a function, produces a `MemoFunc` wrapping it. */
trait Memo[K, V] {
  /** Wraps the function `z` in a `MemoFunc`. */
  def apply(z: K => V): MemoFunc[K, V]
}
30 |
/** A function from K to V that can also expose a map view of itself and cache statistics. */
trait MemoFunc[K,V] extends (K => V) {
  /** A map snapshot of this function; which entries are included is up to the implementation. */
  def asMap(): Map[K,V]

  /** Statistics of the underlying cache. */
  def stats(): CacheStats
}
36 |
object MemoUtil {

  /** Adapts a plain function-wrapping function into a `Memo`. */
  def memo[K, V](f: (K => V) => MemoFunc[K, V]): Memo[K, V] = new Memo[K, V] {
    def apply(z: K => V) = f(z)
  }

  /**
   *
   * A caching wrapper for a function (V => V), backed by a LoadingCache from Guava
   * (built without any eviction settings).
   *
   * @param fixedEntries values that are pre-registered as mapping to themselves
   */
  def concurrentCleanerMemo[V](fixedEntries: Set[V] = Set.empty[V]): Memo[V, V] = {
    memo[V, V] {
      (f: Cleaner[V]) =>
        // lazy because the cache and `fix` refer to each other
        lazy val permanentCache = loaderCacheFor(f)(fix)

        def fix(v: V): Unit = {
          // enforce that once any value is returned, it is 'good' and therefore an identity-mapped key as well
          permanentCache.put(v, v)
        }

        fixedEntries foreach fix

        new MemoFunc[V, V] {
          def apply(k: V) = permanentCache.get(k)

          // Only entries whose value differs from their key, i.e. values that were actually changed.
          def asMap() = permanentCache.asMap().asScala.view.filter {
            case (oldId, newId) => newId != oldId
          }.toMap

          override def stats(): CacheStats = permanentCache.stats()
        }
    }
  }

  /**
   * Builds a stats-recording LoadingCache that computes missing values with `calc`
   * and passes each newly computed value to `postCalc` before returning it.
   */
  def loaderCacheFor[K, V](calc: K => V)(postCalc: V => Unit): LoadingCache[K, V] =
    CacheBuilder.newBuilder.asInstanceOf[CacheBuilder[K, V]].recordStats().build(new CacheLoader[K, V] {
      def load(key: K): V = {
        val v = calc(key)
        postCalc(v)
        v
      }
    })
}
80 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/model/Commit.scala:
--------------------------------------------------------------------------------
1 | package com.madgag.git.bfg.model
2 |
3 | import com.madgag.git._
4 | import com.madgag.git.bfg.cleaner._
5 | import org.eclipse.jgit.lib.Constants.OBJ_COMMIT
6 | import org.eclipse.jgit.lib._
7 | import org.eclipse.jgit.revwalk.RevCommit
8 |
9 | import java.nio.charset.StandardCharsets.UTF_8
10 | import java.nio.charset.{Charset, IllegalCharsetNameException, UnsupportedCharsetException}
11 | import scala.jdk.CollectionConverters._
12 |
13 | /*
14 | * Copyright (c) 2012, 2013 Roberto Tyley
15 | *
16 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
17 | * or troublesome blobs from Git repositories.
18 | *
19 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
20 | * it under the terms of the GNU General Public License as published by
21 | * the Free Software Foundation, either version 3 of the License, or
22 | * (at your option) any later version.
23 | *
24 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
25 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 | * GNU General Public License for more details.
28 | *
29 | * You should have received a copy of the GNU General Public License
30 | * along with this program. If not, see http://www.gnu.org/licenses/ .
31 | */
32 |
33 |
object Commit {
  /** Builds the model `Commit` (node plus arcs) from a JGit `RevCommit`. */
  def apply(revCommit: RevCommit): Commit = {
    val node = CommitNode(revCommit)
    Commit(node, revCommit.arcs)
  }
}
37 |
/**
 * A commit split into its node (author, committer, message, encoding) and
 * its arcs (parent ids and tree id).
 */
case class Commit(node: CommitNode, arcs: CommitArcs) {

  /** Serialises this commit to bytes using a JGit `CommitBuilder`. */
  def toBytes: Array[Byte] = {
    val builder = new CommitBuilder

    // arcs
    builder.setParentIds(arcs.parents.asJava)
    builder.setTreeId(arcs.tree)

    // node
    builder.setAuthor(node.author)
    builder.setCommitter(node.committer)
    builder.setEncoding(node.encoding)
    builder.setMessage(node.message)

    builder.toByteArray
  }

  /** The object id computed for the serialised commit bytes. */
  lazy val id = new ObjectInserter.Formatter().idFor(OBJ_COMMIT, toBytes)

  override lazy val toString = {
    // subject, when present, is truncated to 50 characters and quoted
    val subjectPart = node.subject.fold("")(s => s" '${s.take(50)}'")
    s"commit[${id.shortName}$subjectPart]"
  }
}
56 |
/** The outgoing references of a commit: its parent commit ids and its tree id. */
case class CommitArcs(parents: Seq[ObjectId], tree: ObjectId) {

  /** Returns new arcs with every parent passed through `cleanCommit` and the tree through `cleanTree`. */
  def cleanWith(cleaner: ObjectIdCleaner) = {
    val cleanedParents = parents.map(cleaner.cleanCommit)
    val cleanedTree = cleaner.cleanTree(tree)
    CommitArcs(cleanedParents, cleanedTree)
  }
}
60 |
object CommitNode {
  /**
   * Builds a `CommitNode` from a JGit `RevCommit`. If reading the commit's encoding
   * fails with an illegal or unsupported charset name, UTF-8 is used instead.
   */
  def apply(c: RevCommit): CommitNode = {
    val author = c.getAuthorIdent
    val committer = c.getCommitterIdent
    val message = c.getFullMessage
    val encoding =
      try {
        c.getEncoding
      } catch {
        case _: IllegalCharsetNameException | _: UnsupportedCharsetException => UTF_8
      }
    CommitNode(author, committer, message, encoding)
  }
}
65 |
/**
 * The parts of a commit that do not reference other objects.
 *
 * @param author    author identity
 * @param committer committer identity
 * @param message   full commit message
 * @param encoding  message encoding, UTF-8 by default
 */
case class CommitNode(author: PersonIdent, committer: PersonIdent, message: String, encoding: Charset = UTF_8) {
  /** First line of the message, if the message has any lines. */
  lazy val subject = message.linesIterator.to(LazyList).headOption

  /** Index of the last blank-line paragraph break in the message, or -1 when there is none. */
  lazy val lastParagraphBreak = message.lastIndexOf("\n\n")

  /** The message up to the last paragraph break when footers are present, otherwise the whole message. */
  lazy val messageWithoutFooters = if (footers.isEmpty) message else (message take lastParagraphBreak)

  /**
   * Footers found in the last paragraph of the message.
   *
   * A message without any paragraph break has no footers: its only paragraph is the
   * subject/body. Without this guard `drop(-1)` would keep the whole message, so a subject
   * such as "Fix: foo" would be mistaken for a footer, and `add` would then append a new
   * footer without the separating blank line.
   */
  lazy val footers: List[Footer] =
    if (lastParagraphBreak < 0) Nil
    else message.drop(lastParagraphBreak).linesIterator.collect {
      case Footer.FooterPattern(key, value) => Footer(key, value)
    }.toList

  /** Appends a footer line, starting a new paragraph first if the message has no footers yet. */
  def add(footer: Footer) = copy(message = message + "\n" + (if (footers.isEmpty) "\n" else "") + footer.toString)
}
76 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/model/Footer.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.model
22 |
/** Parsing support for `Key: value` footer lines. */
object Footer {
  // ^[A-Za-z0-9-]+:
  val FooterPattern = """([\p{Alnum}-]+): *(.*)""".r

  /**
   * Parses a single line.
   *
   * @return the footer when the whole line matches [[FooterPattern]], `None` otherwise
   */
  def apply(footerLine: String): Option[Footer] =
    PartialFunction.condOpt(footerLine) {
      case FooterPattern(key, value) => Footer(key, value)
    }
}

/** A footer, rendered by `toString` as `key: value`. */
case class Footer(key: String, value: String) {
  override lazy val toString = s"$key: $value"
}
36 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/model/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg
22 |
23 | import org.eclipse.jgit.revwalk.RevCommit
24 |
25 | import java.nio.file.Path
26 |
27 |
package object model {
  /** Adds `arcs` to a JGit `RevCommit`. */
  implicit class RichRevCommit(revCommit: RevCommit) {
    /** The commit's parents and tree, bundled as a [[CommitArcs]]; computed on first access. */
    lazy val arcs: CommitArcs = {
      val parentIds = revCommit.getParents.toIndexedSeq
      CommitArcs(parentIds, revCommit.getTree)
    }
  }

  /** Adds a multi-segment `resolve` to `java.nio.file.Path`. */
  implicit class RichPath(path: Path) {
    /** Resolves each segment in turn, starting from `path`; an empty sequence yields `path` itself. */
    def resolve(pathSegments: Seq[String]): Path =
      pathSegments.foldLeft(path) { (resolvedSoFar, segment) =>
        resolvedSoFar.resolve(segment)
      }
  }
}
37 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/timing.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg
22 |
23 | import java.lang.System._
24 | import java.util.concurrent.TimeUnit.NANOSECONDS
25 |
26 | import org.eclipse.jgit.lib.ProgressMonitor
27 |
object Timing {
  //  def measure[T](block: => T) = {
  //    val start = nanoTime
  //    val result = block
  //    val duration = nanoTime - start
  //    println("duration="+duration)
  //    result
  //  }

  /**
   * Runs `block` as a named task on the implicit progress monitor and reports how long it took.
   *
   * The monitor's `beginTask` is called before the block and `endTask` after it. `endTask`
   * sits in a `finally` so the monitor is not left with an open task when the block throws;
   * in that case the exception propagates and no completion line is printed.
   *
   * @param taskName        name given to the monitor and used in the printed summary
   * @param workSize        total amount of work, passed through to `beginTask`
   * @param block           the work to time
   * @param progressMonitor monitor to notify
   * @return whatever `block` evaluates to
   */
  def measureTask[T](taskName: String, workSize: Int)(block: => T)(implicit progressMonitor: ProgressMonitor) = {
    progressMonitor.beginTask(taskName, workSize)
    val start = nanoTime
    val (result, duration) =
      try {
        val outcome = block
        // Take the reading before endTask so the monitor's own work is not counted.
        (outcome, nanoTime - start)
      } finally {
        progressMonitor.endTask()
      }
    println(taskName + " completed in %,d ms.".format(NANOSECONDS.toMillis(duration)))
    result
  }
}
47 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/inclusion/inclusion.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.inclusion
22 |
23 | import scala.Function.const
24 |
/**
 * An ordered list of include/exclude filters. Later filters take precedence over earlier ones.
 */
case class IncExcExpression[-A](filters: Seq[Filter[A]]) {
  /**
   * The filters in lookup order: last filter first, finishing with an implicit baseline.
   * The baseline is the first filter's `impliedPredecessor`, or include-everything when
   * there are no filters.
   */
  lazy val searchPath = {
    val baseline = filters.headOption.map(_.impliedPredecessor).getOrElse(Include.everything)
    (baseline +: filters).reverse
  }

  /** Reports the `included` flag of the first filter on the search path whose predicate accepts `a`. */
  def includes(a: A): Boolean =
    searchPath.collectFirst { case filter if filter.predicate(a) => filter.included }.get
}
30 |
/** One include-or-exclude rule over values of type `A`. */
sealed trait Filter[-A] {
  /** Decides which values this rule applies to. */
  val predicate: A => Boolean

  /** The verdict for values the predicate accepts: `true` to include, `false` to exclude. */
  val included: Boolean

  /** The rule taken to be in force before this one when it is the first in an expression. */
  val impliedPredecessor: Filter[A]

  /** Whether this rule's predicate accepts `a`. */
  def isDefinedAt(a: A) = predicate.apply(a)
}
40 |
41 |
object Include {
  /** An inclusion rule whose predicate is constantly `true`. */
  def everything = Include(const(true))
}

/** Includes whatever `predicate` accepts. As a first rule, it implies everything was excluded beforehand. */
case class Include[A](predicate: A => Boolean) extends Filter[A] {
  val included = true
  lazy val impliedPredecessor = Exclude.everything
}

object Exclude {
  /** An exclusion rule whose predicate is constantly `true`. */
  def everything = Exclude(const(true))
}

/** Excludes whatever `predicate` accepts. As a first rule, it implies everything was included beforehand. */
case class Exclude[A](predicate: A => Boolean) extends Filter[A] {
  val included = false
  lazy val impliedPredecessor = Include.everything
}
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/text/ByteSize.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.text
22 |
/** Parsing and formatting of byte counts that use single-letter binary magnitudes (B, K, M, G, T, P). */
object ByteSize {

  import math._

  /** Magnitude letters in ascending order; the letter at index `i` stands for 1024^i bytes. */
  val magnitudeChars = Seq('B', 'K', 'M', 'G', 'T', 'P')
  val unit = 1024

  /**
   * Parses a size such as "512B", "10K" or "1m" (the unit letter is case-insensitive).
   *
   * @param v digits followed by exactly one magnitude letter
   * @return the size in bytes
   * @throws IllegalArgumentException if `v` is empty or does not end with a known magnitude
   *                                  letter (an empty string used to escape as a
   *                                  StringIndexOutOfBoundsException)
   * @throws NumberFormatException    if the part before the letter is not a valid Long
   */
  def parse(v: String): Long = {
    val magnitude = v.lastOption.map(c => magnitudeChars.indexOf(c.toUpper)).getOrElse(-1)
    magnitude match {
      case -1 => throw new IllegalArgumentException(s"Size unit is missing (ie ${magnitudeChars.mkString(", ")})")
      case index => v.dropRight(1).toLong << (index * 10)
    }
  }

  /**
   * Renders a byte count for display: values below 1024 are shown as plain bytes, larger
   * ones are scaled to one decimal place with the largest fitting magnitude.
   *
   * Values of 2^60 and above have no magnitude letter of their own, so they are expressed
   * in the largest available one (PB) rather than indexing past the end of `magnitudeChars`.
   */
  def format(bytes: Long): String = {
    if (bytes < unit) s"$bytes B " else {
      // floor(log2(bytes)) / 10 == floor(log1024(bytes)); integer arithmetic avoids the
      // rounding errors a floating-point log can introduce around magnitude boundaries.
      val highestBit = 63 - java.lang.Long.numberOfLeadingZeros(bytes)
      val exp = min(highestBit / 10, magnitudeChars.size - 1)
      val pre = magnitudeChars(exp)
      "%.1f %sB".format(bytes / pow(unit, exp), pre)
    }
  }

}
44 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/text/Tables.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.text
22 |
object Tables {
  /**
   * Lays out `header` and `data` as lines of a fixed-width text table: a header line, a
   * dashed rule of the same length, then one line per data row with cells joined by " | ".
   * Every column is padded to its widest cell (header included). The resulting lines are
   * passed to `Text.abbreviate` with "..." as the truncation token and a limit of
   * `maxDataRows + 2` lines (the two extra being the header and the rule).
   *
   * @param header      column titles, one element per column
   * @param data        rows; may be empty, in which case only the header and rule are produced
   *                    (previously `data.head` threw NoSuchElementException)
   * @param maxDataRows number of data lines allowed before abbreviation kicks in
   */
  def formatTable(header: Product, data: Seq[Product], maxDataRows: Int = 16): Seq[String] = {
    // The first row decides the column count; with no rows at all, fall back to the header.
    val numColumns = data.headOption.getOrElse(header).productArity
    val sizes: Seq[Int] = (0 until numColumns).map(i => (data :+ header).map(_.productElement(i).toString.length).max)
    def padLine(l: Product): IndexedSeq[String] = {
      (0 until numColumns).map(c => l.productElement(c).toString.padTo(sizes(c), ' '))
    }

    // Same width as the " | " used between data cells, so header titles sit above their columns.
    val headerLine = padLine(header).mkString("   ")
    Text.abbreviate(headerLine +: "-" * headerLine.size +: data.map {
      l =>
        padLine(l).mkString(" | ")
    }, "...", maxDataRows+2).toSeq
  }
}
38 |
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/text/text.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.text
22 |
object Text {

  /**
   * Caps a collection at `maxElements` entries. When there are more, the result is the
   * first `maxElements - 1` entries followed by `truncationToken`; otherwise `elems` is
   * returned untouched.
   */
  def abbreviate[A](elems: Iterable[A], truncationToken: A, maxElements: Int = 3) = {
    // One element beyond the limit is enough to know whether truncation is needed.
    val probe = elems.take(maxElements + 1)
    if (probe.size <= maxElements) elems
    else probe.take(maxElements - 1).toSeq :+ truncationToken
  }

  /** Renders "<count> <noun>", appending "s" to the noun unless the count is exactly one. */
  def plural[A](list: Iterable[A], noun: String) = {
    val count = list.size
    val suffix = if (count == 1) "" else "s"
    s"$count $noun$suffix"
  }
}
36 |
--------------------------------------------------------------------------------
/bfg-library/src/test/resources/sample-repos/deep-history.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/deep-history.zip
--------------------------------------------------------------------------------
/bfg-library/src/test/resources/sample-repos/encodings.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/encodings.git.zip
--------------------------------------------------------------------------------
/bfg-library/src/test/resources/sample-repos/example.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/example.git.zip
--------------------------------------------------------------------------------
/bfg-library/src/test/resources/sample-repos/exampleWithInitialCleanHistory.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/exampleWithInitialCleanHistory.git.zip
--------------------------------------------------------------------------------
/bfg-library/src/test/resources/sample-repos/folder-example.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/folder-example.git.zip
--------------------------------------------------------------------------------
/bfg-library/src/test/resources/sample-repos/footers.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/footers.git.zip
--------------------------------------------------------------------------------
/bfg-library/src/test/resources/sample-repos/taleOfTwoBranches.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/taleOfTwoBranches.git.zip
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/LFSSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git
22 |
23 | import com.madgag.git.LFS.Pointer
24 | import com.madgag.git.test._
25 | import org.eclipse.jgit.lib.Constants._
26 | import org.eclipse.jgit.lib.ObjectInserter
27 | import org.scalatest.OptionValues
28 | import org.scalatest.flatspec.AnyFlatSpec
29 | import org.scalatest.matchers.should.Matchers
30 |
31 | import java.nio.file.Files
32 | import java.nio.file.Files.createTempFile
33 |
class LFSSpec extends AnyFlatSpec with Matchers with OptionValues {

  // Hex string and size shared by the first two tests.
  private val sampleHex = "b2893eddd9b394bfb7efadafda2ae0be02c573fdd83a70f26c781a943f3b7016"

  private def samplePointer = LFS.Pointer(sampleHex, 21616)

  "Our implementation of Git LFS Pointers" should "create pointers that have the same Git id as the ones produced by `git lfs pointer`" in {
    val formatter = new ObjectInserter.Formatter()

    val idOfPointerBlob = formatter.idFor(OBJ_BLOB, samplePointer.bytes)

    idOfPointerBlob shouldBe "1d90744cffd9e9f324870ed60b6d1258e56a39e1".asObjectId
  }

  it should "have the correctly sharded path" in {
    val expectedPath = Seq("b2", "89", "b2893eddd9b394bfb7efadafda2ae0be02c573fdd83a70f26c781a943f3b7016")

    samplePointer.path shouldBe expectedPath
  }

  it should "calculate pointers correctly directly from the Git database, creating a temporary file" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    val conversionTarget = createTempFile("bfg.test.git-lfs", ".conv")

    val computedPointer = LFS.pointerFor(abbrId("06d7").open, conversionTarget)

    computedPointer shouldBe Pointer("5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef", 1024)

    // The blob's content should have been written out to the temporary file in full.
    Files.size(conversionTarget) shouldBe 1024
  }
}
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/GitUtilSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg
22 |
23 | import com.madgag.git._
24 | import com.madgag.git.test._
25 | import org.eclipse.jgit.internal.storage.file.FileRepository
26 | import org.scalatest.flatspec.AnyFlatSpec
27 | import org.scalatest.matchers.should.Matchers
28 |
class GitUtilSpec extends AnyFlatSpec with Matchers {
  // Sample repository shared by every test in this spec.
  implicit val repo: FileRepository = unpackRepo("/sample-repos/example.git.zip")

  "reachable blobs" should "match expectations" in {
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    val startingCommit = abbrId("475d").asRevCommit
    val reachableBlobs = allBlobsReachableFrom(startingCommit)

    val expectedBlobs = Set("d8d1", "34bd", "e69d", "c784", "d004").map(abbrId)

    reachableBlobs shouldBe expectedBlobs
  }
}
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/MessageFooterSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg
22 |
23 | import com.madgag.git.bfg.model.{CommitNode, Footer}
24 | import org.eclipse.jgit.lib.PersonIdent
25 | import org.scalatest.flatspec.AnyFlatSpec
26 | import org.scalatest.matchers.should.Matchers
27 |
class MessageFooterSpec extends AnyFlatSpec with Matchers {

  val person = new PersonIdent("Dave Eg", "dave@e.com")

  /** A commit node with `person` as both author and committer and `m` as its message. */
  def commit(m: String) = CommitNode(person, person, m)

  "Message footers" should "append footer without new paragraph if footers already present" in {
    val alreadyFootered = commit("Sub\n\nmessage\n\nSigned-off-by: Joe Eg ")

    val withExtraFooter = alreadyFootered.add(Footer("Foo", "Bar"))

    withExtraFooter.message shouldBe "Sub\n\nmessage\n\nSigned-off-by: Joe Eg \nFoo: Bar"
  }

  it should "create paragraph break if no footers already present" in {
    val footerless = commit("Sub\n\nmessage")

    val withFirstFooter = footerless.add(Footer("Foo", "Bar"))

    withFirstFooter.message shouldBe "Sub\n\nmessage\n\nFoo: Bar"
  }

  // def footersViaJGit(commit: RevCommit) = commit.getFooterLines.map(f => Footer(f.getKey, f.getValue)).toList
}
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/TreeEntrySpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg
22 |
23 | import com.madgag.git.bfg.model.{FileName, Tree}
24 | import org.eclipse.jgit.lib.FileMode
25 | import org.eclipse.jgit.lib.FileMode._
26 | import org.eclipse.jgit.lib.ObjectId.zeroId
27 | import org.scalatest.flatspec.AnyFlatSpec
28 | import org.scalatest.matchers.should.Matchers
29 |
class TreeEntrySpec extends AnyFlatSpec with Matchers {

  /** Builds a tree entry with the given mode and name, using JGit's `zeroId` as its object id. */
  def a(mode: FileMode, name: String) = {
    val fileName = FileName(name)
    Tree.Entry(fileName, mode, zeroId)
  }

  "Tree entry ordering" should "match ordering used by Git" in {
    val longerNamedTree = a(TREE, "agit-test-utils")
    val shorterNamedTree = a(TREE, "agit")

    longerNamedTree should be < shorterNamedTree
  }
}
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/LfsBlobConverterSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.diff.{After, Before, MapDiff}
24 | import com.madgag.git.LFS.Pointer
25 | import com.madgag.git._
26 | import com.madgag.git.bfg.model.{BlobFileMode, FileName, Tree, TreeBlobs, _}
27 | import com.madgag.git.test._
28 | import com.madgag.scala.collection.decorators._
29 | import org.eclipse.jgit.internal.storage.file.FileRepository
30 | import org.eclipse.jgit.lib.ObjectId
31 | import org.scalatest.concurrent.Eventually
32 | import org.scalatest.flatspec.AnyFlatSpec
33 | import org.scalatest.matchers.should.Matchers
34 | import org.scalatest.{Inspectors, OptionValues}
35 |
36 | import java.nio.file.Files.readAllBytes
37 | import java.nio.file.{Files, Path}
38 |
class LfsBlobConverterSpec extends AnyFlatSpec with Matchers with OptionValues with Inspectors with Eventually {

  "LfsBlobConverter" should "successfully shift the blob to the LFS store" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    val blobsBefore = Tree(repo.resolve("early-release^{tree}")).blobs
    val blobsAfter = clean(blobsBefore, "*ero*")

    val conversionDiff = blobsBefore.diff(blobsAfter)

    conversionDiff.changed shouldBe Set(FileName("one-kb-zeros"))
    conversionDiff.unchanged should contain allOf(FileName("hero"), FileName("zero"))

    verifyPointersForChangedFiles(conversionDiff)
  }

  it should "not do damage if run twice - ie don't create a pointer for a pointer!" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    val blobsBefore = Tree(repo.resolve("early-release^{tree}")).blobs

    // First pass: the matching blob gets converted.
    val blobsAfterFirstRun = clean(blobsBefore, "*ero*")
    val firstRunDiff = blobsBefore.diff(blobsAfterFirstRun)
    firstRunDiff.changed shouldBe Set(FileName("one-kb-zeros"))

    // Second pass over the already-converted tree: nothing further may change.
    val blobsAfterSecondRun = clean(blobsAfterFirstRun, "*ero*")
    val secondRunDiff = blobsAfterFirstRun.diff(blobsAfterSecondRun)
    secondRunDiff.changed shouldBe empty

    verifyPointersForChangedFiles(firstRunDiff) // Are the LFS files still intact?
  }


  /** Applies a fresh `LfsBlobConverter` for `glob` to the given tree blobs. */
  def clean(oldTreeBlobs: TreeBlobs, glob: String)(implicit repo: FileRepository): TreeBlobs = {
    val lfsConverter = new LfsBlobConverter(glob, repo)
    lfsConverter(oldTreeBlobs)
  }

  /**
   * Checks that `fileName` is among the changed entries of `diff`, that its file mode is
   * the same before and after, and that the "after" object is a pointer to the "before" one.
   */
  def verifyPointerInsertedFor(fileName: FileName, diff: MapDiff[FileName, (BlobFileMode, ObjectId)])(implicit repo: FileRepository) = {
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    diff.changed should contain(fileName)

    val beforeAndAfter = diff.changedMap(fileName)

    // Element 1 of each pair is the file mode...
    beforeAndAfter(After)._1 shouldBe beforeAndAfter(Before)._1

    // ...and element 2 is the object id.
    val idsBySide = beforeAndAfter.mapV(_._2)

    verifyPointerFileFor(originalFileId = idsBySide(Before), pointerObjectId = idsBySide(After))
  }

  /**
   * Parses the pointer stored under `pointerObjectId`, then checks that the file it refers
   * to exists under the repository's `lfs/objects` directory, has the size the pointer
   * states, and hashes to `originalFileId`.
   */
  def verifyPointerFileFor(originalFileId: ObjectId, pointerObjectId: ObjectId)(implicit repo: FileRepository) = {
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    val pointer = Pointer.parse(pointerObjectId.open.getCachedBytes)

    val storeRelativePath = Seq("lfs", "objects") ++ pointer.path
    val lfsStoredFile: Path = repo.getDirectory.toPath.resolve(storeRelativePath)

    Files.exists(lfsStoredFile) shouldBe true

    Files.size(lfsStoredFile) shouldBe pointer.blobSize

    eventually { readAllBytes(lfsStoredFile).blobId } shouldBe originalFileId
  }

  /**
   * Checks the overall shape of a conversion diff: nothing removed, only `.gitattributes`
   * added, and every changed file replaced by a valid pointer.
   */
  def verifyPointersForChangedFiles(diff: MapDiff[FileName, (BlobFileMode, ObjectId)])(implicit repo: FileRepository) = {
    diff.only(Before) shouldBe empty
    diff.only(After).keys shouldBe Set(FileName(".gitattributes"))

    forAll(diff.changed) { changedFile =>
      verifyPointerInsertedFor(changedFile, diff)
    }
  }
}
121 |
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/ObjectIdCleanerSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.git._
24 | import com.madgag.git.bfg.cleaner.protection.ProtectedObjectCensus
25 | import com.madgag.textmatching.Literal
26 | import org.eclipse.jgit.lib.ObjectId
27 | import org.eclipse.jgit.revwalk.RevCommit
28 | import org.scalatest.Inspectors
29 | import org.scalatest.flatspec.AnyFlatSpec
30 | import org.scalatest.matchers.Matcher
31 | import org.scalatest.matchers.should.Matchers
32 |
33 | import scala.jdk.CollectionConverters._
34 |
class ObjectIdCleanerSpec extends AnyFlatSpec with Matchers {

  // Registered with `ignore`, so this test is currently skipped.
  "cleaning" should "not have a StackOverflowError cleaning a repo with deep history" ignore new unpackedRepo("/sample-repos/deep-history.zip") {
    val deepHistoryTip = "d88ac4f99511667fc0617ea026f3a0ce8a25fd07".asObjectId

    val fooDeleter = new FileDeleter(Literal("foo"))

    val cleanerConfig = ObjectIdCleaner.Config(
      ProtectedObjectCensus.None,
      treeBlobsCleaners = Seq(fooDeleter)
    )

    val expectation = ensureCleanerWith(cleanerConfig).removesDirtOfCommitsThat(haveFile("foo"))

    expectation.whenCleaning(deepHistoryTip)
  }

}
49 |
/**
 * Test fixture adding a small fluent vocabulary on top of `bfg.test.unpackedRepo`:
 * `ensureCleanerWith(config).removesDirtOfCommitsThat(matcher).whenCleaning(commit)`.
 */
class unpackedRepo(filePath: String) extends bfg.test.unpackedRepo(filePath) {

  def ensureCleanerWith(config: ObjectIdCleaner.Config) = new EnsureCleanerWith(config)

  class EnsureCleanerWith(config: ObjectIdCleaner.Config) {

    def removesDirtOfCommitsThat[T](commitM: Matcher[RevCommit]) = new RemoveDirtOfCommitsThat(commitM)

    class RemoveDirtOfCommitsThat(commitM: Matcher[RevCommit]) extends Inspectors with Matchers {
      /** The commits `git log` yields when started from `c`, in reversed order. */
      def histOf(c: ObjectId) = {
        val logged = repo.git.log.add(c).call.asScala.toSeq
        logged.reverse
      }

      /**
       * Asserts that at least one commit in the history of `oldCommit` satisfies the
       * matcher, cleans `oldCommit`, then asserts that no commit in the cleaned history does.
       */
      def whenCleaning(oldCommit: ObjectId): Unit = {
        val cleaner = new ObjectIdCleaner(config, repo.getObjectDatabase, revWalk)

        val historyBeforeCleaning = histOf(oldCommit)
        forAtLeast(1, historyBeforeCleaning) { dirtyCandidate =>
          dirtyCandidate should commitM
        }

        val cleanCommit = cleaner.cleanCommit(oldCommit)

        val historyAfterCleaning = histOf(cleanCommit)
        forAll(historyAfterCleaning) { cleaned =>
          cleaned shouldNot commitM
        }
      }
    }
  }
}
76 |
77 |
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/ObjectIdSubstitutorSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.git._
24 | import com.madgag.git.bfg.cleaner.ObjectIdSubstitutor.hexRegex
25 | import com.madgag.git.test._
26 | import org.eclipse.jgit.lib.ObjectId
27 | import org.scalatest.flatspec.AnyFlatSpec
28 | import org.scalatest.matchers.should.Matchers
29 |
/** Specs for the hex-id regex of [[ObjectIdSubstitutor]] and for id replacement in messages. */
class ObjectIdSubstitutorSpec extends AnyFlatSpec with Matchers {

  "Object Id Substitutor regex" should "match hex strings" in {
    // Texts in which the regex is expected to find a hex run.
    val textsContainingHexId = Seq(
      "01234567890",
      "decade2001",
      "This is decade2001",
      "This is decade2001 I say"
    )
    // Texts where the hex run is directly adjacent to a non-hex letter ('G' before, 'X' after).
    val textsWithoutHexId = Seq(
      "This is Gdecade2001 I say",
      "This is decade2001X I say"
    )

    textsContainingHexId.foreach { text => text should include regex hexRegex }
    textsWithoutHexId.foreach { text => text shouldNot include regex hexRegex }
  }

  "Object Id" should "be substituted in commit message" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val reader = repo.newObjectReader

    // Mapping that sends every old id to the same replacement id.
    val substituteWithFixedId = (_: ObjectId) => abbrId("06d7405020018d")
    val originalMessage = "See 3699910d2baab1 for backstory"

    val cleanedMessage = ObjectIdSubstitutor.OldIdsPublic.replaceOldIds(originalMessage, reader, substituteWithFixedId)

    cleanedMessage shouldBe "See 06d7405020018d [formerly 3699910d2baab1] for backstory"
  }

}
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/RepoRewriteSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.madgag.git._
24 | import com.madgag.git.bfg.GitUtil._
25 | import com.madgag.git.bfg.cleaner.ObjectIdSubstitutor._
26 | import com.madgag.git.bfg.cleaner.protection.ProtectedObjectCensus
27 | import com.madgag.git.bfg.model.{FileName, RegularFile, TreeBlobEntry}
28 | import com.madgag.git.test._
29 | import com.madgag.textmatching._
30 | import org.apache.commons.io.FilenameUtils
31 | import org.eclipse.jgit.lib.ObjectId
32 | import org.eclipse.jgit.revwalk.RevWalk
33 | import org.eclipse.jgit.util.RawParseUtils
34 | import org.scalatest.flatspec.AnyFlatSpec
35 | import org.scalatest.matchers.should.Matchers
36 |
37 | import java.io.StringReader
38 | import java.net.URLEncoder
39 | import java.util.Properties
40 | import java.util.regex.Pattern._
41 | import scala.PartialFunction.condOpt
42 | import scala.jdk.CollectionConverters._
43 |
/**
 * End-to-end specs for [[RepoRewriter]]. Each test calls `unpackRepo` on one of the zipped
 * sample repositories, rewrites it with a specific [[ObjectIdCleaner.Config]] and inspects
 * the result.
 */
class RepoRewriteSpec extends AnyFlatSpec with Matchers {

  "Git repo" should "not explode" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val reader = repo.newObjectReader

    hasBeenProcessedByBFGBefore(repo) shouldBe false

    val blobsToRemove = Set(abbrId("06d740"))
    val config = ObjectIdCleaner.Config(
      ProtectedObjectCensus(Set("HEAD")),
      OldIdsPublic,
      Seq(FormerCommitFooter),
      treeBlobsCleaners = Seq(new BlobRemover(blobsToRemove))
    )
    RepoRewriter.rewrite(repo, config)

    val allCommits = repo.git.log.all.call.asScala.toSeq

    // Only commits that can still reach one of the removed blobs end up in this map.
    val unwantedBlobsByCommit = (for {
      commit <- allCommits
      unwantedBlobs = allBlobsReachableFrom(commit).intersect(blobsToRemove).map(_.shortName)
      if unwantedBlobs.nonEmpty
    } yield commit.shortName -> unwantedBlobs).toMap

    unwantedBlobsByCommit shouldBe empty

    allCommits.head.getFullMessage should include(FormerCommitFooter.Key)

    hasBeenProcessedByBFGBefore(repo) shouldBe true
  }

  "Repo rewriter" should "clean commit messages even on clean branches, because commit messages may reference commits from dirty ones" in {
    implicit val repo = unpackRepo("/sample-repos/taleOfTwoBranches.git.zip")
    implicit val revWalk = new RevWalk(repo)

    def commitMessageForRev(rev: String) = {
      val commit = repo.resolve(rev).asRevCommit
      commit.getFullMessage
    }

    // Abbreviated commit id mentioned in the message of the 'pure' revision before the rewrite.
    val referencedOldId = "6e76960ede2addbbe7e"

    commitMessageForRev("pure") should include(referencedOldId)

    val config = ObjectIdCleaner.Config(
      ProtectedObjectCensus.None,
      OldIdsPrivate,
      Seq(new CommitMessageObjectIdsUpdater(OldIdsPrivate)),
      treeBlobsCleaners = Seq(new FileDeleter(Literal("sin")))
    )
    RepoRewriter.rewrite(repo, config)

    commitMessageForRev("pure") should not include referencedOldId
  }

  it should "remove passwords" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    /** Parses `contents` in Java properties format. */
    def propertiesIn(contents: String) = {
      val parsed = new Properties()
      parsed.load(new StringReader(contents))
      parsed
    }

    /** Decoded text of 'folder/secret-passwords.txt' as stored in the commit `id`. */
    def passwordFileContentsIn(id: ObjectId) = {
      val passwordFileId = repo.resolve(s"${id.name}:folder/secret-passwords.txt")
      val rawBytes = reader.open(passwordFileId).getCachedBytes
      RawParseUtils.decode(rawBytes)
    }

    /** Extractor exposing the extension of a file name. */
    object FileExt {
      def unapply(fileName: String) = Option(FilenameUtils.getExtension(fileName))
    }

    val blobTextModifier = new BlobTextModifier {
      // Only '.txt' and '.scala' files get a line cleaner; other names yield None.
      override def lineCleanerFor(entry: TreeBlobEntry) = condOpt(entry.filename.string) {
        case FileExt("txt" | "scala") => """(\.password=).*""".r --> (_.group(1) + "*** PASSWORD ***")
      }

      val threadLocalObjectDBResources = repo.getObjectDatabase.threadLocalResources
    }
    val config = ObjectIdCleaner.Config(ProtectedObjectCensus(Set("HEAD")), treeBlobsCleaners = Seq(blobTextModifier))
    val cleanedObjectMap = RepoRewriter.rewrite(repo, config)

    val oldCommitContainingPasswords = abbrId("37bcc89")
    val cleanedCommitWithPasswordsRemoved = cleanedObjectMap(oldCommitContainingPasswords).asRevCommit

    val originalContents = passwordFileContentsIn(oldCommitContainingPasswords)
    val cleanedContents = passwordFileContentsIn(cleanedCommitWithPasswordsRemoved)

    cleanedContents should include("science")
    cleanedContents should include("database.password=")
    originalContents should include("correcthorse")
    cleanedContents should not include "correcthorse"

    // The number of properties in the file must be unchanged by the cleaning.
    val originalPropertyCount = propertiesIn(originalContents).size
    propertiesIn(cleanedContents).asScala.toMap should have size originalPropertyCount
  }

  /**
   * Rewrites the 'encodings' sample repository, replacing `before` with `after` in every blob,
   * and checks that `<parentPath>/<prefix>-ORIGINAL.<postfix>` on master is then identical to
   * the expected file `<parentPath>/<prefix>-MODIFIED-<before>-<after>.<postfix>`, where
   * `before` and `after` are URL-encoded in the expected file's name.
   */
  def textReplacementOf(parentPath: String, fileNamePrefix: String, fileNamePostfix: String, before: String, after: String) = {
    implicit val repo = unpackRepo("/sample-repos/encodings.git.zip")

    val encodedBefore = URLEncoder.encode(before, "UTF-8")
    val encodedAfter = URLEncoder.encode(after, "UTF-8")
    val beforeFile = s"$parentPath/$fileNamePrefix-ORIGINAL.$fileNamePostfix"
    val afterFile = s"$parentPath/$fileNamePrefix-MODIFIED-$encodedBefore-$encodedAfter.$fileNamePostfix"

    val blobTextModifier = new BlobTextModifier {
      // `before` is quoted, so it is matched as literal text rather than as a regex.
      def lineCleanerFor(entry: TreeBlobEntry) = Some(quote(before).r --> (_ => after))

      val threadLocalObjectDBResources = repo.getObjectDatabase.threadLocalResources
    }

    val config = ObjectIdCleaner.Config(ProtectedObjectCensus.None, treeBlobsCleaners = Seq(blobTextModifier))
    RepoRewriter.rewrite(repo, config)

    val cleanedFile = repo.resolve(s"master:$beforeFile")
    val expectedFile = repo.resolve(s"master:$afterFile")

    expectedFile should not be null

    implicit val threadLocalObjectReader = repo.getObjectDatabase.threadLocalResources.reader()
    val cleanedStr = new String(cleanedFile.open.getBytes)
    val expectedStr = new String(expectedFile.open.getBytes)

    // Compare the text first, then the blob ids themselves.
    cleanedStr shouldBe expectedStr
    cleanedFile shouldBe expectedFile
  }

  "Text modifier" should "handle the short UTF-8" in textReplacementOf("UTF-8", "bushhidthefacts", "txt", "facts", "toffee")

  it should "handle the long UTF-8" in textReplacementOf("UTF-8", "big", "scala", "good", "blessed")

  it should "handle ASCII in SHIFT JIS" in textReplacementOf("SHIFT-JIS", "japanese", "txt", "EUC", "BOOM")

  it should "handle ASCII in ISO-8859-1" in textReplacementOf("ISO-8859-1", "laparabla", "txt", "palpitando", "buscando")

  it should "handle converting Windows newlines to Unix" in textReplacementOf("newlines", "windows", "txt", "\r\n", "\n")

  it should "handle a file that uses LF for newlines" in
    textReplacementOf("newlines", "using-LF", "txt", "file", "blob")

  it should "handle a file that uses CRLF for newlines" in
    textReplacementOf("newlines", "using-CRLF", "txt", "file", "blob")

}
174 |
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/TreeBlobModifierSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cleaner
22 |
23 | import com.google.common.util.concurrent.AtomicLongMap
24 | import com.madgag.git.bfg.cleaner.ObjectIdSubstitutor._
25 | import com.madgag.git.bfg.cleaner.protection.ProtectedObjectCensus
26 | import com.madgag.git.bfg.model.TreeBlobEntry
27 | import com.madgag.git.test._
28 | import org.scalatest.flatspec.AnyFlatSpec
29 | import org.scalatest.matchers.should.Matchers
30 |
31 | import scala.jdk.CollectionConverters._
32 |
/** Verifies that a [[TreeBlobModifier]] is asked to fix any given tree entry only once per rewrite. */
class TreeBlobModifierSpec extends AnyFlatSpec with Matchers {

  "TreeBlobModifier" should "only clean a given tree entry once" in {
    /** Returns every entry's mode and object id unchanged, counting the calls made per entry. */
    class CountingTreeBlobModifier extends TreeBlobModifier {
      val counts = AtomicLongMap.create[TreeBlobEntry]

      def fix(entry: TreeBlobEntry) = {
        counts.incrementAndGet(entry)
        entry.mode -> entry.objectId
      }
    }

    implicit val repo = unpackRepo("/sample-repos/taleOfTwoBranches.git.zip")

    val countingModifier = new CountingTreeBlobModifier()
    val config = ObjectIdCleaner.Config(
      ProtectedObjectCensus(Set("HEAD")),
      OldIdsPublic,
      treeBlobsCleaners = Seq(countingModifier)
    )

    RepoRewriter.rewrite(repo, config)

    val callsPerEntry = countingModifier.counts.asMap().asScala.toMap

    // At least four distinct entries must have been seen, each of them exactly once.
    callsPerEntry.size should be >= 4
    all (callsPerEntry.values) shouldBe 1
  }
}
57 |
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/model/CommitSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.model
22 |
23 | import com.madgag.git.bfg.test.unpackedRepo
24 | import org.scalatest.Inspectors
25 | import org.scalatest.flatspec.AnyFlatSpec
26 | import org.scalatest.matchers.should.Matchers
27 |
/** Checks that the [[Commit]] model derives the same id as the commit it was built from. */
class CommitSpec extends AnyFlatSpec with Matchers with Inspectors {
  "Commit model" should "calculate the same Git commit id for any given commit" in new unpackedRepo("/sample-repos/example.git.zip") {
    // commitHist() is called without refs, i.e. over the whole sample repository.
    val everyCommit = commitHist()

    forAll (everyCommit) { original =>
      val modelled = Commit(original)
      modelled.id shouldBe original.toObjectId
    }
  }

}
36 |
--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/text/ByteSizeSpecs.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.text
22 |
23 | import org.scalatest.flatspec.AnyFlatSpec
24 | import org.scalatest.matchers.should.Matchers
25 |
/** Specs for parsing and formatting byte sizes with [[ByteSize]]. */
class ByteSizeSpecs extends AnyFlatSpec with Matchers {

  // Multipliers the parser is expected to apply for the K, M and G suffixes.
  private val K = 1024
  private val M = K * K
  private val G = M * K

  "Size parser" should "understand 1B" in {
    val plainByteCases = Seq("0B" -> 0, "1B" -> 1, "2B" -> 2, "10B" -> 10)

    for ((text, expectedBytes) <- plainByteCases) {
      ByteSize.parse(text) shouldBe expectedBytes
    }
  }
  it should "understand 3G" in {
    // Long arithmetic: 3 GiB does not fit into an Int.
    ByteSize.parse("3G") shouldBe 3L * G
  }
  it should "understand 1G" in {
    ByteSize.parse("1G") shouldBe G
  }
  it should "understand 1M" in {
    ByteSize.parse("1M") shouldBe M
  }
  it should "understand 3500M" in {
    ByteSize.parse("3500M") shouldBe 3500L * M
  }
  it should "understand 1K" in {
    ByteSize.parse("1K") shouldBe K
  }
  it should "understand 5K" in {
    ByteSize.parse("5K") shouldBe 5 * K
  }
  it should "reject strings without a unit" in {
    assertThrows[IllegalArgumentException] {
      ByteSize.parse("1232")
    }
  }

  "Size formatter" should "correctly format" in {
    val formatted = ByteSize.format(1024)
    formatted shouldBe "1.0 KB"
  }
}
59 |
--------------------------------------------------------------------------------
/bfg-test/build.sbt:
--------------------------------------------------------------------------------
1 | import Dependencies._
2 |
// Libraries used by the bfg-test module (definitions come from the imported Dependencies object)
libraryDependencies ++= Seq(
  scalatest,
  jgit,
  scalaGit,
  scalaGitTest
)
4 |
5 |
--------------------------------------------------------------------------------
/bfg-test/src/main/scala/com/madgag/git/bfg/test/unpackedRepo.scala:
--------------------------------------------------------------------------------
1 | package com.madgag.git.bfg.test
2 |
3 | import com.madgag.git._
4 | import com.madgag.git.test._
5 | import org.eclipse.jgit.internal.storage.file.{FileRepository, GC, ObjectDirectory}
6 | import org.eclipse.jgit.lib.Constants.OBJ_BLOB
7 | import org.eclipse.jgit.lib.{ObjectId, ObjectReader, Repository}
8 | import org.eclipse.jgit.revwalk.{RevCommit, RevTree, RevWalk}
9 | import org.eclipse.jgit.treewalk.TreeWalk
10 | import org.scalatest.Inspectors
11 | import org.scalatest.flatspec.AnyFlatSpec
12 | import org.scalatest.matchers.should.Matchers
13 | import org.scalatest.matchers.{MatchResult, Matcher}
14 |
15 | import scala.jdk.CollectionConverters._
16 |
/**
 * Test fixture that unpacks a sample Git repository via `unpackRepo` and offers ScalaTest
 * matchers and assertion helpers for inspecting it before and after a rewrite.
 *
 * @param filePath path of the zipped repository, handed unchanged to `unpackRepo`
 */
class unpackedRepo(filePath: String) extends AnyFlatSpec with Matchers {

  implicit val repo: FileRepository = unpackRepo(filePath)
  implicit val objectDirectory: ObjectDirectory = repo.getObjectDatabase
  // Lazy: the RevWalk/ObjectReader pair is only obtained once one of the two is first used.
  implicit lazy val (revWalk: RevWalk, reader: ObjectReader) = repo.singleThreadedReaderTuple


  /** Matches an object id that opens as a blob whose size is exactly `sizeInBytes`. */
  def blobOfSize(sizeInBytes: Int): Matcher[ObjectId] = Matcher { (objectId: ObjectId) =>
    val objectLoader = objectId.open
    val hasThatSize = objectLoader.getType == OBJ_BLOB && objectLoader.getSize == sizeInBytes
    def thing(boo: String) = s"${objectId.shortName} $boo size of $sizeInBytes"
    MatchResult(hasThatSize, thing("did not have"), thing("had"))
  }

  /**
   * Ids of all packed objects that are blobs of exactly `sizeInBytes` bytes.
   *
   * A dedicated ObjectReader is opened for the scan and is closed again before returning.
   */
  def packedBlobsOfSize(sizeInBytes: Long): Set[ObjectId] = {
    implicit val reader: ObjectReader = repo.newObjectReader()
    try {
      // toSet is evaluated inside the try, so every object is opened before the reader is closed.
      repo.getObjectDatabase.packedObjects.filter { objectId =>
        val objectLoader = objectId.open
        objectLoader.getType == OBJ_BLOB && objectLoader.getSize == sizeInBytes
      }.toSet
    } finally {
      reader.close()
    }
  }

  /** Matches a tree-ish containing a non-subtree entry called `name`. */
  def haveFile(name: String): Matcher[ObjectId] = haveTreeEntry(name, !_.isSubtree)

  /** Matches a tree-ish containing a subtree entry called `name`. */
  def haveFolder(name: String): Matcher[ObjectId] = haveTreeEntry(name, _.isSubtree)

  /**
   * Matches a tree-ish whose tree has an entry called `name` that is accepted by `p`.
   * An id that turns out to point at a blob never matches.
   */
  def haveTreeEntry(name: String, p: TreeWalk => Boolean): Matcher[ObjectId] = new Matcher[ObjectId] {
    def apply(treeish: ObjectId) = {
      treeOrBlobPointedToBy(treeish.asRevObject) match {
        case Right(tree) =>
          def thing(boo: String) = s"tree ${treeish.shortName} $boo a '$name' entry"
          MatchResult(
            treeEntryNames(tree, p).contains(name),
            thing("did not contain"),
            thing("contained")
          )
        case Left(_) =>
          // This result is always a non-match, so the negated message below is not expected to be shown.
          MatchResult(
            false,
            s"blob ${treeish.shortName} was not a tree containing '$name'",
            "When does this happen??!"
          )
      }
    }
  }

  /** Names of the entries of `t` accepted by `p`, collected with a post-order walk. */
  def treeEntryNames(t: RevTree, p: TreeWalk => Boolean): Seq[String] =
    t.walk(postOrderTraversal = true).withFilter(p).map(_.getNameString).toList

  /**
   * Commits returned by `git log`, in reversed order.
   *
   * @param specificRefs refs to start from; when empty, the log is taken over all refs
   */
  def commitHist(specificRefs: String*)(implicit repo: Repository): Seq[RevCommit] = {
    val logCommand = repo.git.log
    if (specificRefs.isEmpty) logCommand.all else specificRefs.foldLeft(logCommand)((lc, ref) => lc.add(repo.resolve(ref)))
  }.call.asScala.toSeq.reverse

  /** Applies `boom` to the object ids found by walking a commit's tree. */
  def haveCommitWhereObjectIds(boom: Matcher[Iterable[ObjectId]])(implicit reader: ObjectReader): Matcher[RevCommit] = boom compose {
    (c: RevCommit) => c.getTree.walk().map(_.getObjectId(0)).toSeq
  }

  /** Applies `objectIdMatcher` to whatever `refName` resolves to in the repository. */
  def haveRef(refName: String, objectIdMatcher: Matcher[ObjectId]): Matcher[Repository] = objectIdMatcher compose {
    (r: Repository) => r resolve refName // aka s"Ref [$refName]"
  }

  /** Applies `histMatcher` to the full commit history of a repository. */
  def commitHistory(histMatcher: Matcher[Seq[RevCommit]]) = histMatcher compose {
    (r: Repository) => commitHist()(r)
  }

  /** Applies `histMatcher` to the commit history reachable from `refs`. */
  def commitHistoryFor(refs: String*)(histMatcher: Matcher[Seq[RevCommit]]) = histMatcher compose {
    (r: Repository) => commitHist(refs:_*)(r)
  }

  /**
   * Runs a GC and asserts that `expr` satisfies `exprResultMatcher`, executes `block`,
   * then runs a GC again and asserts that `expr` has become empty.
   */
  def ensureRemovalOfBadEggs[S,T](expr : => Iterable[S], exprResultMatcher: Matcher[Iterable[S]])(block: => T) = {
    gc()
    expr should exprResultMatcher

    block

    gc()
    expr shouldBe empty
  }

  /** Runs a JGit garbage collection on the repository with the pack expiry age set to zero. */
  def gc() = {
    val gc = new GC(repo)
    gc.setPackExpireAgeMillis(0)
    gc.gc()
  }


  /** Checks that a matcher holds for some of `commits` before a block runs and for none afterwards. */
  class CheckRemovalFromCommits(commits: => Seq[RevCommit]) extends Inspectors {
    def ofCommitsThat[T](commitM: Matcher[RevCommit])(block: => T): Unit = {
      // `commits` is by-name, so it is evaluated afresh before and after the block.
      forAtLeast(1, commits) { commit =>
        commit should commitM
      }

      block

      forAll(commits) { commit =>
        commit shouldNot commitM
      }
    }
  }


  /** Entry point for `ensureRemovalFrom(commits).ofCommitsThat(matcher) { ... }`. */
  def ensureRemovalFrom(commits: => Seq[RevCommit]): CheckRemovalFromCommits = new CheckRemovalFromCommits(commits)

  /** Asserts that the value of `f` is the same after running `block` as it was before. */
  def ensureInvariantValue[T, S](f: => S)(block: => T) = {
    val originalValue = f
    block
    f should equal(originalValue)
  }

  /** Asserts that the repository satisfies `cond` both before and after running `block`. */
  def ensureInvariantCondition[T, S](cond: Matcher[Repository])(block: => T) = {
    repo should cond
    block
    repo should cond
  }

}
134 |
--------------------------------------------------------------------------------
/bfg/build.sbt:
--------------------------------------------------------------------------------
1 | import java.io.{File, FileOutputStream}
2 |
3 | import Dependencies.*
4 | import sbt.taskKey
5 |
6 | import scala.sys.process.Process
7 | import scala.util.Try
8 |
val gitDescription = taskKey[String]("Git description of working dir")

// Falls back to "unknown" when `git describe` cannot be run or prints nothing.
gitDescription := {
  val described = Try[String] {
    val firstLine = Process("git describe --all --always --dirty --long").lineStream.head
    firstLine.replace("heads/", "").replace("-0-g", "-")
  }
  described.getOrElse("unknown")
}
12 |
libraryDependencies += useNewerJava

// The jar's declared main class is the use-newer-java check; the real entry point is
// recorded in a separate manifest attribute.
mainClass := Some("use.newer.java.Version8")
Compile / packageBin / packageOptions += Package.ManifestAttributes(
  "Main-Class-After-UseNewerJava-Check" -> "com.madgag.git.bfg.cli.Main"
)

// note you don't want the jar name to collide with the non-assembly jar, otherwise confusion abounds.
assembly / assemblyJarName := {
  val jgitSuffix = jgitVersionOverride.map("-jgit-" + _).mkString
  s"${name.value}-${version.value}-${gitDescription.value}$jgitSuffix.jar"
}

assembly / assemblyMergeStrategy := {
  case PathList("META-INF", "versions", "9", "module-info.class") => MergeStrategy.discard
  case otherPath =>
    // Everything else is handled by the previously configured strategy.
    val fallbackStrategy = (assembly / assemblyMergeStrategy).value
    fallbackStrategy(otherPath)
}

buildInfoKeys := Seq[BuildInfoKey](version, scalaVersion, gitDescription)
buildInfoPackage := "com.madgag.git.bfg"

crossPaths := false

// replace the conventional main artifact with an uber-jar
Compile / packageBin / publishArtifact := false
addArtifact(Compile / packageBin / artifact, assembly)
39 |
val cliUsageDump = taskKey[File]("Dump the CLI 'usage' output to a file")

// Runs the main class in a forked JVM without arguments and captures its output in a temp
// file, which is published as the 'usage' artifact.
cliUsageDump := {
  val log = streams.value.log
  val mainClassName = (Compile / run / mainClass).value getOrElse sys.error("No main class detected.")
  val classpath = Attributed.data((Runtime / fullClasspath).value)
  val args = Seq.empty[String]

  // Created only after the main-class check, so a failed check leaves no stray temp file.
  val usageDumpFile = File.createTempFile("bfg-usage", "dump.txt")
  val usageOutput = new FileOutputStream(usageDumpFile)
  try {
    val scalaRun = new ForkRun(ForkOptions().withOutputStrategy(CustomOutput(usageOutput)))
    scalaRun.run(mainClassName, classpath, args, log).failed foreach (failure => sys.error(failure.getMessage))
  } finally {
    // Release the file handle whether or not the forked run succeeded.
    usageOutput.close()
  }
  usageDumpFile
}

addArtifact( Artifact("bfg", "usage", "txt"), cliUsageDump )
55 |
// CLI dependencies; scalaGitTest is limited to the "test" configuration
libraryDependencies ++= Seq(scopt, jgit, scalaGitTest % "test")
61 |
import Tests.*
{
  // Every test whose name contains "RequiresOwnJvm" gets a forked JVM of its own;
  // all remaining tests share one further forked JVM.
  def isolateTestsWhichRequireTheirOwnJvm(tests: Seq[TestDefinition]) = {
    val (isolated, shared) = tests.partition(_.name.contains("RequiresOwnJvm"))

    val testGroups: Seq[Seq[TestDefinition]] = isolated.map(Seq(_)) :+ shared

    for (testGroup <- testGroups)
      yield Group(testGroup.size.toString, testGroup, SubProcess(ForkOptions()))
  }

  Test / testGrouping := isolateTestsWhichRequireTheirOwnJvm((Test / definedTests).value)
}

// JGit uses static (ie JVM-wide) config
Test / fork := true

Test / logBuffered := false

Test / parallelExecution := false
82 |
83 |
--------------------------------------------------------------------------------
/bfg/src/main/scala/com/madgag/git/bfg/cli/CLIConfig.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cli
22 |
23 | import com.madgag.git.bfg.BuildInfo
24 | import com.madgag.git.bfg.GitUtil._
25 | import com.madgag.git.bfg.cleaner._
26 | import com.madgag.git.bfg.cleaner.kit.BlobInserter
27 | import com.madgag.git.bfg.cleaner.protection.ProtectedObjectCensus
28 | import com.madgag.git.bfg.model.FileName.ImplicitConversions._
29 | import com.madgag.git.bfg.model.{FileName, Tree, TreeBlobEntry, TreeBlobs, TreeSubtrees}
30 | import com.madgag.git.{SizedObject, _}
31 | import com.madgag.inclusion.{IncExcExpression, _}
32 | import com.madgag.text.ByteSize
33 | import com.madgag.textmatching.{Glob, TextMatcher, TextMatcherType, TextReplacementConfig}
34 | import org.eclipse.jgit.internal.storage.file.FileRepository
35 | import org.eclipse.jgit.lib._
36 | import org.eclipse.jgit.storage.file.FileRepositoryBuilder
37 | import scopt.{OptionParser, Read}
38 |
39 | import java.io.File
40 | import java.nio.file.Files
41 | import scala.jdk.CollectionConverters._
42 |
43 |
/**
 * Companion holding the scopt command-line parser for the `bfg` tool. Each option's action
 * copies the parsed value into the [[CLIConfig]] being built.
 */
object CLIConfig {
  val parser = new OptionParser[CLIConfig]("bfg") {

    /**
     * Declares an option whose value is read with `TextMatcher(_, defaultType)` and which is
     * rejected when the expression contains a '/', because matching is done on names only.
     *
     * @param name        long name of the option
     * @param defaultType matcher type handed to `TextMatcher`; its expression prefix is also
     *                    shown as the value placeholder in the usage text
     */
    def fileMatcher(name: String, defaultType: TextMatcherType = Glob) = {
      implicit val textMatcherRead: Read[TextMatcher] = Read.reads { TextMatcher(_, defaultType) }

      opt[TextMatcher](name).valueName(s"<${defaultType.expressionPrefix}>").validate { m =>
        if (m.expression.contains('/')) {
          failure("*** Can only match on filename, NOT path *** - remove '/' path segments")
        } else success
      }
    }

    /** All lines of the file `v`, read with `Files.readAllLines`. */
    def readLinesFrom(v: File): Seq[String] = Files.readAllLines(v.toPath).asScala.toSeq

    // Snapshot builds additionally show the git description of the working dir they were built from.
    val exactVersion = BuildInfo.version + (if (BuildInfo.version.contains("-SNAPSHOT")) s" (${BuildInfo.gitDescription})" else "")

    head("bfg", exactVersion)
    version("version").hidden()

    // Every value-taking option below names its operand with a non-empty placeholder, so the
    // generated usage text shows what has to be supplied.
    opt[String]('b', "strip-blobs-bigger-than").valueName("<size>").text("strip blobs bigger than X (eg '128K', '1M', etc)").action {
      (v , c) => c.copy(stripBlobsBiggerThan = Some(ByteSize.parse(v)))
    }
    opt[Int]('B', "strip-biggest-blobs").valueName("NUM").text("strip the top NUM biggest blobs").action {
      (v, c) => c.copy(stripBiggestBlobs = Some(v))
    }
    opt[File]("strip-blobs-with-ids").abbr("bi").valueName("<blob-ids-file>").text("strip blobs with the specified Git object ids").action {
      (v, c) =>
        // One object id per line; surrounding whitespace and blank lines are ignored.
        c.copy(stripBlobsWithIds = Some(readLinesFrom(v).map(_.trim).filterNot(_.isEmpty).map(_.asObjectId).toSet))
    }
    fileMatcher("delete-files").abbr("D").text("delete files with the specified names (eg '*.class', '*.{txt,log}' - matches on file name, not path within repo)").action {
      (v, c) => c.copy(deleteFiles = Some(v))
    }
    fileMatcher("delete-folders").text("delete folders with the specified names (eg '.svn', '*-tmp' - matches on folder name, not path within repo)").action {
      (v, c) => c.copy(deleteFolders = Some(v))
    }
    opt[String]("convert-to-git-lfs").text("extract files with the specified names (eg '*.zip' or '*.mp4') into Git LFS").action {
      (v, c) => c.copy(lfsConversion = Some(v))
    }
    opt[File]("replace-text").abbr("rt").valueName("<expressions-file>").text("filter content of files, replacing matched text. Match expressions should be listed in the file, one expression per line - " +
      "by default, each expression is treated as a literal, but 'regex:' & 'glob:' prefixes are supported, with '==>' to specify a replacement " +
      "string other than the default of '***REMOVED***'.").action {
      (v, c) => c.copy(textReplacementExpressions = readLinesFrom(v).filterNot(_.trim.isEmpty))
    }
    fileMatcher("filter-content-including").abbr("fi").text("do file-content filtering on files that match the specified expression (eg '*.{txt,properties}')").action {
      (v, c) => c.copy(filenameFilters = c.filenameFilters :+ Include(v))
    }
    fileMatcher("filter-content-excluding").abbr("fe").text("don't do file-content filtering on files that match the specified expression (eg '*.{xml,pdf}')").action {
      (v, c) => c.copy(filenameFilters = c.filenameFilters :+ Exclude(v))
    }
    opt[String]("filter-content-size-threshold").abbr("fs").valueName("<size>").text("only do file-content filtering on files smaller than <size> (default is %1$d bytes)".format(CLIConfig().filterSizeThreshold)).action {
      (v, c) => c.copy(filterSizeThreshold = ByteSize.parse(v))
    }
    opt[String]('p', "protect-blobs-from").valueName("<refs>").text("protect blobs that appear in the most recent versions of the specified refs (default is 'HEAD')").action {
      // The value is a comma-separated list of refs.
      (v, c) => c.copy(protectBlobsFromRevisions = v.split(',').toSet)
    }
    opt[Unit]("no-blob-protection").text("allow the BFG to modify even your *latest* commit. Not recommended: you should have already ensured your latest commit is clean.").action {
      (_, c) => c.copy(protectBlobsFromRevisions = Set.empty)
    }
    opt[Unit]("strict-object-checking").text("perform additional checks on integrity of consumed & created objects").hidden().action {
      (_, c) => c.copy(strictObjectChecking = true)
    }
    opt[Unit]("private").text("treat this repo-rewrite as removing private data (for example: omit old commit ids from commit messages)").action {
      (_, c) => c.copy(sensitiveData = Some(true))
    }
    opt[String]("massive-non-file-objects-sized-up-to").valueName("<size>").text("increase memory usage to handle over-size Commits, Tags, and Trees that are up to X in size (eg '10M')").action {
      (v, c) => c.copy(massiveNonFileObjects = Some(ByteSize.parse(v)))
    }
    opt[String]("fix-filename-duplicates-preferring").valueName("<filetype>").text("Fix corrupt trees which contain multiple entries with the same filename, favouring the 'tree' or 'blob'").hidden().action {
      (v, c) =>
        val preferredFileMode = v.toLowerCase match {
          case "tree" | "folder" => FileMode.TREE
          case "blob" | "file" => FileMode.REGULAR_FILE
          case other => throw new IllegalArgumentException(s"'$other' should be 'tree' or 'blob'")
        }
        // Orders the preferred file mode (0) ahead of every other mode (1).
        val ord: Option[Ordering[FileMode]] = Some(Ordering.by[FileMode, Int](filemode => if (filemode==preferredFileMode) 0 else 1))

        c.copy(fixFilenameDuplicatesPreferring = ord)
    }
    arg[File]("<repo>").optional().action { (x, c) =>
      c.copy(repoLocation = x) } text("file path for Git repository to clean")
  }
}
127 |
/**
 * Settings gathered from the BFG command line, together with the cleaners
 * that are derived from them.
 *
 * All derived members are `lazy val`s, so nothing opens the repository or
 * searches it for big blobs until the member is first used.
 *
 * @param stripBiggestBlobs               if set, strip this many of the biggest blobs
 * @param stripBlobsBiggerThan            if set, strip blobs whose size exceeds this threshold
 * @param protectBlobsFromRevisions       revisions handed to `ProtectedObjectCensus` (defaults to `HEAD`)
 * @param deleteFiles                     matcher for names of files to delete
 * @param deleteFolders                   matcher for names of folders to delete
 * @param fixFilenameDuplicatesPreferring ordering used to pick the surviving entry when a tree
 *                                        holds several entries with the same name
 * @param filenameFilters                 include/exclude filters deciding which files get content filtering
 * @param filterSizeThreshold             size threshold passed to the `BlobTextModifier`
 * @param textReplacementExpressions      text-replacement expressions, one per entry
 * @param stripBlobsWithIds               ids of specific blobs to strip
 * @param lfsConversion                   glob of file names to convert to Git LFS
 * @param strictObjectChecking            whether to supply an `ObjectChecker` to the cleaner
 * @param sensitiveData                   explicit override for [[privateDataRemoval]]
 * @param massiveNonFileObjects           size limit for over-size non-file objects (not used within this class)
 * @param repoLocation                    location of the repository; defaults to the `user.dir` system property
 */
case class CLIConfig(stripBiggestBlobs: Option[Int] = None,
                     stripBlobsBiggerThan: Option[Long] = None,
                     protectBlobsFromRevisions: Set[String] = Set("HEAD"),
                     deleteFiles: Option[TextMatcher] = None,
                     deleteFolders: Option[TextMatcher] = None,
                     fixFilenameDuplicatesPreferring: Option[Ordering[FileMode]] = None,
                     filenameFilters: Seq[Filter[String]] = Nil,
                     filterSizeThreshold: Long = BlobTextModifier.DefaultSizeThreshold,
                     textReplacementExpressions: Iterable[String] = List.empty,
                     stripBlobsWithIds: Option[Set[ObjectId]] = None,
                     lfsConversion: Option[String] = None,
                     strictObjectChecking: Boolean = false,
                     sensitiveData: Option[Boolean] = None,
                     massiveNonFileObjects: Option[Long] = None,
                     repoLocation: File = new File(System.getProperty("user.dir"))) {

  /** Git directory resolved from [[repoLocation]]; callers test it for emptiness before using [[repo]]. */
  lazy val gitdir = resolveGitDirFor(repoLocation)

  /** The repository to clean. Uses `gitdir.get`, so it throws if [[gitdir]] is empty. */
  implicit lazy val repo: FileRepository = FileRepositoryBuilder.create(gitdir.get).asInstanceOf[FileRepository]

  lazy val objectProtection = ProtectedObjectCensus(protectBlobsFromRevisions)

  lazy val objectChecker = if (strictObjectChecking) Some(new ObjectChecker()) else None

  lazy val fileDeletion: Option[Cleaner[TreeBlobs]] = deleteFiles.map {
    textMatcher => new FileDeleter(textMatcher)
  }

  /** Drops every subtree entry whose name is matched by `deleteFolders`. */
  lazy val folderDeletion: Option[Cleaner[TreeSubtrees]] = deleteFolders.map {
    textMatcher => { subtrees: TreeSubtrees =>
      TreeSubtrees(subtrees.entryMap.view.filterKeys(filename => !textMatcher(filename)).toMap)
    }
  }

  /** For each group of same-named tree entries, keeps the one ranked lowest by the preferred file-mode ordering. */
  lazy val fixFileNameDuplication: Option[Cleaner[Seq[Tree.Entry]]] = fixFilenameDuplicatesPreferring.map {
    implicit preferredFileModes =>
      { treeEntries: Seq[Tree.Entry] => treeEntries.groupBy(_.name).values.map(_.minBy(_.fileMode)).toSeq }
  }

  lazy val lineModifier: Option[String => String] =
    TextReplacementConfig(textReplacementExpressions, "***REMOVED***")

  /** Decides, from the include/exclude filters, whether a file's content should be filtered. */
  lazy val filterContentPredicate: (FileName => Boolean) = f => IncExcExpression(filenameFilters) includes (f.string)

  lazy val blobTextModifier: Option[BlobTextModifier] = lineModifier.map {
    replacer =>
      new BlobTextModifier {
        override val sizeThreshold = filterSizeThreshold

        def lineCleanerFor(entry: TreeBlobEntry) = if (filterContentPredicate(entry.filename)) Some(replacer) else None

        val threadLocalObjectDBResources = repo.getObjectDatabase.threadLocalResources
      }
  }

  lazy val lfsBlobConverter: Option[LfsBlobConverter] = lfsConversion.map { lfsGlobExpr =>
    new LfsBlobConverter(lfsGlobExpr, repo)
  }

  /**
   * Whether this run counts as removing private data: the explicit `sensitiveData`
   * setting if given, otherwise true when any file deletion, folder deletion or
   * text modification has been requested.
   */
  lazy val privateDataRemoval = sensitiveData.getOrElse(Seq(fileDeletion, folderDeletion, blobTextModifier).flatten.nonEmpty)

  lazy val objectIdSubstitutor = if (privateDataRemoval) ObjectIdSubstitutor.OldIdsPrivate else ObjectIdSubstitutor.OldIdsPublic

  lazy val treeEntryListCleaners = fixFileNameDuplication.toSeq

  /** Commit cleaners: always the object-id updater, plus the former-commit footer unless removing private data. */
  lazy val commitNodeCleaners = {
    lazy val formerCommitFooter = if (privateDataRemoval) None else Some(FormerCommitFooter)

    Seq(new CommitMessageObjectIdsUpdater(objectIdSubstitutor)) ++ formerCommitFooter
  }

  /** All configured blob-level cleaners, in the order they are listed in the final `Seq`. */
  lazy val treeBlobCleaners: Seq[Cleaner[TreeBlobs]] = {

    lazy val blobsByIdRemover: Option[BlobRemover] = stripBlobsWithIds.map(new BlobRemover(_))

    lazy val blobRemover: Option[Cleaner[TreeBlobs]] = {
      implicit val progressMonitor: ProgressMonitor = new TextProgressMonitor()

      // Each requested size criterion becomes a function selecting a prefix of the big-blob list
      val sizeBasedBlobTargetSources = Seq(
        stripBlobsBiggerThan.map(threshold => (s: LazyList[SizedObject]) => s.takeWhile(_.size > threshold)),
        stripBiggestBlobs.map(num => (s: LazyList[SizedObject]) => s.take(num))
      ).flatten

      if (sizeBasedBlobTargetSources.isEmpty) None else {
        // Search for big blobs once and share the result between all criteria,
        // rather than repeating the search for every criterion
        val candidateBlobs = biggestBlobs(repo.getObjectDatabase, progressMonitor)
        val sizedBadIds = sizeBasedBlobTargetSources.flatMap(_(candidateBlobs)).toSet
        if (sizedBadIds.isEmpty) {
          println("Warning : no large blobs matching criteria found in packfiles - does the repo need to be packed?")
          None
        } else {
          println("Found " + sizedBadIds.size + " blob ids for large blobs - biggest=" + sizedBadIds.max.size + " smallest=" + sizedBadIds.min.size)
          println("Total size (unpacked)=" + sizedBadIds.map(_.size).sum)
          Some(new BlobReplacer(sizedBadIds.map(_.objectId), new BlobInserter(repo.getObjectDatabase.threadLocalResources.inserter())))
        }
      }
    }

    Seq(blobsByIdRemover, blobRemover, fileDeletion, blobTextModifier, lfsBlobConverter).flatten
  }

  /** True when the supplied options configure no cleaner at all. */
  lazy val definesNoWork = treeBlobCleaners.isEmpty && folderDeletion.isEmpty && treeEntryListCleaners.isEmpty

  /** Bundles the derived cleaners into the configuration consumed by `ObjectIdCleaner`. */
  def objectIdCleanerConfig: ObjectIdCleaner.Config =
    ObjectIdCleaner.Config(
      objectProtection,
      objectIdSubstitutor,
      commitNodeCleaners,
      treeEntryListCleaners,
      treeBlobCleaners,
      folderDeletion.toSeq,
      objectChecker
    )

  /** Human-readable summary of whether the former-commit footer will be added. */
  def describe = {
    if (privateDataRemoval) {
      "is removing private data, so the '" + FormerCommitFooter.Key + "' footer will not be added to commit messages."
    } else {
      "is only removing non-private data (eg, blobs that are just big, not private) : '" + FormerCommitFooter.Key + "' footer will be added to commit messages."
    }
  }
}
248 |
--------------------------------------------------------------------------------
/bfg/src/main/scala/com/madgag/git/bfg/cli/Main.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cli
22 |
23 | import com.madgag.git._
24 | import com.madgag.git.bfg.GitUtil._
25 | import com.madgag.git.bfg.cleaner._
26 |
/**
 * Command-line entry point for the BFG.
 *
 * Parses the arguments into a [[CLIConfig]], checks that they point at a Git
 * repository and define some cleaning work, then passes the resulting
 * configuration to `RepoRewriter`. With no arguments, only the usage text is shown.
 */
object Main extends App {

  if (args.isEmpty) {
    CLIConfig.parser.showUsage()
  } else {

    // The parse result is an Option: when it is empty there is nothing further to do here
    CLIConfig.parser.parse(args, CLIConfig()) foreach {
      config =>

        tweakStaticJGitConfig(config.massiveNonFileObjects)

        if (config.gitdir.isEmpty) {
          CLIConfig.parser.showUsage()
          Console.err.println("Aborting : " + config.repoLocation + " is not a valid Git repository.\n")
        } else {
          implicit val repo = config.repo

          // Once opened, the repository is closed on every path - including the
          // "no work defined" path and any failure during gc or rewriting
          try {
            println("\nUsing repo : " + repo.getDirectory.getAbsolutePath + "\n")

            // do this before implicitly initiating big-blob search
            if (hasBeenProcessedByBFGBefore(repo)) {
              println("\nThis repo has been processed by The BFG before! Will prune repo before proceeding - to avoid unnecessary cleaning work on unused objects...")
              repo.git.gc.call()
              println("Completed prune of old objects - will now proceed with the main job!\n")
            }

            if (config.definesNoWork) {
              Console.err.println("Please specify tasks for The BFG :")
              CLIConfig.parser.showUsage()
            } else {
              println("Found " + config.objectProtection.fixedObjectIds.size + " objects to protect")

              RepoRewriter.rewrite(repo, config.objectIdCleanerConfig)
            }
          } finally {
            repo.close()
          }
        }
    }
  }

}
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/annotatedTagExample.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/annotatedTagExample.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/badEncoding.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/badEncoding.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/badRepoContainingDotGitFolder.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/badRepoContainingDotGitFolder.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/branchNameWithASlash.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/branchNameWithASlash.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/corruptTreeDupFileName.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/corruptTreeDupFileName.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/example.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/example.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/exampleWithInitialCleanHistory.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/exampleWithInitialCleanHistory.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/folder-example.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/folder-example.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/huge10MBCommitMessage.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/huge10MBCommitMessage.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/moreThanOneBigBlobWithTheSameSize.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/moreThanOneBigBlobWithTheSameSize.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/repoWithBigBlobs.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/repoWithBigBlobs.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/unwantedSubmodule.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/unwantedSubmodule.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/usedToHaveASubmodule.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/usedToHaveASubmodule.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/scala/com/madgag/git/bfg/cli/CLIConfigSpecs.scala:
--------------------------------------------------------------------------------
1 | package com.madgag.git.bfg.cli
2 |
3 | import com.madgag.git.bfg.model.FileName
4 | import org.scalatest.flatspec.AnyFlatSpec
5 | import org.scalatest.matchers.should.Matchers
6 |
/** Checks how `-fi` / `-fe` options combine into the content-filtering predicate. */
class CLIConfigSpecs extends AnyFlatSpec with Matchers {

  /**
   * Parses the space-separated options (with a repo argument appended) and
   * returns the resulting content-filtering predicate.
   */
  def parse(args: String): FileName => Boolean = {
    val commandLine = args.split(' ') :+ "my-repo.git"
    val parsedConfig = CLIConfig.parser.parse(commandLine, CLIConfig())
    parsedConfig.get.filterContentPredicate
  }

  "CLI config" should "understand lone include" in {
    val shouldFilter = parse("-fi *.txt")

    shouldFilter(FileName("panda")) shouldBe false
    shouldFilter(FileName("foo.txt")) shouldBe true
    shouldFilter(FileName("foo.java")) shouldBe false
  }

  it should "understand lone exclude" in {
    val shouldFilter = parse("-fe *.txt")

    shouldFilter(FileName("panda")) shouldBe true
    shouldFilter(FileName("foo.txt")) shouldBe false
    shouldFilter(FileName("foo.java")) shouldBe true
  }

  it should "understand include followed by exclude" in {
    val shouldFilter = parse("-fi *.txt -fe Poison.*")

    shouldFilter(FileName("panda")) shouldBe false
    shouldFilter(FileName("foo.txt")) shouldBe true
    shouldFilter(FileName("foo.java")) shouldBe false
    shouldFilter(FileName("Poison.txt")) shouldBe false
  }

  it should "understand exclude followed by include" in {
    val shouldFilter = parse("-fe *.xml -fi hbm.xml")

    shouldFilter(FileName("panda")) shouldBe true
    shouldFilter(FileName("foo.xml")) shouldBe false
    shouldFilter(FileName("hbm.xml")) shouldBe true
  }

}
42 |
--------------------------------------------------------------------------------
/bfg/src/test/scala/com/madgag/git/bfg/cli/MainSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cli
22 |
23 | import com.madgag.git._
24 | import com.madgag.git.bfg.cli.test.unpackedRepo
25 | import com.madgag.git.bfg.model._
26 | import org.eclipse.jgit.lib.{ObjectId, ObjectReader}
27 | import org.scalatest.flatspec.AnyFlatSpec
28 | import org.scalatest.matchers.should.Matchers
29 | import org.scalatest.{Inspectors, OptionValues}
30 |
31 | import java.nio.file.Files
32 | import scala.jdk.CollectionConverters._
33 |
/**
 * End-to-end tests that run the BFG command line against sample repositories
 * unpacked from zip files under `/sample-repos`.
 */
class MainSpec extends AnyFlatSpec with Matchers with OptionValues with Inspectors {

  // concurrent testing against scala.App is not safe https://twitter.com/rtyley/status/340376844916387840

  "CLI" should "not change commits unnecessarily" in new unpackedRepo("/sample-repos/exampleWithInitialCleanHistory.git.zip") {
    implicit val r: ObjectReader = reader

    ensureInvariantValue(commitHist() take 2) {
      ensureRemovalFrom(commitHist()).ofCommitsThat(haveCommitWhereObjectIds(contain(abbrId("294f")))) {
        run("--strip-blobs-bigger-than 1K")
      }
    }
  }


  "removing empty trees" should "work" in new unpackedRepo("/sample-repos/folder-example.git.zip") {
    ensureRemovalFrom(commitHist()).ofCommitsThat(haveFolder("secret-files")) {
      run("--delete-files {credentials,passwords}.txt")
    }
  }

  "removing big blobs" should "definitely still remove blobs even if they have identical size" in new unpackedRepo("/sample-repos/moreThanOneBigBlobWithTheSameSize.git.zip") {
    ensureRemovalOfBadEggs(packedBlobsOfSize(1024), (contain allElementsOf Set(abbrId("06d7"), abbrId("cb2c"))).matcher[Iterable[ObjectId]]) {
      run("--strip-blobs-bigger-than 512B")
    }
  }

  "converting to Git LFS" should "create a file in lfs/objects" in new unpackedRepo("/sample-repos/repoWithBigBlobs.git.zip") {
    ensureRemovalOfBadEggs(packedBlobsOfSize(11238), (contain only abbrId("596c")).matcher[Iterable[ObjectId]]) {
      run("--convert-to-git-lfs *.png --no-blob-protection")
    }
    val lfsFile = repo.getDirectory.toPath.resolve(Seq("lfs", "objects", "e0", "eb", "e0ebd49837a1cced34b9e7d3ff2fa68a8100df8f158f165ce139e366a941ba6e"))

    Files.size(lfsFile) shouldBe 11238
  }

  "removing a folder named '.git'" should "work" in new unpackedRepo("/sample-repos/badRepoContainingDotGitFolder.git.zip") {
    ensureRemovalFrom(commitHist()).ofCommitsThat(haveFolder(".git")) {
      run("--delete-folders .git --no-blob-protection")
    }
  }

  "cleaning" should "not crash encountering a protected an annotated tag" in new unpackedRepo("/sample-repos/annotatedTagExample.git.zip") {
    ensureInvariantCondition(haveRef("chapter1", haveFile("chapter1.txt"))) {
      ensureRemovalFrom(commitHist("master")).ofCommitsThat(haveFile("chapter2.txt")) {
        run("--strip-blobs-bigger-than 10B --protect-blobs-from chapter1")
      }
    }
  }

  "cleaning" should "not crash encountering a protected branch containing a slash in it's name" in new unpackedRepo("/sample-repos/branchNameWithASlash.git.zip") {
    ensureInvariantCondition(haveRef("feature/slashes-are-ugly", haveFile("bar"))) {
      ensureRemovalFrom(commitHist("master")).ofCommitsThat(haveFile("bar")) {
        run("--delete-files bar --protect-blobs-from feature/slashes-are-ugly")
      }
    }
  }

  "strip blobs by id" should "work" in new unpackedRepo("/sample-repos/example.git.zip") {
    implicit val r: ObjectReader = reader

    val badBlobs = Set(abbrId("db59"), abbrId("86f9"))
    val blobIdsFile = Files.createTempFile("test-strip-blobs",".ids")

    // The id-list file is only needed while the BFG runs: remove it afterwards
    // so that test runs do not leave files behind in the temp directory
    try {
      Files.write(blobIdsFile, badBlobs.map(_.name()).asJava)

      ensureRemovalFrom(commitHist()).ofCommitsThat(haveCommitWhereObjectIds(contain(abbrId("db59")))) {
        run(s"--strip-blobs-with-ids $blobIdsFile")
      }
    } finally {
      Files.deleteIfExists(blobIdsFile)
    }
  }

  "deleting a folder" should "not crash encountering a submodule" in new unpackedRepo("/sample-repos/usedToHaveASubmodule.git.zip") {
    ensureInvariantCondition(haveRef("master", haveFile("alpha"))) {
      ensureRemovalFrom(commitHist()).ofCommitsThat(haveFolder("shared")) {
        run("--delete-folders shared")
      }
    }
  }

  "deleting" should "not crash encountering a protected submodule" in new unpackedRepo("/sample-repos/unwantedSubmodule.git.zip") {
    ensureRemovalFrom(commitHist()).ofCommitsThat(haveFile("foo.txt")) {
      run("--delete-folders bar --delete-files foo.txt")
    }
  }

  "deleting" should "not crash on encountering a commit with bad encoding header" in new unpackedRepo("/sample-repos/badEncoding.git.zip") {
    ensureRemovalFrom(commitHist()).ofCommitsThat(haveFile("test.txt")) {
      run("--no-blob-protection --delete-files test.txt")
    }
  }

  "Corrupt trees containing duplicate filenames" should "be cleaned by removing the file with the duplicate FileName, leaving the folder" in new unpackedRepo("/sample-repos/corruptTreeDupFileName.git.zip") {
    ensureRemovalFrom(commitHist()).ofCommitsThat(haveFile("2.0.0")) {
      run("--fix-filename-duplicates-preferring tree")
    }
  }
}
130 |
131 |
--------------------------------------------------------------------------------
/bfg/src/test/scala/com/madgag/git/bfg/cli/MassiveNonFileObjectsRequiresOwnJvmSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cli
22 |
23 | import com.madgag.git.bfg.cli.test.unpackedRepo
24 | import org.scalatest.flatspec.AnyFlatSpec
25 | import org.scalatest.matchers.should.Matchers
26 |
27 | // JGit has JVM-wide configuration for cache window size: https://git.eclipse.org/r/#/q/Ibf2ef604bac08885b2b3bd85f0dc31995132b682,n,z
/** Runs the BFG on a repo with a huge commit message, using the massive-non-file-objects option. */
class MassiveNonFileObjectsRequiresOwnJvmSpec extends AnyFlatSpec with Matchers {

  // concurrent testing against scala.App is not safe https://twitter.com/rtyley/status/340376844916387840

  "Massive commit messages" should "be handled without crash (ie LargeObjectException) if the user specifies that the repo contains massive non-file objects" in
    new unpackedRepo("/sample-repos/huge10MBCommitMessage.git.zip") {
      // Options handed to the BFG: strip big blobs, and allow non-file objects of up to 20M
      val bfgOptions = "--strip-blobs-bigger-than 1K --massive-non-file-objects-sized-up-to 20M"

      ensureRemovalFrom(commitHist("master")).ofCommitsThat(haveFile("16-kb-zeros")) {
        run(bfgOptions)
      }
    }

}
40 |
--------------------------------------------------------------------------------
/bfg/src/test/scala/com/madgag/git/bfg/cli/test/unpackedRepo.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012, 2013 Roberto Tyley
3 | *
4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
5 | * or troublesome blobs from Git repositories.
6 | *
7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program. If not, see http://www.gnu.org/licenses/ .
19 | */
20 |
21 | package com.madgag.git.bfg.cli.test
22 |
23 | import com.madgag.git.bfg
24 | import com.madgag.git.bfg.cli.Main
25 |
/** Test fixture extending the library's `unpackedRepo` with the ability to run the BFG CLI against the repo. */
class unpackedRepo(filePath: String) extends bfg.test.unpackedRepo(filePath) {

  /**
   * Invokes the BFG `Main` with the given options against this repository.
   *
   * @param options command-line options, separated by single spaces; the
   *                repository's absolute path is appended as the last argument
   */
  def run(options: String): Unit = {
    val optionArgs = options.split(' ')
    val repoPath = repo.getDirectory.getAbsolutePath
    Main.main(optionArgs :+ repoPath)
  }
}
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
1 | import ReleaseTransformations.*
2 | import Dependencies.*
3 |
// ---- Settings shared by every project in the build ----

ThisBuild / organization := "com.madgag"

ThisBuild / scalaVersion := "2.13.16"

ThisBuild / scalacOptions ++= Seq(
  "-deprecation",
  "-feature",
  "-language:postfixOps",
  "-release:11"
)

ThisBuild / licenses := Seq(License.GPL3_or_later)

// The local Maven repository is only added as a resolver when a JGit version override is set
ThisBuild / resolvers ++= jgitVersionOverride.toSeq.map(_ => Resolver.mavenLocal)

ThisBuild / libraryDependencies += scalatest % Test

// Extra ScalaTest arguments: "-u" with a per-Scala-version results directory
// (presumably the JUnit-XML report location - confirm against the ScalaTest Runner docs)
ThisBuild / Test / testOptions += Tests.Argument(
  TestFrameworks.ScalaTest,
  "-u", s"test-results/scala-${scalaVersion.value}"
)

// ---- Projects ----

// Aggregating parent project: not published itself, and it owns the release process
lazy val root = Project(id = "bfg-parent", base = file("."))
  .aggregate(bfg, `bfg-test`, `bfg-library`)
  .settings(
    publish / skip := true,
    releaseCrossBuild := true, // true if you cross-build the project for multiple Scala versions
    releaseProcess := Seq[ReleaseStep](
      checkSnapshotDependencies,
      inquireVersions,
      runClean,
      runTest,
      setReleaseVersion,
      commitReleaseVersion,
      tagRelease,
      setNextVersion,
      commitNextVersion
    )
  )

lazy val `bfg-test` = project

lazy val `bfg-library` = project.dependsOn(`bfg-test` % Test)

lazy val bfg = project
  .enablePlugins(BuildInfoPlugin)
  .dependsOn(`bfg-library`, `bfg-test` % Test)

// Note: not included in the root project's aggregate list
lazy val `bfg-benchmark` = project
44 |
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=1.10.7
2 |
--------------------------------------------------------------------------------
/project/dependencies.scala:
--------------------------------------------------------------------------------
1 | import sbt._
2 |
/** Library coordinates and versions used by the build. */
object Dependencies {

  // Organisation ids that appear in more than one coordinate below
  private val madgagOrg = "com.madgag"
  private val scalaGitOrg = "com.madgag.scala-git"
  private val jgitOrg = "org.eclipse.jgit"

  val scalaGitVersion = "5.0.3"

  // The JGit version can be overridden with the 'jgit.version' system property
  val jgitVersionOverride = sys.props.get("jgit.version")

  val jgitVersion = jgitVersionOverride.getOrElse("6.10.0.202406032230-r")

  val jgit = jgitOrg % "org.eclipse.jgit" % jgitVersion

  // this matches slf4j-api in jgit's dependencies
  val slf4jSimple = "org.slf4j" % "slf4j-simple" % "1.7.36"

  val scalaCollectionPlus = madgagOrg %% "scala-collection-plus" % "0.11"

  val parCollections = "org.scala-lang.modules" %% "scala-parallel-collections" % "1.2.0"

  // scala-git's own JGit dependency is excluded here
  val scalaGit = (scalaGitOrg %% "scala-git" % scalaGitVersion).exclude(jgitOrg, "org.eclipse.jgit")

  val scalaGitTest = scalaGitOrg %% "scala-git-test" % scalaGitVersion

  val scalatest = "org.scalatest" %% "scalatest" % "3.2.19"

  val madgagCompress = madgagOrg % "util-compress" % "1.35"

  val textmatching = madgagOrg %% "scala-textmatching" % "2.8"

  val scopt = "com.github.scopt" %% "scopt" % "3.7.1"

  val guava = Seq(
    "com.google.guava" % "guava" % "33.4.0-jre",
    "com.google.code.findbugs" % "jsr305" % "3.0.2"
  )

  val useNewerJava = madgagOrg % "use-newer-java" % "1.0.2"

  val lineSplitting = madgagOrg %% "line-break-preserving-line-splitting" % "0.1.6"

}
39 |
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("com.github.sbt" % "sbt-release" % "1.4.0")
2 |
3 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.12.2")
4 |
5 | addSbtPlugin("ch.epfl.scala" % "sbt-version-policy" % "3.2.1")
6 |
7 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.3.0")
8 |
9 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.13.1")
10 |
11 | addDependencyTreePlugin
--------------------------------------------------------------------------------
/version.sbt:
--------------------------------------------------------------------------------
1 | ThisBuild / version := "1.15.1-SNAPSHOT"
2 |
--------------------------------------------------------------------------------