├── .github ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── ci.yml │ └── release.yml ├── .gitignore ├── .idea ├── copyright │ ├── BFG_GPL_v3.xml │ └── profiles_settings.xml └── scopes │ ├── Files_for_Copyright.xml │ └── scope_settings.xml ├── .tool-versions ├── BUILD.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── backers.md ├── bfg-benchmark ├── build.sbt ├── resources │ ├── jars │ │ └── grabJars.sh │ └── repos │ │ ├── chromium-src │ │ └── commands │ │ │ └── issue-23 │ │ │ └── bfg.txt │ │ ├── gcc │ │ └── commands │ │ │ └── delete-file │ │ │ ├── bfg.txt │ │ │ └── gfb.txt │ │ ├── git │ │ └── commands │ │ │ └── delete-file │ │ │ ├── bfg.txt │ │ │ └── gfb.txt │ │ ├── github-gem │ │ └── commands │ │ │ └── delete-file │ │ │ ├── bfg.txt │ │ │ └── gfb.txt │ │ ├── intellij │ │ └── commands │ │ │ ├── delete-binary-resources │ │ │ └── bfg.txt │ │ │ ├── delete-file │ │ │ ├── bfg.txt │ │ │ └── gfb.txt │ │ │ └── git-lfs-binary-resources │ │ │ └── bfg.txt │ │ ├── jgit │ │ └── commands │ │ │ ├── delete-file │ │ │ ├── bfg.txt │ │ │ └── gfb.txt │ │ │ ├── replace-1-existing-string │ │ │ ├── bfg.txt │ │ │ └── passwords.1-existing-string.txt │ │ │ ├── replace-20-existing-strings │ │ │ ├── bfg.txt │ │ │ └── passwords.20-existing-strings.txt │ │ │ └── replace-500-existing-strings │ │ │ ├── bfg.txt │ │ │ └── passwords.500-existing-strings.txt │ │ ├── linux │ │ └── commands │ │ │ └── delete-file │ │ │ ├── bfg.txt │ │ │ └── gfb.txt │ │ ├── rails │ │ └── commands │ │ │ └── delete-file │ │ │ ├── bfg.txt │ │ │ └── gfb.txt │ │ └── wine │ │ └── commands │ │ └── delete-file │ │ ├── bfg.txt │ │ └── gfb.txt └── src │ ├── main │ └── scala │ │ ├── Benchmark.scala │ │ ├── BenchmarkConfig.scala │ │ ├── JavaVersion.scala │ │ ├── lib │ │ ├── Repo.scala │ │ └── Timing.scala │ │ └── model │ │ ├── BFGJar.scala │ │ ├── InvocableEngine.scala │ │ ├── InvocableEngineSet.scala │ │ └── Java.scala │ └── test │ └── scala │ └── JavaVersionSpec.scala ├── bfg-library ├── build.sbt └── src │ ├── main │ └── scala 
│ │ └── com │ │ └── madgag │ │ ├── collection │ │ └── concurrent │ │ │ ├── ConcurrentMultiMap.scala │ │ │ └── ConcurrentSet.scala │ │ ├── git │ │ ├── LFS.scala │ │ └── bfg │ │ │ ├── GitUtil.scala │ │ │ ├── cleaner │ │ │ ├── BlobCharsetDetector.scala │ │ │ ├── BlobTextModifier.scala │ │ │ ├── LfsBlobConverter.scala │ │ │ ├── ObjectIdCleaner.scala │ │ │ ├── ObjectIdSubstitutor.scala │ │ │ ├── RepoRewriter.scala │ │ │ ├── Reporter.scala │ │ │ ├── TreeBlobModifier.scala │ │ │ ├── commits.scala │ │ │ ├── kit │ │ │ │ └── BlobInserter.scala │ │ │ ├── package.scala │ │ │ ├── protection │ │ │ │ ├── ProtectedObjectCensus.scala │ │ │ │ └── ProtectedObjectDirtReport.scala │ │ │ └── treeblobs.scala │ │ │ ├── memo.scala │ │ │ ├── model │ │ │ ├── Commit.scala │ │ │ ├── Footer.scala │ │ │ └── package.scala │ │ │ └── timing.scala │ │ ├── inclusion │ │ └── inclusion.scala │ │ └── text │ │ ├── ByteSize.scala │ │ ├── Tables.scala │ │ └── text.scala │ └── test │ ├── resources │ └── sample-repos │ │ ├── deep-history.zip │ │ ├── encodings.git.zip │ │ ├── example.git.zip │ │ ├── exampleWithInitialCleanHistory.git.zip │ │ ├── folder-example.git.zip │ │ ├── footers.git.zip │ │ └── taleOfTwoBranches.git.zip │ └── scala │ └── com │ └── madgag │ ├── git │ ├── LFSSpec.scala │ └── bfg │ │ ├── GitUtilSpec.scala │ │ ├── MessageFooterSpec.scala │ │ ├── TreeEntrySpec.scala │ │ ├── cleaner │ │ ├── LfsBlobConverterSpec.scala │ │ ├── ObjectIdCleanerSpec.scala │ │ ├── ObjectIdSubstitutorSpec.scala │ │ ├── RepoRewriteSpec.scala │ │ └── TreeBlobModifierSpec.scala │ │ └── model │ │ └── CommitSpec.scala │ └── text │ └── ByteSizeSpecs.scala ├── bfg-test ├── build.sbt └── src │ └── main │ └── scala │ └── com │ └── madgag │ └── git │ └── bfg │ └── test │ └── unpackedRepo.scala ├── bfg ├── build.sbt └── src │ ├── main │ └── scala │ │ └── com │ │ └── madgag │ │ └── git │ │ └── bfg │ │ └── cli │ │ ├── CLIConfig.scala │ │ └── Main.scala │ └── test │ ├── resources │ └── sample-repos │ │ ├── 
annotatedTagExample.git.zip │ │ ├── badEncoding.git.zip │ │ ├── badRepoContainingDotGitFolder.git.zip │ │ ├── branchNameWithASlash.git.zip │ │ ├── corruptTreeDupFileName.git.zip │ │ ├── example.git.zip │ │ ├── exampleWithInitialCleanHistory.git.zip │ │ ├── folder-example.git.zip │ │ ├── huge10MBCommitMessage.git.zip │ │ ├── moreThanOneBigBlobWithTheSameSize.git.zip │ │ ├── repoWithBigBlobs.git.zip │ │ ├── unwantedSubmodule.git.zip │ │ └── usedToHaveASubmodule.git.zip │ └── scala │ └── com │ └── madgag │ └── git │ └── bfg │ └── cli │ ├── CLIConfigSpecs.scala │ ├── MainSpec.scala │ ├── MassiveNonFileObjectsRequiresOwnJvmSpec.scala │ └── test │ └── unpackedRepo.scala ├── build.sbt ├── project ├── build.properties ├── dependencies.scala └── plugins.sbt └── version.sbt /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | I assert that this patch is my own work, and to [simplify the licensing of the BFG Repo-Cleaner](https://github.com/rtyley/bfg-repo-cleaner/blob/master/CONTRIBUTING.md#pull-requests): 2 | 3 | _(choose 1 of these 2 options)_ 4 | 5 | - [ ] I assign the copyright on this contribution to Roberto Tyley 6 | - [ ] I disclaim copyright and thus place this contribution in the public domain 7 | 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | 6 | # triggering CI default branch improves caching 7 | # see https://docs.github.com/en/free-pro-team@latest/actions/guides/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache 8 | push: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | test: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: guardian/setup-scala@v1 18 | - name: Build and Test 19 | run: sbt -v test 20 | - name: Test Summary 21 | 
uses: test-summary/action@v2 22 | with: 23 | paths: "test-results/**/TEST-*.xml" 24 | if: always() -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | release: 8 | uses: guardian/gha-scala-library-release-workflow/.github/workflows/reusable-release.yml@v1 9 | permissions: { contents: write, pull-requests: write } 10 | with: 11 | GITHUB_APP_ID: 930725 12 | SONATYPE_PROFILE_NAME: 'com.madgag' 13 | SONATYPE_CREDENTIAL_HOST: 's01.oss.sonatype.org' 14 | secrets: 15 | SONATYPE_TOKEN: ${{ secrets.AUTOMATED_MAVEN_RELEASE_SONATYPE_TOKEN }} 16 | PGP_PRIVATE_KEY: ${{ secrets.AUTOMATED_MAVEN_RELEASE_PGP_SECRET }} 17 | GITHUB_APP_PRIVATE_KEY: ${{ secrets.AUTOMATED_MAVEN_RELEASE_GITHUB_APP_PRIVATE_KEY }} 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | *~ 3 | .idea 4 | .idea_modules 5 | *.iml 6 | *.jar 7 | repo.git.zip 8 | **/.project 9 | **/.classpath 10 | **/.settings 11 | .bsp 12 | 13 | .DS_Store 14 | test-results/ 15 | -------------------------------------------------------------------------------- /.idea/copyright/BFG_GPL_v3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/scopes/Files_for_Copyright.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 
-------------------------------------------------------------------------------- /.idea/scopes/scope_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.tool-versions: -------------------------------------------------------------------------------- 1 | java corretto-11.0.25.9.1 2 | -------------------------------------------------------------------------------- /BUILD.md: -------------------------------------------------------------------------------- 1 | The BFG is written in Scala, a modern functional language that runs on the JVM - so it 2 | can run anywhere Java can. 3 | 4 | Here's a rough set of instructions for building the BFG, if you don't want to use the 5 | pre-built [downloads](http://rtyley.github.io/bfg-repo-cleaner/#download): 6 | 7 | * Install Java JDK 11 or above 8 | * Install [sbt](https://www.scala-sbt.org/1.x/docs/Setup.html) 9 | * `git clone git@github.com:rtyley/bfg-repo-cleaner.git` 10 | * `cd bfg-repo-cleaner` 11 | * `sbt` <- start the sbt console 12 | * `bfg/assembly` <- download dependencies, run the tests, build the jar 13 | 14 | To find the jar once it's built, just look at the last few lines of output from the 15 | `assembly` task - it'll say something like this: 16 | 17 | ``` 18 | [info] Packaging /Users/roberto/development/bfg-repo-cleaner/bfg/target/bfg-1.11.9-SNAPSHOT-master-21d2115.jar ... 19 | [info] Done packaging. 20 | [success] Total time: 19 s, completed 26-Sep-2014 16:05:11 21 | ``` 22 | 23 | If you're going to make changes to the Scala code, you may want to use IntelliJ and its Scala 24 | plugin to help with the Scala syntax...! 
25 | 26 | If you use [Eclipse IDE](http://www.eclipse.org/), you can set up your development environment by following these instructions: 27 | 28 | * Install `sbt` and build as above 29 | * Install [Scala IDE for Eclipse](http://scala-ide.org/) into your Eclipse installation if not already installed 30 | * Add the `sbteclipse-plugin` to your set of local sbt plugins: 31 | 32 | ``` 33 | mkdir -p ~/.sbt/1.0/plugins && tee ~/.sbt/1.0/plugins/plugins.sbt < Import -> Existing Projects into Workspace`, browse to your `bfg` working-copy, and ensure that you select `Search for nested projects` 42 | * You should now have the 4 `sbt` projects imported into your Eclipse workspace. 43 | 44 | I personally found Coursera's [online Scala course](https://www.coursera.org/course/progfun) very helpful in 45 | learning Scala, YMMV. 46 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Issues and Questions 2 | -------------------- 3 | 4 | If you've found what looks like a bug, or have a feature request for the BFG, please check 5 | [issues on GitHub](https://github.com/rtyley/bfg-repo-cleaner/issues), and create a new issue 6 | if necessary. 7 | 8 | If you just have a general question, or there's something you don't understand, ask on [stackoverflow.com](http://stackoverflow.com/questions/ask) (tag it with [`git-rewrite-history`](http://stackoverflow.com/questions/tagged/git-rewrite-history) and 9 | [`bfg-repo-cleaner`](http://stackoverflow.com/questions/tagged/bfg-repo-cleaner) so I see it) - there are 10 | many more people who can answer that sort of question on Stack Overflow, so you stand a good chance 11 | of getting your question answered quicker! 
12 | 13 | Pull Requests 14 | ------------- 15 | 16 | BFG Repo-Cleaner is licensed under the [GPL v3](http://www.gnu.org/licenses/gpl.html), and to be in the best position to enforce the GPL the copyright status of BFG Repo Cleaner needs to be as simple as possible. To achieve this, contributors should only provide contributions which are **their own work**, and either: 17 | 18 | a) Assign the copyright on the contribution to myself, Roberto Tyley 19 | 20 | **or** 21 | 22 | b) Disclaim copyright on it and thus put it in the public domain 23 | 24 | **Please specify which option you want to use when creating your pull request.** 25 | 26 | See the [GNU FAQ](http://www.gnu.org/licenses/gpl-faq.html#AssignCopyright) for a fuller explanation of the need for this. If you still want to retain copyright on your contribution, let me know and I'll see if we can work something out. 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BFG Repo-Cleaner 2 | ================ 3 | 4 | [![CI](https://github.com/rtyley/bfg-repo-cleaner/actions/workflows/ci.yml/badge.svg)](https://github.com/rtyley/bfg-repo-cleaner/actions/workflows/ci.yml) 5 | [![Release](https://github.com/rtyley/bfg-repo-cleaner/actions/workflows/release.yml/badge.svg)](https://github.com/rtyley/bfg-repo-cleaner/actions/workflows/release.yml) 6 | 7 | _Removes large or troublesome blobs like git-filter-branch does, but faster - and written in Scala_ - [Fund the BFG](https://j.mp/fund-bfg) 8 | 9 | ``` 10 | $ bfg --strip-blobs-bigger-than 1M --replace-text banned.txt repo.git 11 | ``` 12 | 13 | The BFG is a simpler, faster ([10 - 720x](https://docs.google.com/spreadsheet/ccc?key=0AsR1d5Zpes8HdER3VGU1a3dOcmVHMmtzT2dsS2xNenc) faster) 14 | alternative to `git-filter-branch` for cleansing bad data out of your Git repository: 15 | 16 | * Removing **Crazy Big Files** 17 | * Removing **Passwords, 
Credentials** & other **Private data** 18 | 19 | Main documentation for The BFG is here : **https://rtyley.github.io/bfg-repo-cleaner/** 20 | -------------------------------------------------------------------------------- /backers.md: -------------------------------------------------------------------------------- 1 | Many thanks to supporters of the BFG! 2 | ----- 3 | 4 | Contribute towards the open-source development of the BFG on [**BountySource**](https://www.bountysource.com/teams/bfg-repo-cleaner) 5 | 6 | * [Thomas Ferris Nicolaisen](http://www.tfnico.com/) - host of the excellent [GitMinutes](http://www.gitminutes.com) podcast 7 | * [Alec Clews](https://alecthegeek.github.io/) 8 | * [ramtej](https://github.com/ramtej) 9 | -------------------------------------------------------------------------------- /bfg-benchmark/build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies.* 2 | 3 | libraryDependencies ++= guava ++ Seq( 4 | madgagCompress, 5 | textmatching, 6 | scopt 7 | ) -------------------------------------------------------------------------------- /bfg-benchmark/resources/jars/grabJars.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for i in 4.0 5.0 6.0 7.0 12.0 13.0 13.1 13.2 3 | do 4 | VERSION="1.$i" 5 | curl -O "https://repo1.maven.org/maven2/com/madgag/bfg/$VERSION/bfg-$VERSION.jar" 6 | done 7 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/chromium-src/commands/issue-23/bfg.txt: -------------------------------------------------------------------------------- 1 | --delete-files *.{52,50,crx,xib,png,pdf,jpg,zip,jar,pdb,psd,jpeg,dylib,dll,DLL,exe,EXE,vcproj,so,sln,scons,nib,graffle,yuv,webm} 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/gcc/commands/delete-file/bfg.txt: 
-------------------------------------------------------------------------------- 1 | -D README-fixinc 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/gcc/commands/delete-file/gfb.txt: -------------------------------------------------------------------------------- 1 | --index-filter 2 | git rm --cached --ignore-unmatch gcc/README-fixinc 3 | --prune-empty 4 | --tag-name-filter 5 | cat 6 | -- 7 | --all 8 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/git/commands/delete-file/bfg.txt: -------------------------------------------------------------------------------- 1 | -D object.c 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/git/commands/delete-file/gfb.txt: -------------------------------------------------------------------------------- 1 | --index-filter 2 | git rm --cached --ignore-unmatch object.c 3 | --prune-empty 4 | --tag-name-filter 5 | cat 6 | -- 7 | --all 8 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/github-gem/commands/delete-file/bfg.txt: -------------------------------------------------------------------------------- 1 | -D Rakefile 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/github-gem/commands/delete-file/gfb.txt: -------------------------------------------------------------------------------- 1 | --index-filter 2 | git rm --cached --ignore-unmatch Rakefile 3 | --prune-empty 4 | --tag-name-filter 5 | cat 6 | -- 7 | --all 8 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/intellij/commands/delete-binary-resources/bfg.txt: -------------------------------------------------------------------------------- 1 | --delete-files *.{zip,jar} 
--no-blob-protection 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/intellij/commands/delete-file/bfg.txt: -------------------------------------------------------------------------------- 1 | -D breakgen.dll 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/intellij/commands/delete-file/gfb.txt: -------------------------------------------------------------------------------- 1 | --index-filter 2 | git rm --cached --ignore-unmatch bin/breakgen.dll 3 | --prune-empty 4 | --tag-name-filter 5 | cat 6 | -- 7 | --all 8 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/intellij/commands/git-lfs-binary-resources/bfg.txt: -------------------------------------------------------------------------------- 1 | --convert-to-git-lfs *.{zip,jar,exe,dll} --no-blob-protection 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/jgit/commands/delete-file/bfg.txt: -------------------------------------------------------------------------------- 1 | -D make_jgit.sh 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/jgit/commands/delete-file/gfb.txt: -------------------------------------------------------------------------------- 1 | --index-filter 2 | git rm --cached --ignore-unmatch make_jgit.sh 3 | --prune-empty 4 | --tag-name-filter 5 | cat 6 | -- 7 | --all 8 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/jgit/commands/replace-1-existing-string/bfg.txt: -------------------------------------------------------------------------------- 1 | --replace-text passwords.1-existing-string.txt 2 | -------------------------------------------------------------------------------- 
/bfg-benchmark/resources/repos/jgit/commands/replace-1-existing-string/passwords.1-existing-string.txt: -------------------------------------------------------------------------------- 1 | invalidAdvertisementOf 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/jgit/commands/replace-20-existing-strings/bfg.txt: -------------------------------------------------------------------------------- 1 | --replace-text passwords.20-existing-strings.txt 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/jgit/commands/replace-20-existing-strings/passwords.20-existing-strings.txt: -------------------------------------------------------------------------------- 1 | invalidAdvertisementOf 2 | abbreviationLengthMustBeNonNegative 3 | abortingRebase 4 | abortingRebaseFailed 5 | abortingRebaseFailedNoOrigHead 6 | advertisementCameBefore 7 | advertisementOfCameBefore 8 | amazonS3ActionFailed 9 | amazonS3ActionFailedGivingUp 10 | ambiguousObjectAbbreviation 11 | aNewObjectIdIsRequired 12 | anExceptionOccurredWhileTryingToAddTheIdOfHEAD 13 | anSSHSessionHasBeenAlreadyCreated 14 | applyingCommitnvalidType 15 | corruptObjectInvalidType2 16 | corruptObjectMalformedHeader 17 | dirCacheIsNotLocked 18 | DIRCChecksumMismatch 19 | enumValueNotSupported3 20 | errorDecodingFromFile 21 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/jgit/commands/replace-500-existing-strings/bfg.txt: -------------------------------------------------------------------------------- 1 | --replace-text passwords.500-existing-strings.txt 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/jgit/commands/replace-500-existing-strings/passwords.500-existing-strings.txt: -------------------------------------------------------------------------------- 1 | 
abbreviationLengthMustBeNonNegative 2 | abortingRebase 3 | abortingRebaseFailed 4 | abortingRebaseFailedNoOrigHead 5 | advertisementCameBefore 6 | advertisementOfCameBefore 7 | amazonS3ActionFailed 8 | amazonS3ActionFailedGivingUp 9 | ambiguousObjectAbbreviation 10 | aNewObjectIdIsRequired 11 | anExceptionOccurredWhileTryingToAddTheIdOfHEAD 12 | anSSHSessionHasBeenAlreadyCreated 13 | applyingCommit 14 | archiveFormatAlreadyAbsent 15 | archiveFormatAlreadyRegistered 16 | atLeastOnePathIsRequired 17 | atLeastOnePatternIsRequired 18 | atLeastTwoFiltersNeeded 19 | authenticationNotSupported 20 | badBase64InputCharacterAt 21 | badEntryDelimiter 22 | badEntryName 23 | badEscape 24 | badGroupHeader 25 | badObjectType 26 | badSectionEntry 27 | bareRepositoryNoWorkdirAndIndex 28 | base64InputNotProperlyPadded 29 | baseLengthIncorrect 30 | bitmapMissingObject 31 | bitmapsMustBePrepared 32 | blameNotCommittedYet 33 | blobNotFound 34 | blobNotFoundForPath 35 | branchNameInvalid 36 | buildingBitmaps 37 | cachedPacksPreventsIndexCreation 38 | cachedPacksPreventsListingObjects 39 | cannotBeCombined 40 | cannotBeRecursiveWhenTreesAreIncluded 41 | cannotCombineSquashWithNoff 42 | cannotCombineTreeFilterWithRevFilter 43 | cannotCommitOnARepoWithState 44 | cannotCommitWriteTo 45 | cannotConnectPipes 46 | cannotConvertScriptToText 47 | cannotCreateConfig 48 | cannotCreateDirectory 49 | cannotCreateHEAD 50 | cannotCreateIndexfile 51 | cannotDeleteCheckedOutBranch 52 | cannotDeleteFile 53 | cannotDeleteStaleTrackingRef 54 | cannotDeleteStaleTrackingRef2 55 | cannotDetermineProxyFor 56 | cannotDownload 57 | cannotExecute 58 | cannotGet 59 | cannotListRefs 60 | cannotLock 61 | cannotLockPackIn 62 | cannotMatchOnEmptyString 63 | cannotMoveIndexTo 64 | cannotMovePackTo 65 | cannotOpenService 66 | cannotParseDate 67 | cannotParseGitURIish 68 | cannotPullOnARepoWithState 69 | cannotRead 70 | cannotReadBlob 71 | cannotReadCommit 72 | cannotReadFile 73 | cannotReadHEAD 74 | cannotReadObject 75 
| cannotReadTree 76 | cannotRebaseWithoutCurrentHead 77 | cannotResolveLocalTrackingRefForUpdating 78 | cannotStoreObjects 79 | cannotUnloadAModifiedTree 80 | cannotWorkWithOtherStagesThanZeroRightNow 81 | canOnlyCherryPickCommitsWithOneParent 82 | canOnlyRevertCommitsWithOneParent 83 | cantFindObjectInReversePackIndexForTheSpecifiedOffset 84 | cantPassMeATree 85 | channelMustBeInRange0_255 86 | characterClassIsNotSupported 87 | checkoutConflictWithFile 88 | checkoutConflictWithFiles 89 | checkoutUnexpectedResult 90 | classCastNotA 91 | cloneNonEmptyDirectory 92 | collisionOn 93 | commandWasCalledInTheWrongState 94 | commitAlreadyExists 95 | commitMessageNotSpecified 96 | commitOnRepoWithoutHEADCurrentlyNotSupported 97 | commitAmendOnInitialNotPossible 98 | compressingObjects 99 | connectionFailed 100 | connectionTimeOut 101 | contextMustBeNonNegative 102 | corruptionDetectedReReadingAt 103 | corruptObjectBadStream 104 | corruptObjectBadStreamCorruptHeader 105 | corruptObjectGarbageAfterSize 106 | corruptObjectIncorrectLength 107 | corruptObjectInvalidEntryMode 108 | corruptObjectInvalidMode 109 | corruptObjectInvalidMode2 110 | corruptObjectInvalidMode3 111 | corruptObjectInvalidType 112 | corruptObjectInvalidType2 113 | corruptObjectMalformedHeader 114 | corruptObjectNegativeSize 115 | corruptObjectNoAuthor 116 | corruptObjectNoCommitter 117 | corruptObjectNoHeader 118 | corruptObjectNoObject 119 | corruptObjectNoTaggerBadHeader 120 | corruptObjectNoTaggerHeader 121 | corruptObjectNoTagName 122 | corruptObjectNotree 123 | corruptObjectNoType 124 | corruptObjectPackfileChecksumIncorrect 125 | couldNotCheckOutBecauseOfConflicts 126 | couldNotDeleteLockFileShouldNotHappen 127 | couldNotDeleteTemporaryIndexFileShouldNotHappen 128 | couldNotGetAdvertisedRef 129 | couldNotGetRepoStatistics 130 | couldNotLockHEAD 131 | couldNotReadIndexInOneGo 132 | couldNotReadObjectWhileParsingCommit 133 | couldNotRenameDeleteOldIndex 134 | couldNotRenameTemporaryFile 135 | 
couldNotRenameTemporaryIndexFileToIndex 136 | couldNotURLEncodeToUTF8 137 | couldNotWriteFile 138 | countingObjects 139 | createBranchFailedUnknownReason 140 | createBranchUnexpectedResult 141 | createNewFileFailed 142 | credentialPassword 143 | credentialUsername 144 | daemonAlreadyRunning 145 | daysAgo 146 | deleteBranchUnexpectedResult 147 | deleteFileFailed 148 | deleteTagUnexpectedResult 149 | deletingNotSupported 150 | destinationIsNotAWildcard 151 | detachedHeadDetected 152 | dirCacheDoesNotHaveABackingFile 153 | dirCacheFileIsNotLocked 154 | dirCacheIsNotLocked 155 | DIRCChecksumMismatch 156 | DIRCExtensionIsTooLargeAt 157 | DIRCExtensionNotSupportedByThisVersion 158 | DIRCHasTooManyEntries 159 | DIRCUnrecognizedExtendedFlags 160 | dirtyFilesExist 161 | doesNotHandleMode 162 | downloadCancelled 163 | downloadCancelledDuringIndexing 164 | duplicateAdvertisementsOf 165 | duplicateRef 166 | duplicateRemoteRefUpdateIsIllegal 167 | duplicateStagesNotAllowed 168 | eitherGitDirOrWorkTreeRequired 169 | emptyCommit 170 | emptyPathNotPermitted 171 | encryptionError 172 | endOfFileInEscape 173 | entryNotFoundByPath 174 | enumValueNotSupported2 175 | enumValueNotSupported3 176 | enumValuesNotAvailable 177 | errorDecodingFromFile 178 | errorEncodingFromFile 179 | errorInBase64CodeReadingStream 180 | errorInPackedRefs 181 | errorInvalidProtocolWantedOldNewRef 182 | errorListing 183 | errorOccurredDuringUnpackingOnTheRemoteEnd 184 | errorReadingInfoRefs 185 | errorSymlinksNotSupported 186 | exceptionCaughtDuringExecutionOfAddCommand 187 | exceptionCaughtDuringExecutionOfArchiveCommand 188 | exceptionCaughtDuringExecutionOfCherryPickCommand 189 | exceptionCaughtDuringExecutionOfCommitCommand 190 | exceptionCaughtDuringExecutionOfFetchCommand 191 | exceptionCaughtDuringExecutionOfLsRemoteCommand 192 | exceptionCaughtDuringExecutionOfMergeCommand 193 | exceptionCaughtDuringExecutionOfPullCommand 194 | exceptionCaughtDuringExecutionOfPushCommand 195 | 
exceptionCaughtDuringExecutionOfResetCommand 196 | exceptionCaughtDuringExecutionOfRevertCommand 197 | exceptionCaughtDuringExecutionOfRmCommand 198 | exceptionCaughtDuringExecutionOfTagCommand 199 | exceptionOccurredDuringAddingOfOptionToALogCommand 200 | exceptionOccurredDuringReadingOfGIT_DIR 201 | expectedACKNAKFoundEOF 202 | expectedACKNAKGot 203 | expectedBooleanStringValue 204 | expectedCharacterEncodingGuesses 205 | expectedEOFReceived 206 | expectedGot 207 | expectedLessThanGot 208 | expectedPktLineWithService 209 | expectedReceivedContentType 210 | expectedReportForRefNotReceived 211 | failedUpdatingRefs 212 | failureDueToOneOfTheFollowing 213 | failureUpdatingFETCH_HEAD 214 | failureUpdatingTrackingRef 215 | fileCannotBeDeleted 216 | fileIsTooBigForThisConvenienceMethod 217 | fileIsTooLarge 218 | fileModeNotSetForPath 219 | flagIsDisposed 220 | flagNotFromThis 221 | flagsAlreadyCreated 222 | funnyRefname 223 | gcFailed 224 | gitmodulesNotFound 225 | headRequiredToStash 226 | hoursAgo 227 | hugeIndexesAreNotSupportedByJgitYet 228 | hunkBelongsToAnotherFile 229 | hunkDisconnectedFromFile 230 | hunkHeaderDoesNotMatchBodyLineCountOf 231 | illegalArgumentNotA 232 | illegalCombinationOfArguments 233 | illegalPackingPhase 234 | illegalStateExists 235 | improperlyPaddedBase64Input 236 | incorrectHashFor 237 | incorrectOBJECT_ID_LENGTH 238 | indexFileIsInUse 239 | indexFileIsTooLargeForJgit 240 | indexSignatureIsInvalid 241 | indexWriteException 242 | inMemoryBufferLimitExceeded 243 | inputStreamMustSupportMark 244 | integerValueOutOfRange 245 | internalRevisionError 246 | internalServerError 247 | interruptedWriting 248 | inTheFuture 249 | invalidAdvertisementOf 250 | invalidAncestryLength 251 | invalidBooleanValue 252 | invalidChannel 253 | invalidCharacterInBase64Data 254 | invalidCommitParentNumber 255 | invalidEncryption 256 | invalidGitdirRef 257 | invalidGitType 258 | invalidId 259 | invalidIdLength 260 | invalidIntegerValue 261 | invalidKey 262 | 
invalidLineInConfigFile 263 | invalidModeFor 264 | invalidModeForPath 265 | invalidObject 266 | invalidOldIdSent 267 | invalidPacketLineHeader 268 | invalidPath 269 | invalidReflogRevision 270 | invalidRefName 271 | invalidRemote 272 | invalidStageForPath 273 | invalidTagOption 274 | invalidTimeout 275 | invalidURL 276 | invalidWildcards 277 | invalidRefSpec 278 | invalidWindowSize 279 | isAStaticFlagAndHasNorevWalkInstance 280 | JRELacksMD5Implementation 281 | kNotInRange 282 | largeObjectExceedsByteArray 283 | largeObjectExceedsLimit 284 | largeObjectException 285 | largeObjectOutOfMemory 286 | lengthExceedsMaximumArraySize 287 | listingAlternates 288 | localObjectsIncomplete 289 | localRefIsMissingObjects 290 | lockCountMustBeGreaterOrEqual1 291 | lockError 292 | lockOnNotClosed 293 | lockOnNotHeld 294 | malformedpersonIdentString 295 | maxCountMustBeNonNegative 296 | mergeConflictOnNonNoteEntries 297 | mergeConflictOnNotes 298 | mergeStrategyAlreadyExistsAsDefault 299 | mergeStrategyDoesNotSupportHeads 300 | mergeUsingStrategyResultedInDescription 301 | mergeRecursiveReturnedNoCommit 302 | mergeRecursiveTooManyMergeBasesFor 303 | messageAndTaggerNotAllowedInUnannotatedTags 304 | minutesAgo 305 | missingAccesskey 306 | missingConfigurationForKey 307 | missingDeltaBase 308 | missingForwardImageInGITBinaryPatch 309 | missingObject 310 | missingPrerequisiteCommits 311 | missingRequiredParameter 312 | missingSecretkey 313 | mixedStagesNotAllowed 314 | mkDirFailed 315 | mkDirsFailed 316 | month 317 | months 318 | monthsAgo 319 | multipleMergeBasesFor 320 | need2Arguments 321 | needPackOut 322 | needsAtLeastOneEntry 323 | needsWorkdir 324 | newlineInQuotesNotAllowed 325 | noApplyInDelete 326 | noClosingBracket 327 | noHEADExistsAndNoExplicitStartingRevisionWasSpecified 328 | noHMACsupport 329 | noMergeBase 330 | noMergeHeadSpecified 331 | noSuchRef 332 | notABoolean 333 | notABundle 334 | notADIRCFile 335 | notAGitDirectory 336 | notAPACKFile 337 | notARef 338 | 
notASCIIString 339 | notAuthorized 340 | notAValidPack 341 | notFound 342 | nothingToFetch 343 | nothingToPush 344 | notMergedExceptionMessage 345 | noXMLParserAvailable 346 | objectAtHasBadZlibStream 347 | objectAtPathDoesNotHaveId 348 | objectIsCorrupt 349 | objectIsNotA 350 | objectNotFound 351 | objectNotFoundIn 352 | obtainingCommitsForCherryPick 353 | offsetWrittenDeltaBaseForObjectNotFoundInAPack 354 | onlyAlreadyUpToDateAndFastForwardMergesAreAvailable 355 | onlyOneFetchSupported 356 | onlyOneOperationCallPerConnectionIsSupported 357 | openFilesMustBeAtLeast1 358 | openingConnection 359 | operationCanceled 360 | outputHasAlreadyBeenStarted 361 | packChecksumMismatch 362 | packCorruptedWhileWritingToFilesystem 363 | packDoesNotMatchIndex 364 | packetSizeMustBeAtLeast 365 | packetSizeMustBeAtMost 366 | packfileCorruptionDetected 367 | packFileInvalid 368 | packfileIsTruncated 369 | packHasUnresolvedDeltas 370 | packingCancelledDuringObjectsWriting 371 | packObjectCountMismatch 372 | packRefs 373 | packTooLargeForIndexVersion1 374 | packWriterStatistics 375 | panicCantRenameIndexFile 376 | patchApplyException 377 | patchFormatException 378 | pathIsNotInWorkingDir 379 | pathNotConfigured 380 | peeledLineBeforeRef 381 | peerDidNotSupplyACompleteObjectGraph 382 | prefixRemote 383 | problemWithResolvingPushRefSpecsLocally 384 | progressMonUploading 385 | propertyIsAlreadyNonNull 386 | pruneLoosePackedObjects 387 | pruneLooseUnreferencedObjects 388 | pullOnRepoWithoutHEADCurrentlyNotSupported 389 | pullTaskName 390 | pushCancelled 391 | pushIsNotSupportedForBundleTransport 392 | pushNotPermitted 393 | rawLogMessageDoesNotParseAsLogEntry 394 | readingObjectsFromLocalRepositoryFailed 395 | readTimedOut 396 | receivePackObjectTooLarge1 397 | receivePackObjectTooLarge2 398 | receivingObjects 399 | refAlreadyExists 400 | refAlreadyExists1 401 | reflogEntryNotFound 402 | refNotResolved 403 | refUpdateReturnCodeWas 404 | remoteConfigHasNoURIAssociated 405 | 
remoteDoesNotHaveSpec 406 | remoteDoesNotSupportSmartHTTPPush 407 | remoteHungUpUnexpectedly 408 | remoteNameCantBeNull 409 | renameBranchFailedBecauseTag 410 | renameBranchFailedUnknownReason 411 | renameBranchUnexpectedResult 412 | renameFileFailed 413 | renamesAlreadyFound 414 | renamesBreakingModifies 415 | renamesFindingByContent 416 | renamesFindingExact 417 | renamesRejoiningModifies 418 | repositoryAlreadyExists 419 | repositoryConfigFileInvalid 420 | repositoryIsRequired 421 | repositoryNotFound 422 | repositoryState_applyMailbox 423 | repositoryState_bisecting 424 | repositoryState_conflicts 425 | repositoryState_merged 426 | repositoryState_normal 427 | repositoryState_rebase 428 | repositoryState_rebaseInteractive 429 | repositoryState_rebaseOrApplyMailbox 430 | repositoryState_rebaseWithMerge 431 | requiredHashFunctionNotAvailable 432 | resettingHead 433 | resolvingDeltas 434 | resultLengthIncorrect 435 | rewinding 436 | searchForReuse 437 | searchForSizes 438 | secondsAgo 439 | selectingCommits 440 | sequenceTooLargeForDiffAlgorithm 441 | serviceNotEnabledNoName 442 | serviceNotPermitted 443 | serviceNotPermittedNoName 444 | shallowCommitsAlreadyInitialized 445 | shortCompressedStreamAt 446 | shortReadOfBlock 447 | shortReadOfOptionalDIRCExtensionExpectedAnotherBytes 448 | shortSkipOfBlock 449 | signingNotSupportedOnTag 450 | similarityScoreMustBeWithinBounds 451 | sizeExceeds2GB 452 | skipMustBeNonNegative 453 | smartHTTPPushDisabled 454 | sourceDestinationMustMatch 455 | sourceIsNotAWildcard 456 | sourceRefDoesntResolveToAnyObject 457 | sourceRefNotSpecifiedForRefspec 458 | squashCommitNotUpdatingHEAD 459 | staleRevFlagsOn 460 | startingReadStageWithoutWrittenRequestDataPendingIsNotSupported 461 | stashApplyConflict 462 | stashApplyConflictInIndex 463 | stashApplyFailed 464 | stashApplyOnUnsafeRepository 465 | stashApplyWithoutHead 466 | stashCommitMissingTwoParents 467 | stashDropDeleteRefFailed 468 | stashDropFailed 469 | stashDropMissingReflog 
470 | stashFailed 471 | stashResolveFailed 472 | statelessRPCRequiresOptionToBeEnabled 473 | submoduleExists 474 | submoduleParentRemoteUrlInvalid 475 | submodulesNotSupported 476 | symlinkCannotBeWrittenAsTheLinkTarget 477 | systemConfigFileInvalid 478 | tagAlreadyExists 479 | tagNameInvalid 480 | tagOnRepoWithoutHEADCurrentlyNotSupported 481 | theFactoryMustNotBeNull 482 | timerAlreadyTerminated 483 | topologicalSortRequired 484 | transportExceptionBadRef 485 | transportExceptionEmptyRef 486 | transportExceptionInvalid 487 | transportExceptionMissingAssumed 488 | transportExceptionReadRef 489 | transportNeedsRepository 490 | transportProtoAmazonS3 491 | transportProtoBundleFile 492 | transportProtoFTP 493 | transportProtoGitAnon 494 | transportProtoHTTP 495 | transportProtoLocal 496 | transportProtoSFTP 497 | transportProtoSSH 498 | treeEntryAlreadyExists 499 | treeFilterMarkerTooManyFilters 500 | treeIteratorDoesNotSupportRemove 501 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/linux/commands/delete-file/bfg.txt: -------------------------------------------------------------------------------- 1 | -D MAINTAINERS 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/linux/commands/delete-file/gfb.txt: -------------------------------------------------------------------------------- 1 | --index-filter 2 | git rm --cached --ignore-unmatch MAINTAINERS 3 | --prune-empty 4 | --tag-name-filter 5 | cat 6 | -- 7 | --all 8 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/rails/commands/delete-file/bfg.txt: -------------------------------------------------------------------------------- 1 | -D pushgems.rb 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/rails/commands/delete-file/gfb.txt: 
-------------------------------------------------------------------------------- 1 | --index-filter 2 | git rm --cached --ignore-unmatch pushgems.rb 3 | --prune-empty 4 | --tag-name-filter 5 | cat 6 | -- 7 | --all 8 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/wine/commands/delete-file/bfg.txt: -------------------------------------------------------------------------------- 1 | -D build-spec.txt 2 | -------------------------------------------------------------------------------- /bfg-benchmark/resources/repos/wine/commands/delete-file/gfb.txt: -------------------------------------------------------------------------------- 1 | --index-filter 2 | git rm --cached --ignore-unmatch build-spec.txt 3 | --prune-empty 4 | --tag-name-filter 5 | cat 6 | -- 7 | --all 8 | -------------------------------------------------------------------------------- /bfg-benchmark/src/main/scala/Benchmark.scala: -------------------------------------------------------------------------------- 1 | import lib.Timing.measureTask 2 | import lib._ 3 | import model._ 4 | 5 | import java.nio.file.Files 6 | import java.nio.file.Files.isDirectory 7 | import scala.concurrent.ExecutionContext.Implicits.global 8 | import scala.concurrent._ 9 | import scala.concurrent.duration.Duration 10 | import scala.jdk.StreamConverters._ 11 | import scala.sys.process._ 12 | 13 | /* 14 | * Vary BFG runs by: 15 | * Java version 16 | * BFG version (JGit version?) 
/**
 * Command-line entry point for the benchmark harness.
 *
 * Parses a [[BenchmarkConfig]] from the program arguments, checks that the resources,
 * jars and repos directories (and every requested BFG jar) exist, then times every
 * selected command against every configured repo with each available engine.
 */
object Benchmark extends App {

  // `foreach` rather than `map`: the run is executed purely for its side effects.
  BenchmarkConfig.parser.parse(args, BenchmarkConfig()) foreach { config =>
    println(s"Using resources dir : ${config.resourcesDir}")

    require(Files.exists(config.resourcesDir), s"Resources dir not found : ${config.resourcesDir}")
    require(Files.exists(config.jarsDir), s"Jars dir not found : ${config.jarsDir}")
    require(Files.exists(config.reposDir), s"Repos dir not found : ${config.reposDir}")

    val missingJars = config.bfgJars.filterNot(Files.exists(_))
    require(missingJars.isEmpty, s"Missing BFG jars : ${missingJars.mkString(",")}")

    val tasksFuture = for {
      bfgInvocableEngineSet <- bfgInvocableEngineSet(config)
    } yield {
      // git-filter-branch is only benchmarked when --only-bfg was not given.
      val gfbInvocableEngineSetOpt =
        Option.when(!config.onlyBfg)(InvocableEngineSet[GFBInvocation](GitFilterBranch, Seq(InvocableGitFilterBranch)))
      boogaloo(config, new RepoExtractor(config.scratchDir), Seq(bfgInvocableEngineSet) ++ gfbInvocableEngineSetOpt.toSeq)
    }

    Await.result(tasksFuture, Duration.Inf)
  }

  /**
   * Builds the set of runnable BFG engines: one per (java command, BFG jar) pair.
   * The version of each configured java command is resolved first via [[JavaVersion.version]].
   */
  def bfgInvocableEngineSet(config: BenchmarkConfig): Future[InvocableEngineSet[BFGInvocation]] = for {
    javas <- Future.traverse(config.javaCmds)(jc => JavaVersion.version(jc).map(v => Java(jc, v)))
  } yield {
    val invocables = for {
      java <- javas
      bfgJar <- config.bfgJars
    } yield InvocableBFG(java, BFGJar.from(bfgJar))

    InvocableEngineSet[BFGInvocation](BFG, invocables)
  }

  /**
   * Lists the entries of `dir` eagerly and closes the underlying directory stream.
   * `Files.list` returns a stream holding an open directory handle that is only
   * released on `close()`, so it must not simply be converted and dropped.
   */
  private def listDir(dir: java.nio.file.Path): Seq[java.nio.file.Path] =
    scala.util.Using.resource(Files.list(dir))(_.toScala(Seq))

  /*
   * A Task says "here is something you can do to a given repo, and here is how to do
   * it with a BFG, and with git-filter-branch"
   */
  def boogaloo(config: BenchmarkConfig, repoExtractor: RepoExtractor, invocableEngineSets: Seq[InvocableEngineSet[_ <: EngineInvocation]]) = {

    for {
      repoSpecDir <- config.repoSpecDirs
      availableCommandDirs = listDir(repoSpecDir.resolve("commands")).filter(isDirectory(_))
      // println(s"Available commands for $repoName : ${availableCommandDirs.map(_.name).mkString(", ")}")
      commandDir <- availableCommandDirs.filter(p => config.commands(p.getFileName.toString))
    } yield {
      val commandName: String = commandDir.getFileName.toString

      commandName -> (for {
        invocableEngineSet <- invocableEngineSets
      } yield for {
        (invocable, processMaker) <- invocableEngineSet.invocationsFor(commandDir)
      } yield {
        // Every invocation starts from a freshly extracted copy of the repo.
        val cleanRepoDir = repoExtractor.extractRepoFrom(repoSpecDir.resolve("repo.git.zip"))
        // Copy the command's files (engine args, password lists, ...) next to the repo contents.
        listDir(commandDir).foreach(p => Files.copy(p, cleanRepoDir.resolve(p.getFileName)))
        val process = processMaker(cleanRepoDir.toFile)

        val duration = measureTask(s"$commandName - $invocable") {
          process ! ProcessLogger(_ => ()) // process output is discarded
        }

        if (config.dieIfTaskTakesLongerThan.exists(_ < duration.toMillis)) {
          throw new Exception("This took too long: "+duration)
        }

        invocable -> duration
      })
    }
  }

  println(s"\n...benchmark finished.")
}
/**
 * Settings for one benchmark run, as filled in by the command-line parser.
 *
 * @param resourcesDirOption       benchmark resources folder (defaults to `bfg-benchmark/resources` under `user.dir`)
 * @param scratchDir               directory in which repos are extracted for each job
 * @param javaCmds                 java commands to run the BFG with
 * @param bfgVersions              BFG versions to time, resolved to `bfg-[version].jar`
 * @param commands                 matcher selecting which command directories to exercise
 * @param onlyBfg                  when true, git-filter-branch is not benchmarked
 * @param dieIfTaskTakesLongerThan optional limit that a task's duration in milliseconds is compared against
 * @param repoNames                names of the sample repos to test
 */
case class BenchmarkConfig(
  resourcesDirOption: Path = Paths.get(System.getProperty("user.dir"), "bfg-benchmark", "resources"),
  scratchDir: Path = Paths.get("/dev/shm/"),
  javaCmds: Seq[String] = Seq("java"),
  bfgVersions: Seq[String] = Nil,
  commands: TextMatcher = Glob("*"),
  onlyBfg: Boolean = false,
  dieIfTaskTakesLongerThan: Option[Int] = None,
  repoNames: Seq[String] = Nil
) {

  /** Absolute form of the configured resources folder. */
  lazy val resourcesDir: Path = resourcesDirOption.toAbsolutePath

  /** Folder holding the BFG jars. */
  lazy val jarsDir: Path = resourcesDir.resolve("jars")

  /** Folder holding one sub-directory per sample repo. */
  lazy val reposDir: Path = resourcesDir.resolve("repos")

  /** Expected jar location for each requested BFG version. */
  lazy val bfgJars: Seq[Path] =
    for (bfgVersion <- bfgVersions) yield jarsDir.resolve(s"bfg-$bfgVersion.jar")

  /** Spec directory for each requested sample repo. */
  lazy val repoSpecDirs: Seq[Path] =
    for (repoName <- repoNames) yield reposDir.resolve(repoName)
}
/**
 * Extracts a zipped repo into a fixed `repo.git` directory under `scratchDir`.
 *
 * @param scratchDir directory under which the `repo.git` working copy is (re)created
 */
class RepoExtractor(scratchDir: Path) {

  val repoDir = scratchDir.resolve( "repo.git")

  /**
   * Replaces any existing `repoDir` with the contents of the zip at `zipPath`.
   *
   * Failures while opening or unzipping the archive are propagated. Previously the
   * `Try` returned by `Using(...)` was dropped, so a missing or corrupt zip went
   * unnoticed and the caller carried on with an empty directory.
   *
   * @param zipPath zip archive containing the repo
   * @return the directory the repo was extracted into
   */
  def extractRepoFrom(zipPath: Path) = {
    if (Files.exists(repoDir)) MoreFiles.deleteRecursively(repoDir, ALLOW_INSECURE)

    Files.createDirectories(repoDir)

    println(s"Extracting repo to ${repoDir.toAbsolutePath}")

    // Using.resource closes the stream and rethrows any failure instead of wrapping it in a Try.
    Using.resource(Files.newInputStream(zipPath)) {
      stream => unzip(stream, repoDir.toFile)
    }

    repoDir
  }
}
/**
 * A BFG jar file together with the versions of its Maven dependencies.
 *
 * @param path                    location of the jar
 * @param mavenDependencyVersions dependency versions associated with the jar
 */
case class BFGJar(path: Path, mavenDependencyVersions: Map[String, String])

object BFGJar {
  /** Wraps `path` as a [[BFGJar]] with no dependency versions recorded. */
  def from(path: Path) = new BFGJar(path, mavenDependencyVersions = Map.empty)
}
/**
 * An engine type paired with the concrete engines that can run its invocations.
 *
 * @param engineType       knows how to read this engine's arguments from a command directory
 * @param invocableEngines the engines to run with those arguments
 */
case class InvocableEngineSet[InvocationArgs <: EngineInvocation](
  engineType: EngineType[InvocationArgs],
  invocableEngines: Seq[InvocableEngine[InvocationArgs]]
) {

  /**
   * Pairs every engine with a function that builds its process for a given repo directory.
   *
   * @param commandDir directory that may contain this engine type's arguments
   * @return one entry per engine, or nothing when `commandDir` has no arguments for this engine type
   */
  def invocationsFor(commandDir: Path): Seq[(InvocableEngine[InvocationArgs], File => scala.sys.process.ProcessBuilder)] =
    engineType.argsOptsFor(commandDir) match {
      case None =>
        Seq.empty
      case Some(engineArgs) =>
        invocableEngines.map { engine =>
          val makeProcess: File => scala.sys.process.ProcessBuilder =
            repoDir => engine.processFor(engineArgs)(repoDir)
          (engine, makeProcess)
        }
    }
}
/**
 * Checks that [[JavaVersion.versionFrom]] extracts the quoted version from `java -version` output.
 *
 * Declared as a `class`, not an `object`: ScalaTest's sbt integration only discovers
 * suite classes, so a spec defined as an object is never run.
 */
class JavaVersionSpec extends AnyFlatSpec with OptionValues with Matchers {
  "version" should "parse an example line" in {
    JavaVersion.versionFrom("""java version "1.7.0_51"""").value shouldBe "1.7.0_51"
  }

  it should "parse openjdk weirdness" in {
    JavaVersion.versionFrom("""openjdk version "1.8.0_40-internal"""").value shouldBe "1.8.0_40-internal"
  }

  it should "find no version in a line without a version declaration" in {
    JavaVersion.versionFrom("OpenJDK Runtime Environment (build 1.8.0_40-internal-b27)") shouldBe None
  }
}
/**
 * A multi-map from keys to sets of values, stored in a concurrent `TrieMap` whose
 * values are [[ConcurrentSet]]s.
 */
class ConcurrentMultiMap[A, B] {

  val m: collection.concurrent.Map[A, ConcurrentSet[B]] = collection.concurrent.TrieMap.empty

  /**
   * Adds `value` to the set stored under `key`, creating that set if it is absent.
   *
   * @return this multi-map
   */
  def addBinding(key: A, value: B): this.type = {
    val bucket = m.get(key) match {
      case Some(existing) => existing
      case None =>
        val candidate = new ConcurrentSet[B]
        // If another set was registered for the key in the meantime, use that one.
        m.putIfAbsent(key, candidate) match {
          case Some(winner) => winner
          case None => candidate
        }
    }
    bucket += value
    this
  }

  /** An immutable copy of the current bindings. */
  def toMap: Map[A, Set[B]] = {
    val snapshot = m.toMap
    snapshot.mapV(_.toSet)
  }
}
/**
 * A mutable set whose elements are the keys of a concurrent `TrieMap`.
 */
class ConcurrentSet[A]()
  extends AbstractSet[A]
    with SetOps[A, ConcurrentSet, ConcurrentSet[A]]
    with IterableFactoryDefaults[A, ConcurrentSet]
{

  // Only the keys matter; every present element maps to `true`.
  val m: collection.concurrent.Map[A, Boolean] = collection.concurrent.TrieMap.empty

  override def iterableFactory: IterableFactory[ConcurrentSet] = ConcurrentSet

  override def clear(): Unit = m.clear()

  override def addOne(elem: A): ConcurrentSet.this.type = {
    m.put(elem, true)
    this
  }

  override def subtractOne(elem: A): ConcurrentSet.this.type = {
    m.remove(elem)
    this
  }

  override def contains(elem: A): Boolean = m.contains(elem)

  override def iterator: Iterator[A] = m.keysIterator

}

/**
 * Factory for [[ConcurrentSet]].
 *
 * Because the set is mutable, every factory method returns a fresh instance.
 * Sharing one "empty" instance, or returning the argument of `from` unchanged,
 * would let unrelated callers observe each other's updates.
 */
object ConcurrentSet extends IterableFactory[ConcurrentSet] {

  /** A new, independent empty set. */
  def empty[A]: ConcurrentSet[A] = new ConcurrentSet[A]

  /** A new set containing the elements of `source`; `source` itself is never returned. */
  def from[A](source: collection.IterableOnce[A]): ConcurrentSet[A] =
    empty[A].addAll(source)

  /** A builder that accumulates elements directly into a new set. */
  def newBuilder[A]: mutable.Builder[A, ConcurrentSet[A]] =
    new mutable.GrowableBuilder[A, ConcurrentSet[A]](empty[A])
}
/** Helpers for building and parsing Git LFS pointer files. */
object LFS {

  val ObjectsPath: Seq[String] = Seq("lfs" , "objects")

  val PointerCharset: Charset = UTF_8

  /**
   * The content of an LFS pointer file.
   *
   * @param shaHex   hex SHA-256 of the blob the pointer refers to
   * @param blobSize size of that blob
   */
  case class Pointer(shaHex: String, blobSize: Long) {

    /** The pointer file text: version line, `oid` line and `size` line. */
    lazy val text: String = s"""|version https://git-lfs.github.com/spec/v1
                                |oid sha256:$shaHex
                                |size $blobSize
                                |""".stripMargin

    lazy val bytes: Array[Byte] = text.getBytes(PointerCharset)

    /** Path segments derived from the hash: first two hex chars, next two, then the full hash. */
    lazy val path: Seq[String] = Seq(shaHex.substring(0, 2), shaHex.substring(2, 4), shaHex)
  }

  object Pointer {

    // Splits the pointer text into lines, then each line into key and value at the space.
    val splitter = Splitter.on('\n').omitEmptyStrings().trimResults().withKeyValueSeparator(' ')

    /** Parses pointer file bytes, reading the `size` and `oid` entries. */
    def parse(bytes: Array[Byte]) = {
      val text = new String(bytes, PointerCharset)
      val valuesByKey= splitter.split(text).asScala
      val size = valuesByKey("size").toLong
      val shaHex = valuesByKey("oid").stripPrefix("sha256:")
      Pointer(shaHex, size)
    }
  }

  val GitAttributesFileName = FileName(".gitattributes")

  /**
   * Copies the object behind `loader` into `tmpFile` while computing its SHA-256,
   * and returns the matching pointer.
   *
   * A failed copy is propagated. Previously the `Try` returned by `Using(...)` was
   * discarded, so a failed or partial copy still produced a pointer, built from a
   * digest of whatever bytes had been written.
   *
   * @param loader  loader for the object to store
   * @param tmpFile file that receives the object's content
   */
  def pointerFor(loader: ObjectLoader, tmpFile: Path) = {
    val digest = MessageDigest.getInstance("SHA-256")

    // Using.resource closes the stream and rethrows any failure instead of wrapping it in a Try.
    Using.resource(Files.newOutputStream(tmpFile)) { outStream =>
      loader.copyTo(new DigestOutputStream(outStream, digest))
    }

    Pointer(encodeHexString(digest.digest()), loader.getSize)
  }
}
/**
 * A [[Cleaner]] with helpers that report whether, and how, a value is changed by it.
 */
trait CleaningMapper[V] extends Cleaner[V] {
  /** True when applying the cleaner to `v` yields a different value. */
  def isDirty(v: V) = apply(v) != v

  /** The `(old, new)` pair when the cleaner changes `oldId`, otherwise `None`. */
  def substitution(oldId: V): Option[(V, V)] = {
    val newId = apply(oldId)
    if (newId == oldId) None else Some((oldId, newId))
  }

  /** The cleaned value when the cleaner changes `oldId`, otherwise `None`. */
  def replacement(oldId: V): Option[V] = {
    val newId = apply(oldId)
    if (newId == oldId) None else Some(newId)
  }
}

object GitUtil {

  val ProbablyNoNonFileObjectsOverSizeThreshold: Long = 1024 * 1024

  /**
   * Installs a JGit window-cache configuration whose stream-file threshold is
   * `massiveNonFileObjects`, or [[ProbablyNoNonFileObjectsOverSizeThreshold]] when absent.
   * The value is saturated to the `Int` range.
   */
  def tweakStaticJGitConfig(massiveNonFileObjects: Option[Long]): Unit = {
    val wcConfig: WindowCacheConfig = new WindowCacheConfig()
    wcConfig.setStreamFileThreshold(Ints.saturatedCast(massiveNonFileObjects.getOrElse(ProbablyNoNonFileObjectsOverSizeThreshold)))
    wcConfig.install()
  }

  /**
   * Reports whether any ref tip that is a commit carries the former-commit footer.
   */
  def hasBeenProcessedByBFGBefore(repo: Repository): Boolean = {
    // This method just checks the tips of all refs - a good-enough indicator for our purposes...
    implicit val revWalk: RevWalk = new RevWalk(repo)
    implicit val objectReader: ObjectReader = revWalk.getObjectReader

    // The whole pipeline below is evaluated before this method returns, so the walk
    // (and the reader it owns) can be released here instead of being leaked.
    try {
      repo.getRefDatabase.getRefs().asScala.map(_.getObjectId).filter(_.open.getType == Constants.OBJ_COMMIT)
        .map(_.asRevCommit).exists(_.getFooterLines(FormerCommitFooter.Key).asScala.nonEmpty)
    } finally revWalk.close()
  }

  /** Lets any [[Cleaner]] be used where a [[CleaningMapper]] is expected. */
  implicit def cleaner2CleaningMapper[V](f: Cleaner[V]): CleaningMapper[V] = new CleaningMapper[V] {
    def apply(v: V) = f(v)
  }

  /**
   * Packed objects in descending size order, keeping those that are either larger
   * than [[ProbablyNoNonFileObjectsOverSizeThreshold]] or are blobs.
   */
  def biggestBlobs(implicit objectDB: ObjectDirectory, progressMonitor: ProgressMonitor = NullProgressMonitor.INSTANCE): LazyList[SizedObject] = {
    Timing.measureTask("Scanning packfile for large blobs", ProgressMonitor.UNKNOWN) {
      // The reader is still used by the lazily evaluated filter below, so it cannot be closed here.
      val reader = objectDB.newReader
      objectDB.packedObjects.map {
        objectId =>
          progressMonitor update 1
          SizedObject(objectId, reader.getObjectSize(objectId, OBJ_ANY))
      }.toSeq.sorted.reverse.to(LazyList).filter { oid =>
        // Objects over the threshold are kept without opening them to check their type.
        oid.size > ProbablyNoNonFileObjectsOverSizeThreshold || reader.open(oid.objectId).getType == OBJ_BLOB
      }
    }
  }
}
trait BlobCharsetDetector {
  // should return None if this is a binary file that can not be converted to text
  def charsetFor(entry: TreeBlobEntry, objectLoader: ObjectLoader): Option[Charset]
}


/**
 * Picks a charset by strictly decoding a sample of at most 8000 bytes from the blob
 * with each candidate in [[CharSets]], in order.
 */
object QuickBlobCharsetDetector extends BlobCharsetDetector {

  val CharSets: Seq[Charset] =
    Seq(Charset.forName("UTF-8"), Charset.defaultCharset(), Charset.forName("ISO-8859-1")).distinct

  /**
   * @return the first candidate charset that decodes the sample without error, or
   *         `None` when the sample cannot be read, looks binary, or no candidate decodes it
   */
  def charsetFor(entry: TreeBlobEntry, objectLoader: ObjectLoader): Option[Charset] = {
    Using(ByteStreams.limit(objectLoader.openStream(), 8000))(toByteArray).toOption.filterNot(RawText.isBinary).flatMap {
      sampleBytes =>
        // Each attempt gets its own buffer. Decoding consumes the buffer, so sharing one
        // across attempts meant that after a failed decode the next charset was only
        // checked against the bytes from the failure point onwards.
        // NOTE(review): the sample may end in the middle of a multi-byte character, which
        // a strict decoder reports as malformed - confirm whether that matters here.
        CharSets.find(cs => Try(decode(ByteBuffer.wrap(sampleBytes), cs)).isSuccess)
    }
  }

  /** Decodes all of `b`, throwing on malformed or unmappable input; the decoded text is discarded. */
  private def decode(b: ByteBuffer, charset: Charset): Unit = {
    charset.newDecoder.onMalformedInput(REPORT).onUnmappableCharacter(REPORT).decode(b)
  }
}
*/

package com.madgag.git.bfg.cleaner

import com.madgag.git.ThreadLocalObjectDatabaseResources
import com.madgag.git.bfg.model.TreeBlobEntry
import com.madgag.linesplitting.LineBreakPreservingIterator
import org.eclipse.jgit.lib.Constants.OBJ_BLOB
import org.eclipse.jgit.lib.ObjectLoader

import java.io.{ByteArrayOutputStream, InputStreamReader}
import java.nio.charset.Charset


object BlobTextModifier {

  /** Default upper bound, in bytes (1 MiB), on the size of blobs whose text is rewritten. */
  val DefaultSizeThreshold: Long = 1024 * 1024

}

/**
 * A `TreeBlobModifier` that rewrites the text of blobs line by line.
 *
 * Implementors supply `lineCleanerFor`; a blob is rewritten only when a charset is detected for it,
 * it is smaller than `sizeThreshold`, and at least one of its lines is changed by the cleaner.
 */
trait BlobTextModifier extends TreeBlobModifier {

  val threadLocalObjectDBResources: ThreadLocalObjectDatabaseResources

  /** The line-rewriting function to apply to `entry`, or `None` to leave that entry's content alone. */
  def lineCleanerFor(entry: TreeBlobEntry): Option[String => String]

  val charsetDetector: BlobCharsetDetector = QuickBlobCharsetDetector

  val sizeThreshold = BlobTextModifier.DefaultSizeThreshold

  /**
   * Returns the entry (without its name) pointing at the cleaned blob if cleaning changed
   * anything, otherwise pointing at the original blob.
   */
  override def fix(entry: TreeBlobEntry) = {

    def filterTextIn(e: TreeBlobEntry, lineCleaner: String => String): TreeBlobEntry = {
      def isDirty(line: String) = lineCleaner(line) != line

      val loader = threadLocalObjectDBResources.reader().open(e.objectId)
      val opt = for {
        charset <- charsetDetector.charsetFor(e, loader)
        // first pass: only look for a dirty line, stopping at the first one found
        if loader.getSize < sizeThreshold && withLines(loader, charset)(_.exists(isDirty))
      } yield {
        // second pass: re-read the blob and write out every line after cleaning
        val b = new ByteArrayOutputStream(loader.getSize.toInt) // safe: size is below sizeThreshold here
        withLines(loader, charset)(_.map(lineCleaner).foreach(line => b.write(line.getBytes(charset))))
        val oid = threadLocalObjectDBResources.inserter().insert(OBJ_BLOB, b.toByteArray)
        e.copy(objectId = oid)
      }

      opt.getOrElse(e)
    }

    lineCleanerFor(entry) match {
      case Some(lineCleaner) => filterTextIn(entry, lineCleaner).withoutName
      case None => entry.withoutName
    }
  }

  /**
   * Opens the blob as text, hands its lines to `f`, and always closes the underlying stream
   * afterwards. `f` must consume whatever it needs from the iterator before returning.
   */
  private def withLines[T](loader: ObjectLoader, charset: Charset)(f: Iterator[String] => T): T = {
    val reader = new InputStreamReader(loader.openStream(), charset)
    try f(new LineBreakPreservingIterator(reader)) finally reader.close()
  }
}

--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/LfsBlobConverter.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2015 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
*/

package com.madgag.git.bfg.cleaner

import com.google.common.io.ByteSource
import com.google.common.io.Files.createParentDirs
import com.madgag.git.LFS._
import com.madgag.git._
import com.madgag.git.bfg.model._
import com.madgag.git.bfg.{MemoFunc, MemoUtil}
import com.madgag.textmatching.{Glob, TextMatcher}
import org.eclipse.jgit.internal.storage.file.FileRepository
import org.eclipse.jgit.lib.{ObjectId, ObjectReader}

import java.nio.charset.{Charset, StandardCharsets}
import java.nio.file.{Files, Path}
import scala.jdk.StreamConverters._
import scala.util.{Try, Using}

/**
 * Replaces blobs whose filename matches a glob with Git LFS pointer blobs, storing the real
 * content under the repository's LFS objects directory, and makes sure `.gitattributes` in any
 * affected tree contains the matching LFS filter line.
 *
 * @param lfsGlobExpression glob selecting the filenames to convert; also written into `.gitattributes`
 * @param repo              repository being rewritten
 */
class LfsBlobConverter(
  lfsGlobExpression: String,
  repo: FileRepository
) extends TreeBlobModifier {

  val lfsObjectsDir: Path = repo.getDirectory.toPath.resolve(LFS.ObjectsPath)

  val threadLocalObjectDBResources = repo.getObjectDatabase.threadLocalResources

  val lfsGlob = TextMatcher(Glob, lfsGlobExpression)

  val lfsSuitableFiles: (FileName => Boolean) = f => lfsGlob(f.string)

  val gitAttributesLine = s"$lfsGlobExpression filter=lfs diff=lfs merge=lfs -text"

  implicit val UTF_8: Charset = StandardCharsets.UTF_8

  val lfsPointerMemo = MemoUtil.concurrentCleanerMemo[ObjectId]()

  /** Cleans the blobs of one tree; `.gitattributes` is only touched when something actually changed. */
  override def apply(dirtyBlobs: TreeBlobs) = {
    val cleanedBlobs = super.apply(dirtyBlobs)
    if (cleanedBlobs == dirtyBlobs) cleanedBlobs else ensureGitAttributesSetFor(cleanedBlobs)
  }

  /**
   * Returns `cleanedBlobs` with a `.gitattributes` entry that contains `gitAttributesLine`:
   * a new single-line blob if there was no such file, the existing blob if it already has the
   * line, otherwise a new blob with the line appended to the existing lines.
   */
  def ensureGitAttributesSetFor(cleanedBlobs: TreeBlobs): TreeBlobs = {
    implicit lazy val inserter = threadLocalObjectDBResources.inserter()

    val newGitAttributesId = cleanedBlobs.entryMap.get(GitAttributesFileName).fold {
      storeBlob(gitAttributesLine)
    } {
      case (_, oldGitAttributesId) =>
        val objectLoader = threadLocalObjectDBResources.reader().open(oldGitAttributesId)
        Using(ByteSource.wrap(objectLoader.getCachedBytes).asCharSource(UTF_8).lines()) { oldAttributesStream =>
          val oldAttributes = oldAttributesStream.toScala(Seq)
          if (oldAttributes.contains(gitAttributesLine)) oldGitAttributesId else {
            storeBlob((oldAttributes :+ gitAttributesLine).mkString("\n"))
          }
        }.get
    }
    cleanedBlobs.copy(entryMap = cleanedBlobs.entryMap + (GitAttributesFileName -> (RegularFile, newGitAttributesId)))
  }

  /** Maps an entry to (mode, id), where id is the LFS pointer blob when the filename matches the glob. */
  override def fix(entry: TreeBlobEntry) = {
    val cleanId = if (lfsSuitableFiles(entry.filename)) lfsPointerBlobIdForRealBlob(entry.objectId) else entry.objectId
    (entry.mode, cleanId)
  }

  /**
   * Memoised mapping from a real blob id to the id of its LFS pointer blob. Falls back to the
   * original id when the blob's size cannot be read, is 512 bytes or less, or storing the LFS
   * file fails.
   */
  val lfsPointerBlobIdForRealBlob: MemoFunc[ObjectId, ObjectId] = lfsPointerMemo { blobId: ObjectId =>
    implicit val reader = threadLocalObjectDBResources.reader()
    implicit lazy val inserter = threadLocalObjectDBResources.inserter()

    (for {
      blobSize <- blobId.sizeTry if blobSize > 512
      pointer <- tryStoringLfsFileFor(blobId)
    } yield storeBlob(pointer.bytes)).getOrElse(blobId)
  }

  /**
   * Writes the blob's content into the LFS objects directory (via a temporary file) and returns
   * the LFS pointer describing it.
   *
   * Every step runs inside the returned `Try`, so I/O errors from creating the temp file,
   * computing the pointer or creating directories surface as `Failure` rather than being thrown,
   * and the temp file is removed on every path.
   *
   * @return `Success(pointer)` if the LFS file exists afterwards, `Failure` otherwise
   */
  def tryStoringLfsFileFor(blobId: ObjectId)(implicit r: ObjectReader): Try[Pointer] = Try {
    val loader = blobId.open

    val tmpFile: Path = Files.createTempFile(s"bfg.git-lfs.conv-${blobId.name}","dat")
    try {
      val pointer = pointerFor(loader, tmpFile)

      val lfsPath = lfsObjectsDir.resolve(pointer.path)

      createParentDirs(lfsPath.toFile)

      val ensureLfsFile = Try(if (!Files.exists(lfsPath)) Files.move(tmpFile, lfsPath)).recover {
        // a failed move is fine if a file of the right size is already in place at the target
        case _ if Files.exists(lfsPath) && Files.size(lfsPath) == loader.getSize =>
      }

      ensureLfsFile.get // rethrows an unrecovered failure, which the enclosing Try captures
      pointer
    } finally {
      // best-effort clean-up: the file is already gone if the move succeeded
      Try(Files.deleteIfExists(tmpFile))
    }
  }

}

--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/ObjectIdCleaner.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012, 2013 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
 */

package com.madgag.git.bfg.cleaner

import com.madgag.collection.concurrent.ConcurrentMultiMap
import com.madgag.git._
import com.madgag.git.bfg.GitUtil._
import com.madgag.git.bfg.cleaner.protection.{ProtectedObjectCensus, ProtectedObjectDirtReport}
import com.madgag.git.bfg.model.{Tree, TreeSubtrees, _}
import com.madgag.git.bfg.{CleaningMapper, Memo, MemoFunc, MemoUtil}
import org.eclipse.jgit.lib.Constants._
import org.eclipse.jgit.lib._
import org.eclipse.jgit.revwalk.{RevCommit, RevTag, RevWalk}

object ObjectIdCleaner {

  /**
   * Everything an `ObjectIdCleaner` needs to know: which objects are protected, how old ids are
   * rewritten in messages, the cleaners to run at each level, and an optional checker that is
   * run on every newly built commit, tree and tag.
   */
  case class Config(protectedObjectCensus: ProtectedObjectCensus,
                    objectIdSubstitutor: ObjectIdSubstitutor = ObjectIdSubstitutor.OldIdsPublic,
                    commitNodeCleaners: Seq[CommitNodeCleaner] = Seq.empty,
                    treeEntryListCleaners: Seq[Cleaner[Seq[Tree.Entry]]] = Seq.empty,
                    treeBlobsCleaners: Seq[Cleaner[TreeBlobs]] = Seq.empty,
                    treeSubtreesCleaners: Seq[Cleaner[TreeSubtrees]] = Seq.empty,
                    // messageCleaners? - covers both Tag and Commits
                    objectChecker: Option[ObjectChecker] = None) {

    lazy val commitNodeCleaner = CommitNodeCleaner.chain(commitNodeCleaners)

    lazy val treeEntryListCleaner = Function.chain(treeEntryListCleaners)

    lazy val treeBlobsCleaner = Function.chain(treeBlobsCleaners)

    lazy val treeSubtreesCleaner:Cleaner[TreeSubtrees] = Function.chain(treeSubtreesCleaners)
  }

}

/*
 * Knows how to clean an object, and what objects are protected...
 */
class ObjectIdCleaner(config: ObjectIdCleaner.Config, objectDB: ObjectDatabase, implicit val revWalk: RevWalk) extends CleaningMapper[ObjectId] {

  import config._

  val threadLocalResources = objectDB.threadLocalResources

  // per-filename record of blob replacements (old id, new id) and of blob removals, filled by recordChange
  val changesByFilename = new ConcurrentMultiMap[FileName, (ObjectId, ObjectId)]
  val deletionsByFilename = new ConcurrentMultiMap[FileName, ObjectId]

  // want to enforce that once any value is returned, it is 'good' and therefore an identity-mapped key as well
  val memo: Memo[ObjectId, ObjectId] = MemoUtil.concurrentCleanerMemo(protectedObjectCensus.fixedObjectIds)

  val commitMemo: Memo[ObjectId, ObjectId] = MemoUtil.concurrentCleanerMemo()
  val tagMemo: Memo[ObjectId, ObjectId] = MemoUtil.concurrentCleanerMemo()

  val treeMemo: Memo[ObjectId, ObjectId] = MemoUtil.concurrentCleanerMemo(protectedObjectCensus.treeIds.toSet[ObjectId])

  /** Cleans any object id, dispatching on the object's type; results are memoised. */
  def apply(objectId: ObjectId): ObjectId = memoClean(objectId)

  val memoClean = memo {
    uncachedClean
  }

  /** Union of the contents of all four memos (generic, commit, tag, tree). */
  def cleanedObjectMap(): Map[ObjectId, ObjectId] =
    Seq(memoClean, cleanCommit, cleanTag, cleanTree).map(_.asMap()).reduce(_ ++ _)

  /** Type-dispatching cleaner that bypasses `memo` (the per-type cleaners still use their own memos). */
  def uncachedClean: (ObjectId) => ObjectId = {
    objectId =>
      threadLocalResources.reader().open(objectId).getType match {
        case OBJ_COMMIT => cleanCommit(objectId)
        case OBJ_TREE => cleanTree(objectId)
        case OBJ_TAG => cleanTag(objectId)
        case _ => objectId // we don't currently clean isolated blobs... only clean within a tree context
      }
  }

  // access to the shared RevWalk is serialised by locking on the walk itself
  def getCommit(commitId: AnyObjectId): RevCommit = revWalk.synchronized(commitId.asRevCommit)

  def getTag(tagId: AnyObjectId): RevTag = revWalk.synchronized(tagId.asRevTag)

  /**
   * Cleans a commit: cleans its arcs with this mapper, runs the commit-node cleaners, and inserts
   * a new commit object only if the result differs from the original.
   */
  val cleanCommit: MemoFunc[ObjectId, ObjectId] = commitMemo { commitId =>
    val originalRevCommit = getCommit(commitId)
    val originalCommit = Commit(originalRevCommit)

    val cleanedArcs = originalCommit.arcs.cleanWith(this)
    val kit = new CommitNodeCleaner.Kit(threadLocalResources, originalRevCommit, originalCommit, cleanedArcs, apply)
    val updatedCommitNode = commitNodeCleaner.fixer(kit)(originalCommit.node)
    val updatedCommit = Commit(updatedCommitNode, cleanedArcs)

    if (updatedCommit != originalCommit) {
      val commitBytes = updatedCommit.toBytes
      objectChecker.foreach(_.checkCommit(commitBytes))
      threadLocalResources.inserter().insert(OBJ_COMMIT, commitBytes)
    } else {
      originalRevCommit
    }
  }

  val cleanBlob: Cleaner[ObjectId] = identity // Currently a NO-OP, we only clean at treeblob level

  /**
   * Cleans a tree: applies the entry-list, blob and subtree cleaners, recursively cleans the
   * subtrees, drops empty subtrees, and inserts a new tree object only if something changed.
   */
  val cleanTree: MemoFunc[ObjectId, ObjectId] = treeMemo { originalObjectId =>
    val entries = Tree.entriesFor(originalObjectId)(threadLocalResources.reader())
    val cleanedTreeEntries = treeEntryListCleaner(entries)

    val tree = Tree(cleanedTreeEntries)

    val originalBlobs = tree.blobs
    val fixedTreeBlobs = treeBlobsCleaner(originalBlobs)
    val cleanedSubtrees = TreeSubtrees(treeSubtreesCleaner(tree.subtrees).entryMap.map {
      case (name, treeId) => (name, cleanTree(treeId))
    }).withoutEmptyTrees

    val treeBlobsChanged = fixedTreeBlobs != originalBlobs
    if (entries == cleanedTreeEntries && !treeBlobsChanged && cleanedSubtrees == tree.subtrees) originalObjectId else {
      if (treeBlobsChanged) recordChange(originalBlobs, fixedTreeBlobs)

      val updatedTree = tree.copyWith(cleanedSubtrees, fixedTreeBlobs)

      val treeFormatter = updatedTree.formatter
      objectChecker.foreach(_.checkTree(treeFormatter.toByteArray))
      treeFormatter.insertTo(threadLocalResources.inserter())
    }
  }

  /**
   * Records, per filename, every original blob entry that is absent from the fixed blobs:
   * as a change if the filename still exists afterwards, otherwise as a deletion.
   */
  def recordChange(originalBlobs: TreeBlobs, fixedTreeBlobs: TreeBlobs): Unit = {
    val changedFiles: Set[TreeBlobEntry] = originalBlobs.entries.toSet -- fixedTreeBlobs.entries.toSet
    for (TreeBlobEntry(filename, _, oldId) <- changedFiles) {
      fixedTreeBlobs.objectId(filename) match {
        case Some(newId) => changesByFilename.addBinding(filename, (oldId, newId))
        case None => deletionsByFilename.addBinding(filename, oldId)
      }
    }
  }

  case class TreeBlobChange(oldId: ObjectId, newIdOpt: Option[ObjectId], filename: FileName)

  /**
   * Cleans an annotated tag: if the tagged object has a replacement, builds a tag with the same
   * name, tagger and object type pointing at the replacement, with old ids in its message
   * substituted. Otherwise the original tag is returned.
   */
  val cleanTag: MemoFunc[ObjectId, ObjectId] = tagMemo { id =>
    val originalTag = getTag(id)

    replacement(originalTag.getObject).map {
      cleanedObj =>
        val tb = new TagBuilder
        tb.setTag(originalTag.getTagName)
        tb.setObjectId(cleanedObj, originalTag.getObject.getType)
        tb.setTagger(originalTag.getTaggerIdent)
        tb.setMessage(objectIdSubstitutor.replaceOldIds(originalTag.getFullMessage, threadLocalResources.reader(), apply))
        // validate before inserting, as cleanCommit and cleanTree do, so a rejected tag is never written
        objectChecker.foreach(_.checkTag(tb.build()))
        val cleanedTag: ObjectId = threadLocalResources.inserter().insert(tb)
        cleanedTag
    }.getOrElse(originalTag)
  }

  /** One dirt report per protected object: its tree or blob content and the uncached-clean replacement of that content. */
  lazy val protectedDirt: Seq[ProtectedObjectDirtReport] = {
    protectedObjectCensus.protectorRevsByObject.map {
      case (protectedRevObj, _) =>
        val originalContentObject = treeOrBlobPointedToBy(protectedRevObj).merge
        val replacementTreeOrBlob = uncachedClean.replacement(originalContentObject)
        ProtectedObjectDirtReport(protectedRevObj, originalContentObject, replacementTreeOrBlob)
    }.toList
  }

  def stats() = Map("apply"->memoClean.stats(), "tree" -> cleanTree.stats(), "commit" -> cleanCommit.stats(), "tag" -> cleanTag.stats())

}

--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/ObjectIdSubstitutor.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012, 2013 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
*/

package com.madgag.git.bfg.cleaner

import com.madgag.git._
import com.madgag.git.bfg.GitUtil._
import com.madgag.git.bfg.cleaner.ObjectIdSubstitutor._
import org.eclipse.jgit.lib.{AbbreviatedObjectId, ObjectId, ObjectReader}

/** Commit-node cleaner that rewrites old object ids mentioned in the commit message. */
class CommitMessageObjectIdsUpdater(objectIdSubstitutor: ObjectIdSubstitutor) extends CommitNodeCleaner {

  override def fixer(kit: CommitNodeCleaner.Kit) = commitNode => commitNode.copy(message = objectIdSubstitutor.replaceOldIds(commitNode.message, kit.threadLocalResources.reader(), kit.mapper))

}

object ObjectIdSubstitutor {

  /** Replaces an old id with just the new id. */
  object OldIdsPrivate extends ObjectIdSubstitutor {
    def format(oldIdText: String, newIdText: String) = newIdText
  }

  /** Replaces an old id with the new id followed by a note of the former id. */
  object OldIdsPublic extends ObjectIdSubstitutor {
    def format(oldIdText: String, newIdText: String) = s"$newIdText [formerly $oldIdText]"
  }

  val hexRegex = """\b\p{XDigit}{10,40}\b""".r // choose minimum size based on size of project??

}

/** Rewrites references to object ids inside free text such as commit and tag messages. */
trait ObjectIdSubstitutor {

  /** The text that replaces `oldIdText`, given the (equally abbreviated) text of the new id. */
  def format(oldIdText: String, newIdText: String): String

  // slow!
  /**
   * Replaces every hex string in `message` that resolves to a unique existing object for which
   * `mapper` yields a replacement, using `format` to build the replacement text. The new id is
   * abbreviated to the length of the old text.
   *
   * @param message text to scan for ids matching `hexRegex`
   * @param reader  used to resolve abbreviated ids and to abbreviate the new ids
   * @param mapper  the cleaner that maps old ids to new ids
   * @return the message with substitutions applied, or `message` itself when there are none
   */
  def replaceOldIds(message: String, reader: ObjectReader, mapper: Cleaner[ObjectId]): String = {
    val substitutionOpts = for {
      m: String <- hexRegex.findAllIn(message).toSet
      objectId <- reader.resolveExistingUniqueId(AbbreviatedObjectId.fromString(m)).toOption
    } yield mapper.replacement(objectId).map(newId => m -> format(m, reader.abbreviate(newId, m.length).name))

    val substitutions = substitutionOpts.flatten.toMap
    if (substitutions.isEmpty) message else {
      // replaceSomeIn treats '$' and '\' in the replacement as group references/escapes; `format`
      // is an extension point, so quote its output to have it inserted literally
      hexRegex.replaceSomeIn(message, m =>
        substitutions.get(m.matched).map(text => scala.util.matching.Regex.quoteReplacement(text)))
    }
  }
}
--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/RepoRewriter.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
*/

package com.madgag.git.bfg.cleaner

import com.madgag.git._
import com.madgag.git.bfg.Timing
import org.eclipse.jgit.lib.{ObjectId, ProgressMonitor, RefDatabase}
import org.eclipse.jgit.revwalk.RevSort._
import org.eclipse.jgit.revwalk.{RevCommit, RevWalk}
import org.eclipse.jgit.transport.ReceiveCommand

import scala.jdk.CollectionConverters._
import scala.collection.parallel.CollectionConverters._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

/*
Encountering a blob ->
BIG-BLOB-DELETION : Either 'good' or 'delete'   - or possibly replace, with a different filename (means tree-level)
PASSWORD-REMOVAL  : Either 'good' or 'replace'

Encountering a tree ->
BIG-BLOB-DELETION : Either 'good' or 'replace'   - possibly adding with a different placeholder blob entry
PASSWORD-REMOVAL  : Either 'good' or 'replace' - replacing one blob entry with another

So if we encounter a tree, we are unlikely to want to remove that tree entirely...
SHOULD WE JUST DISALLOW THAT?
Obviously, a Commit HAS to have a tree, so it's dangerous to allow a None response to tree transformation

An objectId must be either GOOD or BAD, and we must never translate *either* kind of id into a BAD

User-customisation interface: TreeBlobs => TreeBlobs

User gets no say in adding, renaming, removing directories

TWO MAIN USE CASES FOR HISTORY-CHANGING ARE:
1: GETTING RID OF BIG BLOBS
2: REMOVING PASSWORDS IN HISTORICAL FILES

possible other use-case: fixing committer names - and possibly removing passwords from commits? (could possibly just be done with rebase)

Why else would you want to rewrite HISTORY? Many other changes (ie putting a directory one down) need only be applied
in a new commit, we don't care about history.

When updating a Tree, the User has no right to muck with sub-trees. They can only alter the blob contents.
 */

object RepoRewriter {

  /**
   * Cleans every commit reachable from the repository's refs and then points the non-symbolic
   * refs at the cleaned objects.
   *
   * Both the main `RevWalk` and the short-lived one used for the ref update are closed before
   * this method returns; nothing that depends on them escapes.
   *
   * @param repo                  repository to rewrite; it must have at least one ref
   * @param objectIdCleanerConfig what to clean and what to protect
   * @return the old-id to new-id map accumulated by the cleaner
   */
  def rewrite(repo: org.eclipse.jgit.lib.Repository, objectIdCleanerConfig: ObjectIdCleaner.Config): Map[ObjectId, ObjectId] = {
    implicit val refDatabase: RefDatabase = repo.getRefDatabase

    assert(refDatabase.hasRefs, "Can't find any refs in repo at " + repo.getDirectory.getAbsolutePath)

    val reporter: Reporter = new CLIReporter(repo)
    implicit val progressMonitor: ProgressMonitor = reporter.progressMonitor

    val allRefs = refDatabase.getRefs().asScala

    def createRevWalk: RevWalk = {

      val revWalk = new RevWalk(repo)

      revWalk.sort(TOPO) // crucial to ensure we visit parents BEFORE children, otherwise blow stack
      revWalk.sort(REVERSE, true) // we want to start with the earliest commits and work our way up...

      val startCommits = allRefs.map(_.targetObjectId.asRevObject(revWalk)).collect { case c: RevCommit => c }

      revWalk.markStart(startCommits.asJavaCollection)
      revWalk
    }

    implicit val revWalk = createRevWalk
    try {
      implicit val reader = revWalk.getObjectReader

      reporter.reportRefsForScan(allRefs)

      reporter.reportObjectProtection(objectIdCleanerConfig)(repo.getObjectDatabase, revWalk)

      val objectIdCleaner = new ObjectIdCleaner(objectIdCleanerConfig, repo.getObjectDatabase, revWalk)

      val commits = revWalk.asScala.toSeq

      def clean(commits: Seq[RevCommit]): Unit = {
        reporter.reportCleaningStart(commits)

        Timing.measureTask("Cleaning commits", commits.size) {
          // Fire-and-forget: trees are cleaned in parallel in the background (results land in the
          // cleaner's memo) while the loop below cleans the commits sequentially, in walk order.
          Future {
            commits.par.foreach {
              commit => objectIdCleaner(commit.getTree)
            }
          }

          commits.foreach {
            commit =>
              objectIdCleaner(commit)
              progressMonitor update 1
          }
        }
      }

      def updateRefsWithCleanedIds(): Unit = {
        val refUpdateCommands = for (ref <- repo.nonSymbolicRefs;
                                     (oldId, newId) <- objectIdCleaner.substitution(ref.getObjectId)
        ) yield new ReceiveCommand(oldId, newId, ref.getName)

        if (refUpdateCommands.isEmpty) {
          println("\nBFG aborting: No refs to update - no dirty commits found??\n")
        } else {
          reporter.reportRefUpdateStart(refUpdateCommands)

          Timing.measureTask("...Ref update", refUpdateCommands.size) {
            // Hack a fix for issue #23 : Short-cut the calculation that determines an update is NON-FF
            val quickMergeCalcRevWalk = new RevWalk(revWalk.getObjectReader) {
              override def isMergedInto(base: RevCommit, tip: RevCommit) =
                if (tip == objectIdCleaner(base)) false else super.isMergedInto(base, tip)
            }

            try {
              refDatabase.newBatchUpdate.setAllowNonFastForwards(true).addCommand(refUpdateCommands.asJavaCollection)
                .execute(quickMergeCalcRevWalk, progressMonitor)
            } finally {
              // this walk borrows the main walk's reader, so closing it leaves that reader open
              quickMergeCalcRevWalk.close()
            }
          }

          reporter.reportResults(commits, objectIdCleaner)
        }
      }


      clean(commits)

      updateRefsWithCleanedIds()

      objectIdCleaner.stats() // NOTE(review): result is discarded - confirm whether this call is still needed

      objectIdCleaner.cleanedObjectMap()
    } finally {
      revWalk.close()
    }
  }

}

--------------------------------------------------------------------------------
/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/Reporter.scala:
--------------------------------------------------------------------------------
package com.madgag.git.bfg.cleaner

import com.google.common.io.Files.asCharSink
import com.madgag.collection.concurrent.ConcurrentMultiMap
import com.madgag.git._
import com.madgag.git.bfg.cleaner.Reporter.dump
import com.madgag.git.bfg.cleaner.protection.{ProtectedObjectCensus, ProtectedObjectDirtReport}
import com.madgag.git.bfg.model.FileName
import com.madgag.text.Text._
import com.madgag.text.{ByteSize, Tables, Text}
import org.eclipse.jgit.diff.DiffEntry.ChangeType._
import
org.eclipse.jgit.diff._
import org.eclipse.jgit.lib.FileMode._
import org.eclipse.jgit.lib._
import org.eclipse.jgit.revwalk.{RevCommit, RevWalk}
import org.eclipse.jgit.transport.ReceiveCommand

import java.nio.charset.StandardCharsets.UTF_8
import java.nio.file.Files.createDirectories
import java.nio.file.Path
import java.time.ZonedDateTime
import java.time.format.DateTimeFormatter
import scala.collection.immutable.SortedMap
import scala.jdk.CollectionConverters._


object Reporter {
  /** Writes `iter` to the file at `path` as UTF-8, one element per line, each terminated by "\n". */
  def dump(path: Path, iter: Iterable[String]): Unit =
    asCharSink(path.toFile, UTF_8).writeLines(iter.asJava, "\n")
}

/** Callbacks through which a repository rewrite reports its progress and results. */
trait Reporter {

  val progressMonitor: ProgressMonitor

  def reportRefsForScan(allRefs: Iterable[Ref])(implicit objReader: ObjectReader): Unit

  def reportRefUpdateStart(refUpdateCommands: Iterable[ReceiveCommand]): Unit

  def reportObjectProtection(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk): Unit

  def reportCleaningStart(commits: Seq[RevCommit]): Unit

  def reportResults(commits: Seq[RevCommit], objectIdCleaner: ObjectIdCleaner): Unit
}

class CLIReporter(repo: Repository) extends Reporter {

  /**
   * Directory for this run's report files: `<top dir name>.bfg-report/<date>/<time>`, created as a
   * sibling of the repository's top directory the first time this value is used.
   */
  lazy val reportsDir: Path = {
    val startedAt = ZonedDateTime.now()
    def stamp(pattern: String): String = startedAt.format(DateTimeFormatter.ofPattern(pattern))

    val repoTop = repo.topDirectory.toPath.toAbsolutePath
    val runDir = repoTop
      .resolveSibling(s"${repoTop.getFileName}.bfg-report")
      .resolve(stamp("uuuu-MM-dd"))
      .resolve(stamp("HH-mm-ss"))

    createDirectories(runDir)
    runDir
  }

  lazy val progressMonitor = new TextProgressMonitor

  /** Prints a titled table with one row per ref update: ref name, old short id, new short id. */
  def reportRefUpdateStart(refUpdateCommands: Iterable[ReceiveCommand]): Unit = {
    println(title("Updating " + plural(refUpdateCommands, "Ref")))

    val rows = for (cmd <- refUpdateCommands) yield (cmd.getRefName, cmd.getOldId.shortName, cmd.getNewId.shortName)
    val header = ("Ref", "Before", "After")

    Tables.formatTable(header, rows.toSeq).map(tableLine => "\t" + tableLine).foreach(println)

    println()
  }

  /** Prints, for each type of object that refs point at, how many refs there are and an abbreviated list of their names. */
  def reportRefsForScan(allRefs: Iterable[Ref])(implicit objReader: ObjectReader): Unit = {
    def targetType(ref: Ref) = objReader.open(ref.getObjectId).getType

    val refsByObjType = allRefs.groupBy(targetType).withDefault(Seq.empty)

    for ((typ, refs) <- refsByObjType) {
      val refNames = abbreviate(refs.map(_.getName).toSeq, "...", 4).mkString(", ")
      println("Found " + refs.size + " " + Constants.typeString(typ) + "-pointing refs : " + refNames)
    }
  }


  // abort due to Dirty Tips on Private run - user needs to manually clean
  // warn due to Dirty Tips on Public run - it's not so serious if users publicise dirty tips.
93 | // if no protection 94 | def reportObjectProtection(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk): Unit = { 95 | println(title("Protected commits")) 96 | 97 | if (objectIdCleanerConfig.protectedObjectCensus.isEmpty) { 98 | println("You're not protecting any commits, which means the BFG will modify the contents of even *current* commits.\n\n" + 99 | "This isn't recommended - ideally, if your current commits are dirty, you should fix up your working copy and " + 100 | "commit that, check that your build still works, and only then run the BFG to clean up your history.") 101 | } else { 102 | println("These are your protected commits, and so their contents will NOT be altered:\n") 103 | 104 | val unprotectedConfig = objectIdCleanerConfig.copy(protectedObjectCensus = ProtectedObjectCensus.None) 105 | 106 | reportProtectedCommitsAndTheirDirt(objectIdCleanerConfig) 107 | } 108 | } 109 | 110 | case class DiffSideDetails(id: ObjectId, path: String, mode: FileMode, size: Option[Long]) 111 | 112 | def reportProtectedCommitsAndTheirDirt(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk): Unit = { 113 | implicit val reader = revWalk.getObjectReader 114 | 115 | def diffDetails(d: DiffEntry) = { 116 | val side = DiffEntry.Side.OLD 117 | val id: ObjectId = d.getId(side).toObjectId 118 | DiffSideDetails(id, d.getPath(side), d.getMode(side), id.sizeOpt) 119 | } 120 | 121 | def fileInfo(d: DiffSideDetails) = { 122 | val extraInfo = (d.mode match { 123 | case GITLINK => Some("submodule") 124 | case _ => d.size.map(ByteSize.format(_)) 125 | }).map(e => s"($e)") 126 | 127 | (d.path +: extraInfo.toSeq).mkString(" ") 128 | } 129 | 130 | val protectedDirtDir = reportsDir.resolve("protected-dirt") 131 | createDirectories(protectedDirtDir) 132 | 133 | val reports = ProtectedObjectDirtReport.reportsFor(objectIdCleanerConfig, objectDB) 134 | 135 | reports.foreach { 136 | report => 137 | 
val protectorRevs = objectIdCleanerConfig.protectedObjectCensus.protectorRevsByObject(report.revObject) 138 | val objectTitle = s" * ${report.revObject.typeString} ${report.revObject.shortName} (protected by '${protectorRevs.mkString("', '")}')" 139 | 140 | report.dirt match { 141 | case None => println(objectTitle) 142 | case Some(diffEntries) => 143 | if (diffEntries.isEmpty) { 144 | println(objectTitle + " - dirty") 145 | } else { 146 | println(objectTitle + " - contains " + plural(diffEntries, "dirty file") + " : ") 147 | abbreviate(diffEntries.view.map(diffDetails).map(fileInfo), "...").foreach { 148 | dirtyFile => println("\t- " + dirtyFile) 149 | } 150 | 151 | val protectorRefsFileNameSafe: String = protectorRevs.mkString("_").replace( 152 | protectedDirtDir.getFileSystem.getSeparator, 153 | "-" 154 | ) 155 | val diffFile = protectedDirtDir.resolve(s"${report.revObject.shortName}-$protectorRefsFileNameSafe.csv") 156 | 157 | dump(diffFile, diffEntries.map { 158 | diffEntry => 159 | val de = diffDetails(diffEntry) 160 | 161 | val modifiedLines = if (diffEntry.getChangeType == MODIFY) diffEntry.editList.map(changedLinesFor) else None 162 | 163 | val elems = Seq(de.id.name, diffEntry.getChangeType.name, de.mode.name, de.path, de.size.getOrElse(""), modifiedLines.getOrElse("")) 164 | 165 | elems.mkString(",") 166 | }) 167 | } 168 | } 169 | } 170 | 171 | val dirtyReports = reports.filter(_.objectProtectsDirt) 172 | if (dirtyReports.nonEmpty) { 173 | 174 | println(s""" 175 | |WARNING: The dirty content above may be removed from other commits, but as 176 | |the *protected* commits still use it, it will STILL exist in your repository. 177 | | 178 | |Details of protected dirty content have been recorded here : 179 | | 180 | |${protectedDirtDir.toAbsolutePath.toString + protectedDirtDir.getFileSystem.getSeparator} 181 | | 182 | |If you *really* want this content gone, make a manual commit that removes it, 183 | |and then run the BFG on a fresh copy of your repo. 
184 | """.stripMargin) 185 | // TODO would like to abort here if we are cleaning 'private' data. 186 | } 187 | } 188 | 189 | def changedLinesFor(edits: EditList): String = { 190 | edits.asScala.map { 191 | edit => Seq(edit.getBeginA + 1, edit.getEndA).distinct.mkString("-") 192 | }.mkString(";") 193 | } 194 | 195 | def reportCleaningStart(commits: Seq[RevCommit]): Unit = { 196 | println(title("Cleaning")) 197 | println("Found " + commits.size + " commits") 198 | } 199 | 200 | def reportResults(commits: Seq[RevCommit], objectIdCleaner: ObjectIdCleaner): Unit = { 201 | def reportTreeDirtHistory(): Unit = { 202 | 203 | val dirtHistoryElements = math.max(20, math.min(60, commits.size)) 204 | def cut[A](xs: Seq[A], n: Int) = { 205 | val avgSize = xs.size.toFloat / n 206 | def startOf(unit: Int): Int = math.round(unit * avgSize) 207 | (0 until n).view.map(u => xs.slice(startOf(u), startOf(u + 1))) 208 | } 209 | val treeDirtHistory = cut(commits, dirtHistoryElements).map { 210 | case commits if commits.isEmpty => ' ' 211 | case commits if (commits.exists(c => objectIdCleaner.isDirty(c.getTree))) => 'D' 212 | case commits if (commits.exists(objectIdCleaner.isDirty)) => 'm' 213 | case _ => '.' 214 | }.mkString 215 | def leftRight(markers: Seq[String]) = markers.mkString(" " * (treeDirtHistory.length - markers.map(_.size).sum)) 216 | println(title("Commit Tree-Dirt History")) 217 | println("\t" + leftRight(Seq("Earliest", "Latest"))) 218 | println("\t" + leftRight(Seq("|", "|"))) 219 | println("\t" + treeDirtHistory) 220 | println("\n\tD = dirty commits (file tree fixed)") 221 | println("\tm = modified commits (commit message or parents changed)") 222 | println("\t. 
= clean commits (no changes to file tree)\n") 223 | 224 | val firstModifiedCommit = commits.find(objectIdCleaner.isDirty).map(_ -> "First modified commit") 225 | val lastDirtyCommit = commits.reverse.find(c => objectIdCleaner.isDirty(c.getTree)).map(_ -> "Last dirty commit") 226 | val items = for { 227 | (commit, desc) <- firstModifiedCommit ++ lastDirtyCommit 228 | (before, after) <- objectIdCleaner.substitution(commit) 229 | } yield (desc, before.shortName, after.shortName) 230 | Tables.formatTable(("", "Before", "After"), items.toSeq).map("\t" + _).foreach(println) 231 | } 232 | 233 | reportTreeDirtHistory() 234 | 235 | lazy val mapFile: Path = reportsDir.resolve("object-id-map.old-new.txt") 236 | lazy val cacheStatsFile: Path = reportsDir.resolve("cache-stats.txt") 237 | 238 | val changedIds = objectIdCleaner.cleanedObjectMap() 239 | 240 | def reportFiles[FI]( 241 | fileData: ConcurrentMultiMap[FileName, FI], 242 | actionType: String, 243 | tableTitles: Product 244 | )(f: ((FileName,Set[FI])) => Product)(fi: FI => Seq[String]): Unit = { 245 | implicit val fileNameOrdering = Ordering[String].on[FileName](_.string) 246 | 247 | val dataByFilename = SortedMap[FileName, Set[FI]](fileData.toMap.toSeq: _*) 248 | if (dataByFilename.nonEmpty) { 249 | println(title(s"$actionType files")) 250 | Tables.formatTable(tableTitles, dataByFilename.map(f).toSeq).map("\t" + _).foreach(println) 251 | 252 | val actionFile = reportsDir.resolve(s"${actionType.toLowerCase}-files.txt") 253 | 254 | dump(actionFile, dataByFilename.flatMap { 255 | case (filename, changes) => changes.map(fi.andThen(fid => (fid :+ filename).mkString(" "))) 256 | }) 257 | } 258 | } 259 | 260 | reportFiles(objectIdCleaner.changesByFilename, "Changed", ("Filename", "Before & After")) { 261 | case (filename, changes) => (filename, Text.abbreviate(changes.map {case (oldId, newId) => oldId.shortName+" ⇒ "+newId.shortName}, "...").mkString(", ")) 262 | } { case (oldId, newId) => Seq(oldId.name, newId.name) } 263 | 
264 | implicit val reader = objectIdCleaner.threadLocalResources.reader() 265 | 266 | reportFiles(objectIdCleaner.deletionsByFilename, "Deleted", ("Filename", "Git id")) { 267 | case (filename, oldIds) => (filename, Text.abbreviate(oldIds.map(oldId => oldId.shortName + oldId.sizeOpt.map(size => s" (${ByteSize.format(size)})").mkString), "...").mkString(", ")) 268 | } { oldId => Seq(oldId.name, oldId.sizeOpt.mkString) } 269 | 270 | println(s"\n\nIn total, ${changedIds.size} object ids were changed. Full details are logged here:\n\n\t$reportsDir") 271 | 272 | dump(mapFile,SortedMap[AnyObjectId, ObjectId](changedIds.toSeq: _*).view.map { case (o,n) => s"${o.name} ${n.name}"}) 273 | 274 | dump(cacheStatsFile,objectIdCleaner.stats().map(_.toString())) 275 | 276 | println("\nBFG run is complete! When ready, run: git reflog expire --expire=now --all && git gc --prune=now --aggressive") 277 | 278 | } 279 | 280 | def title(text: String) = s"\n$text\n" + ("-" * text.size) + "\n" 281 | } -------------------------------------------------------------------------------- /bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/TreeBlobModifier.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Roberto Tyley 3 | * 4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large 5 | * or troublesome blobs from Git repositories. 6 | * 7 | * BFG Repo-Cleaner is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * BFG Repo-Cleaner is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 
/**
 * A cleaner of [[TreeBlobs]] that rewrites entries one at a time, remembering the outcome for each entry.
 *
 * Implementations supply `fix`, which decides the new file mode and blob id for an entry. The file name is
 * always carried over from the original entry.
 */
trait TreeBlobModifier extends Cleaner[TreeBlobs] {

  /** Memoised per-entry cleaner: applies `fix` and rebuilds the entry under its original file name. */
  val memoisedCleaner: Cleaner[TreeBlobEntry] = MemoUtil.concurrentCleanerMemo[TreeBlobEntry](Set.empty) { original =>
    fix(original) match {
      case (newMode, newBlobId) => TreeBlobEntry(original.filename, newMode, newBlobId)
    }
  }

  /**
   * Decides the replacement mode and blob id for an entry.
   *
   * Only mode and id are returned, because implementing code can not safely know a valid filename.
   */
  def fix(entry: TreeBlobEntry): (BlobFileMode, ObjectId)

  /** Runs every entry of the given tree-blobs through the memoised cleaner. */
  override def apply(treeBlobs: TreeBlobs) = treeBlobs.entries map memoisedCleaner

}
object CommitNodeCleaner {

  /**
   * Everything a [[CommitNodeCleaner]] is given about the commit being cleaned: the original commit (both as a
   * RevCommit and as a model object), its already-updated arcs, an object-id mapper and thread-local resources.
   */
  class Kit(val threadLocalResources: ThreadLocalObjectDatabaseResources,
            val originalRevCommit: RevCommit,
            val originalCommit: Commit,
            val updatedArcs: CommitArcs,
            val mapper: Cleaner[ObjectId]) {

    /** True when the updated arcs differ from the arcs of the original commit. */
    val arcsChanged = updatedArcs != originalCommit.arcs

    /** True when the arcs changed, or the supplied node differs from the original commit's node. */
    def commitIsChanged(withThisNode: CommitNode) = {
      def nodeChanged = withThisNode != originalCommit.node
      arcsChanged || nodeChanged
    }
  }

  /** Combines several cleaners into one whose fixer applies each cleaner's fixer in turn, in sequence order. */
  def chain(cleaners: Seq[CommitNodeCleaner]) = new CommitNodeCleaner {
    def fixer(kit: CommitNodeCleaner.Kit) = {
      val stages = cleaners.map(cleaner => cleaner.fixer(kit))
      node => stages.foldLeft(node)((current, stage) => stage(current))
    }
  }
}

/** Produces, for a given [[CommitNodeCleaner.Kit]], the function that cleans a commit's node. */
trait CommitNodeCleaner {
  def fixer(kit: CommitNodeCleaner.Kit): Cleaner[CommitNode]
}

/** Appends a `Former-commit-id` footer, holding the original commit id, to commits that have changed. */
object FormerCommitFooter extends CommitNodeCleaner {
  val Key = "Former-commit-id"

  override def fixer(kit: CommitNodeCleaner.Kit) =
    modifyIf[CommitNode](kit.commitIsChanged)(node => node.add(Footer(Key, kit.originalRevCommit.name)))

  /** Builds a function that applies `modifier` to values satisfying `predicate` and returns all others untouched. */
  def modifyIf[A](predicate: A => Boolean)(modifier: A => A): (A => A) = {
    case matching if predicate(matching) => modifier(matching)
    case untouched => untouched
  }
}
/**
 * Thin wrapper around a JGit `ObjectInserter` that only inserts blobs: every call passes `OBJ_BLOB` as the
 * object type.
 *
 * @param objectInserter the inserter that all calls are delegated to
 */
class BlobInserter(objectInserter: ObjectInserter) {
  /** Inserts the given bytes as a blob and returns the id reported by the underlying inserter. */
  def insert(data: Array[Byte]): ObjectId = objectInserter.insert(OBJ_BLOB, data)

  /** Inserts a blob read from `in`; `length` is handed to the underlying inserter as the object length. */
  def insert(length: Long, in: InputStream): ObjectId = objectInserter.insert(OBJ_BLOB, length, in)
}
package object cleaner {
  /** A cleaner is simply a function from a value to a (possibly identical) value of the same type. */
  type Cleaner[V] = V => V
}
/**
 * PROTECTING TREES :
 * Want to leave the tree unchanged for all commits at the tips of refs the user thinks are important.
 * What if you think a Tag is important? Or a tree?
 *
 * If a tag points to a:
 * - commit - that commit may change, but its tree must stay the same
 * - tree - tagging a tree is unusual, but if we've been asked to protect it, that suggests it's supposed to be inviolate
 * - blob - that blob will continue to be referenced by the repo, not disappear, but not be cleaned either, as we currently clean at TreeBlob level
 *
 * We can take a shortcut here by just pushing all hallowed trees straight into the memo collection.
 * This does mean that we will never notice, or be able to report, if somebody sets a rule that 'cleans' (alters) a hallowed tree.
 * It might also have somewhat unexpected consequences if someone hallows a very 'simple' directory that occurs often.
 *
 *
 * PROTECTING BLOBS :
 * If a user wants to protect the tip of a ref, all blobs will be retained. No space is saved and no secrets are
 * kept by deleting or tampering with those blobs elsewhere. And if you have some big-old blob like a jar that you
 * have used consistently throughout the history of your project, it benefits no-one to remove it - in fact it's
 * actively harmful.
 *
 * We explicitly protect blobs (rather than just allowing them to fall under the protection given to Trees) precisely
 * because these blobs may historically have existed in other directories (trees) that did not appear in the
 * protected tips, and so would not be protected by Tree protection.
 *
 *
 * PROTECTING TAGS & COMMITS :
 * This just means protecting the Trees & Blobs under those Tags and Commits, as specified above. Changing other
 * state - such as the message, or author, or referenced commit Ids (and consequently the object Id of the target
 * object itself) is very much up for grabs. I gotta change your history, or I've no business being here.
 */
object ProtectedObjectCensus {

  // The empty census: nothing is protected
  val None = ProtectedObjectCensus()

  /**
   * Builds the census for a set of revision strings, resolving each against the repository.
   *
   * @param revisions revision strings (whatever `repo.resolve` accepts) whose content should be protected
   * @throws IllegalArgumentException if a revision can not be resolved in the repository
   */
  def apply(revisions: Set[String])(implicit repo: Repository): ProtectedObjectCensus = {

    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    // resolved object -> the revision strings that resolve to it (several revisions may name the same object)
    val objectProtection = revisions.groupBy { revision =>
      Option(repo.resolve(revision)).getOrElse { throw new IllegalArgumentException(
        s"Couldn't find '$revision' in ${repo.topDirectory.getAbsolutePath} - are you sure that exists?"
      )}.asRevObject
    }

    // blobs come from direct blob references and tag references
    // trees come from direct tree references, commit & tag references

    // keyed by Left(blob) or Right(tree), as the two 'collect' calls below rely on
    // NOTE(review): groupUp is a project collection decorator - presumably groupBy followed by a transform of each group; confirm
    val treeAndBlobProtection = objectProtection.keys.groupUp(treeOrBlobPointedToBy)(_.toSet) // use Either?

    val directBlobProtection = treeAndBlobProtection collect {
      case (Left(blob), p) => blob.getId -> p
    }
    val treeProtection = treeAndBlobProtection collect {
      case (Right(tree), p) => tree -> p
    }
    // every blob found under a protected tree, mapped to the protected tree(s) it was found under
    val indirectBlobProtection = treeProtection.keys.flatMap(tree => allBlobsUnder(tree).map(_ -> tree)).groupUp(_._1)(_.map(_._2).toSet)

    ProtectedObjectCensus(objectProtection, treeProtection, directBlobProtection, indirectBlobProtection)
  }
}

/**
 * A record of which objects are protected, and by what.
 *
 * @param protectorRevsByObject  protected object -> the revision strings that resolved to it
 * @param treeProtection         protected tree -> the protected objects that point to it
 * @param directBlobProtection   blob id -> the protected objects that point directly to that blob
 * @param indirectBlobProtection blob id -> the protected trees that blob was found under
 */
case class ProtectedObjectCensus(protectorRevsByObject: Map[RevObject, Set[String]] = Map.empty,
                                 treeProtection: Map[RevTree, Set[RevObject]] = Map.empty,
                                 directBlobProtection: Map[ObjectId, Set[RevObject]] = Map.empty,
                                 indirectBlobProtection: Map[ObjectId, Set[RevTree]] = Map.empty) {

  // empty means no revision resolved to a protected object
  val isEmpty = protectorRevsByObject.isEmpty

  // all protected blob ids, whether protected directly or via a protected tree
  lazy val blobIds: Set[ObjectId] = directBlobProtection.keySet ++ indirectBlobProtection.keySet

  lazy val treeIds = treeProtection.keySet

  // blobs only for completeness here
  lazy val fixedObjectIds: Set[ObjectId] = treeIds ++ blobIds
}
object ProtectedObjectDirtReport {
  /**
   * Produces one report per protected object, by asking a cleaner that has NO protection configured what it
   * would have done to the content of that object.
   *
   * @param objectIdCleanerConfig the real cleaning configuration (its census supplies the protected objects)
   * @param objectDB              object database handed to the protection-free cleaner
   */
  def reportsFor(objectIdCleanerConfig: ObjectIdCleaner.Config, objectDB: ObjectDatabase)(implicit revWalk: RevWalk) = {
    val protectionFreeConfig = objectIdCleanerConfig.copy(protectedObjectCensus = ProtectedObjectCensus.None)
    val uncaringCleaner: ObjectIdCleaner = new ObjectIdCleaner(protectionFreeConfig, objectDB, revWalk)

    objectIdCleanerConfig.protectedObjectCensus.protectorRevsByObject.keys.map { protectedObject =>
      val content = treeOrBlobPointedToBy(protectedObject)
      val replacement = content.fold(uncaringCleaner.cleanBlob.replacement, uncaringCleaner.cleanTree.replacement)
      ProtectedObjectDirtReport(protectedObject, content.merge, replacement)
    }
  }
}

/**
 * The function of the ProtectedObjectDirtReport is to tell the user that this is the stuff they've decided
 * to protect in their latest commits - it's the stuff The BFG /would/ remove if you hadn't told it to
 * hold back.
 *
 * @param revObject - the protected object (eg protected because it is the HEAD commit, or even by additional refs)
 * @param originalTreeOrBlob - the unmodified content-object referred to by the protected object (may be same object)
 * @param replacementTreeOrBlob - an option, populated if cleaning creates a replacement for the content-object
 */
case class ProtectedObjectDirtReport(revObject: RevObject, originalTreeOrBlob: RevObject, replacementTreeOrBlob: Option[ObjectId]) {
  /** True when cleaning would have replaced the protected content, ie the protected object shelters dirt. */
  val objectProtectsDirt: Boolean = replacementTreeOrBlob.isDefined

  /**
   * The differences between the original content and its would-be replacement, excluding entries whose change
   * type is ADD. `None` when there is no replacement.
   */
  def dirt(implicit revWalk: RevWalk): Option[Seq[DiffEntry]] =
    for (cleanedId <- replacementTreeOrBlob) yield {
      val walk = new TreeWalk(revWalk.getObjectReader)
      walk.setRecursive(true)
      walk.reset()

      // original first, replacement second
      walk.addTree(originalTreeOrBlob.asRevTree)
      walk.addTree(cleanedId.asRevTree)
      walk.setFilter(TreeFilter.ANY_DIFF)

      val allDifferences = DiffEntry.scan(walk).asScala
      allDifferences.filter(_.getChangeType != ADD).toSeq
    }
}
/** Drops every tree entry whose file name is matched by the given matcher. */
class FileDeleter(fileNameMatcher: TextMatcher) extends Cleaner[TreeBlobs] {
  override def apply(tbs: TreeBlobs) = tbs.entries.filter(entry => !fileNameMatcher(entry.filename))
}

/** Drops every tree entry whose blob id is in the given set. */
class BlobRemover(blobIds: Set[ObjectId]) extends Cleaner[TreeBlobs] {
  override def apply(treeBlobs: TreeBlobs) = treeBlobs.entries.filterNot(entry => blobIds.contains(entry.objectId))
}

/**
 * Replaces every tree entry whose blob id is in `badBlobs` with a regular-file entry named
 * `<original name>.REMOVED.git-id`, whose content is the name (hex id) of the removed blob.
 * Other entries are passed through unchanged.
 */
class BlobReplacer(badBlobs: Set[ObjectId], blobInserter: => BlobInserter) extends Cleaner[TreeBlobs] {
  override def apply(treeBlobs: TreeBlobs) = treeBlobs.entries.map { entry =>
    if (badBlobs.contains(entry.objectId)) {
      TreeBlobEntry(FileName(entry.filename + ".REMOVED.git-id"), RegularFile, blobInserter.insert(entry.objectId.name.getBytes))
    } else {
      entry
    }
  }
}
/** A memoisation strategy: given a function, produces a memoised version of it. */
trait Memo[K, V] {
  def apply(z: K => V): MemoFunc[K, V]
}

/** A memoised function that can also expose what it has remembered, and its cache statistics. */
trait MemoFunc[K,V] extends (K => V) {
  def asMap(): Map[K,V]

  def stats(): CacheStats
}

object MemoUtil {

  /** Wraps a "function => memoised function" builder as a [[Memo]]. */
  def memo[K, V](f: (K => V) => MemoFunc[K, V]): Memo[K, V] = new Memo[K, V] {
    def apply(z: K => V) = f(z)
  }

  /**
   * A caching wrapper for a function (V => V), backed by a Guava LoadingCache that has no eviction configured.
   *
   * Every value the wrapped function returns is also stored as a key that maps to itself, so cleaning an
   * already-cleaned value gives back that same value without calling the function again.
   *
   * @param fixedEntries values that are pre-registered as mapping to themselves (ie are never altered)
   */
  def concurrentCleanerMemo[V](fixedEntries: Set[V] = Set.empty[V]): Memo[V, V] = {
    memo[V, V] {
      (f: Cleaner[V]) =>
        // lazy, because the cache and 'fix' refer to each other: fix is the cache's post-calculation hook
        lazy val permanentCache = loaderCacheFor(f)(fix)

        def fix(v: V): Unit = {
          // enforce that once any value is returned, it is 'good' and therefore an identity-mapped key as well
          permanentCache.put(v, v)
        }

        fixedEntries foreach fix

        new MemoFunc[V, V] {
          def apply(k: V) = permanentCache.get(k)

          // only genuine changes are reported: identity mappings are filtered out
          def asMap() = permanentCache.asMap().asScala.view.filter {
            case (oldId, newId) => newId != oldId
          }.toMap

          override def stats(): CacheStats = permanentCache.stats()
        }
    }
  }

  /**
   * Builds a stats-recording LoadingCache whose loader computes a value with `calc`, hands that value to
   * `postCalc`, and then returns it.
   */
  def loaderCacheFor[K, V](calc: K => V)(postCalc: V => Unit): LoadingCache[K, V] =
    CacheBuilder.newBuilder.asInstanceOf[CacheBuilder[K, V]].recordStats().build(new CacheLoader[K, V] {
      def load(key: K): V = {
        val v = calc(key)
        postCalc(v)
        v
      }
    })
}
object Commit {
  /** Builds the model [[Commit]] (node + arcs) for a JGit RevCommit. */
  def apply(revCommit: RevCommit): Commit = Commit(CommitNode(revCommit), revCommit.arcs)
}

/**
 * A commit, split into its 'node' (author, committer, message, encoding) and its 'arcs' (parents and tree).
 */
case class Commit(node: CommitNode, arcs: CommitArcs) {
  /** The commit as the byte array produced by a JGit `CommitBuilder` populated from this node and these arcs. */
  def toBytes: Array[Byte] = {
    val c = new CommitBuilder
    c.setParentIds(arcs.parents.asJava)
    c.setTreeId(arcs.tree)

    c.setAuthor(node.author)
    c.setCommitter(node.committer)
    c.setEncoding(node.encoding)
    c.setMessage(node.message)

    c.toByteArray
  }

  /** The object id this commit's bytes hash to (computed with a formatter, nothing is written anywhere). */
  lazy val id = new ObjectInserter.Formatter().idFor(OBJ_COMMIT, toBytes)

  override lazy val toString = s"commit[${id.shortName}${node.subject.map(s=> s" '${s.take(50)}'").getOrElse("")}]"
}

/** The outgoing references of a commit: its parent commit ids and its tree id. */
case class CommitArcs(parents: Seq[ObjectId], tree: ObjectId) {
  /** These arcs with every parent passed through `cleanCommit` and the tree through `cleanTree`. */
  def cleanWith(cleaner: ObjectIdCleaner) = CommitArcs(parents map cleaner.cleanCommit, cleaner.cleanTree(tree))
}

object CommitNode {
  /**
   * Extracts the node data of a RevCommit. If the commit's encoding has an illegal or unsupported charset
   * name, UTF-8 is used instead.
   */
  def apply(c: RevCommit): CommitNode = CommitNode(c.getAuthorIdent, c.getCommitterIdent, c.getFullMessage,
    try c.getEncoding catch {case e @ (_ : IllegalCharsetNameException | _ : UnsupportedCharsetException) => UTF_8})
}

/**
 * The non-structural data of a commit.
 *
 * Footers are the `Key: value` lines found after the last blank line of the message. A message without any
 * blank line consists of a single paragraph, and so has no footers.
 */
case class CommitNode(author: PersonIdent, committer: PersonIdent, message: String, encoding: Charset = UTF_8) {
  /** The first line of the message, if the message has any lines. */
  lazy val subject = message.linesIterator.to(LazyList).headOption

  /** Index of the last blank line ("\n\n") in the message, or -1 if the message has none. */
  lazy val lastParagraphBreak = message.lastIndexOf("\n\n")

  /** The message up to (not including) the last paragraph break, or the whole message if it has no footers. */
  lazy val messageWithoutFooters = if (footers.isEmpty) message else (message take lastParagraphBreak)

  /** The footers found after the last paragraph break, in message order. */
  lazy val footers: List[Footer] =
    if (lastParagraphBreak < 0) {
      // No paragraph break at all: without this guard drop(-1) would scan the whole message, so a subject
      // like "Fix: foo" would be mistaken for a footer (and messageWithoutFooters would become "").
      Nil
    } else {
      message.drop(lastParagraphBreak).linesIterator.collect {
        case Footer.FooterPattern(key, value) => Footer(key, value)
      }.toList
    }

  /**
   * A copy of this node with the footer appended to the message - on a new line if the message already has
   * footers, otherwise in a new paragraph of its own.
   */
  def add(footer: Footer) = copy(message = message + "\n" + (if (footers.isEmpty) "\n" else "") + footer.toString)
}
object Footer {
  // A footer line is "<key>: <value>", where the key is made of alphanumerics and '-'
  // (roughly ^[A-Za-z0-9-]+:) and any spaces directly after the colon are not part of the value
  val FooterPattern = """([\p{Alnum}-]+): *(.*)""".r

  /** Parses a single line as a footer; `None` if the whole line does not have the footer shape. */
  def apply(footerLine: String): Option[Footer] =
    PartialFunction.condOpt(footerLine) {
      case FooterPattern(footerKey, footerValue) => Footer(footerKey, footerValue)
    }
}

/** A commit-message footer, rendered as "key: value". */
case class Footer(key: String, value: String) {
  override lazy val toString = s"$key: $value"
}
/** Implicit enrichments shared across the BFG model package. */
package object model {

  /** Exposes a commit's parents and tree as a [[CommitArcs]] value. */
  implicit class RichRevCommit(revCommit: RevCommit) {
    // Built on first access only, then cached for this wrapper instance.
    lazy val arcs: CommitArcs = {
      val parentCommits = revCommit.getParents.toIndexedSeq
      CommitArcs(parentCommits, revCommit.getTree)
    }
  }

  /** Lets a `Path` be resolved against a whole sequence of path segments at once. */
  implicit class RichPath(path: Path) {
    /**
     * Resolves each segment in turn, starting from the wrapped path.
     *
     * @param pathSegments segments to append, outermost first
     * @return the wrapped path itself when `pathSegments` is empty
     */
    def resolve(pathSegments: Seq[String]): Path =
      pathSegments.foldLeft(path) { (resolvedSoFar, segment) => resolvedSoFar.resolve(segment) }
  }
}
/** Helpers for timing a unit of work while reporting it to a JGit progress monitor. */
object Timing {
  //  def measure[T](block: => T) = {
  //    val start = nanoTime
  //    val result = block
  //    val duration = nanoTime - start
  //    println("duration="+duration)
  //    result
  //  }

  /**
   * Runs `block` between `beginTask`/`endTask` calls on the progress monitor and prints
   * how long it took, in milliseconds, to standard output.
   *
   * @param taskName        label passed to the monitor and used in the printed summary
   * @param workSize        total work units announced to the monitor
   * @param block           the work to run; evaluated exactly once
   * @param progressMonitor monitor notified of task start and end
   * @return whatever `block` evaluates to
   */
  def measureTask[T](taskName: String, workSize: Int)(block: => T)(implicit progressMonitor: ProgressMonitor) = {
    progressMonitor.beginTask(taskName, workSize)
    val startedAt = nanoTime
    val outcome = block
    // Elapsed time is captured before endTask() so monitor teardown is not counted.
    val elapsedNanos = nanoTime - startedAt
    progressMonitor.endTask()
    val elapsedMillis = NANOSECONDS.toMillis(elapsedNanos)
    println(taskName + " completed in %,d ms.".format(elapsedMillis))
    outcome
  }
}
/**
 * An ordered list of include/exclude filters; the last filter whose predicate matches
 * a value decides whether that value is included.
 *
 * @param filters filters in declaration order (later filters take precedence)
 */
case class IncExcExpression[-A](filters: Seq[Filter[A]]) {
  // The first filter implies a catch-all default of the opposite kind; with no filters
  // at all, everything is included. Reversed so that later filters are consulted first.
  lazy val searchPath = {
    val impliedDefault = filters.headOption.map(_.impliedPredecessor).getOrElse(Include.everything)
    (impliedDefault +: filters).reverse
  }

  /** @return the `included` flag of the first filter on the search path that matches `a` */
  def includes(a: A): Boolean =
    searchPath.collectFirst { case filter if filter.predicate(a) => filter.included }.get
}

/** A single include-or-exclude rule over values of type `A`. */
sealed trait Filter[-A] {
  /** Whether a value matched by this filter counts as included. */
  val included: Boolean

  /** Decides whether this filter applies to a value. */
  val predicate: A => Boolean

  /** The catch-all filter assumed to precede this one when it is first in a list. */
  val impliedPredecessor: Filter[A]

  def isDefinedAt(a: A) = predicate(a)
}


object Include {
  /** An include filter that matches every value. */
  def everything = Include(const(true))
}

object Exclude {
  /** An exclude filter that matches every value. */
  def everything = Exclude(const(true))
}

/** Includes the values matched by `predicate`; implies that everything else starts out excluded. */
case class Include[A](predicate: A => Boolean) extends Filter[A] {
  val included = true
  lazy val impliedPredecessor = Exclude.everything
}

/** Excludes the values matched by `predicate`; implies that everything else starts out included. */
case class Exclude[A](predicate: A => Boolean) extends Filter[A] {
  val included = false
  lazy val impliedPredecessor = Include.everything
}
/** Parsing and formatting of human-readable byte sizes such as `128K` or `1.5 MB`. */
object ByteSize {

  import math._

  // Index in this sequence is the power of `unit`: 'B' = 1024^0, 'K' = 1024^1, ...
  val magnitudeChars = Seq('B', 'K', 'M', 'G', 'T', 'P')
  val unit = 1024

  /**
   * Parses a size made of an integer followed by a single magnitude character,
   * e.g. `"10K"` or `"1m"` (the magnitude is case-insensitive).
   *
   * @param v the text to parse
   * @return the size in bytes
   * @throws IllegalArgumentException if `v` is empty or does not end in a known magnitude
   *                                  character (a non-numeric prefix raises its subclass
   *                                  `NumberFormatException`)
   */
  def parse(v: String): Long = {
    def unitMissing = new IllegalArgumentException(s"Size unit is missing (ie ${magnitudeChars.mkString(", ")})")

    // An empty string has no last character to inspect: report it as a missing unit
    // rather than letting an index-out-of-bounds error escape.
    if (v.isEmpty) throw unitMissing

    magnitudeChars.indexOf(v.last.toUpper) match {
      case -1 => throw unitMissing
      case index => v.dropRight(1).toLong << (index * 10) // each magnitude step is 2^10
    }
  }

  /**
   * Formats a byte count using the largest magnitude that keeps the number >= 1.
   *
   * @param bytes the size in bytes
   * @return e.g. `"512 B "` below one unit, otherwise one decimal place such as `"1.5 KB"`
   */
  def format(bytes: Long): String = {
    if (bytes < unit) s"$bytes B " else {
      // Sizes of 2^60 bytes and above would index past the last known magnitude,
      // so clamp to the largest one available instead of throwing.
      val exp = min((log(bytes.toDouble) / log(unit)).toInt, magnitudeChars.size - 1)
      val pre = magnitudeChars(exp)
      "%.1f %sB".format(bytes / pow(unit, exp), pre)
    }
  }

}
/** Plain-text table rendering. */
object Tables {
  /**
   * Renders a header and data rows as aligned text lines: the header, a dashed rule,
   * then one line per row with cells separated by `" | "`.
   *
   * @param header      column titles, one product element per column
   * @param data        rows; each row is expected to have the same arity as the first row
   * @param maxDataRows maximum number of data rows shown before the output is cut short
   *                    with a `"..."` line
   * @return the rendered lines; just the header and rule when `data` is empty
   */
  def formatTable(header: Product, data: Seq[Product], maxDataRows: Int = 16): Seq[String] = {
    // Fall back to the header's arity so an empty data set renders instead of throwing.
    val numColumns = data.headOption.getOrElse(header).productArity
    val allRows = data :+ header
    val sizes: Seq[Int] = (0 until numColumns).map(i => allRows.map(_.productElement(i).toString.length).max)

    def padLine(l: Product): IndexedSeq[String] =
      (0 until numColumns).map(c => l.productElement(c).toString.padTo(sizes(c), ' '))

    // Same width as the " | " separator used for data rows, so header columns line up with them.
    val headerLine = padLine(header).mkString("   ")
    val dataLines = data.map(l => padLine(l).mkString(" | "))

    // +2 accounts for the header and rule lines that precede the data rows.
    Text.abbreviate(headerLine +: "-" * headerLine.size +: dataLines, "...", maxDataRows + 2).toSeq
  }
}
/** Small helpers for presenting collections as text. */
object Text {

  /**
   * Cuts a collection short when it holds more than `maxElements` items.
   *
   * @param elems           the items to show
   * @param truncationToken element appended in place of the omitted tail
   * @param maxElements     largest size returned untouched
   * @return `elems` itself when it fits; otherwise its first `maxElements - 1` items
   *         followed by `truncationToken`
   */
  def abbreviate[A](elems: Iterable[A], truncationToken: A, maxElements: Int = 3) = {
    // Looking at one element past the limit is enough to know whether truncation is needed.
    val probe = elems.take(maxElements + 1)
    val fits = probe.size <= maxElements
    if (fits) elems else probe.take(maxElements - 1).toSeq :+ truncationToken
  }

  /**
   * Describes the size of a collection with a naively pluralised noun.
   *
   * @return e.g. `"1 commit"` or `"3 commits"`
   */
  def plural[A](list: Iterable[A], noun: String) = {
    val count = list.size
    val suffix = if (count == 1) "" else "s"
    s"$count $noun$suffix"
  }
}
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/exampleWithInitialCleanHistory.git.zip -------------------------------------------------------------------------------- /bfg-library/src/test/resources/sample-repos/folder-example.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/folder-example.git.zip -------------------------------------------------------------------------------- /bfg-library/src/test/resources/sample-repos/footers.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/footers.git.zip -------------------------------------------------------------------------------- /bfg-library/src/test/resources/sample-repos/taleOfTwoBranches.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg-library/src/test/resources/sample-repos/taleOfTwoBranches.git.zip -------------------------------------------------------------------------------- /bfg-library/src/test/scala/com/madgag/git/LFSSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Roberto Tyley 3 | * 4 | * This file is part of 'BFG Repo-Cleaner' - a tool for removing large 5 | * or troublesome blobs from Git repositories. 
/** Checks that BFG's Git LFS pointer support agrees with the `git lfs` tooling. */
class LFSSpec extends AnyFlatSpec with Matchers with OptionValues {
  "Our implementation of Git LFS Pointers" should "create pointers that have the same Git id as the ones produced by `git lfs pointer`" in {
    val pointer = LFS.Pointer("b2893eddd9b394bfb7efadafda2ae0be02c573fdd83a70f26c781a943f3b7016", 21616)

    // Hash the pointer text as a blob without writing it to any repository.
    val blobIdFormatter = new ObjectInserter.Formatter()
    val pointerObjectId = blobIdFormatter.idFor(OBJ_BLOB, pointer.bytes)

    pointerObjectId shouldBe "1d90744cffd9e9f324870ed60b6d1258e56a39e1".asObjectId
  }

  it should "have the correctly sharded path" in {
    val pointer = LFS.Pointer("b2893eddd9b394bfb7efadafda2ae0be02c573fdd83a70f26c781a943f3b7016", 21616)

    val expectedPath = Seq("b2", "89", "b2893eddd9b394bfb7efadafda2ae0be02c573fdd83a70f26c781a943f3b7016")

    pointer.path shouldBe expectedPath
  }

  it should "calculate pointers correctly directly from the Git database, creating a temporary file" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    val tmpFile = createTempFile(s"bfg.test.git-lfs",".conv")

    val blobLoader = abbrId("06d7").open
    val pointer = LFS.pointerFor(blobLoader, tmpFile)

    pointer shouldBe Pointer("5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef", 1024)

    // The blob content should have been written out to the temporary file in full.
    Files.size(tmpFile) shouldBe 1024
  }
}
/** Exercises blob reachability against the bundled example repository. */
class GitUtilSpec extends AnyFlatSpec with Matchers {
  implicit val repo: FileRepository = unpackRepo("/sample-repos/example.git.zip")

  "reachable blobs" should "match expectations" in {
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    val startCommit = abbrId("475d").asRevCommit
    val reachableBlobs = allBlobsReachableFrom(startCommit)

    reachableBlobs shouldBe Set("d8d1", "34bd", "e69d", "c784", "d004").map(abbrId)
  }
}
/** Verifies how footers are appended to commit messages. */
class MessageFooterSpec extends AnyFlatSpec with Matchers {

  val person = new PersonIdent("Dave Eg", "dave@e.com")

  /** Builds a commit authored and committed by `person` with the given message. */
  def commit(m: String) = CommitNode(person, person, m)

  "Message footers" should "append footer without new paragraph if footers already present" in {
    val messageWithFooter = "Sub\n\nmessage\n\nSigned-off-by: Joe Eg "

    val updatedCommit = commit(messageWithFooter).add(Footer("Foo", "Bar"))

    // The new footer joins the existing footer paragraph: a single newline, no blank line.
    updatedCommit.message shouldBe messageWithFooter + "\nFoo: Bar"
  }

  it should "create paragraph break if no footers already present" in {
    val messageWithoutFooter = "Sub\n\nmessage"

    val updatedCommit = commit(messageWithoutFooter).add(Footer("Foo", "Bar"))

    // With no footer paragraph yet, a blank line has to separate the footer from the body.
    updatedCommit.message shouldBe messageWithoutFooter + "\n\nFoo: Bar"
  }

  // def footersViaJGit(commit: RevCommit) = commit.getFooterLines.map(f => Footer(f.getKey, f.getValue)).toList
}
/** Checks that tree entries sort the way Git sorts them. */
class TreeEntrySpec extends AnyFlatSpec with Matchers {

  /** Builds a tree entry with the given mode and name, pointing at the zero object id. */
  def a(mode: FileMode, name: String) = Tree.Entry(FileName(name), mode, zeroId)

  "Tree entry ordering" should "match ordering used by Git" in {
    val hyphenatedDir = a(TREE, "agit-test-utils")
    val prefixDir = a(TREE, "agit")

    // The longer, hyphenated directory name is expected to sort before its own prefix.
    hyphenatedDir should be < prefixDir
  }
}
/** Verifies that matching blobs are moved into the LFS store and replaced by pointers. */
class LfsBlobConverterSpec extends AnyFlatSpec with Matchers with OptionValues with Inspectors with Eventually {

  "LfsBlobConverter" should "successfully shift the blob to the LFS store" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    val blobsBefore = Tree(repo.resolve("early-release^{tree}")).blobs
    val blobsAfter = clean(blobsBefore, "*ero*")
    val conversionDiff = blobsBefore.diff(blobsAfter)

    conversionDiff.changed shouldBe Set(FileName("one-kb-zeros"))
    conversionDiff.unchanged should contain allOf(FileName("hero"), FileName("zero"))

    verifyPointersForChangedFiles(conversionDiff)
  }

  it should "not do damage if run twice - ie don't create a pointer for a pointer!" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    val blobsBefore = Tree(repo.resolve("early-release^{tree}")).blobs

    val blobsAfterFirstRun = clean(blobsBefore, "*ero*")
    val firstDiff = blobsBefore.diff(blobsAfterFirstRun)
    firstDiff.changed shouldBe Set(FileName("one-kb-zeros"))

    // A second pass over already-converted blobs must leave them untouched.
    val blobsAfterSecondRun = clean(blobsAfterFirstRun, "*ero*")
    blobsAfterFirstRun.diff(blobsAfterSecondRun).changed shouldBe empty

    verifyPointersForChangedFiles(firstDiff) // Are the LFS files still intact?
  }


  /** Runs an `LfsBlobConverter` for `glob` over the given tree blobs. */
  def clean(oldTreeBlobs: TreeBlobs, glob: String)(implicit repo: FileRepository): TreeBlobs = {
    val lfsConversion = new LfsBlobConverter(glob, repo)
    lfsConversion.apply(oldTreeBlobs)
  }

  /** Asserts that `fileName` changed in `diff`, kept its file mode, and now holds a valid LFS pointer. */
  def verifyPointerInsertedFor(fileName: FileName, diff: MapDiff[FileName, (BlobFileMode, ObjectId)])(implicit repo: FileRepository) = {
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    diff.changed should contain(fileName)

    val beforeAndAfter = diff.changedMap(fileName)

    // Only the blob id may change; the file mode has to survive the conversion.
    beforeAndAfter(After)._1 shouldBe beforeAndAfter(Before)._1

    val idsBySide = beforeAndAfter.mapV(_._2)
    val originalFileId = idsBySide(Before)
    val pointerObjectId = idsBySide(After)

    verifyPointerFileFor(originalFileId, pointerObjectId)
  }

  /** Asserts that the pointer blob refers to a file in the repo's LFS store holding the original content. */
  def verifyPointerFileFor(originalFileId: ObjectId, pointerObjectId: ObjectId)(implicit repo: FileRepository) = {
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    val pointer = Pointer.parse(pointerObjectId.open.getCachedBytes)

    val lfsObjectSegments = Seq("lfs", "objects") ++ pointer.path
    val lfsStoredFile: Path = repo.getDirectory.toPath.resolve(lfsObjectSegments)

    Files.exists(lfsStoredFile) shouldBe true

    Files.size(lfsStoredFile) shouldBe pointer.blobSize

    eventually { readAllBytes(lfsStoredFile).blobId } shouldBe originalFileId
  }

  /** Asserts that the only added file is `.gitattributes`, nothing was removed, and every changed file became a pointer. */
  def verifyPointersForChangedFiles(diff: MapDiff[FileName, (BlobFileMode, ObjectId)])(implicit repo: FileRepository) = {
    diff.only(Before) shouldBe empty
    diff.only(After).keys shouldBe Set(FileName(".gitattributes"))

    forAll(diff.changed) { changedFile =>
      verifyPointerInsertedFor(changedFile, diff)
    }
  }
}
/** Regression spec for cleaning very deep histories (currently ignored). */
class ObjectIdCleanerSpec extends AnyFlatSpec with Matchers {

  "cleaning" should "not have a StackOverflowError cleaning a repo with deep history" ignore new unpackedRepo("/sample-repos/deep-history.zip") {
    val dirtyCommitWithDeepHistory = "d88ac4f99511667fc0617ea026f3a0ce8a25fd07".asObjectId

    val config = ObjectIdCleaner.Config(
      ProtectedObjectCensus.None,
      treeBlobsCleaners = Seq(new FileDeleter(Literal("foo")))
    )

    ensureCleanerWith(config)
      .removesDirtOfCommitsThat(haveFile("foo"))
      .whenCleaning(dirtyCommitWithDeepHistory)
  }

}

/** Test fixture adding a small fluent DSL for "cleaner removes dirt" assertions on an unpacked repo. */
class unpackedRepo(filePath: String) extends bfg.test.unpackedRepo(filePath) {

  class EnsureCleanerWith(config: ObjectIdCleaner.Config) {

    class RemoveDirtOfCommitsThat(commitM: Matcher[RevCommit]) extends Inspectors with Matchers {
      /** Commits returned by `git log` from `c`, reversed so the log's last entry comes first. */
      def histOf(c: ObjectId) = repo.git.log.add(c).call.asScala.toSeq.reverse

      /**
       * Asserts that at least one commit in the history of `oldCommit` matches `commitM`,
       * and that none in the history of the cleaned commit does.
       */
      def whenCleaning(oldCommit: ObjectId): Unit = {
        val cleaner = new ObjectIdCleaner(config, repo.getObjectDatabase, revWalk)

        val dirtyHistory = histOf(oldCommit)
        forAtLeast(1, dirtyHistory) { original =>
          original should commitM
        }

        val cleanCommit = cleaner.cleanCommit(oldCommit)

        val cleanedHistory = histOf(cleanCommit)
        forAll(cleanedHistory) { cleaned =>
          cleaned shouldNot commitM
        }
      }
    }

    def removesDirtOfCommitsThat[T](commitM: Matcher[RevCommit]) = new RemoveDirtOfCommitsThat(commitM)
  }

  def ensureCleanerWith(config: ObjectIdCleaner.Config) = new EnsureCleanerWith(config)
}
/** Covers the hex-id regex and the rewriting of old object ids inside commit messages. */
class ObjectIdSubstitutorSpec extends AnyFlatSpec with Matchers {

  "Object Id Substitutor regex" should "match hex strings" in {
    val textsContainingHexId = Seq(
      "01234567890",
      "decade2001",
      "This is decade2001",
      "This is decade2001 I say"
    )
    // A hex run glued to a non-hex letter on either side is not a candidate id.
    val textsWithoutHexId = Seq(
      "This is Gdecade2001 I say",
      "This is decade2001X I say"
    )

    textsContainingHexId.foreach { text => text should include regex hexRegex }
    textsWithoutHexId.foreach { text => text shouldNot include regex hexRegex }
  }

  "Object Id" should "be substituted in commit message" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val reader = repo.newObjectReader

    // Every old id is mapped to the same replacement id for this test.
    val toFixedId = (_: ObjectId) => abbrId("06d7405020018d")

    val cleanedMessage =
      ObjectIdSubstitutor.OldIdsPublic.replaceOldIds("See 3699910d2baab1 for backstory", reader, toFixedId)

    cleanedMessage shouldBe "See 06d7405020018d [formerly 3699910d2baab1] for backstory"
  }

}
*
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
 */

package com.madgag.git.bfg.cleaner

import com.madgag.git._
import com.madgag.git.bfg.GitUtil._
import com.madgag.git.bfg.cleaner.ObjectIdSubstitutor._
import com.madgag.git.bfg.cleaner.protection.ProtectedObjectCensus
import com.madgag.git.bfg.model.{FileName, RegularFile, TreeBlobEntry}
import com.madgag.git.test._
import com.madgag.textmatching._
import org.apache.commons.io.FilenameUtils
import org.eclipse.jgit.lib.ObjectId
import org.eclipse.jgit.revwalk.RevWalk
import org.eclipse.jgit.util.RawParseUtils
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import java.io.StringReader
import java.net.URLEncoder
import java.util.Properties
import java.util.regex.Pattern._
import scala.PartialFunction.condOpt
import scala.jdk.CollectionConverters._

/**
 * End-to-end checks of `RepoRewriter.rewrite` against the zipped sample repositories:
 * blob removal, commit-message id updating, password redaction and text replacement
 * across several file encodings / newline conventions.
 */
class RepoRewriteSpec extends AnyFlatSpec with Matchers {

  "Git repo" should "not explode" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val reader = repo.newObjectReader

    hasBeenProcessedByBFGBefore(repo) shouldBe false

    val blobsToRemove = Set(abbrId("06d740"))
    RepoRewriter.rewrite(repo, ObjectIdCleaner.Config(ProtectedObjectCensus(Set("HEAD")), OldIdsPublic, Seq(FormerCommitFooter), treeBlobsCleaners = Seq(new BlobRemover(blobsToRemove))))

    val allCommits = repo.git.log.all.call.asScala.toSeq

    // Map of commit -> unwanted blobs still reachable from it; must end up empty.
    val unwantedBlobsByCommit = allCommits.flatMap(commit => {
      val unwantedBlobs = allBlobsReachableFrom(commit).intersect(blobsToRemove).map(_.shortName)
      if (unwantedBlobs.nonEmpty) Some(commit.shortName -> unwantedBlobs) else None
    }).toMap

    unwantedBlobsByCommit shouldBe empty

    allCommits.head.getFullMessage should include(FormerCommitFooter.Key)

    hasBeenProcessedByBFGBefore(repo) shouldBe true
  }

  "Repo rewriter" should "clean commit messages even on clean branches, because commit messages may reference commits from dirty ones" in {
    implicit val repo = unpackRepo("/sample-repos/taleOfTwoBranches.git.zip")
    implicit val revWalk = new RevWalk(repo)

    def commitMessageForRev(rev: String) = repo.resolve(rev).asRevCommit.getFullMessage

    commitMessageForRev("pure") should include("6e76960ede2addbbe7e")

    RepoRewriter.rewrite(repo, ObjectIdCleaner.Config(ProtectedObjectCensus.None, OldIdsPrivate, Seq(new CommitMessageObjectIdsUpdater(OldIdsPrivate)), treeBlobsCleaners = Seq(new FileDeleter(Literal("sin")))))

    commitMessageForRev("pure") should not include "6e76960ede2addbbe7e"
  }

  it should "remove passwords" in {
    implicit val repo = unpackRepo("/sample-repos/example.git.zip")
    implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

    // Parses the text as a java.util.Properties document.
    def propertiesIn(contents: String) = {
      val p = new Properties()
      p.load(new StringReader(contents))
      p
    }

    // Text of folder/secret-passwords.txt as stored in the given commit.
    def passwordFileContentsIn(id: ObjectId) = {
      val cleanedPasswordFile = repo.resolve(id.name + ":folder/secret-passwords.txt")
      RawParseUtils.decode(reader.open(cleanedPasswordFile).getCachedBytes)
    }

    // Extractor matching on a file name's extension.
    object FileExt {
      def unapply(fileName: String) = Option(FilenameUtils.getExtension(fileName))
    }

    val blobTextModifier = new BlobTextModifier {
      override def lineCleanerFor(entry: TreeBlobEntry) = condOpt(entry.filename.string) {
        case FileExt("txt") | FileExt("scala") => """(\.password=).*""".r --> (_.group(1) + "*** PASSWORD ***")
      }

      val threadLocalObjectDBResources = repo.getObjectDatabase.threadLocalResources
    }
    val cleanedObjectMap = RepoRewriter.rewrite(repo, ObjectIdCleaner.Config(ProtectedObjectCensus(Set("HEAD")), treeBlobsCleaners = Seq(blobTextModifier)))

    val oldCommitContainingPasswords = abbrId("37bcc89")

    val cleanedCommitWithPasswordsRemoved = cleanedObjectMap(oldCommitContainingPasswords).asRevCommit

    val originalContents = passwordFileContentsIn(oldCommitContainingPasswords)
    val cleanedContents = passwordFileContentsIn(cleanedCommitWithPasswordsRemoved)

    cleanedContents should (include("science") and include("database.password="))
    originalContents should include("correcthorse")
    cleanedContents should not include "correcthorse"

    // Redaction must not add or drop any property keys.
    propertiesIn(cleanedContents).asScala.toMap should have size propertiesIn(originalContents).size
  }

  /**
   * Rewrites the 'encodings' sample repo replacing `before` with `after` in every blob, then
   * checks that `<parentPath>/<fileNamePrefix>-ORIGINAL.<fileNamePostfix>` on master has become
   * identical to the pre-baked `<fileNamePrefix>-MODIFIED-<before>-<after>.<fileNamePostfix>`
   * file (with `before`/`after` URL-encoded in that file name).
   */
  def textReplacementOf(parentPath: String, fileNamePrefix: String, fileNamePostfix: String, before: String, after: String) = {
    import java.nio.charset.StandardCharsets.UTF_8

    implicit val repo = unpackRepo("/sample-repos/encodings.git.zip")
    val beforeAndAfter = Seq(before, after).map(URLEncoder.encode(_, "UTF-8")).mkString("-")
    val filename = s"$fileNamePrefix-ORIGINAL.$fileNamePostfix"
    val beforeFile = s"$parentPath/$filename"
    val afterFile = s"$parentPath/$fileNamePrefix-MODIFIED-$beforeAndAfter.$fileNamePostfix"

    val blobTextModifier = new BlobTextModifier {
      def lineCleanerFor(entry: TreeBlobEntry) = Some(quote(before).r --> (_ => after))

      val threadLocalObjectDBResources = repo.getObjectDatabase.threadLocalResources
    }

    RepoRewriter.rewrite(repo, ObjectIdCleaner.Config(ProtectedObjectCensus.None, treeBlobsCleaners = Seq(blobTextModifier)))

    val cleanedFile = repo.resolve(s"master:$beforeFile")
    val expectedFile = repo.resolve(s"master:$afterFile")

    // Fail with a readable assertion rather than a NullPointerException if either path is missing.
    cleanedFile should not be null
    expectedFile should not be null

    implicit val threadLocalObjectReader = repo.getObjectDatabase.threadLocalResources.reader()
    val cleaned = cleanedFile.open.getBytes
    val expected = expectedFile.open.getBytes
    // Decode with an explicit charset so the string comparison does not vary with the JVM's
    // platform default. This comparison only exists to give a readable diff on failure; the
    // object-id assertion below is the byte-exact check.
    val cleanedStr = new String(cleaned, UTF_8)
    val expectedStr = new String(expected, UTF_8)

    cleanedStr shouldBe expectedStr
    cleanedFile shouldBe expectedFile
  }

  "Text modifier" should "handle the short UTF-8" in textReplacementOf("UTF-8", "bushhidthefacts", "txt", "facts", "toffee")

  it should "handle the long UTF-8" in textReplacementOf("UTF-8", "big", "scala", "good", "blessed")

  it should "handle ASCII in SHIFT JIS" in textReplacementOf("SHIFT-JIS", "japanese", "txt", "EUC", "BOOM")

  it should "handle ASCII in ISO-8859-1" in textReplacementOf("ISO-8859-1", "laparabla", "txt", "palpitando", "buscando")

  it should "handle converting Windows newlines to Unix" in textReplacementOf("newlines", "windows", "txt", "\r\n", "\n")

  it should "handle a file that uses LF for newlines" in
    textReplacementOf("newlines", "using-LF", "txt", "file", "blob")

  it should "handle a file that uses CRLF for newlines" in
    textReplacementOf("newlines", "using-CRLF", "txt", "file", "blob")

}

--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/TreeBlobModifierSpec.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012 Roberto Tyley
 *
 * This file is part of 'BFG
Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
 */

package com.madgag.git.bfg.cleaner

import com.google.common.util.concurrent.AtomicLongMap
import com.madgag.git.bfg.cleaner.ObjectIdSubstitutor._
import com.madgag.git.bfg.cleaner.protection.ProtectedObjectCensus
import com.madgag.git.bfg.model.TreeBlobEntry
import com.madgag.git.test._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import scala.jdk.CollectionConverters._

/**
 * Checks that, during a repository rewrite, a `TreeBlobModifier` is asked to fix
 * any given tree entry at most once.
 */
class TreeBlobModifierSpec extends AnyFlatSpec with Matchers {

  "TreeBlobModifier" should "only clean a given tree entry once" in {
    // Pass-through modifier: leaves every entry unchanged, but tallies how many
    // times each entry is handed to `fix`.
    class CountingTreeBlobModifier extends TreeBlobModifier {
      val counts = AtomicLongMap.create[TreeBlobEntry]

      def fix(entry: TreeBlobEntry) = {
        counts.incrementAndGet(entry)
        entry.mode -> entry.objectId
      }
    }

    implicit val repo = unpackRepo("/sample-repos/taleOfTwoBranches.git.zip")

    val counter = new CountingTreeBlobModifier()
    val cleanerConfig = ObjectIdCleaner.Config(
      ProtectedObjectCensus(Set("HEAD")),
      OldIdsPublic,
      treeBlobsCleaners = Seq(counter)
    )

    RepoRewriter.rewrite(repo, cleanerConfig)

    val fixesPerEntry = counter.counts.asMap().asScala.toMap

    fixesPerEntry.size should be >= 4
    all (fixesPerEntry.values) shouldBe 1
  }
}

--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/git/bfg/model/CommitSpec.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012, 2013 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
*/

package com.madgag.git.bfg.model

import com.madgag.git.bfg.test.unpackedRepo
import org.scalatest.Inspectors
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
 * Checks that the `Commit` model derives the same id as the commit it was built from,
 * for every commit in the example sample repository.
 */
class CommitSpec extends AnyFlatSpec with Matchers with Inspectors {
  "Commit model" should "calculate the same Git commit id for any given commit" in new unpackedRepo("/sample-repos/example.git.zip") {
    forAll (commitHist()) { original =>
      val modelled = Commit(original)
      modelled.id shouldBe original.toObjectId
    }
  }

}

--------------------------------------------------------------------------------
/bfg-library/src/test/scala/com/madgag/text/ByteSizeSpecs.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012, 2013 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
*/

package com.madgag.text

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
 * Checks `ByteSize.parse` on strings carrying a B/K/M/G unit suffix, its rejection
 * of unit-less strings, and one `ByteSize.format` rendering.
 */
class ByteSizeSpecs extends AnyFlatSpec with Matchers {
  "Size parser" should "understand 1B" in {
    // Plain byte counts are expected to parse to the same number.
    Seq("0B" -> 0, "1B" -> 1, "2B" -> 2, "10B" -> 10).foreach { case (text, bytes) =>
      ByteSize.parse(text) shouldBe bytes
    }
  }
  it should "understand 3G" in {
    val threeGigs = 3L * 1024 * 1024 * 1024
    ByteSize.parse("3G") shouldBe threeGigs
  }
  it should "understand 1G" in {
    val oneGig = 1024 * 1024 * 1024
    ByteSize.parse("1G") shouldBe oneGig
  }
  it should "understand 1M" in {
    val oneMeg = 1024 * 1024
    ByteSize.parse("1M") shouldBe oneMeg
  }
  it should "understand 3500M" in {
    val manyMegs = 3500L * 1024 * 1024
    ByteSize.parse("3500M") shouldBe manyMegs
  }
  it should "understand 1K" in {
    val oneK = 1024
    ByteSize.parse("1K") shouldBe oneK
  }
  it should "understand 5K" in {
    val fiveK = 5 * 1024
    ByteSize.parse("5K") shouldBe fiveK
  }
  it should "reject strings without a unit" in {
    assertThrows[IllegalArgumentException] {
      ByteSize.parse("1232")
    }
  }

  "Size formatter" should "correctly format" in {
    val rendered = ByteSize.format(1024)
    rendered shouldBe "1.0 KB"
  }
}

--------------------------------------------------------------------------------
/bfg-test/build.sbt:
--------------------------------------------------------------------------------
import Dependencies._

libraryDependencies ++= Seq(scalatest, jgit, scalaGit, scalaGitTest)


--------------------------------------------------------------------------------
/bfg-test/src/main/scala/com/madgag/git/bfg/test/unpackedRepo.scala:
--------------------------------------------------------------------------------
package com.madgag.git.bfg.test

import com.madgag.git._
import com.madgag.git.test._
import org.eclipse.jgit.internal.storage.file.{FileRepository, GC, ObjectDirectory}
import org.eclipse.jgit.lib.Constants.OBJ_BLOB
import org.eclipse.jgit.lib.{ObjectId, ObjectReader, Repository}
import org.eclipse.jgit.revwalk.{RevCommit, RevTree, RevWalk}
import org.eclipse.jgit.treewalk.TreeWalk
import org.scalatest.Inspectors
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import org.scalatest.matchers.{MatchResult, Matcher}

import scala.jdk.CollectionConverters._

/**
 * Test fixture wrapping a sample repository unpacked from `filePath`, together with
 * matchers and "ensure ..." helpers for asserting on its contents before and after a rewrite.
 *
 * @param filePath path passed to `unpackRepo` to obtain the repository
 */
class unpackedRepo(filePath: String) extends AnyFlatSpec with Matchers {

  implicit val repo: FileRepository = unpackRepo(filePath)
  implicit val objectDirectory: ObjectDirectory = repo.getObjectDatabase
  implicit lazy val (revWalk: RevWalk, reader: ObjectReader) = repo.singleThreadedReaderTuple


  /** Matches an object id that refers to a blob of exactly `sizeInBytes` bytes. */
  def blobOfSize(sizeInBytes: Int): Matcher[ObjectId] = Matcher { (objectId: ObjectId) =>
    val objectLoader = objectId.open
    val hasThatSize = objectLoader.getType == OBJ_BLOB && objectLoader.getSize == sizeInBytes
    def thing(boo: String) = s"${objectId.shortName} $boo size of $sizeInBytes"
    MatchResult(hasThatSize, thing("did not have"), thing("had"))
  }

  /** Ids of all packed objects that are blobs of exactly `sizeInBytes` bytes. */
  def packedBlobsOfSize(sizeInBytes: Long): Set[ObjectId] = {
    // Deliberately named `reader`: it shadows the class-level implicit reader, so exactly
    // one implicit ObjectReader is in scope for `objectId.open` below.
    implicit val reader: ObjectReader = repo.newObjectReader()
    try {
      // `.toSet` forces the whole traversal before the reader is released.
      repo.getObjectDatabase.packedObjects.filter { objectId =>
        val objectLoader = objectId.open
        objectLoader.getType == OBJ_BLOB && objectLoader.getSize == sizeInBytes
      }.toSet
    } finally reader.close()
  }

  /** Matches a tree-ish whose tree contains a non-subtree entry called `name`. */
  def haveFile(name: String): Matcher[ObjectId] = haveTreeEntry(name, !_.isSubtree)

  /** Matches a tree-ish whose tree contains a subtree entry called `name`. */
  def haveFolder(name: String): Matcher[ObjectId] = haveTreeEntry(name, _.isSubtree)

  /**
   * Matches a tree-ish whose tree contains an entry called `name` that also satisfies `p`.
   * An id that resolves to a blob rather than a tree never matches.
   */
  def haveTreeEntry(name: String, p: TreeWalk => Boolean)= new Matcher[ObjectId] {
    def apply(treeish: ObjectId) = {
      treeOrBlobPointedToBy(treeish.asRevObject) match {
        case Right(tree) =>
          def thing(boo: String) = s"tree ${treeish.shortName} $boo a '$name' entry"
          MatchResult(
            treeEntryNames(tree, p).contains(name),
            thing("did not contain"),
            thing("contained")
          )
        case Left(_) =>
          MatchResult(
            false,
            s"blob ${treeish.shortName} was not a tree containing '$name'",
            // Negated-failure message: unreachable in practice because `matches` is always false here.
            "When does this happen??!"
          )
      }
    }
  }

  /** Names of the entries of `t` (walked post-order) that satisfy `p`. */
  def treeEntryNames(t: RevTree, p: TreeWalk => Boolean): Seq[String] =
    t.walk(postOrderTraversal = true).withFilter(p).map(_.getNameString).toList

  /**
   * Commit history reachable from `specificRefs` (or from all refs when none are given),
   * in the reverse of the order returned by the log command.
   */
  def commitHist(specificRefs: String*)(implicit repo: Repository): Seq[RevCommit] = {
    val logCommand = repo.git.log
    if (specificRefs.isEmpty) logCommand.all else specificRefs.foldLeft(logCommand)((lc, ref) => lc.add(repo.resolve(ref)))
  }.call.asScala.toSeq.reverse

  /** Adapts a matcher over object ids into a matcher over a commit, applied to the ids found by walking the commit's tree. */
  def haveCommitWhereObjectIds(boom: Matcher[Iterable[ObjectId]])(implicit reader: ObjectReader): Matcher[RevCommit] = boom compose {
    (c: RevCommit) => c.getTree.walk().map(_.getObjectId(0)).toSeq
  }

  /** Adapts an object-id matcher into a repository matcher, applied to whatever `refName` resolves to. */
  def haveRef(refName: String, objectIdMatcher: Matcher[ObjectId]): Matcher[Repository] = objectIdMatcher compose {
    (r: Repository) => r resolve refName // aka s"Ref [$refName]"
  }

  /** Adapts a history matcher into a repository matcher over the history of all refs. */
  def commitHistory(histMatcher: Matcher[Seq[RevCommit]]) = histMatcher compose {
    r: Repository => commitHist()(r)
  }

  /** Adapts a history matcher into a repository matcher over the history of the given refs. */
  def commitHistoryFor(refs: String*)(histMatcher: Matcher[Seq[RevCommit]]) = histMatcher compose {
    r: Repository => commitHist(refs:_*)(r)
  }

  /**
   * Asserts that `expr` satisfies `exprResultMatcher` before `block` runs and is empty
   * afterwards; the repo is garbage-collected before each evaluation of `expr`.
   */
  def ensureRemovalOfBadEggs[S,T](expr : => Iterable[S], exprResultMatcher: Matcher[Iterable[S]])(block: => T) = {
    gc()
    expr should exprResultMatcher

    block

    gc()
    expr shouldBe empty
  }

  /** Runs a JGit garbage collection on the repo with the pack expiry age set to zero. */
  def gc() = {
    val gc = new GC(repo)
    gc.setPackExpireAgeMillis(0)
    gc.gc()
  }


  /** Builder returned by `ensureRemovalFrom`; `commits` is by-name, so it is re-evaluated on each use. */
  class CheckRemovalFromCommits(commits: => Seq[RevCommit]) extends Inspectors {
    /** Asserts at least one commit matches `commitM` before `block` runs, and none do afterwards. */
    def ofCommitsThat[T](commitM: Matcher[RevCommit])(block: => T): Unit = {
      forAtLeast(1, commits) { commit =>
        commit should commitM
      }

      block

      forAll(commits) { commit =>
        commit shouldNot commitM
      }
    }
  }


  def ensureRemovalFrom(commits: => Seq[RevCommit]): CheckRemovalFromCommits = new CheckRemovalFromCommits(commits)

  /** Asserts that `f` evaluates to an equal value before and after running `block`. */
  def ensureInvariantValue[T, S](f: => S)(block: => T) = {
    val originalValue = f
    block
    f should equal(originalValue)
  }

  /** Asserts that the repo satisfies `cond` both before and after running `block`. */
  def ensureInvariantCondition[T, S](cond: Matcher[Repository])(block: => T) = {
    repo should cond
    block
    repo should cond
  }

}

--------------------------------------------------------------------------------
/bfg/build.sbt:
--------------------------------------------------------------------------------
import java.io.{File, FileOutputStream}

import Dependencies.*
import sbt.taskKey

import scala.sys.process.Process
import scala.util.Try

val gitDescription = taskKey[String]("Git description of working dir")

gitDescription := Try[String](Process("git describe --all --always --dirty --long").lineStream.head.replace("heads/","").replace("-0-g","-")).getOrElse("unknown")

libraryDependencies += useNewerJava

mainClass := Some("use.newer.java.Version8")
Compile / packageBin / packageOptions +=
  Package.ManifestAttributes( "Main-Class-After-UseNewerJava-Check" -> "com.madgag.git.bfg.cli.Main" )

// note you don't want the jar name to collide with the non-assembly jar, otherwise confusion abounds.
assembly / assemblyJarName := s"${name.value}-${version.value}-${gitDescription.value}${jgitVersionOverride.map("-jgit-" + _).mkString}.jar"

assembly / assemblyMergeStrategy := {
  case PathList("META-INF", "versions", "9", "module-info.class") => MergeStrategy.discard
  case x =>
    val oldStrategy = (assembly / assemblyMergeStrategy).value
    oldStrategy(x)
}

buildInfoKeys := Seq[BuildInfoKey](version, scalaVersion, gitDescription)

buildInfoPackage := "com.madgag.git.bfg"

crossPaths := false

Compile / packageBin / publishArtifact := false

// replace the conventional main artifact with an uber-jar
addArtifact(Compile / packageBin / artifact, assembly)

val cliUsageDump = taskKey[File]("Dump the CLI 'usage' output to a file")

// Runs the main class in a forked JVM with no arguments and captures its output in a temp file.
cliUsageDump := {
  val usageDumpFile = File.createTempFile("bfg-usage", "dump.txt")

  // Resolve all task inputs up front, before any stream is opened.
  val mainClassName = (Compile / run / mainClass).value getOrElse sys.error("No main class detected.")
  val classpath = Attributed.data((Runtime / fullClasspath).value)
  val args = Seq.empty
  val log = streams.value.log

  val usageOutput = new FileOutputStream(usageDumpFile)
  try {
    val scalaRun = new ForkRun(ForkOptions().withOutputStrategy(CustomOutput(usageOutput)))
    scalaRun.run(mainClassName, classpath, args, log).failed foreach (sys error _.getMessage)
  } finally {
    // Ensure the file handle is released even if the forked run fails to start.
    usageOutput.close()
  }

  usageDumpFile
}

addArtifact( Artifact("bfg", "usage", "txt"), cliUsageDump )

libraryDependencies ++= Seq(
  scopt,
  jgit,
  scalaGitTest % "test"
)

import Tests.*
{
  // Each test whose name contains "RequiresOwnJvm" gets a forked JVM to itself;
  // all remaining tests share one further forked JVM.
  def isolateTestsWhichRequireTheirOwnJvm(tests: Seq[TestDefinition]) = {
    val (testsRequiringIsolation, testsNotNeedingIsolation) = tests.partition(_.name.contains("RequiresOwnJvm"))

    val groups: Seq[Seq[TestDefinition]] = testsRequiringIsolation.map(Seq(_)) :+ testsNotNeedingIsolation

    groups map { group =>
      Group(group.size.toString, group, SubProcess(ForkOptions()))
    }
  }

  Test / testGrouping := isolateTestsWhichRequireTheirOwnJvm( (Test / definedTests).value )
}

Test / fork := true // JGit uses static (ie JVM-wide) config

Test / logBuffered := false

Test / parallelExecution := false


--------------------------------------------------------------------------------
/bfg/src/main/scala/com/madgag/git/bfg/cli/CLIConfig.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012, 2013 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
*/

package com.madgag.git.bfg.cli

import com.madgag.git.bfg.BuildInfo
import com.madgag.git.bfg.GitUtil._
import com.madgag.git.bfg.cleaner._
import com.madgag.git.bfg.cleaner.kit.BlobInserter
import com.madgag.git.bfg.cleaner.protection.ProtectedObjectCensus
import com.madgag.git.bfg.model.FileName.ImplicitConversions._
import com.madgag.git.bfg.model.{FileName, Tree, TreeBlobEntry, TreeBlobs, TreeSubtrees}
import com.madgag.git.{SizedObject, _}
import com.madgag.inclusion.{IncExcExpression, _}
import com.madgag.text.ByteSize
import com.madgag.textmatching.{Glob, TextMatcher, TextMatcherType, TextReplacementConfig}
import org.eclipse.jgit.internal.storage.file.FileRepository
import org.eclipse.jgit.lib._
import org.eclipse.jgit.storage.file.FileRepositoryBuilder
import scopt.{OptionParser, Read}

import java.io.File
import java.nio.file.Files
import scala.jdk.CollectionConverters._


/** Holds the scopt command-line parser that folds `bfg` arguments into a [[CLIConfig]]. */
object CLIConfig {
  val parser = new OptionParser[CLIConfig]("bfg") {

    /**
     * Declares an option whose value is a `TextMatcher` (interpreted as `defaultType` unless the
     * expression says otherwise) and which rejects expressions containing a '/' path separator.
     */
    def fileMatcher(name: String, defaultType: TextMatcherType = Glob) = {
      implicit val textMatcherRead: Read[TextMatcher] = Read.reads { TextMatcher(_, defaultType) }

      opt[TextMatcher](name).valueName(s"<${defaultType.expressionPrefix}>").validate { m =>
        if (m.expression.contains('/')) {
          failure("*** Can only match on filename, NOT path *** - remove '/' path segments")
        } else success
      }
    }

    def readLinesFrom(v: File): Seq[String] = Files.readAllLines(v.toPath).asScala.toSeq

    // Snapshot builds additionally report the git description they were built from.
    val exactVersion = BuildInfo.version + (if (BuildInfo.version.contains("-SNAPSHOT")) s" (${BuildInfo.gitDescription})" else "")

    head("bfg", exactVersion)
    version("version").hidden()

    // Every value-taking option names its value placeholder, so the generated usage text
    // never shows an empty value name.
    opt[String]('b', "strip-blobs-bigger-than").valueName("<size>").text("strip blobs bigger than X (eg '128K', '1M', etc)").action {
      (v , c) => c.copy(stripBlobsBiggerThan = Some(ByteSize.parse(v)))
    }
    opt[Int]('B', "strip-biggest-blobs").valueName("NUM").text("strip the top NUM biggest blobs").action {
      (v, c) => c.copy(stripBiggestBlobs = Some(v))
    }
    opt[File]("strip-blobs-with-ids").abbr("bi").valueName("<blob-ids-file>").text("strip blobs with the specified Git object ids").action {
      (v, c) =>
        c.copy(stripBlobsWithIds = Some(readLinesFrom(v).map(_.trim).filterNot(_.isEmpty).map(_.asObjectId).toSet))
    }
    fileMatcher("delete-files").abbr("D").text("delete files with the specified names (eg '*.class', '*.{txt,log}' - matches on file name, not path within repo)").action {
      (v, c) => c.copy(deleteFiles = Some(v))
    }
    fileMatcher("delete-folders").text("delete folders with the specified names (eg '.svn', '*-tmp' - matches on folder name, not path within repo)").action {
      (v, c) => c.copy(deleteFolders = Some(v))
    }
    opt[String]("convert-to-git-lfs").text("extract files with the specified names (eg '*.zip' or '*.mp4') into Git LFS").action {
      (v, c) => c.copy(lfsConversion = Some(v))
    }
    opt[File]("replace-text").abbr("rt").valueName("<expressions-file>").text("filter content of files, replacing matched text. Match expressions should be listed in the file, one expression per line - " +
      "by default, each expression is treated as a literal, but 'regex:' & 'glob:' prefixes are supported, with '==>' to specify a replacement " +
      "string other than the default of '***REMOVED***'.").action {
      (v, c) => c.copy(textReplacementExpressions = readLinesFrom(v).filterNot(_.trim.isEmpty))
    }
    fileMatcher("filter-content-including").abbr("fi").text("do file-content filtering on files that match the specified expression (eg '*.{txt,properties}')").action {
      (v, c) => c.copy(filenameFilters = c.filenameFilters :+ Include(v))
    }
    fileMatcher("filter-content-excluding").abbr("fe").text("don't do file-content filtering on files that match the specified expression (eg '*.{xml,pdf}')").action {
      (v, c) => c.copy(filenameFilters = c.filenameFilters :+ Exclude(v))
    }
    opt[String]("filter-content-size-threshold").abbr("fs").valueName("<size>").text("only do file-content filtering on files smaller than <size> (default is %1$d bytes)".format(CLIConfig().filterSizeThreshold)).action {
      (v, c) => c.copy(filterSizeThreshold = ByteSize.parse(v))
    }
    opt[String]('p', "protect-blobs-from").valueName("<refs>").text("protect blobs that appear in the most recent versions of the specified refs (default is 'HEAD')").action {
      (v, c) => c.copy(protectBlobsFromRevisions = v.split(',').toSet)
    }
    opt[Unit]("no-blob-protection").text("allow the BFG to modify even your *latest* commit. Not recommended: you should have already ensured your latest commit is clean.").action {
      (_, c) => c.copy(protectBlobsFromRevisions = Set.empty)
    }
    opt[Unit]("strict-object-checking").text("perform additional checks on integrity of consumed & created objects").hidden().action {
      (_, c) => c.copy(strictObjectChecking = true)
    }
    opt[Unit]("private").text("treat this repo-rewrite as removing private data (for example: omit old commit ids from commit messages)").action {
      (_, c) => c.copy(sensitiveData = Some(true))
    }
    opt[String]("massive-non-file-objects-sized-up-to").valueName("<size>").text("increase memory usage to handle over-size Commits, Tags, and Trees that are up to X in size (eg '10M')").action {
      (v, c) => c.copy(massiveNonFileObjects = Some(ByteSize.parse(v)))
    }
    opt[String]("fix-filename-duplicates-preferring").valueName("<filemode>").text("Fix corrupt trees which contain multiple entries with the same filename, favouring the 'tree' or 'blob'").hidden().action {
      (v, c) =>
        val preferredFileMode = v.toLowerCase match {
          case "tree" | "folder" => FileMode.TREE
          case "blob" | "file" => FileMode.REGULAR_FILE
          case other => throw new IllegalArgumentException(s"'$other' should be 'tree' or 'blob'")
        }
        // Orders the preferred mode first, so `minBy` on this ordering picks it.
        val ord: Option[Ordering[FileMode]] = Some(Ordering.by[FileMode, Int](filemode => if (filemode==preferredFileMode) 0 else 1))

        c.copy(fixFilenameDuplicatesPreferring = ord)
    }
    arg[File]("<repo>").optional().action { (x, c) =>
      c.copy(repoLocation = x) } text("file path for Git repository to clean")
  }
}

/**
 * Parsed command-line settings for one run, plus the lazily-built cleaners derived from them.
 * `repoLocation` defaults to the JVM's `user.dir`.
 */
case class CLIConfig(stripBiggestBlobs: Option[Int] = None,
                     stripBlobsBiggerThan: Option[Long] = None,
                     protectBlobsFromRevisions: Set[String] = Set("HEAD"),
                     deleteFiles: Option[TextMatcher] = None,
                     deleteFolders: Option[TextMatcher] = None,
                     fixFilenameDuplicatesPreferring: Option[Ordering[FileMode]] = None,
                     filenameFilters: Seq[Filter[String]] = Nil,
                     filterSizeThreshold: Long = BlobTextModifier.DefaultSizeThreshold,
                     textReplacementExpressions: Iterable[String] = List.empty,
                     stripBlobsWithIds: Option[Set[ObjectId]] = None,
                     lfsConversion: Option[String] = None,
                     strictObjectChecking: Boolean = false,
                     sensitiveData: Option[Boolean] = None,
                     massiveNonFileObjects: Option[Long] = None,
                     repoLocation: File = new File(System.getProperty("user.dir"))) {

  lazy val gitdir = resolveGitDirFor(repoLocation)

  implicit lazy val repo: FileRepository = FileRepositoryBuilder.create(gitdir.get).asInstanceOf[FileRepository]

  lazy val objectProtection = ProtectedObjectCensus(protectBlobsFromRevisions)

  lazy val objectChecker = if (strictObjectChecking) Some(new ObjectChecker()) else None

  lazy val fileDeletion: Option[Cleaner[TreeBlobs]] = deleteFiles.map {
    textMatcher => new FileDeleter(textMatcher)
  }

  // Drops every subtree whose name is matched by the --delete-folders expression.
  lazy val folderDeletion: Option[Cleaner[TreeSubtrees]] = deleteFolders.map {
    textMatcher => { subtrees: TreeSubtrees =>
      TreeSubtrees(subtrees.entryMap.view.filterKeys(filename => !textMatcher(filename)).toMap)
    }
  }

  // For entries sharing a name, keeps the one whose file mode sorts first under the preferred ordering.
  lazy val fixFileNameDuplication: Option[Cleaner[Seq[Tree.Entry]]] = fixFilenameDuplicatesPreferring.map {
    implicit preferredFileModes =>
      { treeEntries: Seq[Tree.Entry] => treeEntries.groupBy(_.name).values.map(_.minBy(_.fileMode)).toSeq }
  }

  lazy val lineModifier: Option[String => String] =
    TextReplacementConfig(textReplacementExpressions, "***REMOVED***")

  lazy val filterContentPredicate: (FileName => Boolean) = f => IncExcExpression(filenameFilters) includes (f.string)

  lazy val blobTextModifier: Option[BlobTextModifier] = lineModifier.map {
    replacer =>
      new BlobTextModifier {
        override val sizeThreshold = filterSizeThreshold

        def lineCleanerFor(entry: TreeBlobEntry) = if (filterContentPredicate(entry.filename)) Some(replacer) else None

        val threadLocalObjectDBResources = repo.getObjectDatabase.threadLocalResources
      }
  }

  lazy val lfsBlobConverter: Option[LfsBlobConverter] = lfsConversion.map { lfsGlobExpr =>
    new LfsBlobConverter(lfsGlobExpr, repo)
  }

  // An explicit --private wins; otherwise any file/folder deletion or text replacement
  // is taken to mean that private data is being removed.
  lazy val privateDataRemoval = sensitiveData.getOrElse(Seq(fileDeletion, folderDeletion, blobTextModifier).flatten.nonEmpty)

  lazy val objectIdSubstitutor = if (privateDataRemoval) ObjectIdSubstitutor.OldIdsPrivate else ObjectIdSubstitutor.OldIdsPublic

  lazy val treeEntryListCleaners = fixFileNameDuplication.toSeq

  lazy val commitNodeCleaners = {
    lazy val formerCommitFooter = if (privateDataRemoval) None else Some(FormerCommitFooter)

    Seq(new CommitMessageObjectIdsUpdater(objectIdSubstitutor)) ++ formerCommitFooter
  }

  lazy val treeBlobCleaners: Seq[Cleaner[TreeBlobs]] = {

    lazy val blobsByIdRemover: Option[BlobRemover] = stripBlobsWithIds.map(new BlobRemover(_))

    lazy val blobRemover: Option[Cleaner[TreeBlobs]] = {
      implicit val progressMonitor: ProgressMonitor = new TextProgressMonitor()

      // Each configured size criterion selects a prefix of the biggest-blobs list.
      val sizeBasedBlobTargetSources = Seq(
        stripBlobsBiggerThan.map(threshold => (s: LazyList[SizedObject]) => s.takeWhile(_.size > threshold)),
        stripBiggestBlobs.map(num => (s: LazyList[SizedObject]) => s.take(num))
      ).flatten

      if (sizeBasedBlobTargetSources.isEmpty) None else {
        val sizedBadIds = sizeBasedBlobTargetSources.flatMap(_(biggestBlobs(repo.getObjectDatabase, progressMonitor))).toSet
        if (sizedBadIds.isEmpty) {
          println("Warning : no large blobs matching criteria found in packfiles - does the repo need to be packed?")
          None
        } else {
          println("Found " + sizedBadIds.size + " blob ids for large blobs - biggest=" + sizedBadIds.max.size + " smallest=" + sizedBadIds.min.size)
          // Sum over a Seq, not the Set: mapping a Set to sizes would collapse blobs that
          // happen to share a size and under-report the total.
          println("Total size (unpacked)=" + sizedBadIds.toSeq.map(_.size).sum)
          Some(new BlobReplacer(sizedBadIds.map(_.objectId), new BlobInserter(repo.getObjectDatabase.threadLocalResources.inserter())))
        }
      }
    }

    Seq(blobsByIdRemover, blobRemover, fileDeletion, blobTextModifier, lfsBlobConverter).flatten
  }

  lazy val definesNoWork = treeBlobCleaners.isEmpty && folderDeletion.isEmpty && treeEntryListCleaners.isEmpty

  /** Assembles the cleaner configuration from the settings and cleaners above. */
  def objectIdCleanerConfig: ObjectIdCleaner.Config =
    ObjectIdCleaner.Config(
      objectProtection,
      objectIdSubstitutor,
      commitNodeCleaners,
      treeEntryListCleaners,
      treeBlobCleaners,
      folderDeletion.toSeq,
      objectChecker
    )

  /** Sentence fragment saying whether the former-commit footer will be added to commit messages. */
  def describe = {
    if (privateDataRemoval) {
      "is removing private data, so the '" + FormerCommitFooter.Key + "' footer will not be added to commit messages."
    } else {
      "is only removing non-private data (eg, blobs that are just big, not private) : '" + FormerCommitFooter.Key + "' footer will be added to commit messages."
    }
  }
}

--------------------------------------------------------------------------------
/bfg/src/main/scala/com/madgag/git/bfg/cli/Main.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
*
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
 */

package com.madgag.git.bfg.cli

import com.madgag.git._
import com.madgag.git.bfg.GitUtil._
import com.madgag.git.bfg.cleaner._

/**
 * Command-line entry point.
 *
 * With no arguments the usage text is shown. Otherwise the arguments are parsed into a
 * [[CLIConfig]]; if the target is not a Git repository the run aborts with a message,
 * and if no cleaning task was specified the usage text is shown. In the remaining case
 * the repository is rewritten using the cleaners derived from the config.
 *
 * The repository handle is closed on every path once it has been opened, including
 * when no work was defined or when the prune/rewrite throws.
 */
object Main extends App {

  if (args.isEmpty) {
    CLIConfig.parser.showUsage()
  } else {

    // the parse result is only acted on when a config was produced; its value is not used afterwards
    CLIConfig.parser.parse(args, CLIConfig()) foreach {
      config =>

        tweakStaticJGitConfig(config.massiveNonFileObjects)

        if (config.gitdir.isEmpty) {
          CLIConfig.parser.showUsage()
          Console.err.println("Aborting : " + config.repoLocation + " is not a valid Git repository.\n")
        } else {
          implicit val repo = config.repo

          try {
            println("\nUsing repo : " + repo.getDirectory.getAbsolutePath + "\n")

            // do this before implicitly initiating big-blob search
            if (hasBeenProcessedByBFGBefore(repo)) {
              println("\nThis repo has been processed by The BFG before! Will prune repo before proceeding - to avoid unnecessary cleaning work on unused objects...")
              repo.git.gc.call()
              println("Completed prune of old objects - will now proceed with the main job!\n")
            }

            if (config.definesNoWork) {
              Console.err.println("Please specify tasks for The BFG :")
              CLIConfig.parser.showUsage()
            } else {
              println("Found " + config.objectProtection.fixedObjectIds.size + " objects to protect")

              RepoRewriter.rewrite(repo, config.objectIdCleanerConfig)
            }
          } finally {
            // release the repository whether or not any rewriting happened
            repo.close()
          }
        }
    }
  }

}
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/annotatedTagExample.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/annotatedTagExample.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/badEncoding.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/badEncoding.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/badRepoContainingDotGitFolder.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/badRepoContainingDotGitFolder.git.zip
--------------------------------------------------------------------------------
/bfg/src/test/resources/sample-repos/branchNameWithASlash.git.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/branchNameWithASlash.git.zip -------------------------------------------------------------------------------- /bfg/src/test/resources/sample-repos/corruptTreeDupFileName.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/corruptTreeDupFileName.git.zip -------------------------------------------------------------------------------- /bfg/src/test/resources/sample-repos/example.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/example.git.zip -------------------------------------------------------------------------------- /bfg/src/test/resources/sample-repos/exampleWithInitialCleanHistory.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/exampleWithInitialCleanHistory.git.zip -------------------------------------------------------------------------------- /bfg/src/test/resources/sample-repos/folder-example.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/folder-example.git.zip -------------------------------------------------------------------------------- /bfg/src/test/resources/sample-repos/huge10MBCommitMessage.git.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/huge10MBCommitMessage.git.zip -------------------------------------------------------------------------------- /bfg/src/test/resources/sample-repos/moreThanOneBigBlobWithTheSameSize.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/moreThanOneBigBlobWithTheSameSize.git.zip -------------------------------------------------------------------------------- /bfg/src/test/resources/sample-repos/repoWithBigBlobs.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/repoWithBigBlobs.git.zip -------------------------------------------------------------------------------- /bfg/src/test/resources/sample-repos/unwantedSubmodule.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/unwantedSubmodule.git.zip -------------------------------------------------------------------------------- /bfg/src/test/resources/sample-repos/usedToHaveASubmodule.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rtyley/bfg-repo-cleaner/1bd9715198dd53cdffa6512a5f3af3f04f82b803/bfg/src/test/resources/sample-repos/usedToHaveASubmodule.git.zip -------------------------------------------------------------------------------- /bfg/src/test/scala/com/madgag/git/bfg/cli/CLIConfigSpecs.scala: -------------------------------------------------------------------------------- 1 | package 
com.madgag.git.bfg.cli

import com.madgag.git.bfg.model.FileName
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
 * Checks how the `-fi` / `-fe` filename filter options combine into
 * `CLIConfig.filterContentPredicate`.
 */
class CLIConfigSpecs extends AnyFlatSpec with Matchers {

  /**
   * Parses a whitespace-separated option string (with "my-repo.git" appended as the final
   * argument) and returns the resulting content-filter predicate.
   *
   * Arguments are split on runs of whitespace and empty tokens are dropped, so an empty
   * string or doubled spaces do not inject empty-string arguments. If the parser yields
   * no config the test fails with a message quoting the offending input.
   */
  def parse(args: String) = {
    val argv = args.trim.split("\\s+").filter(_.nonEmpty) :+ "my-repo.git"

    CLIConfig.parser.parse(argv, CLIConfig())
      .getOrElse(fail(s"command line could not be parsed: '$args'"))
      .filterContentPredicate
  }

  "CLI config" should "understand lone include" in {
    val predicate = parse("-fi *.txt")
    predicate(FileName("panda")) shouldBe false
    predicate(FileName("foo.txt")) shouldBe true
    predicate(FileName("foo.java")) shouldBe false
  }

  it should "understand lone exclude" in {
    val predicate = parse("-fe *.txt")
    predicate(FileName("panda")) shouldBe true
    predicate(FileName("foo.txt")) shouldBe false
    predicate(FileName("foo.java")) shouldBe true
  }

  it should "understand include followed by exclude" in {
    val predicate = parse("-fi *.txt -fe Poison.*")
    predicate(FileName("panda")) shouldBe false
    predicate(FileName("foo.txt")) shouldBe true
    predicate(FileName("foo.java")) shouldBe false
    predicate(FileName("Poison.txt")) shouldBe false
  }

  it should "understand exclude followed by include" in {
    val predicate = parse("-fe *.xml -fi hbm.xml")
    predicate(FileName("panda")) shouldBe true
    predicate(FileName("foo.xml")) shouldBe false
    predicate(FileName("hbm.xml")) shouldBe true
  }

}

--------------------------------------------------------------------------------
/bfg/src/test/scala/com/madgag/git/bfg/cli/MainSpec.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012, 2013 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
*
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
 */

package com.madgag.git.bfg.cli

import com.madgag.git._
import com.madgag.git.bfg.cli.test.unpackedRepo
import com.madgag.git.bfg.model._
import org.eclipse.jgit.lib.{ObjectId, ObjectReader}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import org.scalatest.{Inspectors, OptionValues}

import java.nio.file.Files
import scala.jdk.CollectionConverters._

/**
 * End-to-end specs for the command line: each case unpacks a zipped sample repository,
 * invokes the CLI on it through `run`, and wraps that invocation in the `ensure...`
 * helpers inherited from `unpackedRepo`, which state what must (or must not) change.
 */
class MainSpec extends AnyFlatSpec with Matchers with OptionValues with Inspectors {

  // concurrent testing against scala.App is not safe https://twitter.com/rtyley/status/340376844916387840

  "CLI" should "not change commits unnecessarily" in new unpackedRepo("/sample-repos/exampleWithInitialCleanHistory.git.zip") {
    implicit val r: ObjectReader = reader

    ensureInvariantValue(commitHist() take 2) {
      ensureRemovalFrom(commitHist()).ofCommitsThat(haveCommitWhereObjectIds(contain(abbrId("294f")))) {
        run("--strip-blobs-bigger-than 1K")
      }
    }
  }


  "removing empty trees" should "work" in new unpackedRepo("/sample-repos/folder-example.git.zip") {
    ensureRemovalFrom(commitHist()).ofCommitsThat(haveFolder("secret-files")) {
      run("--delete-files {credentials,passwords}.txt")
    }
  }

  "removing big blobs" should "definitely still remove blobs even if they have identical size" in new unpackedRepo("/sample-repos/moreThanOneBigBlobWithTheSameSize.git.zip") {
    ensureRemovalOfBadEggs(packedBlobsOfSize(1024), (contain allElementsOf Set(abbrId("06d7"), abbrId("cb2c"))).matcher[Iterable[ObjectId]]) {
      run("--strip-blobs-bigger-than 512B")
    }
  }

  "converting to Git LFS" should "create a file in lfs/objects" in new unpackedRepo("/sample-repos/repoWithBigBlobs.git.zip") {
    ensureRemovalOfBadEggs(packedBlobsOfSize(11238), (contain only abbrId("596c")).matcher[Iterable[ObjectId]]) {
      run("--convert-to-git-lfs *.png --no-blob-protection")
    }
    val lfsFile = repo.getDirectory.toPath.resolve(Seq("lfs", "objects", "e0", "eb", "e0ebd49837a1cced34b9e7d3ff2fa68a8100df8f158f165ce139e366a941ba6e"))

    // the file written under lfs/objects has the same size as the blob that was converted
    Files.size(lfsFile) shouldBe 11238
  }

  "removing a folder named '.git'" should "work" in new unpackedRepo("/sample-repos/badRepoContainingDotGitFolder.git.zip") {
    ensureRemovalFrom(commitHist()).ofCommitsThat(haveFolder(".git")) {
      run("--delete-folders .git --no-blob-protection")
    }
  }

  "cleaning" should "not crash encountering a protected an annotated tag" in new unpackedRepo("/sample-repos/annotatedTagExample.git.zip") {
    ensureInvariantCondition(haveRef("chapter1", haveFile("chapter1.txt"))) {
      ensureRemovalFrom(commitHist("master")).ofCommitsThat(haveFile("chapter2.txt")) {
        run("--strip-blobs-bigger-than 10B --protect-blobs-from chapter1")
      }
    }
  }

  "cleaning" should "not crash encountering a protected branch containing a slash in it's name" in new unpackedRepo("/sample-repos/branchNameWithASlash.git.zip") {
    ensureInvariantCondition(haveRef("feature/slashes-are-ugly", haveFile("bar"))) {
      ensureRemovalFrom(commitHist("master")).ofCommitsThat(haveFile("bar")) {
        run("--delete-files bar --protect-blobs-from feature/slashes-are-ugly")
      }
    }
  }

  "strip blobs by id" should "work" in new unpackedRepo("/sample-repos/example.git.zip") {
    implicit val r: ObjectReader = reader

    val badBlobs = Set(abbrId("db59"), abbrId("86f9"))
    // the ids to strip are handed to the CLI through a file, one id per line
    val blobIdsFile = Files.createTempFile("test-strip-blobs",".ids")

    try {
      Files.write(blobIdsFile, badBlobs.map(_.name()).asJava)

      ensureRemovalFrom(commitHist()).ofCommitsThat(haveCommitWhereObjectIds(contain(abbrId("db59")))) {
        run(s"--strip-blobs-with-ids $blobIdsFile")
      }
    } finally {
      // do not leave the temporary ids file behind, whether the assertions passed or not
      Files.deleteIfExists(blobIdsFile)
    }
  }

  "deleting a folder" should "not crash encountering a submodule" in new unpackedRepo("/sample-repos/usedToHaveASubmodule.git.zip") {
    ensureInvariantCondition(haveRef("master", haveFile("alpha"))) {
      ensureRemovalFrom(commitHist()).ofCommitsThat(haveFolder("shared")) {
        run("--delete-folders shared")
      }
    }
  }

  "deleting" should "not crash encountering a protected submodule" in new unpackedRepo("/sample-repos/unwantedSubmodule.git.zip") {
    ensureRemovalFrom(commitHist()).ofCommitsThat(haveFile("foo.txt")) {
      run("--delete-folders bar --delete-files foo.txt")
    }
  }

  "deleting" should "not crash on encountering a commit with bad encoding header" in new unpackedRepo("/sample-repos/badEncoding.git.zip") {
    ensureRemovalFrom(commitHist()).ofCommitsThat(haveFile("test.txt")) {
      run("--no-blob-protection --delete-files test.txt")
    }
  }

  "Corrupt trees containing duplicate filenames" should "be cleaned by removing the file with the duplicate FileName, leaving the folder" in new unpackedRepo("/sample-repos/corruptTreeDupFileName.git.zip") {
    ensureRemovalFrom(commitHist()).ofCommitsThat(haveFile("2.0.0")) {
      run("--fix-filename-duplicates-preferring tree")
    }
  }
}


--------------------------------------------------------------------------------
/bfg/src/test/scala/com/madgag/git/bfg/cli/MassiveNonFileObjectsRequiresOwnJvmSpec.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012, 2013 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
 */

package com.madgag.git.bfg.cli

import com.madgag.git.bfg.cli.test.unpackedRepo
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

// JGit has JVM-wide configuration for cache window size: https://git.eclipse.org/r/#/q/Ibf2ef604bac08885b2b3bd85f0dc31995132b682,n,z
/**
 * Runs the CLI with `--massive-non-file-objects-sized-up-to` against a sample repository
 * whose name indicates a 10MB commit message. Kept in a spec of its own because, as the
 * note above says, the JGit setting involved is JVM-wide.
 */
class MassiveNonFileObjectsRequiresOwnJvmSpec extends AnyFlatSpec with Matchers {

  // concurrent testing against scala.App is not safe https://twitter.com/rtyley/status/340376844916387840

  behavior of "Massive commit messages"

  it should "be handled without crash (ie LargeObjectException) if the user specifies that the repo contains massive non-file objects" in
    new unpackedRepo("/sample-repos/huge10MBCommitMessage.git.zip") {
      ensureRemovalFrom(commitHist("master"))
        .ofCommitsThat(haveFile("16-kb-zeros")) {
          run("--strip-blobs-bigger-than 1K --massive-non-file-objects-sized-up-to 20M")
        }
    }

}

--------------------------------------------------------------------------------
/bfg/src/test/scala/com/madgag/git/bfg/cli/test/unpackedRepo.scala:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012, 2013 Roberto Tyley
 *
 * This file is part of 'BFG Repo-Cleaner' - a tool for removing large
 * or troublesome blobs from Git repositories.
 *
 * BFG Repo-Cleaner is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * BFG Repo-Cleaner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
*
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
 */

package com.madgag.git.bfg.cli.test

import com.madgag.git.bfg
import com.madgag.git.bfg.cli.Main

/**
 * Test fixture extending the library's `unpackedRepo` with the ability to run the
 * command-line `Main` against the unpacked repository.
 *
 * @param filePath passed straight to the parent fixture
 */
class unpackedRepo(filePath: String) extends bfg.test.unpackedRepo(filePath) {

  /**
   * Invokes `Main.main` with the given options followed by the absolute path of this
   * fixture's repository directory.
   *
   * The option string is split on runs of whitespace and empty tokens are discarded, so an
   * empty string, or leading/trailing/doubled spaces, never produce empty-string arguments.
   * An individual argument therefore cannot itself contain whitespace.
   *
   * @param options whitespace-separated command-line options
   */
  def run(options: String): Unit = {
    val optionArgs = options.trim.split("\\s+").filter(_.nonEmpty)

    Main.main(optionArgs :+ repo.getDirectory.getAbsolutePath)
  }
}
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
import ReleaseTransformations.*
import Dependencies.*

ThisBuild / organization := "com.madgag"

ThisBuild / scalaVersion := "2.13.16"

ThisBuild / scalacOptions ++= Seq("-deprecation", "-feature", "-language:postfixOps", "-release:11")

ThisBuild / licenses := Seq(License.GPL3_or_later)

ThisBuild / resolvers ++= jgitVersionOverride.map(_ => Resolver.mavenLocal).toSeq

ThisBuild / libraryDependencies += scalatest % Test

ThisBuild / Test/ testOptions += Tests.Argument(
  TestFrameworks.ScalaTest,
  "-u", s"test-results/scala-${scalaVersion.value}"
)

lazy val root = Project(id = "bfg-parent", base = file(".")).aggregate (bfg, `bfg-test`, `bfg-library`).settings(
  publish / skip := true,
  releaseCrossBuild := true, // true if you cross-build the project for multiple Scala versions
  releaseProcess := Seq[ReleaseStep](
    checkSnapshotDependencies,
    inquireVersions,
    runClean,
    runTest,
    setReleaseVersion,
    commitReleaseVersion,
    tagRelease,
    setNextVersion,
    commitNextVersion
  )
)

lazy val `bfg-test` = project

lazy val `bfg-library` = project.dependsOn(`bfg-test` % Test)

lazy val bfg =
project.enablePlugins(BuildInfoPlugin).dependsOn(`bfg-library`, `bfg-test` % Test) 42 | 43 | lazy val `bfg-benchmark` = project 44 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.7 2 | -------------------------------------------------------------------------------- /project/dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | object Dependencies { 4 | 5 | val scalaGitVersion = "5.0.3" 6 | 7 | val jgitVersionOverride = Option(System.getProperty("jgit.version")) 8 | 9 | val jgitVersion = jgitVersionOverride.getOrElse("6.10.0.202406032230-r") 10 | 11 | val jgit = "org.eclipse.jgit" % "org.eclipse.jgit" % jgitVersion 12 | 13 | // this matches slf4j-api in jgit's dependencies 14 | val slf4jSimple = "org.slf4j" % "slf4j-simple" % "1.7.36" 15 | 16 | val scalaCollectionPlus = "com.madgag" %% "scala-collection-plus" % "0.11" 17 | 18 | val parCollections = "org.scala-lang.modules" %% "scala-parallel-collections" % "1.2.0" 19 | 20 | val scalaGit = "com.madgag.scala-git" %% "scala-git" % scalaGitVersion exclude("org.eclipse.jgit", "org.eclipse.jgit") 21 | 22 | val scalaGitTest = "com.madgag.scala-git" %% "scala-git-test" % scalaGitVersion 23 | 24 | val scalatest = "org.scalatest" %% "scalatest" % "3.2.19" 25 | 26 | val madgagCompress = "com.madgag" % "util-compress" % "1.35" 27 | 28 | val textmatching = "com.madgag" %% "scala-textmatching" % "2.8" 29 | 30 | val scopt = "com.github.scopt" %% "scopt" % "3.7.1" 31 | 32 | val guava = Seq("com.google.guava" % "guava" % "33.4.0-jre", "com.google.code.findbugs" % "jsr305" % "3.0.2") 33 | 34 | val useNewerJava = "com.madgag" % "use-newer-java" % "1.0.2" 35 | 36 | val lineSplitting = "com.madgag" %% "line-break-preserving-line-splitting" % "0.1.6" 37 | 38 | } 39 | 
-------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.github.sbt" % "sbt-release" % "1.4.0") 2 | 3 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.12.2") 4 | 5 | addSbtPlugin("ch.epfl.scala" % "sbt-version-policy" % "3.2.1") 6 | 7 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.3.0") 8 | 9 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.13.1") 10 | 11 | addDependencyTreePlugin -------------------------------------------------------------------------------- /version.sbt: -------------------------------------------------------------------------------- 1 | ThisBuild / version := "1.15.1-SNAPSHOT" 2 | --------------------------------------------------------------------------------