├── README.md ├── bats ├── environment.bash └── tests.bats └── git-split-branch /README.md: -------------------------------------------------------------------------------- 1 | # git-split-branch 2 | 3 | **git-split-branch** splits a single git branch into multiple branches, each 4 | of which contains a specified set of the original branch's files and only the 5 | commits which affect that set. 6 | 7 | This is a similar effect to what 8 | [**git-filter-branch**][2] 9 | would produce by using an index filter to select specific files. (In fact, 10 | git-split-branch is obviously and unabashedly modified from 11 | git-filter-branch.) But unlike... 12 | 13 | * **[git-subtree][1]**, it can split off an arbitrary set of files, not just a 14 | subdirectory. 15 | * **git-filter-branch**, it can create multiple branches. 16 | * **multiple git-filter-branch calls**, it only requires one pass over the 17 | commit history. 18 | * **[other][3] [scripts][4] [that][5] [wrap][6] [git-filter-branch][7]**, see 19 | one of the previous two points. 20 | 21 | [1]: https://github.com/git/git/blob/master/contrib/subtree/git-subtree.sh 22 | [2]: https://github.com/git/git/blob/master/git-filter-branch.sh 23 | [3]: https://github.com/ajdruff/git-splits 24 | [4]: https://github.com/vangorra/git_split 25 | [5]: https://github.com/phord/git-split/blob/master/git-split.sh 26 | [6]: https://gist.github.com/aseigneurin/7531087 27 | [7]: https://gist.github.com/tijn/5301258 28 | 29 | In addition, git-split-branch assigns the leftover files to a "remainder" 30 | branch or rewrites the original source branch to contain only the leftovers. 31 | 32 | On the other hand, git-filter-branch allows for other filters to be applied, 33 | while git-split-branch is single in its purpose. 34 | 35 | ## Install 36 | 37 | Download and place git-split-branch somewhere in your $PATH, or symlink from its git repository to your $PATH. 38 | 39 | No other setup is required before invoking as `git split-branch`. 
40 | 41 | ## Usage 42 | 43 | git split-branch [-d <tempdir>] [-r <remainder>] <source> <dest1> <files1>... [ -- <dest2> <files2>... ]... 44 | 45 | This command will split the contents of the `<source>` branch, creating branch 46 | `<dest1>` to contain only files matching `<files1>`, branch `<dest2>` to 47 | contain `<files2>`, and so forth. The remaining unsplit files will be written 48 | to branch `<remainder>` if the `-r` flag is given; otherwise `<source>` will 49 | be rewritten to contain the remaining files only. 50 | 51 | As with git-filter-branch, the directory in which work is done can be 52 | specified (e.g. on a tmpfs) with `-d`. 53 | 54 | ## Motivation 55 | 56 | This utility was motivated by the desire to migrate a large $HOME repository 57 | to [vcsh](https://github.com/RichiH/vcsh). This repository contained seven 58 | years of configuration, grad school work, personal writings, code for side 59 | projects, etc. It had originally been hosted in Subversion as a single, large 60 | repository, and it was later converted to Git via git-svn but not restructured 61 | into smaller, more logical units. git-split-branch was written to split this 62 | repository into smaller units without having to iterate seven years of commits 63 | for every split. 64 | 65 | The tool took 24 minutes to split 2750 commits into 60 branches. 
--------------------------------------------------------------------------------
/bats/environment.bash:
--------------------------------------------------------------------------------
# Shared fixtures for the bats test suite (loaded via `load environment`).

# Runs before every test: puts the git-split-branch script on PATH, pins the
# locale, and moves into a fresh scratch directory.
setup() {
  PATH="$BATS_TEST_DIRNAME/..:$PATH"
  export PATH
  # Deliberately expanded unquoted by the tests so it splits into two words.
  GSB="git split-branch"

  LC_ALL=C
  export LC_ALL

  # Make a scratch directory for this test and work inside it.  The template
  # form of mktemp is used because `-p` is not available on every platform.
  BATS_TESTDIR=$(mktemp -d "$BATS_TMPDIR/git-split-branch.XXXXXX") || return 1
  cd "$BATS_TESTDIR" || return 1
}

# Runs after every test: removes the scratch directory unless
# BATS_LEAVE_TESTDIR is set to a non-empty value.
teardown() {
  # Don't saw off the branch you're sitting on
  cd /

  if test -z "$BATS_LEAVE_TESTDIR" && test -n "$BATS_TESTDIR" &&
    test -d "$BATS_TESTDIR"; then
    # Make sure removal will succeed even if we have altered permissions
    chmod -R u+rwX "$BATS_TESTDIR"
    rm -rf "$BATS_TESTDIR"
  fi
}

# Create a repository in the current directory whose initial branch is
# "master".  The tests refer to "master" by name, so do not depend on the
# user's init.defaultBranch setting.
init_repo() {
  git init &&
    git symbolic-ref HEAD refs/heads/master
}

# Write <content> to <file> and commit it with fixed author and committer
# dates so that the resulting commit ids are reproducible.
# Arguments: $1 - seconds since the epoch
#            $2 - file name
#            $3 - file content (a newline is appended)
#            $4 - commit message
commit_file_at() {
  local stamp="@$1 +0000" file=$2 content=$3 message=$4

  printf '%s\n' "$content" > "$file"
  git add "$file"
  # Both dates are given explicitly rather than deriving one from the other
  # inside the same assignment prefix.
  GIT_AUTHOR_DATE=$stamp GIT_COMMITTER_DATE=$stamp \
    git commit -m "$message"
}

# Build a five-commit linear history touching files a, b and c.  The tests
# compare against hard-coded commit ids, so identity, dates, contents and
# messages below must not change.
make_linear_commits() {
  export GIT_AUTHOR_NAME='A U Thor'
  export GIT_AUTHOR_EMAIL='author@example.com'
  export GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME
  export GIT_COMMITTER_EMAIL=$GIT_AUTHOR_EMAIL

  commit_file_at 1 a aye 'add a'
  commit_file_at 2 b bee 'add b'
  commit_file_at 3 a eh 'change a'
  commit_file_at 4 c cee 'add c'
  commit_file_at 5 b be 'change b'
}

--------------------------------------------------------------------------------
/bats/tests.bats:
--------------------------------------------------------------------------------
#!/usr/bin/env bats

load environment

@test "Split with no branches/commits fails" {
  init_repo
  run $GSB master split '*.c'
  [ "$status" -ne 0 ]
}

@test "Splitting linear repo" {
  init_repo
  make_linear_commits

  $GSB master split-a a

  run git rev-parse split-a
  [ "$status" -eq 0 ]
  [ "$output" = 2e177ee154aee27a2e10cf30b25acefbb752942c ]

  run git rev-parse master
  [ "$status" -eq 0 ]
  [ "$output" = 65f64808f13983e338c77cd9e5be9a921d187016 ]
}

@test "Splitting linear repo with remainder branch" {
  init_repo
  make_linear_commits

  $GSB -r rem master split-a a

  run git rev-parse split-a
  [ "$status" -eq 0 ]
  [ "$output" = 2e177ee154aee27a2e10cf30b25acefbb752942c ]

  run git rev-parse rem
  [ "$status" -eq 0 ]
  [ "$output" = 65f64808f13983e338c77cd9e5be9a921d187016 ]

  # With -r the source branch itself is left untouched.
  run git rev-parse master
  [ "$status" -eq 0 ]
  [ "$output" = 10e8418dfa6a9136da9368970b8954f2856362b8 ]
}

@test "-r option works without the following space" {
  init_repo
  make_linear_commits

  $GSB -rrem master split-a a

  run git rev-parse split-a
  [ "$status" -eq 0 ]
  [ "$output" = 2e177ee154aee27a2e10cf30b25acefbb752942c ]

  run git rev-parse rem
  [ "$status" -eq 0 ]
  [ "$output" = 65f64808f13983e338c77cd9e5be9a921d187016 ]

  run git rev-parse master
  [ "$status" -eq 0 ]
  [ "$output" = 10e8418dfa6a9136da9368970b8954f2856362b8 ]
}

@test "Splitting linear repo with multiple filenames" {
  init_repo
  make_linear_commits

  $GSB -r rem-c master split-ab a b

  run git rev-parse split-ab
  [ "$status" -eq 0 ]
  [ "$output" = ba2316913c0550b2ea7cb7f283afdfef8d7acfc2 ]

  run git rev-parse rem-c
  [ "$status" -eq 0 ]
  [ "$output" = 7032187efd8b43043460e2cc1391da56315e7ab9 ]
}

@test "Splitting linear repo into multiple branches" {
  init_repo
  make_linear_commits

  $GSB -r rem-c master split-a a -- split-b b

  run git rev-parse split-a
  [ "$status" -eq 0 ]
  [ "$output" = 2e177ee154aee27a2e10cf30b25acefbb752942c ]

  run git rev-parse split-b
  [ "$status" -eq 0 ]
  [ "$output" = 29cbbea5edd1c01131aff2cba75308061c1384f5 ]

  run git rev-parse rem-c
  [ "$status" -eq 0 ]
  [ "$output" = 7032187efd8b43043460e2cc1391da56315e7ab9 ]

  run git rev-parse master
  [ "$status" -eq 0 ]
  [ "$output" = 10e8418dfa6a9136da9368970b8954f2856362b8 ]
}

@test "Emptied source branch is deleted" {
  init_repo
  make_linear_commits

  $GSB master split-all a b c

  # A negated pipeline is exempt from errexit, so `|| false` is what actually
  # fails the test when the branch is still listed.
  ! git branch --list master | grep '' || false
}

@test "Empty remainder branch is not created" {
  init_repo
  make_linear_commits

  $GSB -r rem master split-all a b c

  ! git branch --list rem | grep '' || false
}

@test "Untracked files remain in work tree if source is deleted" {
  init_repo
  make_linear_commits
  touch d

  $GSB master split-all a b c

  [ -e d ]
}

--------------------------------------------------------------------------------
/git-split-branch:
--------------------------------------------------------------------------------
#!/bin/bash
#
# git-split-branch: split one branch into several branches, each holding a
# chosen set of files and only the commits that touch them, in a single pass
# over the history.  Whatever is left over goes to a "remainder" branch (-r)
# or replaces the source branch.
#
# usage: git split-branch [-d <tempdir>] [-r <remainder>] <source>
#            <dest> <file>... [-- <dest> <file>...]...

# Print the rewritten id(s) recorded for a commit on a given branch.
# Globals:   workdir (read)
# Arguments: $1 - branch whose map is consulted
#            $2 - original commit id
# Outputs:   contents of the map file if one is readable (this may be empty
#            when the commit was dropped), otherwise $2 unchanged
map()
{
  # if it was not rewritten, take the original
  if test -r "$workdir/../map-$1/$2"
  then
    cat "$workdir/../map-$1/$2"
  else
    echo "$2"
  fi
}

# Skip commits that leave the tree untouched, commit others.
# Globals:   empty_tree (read)
# Arguments: $1 - branch whose map is consulted
#            $2 - tree id
#            $3... - `-p <parent>` pairs, as accepted by git commit-tree
# Input:     the commit message on stdin
# Outputs:   the id to record for this commit: the single parent when the
#            tree is unchanged, nothing for a parentless empty tree, or the
#            id of a newly written commit
git_commit_non_empty_tree()
{
  local mapbranch=$1
  shift
  if test $# = 3 && test "$1" = "$(git rev-parse "$3^{tree}")"; then
    map "$mapbranch" "$3"
  elif test $# = 1 && test "$1" = "$empty_tree"; then
    :
  else
    git commit-tree "$@"
  fi
}

# Emit shell code that finishes setting up one identity.
# Arguments: $1 - AUTHOR or COMMITTER
# Outputs:   shell statements on stdout, meant to be eval'ed by the caller
finish_ident() {
  # Ensure non-empty id name.
  echo "case \"\$GIT_$1_NAME\" in \"\") GIT_$1_NAME=\"\${GIT_$1_EMAIL%%@*}\" && export GIT_$1_NAME;; esac"
  # And make sure everything is exported.
  echo "export GIT_$1_NAME"
  echo "export GIT_$1_EMAIL"
  echo "export GIT_$1_DATE"
}

# Emit shell code that sets the author and committer identity variables
# from a raw commit object supplied on stdin.
# NOTE(review): parse_ident_from_commit comes from git-sh-setup; it is
# presumed to print the matching GIT_*_NAME/EMAIL/DATE assignments.
set_ident() {
  parse_ident_from_commit author AUTHOR committer COMMITTER
  finish_ident AUTHOR
  finish_ident COMMITTER
}

# Translate original parent ids into `-p <id>` arguments for one branch.
# Arguments: $1 - branch whose map is consulted
#            $2... - original parent commit ids
# Outputs:   one line of the form " -p <id> -p <id>..." with duplicates
#            removed; parents that map to nothing are left out
get_reparents() {
  local parentstr= mapbranch=$1 parent reparent
  shift
  for parent in "$@"; do
    # Unquoted on purpose: a map entry may hold zero or more ids.
    for reparent in $(map "$mapbranch" "$parent"); do
      case "$parentstr " in
      *" -p $reparent "*)
        ;;
      *)
        parentstr="$parentstr -p $reparent"
        ;;
      esac
    done
  done
  printf '%s\n' "$parentstr"
}

USAGE="[-d <tempdir>] [-r <remainder>] <source>
	<dest> <file>... [-- <dest> <file>...]..."
OPTIONS_SPEC=
. git-sh-setup

# Override die(): this version puts in an extra line break, so that the
# progress is still visible.  It has to be defined after sourcing
# git-sh-setup, which defines its own die() and would otherwise replace it.
die()
{
  echo >&2
  echo "$*" >&2
  exit 1
}

if [ "$(is_bare_repository)" = false ]; then
  require_clean_work_tree 'split a branch'
fi

# Id of the empty tree, computed so that it matches the repository's hash
# algorithm instead of assuming SHA-1.
empty_tree=$(git hash-object -t tree /dev/null) ||
  die "Could not compute the empty tree id"

tempdir=.git-rewrite
rembr=
srcbr=
while arg=$1; shift
do
  case "$arg" in
  --)
    break
    ;;
  -d*)
    # Accept both "-d<dir>" and "-d <dir>".
    tempdir=${arg:2}
    if [ -z "$tempdir" ]; then
      tempdir=$1
      shift || usage
    fi
    ;;
  -r*)
    # Accept both "-r<branch>" and "-r <branch>".
    rembr=${arg:2}
    if [ -z "$rembr" ]; then
      rembr=$1
      shift || usage
    fi
    ;;
  -?*)
    usage
    ;;
  *)
    srcbr=$arg
    break
    ;;
  esac
done

[ -n "$srcbr" ] || usage

# By default, rewrite source branch to remainder in place
: "${rembr:=$srcbr}"

# The temporary directory is removed wholesale on exit, so refuse to reuse
# anything that already exists rather than deleting the user's data.
test -e "$tempdir" &&
  die "$tempdir already exists, please remove it"

# Set up and change to temporary directory
orig_dir=$(pwd)
mkdir -p "$tempdir/t" &&
tempdir="$(cd "$tempdir"; pwd)" &&
cd "$tempdir/t" &&
workdir="$(pwd)" ||
die "Could not set up temporary directory $tempdir"

# Remove tempdir on exit
trap 'cd "$orig_dir"; rm -rf "$tempdir"' 0

# Now read and parse destination specs.  dstbrs lists the destination
# branches in command-line order; dstfiles maps each branch to its file
# arguments, shell-quoted so they can be restored with eval.
dstbrs=()
declare -A dstfiles=()
while branch=$1 && shift; do
  [ -n "$branch" ] || usage
  dstbrs+=("$branch")
  dstfiles[$branch]=
  while arg=$1 && shift; do
    case $arg in
    --)
      break
      ;;
    *)
      dstfiles[$branch]+=" $(git rev-parse --sq-quote "$arg")"
      ;;
    esac
  done
  # Without any paths the `git reset` below would be a whole-tree reset.
  [ -n "${dstfiles[$branch]}" ] ||
    die "$branch: no files specified"
done

ORIG_GIT_DIR="$GIT_DIR"
ORIG_GIT_WORK_TREE="$GIT_WORK_TREE"
ORIG_GIT_INDEX_FILE="$GIT_INDEX_FILE"
GIT_WORK_TREE=.
export GIT_DIR GIT_WORK_TREE

GIT_INDEX_FILE="$(pwd)/../index"
export GIT_INDEX_FILE

# Set up ID map for rewriting parents on each branch
for branch in "${dstbrs[@]}" "$rembr"; do
  mkdir -p ../map-"$branch" || die "Could not create map-$branch/ directory"
done

# Peel to a commit so that the map lookups below use a commit id.
srcrev=$(git rev-parse --verify -q "$srcbr^{commit}") ||
  die "Could not resolve source branch $srcbr"
git rev-list --reverse --topo-order --default HEAD \
  --parents --simplify-merges "$srcrev" >../revs ||
  die "Could not get the commits"
commits=$(wc -l <../revs | tr -d " ")

test "$commits" -eq 0 && die "Found nothing to rewrite"

# Rewrite the commits

# Redraw the one-line progress indicator for the commit being rewritten.
# Globals: git_filter_branch__commit_count, commits, commit, start_timestamp
#          (read); count, now, elapsed, remaining, next_sample_at, progress
#          (written)
# The time estimate is only recomputed once the counter passes
# next_sample_at; the counter shown is always the current one.
report_progress ()
{
  count=$git_filter_branch__commit_count
  if test -n "$progress" &&
    test "$count" -gt "$next_sample_at"
  then
    now=$(date +%s)
    elapsed=$(($now - $start_timestamp))
    remaining=$(( ($commits - $count) * $elapsed / $count ))
    if test $elapsed -gt 0
    then
      next_sample_at=$(( ($elapsed + 1) * $count / $elapsed ))
    else
      next_sample_at=$(($next_sample_at + 1))
    fi
    progress=" ($elapsed seconds passed, remaining $remaining predicted)"
  fi
  printf '\rRewrite %s (%s/%s)%s    ' "$commit" "$count" "$commits" "$progress"
}

git_filter_branch__commit_count=0

# Time estimates are only shown when `date +%s` is usable.
progress= start_timestamp=
if date '+%s' 2>/dev/null | grep -q '^[0-9][0-9]*$'
then
  next_sample_at=0
  progress="dummy to ensure this is not empty"
  start_timestamp=$(date '+%s')
fi

while read -r commit parents; do
  git_filter_branch__commit_count=$(($git_filter_branch__commit_count+1))

  report_progress

  git cat-file commit "$commit" >../commit ||
    die "Cannot read commit $commit"

  eval "$(set_ident <../commit)" ||
    die "setting author/committer failed for commit $commit"

  {
    while IFS='' read -r header_line && test -n "$header_line"
    do
      # skip header lines...
      :;
    done
    # and output the actual commit message
    cat
  } <../commit >../message

  # The "-rem" index starts out as the full tree of this commit; every file
  # claimed by a destination branch is removed from it below.
  GIT_INDEX_FILE=$GIT_INDEX_FILE-rem GIT_ALLOW_NULL_SHA1=1 git read-tree -i -m "$commit" ||
    die "Could not initialize the index"

  for branch in "${dstbrs[@]}"; do
    # Get just the specified files into the index
    eval "set -- ${dstfiles[$branch]}"
    GIT_INDEX_FILE=$GIT_INDEX_FILE-rem git rm -qr --cached --ignore-unmatch -- "$@"
    git read-tree --empty
    git reset -q "$commit" -- "$@"

    # $parents and $parentstr are split into words on purpose.
    parentstr=$(get_reparents "$branch" $parents)
    newtree=$(git write-tree)
    git_commit_non_empty_tree "$branch" "$newtree" $parentstr < ../message > "../map-$branch/$commit" ||
      die "could not write rewritten commit"
  done

  parentstr=$(get_reparents "$rembr" $parents)
  remtree=$(GIT_INDEX_FILE=$GIT_INDEX_FILE-rem git write-tree)
  git_commit_non_empty_tree "$rembr" "$remtree" $parentstr < ../message > "../map-$rembr/$commit" ||
    die "could not write rewritten commit"
done <../revs

echo

{
  # Create refs for our split branches
  for branch in "${dstbrs[@]}"; do
    sha1=$(map "$branch" "$srcrev")
    if [ -z "$sha1" ]; then
      echo "$branch: matched no files, not creating" >&2
      continue
    fi
    echo "$branch: creating at $(git rev-parse --short "$sha1")" >&2
    printf 'create refs/heads/%s %s\n' "$branch" "$sha1"
  done

  # Create or update the ref for our remainder
  sha1=$(map "$rembr" "$srcrev")
  if [ "$rembr" != "$srcbr" ]; then
    if [ -z "$sha1" ]; then
      echo "$rembr: no files remained, not creating" >&2
    else
      echo "$rembr: creating at $(git rev-parse --short "$sha1")" >&2
      printf 'create refs/heads/%s %s\n' "$rembr" "$sha1"
    fi
  else
    if [ -z "$sha1" ]; then
      echo "$rembr: no files remained, deleting" >&2
      printf 'delete refs/heads/%s %s\n' "$rembr" "$srcrev"
    else
      echo "$rembr: rewriting to $(git rev-parse --short "$sha1")" >&2
      printf 'update refs/heads/%s %s %s\n' "$rembr" "$sha1" "$srcrev"
    fi
  fi
} | git update-ref --stdin ||
  die "Could not update refs"

# Store our potential final destination before deleting the map
sha1=$(map "$rembr" "$srcrev")

cd "$orig_dir"
rm -rf "$tempdir"

trap - 0

unset GIT_DIR GIT_WORK_TREE GIT_INDEX_FILE
test -z "$ORIG_GIT_DIR" || {
  GIT_DIR="$ORIG_GIT_DIR" && export GIT_DIR
}
test -z "$ORIG_GIT_WORK_TREE" || {
  GIT_WORK_TREE="$ORIG_GIT_WORK_TREE" &&
  export GIT_WORK_TREE
}
test -z "$ORIG_GIT_INDEX_FILE" || {
  GIT_INDEX_FILE="$ORIG_GIT_INDEX_FILE" &&
  export GIT_INDEX_FILE
}

# Bring the index and work tree in line with the rewritten source branch,
# but only when that branch is the one checked out; otherwise the files of
# an unrelated branch would be overwritten.
if [ "$(is_bare_repository)" = false ] && [ "$rembr" = "$srcbr" ] &&
  [ "$(git symbolic-ref -q HEAD)" = "refs/heads/$srcbr" ]; then
  if [ -n "$sha1" ]; then
    git read-tree -u -m "$srcbr" || exit
  else
    git read-tree -u -m "$empty_tree" || exit
  fi
fi

exit 0

--------------------------------------------------------------------------------