├── .gitignore ├── go.mod ├── CHANGELOG ├── LICENSE ├── .github └── workflows │ └── tests.yml ├── splitter ├── utils.go ├── result.go ├── config.go ├── cache.go └── state.go ├── go.sum ├── main.go ├── README.md └── run-tests.sh /.gitignore: -------------------------------------------------------------------------------- 1 | splitter-lite-tests/ 2 | vendor/ 3 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/splitsh/lite 2 | 3 | go 1.22 4 | 5 | require ( 6 | github.com/libgit2/git2go/v34 v34.0.0 7 | go.etcd.io/bbolt v1.3.9 8 | ) 9 | 10 | require ( 11 | golang.org/x/crypto v0.20.0 // indirect 12 | golang.org/x/sys v0.17.0 // indirect 13 | ) 14 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | CHANGELOG 2 | ========= 3 | 4 | * 2.0.0 (2023-10-25) 5 | 6 | * move to go.mod 7 | * remove the `--quiet` option (append `2>/dev/null` to the command instead) 8 | * remove the `--legacy` option (same as `--git '<1.8.2'`) 9 | * move information console display to stderr instead of stdout 10 | 11 | * 1.0.1 (2017-02-24) 12 | 13 | * add the `--version` flag 14 | 15 | * 1.0.0 (2017-01-02) 16 | 17 | * initial version 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015-2024 Fabien Potencier 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the 
Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | pull_request: 5 | push: 6 | 7 | jobs: 8 | update: 9 | name: Run tests 10 | runs-on: ubuntu-latest 11 | steps: 12 | - 13 | name: Install deps 14 | run: sudo apt-get install -y pkg-config cmake 15 | - 16 | name: Checkout 17 | uses: actions/checkout@v3 18 | - 19 | name: Set up Go 20 | uses: actions/setup-go@v3 21 | with: 22 | go-version: '^1.22.0' 23 | - uses: actions/cache@v3 24 | with: 25 | path: | 26 | ~/.cache/go-build 27 | ~/go/pkg/mod 28 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 29 | restore-keys: | 30 | ${{ runner.os }}-go- 31 | - 32 | name: Building 33 | run: | 34 | go mod vendor 35 | rm -rf vendor/github.com/libgit2/git2go 36 | git clone https://github.com/libgit2/git2go vendor/github.com/libgit2/git2go/v34 37 | cd vendor/github.com/libgit2/git2go/v34 && git checkout v34.0.0 && git submodule update --init && make install-static 38 | - 39 | name: Test 40 | run: | 41 | export PKG_CONFIG_PATH=/home/runner/work/lite/lite/vendor/github.com/libgit2/git2go/v34/static-build/build 42 | go test -v ./... 
// messageNormalizer matches a line break together with any whitespace that
// precedes it; SplitMessage uses it to fold a multi-line subject into one line.
var messageNormalizer = regexp.MustCompile(`\s*\r?\n`)

// SplitMessage splits a Git commit message into its subject and its body.
//
// The message is cut at the first blank line, i.e. the first "\n\n" or
// "\r\n\r\n" sequence. Everything before it is the subject, everything after
// it is the body. When there is no blank line, the whole message is the
// subject and the body is empty.
//
// Line breaks inside the subject (and the whitespace before them) are
// replaced by a single space and trailing spaces are removed. Leading "\r"
// and "\n" characters are removed from the body.
func SplitMessage(message string) (string, string) {
	subject, body := message, ""

	// Scan every position that can still hold a two-byte "\n\n" separator.
	// The four-byte "\r\n\r\n" form is only matched when enough bytes remain
	// (HasPrefix takes care of the bound), so a blank line followed by a very
	// short body is still detected.
	for i := 0; i+1 < len(message); i++ {
		if message[i] == '\n' && message[i+1] == '\n' {
			subject, body = message[:i], message[i+2:]
			break
		}
		if strings.HasPrefix(message[i:], "\r\n\r\n") {
			subject, body = message[:i], message[i+4:]
			break
		}
	}

	// normalize \r\n and whitespaces
	subject = messageNormalizer.ReplaceAllLiteralString(subject, " ")

	// remove spaces at the end of the subject
	subject = strings.TrimRight(subject, " ")
	body = strings.TrimLeft(body, "\r\n")
	return subject, body
}
defer ref.Free() 72 | 73 | return ref.Name(), nil 74 | } 75 | -------------------------------------------------------------------------------- /splitter/result.go: -------------------------------------------------------------------------------- 1 | package splitter 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | 7 | git "github.com/libgit2/git2go/v34" 8 | ) 9 | 10 | // Result represents the outcome of a split 11 | type Result struct { 12 | mu sync.RWMutex 13 | traversed int 14 | created int 15 | head *git.Oid 16 | duration time.Duration 17 | } 18 | 19 | // NewResult returns a pre-populated result 20 | func NewResult(duration time.Duration, traversed, created int) *Result { 21 | return &Result{ 22 | duration: duration, 23 | traversed: traversed, 24 | created: created, 25 | } 26 | } 27 | 28 | // Traversed returns the number of commits traversed during the split 29 | func (r *Result) Traversed() int { 30 | r.mu.RLock() 31 | defer r.mu.RUnlock() 32 | return r.traversed 33 | } 34 | 35 | // Created returns the number of created commits 36 | func (r *Result) Created() int { 37 | r.mu.RLock() 38 | defer r.mu.RUnlock() 39 | return r.created 40 | } 41 | 42 | // Duration returns the current duration of the split 43 | func (r *Result) Duration(precision time.Duration) time.Duration { 44 | r.mu.RLock() 45 | defer r.mu.RUnlock() 46 | return roundDuration(r.duration, precision) 47 | } 48 | 49 | // Head returns the latest split sha1 50 | func (r *Result) Head() *git.Oid { 51 | r.mu.RLock() 52 | defer r.mu.RUnlock() 53 | return r.head 54 | } 55 | 56 | func (r *Result) moveHead(oid *git.Oid) { 57 | r.mu.Lock() 58 | r.head = oid 59 | r.mu.Unlock() 60 | } 61 | 62 | func (r *Result) incCreated() { 63 | r.mu.Lock() 64 | r.created++ 65 | r.mu.Unlock() 66 | } 67 | 68 | func (r *Result) incTraversed() { 69 | r.mu.Lock() 70 | r.traversed++ 71 | r.mu.Unlock() 72 | } 73 | 74 | func (r *Result) end(start time.Time) { 75 | r.mu.Lock() 76 | r.duration = time.Since(start) 77 | r.mu.Unlock() 78 | } 79 
// roundDuration rounds d to the nearest multiple of the precision r, with
// halfway values rounded away from zero (for example,
// roundDuration(d, 10*time.Second) gives a 10s precision).
//
// A precision that is zero or negative leaves d unchanged. Negative durations
// are rounded symmetrically to positive ones: -1.5s at a 1s precision gives
// -2s.
func roundDuration(d, r time.Duration) time.Duration {
	if r <= 0 {
		return d
	}
	// time.Duration.Round implements exactly this rounding mode and also
	// handles negative values, which the hand-rolled version got wrong.
	return d.Round(r)
}
51 | Logger *log.Logger 52 | DB *bolt.DB 53 | RepoMu *sync.Mutex 54 | Repo *git.Repository 55 | Git int 56 | } 57 | 58 | var supportedGitVersions = map[string]int{ 59 | "<1.8.2": 1, 60 | "<2.8.0": 2, 61 | "latest": 3, 62 | } 63 | 64 | // Split splits a configuration 65 | func Split(config *Config, result *Result) error { 66 | state, err := newState(config, result) 67 | if err != nil { 68 | return err 69 | } 70 | defer state.close() 71 | return state.split() 72 | } 73 | 74 | // Validate validates the configuration 75 | func (config *Config) Validate() error { 76 | ok, err := git.ReferenceNameIsValid(config.Origin) 77 | if err != nil { 78 | return err 79 | } 80 | if !ok { 81 | return fmt.Errorf("the origin is not a valid Git reference") 82 | } 83 | 84 | ok, err = git.ReferenceNameIsValid(config.Target) 85 | if err != nil { 86 | return err 87 | } 88 | if config.Target != "" && !ok { 89 | return fmt.Errorf("the target is not a valid Git reference") 90 | } 91 | 92 | git, ok := supportedGitVersions[config.GitVersion] 93 | if !ok { 94 | return fmt.Errorf(`the git version can only be one of "<1.8.2", "<2.8.0", or "latest"`) 95 | } 96 | config.Git = git 97 | 98 | return nil 99 | } 100 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= 4 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= 5 | github.com/libgit2/git2go/v34 v34.0.0 h1:UKoUaKLmiCRbOCD3PtUi2hD6hESSXzME/9OUZrGcgu8= 6 | github.com/libgit2/git2go/v34 v34.0.0/go.mod h1:blVco2jDAw6YTXkErMMqzHLcAjKkwF0aWIRHBqiJkZ0= 7 | github.com/pmezard/go-difflib 
v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 8 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 9 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 10 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 11 | go.etcd.io/bbolt v1.3.9 h1:8x7aARPEXiXbHmtUwAIv7eV2fQFHrLLavdiJ3uzJXoI= 12 | go.etcd.io/bbolt v1.3.9/go.mod h1:zaO32+Ti0PK1ivdPtgMESzuzL2VPoIG1PCQNvOdo/dE= 13 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 14 | golang.org/x/crypto v0.0.0-20201203163018-be400aefbc4c/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= 15 | golang.org/x/crypto v0.20.0 h1:jmAMJJZXr5KiCw05dfYK9QnqaqKLYXijU23lsEdcQqg= 16 | golang.org/x/crypto v0.20.0/go.mod h1:Xwo95rrVNIoSMx9wa1JroENMToLWn3RNVrTBpLHgZPQ= 17 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 18 | golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= 19 | golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 20 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 21 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 22 | golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 23 | golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= 24 | golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 25 | golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= 26 | golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U= 27 | golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= 28 | golang.org/x/text v0.3.0/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 29 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 30 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 31 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "strings" 8 | "time" 9 | 10 | "github.com/splitsh/lite/splitter" 11 | ) 12 | 13 | var ( 14 | version = "dev" 15 | ) 16 | 17 | type prefixesFlag []*splitter.Prefix 18 | 19 | func (p *prefixesFlag) String() string { 20 | return fmt.Sprint(*p) 21 | } 22 | 23 | func (p *prefixesFlag) Set(value string) error { 24 | parts := strings.Split(value, ":") 25 | from := parts[0] 26 | to := "" 27 | excludes := make([]string, 0) 28 | if len(parts) >= 2 { 29 | to = strings.TrimRight(parts[1], "/") 30 | if len(parts) > 2 { 31 | for _, exclude := range parts[2:] { 32 | excludes = append(excludes, exclude) 33 | } 34 | } 35 | } 36 | 37 | // value must be unique 38 | for _, prefix := range *p { 39 | if prefix.To == to { 40 | return fmt.Errorf("cannot have two prefix splits under the same directory: %s -> %s vs %s -> %s", prefix.From, prefix.To, from, to) 41 | } 42 | } 43 | 44 | *p = append(*p, splitter.NewPrefix(from, to, excludes)) 45 | return nil 46 | } 47 | 48 | var prefixes prefixesFlag 49 | var origin, target, commit, path, gitVersion string 50 | var scratch, debug, progress, v bool 51 | 52 | func init() { 53 | flag.Var(&prefixes, "prefix", "The directory(ies) to split") 54 | flag.StringVar(&origin, "origin", "HEAD", "The branch to split (optional, defaults to the current one)") 55 | flag.StringVar(&target, "target", "", "The branch to create when split is finished (optional)") 56 | flag.StringVar(&commit, "commit", "", "The commit at which to start the split (optional)") 57 | flag.StringVar(&path, "path", ".", "The repository path 
(optional, current directory by default)") 58 | flag.BoolVar(&scratch, "scratch", false, "Flush the cache (optional)") 59 | flag.BoolVar(&debug, "debug", false, "Enable the debug mode (optional)") 60 | flag.StringVar(&gitVersion, "git", "latest", "Simulate a given version of Git (optional)") 61 | flag.BoolVar(&progress, "progress", false, "Show progress bar (optional, cannot be enabled when debug is enabled)") 62 | flag.BoolVar(&v, "version", false, "Show version") 63 | } 64 | 65 | func main() { 66 | flag.Parse() 67 | 68 | if v { 69 | fmt.Fprintf(os.Stderr, "splitsh-lite version %s\n", version) 70 | os.Exit(0) 71 | } 72 | 73 | if len(prefixes) == 0 { 74 | fmt.Fprintln(os.Stderr, "You must provide the directory to split via the --prefix flag") 75 | os.Exit(1) 76 | } 77 | 78 | config := &splitter.Config{ 79 | Path: path, 80 | Origin: origin, 81 | Prefixes: prefixes, 82 | Target: target, 83 | Commit: commit, 84 | Debug: debug, 85 | Scratch: scratch, 86 | GitVersion: gitVersion, 87 | } 88 | 89 | result := &splitter.Result{} 90 | 91 | var ticker *time.Ticker 92 | if progress && !debug { 93 | ticker = time.NewTicker(time.Millisecond * 50) 94 | go func() { 95 | for range ticker.C { 96 | fmt.Fprintf(os.Stderr, "%d commits created, %d commits traversed\r", result.Created(), result.Traversed()) 97 | } 98 | }() 99 | } 100 | 101 | if err := splitter.Split(config, result); err != nil { 102 | fmt.Fprintln(os.Stderr, err.Error()) 103 | os.Exit(1) 104 | } 105 | 106 | if ticker != nil { 107 | ticker.Stop() 108 | } 109 | 110 | fmt.Fprintf(os.Stderr, "%d commits created, %d commits traversed, in %s\n", result.Created(), result.Traversed(), result.Duration(time.Millisecond)) 111 | 112 | if result.Head() != nil { 113 | fmt.Println(result.Head().String()) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /splitter/cache.go: -------------------------------------------------------------------------------- 1 | package splitter 2 | 3 | 
import ( 4 | "crypto/sha1" 5 | "fmt" 6 | "io" 7 | "path/filepath" 8 | "strconv" 9 | "time" 10 | 11 | git "github.com/libgit2/git2go/v34" 12 | bolt "go.etcd.io/bbolt" 13 | ) 14 | 15 | type cache struct { 16 | key []byte 17 | branch string 18 | db *bolt.DB 19 | data map[string][]byte 20 | } 21 | 22 | func newCache(branch string, config *Config) (*cache, error) { 23 | var err error 24 | db := config.DB 25 | if db == nil { 26 | db, err = bolt.Open(filepath.Join(GitDirectory(config.Path), "splitsh.db"), 0644, &bolt.Options{Timeout: 5 * time.Second}) 27 | if err != nil { 28 | return nil, err 29 | } 30 | } 31 | 32 | c := &cache{ 33 | db: db, 34 | branch: branch, 35 | key: key(config), 36 | data: make(map[string][]byte), 37 | } 38 | 39 | err = db.Update(func(tx *bolt.Tx) error { 40 | _, err1 := tx.CreateBucketIfNotExists(c.key) 41 | return err1 42 | }) 43 | if err != nil { 44 | return nil, fmt.Errorf("impossible to create bucket: %s", err) 45 | } 46 | 47 | return c, nil 48 | } 49 | 50 | func (c *cache) close() error { 51 | err := c.db.Update(func(tx *bolt.Tx) error { 52 | for k, v := range c.data { 53 | if err := tx.Bucket(c.key).Put([]byte(k), v); err != nil { 54 | return err 55 | } 56 | } 57 | return nil 58 | }) 59 | if err != nil { 60 | return err 61 | } 62 | 63 | return c.db.Close() 64 | } 65 | 66 | func key(config *Config) []byte { 67 | h := sha1.New() 68 | if config.Commit != "" { 69 | io.WriteString(h, config.Commit) 70 | } else { 71 | // value does not matter, should just be always the same 72 | io.WriteString(h, "oldest") 73 | } 74 | 75 | io.WriteString(h, strconv.Itoa(config.Git)) 76 | 77 | for _, prefix := range config.Prefixes { 78 | io.WriteString(h, prefix.From) 79 | io.WriteString(h, prefix.To) 80 | for _, exclude := range prefix.Excludes { 81 | io.WriteString(h, exclude) 82 | } 83 | } 84 | 85 | return h.Sum(nil) 86 | } 87 | 88 | func (c *cache) setHead(head *git.Oid) { 89 | c.data["head/"+c.branch] = head[0:20] 90 | } 91 | 92 | func (c *cache) getHead() 
*git.Oid { 93 | if head, ok := c.data["head"+c.branch]; ok { 94 | return git.NewOidFromBytes(head) 95 | } 96 | 97 | var oid *git.Oid 98 | c.db.View(func(tx *bolt.Tx) error { 99 | result := tx.Bucket(c.key).Get([]byte("head/" + c.branch)) 100 | if result != nil { 101 | c.data["head/"+c.branch] = result 102 | oid = git.NewOidFromBytes(result) 103 | } 104 | return nil 105 | }) 106 | return oid 107 | } 108 | 109 | func (c *cache) get(rev *git.Oid) *git.Oid { 110 | if v, ok := c.data[string(rev[0:20])]; ok { 111 | return git.NewOidFromBytes(v) 112 | } 113 | 114 | var oid *git.Oid 115 | c.db.View(func(tx *bolt.Tx) error { 116 | result := tx.Bucket(c.key).Get(rev[0:20]) 117 | if result != nil { 118 | c.data[string(rev[0:20])] = result 119 | oid = git.NewOidFromBytes(result) 120 | } 121 | return nil 122 | }) 123 | return oid 124 | } 125 | 126 | func (c *cache) set(rev, newrev *git.Oid) { 127 | c.data[string(rev[0:20])] = newrev[0:20] 128 | } 129 | 130 | func (c *cache) gets(commits []*git.Oid) []*git.Oid { 131 | var oids []*git.Oid 132 | c.db.View(func(tx *bolt.Tx) error { 133 | b := tx.Bucket(c.key) 134 | for _, commit := range commits { 135 | result := c.data[string(commit[0:20])] 136 | if result != nil { 137 | oids = append(oids, git.NewOidFromBytes(result)) 138 | } else { 139 | result := b.Get(commit[0:20]) 140 | if result != nil { 141 | oids = append(oids, git.NewOidFromBytes(result)) 142 | } 143 | } 144 | } 145 | return nil 146 | }) 147 | return oids 148 | } 149 | 150 | func (c *cache) flush() error { 151 | return c.db.Update(func(tx *bolt.Tx) error { 152 | if tx.Bucket(c.key) != nil { 153 | err := tx.DeleteBucket(c.key) 154 | if err != nil { 155 | return err 156 | } 157 | 158 | _, err = tx.CreateBucketIfNotExists(c.key) 159 | if err != nil { 160 | return err 161 | } 162 | } 163 | 164 | return nil 165 | }) 166 | } 167 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | Git Subtree Splitter 2 | ==================== 3 | 4 | **splitsh-lite** replaces the `subtree split` Git built-in command to make 5 | **splitting a monolithic repository** to read-only standalone repositories 6 | **easy and fast**. 7 | 8 | Why do I need this tool? 9 | ------------------------ 10 | 11 | When starting a project, do you store all the code in one repository? Or are 12 | you creating many standalone repositories? 13 | 14 | Both strategies work well and both have drawbacks as well. **splitsh** helps 15 | use both strategies by providing tools that automatically **synchronize a 16 | monolithic repository to standalone repositories** in real-time. 17 | 18 | **splitsh-lite** is a sub-project that provides a faster implementation of the 19 | `git subtree split` command, which helps create standalone repositories for one 20 | or more sub-directories of a main repository. 21 | 22 | If you want to learn more about monorepo vs manyrepos, watch this [4-minute 23 | lightning talk](http://www.thedotpost.com/2016/05/fabien-potencier-monolithic-repositories-vs-many-repositories) 24 | I gave at dotScale 25 | (or [read the slides](https://speakerdeck.com/fabpot/a-monorepo-vs-manyrepos))... 26 | or watch the longer version from 27 | [DrupalCon](https://www.youtube.com/watch?v=4w3-f6Xhvu8). 28 | ["The Monorepo - Storing your source code has never been so much fun"](https://speakerdeck.com/garethr/the-monorepo-storing-your-source-code-has-never-been-so-much-fun) 29 | is also a great resource. 30 | 31 | **Note** If you currently have multiple repositories that you want to merge into 32 | a monorepo, use the [tomono](https://github.com/unravelin/tomono) tool. 33 | 34 | Installation 35 | ------------ 36 | 37 | Manual Installation 38 | ------------------- 39 | 40 | First, you need to install `libgit2`, preferably using your package manager of 41 | choice. 
42 | 43 | If you get `libgit2` version `1.5`, you're all set and jump to the compilation 44 | step below. If not, you first need to change the `git2go` version used in the 45 | code. Using the table on the 46 | [git2go](https://github.com/libgit2/git2go#which-go-version-to-use) repository, 47 | figure out which version of `git2go` you need based on the `libgit2` library 48 | you installed. Let's say you need version `v31`: 49 | 50 | ```bash 51 | sed -i -e 's/v34/v31/g' go.mod splitter/*.go 52 | go mod tidy 53 | ``` 54 | 55 | Then, compile `splitsh-lite`: 56 | 57 | ```bash 58 | go build -o splitsh-lite github.com/splitsh/lite 59 | ``` 60 | 61 | If everything goes fine, a `splitsh-lite` binary should be available in the 62 | current directory. 63 | 64 | If you get errors about an incompatible `libgit2` library, try exporting the 65 | needed flags, e.g. 66 | 67 | ```bash 68 | export LDFLAGS="-L/opt/homebrew/opt/libgit2@1.5/lib" 69 | export CPPFLAGS="-I/opt/homebrew/opt/libgit2@1.5/include" 70 | export PKG_CONFIG_PATH="/opt/homebrew/opt/libgit2@1.5/lib/pkgconfig" 71 | ``` 72 | 73 | before running `go build`. 74 | 75 | If you want to integrate splitsh with Git, install it like this (and use it via 76 | `git splitsh`): 77 | 78 | ```bash 79 | cp splitsh-lite "$(git --exec-path)"/git-splitsh 80 | ``` 81 | 82 | Usage 83 | ----- 84 | 85 | Let's say you want to split the `lib/` directory of a repository to its own 86 | branch; from the "master" Git repository (bare or clone), run: 87 | 88 | ```bash 89 | splitsh-lite --prefix=lib/ 90 | ``` 91 | 92 | The *sha1* of the split is displayed at the end of the execution: 93 | 94 | ```bash 95 | SHA1=`splitsh-lite --prefix=lib/` 96 | ``` 97 | 98 | The sha1 can be used to create a branch or to push the commits to a new 99 | repository. 
100 | 101 | Automatically create a branch for the split by passing a branch name 102 | via the `--target` option: 103 | 104 | ```bash 105 | splitsh-lite --prefix=lib/ --target=heads/branch-name 106 | ``` 107 | 108 | If new commits are made to the repository, update the split by running the same 109 | command again. Updates are much faster as **splitsh-lite** keeps a cache of 110 | already split commits. Caching is possible as **splitsh-lite** guarantees that 111 | two splits of the same code always results in the same history and the same 112 | `sha1`s for each commit. 113 | 114 | By default, **splitsh-lite** splits the currently checked out branch but you can 115 | split a different branch by passing it explicitly via the `--origin` flag 116 | (mandatory when splitting a bare repository): 117 | 118 | ```bash 119 | splitsh-lite --prefix=lib/ --origin=origin/master 120 | ``` 121 | 122 | You don't even need to run the command from the Git repository directory if you 123 | pass the `--path` option: 124 | 125 | ```bash 126 | splitsh-lite --prefix=lib/ --origin=origin/1.0 --path=/path/to/repo 127 | ``` 128 | 129 | Available options: 130 | 131 | * `--prefix` is the prefix of the directory to split; the value can be one of 132 | the following: 133 | 134 | * `from`: the origin directory to split; 135 | 136 | * `from:to`: move the split content to a sub-directory on the target; 137 | 138 | * `from:to:exclude`: exclude a directory from the origin `from` directory 139 | (use `from:to:exclude1:exclude2:...` to exclude more than one 140 | directory). 
141 | 142 | Split several directories by passing multiple `--prefix` flags; 143 | 144 | * `--path` is the path of the repository to split (current directory by default); 145 | 146 | * `--origin` is the Git reference for the origin (can be any Git reference 147 | like `HEAD`, `heads/xxx`, `tags/xxx`, `origin/xxx`, or any `refs/xxx`); 148 | 149 | * `--target` creates a reference for the tip of the split (can be any Git 150 | reference like `heads/xxx`, `tags/xxx`, `origin/xxx`, or any `refs/xxx`); 151 | 152 | * `--progress` displays a progress bar; 153 | 154 | * `--scratch` flushes the cache (useful when a branch is force pushed or in 155 | case of a cache corruption). 156 | 157 | Migrating from `git subtree split` 158 | ---------------------------------- 159 | 160 | Migrating from `git subtree split` to `splitsh-lite` is easy as both tools 161 | generate the same `sha1`s. 162 | 163 | However, note that older versions of `git subtree split` used broken 164 | algorithms, and so generated different `sha1`s than the latest version. You can 165 | simulate those versions via the `--git` flag. Use `<1.8.2` or `<2.8.0` depending 166 | on which version of `git subtree split` you want to simulate. 
167 | -------------------------------------------------------------------------------- /run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | switchAsSammy() 6 | { 7 | AUTHOR_DATE=$1 8 | COMMITTER_DATE=$2 9 | export GIT_AUTHOR_NAME="Sammy Cobol" 10 | export GIT_AUTHOR_EMAIL="" 11 | export GIT_AUTHOR_DATE="${AUTHOR_DATE}" 12 | export GIT_COMMITTER_NAME="Fred Foobar" 13 | export GIT_COMMITTER_EMAIL="" 14 | export GIT_COMMITTER_DATE="${COMMITTER_DATE}" 15 | } 16 | 17 | switchAsFred() { 18 | AUTHOR_DATE=$1 19 | COMMITTER_DATE=$2 20 | export GIT_AUTHOR_NAME="Fred Foobar" 21 | export GIT_AUTHOR_EMAIL="" 22 | export GIT_AUTHOR_DATE="${AUTHOR_DATE}" 23 | export GIT_COMMITTER_NAME="Sammy Cobol" 24 | export GIT_COMMITTER_EMAIL="" 25 | export GIT_COMMITTER_DATE="${COMMITTER_DATE}" 26 | } 27 | 28 | simpleTest() { 29 | rm -rf simple 30 | mkdir simple 31 | cd simple 32 | git init > /dev/null 33 | 34 | switchAsSammy "Sat, 24 Nov 1973 19:01:02 +0200" "Sat, 24 Nov 1973 19:11:22 +0200" 35 | echo "a" > a 36 | git add a 37 | git commit -m"added a" > /dev/null 38 | 39 | switchAsFred "Sat, 24 Nov 1973 20:01:02 +0200" "Sat, 24 Nov 1973 20:11:22 +0200" 40 | mkdir b/ 41 | echo "b" > b/b 42 | git add b 43 | git commit -m"added b" > /dev/null 44 | 45 | switchAsFred "Sat, 24 Nov 1973 21:01:02 +0200" "Sat, 24 Nov 1973 21:11:22 +0200" 46 | echo "aa" > a 47 | git add a 48 | git commit -m"updated a" > /dev/null 49 | 50 | switchAsFred "Sat, 24 Nov 1973 22:01:02 +0200" "Sat, 24 Nov 1973 22:11:22 +0200" 51 | git rm a > /dev/null 52 | git commit -m"updated a" > /dev/null 53 | 54 | switchAsFred "Sat, 24 Nov 1973 23:01:02 +0200" "Sat, 24 Nov 1973 23:11:22 +0200" 55 | echo "bb" > b/b 56 | git add b/ 57 | git commit -m"updated b" > /dev/null 58 | 59 | GIT_SUBTREE_SPLIT_SHA1=`git subtree split --prefix=b/ -q` 60 | GIT_SPLITSH_SHA1=`$LITE_PATH --prefix=b/ 2>/dev/null` 61 | 62 | if [ "$GIT_SUBTREE_SPLIT_SHA1" == 
# mergeTest builds a small repository whose history contains a regular merge
# and an "ours" merge, then checks that splitting src/ yields the SHA-1s that
# `git subtree split` produces (old behaviour via --git="<2.8.0", and current).
mergeTest() {
    rm -rf merge
    mkdir -p merge/src
    cd merge || exit 1
    git init > /dev/null

    switchAsSammy "Sat, 24 Nov 1973 19:01:01 +0200" "Sat, 24 Nov 1973 19:01:01 +0200"
    echo -e "a\n\nb\n\nc\n\n" > src/foo
    git add src/foo
    git commit -m"init" > /dev/null

    # The checkouts below refer to "main"; make sure the initial branch has
    # that name whatever init.defaultBranch is set to on this machine.
    git branch -M main

    git checkout -b branch1 2> /dev/null

    switchAsSammy "Sat, 24 Nov 1973 19:02:02 +0200" "Sat, 24 Nov 1973 19:02:02 +0200"
    echo -e "a\n\nb\nchange 2\nc\n\n" > src/foo
    git commit -a -m"change 2" > /dev/null

    switchAsSammy "Sat, 24 Nov 1973 19:02:02 +0200" "Sat, 24 Nov 1973 19:02:02 +0200"
    echo -e "a\n\nb\nchange 2\nc\nchange 3\n" > src/foo
    git commit -a -m"change 3" > /dev/null

    git checkout main 2> /dev/null
    switchAsSammy "Sat, 24 Nov 1973 19:02:02 +0200" "Sat, 24 Nov 1973 19:02:02 +0200"
    echo -e "a\nchange 1\nb\n\nc\n\n" > src/foo
    git commit -a -m"change 1" > /dev/null

    git checkout -b branch2 2> /dev/null
    switchAsSammy "Sat, 24 Nov 1973 19:02:02 +0200" "Sat, 24 Nov 1973 19:02:02 +0200"
    echo -e "a\n\nb\nchange 2\nc\n\n" > src/foo
    git commit -a -m"change 2" > /dev/null

    git checkout main 2> /dev/null
    git checkout -b branch3 2> /dev/null
    git merge branch1 --no-edit > /dev/null
    git merge branch2 --no-edit -s ours > /dev/null

    GIT_SUBTREE_SPLIT_SHA1_2="a2c4245703f8dac149ab666242a12e1d4b2510d9"
    GIT_SUBTREE_SPLIT_SHA1_3="ba0dab2c4e99d68d11088f2c556af92851e93b14"
    GIT_SPLITSH_SHA1_2=$("$LITE_PATH" --git="<2.8.0" --prefix=src/ 2>/dev/null)
    GIT_SPLITSH_SHA1_3=$("$LITE_PATH" --prefix=src/ 2>/dev/null)

    if [ "$GIT_SUBTREE_SPLIT_SHA1_2" = "$GIT_SPLITSH_SHA1_2" ]; then
        echo "Test #3 - OK ($GIT_SUBTREE_SPLIT_SHA1_2 == $GIT_SPLITSH_SHA1_2)"
    else
        echo "Test #3 - NOT OK ($GIT_SUBTREE_SPLIT_SHA1_2 != $GIT_SPLITSH_SHA1_2)"
        exit 1
    fi

    if [ "$GIT_SUBTREE_SPLIT_SHA1_3" = "$GIT_SPLITSH_SHA1_3" ]; then
        echo "Test #4 - OK ($GIT_SUBTREE_SPLIT_SHA1_3 == $GIT_SPLITSH_SHA1_3)"
    else
        echo "Test #4 - NOT OK ($GIT_SUBTREE_SPLIT_SHA1_3 != $GIT_SPLITSH_SHA1_3)"
        exit 1
    fi

    cd ../
}

# twigSplitTest splits lib/ of a real Open-Source repository (Twig, cloned on
# first use) at tag v1.24.1 and compares the result with a known SHA-1.
twigSplitTest() {
    # run on some Open-Source repositories
    if [ ! -d Twig ]; then
        git clone https://github.com/twigphp/Twig > /dev/null
    fi
    GIT_SUBTREE_SPLIT_SHA1="ea449b0f2acba7d489a91f88154687250d2bdf42"
    GIT_SPLITSH_SHA1=$("$LITE_PATH" --prefix=lib/ --origin=refs/tags/v1.24.1 --path=Twig --scratch 2>/dev/null)

    if [ "$GIT_SUBTREE_SPLIT_SHA1" = "$GIT_SPLITSH_SHA1" ]; then
        echo "Test #5 - OK ($GIT_SUBTREE_SPLIT_SHA1 == $GIT_SPLITSH_SHA1)"
    else
        echo "Test #5 - NOT OK ($GIT_SUBTREE_SPLIT_SHA1 != $GIT_SPLITSH_SHA1)"
        exit 1
    fi

    # No "cd ../" here: this test works through --path=Twig and never leaves
    # the tests directory, so changing directory would make the next test run
    # one level too high.
}

# filemodeTest checks that the executable bit of a file survives a split that
# goes through the multi-prefix code path (prefix with excludes).
filemodeTest() {
    rm -rf filemode
    mkdir filemode
    cd filemode || exit 1
    git init > /dev/null

    switchAsSammy "Sat, 24 Nov 1973 19:01:02 +0200" "Sat, 24 Nov 1973 19:11:22 +0200"
    echo "a" > a
    git add a
    git commit -m"added a" > /dev/null

    switchAsFred "Sat, 24 Nov 1973 20:01:02 +0200" "Sat, 24 Nov 1973 20:11:22 +0200"
    mkdir b/
    echo "b" > b/b
    chmod +x b/b
    git add b
    git commit -m"added b" > /dev/null

    "$LITE_PATH" --prefix=b/::not-important:also-not-important --target refs/heads/split 2>/dev/null
    FILEMODE=$(git ls-tree -r --format='%(objectmode)' split b)

    if test "$FILEMODE" = "100755"; then
        echo "Test #6 - OK"
    else
        echo "Test #6 - NOT OK"
        exit 1
    fi

    cd ../
}

# Driver: locate the binary, move into the scratch directory and run every test.
LITE_PATH=$(pwd)/splitsh-lite
if [ ! -e "$LITE_PATH" ]; then
    echo "You first need to compile the splitsh-lite binary"
    exit 1
fi

mkdir -p splitter-lite-tests
cd splitter-lite-tests || exit 1

simpleTest
mergeTest
twigSplitTest
filemodeTest
-d splitter-lite-tests ]; then 196 | mkdir splitter-lite-tests 197 | fi 198 | cd splitter-lite-tests 199 | 200 | simpleTest 201 | mergeTest 202 | twigSplitTest 203 | filemodeTest 204 | -------------------------------------------------------------------------------- /splitter/state.go: -------------------------------------------------------------------------------- 1 | package splitter 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "strings" 8 | "sync" 9 | "time" 10 | 11 | git "github.com/libgit2/git2go/v34" 12 | ) 13 | 14 | type state struct { 15 | config *Config 16 | origin string 17 | repoMu *sync.Mutex 18 | repo *git.Repository 19 | cache *cache 20 | logger *log.Logger 21 | simplePrefix string 22 | result *Result 23 | } 24 | 25 | func newState(config *Config, result *Result) (*state, error) { 26 | var err error 27 | 28 | // validate config 29 | if err = config.Validate(); err != nil { 30 | return nil, err 31 | } 32 | 33 | state := &state{ 34 | config: config, 35 | result: result, 36 | repoMu: config.RepoMu, 37 | repo: config.Repo, 38 | logger: config.Logger, 39 | } 40 | 41 | if state.repo == nil { 42 | if state.repo, err = git.OpenRepository(config.Path); err != nil { 43 | return nil, err 44 | } 45 | } 46 | 47 | if state.repoMu == nil { 48 | state.repoMu = &sync.Mutex{} 49 | } 50 | 51 | if state.logger == nil { 52 | state.logger = log.New(os.Stderr, "", log.LstdFlags) 53 | } 54 | 55 | if state.origin, err = normalizeOrigin(state.repo, config.Origin); err != nil { 56 | return nil, err 57 | } 58 | 59 | if state.cache, err = newCache(state.origin, config); err != nil { 60 | return nil, err 61 | } 62 | 63 | if config.Debug { 64 | state.logger.Printf("Splitting %s", state.origin) 65 | for _, v := range config.Prefixes { 66 | to := v.To 67 | if to == "" { 68 | to = "ROOT" 69 | } 70 | state.logger.Printf(` From "%s" to "%s"`, v.From, to) 71 | if (len(v.Excludes)) == 0 { 72 | } else { 73 | state.logger.Printf(` Excluding "%s"`, strings.Join(v.Excludes, `", "`)) 74 | 
} 75 | } 76 | } 77 | 78 | if config.Scratch { 79 | if err := state.flush(); err != nil { 80 | return nil, err 81 | } 82 | } 83 | 84 | // simplePrefix contains the prefix when there is only one 85 | // with an empty value (target) 86 | if len(config.Prefixes) == 1 && config.Prefixes[0].To == "" && len(config.Prefixes[0].Excludes) == 0 { 87 | state.simplePrefix = config.Prefixes[0].From 88 | } 89 | 90 | return state, nil 91 | } 92 | 93 | func (s *state) close() error { 94 | err := s.cache.close() 95 | if err != nil { 96 | return err 97 | } 98 | s.repo.Free() 99 | return nil 100 | } 101 | 102 | func (s *state) flush() error { 103 | if err := s.cache.flush(); err != nil { 104 | return err 105 | } 106 | 107 | if s.config.Target != "" { 108 | branch, err := s.repo.LookupBranch(s.config.Target, git.BranchLocal) 109 | if err == nil { 110 | branch.Delete() 111 | branch.Free() 112 | } 113 | } 114 | 115 | return nil 116 | } 117 | 118 | func (s *state) split() error { 119 | startTime := time.Now() 120 | defer func() { 121 | s.result.end(startTime) 122 | }() 123 | 124 | revWalk, err := s.walker() 125 | if err != nil { 126 | return fmt.Errorf("impossible to walk the repository: %s", err) 127 | } 128 | defer revWalk.Free() 129 | 130 | var iterationErr error 131 | var lastRev *git.Oid 132 | err = revWalk.Iterate(func(rev *git.Commit) bool { 133 | defer rev.Free() 134 | lastRev = rev.Id() 135 | 136 | if s.config.Debug { 137 | s.logger.Printf("Processing commit: %s\n", rev.Id().String()) 138 | } 139 | 140 | var newrev *git.Oid 141 | newrev, err = s.splitRev(rev) 142 | if err != nil { 143 | iterationErr = err 144 | return false 145 | } 146 | 147 | if newrev != nil { 148 | s.result.moveHead(newrev) 149 | } 150 | 151 | return true 152 | }) 153 | if err != nil { 154 | return err 155 | } 156 | if iterationErr != nil { 157 | return iterationErr 158 | } 159 | 160 | if lastRev != nil { 161 | s.cache.setHead(lastRev) 162 | } 163 | 164 | return s.updateTarget() 165 | } 166 | 167 | func (s 
*state) walker() (*git.RevWalk, error) { 168 | revWalk, err := s.repo.Walk() 169 | if err != nil { 170 | return nil, fmt.Errorf("impossible to walk the repository: %s", err) 171 | } 172 | 173 | err = s.pushRevs(revWalk) 174 | if err != nil { 175 | return nil, fmt.Errorf("impossible to determine split range: %s", err) 176 | } 177 | 178 | revWalk.Sorting(git.SortTopological | git.SortReverse) 179 | 180 | return revWalk, nil 181 | } 182 | 183 | func (s *state) splitRev(rev *git.Commit) (*git.Oid, error) { 184 | s.result.incTraversed() 185 | 186 | v := s.cache.get(rev.Id()) 187 | if v != nil { 188 | if s.config.Debug { 189 | s.logger.Printf(" prior: %s\n", v.String()) 190 | } 191 | return v, nil 192 | } 193 | 194 | var parents []*git.Oid 195 | var n uint 196 | for n = 0; n < rev.ParentCount(); n++ { 197 | parents = append(parents, rev.ParentId(n)) 198 | } 199 | 200 | if s.config.Debug { 201 | debugMsg := " parents:" 202 | for _, parent := range parents { 203 | debugMsg += fmt.Sprintf(" %s", parent.String()) 204 | } 205 | s.logger.Print(debugMsg) 206 | } 207 | 208 | newParents := s.cache.gets(parents) 209 | 210 | if s.config.Debug { 211 | debugMsg := " newparents:" 212 | for _, parent := range newParents { 213 | debugMsg += fmt.Sprintf(" %s", parent) 214 | } 215 | s.logger.Print(debugMsg) 216 | } 217 | 218 | tree, err := s.subtreeForCommit(rev) 219 | if err != nil { 220 | return nil, err 221 | } 222 | 223 | if nil == tree { 224 | // should never happen 225 | return nil, nil 226 | } 227 | defer tree.Free() 228 | 229 | if s.config.Debug { 230 | s.logger.Printf(" tree is: %s\n", tree.Id().String()) 231 | } 232 | 233 | newrev, created, err := s.copyOrSkip(rev, tree, newParents) 234 | if err != nil { 235 | return nil, err 236 | } 237 | 238 | if s.config.Debug { 239 | s.logger.Printf(" newrev is: %s\n", newrev) 240 | } 241 | 242 | if created { 243 | s.result.incCreated() 244 | } 245 | 246 | s.cache.set(rev.Id(), newrev) 247 | 248 | return newrev, nil 249 | } 250 | 251 | func 
(s *state) subtreeForCommit(commit *git.Commit) (*git.Tree, error) { 252 | tree, err := commit.Tree() 253 | if err != nil { 254 | return nil, err 255 | } 256 | defer tree.Free() 257 | 258 | if s.simplePrefix != "" { 259 | return s.treeByPath(tree, s.simplePrefix) 260 | } 261 | 262 | return s.treeByPaths(tree) 263 | } 264 | 265 | func (s *state) treeByPath(tree *git.Tree, prefix string) (*git.Tree, error) { 266 | treeEntry, err := tree.EntryByPath(prefix) 267 | if err != nil { 268 | return nil, nil 269 | } 270 | 271 | if treeEntry.Type != git.ObjectTree { 272 | // tree is not a tree (a directory for a gitmodule for instance), skip 273 | return nil, nil 274 | } 275 | 276 | return s.repo.LookupTree(treeEntry.Id) 277 | } 278 | 279 | func (s *state) treeByPaths(tree *git.Tree) (*git.Tree, error) { 280 | var currentTree, prefixedTree, mergedTree *git.Tree 281 | for _, prefix := range s.config.Prefixes { 282 | // splitting 283 | splitTree, err := s.treeByPath(tree, prefix.From) 284 | if err != nil { 285 | return nil, err 286 | } 287 | if splitTree == nil { 288 | continue 289 | } 290 | 291 | if len(prefix.Excludes) > 0 { 292 | prunedTree, err := s.pruneTree(splitTree, prefix.Excludes) 293 | if err != nil { 294 | return nil, err 295 | } 296 | splitTree = prunedTree 297 | } 298 | 299 | // adding the prefix 300 | if prefix.To != "" { 301 | prefixedTree, err = s.addPrefixToTree(splitTree, prefix.To) 302 | if err != nil { 303 | return nil, err 304 | } 305 | } else { 306 | prefixedTree = splitTree 307 | } 308 | 309 | // merging with the current tree 310 | if currentTree != nil { 311 | mergedTree, err = s.mergeTrees(currentTree, prefixedTree) 312 | currentTree.Free() 313 | prefixedTree.Free() 314 | if err != nil { 315 | return nil, err 316 | } 317 | } else { 318 | mergedTree = prefixedTree 319 | } 320 | 321 | currentTree = mergedTree 322 | } 323 | 324 | return currentTree, nil 325 | } 326 | 327 | func (s *state) mergeTrees(t1, t2 *git.Tree) (*git.Tree, error) { 328 | index, err 
:= s.repo.MergeTrees(nil, t1, t2, nil) 329 | if err != nil { 330 | return nil, err 331 | } 332 | defer index.Free() 333 | 334 | if index.HasConflicts() { 335 | return nil, fmt.Errorf("cannot split as there is a merge conflict between two paths") 336 | } 337 | 338 | oid, err := index.WriteTreeTo(s.repo) 339 | if err != nil { 340 | return nil, err 341 | } 342 | 343 | return s.repo.LookupTree(oid) 344 | } 345 | 346 | func (s *state) addPrefixToTree(tree *git.Tree, prefix string) (*git.Tree, error) { 347 | treeOid := tree.Id() 348 | parts := strings.Split(prefix, "/") 349 | for i := len(parts) - 1; i >= 0; i-- { 350 | treeBuilder, err := s.repo.TreeBuilder() 351 | if err != nil { 352 | return nil, err 353 | } 354 | defer treeBuilder.Free() 355 | 356 | err = treeBuilder.Insert(parts[i], treeOid, git.FilemodeTree) 357 | if err != nil { 358 | return nil, err 359 | } 360 | 361 | treeOid, err = treeBuilder.Write() 362 | if err != nil { 363 | return nil, err 364 | } 365 | } 366 | 367 | prefixedTree, err := s.repo.LookupTree(treeOid) 368 | if err != nil { 369 | return nil, err 370 | } 371 | 372 | return prefixedTree, nil 373 | } 374 | 375 | func (s *state) pruneTree(tree *git.Tree, excludes []string) (*git.Tree, error) { 376 | var err error 377 | treeBuilder, err := s.repo.TreeBuilder() 378 | if err != nil { 379 | return nil, err 380 | } 381 | defer treeBuilder.Free() 382 | 383 | err = tree.Walk(func(path string, entry *git.TreeEntry) error { 384 | // always add files at the root directory 385 | if entry.Type == git.ObjectBlob { 386 | if err := treeBuilder.Insert(entry.Name, entry.Id, entry.Filemode); err != nil { 387 | return err 388 | } 389 | return nil 390 | } 391 | 392 | if entry.Type != git.ObjectTree { 393 | // should never happen 394 | return fmt.Errorf("Unexpected entry %s/%s (type %s)", path, entry.Name, entry.Type) 395 | } 396 | 397 | // exclude directory in excludes 398 | for _, exclude := range excludes { 399 | if entry.Name == exclude { 400 | return 
git.TreeWalkSkip 401 | } 402 | } 403 | 404 | if err := treeBuilder.Insert(entry.Name, entry.Id, git.FilemodeTree); err != nil { 405 | return err 406 | } 407 | return git.TreeWalkSkip 408 | }) 409 | 410 | if err != nil { 411 | return nil, err 412 | } 413 | 414 | treeOid, err := treeBuilder.Write() 415 | if err != nil { 416 | return nil, err 417 | } 418 | 419 | return s.repo.LookupTree(treeOid) 420 | } 421 | 422 | func (s *state) copyOrSkip(rev *git.Commit, tree *git.Tree, newParents []*git.Oid) (*git.Oid, bool, error) { 423 | var identical, nonIdentical *git.Oid 424 | var gotParents []*git.Oid 425 | var p []*git.Commit 426 | for _, parent := range newParents { 427 | ptree, err := s.topTreeForCommit(parent) 428 | if err != nil { 429 | return nil, false, err 430 | } 431 | if nil == ptree { 432 | continue 433 | } 434 | 435 | if ptree.Cmp(tree.Id()) == 0 { 436 | // an identical parent could be used in place of this rev. 437 | identical = parent 438 | } else { 439 | nonIdentical = parent 440 | } 441 | 442 | // sometimes both old parents map to the same newparent 443 | // eliminate duplicates 444 | isNew := true 445 | for _, gp := range gotParents { 446 | if gp.Cmp(parent) == 0 { 447 | isNew = false 448 | break 449 | } 450 | } 451 | 452 | if isNew { 453 | gotParents = append(gotParents, parent) 454 | commit, err := s.repo.LookupCommit(parent) 455 | if err != nil { 456 | return nil, false, err 457 | } 458 | defer commit.Free() 459 | p = append(p, commit) 460 | } 461 | } 462 | 463 | copyCommit := false 464 | if s.config.Git > 2 && nil != identical && nil != nonIdentical { 465 | revWalk, err := s.repo.Walk() 466 | if err != nil { 467 | return nil, false, fmt.Errorf("impossible to walk the repository: %s", err) 468 | } 469 | 470 | s.repoMu.Lock() 471 | defer s.repoMu.Unlock() 472 | 473 | err = revWalk.PushRange(fmt.Sprintf("%s..%s", identical, nonIdentical)) 474 | if err != nil { 475 | return nil, false, fmt.Errorf("impossible to determine split range: %s", err) 476 | } 477 | 
478 | err = revWalk.Iterate(func(rev *git.Commit) bool { 479 | // we need to preserve history along the other branch 480 | copyCommit = true 481 | return false 482 | }) 483 | if err != nil { 484 | return nil, false, err 485 | } 486 | 487 | revWalk.Free() 488 | } 489 | 490 | if nil != identical && !copyCommit { 491 | return identical, false, nil 492 | } 493 | 494 | commit, err := s.copyCommit(rev, tree, p) 495 | if err != nil { 496 | return nil, false, err 497 | } 498 | 499 | return commit, true, nil 500 | } 501 | 502 | func (s *state) topTreeForCommit(sha *git.Oid) (*git.Oid, error) { 503 | commit, err := s.repo.LookupCommit(sha) 504 | if err != nil { 505 | return nil, err 506 | } 507 | defer commit.Free() 508 | 509 | tree, err := commit.Tree() 510 | if err != nil { 511 | return nil, err 512 | } 513 | defer tree.Free() 514 | 515 | return tree.Id(), nil 516 | } 517 | 518 | func (s *state) copyCommit(rev *git.Commit, tree *git.Tree, parents []*git.Commit) (*git.Oid, error) { 519 | if s.config.Debug { 520 | parentStrs := make([]string, len(parents)) 521 | for i, parent := range parents { 522 | parentStrs[i] = parent.Id().String() 523 | } 524 | s.logger.Printf(" copy commit \"%s\" \"%s\" \"%s\"\n", rev.Id().String(), tree.Id().String(), strings.Join(parentStrs, " ")) 525 | } 526 | 527 | message := rev.RawMessage() 528 | if s.config.Git == 1 { 529 | message = s.legacyMessage(rev) 530 | } 531 | 532 | author := rev.Author() 533 | if author.Email == "" { 534 | author.Email = "nobody@example.com" 535 | } 536 | 537 | committer := rev.Committer() 538 | if committer.Email == "" { 539 | committer.Email = "nobody@example.com" 540 | } 541 | 542 | oid, err := s.repo.CreateCommit("", author, committer, message, tree, parents...) 
543 | if err != nil { 544 | return nil, err 545 | } 546 | 547 | return oid, nil 548 | } 549 | 550 | func (s *state) updateTarget() error { 551 | if s.config.Target == "" { 552 | return nil 553 | } 554 | 555 | if nil == s.result.Head() { 556 | return fmt.Errorf("unable to create branch %s as it is empty (no commits were split)", s.config.Target) 557 | } 558 | 559 | obj, ref, err := s.repo.RevparseExt(s.config.Target) 560 | if obj != nil { 561 | obj.Free() 562 | } 563 | if err != nil { 564 | ref, err = s.repo.References.Create(s.config.Target, s.result.Head(), false, "subtree split") 565 | if err != nil { 566 | return err 567 | } 568 | ref.Free() 569 | } else { 570 | defer ref.Free() 571 | ref.SetTarget(s.result.Head(), "subtree split") 572 | } 573 | 574 | return nil 575 | } 576 | 577 | func (s *state) legacyMessage(rev *git.Commit) string { 578 | subject, body := SplitMessage(rev.Message()) 579 | return subject + "\n\n" + body 580 | } 581 | 582 | // pushRevs sets the range to split 583 | func (s *state) pushRevs(revWalk *git.RevWalk) error { 584 | s.repoMu.Lock() 585 | defer s.repoMu.Unlock() 586 | 587 | var start *git.Oid 588 | start = s.cache.getHead() 589 | if start != nil { 590 | s.result.moveHead(s.cache.get(start)) 591 | // FIXME: CHECK that this is an ancestor of the branch? 592 | return revWalk.PushRange(fmt.Sprintf("%s..%s", start, s.origin)) 593 | } 594 | 595 | // find the latest split sha1 if any on origin 596 | var err error 597 | if s.config.Commit != "" { 598 | start, err = git.NewOid(s.config.Commit) 599 | if err != nil { 600 | return err 601 | } 602 | s.result.moveHead(s.cache.get(start)) 603 | return revWalk.PushRange(fmt.Sprintf("%s^..%s", start, s.origin)) 604 | } 605 | 606 | branch, err := s.repo.RevparseSingle(s.origin) 607 | if err != nil { 608 | return err 609 | } 610 | 611 | return revWalk.Push(branch.Id()) 612 | } 613 | --------------------------------------------------------------------------------