├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── Rakefile ├── cache.go ├── docker └── Dockerfile ├── dump.go ├── gitwho_ed25519.pub ├── go.mod ├── go.sum ├── hist.go ├── internal ├── cache │ ├── backends │ │ ├── gob.go │ │ ├── gob_test.go │ │ ├── json.go │ │ ├── json_test.go │ │ ├── log.go │ │ └── noop.go │ ├── cache.go │ └── log.go ├── concurrent │ ├── cache.go │ ├── concurrent.go │ ├── log.go │ └── worker.go ├── format │ ├── format.go │ └── format_test.go ├── git │ ├── args.go │ ├── args_test.go │ ├── cmd.go │ ├── git.go │ ├── git_test.go │ ├── log.go │ ├── main_test.go │ ├── parse.go │ ├── pathspec.go │ ├── pathspec_test.go │ └── repo.go ├── pretty │ ├── ansi.go │ └── pretty.go ├── repotest │ └── repotest.go ├── tally │ ├── bucket.go │ ├── bucket_test.go │ ├── log.go │ ├── tally.go │ ├── tally_test.go │ ├── tree.go │ └── tree_test.go └── utils │ ├── flagutils │ └── flag.go │ ├── iterutils │ └── iterutils.go │ └── timeutils │ └── timeutils.go ├── log.go ├── main.go ├── parse.go ├── screenshots └── vanity.png ├── table.go ├── test ├── functional │ ├── big_repo_test.rb │ ├── hist_test.rb │ ├── table_csv_test.rb │ ├── table_test.rb │ ├── tree_test.rb │ └── version_test.rb └── lib │ ├── cmd.rb │ └── repo.rb └── tree.go /.gitignore: -------------------------------------------------------------------------------- 1 | git-who 2 | out/ 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "test-repo"] 2 | path = test-repo 3 | url = git@github.com:sinclairtarget/git-who-test-repo.git 4 | [submodule "gunicorn"] 5 | path = gunicorn 6 | url = git@github.com:benoitc/gunicorn.git 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Sinclair Target 4 | 5 | 
Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'fileutils' 2 | require 'rake/clean' 3 | 4 | PROGNAME = 'git-who' 5 | SUPPORTED = [ 6 | ['darwin', 'arm64'], 7 | ['darwin', 'amd64'], 8 | ['linux', 'amd64'], 9 | ['linux', 'arm64'], 10 | ['linux', 'arm'], 11 | ['windows', 'amd64'], 12 | ] 13 | OUTDIR = 'out' 14 | RELEASE_DIRS = SUPPORTED.map do |os, arch| 15 | "#{OUTDIR}/#{os}_#{arch}" 16 | end 17 | 18 | task default: [:build] 19 | 20 | desc 'Run go fmt' 21 | task :fmt do 22 | sh 'go fmt ./internal/...' 
23 | sh 'go fmt *.go' 24 | end 25 | 26 | desc 'Build executable' 27 | task :build do 28 | gohostos = `go env GOHOSTOS`.strip 29 | gohostarch = `go env GOHOSTARCH`.strip 30 | build_for_platform gohostos, gohostarch, out: exec_name(gohostos) 31 | end 32 | 33 | namespace 'release' do 34 | directory OUTDIR 35 | 36 | RELEASE_DIRS.each do |dir| 37 | directory dir 38 | end 39 | 40 | desc 'Build binaries for all supported platforms' 41 | task build: RELEASE_DIRS do 42 | SUPPORTED.each do |os, arch| 43 | output_dir = "#{OUTDIR}/#{os}_#{arch}" 44 | progname = exec_name(os) 45 | build_for_platform(os, arch, out: "#{output_dir}/#{progname}") 46 | 47 | version = get_version 48 | sh "tar czf #{OUTDIR}/gitwho_#{version}_#{os}_#{arch}.tar.gz "\ 49 | "-C #{OUTDIR} #{os}_#{arch}" 50 | end 51 | end 52 | 53 | desc 'Sign checksum of built artifacts' 54 | task :sign do 55 | FileUtils.cd(OUTDIR) do 56 | version = get_version 57 | sumsfile = "SHA2-256SUMS_#{version}.txt" 58 | sh "shasum -a 256 **/git-who* > #{sumsfile}" 59 | sh "ssh-keygen -Y sign -n file -f ~/.ssh/gitwho_ed25519 #{sumsfile}" 60 | end 61 | end 62 | 63 | task all: [:build, :sign] 64 | end 65 | 66 | CLOBBER.include(OUTDIR) 67 | CLOBBER.include(PROGNAME) 68 | 69 | def get_version() 70 | `git describe --tags --always --dirty`.strip 71 | end 72 | 73 | def get_commit() 74 | `git rev-parse --short HEAD`.strip 75 | end 76 | 77 | def exec_name(goos) 78 | if goos == 'windows' 79 | PROGNAME + '.exe' 80 | else 81 | PROGNAME 82 | end 83 | end 84 | 85 | def build_for_platform(goos, goarch, out: PROGNAME) 86 | version = get_version 87 | rev = get_commit 88 | sh "GOOS=#{goos} GOARCH=#{goarch} go build -a -o #{out} "\ 89 | "-ldflags '-s -w -X main.Commit=#{rev} -X main.Version=#{version}'" 90 | end 91 | 92 | desc 'Run all unit tests' 93 | task :test do 94 | sh 'go test -count=1 ./internal/...' 
95 | end 96 | 97 | namespace 'functional' do 98 | begin 99 | require 'minitest/test_task' 100 | 101 | Minitest::TestTask.create(:test) do |t| 102 | t.libs << "test/lib" 103 | t.test_globs = ["test/**/*_test.rb"] 104 | end 105 | rescue LoadError 106 | # no-op, minitest not installed 107 | end 108 | end 109 | -------------------------------------------------------------------------------- /cache.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | 8 | "github.com/sinclairtarget/git-who/internal/cache" 9 | cacheBackends "github.com/sinclairtarget/git-who/internal/cache/backends" 10 | "github.com/sinclairtarget/git-who/internal/git" 11 | ) 12 | 13 | func warnFail(cb cache.Backend, err error) cache.Cache { 14 | logger().Warn( 15 | fmt.Sprintf("failed to initialize cache: %v", err), 16 | ) 17 | logger().Warn("disabling caching") 18 | return cache.NewCache(cb) 19 | } 20 | 21 | func getCache(gitRootPath string, repoFiles git.RepoConfigFiles) cache.Cache { 22 | var fallback cache.Backend = cacheBackends.NoopBackend{} 23 | 24 | if !cache.IsCachingEnabled() { 25 | return cache.NewCache(fallback) 26 | } 27 | 28 | cacheStorageDir, err := cache.CacheStorageDir( 29 | cacheBackends.GobBackendName, 30 | ) 31 | if err != nil { 32 | return warnFail(fallback, err) 33 | } 34 | 35 | dirname := cacheBackends.GobCacheDir(cacheStorageDir, gitRootPath) 36 | err = os.MkdirAll(dirname, 0o700) 37 | if err != nil { 38 | return warnFail(fallback, err) 39 | } 40 | 41 | filename, err := cacheBackends.GobCacheFilename(repoFiles) 42 | if err != nil { 43 | return warnFail(fallback, err) 44 | } 45 | 46 | p := filepath.Join(dirname, filename) 47 | logger().Debug("cache initialized", "path", p) 48 | return cache.NewCache(&cacheBackends.GobBackend{Path: p, Dir: dirname}) 49 | } 50 | -------------------------------------------------------------------------------- /docker/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM golang:1.23-alpine AS builder 2 | 3 | RUN mkdir /app 4 | ADD . /app 5 | 6 | RUN cd /app && go build -a -o git-who -buildvcs=false -ldflags '-s -w' 7 | 8 | FROM alpine/git 9 | RUN mkdir /app && git config --system --add safe.directory /git && git config --system alias.who '!/app/git-who' 10 | COPY --from=builder /app/git-who /app/git-who 11 | -------------------------------------------------------------------------------- /dump.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "fmt" 7 | "os" 8 | "strings" 9 | "time" 10 | 11 | "github.com/sinclairtarget/git-who/internal/git" 12 | ) 13 | 14 | // Just prints out the output of git log as seen by git who. 15 | func dump( 16 | revs []string, 17 | pathspecs []string, 18 | short bool, 19 | since string, 20 | until string, 21 | authors []string, 22 | nauthors []string, 23 | ) (err error) { 24 | defer func() { 25 | if err != nil { 26 | err = fmt.Errorf("error running \"dump\": %w", err) 27 | } 28 | }() 29 | 30 | logger().Debug( 31 | "called revs()", 32 | "revs", 33 | revs, 34 | "pathspecs", 35 | pathspecs, 36 | "short", 37 | short, 38 | "since", 39 | since, 40 | "until", 41 | until, 42 | "authors", 43 | authors, 44 | "nauthors", 45 | nauthors, 46 | ) 47 | 48 | start := time.Now() 49 | 50 | ctx, cancel := context.WithCancel(context.Background()) 51 | defer cancel() 52 | 53 | filters := git.LogFilters{ 54 | Since: since, 55 | Until: until, 56 | Authors: authors, 57 | Nauthors: nauthors, 58 | } 59 | 60 | gitRootPath, err := git.GetRoot() 61 | if err != nil { 62 | return err 63 | } 64 | 65 | repoFiles, err := git.CheckRepoConfigFiles(gitRootPath) 66 | if err != nil { 67 | return err 68 | } 69 | 70 | subprocess, err := git.RunLog( 71 | ctx, 72 | revs, 73 | pathspecs, 74 | filters, 75 | !short, 76 | repoFiles.HasMailmap(), 77 | ) 78 | if err != 
nil { 79 | return err 80 | } 81 | 82 | w := bufio.NewWriter(os.Stdout) 83 | 84 | lines := subprocess.StdoutNullDelimitedLines() 85 | for line, err := range lines { 86 | if err != nil { 87 | w.Flush() 88 | return err 89 | } 90 | 91 | lineWithReplaced := strings.ReplaceAll(line, "\n", "\\n") 92 | fmt.Fprintln(w, lineWithReplaced) 93 | } 94 | 95 | w.Flush() 96 | 97 | err = subprocess.Wait() 98 | if err != nil { 99 | return err 100 | } 101 | 102 | elapsed := time.Now().Sub(start) 103 | logger().Debug("finished dump", "duration_ms", elapsed.Milliseconds()) 104 | 105 | return nil 106 | } 107 | -------------------------------------------------------------------------------- /gitwho_ed25519.pub: -------------------------------------------------------------------------------- 1 | ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHRaLHMw/YFLIUPDIOSpCub+43Y1TZIHREdUdYrQMQN+ git-who-release-key 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/sinclairtarget/git-who 2 | 3 | go 1.23 4 | 5 | require ( 6 | github.com/bmatcuk/doublestar/v4 v4.8.1 7 | github.com/google/go-cmp v0.6.0 8 | github.com/mattn/go-runewidth v0.0.16 9 | golang.org/x/term v0.28.0 10 | ) 11 | 12 | require ( 13 | github.com/rivo/uniseg v0.2.0 // indirect 14 | golang.org/x/sys v0.29.0 // indirect 15 | ) 16 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/bmatcuk/doublestar/v4 v4.8.1 h1:54Bopc5c2cAvhLRAzqOGCYHYyhcDHsFF4wWIR5wKP38= 2 | github.com/bmatcuk/doublestar/v4 v4.8.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= 3 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 4 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 5 | github.com/mattn/go-runewidth v0.0.16 
h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= 6 | github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 7 | github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= 8 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 9 | golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= 10 | golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 11 | golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= 12 | golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= 13 | -------------------------------------------------------------------------------- /hist.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math" 7 | "os" 8 | "runtime" 9 | "strings" 10 | "time" 11 | 12 | "github.com/sinclairtarget/git-who/internal/concurrent" 13 | "github.com/sinclairtarget/git-who/internal/format" 14 | "github.com/sinclairtarget/git-who/internal/git" 15 | "github.com/sinclairtarget/git-who/internal/pretty" 16 | "github.com/sinclairtarget/git-who/internal/tally" 17 | ) 18 | 19 | const barWidth = 36 20 | 21 | func hist( 22 | revs []string, 23 | pathspecs []string, 24 | mode tally.TallyMode, 25 | showEmail bool, 26 | countMerges bool, 27 | since string, 28 | until string, 29 | authors []string, 30 | nauthors []string, 31 | ) (err error) { 32 | defer func() { 33 | if err != nil { 34 | err = fmt.Errorf("error running \"hist\": %w", err) 35 | } 36 | }() 37 | 38 | logger().Debug( 39 | "called hist()", 40 | "revs", 41 | revs, 42 | "pathspecs", 43 | pathspecs, 44 | "mode", 45 | mode, 46 | "showEmail", 47 | showEmail, 48 | "countMerges", 49 | countMerges, 50 | "since", 51 | since, 52 | "until", 53 | until, 54 | "authors", 55 | authors, 56 | "nauthors", 57 | nauthors, 58 | ) 59 | 60 | ctx, cancel := 
context.WithCancel(context.Background()) 61 | defer cancel() 62 | 63 | tallyOpts := tally.TallyOpts{Mode: mode, CountMerges: countMerges} 64 | if showEmail { 65 | tallyOpts.Key = func(c git.Commit) string { return c.AuthorEmail } 66 | } else { 67 | tallyOpts.Key = func(c git.Commit) string { return c.AuthorName } 68 | } 69 | 70 | populateDiffs := tallyOpts.IsDiffMode() 71 | filters := git.LogFilters{ 72 | Since: since, 73 | Until: until, 74 | Authors: authors, 75 | Nauthors: nauthors, 76 | } 77 | 78 | var end time.Time // Default is zero time, meaning use last commit 79 | if len(revs) == 1 && revs[0] == "HEAD" && len(until) == 0 { 80 | // If no revs or --until given, end timeline at current time 81 | end = time.Now() 82 | } 83 | 84 | gitRootPath, err := git.GetRoot() 85 | if err != nil { 86 | return err 87 | } 88 | 89 | repoFiles, err := git.CheckRepoConfigFiles(gitRootPath) 90 | if err != nil { 91 | return err 92 | } 93 | 94 | var buckets []tally.TimeBucket 95 | if populateDiffs && runtime.GOMAXPROCS(0) > 1 { 96 | buckets, err = concurrent.TallyCommitsTimeline( 97 | ctx, 98 | revs, 99 | pathspecs, 100 | filters, 101 | repoFiles, 102 | tallyOpts, 103 | end, 104 | getCache(gitRootPath, repoFiles), 105 | pretty.AllowDynamic(os.Stdout), 106 | ) 107 | if err != nil { 108 | return err 109 | } 110 | } else { 111 | commits, closer, err := git.CommitsWithOpts( 112 | ctx, 113 | revs, 114 | pathspecs, 115 | filters, 116 | populateDiffs, 117 | repoFiles, 118 | ) 119 | if err != nil { 120 | return err 121 | } 122 | 123 | buckets, err = tally.TallyCommitsTimeline( 124 | commits, 125 | tallyOpts, 126 | end, 127 | ) 128 | if err != nil { 129 | return err 130 | } 131 | 132 | err = closer() 133 | if err != nil { 134 | return err 135 | } 136 | } 137 | 138 | // -- Pick winner in each bucket -- 139 | for i, bucket := range buckets { 140 | buckets[i] = bucket.Rank(mode) 141 | } 142 | 143 | // -- Draw bar plot -- 144 | maxVal := barWidth 145 | for _, bucket := range buckets { 146 | if 
bucket.TotalValue(mode) > maxVal { 147 | maxVal = bucket.TotalValue(mode) 148 | } 149 | } 150 | 151 | drawPlot(buckets, maxVal, mode, showEmail) 152 | return nil 153 | } 154 | 155 | func drawPlot( 156 | buckets []tally.TimeBucket, 157 | maxVal int, 158 | mode tally.TallyMode, 159 | showEmail bool, 160 | ) { 161 | var lastAuthor string 162 | for _, bucket := range buckets { 163 | value := bucket.Value(mode) 164 | clampedValue := int(math.Ceil( 165 | (float64(value) / float64(maxVal)) * float64(barWidth), 166 | )) 167 | 168 | total := bucket.TotalValue(mode) 169 | clampedTotal := int(math.Ceil( 170 | (float64(total) / float64(maxVal)) * float64(barWidth), 171 | )) 172 | 173 | valueBar := strings.Repeat("#", clampedValue) 174 | totalBar := strings.Repeat("-", clampedTotal-clampedValue) 175 | 176 | if value > 0 { 177 | tallyPart := fmtHistTally( 178 | bucket.Tally, 179 | mode, 180 | showEmail, 181 | bucket.Tally.AuthorName == lastAuthor, 182 | ) 183 | fmt.Printf( 184 | "%s ┤ %s%s%-*s%s %s\n", 185 | bucket.Name, 186 | valueBar, 187 | pretty.Dim, 188 | barWidth-clampedValue, 189 | totalBar, 190 | pretty.Reset, 191 | tallyPart, 192 | ) 193 | 194 | lastAuthor = bucket.Tally.AuthorName 195 | } else { 196 | fmt.Printf("%s ┤ \n", bucket.Name) 197 | } 198 | } 199 | } 200 | 201 | func fmtHistTally( 202 | t tally.FinalTally, 203 | mode tally.TallyMode, 204 | showEmail bool, 205 | fade bool, 206 | ) string { 207 | var metric string 208 | switch mode { 209 | case tally.CommitMode: 210 | metric = fmt.Sprintf("(%s)", format.Number(t.Commits)) 211 | case tally.FilesMode: 212 | metric = fmt.Sprintf("(%s)", format.Number(t.FileCount)) 213 | case tally.LinesMode: 214 | metric = fmt.Sprintf( 215 | "(%s%s%s / %s%s%s)", 216 | pretty.Green, 217 | format.Number(t.LinesAdded), 218 | pretty.DefaultColor, 219 | pretty.Red, 220 | format.Number(t.LinesRemoved), 221 | pretty.DefaultColor, 222 | ) 223 | default: 224 | panic("unrecognized tally mode in switch") 225 | } 226 | 227 | var author string 
228 | if showEmail { 229 | author = format.Abbrev(format.GitEmail(t.AuthorEmail), 25) 230 | } else { 231 | author = format.Abbrev(t.AuthorName, 25) 232 | } 233 | 234 | if fade { 235 | return fmt.Sprintf( 236 | "%s%s %s%s", 237 | pretty.Dim, 238 | author, 239 | metric, 240 | pretty.Reset, 241 | ) 242 | } else { 243 | return fmt.Sprintf("%s %s", author, metric) 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /internal/cache/backends/gob.go: -------------------------------------------------------------------------------- 1 | package backends 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "compress/gzip" 7 | "encoding/binary" 8 | "encoding/gob" 9 | "errors" 10 | "fmt" 11 | "hash/fnv" 12 | "io" 13 | "io/fs" 14 | "os" 15 | "path/filepath" 16 | 17 | "github.com/sinclairtarget/git-who/internal/cache" 18 | "github.com/sinclairtarget/git-who/internal/git" 19 | ) 20 | 21 | // Stores commits on disk at a particular filepath. 22 | // 23 | // Commits are stored in Gob format. The file stored on disk is a series of 24 | // Gob-encoded arrays, each prefixed with a four-byte value indicating the 25 | // number of bytes in the next array. This framing creates redundancy (since 26 | // the Gob type metadata is repeated for each array) but allows us to append to 27 | // the file on disk instead of replacing the whole file when we want to cache 28 | // new commits. 29 | // 30 | // The Gob backend produces a cache file roughly half the size of the JSON 31 | // backend on disk. It's also SIGNIFICANTLY faster to read the cache from disk 32 | // when in Gob format rather than JSON format. 33 | // 34 | // We also gzip the file when we're done using it to keep it even smaller on 35 | // disk. 
36 | type GobBackend struct { 37 | Dir string 38 | Path string 39 | wasOpened bool 40 | isDirty bool 41 | } 42 | 43 | const GobBackendName string = "gob" 44 | 45 | func (b *GobBackend) Name() string { 46 | return GobBackendName 47 | } 48 | 49 | func (b *GobBackend) compressedPath() string { 50 | return b.Path + ".gz" 51 | } 52 | 53 | func (b *GobBackend) Open() (err error) { 54 | b.wasOpened = true 55 | 56 | // Uncompress gzipped file to regular location if it exists 57 | f, err := os.Open(b.compressedPath()) 58 | if errors.Is(err, fs.ErrNotExist) { 59 | return nil 60 | } else if err != nil { 61 | return err 62 | } 63 | defer f.Close() 64 | 65 | fout, err := os.OpenFile(b.Path, os.O_WRONLY|os.O_CREATE, 0644) 66 | if err != nil { 67 | return err 68 | } 69 | defer fout.Close() 70 | 71 | zr, err := gzip.NewReader(f) 72 | if err != nil { 73 | return err 74 | } 75 | 76 | w := bufio.NewWriter(fout) 77 | _, err = io.Copy(w, zr) 78 | if err != nil { 79 | return err 80 | } 81 | 82 | err = zr.Close() 83 | if err != nil { 84 | return err 85 | } 86 | 87 | err = w.Flush() 88 | if err != nil { 89 | return err 90 | } 91 | 92 | return nil 93 | } 94 | 95 | func (b *GobBackend) Close() (err error) { 96 | if b.isDirty { 97 | // Compress file and save to gzipped location 98 | f, err := os.Open(b.Path) 99 | if errors.Is(err, fs.ErrNotExist) { 100 | return nil 101 | } else if err != nil { 102 | return err 103 | } 104 | defer f.Close() 105 | 106 | fout, err := os.OpenFile(b.compressedPath(), os.O_WRONLY|os.O_CREATE, 0644) 107 | if err != nil { 108 | return err 109 | } 110 | defer fout.Close() 111 | 112 | r := bufio.NewReader(f) 113 | zw, err := gzip.NewWriterLevel(fout, gzip.BestSpeed) 114 | if err != nil { 115 | return err 116 | } 117 | 118 | _, err = io.Copy(zw, r) 119 | if err != nil { 120 | return err 121 | } 122 | 123 | err = zw.Close() 124 | if err != nil { 125 | return err 126 | } 127 | } 128 | 129 | // Remove uncompressed file 130 | err = os.RemoveAll(b.Path) 131 | if err != nil 
{ 132 | return err 133 | } 134 | 135 | // Remove any other dangling cache files 136 | matches, err := filepath.Glob(filepath.Join(b.Dir, "*")) 137 | if err != nil { 138 | panic(err) // Bad pattern 139 | } 140 | 141 | for _, match := range matches { 142 | if match == b.compressedPath() { 143 | continue 144 | } 145 | 146 | err := os.Remove(match) 147 | if err != nil { 148 | logger().Warn( 149 | fmt.Sprintf("failed to delete old cache file: %v", err), 150 | ) 151 | } 152 | } 153 | 154 | return nil 155 | } 156 | 157 | func (b *GobBackend) Get(revs []string) (_ cache.Result, err error) { 158 | if !b.wasOpened { 159 | panic("cache not yet open. Did you forget to call Open()?") 160 | } 161 | 162 | result := cache.EmptyResult() 163 | 164 | lookingFor := map[string]bool{} 165 | for _, rev := range revs { 166 | lookingFor[rev] = true 167 | } 168 | 169 | f, err := os.Open(b.Path) 170 | if errors.Is(err, fs.ErrNotExist) { 171 | return result, nil 172 | } else if err != nil { 173 | return result, err 174 | } 175 | 176 | // In theory we shouldn't get any duplicates into the cache if we're 177 | // careful about what we write to it. But let's make sure by detecting dups 178 | // and throwing an error if we see one. 
179 | seen := map[string]bool{} 180 | 181 | it := func(yield func(git.Commit, error) bool) { 182 | defer f.Close() // Don't care about error closing when reading 183 | 184 | for { 185 | var commit git.Commit 186 | 187 | // -- Find length of next gob in bytes -- 188 | prefix := make([]byte, 4) 189 | _, err = f.Read(prefix) 190 | if err == io.EOF { 191 | return 192 | } else if err != nil { 193 | yield(commit, err) 194 | return 195 | } 196 | 197 | var size uint32 198 | err = binary.Read( 199 | bytes.NewReader(prefix), 200 | binary.LittleEndian, 201 | &size, 202 | ) 203 | if err != nil { 204 | yield(commit, err) 205 | return 206 | } 207 | 208 | // -- Decode next gob -- 209 | var commits []git.Commit 210 | 211 | data := make([]byte, size) 212 | _, err = f.Read(data) 213 | 214 | dec := gob.NewDecoder(bytes.NewReader(data)) 215 | err = dec.Decode(&commits) 216 | if err != nil { 217 | yield(commit, err) 218 | return 219 | } 220 | 221 | // -- Yield matching commits -- 222 | for _, c := range commits { 223 | hit, _ := lookingFor[c.Hash] 224 | if hit { 225 | if isDup, _ := seen[c.Hash]; isDup { 226 | yield(c, fmt.Errorf( 227 | "duplicate commit in cache: %s", 228 | c.Hash, 229 | )) 230 | return 231 | } 232 | 233 | seen[c.Hash] = true 234 | if !yield(c, nil) { 235 | return 236 | } 237 | } 238 | } 239 | } 240 | } 241 | 242 | return cache.Result{Commits: it}, nil 243 | } 244 | 245 | func (b *GobBackend) Add(commits []git.Commit) (err error) { 246 | if !b.wasOpened { 247 | panic("cache not yet open. 
Did you forget to call Open()?") 248 | } 249 | 250 | b.isDirty = true 251 | 252 | f, err := os.OpenFile( 253 | b.Path, 254 | os.O_WRONLY|os.O_APPEND|os.O_CREATE, 255 | 0644, 256 | ) 257 | if err != nil { 258 | return err 259 | } 260 | defer func() { 261 | closeErr := f.Close() 262 | if err == nil { 263 | err = closeErr 264 | } 265 | }() 266 | 267 | var data bytes.Buffer 268 | 269 | enc := gob.NewEncoder(&data) 270 | err = enc.Encode(&commits) 271 | if err != nil { 272 | return err 273 | } 274 | 275 | if data.Len() > 0x7FFF_FFFF { 276 | return errors.New( 277 | "cannot add more than 2,147,483,648 bytes to cache at once", // lol 278 | ) 279 | } 280 | 281 | err = binary.Write(f, binary.LittleEndian, uint32(data.Len())) 282 | if err != nil { 283 | return err 284 | } 285 | 286 | _, err = f.Write(data.Bytes()) 287 | if err != nil { 288 | return err 289 | } 290 | 291 | return nil 292 | } 293 | 294 | func (b *GobBackend) Clear() error { 295 | err := os.RemoveAll(b.Dir) 296 | if err != nil { 297 | return err 298 | } 299 | 300 | return nil 301 | } 302 | 303 | func GobCacheDir(prefix string, gitRootPath string) string { 304 | // Filename includes hash of path to repo so we don't collide with other 305 | // git-who caches for other repos. 
306 | h := fnv.New32() 307 | h.Write([]byte(gitRootPath)) 308 | 309 | base := filepath.Base(gitRootPath) 310 | dirname := fmt.Sprintf("%s-%x", base, h.Sum32()) 311 | repoDir := filepath.Join(prefix, dirname) 312 | return repoDir 313 | } 314 | 315 | func GobCacheFilename(repoFiles git.RepoConfigFiles) (string, error) { 316 | stateHash, err := cache.RepoStateHash(repoFiles) 317 | if err != nil { 318 | return "", err 319 | } 320 | 321 | filename := fmt.Sprintf("%s.gobs", stateHash) 322 | return filename, nil 323 | } 324 | -------------------------------------------------------------------------------- /internal/cache/backends/gob_test.go: -------------------------------------------------------------------------------- 1 | package backends_test 2 | 3 | import ( 4 | "iter" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | "time" 9 | 10 | "github.com/google/go-cmp/cmp" 11 | 12 | "github.com/sinclairtarget/git-who/internal/cache/backends" 13 | "github.com/sinclairtarget/git-who/internal/git" 14 | "github.com/sinclairtarget/git-who/internal/utils/iterutils" 15 | ) 16 | 17 | func CacheDir(t *testing.T) string { 18 | dirname := filepath.Join(t.TempDir(), "gob", "test-1234") 19 | err := os.MkdirAll(dirname, 0o700) 20 | if err != nil { 21 | t.Fatalf("could not create cache dir: %v", err) 22 | } 23 | 24 | return dirname 25 | } 26 | 27 | func TestGobAddGetClear(t *testing.T) { 28 | dir := CacheDir(t) 29 | c := backends.GobBackend{ 30 | Dir: dir, 31 | Path: filepath.Join(dir, "commits.gob"), 32 | } 33 | 34 | err := c.Open() 35 | if err != nil { 36 | t.Fatalf("could not open cache: %v", err) 37 | } 38 | defer func() { 39 | err = c.Close() 40 | if err != nil { 41 | t.Fatalf("could not close cache: %v", err) 42 | } 43 | }() 44 | 45 | commit := git.Commit{ 46 | ShortHash: "9e9ea7662b1", 47 | Hash: "9e9ea7662b1001d860471a4cece5e2f1de8062fb", 48 | AuthorName: "Bob", 49 | AuthorEmail: "bob@work.com", 50 | Date: time.Date( 51 | 2025, 1, 31, 16, 35, 26, 0, time.UTC, 52 | ), 53 | FileDiffs: 
[]git.FileDiff{ 54 | { 55 | Path: "foo/bar.txt", 56 | LinesAdded: 3, 57 | LinesRemoved: 5, 58 | }, 59 | }, 60 | } 61 | 62 | // -- Add -- 63 | err = c.Add([]git.Commit{commit}) 64 | if err != nil { 65 | t.Fatalf("add commits to cache failed with error: %v", err) 66 | } 67 | 68 | // -- Get -- 69 | revs := []string{commit.Hash} 70 | result, err := c.Get(revs) 71 | if err != nil { 72 | t.Fatalf("get commits from cache failed with error: %v", err) 73 | } 74 | 75 | next, stop := iter.Pull2(result.Commits) 76 | defer stop() 77 | 78 | cachedCommit, err, ok := next() 79 | if err != nil { 80 | t.Fatalf("error iterating cached commits: %v", err) 81 | } 82 | 83 | if !ok { 84 | t.Fatal("not enough commits in result") 85 | } 86 | 87 | if diff := cmp.Diff(commit, cachedCommit); diff != "" { 88 | t.Errorf("commit is wrong:\n%s", diff) 89 | } 90 | 91 | // -- Clear -- 92 | err = c.Clear() 93 | if err != nil { 94 | t.Fatalf("clearing cache failed with error: %v", err) 95 | } 96 | 97 | result, err = c.Get(revs) 98 | if err != nil { 99 | t.Fatalf( 100 | "get commits from cache after clear failed with error: %v", 101 | err, 102 | ) 103 | } 104 | 105 | commits, err := iterutils.Collect(result.Commits) 106 | if err != nil { 107 | t.Fatalf("error collecting commits: %v", err) 108 | } 109 | 110 | if len(commits) > 0 { 111 | t.Errorf("cache result after clear should have been empty") 112 | } 113 | } 114 | 115 | func TestGobAddGetAddGet(t *testing.T) { 116 | dir := CacheDir(t) 117 | c := backends.GobBackend{ 118 | Dir: dir, 119 | Path: filepath.Join(dir, "commits.gob"), 120 | } 121 | 122 | err := c.Open() 123 | if err != nil { 124 | t.Fatalf("could not open cache: %v", err) 125 | } 126 | defer func() { 127 | err = c.Close() 128 | if err != nil { 129 | t.Fatalf("could not close cache: %v", err) 130 | } 131 | }() 132 | 133 | commitOne := git.Commit{ 134 | ShortHash: "1e9ea7662b1", 135 | Hash: "1e9ea7662b1001d860471a4cece5e2f1de8062fb", 136 | AuthorName: "Bob", 137 | AuthorEmail: "bob@work.com", 
138 | Date: time.Date( 139 | 2025, 1, 30, 16, 35, 26, 0, time.UTC, 140 | ), 141 | FileDiffs: []git.FileDiff{ 142 | { 143 | Path: "foo/bar.txt", 144 | LinesAdded: 3, 145 | LinesRemoved: 5, 146 | }, 147 | }, 148 | } 149 | commitTwo := git.Commit{ 150 | ShortHash: "2e9ea7662b1", 151 | Hash: "2e9ea7662b1001d860471a4cece5e2f1de8062fb", 152 | AuthorName: "Bob", 153 | AuthorEmail: "bob@work.com", 154 | Date: time.Date( 155 | 2025, 1, 31, 16, 35, 26, 0, time.UTC, 156 | ), 157 | FileDiffs: []git.FileDiff{ 158 | { 159 | Path: "foo/bim.txt", 160 | LinesAdded: 4, 161 | LinesRemoved: 0, 162 | }, 163 | }, 164 | } 165 | revs := []string{commitOne.Hash, commitTwo.Hash} 166 | 167 | err = c.Add([]git.Commit{commitOne}) 168 | if err != nil { 169 | t.Fatalf("add commits to cache failed with error: %v", err) 170 | } 171 | 172 | result, err := c.Get(revs) 173 | if err != nil { 174 | t.Fatalf("get commits from cache failed with error: %v", err) 175 | } 176 | 177 | commits, err := iterutils.Collect(result.Commits) 178 | if err != nil { 179 | t.Fatalf("error collecting commits: %v", err) 180 | } 181 | 182 | if len(commits) != 1 { 183 | t.Errorf( 184 | "expected to get one commit from cache, but got %d", 185 | len(commits), 186 | ) 187 | } 188 | 189 | err = c.Add([]git.Commit{commitTwo}) 190 | if err != nil { 191 | t.Fatalf("add commits to cache failed with error: %v", err) 192 | } 193 | 194 | result, err = c.Get(revs) 195 | if err != nil { 196 | t.Fatalf("get commits from cache failed with error: %v", err) 197 | } 198 | 199 | commits, err = iterutils.Collect(result.Commits) 200 | if err != nil { 201 | t.Fatalf("error collecting commits: %v", err) 202 | } 203 | 204 | if len(commits) != 2 { 205 | t.Errorf( 206 | "expected to get two commits from cache, but got %d", 207 | len(commits), 208 | ) 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /internal/cache/backends/json.go: 
-------------------------------------------------------------------------------- 1 | package backends 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "io/fs" 9 | "os" 10 | "slices" 11 | 12 | "github.com/sinclairtarget/git-who/internal/cache" 13 | "github.com/sinclairtarget/git-who/internal/git" 14 | "github.com/sinclairtarget/git-who/internal/utils/iterutils" 15 | ) 16 | 17 | // Stores commits on disk at a particular filepath. 18 | // 19 | // Commits are stored as newline-delimited JSON. For now, all commits that match 20 | // the revs being searched for are loaded into memory before being returned. 21 | type JSONBackend struct { 22 | Path string 23 | } 24 | 25 | func (b JSONBackend) Name() string { 26 | return "json" 27 | } 28 | 29 | func (b JSONBackend) Open() error { 30 | return nil 31 | } 32 | 33 | func (b JSONBackend) Close() error { 34 | return nil 35 | } 36 | 37 | func (b JSONBackend) Get(revs []string) (cache.Result, error) { 38 | result := cache.EmptyResult() 39 | 40 | lookingFor := map[string]bool{} 41 | for _, rev := range revs { 42 | lookingFor[rev] = true 43 | } 44 | 45 | f, err := os.Open(b.Path) 46 | if errors.Is(err, fs.ErrNotExist) { 47 | return result, nil 48 | } else if err != nil { 49 | return result, err 50 | } 51 | defer f.Close() // Don't care about error closing when reading 52 | 53 | dec := json.NewDecoder(f) 54 | 55 | var commits []git.Commit 56 | 57 | // In theory we shouldn't get any duplicates into the cache if we're 58 | // careful about what we write to it. But let's make sure by detecting dups 59 | // and throwing an error if we see one. 
60 | seen := map[string]bool{} 61 | 62 | for { 63 | var c git.Commit 64 | 65 | err = dec.Decode(&c) 66 | if err == io.EOF { 67 | break 68 | } else if err != nil { 69 | return result, err 70 | } 71 | 72 | hit, _ := lookingFor[c.Hash] 73 | if hit { 74 | if isDup, _ := seen[c.Hash]; isDup { 75 | return result, fmt.Errorf( 76 | "duplicate commit in cache: %s", 77 | c.Hash, 78 | ) 79 | } 80 | 81 | seen[c.Hash] = true 82 | commits = append(commits, c) 83 | } 84 | } 85 | 86 | return cache.Result{ 87 | Commits: iterutils.WithoutErrors(slices.Values(commits)), 88 | }, nil 89 | } 90 | 91 | func (b JSONBackend) Add(commits []git.Commit) (err error) { 92 | f, err := os.OpenFile( 93 | b.Path, 94 | os.O_WRONLY|os.O_APPEND|os.O_CREATE, 95 | 0644, 96 | ) 97 | if err != nil { 98 | return err 99 | } 100 | defer func() { 101 | closeErr := f.Close() 102 | if err == nil { 103 | err = closeErr 104 | } 105 | }() 106 | 107 | enc := json.NewEncoder(f) 108 | 109 | for _, c := range commits { 110 | err = enc.Encode(&c) 111 | if err != nil { 112 | return err 113 | } 114 | } 115 | 116 | return nil 117 | } 118 | 119 | func (b JSONBackend) Clear() error { 120 | return os.Remove(b.Path) 121 | } 122 | -------------------------------------------------------------------------------- /internal/cache/backends/json_test.go: -------------------------------------------------------------------------------- 1 | package backends_test 2 | 3 | import ( 4 | "iter" 5 | "path/filepath" 6 | "testing" 7 | "time" 8 | 9 | "github.com/google/go-cmp/cmp" 10 | 11 | "github.com/sinclairtarget/git-who/internal/cache/backends" 12 | "github.com/sinclairtarget/git-who/internal/git" 13 | "github.com/sinclairtarget/git-who/internal/utils/iterutils" 14 | ) 15 | 16 | func TestAddGetClear(t *testing.T) { 17 | dir := t.TempDir() 18 | c := backends.JSONBackend{ 19 | Path: filepath.Join(dir, "commits.json"), 20 | } 21 | 22 | err := c.Open() 23 | if err != nil { 24 | t.Fatalf("could not open cache: %v", err) 25 | } 26 | defer func() 
{ 27 | err = c.Close() 28 | if err != nil { 29 | t.Fatalf("could not close cache: %v", err) 30 | } 31 | }() 32 | 33 | commit := git.Commit{ 34 | ShortHash: "9e9ea7662b1", 35 | Hash: "9e9ea7662b1001d860471a4cece5e2f1de8062fb", 36 | AuthorName: "Bob", 37 | AuthorEmail: "bob@work.com", 38 | Date: time.Date( 39 | 2025, 1, 31, 16, 35, 26, 0, time.UTC, 40 | ), 41 | FileDiffs: []git.FileDiff{ 42 | { 43 | Path: "foo/bar.txt", 44 | LinesAdded: 3, 45 | LinesRemoved: 5, 46 | }, 47 | }, 48 | } 49 | 50 | // -- Add -- 51 | err = c.Add([]git.Commit{commit}) 52 | if err != nil { 53 | t.Fatalf("add commits to cache failed with error: %v", err) 54 | } 55 | 56 | // -- Get -- 57 | revs := []string{commit.Hash} 58 | result, err := c.Get(revs) 59 | if err != nil { 60 | t.Fatalf("get commits from cache failed with error: %v", err) 61 | } 62 | 63 | next, stop := iter.Pull2(result.Commits) 64 | defer stop() 65 | 66 | cachedCommit, err, ok := next() 67 | if err != nil { 68 | t.Fatalf("error iterating cached commits: %v", err) 69 | } 70 | 71 | if !ok { 72 | t.Fatal("not enough commits in result") 73 | } 74 | 75 | if diff := cmp.Diff(commit, cachedCommit); diff != "" { 76 | t.Errorf("commit is wrong:\n%s", diff) 77 | } 78 | 79 | // -- Clear -- 80 | err = c.Clear() 81 | if err != nil { 82 | t.Fatalf("clearing cache failed with error: %v", err) 83 | } 84 | 85 | result, err = c.Get(revs) 86 | if err != nil { 87 | t.Fatalf( 88 | "get commits from cache after clear failed with error: %v", 89 | err, 90 | ) 91 | } 92 | 93 | commits, err := iterutils.Collect(result.Commits) 94 | if err != nil { 95 | t.Fatalf("error collecting commits: %v", err) 96 | } 97 | 98 | if len(commits) > 0 { 99 | t.Errorf("cache result after clear should have been empty") 100 | } 101 | } 102 | 103 | func TestAddGetAddGet(t *testing.T) { 104 | dir := t.TempDir() 105 | c := backends.JSONBackend{ 106 | Path: filepath.Join(dir, "commits.json"), 107 | } 108 | 109 | err := c.Open() 110 | if err != nil { 111 | t.Fatalf("could not 
open cache: %v", err) 112 | } 113 | defer func() { 114 | err = c.Close() 115 | if err != nil { 116 | t.Fatalf("could not close cache: %v", err) 117 | } 118 | }() 119 | 120 | commitOne := git.Commit{ 121 | ShortHash: "1e9ea7662b1", 122 | Hash: "1e9ea7662b1001d860471a4cece5e2f1de8062fb", 123 | AuthorName: "Bob", 124 | AuthorEmail: "bob@work.com", 125 | Date: time.Date( 126 | 2025, 1, 30, 16, 35, 26, 0, time.UTC, 127 | ), 128 | FileDiffs: []git.FileDiff{ 129 | { 130 | Path: "foo/bar.txt", 131 | LinesAdded: 3, 132 | LinesRemoved: 5, 133 | }, 134 | }, 135 | } 136 | commitTwo := git.Commit{ 137 | ShortHash: "2e9ea7662b1", 138 | Hash: "2e9ea7662b1001d860471a4cece5e2f1de8062fb", 139 | AuthorName: "Bob", 140 | AuthorEmail: "bob@work.com", 141 | Date: time.Date( 142 | 2025, 1, 31, 16, 35, 26, 0, time.UTC, 143 | ), 144 | FileDiffs: []git.FileDiff{ 145 | { 146 | Path: "foo/bim.txt", 147 | LinesAdded: 4, 148 | LinesRemoved: 0, 149 | }, 150 | }, 151 | } 152 | revs := []string{commitOne.Hash, commitTwo.Hash} 153 | 154 | err = c.Add([]git.Commit{commitOne}) 155 | if err != nil { 156 | t.Fatalf("add commits to cache failed with error: %v", err) 157 | } 158 | 159 | result, err := c.Get(revs) 160 | if err != nil { 161 | t.Fatalf("get commits from cache failed with error: %v", err) 162 | } 163 | 164 | commits, err := iterutils.Collect(result.Commits) 165 | if err != nil { 166 | t.Fatalf("error collecting commits: %v", err) 167 | } 168 | 169 | if len(commits) != 1 { 170 | t.Errorf( 171 | "expected to get one commit from cache, but got %d", 172 | len(commits), 173 | ) 174 | } 175 | 176 | err = c.Add([]git.Commit{commitTwo}) 177 | if err != nil { 178 | t.Fatalf("add commits to cache failed with error: %v", err) 179 | } 180 | 181 | result, err = c.Get(revs) 182 | if err != nil { 183 | t.Fatalf("get commits from cache failed with error: %v", err) 184 | } 185 | 186 | commits, err = iterutils.Collect(result.Commits) 187 | if err != nil { 188 | t.Fatalf("error collecting commits: %v", err) 
189 | } 190 | 191 | if len(commits) != 2 { 192 | t.Errorf( 193 | "expected to get two commits from cache, but got %d", 194 | len(commits), 195 | ) 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /internal/cache/backends/log.go: -------------------------------------------------------------------------------- 1 | package backends 2 | 3 | import ( 4 | "log/slog" 5 | ) 6 | 7 | var pkgLogger *slog.Logger 8 | 9 | func logger() *slog.Logger { 10 | if pkgLogger == nil { 11 | pkgLogger = slog.Default().With("package", "cache.backends") 12 | } 13 | 14 | return pkgLogger 15 | } 16 | -------------------------------------------------------------------------------- /internal/cache/backends/noop.go: -------------------------------------------------------------------------------- 1 | package backends 2 | 3 | import ( 4 | "github.com/sinclairtarget/git-who/internal/cache" 5 | "github.com/sinclairtarget/git-who/internal/git" 6 | ) 7 | 8 | type NoopBackend struct{} 9 | 10 | func (b NoopBackend) Name() string { 11 | return "noop" 12 | } 13 | 14 | func (b NoopBackend) Open() error { 15 | return nil 16 | } 17 | 18 | func (b NoopBackend) Close() error { 19 | return nil 20 | } 21 | 22 | func (b NoopBackend) Get(revs []string) (cache.Result, error) { 23 | return cache.EmptyResult(), nil 24 | } 25 | 26 | func (b NoopBackend) Add(commits []git.Commit) error { 27 | return nil 28 | } 29 | 30 | func (b NoopBackend) Clear() error { 31 | return nil 32 | } 33 | -------------------------------------------------------------------------------- /internal/cache/cache.go: -------------------------------------------------------------------------------- 1 | // Cache for storing commits we've already diff-ed and parsed. 
2 | package cache 3 | 4 | import ( 5 | "encoding/hex" 6 | "fmt" 7 | "hash/fnv" 8 | "iter" 9 | "os" 10 | "os/user" 11 | "path/filepath" 12 | "slices" 13 | "time" 14 | 15 | "github.com/sinclairtarget/git-who/internal/git" 16 | "github.com/sinclairtarget/git-who/internal/utils/iterutils" 17 | ) 18 | 19 | func IsCachingEnabled() bool { 20 | if len(os.Getenv("GIT_WHO_DISABLE_CACHE")) > 0 { 21 | return false 22 | } 23 | 24 | return true 25 | } 26 | 27 | type Result struct { 28 | Commits iter.Seq2[git.Commit, error] // The sequence of commits 29 | } 30 | 31 | // If we use the zero-value for Result, the iterator will be nil. We instead 32 | // want an interator over a zero-length sequence. 33 | func EmptyResult() Result { 34 | return Result{ 35 | Commits: iterutils.WithoutErrors(slices.Values([]git.Commit{})), 36 | } 37 | } 38 | 39 | type Backend interface { 40 | Name() string 41 | Open() error 42 | Close() error 43 | Get(revs []string) (Result, error) 44 | Add(commits []git.Commit) error 45 | Clear() error 46 | } 47 | 48 | type Cache struct { 49 | backend Backend 50 | } 51 | 52 | func NewCache(backend Backend) Cache { 53 | return Cache{ 54 | backend: backend, 55 | } 56 | } 57 | 58 | func (c *Cache) Name() string { 59 | return c.backend.Name() 60 | } 61 | 62 | func (c *Cache) Open() (err error) { 63 | defer func() { 64 | if err != nil { 65 | err = fmt.Errorf("error opening cache: %w", err) 66 | } 67 | }() 68 | 69 | start := time.Now() 70 | 71 | err = c.backend.Open() 72 | if err != nil { 73 | return err 74 | } 75 | 76 | elapsed := time.Now().Sub(start) 77 | logger().Debug( 78 | "cache open", 79 | "duration_ms", 80 | elapsed.Milliseconds(), 81 | ) 82 | 83 | return nil 84 | } 85 | 86 | func (c *Cache) Close() (err error) { 87 | defer func() { 88 | if err != nil { 89 | err = fmt.Errorf("error closing cache: %w", err) 90 | } 91 | }() 92 | 93 | start := time.Now() 94 | 95 | err = c.backend.Close() 96 | if err != nil { 97 | return err 98 | } 99 | 100 | elapsed := 
time.Now().Sub(start) 101 | logger().Debug( 102 | "cache close", 103 | "duration_ms", 104 | elapsed.Milliseconds(), 105 | ) 106 | 107 | return nil 108 | } 109 | 110 | func (c *Cache) Get(revs []string) (_ Result, err error) { 111 | defer func() { 112 | if err != nil { 113 | err = fmt.Errorf("failed to retrieve from cache: %w", err) 114 | } 115 | }() 116 | 117 | start := time.Now() 118 | 119 | result, err := c.backend.Get(revs) 120 | if err != nil { 121 | return result, err 122 | } 123 | 124 | elapsed := time.Now().Sub(start) 125 | logger().Debug( 126 | "cache get", 127 | "duration_ms", 128 | elapsed.Milliseconds(), 129 | ) 130 | 131 | // Make sure iterator is not nil 132 | if result.Commits == nil { 133 | panic("Cache backend returned nil commits iterator; this isn't kosher!") 134 | } 135 | 136 | return result, nil 137 | } 138 | 139 | func (c *Cache) Add(commits []git.Commit) error { 140 | start := time.Now() 141 | 142 | err := c.backend.Add(commits) 143 | if err != nil { 144 | return err 145 | } 146 | 147 | elapsed := time.Now().Sub(start) 148 | logger().Debug( 149 | "cache add", 150 | "duration_ms", 151 | elapsed.Milliseconds(), 152 | ) 153 | 154 | return nil 155 | } 156 | 157 | func (c *Cache) Clear() error { 158 | err := c.backend.Clear() 159 | if err != nil { 160 | return err 161 | } 162 | 163 | logger().Debug("cache clear") 164 | return nil 165 | } 166 | 167 | // Returns the absolute path at which we should store data for a given cache 168 | // backend. 169 | // 170 | // Tries to store it under the XDG_CACHE_HOME dir. 
// CacheStorageDir returns the absolute path at which we should store data for
// the cache backend called name.
//
// The directory is <cache home>/git-who/<name>, where the cache home is
// XDG_CACHE_HOME when that variable is non-empty and ~/.cache otherwise. The
// current user is only looked up for the fallback, so a failing user lookup
// does not matter when XDG_CACHE_HOME is set.
func CacheStorageDir(name string) (_ string, err error) {
	defer func() {
		if err != nil {
			err = fmt.Errorf("failed to determine cache storage path: %w", err)
		}
	}()

	cacheHome := os.Getenv("XDG_CACHE_HOME")
	if cacheHome == "" {
		var usr *user.User

		usr, err = user.Current()
		if err != nil {
			return "", err
		}

		cacheHome = filepath.Join(usr.HomeDir, ".cache")
	}

	// Abs also cleans the path, and resolves a relative XDG_CACHE_HOME
	// against the working directory.
	absP, err := filepath.Abs(filepath.Join(cacheHome, "git-who", name))
	if err != nil {
		return "", err
	}

	return absP, nil
}
return func(yield func(git.Commit, error) bool) { 19 | for c, err := range commits { 20 | if err != nil { 21 | yield(c, err) 22 | return 23 | } 24 | 25 | chunk = append(chunk, c) 26 | 27 | if len(chunk) >= cacheChunkSize { 28 | toCache <- chunk 29 | chunk = []git.Commit{} 30 | } 31 | 32 | if !yield(c, nil) { 33 | break 34 | } 35 | } 36 | 37 | // Make sure to write any remainder 38 | if len(chunk) > 0 { 39 | toCache <- chunk 40 | } 41 | } 42 | } 43 | 44 | // We want to get a list of revs from an iterator over commits while passing 45 | // through the iterator to someone else for consumption. 46 | // 47 | // A little awkward... is there a better way to do this? 48 | func revTee( 49 | commits iter.Seq2[git.Commit, error], 50 | revs *[]string, 51 | ) iter.Seq2[git.Commit, error] { 52 | return func(yield func(git.Commit, error) bool) { 53 | for c, err := range commits { 54 | if err != nil { 55 | yield(c, err) 56 | return 57 | } 58 | 59 | *revs = append(*revs, c.Hash) 60 | if !yield(c, nil) { 61 | return 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /internal/concurrent/concurrent.go: -------------------------------------------------------------------------------- 1 | // Try to get some speed up on large repos by running git log in parallel. 2 | // 3 | // Concurrency graph is something like: 4 | // 5 | // rev writer 6 | // v 7 | // ~q~ 8 | // v 9 | // spawner 10 | // v 11 | // ~q2~ 12 | // v v v 13 | // worker worker worker ... 
// We run one git log process for each chunk of this many revisions.
const chunkSize = 1024

// nCPU is the number of workers we are willing to run at once.
var nCPU int

func init() {
	nCPU = runtime.GOMAXPROCS(0)
}

// calcTotalChunks returns how many chunks of at most chunkSize revisions are
// needed to cover revCount revisions (ceiling division), so an exact multiple
// of chunkSize does not count an extra, empty chunk.
func calcTotalChunks(revCount int) int {
	return (revCount + chunkSize - 1) / chunkSize
}

// shouldShowProgress reports whether there is more work than one chunk per
// worker.
func shouldShowProgress(chunks int) bool {
	return chunks > nCPU
}

// setDiff returns all the strings in a that do not appear in b, preserving
// the order (and any duplicates) of a. The result is never nil.
func setDiff(a []string, b []string) []string {
	exclude := make(map[string]struct{}, len(b))
	for _, rev := range b {
		exclude[rev] = struct{}{}
	}

	ret := []string{}
	for _, rev := range a {
		if _, found := exclude[rev]; !found {
			ret = append(ret, rev)
		}
	}

	return ret
}
} 102 | 103 | commits := git.LimitDiffsByPathspec(result.Commits, whop.pathspecs) 104 | 105 | foundRevs := []string{} 106 | accumulator, err := whop.tally(revTee(commits, &foundRevs), whop.opts) 107 | if err != nil { 108 | return none, revs, err 109 | } 110 | 111 | logger().Debug("commits found in cache", "num", len(foundRevs)) 112 | return accumulator, setDiff(revs, foundRevs), nil 113 | } 114 | 115 | func handleCacheFailure(c cache.Cache, err error) error { 116 | // Graceful handling of cache error. Wipe cache and move on without it 117 | logger().Warn( 118 | fmt.Sprintf("error reading from cache (maybe corrupt?): %v", err), 119 | ) 120 | logger().Warn("wiping cache and moving on") 121 | return c.Clear() 122 | } 123 | 124 | func tallyFanOutFanIn[T combinable[T]]( 125 | ctx context.Context, 126 | whop whoperation[T], 127 | cache cache.Cache, 128 | allowProgressBar bool, 129 | ) (_ T, _err error) { 130 | defer func() { 131 | if _err != nil { 132 | _err = fmt.Errorf("error running concurrent tally: %w", _err) 133 | } 134 | }() 135 | 136 | var accumulator T 137 | 138 | // -- Get rev list --------------------------------------------------------- 139 | revs, err := git.RevList(ctx, whop.revspec, whop.pathspecs, whop.filters) 140 | if err != nil { 141 | return accumulator, err 142 | } 143 | 144 | if len(revs) == 0 { 145 | logger().Debug("no commits found; no work to do") 146 | return accumulator, nil 147 | } 148 | 149 | // Filter out ignored revs 150 | if len(whop.ignoreRevs) > 0 { 151 | ignoreSet := map[string]bool{} 152 | for _, rev := range whop.ignoreRevs { 153 | ignoreSet[rev] = true 154 | } 155 | 156 | unignoredRevs := []string{} 157 | for _, rev := range revs { 158 | if ignoreSet[rev] { 159 | continue 160 | } 161 | 162 | unignoredRevs = append(unignoredRevs, rev) 163 | } 164 | 165 | revs = unignoredRevs 166 | } 167 | 168 | // -- Use cached commits if there are any ---------------------------------- 169 | remainingRevs := revs 170 | 171 | err = cache.Open() 172 | 
defer func() { 173 | err = cache.Close() 174 | }() 175 | 176 | if err == nil { 177 | accumulator, remainingRevs, err = accumulateCached(whop, cache, revs) 178 | if err != nil { 179 | err = handleCacheFailure(cache, err) 180 | if err != nil { 181 | return accumulator, err 182 | } 183 | } else if len(remainingRevs) == 0 { 184 | logger().Debug("all commits read from cache") 185 | return accumulator, nil 186 | } 187 | } else { 188 | err = handleCacheFailure(cache, err) 189 | if err != nil { 190 | return accumulator, err 191 | } 192 | } 193 | 194 | // -- Fork ----------------------------------------------------------------- 195 | logger().Debug( 196 | "running concurrent tally", 197 | "revCount", 198 | len(remainingRevs), 199 | "nCPU", 200 | nCPU, 201 | ) 202 | 203 | ctx, cancel := context.WithCancel(ctx) 204 | defer cancel() 205 | 206 | q := func() <-chan []string { 207 | q := make(chan []string) // q is our work queue 208 | go func() { 209 | defer close(q) 210 | 211 | runWriter(ctx, remainingRevs, q) 212 | }() 213 | 214 | return q 215 | }() 216 | 217 | // Launches workers that consume from q and write to results and errors 218 | // that can be read by the main coroutine. 
219 | results, errs, cacheErr := func() (<-chan T, <-chan error, <-chan error) { 220 | q2 := make(chan []string) // Intermediate work queue 221 | workers := make(chan worker, nCPU) 222 | toCache := make(chan []git.Commit) 223 | results := make(chan T) 224 | errs := make(chan error, 1) 225 | 226 | go func() { 227 | defer close(q2) 228 | defer close(workers) 229 | 230 | runSpawner[T](ctx, whop, q, q2, workers, results, toCache) 231 | }() 232 | 233 | go func() { 234 | defer close(toCache) 235 | defer close(results) 236 | defer close(errs) 237 | 238 | runWaiter(workers, errs) 239 | }() 240 | 241 | cacheErr := make(chan error, 1) 242 | go func() { 243 | defer close(cacheErr) 244 | 245 | err := runCacher(ctx, &cache, toCache) 246 | if err != nil { 247 | cacheErr <- err 248 | } 249 | }() 250 | 251 | return results, errs, cacheErr 252 | }() 253 | 254 | // -- Join ----------------------------------------------------------------- 255 | // Read and combine results until results channel is closed, context is 256 | // cancelled, or we get a worker error 257 | totalChunks := calcTotalChunks(len(remainingRevs)) 258 | chunksComplete := 0 259 | showProgress := allowProgressBar && shouldShowProgress(totalChunks) 260 | 261 | if showProgress { 262 | fmt.Printf(" 0%% (0/%s commits)", format.Number(len(remainingRevs))) 263 | } 264 | 265 | loop: 266 | for { 267 | select { 268 | case <-ctx.Done(): 269 | return accumulator, errors.New("concurrent tally cancelled") 270 | case result, ok := <-results: 271 | if !ok { 272 | break loop 273 | } 274 | 275 | accumulator = accumulator.Combine(result) 276 | chunksComplete += 1 277 | 278 | if showProgress { 279 | fmt.Printf("%s\r", pretty.EraseLine) 280 | fmt.Printf( 281 | "%3.0f%% (%s/%s commits)", 282 | float32(chunksComplete)/float32(totalChunks)*100, 283 | format.Number(min(len(remainingRevs), chunksComplete*chunkSize)), 284 | format.Number(len(remainingRevs)), 285 | ) 286 | } 287 | case err, ok := <-errs: 288 | if ok && err != nil { 289 | 
logger().Debug("error in concurrent tally; cancelling") 290 | return accumulator, fmt.Errorf( 291 | "concurrent tally failed: %w", 292 | err, 293 | ) 294 | } 295 | case err, ok := <-cacheErr: 296 | if ok && err != nil { 297 | logger().Debug("cache error in concurrent tally; cancelling") 298 | return accumulator, fmt.Errorf( 299 | "concurrent tally failed: %w", 300 | err, 301 | ) 302 | } 303 | } 304 | } 305 | 306 | if showProgress { 307 | fmt.Printf("%s\r", pretty.EraseLine) 308 | } 309 | 310 | // Check if there was a caching error (and wait for cacher to exit) 311 | // We have to do this here in addition to above in case there is a caching 312 | // error after the last chunk of work is handled. 313 | select { 314 | case <-ctx.Done(): 315 | return accumulator, errors.New("concurrent tally cancelled") 316 | case err, ok := <-cacheErr: 317 | if ok && err != nil { 318 | return accumulator, err 319 | } 320 | } 321 | 322 | return accumulator, nil 323 | } 324 | 325 | func TallyCommits( 326 | ctx context.Context, 327 | revspec []string, 328 | pathspecs []string, 329 | filters git.LogFilters, 330 | repoFiles git.RepoConfigFiles, 331 | opts tally.TallyOpts, 332 | cache cache.Cache, 333 | allowProgressBar bool, 334 | ) (_ map[string]tally.Tally, err error) { 335 | ignoreRevs, err := repoFiles.IgnoreRevs() 336 | if err != nil { 337 | return nil, err 338 | } 339 | 340 | whop := whoperation[tally.TalliesByPath]{ 341 | revspec: revspec, 342 | pathspecs: pathspecs, 343 | filters: filters, 344 | useMailmap: repoFiles.HasMailmap(), 345 | ignoreRevs: ignoreRevs, 346 | tally: tally.TallyCommitsByPath, 347 | opts: opts, 348 | } 349 | 350 | talliesByPath, err := tallyFanOutFanIn[tally.TalliesByPath]( 351 | ctx, 352 | whop, 353 | cache, 354 | allowProgressBar, 355 | ) 356 | if err != nil { 357 | return nil, err 358 | } 359 | 360 | return talliesByPath.Reduce(), nil 361 | } 362 | 363 | func TallyCommitsTree( 364 | ctx context.Context, 365 | revspec []string, 366 | pathspecs []string, 367 
| filters git.LogFilters, 368 | repoFiles git.RepoConfigFiles, 369 | opts tally.TallyOpts, 370 | worktreePaths map[string]bool, 371 | gitRootPath string, 372 | cache cache.Cache, 373 | allowProgressBar bool, 374 | ) (*tally.TreeNode, error) { 375 | ignoreRevs, err := repoFiles.IgnoreRevs() 376 | if err != nil { 377 | return nil, err 378 | } 379 | 380 | whop := whoperation[tally.TalliesByPath]{ 381 | revspec: revspec, 382 | pathspecs: pathspecs, 383 | filters: filters, 384 | useMailmap: repoFiles.HasMailmap(), 385 | ignoreRevs: ignoreRevs, 386 | tally: tally.TallyCommitsByPath, 387 | opts: opts, 388 | } 389 | 390 | talliesByPath, err := tallyFanOutFanIn[tally.TalliesByPath]( 391 | ctx, 392 | whop, 393 | cache, 394 | allowProgressBar, 395 | ) 396 | if err != nil { 397 | return nil, err 398 | } 399 | 400 | return tally.TallyCommitsTreeFromPaths( 401 | talliesByPath, 402 | worktreePaths, 403 | gitRootPath, 404 | ) 405 | } 406 | 407 | func TallyCommitsTimeline( 408 | ctx context.Context, 409 | revspec []string, 410 | pathspecs []string, 411 | filters git.LogFilters, 412 | repoFiles git.RepoConfigFiles, 413 | opts tally.TallyOpts, 414 | end time.Time, 415 | cache cache.Cache, 416 | allowProgressBar bool, 417 | ) ([]tally.TimeBucket, error) { 418 | ignoreRevs, err := repoFiles.IgnoreRevs() 419 | if err != nil { 420 | return nil, err 421 | } 422 | 423 | f := func( 424 | commits iter.Seq2[git.Commit, error], 425 | opts tally.TallyOpts, 426 | ) (tally.TimeSeries, error) { 427 | return tally.TallyCommitsByDate(commits, opts) 428 | } 429 | 430 | whop := whoperation[tally.TimeSeries]{ 431 | revspec: revspec, 432 | pathspecs: pathspecs, 433 | filters: filters, 434 | useMailmap: repoFiles.HasMailmap(), 435 | ignoreRevs: ignoreRevs, 436 | tally: f, 437 | opts: opts, 438 | } 439 | 440 | buckets, err := tallyFanOutFanIn[tally.TimeSeries]( 441 | ctx, 442 | whop, 443 | cache, 444 | allowProgressBar, 445 | ) 446 | if err != nil { 447 | return nil, err 448 | } 449 | 450 | if 
end.IsZero() { 451 | end = buckets[len(buckets)-1].Time 452 | } 453 | resolution := tally.CalcResolution(buckets[0].Time, end) 454 | rebuckets := tally.Rebucket(buckets, resolution, end) 455 | return rebuckets, nil 456 | } 457 | -------------------------------------------------------------------------------- /internal/concurrent/log.go: -------------------------------------------------------------------------------- 1 | package concurrent 2 | 3 | import ( 4 | "log/slog" 5 | ) 6 | 7 | var pkgLogger *slog.Logger 8 | 9 | func logger() *slog.Logger { 10 | if pkgLogger == nil { 11 | pkgLogger = slog.Default().With("package", "concurrent") 12 | } 13 | 14 | return pkgLogger 15 | } 16 | -------------------------------------------------------------------------------- /internal/concurrent/worker.go: -------------------------------------------------------------------------------- 1 | package concurrent 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | 8 | "github.com/sinclairtarget/git-who/internal/cache" 9 | "github.com/sinclairtarget/git-who/internal/git" 10 | ) 11 | 12 | type worker struct { 13 | id int 14 | err chan error 15 | } 16 | 17 | // Write chunks of work to our work queue to be handled by workers downstream. 18 | func runWriter(ctx context.Context, revs []string, q chan<- []string) { 19 | logger().Debug("writer started") 20 | defer logger().Debug("writer exited") 21 | 22 | i := 0 23 | for i < len(revs) { 24 | select { 25 | case <-ctx.Done(): 26 | return 27 | case q <- revs[i:min(i+chunkSize, len(revs))]: 28 | i += chunkSize 29 | } 30 | } 31 | } 32 | 33 | // Spawner. Creates new workers while we have free CPUs and work to do. 
34 | func runSpawner[T combinable[T]]( 35 | ctx context.Context, 36 | whop whoperation[T], 37 | q <-chan []string, 38 | q2 chan []string, 39 | workers chan<- worker, 40 | results chan<- T, 41 | toCache chan<- []git.Commit, 42 | ) { 43 | logger().Debug("spawner started") 44 | defer logger().Debug("spawner exited") 45 | 46 | nWorkers := 0 47 | 48 | for { 49 | var revs []string 50 | var ok bool 51 | 52 | select { 53 | case <-ctx.Done(): 54 | return 55 | case revs, ok = <-q: 56 | if !ok { 57 | // Channel closed, no more work 58 | return 59 | } 60 | } 61 | 62 | // Spawn worker if we are still under count 63 | if nWorkers < nCPU { 64 | nWorkers += 1 65 | 66 | w := worker{ 67 | id: nWorkers, 68 | err: make(chan error, 1), 69 | } 70 | go func() { 71 | defer close(w.err) 72 | 73 | err := runWorker[T]( 74 | ctx, 75 | w.id, 76 | whop, 77 | q2, 78 | results, 79 | toCache, 80 | ) 81 | if err != nil { 82 | w.err <- err 83 | } 84 | }() 85 | 86 | workers <- w 87 | } 88 | 89 | select { 90 | case <-ctx.Done(): 91 | return 92 | case q2 <- revs: // Forward work to workers 93 | } 94 | } 95 | } 96 | 97 | // Waiter. Waits for done or error for each one in turn. Forwards 98 | // errors to errs channel. 99 | func runWaiter(workers <-chan worker, errs chan<- error) { 100 | logger().Debug("waiter started") 101 | defer logger().Debug("waiter exited") 102 | 103 | for w := range workers { 104 | logger().Debug("waiting on worker", "workerId", w.id) 105 | 106 | err, ok := <-w.err 107 | if ok && err != nil { 108 | errs <- err 109 | } 110 | } 111 | } 112 | 113 | // Cacher. Writes parsed commits to the cache. 
114 | func runCacher( 115 | ctx context.Context, 116 | cache *cache.Cache, 117 | toCache <-chan []git.Commit, 118 | ) (err error) { 119 | logger().Debug("cacher started") 120 | 121 | defer func() { 122 | if err != nil { 123 | err = fmt.Errorf("error in cacher: %w", err) 124 | } 125 | 126 | logger().Debug("cacher exited") 127 | }() 128 | 129 | loop: 130 | for { 131 | select { 132 | case <-ctx.Done(): 133 | return errors.New("cacher cancelled") 134 | case commits, ok := <-toCache: 135 | if !ok { 136 | break loop 137 | } 138 | 139 | err := cache.Add(commits) 140 | if err != nil { 141 | return err 142 | } 143 | } 144 | } 145 | 146 | return nil 147 | } 148 | 149 | // A tally worker that runs git log for each chunk of work. 150 | func runWorker[T combinable[T]]( 151 | ctx context.Context, 152 | id int, 153 | whop whoperation[T], 154 | in <-chan []string, 155 | results chan<- T, 156 | toCache chan<- []git.Commit, 157 | ) (err error) { 158 | logger := logger().With("workerId", id) 159 | logger.Debug("worker started") 160 | 161 | defer func() { 162 | if err != nil { 163 | err = fmt.Errorf("error in worker %d: %w", id, err) 164 | logger.Debug("worker exiting with error") 165 | } 166 | 167 | logger.Debug("worker exited") 168 | }() 169 | 170 | loop: 171 | for { 172 | select { 173 | case <-ctx.Done(): 174 | return errors.New("worker cancelled") 175 | case revs, ok := <-in: 176 | if !ok { 177 | if err != nil { 178 | return err 179 | } 180 | 181 | break loop // We're done, input channel is closed 182 | } 183 | 184 | // We pass an empty array of paths here. Even if we are only 185 | // tallying commits that affected certain paths, we want to make 186 | // sure that the diffs we get include ALL paths touched by each 187 | // commit. 
Otherwise when we cache the commits we would be caching 188 | // only a part of the commit 189 | nopaths := []string{} 190 | subprocess, err := git.RunStdinLog( 191 | ctx, 192 | nopaths, 193 | true, 194 | whop.useMailmap, 195 | ) 196 | if err != nil { 197 | return err 198 | } 199 | 200 | w, stdinCloser := subprocess.StdinWriter() 201 | 202 | // Write revs to git log stdin 203 | for _, rev := range revs { 204 | fmt.Fprintln(w, rev) 205 | } 206 | w.Flush() 207 | 208 | err = stdinCloser() 209 | if err != nil { 210 | return err 211 | } 212 | 213 | // Read parsed commits and enqueue for caching 214 | lines := subprocess.StdoutNullDelimitedLines() 215 | commits := git.ParseCommits(lines) 216 | commits = cacheTee(commits, toCache) 217 | 218 | // Now that we're tallying, we DO care to only look at the file 219 | // diffs under the given paths 220 | commits = git.LimitDiffsByPathspec(commits, whop.pathspecs) 221 | 222 | result, err := whop.tally(commits, whop.opts) 223 | if err != nil { 224 | return err 225 | } 226 | 227 | err = subprocess.Wait() 228 | if err != nil { 229 | return err 230 | } 231 | 232 | results <- result 233 | } 234 | } 235 | 236 | return nil 237 | } 238 | -------------------------------------------------------------------------------- /internal/format/format.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Utility functions for formatting output. 3 | */ 4 | package format 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | "unicode/utf8" 10 | 11 | runewidth "github.com/mattn/go-runewidth" 12 | ) 13 | 14 | // Print string with max length, truncating with ellipsis. 
15 | func Abbrev(s string, max int) string { 16 | tail := "…" 17 | 18 | if len(s) > utf8.RuneCountInString(s) { 19 | return runewidth.Truncate(s, max, tail) 20 | } 21 | 22 | if len(s) <= max { 23 | return s 24 | } 25 | 26 | return s[:max-1] + tail 27 | } 28 | 29 | func GitEmail(email string) string { 30 | return fmt.Sprintf("<%s>", email) 31 | } 32 | 33 | func RelativeTime(now time.Time, t time.Time) string { 34 | duration := now.Sub(t) 35 | 36 | day := time.Hour * 24 37 | week := day * 7 38 | month := day * 30 // eh 39 | year := day * 365 40 | 41 | if duration < time.Hour { 42 | minutes := int(duration / time.Minute) 43 | return fmt.Sprintf("%d min. ago", minutes) 44 | } else if duration < day { 45 | hours := int(duration / time.Hour) 46 | if hours > 1 { 47 | return fmt.Sprintf("%d hr. ago", hours) 48 | } else { 49 | return fmt.Sprintf("%d hour ago", hours) 50 | } 51 | } else if duration < week { 52 | days := int(duration / day) 53 | if days > 1 { 54 | return fmt.Sprintf("%d days ago", days) 55 | } else { 56 | return fmt.Sprintf("%d day ago", days) 57 | } 58 | } else if duration < month { 59 | weeks := int(duration / week) 60 | if weeks > 1 { 61 | return fmt.Sprintf("%d weeks ago", weeks) 62 | } else { 63 | return fmt.Sprintf("%d week ago", weeks) 64 | } 65 | } else if duration < year { 66 | months := int(duration / month) 67 | if months > 1 { 68 | return fmt.Sprintf("%d mon. ago", months) 69 | } else { 70 | return fmt.Sprintf("%d month ago", months) 71 | } 72 | } else { 73 | years := int(duration / year) 74 | if years > 99 { 75 | return ">99 yr. ago" 76 | } else if years > 1 { 77 | return fmt.Sprintf("%d yr. 
// Number formats a non-negative count for display.
//
// Values below 1,000 are printed as-is. Values from 1,000 up to (but not
// including) one million get a thousands comma, e.g. "12,345". Values from one
// million up to (but not including) one hundred million are abbreviated to
// millions with one decimal place, e.g. "1.2m". Anything from one hundred
// million upwards is reported as ">99m".
//
// Panics if num is negative.
func Number(num int) string {
	if num < 0 {
		panic("cannot format negative number")
	}

	// The bounds are inclusive so that exact powers of a thousand land in the
	// right bucket (1000 -> "1,000", 1_000_000 -> "1.0m").
	switch {
	case num >= 100_000_000:
		return ">99m"
	case num >= 1_000_000:
		mils := float32(num) / 1_000_000
		return fmt.Sprintf("%.1fm", mils)
	case num >= 1_000:
		return fmt.Sprintf("%d,%03d", num/1_000, num%1_000)
	default:
		return fmt.Sprintf("%d", num)
	}
}
1_234_567, 61 | exp: "1.2m", 62 | }, 63 | { 64 | name: "ten_millions", 65 | n: 12_345_678, 66 | exp: "12.3m", 67 | }, 68 | { 69 | name: "hundred_millions", 70 | n: 123_456_789, 71 | exp: ">99m", 72 | }, 73 | } 74 | 75 | for _, test := range tests { 76 | t.Run(test.name, func(t *testing.T) { 77 | ans := format.Number(test.n) 78 | if ans != test.exp { 79 | t.Errorf("expected %s but got %s", test.exp, ans) 80 | } 81 | }) 82 | } 83 | } 84 | 85 | func TestNumberNegativeError(t *testing.T) { 86 | defer func() { 87 | if r := recover(); r == nil { 88 | t.Errorf("Number() did not panic with negative input") 89 | } 90 | }() 91 | 92 | format.Number(-1) 93 | } 94 | -------------------------------------------------------------------------------- /internal/git/args.go: -------------------------------------------------------------------------------- 1 | package git 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "path/filepath" 7 | ) 8 | 9 | // Handles splitting the Git revisions from the pathspecs given a list of args. 10 | // 11 | // We call git rev-parse to disambiguate. 
12 | func ParseArgs(args []string) (revs []string, pathspecs []string, err error) { 13 | ctx, cancel := context.WithCancel(context.Background()) 14 | defer cancel() 15 | 16 | subprocess, err := RunRevParse(ctx, args) 17 | if err != nil { 18 | return nil, nil, fmt.Errorf("could not parse args: %w", err) 19 | } 20 | 21 | lines := subprocess.StdoutLines() 22 | revs = []string{} 23 | pathspecs = []string{} 24 | 25 | finishedRevs := false 26 | for line, err := range lines { 27 | if err != nil { 28 | return nil, nil, fmt.Errorf( 29 | "failed reading output of rev-parse: %w", 30 | err, 31 | ) 32 | } 33 | 34 | if !finishedRevs && isRev(line) { 35 | revs = append(revs, line) 36 | } else { 37 | finishedRevs = true 38 | 39 | if line != "--" { 40 | // If user used backslashes as path separator on windows, 41 | // we want to turn into forward slashes 42 | pathspecs = append(pathspecs, filepath.ToSlash(line)) 43 | } 44 | } 45 | } 46 | 47 | err = subprocess.Wait() 48 | if err != nil { 49 | return nil, nil, err 50 | } 51 | 52 | if len(revs) == 0 { 53 | // Default rev 54 | revs = append(revs, "HEAD") 55 | } 56 | 57 | return revs, pathspecs, nil 58 | } 59 | -------------------------------------------------------------------------------- /internal/git/args_test.go: -------------------------------------------------------------------------------- 1 | // This file contains tests for git-who's argument parsing. 2 | // 3 | // Since properly parsing the arguments involves invoking git rev-parse as a 4 | // subprocess, these tests run against the test repo submodule. 
5 | 6 | package git_test 7 | 8 | import ( 9 | "errors" 10 | "slices" 11 | "testing" 12 | 13 | "github.com/sinclairtarget/git-who/internal/git" 14 | ) 15 | 16 | const safeTag string = "root" 17 | const safeCommit string = "6afef287af5ca43f7d741e7ceff61aad38055b6a" 18 | const filename string = "README.md" 19 | 20 | func TestParseArgs(t *testing.T) { 21 | tests := []struct { 22 | name string 23 | args []string 24 | expRevs []string 25 | expPaths []string 26 | }{ 27 | { 28 | name: "empty_args", 29 | args: []string{}, 30 | expRevs: []string{"HEAD"}, 31 | expPaths: []string{}, 32 | }, 33 | { 34 | name: "commit", 35 | args: []string{safeTag}, 36 | expRevs: []string{safeCommit}, 37 | expPaths: []string{}, 38 | }, 39 | { 40 | name: "commit_path", 41 | args: []string{safeTag, filename}, 42 | expRevs: []string{safeCommit}, 43 | expPaths: []string{filename}, 44 | }, 45 | { 46 | name: "path", 47 | args: []string{filename}, 48 | expRevs: []string{"HEAD"}, 49 | expPaths: []string{filename}, 50 | }, 51 | { 52 | name: "separator", 53 | args: []string{safeTag, "--", filename}, 54 | expRevs: []string{safeCommit}, 55 | expPaths: []string{filename}, 56 | }, 57 | { 58 | name: "nonexistant_path_after_separator", 59 | args: []string{safeTag, "--", "foobar"}, 60 | expRevs: []string{safeCommit}, 61 | expPaths: []string{"foobar"}, 62 | }, 63 | { 64 | name: "nonexistant_path_after_separator_no_rev", 65 | args: []string{"--", "foobar"}, 66 | expRevs: []string{"HEAD"}, 67 | expPaths: []string{"foobar"}, 68 | }, 69 | { 70 | name: "trailing_separator", 71 | args: []string{safeTag, "--"}, 72 | expRevs: []string{safeCommit}, 73 | expPaths: []string{}, 74 | }, 75 | { 76 | name: "leading_separator", 77 | args: []string{"--", filename}, 78 | expRevs: []string{"HEAD"}, 79 | expPaths: []string{filename}, 80 | }, 81 | { 82 | name: "multiple_args", 83 | args: []string{ 84 | safeTag, 85 | safeTag, 86 | filename, 87 | filename, 88 | }, 89 | expRevs: []string{safeCommit, safeCommit}, 90 | expPaths: 
[]string{filename, filename}, 91 | }, 92 | } 93 | 94 | for _, test := range tests { 95 | t.Run(test.name, func(t *testing.T) { 96 | revs, paths, err := git.ParseArgs(test.args) 97 | if err != nil { 98 | var subErr git.SubprocessErr 99 | if errors.As(err, &subErr) { 100 | t.Logf("subprocess error output:\n%s", subErr.Stderr) 101 | } 102 | t.Errorf("got error: %v", err) 103 | } 104 | 105 | if !slices.Equal(revs, test.expRevs) { 106 | t.Errorf( 107 | "expected %v as revs but got %v", 108 | test.expRevs, 109 | revs, 110 | ) 111 | } 112 | 113 | if !slices.Equal(paths, test.expPaths) { 114 | t.Errorf( 115 | "expected %v as paths but got %v", 116 | test.expPaths, 117 | paths, 118 | ) 119 | } 120 | }) 121 | } 122 | } 123 | 124 | func TestParseArgsError(t *testing.T) { 125 | tests := []struct { 126 | name string 127 | args []string 128 | }{ 129 | { 130 | name: "not_path_or_rev", 131 | args: []string{"foobar"}, 132 | }, 133 | { 134 | name: "not_path", 135 | args: []string{safeTag, "foobar"}, 136 | }, 137 | { 138 | name: "not_rev", 139 | args: []string{"foobar", "--", filename}, 140 | }, 141 | } 142 | 143 | for _, test := range tests { 144 | t.Run(test.name, func(t *testing.T) { 145 | _, _, err := git.ParseArgs(test.args) 146 | if err == nil { 147 | t.Error("expected error, but none returned") 148 | } 149 | }) 150 | } 151 | } 152 | 153 | func TestParseArgsRange(t *testing.T) { 154 | revs, paths, err := git.ParseArgs([]string{"HEAD~3.."}) 155 | if err != nil { 156 | t.Errorf("got unexpected error: %v", err) 157 | } 158 | 159 | if len(revs) != 2 { 160 | t.Errorf("expected revs to have length 2, but got: %v", revs) 161 | } 162 | 163 | expPaths := []string{} 164 | if !slices.Equal(paths, expPaths) { 165 | t.Errorf("expected %v as paths but got %v", expPaths, paths) 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /internal/git/cmd.go: -------------------------------------------------------------------------------- 1 | package 
// SubprocessErr describes a Git subprocess that finished with an error.
type SubprocessErr struct {
	ExitCode int    // exit code of the subprocess
	Stderr   string // captured error output; may be empty
	Err      error  // underlying error, returned by Unwrap
}

// Error reports the exit code, followed by the captured error output when
// there is any.
func (e SubprocessErr) Error() string {
	summary := fmt.Sprintf("Git subprocess exited with code %d", e.ExitCode)
	if e.Stderr == "" {
		return summary
	}

	return fmt.Sprintf("%s. Error output:\n%s", summary, e.Stderr)
}

// Unwrap returns the underlying error so that errors.Is and errors.As can see
// through a SubprocessErr.
func (e SubprocessErr) Unwrap() error {
	return e.Err
}
76 | func (s Subprocess) StdoutNullDelimitedLines() iter.Seq2[string, error] { 77 | scanner := bufio.NewScanner(s.stdout) 78 | 79 | scanner.Split(func(data []byte, atEOF bool) (int, []byte, error) { 80 | null_i := bytes.IndexByte(data, '\x00') 81 | 82 | if null_i >= 0 { 83 | return null_i + 1, data[:null_i], nil 84 | } 85 | 86 | if atEOF { 87 | return 0, data, bufio.ErrFinalToken 88 | } 89 | 90 | return 0, nil, nil // Scan more 91 | }) 92 | 93 | return func(yield func(string, error) bool) { 94 | for scanner.Scan() { 95 | line := scanner.Text() 96 | 97 | // Handle annoying new line that exists between regular commit 98 | // fields and --numstat data 99 | processedLine := strings.TrimPrefix(line, "\n") 100 | 101 | if !yield(processedLine, nil) { 102 | return 103 | } 104 | } 105 | 106 | if err := scanner.Err(); err != nil { 107 | yield("", fmt.Errorf("error while scanning: %w", err)) 108 | } 109 | } 110 | } 111 | 112 | func (s Subprocess) Wait() error { 113 | logger().Debug("waiting for subprocess...") 114 | 115 | stderr, err := io.ReadAll(s.stderr) 116 | if err != nil { 117 | return fmt.Errorf("could not read stderr: %w", err) 118 | } 119 | 120 | err = s.cmd.Wait() 121 | logger().Debug( 122 | "subprocess exited", 123 | "code", 124 | s.cmd.ProcessState.ExitCode(), 125 | ) 126 | 127 | if err != nil { 128 | return SubprocessErr{ 129 | ExitCode: s.cmd.ProcessState.ExitCode(), 130 | Stderr: strings.TrimSpace(string(stderr)), 131 | Err: err, 132 | } 133 | } 134 | 135 | return nil 136 | } 137 | 138 | func run( 139 | ctx context.Context, 140 | args []string, 141 | needStdin bool, 142 | ) (*Subprocess, error) { 143 | cmd := exec.CommandContext(ctx, "git", args...) 
// LogFilters holds the optional filters applied when listing commits.
type LogFilters struct {
	Since    string   // passed as --since when non-empty
	Until    string   // passed as --until when non-empty
	Authors  []string // each passed as its own --author
	Nauthors []string // author patterns to exclude
}

// Turn into CLI args we can pass to `git log`
//
// The excluded authors are OR-ed into a single negative-lookahead pattern,
// which is why --perl-regexp is added whenever Nauthors is non-empty.
func (f LogFilters) ToArgs() []string {
	args := []string{}

	if f.Since != "" {
		args = append(args, "--since", f.Since)
	}

	if f.Until != "" {
		args = append(args, "--until", f.Until)
	}

	for _, author := range f.Authors {
		args = append(args, "--author", author)
	}

	if len(f.Nauthors) == 0 {
		return args
	}

	excluded := strings.Join(f.Nauthors, "|")
	pattern := fmt.Sprintf(`^((?!%s).*)$`, excluded)

	return append(args, "--perl-regexp", "--author", pattern)
}
(*Subprocess, error) { 228 | var baseArgs []string 229 | 230 | if useMailmap { 231 | baseArgs = []string{ 232 | "log", 233 | mailmapLogFormat, 234 | "-z", 235 | "--date=unix", 236 | "--reverse", 237 | "--no-show-signature", 238 | } 239 | } else { 240 | baseArgs = []string{ 241 | "log", 242 | logFormat, 243 | "-z", 244 | "--date=unix", 245 | "--reverse", 246 | "--no-show-signature", 247 | "--no-mailmap", 248 | } 249 | } 250 | 251 | if needDiffs { 252 | baseArgs = append(baseArgs, "--numstat") 253 | } 254 | 255 | filterArgs := filters.ToArgs() 256 | 257 | var args []string 258 | if len(pathspecs) > 0 { 259 | args = slices.Concat( 260 | baseArgs, 261 | filterArgs, 262 | revs, 263 | []string{"--"}, 264 | pathspecs, 265 | ) 266 | } else { 267 | args = slices.Concat(baseArgs, filterArgs, revs) 268 | } 269 | 270 | subprocess, err := run(ctx, args, false) 271 | if err != nil { 272 | return nil, fmt.Errorf("failed to run git log: %w", err) 273 | } 274 | 275 | return subprocess, nil 276 | } 277 | 278 | // Runs git log --stdin 279 | func RunStdinLog( 280 | ctx context.Context, 281 | pathspecs []string, // Doesn't limit commits, but limits diffs! 
282 | needDiffs bool, 283 | useMailmap bool, 284 | ) (*Subprocess, error) { 285 | var baseArgs []string 286 | 287 | if useMailmap { 288 | baseArgs = []string{ 289 | "log", 290 | mailmapLogFormat, 291 | "-z", 292 | "--date=unix", 293 | "--reverse", 294 | "--no-show-signature", 295 | "--stdin", 296 | "--no-walk", 297 | } 298 | } else { 299 | baseArgs = []string{ 300 | "log", 301 | logFormat, 302 | "-z", 303 | "--date=unix", 304 | "--reverse", 305 | "--no-show-signature", 306 | "--stdin", 307 | "--no-walk", 308 | "--no-mailmap", 309 | } 310 | } 311 | 312 | if needDiffs { 313 | baseArgs = append(baseArgs, "--numstat") 314 | } 315 | 316 | var args []string 317 | if len(pathspecs) > 0 { 318 | args = slices.Concat(baseArgs, []string{"--"}, pathspecs) 319 | } else { 320 | args = baseArgs 321 | } 322 | 323 | subprocess, err := run(ctx, args, true) 324 | if err != nil { 325 | return nil, fmt.Errorf("error running git log --stdin: %w", err) 326 | } 327 | 328 | return subprocess, nil 329 | } 330 | 331 | // Runs git rev-parse 332 | func RunRevParse(ctx context.Context, args []string) (*Subprocess, error) { 333 | var baseArgs = []string{ 334 | "rev-parse", 335 | "--no-flags", 336 | } 337 | 338 | subprocess, err := run(ctx, slices.Concat(baseArgs, args), false) 339 | if err != nil { 340 | return nil, fmt.Errorf("failed to run git rev-parse: %w", err) 341 | } 342 | 343 | return subprocess, nil 344 | } 345 | 346 | // Runs git rev-list. When countOnly is true, passes --count, which is much 347 | // faster than printing then getting all the revisions when all you need is the 348 | // count. 
349 | func RunRevList( 350 | ctx context.Context, 351 | revs []string, 352 | pathspecs []string, 353 | filters LogFilters, 354 | ) (*Subprocess, error) { 355 | if len(revs) == 0 { 356 | return nil, errors.New("git rev-list requires revision spec") 357 | } 358 | 359 | baseArgs := []string{ 360 | "rev-list", 361 | "--reverse", 362 | } 363 | 364 | filterArgs := filters.ToArgs() 365 | 366 | var args []string 367 | if len(pathspecs) > 0 { 368 | args = slices.Concat( 369 | baseArgs, 370 | filterArgs, 371 | revs, 372 | []string{"--"}, 373 | pathspecs, 374 | ) 375 | } else { 376 | args = slices.Concat(baseArgs, filterArgs, revs) 377 | } 378 | 379 | subprocess, err := run(ctx, args, false) 380 | if err != nil { 381 | return nil, fmt.Errorf("failed to run git rev-list: %w", err) 382 | } 383 | 384 | return subprocess, nil 385 | } 386 | 387 | func RunLsFiles(ctx context.Context, pathspecs []string) (*Subprocess, error) { 388 | baseArgs := []string{ 389 | "ls-files", 390 | "--exclude-standard", 391 | "-z", 392 | } 393 | 394 | var args []string 395 | if len(pathspecs) > 0 { 396 | args = slices.Concat(baseArgs, pathspecs) 397 | } else { 398 | args = slices.Concat(baseArgs, []string{"--"}, pathspecs) 399 | } 400 | 401 | subprocess, err := run(ctx, args, false) 402 | if err != nil { 403 | return nil, fmt.Errorf("failed to run git ls-files: %w", err) 404 | } 405 | 406 | return subprocess, nil 407 | } 408 | -------------------------------------------------------------------------------- /internal/git/git.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Wraps access to data needed from Git. 3 | * 4 | * We invoke Git directly as a subprocess and parse the output rather than using 5 | * git2go/libgit2. 
// A file that was changed in a Commit, with the number of lines added and
// removed.
type FileDiff struct {
	Path         string
	LinesAdded   int
	LinesRemoved int
}

// String renders the diff as { path:"<path>" added:<n> removed:<n> }.
func (fd FileDiff) String() string {
	const layout = `{ path:"%s" added:%d removed:%d }`

	return fmt.Sprintf(layout, fd.Path, fd.LinesAdded, fd.LinesRemoved)
}
68 | func CommitsWithOpts( 69 | ctx context.Context, 70 | revs []string, 71 | pathspecs []string, 72 | filters LogFilters, 73 | populateDiffs bool, 74 | repoFiles RepoConfigFiles, 75 | ) ( 76 | iter.Seq2[Commit, error], 77 | func() error, 78 | error, 79 | ) { 80 | ignoreRevs, err := repoFiles.IgnoreRevs() 81 | if err != nil { 82 | return nil, nil, err 83 | } 84 | 85 | subprocess, err := RunLog( 86 | ctx, 87 | revs, 88 | pathspecs, 89 | filters, 90 | populateDiffs, 91 | repoFiles.HasMailmap(), 92 | ) 93 | if err != nil { 94 | return nil, nil, err 95 | } 96 | 97 | lines := subprocess.StdoutNullDelimitedLines() 98 | commits := ParseCommits(lines) 99 | commits = SkipIgnored(commits, ignoreRevs) 100 | 101 | closer := func() error { 102 | return subprocess.Wait() 103 | } 104 | return commits, closer, nil 105 | } 106 | 107 | func RevList( 108 | ctx context.Context, 109 | revranges []string, 110 | pathspecs []string, 111 | filters LogFilters, 112 | ) (_ []string, err error) { 113 | defer func() { 114 | if err != nil { 115 | err = fmt.Errorf("error getting full rev list: %w", err) 116 | } 117 | }() 118 | 119 | revs := []string{} 120 | 121 | subprocess, err := RunRevList(ctx, revranges, pathspecs, filters) 122 | if err != nil { 123 | return revs, err 124 | } 125 | 126 | lines := subprocess.StdoutLines() 127 | for line, err := range lines { 128 | if err != nil { 129 | return revs, err 130 | } 131 | 132 | revs = append(revs, line) 133 | } 134 | 135 | err = subprocess.Wait() 136 | if err != nil { 137 | return revs, err 138 | } 139 | 140 | return revs, nil 141 | } 142 | 143 | func GetRoot() (_ string, err error) { 144 | defer func() { 145 | if err != nil { 146 | err = fmt.Errorf( 147 | "failed to run git rev-parse --show-toplevel: %w", 148 | err, 149 | ) 150 | } 151 | }() 152 | 153 | ctx, cancel := context.WithCancel(context.Background()) 154 | defer cancel() 155 | 156 | args := []string{"rev-parse", "--show-toplevel"} 157 | subprocess, err := run(ctx, args, false) 158 | if err 
!= nil { 159 | return "", err 160 | } 161 | 162 | b, err := io.ReadAll(subprocess.stdout) 163 | if err != nil { 164 | return "", err 165 | } 166 | 167 | err = subprocess.Wait() 168 | if err != nil { 169 | return "", err 170 | } 171 | 172 | root := strings.TrimSpace(string(b)) 173 | return root, nil 174 | } 175 | 176 | // Returns all paths in the working tree under the given pathspecs. 177 | func WorkingTreeFiles(pathspecs []string) (_ map[string]bool, err error) { 178 | defer func() { 179 | if err != nil { 180 | err = fmt.Errorf("error getting tree files: %w", err) 181 | } 182 | }() 183 | 184 | ctx, cancel := context.WithCancel(context.Background()) 185 | defer cancel() 186 | 187 | wtreeset := map[string]bool{} 188 | 189 | subprocess, err := RunLsFiles(ctx, pathspecs) 190 | if err != nil { 191 | return wtreeset, err 192 | } 193 | 194 | lines := subprocess.StdoutNullDelimitedLines() 195 | for line, err := range lines { 196 | if err != nil { 197 | return wtreeset, err 198 | } 199 | 200 | wtreeset[line] = true 201 | } 202 | 203 | err = subprocess.Wait() 204 | if err != nil { 205 | return wtreeset, err 206 | } 207 | 208 | return wtreeset, nil 209 | } 210 | 211 | // Returns all commits in the input iterator, but for each commit, strips out 212 | // any file diff not modifying one of the given pathspecs 213 | func LimitDiffsByPathspec( 214 | commits iter.Seq2[Commit, error], 215 | pathspecs []string, 216 | ) iter.Seq2[Commit, error] { 217 | if len(pathspecs) == 0 { 218 | return commits 219 | } 220 | 221 | return func(yield func(Commit, error) bool) { 222 | // Check all pathspecs are supported 223 | for _, p := range pathspecs { 224 | if !IsSupportedPathspec(p) { 225 | yield( 226 | Commit{}, 227 | fmt.Errorf("unsupported magic in pathspec: \"%s\"", p), 228 | ) 229 | return 230 | } 231 | } 232 | 233 | includes, excludes := SplitPathspecs(pathspecs) 234 | 235 | for commit, err := range commits { 236 | if err != nil { 237 | yield(commit, err) 238 | return 239 | } 240 | 241 | 
filtered := []FileDiff{} 242 | for _, diff := range commit.FileDiffs { 243 | shouldInclude := false 244 | for _, p := range includes { 245 | if PathspecMatch(p, diff.Path) { 246 | shouldInclude = true 247 | break 248 | } 249 | } 250 | 251 | shouldExclude := false 252 | for _, p := range excludes { 253 | if PathspecMatch(p, diff.Path) { 254 | shouldExclude = true 255 | break 256 | } 257 | } 258 | 259 | if shouldInclude && !shouldExclude { 260 | filtered = append(filtered, diff) 261 | } 262 | } 263 | 264 | commit.FileDiffs = filtered 265 | yield(commit, nil) 266 | } 267 | } 268 | } 269 | 270 | // Returns an iterator over commits that skips any revs in the given list. 271 | func SkipIgnored( 272 | commits iter.Seq2[Commit, error], 273 | ignoreRevs []string, 274 | ) iter.Seq2[Commit, error] { 275 | ignoreSet := map[string]bool{} 276 | for _, rev := range ignoreRevs { 277 | ignoreSet[rev] = true 278 | } 279 | 280 | return func(yield func(Commit, error) bool) { 281 | for commit, err := range commits { 282 | if err != nil { 283 | yield(commit, err) 284 | return 285 | } 286 | 287 | if shouldIgnore := ignoreSet[commit.Hash]; shouldIgnore { 288 | continue // skip this commit 289 | } 290 | 291 | if !yield(commit, nil) { 292 | break 293 | } 294 | } 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /internal/git/git_test.go: -------------------------------------------------------------------------------- 1 | package git_test 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/sinclairtarget/git-who/internal/git" 8 | "github.com/sinclairtarget/git-who/internal/utils/iterutils" 9 | ) 10 | 11 | func TestCommitsFileRename(t *testing.T) { 12 | path := "file-rename" 13 | 14 | ctx, cancel := context.WithCancel(context.Background()) 15 | defer cancel() 16 | 17 | commitsSeq, closer, err := git.CommitsWithOpts( 18 | ctx, 19 | []string{"HEAD"}, 20 | []string{path}, 21 | git.LogFilters{}, 22 | true, 23 | 
git.RepoConfigFiles{}, 24 | ) 25 | if err != nil { 26 | t.Fatalf("error getting commits: %v", err) 27 | } 28 | 29 | commits, err := iterutils.Collect(commitsSeq) 30 | if err != nil { 31 | t.Fatalf(err.Error()) 32 | } 33 | 34 | err = closer() 35 | if err != nil { 36 | t.Errorf("encountered error cleaning up: %v", err) 37 | } 38 | 39 | if len(commits) != 3 { 40 | t.Fatalf("expected 3 commits but found %d", len(commits)) 41 | } 42 | 43 | commit := commits[1] 44 | if commit.Hash != "879e94bbbcbbec348ba1df332dd46e7314c62df1" { 45 | t.Errorf( 46 | "expected commit to have hash %s but got %s", 47 | "879e94bbbcbbec348ba1df332dd46e7314c62df1", 48 | commit.Hash, 49 | ) 50 | } 51 | 52 | if len(commit.FileDiffs) != 1 { 53 | t.Errorf( 54 | "len of commit file diffs should be 1, but got %d", 55 | len(commit.FileDiffs), 56 | ) 57 | } 58 | 59 | diff := commit.FileDiffs[0] 60 | if diff.Path != "file-rename/bim.go" { 61 | t.Errorf( 62 | "expected diff path to be %s but got \"%s\"", 63 | "file-rename/bim.go", 64 | diff.Path, 65 | ) 66 | } 67 | } 68 | 69 | // Test moving a file into a new directory (to make sure we handle { => foo}) 70 | func TestCommitsFileRenameNewDir(t *testing.T) { 71 | path := "rename-new-dir" 72 | 73 | ctx, cancel := context.WithCancel(context.Background()) 74 | defer cancel() 75 | 76 | commitsSeq, closer, err := git.CommitsWithOpts( 77 | ctx, 78 | []string{"HEAD"}, 79 | []string{path}, 80 | git.LogFilters{}, 81 | true, 82 | git.RepoConfigFiles{}, 83 | ) 84 | if err != nil { 85 | t.Fatalf("error getting commits: %v", err) 86 | } 87 | 88 | commits, err := iterutils.Collect(commitsSeq) 89 | if err != nil { 90 | t.Fatalf(err.Error()) 91 | } 92 | 93 | err = closer() 94 | if err != nil { 95 | t.Errorf("encountered error cleaning up: %v", err) 96 | } 97 | 98 | if len(commits) != 2 { 99 | t.Fatalf("expected 2 commits but found %d", len(commits)) 100 | } 101 | 102 | commit := commits[1] 103 | if commit.Hash != "13b6f4f70c682ab06da9ef433cdb4fcbf65d78c3" { 104 | t.Errorf( 
105 | "expected commit to have hash %s but got %s", 106 | "13b6f4f70c682ab06da9ef433cdb4fcbf65d78c3", 107 | commit.Hash, 108 | ) 109 | } 110 | 111 | if len(commit.FileDiffs) != 1 { 112 | t.Errorf( 113 | "len of commit file diffs should be 1, but got %d", 114 | len(commit.FileDiffs), 115 | ) 116 | } 117 | 118 | diff := commit.FileDiffs[0] 119 | if diff.Path != "rename-new-dir/foo/hello.txt" { 120 | t.Errorf( 121 | "expected diff path to be %s but got \"%s\"", 122 | "rename-new-dir/foo/hello.txt", 123 | diff.Path, 124 | ) 125 | } 126 | } 127 | 128 | // Test moving where change will look like /foo/{bim/bar => baz/biz}/hello.txt 129 | func TestCommitsRenameDeepDir(t *testing.T) { 130 | path := "rename-across-deep-dirs" 131 | 132 | ctx, cancel := context.WithCancel(context.Background()) 133 | defer cancel() 134 | 135 | commitsSeq, closer, err := git.CommitsWithOpts( 136 | ctx, 137 | []string{"HEAD"}, 138 | []string{path}, 139 | git.LogFilters{}, 140 | true, 141 | git.RepoConfigFiles{}, 142 | ) 143 | if err != nil { 144 | t.Fatalf("error getting commits: %v", err) 145 | } 146 | 147 | commits, err := iterutils.Collect(commitsSeq) 148 | if err != nil { 149 | t.Fatalf(err.Error()) 150 | } 151 | 152 | err = closer() 153 | if err != nil { 154 | t.Errorf("encountered error cleaning up: %v", err) 155 | } 156 | 157 | if len(commits) != 2 { 158 | t.Fatalf("expected 2 commits but found %d", len(commits)) 159 | } 160 | 161 | commit := commits[1] 162 | if commit.Hash != "b9acb309a2c20ab6b93549bc7468b3e3ae5fc05e" { 163 | t.Errorf( 164 | "expected commit to have hash %s but got %s", 165 | "b9acb309a2c20ab6b93549bc7468b3e3ae5fc05e", 166 | commit.Hash, 167 | ) 168 | } 169 | 170 | if len(commit.FileDiffs) != 1 { 171 | t.Errorf( 172 | "len of commit file diffs should be 1, but got %d", 173 | len(commit.FileDiffs), 174 | ) 175 | } 176 | 177 | diff := commit.FileDiffs[0] 178 | if diff.Path != "rename-across-deep-dirs/zim/zam/hello.txt" { 179 | t.Errorf( 180 | "expected diff path to be %s 
but got \"%s\"", 181 | "rename-across-deep-dirs/zim/zam/hello.txt", 182 | diff.Path, 183 | ) 184 | } 185 | } 186 | 187 | func TestParseWholeLog(t *testing.T) { 188 | ctx, cancel := context.WithCancel(context.Background()) 189 | defer cancel() 190 | 191 | commitsSeq, closer, err := git.CommitsWithOpts( 192 | ctx, 193 | []string{"HEAD"}, 194 | []string{"."}, 195 | git.LogFilters{}, 196 | true, 197 | git.RepoConfigFiles{}, 198 | ) 199 | if err != nil { 200 | t.Fatalf("error getting commits: %v", err) 201 | } 202 | 203 | _, err = iterutils.Collect(commitsSeq) 204 | if err != nil { 205 | t.Fatalf(err.Error()) 206 | } 207 | 208 | err = closer() 209 | if err != nil { 210 | t.Errorf("encountered error cleaning up: %v", err) 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /internal/git/log.go: -------------------------------------------------------------------------------- 1 | package git 2 | 3 | import ( 4 | "log/slog" 5 | ) 6 | 7 | var pkgLogger *slog.Logger 8 | 9 | func logger() *slog.Logger { 10 | if pkgLogger == nil { 11 | pkgLogger = slog.Default().With("package", "git") 12 | } 13 | 14 | return pkgLogger 15 | } 16 | -------------------------------------------------------------------------------- /internal/git/main_test.go: -------------------------------------------------------------------------------- 1 | package git_test 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "testing" 7 | 8 | "github.com/sinclairtarget/git-who/internal/repotest" 9 | ) 10 | 11 | // Run these tests in the test submodule. 
// parseLinesChanged converts s, a single numeric field taken from a diff
// line, into an int.
//
// line is the complete line the field was taken from; it is used only to give
// context in the error message. On failure the count returned is zero and the
// error wraps the underlying strconv error.
func parseLinesChanged(s string, line string) (int, error) {
	count, convErr := strconv.Atoi(s)
	if convErr == nil {
		return count, nil
	}

	return 0, fmt.Errorf(
		"could not parse %s as int on line \"%s\": %w",
		s,
		line,
		convErr,
	)
}
| done := linesThisCommit >= 6 && (len(line) == 0 || isRev(line)) 79 | if done { 80 | if allowCommit(commit, now) { 81 | if !yield(commit, nil) { 82 | return 83 | } 84 | } 85 | 86 | commit = Commit{} 87 | diff = nil 88 | linesThisCommit = 0 89 | 90 | if len(line) == 0 { 91 | continue 92 | } 93 | } 94 | 95 | switch { 96 | case linesThisCommit == 0: 97 | commit.Hash = line 98 | case linesThisCommit == 1: 99 | commit.ShortHash = line 100 | case linesThisCommit == 2: 101 | parts := strings.Split(line, " ") 102 | commit.IsMerge = len(parts) > 1 103 | case linesThisCommit == 3: 104 | commit.AuthorName = line 105 | case linesThisCommit == 4: 106 | commit.AuthorEmail = line 107 | case linesThisCommit == 5: 108 | i, err := strconv.Atoi(line) 109 | if err != nil { 110 | yield( 111 | commit, 112 | fmt.Errorf( 113 | "error parsing date from commit %s: %w", 114 | commit.Name(), 115 | err, 116 | ), 117 | ) 118 | return 119 | } 120 | 121 | commit.Date = time.Unix(int64(i), 0) 122 | default: 123 | // Handle file diffs 124 | var err error 125 | if diff == nil { 126 | diff = &FileDiff{} 127 | 128 | // Split to get non-empty tokens 129 | parts := strings.SplitN(line, "\t", 3) 130 | nonemptyParts := []string{} 131 | for _, p := range parts { 132 | if len(p) > 0 { 133 | nonemptyParts = append(nonemptyParts, p) 134 | } 135 | } 136 | 137 | if len(nonemptyParts) == 3 { 138 | if nonemptyParts[0] != "-" { 139 | diff.LinesAdded, err = parseLinesChanged( 140 | nonemptyParts[0], 141 | line, 142 | ) 143 | if err != nil { 144 | goto handleError 145 | } 146 | } 147 | 148 | if nonemptyParts[1] != "-" { 149 | diff.LinesRemoved, err = parseLinesChanged( 150 | nonemptyParts[1], 151 | line, 152 | ) 153 | if err != nil { 154 | goto handleError 155 | } 156 | } 157 | 158 | diff.Path = nonemptyParts[2] 159 | commit.FileDiffs = append(commit.FileDiffs, *diff) 160 | diff = nil 161 | } else if len(nonemptyParts) == 2 { 162 | if nonemptyParts[0] != "-" { 163 | diff.LinesAdded, err = parseLinesChanged( 164 | 
nonemptyParts[0], 165 | line, 166 | ) 167 | if err != nil { 168 | goto handleError 169 | } 170 | } 171 | 172 | if nonemptyParts[1] != "-" { 173 | diff.LinesRemoved, err = parseLinesChanged( 174 | nonemptyParts[1], 175 | line, 176 | ) 177 | if err != nil { 178 | goto handleError 179 | } 180 | } 181 | } else { 182 | err = fmt.Errorf( 183 | "wrong number of elements on line after split: %d", 184 | len(nonemptyParts), 185 | ) 186 | } 187 | } else { 188 | if len(diff.Path) > 0 { 189 | diff.Path = line 190 | commit.FileDiffs = append(commit.FileDiffs, *diff) 191 | diff = nil 192 | } else { 193 | // Used to handle moved files specially. For now, just 194 | // mark as path until we overwrite it with next line 195 | diff.Path = line 196 | } 197 | } 198 | 199 | handleError: 200 | if err != nil { 201 | yield( 202 | commit, 203 | fmt.Errorf( 204 | "error parsing file diffs from commit %s: %w", 205 | commit.Name(), 206 | err, 207 | ), 208 | ) 209 | return 210 | } 211 | } 212 | 213 | linesThisCommit += 1 214 | } 215 | 216 | if linesThisCommit > 0 && allowCommit(commit, now) { 217 | yield(commit, nil) 218 | } 219 | } 220 | } 221 | 222 | // Returns true if this is a (full-length) Git revision hash, false otherwise. 223 | // 224 | // We also need to handle a hash with "^" in front. 
// Returns true if this is a (full-length) Git revision hash, false otherwise.
//
// We also need to handle a hash with "^" in front.
//
// A full-length hash is exactly 40 lowercase hexadecimal characters. The
// optional leading "^" is stripped before the length is checked, so a bare
// 41-character hex string or a "^" followed by only 39 hex characters is
// rejected.
func isRev(s string) bool {
	s = strings.TrimPrefix(s, "^")
	if len(s) != 40 {
		return false
	}

	for i := 0; i < len(s); i++ {
		c := s[i]
		isHex := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')
		if !isHex {
			return false
		}
	}

	return true
}
62 | didMatch, err := doublestar.PathMatch(pathspec, p) 63 | if err != nil { 64 | panic("bad pattern passed to doublestar.Match()") 65 | } 66 | 67 | if didMatch { 68 | return true 69 | } 70 | 71 | // Ensure we mimic Git behavior with trailing slash. See "pathspec" in 72 | // gitglossary(3). 73 | subdirPathspec := path.Join(pathspec, "**") 74 | didMatch, err = doublestar.PathMatch(subdirPathspec, p) 75 | if err != nil { 76 | panic("bad pattern passed to doublestar.Match()") 77 | } 78 | 79 | if didMatch { 80 | return true 81 | } 82 | 83 | if pathspec[0] == '*' { 84 | toplevelPathspec := path.Join("**/", pathspec) 85 | didMatch, err = doublestar.PathMatch(toplevelPathspec, p) 86 | if err != nil { 87 | panic("bad pattern passed to doublestar.Match()") 88 | } 89 | 90 | if didMatch { 91 | return true 92 | } 93 | } 94 | 95 | return false 96 | } 97 | -------------------------------------------------------------------------------- /internal/git/pathspec_test.go: -------------------------------------------------------------------------------- 1 | package git_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/google/go-cmp/cmp" 7 | 8 | "github.com/sinclairtarget/git-who/internal/git" 9 | ) 10 | 11 | func TestSupportedPathspec(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | pathspec string 15 | expected bool 16 | }{ 17 | { 18 | name: "empty_pathspec", 19 | pathspec: "", 20 | expected: true, 21 | }, 22 | { 23 | name: "literal_path", 24 | pathspec: "foo/bar.txt", 25 | expected: true, 26 | }, 27 | { 28 | name: "directory_prefix", 29 | pathspec: "foo/", 30 | expected: true, 31 | }, 32 | { 33 | name: "glob", 34 | pathspec: "foo/*.txt", 35 | expected: true, 36 | }, 37 | { 38 | name: "double_glob", 39 | pathspec: "foo/**/*.txt", 40 | expected: true, 41 | }, 42 | { 43 | name: "single_wildcard", 44 | pathspec: "foo/?ar.txt", 45 | expected: true, 46 | }, 47 | { 48 | name: "range", 49 | pathspec: "foo/[a-z]ar.txt", 50 | expected: true, 51 | }, 52 | { 53 | name: 
"ignore", 54 | pathspec: ":(exclude)vendor/", 55 | expected: true, 56 | }, 57 | { 58 | name: "ignore_short", 59 | pathspec: ":!vendor/", 60 | expected: true, 61 | }, 62 | { 63 | name: "ignore_short_caret", 64 | pathspec: ":^vendor/", 65 | expected: true, 66 | }, 67 | { 68 | name: "ignore_short_optional_colon", 69 | pathspec: ":!:vendor/", 70 | expected: true, 71 | }, 72 | { 73 | name: "ignore_leading_whitespace", 74 | pathspec: ":! foo.txt", 75 | expected: true, 76 | }, 77 | { 78 | name: "ignore_leading_tab", 79 | pathspec: ":!\tfoo.txt", 80 | expected: true, 81 | }, 82 | { 83 | name: "ignore_glob", 84 | pathspec: ":!*.txt", 85 | expected: true, 86 | }, 87 | { 88 | name: "ignore_pycache", 89 | pathspec: ":!__pycache__/", 90 | expected: true, 91 | }, 92 | { 93 | name: "attr", 94 | pathspec: ":(attr: foo)vendor/", 95 | expected: false, 96 | }, 97 | { 98 | name: "literal", 99 | pathspec: ":(literal)vendor/", 100 | expected: false, 101 | }, 102 | { 103 | name: "glob", 104 | pathspec: ":(glob)vendor/", 105 | expected: false, 106 | }, 107 | { 108 | name: "icase", 109 | pathspec: ":(icase)vendor/", 110 | expected: false, 111 | }, 112 | { 113 | name: "top", 114 | pathspec: ":(top)vendor/", 115 | expected: false, 116 | }, 117 | { 118 | name: "top_short", 119 | pathspec: ":/foo/bar.txt", 120 | expected: false, 121 | }, 122 | { 123 | name: "multiple", 124 | pathspec: ":(icase,exclude)foo/*.txt", 125 | expected: false, 126 | }, 127 | { 128 | name: "multiple_short", 129 | pathspec: ":!/foo/*.txt", 130 | expected: false, 131 | }, 132 | } 133 | 134 | for _, test := range tests { 135 | t.Run(test.name, func(t *testing.T) { 136 | result := git.IsSupportedPathspec(test.pathspec) 137 | if result != test.expected { 138 | t.Errorf( 139 | "expected pathspec \"%s\" supported is %v but got %v", 140 | test.pathspec, 141 | test.expected, 142 | result, 143 | ) 144 | } 145 | }) 146 | } 147 | } 148 | 149 | func TestSplitPathspecs(t *testing.T) { 150 | tests := []struct { 151 | name string 152 
| pathspecs []string 153 | includes []string 154 | excludes []string 155 | }{ 156 | { 157 | name: "long", 158 | pathspecs: []string{"*.txt", ":(exclude)vendor/"}, 159 | includes: []string{"*.txt"}, 160 | excludes: []string{"vendor/"}, 161 | }, 162 | { 163 | name: "short", 164 | pathspecs: []string{"*.txt", ":!vendor/"}, 165 | includes: []string{"*.txt"}, 166 | excludes: []string{"vendor/"}, 167 | }, 168 | { 169 | name: "caret", 170 | pathspecs: []string{"*.txt", ":^vendor/"}, 171 | includes: []string{"*.txt"}, 172 | excludes: []string{"vendor/"}, 173 | }, 174 | { 175 | name: "optional_colon", 176 | pathspecs: []string{"*.txt", ":!:vendor/"}, 177 | includes: []string{"*.txt"}, 178 | excludes: []string{"vendor/"}, 179 | }, 180 | } 181 | 182 | for _, test := range tests { 183 | t.Run(test.name, func(t *testing.T) { 184 | includes, excludes := git.SplitPathspecs(test.pathspecs) 185 | if diff := cmp.Diff(test.includes, includes); diff != "" { 186 | t.Errorf("includes is wrong:\n%s", diff) 187 | } 188 | if diff := cmp.Diff(test.excludes, excludes); diff != "" { 189 | t.Errorf("excludes is wrong:\n%s", diff) 190 | } 191 | }) 192 | } 193 | } 194 | 195 | func TestPathspecMatch(t *testing.T) { 196 | tests := []struct { 197 | name string 198 | pathspec string 199 | path string 200 | expected bool 201 | }{ 202 | { 203 | name: "empty_path", 204 | pathspec: "*", 205 | path: "", 206 | expected: true, 207 | }, 208 | { 209 | name: "directory_prefix", 210 | pathspec: "foo/", 211 | path: "foo/bar.txt", 212 | expected: true, 213 | }, 214 | { 215 | name: "glob", 216 | pathspec: "*", 217 | path: "foo", 218 | expected: true, 219 | }, 220 | { 221 | name: "dir", 222 | pathspec: "foo", 223 | path: "foo/bar.txt", 224 | expected: true, 225 | }, 226 | { 227 | name: "glob_dir", 228 | pathspec: "foo/*", 229 | path: "foo/bar.txt", 230 | expected: true, 231 | }, 232 | { 233 | name: "glob_dir_ext", 234 | pathspec: "foo/*.txt", 235 | path: "foo/bar.txt", 236 | expected: true, 237 | }, 238 | { 239 | 
name: "double_glob", 240 | pathspec: "foo/**/bar.txt", 241 | path: "foo/bim/bam/bar.txt", 242 | expected: true, 243 | }, 244 | { 245 | name: "double_glob_dir", 246 | pathspec: "foo/", 247 | path: "foo/bim/bam/bar.txt", 248 | expected: true, 249 | }, 250 | { 251 | name: "toplevel_glob", 252 | pathspec: "*_test.go", 253 | path: "foo/bim/bam/foo_test.go", 254 | expected: true, 255 | }, 256 | { 257 | name: "glob_not_match", 258 | pathspec: "*.txt", 259 | path: "README.md", 260 | expected: false, 261 | }, 262 | { 263 | name: "glob_dir_not_match", 264 | pathspec: "foo/*.txt", 265 | path: "foo/bim/bam/bar.txt", 266 | expected: false, 267 | }, 268 | { 269 | name: "subdir_not_match", 270 | pathspec: "foo/bim", 271 | path: "foo/bar.txt", 272 | expected: false, 273 | }, 274 | } 275 | 276 | for _, test := range tests { 277 | t.Run(test.name, func(t *testing.T) { 278 | result := git.PathspecMatch(test.pathspec, test.path) 279 | if result != test.expected { 280 | t.Errorf( 281 | "expected match of path \"%s\" to pathspec \"%s\" to be %v but got %v", 282 | test.path, 283 | test.pathspec, 284 | test.expected, 285 | result, 286 | ) 287 | } 288 | }) 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /internal/git/repo.go: -------------------------------------------------------------------------------- 1 | package git 2 | 3 | import ( 4 | "bufio" 5 | "errors" 6 | "fmt" 7 | "hash" 8 | "io" 9 | "io/fs" 10 | "os" 11 | "path/filepath" 12 | "strings" 13 | ) 14 | 15 | type RepoConfigFiles struct { 16 | MailmapPath string 17 | IgnoreRevsPath string 18 | } 19 | 20 | func (rf RepoConfigFiles) HasMailmap() bool { 21 | return len(rf.MailmapPath) > 0 22 | } 23 | 24 | func (rf RepoConfigFiles) HasIgnoreRevs() bool { 25 | return len(rf.IgnoreRevsPath) > 0 26 | } 27 | 28 | func (rf RepoConfigFiles) MailmapHash(h hash.Hash32) error { 29 | if rf.HasMailmap() { 30 | f, err := os.Open(rf.MailmapPath) 31 | if !errors.Is(err, fs.ErrNotExist) { 32 | if 
err != nil { 33 | return fmt.Errorf("could not read mailmap file: %v", err) 34 | } 35 | defer f.Close() 36 | 37 | _, err = io.Copy(h, f) 38 | if err != nil { 39 | return fmt.Errorf("error hashing mailmap file: %v", err) 40 | } 41 | } 42 | } 43 | 44 | return nil 45 | } 46 | 47 | // Get git blame ignored revisions 48 | func (rf RepoConfigFiles) IgnoreRevs() (_ []string, err error) { 49 | defer func() { 50 | if err != nil { 51 | err = fmt.Errorf("error reading git blame ignore revs: %w", err) 52 | } 53 | }() 54 | 55 | var revs []string 56 | 57 | if !rf.HasIgnoreRevs() { 58 | return revs, nil 59 | } 60 | 61 | f, err := os.Open(rf.IgnoreRevsPath) 62 | if err != nil { 63 | return revs, err 64 | } 65 | defer f.Close() 66 | 67 | scanner := bufio.NewScanner(f) 68 | for scanner.Scan() { 69 | line := strings.TrimSpace(scanner.Text()) 70 | if isRev(line) { 71 | revs = append(revs, line) 72 | } 73 | } 74 | 75 | err = scanner.Err() 76 | if err != nil { 77 | return revs, err 78 | } 79 | 80 | return revs, nil 81 | } 82 | 83 | // NOTE: We do NOT respect the git config here, we just assume the conventional 84 | // path for this file. 85 | func MailmapPath(gitRootPath string) string { 86 | path := filepath.Join(gitRootPath, ".mailmap") 87 | return path 88 | } 89 | 90 | // NOTE: We do NOT respect the git config here, we just assume the conventional 91 | // path for this file. 
92 | func IgnoreRevsPath(gitRootPath string) string { 93 | path := filepath.Join(gitRootPath, ".git-blame-ignore-revs") 94 | return path 95 | } 96 | 97 | // Checks to see whether the files exist on disk or not 98 | func CheckRepoConfigFiles(gitRootPath string) (_ RepoConfigFiles, err error) { 99 | defer func() { 100 | if err != nil { 101 | err = fmt.Errorf( 102 | "error while checking for repository configuration files: %w", 103 | err, 104 | ) 105 | } 106 | }() 107 | 108 | var files RepoConfigFiles 109 | 110 | mailmapPath := MailmapPath(gitRootPath) 111 | _, err = os.Stat(mailmapPath) 112 | if err == nil { 113 | files.MailmapPath = mailmapPath 114 | } else if !errors.Is(err, os.ErrNotExist) { 115 | return files, err 116 | } 117 | 118 | ignoreRevsPath := IgnoreRevsPath(gitRootPath) 119 | _, err = os.Stat(ignoreRevsPath) 120 | if err == nil { 121 | files.IgnoreRevsPath = ignoreRevsPath 122 | } else if !errors.Is(err, os.ErrNotExist) { 123 | return files, err 124 | } 125 | 126 | return files, nil 127 | } 128 | -------------------------------------------------------------------------------- /internal/pretty/ansi.go: -------------------------------------------------------------------------------- 1 | // ANSI escape codes 2 | package pretty 3 | 4 | const Reset string = "\x1b[0m" 5 | 6 | const Green string = "\x1b[32m" 7 | const Red string = "\x1b[31m" 8 | const DefaultColor string = "\x1b[39m" 9 | 10 | const Dim string = "\x1b[2m" 11 | const Invert string = "\x1b[7m" 12 | 13 | const EraseLine string = "\x1b[2K" 14 | -------------------------------------------------------------------------------- /internal/pretty/pretty.go: -------------------------------------------------------------------------------- 1 | package pretty 2 | 3 | import ( 4 | "os" 5 | 6 | "golang.org/x/term" 7 | ) 8 | 9 | // Allow backspacing and replacing output for e.g. progress indicator? 
10 | func AllowDynamic(f *os.File) bool { 11 | return term.IsTerminal(int(f.Fd())) 12 | } 13 | -------------------------------------------------------------------------------- /internal/repotest/repotest.go: -------------------------------------------------------------------------------- 1 | // Helpers for running tests in the test submodule/repo. 2 | package repotest 3 | 4 | import ( 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | const msg = `error changing working directory to submodule: %w 10 | Did you remember to initialize the submodule? See README.md` 11 | 12 | func UseTestRepo() error { 13 | err := os.Chdir("../../test-repo") 14 | if err != nil { 15 | return fmt.Errorf(msg, err) 16 | } 17 | 18 | return nil 19 | } 20 | -------------------------------------------------------------------------------- /internal/tally/bucket.go: -------------------------------------------------------------------------------- 1 | package tally 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "iter" 7 | "maps" 8 | "slices" 9 | "time" 10 | 11 | "github.com/sinclairtarget/git-who/internal/git" 12 | ) 13 | 14 | type TimeBucket struct { 15 | Name string 16 | Time time.Time 17 | Tally FinalTally // Winning author's tally 18 | TotalTally FinalTally // Overall tally for all authors 19 | tallies map[string]Tally 20 | } 21 | 22 | func newBucket(name string, t time.Time) TimeBucket { 23 | return TimeBucket{ 24 | Name: name, 25 | Time: t, 26 | tallies: map[string]Tally{}, 27 | } 28 | } 29 | 30 | func (b TimeBucket) Value(mode TallyMode) int { 31 | switch mode { 32 | case CommitMode: 33 | return b.Tally.Commits 34 | case FilesMode: 35 | return b.Tally.FileCount 36 | case LinesMode: 37 | return b.Tally.LinesAdded + b.Tally.LinesRemoved 38 | default: 39 | panic("unrecognized tally mode in switch") 40 | } 41 | } 42 | 43 | func (b TimeBucket) TotalValue(mode TallyMode) int { 44 | switch mode { 45 | case CommitMode: 46 | return b.TotalTally.Commits 47 | case FilesMode: 48 | return b.TotalTally.FileCount 49 | case 
LinesMode: 50 | return b.TotalTally.LinesAdded + b.TotalTally.LinesRemoved 51 | default: 52 | panic("unrecognized tally mode in switch") 53 | } 54 | } 55 | 56 | func (a TimeBucket) Combine(b TimeBucket) TimeBucket { 57 | if a.Name != b.Name { 58 | panic("cannot combine buckets whose names do not match") 59 | } 60 | 61 | if a.Time != b.Time { 62 | panic("cannot combine buckets whose times do not match") 63 | } 64 | 65 | merged := a 66 | for key, tally := range b.tallies { 67 | existing, ok := a.tallies[key] 68 | if ok { 69 | merged.tallies[key] = existing.Combine(tally) 70 | } else { 71 | merged.tallies[key] = tally 72 | } 73 | } 74 | 75 | return merged 76 | } 77 | 78 | func (b TimeBucket) Rank(mode TallyMode) TimeBucket { 79 | if len(b.tallies) > 0 { 80 | b.Tally = Rank(b.tallies, mode)[0] 81 | 82 | var runningTally Tally 83 | for _, tally := range b.tallies { 84 | runningTally = runningTally.Combine(tally) 85 | } 86 | b.TotalTally = runningTally.Final() 87 | } 88 | 89 | return b 90 | } 91 | 92 | type TimeSeries []TimeBucket 93 | 94 | func (a TimeSeries) Combine(b TimeSeries) TimeSeries { 95 | buckets := map[int64]TimeBucket{} 96 | for _, bucket := range a { 97 | buckets[bucket.Time.Unix()] = bucket 98 | } 99 | for _, bucket := range b { 100 | existing, ok := buckets[bucket.Time.Unix()] 101 | if ok { 102 | buckets[bucket.Time.Unix()] = existing.Combine(bucket) 103 | } else { 104 | buckets[bucket.Time.Unix()] = bucket 105 | } 106 | } 107 | 108 | sortedKeys := slices.Sorted(maps.Keys(buckets)) 109 | 110 | outBuckets := []TimeBucket{} 111 | for _, key := range sortedKeys { 112 | outBuckets = append(outBuckets, buckets[key]) 113 | } 114 | 115 | return outBuckets 116 | } 117 | 118 | // Resolution for a time series. 
119 | // 120 | // apply - Truncate time to its time bucket 121 | // label - Format the date to a label for the bucket 122 | // next - Get next time in series, given a time 123 | type Resolution struct { 124 | apply func(time.Time) time.Time 125 | label func(time.Time) string 126 | next func(time.Time) time.Time 127 | } 128 | 129 | func applyDaily(t time.Time) time.Time { 130 | year, month, day := t.Date() 131 | return time.Date(year, month, day, 0, 0, 0, 0, time.Local) 132 | } 133 | 134 | var daily = Resolution{ 135 | apply: applyDaily, 136 | next: func(t time.Time) time.Time { 137 | t = applyDaily(t) 138 | year, month, day := t.Date() 139 | return time.Date(year, month, day+1, 0, 0, 0, 0, time.Local) 140 | }, 141 | label: func(t time.Time) string { 142 | return applyDaily(t).Format(time.DateOnly) 143 | }, 144 | } 145 | 146 | func CalcResolution(start time.Time, end time.Time) Resolution { 147 | duration := end.Sub(start) 148 | day := time.Hour * 24 149 | year := day * 365 150 | 151 | if duration > year*5 { 152 | // Yearly buckets 153 | apply := func(t time.Time) time.Time { 154 | year, _, _ := t.Date() 155 | return time.Date(year, 1, 1, 0, 0, 0, 0, time.Local) 156 | } 157 | return Resolution{ 158 | apply: apply, 159 | next: func(t time.Time) time.Time { 160 | t = apply(t) 161 | year, _, _ := t.Date() 162 | return time.Date(year+1, 1, 1, 0, 0, 0, 0, time.Local) 163 | }, 164 | label: func(t time.Time) string { 165 | return apply(t).Format("2006") 166 | }, 167 | } 168 | } else if duration > day*60 { 169 | // Monthly buckets 170 | apply := func(t time.Time) time.Time { 171 | year, month, _ := t.Date() 172 | return time.Date(year, month, 1, 0, 0, 0, 0, time.Local) 173 | } 174 | return Resolution{ 175 | apply: apply, 176 | next: func(t time.Time) time.Time { 177 | t = apply(t) 178 | year, month, _ := t.Date() 179 | return time.Date(year, month+1, 1, 0, 0, 0, 0, time.Local) 180 | }, 181 | label: func(t time.Time) string { 182 | return apply(t).Format("Jan 2006") 183 | }, 
184 | } 185 | } else { 186 | return daily 187 | } 188 | } 189 | 190 | // Returns tallies grouped by calendar date. 191 | func TallyCommitsByDate( 192 | commits iter.Seq2[git.Commit, error], 193 | opts TallyOpts, 194 | ) (_ []TimeBucket, err error) { 195 | defer func() { 196 | if err != nil { 197 | err = fmt.Errorf("error while tallying commits by date: %w", err) 198 | } 199 | }() 200 | 201 | if opts.Mode == LastModifiedMode || opts.Mode == FirstModifiedMode { 202 | return nil, errors.New("mode not implemented") 203 | } 204 | 205 | var ( 206 | minTime time.Time = time.Now() 207 | maxTime time.Time 208 | ) 209 | 210 | resolution := daily 211 | buckets := map[int64]TimeBucket{} // Map of (unix) time to bucket 212 | 213 | // Tally 214 | for commit, err := range commits { 215 | if err != nil { 216 | return nil, fmt.Errorf("error iterating commits: %w", err) 217 | } 218 | 219 | bucketedCommitTime := resolution.apply(commit.Date) 220 | if bucketedCommitTime.Before(minTime) { 221 | minTime = bucketedCommitTime 222 | } 223 | if bucketedCommitTime.After(maxTime) { 224 | maxTime = bucketedCommitTime 225 | } 226 | 227 | bucket, ok := buckets[bucketedCommitTime.Unix()] 228 | if !ok { 229 | bucket = newBucket( 230 | resolution.label(bucketedCommitTime), 231 | resolution.apply(bucketedCommitTime), 232 | ) 233 | } 234 | 235 | skipMerge := commit.IsMerge && !opts.CountMerges 236 | if !skipMerge { 237 | key := opts.Key(commit) 238 | 239 | tally, ok := bucket.tallies[key] 240 | if !ok { 241 | tally.name = commit.AuthorName 242 | tally.email = commit.AuthorEmail 243 | tally.fileset = map[string]bool{} 244 | } 245 | 246 | tally.numTallied += 1 247 | 248 | if !commit.IsMerge { 249 | for _, diff := range commit.FileDiffs { 250 | tally.added += diff.LinesAdded 251 | tally.removed += diff.LinesRemoved 252 | tally.fileset[diff.Path] = true 253 | } 254 | } 255 | 256 | bucket.tallies[key] = tally 257 | buckets[bucket.Time.Unix()] = bucket 258 | } 259 | } 260 | 261 | // Turn into slice 
representing *dense* timeseries 262 | t := minTime 263 | bucketSlice := []TimeBucket{} 264 | 265 | for t.Before(maxTime) || t.Equal(maxTime) { 266 | bucket, ok := buckets[t.Unix()] 267 | if !ok { 268 | bucket = newBucket(resolution.label(t), resolution.apply(t)) 269 | } 270 | 271 | bucketSlice = append(bucketSlice, bucket) 272 | t = resolution.next(t) 273 | } 274 | 275 | return bucketSlice, nil 276 | } 277 | 278 | // Returns a list of "time buckets" with tallies for each date. 279 | // 280 | // The resolution / size of the buckets is determined based on the duration 281 | // between the first commit and end time, if the end-time is non-zero. Otherwise 282 | // the end time is the time of the last commit in chronological order. 283 | func TallyCommitsTimeline( 284 | commits iter.Seq2[git.Commit, error], 285 | opts TallyOpts, 286 | end time.Time, 287 | ) ([]TimeBucket, error) { 288 | buckets, err := TallyCommitsByDate(commits, opts) 289 | if err != nil { 290 | return buckets, err 291 | } 292 | 293 | if len(buckets) == 0 { 294 | return buckets, err 295 | } 296 | 297 | if end.IsZero() { 298 | end = buckets[len(buckets)-1].Time 299 | } 300 | 301 | resolution := CalcResolution(buckets[0].Time, end) 302 | rebuckets := Rebucket(buckets, resolution, end) 303 | 304 | return rebuckets, nil 305 | } 306 | 307 | func Rebucket( 308 | buckets []TimeBucket, 309 | resolution Resolution, 310 | end time.Time, 311 | ) []TimeBucket { 312 | if len(buckets) < 1 { 313 | return buckets 314 | } 315 | 316 | rebuckets := []TimeBucket{} 317 | 318 | // Re-bucket using new resolution 319 | t := resolution.apply(buckets[0].Time) 320 | for t.Before(end) || t.Equal(end) { 321 | bucket := newBucket(resolution.label(t), resolution.apply(t)) 322 | rebuckets = append(rebuckets, bucket) 323 | t = resolution.next(t) 324 | } 325 | 326 | i := 0 327 | for _, bucket := range buckets { 328 | rebucketedTime := resolution.apply(bucket.Time) 329 | rebucket := rebuckets[i] 330 | if 
rebucketedTime.After(rebucket.Time) { 331 | // Next bucket, might have to skip some empty ones 332 | for !rebucketedTime.Equal(rebucket.Time) { 333 | i += 1 334 | rebucket = rebuckets[i] 335 | } 336 | } 337 | 338 | bucket.Time = rebucket.Time 339 | bucket.Name = rebucket.Name 340 | rebuckets[i] = rebuckets[i].Combine(bucket) 341 | } 342 | 343 | return rebuckets 344 | } 345 | -------------------------------------------------------------------------------- /internal/tally/bucket_test.go: -------------------------------------------------------------------------------- 1 | package tally 2 | 3 | import ( 4 | "slices" 5 | "testing" 6 | "time" 7 | 8 | "github.com/sinclairtarget/git-who/internal/git" 9 | "github.com/sinclairtarget/git-who/internal/utils/iterutils" 10 | ) 11 | 12 | func TestTimeSeriesCombine(t *testing.T) { 13 | a := TimeSeries{ 14 | TimeBucket{ 15 | Name: "2024-04-01", 16 | Time: time.Date(2024, 4, 1, 0, 0, 0, 0, time.Local), 17 | tallies: map[string]Tally{ 18 | "alice": {added: 3}, 19 | "bob": {added: 2}, 20 | }, 21 | }, 22 | TimeBucket{ 23 | Name: "2024-04-02", 24 | Time: time.Date(2024, 4, 2, 0, 0, 0, 0, time.Local), 25 | tallies: map[string]Tally{ 26 | "bob": {added: 1}, 27 | }, 28 | }, 29 | TimeBucket{ 30 | Name: "2024-04-03", 31 | Time: time.Date(2024, 4, 3, 0, 0, 0, 0, time.Local), 32 | tallies: map[string]Tally{ 33 | "bob": {added: 4}, 34 | "john": {added: 7}, 35 | }, 36 | }, 37 | } 38 | 39 | b := TimeSeries{ 40 | TimeBucket{ 41 | Name: "2024-04-02", 42 | Time: time.Date(2024, 4, 2, 0, 0, 0, 0, time.Local), 43 | tallies: map[string]Tally{ 44 | "alice": {added: 1}, 45 | }, 46 | }, 47 | TimeBucket{ 48 | Name: "2024-04-03", 49 | Time: time.Date(2024, 4, 3, 0, 0, 0, 0, time.Local), 50 | tallies: map[string]Tally{ 51 | "bob": {added: 2}, 52 | }, 53 | }, 54 | TimeBucket{ 55 | Name: "2024-04-04", 56 | Time: time.Date(2024, 4, 4, 0, 0, 0, 0, time.Local), 57 | tallies: map[string]Tally{ 58 | "alice": {added: 9}, 59 | }, 60 | }, 61 | } 62 | 63 | c := 
a.Combine(b) 64 | expected := TimeSeries{ 65 | TimeBucket{ 66 | Name: "2024-04-01", 67 | Time: time.Date(2024, 4, 1, 0, 0, 0, 0, time.Local), 68 | tallies: map[string]Tally{ 69 | "alice": {added: 3}, 70 | "bob": {added: 2}, 71 | }, 72 | }, 73 | TimeBucket{ 74 | Name: "2024-04-02", 75 | Time: time.Date(2024, 4, 2, 0, 0, 0, 0, time.Local), 76 | tallies: map[string]Tally{ 77 | "alice": {added: 1}, 78 | "bob": {added: 1}, 79 | }, 80 | }, 81 | TimeBucket{ 82 | Name: "2024-04-03", 83 | Time: time.Date(2024, 4, 3, 0, 0, 0, 0, time.Local), 84 | tallies: map[string]Tally{ 85 | "bob": {added: 6}, 86 | "john": {added: 7}, 87 | }, 88 | }, 89 | TimeBucket{ 90 | Name: "2024-04-04", 91 | Time: time.Date(2024, 4, 4, 0, 0, 0, 0, time.Local), 92 | tallies: map[string]Tally{ 93 | "alice": {added: 9}, 94 | }, 95 | }, 96 | } 97 | 98 | if c[0].Name != expected[0].Name { 99 | t.Errorf("first bucket date is wrong") 100 | } 101 | if c[0].tallies["alice"].added != expected[0].tallies["alice"].added { 102 | t.Errorf("alice tally for first bucket is wrong") 103 | } 104 | if c[0].tallies["bob"].added != expected[0].tallies["bob"].added { 105 | t.Errorf("bob tally for first bucket is wrong") 106 | } 107 | 108 | if c[1].Name != expected[1].Name { 109 | t.Errorf("second bucket date is wrong") 110 | } 111 | if c[1].tallies["alice"].added != expected[1].tallies["alice"].added { 112 | t.Errorf("alice tally for second bucket is wrong") 113 | } 114 | if c[1].tallies["bob"].added != expected[1].tallies["bob"].added { 115 | t.Errorf("bob tally for second bucket is wrong") 116 | } 117 | 118 | if c[2].Name != expected[2].Name { 119 | t.Errorf("third bucket date is wrong") 120 | } 121 | if c[2].tallies["alice"].added != expected[2].tallies["alice"].added { 122 | t.Errorf("alice tally for third bucket is wrong") 123 | } 124 | if c[2].tallies["bob"].added != expected[2].tallies["bob"].added { 125 | t.Errorf("bob tally for third bucket is wrong") 126 | } 127 | if c[2].tallies["john"].added != 
expected[2].tallies["john"].added { 128 | t.Errorf("john tally for third bucket is wrong") 129 | } 130 | 131 | if c[3].Name != expected[3].Name { 132 | t.Errorf("fourth bucket date is wrong") 133 | } 134 | if c[3].tallies["alice"].added != expected[3].tallies["alice"].added { 135 | t.Errorf("alice tally for fourth bucket is wrong") 136 | } 137 | } 138 | 139 | func TestTallyCommitsTimelineEmpty(t *testing.T) { 140 | seq := iterutils.WithoutErrors(slices.Values([]git.Commit{})) 141 | opts := TallyOpts{ 142 | Mode: CommitMode, 143 | Key: func(c git.Commit) string { return c.AuthorEmail }, 144 | } 145 | end := time.Now() 146 | 147 | buckets, err := TallyCommitsTimeline(seq, opts, end) 148 | if err != nil { 149 | t.Errorf("TallyCommitsTimeline() returned error: %v", err) 150 | } 151 | 152 | if len(buckets) > 0 { 153 | t.Errorf( 154 | "TallyCommitsTimeline() should have returned empty slice but returned %v", 155 | buckets, 156 | ) 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /internal/tally/log.go: -------------------------------------------------------------------------------- 1 | package tally 2 | 3 | import ( 4 | "log/slog" 5 | ) 6 | 7 | var pkgLogger *slog.Logger 8 | 9 | func logger() *slog.Logger { 10 | if pkgLogger == nil { 11 | pkgLogger = slog.Default().With("package", "tally") 12 | } 13 | 14 | return pkgLogger 15 | } 16 | -------------------------------------------------------------------------------- /internal/tally/tally.go: -------------------------------------------------------------------------------- 1 | // Handles summations over commits. 2 | package tally 3 | 4 | import ( 5 | "fmt" 6 | "iter" 7 | "slices" 8 | "time" 9 | 10 | "github.com/sinclairtarget/git-who/internal/git" 11 | "github.com/sinclairtarget/git-who/internal/utils/timeutils" 12 | ) 13 | 14 | // Whether we rank authors by commit, lines, or files. 
// TallyMode selects the metric by which authors are ranked.
type TallyMode int

const (
	CommitMode TallyMode = iota
	LinesMode
	FilesMode
	LastModifiedMode
	FirstModifiedMode
)

// Pathname under which commits that introduce no diff are counted.
const NoDiffPathname = ".git-who-no-diff-commits"

// FinalTally holds the metrics for one author once tallying is finished.
//
// A FinalTally cannot be merged with another one: the intermediate
// information needed to do so has already been discarded.
type FinalTally struct {
	AuthorName      string
	AuthorEmail     string
	Commits         int // Num commits editing paths in tree by this author
	LinesAdded      int // Num lines added to paths in tree by author
	LinesRemoved    int // Num lines deleted from paths in tree by author
	FileCount       int // Num of file paths in working dir touched by author
	FirstCommitTime time.Time
	LastCommitTime  time.Time
}

// SortKey returns the number this tally is ranked by under the given mode;
// a larger key means a higher rank. It panics on an unrecognized mode.
func (t FinalTally) SortKey(mode TallyMode) int64 {
	switch mode {
	case CommitMode:
		return int64(t.Commits)
	case LinesMode:
		return int64(t.LinesAdded + t.LinesRemoved)
	case FilesMode:
		return int64(t.FileCount)
	case LastModifiedMode:
		return t.LastCommitTime.Unix()
	case FirstModifiedMode:
		// Negated so that an earlier first commit produces a larger key.
		return -t.FirstCommitTime.Unix()
	}

	panic("unrecognized mode in switch statement")
}

// Compare orders a relative to b under the given mode, returning -1, 0 or 1.
// When the sort keys are equal the later LastCommitTime wins.
func (a FinalTally) Compare(b FinalTally, mode TallyMode) int {
	aKey, bKey := a.SortKey(mode), b.SortKey(mode)

	switch {
	case aKey < bKey:
		return -1
	case aKey > bKey:
		return 1
	}

	// Break ties with last edited
	return a.LastCommitTime.Compare(b.LastCommitTime)
}
// or returns a, unless a is empty, in which case it returns b.
func or(a, b string) string {
	if a == "" {
		return b
	}

	return a
}

// unionInPlace adds every key of b to a and returns a.
//
// No new map is allocated: a itself is mutated, and when a is nil the result
// is b itself, so the returned map always aliases one of the arguments. Every
// key copied from b is stored as true, whatever value it had in b.
func unionInPlace(a, b map[string]bool) map[string]bool {
	if a == nil {
		return b
	}

	for k := range b {
		a[k] = true
	}

	return a
}
Fallback to numTallied 143 | if len(t.fileset) > 0 { 144 | files = len(t.fileset) 145 | } 146 | 147 | if t.name == "" && t.email == "" { 148 | panic("tally finalized but has no name and no email") 149 | } 150 | 151 | return FinalTally{ 152 | AuthorName: t.name, 153 | AuthorEmail: t.email, 154 | Commits: commits, 155 | LinesAdded: t.added, 156 | LinesRemoved: t.removed, 157 | FileCount: files, 158 | FirstCommitTime: t.firstCommitTime, 159 | LastCommitTime: t.lastCommitTime, 160 | } 161 | } 162 | 163 | // author -> path -> tally 164 | type TalliesByPath map[string]map[string]Tally 165 | 166 | func (left TalliesByPath) Combine(right TalliesByPath) TalliesByPath { 167 | for key, leftPathTallies := range left { 168 | rightPathTallies, ok := right[key] 169 | if !ok { 170 | rightPathTallies = map[string]Tally{} 171 | } 172 | 173 | for path, leftTally := range leftPathTallies { 174 | rightTally, ok := rightPathTallies[path] 175 | if !ok { 176 | rightTally.firstCommitTime = time.Unix(1<<62, 0) 177 | } 178 | 179 | t := leftTally.Combine(rightTally) 180 | t.numTallied = min(t.numTallied, 1) // Same path 181 | rightPathTallies[path] = t 182 | } 183 | 184 | right[key] = rightPathTallies 185 | } 186 | 187 | return right 188 | } 189 | 190 | // Reduce by-path tallies to a single tally for each author. 
191 | func (byPath TalliesByPath) Reduce() map[string]Tally { 192 | tallies := map[string]Tally{} 193 | 194 | for key, pathTallies := range byPath { 195 | var runningTally Tally 196 | runningTally.commitset = map[string]bool{} 197 | runningTally.firstCommitTime = time.Unix(1<<62, 0) 198 | 199 | for _, tally := range pathTallies { 200 | runningTally = runningTally.Combine(tally) 201 | } 202 | 203 | if len(runningTally.commitset) > 0 { 204 | tallies[key] = runningTally 205 | } 206 | } 207 | 208 | return tallies 209 | } 210 | 211 | func TallyCommits( 212 | commits iter.Seq2[git.Commit, error], 213 | opts TallyOpts, 214 | ) (map[string]Tally, error) { 215 | // Map of author to tally 216 | var tallies map[string]Tally 217 | 218 | start := time.Now() 219 | 220 | if !opts.IsDiffMode() { 221 | tallies = map[string]Tally{} 222 | 223 | // Don't need info about file paths, just count commits and commit time 224 | for commit, err := range commits { 225 | if err != nil { 226 | return nil, fmt.Errorf("error iterating commits: %w", err) 227 | } 228 | 229 | if commit.IsMerge && !opts.CountMerges { 230 | continue 231 | } 232 | 233 | key := opts.Key(commit) 234 | 235 | tally, ok := tallies[key] 236 | if !ok { 237 | tally.name = commit.AuthorName 238 | tally.email = commit.AuthorEmail 239 | tally.firstCommitTime = commit.Date 240 | } 241 | 242 | tally.numTallied += 1 243 | tally.firstCommitTime = timeutils.Min( 244 | commit.Date, 245 | tally.firstCommitTime, 246 | ) 247 | tally.lastCommitTime = timeutils.Max( 248 | commit.Date, 249 | tally.lastCommitTime, 250 | ) 251 | 252 | tallies[key] = tally 253 | } 254 | } else { 255 | talliesByPath, err := TallyCommitsByPath(commits, opts) 256 | if err != nil { 257 | return nil, err 258 | } 259 | 260 | tallies = talliesByPath.Reduce() 261 | } 262 | 263 | elapsed := time.Now().Sub(start) 264 | logger().Debug("tallied commits", "duration_ms", elapsed.Milliseconds()) 265 | 266 | return tallies, nil 267 | } 268 | 269 | // Tally metrics per author 
per path. 270 | func TallyCommitsByPath( 271 | commits iter.Seq2[git.Commit, error], 272 | opts TallyOpts, 273 | ) (TalliesByPath, error) { 274 | tallies := TalliesByPath{} 275 | 276 | // Tally over commits 277 | for commit, err := range commits { 278 | if err != nil { 279 | return nil, fmt.Errorf("error iterating commits: %w", err) 280 | } 281 | 282 | if commit.IsMerge && !opts.CountMerges { 283 | continue 284 | } 285 | 286 | key := opts.Key(commit) 287 | 288 | pathTallies, ok := tallies[key] 289 | if !ok { 290 | pathTallies = map[string]Tally{} 291 | } 292 | 293 | if len(commit.FileDiffs) == 0 { 294 | // We still want to count commits that introduce no diff. 295 | // This could happen with a merge commit that has no diff with its 296 | // first parent. Have also seen this happen with an SVN-imported 297 | // commit. 298 | // 299 | // We count these commits under a special pathname we hope never 300 | // collides. 301 | tally, ok := pathTallies[NoDiffPathname] 302 | if !ok { 303 | tally.name = commit.AuthorName 304 | tally.email = commit.AuthorEmail 305 | tally.firstCommitTime = commit.Date 306 | tally.commitset = map[string]bool{} 307 | tally.numTallied = 0 // Don't count toward files changed 308 | } 309 | 310 | tally.commitset[commit.ShortHash] = true 311 | tally.firstCommitTime = timeutils.Min( 312 | tally.firstCommitTime, 313 | commit.Date, 314 | ) 315 | tally.lastCommitTime = timeutils.Max( 316 | tally.lastCommitTime, 317 | commit.Date, 318 | ) 319 | 320 | pathTallies[NoDiffPathname] = tally 321 | } else { 322 | for _, diff := range commit.FileDiffs { 323 | tally, ok := pathTallies[diff.Path] 324 | if !ok { 325 | tally.name = commit.AuthorName 326 | tally.email = commit.AuthorEmail 327 | tally.firstCommitTime = commit.Date 328 | tally.commitset = map[string]bool{} 329 | } 330 | 331 | tally.commitset[commit.ShortHash] = true 332 | tally.firstCommitTime = timeutils.Min( 333 | tally.firstCommitTime, 334 | commit.Date, 335 | ) 336 | tally.lastCommitTime = 
timeutils.Max( 337 | tally.lastCommitTime, 338 | commit.Date, 339 | ) 340 | 341 | if !commit.IsMerge { 342 | // Only non-merge commits contribute to files / lines 343 | tally.numTallied = 1 344 | tally.added += diff.LinesAdded 345 | tally.removed += diff.LinesRemoved 346 | } 347 | 348 | pathTallies[diff.Path] = tally 349 | } 350 | } 351 | 352 | tallies[key] = pathTallies 353 | } 354 | 355 | return tallies, nil 356 | } 357 | 358 | // Sort tallies according to mode. 359 | func Rank(tallies map[string]Tally, mode TallyMode) []FinalTally { 360 | final := []FinalTally{} 361 | for _, t := range tallies { 362 | final = append(final, t.Final()) 363 | } 364 | 365 | slices.SortFunc(final, func(a, b FinalTally) int { 366 | return -a.Compare(b, mode) 367 | }) 368 | return final 369 | } 370 | -------------------------------------------------------------------------------- /internal/tally/tally_test.go: -------------------------------------------------------------------------------- 1 | package tally_test 2 | 3 | import ( 4 | "slices" 5 | "testing" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | 9 | "github.com/sinclairtarget/git-who/internal/git" 10 | "github.com/sinclairtarget/git-who/internal/tally" 11 | "github.com/sinclairtarget/git-who/internal/utils/iterutils" 12 | ) 13 | 14 | func TestTallyCommits(t *testing.T) { 15 | commits := []git.Commit{ 16 | git.Commit{ 17 | Hash: "baa", 18 | ShortHash: "baa", 19 | AuthorName: "bob", 20 | AuthorEmail: "bob@mail.com", 21 | FileDiffs: []git.FileDiff{ 22 | git.FileDiff{ 23 | Path: "bim.txt", 24 | LinesAdded: 4, 25 | LinesRemoved: 0, 26 | }, 27 | git.FileDiff{ 28 | Path: "vim.txt", 29 | LinesAdded: 8, 30 | LinesRemoved: 2, 31 | }, 32 | git.FileDiff{ 33 | Path: "nim.txt", 34 | LinesAdded: 2, 35 | LinesRemoved: 1, 36 | }, 37 | }, 38 | }, 39 | git.Commit{ 40 | Hash: "bab", 41 | ShortHash: "bab", 42 | AuthorName: "jim", 43 | AuthorEmail: "jim@mail.com", 44 | FileDiffs: []git.FileDiff{ 45 | git.FileDiff{ 46 | Path: "bim.txt", 47 | LinesAdded: 3, 
48 | LinesRemoved: 1, 49 | }, 50 | }, 51 | }, 52 | } 53 | 54 | seq := iterutils.WithoutErrors(slices.Values(commits)) 55 | opts := tally.TallyOpts{ 56 | Mode: tally.LinesMode, 57 | Key: func(c git.Commit) string { 58 | return c.AuthorEmail 59 | }, 60 | } 61 | tallies, err := tally.TallyCommits(seq, opts) 62 | rankedTallies := tally.Rank(tallies, opts.Mode) 63 | if err != nil { 64 | t.Fatalf("TallyCommits() returned error: %v", err) 65 | } 66 | 67 | if len(rankedTallies) == 0 { 68 | t.Fatalf("TallyCommits() returned empty slice") 69 | } 70 | 71 | bob := rankedTallies[0] 72 | expected := tally.FinalTally{ 73 | AuthorName: "bob", 74 | AuthorEmail: "bob@mail.com", 75 | Commits: 1, 76 | LinesAdded: 14, 77 | LinesRemoved: 3, 78 | FileCount: 3, 79 | } 80 | if diff := cmp.Diff(expected, bob); diff != "" { 81 | t.Errorf("bob's tally is wrong:\n%s", diff) 82 | } 83 | 84 | jim := rankedTallies[1] 85 | expected = tally.FinalTally{ 86 | AuthorName: "jim", 87 | AuthorEmail: "jim@mail.com", 88 | Commits: 1, 89 | LinesAdded: 3, 90 | LinesRemoved: 1, 91 | FileCount: 1, 92 | } 93 | if diff := cmp.Diff(expected, jim); diff != "" { 94 | t.Errorf("jim's tally is wrong:\n%s", diff) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /internal/tally/tree.go: -------------------------------------------------------------------------------- 1 | package tally 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "iter" 7 | "os" 8 | "path" 9 | "path/filepath" 10 | "strings" 11 | "time" 12 | 13 | "github.com/sinclairtarget/git-who/internal/git" 14 | ) 15 | 16 | var EmptyTreeErr = errors.New("No commits; tree is empty.") 17 | 18 | // A file tree of edits to the repo 19 | type TreeNode struct { 20 | Tally FinalTally 21 | Children map[string]*TreeNode 22 | InWorkTree bool // In git working tree/directory 23 | tallies map[string]Tally 24 | } 25 | 26 | func newNode(inWTree bool) *TreeNode { 27 | return &TreeNode{ 28 | Children: map[string]*TreeNode{}, 29 | 
InWorkTree: inWTree, 30 | tallies: map[string]Tally{}, 31 | } 32 | } 33 | 34 | func (t *TreeNode) String() string { 35 | return fmt.Sprintf("{ %d }", len(t.tallies)) 36 | } 37 | 38 | // Splits path into first dir and remainder. 39 | func splitPath(path string) (string, string) { 40 | dir, subpath, found := strings.Cut(path, "/") 41 | if !found { 42 | return path, "" 43 | } 44 | 45 | return dir, subpath 46 | } 47 | 48 | func (t *TreeNode) insert(path string, key string, tally Tally, inWTree bool) { 49 | if path == "" { 50 | // Leaf 51 | t.tallies[key] = tally 52 | return 53 | } 54 | 55 | // Insert child 56 | p, nextP := splitPath(path) 57 | child, ok := t.Children[p] 58 | if !ok { 59 | child = newNode(inWTree) 60 | } 61 | child.InWorkTree = child.InWorkTree || inWTree 62 | t.Children[p] = child 63 | 64 | child.insert(nextP, key, tally, inWTree) 65 | } 66 | 67 | func (t *TreeNode) Rank(mode TallyMode) *TreeNode { 68 | if len(t.Children) > 0 { 69 | // Recursively sum up metrics. 70 | // For each author, merge the tallies for all children together. 71 | for p, child := range t.Children { 72 | t.Children[p] = child.Rank(mode) 73 | 74 | for key, childTally := range child.tallies { 75 | tally, ok := t.tallies[key] 76 | if !ok { 77 | tally.name = childTally.name 78 | tally.email = childTally.email 79 | tally.commitset = map[string]bool{} 80 | tally.firstCommitTime = time.Unix(1<<62, 0) 81 | } 82 | 83 | tally = tally.Combine(childTally) 84 | t.tallies[key] = tally 85 | } 86 | } 87 | } 88 | 89 | // Pick best tally for the node according to the tally mode 90 | sorted := Rank(t.tallies, mode) 91 | t.Tally = sorted[0] 92 | return t 93 | } 94 | 95 | /* 96 | * TallyCommitsTree() returns a tree of nodes mirroring the working directory 97 | * with a tally for each node. 
98 | */ 99 | func TallyCommitsTree( 100 | commits iter.Seq2[git.Commit, error], 101 | opts TallyOpts, 102 | worktreePaths map[string]bool, 103 | gitRootPath string, 104 | ) (*TreeNode, error) { 105 | // Tally paths 106 | talliesByPath, err := TallyCommitsByPath(commits, opts) 107 | if err != nil { 108 | return nil, err 109 | } 110 | 111 | return TallyCommitsTreeFromPaths(talliesByPath, worktreePaths, gitRootPath) 112 | } 113 | 114 | func TallyCommitsTreeFromPaths( 115 | talliesByPath TalliesByPath, 116 | worktreePaths map[string]bool, 117 | gitRootPath string, 118 | ) (*TreeNode, error) { 119 | root := newNode(true) 120 | 121 | wd, err := os.Getwd() 122 | if err != nil { 123 | return root, err 124 | } 125 | 126 | // Build tree 127 | for key, pathTallies := range talliesByPath { 128 | for p, tally := range pathTallies { 129 | relPath := p 130 | if gitRootPath != "" { 131 | // Adjust path for working dir 132 | // Here we use the os separator 133 | absPath := path.Join(gitRootPath, p) 134 | relPath, err = filepath.Rel(wd, filepath.FromSlash(absPath)) 135 | if err != nil || !filepath.IsLocal(relPath) { 136 | continue // Skip any paths outside of working dir 137 | } 138 | } 139 | 140 | // Okay, back to all paths using forward-slash separator 141 | relPath = filepath.ToSlash(relPath) 142 | inWTree := worktreePaths[relPath] 143 | root.insert(relPath, key, tally, inWTree) 144 | } 145 | } 146 | 147 | if len(root.Children) == 0 { 148 | return root, EmptyTreeErr 149 | } 150 | 151 | return root, nil 152 | } 153 | -------------------------------------------------------------------------------- /internal/tally/tree_test.go: -------------------------------------------------------------------------------- 1 | package tally_test 2 | 3 | import ( 4 | "slices" 5 | "testing" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | 9 | "github.com/sinclairtarget/git-who/internal/git" 10 | "github.com/sinclairtarget/git-who/internal/tally" 11 | 
"github.com/sinclairtarget/git-who/internal/utils/iterutils" 12 | ) 13 | 14 | func TestTallyCommitsTree(t *testing.T) { 15 | commits := []git.Commit{ 16 | git.Commit{ 17 | Hash: "baa", 18 | ShortHash: "baa", 19 | AuthorName: "bob", 20 | AuthorEmail: "bob@mail.com", 21 | FileDiffs: []git.FileDiff{ 22 | git.FileDiff{ 23 | Path: "foo/bim.txt", 24 | LinesAdded: 4, 25 | LinesRemoved: 0, 26 | }, 27 | git.FileDiff{ 28 | Path: "foo/bar.txt", 29 | LinesAdded: 8, 30 | LinesRemoved: 2, 31 | }, 32 | }, 33 | }, 34 | git.Commit{ 35 | Hash: "bab", 36 | ShortHash: "bab", 37 | AuthorName: "jim", 38 | AuthorEmail: "jim@mail.com", 39 | FileDiffs: []git.FileDiff{ 40 | git.FileDiff{ 41 | Path: "foo/bim.txt", 42 | LinesAdded: 3, 43 | LinesRemoved: 1, 44 | }, 45 | }, 46 | }, 47 | git.Commit{ 48 | Hash: "bac", 49 | ShortHash: "bac", 50 | AuthorName: "bob", 51 | AuthorEmail: "bob@mail.com", 52 | FileDiffs: []git.FileDiff{ 53 | git.FileDiff{ 54 | Path: "foo/bim.txt", 55 | LinesAdded: 23, 56 | LinesRemoved: 0, 57 | }, 58 | }, 59 | }, 60 | } 61 | 62 | worktreeset := map[string]bool{"foo/bim.txt": true, "foo/bar.txt": true} 63 | seq := iterutils.WithoutErrors(slices.Values(commits)) 64 | opts := tally.TallyOpts{ 65 | Mode: tally.CommitMode, 66 | Key: func(c git.Commit) string { return c.AuthorEmail }, 67 | } 68 | 69 | root, err := tally.TallyCommitsTree(seq, opts, worktreeset, "") 70 | if err != nil { 71 | t.Fatalf("TallyCommits() returned error: %v", err) 72 | } 73 | 74 | root = root.Rank(opts.Mode) 75 | 76 | if len(root.Children) == 0 { 77 | t.Fatalf("root node has no children") 78 | } 79 | 80 | fooNode, ok := root.Children["foo"] 81 | if !ok { 82 | t.Fatalf("root node has no \"foo\" child") 83 | } 84 | 85 | bimNode, ok := fooNode.Children["bim.txt"] 86 | if !ok { 87 | t.Errorf("\"foo\" node has no \"bim.txt\" child") 88 | } 89 | 90 | _, ok = fooNode.Children["bar.txt"] 91 | if !ok { 92 | t.Errorf("\"foo\" node has no \"bar.txt\" child") 93 | } 94 | 95 | expected := tally.FinalTally{ 96 | 
AuthorName: "bob", 97 | AuthorEmail: "bob@mail.com", 98 | Commits: 2, 99 | LinesAdded: 4 + 8 + 23, 100 | LinesRemoved: 2, 101 | FileCount: 2, 102 | } 103 | if diff := cmp.Diff(expected, root.Tally); diff != "" { 104 | t.Errorf("bob's tally is wrong:\n%s", diff) 105 | } 106 | 107 | expected = tally.FinalTally{ 108 | AuthorName: "bob", 109 | AuthorEmail: "bob@mail.com", 110 | Commits: 2, 111 | LinesAdded: 4 + 23, 112 | LinesRemoved: 0, 113 | FileCount: 1, 114 | } 115 | if diff := cmp.Diff(expected, bimNode.Tally); diff != "" { 116 | t.Errorf("bob's second tally is wrong:\n%s", diff) 117 | } 118 | } 119 | 120 | func TestTallyCommitsTreeNoCommits(t *testing.T) { 121 | seq := iterutils.WithoutErrors(slices.Values([]git.Commit{})) 122 | opts := tally.TallyOpts{ 123 | Mode: tally.CommitMode, 124 | Key: func(c git.Commit) string { return c.AuthorEmail }, 125 | } 126 | worktreeset := map[string]bool{} 127 | 128 | _, err := tally.TallyCommitsTree(seq, opts, worktreeset, "") 129 | if err != tally.EmptyTreeErr { 130 | t.Fatalf( 131 | "TallyCommits() should have returned EmptyTreeErr but returned %v", 132 | err, 133 | ) 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /internal/utils/flagutils/flag.go: -------------------------------------------------------------------------------- 1 | // https://stackoverflow.com/questions/28322997/how-to-get-a-list-of-values-into-a-flag-in-golang 2 | package flagutils 3 | 4 | import "fmt" 5 | 6 | type SliceFlag []string 7 | 8 | func (s *SliceFlag) String() string { 9 | return fmt.Sprintf("%v", *s) 10 | } 11 | 12 | func (s *SliceFlag) Set(value string) error { 13 | *s = append(*s, value) 14 | return nil 15 | } 16 | -------------------------------------------------------------------------------- /internal/utils/iterutils/iterutils.go: -------------------------------------------------------------------------------- 1 | // Iterator helpers 2 | package iterutils 3 | 4 | import ( 5 | "fmt" 6 | 
"iter" 7 | ) 8 | 9 | // Turns a Seq into a Seq2 where the second element is always nil 10 | func WithoutErrors[V any](seq iter.Seq[V]) iter.Seq2[V, error] { 11 | return func(yield func(V, error) bool) { 12 | for v := range seq { 13 | if !yield(v, nil) { 14 | break 15 | } 16 | } 17 | } 18 | } 19 | 20 | // Turns a seq2 into a slice 21 | func Collect[V any](seq iter.Seq2[V, error]) ([]V, error) { 22 | s := []V{} 23 | for v, err := range seq { 24 | if err != nil { 25 | return nil, fmt.Errorf("error collecting sequence: %w", err) 26 | } 27 | s = append(s, v) 28 | } 29 | 30 | return s, nil 31 | } 32 | -------------------------------------------------------------------------------- /internal/utils/timeutils/timeutils.go: -------------------------------------------------------------------------------- 1 | package timeutils 2 | 3 | import "time" 4 | 5 | func Max(a, b time.Time) time.Time { 6 | if b.Before(a) { 7 | return a 8 | } else { 9 | return b 10 | } 11 | } 12 | 13 | func Min(a, b time.Time) time.Time { 14 | if b.Before(a) { 15 | return b 16 | } else { 17 | return a 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /log.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log/slog" 5 | ) 6 | 7 | var pkgLogger *slog.Logger 8 | 9 | func logger() *slog.Logger { 10 | if pkgLogger == nil { 11 | pkgLogger = slog.Default().With("package", "main") 12 | } 13 | 14 | return pkgLogger 15 | } 16 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "flag" 6 | "fmt" 7 | "log/slog" 8 | "os" 9 | "strings" 10 | "time" 11 | 12 | "github.com/sinclairtarget/git-who/internal/git" 13 | "github.com/sinclairtarget/git-who/internal/tally" 14 | "github.com/sinclairtarget/git-who/internal/utils/flagutils" 15 
| ) 16 | 17 | var Commit = "unknown" 18 | var Version = "unknown" 19 | 20 | var progStart time.Time 21 | 22 | type command struct { 23 | flagSet *flag.FlagSet 24 | run func(args []string) error 25 | description string 26 | } 27 | 28 | // Main examines the args and delegates to the specified subcommand. 29 | // 30 | // If no subcommand was specified, we default to the "table" subcommand. 31 | func main() { 32 | subcommands := map[string]command{ // Available subcommands 33 | "dump": dumpCmd(), 34 | "parse": parseCmd(), 35 | "table": tableCmd(), 36 | "tree": treeCmd(), 37 | "hist": histCmd(), 38 | } 39 | 40 | // --- Handle top-level flags --- 41 | mainFlagSet := flag.NewFlagSet("git-who", flag.ExitOnError) 42 | 43 | versionFlag := mainFlagSet.Bool("version", false, "Print version and exit") 44 | verboseFlag := mainFlagSet.Bool("v", false, "Enables debug logging") 45 | 46 | mainFlagSet.Usage = func() { 47 | fmt.Println("Usage: git-who [-v] [subcommand] [subcommand options...]") 48 | fmt.Println("git-who tallies code contributions by author") 49 | 50 | fmt.Println() 51 | fmt.Println("Top-level options:") 52 | mainFlagSet.PrintDefaults() 53 | 54 | fmt.Println() 55 | fmt.Println("Subcommands:") 56 | 57 | helpSubcommands := []string{"table", "tree", "hist"} 58 | for _, name := range helpSubcommands { 59 | cmd := subcommands[name] 60 | 61 | fmt.Printf(" %s\n", name) 62 | fmt.Printf("\t%s\n", cmd.description) 63 | } 64 | 65 | fmt.Println() 66 | fmt.Println("Run git-who -h for subcommand help") 67 | } 68 | 69 | // Look for the index of the first arg not intended as a top-level flag. 70 | // We handle this manually so that specifying the default subcommand is 71 | // optional even when providing subcommand flags. 
72 | subcmdIndex := 1 73 | loop: 74 | for subcmdIndex < len(os.Args) { 75 | switch os.Args[subcmdIndex] { 76 | case "-version", "--version", "-v", "--v", "-h", "--help": 77 | subcmdIndex += 1 78 | default: 79 | break loop 80 | } 81 | } 82 | 83 | mainFlagSet.Parse(os.Args[1:subcmdIndex]) 84 | 85 | if *versionFlag { 86 | fmt.Printf("%s %s\n", Version, Commit) 87 | return 88 | } 89 | 90 | if *verboseFlag { 91 | configureLogging(slog.LevelDebug) 92 | logger().Debug("log level set to DEBUG") 93 | } else { 94 | configureLogging(slog.LevelInfo) 95 | } 96 | 97 | args := os.Args[subcmdIndex:] 98 | 99 | // --- Handle subcommands --- 100 | cmd := subcommands["table"] // Default to "table" 101 | if len(args) > 0 { 102 | first := args[0] 103 | if subcommand, ok := subcommands[first]; ok { 104 | cmd = subcommand 105 | args = args[1:] 106 | } 107 | } 108 | 109 | args = escapeTerminator(args) 110 | 111 | cmd.flagSet.Parse(args) 112 | subargs := cmd.flagSet.Args() 113 | subargs = unescapeTerminator(subargs) 114 | 115 | progStart = time.Now() 116 | if err := cmd.run(subargs); err != nil { 117 | fmt.Fprintf(os.Stderr, "%s\n", err) 118 | os.Exit(1) 119 | } 120 | } 121 | 122 | // -v- Subcommand definitions -------------------------------------------------- 123 | 124 | func tableCmd() command { 125 | flagSet := flag.NewFlagSet("git-who table", flag.ExitOnError) 126 | 127 | useCsv := flagSet.Bool("csv", false, "Output as csv") 128 | showEmail := flagSet.Bool("e", false, "Show email address of each author") 129 | countMerges := flagSet.Bool("merges", false, "Count merge commits toward commit total") 130 | linesMode := flagSet.Bool("l", false, "Sort by lines added + removed") 131 | filesMode := flagSet.Bool("f", false, "Sort by files changed") 132 | firstModifiedMode := flagSet.Bool("c", false, "Sort by first modified (created)") 133 | lastModifiedMode := flagSet.Bool("m", false, "Sort by last modified") 134 | limit := flagSet.Int("n", 10, "Limit rows in table (set to 0 for no limit)") 135 
| 136 | filterFlags := addFilterFlags(flagSet) 137 | 138 | description := "Print out a table showing total contributions by author" 139 | 140 | flagSet.Usage = func() { 141 | fmt.Println(strings.TrimSpace(` 142 | Usage: git-who table [options...] [revisions...] [[--] paths...] 143 | `)) 144 | fmt.Println(description) 145 | fmt.Println() 146 | flagSet.PrintDefaults() 147 | } 148 | 149 | return command{ 150 | flagSet: flagSet, 151 | description: description, 152 | run: func(args []string) error { 153 | mode := tally.CommitMode 154 | 155 | if !isOnlyOne( 156 | *linesMode, 157 | *filesMode, 158 | *lastModifiedMode, 159 | *firstModifiedMode, 160 | ) { 161 | return errors.New("all sort flags are mutually exclusive") 162 | } 163 | 164 | if *linesMode { 165 | mode = tally.LinesMode 166 | } else if *filesMode { 167 | mode = tally.FilesMode 168 | } else if *lastModifiedMode { 169 | mode = tally.LastModifiedMode 170 | } else if *firstModifiedMode { 171 | mode = tally.FirstModifiedMode 172 | } 173 | 174 | if *limit < 0 { 175 | return errors.New("-n flag must be a positive integer") 176 | } 177 | 178 | revs, pathspecs, err := git.ParseArgs(args) 179 | if err != nil { 180 | return err 181 | } 182 | 183 | err = checkPathspecs(pathspecs) 184 | if err != nil { 185 | return err 186 | } 187 | 188 | return table( 189 | revs, 190 | pathspecs, 191 | mode, 192 | *useCsv, 193 | *showEmail, 194 | *countMerges, 195 | *limit, 196 | *filterFlags.since, 197 | *filterFlags.until, 198 | filterFlags.authors, 199 | filterFlags.nauthors, 200 | ) 201 | }, 202 | } 203 | } 204 | 205 | func treeCmd() command { 206 | flagSet := flag.NewFlagSet("git-who tree", flag.ExitOnError) 207 | 208 | showEmail := flagSet.Bool("e", false, "Show email address of each author") 209 | showHidden := flagSet.Bool("a", false, "Show files not in working tree") 210 | countMerges := flagSet.Bool("merges", false, "Count merge commits toward commit total") 211 | useLines := flagSet.Bool("l", false, "Rank authors by lines 
added/changed") 212 | useFiles := flagSet.Bool("f", false, "Rank authors by files touched") 213 | useFirstModified := flagSet.Bool("c", false, "Rank authors by first commit time (created)") 214 | useLastModified := flagSet.Bool( 215 | "m", 216 | false, 217 | "Rank authors by last commit time", 218 | ) 219 | depth := flagSet.Int("d", 0, "Limit on tree depth") 220 | 221 | filterFlags := addFilterFlags(flagSet) 222 | 223 | description := "Print out a file tree showing most contributions by path" 224 | 225 | flagSet.Usage = func() { 226 | fmt.Println(strings.TrimSpace(` 227 | Usage: git-who tree [options...] [revisions...] [[--] paths...] 228 | `)) 229 | fmt.Println(description) 230 | fmt.Println() 231 | flagSet.PrintDefaults() 232 | } 233 | 234 | return command{ 235 | flagSet: flagSet, 236 | description: description, 237 | run: func(args []string) error { 238 | revs, pathspecs, err := git.ParseArgs(args) 239 | if err != nil { 240 | return fmt.Errorf("could not parse args: %w", err) 241 | } 242 | 243 | err = checkPathspecs(pathspecs) 244 | if err != nil { 245 | return err 246 | } 247 | 248 | if !isOnlyOne( 249 | *useLines, 250 | *useFiles, 251 | *useLastModified, 252 | *useFirstModified, 253 | ) { 254 | return errors.New("all ranking flags are mutually exclusive") 255 | } 256 | 257 | mode := tally.CommitMode 258 | if *useLines { 259 | mode = tally.LinesMode 260 | } else if *useFiles { 261 | mode = tally.FilesMode 262 | } else if *useLastModified { 263 | mode = tally.LastModifiedMode 264 | } else if *useFirstModified { 265 | mode = tally.FirstModifiedMode 266 | } 267 | 268 | return tree( 269 | revs, 270 | pathspecs, 271 | mode, 272 | *depth, 273 | *showEmail, 274 | *showHidden, 275 | *countMerges, 276 | *filterFlags.since, 277 | *filterFlags.until, 278 | filterFlags.authors, 279 | filterFlags.nauthors, 280 | ) 281 | }, 282 | } 283 | } 284 | 285 | func histCmd() command { 286 | flagSet := flag.NewFlagSet("git-who hist", flag.ExitOnError) 287 | 288 | useLines := 
flagSet.Bool("l", false, "Rank authors by lines added/changed") 289 | useFiles := flagSet.Bool("f", false, "Rank authors by files touched") 290 | showEmail := flagSet.Bool("e", false, "Show email address of each author") 291 | countMerges := flagSet.Bool("merges", false, "Count merge commits toward commit total") 292 | 293 | filterFlags := addFilterFlags(flagSet) 294 | 295 | description := "Print out a timeline showing most contributions by date" 296 | 297 | flagSet.Usage = func() { 298 | fmt.Println(strings.TrimSpace(` 299 | Usage: git-who hist [options...] [revisions...] [[--] paths...] 300 | `)) 301 | fmt.Println(description) 302 | fmt.Println() 303 | flagSet.PrintDefaults() 304 | } 305 | 306 | return command{ 307 | flagSet: flagSet, 308 | description: description, 309 | run: func(args []string) error { 310 | revs, pathspecs, err := git.ParseArgs(args) 311 | if err != nil { 312 | return fmt.Errorf("could not parse args: %w", err) 313 | } 314 | 315 | err = checkPathspecs(pathspecs) 316 | if err != nil { 317 | return err 318 | } 319 | 320 | if !isOnlyOne(*useLines, *useFiles) { 321 | return errors.New("all ranking flags are mutually exclusive") 322 | } 323 | 324 | mode := tally.CommitMode 325 | if *useLines { 326 | mode = tally.LinesMode 327 | } else if *useFiles { 328 | mode = tally.FilesMode 329 | } 330 | 331 | return hist( 332 | revs, 333 | pathspecs, 334 | mode, 335 | *showEmail, 336 | *countMerges, 337 | *filterFlags.since, 338 | *filterFlags.until, 339 | filterFlags.authors, 340 | filterFlags.nauthors, 341 | ) 342 | }, 343 | } 344 | } 345 | 346 | func dumpCmd() command { 347 | flagSet := flag.NewFlagSet("git-who dump", flag.ExitOnError) 348 | 349 | short := flagSet.Bool("s", false, "Use short log") 350 | 351 | filterFlags := addFilterFlags(flagSet) 352 | 353 | return command{ 354 | flagSet: flagSet, 355 | run: func(args []string) error { 356 | revs, pathspecs, err := git.ParseArgs(args) 357 | if err != nil { 358 | return fmt.Errorf("could not parse args: 
%w", err) 359 | } 360 | 361 | err = checkPathspecs(pathspecs) 362 | if err != nil { 363 | return err 364 | } 365 | 366 | return dump( 367 | revs, 368 | pathspecs, 369 | *short, 370 | *filterFlags.since, 371 | *filterFlags.until, 372 | filterFlags.authors, 373 | filterFlags.nauthors, 374 | ) 375 | }, 376 | } 377 | } 378 | 379 | func parseCmd() command { 380 | flagSet := flag.NewFlagSet("git-who parse", flag.ExitOnError) 381 | 382 | short := flagSet.Bool("s", false, "Use short log") 383 | 384 | filterFlags := addFilterFlags(flagSet) 385 | 386 | return command{ 387 | flagSet: flagSet, 388 | run: func(args []string) error { 389 | revs, pathspecs, err := git.ParseArgs(args) 390 | if err != nil { 391 | return fmt.Errorf("could not parse args: %w", err) 392 | } 393 | 394 | err = checkPathspecs(pathspecs) 395 | if err != nil { 396 | return err 397 | } 398 | 399 | return parse( 400 | revs, 401 | pathspecs, 402 | *short, 403 | *filterFlags.since, 404 | *filterFlags.until, 405 | filterFlags.authors, 406 | filterFlags.nauthors, 407 | ) 408 | }, 409 | } 410 | } 411 | 412 | // -^--------------------------------------------------------------------------- 413 | 414 | func configureLogging(level slog.Level) { 415 | handler := slog.NewTextHandler( 416 | os.Stderr, 417 | &slog.HandlerOptions{ 418 | Level: level, 419 | }, 420 | ) 421 | logger := slog.New(handler) 422 | slog.SetDefault(logger) 423 | } 424 | 425 | // Used to check mutual exclusion. 
426 | func isOnlyOne(flags ...bool) bool { 427 | var foundOne bool 428 | for _, f := range flags { 429 | if f { 430 | if foundOne { 431 | return false 432 | } 433 | 434 | foundOne = true 435 | } 436 | } 437 | 438 | return true 439 | } 440 | 441 | type filterFlags struct { 442 | since *string 443 | until *string 444 | authors flagutils.SliceFlag 445 | nauthors flagutils.SliceFlag 446 | } 447 | 448 | func addFilterFlags(set *flag.FlagSet) *filterFlags { 449 | flags := filterFlags{ 450 | since: set.String("since", "", strings.TrimSpace(` 451 | Only count commits after the given date. See git-commit(1) for valid date formats 452 | `)), 453 | until: set.String("until", "", strings.TrimSpace(` 454 | Only count commits before the given date. See git-commit(1) for valid date formats 455 | `)), 456 | } 457 | 458 | set.Var(&flags.authors, "author", strings.TrimSpace(` 459 | Only count commits by these authors. Can be specified multiple times 460 | `)) 461 | 462 | set.Var(&flags.nauthors, "nauthor", strings.TrimSpace(` 463 | Exclude commits by these authors. Can be specified multiple times 464 | `)) 465 | 466 | return &flags 467 | } 468 | 469 | /* 470 | * The "flag" package treats `--` as a terminator and doesn't return it as an 471 | * arg. We aren't really using it as a terminator though; we want to use it like 472 | * Git does, to separate revisions from paths. So we escape it so the "flag" 473 | * package treats it like any other arg. 474 | */ 475 | func escapeTerminator(args []string) []string { 476 | newArgs := []string{} 477 | for _, arg := range args { 478 | if arg == "--" { 479 | newArgs = append(newArgs, "^--") // Seems unlikely to be used? 
480 | } else { 481 | newArgs = append(newArgs, arg) 482 | } 483 | } 484 | 485 | return newArgs 486 | } 487 | 488 | func unescapeTerminator(args []string) []string { 489 | newArgs := []string{} 490 | for _, arg := range args { 491 | if arg == "^--" { 492 | newArgs = append(newArgs, "--") 493 | } else { 494 | newArgs = append(newArgs, arg) 495 | } 496 | } 497 | 498 | return newArgs 499 | } 500 | 501 | func checkPathspecs(pathspecs []string) error { 502 | for _, p := range pathspecs { 503 | if !git.IsSupportedPathspec(p) { 504 | return fmt.Errorf( 505 | "unsupported magic in pathspec: \"%s\"\n"+ 506 | "only the \"exclude\" magic is supported", 507 | p, 508 | ) 509 | } 510 | } 511 | 512 | return nil 513 | } 514 | -------------------------------------------------------------------------------- /parse.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "fmt" 7 | "os" 8 | "time" 9 | 10 | "github.com/sinclairtarget/git-who/internal/git" 11 | ) 12 | 13 | // Just prints out a simple representation of the commits parsed from `git log` 14 | // for debugging. 
15 | func parse( 16 | revs []string, 17 | pathspecs []string, 18 | short bool, 19 | since string, 20 | until string, 21 | authors []string, 22 | nauthors []string, 23 | ) (err error) { 24 | defer func() { 25 | if err != nil { 26 | err = fmt.Errorf("error running \"parse\": %w", err) 27 | } 28 | }() 29 | 30 | logger().Debug( 31 | "called parse()", 32 | "revs", 33 | revs, 34 | "pathspecs", 35 | pathspecs, 36 | "short", 37 | short, 38 | "since", 39 | since, 40 | "until", 41 | until, 42 | "authors", 43 | authors, 44 | "nauthors", 45 | nauthors, 46 | ) 47 | 48 | start := time.Now() 49 | 50 | ctx, cancel := context.WithCancel(context.Background()) 51 | defer cancel() 52 | 53 | filters := git.LogFilters{ 54 | Since: since, 55 | Until: until, 56 | Authors: authors, 57 | Nauthors: nauthors, 58 | } 59 | 60 | gitRootPath, err := git.GetRoot() 61 | if err != nil { 62 | return err 63 | } 64 | 65 | repoFiles, err := git.CheckRepoConfigFiles(gitRootPath) 66 | if err != nil { 67 | return err 68 | } 69 | 70 | commits, closer, err := git.CommitsWithOpts( 71 | ctx, 72 | revs, 73 | pathspecs, 74 | filters, 75 | !short, 76 | repoFiles, 77 | ) 78 | if err != nil { 79 | return err 80 | } 81 | 82 | w := bufio.NewWriter(os.Stdout) 83 | 84 | numCommits := 0 85 | for commit, err := range commits { 86 | if err != nil { 87 | w.Flush() 88 | return fmt.Errorf("Error iterating commits: %w", err) 89 | } 90 | 91 | fmt.Fprintf(w, "%s\n", commit) 92 | for _, diff := range commit.FileDiffs { 93 | fmt.Fprintf(w, " %s\n", diff) 94 | } 95 | 96 | fmt.Fprintln(w) 97 | 98 | numCommits += 1 99 | } 100 | 101 | w.Flush() 102 | 103 | fmt.Printf("Parsed %d commits.\n", numCommits) 104 | 105 | err = closer() 106 | if err != nil { 107 | return err 108 | } 109 | 110 | elapsed := time.Now().Sub(start) 111 | logger().Debug("finished parse", "duration_ms", elapsed.Milliseconds()) 112 | 113 | return nil 114 | } 115 | -------------------------------------------------------------------------------- 
/screenshots/vanity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinclairtarget/git-who/33dfff88d3649cdd9f5f6b4ab02e05b911e881dd/screenshots/vanity.png -------------------------------------------------------------------------------- /table.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "encoding/csv" 6 | "fmt" 7 | "os" 8 | "runtime" 9 | "strconv" 10 | "strings" 11 | "time" 12 | 13 | runewidth "github.com/mattn/go-runewidth" 14 | 15 | "github.com/sinclairtarget/git-who/internal/concurrent" 16 | "github.com/sinclairtarget/git-who/internal/format" 17 | "github.com/sinclairtarget/git-who/internal/git" 18 | "github.com/sinclairtarget/git-who/internal/pretty" 19 | "github.com/sinclairtarget/git-who/internal/tally" 20 | ) 21 | 22 | const narrowWidth = 55 23 | const wideWidth = 80 24 | const maxBeforeColorAlternating = 14 25 | 26 | func pickWidth(mode tally.TallyMode, showEmail bool) int { 27 | wideMode := mode == tally.FilesMode || mode == tally.LinesMode 28 | if wideMode || showEmail { 29 | return wideWidth 30 | } 31 | 32 | return narrowWidth 33 | } 34 | 35 | // The "table" subcommand summarizes the authorship history of the given 36 | // commits and paths in a table printed to stdout. 
37 | func table( 38 | revs []string, 39 | pathspecs []string, 40 | mode tally.TallyMode, 41 | useCsv bool, 42 | showEmail bool, 43 | countMerges bool, 44 | limit int, 45 | since string, 46 | until string, 47 | authors []string, 48 | nauthors []string, 49 | ) (err error) { 50 | defer func() { 51 | if err != nil { 52 | err = fmt.Errorf("error running \"table\": %w", err) 53 | } 54 | }() 55 | 56 | logger().Debug( 57 | "called table()", 58 | "revs", 59 | revs, 60 | "pathspecs", 61 | pathspecs, 62 | "mode", 63 | mode, 64 | "useCsv", 65 | useCsv, 66 | "showEmail", 67 | showEmail, 68 | "countMerges", 69 | countMerges, 70 | "limit", 71 | limit, 72 | "since", 73 | since, 74 | "until", 75 | until, 76 | "authors", 77 | authors, 78 | "nauthors", 79 | nauthors, 80 | ) 81 | 82 | ctx, cancel := context.WithCancel(context.Background()) 83 | defer cancel() 84 | 85 | tallyOpts := tally.TallyOpts{Mode: mode, CountMerges: countMerges} 86 | if showEmail { 87 | tallyOpts.Key = func(c git.Commit) string { return c.AuthorEmail } 88 | } else { 89 | tallyOpts.Key = func(c git.Commit) string { return c.AuthorName } 90 | } 91 | 92 | populateDiffs := tallyOpts.IsDiffMode() 93 | filters := git.LogFilters{ 94 | Since: since, 95 | Until: until, 96 | Authors: authors, 97 | Nauthors: nauthors, 98 | } 99 | 100 | gitRootPath, err := git.GetRoot() 101 | if err != nil { 102 | return err 103 | } 104 | 105 | repoFiles, err := git.CheckRepoConfigFiles(gitRootPath) 106 | if err != nil { 107 | return err 108 | } 109 | 110 | var tallies map[string]tally.Tally 111 | if populateDiffs && runtime.GOMAXPROCS(0) > 1 { 112 | tallies, err = concurrent.TallyCommits( 113 | ctx, 114 | revs, 115 | pathspecs, 116 | filters, 117 | repoFiles, 118 | tallyOpts, 119 | getCache(gitRootPath, repoFiles), 120 | pretty.AllowDynamic(os.Stdout), 121 | ) 122 | if err != nil { 123 | return err 124 | } 125 | } else { 126 | // This is fast in the no-diff case even if we don't parallelize it 127 | commits, closer, err := 
git.CommitsWithOpts( 128 | ctx, 129 | revs, 130 | pathspecs, 131 | filters, 132 | populateDiffs, 133 | repoFiles, 134 | ) 135 | if err != nil { 136 | return err 137 | } 138 | 139 | tallies, err = tally.TallyCommits(commits, tallyOpts) 140 | if err != nil { 141 | return fmt.Errorf("failed to tally commits: %w", err) 142 | } 143 | 144 | err = closer() 145 | if err != nil { 146 | return err 147 | } 148 | } 149 | 150 | rankedTallies := tally.Rank(tallies, mode) 151 | 152 | numFilteredOut := 0 153 | if limit > 0 && limit < len(rankedTallies) { 154 | numFilteredOut = len(rankedTallies) - limit 155 | rankedTallies = rankedTallies[:limit] 156 | } 157 | 158 | if useCsv { 159 | err := writeCsv(rankedTallies, tallyOpts, showEmail) 160 | if err != nil { 161 | return err 162 | } 163 | } else { 164 | colwidth := pickWidth(mode, showEmail) 165 | writeTable(rankedTallies, colwidth, showEmail, mode, numFilteredOut) 166 | } 167 | 168 | return nil 169 | } 170 | 171 | func toRecord( 172 | t tally.FinalTally, 173 | opts tally.TallyOpts, 174 | showEmail bool, 175 | ) []string { 176 | record := []string{t.AuthorName} 177 | 178 | if showEmail { 179 | record = append(record, t.AuthorEmail) 180 | } 181 | 182 | record = append(record, strconv.Itoa(t.Commits)) 183 | 184 | if opts.IsDiffMode() { 185 | record = append( 186 | record, 187 | strconv.Itoa(t.LinesAdded), 188 | strconv.Itoa(t.LinesRemoved), 189 | strconv.Itoa(t.FileCount), 190 | ) 191 | } 192 | 193 | return append( 194 | record, 195 | t.LastCommitTime.Format(time.RFC3339), 196 | t.FirstCommitTime.Format(time.RFC3339), 197 | ) 198 | } 199 | 200 | func writeCsv( 201 | tallies []tally.FinalTally, 202 | opts tally.TallyOpts, 203 | showEmail bool, 204 | ) error { 205 | w := csv.NewWriter(os.Stdout) 206 | 207 | // Write header 208 | columnHeaders := []string{"name"} 209 | if showEmail { 210 | columnHeaders = append(columnHeaders, "email") 211 | } 212 | 213 | columnHeaders = append(columnHeaders, "commits") 214 | 215 | if opts.IsDiffMode() 
{ 216 | columnHeaders = append( 217 | columnHeaders, 218 | "lines added", 219 | "lines removed", 220 | "files", 221 | ) 222 | } 223 | 224 | columnHeaders = append(columnHeaders, "last commit time", "first commit time") 225 | w.Write(columnHeaders) 226 | 227 | for _, tally := range tallies { 228 | record := toRecord(tally, opts, showEmail) 229 | if err := w.Write(record); err != nil { 230 | return fmt.Errorf("error writing CSV record to stdout: %w", err) 231 | } 232 | } 233 | 234 | w.Flush() 235 | if err := w.Error(); err != nil { 236 | return fmt.Errorf("error flushing CSV writer: %w", err) 237 | } 238 | 239 | return nil 240 | } 241 | 242 | // Returns a string matching the given width describing the author 243 | func formatAuthor( 244 | t tally.FinalTally, 245 | showEmail bool, 246 | width int, 247 | ) string { 248 | var author string 249 | if showEmail { 250 | author = fmt.Sprintf( 251 | "%s %s", 252 | t.AuthorName, 253 | format.GitEmail(t.AuthorEmail), 254 | ) 255 | } else { 256 | author = t.AuthorName 257 | } 258 | 259 | author = format.Abbrev(author, width) 260 | return runewidth.FillRight(author, width) 261 | } 262 | 263 | func writeTable( 264 | tallies []tally.FinalTally, 265 | colwidth int, 266 | showEmail bool, 267 | mode tally.TallyMode, 268 | numFilteredOut int, 269 | ) { 270 | if len(tallies) == 0 { 271 | return 272 | } 273 | 274 | var build strings.Builder 275 | for _ = range colwidth - 2 { 276 | build.WriteRune('─') 277 | } 278 | rule := build.String() 279 | 280 | // -- Write header -- 281 | fmt.Printf("┌%s┐\n", rule) 282 | 283 | if mode == tally.LinesMode || mode == tally.FilesMode { 284 | fmt.Printf( 285 | "│%-*s %-11s %7s %7s %17s│\n", 286 | colwidth-36-13, 287 | "Author", 288 | "Last Edit", 289 | "Commits", 290 | "Files", 291 | "Lines (+/-)", 292 | ) 293 | } else if mode == tally.FirstModifiedMode { 294 | fmt.Printf( 295 | "│%-*s %-11s %7s│\n", 296 | colwidth-22, 297 | "Author", 298 | "First Edit", 299 | "Commits", 300 | ) 301 | } else { 302 | 
fmt.Printf( 303 | "│%-*s %-11s %7s│\n", 304 | colwidth-22, 305 | "Author", 306 | "Last Edit", 307 | "Commits", 308 | ) 309 | } 310 | fmt.Printf("├%s┤\n", rule) 311 | 312 | // -- Write table rows -- 313 | totalRows := len(tallies) 314 | for i, t := range tallies { 315 | alternating := "" 316 | if totalRows > maxBeforeColorAlternating && i%2 == 1 { 317 | alternating = pretty.Invert 318 | } 319 | 320 | lines := fmt.Sprintf( 321 | "%s%7s%s / %s%7s%s", 322 | pretty.Green, 323 | format.Number(t.LinesAdded), 324 | pretty.DefaultColor, 325 | pretty.Red, 326 | format.Number(t.LinesRemoved), 327 | pretty.DefaultColor, 328 | ) 329 | 330 | if mode == tally.LinesMode || mode == tally.FilesMode { 331 | fmt.Printf( 332 | "│%s%s %-11s %7s %7s %17s%s│\n", 333 | alternating, 334 | formatAuthor(t, showEmail, colwidth-36-13), 335 | format.RelativeTime(progStart, t.LastCommitTime), 336 | format.Number(t.Commits), 337 | format.Number(t.FileCount), 338 | lines, 339 | pretty.Reset, 340 | ) 341 | } else if mode == tally.FirstModifiedMode { 342 | fmt.Printf( 343 | "│%s%s %-11s %7s%s│\n", 344 | alternating, 345 | formatAuthor(t, showEmail, colwidth-22), 346 | format.RelativeTime(progStart, t.FirstCommitTime), 347 | format.Number(t.Commits), 348 | pretty.Reset, 349 | ) 350 | } else { 351 | fmt.Printf( 352 | "│%s%s %-11s %7s%s│\n", 353 | alternating, 354 | formatAuthor(t, showEmail, colwidth-22), 355 | format.RelativeTime(progStart, t.LastCommitTime), 356 | format.Number(t.Commits), 357 | pretty.Reset, 358 | ) 359 | } 360 | } 361 | 362 | if numFilteredOut > 0 { 363 | msg := fmt.Sprintf("...%s more...", format.Number(numFilteredOut)) 364 | fmt.Printf("│%-*s│\n", colwidth-2, msg) 365 | } 366 | 367 | fmt.Printf("└%s┘\n", rule) 368 | } 369 | -------------------------------------------------------------------------------- /test/functional/big_repo_test.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | require 'pathname' 3 | require 'tmpdir' 4 
| 5 | require 'minitest/autorun' 6 | 7 | require 'lib/cmd' 8 | require 'lib/repo' 9 | 10 | 11 | class TestBigRepo < Minitest::Test 12 | def test_table_csv_big_repo 13 | cmd = GitWho.new(GitWho.built_bin_path, BigRepo.path) 14 | stdout_s = cmd.run 'table', '--csv', n_procs: 1 15 | refute_empty(stdout_s) 16 | 17 | data = CSV.parse(stdout_s, headers: true) 18 | assert_equal data.headers, [ 19 | 'name', 'commits', 'last commit time', 'first commit time', 20 | ] 21 | assert_equal data.length, 10 22 | assert_equal data[0]['name'], 'benoitc' 23 | assert_equal data[0]['commits'], '1043' 24 | end 25 | 26 | def test_table_csv_big_repo_lines 27 | cmd = GitWho.new(GitWho.built_bin_path, BigRepo.path) 28 | stdout_s = cmd.run 'table', '--csv', '-l' 29 | refute_empty(stdout_s) 30 | 31 | data = CSV.parse(stdout_s, headers: true) 32 | check_csv_results(data) 33 | end 34 | 35 | def test_table_csv_big_repo_concurrent 36 | cmd = GitWho.new(GitWho.built_bin_path, BigRepo.path) 37 | stdout_s = cmd.run 'table', '--csv', '-l' 38 | refute_empty(stdout_s) 39 | 40 | data = CSV.parse(stdout_s, headers: true) 41 | check_csv_results(data) 42 | end 43 | 44 | def test_table_csv_big_repo_caching 45 | Dir.mktmpdir do |dir| 46 | cmd = GitWho.new(GitWho.built_bin_path, BigRepo.path) 47 | 48 | git_who_cache_path = Pathname.new(dir) / 'git-who' / 'gob' 49 | refute git_who_cache_path.exist? 50 | 51 | # First run, cold start 52 | stdout_s = cmd.run 'table', '--csv', '-l', cache_home: dir 53 | refute_empty(stdout_s) 54 | 55 | data = CSV.parse(stdout_s, headers: true) 56 | check_csv_results(data) 57 | 58 | assert git_who_cache_path.exist? 
59 | 60 | # Second run 61 | stdout_s = cmd.run 'table', '--csv', '-l', cache_home: dir 62 | refute_empty(stdout_s) 63 | 64 | data = CSV.parse(stdout_s, headers: true) 65 | check_csv_results(data) 66 | end 67 | end 68 | 69 | def check_csv_results(data) 70 | assert_equal data.headers, [ 71 | 'name', 72 | 'commits', 73 | 'lines added', 74 | 'lines removed', 75 | 'files', 76 | 'last commit time', 77 | 'first commit time', 78 | ] 79 | assert_equal data.length, 10 80 | assert_equal data[0]['name'], 'Benoit Chesneau' 81 | assert_equal data[0]['commits'], '316' 82 | assert_equal data[0]['lines added'], '28094' 83 | assert_equal data[0]['lines removed'], '24412' 84 | assert_equal data[0]['files'], '185' 85 | end 86 | end 87 | -------------------------------------------------------------------------------- /test/functional/hist_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | 3 | require 'lib/cmd' 4 | require 'lib/repo' 5 | 6 | # This set of tests for the `hist` subcommand does nothing to check the 7 | # validity of the output. We just try to hit as many codepaths as we can to 8 | # check that the program doesn't error out. 
9 | class TestHist < Minitest::Test 10 | MODE_FLAGS = ['', '-f', '-l'] 11 | EMAIL_FLAGS = ['', '-e'] 12 | MERGES_FLAGS = ['', '--merges'] 13 | 14 | AUTHOR_FILTER_FLAGS = ['', '--author Bob'] 15 | NAUTHOR_FILTER_FLAGS = ['', '--nauthor Alice'] 16 | SINCE_FILTER_FLAGS = ['', '--since 2024-12-25'] 17 | UNTIL_FILTER_FLAGS = ['', '--until 2025-02-01'] 18 | 19 | def test_hist_no_flags 20 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 21 | stdout_s = cmd.run 'hist' 22 | refute_empty(stdout_s) 23 | end 24 | 25 | all_flag_combos = GitWho.generate_args_cartesian_product([ 26 | MODE_FLAGS, 27 | EMAIL_FLAGS, 28 | MERGES_FLAGS, 29 | ]) 30 | all_flag_combos.each do |flags| 31 | test_name = "test_hist_(#{flags.join ','})" 32 | define_method(test_name) do 33 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 34 | stdout_s = cmd.run 'hist', *flags 35 | refute_empty(stdout_s) 36 | end 37 | end 38 | 39 | all_filter_flag_combos = GitWho.generate_args_cartesian_product([ 40 | AUTHOR_FILTER_FLAGS, 41 | NAUTHOR_FILTER_FLAGS, 42 | SINCE_FILTER_FLAGS, 43 | UNTIL_FILTER_FLAGS, 44 | ]) 45 | all_filter_flag_combos.each do |flags| 46 | test_name = "test_hist_filter_(#{flags.join ','})" 47 | define_method(test_name) do 48 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 49 | stdout_s = cmd.run 'hist', *flags 50 | refute_empty(stdout_s) 51 | end 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /test/functional/table_csv_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require 'csv' 3 | 4 | require 'lib/cmd' 5 | require 'lib/repo' 6 | 7 | class TestTableCSV < Minitest::Test 8 | def test_table_csv 9 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 10 | stdout_s = cmd.run 'table', '--csv' 11 | refute_empty(stdout_s) 12 | 13 | data = CSV.parse(stdout_s, headers: true) 14 | assert_equal data.headers, [ 15 | 'name', 'commits', 'last commit 
time', 'first commit time', 16 | ] 17 | assert_equal data.length, 2 18 | assert_equal data[0]['name'], 'Sinclair Target' 19 | assert_equal data[1]['name'], 'Bob' 20 | end 21 | 22 | def test_table_csv_email 23 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 24 | stdout_s = cmd.run 'table', '--csv', '-e' 25 | refute_empty(stdout_s) 26 | 27 | data = CSV.parse(stdout_s, headers: true) 28 | assert_equal data.headers, [ 29 | 'name', 'email', 'commits', 'last commit time', 'first commit time', 30 | ] 31 | assert_equal data.length, 2 32 | assert_equal data[0]['email'], 'sinclairtarget@gmail.com' 33 | assert_equal data[1]['email'], 'bob@mail.com' 34 | end 35 | 36 | def test_table_csv_lines 37 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 38 | stdout_s = cmd.run 'table', '--csv', '-l' 39 | refute_empty(stdout_s) 40 | 41 | data = CSV.parse(stdout_s, headers: true) 42 | assert_equal data.headers, [ 43 | 'name', 44 | 'commits', 45 | 'lines added', 46 | 'lines removed', 47 | 'files', 48 | 'last commit time', 49 | 'first commit time', 50 | ] 51 | assert_equal data.length, 2 52 | assert_equal data[0]['name'], 'Sinclair Target' 53 | assert_equal data[1]['name'], 'Bob' 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /test/functional/table_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | 3 | require 'lib/cmd' 4 | require 'lib/repo' 5 | 6 | # This set of tests for the `table` subcommand does nothing to check the 7 | # validity of the output. We just try to hit as many codepaths as we can to 8 | # check that the program doesn't error out. 
9 | class TestTable < Minitest::Test 10 | MODE_FLAGS = ['', '-c', '-f', '-l', '-m'] 11 | EMAIL_FLAGS = ['', '-e'] 12 | MERGES_FLAGS = ['', '--merges'] 13 | LIMIT_FLAGS = ['', '-n 5'] 14 | 15 | AUTHOR_FILTER_FLAGS = ['', '--author Bob'] 16 | NAUTHOR_FILTER_FLAGS = ['', '--nauthor Alice'] 17 | SINCE_FILTER_FLAGS = ['', '--since 2024-12-25'] 18 | UNTIL_FILTER_FLAGS = ['', '--until 2025-02-01'] 19 | 20 | def test_table_no_flags 21 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 22 | stdout_s = cmd.run 'table' 23 | refute_empty(stdout_s) 24 | end 25 | 26 | all_flag_combos = GitWho.generate_args_cartesian_product([ 27 | MODE_FLAGS, 28 | EMAIL_FLAGS, 29 | MERGES_FLAGS, 30 | LIMIT_FLAGS, 31 | ]) 32 | all_flag_combos.each do |flags| 33 | test_name = "test_table_(#{flags.join ','})" 34 | define_method(test_name) do 35 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 36 | stdout_s = cmd.run 'table', *flags 37 | assert stdout_s 38 | end 39 | end 40 | 41 | all_filter_flag_combos = GitWho.generate_args_cartesian_product([ 42 | AUTHOR_FILTER_FLAGS, 43 | NAUTHOR_FILTER_FLAGS, 44 | SINCE_FILTER_FLAGS, 45 | UNTIL_FILTER_FLAGS, 46 | ]) 47 | all_filter_flag_combos.each do |flags| 48 | test_name = "test_table_filter_(#{flags.join ','})" 49 | define_method(test_name) do 50 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 51 | stdout_s = cmd.run 'table', *flags 52 | refute_empty(stdout_s) 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /test/functional/tree_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | 3 | require 'lib/cmd' 4 | require 'lib/repo' 5 | 6 | # This set of tests for the `tree` subcommand does nothing to check the 7 | # validity of the output. We just try to hit as many codepaths as we can to 8 | # check that the program doesn't error out. 
9 | class TestTree < Minitest::Test 10 | SHOW_ALL_FLAGS = ['', '-a'] 11 | MODE_FLAGS = ['', '-c', '-f', '-l', '-m'] 12 | EMAIL_FLAGS = ['', '-e'] 13 | MERGES_FLAGS = ['', '--merges'] 14 | 15 | AUTHOR_FILTER_FLAGS = ['', '--author Bob'] 16 | NAUTHOR_FILTER_FLAGS = ['', '--nauthor Alice'] 17 | SINCE_FILTER_FLAGS = ['', '--since 2024-12-25'] 18 | UNTIL_FILTER_FLAGS = ['', '--until 2025-02-01'] 19 | 20 | def test_tree_no_flags 21 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 22 | stdout_s = cmd.run 'tree' 23 | refute_empty(stdout_s) 24 | end 25 | 26 | def test_tree_subdir 27 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 28 | stdout_s = cmd.run 'tree', 'file-rename' 29 | refute_empty(stdout_s) 30 | end 31 | 32 | def test_tree_exclude_ext_pathspec 33 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 34 | stdout_s = cmd.run 'tree', '--', 'exclude-ext', ':!*.py' 35 | refute_empty(stdout_s) 36 | end 37 | 38 | def test_tree_exclude_ext_pathspec_longform 39 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 40 | stdout_s = cmd.run 'tree', '--', 'exclude-ext', ':(exclude)*.py' 41 | refute_empty(stdout_s) 42 | end 43 | 44 | def test_tree_exclude_ext_pathspec_no_concurrent 45 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 46 | stdout_s = cmd.run 'tree', '--', 'exclude-ext', ':!*.py', n_procs: 1 47 | refute_empty(stdout_s) 48 | end 49 | 50 | def test_exclude_ext_pathspec_trailing_slash 51 | skip("git ls-files bug") 52 | 53 | # This doesn't work because ls-files doesn't output anything. But it works 54 | # if you put the exclude magic pathspec first. Bug with git ls-files? 
55 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 56 | stdout_s = cmd.run 'tree', '--', 'exclude-ext/', ':!*.py' 57 | refute_empty(stdout_s) 58 | end 59 | 60 | all_flag_combos = GitWho.generate_args_cartesian_product([ 61 | SHOW_ALL_FLAGS, 62 | MODE_FLAGS, 63 | EMAIL_FLAGS, 64 | MERGES_FLAGS, 65 | ]) 66 | all_flag_combos.each do |flags| 67 | test_name = "test_tree_(#{flags.join ','})" 68 | define_method(test_name) do 69 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 70 | stdout_s = cmd.run 'tree', *flags 71 | assert stdout_s 72 | end 73 | end 74 | 75 | all_filter_flag_combos = GitWho.generate_args_cartesian_product([ 76 | AUTHOR_FILTER_FLAGS, 77 | NAUTHOR_FILTER_FLAGS, 78 | SINCE_FILTER_FLAGS, 79 | UNTIL_FILTER_FLAGS, 80 | ]) 81 | all_filter_flag_combos.each do |flags| 82 | test_name = "test_tree_filter_(#{flags.join ','})" 83 | define_method(test_name) do 84 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 85 | stdout_s = cmd.run 'tree', *flags 86 | refute_empty(stdout_s) 87 | end 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /test/functional/version_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | 3 | require 'lib/cmd' 4 | require 'lib/repo' 5 | 6 | class TestVersion < Minitest::Test 7 | def test_version 8 | cmd = GitWho.new(GitWho.built_bin_path, TestRepo.path) 9 | stdout_s = cmd.run '--version' 10 | refute_empty(stdout_s) 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /test/lib/cmd.rb: -------------------------------------------------------------------------------- 1 | require 'open3' 2 | require 'pathname' 3 | 4 | class GitWhoError < StandardError 5 | end 6 | 7 | class GitWho 8 | def initialize(exec_path, rundir) 9 | @exec_path = exec_path 10 | @rundir = rundir 11 | end 12 | 13 | def run(*args, cache_home: nil, n_procs: nil) 14 | env_hash = {} 
15 | 16 | if cache_home 17 | env_hash['XDG_CACHE_HOME'] = cache_home 18 | else 19 | env_hash['GIT_WHO_DISABLE_CACHE'] = '1' 20 | end 21 | 22 | unless n_procs.nil? 23 | env_hash['GOMAXPROCS'] = n_procs.to_s 24 | end 25 | 26 | split_args = args.reduce([]) do |args, arg| 27 | arg.split(' ').each do |part| 28 | args << part 29 | end 30 | 31 | args 32 | end 33 | 34 | stdout_s, stderr_s, status = Open3.capture3( 35 | env_hash, 36 | @exec_path, 37 | *split_args, 38 | chdir: @rundir, 39 | ) 40 | 41 | unless status.success? 42 | invocation = GitWho.format_invocation(split_args) 43 | raise GitWhoError, 44 | "#{invocation} exited with status: #{status.exitstatus}\n#{stderr_s}" 45 | end 46 | 47 | stdout_s 48 | end 49 | 50 | def self.built_bin_path 51 | p = Pathname.new(__dir__) + '../../git-who' 52 | p.cleanpath.to_s 53 | end 54 | 55 | def self.format_invocation(args) 56 | 'git-who ' + args.join(' ') 57 | end 58 | 59 | # Given a list of "flagsets", where each flagset is a set of mutually 60 | # exclusive flags that could be supplied for a command, returns the cartesian 61 | # product of all the flagsets (i.e. all possible combinations of flags). 62 | def self.generate_args_cartesian_product(flagsets, no_empty: true) 63 | all_args = 64 | if flagsets.empty? 65 | [[]] 66 | else 67 | head = flagsets[0] 68 | tail = flagsets[1..] 69 | 70 | tail_args = self.generate_args_cartesian_product(tail, no_empty: false) 71 | 72 | head.reduce([]) do |all_args, flag| 73 | tail_args.each do |args| 74 | if flag.empty? 75 | all_args << args 76 | else 77 | all_args << [flag] + args 78 | end 79 | end 80 | 81 | all_args 82 | end 83 | end 84 | 85 | if no_empty 86 | all_args.filter { |args| !args.empty? 
} 87 | else 88 | all_args 89 | end 90 | end 91 | end 92 | -------------------------------------------------------------------------------- /test/lib/repo.rb: -------------------------------------------------------------------------------- 1 | require 'pathname' 2 | 3 | module TestRepo 4 | def self.path 5 | p = Pathname.new(__dir__) + '../../test-repo' 6 | p.cleanpath.to_s 7 | end 8 | end 9 | 10 | # Our bigger test repo with a commit history long enough to require concurrent 11 | # processing. 12 | module BigRepo 13 | def self.path 14 | p = Pathname.new(__dir__) + '../../gunicorn' 15 | p.cleanpath.to_s 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /tree.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "maps" 7 | "os" 8 | "path/filepath" 9 | "runtime" 10 | "slices" 11 | "strings" 12 | "unicode/utf8" 13 | 14 | "github.com/sinclairtarget/git-who/internal/concurrent" 15 | "github.com/sinclairtarget/git-who/internal/format" 16 | "github.com/sinclairtarget/git-who/internal/git" 17 | "github.com/sinclairtarget/git-who/internal/pretty" 18 | "github.com/sinclairtarget/git-who/internal/tally" 19 | ) 20 | 21 | const defaultMaxDepth = 100 22 | 23 | type printTreeOpts struct { 24 | mode tally.TallyMode 25 | maxDepth int 26 | showHidden bool 27 | key func(t tally.FinalTally) string 28 | } 29 | 30 | type treeOutputLine struct { 31 | indent string 32 | path string 33 | metric string 34 | tally tally.FinalTally 35 | showLine bool 36 | showTally bool 37 | dimTally bool 38 | dimPath bool 39 | } 40 | 41 | func tree( 42 | revs []string, 43 | pathspecs []string, 44 | mode tally.TallyMode, 45 | depth int, 46 | showEmail bool, 47 | showHidden bool, 48 | countMerges bool, 49 | since string, 50 | until string, 51 | authors []string, 52 | nauthors []string, 53 | ) (err error) { 54 | defer func() { 55 | if err != nil { 56 | err = 
fmt.Errorf("error running \"tree\": %w", err) 57 | } 58 | }() 59 | 60 | logger().Debug( 61 | "called tree()", 62 | "revs", 63 | revs, 64 | "pathspecs", 65 | pathspecs, 66 | "mode", 67 | mode, 68 | "depth", 69 | depth, 70 | "showEmail", 71 | showEmail, 72 | "showHidden", 73 | showHidden, 74 | "countMerges", 75 | countMerges, 76 | "since", 77 | since, 78 | "until", 79 | until, 80 | "authors", 81 | authors, 82 | "nauthors", 83 | nauthors, 84 | ) 85 | 86 | wtreeset, err := git.WorkingTreeFiles(pathspecs) 87 | if err != nil { 88 | return err 89 | } 90 | 91 | ctx, cancel := context.WithCancel(context.Background()) 92 | defer cancel() 93 | 94 | filters := git.LogFilters{ 95 | Since: since, 96 | Until: until, 97 | Authors: authors, 98 | Nauthors: nauthors, 99 | } 100 | 101 | tallyOpts := tally.TallyOpts{Mode: mode, CountMerges: countMerges} 102 | if showEmail { 103 | tallyOpts.Key = func(c git.Commit) string { return c.AuthorEmail } 104 | } else { 105 | tallyOpts.Key = func(c git.Commit) string { return c.AuthorName } 106 | } 107 | 108 | gitRootPath, err := git.GetRoot() 109 | if err != nil { 110 | return err 111 | } 112 | 113 | repoFiles, err := git.CheckRepoConfigFiles(gitRootPath) 114 | if err != nil { 115 | return err 116 | } 117 | 118 | var root *tally.TreeNode 119 | if runtime.GOMAXPROCS(0) > 1 { 120 | root, err = concurrent.TallyCommitsTree( 121 | ctx, 122 | revs, 123 | pathspecs, 124 | filters, 125 | repoFiles, 126 | tallyOpts, 127 | wtreeset, 128 | gitRootPath, 129 | getCache(gitRootPath, repoFiles), 130 | pretty.AllowDynamic(os.Stdout), 131 | ) 132 | 133 | if err == tally.EmptyTreeErr { 134 | logger().Debug("Tree was empty.") 135 | return nil 136 | } 137 | 138 | if err != nil { 139 | return err 140 | } 141 | } else { 142 | commits, closer, innererr := git.CommitsWithOpts( 143 | ctx, 144 | revs, 145 | pathspecs, 146 | filters, 147 | true, 148 | repoFiles, 149 | ) 150 | if innererr != nil { 151 | return innererr 152 | } 153 | 154 | root, innererr = 
tally.TallyCommitsTree( 155 | commits, 156 | tallyOpts, 157 | wtreeset, 158 | gitRootPath, 159 | ) 160 | if innererr == tally.EmptyTreeErr { 161 | logger().Debug("Tree was empty.") 162 | return nil 163 | } 164 | 165 | if innererr != nil { 166 | return fmt.Errorf("failed to tally commits: %w", innererr) 167 | } 168 | 169 | innererr = closer() 170 | if innererr != nil { 171 | return innererr 172 | } 173 | } 174 | 175 | root = root.Rank(mode) 176 | 177 | maxDepth := depth 178 | if depth == 0 { 179 | maxDepth = defaultMaxDepth 180 | } 181 | 182 | opts := printTreeOpts{ 183 | maxDepth: maxDepth, 184 | mode: mode, 185 | showHidden: showHidden, 186 | } 187 | if showEmail { 188 | opts.key = func(t tally.FinalTally) string { return t.AuthorEmail } 189 | } else { 190 | opts.key = func(t tally.FinalTally) string { return t.AuthorName } 191 | } 192 | 193 | lines := toLines(root, ".", 0, "", []bool{}, opts, []treeOutputLine{}) 194 | printTree(lines, showEmail) 195 | return nil 196 | } 197 | 198 | // Recursively descend tree, turning tree nodes into output lines. 
199 | func toLines( 200 | node *tally.TreeNode, 201 | path string, 202 | depth int, 203 | lastAuthor string, 204 | isFinalChild []bool, 205 | opts printTreeOpts, 206 | lines []treeOutputLine, 207 | ) []treeOutputLine { 208 | if path == tally.NoDiffPathname { 209 | return lines 210 | } 211 | 212 | if depth > opts.maxDepth { 213 | return lines 214 | } 215 | 216 | if depth < opts.maxDepth && len(node.Children) == 1 { 217 | // Path ellision 218 | for k, v := range node.Children { 219 | lines = toLines( 220 | v, 221 | filepath.Join(path, k), 222 | depth+1, 223 | lastAuthor, 224 | isFinalChild, 225 | opts, 226 | lines, 227 | ) 228 | } 229 | return lines 230 | } 231 | 232 | var line treeOutputLine 233 | 234 | var indentBuilder strings.Builder 235 | for i, isFinal := range isFinalChild { 236 | if i < len(isFinalChild)-1 { 237 | if isFinal { 238 | fmt.Fprintf(&indentBuilder, " ") 239 | } else { 240 | fmt.Fprintf(&indentBuilder, "│ ") 241 | } 242 | } else { 243 | if isFinal { 244 | fmt.Fprintf(&indentBuilder, "└── ") 245 | } else { 246 | fmt.Fprintf(&indentBuilder, "├── ") 247 | } 248 | } 249 | } 250 | line.indent = indentBuilder.String() 251 | 252 | line.path = path 253 | if len(node.Children) > 0 { 254 | // Have a directory 255 | line.path = path + string(os.PathSeparator) 256 | } 257 | 258 | line.tally = node.Tally 259 | line.metric = fmtTallyMetric(node.Tally, opts) 260 | line.showLine = node.InWorkTree || opts.showHidden 261 | line.dimTally = len(node.Children) > 0 262 | line.dimPath = !node.InWorkTree 263 | 264 | newAuthor := opts.key(node.Tally) != lastAuthor 265 | line.showTally = opts.showHidden || newAuthor || len(node.Children) > 0 266 | 267 | lines = append(lines, line) 268 | 269 | childPaths := slices.SortedFunc( 270 | maps.Keys(node.Children), 271 | func(a, b string) int { 272 | // Show directories first 273 | aHasChildren := len(node.Children[a].Children) > 0 274 | bHasChildren := len(node.Children[b].Children) > 0 275 | 276 | if aHasChildren == bHasChildren { 
277 | return strings.Compare(a, b) // Sort alphabetically 278 | } else if aHasChildren { 279 | return -1 280 | } else { 281 | return 1 282 | } 283 | }, 284 | ) 285 | 286 | // Find last non-hidden child 287 | finalChildIndex := 0 288 | for i, p := range childPaths { 289 | child := node.Children[p] 290 | if child.InWorkTree || opts.showHidden { 291 | finalChildIndex = i 292 | } 293 | } 294 | 295 | for i, p := range childPaths { 296 | child := node.Children[p] 297 | lines = toLines( 298 | child, 299 | p, 300 | depth+1, 301 | opts.key(node.Tally), 302 | append(isFinalChild, i == finalChildIndex), 303 | opts, 304 | lines, 305 | ) 306 | } 307 | 308 | return lines 309 | } 310 | 311 | func fmtTallyMetric(t tally.FinalTally, opts printTreeOpts) string { 312 | switch opts.mode { 313 | case tally.CommitMode: 314 | return fmt.Sprintf("(%s)", format.Number(t.Commits)) 315 | case tally.FilesMode: 316 | return fmt.Sprintf("(%s)", format.Number(t.FileCount)) 317 | case tally.LinesMode: 318 | return fmt.Sprintf( 319 | "(%s%s%s / %s%s%s)", 320 | pretty.Green, 321 | format.Number(t.LinesAdded), 322 | pretty.DefaultColor, 323 | pretty.Red, 324 | format.Number(t.LinesRemoved), 325 | pretty.DefaultColor, 326 | ) 327 | case tally.LastModifiedMode: 328 | return fmt.Sprintf( 329 | "(%s)", 330 | format.RelativeTime(progStart, t.LastCommitTime), 331 | ) 332 | case tally.FirstModifiedMode: 333 | return fmt.Sprintf( 334 | "(%s)", 335 | format.RelativeTime(progStart, t.FirstCommitTime), 336 | ) 337 | default: 338 | panic("unrecognized mode in switch") 339 | } 340 | } 341 | 342 | func printTree(lines []treeOutputLine, showEmail bool) { 343 | longest := 0 344 | for _, line := range lines { 345 | indentLen := utf8.RuneCountInString(line.indent) 346 | pathLen := utf8.RuneCountInString(line.path) 347 | if indentLen+pathLen > longest { 348 | longest = indentLen + pathLen 349 | } 350 | } 351 | 352 | tallyStart := longest + 4 // Use at least 4 "." 
to separate path from tally 353 | 354 | for _, line := range lines { 355 | if !line.showLine { 356 | continue 357 | } 358 | 359 | var path string 360 | if line.dimPath { 361 | path = fmt.Sprintf("%s%s%s", pretty.Dim, line.path, pretty.Reset) 362 | } else { 363 | path = line.path 364 | } 365 | 366 | if !line.showTally { 367 | fmt.Printf("%s%s\n", line.indent, path) 368 | continue 369 | } 370 | 371 | var author string 372 | if showEmail { 373 | author = format.Abbrev(format.GitEmail(line.tally.AuthorEmail), 25) 374 | } else { 375 | author = format.Abbrev(line.tally.AuthorName, 25) 376 | } 377 | 378 | indentLen := utf8.RuneCountInString(line.indent) 379 | pathLen := utf8.RuneCountInString(line.path) 380 | separator := strings.Repeat(".", tallyStart-indentLen-pathLen) 381 | 382 | if line.dimTally { 383 | fmt.Printf( 384 | "%s%s%s%s%s%s %s\n", 385 | line.indent, 386 | path, 387 | pretty.Dim, 388 | separator, 389 | pretty.Reset, 390 | author, 391 | line.metric, 392 | ) 393 | } else { 394 | fmt.Printf( 395 | "%s%s%s%s%s %s%s\n", 396 | line.indent, 397 | path, 398 | pretty.Dim, 399 | separator, 400 | author, 401 | line.metric, 402 | pretty.Reset, 403 | ) 404 | } 405 | } 406 | } 407 | --------------------------------------------------------------------------------