├── basic-branching-4.png ├── dag.png ├── data-model-4.png ├── index.org ├── tutorial.org └── withCid.pl /basic-branching-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmgerman/git-mining/5fc7db3854a8160e2e5ddd62632e2bed2f0a52ff/basic-branching-4.png -------------------------------------------------------------------------------- /dag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmgerman/git-mining/5fc7db3854a8160e2e5ddd62632e2bed2f0a52ff/dag.png -------------------------------------------------------------------------------- /data-model-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmgerman/git-mining/5fc7db3854a8160e2e5ddd62632e2bed2f0a52ff/data-model-4.png -------------------------------------------------------------------------------- /index.org: -------------------------------------------------------------------------------- 1 | tutorial.org -------------------------------------------------------------------------------- /tutorial.org: -------------------------------------------------------------------------------- 1 | #+STARTUP: showall 2 | #+STARTUP: lognotestate 3 | #+TAGS: 4 | #+SEQ_TODO: TODO STARTED DONE DEFERRED CANCELLED | WAITING DELEGATED APPT 5 | #+DRAWERS: HIDDEN STATE 6 | #+TITLE: Git mining tutorial 7 | #+CATEGORY: 8 | #+PROPERTY: header-args: lang :varname value 9 | #+PROPERTY: header-args:sqlite :db /path/to/db :colnames yes 10 | #+PROPERTY: header-args:R :results output 11 | #+PROPERTY: header-args:sh :results output :exports both 12 | #+OPTIONS: ^:nil 13 | 14 | * Intro 15 | 16 | This file is information that I wish I had known when I started doing heavy mining of git. 
17 | 18 | * The DAG 19 | 20 | [[./dag.png]] 21 | 22 | - git records commits 23 | 24 | - commits create a DAG 25 | - Each commit has zero or more parents (ordered) 26 | - A commit with more than one parent is a merge-commit 27 | 28 | - Most frequently the DAG is a tree 29 | - one root commit 30 | 31 | - But some projects have many roots 32 | - This is a *feature* of git: you can merge repos!!! 33 | 34 | * Commits 35 | 36 | - Commits record the state of the file system after each commit 37 | - What files/directories (and their attributes) exist 38 | - and what their contents are 39 | - And metadata associated with it: 40 | - Who/when committed to the current repo (its committer) 41 | - Who/when created the commit (its author) 42 | 43 | - git does not store diffs! 44 | - which means diffs are expensive to compute 45 | 46 | 47 | * Non-merge commits 48 | 49 | - Non-merge commits *always* have one single parent (except root commits, which have none) 50 | 51 | * Merge commits 52 | 53 | - Merge commits have multiple parents. 54 | 55 | - Merges with more than 2 parents are called /octopus/ merges 56 | - From Linus, regarding an octopus merge in the Kernel (Jan 2014). 57 | 58 | #+BEGIN_EXAMPLE 59 | Christ. When you start doing octopus merges, you don't do it by half 60 | measures, do you? 61 | 62 | I just pulled the sound updates from Takashi, and as a result got your 63 | merge commit 2cde51fbd0f3. That one has 66 parents. 64 | 65 | That kind of merge either needs to be split up, or gitk needs to be 66 | made better about visualizing it, because it ends up being *so* wide 67 | that the history is hard to read.
68 | #+END_EXAMPLE 69 | 70 | 71 | * Cloning 72 | 73 | 74 | #+BEGIN_SRC sh 75 | cd /tmp 76 | git clone https://github.com/git/git repo 77 | #+END_SRC 78 | 79 | - Cloning retrieves all commits in the repo 80 | - And their tags 81 | 82 | * Branches 83 | 84 | [[./basic-branching-4.png]] 85 | 86 | * Branches 87 | 88 | If you also want a clean list of branches and their corresponding heads: 89 | 90 | #+BEGIN_SRC sh 91 | git -C /tmp/repo fetch --all 92 | #+END_SRC 93 | 94 | 95 | This creates a file called .git/FETCH_HEAD. It contains 96 | 97 | - commit id 98 | - branch name 99 | - location 100 | 101 | #+BEGIN_SRC sh :exports both 102 | head /tmp/repo/.git/FETCH_HEAD 103 | #+END_SRC 104 | 105 | #+RESULTS: 106 | #+begin_example 107 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7 branch 'master' of https://github.com/git/git 108 | 3b9e3c2cede15057af3ff8076c45ad5f33829436 not-for-merge branch 'maint' of https://github.com/git/git 109 | 67fe103aa68d54c3706f4a9cdf52c50f21979829 not-for-merge branch 'next' of https://github.com/git/git 110 | 512253265c9c9d014d29165f4c421e6304266292 not-for-merge branch 'pu' of https://github.com/git/git 111 | 22216bdf6ae7a2709b577b361e5d58dc97270f38 not-for-merge branch 'todo' of https://github.com/git/git 112 | #+end_example 113 | 114 | * Git log 115 | 116 | - This is the workhorse of mining git 117 | - You can extract *almost* everything with it 118 | 119 | #+BEGIN_SRC sh :exports both 120 | exec 2>&1 121 | git log --help | head -20 122 | : 123 | #+END_SRC 124 | 125 | #+begin_example 126 | GIT-LOG(1) Git Manual GIT-LOG(1) 127 | 128 | NAME 129 | git-log - Show commit logs 130 | 131 | SYNOPSIS 132 | git log [] [] [[--] ...] 133 | 134 | DESCRIPTION 135 | Shows the commit logs. 136 | 137 | The command takes options applicable to the git rev-list command to 138 | control what is shown and how, and options applicable to the git diff-* 139 | commands to control how the changes each commit introduces are shown. 
140 | 141 | OPTIONS 142 | --follow 143 | Continue listing the history of a file beyond renames (works only 144 | for a single file). 145 | 146 | #+end_example 147 | 148 | ** Default output 149 | 150 | #+BEGIN_SRC sh :exports both 151 | git -C /tmp/repo log -2 152 | #+END_SRC 153 | 154 | #+RESULTS: 155 | #+begin_example 156 | commit e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7 157 | Author: Junio C Hamano 158 | Date: Fri Feb 24 10:49:58 2017 -0800 159 | 160 | Git 2.12 161 | 162 | Signed-off-by: Junio C Hamano 163 | 164 | commit cca4f20edad04decdc268102f9a6ee2e3803bcc7 165 | Merge: dc9ded4 7e82388 166 | Author: Junio C Hamano 167 | Date: Fri Feb 24 10:48:10 2017 -0800 168 | 169 | Merge branch 'ps/doc-gc-aggressive-depth-update' 170 | 171 | Doc update. 172 | 173 | * ps/doc-gc-aggressive-depth-update: 174 | docs/git-gc: fix default value for `--aggressiveDepth` 175 | #+end_example 176 | 177 | ** Some important git log options: oneline 178 | 179 | | --oneline | one commit per line. 180 | 181 | #+BEGIN_SRC sh :exports both 182 | git -C /tmp/repo log --oneline -5 183 | #+END_SRC 184 | 185 | #+RESULTS: 186 | #+begin_example 187 | e7e07d5 Git 2.12 188 | cca4f20 Merge branch 'ps/doc-gc-aggressive-depth-update' 189 | dc9ded4 Merge branch 'bc/worktree-doc-fix-detached' 190 | c6788b1 Merge branch 'dr/doc-check-ref-format-normalize' 191 | eccf97c Merge branch 'gp/document-dotfiles-in-templates-are-not-copied' 192 | #+end_example 193 | 194 | 195 | ** Some important git log options: no-abbrev 196 | 197 | | --no-abbrev | Force git to always give you full commit ids | 198 | 199 | #+BEGIN_SRC sh :exports both 200 | git -C /tmp/repo log -1 --oneline --no-abbrev -5 201 | #+END_SRC 202 | 203 | #+RESULTS: 204 | #+begin_example 205 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7 Git 2.12 206 | cca4f20edad04decdc268102f9a6ee2e3803bcc7 Merge branch 'ps/doc-gc-aggressive-depth-update' 207 | dc9ded480245c1014b526c4b951d1acb3a60d3fa Merge branch 'bc/worktree-doc-fix-detached' 208 | 
c6788b1f45c6da1139570430b998028e3d2af590 Merge branch 'dr/doc-check-ref-format-normalize' 209 | eccf97c386ea970cc1fa6e6387f4786d05ecd76e Merge branch 'gp/document-dotfiles-in-templates-are-not-copied' 210 | #+end_example 211 | 212 | 213 | * Git log can be parsed with a very simple finite state machine 214 | 215 | This program will: 216 | 217 | - strip any semicolon from the input, since ";" is used as the field separator 218 | - every input line is converted to a "record": 219 | - separator ";" 220 | - first field: commitid 221 | - second field: name of header field, empty for body 222 | - third field: contents of field 223 | - a line starting with "commit " followed by a 40-hex-digit commit id, or starting directly with a 40-hex-digit commit id, will be considered a new commit 224 | 225 | #+name: withCid.pl 226 | #+BEGIN_SRC perl :tangle withCid.pl 227 | #!/usr/bin/perl 228 | # Prefix every line of git log output (read from STDIN or from the files named on the command line) with the id of the commit it belongs to, printing ";"-separated records. 229 | my $strict; # declared but never read anywhere in this script 230 | my $cid; # id of the most recent commit line seen; still undefined for input that precedes the first commit line 231 | while (<>) { 232 | chomp; 233 | s/;//g; # as written this deletes every ';' so it cannot clash with the output separator -- NOTE(review): the description hints that a replacement marker may have been intended; confirm 234 | 235 | if (/^commit ([0-9a-f]{40}) *(.*)$/) { 236 | # default git log output: the "commit" keyword followed by the full 40-hex-digit id; remember the id and print it with the rest of the line 237 | $cid = $1; 238 | print "$1;$2\n"; 239 | } elsif (/^([0-9a-f]{40}) *(.*)$/) { 240 | # --oneline --no-abbrev output: the line starts with the full id, without the "commit" keyword 241 | $cid = $1; 242 | print "$1;$2\n"; 243 | } else { 244 | if (/^(\S+):\s+(.*)$/) { 245 | # if it has a non-space sequence followed by : 246 | # then it is a header field.
split 247 | print "$cid;$1;$2\n"; 248 | } else { 249 | print "$cid;;$_\n"; 250 | } 251 | } 252 | } 253 | #+END_SRC 254 | 255 | #+RESULTS: withCid.pl 256 | 257 | #+BEGIN_SRC sh :exports both 258 | git -C /tmp/repo log -2 | perl withCid.pl 259 | #+END_SRC 260 | 261 | #+RESULTS: 262 | #+begin_example 263 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7; 264 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7;Author;Junio C Hamano 265 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7;Date;Fri Feb 24 10:49:58 2017 -0800 266 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7;; 267 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7;; Git 2.12 268 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7;; 269 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7;; Signed-off-by: Junio C Hamano 270 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7;; 271 | cca4f20edad04decdc268102f9a6ee2e3803bcc7; 272 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;Merge;dc9ded4 7e82388 273 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;Author;Junio C Hamano 274 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;Date;Fri Feb 24 10:48:10 2017 -0800 275 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;; 276 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;; Merge branch 'ps/doc-gc-aggressive-depth-update' 277 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;; 278 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;; Doc update.
279 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;; 280 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;; * ps/doc-gc-aggressive-depth-update: 281 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;; docs/git-gc: fix default value for `--aggressiveDepth` 282 | #+end_example 283 | 284 | 285 | So now we can extract any information we want: 286 | 287 | #+BEGIN_SRC sh :exports both 288 | git -C /tmp/repo log -6 | perl withCid.pl | grep ';Author;' 289 | #+END_SRC 290 | 291 | #+RESULTS: 292 | #+begin_example 293 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7;Author;Junio C Hamano 294 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;Author;Junio C Hamano 295 | dc9ded480245c1014b526c4b951d1acb3a60d3fa;Author;Junio C Hamano 296 | c6788b1f45c6da1139570430b998028e3d2af590;Author;Junio C Hamano 297 | eccf97c386ea970cc1fa6e6387f4786d05ecd76e;Author;Junio C Hamano 298 | c0588fd61aa6da96824eec60719c505b66239dd6;Author;Junio C Hamano 299 | #+end_example 300 | 301 | * But git log is already capable of doing this: format 302 | 303 | #+BEGIN_SRC sh :exports both 304 | man git-log | egrep -A 20 'format:$' 305 | #+END_SRC 306 | 307 | #+RESULTS: 308 | #+begin_example 309 | · format: 310 | 311 | The format: format allows you to specify which information 312 | you want to show. It works a little bit like printf format, with 313 | the notable exception that you get a newline with %n instead of \n. 
314 | 315 | E.g, format:"The author of %h was %an, %ar%nThe title was >>%s<<%n" 316 | would show something like this: 317 | 318 | The author of fe6e0ee was Junio C Hamano, 23 hours ago 319 | The title was >>t4119: test autocomputing -p for traditional diff input.<< 320 | 321 | The placeholders are: 322 | 323 | · %H: commit hash 324 | 325 | · %h: abbreviated commit hash 326 | 327 | · %T: tree hash 328 | 329 | · %t: abbreviated tree hash 330 | #+end_example 331 | 332 | 333 | * git log format: most useful options 334 | 335 | 336 | | %H | commit hash | 337 | | %P | parent hashes | 338 | | %an | author name | 339 | | %ae | author email | 340 | | %ad | author date (format respects --date= option) | 341 | | %aD | author date, RFC2822 style | 342 | | %cn | committer name | 343 | | %ce | committer email | 344 | | %cd | committer date (format respects --date= option) | 345 | | %cD | committer date, RFC2822 style | 346 | | %s | subject | 347 | | %f | sanitized subject line, suitable for a filename | 348 | | %b | body | 349 | 350 | * git log format... 
351 | 352 | or you can learn how to do it via the format option: 353 | 354 | #+BEGIN_SRC sh :exports both 355 | git -C /tmp/repo log -6 --format='%H;%an;%ae' 356 | #+END_SRC 357 | 358 | #+RESULTS: 359 | #+begin_example 360 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7;Junio C Hamano;gitster@pobox.com 361 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;Junio C Hamano;gitster@pobox.com 362 | dc9ded480245c1014b526c4b951d1acb3a60d3fa;Junio C Hamano;gitster@pobox.com 363 | c6788b1f45c6da1139570430b998028e3d2af590;Junio C Hamano;gitster@pobox.com 364 | eccf97c386ea970cc1fa6e6387f4786d05ecd76e;Junio C Hamano;gitster@pobox.com 365 | c0588fd61aa6da96824eec60719c505b66239dd6;Junio C Hamano;gitster@pobox.com 366 | #+end_example 367 | 368 | #+BEGIN_SRC sh :exports both 369 | git -C /tmp/repo log -6 --format='%H;Commiter;%cn; CommiterDate;%ct' 370 | #+END_SRC 371 | 372 | #+RESULTS: 373 | #+begin_example 374 | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7;Commiter;Junio C Hamano; CommiterDate;1487962198 375 | cca4f20edad04decdc268102f9a6ee2e3803bcc7;Commiter;Junio C Hamano; CommiterDate;1487962090 376 | dc9ded480245c1014b526c4b951d1acb3a60d3fa;Commiter;Junio C Hamano; CommiterDate;1487962090 377 | c6788b1f45c6da1139570430b998028e3d2af590;Commiter;Junio C Hamano; CommiterDate;1487962089 378 | eccf97c386ea970cc1fa6e6387f4786d05ecd76e;Commiter;Junio C Hamano; CommiterDate;1487962089 379 | c0588fd61aa6da96824eec60719c505b66239dd6;Commiter;Junio C Hamano; CommiterDate;1487962088 380 | #+end_example 381 | 382 | * You can easily import info into a database 383 | 384 | *Warning*. you have to be careful about using separators that might appear in the fields. It is safer to use your own 385 | filter (see my example above). that way you can make sure the separator is never part of a field. Or 386 | hack it as I have done it below. You could restore the semicolons once data is in the database. 
387 | 388 | In this example I am going to import: 389 | - cid, author, authoremail, authortime, committer, comitteremail, commitdate and summary 390 | 391 | #+BEGIN_SRC sh :exports both 392 | git -C /tmp/repo log --format='%H<SEP>%an<SEP>%ae<SEP>%at<SEP>%cn<SEP>%ce<SEP>%ct<SEP>%s' > /tmp/rip.rip 393 | # replace semicolons with a marker so we can import it 394 | perl -pe 's/;/<SEMI>/g;' /tmp/rip.rip > /tmp/rip.rip2 395 | # replace <SEP> with the ; delimiter (I like ;) 396 | perl -pe 's/<SEP>/;/g' /tmp/rip.rip2 > /tmp/rip.rip3 397 | #+END_SRC 398 | 399 | #+RESULTS: 400 | 401 | #+BEGIN_SRC sqlite :db /tmp/mydb.sql :exports both 402 | drop table if exists commits; 403 | create table commits(cid varchar, author varchar, authoremail varchar, authortime int, 404 | committer varchar, commiteremail varchar, commitdate int, summary varchar); 405 | .mode csv commits 406 | .separator ; 407 | .import "/tmp/rip.rip3" commits 408 | select count(*) from commits; 409 | #+END_SRC 410 | 411 | #+RESULTS: 412 | | count(*) | 413 | |----------| 414 | | 43937 | 415 | 416 | #+BEGIN_SRC sqlite :db /tmp/mydb.sql 417 | select * from commits limit 10; 418 | #+END_SRC 419 | 420 | #+RESULTS: 421 | | cid | author | authoremail | authortime | committer | commiteremail | commitdate | summary | 422 | |------------------------------------------+--------------------+--------------------------+------------+----------------+--------------------------+------------+------------------------------------------------------------------| 423 | | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7 | Junio C Hamano | gitster@pobox.com | 1487962198 | Junio C Hamano | gitster@pobox.com | 1487962198 | Git 2.12 | 424 | | cca4f20edad04decdc268102f9a6ee2e3803bcc7 | Junio C Hamano | gitster@pobox.com | 1487962090 | Junio C Hamano | gitster@pobox.com | 1487962090 | Merge branch 'ps/doc-gc-aggressive-depth-update' | 425 | | dc9ded480245c1014b526c4b951d1acb3a60d3fa | Junio C Hamano | gitster@pobox.com | 1487962090 | Junio C Hamano | gitster@pobox.com | 1487962090 | Merge branch
'bc/worktree-doc-fix-detached' | 426 | | c6788b1f45c6da1139570430b998028e3d2af590 | Junio C Hamano | gitster@pobox.com | 1487962089 | Junio C Hamano | gitster@pobox.com | 1487962089 | Merge branch 'dr/doc-check-ref-format-normalize' | 427 | | eccf97c386ea970cc1fa6e6387f4786d05ecd76e | Junio C Hamano | gitster@pobox.com | 1487962089 | Junio C Hamano | gitster@pobox.com | 1487962089 | Merge branch 'gp/document-dotfiles-in-templates-are-not-copied' | 428 | | c0588fd61aa6da96824eec60719c505b66239dd6 | Junio C Hamano | gitster@pobox.com | 1487962088 | Junio C Hamano | gitster@pobox.com | 1487962088 | Merge branch 'rt/align-add-i-help-text' | 429 | | af4027f2decece569e7a565e592ca69677c27996 | Junio C Hamano | gitster@pobox.com | 1487962087 | Junio C Hamano | gitster@pobox.com | 1487962088 | Merge branch 'bc/blame-doc-fix' | 430 | | 7e82388024504be733ba23c97f884148870fe9cc | Patrick Steinhardt | ps@pks.im | 1487926005 | Junio C Hamano | gitster@pobox.com | 1487959152 | docs/git-gc: fix default value for `--aggressiveDepth` | 431 | | cc24d7d21f4fe034b9559dcfd669d9639b9d52a4 | Junio C Hamano | gitster@pobox.com | 1487958941 | Junio C Hamano | gitster@pobox.com | 1487958941 | Merge tag 'l10n-2.12.0-rnd2' of git://github.com/git-l10n/git-po | 432 | | 1a79b2f1795a6ec4c70674ce930843aa59bff859 | Jiang Xin | worldhello.net@gmail.com | 1485528657 | Jiang Xin | worldhello.net@gmail.com | 1487953154 | l10n: zh_CN: for git v2.12.0 l10n round 2 | 433 | 434 | 435 | 436 | #+BEGIN_SRC sqlite :db /tmp/mydb.sql 437 | select cid, summary from commits limit 4; 438 | #+END_SRC 439 | 440 | #+RESULTS: 441 | | cid | summary | 442 | |------------------------------------------+--------------------------------------------------| 443 | | e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7 | Git 2.12 | 444 | | cca4f20edad04decdc268102f9a6ee2e3803bcc7 | Merge branch 'ps/doc-gc-aggressive-depth-update' | 445 | | dc9ded480245c1014b526c4b951d1acb3a60d3fa | Merge branch 'bc/worktree-doc-fix-detached' | 446 | | 
c6788b1f45c6da1139570430b998028e3d2af590 | Merge branch 'dr/doc-check-ref-format-normalize' | 447 | 448 | 449 | * Branches 450 | 451 | When cloning, you get all commits in the repo, but not all the branch info 452 | 453 | See [[http://github.com/dmgerman/xournal]] 454 | 455 | 456 | #+BEGIN_SRC sh :exports both 457 | cd /tmp/ 458 | git clone http://github.com/dmgerman/xournal 459 | #+END_SRC 460 | 461 | #+RESULTS: 462 | 463 | 464 | #+BEGIN_SRC sh :exports both 465 | git -C /tmp/xournal branch --all 466 | #+END_SRC 467 | 468 | #+RESULTS: 469 | #+begin_example 470 | \* master 471 | remotes/origin/HEAD -> origin/master 472 | remotes/origin/bookmarks 473 | remotes/origin/cl-options 474 | remotes/origin/cmake 475 | remotes/origin/dmgwin 476 | remotes/origin/dot-paper-style 477 | remotes/origin/fix-update-width 478 | remotes/origin/glade 479 | remotes/origin/gtk3 480 | remotes/origin/horizontal-scroll-lock 481 | remotes/origin/layers 482 | remotes/origin/master 483 | remotes/origin/message 484 | remotes/origin/mru 485 | remotes/origin/next 486 | remotes/origin/popupmenu 487 | remotes/origin/quick-zoom 488 | remotes/origin/redo 489 | remotes/origin/snap-to-grid 490 | remotes/origin/upstream 491 | #+end_example 492 | 493 | Use fetch --all to create a nice file that has all this info 494 | 495 | #+BEGIN_SRC sh :exports both 496 | git -C /tmp/xournal fetch --all 497 | #+END_SRC 498 | 499 | 500 | Then you have a file: .git/FETCH_HEAD that contains all information of branches 501 | 502 | #+BEGIN_SRC sh :exports both 503 | cat /tmp/xournal/.git/FETCH_HEAD | head -5 504 | #+END_SRC 505 | 506 | #+RESULTS: 507 | #+begin_example 508 | 015aa5dfb1b612c208192cfb9692d1eb6b8fd297 branch 'master' of http://github.com/dmgerman/xournal 509 | b3f07476632ed5b742af3506cb7cd4272956d339 not-for-merge branch 'bookmarks' of http://github.com/dmgerman/xournal 510 | ad1febce77ed17987dda1bca940fc3e555e6b503 not-for-merge branch 'cl-options' of http://github.com/dmgerman/xournal 511 | 
59cb4a16aba5042b3b552c4a7f3187196b99d953 not-for-merge branch 'cmake' of http://github.com/dmgerman/xournal 512 | 3b3cb888333d165fe2dd05dfd10560f40ca7befa not-for-merge branch 'dmgwin' of http://github.com/dmgerman/xournal 513 | #+end_example 514 | 515 | 516 | * Commits in Branches 517 | 518 | By default, git log gives you the log of the "current" branch 519 | 520 | - From the current commit backwards to the roots of the repo 521 | 522 | #+BEGIN_SRC sh :cmdline -v :exports both 523 | git -C /tmp/xournal checkout master 524 | git -C /tmp/xournal branch 525 | git -C /tmp/xournal log -1 526 | git -C /tmp/xournal checkout remotes/origin/next 527 | git -C /tmp/xournal branch 528 | git -C /tmp/xournal log -1 529 | #+END_SRC 530 | 531 | #+RESULTS: 532 | #+begin_example 533 | git -C /tmp/xournal checkout master 534 | Switched to branch 'master' 535 | Your branch is up-to-date with 'origin/master'. 536 | 537 | git -C /tmp/xournal branch 538 | *master 539 | 540 | git -C /tmp/xournal log -1 541 | commit 015aa5dfb1b612c208192cfb9692d1eb6b8fd297 542 | Author: Denis Auroux 543 | Date: Wed Aug 17 10:03:35 2016 -0700 544 | 545 | Add .gitignore 546 | git -C /tmp/xournal checkout remotes/origin/next 547 | Note: checking out 'remotes/origin/next'. 548 | 549 | You are in 'detached HEAD' state. You can look around, make experimental 550 | changes and commit them, and you can discard any commits you make in this 551 | state without impacting any branches by performing another checkout. 552 | 553 | If you want to create a new branch to retain commits you create, you may 554 | do so (now or later) by using -b with the checkout command again. Example: 555 | 556 | git checkout -b 557 | 558 | HEAD is now at 031f268... 
merged kinetic scroll by Immi 559 | git -C /tmp/xournal branch 560 | *(HEAD detached at origin/next) 561 | master 562 | git -C /tmp/xournal log -1 563 | commit 031f268cf4ee67bb2e26bfe0a842fa48352b6d24 564 | Merge: b8712f4 ef22a43 565 | Author: D German 566 | Date: Sun Sep 11 22:16:02 2016 -0700 567 | 568 | merged kinetic scroll by Immi 569 | #+end_example 570 | 571 | * Commits in Branches... 572 | 573 | if you want all commits, then use glob='*' 574 | 575 | - includes all branches 576 | - even "detached branches" 577 | - irrespective to current commit 578 | 579 | #+BEGIN_SRC sh :cmdline -v :exports both 580 | git -C /tmp/xournal checkout master 581 | git -C /tmp/xournal branch 582 | git -C /tmp/xournal log --oneline | wc -l 583 | git -C /tmp/xournal log --oneline --glob='*' | wc -l 584 | git -C /tmp/xournal checkout remotes/origin/next 585 | git -C /tmp/xournal branch 586 | git -C /tmp/xournal log --oneline | wc -l 587 | git -C /tmp/xournal log --oneline --glob='*'| wc -l 588 | #+END_SRC 589 | 590 | #+RESULTS: 591 | #+begin_example 592 | Already on 'master' 593 | Your branch is up-to-date with 'origin/master'. 594 | *master 595 | 171 596 | 352 597 | Note: checking out 'remotes/origin/next'. 598 | 599 | You are in 'detached HEAD' state. You can look around, make experimental 600 | changes and commit them, and you can discard any commits you make in this 601 | state without impacting any branches by performing another checkout. 602 | 603 | If you want to create a new branch to retain commits you create, you may 604 | do so (now or later) by using -b with the checkout command again. Example: 605 | 606 | git checkout -b 607 | 608 | HEAD is now at 031f268... 
merged kinetic scroll by Immi 609 | *(HEAD detached at origin/next) 610 | master 611 | 231 612 | 352 613 | #+end_example 614 | 615 | 616 | 617 | * Parents 618 | 619 | [[./dag.png]] 620 | 621 | The DAG is the fundamental data structure of git 622 | 623 | - Every commit has zero or more parents 624 | - Current heads of branches (including master) might have zero children 625 | - At least one branch will always have zero children 626 | - A branch head with children means it has been merged 627 | - Roots of the branches have zero parents 628 | 629 | #+BEGIN_SRC sh :exports both 630 | git -C /tmp/xournal log --pretty='%H;%P' -n 5 origin/next 631 | #+END_SRC 632 | 633 | 634 | #+begin_example 635 | c08da1d700a818cada297aee949b40da780b536e;661075ab91e296fb5b702be379967bd581290942 03b7651644aea0a95fd09f1566d4a667df8d7950 636 | 03b7651644aea0a95fd09f1566d4a667df8d7950;addb9bf2c0be2ff61e72d3c565253d319311d2f8 637 | 661075ab91e296fb5b702be379967bd581290942;ae37e72839ae59308974b9375eae0c8630c4795c addb9bf2c0be2ff61e72d3c565253d319311d2f8 638 | ae37e72839ae59308974b9375eae0c8630c4795c;a40e13ec95dc9b8fbc1578ec5be8cbce6e7c839c f28489f7f7477e59a9015e17a795c88ab89d977d 639 | addb9bf2c0be2ff61e72d3c565253d319311d2f8;f28489f7f7477e59a9015e17a795c88ab89d977d 640 | #+end_example 641 | 642 | * Parents... 643 | 644 | - Merges with more than 2 parents are called /octopus/ merges 645 | - From Linus, regarding an octopus merge in the Kernel (Jan 2014). 646 | 647 | #+BEGIN_EXAMPLE 648 | Christ. When you start doing octopus merges, you don't do it by half 649 | measures, do you? 650 | 651 | I just pulled the sound updates from Takashi, and as a result got your 652 | merge commit 2cde51fbd0f3. That one has 66 parents. 653 | 654 | That kind of merge either needs to be split up, or gitk needs to be 655 | made better about visualizing it, because it ends up being *so* wide 656 | that the history is hard to read. 657 | #+END_EXAMPLE 658 | 659 | 660 | * Parents...
661 | 662 | - Parents are ordered 663 | - First parent determines the branch in which the merge was applied 664 | - Merging strategy determines how to resolve merges (an option of git merge) 665 | - If the merge has a patch associated with it, 666 | - then there was a merge conflict and that patch is the manual fix 667 | 668 | * Roots: 669 | 670 | - there might be more than one root (linux has 4, git has more) 671 | - commits without parents 672 | 673 | ** Merges 674 | 675 | - You can ask for only merges 676 | 677 | #+BEGIN_SRC sh :exports both 678 | git -C /tmp/xournal log --merges -5 --glob='*' --pretty='%H %P' 679 | #+END_SRC 680 | 681 | #+RESULTS: 682 | #+begin_example 683 | 031f268cf4ee67bb2e26bfe0a842fa48352b6d24 b8712f4ba54ed72e4b2b2a3829620cfa86740d9c ef22a4356ac80742242aff8906eec646b8692071 684 | 5a47ec2006f731234f4aeb59c45ad88b051dc91a 6bb4780799f7480eaebf1737a851739595db599c 356bed06c8b3370ac74d9c7c8c316b9c56cdeebe 685 | 0b2aee20e9d102235e006a0a11581e4914111de2 84b90eae4bbbe63d073d321ca1a594042e690c69 f503bba9ff65b45ead76ffa6372f050a90bd4b06 686 | 84b90eae4bbbe63d073d321ca1a594042e690c69 c08da1d700a818cada297aee949b40da780b536e 015aa5dfb1b612c208192cfb9692d1eb6b8fd297 687 | c08da1d700a818cada297aee949b40da780b536e 661075ab91e296fb5b702be379967bd581290942 03b7651644aea0a95fd09f1566d4a667df8d7950 688 | #+end_example 689 | 690 | * Files modified 691 | 692 | * Blobs 693 | 694 | [[./data-model-4.png]] 695 | 696 | * Files modified... 
697 | 698 | - Several ways to extract it: 699 | 700 | | --numstat | 701 | | --stat | 702 | 703 | ** --numstat 704 | 705 | #+BEGIN_SRC sh :exports both 706 | git -C /tmp/xournal log -2 --numstat 707 | #+END_SRC 708 | 709 | #+RESULTS: 710 | #+begin_example 711 | commit 015aa5dfb1b612c208192cfb9692d1eb6b8fd297 712 | Author: Denis Auroux 713 | Date: Wed Aug 17 10:03:35 2016 -0700 714 | 715 | Add .gitignore 716 | 717 | 24 0 .gitignore 718 | 719 | commit 565e4cb0c1e59fe19c7520c3171e21beb948a143 720 | Author: Denis Auroux 721 | Date: Fri Jul 15 23:24:12 2016 +0200 722 | 723 | fix crash when pasting text or images via xclip (bug #171) 724 | 725 | 1 0 ChangeLog 726 | 1 2 src/xo-clipboard.c 727 | #+end_example 728 | 729 | ** --stat 730 | 731 | #+BEGIN_SRC sh :exports both 732 | git -C /tmp/xournal log -2 --stat 733 | #+END_SRC 734 | 735 | #+RESULTS: 736 | #+begin_example 737 | commit 015aa5dfb1b612c208192cfb9692d1eb6b8fd297 738 | Author: Denis Auroux 739 | Date: Wed Aug 17 10:03:35 2016 -0700 740 | 741 | Add .gitignore 742 | 743 | .gitignore | 24 ++++++++++++++++++++++++ 744 | 1 file changed, 24 insertions(+) 745 | 746 | commit 565e4cb0c1e59fe19c7520c3171e21beb948a143 747 | Author: Denis Auroux 748 | Date: Fri Jul 15 23:24:12 2016 +0200 749 | 750 | fix crash when pasting text or images via xclip (bug #171) 751 | 752 | ChangeLog | 1 + 753 | src/xo-clipboard.c | 3 +-- 754 | 2 files changed, 2 insertions(+), 2 deletions(-) 755 | #+end_example 756 | 757 | ** How they deal with added files: 758 | 759 | None of them identify files added or removed 760 | 761 | - in this commit, the file po/ja.po was added 762 | 763 | #+BEGIN_SRC sh :cmdline -v :exports both 764 | printf "With --stat\n\n" 765 | git -C /tmp/xournal log -1 --stat 8e7af75f1c297a0144fce9db84450d4b9fff7090 766 | printf "\nWith --numstat\n\n" 767 | git -C /tmp/xournal log -1 --numstat 8e7af75f1c297a0144fce9db84450d4b9fff7090 768 | #+END_SRC 769 | 770 | #+RESULTS: 771 | #+begin_example 772 | With --stat 773 | 774 | commit 
8e7af75f1c297a0144fce9db84450d4b9fff7090 775 | Author: Denis Auroux 776 | Date: Fri Nov 22 10:52:32 2013 -0800 777 | 778 | Add Japanese translation (by Hiroshi Saito) 779 | 780 | AUTHORS | 4 +- 781 | ChangeLog | 1 + 782 | po/ChangeLog | 1 + 783 | po/LINGUAS | 1 + 784 | po/Makefile.in.in | 280 ------------ 785 | po/ja.po | 1277 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 786 | 6 files changed, 1282 insertions(+), 282 deletions(-) 787 | 788 | With --numstat 789 | 790 | commit 8e7af75f1c297a0144fce9db84450d4b9fff7090 791 | Author: Denis Auroux 792 | Date: Fri Nov 22 10:52:32 2013 -0800 793 | 794 | Add Japanese translation (by Hiroshi Saito) 795 | 796 | 2 2 AUTHORS 797 | 1 0 ChangeLog 798 | 1 0 po/ChangeLog 799 | 1 0 po/LINGUAS 800 | 0 280 po/Makefile.in.in 801 | 1277 0 po/ja.po 802 | #+end_example 803 | 804 | 805 | ** issues with --numstat or --stat 806 | 807 | Several issues: 808 | 809 | - git log does not indicate if the file is added or removed 810 | - it does not output numbers of added/deleted for binary files (only - -) 811 | - Time consuming: it requires computing a diff 812 | - A simple test: git log -100000 (log of 10k commits) of the linux kernel 813 | | --numstat | 66 seconds | 814 | | --stat | 66 seconds | 815 | | --raw | 10 seconds | 816 | 817 | ** Better solution: --raw 818 | 819 | #+BEGIN_SRC sh :exports both 820 | git -C /tmp/xournal log -1 --no-abbrev --raw 8e7af75f1c297a0144fce9db84450d4b9fff7090 821 | #+END_SRC 822 | 823 | #+RESULTS: 824 | #+begin_example 825 | commit 8e7af75f1c297a0144fce9db84450d4b9fff7090 826 | Author: Denis Auroux 827 | Date: Fri Nov 22 10:52:32 2013 -0800 828 | 829 | Add Japanese translation (by Hiroshi Saito) 830 | 831 | :100644 100644 b06824a1794b3fc57e6f20daf5d08d7a1676cabd a75eb8323c5e436b1575e298d15c2fb55d239989 M AUTHORS 832 | :100644 100644 9e94a076aa0d43fd1fb2334b841d26c697bf78f3 4362844c11e55d54b05475878ed094e1654dabdf M ChangeLog 833 | :100644 100644 e4842ac28c06f8ecc1c890d8a40274bd2748bcb9 
4183b0780214178c84aa17583a45a47c5e7bd050 M po/ChangeLog 834 | :100644 100644 5f08a210539c0397bbfe46d4e98fdbe9b07c60b4 9c24ed16b02d55023e7eeba7323d31280f559ceb M po/LINGUAS 835 | :100644 000000 53b496dc6896de97115014f78ade708b766a6fc1 0000000000000000000000000000000000000000 D po/Makefile.in.in 836 | :000000 100644 0000000000000000000000000000000000000000 9fcb55aed95a2c5c5a01c0b6e1d2a145bd21d9cd A po/ja.po 837 | #+end_example 838 | 839 | Fields: 840 | 841 | | File mode before | | 842 | | File mode after | | 843 | | blob before | | 844 | | blob after | | 845 | | operation | M modified, D deleted, R rename, A added | 846 | | filename | | 847 | 848 | For merges, it contains n+1 info: 849 | 850 | - File info before in each branch 851 | - File info after merge 852 | 853 | but only when the merge included a patch 854 | 855 | ** Files modified: --name-status 856 | 857 | #+BEGIN_SRC sh :exports both 858 | git -C /tmp/xournal log --oneline --no-abbrev -1 --name-status 8e7af75f1c297a0144fce9db84450d4b9fff7090 859 | #+END_SRC 860 | 861 | #+RESULTS: 862 | #+begin_example 863 | 8e7af75f1c297a0144fce9db84450d4b9fff7090 Add Japanese translation (by Hiroshi Saito) 864 | M AUTHORS 865 | M ChangeLog 866 | M po/ChangeLog 867 | M po/LINGUAS 868 | D po/Makefile.in.in 869 | A po/ja.po 870 | #+end_example 871 | 872 | - Subset info of --raw 873 | - Easier to parse 874 | 875 | ** When to use 876 | 877 | - *--numstat/--stat*: When you need the "churn" 878 | - *--raw*: when you need the files that were modified/added/removed 879 | 880 | 881 | You can now easily preprocess the data to load it into our database 882 | 883 | #+BEGIN_SRC sh :exports both 884 | git -C /tmp/xournal log --oneline --no-abbrev --name-status -10 | perl withCid.pl | egrep $';;' | perl -pe 's/\t/;/;s/;;/;/;' 885 | #+END_SRC 886 | 887 | #+RESULTS: 888 | #+begin_example 889 | 015aa5dfb1b612c208192cfb9692d1eb6b8fd297;A;.gitignore 890 | 565e4cb0c1e59fe19c7520c3171e21beb948a143;M;ChangeLog 891 | 
565e4cb0c1e59fe19c7520c3171e21beb948a143;M;src/xo-clipboard.c 892 | 50184f046f8dc80b145805aaec34ceb4a41781f6;M;ChangeLog 893 | 50184f046f8dc80b145805aaec34ceb4a41781f6;M;src/xo-callbacks.c 894 | f15bc362de2c5401600c420395b765566c2152e1;M;ChangeLog 895 | f15bc362de2c5401600c420395b765566c2152e1;M;src/xo-file.c 896 | f15bc362de2c5401600c420395b765566c2152e1;M;src/xournal.h 897 | f28489f7f7477e59a9015e17a795c88ab89d977d;M;ChangeLog 898 | f28489f7f7477e59a9015e17a795c88ab89d977d;M;src/xo-callbacks.c 899 | f28489f7f7477e59a9015e17a795c88ab89d977d;M;src/xournal.h 900 | 1b6ef5a24461b72bf2e98f1bfaad3567f0b9f542;M;src/xo-misc.c 901 | c10f6e8d0793a40bd6e19fad861194bd40b0ffec;M;src/main.c 902 | b885aab5d6c3e836c844d27ed6919ef4e1292c9c;M;src/main.c 903 | f5c777d4e081a886baed8bbe2aee4d84b3562722;M;src/xo-callbacks.c 904 | 2b74a01bae6c282e12006579cf285e21c847472e;M;src/xo-file.c 905 | #+end_example 906 | 907 | 908 | * Renames 909 | 910 | this file was renamed more than once (jruby) 911 | 912 | #+BEGIN_EXAMPLE 913 | truffle/src/main/java/org/jruby/truffle/platform/posix/TrufflePOSIXHandler.java 914 | #+END_EXAMPLE 915 | 916 | #+BEGIN_SRC sh :exports both 917 | git -C /tmp/jruby log --oneline --follow -10 -- truffle/src/main/java/org/jruby/truffle/platform/posix/TrufflePOSIXHandler.java 918 | #+END_SRC 919 | 920 | #+RESULTS: 921 | #+begin_example 922 | 745fccc [Truffle] File rename case error. 923 | 7f591b6 [Truffle] Stop implementing POSIX. 924 | e2de847 [Truffle] getpid can be done in the POSIXHandler. 925 | 4ae93d2 [Truffle] .posix package. 926 | f4d6315 [Truffle] Make it clear that Ruby is the JRuby runtime. 927 | a157011 [Truffle] Organise imports. 928 | 30d975e [Truffle] Update copyright years - mostly moves but I'll run it anyway as Git will give the correct history if needs be. 929 | 29c937f [Truffle] Exception package. 930 | 587a1a6 [Truffle] Move a bunch of stuff out of runtime. 931 | 85e3ab5 [Truffle] Move context and language to the top level. 
932 | #+end_example 933 | 934 | ** How different commands report it 935 | 936 | Here is an example of how this rename is reported by different runs of git log on one of the commits that did the rename 937 | 938 | Using --stat 939 | 940 | #+BEGIN_SRC sh :exports both 941 | git -C /tmp/jruby log --oneline --no-abbrev --stat 745fcccc684131c16159523c277de01737d764f0 -1 942 | #+END_SRC 943 | 944 | #+RESULTS: 945 | #+begin_example 946 | 745fcccc684131c16159523c277de01737d764f0 [Truffle] File rename case error. 947 | .../platform/posix/{TrufflePOSIXHandler.java => TrufflePosixHandler.java} | 0 948 | 1 file changed, 0 insertions(+), 0 deletions(-) 949 | #+end_example 950 | 951 | Using --numstat 952 | 953 | #+BEGIN_SRC sh :exports both 954 | git -C /tmp/jruby log --oneline --no-abbrev --numstat 745fcccc684131c16159523c277de01737d764f0 -1 955 | #+END_SRC 956 | 957 | #+RESULTS: 958 | #+begin_example 959 | 745fcccc684131c16159523c277de01737d764f0 [Truffle] File rename case error. 960 | 0 0 truffle/src/main/java/org/jruby/truffle/platform/posix/{TrufflePOSIXHandler.java => TrufflePosixHandler.java} 961 | #+end_example 962 | 963 | Using --name-status 964 | 965 | #+BEGIN_SRC sh :exports both 966 | git -C /tmp/jruby log --oneline --no-abbrev --name-status 745fcccc684131c16159523c277de01737d764f0 -1 967 | #+END_SRC 968 | 969 | #+RESULTS: 970 | #+begin_example 971 | 745fcccc684131c16159523c277de01737d764f0 [Truffle] File rename case error. 972 | R100 truffle/src/main/java/org/jruby/truffle/platform/posix/TrufflePOSIXHandler.java truffle/src/main/java/org/jruby/truffle/platform/posix/TrufflePosixHandler.java 973 | #+end_example 974 | 975 | Using --summary 976 | 977 | #+BEGIN_SRC sh :exports both 978 | git -C /tmp/jruby log --oneline --no-abbrev --summary 745fcccc684131c16159523c277de01737d764f0 -1 979 | #+END_SRC 980 | 981 | #+RESULTS: 982 | #+begin_example 983 | 745fcccc684131c16159523c277de01737d764f0 [Truffle] File rename case error. 
984 | rename truffle/src/main/java/org/jruby/truffle/platform/posix/{TrufflePOSIXHandler.java => TrufflePosixHandler.java} (100%) 985 | #+end_example 986 | 987 | Using --raw 988 | 989 | #+BEGIN_SRC sh :exports both 990 | git -C /tmp/jruby log --oneline --no-abbrev --raw 745fcccc684131c16159523c277de01737d764f0 -1 991 | #+END_SRC 992 | 993 | #+RESULTS: 994 | #+begin_example 995 | 745fcccc684131c16159523c277de01737d764f0 [Truffle] File rename case error. 996 | :100644 100644 3b785cd8bfee363b17097a118673bc6d9d14185f 3b785cd8bfee363b17097a118673bc6d9d14185f R100 truffle/src/main/java/org/jruby/truffle/platform/posix/TrufflePOSIXHandler.java truffle/src/main/java/org/jruby/truffle/platform/posix/TrufflePosixHandler.java 997 | #+end_example 998 | 999 | ** Renames.. that R... 1000 | 1001 | - What is that number after the R? 1002 | - how similar the old and the new versions are 1003 | - when a file is deleted and another one is added, git computes a similarity metric 1004 | 1005 | #+BEGIN_SRC sh :exports both 1006 | git -C /tmp/jruby log --name-status --oneline --no-abbrev | egrep '^R' | head -5 1007 | #+END_SRC 1008 | 1009 | #+RESULTS: 1010 | #+begin_example 1011 | R100 spec/truffle/specs/truffle/digest.rb spec/truffle/specs/truffle/digest_spec.rb 1012 | R087 truffle/src/main/java/org/jruby/truffle/parser/TempSourceSection.java truffle/src/main/java/org/jruby/truffle/language/SourceIndexLength.java 1013 | R081 truffle/src/main/java/org/jruby/truffle/parser/LazyTranslationNode.java truffle/src/main/java/org/jruby/truffle/language/LazyRubyNode.java 1014 | R088 truffle/src/main/java/org/jruby/truffle/language/RubySourceSection.java truffle/src/main/java/org/jruby/truffle/parser/TempSourceSection.java 1015 | R080 core/src/main/java/org/jruby/internal/runtime/methods/CompiledIRMetaClassBody.java core/src/main/java/org/jruby/internal/runtime/methods/CompiledIRNoProtocolMethod.java 1016 | #+end_example 1017 | 1018 | Let us add the commit id using our command withCid.pl: 1019 | 
1020 | #+BEGIN_SRC sh :exports both 1021 | git -C /tmp/jruby log --name-status --oneline --no-abbrev | perl withCid.pl | grep ';;R' | head -10 1022 | #+END_SRC 1023 | 1024 | #+RESULTS: 1025 | #+begin_example 1026 | 4975495fe0459141f0df6306f522fe5673f61b03;;R100 spec/truffle/specs/truffle/digest.rb spec/truffle/specs/truffle/digest_spec.rb 1027 | 62068e649dbd1f6d923de8cc52f2ff6fc0b65463;;R087 truffle/src/main/java/org/jruby/truffle/parser/TempSourceSection.java truffle/src/main/java/org/jruby/truffle/language/SourceIndexLength.java 1028 | 177282c270e14c4ec35821e7359f9aad62b84043;;R081 truffle/src/main/java/org/jruby/truffle/parser/LazyTranslationNode.java truffle/src/main/java/org/jruby/truffle/language/LazyRubyNode.java 1029 | b2df8ddc031a63779a4b91bfa63cf1cedf906b6e;;R088 truffle/src/main/java/org/jruby/truffle/language/RubySourceSection.java truffle/src/main/java/org/jruby/truffle/parser/TempSourceSection.java 1030 | 4da4c1676fa8799a9214eb6745613cfdc4430353;;R080 core/src/main/java/org/jruby/internal/runtime/methods/CompiledIRMetaClassBody.java core/src/main/java/org/jruby/internal/runtime/methods/CompiledIRNoProtocolMethod.java 1031 | 1477ccc5a0b2a6f277fb6c398169c4fac3c74062;;R091 truffle/src/main/java/org/jruby/truffle/parser/KeyValuePair.java truffle/src/main/java/org/jruby/truffle/collections/Tuple.java 1032 | 65b84d2a2e25626ec5e6593289e5a1c7b13f0b38;;R066 truffle/src/main/java/org/jruby/truffle/algorithms/Random.java truffle/src/main/java/org/jruby/truffle/algorithms/Randomizer.java 1033 | b56a070a2695f36a7a08ed36f8e6ace4ee6f0c08;;R095 truffle/src/main/java/org/jruby/truffle/datastructures/BoundaryIterable.java truffle/src/main/java/org/jruby/truffle/collections/BoundaryIterable.java 1034 | b56a070a2695f36a7a08ed36f8e6ace4ee6f0c08;;R095 truffle/src/main/java/org/jruby/truffle/datastructures/BoundaryIterator.java truffle/src/main/java/org/jruby/truffle/collections/BoundaryIterator.java 1035 | b56a070a2695f36a7a08ed36f8e6ace4ee6f0c08;;R099 
truffle/src/main/java/org/jruby/truffle/datastructures/IntHashMap.java truffle/src/main/java/org/jruby/truffle/collections/IntHashMap.java 1036 | #+end_example 1037 | 1038 | ** Rename: what is the actual diff 1039 | 1040 | Example of a rename: 1041 | 1042 | #+BEGIN_SRC sh :exports both 1043 | git -C /tmp/jruby log -1 --raw 62068e649dbd1f6d923de8cc52f2ff6fc0b65463 | grep TempSource 1044 | #+END_SRC 1045 | 1046 | #+RESULTS: 1047 | #+begin_example 1048 | [Truffle] Rename TempSourceSection to SourceIndexLength and move. 1049 | :100644 100644 f06d224... 7bc9443... R087 truffle/src/main/java/org/jruby/truffle/parser/TempSourceSection.java truffle/src/main/java/org/jruby/truffle/language/SourceIndexLength.java 1050 | #+end_example 1051 | 1052 | but the diff reports all the additions and deletions 1053 | 1054 | #+BEGIN_SRC sh :exports both 1055 | git -C /tmp/jruby diff 62068e649dbd1f6d923de8cc52f2ff6fc0b65463 -- truffle/src/main/java/org/jruby/truffle/language/SourceIndexLength.java | head -10 1056 | #+END_SRC 1057 | 1058 | #+RESULTS: 1059 | #+begin_example 1060 | diff --git a/truffle/src/main/java/org/jruby/truffle/language/SourceIndexLength.java b/truffle/src/main/java/org/jruby/truffle/language/SourceIndexLength.java 1061 | deleted file mode 100644 1062 | index 7bc9443..0000000 1063 | --- a/truffle/src/main/java/org/jruby/truffle/language/SourceIndexLength.java 1064 | +++ /dev/null 1065 | @@ -1,41 +0,0 @@ 1066 | -/* 1067 | - * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved. This 1068 | - * code is released under a tri EPL/GPL/LGPL license. 
You can use it, 1069 | - * redistribute it and/or modify it under the terms of the: 1070 | #+end_example 1071 | 1072 | * git does not store diffs 1073 | 1074 | - it computes them 1075 | - but it stores metadata about the type of change a file suffered (addition, deletion) 1076 | - renames are stored as "before" and "after" and detected at "query" time 1077 | 1078 | you can also see the "churn" in the rename with --numstat 1079 | 1080 | #+BEGIN_SRC sh :exports both 1081 | git -C /tmp/jruby log -1 --numstat 62068e649dbd1f6d923de8cc52f2ff6fc0b65463 | grep TempSource 1082 | #+END_SRC 1083 | 1084 | #+RESULTS: 1085 | #+begin_example 1086 | [Truffle] Rename TempSourceSection to SourceIndexLength and move. 1087 | 3 3 truffle/src/main/java/org/jruby/truffle/{parser/TempSourceSection.java => language/SourceIndexLength.java} 1088 | #+end_example 1089 | 1090 | * Extracting versions of files 1091 | 1092 | - Git stores the actual contents of the files: the blobs 1093 | - they might be compacted (to save space) 1094 | - *do not extract using git checkout*. 
1095 | - it is very slow 1096 | - and it recreates the entire filesystem at the commit 1097 | - The --raw option gives you the blob of a given file before and after the commit is executed: 1098 | - fields after the file mode 1099 | 1100 | #+BEGIN_SRC sh :exports both 1101 | git -C /tmp/xournal log -1 --no-abbrev --raw README 1102 | #+END_SRC 1103 | 1104 | 1105 | #+RESULTS: 1106 | #+begin_example 1107 | commit ab94f7e668505f9080796a5b52cd9a00c81a9326 1108 | Author: Denis Auroux 1109 | Date: Sun Jun 29 09:56:18 2014 +0200 1110 | 1111 | Bump version number ahead of release 0.4.8 1112 | 1113 | :100644 100644 37cbe88fe37fd54433af43e61c9590a9fcfc8a76 4b2c36af80202e29b88b75d5689e125bb64b5fa4 M README 1114 | #+end_example 1115 | 1116 | 1117 | ** git show 1118 | 1119 | You can extract the contents of file using git show in two ways: 1120 | 1121 | - by commit-id and filename 1122 | - by blob-id 1123 | 1124 | ** cid:filename 1125 | 1126 | - You need to use the filename at the time of the commit (in case it was later renamed) 1127 | 1128 | #+BEGIN_SRC sh :exports both 1129 | git -C /tmp/xournal show ab94f7e668505f9080796a5b52cd9a00c81a9326:README | head 1130 | #+END_SRC 1131 | 1132 | #+RESULTS: 1133 | #+begin_example 1134 | Version 0.4.8 (June 30, 2014) 1135 | 1136 | Installation: see INSTALL 1137 | User's manual: see html-doc/manual.html 1138 | Updates: see http://xournal.sourceforge.net/ 1139 | 1140 | See also: http://sourceforge.net/projects/xournal 1141 | #+end_example 1142 | 1143 | ** by blob 1144 | 1145 | #+BEGIN_SRC sh :exports both 1146 | git -C /tmp/xournal show 4b2c36af80202e29b88b75d5689e125bb64b5fa4 | head 1147 | #+END_SRC 1148 | 1149 | #+RESULTS: 1150 | #+begin_example 1151 | Version 0.4.8 (June 30, 2014) 1152 | 1153 | Installation: see INSTALL 1154 | User's manual: see html-doc/manual.html 1155 | Updates: see http://xournal.sourceforge.net/ 1156 | 1157 | See also: http://sourceforge.net/projects/xournal 1158 | #+end_example 1159 | 1160 | 1161 | * blobid 1162 | 
1163 | Any file/directory is recorded as a hash of its contents 1164 | 1165 | - git keeps track of the changes to the file system at each commit using this blob 1166 | - it removes duplicated content 1167 | - it makes it easy to know what a commit has changed 1168 | - and retrieve it 1169 | 1170 | ** How the blob id is computed: 1171 | 1172 | (this explains why the SHA1 vulnerability is not such a big issue for git) 1173 | 1174 | - SHA1 the concatenation of: 1175 | - "blob " 1176 | - ASCII length of the file 1177 | - ASCII 0 1178 | - Contents 1179 | 1180 | ** Example 1181 | 1182 | Compute the blob of the current version of the file README and compare it against the latest commit on that file 1183 | 1184 | #+BEGIN_SRC perl :results output 1185 | use Digest::SHA qw(sha1_hex); 1186 | 1187 | $file = "README"; 1188 | chdir("/tmp/xournal"); 1189 | 1190 | #read contents of file 1191 | local $/; 1192 | open(IN, $file); 1193 | $contents = <IN>; 1194 | close IN; 1195 | 1196 | 1197 | $len = length($contents); 1198 | print sha1_hex("blob $len\0" . 
$contents); 1199 | #+END_SRC 1200 | 1201 | #+RESULTS: 1202 | #+begin_example 1203 | 4b2c36af80202e29b88b75d5689e125bb64b5fa4 1204 | #+end_example 1205 | 1206 | #+BEGIN_SRC sh :exports both 1207 | git -C /tmp/xournal log --no-abbrev -1 --raw README 1208 | #+END_SRC 1209 | 1210 | #+RESULTS: 1211 | #+begin_example 1212 | commit ab94f7e668505f9080796a5b52cd9a00c81a9326 1213 | Author: Denis Auroux 1214 | Date: Sun Jun 29 09:56:18 2014 +0200 1215 | 1216 | Bump version number ahead of release 0.4.8 1217 | 1218 | :100644 100644 37cbe88fe37fd54433af43e61c9590a9fcfc8a76 4b2c36af80202e29b88b75d5689e125bb64b5fa4 M README 1219 | #+end_example 1220 | 1221 | 1222 | * Modifications to a file 1223 | 1224 | - simply append the filename after the other git-log parameters 1225 | 1226 | #+BEGIN_SRC sh :exports both 1227 | git -C /tmp/xournal log --oneline --no-abbrev src/xo-file.c | head 1228 | #+END_SRC 1229 | 1230 | #+RESULTS: 1231 | #+begin_example 1232 | f15bc362de2c5401600c420395b765566c2152e1 config option to create new file when trying to open non-existent .xoj 1233 | 2b74a01bae6c282e12006579cf285e21c847472e Fix issues with commas in config file (bug #161) 1234 | f305d3bee6290b9a82680a186594619b8e217740 Fix includes to avoid implicitly defined function warnings. 1235 | 982874f254c3e03d4def80c44012f1e0bd222377 disable xinput during modal dialog boxes (bug #159) 1236 | 5c74a49542ef78cc31cf9df5a1284a53513bb8b2 option to export successive layers to separate PDF pages 1237 | ac1af31bee3cdfcc8988c193e071d10d7167dea7 use GDK macros (not WIN32) to disable X11-specific code (T. Schoonjans) 1238 | c34acb584baf71ef00086583b80d32f84e75bde0 Fix crash in ps/pdf bitmap background import via ghostscript 1239 | 57c8488b3c6ffe94917dcc417cd0027bfc2df55c fix a minor bug with save file paths in Windows (D. 
German) 1240 | 2cfa0f4174ce5456e22003bde152b6609df288e4 new Export to PDF code using cairo (+ config option to prefer old code) 1241 | ad96f4135f5138afe3d7d20d9f88ae87ea7c0cfd option to auto-save documents and recover auto-saves 1242 | #+end_example 1243 | 1244 | 1245 | ** who was the last to touch a given line? 1246 | 1247 | - use git blame. 1248 | - It can be slow. 1249 | - It gives you the blame with respect to the current commit (head of the current branch). So 1250 | make sure you run it after a checkout at the time you want to see the blame. 1251 | 1252 | By default its format is hard to parse (more on that later) 1253 | 1254 | #+BEGIN_SRC sh :exports both 1255 | git -C /tmp/xournal blame src/xo-file.c | head 1256 | #+END_SRC 1257 | 1258 | #+RESULTS: 1259 | #+begin_example 1260 | a71d7685 (Denis Auroux 2012-05-22 19:06:49 +0000 1) /* 1261 | a71d7685 (Denis Auroux 2012-05-22 19:06:49 +0000 2) * This program is free software; you can redistribute it and/or 1262 | a71d7685 (Denis Auroux 2012-05-22 19:06:49 +0000 3) * modify it under the terms of the GNU General Public 1263 | a71d7685 (Denis Auroux 2012-05-22 19:06:49 +0000 4) * License as published by the Free Software Foundation; either 1264 | a71d7685 (Denis Auroux 2012-05-22 19:06:49 +0000 5) * version 2 of the License, or (at your option) any later version. 1265 | a71d7685 (Denis Auroux 2012-05-22 19:06:49 +0000 6) * 1266 | a71d7685 (Denis Auroux 2012-05-22 19:06:49 +0000 7) * This software is distributed in the hope that it will be useful, 1267 | a71d7685 (Denis Auroux 2012-05-22 19:06:49 +0000 8) * but WITHOUT ANY WARRANTY; without even the implied warranty of 1268 | a71d7685 (Denis Auroux 2012-05-22 19:06:49 +0000 9) * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1269 | a71d7685 (Denis Auroux 2012-05-22 19:06:49 +0000 10) * General Public License for more details. 
1270 | #+end_example 1271 | 1272 | ** Be careful 1273 | 1274 | - In git-blame 1275 | - The authors are mapped using the file .mailmap found in the root directory of the project 1276 | 1277 | ** Example 1: 1278 | 1279 | From the git-blame documentation: 1280 | 1281 | *Example:* Your history contains commits by two authors, Jane and Joe, whose names appear in the repository under several forms: 1282 | 1283 | #+BEGIN_EXAMPLE 1284 | Joe Developer <joe@example.com> 1285 | Joe R. Developer <joe@example.com> 1286 | Jane Doe <jane@example.com> 1287 | Jane Doe <jane@laptop.(none)> 1288 | Jane D. <jane@desktop.(none)> 1289 | #+END_EXAMPLE 1290 | 1291 | Now suppose that Joe wants his middle name initial used, and Jane prefers her family name fully spelled out. A proper 1292 | *.mailmap* file would look like: 1293 | 1294 | #+BEGIN_EXAMPLE 1295 | Jane Doe <jane@desktop.(none)> 1296 | Joe R. Developer <joe@example.com> 1297 | #+END_EXAMPLE 1298 | 1299 | 1300 | ** Example 1301 | 1302 | This is the top of .mailmap from the linux kernel 1303 | 1304 | #+BEGIN_SRC 1305 | # 1306 | # This list is used by git-shortlog to fix a few botched name translations 1307 | # in the git archive, either because the author's full name was messed up 1308 | # and/or not always written the same way, making contributions from the 1309 | # same person appearing not to be so or badly displayed. 1310 | # 1311 | # repo-abbrev: /pub/scm/linux/kernel/git/ 1312 | # 1313 | 1314 | Aaron Durbin 1315 | Adam Oldham 1316 | Adam Radford 1317 | Adrian Bunk 1318 | Adriana Reus 1319 | Alan Cox 1320 | #+END_SRC 1321 | 1322 | For example: 1323 | 1324 | #+BEGIN_EXAMPLE 1325 | Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com> 1326 | #+END_EXAMPLE 1327 | 1328 | - Adriana Reus's email is reported differently in commands that use .mailmap. 
1329 | - including blame 1330 | - *%an* returns name as in commit 1331 | - *%aN* returns name mapped using .mailmap 1332 | - *%ae* returns email as in commit 1333 | - *%aE* returns email mapped using .mailmap 1334 | #+BEGIN_SRC sh :exports both 1335 | git -C /home/linux/original.repo/linux.git log --oneline --format='%H;author;%an;%ae;mailmapAuthor;%aN;%aE' | grep 'reus' | head -2 1336 | #+END_SRC 1337 | 1338 | 1339 | #+RESULTS: 1340 | #+begin_example 1341 | 6c55c418f071dc7df2dfeb66398d009139cc5ef1;author;Adriana Reus;adi.reus@gmail.com;mailmapAuthor;Adriana Reus;adi.reus@gmail.com 1342 | 65ae47b0ec535a008e53578abc11082f3b742f75;author;Adriana Reus;adriana.reus@intel.com;mailmapAuthor;Adriana Reus;adi.reus@gmail.com 1343 | #+end_example 1344 | 1345 | 1346 | This command uses raw user data: 1347 | 1348 | #+BEGIN_SRC sh :exports both 1349 | git -C /home/linux/original.repo/linux.git log 65ae47b0ec535a008e53578abc11082f3b742f75 -1 | head 1350 | #+END_SRC 1351 | 1352 | #+RESULTS: 1353 | #+begin_example 1354 | commit 65ae47b0ec535a008e53578abc11082f3b742f75 1355 | Author: Adriana Reus 1356 | Date: Thu Mar 24 11:29:31 2016 +0200 1357 | 1358 | iio: accel: kxcjk-1013: optimize i2c transfers in trigger handler 1359 | 1360 | Some i2c busses (e.g.: Synopsys DesignWare I2C adapter) need to 1361 | enable/disable the bus at each i2c transfer and must wait for 1362 | the enable/disable to happen before sending the data. 
1363 | 1364 | #+end_example 1365 | 1366 | but this one uses mapped user data: 1367 | 1368 | #+BEGIN_SRC sh :exports both 1369 | git -C /home/linux/original.repo/linux.git shortlog --email 65ae47b0ec535a008e53578abc11082f3b742f75 -1 | head 1370 | #+END_SRC 1371 | 1372 | 1373 | #+RESULTS: 1374 | #+begin_example 1375 | Adriana Reus <adi.reus@gmail.com> (1): 1376 | iio: accel: kxcjk-1013: optimize i2c transfers in trigger handler 1377 | 1378 | #+end_example 1379 | 1380 | 1381 | ** git-blame and .mailmap 1382 | 1383 | - git blame maps users using .mailmap 1384 | - so be careful when you parse its information 1385 | - it might appear that the line is modified by the wrong person 1386 | 1387 | 1388 | * --porcelain 1389 | 1390 | Many commands support --porcelain 1391 | 1392 | #+BEGIN_QUOTE 1393 | "Porcelain" is the material from which toilets are usually made (and sometimes other fixtures such as washbasins). This 1394 | is distinct from "plumbing" (the actual pipes and drains), where the porcelain provides a more user-friendly interface 1395 | to the plumbing 1396 | #+END_QUOTE 1397 | 1398 | http://stackoverflow.com/questions/6976473/what-does-the-term-porcelain-mean-in-git 1399 | 1400 | 1401 | ** git-blame porcelain 1402 | 1403 | it is useful when one needs to parse 1404 | 1405 | | (author or committer)-time | unix time in epoch | 1406 | | summary | one liner of commit log | 1407 | | filename | if file was renamed, it tells you what the original filename was | 1408 | | previous | commit responsible for the previous version of a specific line | 1409 | | \tab | actual content is preceded by tab | 1410 | 1411 | 1412 | #+BEGIN_SRC sh :exports both 1413 | git -C /tmp/xournal blame --no-abbrev src/xo-file.c --line-porcelain | head -26 1414 | #+END_SRC 1415 | 1416 | #+RESULTS: 1417 | #+begin_example 1418 | a71d76854d1caabd6bcc7fd1ce6a105fca06de61 1 1 15 1419 | author Denis Auroux 1420 | author-mail 1421 | author-time 1337713609 1422 | author-tz +0000 1423 | committer Denis Auroux 1424 | 
#!/usr/bin/perl
#
# withCid.pl - prefix every line of "git log" output with its commit id.
#
# Reads git-log output from STDIN (or from the files named on the command
# line) and writes one semicolon-separated record per input line:
#
#   <cid>;<rest>           commit line: "commit <cid>" or "<cid> <subject>"
#   <cid>;<field>;<value>  header line such as "Author: ..." or "Date: ..."
#   <cid>;;<line>          anything else (message text, --name-status rows, ...)
#
# <cid> is the full 40-hex-digit id of the most recent commit line seen, so
# run git log with --no-abbrev when using --oneline.  Semicolons are removed
# from the input so that ';' is unambiguous as the output separator.
#
# Example:
#   git log --oneline --no-abbrev --name-status | perl withCid.pl

use strict;
use warnings;

# Convert one line of git-log output into an output record.
#   $line : raw input line (a trailing newline is removed)
#   $cid  : id of the commit the line belongs to ('' before the first commit)
# Returns ($cid, $record): the possibly updated commit id and the record text
# without a trailing newline.
sub format_line {
    my ($line, $cid) = @_;

    chomp $line;
    $line =~ s/;//g;    # ';' is reserved as the output field separator

    # Commit line: "commit <sha>" (default format) or a bare "<sha> subject"
    # (--oneline omits the "commit" keyword).
    if (my ($id, $rest) = $line =~ /^(?:commit )?([0-9a-f]{40}) *(.*)$/) {
        return ($id, "$id;$rest");
    }

    # Header field: a run of non-space characters followed by ':' and
    # whitespace.  Split it into name and value.
    if (my ($field, $value) = $line =~ /^(\S+):\s+(.*)$/) {
        return ($cid, "$cid;$field;$value");
    }

    # Everything else is passed through with an empty field name.
    return ($cid, "$cid;;$line");
}

# Filter the input line by line, carrying the current commit id along.
sub main {
    my $cid = '';    # defined up front so lines before any commit print ";;..."

    while (my $line = <>) {
        my $record;
        ($cid, $record) = format_line($line, $cid);
        print "$record\n";
    }
    return;
}

# Run only when executed as a script, so format_line() can be loaded and
# tested on its own.
main() unless caller;