├── .gitignore ├── LICENSE ├── README.md ├── docs └── demo.gif ├── git-also.js ├── git-cograph.js ├── index.js ├── lib ├── computeSimilarities.js └── startShell.js ├── package-lock.json └── package.json /.gitignore: -------------------------------------------------------------------------------- 1 | lib-cov 2 | *.seed 3 | *.log 4 | *.csv 5 | *.dat 6 | *.out 7 | *.pid 8 | *.gz 9 | 10 | pids 11 | logs 12 | results 13 | 14 | npm-debug.log 15 | node_modules 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2025 Andrei Kashcha 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # git-also 2 | 3 | For a `file` in your git repository, prints other files that are most often committed together. 4 | 5 | ![demo](https://raw.githubusercontent.com/anvaka/git-also/master/docs/demo.gif) 6 | 7 | This is a demo of `git-also` applied on three.js library: 8 | 9 | ``` 10 | > src/core/Object3D.js most often committed with: 11 | 12 | # together Similarity Name 13 | 51 0.22 src/core/Geometry.js 14 | 48 0.21 src/renderers/WebGLRenderer.js 15 | 45 0.19 build/Three.js 16 | 43 0.18 src/materials/Material.js 17 | 36 0.15 build/custom/ThreeWebGL.js 18 | 36 0.15 src/cameras/Camera.js 19 | 35 0.15 build/custom/ThreeCanvas.js 20 | 34 0.15 build/custom/ThreeSVG.js 21 | 34 0.15 build/custom/ThreeDOM.js 22 | 32 0.14 src/core/BufferGeometry.js 23 | ``` 24 | 25 | This means that file `Object3D.js` is most often committed with `Geometry.js` - 26 | they both appear together in `51` commits! By looking at this output 27 | you can immediately see core pieces of three.js. 28 | 29 | The `Similarity` column shows [`Jaccard index`](https://en.wikipedia.org/wiki/Jaccard_index) 30 | of two files. 31 | 32 | # usage 33 | 34 | Install the package with npm: 35 | 36 | ``` 37 | npm install -g git-also 38 | ``` 39 | 40 | Run it from command line inside your git repository: 41 | 42 | ``` 43 | git also 44 | ``` 45 | 46 | If you run it without arguments it prints help: 47 | 48 | ``` 49 | Usage: git-also [options] 50 | 51 | For a in your git repository prints other files that are most often committed together 52 | 53 | Options: 54 | 55 | -h, --help output usage information 56 | -V, --version output the version number 57 | -c, --count Print top N other files. N is 10 by default 58 | ``` 59 | 60 | # motivation 61 | 62 | Files are often committed together when developers improve code or add new features. 63 | This information could serve as a hint when you are exploring new code: 64 | 65 | * What are related file to this file? 66 | * Where else should I look when I fix bugs in this file? 67 | 68 | # license 69 | 70 | MIT 71 | -------------------------------------------------------------------------------- /docs/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anvaka/git-also/bd8994bb7d7ad77c8789d9c0d67af54b243fa282/docs/demo.gif -------------------------------------------------------------------------------- /git-also.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | var computeSimilarities = require('./lib/computeSimilarities.js'); 3 | var readline = require('readline'); 4 | var path = require('path'); 5 | 6 | var program = require('commander'); 7 | program 8 | .version(require('./package.json').version) 9 | .usage('[options] ') 10 | .description('For a in your git repository prints other files that are most often committed together') 11 | .option('-c, --count ', 'Print top N other files. N is 10 by default', parseInt) 12 | .parse(process.argv); 13 | 14 | if (program.args.length !== 1) { 15 | program.outputHelp(); 16 | return; 17 | } 18 | 19 | // Only commits that have this file will be counted: 20 | var fileLookup = path.resolve(program.args[0]) 21 | 22 | changeWorkingDirectoryBasedOnInput(path.dirname(fileLookup)); 23 | 24 | fileLookup = path.normalize(fileLookup); 25 | // in cygwin/windows the lookup path is "Root\Lib\file.c", while git shows it as 26 | // "Root/Lib/file.c". Changing: 27 | fileLookup = fileLookup.replace(/\\/g, '/'); 28 | 29 | var childProcess = require('child_process') 30 | 31 | fixNestedPaths(processGitLogs); 32 | 33 | function fixNestedPaths(finishedCallback) { 34 | var cmd = 'git rev-parse --show-toplevel'; 35 | childProcess.exec(cmd, function(error, stdout, stderr) { 36 | if (error) { 37 | if (stderr) console.log(stderr); 38 | else { 39 | console.log('something is wrong: ', error); 40 | } 41 | process.exit(2); 42 | } 43 | 44 | // stdout has \n at the end - remove it 45 | var gitRoot = stdout.trim(); 46 | // fileLookup is guaranteed to have gitRoot in it (since we've done path.resolve above) 47 | // just remove the git root, and that will give us relative file name (which is printed 48 | // by `git log` output) 49 | fileLookup = fileLookup.substring(gitRoot.length + 1); 50 | finishedCallback(); 51 | }); 52 | } 53 | 54 | function processGitLogs() { 55 | var buffer = []; 56 | var commits = []; 57 | 58 | var git = childProcess.spawn('git', ['log', '--name-only', '--pretty=format:""']); 59 | 60 | var rl = readline.createInterface({ input: git.stdout }); 61 | rl.on('line', processLine).on('close', printResults); 62 | 63 | git.stderr.on('data', function (data) { 64 | console.error('stderr: ' + data.toString()); 65 | }); 66 | 67 | function printResults() { 68 | var similarities = computeSimilarities(commits) 69 | similarities.print(fileLookup, program.count); 70 | } 71 | 72 | function processLine(line) { 73 | if (line === '""') return; 74 | if (line) { 75 | buffer.push(line) 76 | } else { 77 | if (buffer.length > 0) { 78 | if (hasFile(buffer, fileLookup)) commits.push(buffer); 79 | buffer = []; 80 | } 81 | } 82 | } 83 | 84 | function hasFile(buffer, fileLookup) { 85 | for (var i = 0; i < buffer.length; ++i) { 86 | if (buffer[i] === fileLookup) return true; 87 | } 88 | } 89 | } 90 | 91 | 92 | function changeWorkingDirectoryBasedOnInput(dirName) { 93 | try { 94 | process.chdir(dirName); 95 | } catch(error) { 96 | if (error.code !== 'ENOENT') throw error; 97 | 98 | console.log('no such directory: ' + dirName); 99 | process.exit(1); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /git-cograph.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | var readline = require('readline'); 3 | var path = require('path'); 4 | var createGraph = require('ngraph.graph'); 5 | 6 | var program = require('commander'); 7 | program 8 | .version(require('./package.json').version) 9 | .usage('[options]') 10 | .description('Prints a graph of files that are most frequently changed together') 11 | .parse(process.argv); 12 | 13 | 14 | var childProcess = require('child_process') 15 | 16 | processGitLogs(); 17 | 18 | function processGitLogs() { 19 | var buffer = []; 20 | 21 | var git = childProcess.spawn('git', ['log', '--name-only', '--pretty=format:""']); 22 | var graph = createGraph(); 23 | 24 | var rl = readline.createInterface({ input: git.stdout }); 25 | rl.on('line', processLine).on('close', printResults); 26 | 27 | git.stderr.on('data', function (data) { 28 | console.error('stderr: ' + data.toString()); 29 | }); 30 | 31 | function printResults() { 32 | console.warn('Pairs: ', graph.getLinkCount(), 'Nodes: ' + graph.getNodeCount()); 33 | 34 | let nodes = []; 35 | graph.forEachNode(node => { 36 | nodes.push(node.data.count); 37 | }); 38 | let meanNodeCount = nodes.reduce((a, b) => a + b, 0) / nodes.length; 39 | let stdDevNode = nodes.reduce((a, b) => a + Math.pow(b - meanNodeCount, 2), 0) / nodes.length; 40 | let nodeFilterThreshold = meanNodeCount;// + stdDevNode; 41 | 42 | console.warn('Mean node count: ', meanNodeCount, 'StdDev: ', Math.sqrt(stdDevNode)); 43 | 44 | let scores = []; 45 | graph.forEachLink(link => { 46 | let fromNode = graph.getNode(link.fromId); 47 | let toNode = graph.getNode(link.toId); 48 | let committedTogether = link.data.count; 49 | // if (!(fromNode.id.match(/^src/) || toNode.id.match(/^src/))) return; 50 | 51 | if (fromNode.data.count > nodeFilterThreshold || 52 | toNode.data.count > nodeFilterThreshold) { 53 | let score = committedTogether / (fromNode.data.count + toNode.data.count - committedTogether); 54 | scores.push({ 55 | link, score 56 | }); 57 | } 58 | }) 59 | 60 | scores.sort((a, b) => b.score - a.score); 61 | 62 | if (scores.length > 0) { 63 | let mean = scores.reduce((a, b) => a + b.score, 0) / scores.length; 64 | let stdDev = Math.sqrt(scores.reduce((a, b) => a + Math.pow(b.score - mean, 2), 0) / scores.length); 65 | 66 | console.warn('Jaccard similarity mean: ', mean, '; StdDev: ', stdDev); 67 | } 68 | console.log('graph G {') 69 | scores.forEach(({link, score}) => { 70 | if (score > mean + stdDev) { 71 | console.log(` "${link.fromId}" -- "${link.toId}" [score=${Math.round(score*100)/100}];`) 72 | } 73 | }); 74 | console.log('}') 75 | // var similarities = computeSimilarities(commits) 76 | // similarities.print(fileLookup, program.count); 77 | } 78 | 79 | function processLine(line) { 80 | if (line === '""') return; 81 | if (line) { 82 | buffer.push(line) 83 | } else { 84 | if (buffer.length > 0) { 85 | buffer.forEach(fileName => { 86 | if (!graph.hasNode(fileName)) { 87 | graph.addNode(fileName, {count: 0}); 88 | } 89 | graph.getNode(fileName).data.count += 1; 90 | }); 91 | for (let i = 0; i < buffer.length - 1; i++) { 92 | let from = buffer[i]; 93 | for (let j = i + 1; j < buffer.length; j++) { 94 | let to = buffer[j]; 95 | let canonicalFrom = from; 96 | let canonicalTo = to; 97 | if (from < to) { 98 | canonicalFrom = to; 99 | canonicalTo = from; 100 | } 101 | 102 | if (!graph.hasLink(canonicalFrom, canonicalTo)) { 103 | graph.addLink(canonicalFrom, canonicalTo, {count: 0}); 104 | } 105 | graph.getLink(canonicalFrom, canonicalTo).data.count += 1; 106 | } 107 | } 108 | buffer = []; 109 | } 110 | } 111 | } 112 | 113 | function hasFile(buffer, fileLookup) { 114 | for (var i = 0; i < buffer.length; ++i) { 115 | if (buffer[i] === fileLookup) return true; 116 | } 117 | } 118 | } 119 | 120 | 121 | function changeWorkingDirectoryBasedOnInput(dirName) { 122 | try { 123 | process.chdir(dirName); 124 | } catch(error) { 125 | if (error.code !== 'ENOENT') throw error; 126 | 127 | console.error('no such directory: ' + dirName); 128 | process.exit(1); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | // log files generated by 2 | // 3 | // git log --name-only --pretty=format:"" 4 | // 5 | // NOTE: use git-also.js if you wan to run this in the git repository 6 | var readline = require('readline'); 7 | var fs = require('fs'); 8 | var computeSimilarities = require('./lib/computeSimilarities.js'); 9 | var startShell = require('./lib/startShell.js'); 10 | 11 | var inputFileName = process.argv[2]; 12 | 13 | if (!fs.existsSync(inputFileName)) { 14 | console.log('Cannot find input file.'); 15 | console.log(''); 16 | console.log('To create input file, run the following command: '); 17 | console.log(''); 18 | console.log('git log --name-only --pretty=format:"" > inputFile.log'); 19 | console.log(''); 20 | console.log('Then re-run this program: '); 21 | console.log(''); 22 | console.log('node index.js inputFile.log'); 23 | process.exit(1) 24 | } 25 | 26 | console.log('reading file: ' + inputFileName); 27 | 28 | var rl = readline.createInterface({ 29 | input: fs.createReadStream(inputFileName) 30 | }); 31 | 32 | var buffer = []; 33 | var commits = []; 34 | 35 | rl.on('line', processLine); 36 | rl.on('close', startInteractiveShell); 37 | 38 | function processLine(line) { 39 | if (line) { 40 | buffer.push(line) 41 | } else { 42 | if (buffer.length > 0) { 43 | commits.push(buffer); 44 | buffer = []; 45 | } 46 | } 47 | } 48 | 49 | function startInteractiveShell() { 50 | var similarities = computeSimilarities(commits) 51 | startShell(similarities); 52 | } 53 | -------------------------------------------------------------------------------- /lib/computeSimilarities.js: -------------------------------------------------------------------------------- 1 | module.exports = computeSimilarities; 2 | 3 | function computeSimilarities(commits) { 4 | var index = buildIndex(); 5 | 6 | return { 7 | print: print 8 | }; 9 | 10 | function print(fileName, maxCount) { 11 | if (maxCount === undefined || Number.isNaN(maxCount)) { 12 | maxCount = 10; 13 | } 14 | 15 | index.forEach(printEntry); 16 | 17 | function printEntry(entry) { 18 | if (entry.name === fileName) { 19 | console.log(entry.name + ' most often committed with: '); 20 | console.log(''); 21 | console.log('# together\tSimilarity\tName'); 22 | console.log(entry.related.slice(0, maxCount).map(toUIOutput).join('\n')); 23 | console.log(''); 24 | } 25 | } 26 | } 27 | 28 | function toUIOutput(record) { 29 | return pad(record.count, 10) + '\t' + pad(record.index.toFixed(2), 10) + '\t' + record.name; 30 | } 31 | 32 | function pad(n, width, z) { 33 | z = z || ' '; 34 | n = n + ''; 35 | return n.length >= width ? n : new Array(width - n.length + 1).join(z) + n; 36 | } 37 | 38 | function buildIndex() { 39 | var files = Object.create(null);// number of times each file was seen 40 | 41 | commits.forEach(countFiles); 42 | commits.forEach(countCoocurrences); 43 | var values = toValues(files); 44 | values.forEach(updateJaccardSimilarity) 45 | 46 | return values; 47 | 48 | function updateJaccardSimilarity(fileRecord) { 49 | // a & b / (a + b - a & b) 50 | var related = toValues(fileRecord.similarities); 51 | 52 | related.forEach(function(otherFile) { 53 | var together = otherFile.count; 54 | otherFile.index = together/(fileRecord.count + files[otherFile.name].count - together); 55 | }); 56 | 57 | fileRecord.related = related.sort(byIndex); 58 | } 59 | 60 | function countCoocurrences(commit) { 61 | commit.forEach(processFile) 62 | 63 | function processFile(fileA) { 64 | var aRecord = files[fileA]; 65 | 66 | commit.forEach(processOtherFile) 67 | 68 | function processOtherFile(fileB) { 69 | if (fileB === fileA) return; 70 | 71 | var related = aRecord.similarities[fileB] 72 | if (!related) related = aRecord.similarities[fileB] = { 73 | name: fileB, 74 | count: 0 75 | }; 76 | 77 | related.count += 1; // /commit.length; <- should I weight it? 78 | } 79 | } 80 | } 81 | 82 | function countFiles(commit) { 83 | commit.forEach(processFile); 84 | 85 | function processFile(file) { 86 | var record = files[file]; 87 | if (!record) record = files[file] = { 88 | count: 0, 89 | name: file, 90 | similarities: Object.create(null) 91 | }; 92 | 93 | record.count += 1; 94 | } 95 | } 96 | } 97 | } 98 | 99 | function toValues(object) { 100 | return Object.keys(object).map(toValue); 101 | 102 | function toValue(key) { 103 | return object[key] 104 | } 105 | } 106 | 107 | function byIndex(x, y) { 108 | return y.index - x.index 109 | } 110 | -------------------------------------------------------------------------------- /lib/startShell.js: -------------------------------------------------------------------------------- 1 | var readline = require('readline'); 2 | 3 | var rl = readline.createInterface({ 4 | input: process.stdin, 5 | output: process.stdout, 6 | prompt: 'Enter file to find what is related: ' 7 | }); 8 | 9 | module.exports = startShell; 10 | 11 | function startShell(similarities) { 12 | rl.prompt(); 13 | 14 | rl.on('line', printLine).on('close', printBye); 15 | 16 | function printLine(line) { 17 | var entered = line.trim(); 18 | similarities.print(entered); 19 | rl.prompt(); 20 | } 21 | 22 | function printBye() { 23 | console.log('Have a great day!'); 24 | process.exit(0); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "git-also", 3 | "version": "1.1.0", 4 | "lockfileVersion": 2, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "git-also", 9 | "version": "1.1.0", 10 | "license": "MIT", 11 | "dependencies": { 12 | "commander": "^6.1.0", 13 | "ngraph.graph": "^20.0.0" 14 | }, 15 | "bin": { 16 | "git-also": "git-also.js" 17 | } 18 | }, 19 | "node_modules/commander": { 20 | "version": "6.1.0", 21 | "resolved": "https://registry.npmjs.org/commander/-/commander-6.1.0.tgz", 22 | "integrity": "sha512-wl7PNrYWd2y5mp1OK/LhTlv8Ff4kQJQRXXAvF+uU/TPNiVJUxZLRYGj/B0y/lPGAVcSbJqH2Za/cvHmrPMC8mA==", 23 | "engines": { 24 | "node": ">= 6" 25 | } 26 | }, 27 | "node_modules/ngraph.events": { 28 | "version": "1.2.2", 29 | "resolved": "https://registry.npmjs.org/ngraph.events/-/ngraph.events-1.2.2.tgz", 30 | "integrity": "sha512-JsUbEOzANskax+WSYiAPETemLWYXmixuPAlmZmhIbIj6FH/WDgEGCGnRwUQBK0GjOnVm8Ui+e5IJ+5VZ4e32eQ==" 31 | }, 32 | "node_modules/ngraph.graph": { 33 | "version": "20.0.0", 34 | "resolved": "https://registry.npmjs.org/ngraph.graph/-/ngraph.graph-20.0.0.tgz", 35 | "integrity": "sha512-tJqmik6U5geNDSbmTSwm4R6coTMDbkfFFHD8wdeSJtKU/cxIWFsKtXuwMva/wTk6tQQl1C2//lrzmwfPJXAXHw==", 36 | "dependencies": { 37 | "ngraph.events": "^1.2.1" 38 | } 39 | } 40 | }, 41 | "dependencies": { 42 | "commander": { 43 | "version": "6.1.0", 44 | "resolved": "https://registry.npmjs.org/commander/-/commander-6.1.0.tgz", 45 | "integrity": "sha512-wl7PNrYWd2y5mp1OK/LhTlv8Ff4kQJQRXXAvF+uU/TPNiVJUxZLRYGj/B0y/lPGAVcSbJqH2Za/cvHmrPMC8mA==" 46 | }, 47 | "ngraph.events": { 48 | "version": "1.2.2", 49 | "resolved": "https://registry.npmjs.org/ngraph.events/-/ngraph.events-1.2.2.tgz", 50 | "integrity": "sha512-JsUbEOzANskax+WSYiAPETemLWYXmixuPAlmZmhIbIj6FH/WDgEGCGnRwUQBK0GjOnVm8Ui+e5IJ+5VZ4e32eQ==" 51 | }, 52 | "ngraph.graph": { 53 | "version": "20.0.0", 54 | "resolved": "https://registry.npmjs.org/ngraph.graph/-/ngraph.graph-20.0.0.tgz", 55 | "integrity": "sha512-tJqmik6U5geNDSbmTSwm4R6coTMDbkfFFHD8wdeSJtKU/cxIWFsKtXuwMva/wTk6tQQl1C2//lrzmwfPJXAXHw==", 56 | "requires": { 57 | "ngraph.events": "^1.2.1" 58 | } 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "git-also", 3 | "version": "1.1.0", 4 | "description": "For a `file` in your git repository, prints other files that are most often committed together", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "bin": { 10 | "git-also": "git-also.js" 11 | }, 12 | "author": "Andrei Kashcha", 13 | "license": "MIT", 14 | "repository": { 15 | "type": "git", 16 | "url": "https://github.com/anvaka/git-also" 17 | }, 18 | "dependencies": { 19 | "commander": "^6.1.0", 20 | "ngraph.graph": "^20.0.0" 21 | } 22 | } 23 | --------------------------------------------------------------------------------