├── .gitignore ├── .gitmodules ├── Makefile ├── README.md ├── check-logins.coffee ├── format-languages.coffee ├── format-users.coffee ├── get-details.coffee ├── get-users.coffee ├── package.json └── utils.js /.gitignore: -------------------------------------------------------------------------------- 1 | # Numerous always-ignore extensions 2 | *.diff 3 | *.err 4 | *.orig 5 | *.log 6 | *~ 7 | 8 | # OS or Editor folders 9 | .DS_Store 10 | .cache 11 | Icon? 12 | 13 | # Folders to ignore 14 | .hg 15 | .svn 16 | 17 | # Node.js package manager 18 | /node_modules 19 | /npm-debug.log 20 | 21 | # Other stuff 22 | *.pyc 23 | /tmp 24 | 25 | # Project stuff 26 | /temp-logins.json 27 | /old-logins.json 28 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "formatted"] 2 | path = formatted 3 | url = git@github.com:2657075.git 4 | [submodule "raw"] 5 | path = raw 6 | url = git@github.com:4524946.git 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: get format 2 | get: 1 2 3 | format: 3 4 | 5 | 1: 6 | if [ -e temp-logins.json ]; then mv temp-logins.json old-logins.json; fi; 7 | npx coffee get-users.coffee 8 | # for debug - requires get-users.coffee/get-details.coffee already ran: 9 | #npx coffee check-logins.coffee 10 | 11 | 2: 12 | npx coffee get-details.coffee 13 | 14 | 3: 15 | npx coffee format-languages.coffee 16 | npx coffee format-users.coffee 17 | 18 | 4: sync-raw sync-formatted 19 | 20 | sync: sync-raw sync-formatted 21 | force-sync: force-sync-raw sync-formatted 22 | 23 | sync-raw: 24 | cd raw && git commit -am 'Update stats.' && git push 25 | 26 | force-sync-raw: 27 | cd raw && git commit -am 'Update stats.' 
--amend && git push --force 28 | 29 | sync-formatted: 30 | cd formatted && git commit -am 'Sync.' --amend && git push --force 31 | 32 | clean: 33 | rm temp-logins.json 34 | rm old-logins.json 35 | rm raw/github-languages-stats.json 36 | rm raw/github-users-stats.json 37 | rm formatted/active.md 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GitHub top users 2 | 3 | Generated stats: [git.io/top](http://git.io/top). 4 | 5 | ## Usage 6 | 7 | Make sure you’ve got node.js and coffeescript installed. 8 | 9 | ```bash 10 | # Install deps. 11 | npm install 12 | # Download and format everything. 13 | make 14 | 15 | # or 16 | 17 | # Download data. 18 | make get 19 | 20 | # Generate stats. 21 | make format 22 | ``` 23 | 24 | 25 | ## License 26 | 27 | The MIT License (MIT) 28 | 29 | Copyright (c) 2013 Paul Miller (http://paulmillr.com/) 30 | 31 | Permission is hereby granted, free of charge, to any person obtaining a copy 32 | of this software and associated documentation files (the “Software”), to deal 33 | in the Software without restriction, including without limitation the rights 34 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 35 | copies of the Software, and to permit persons to whom the Software is 36 | furnished to do so, subject to the following conditions: 37 | 38 | The above copyright notice and this permission notice shall be included in 39 | all copies or substantial portions of the Software. 40 | 41 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 42 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 43 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 44 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 45 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 46 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 47 | THE SOFTWARE. 48 | -------------------------------------------------------------------------------- /check-logins.coffee: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env coffee 2 | utils = require './utils' 3 | fs = require 'fs' 4 | data = require './raw/github-users-stats.json' 5 | prev = require './old-logins.json' 6 | curr = require './temp-logins.json' 7 | 8 | filtered = prev 9 | .filter(utils.isNotIn(curr)) 10 | .map(utils.reverseFind(data)) 11 | .filter((_) -> _) 12 | .map (_) -> 13 | login: _.login, followers: _.followers 14 | .sort (a, b) -> 15 | b.followers - a.followers 16 | 17 | console.log 'Filtered logins:' 18 | console.log filtered 19 | console.log JSON.stringify filtered.map(utils.prop 'login'), null, 2 20 | -------------------------------------------------------------------------------- /format-languages.coffee: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env coffee 2 | utils = require './utils' 3 | 4 | getLanguageStats = (inputFile, outFile) -> 5 | stats = require inputFile 6 | total = stats.length 7 | unsorted = Total: total 8 | stats.forEach (stat) -> 9 | {language} = stat 10 | return unless language 11 | unsorted[language] ?= 0 12 | unsorted[language] += 1 13 | 14 | languages = {} 15 | Object.keys(unsorted) 16 | .sort (a, b) -> 17 | unsorted[b] - unsorted[a] 18 | .forEach (language) -> 19 | languages[language] = unsorted[language] 20 | 21 | utils.writeStats outFile, languages 22 | 23 | getLanguageStats './raw/github-users-stats.json', './raw/github-languages-stats.json' 24 | -------------------------------------------------------------------------------- /format-users.coffee: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env coffee 2 | fs = require 'fs' 3 | 4 | # Reducer. 5 | minimum = (min, current) -> 6 | if current < min 7 | current 8 | else 9 | min 10 | 11 | top = (stats, field, type) -> 12 | get = (stat) -> 13 | value = stat[field] 14 | if type is 'list' then value.length else value 15 | 16 | format = (stat) -> 17 | value = get stat 18 | switch type 19 | when 'thousands' then "#{(value / 1000)}k" 20 | else value 21 | 22 | stats 23 | .slice() 24 | .sort (a, b) -> 25 | get(b) - get(a) 26 | .slice(0, 15) 27 | .map (stat) -> 28 | login = stat.login 29 | "[#{login}](https://github.com/#{login}) (#{format stat})" 30 | .join ', ' 31 | 32 | stats2markdown = (datafile, mdfile, title) -> 33 | stats = require(datafile) 34 | minFollowers = stats.map((_) -> _.followers).reduce(minimum, 1000) 35 | maxNumber = 256 36 | 37 | today = new Date() 38 | from = new Date() 39 | from.setYear today.getFullYear() - 1 40 | 41 | out = """ 42 | # Most active GitHub users ([git.io/top](http://git.io/top)) 43 | 44 | The count of contributions (summary of Pull Requests, opened issues and commits) to public repos at GitHub.com from **#{from.toGMTString()}** till **#{today.toGMTString()}**. 45 | 46 | Only first 1000 GitHub users according to the count of followers are taken. 47 | This is because of limitations of GitHub search. Sorting algo in pseudocode: 48 | 49 | ```javascript 50 | githubUsers 51 | .filter(user => user.followers > #{minFollowers}) 52 | .sortBy('contributions') 53 | .slice(0, #{maxNumber}) 54 | ``` 55 | 56 | Made with data mining of GitHub.com ([raw data](https://gist.github.com/4524946), [script](https://github.com/paulmillr/top-github-users)) by [@paulmillr](https://github.com/paulmillr) with contribs of [@lifesinger](https://github.com/lifesinger) and [@ahmetalpbalkan](https://github.com/ahmetalpbalkan). Updated once per week. 
57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | \n 66 | """ 67 | 68 | rows = stats 69 | .filter((stat) -> stat.contributions < 20000).slice(0, maxNumber).map (stat, index) -> 70 | """ 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | """.replace(/\n/g, '') 80 | 81 | out += "#{rows.join('\n')}\n
#UserContribsLocationPicture
##{index + 1}#{stat.login}#{if stat.name then ' (' + stat.name + ')' else ''}#{stat.contributions}#{stat.location}
\n\n" 82 | 83 | out += """## Top 10 users from this list by other metrics: 84 | 85 | * **Followers:** #{top stats, 'followers', 'thousands'} 86 | * **Organisations:** #{top stats, 'organizations', 'list'} 87 | """ 88 | 89 | fs.writeFileSync mdfile, out 90 | console.log 'Saved to', mdfile 91 | 92 | stats2markdown './raw/github-users-stats.json', './formatted/active.md' 93 | -------------------------------------------------------------------------------- /get-details.coffee: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env coffee 2 | cheerio = require 'cheerio' 3 | utils = require './utils' 4 | 5 | stats = {} 6 | 7 | getStats = (html, url) -> 8 | $ = cheerio.load html 9 | byProp = (field) -> $("[itemprop='#{field}']") 10 | getInt = (text) -> parseInt text.replace ',', '' 11 | getOrgName = (item) -> $(item).attr('aria-label') 12 | login = byProp('additionalName').text().trim() 13 | getFollowers = -> 14 | text = $("a[href=\"https://github.com/#{login}?tab=followers\"] > .text-bold").text().trim() 15 | multiplier = if text.indexOf('k') > 0 then 1000 else 1 16 | (parseFloat text) * multiplier 17 | 18 | pageDesc = $('meta[name="description"]').attr('content') 19 | 20 | userStats = 21 | name: byProp('name').text().trim() 22 | login: login 23 | location: byProp('homeLocation').text().trim() 24 | language: (/\sin ([\w-+#\s\(\)]+)/.exec(pageDesc)?[1] ? 
'') 25 | gravatar: byProp('image').attr('href') 26 | followers: getFollowers() 27 | organizations: $('.p-org').text().trim() 28 | contributions: getInt $('div.position-relative > h2.f4.text-normal.mb-2').text().trim().split(' ')[0] 29 | 30 | stats[userStats.login] = userStats 31 | userStats 32 | 33 | sortStats = (stats) -> 34 | minContributions = 1 35 | Object.keys(stats) 36 | .filter (login) -> 37 | stats[login].contributions >= minContributions 38 | .sort (a, b) -> 39 | stats[b].contributions - stats[a].contributions 40 | .map (login) -> 41 | stats[login] 42 | 43 | saveStats = -> 44 | logins = require './temp-logins.json' 45 | urls = logins.map (login) -> "https://github.com/#{login}" 46 | utils.batchGet urls, getStats, -> 47 | utils.writeStats './raw/github-users-stats.json', sortStats stats 48 | 49 | saveStats() 50 | -------------------------------------------------------------------------------- /get-users.coffee: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env coffee 2 | fs = require 'fs' 3 | utils = require './utils' 4 | 5 | BANNED = [ 6 | 'gugod' # 7K commits in 4 days. 7 | 'sindresorhus' # Asked to remove himself from the list. 8 | 'funkenstein' # Appears in the list even though he has 30 followers (bug). 9 | 'beberlei' # 1.7K contribs every day 10 | 'IonicaBizau' # Contribution graffiti. 11 | 'scottgonzalez' 12 | 'AutumnsWind' 13 | 'hintjens' 14 | 'meehawk' 15 | 'BaseMax' # Private contributions. 
16 | 'sibelius' 17 | 'yegor256' 18 | 'kamranahmedse' 19 | 'skydoves' 20 | 'draveness' 21 | 'ryanb' 22 | 'shiftkey' 23 | 'hkirat' 24 | 'Ebazhanov' 25 | 'steven-tey' 26 | 'somekindofwallflower' 27 | 'koush' 28 | 'mitchellh' 29 | # 'ice1000' # can't ban, they're into some mad stuff 30 | #'bradfitz' 31 | ] 32 | 33 | saveTopLogins = -> 34 | MIN_FOLLOWERS = 4000 35 | MAX_PAGES = 10 36 | urls = utils.range(1, MAX_PAGES + 1).map (page) -> [ 37 | "https://api.github.com/search/users?q=followers:%3E#{MIN_FOLLOWERS}+sort:followers+type:user&per_page=100" 38 | "&page=#{page}" 39 | ].join('') 40 | 41 | parse = (text) -> 42 | JSON.parse(text).items.map (_) -> _.login 43 | 44 | utils.batchGet urls, parse, (all) -> 45 | logins = [].concat.apply [], all 46 | filtered = logins.filter (name) -> 47 | name not in BANNED 48 | utils.writeStats './temp-logins.json', filtered 49 | 50 | saveTopLogins() 51 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "top-github-users", 3 | "version": "0.1.0", 4 | "description": "[Generated stats](https://gist.github.com/2657075).", 5 | "main": "utils.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git://github.com/paulmillr/top-github-users.git" 12 | }, 13 | "author": "Paul Miller", 14 | "license": "MIT", 15 | "gitHead": "e6a55ed9abd436d3c60323e6cc1d2b3ad6784abb", 16 | "readmeFilename": "README.md", 17 | "dependencies": { 18 | "batch": "~0.3.2", 19 | "cheerio": "v1.0.0-rc.12", 20 | "coffee-script": "1", 21 | "superagent": "~0.14.0" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /utils.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs'); 2 | var Batch = require('batch'); 3 | var request = 
require('superagent'); 4 | 5 | var batchGet = exports.batchGet = function(urls, progressback, callback) { 6 | var batch = new Batch; 7 | batch.concurrency(5); 8 | urls.forEach(function(url) { 9 | batch.push(function(done) { 10 | request 11 | .get(url) 12 | .set('User-Agent', 'curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5') 13 | .end(function(error, response) { 14 | console.log(url); 15 | if (error) throw new Error(error); 16 | if (response.error) { 17 | if (response.status === 404) { 18 | done(); 19 | } else { 20 | throw [response.error,response.text].join("\n"); 21 | } 22 | } 23 | var result; 24 | try { 25 | result = progressback(response.text); 26 | } catch (err) { 27 | error = err; 28 | } 29 | done(error, result); 30 | }); 31 | }); 32 | }); 33 | 34 | batch.end(function(error, all) { 35 | if (error) throw new Error(error); 36 | callback(all); 37 | }); 38 | }; 39 | 40 | exports.range = function(start, end, step) { 41 | start = +start || 0; 42 | step = +step || 1; 43 | 44 | if (end == null) { 45 | end = start; 46 | start = 0; 47 | } 48 | // use `Array(length)` so V8 will avoid the slower "dictionary" mode 49 | // http://youtu.be/XAqIpGU8ZZk#t=17m25s 50 | var index = -1, 51 | length = Math.max(0, Math.ceil((end - start) / step)), 52 | result = Array(length); 53 | 54 | while (++index < length) { 55 | result[index] = start; 56 | start += step; 57 | } 58 | return result; 59 | }; 60 | 61 | exports.writeStats = function(filename, stats) { 62 | fs.writeFileSync(filename, JSON.stringify(stats, null, 2) + '\n'); 63 | console.log(' Saved to ' + filename); 64 | }; 65 | 66 | // For debugging GitHub search. 
/**
 * Creates an accessor for a single property.
 *
 * @param {string} name - Property key to read.
 * @returns {function(Object): *} Function returning `item[name]`.
 */
function prop(name) {
  return function(item) {
    return item[name];
  };
}

/**
 * Creates a predicate that accepts only values absent from `list`.
 *
 * @param {Array} list - Values to test membership against (via `indexOf`).
 * @returns {function(*): boolean} True when `list.indexOf(item)` is -1.
 */
function isNotIn(list) {
  return function(item) {
    var position = list.indexOf(item);
    return position === -1;
  };
}

/**
 * Lists the logins found in `prev` that no longer occur in `curr`.
 *
 * @param {Array<Object>} prev - Earlier entries, each read through `.login`.
 * @param {Array<Object>} curr - Current entries, each read through `.login`.
 * @returns {Array} Logins of `prev` that are missing from `curr`.
 */
function diff(prev, curr) {
  var previousLogins = prev.map(prop('login'));
  var currentLogins = curr.map(prop('login'));
  return previousLogins.filter(isNotIn(currentLogins));
}

/**
 * Creates a lookup that resolves a login to its entry in `list`.
 *
 * @param {Array<Object>} list - Entries carrying a `login` property.
 * @returns {function(string): (Object|undefined)} Function returning the
 *   first entry whose `login` strictly equals the argument, or `undefined`
 *   when there is none.
 */
function reverseFind(list) {
  return function(login) {
    var matches = list.filter(function(entry) {
      return entry.login === login;
    });
    return matches[0];
  };
}

exports.prop = prop;
exports.isNotIn = isNotIn;
exports.diff = diff;
exports.reverseFind = reverseFind;

// diff(prev, curr).map(reverseFind(prev));
// prev.map(prop('login')).filter(isNotIn(logins))