├── README.md ├── analytics.txt ├── app.js ├── build_db.py ├── build_db_index.py ├── events ├── delete.json ├── fork.json ├── issuecomment.json ├── issues.json ├── pullrequest.json ├── push.json └── watch.json ├── evttypes.txt ├── handle.py ├── index.h5 ├── languages.txt ├── ml.py ├── package.json ├── parse_data.py ├── points.h5 └── screenshots ├── 2014-01-01.png ├── feb-one-plot.png └── feb-results.png /README.md: -------------------------------------------------------------------------------- 1 | h5dump points.h5 |less 2 | 3 | h5ls -vlr points.h5 4 | 5 | Opened "points.h5" with sec2 driver. 6 | / Group 7 | Location: 1:96 8 | Links: 1 9 | /names Dataset {50001/50001} 10 | Location: 1:1400 11 | Links: 1 12 | Storage: 1950039 logical bytes, 1950039 allocated bytes, 100.00% utilization 13 | Type: 39-byte null-padded ASCII string 14 | /points Dataset {50001/50001, 63/63} 15 | Location: 1:800 16 | Links: 1 17 | Storage: 25200504 logical bytes, 25200504 allocated bytes, 100.00% utilization 18 | Type: native double 19 | 20 | -------------------------------------------------------------------------------- /analytics.txt: -------------------------------------------------------------------------------- 1 | sudo ARCHFLAGS="-Wno-error=unused-command-line-argument-hard-error-in-future" easy_install line_profiler 2 | 3 | 4 | kernprof.py -l -v handle.py 5 | Wrote profile results to handle.py.lprof 6 | Timer unit: 1e-06 s 7 | 8 | File: handle.py 9 | Function: draw_date at line 17 10 | Total time: 5.77839 s 11 | 12 | Line # Hits Time Per Hit % Time Line Contents 13 | ============================================================== 14 | 17 @profile 15 | 18 def draw_date(files): 16 | 19 1 1 1.0 0.0 x = [] 17 | 20 1 1 1.0 0.0 y = [] 18 | 21 1 2381188 2381188.0 41.2 mwcs = pd.get_minutes_counts_with_id(files) 19 | 22 61 32 0.5 0.0 for mwc in mwcs: 20 | 23 60 43 0.7 0.0 x.append(mwc[0]) 21 | 24 60 40 0.7 0.0 y.append(mwc[1]) 22 | 25 23 | 26 1 3397081 3397081.0 58.8 simple_draw(files, 
mwcs) 24 | 25 | 26 | Wrote profile results to handle.py.lprof 27 | Timer unit: 1e-06 s 28 | 29 | File: parse_data.py 30 | Function: handle_json at line 15 31 | Total time: 2.40157 s 32 | 33 | Line # Hits Time Per Hit % Time Line Contents 34 | ============================================================== 35 | 15 @profile 36 | 16 def handle_json(jsonfile): 37 | 17 1 19 19.0 0.0 f = open(jsonfile, "r") 38 | 18 1 1 1.0 0.0 dataarray = [] 39 | 19 1 1 1.0 0.0 datacount = 0 40 | 20 41 | 21 4185 11605 2.8 0.5 for line in open(jsonfile): 42 | 22 4184 48394 11.6 2.0 line = f.readline() 43 | 23 4184 249621 59.7 10.4 lin = json.loads(line) 44 | 24 4184 2082951 497.8 86.7 date = dateutil.parser.parse(lin["created_at"]) 45 | 25 4184 4375 1.0 0.2 datacount += 1 46 | 26 4184 4594 1.1 0.2 dataarray.append(date.minute) 47 | 27 48 | 28 1 8 8.0 0.0 f.close() 49 | 29 1 1 1.0 0.0 return datacount, dataarray 50 | 51 | 52 | kernprof.py -l -v handle.py 53 | [6570, 7420, 11274, 12073, 12160, 12378, 12897, 8474, 7984, 12933, 13504, 13763, 13544, 12940, 7119, 7346, 13412, 14008, 12555] 54 | Wrote profile results to handle.py.lprof 55 | Timer unit: 1e-06 s 56 | 57 | File: parse_data.py 58 | Function: handle_json at line 15 59 | Total time: 127.332 s 60 | 61 | Line # Hits Time Per Hit % Time Line Contents 62 | ============================================================== 63 | 15 @profile 64 | 16 def handle_json(jsonfile): 65 | 17 19 636 33.5 0.0 f = open(jsonfile, "r") 66 | 18 19 21 1.1 0.0 dataarray = [] 67 | 19 19 16 0.8 0.0 datacount = 0 68 | 20 69 | 21 212373 730344 3.4 0.6 for line in open(jsonfile): 70 | 22 212354 2826826 13.3 2.2 line = f.readline() 71 | 23 212354 13848171 65.2 10.9 lin = json.loads(line) 72 | 24 212354 109427317 515.3 85.9 date = dateutil.parser.parse(lin["created_at"]) 73 | 25 212354 238112 1.1 0.2 datacount += 1 74 | 26 212354 260227 1.2 0.2 dataarray.append(date.minute) 75 | 27 76 | 28 19 349 18.4 0.0 f.close() 77 | 29 19 20 1.1 0.0 return datacount, dataarray 78 | 
79 | time python handle.py 80 | 81 | real 0m43.411s 82 | user 0m39.226s 83 | sys 0m0.618s 84 | 85 | 86 | 87 | 88 | 89 | Filename: parse_data.py 90 | 91 | Line # Mem usage Increment Line Contents 92 | ================================================ 93 | 13 39.930 MiB 0.000 MiB @profile 94 | 14 def handle_json(jsonfile): 95 | 15 39.930 MiB 0.000 MiB f = open(jsonfile, "r") 96 | 16 39.930 MiB 0.000 MiB dataarray = [] 97 | 17 39.930 MiB 0.000 MiB datacount = 0 98 | 18 99 | 19 40.055 MiB 0.125 MiB for line in open(jsonfile): 100 | 20 40.055 MiB 0.000 MiB line = f.readline() 101 | 21 40.066 MiB 0.012 MiB lin = json.loads(line) 102 | 22 40.055 MiB -0.012 MiB date = dateutil.parser.parse(lin["created_at"]) 103 | 23 40.055 MiB 0.000 MiB datacount += 1 104 | 24 40.055 MiB 0.000 MiB dataarray.append(date.minute) 105 | 25 106 | 26 f.close() 107 | 27 return datacount, dataarray 108 | 109 | 110 | function 8259 111 | dict 2137 112 | tuple 1949 113 | wrapper_descriptor 1625 114 | list 1586 115 | weakref 1145 116 | builtin_function_or_method 1117 117 | method_descriptor 948 118 | getset_descriptor 708 119 | type 705 -------------------------------------------------------------------------------- /app.js: -------------------------------------------------------------------------------- 1 | var redis = require("redis"), 2 | client = redis.createClient(); 3 | 4 | client.on("error", function(err) { 5 | console.log("Error " + err); 6 | }); 7 | client. 
-------------------------------------------------------------------------------- /build_db.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sqlite3 3 | import os 4 | import glob 5 | import gzip 6 | import logging 7 | from datetime import date, timedelta 8 | import re 9 | 10 | import redis 11 | r = redis.StrictRedis(host='localhost', port=6379, db=1) 12 | 13 | def init_db(): 14 | conn = sqlite3.connect('userdata.db') 15 | c = conn.cursor() 16 | c.execute('''CREATE TABLE userinfo (owner text, language text, eventtype text, name text, url text)''') 17 | c.close() 18 | 19 | 20 | def build_db(jsonfile): 21 | conn = sqlite3.connect('userdata.db') 22 | c = conn.cursor() 23 | # init_db(conn) 24 | f = open(jsonfile, "r") 25 | count = 1 26 | userinfo = [] 27 | 28 | for line in open(jsonfile): 29 | date = f.readline() 30 | date = json.loads(date) 31 | if 'repository' in date: 32 | repo = date["repository"] 33 | if 'language' in repo: 34 | info = str(repo['owner']), str(repo['language']), str(date["type"]), str(repo["name"]), str(repo["url"]) 35 | userinfo.append(info) 36 | count += 1 37 | 38 | c.executemany('INSERT INTO userinfo VALUES (?,?,?,?,?)', userinfo) 39 | f.close() 40 | conn.commit() 41 | c.close() 42 | 43 | 44 | def build_all_db(): 45 | for i in range(1, 20): 46 | if i < 10: 47 | filename = 'data/2014-02-0' + i.__str__() + '-0.json' 48 | else: 49 | filename = 'data/2014-02-' + i.__str__() + '-0.json' 50 | build_db(filename) 51 | 52 | 53 | def handle_gzip_file(filename): 54 | userinfo = [] 55 | with gzip.GzipFile(filename) as f: 56 | events = [line.decode("utf-8", errors="ignore") for line in f] 57 | 58 | for n, line in enumerate(events): 59 | try: 60 | event = json.loads(line) 61 | except: 62 | 63 | continue 64 | 65 | actor = event["actor"] 66 | attrs = event.get("actor_attributes", {}) 67 | if actor is None or attrs.get("type") != "User": 68 | continue 69 | 70 | key = actor.lower() 71 | 72 | repo = 
event.get("repository", {}) 73 | info = str(repo.get("owner")), str(repo.get("language")), str(event["type"]), str(repo.get("name")), str( 74 | repo.get("url")) 75 | userinfo.append(info) 76 | 77 | return userinfo 78 | 79 | 80 | def build_db_with_gzip(): 81 | init_db() 82 | conn = sqlite3.connect('userdata.db') 83 | c = conn.cursor() 84 | 85 | year = 2014 86 | month = 3 87 | 88 | for day in range(1,31): 89 | date_re = re.compile(r"([0-9]{4})-([0-9]{2})-([0-9]{2})-([0-9]+)\.json.gz") 90 | 91 | fn_template = os.path.join("march", 92 | "{year}-{month:02d}-{day:02d}-{n}.json.gz") 93 | kwargs = {"year": year, "month": month, "day": day, "n": "*"} 94 | filenames = glob.glob(fn_template.format(**kwargs)) 95 | 96 | for filename in filenames: 97 | c.executemany('INSERT INTO userinfo VALUES (?,?,?,?,?)', handle_gzip_file(filename)) 98 | 99 | conn.commit() 100 | c.close() 101 | 102 | def _format(key): 103 | return "{0}:{1}".format("od", key) 104 | 105 | def build_db_with_redis(): 106 | year = 2014 107 | month = 3 108 | pipe = r.pipeline() 109 | 110 | for day in range(2, 4): 111 | date_re = re.compile(r"([0-9]{4})-([0-9]{2})-([0-9]{2})-([0-9]+)\.json.gz") 112 | 113 | fn_template = os.path.join("march", 114 | "{year}-{month:02d}-{day:02d}-{n}.json.gz") 115 | kwargs = {"year": year, "month": month, "day": day, "n": "*"} 116 | filenames = glob.glob(fn_template.format(**kwargs)) 117 | 118 | for filename in filenames: 119 | userinfo = [] 120 | year, month, day, hour = map(int, date_re.findall(filename)[0]) 121 | weekday = date(year=year, month=month, day=day).strftime("%w") 122 | 123 | with gzip.GzipFile(filename) as f: 124 | events = [line.decode("utf-8", errors="ignore") for line in f] 125 | count = len(events) 126 | 127 | for n, line in enumerate(events): 128 | 129 | event = json.loads(line) 130 | 131 | actor = event["actor"] 132 | attrs = event.get("actor_attributes", {}) 133 | if actor is None or attrs.get("type") != "User": 134 | # This was probably an anonymous event (like a 
gist event) 135 | # or an organization event. 136 | continue 137 | 138 | key = actor.lower() 139 | evttype = event["type"] 140 | nevents = 1 141 | contribution = evttype in ["IssuesEvent", "PullRequestEvent","PushEvent"] 142 | 143 | pipe.incr(_format("total"), nevents) 144 | pipe.hincrby(_format("day"), weekday, nevents) 145 | pipe.hincrby(_format("hour"), hour, nevents) 146 | pipe.zincrby(_format("user"), key, nevents) 147 | pipe.zincrby(_format("event"), evttype, nevents) 148 | 149 | # Event histograms. 150 | pipe.hincrby(_format("event:{0}:day".format(evttype)), weekday, 151 | nevents) 152 | pipe.hincrby(_format("event:{0}:hour".format(evttype)), hour, 153 | nevents) 154 | 155 | # User schedule histograms. 156 | pipe.hincrby(_format("user:{0}:day".format(key)), weekday, nevents) 157 | pipe.hincrby(_format("user:{0}:hour".format(key)), hour, nevents) 158 | 159 | # User event type histogram. 160 | pipe.zincrby(_format("user:{0}:event".format(key)), evttype, 161 | nevents) 162 | pipe.hincrby(_format("user:{0}:event:{1}:day".format(key, 163 | evttype)), 164 | weekday, nevents) 165 | pipe.hincrby(_format("user:{0}:event:{1}:hour".format(key, 166 | evttype)), 167 | hour, nevents) 168 | 169 | # Parse the name and owner of the affected repository. 170 | repo = event.get("repository", {}) 171 | owner, name, org = (repo.get("owner"), repo.get("name"), 172 | repo.get("organization")) 173 | if owner and name: 174 | repo_name = "{0}/{1}".format(owner, name) 175 | pipe.zincrby(_format("repo"), repo_name, nevents) 176 | 177 | # Save the social graph. 178 | pipe.zincrby(_format("social:user:{0}".format(key)), 179 | repo_name, nevents) 180 | pipe.zincrby(_format("social:repo:{0}".format(repo_name)), 181 | key, nevents) 182 | 183 | # Do we know what the language of the repository is? 184 | language = repo.get("language") 185 | if language: 186 | # Which are the most popular languages? 187 | pipe.zincrby(_format("lang"), language, nevents) 188 | 189 | # Total number of pushes. 
190 | if evttype == "PushEvent": 191 | pipe.zincrby(_format("pushes:lang"), language, nevents) 192 | 193 | pipe.zincrby(_format("user:{0}:lang".format(key)), 194 | language, nevents) 195 | 196 | # Who are the most important users of a language? 197 | if contribution: 198 | pipe.zincrby(_format("lang:{0}:user".format(language)), 199 | key, nevents) 200 | 201 | pipe.execute() -------------------------------------------------------------------------------- /build_db_index.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import redis 3 | 4 | redis_pool = redis.ConnectionPool(port=6379) 5 | r = redis.Redis(connection_pool=redis_pool) 6 | 7 | conn = sqlite3.connect('userdata.db') 8 | c = conn.cursor() 9 | 10 | 11 | def get_count(username): 12 | count = 0 13 | userinfo = [] 14 | condition = 'select * from userinfo where owener = \'' + str(username) + '\'' 15 | for zero in c.execute(condition): 16 | count += 1 17 | userinfo.append(zero) 18 | print zero 19 | 20 | return count, userinfo 21 | -------------------------------------------------------------------------------- /events/delete.json: -------------------------------------------------------------------------------- 1 | { 2 | "created_at": "2014-01-01T00:02:01-08:00", 3 | "payload": { 4 | "ref": "2-14-spec-stub-chain-args", 5 | "ref_type": "branch" 6 | }, 7 | "public": true, 8 | "type": "DeleteEvent", 9 | "url": "https://github.com/", 10 | "actor": "myronmarston", 11 | "actor_attributes": { 12 | "login": "myronmarston", 13 | "type": "User", 14 | "gravatar_id": "42d4590355a1404230fbc9aff4dd377b", 15 | "name": "Myron Marston", 16 | "company": "Moz", 17 | "blog": "myronmars.to/n", 18 | "location": "Seattle, WA", 19 | "email": "myron.marston@gmail.com" 20 | }, 21 | "repository": { 22 | "id": 238983, 23 | "name": "rspec-mocks", 24 | "url": "https://github.com/rspec/rspec-mocks", 25 | "description": "RSpec's 'test double' framework, with support for stubbing and 
mocking", 26 | "homepage": "http://relishapp.com/rspec/rspec-mocks", 27 | "watchers": 333, 28 | "stargazers": 333, 29 | "forks": 134, 30 | "fork": false, 31 | "size": 8630, 32 | "owner": "rspec", 33 | "private": false, 34 | "open_issues": 20, 35 | "has_issues": true, 36 | "has_downloads": false, 37 | "has_wiki": true, 38 | "language": "Ruby", 39 | "created_at": "2009-06-29T08:56:06-07:00", 40 | "pushed_at": "2014-01-01T00:02:01-08:00", 41 | "master_branch": "master", 42 | "organization": "rspec" 43 | } 44 | } -------------------------------------------------------------------------------- /events/fork.json: -------------------------------------------------------------------------------- 1 | { 2 | "created_at": "2014-01-01T00:02:25-08:00", 3 | "payload": {}, 4 | "public": true, 5 | "type": "ForkEvent", 6 | "url": "https://github.com/VenomVendor/platform_frameworks_support", 7 | "actor": "VenomVendor", 8 | "actor_attributes": { 9 | "login": "VenomVendor", 10 | "type": "User", 11 | "gravatar_id": "5d751e2e43f29815d97b9e9f8a40e854", 12 | "name": "Yoganandh", 13 | "company": "Floost", 14 | "blog": "http://www.venomvendor.com", 15 | "location": "Near the edge of the world", 16 | "email": "info@VenomVendor.com" 17 | }, 18 | "repository": { 19 | "id": 3978781, 20 | "name": "platform_frameworks_support", 21 | "url": "https://github.com/android/platform_frameworks_support", 22 | "description": "", 23 | "homepage": "", 24 | "watchers": 118, 25 | "stargazers": 118, 26 | "forks": 81, 27 | "fork": false, 28 | "size": 8279, 29 | "owner": "android", 30 | "private": false, 31 | "open_issues": 0, 32 | "has_issues": false, 33 | "has_downloads": true, 34 | "has_wiki": true, 35 | "language": "Java", 36 | "created_at": "2012-04-09T21:06:44-07:00", 37 | "pushed_at": "2013-12-11T12:50:30-08:00", 38 | "master_branch": "master", 39 | "organization": "android" 40 | } 41 | } -------------------------------------------------------------------------------- /events/issuecomment.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "created_at": "2014-01-01T00:02:08-08:00", 3 | "payload": { 4 | "issue_id": 24931935, 5 | "comment_id": 31420019 6 | }, 7 | "public": true, 8 | "type": "IssueCommentEvent", 9 | "url": "https://github.com/ReduxFramework/ReduxFramework/issues/473#issuecomment-31420019", 10 | "actor": "dovy", 11 | "actor_attributes": { 12 | "login": "dovy", 13 | "type": "User", 14 | "gravatar_id": "7002a89744b59220e3c14a9f69f342f3", 15 | "name": "Dovy Paukstys", 16 | "company": "Real-Time Collaboration, Inc.", 17 | "location": "Provo, Utah" 18 | }, 19 | "repository": { 20 | "id": 12578007, 21 | "name": "ReduxFramework", 22 | "url": "https://github.com/ReduxFramework/ReduxFramework", 23 | "description": "Redux is a simple, truly extensible options framework for WordPress themes and plugins.", 24 | "homepage": "http://reduxframework.com/", 25 | "watchers": 202, 26 | "stargazers": 202, 27 | "forks": 83, 28 | "fork": false, 29 | "size": 15041, 30 | "owner": "ReduxFramework", 31 | "private": false, 32 | "open_issues": 53, 33 | "has_issues": true, 34 | "has_downloads": true, 35 | "has_wiki": true, 36 | "language": "PHP", 37 | "created_at": "2013-09-03T17:20:06-07:00", 38 | "pushed_at": "2013-12-31T12:59:44-08:00", 39 | "master_branch": "master", 40 | "organization": "ReduxFramework" 41 | } 42 | } -------------------------------------------------------------------------------- /events/issues.json: -------------------------------------------------------------------------------- 1 | { 2 | "created_at": "2014-01-01T00:02:01-08:00", 3 | "payload": { 4 | "action": "opened", 5 | "issue": 24938482, 6 | "number": 6 7 | }, 8 | "public": true, 9 | "type": "IssuesEvent", 10 | "url": "https://github.com/terkel/terkel.jp-stylesheets/issues/6", 11 | "actor": "terkel", 12 | "actor_attributes": { 13 | "login": "terkel", 14 | "type": "User", 15 | "gravatar_id": "dc3f88b2c2c4210e3068d94ce7c25bf2", 16 | "name": "Takeru 
Suzuki", 17 | "company": "Shiftbrain Inc.", 18 | "blog": "http://terkel.jp/", 19 | "location": "Japan", 20 | "email": "terkeljp@gmail.com" 21 | }, 22 | "repository": { 23 | "id": 4113385, 24 | "name": "terkel.jp-stylesheets", 25 | "url": "https://github.com/terkel/terkel.jp-stylesheets", 26 | "description": "Stylesheets for terkel.jp", 27 | "homepage": "", 28 | "watchers": 10, 29 | "stargazers": 10, 30 | "forks": 2, 31 | "fork": false, 32 | "size": 302, 33 | "owner": "terkel", 34 | "private": false, 35 | "open_issues": 5, 36 | "has_issues": true, 37 | "has_downloads": true, 38 | "has_wiki": true, 39 | "language": "CSS", 40 | "created_at": "2012-04-23T06:31:27-07:00", 41 | "pushed_at": "2013-12-31T23:54:20-08:00", 42 | "master_branch": "master" 43 | } 44 | } -------------------------------------------------------------------------------- /events/pullrequest.json: -------------------------------------------------------------------------------- 1 | { 2 | "created_at": "2014-01-01T00:02:02-08:00", 3 | "payload": { 4 | "action": "closed", 5 | "number": 28, 6 | "pull_request": { 7 | "url": "https://api.github.com/repos/raghothams/sharURL/pulls/28", 8 | "id": 11168887, 9 | "html_url": "https://github.com/raghothams/sharURL/pull/28", 10 | "diff_url": "https://github.com/raghothams/sharURL/pull/28.diff", 11 | "patch_url": "https://github.com/raghothams/sharURL/pull/28.patch", 12 | "issue_url": "https://github.com/raghothams/sharURL/pull/28", 13 | "number": 28, 14 | "state": "closed", 15 | "title": "Clicking on link opens it in new tab now", 16 | "user": { 17 | "login": "shrayas", 18 | "id": 240368, 19 | "avatar_url": "https://gravatar.com/avatar/04916cffc8dea77475189ac7ed480420?d=https%3A%2F%2Fidenticons.github.com%2Ff29aa139e90025ff44dd23f1f3ec8b58.png&r=x", 20 | "gravatar_id": "04916cffc8dea77475189ac7ed480420", 21 | "url": "https://api.github.com/users/shrayas", 22 | "html_url": "https://github.com/shrayas", 23 | "followers_url": 
"https://api.github.com/users/shrayas/followers", 24 | "following_url": "https://api.github.com/users/shrayas/following{/other_user}", 25 | "gists_url": "https://api.github.com/users/shrayas/gists{/gist_id}", 26 | "starred_url": "https://api.github.com/users/shrayas/starred{/owner}{/repo}", 27 | "subscriptions_url": "https://api.github.com/users/shrayas/subscriptions", 28 | "organizations_url": "https://api.github.com/users/shrayas/orgs", 29 | "repos_url": "https://api.github.com/users/shrayas/repos", 30 | "events_url": "https://api.github.com/users/shrayas/events{/privacy}", 31 | "received_events_url": "https://api.github.com/users/shrayas/received_events", 32 | "type": "User", 33 | "site_admin": false 34 | }, 35 | "body": "Links used to be opened in the same tab, hence breaking the flow.", 36 | "created_at": "2014-01-01T07:58:44Z", 37 | "updated_at": "2014-01-01T08:02:02Z", 38 | "closed_at": "2014-01-01T08:02:02Z", 39 | "merged_at": "2014-01-01T08:02:02Z", 40 | "merge_commit_sha": "ef74fa88aa543f7035fa69f7a9ab7352705b69d4", 41 | "assignee": null, 42 | "milestone": null, 43 | "commits_url": "https://github.com/raghothams/sharURL/pull/28/commits", 44 | "review_comments_url": "https://github.com/raghothams/sharURL/pull/28/comments", 45 | "review_comment_url": "/repos/raghothams/sharURL/pulls/comments/{number}", 46 | "comments_url": "https://api.github.com/repos/raghothams/sharURL/issues/28/comments", 47 | "statuses_url": "https://api.github.com/repos/raghothams/sharURL/statuses/84d3daa33301d74a10cce1fc340e306b19108208", 48 | "head": { 49 | "label": "shrayas:open-link-in-new-tab", 50 | "ref": "open-link-in-new-tab", 51 | "sha": "84d3daa33301d74a10cce1fc340e306b19108208", 52 | "user": { 53 | "login": "shrayas", 54 | "id": 240368, 55 | "avatar_url": "https://gravatar.com/avatar/04916cffc8dea77475189ac7ed480420?d=https%3A%2F%2Fidenticons.github.com%2Ff29aa139e90025ff44dd23f1f3ec8b58.png&r=x", 56 | "gravatar_id": "04916cffc8dea77475189ac7ed480420", 57 | "url": 
"https://api.github.com/users/shrayas", 58 | "html_url": "https://github.com/shrayas", 59 | "followers_url": "https://api.github.com/users/shrayas/followers", 60 | "following_url": "https://api.github.com/users/shrayas/following{/other_user}", 61 | "gists_url": "https://api.github.com/users/shrayas/gists{/gist_id}", 62 | "starred_url": "https://api.github.com/users/shrayas/starred{/owner}{/repo}", 63 | "subscriptions_url": "https://api.github.com/users/shrayas/subscriptions", 64 | "organizations_url": "https://api.github.com/users/shrayas/orgs", 65 | "repos_url": "https://api.github.com/users/shrayas/repos", 66 | "events_url": "https://api.github.com/users/shrayas/events{/privacy}", 67 | "received_events_url": "https://api.github.com/users/shrayas/received_events", 68 | "type": "User", 69 | "site_admin": false 70 | }, 71 | "repo": { 72 | "id": 11402218, 73 | "name": "sharURL", 74 | "full_name": "shrayas/sharURL", 75 | "owner": { 76 | "login": "shrayas", 77 | "id": 240368, 78 | "avatar_url": "https://gravatar.com/avatar/04916cffc8dea77475189ac7ed480420?d=https%3A%2F%2Fidenticons.github.com%2Ff29aa139e90025ff44dd23f1f3ec8b58.png&r=x", 79 | "gravatar_id": "04916cffc8dea77475189ac7ed480420", 80 | "url": "https://api.github.com/users/shrayas", 81 | "html_url": "https://github.com/shrayas", 82 | "followers_url": "https://api.github.com/users/shrayas/followers", 83 | "following_url": "https://api.github.com/users/shrayas/following{/other_user}", 84 | "gists_url": "https://api.github.com/users/shrayas/gists{/gist_id}", 85 | "starred_url": "https://api.github.com/users/shrayas/starred{/owner}{/repo}", 86 | "subscriptions_url": "https://api.github.com/users/shrayas/subscriptions", 87 | "organizations_url": "https://api.github.com/users/shrayas/orgs", 88 | "repos_url": "https://api.github.com/users/shrayas/repos", 89 | "events_url": "https://api.github.com/users/shrayas/events{/privacy}", 90 | "received_events_url": "https://api.github.com/users/shrayas/received_events", 91 | 
"type": "User", 92 | "site_admin": false 93 | }, 94 | "private": false, 95 | "html_url": "https://github.com/shrayas/sharURL", 96 | "description": "Web app for sharing URL with group of people", 97 | "fork": true, 98 | "url": "https://api.github.com/repos/shrayas/sharURL", 99 | "forks_url": "https://api.github.com/repos/shrayas/sharURL/forks", 100 | "keys_url": "https://api.github.com/repos/shrayas/sharURL/keys{/key_id}", 101 | "collaborators_url": "https://api.github.com/repos/shrayas/sharURL/collaborators{/collaborator}", 102 | "teams_url": "https://api.github.com/repos/shrayas/sharURL/teams", 103 | "hooks_url": "https://api.github.com/repos/shrayas/sharURL/hooks", 104 | "issue_events_url": "https://api.github.com/repos/shrayas/sharURL/issues/events{/number}", 105 | "events_url": "https://api.github.com/repos/shrayas/sharURL/events", 106 | "assignees_url": "https://api.github.com/repos/shrayas/sharURL/assignees{/user}", 107 | "branches_url": "https://api.github.com/repos/shrayas/sharURL/branches{/branch}", 108 | "tags_url": "https://api.github.com/repos/shrayas/sharURL/tags", 109 | "blobs_url": "https://api.github.com/repos/shrayas/sharURL/git/blobs{/sha}", 110 | "git_tags_url": "https://api.github.com/repos/shrayas/sharURL/git/tags{/sha}", 111 | "git_refs_url": "https://api.github.com/repos/shrayas/sharURL/git/refs{/sha}", 112 | "trees_url": "https://api.github.com/repos/shrayas/sharURL/git/trees{/sha}", 113 | "statuses_url": "https://api.github.com/repos/shrayas/sharURL/statuses/{sha}", 114 | "languages_url": "https://api.github.com/repos/shrayas/sharURL/languages", 115 | "stargazers_url": "https://api.github.com/repos/shrayas/sharURL/stargazers", 116 | "contributors_url": "https://api.github.com/repos/shrayas/sharURL/contributors", 117 | "subscribers_url": "https://api.github.com/repos/shrayas/sharURL/subscribers", 118 | "subscription_url": "https://api.github.com/repos/shrayas/sharURL/subscription", 119 | "commits_url": 
"https://api.github.com/repos/shrayas/sharURL/commits{/sha}", 120 | "git_commits_url": "https://api.github.com/repos/shrayas/sharURL/git/commits{/sha}", 121 | "comments_url": "https://api.github.com/repos/shrayas/sharURL/comments{/number}", 122 | "issue_comment_url": "https://api.github.com/repos/shrayas/sharURL/issues/comments/{number}", 123 | "contents_url": "https://api.github.com/repos/shrayas/sharURL/contents/{+path}", 124 | "compare_url": "https://api.github.com/repos/shrayas/sharURL/compare/{base}...{head}", 125 | "merges_url": "https://api.github.com/repos/shrayas/sharURL/merges", 126 | "archive_url": "https://api.github.com/repos/shrayas/sharURL/{archive_format}{/ref}", 127 | "downloads_url": "https://api.github.com/repos/shrayas/sharURL/downloads", 128 | "issues_url": "https://api.github.com/repos/shrayas/sharURL/issues{/number}", 129 | "pulls_url": "https://api.github.com/repos/shrayas/sharURL/pulls{/number}", 130 | "milestones_url": "https://api.github.com/repos/shrayas/sharURL/milestones{/number}", 131 | "notifications_url": "https://api.github.com/repos/shrayas/sharURL/notifications{?since,all,participating}", 132 | "labels_url": "https://api.github.com/repos/shrayas/sharURL/labels{/name}", 133 | "releases_url": "https://api.github.com/repos/shrayas/sharURL/releases{/id}", 134 | "created_at": "2013-07-14T11:47:37Z", 135 | "updated_at": "2014-01-01T08:02:02Z", 136 | "pushed_at": "2014-01-01T07:58:07Z", 137 | "git_url": "git://github.com/shrayas/sharURL.git", 138 | "ssh_url": "git@github.com:shrayas/sharURL.git", 139 | "clone_url": "https://github.com/shrayas/sharURL.git", 140 | "svn_url": "https://github.com/shrayas/sharURL", 141 | "homepage": null, 142 | "size": 1073, 143 | "stargazers_count": 0, 144 | "watchers_count": 0, 145 | "language": "JavaScript", 146 | "has_issues": false, 147 | "has_downloads": true, 148 | "has_wiki": true, 149 | "forks_count": 0, 150 | "mirror_url": null, 151 | "open_issues_count": 0, 152 | "forks": 0, 153 | "open_issues": 
0, 154 | "watchers": 0, 155 | "default_branch": "master", 156 | "master_branch": "master" 157 | } 158 | }, 159 | "base": { 160 | "label": "raghothams:master", 161 | "ref": "master", 162 | "sha": "dea5599b6c3c1d203c92bab2a5ca0298f637f00b", 163 | "user": { 164 | "login": "raghothams", 165 | "id": 1147540, 166 | "avatar_url": "https://gravatar.com/avatar/4a1a459a7121d36bbd0ad15b59735b50?d=https%3A%2F%2Fidenticons.github.com%2F620cac81ae2956238c51ba07966e2b85.png&r=x", 167 | "gravatar_id": "4a1a459a7121d36bbd0ad15b59735b50", 168 | "url": "https://api.github.com/users/raghothams", 169 | "html_url": "https://github.com/raghothams", 170 | "followers_url": "https://api.github.com/users/raghothams/followers", 171 | "following_url": "https://api.github.com/users/raghothams/following{/other_user}", 172 | "gists_url": "https://api.github.com/users/raghothams/gists{/gist_id}", 173 | "starred_url": "https://api.github.com/users/raghothams/starred{/owner}{/repo}", 174 | "subscriptions_url": "https://api.github.com/users/raghothams/subscriptions", 175 | "organizations_url": "https://api.github.com/users/raghothams/orgs", 176 | "repos_url": "https://api.github.com/users/raghothams/repos", 177 | "events_url": "https://api.github.com/users/raghothams/events{/privacy}", 178 | "received_events_url": "https://api.github.com/users/raghothams/received_events", 179 | "type": "User", 180 | "site_admin": false 181 | }, 182 | "repo": { 183 | "id": 11090251, 184 | "name": "sharURL", 185 | "full_name": "raghothams/sharURL", 186 | "owner": { 187 | "login": "raghothams", 188 | "id": 1147540, 189 | "avatar_url": "https://gravatar.com/avatar/4a1a459a7121d36bbd0ad15b59735b50?d=https%3A%2F%2Fidenticons.github.com%2F620cac81ae2956238c51ba07966e2b85.png&r=x", 190 | "gravatar_id": "4a1a459a7121d36bbd0ad15b59735b50", 191 | "url": "https://api.github.com/users/raghothams", 192 | "html_url": "https://github.com/raghothams", 193 | "followers_url": "https://api.github.com/users/raghothams/followers", 194 | 
"following_url": "https://api.github.com/users/raghothams/following{/other_user}", 195 | "gists_url": "https://api.github.com/users/raghothams/gists{/gist_id}", 196 | "starred_url": "https://api.github.com/users/raghothams/starred{/owner}{/repo}", 197 | "subscriptions_url": "https://api.github.com/users/raghothams/subscriptions", 198 | "organizations_url": "https://api.github.com/users/raghothams/orgs", 199 | "repos_url": "https://api.github.com/users/raghothams/repos", 200 | "events_url": "https://api.github.com/users/raghothams/events{/privacy}", 201 | "received_events_url": "https://api.github.com/users/raghothams/received_events", 202 | "type": "User", 203 | "site_admin": false 204 | }, 205 | "private": false, 206 | "html_url": "https://github.com/raghothams/sharURL", 207 | "description": "Web app for sharing URL with group of people", 208 | "fork": false, 209 | "url": "https://api.github.com/repos/raghothams/sharURL", 210 | "forks_url": "https://api.github.com/repos/raghothams/sharURL/forks", 211 | "keys_url": "https://api.github.com/repos/raghothams/sharURL/keys{/key_id}", 212 | "collaborators_url": "https://api.github.com/repos/raghothams/sharURL/collaborators{/collaborator}", 213 | "teams_url": "https://api.github.com/repos/raghothams/sharURL/teams", 214 | "hooks_url": "https://api.github.com/repos/raghothams/sharURL/hooks", 215 | "issue_events_url": "https://api.github.com/repos/raghothams/sharURL/issues/events{/number}", 216 | "events_url": "https://api.github.com/repos/raghothams/sharURL/events", 217 | "assignees_url": "https://api.github.com/repos/raghothams/sharURL/assignees{/user}", 218 | "branches_url": "https://api.github.com/repos/raghothams/sharURL/branches{/branch}", 219 | "tags_url": "https://api.github.com/repos/raghothams/sharURL/tags", 220 | "blobs_url": "https://api.github.com/repos/raghothams/sharURL/git/blobs{/sha}", 221 | "git_tags_url": "https://api.github.com/repos/raghothams/sharURL/git/tags{/sha}", 222 | "git_refs_url": 
"https://api.github.com/repos/raghothams/sharURL/git/refs{/sha}", 223 | "trees_url": "https://api.github.com/repos/raghothams/sharURL/git/trees{/sha}", 224 | "statuses_url": "https://api.github.com/repos/raghothams/sharURL/statuses/{sha}", 225 | "languages_url": "https://api.github.com/repos/raghothams/sharURL/languages", 226 | "stargazers_url": "https://api.github.com/repos/raghothams/sharURL/stargazers", 227 | "contributors_url": "https://api.github.com/repos/raghothams/sharURL/contributors", 228 | "subscribers_url": "https://api.github.com/repos/raghothams/sharURL/subscribers", 229 | "subscription_url": "https://api.github.com/repos/raghothams/sharURL/subscription", 230 | "commits_url": "https://api.github.com/repos/raghothams/sharURL/commits{/sha}", 231 | "git_commits_url": "https://api.github.com/repos/raghothams/sharURL/git/commits{/sha}", 232 | "comments_url": "https://api.github.com/repos/raghothams/sharURL/comments{/number}", 233 | "issue_comment_url": "https://api.github.com/repos/raghothams/sharURL/issues/comments/{number}", 234 | "contents_url": "https://api.github.com/repos/raghothams/sharURL/contents/{+path}", 235 | "compare_url": "https://api.github.com/repos/raghothams/sharURL/compare/{base}...{head}", 236 | "merges_url": "https://api.github.com/repos/raghothams/sharURL/merges", 237 | "archive_url": "https://api.github.com/repos/raghothams/sharURL/{archive_format}{/ref}", 238 | "downloads_url": "https://api.github.com/repos/raghothams/sharURL/downloads", 239 | "issues_url": "https://api.github.com/repos/raghothams/sharURL/issues{/number}", 240 | "pulls_url": "https://api.github.com/repos/raghothams/sharURL/pulls{/number}", 241 | "milestones_url": "https://api.github.com/repos/raghothams/sharURL/milestones{/number}", 242 | "notifications_url": "https://api.github.com/repos/raghothams/sharURL/notifications{?since,all,participating}", 243 | "labels_url": "https://api.github.com/repos/raghothams/sharURL/labels{/name}", 244 | "releases_url": 
"https://api.github.com/repos/raghothams/sharURL/releases{/id}", 245 | "created_at": "2013-07-01T09:24:30Z", 246 | "updated_at": "2014-01-01T08:02:02Z", 247 | "pushed_at": "2014-01-01T08:02:02Z", 248 | "git_url": "git://github.com/raghothams/sharURL.git", 249 | "ssh_url": "git@github.com:raghothams/sharURL.git", 250 | "clone_url": "https://github.com/raghothams/sharURL.git", 251 | "svn_url": "https://github.com/raghothams/sharURL", 252 | "homepage": null, 253 | "size": 1530, 254 | "stargazers_count": 1, 255 | "watchers_count": 1, 256 | "language": "JavaScript", 257 | "has_issues": true, 258 | "has_downloads": true, 259 | "has_wiki": true, 260 | "forks_count": 1, 261 | "mirror_url": null, 262 | "open_issues_count": 9, 263 | "forks": 1, 264 | "open_issues": 9, 265 | "watchers": 1, 266 | "default_branch": "master", 267 | "master_branch": "master" 268 | } 269 | }, 270 | "_links": { 271 | "self": { 272 | "href": "https://api.github.com/repos/raghothams/sharURL/pulls/28" 273 | }, 274 | "html": { 275 | "href": "https://github.com/raghothams/sharURL/pull/28" 276 | }, 277 | "issue": { 278 | "href": "https://api.github.com/repos/raghothams/sharURL/issues/28" 279 | }, 280 | "comments": { 281 | "href": "https://api.github.com/repos/raghothams/sharURL/issues/28/comments" 282 | }, 283 | "review_comments": { 284 | "href": "https://api.github.com/repos/raghothams/sharURL/pulls/28/comments" 285 | }, 286 | "statuses": { 287 | "href": "https://api.github.com/repos/raghothams/sharURL/statuses/84d3daa33301d74a10cce1fc340e306b19108208" 288 | } 289 | }, 290 | "merged": true, 291 | "mergeable": null, 292 | "mergeable_state": "unknown", 293 | "merged_by": { 294 | "login": "raghothams", 295 | "id": 1147540, 296 | "avatar_url": "https://gravatar.com/avatar/4a1a459a7121d36bbd0ad15b59735b50?d=https%3A%2F%2Fidenticons.github.com%2F620cac81ae2956238c51ba07966e2b85.png&r=x", 297 | "gravatar_id": "4a1a459a7121d36bbd0ad15b59735b50", 298 | "url": "https://api.github.com/users/raghothams", 299 | 
"html_url": "https://github.com/raghothams", 300 | "followers_url": "https://api.github.com/users/raghothams/followers", 301 | "following_url": "https://api.github.com/users/raghothams/following{/other_user}", 302 | "gists_url": "https://api.github.com/users/raghothams/gists{/gist_id}", 303 | "starred_url": "https://api.github.com/users/raghothams/starred{/owner}{/repo}", 304 | "subscriptions_url": "https://api.github.com/users/raghothams/subscriptions", 305 | "organizations_url": "https://api.github.com/users/raghothams/orgs", 306 | "repos_url": "https://api.github.com/users/raghothams/repos", 307 | "events_url": "https://api.github.com/users/raghothams/events{/privacy}", 308 | "received_events_url": "https://api.github.com/users/raghothams/received_events", 309 | "type": "User", 310 | "site_admin": false 311 | }, 312 | "comments": 0, 313 | "review_comments": 0, 314 | "commits": 1, 315 | "additions": 1, 316 | "deletions": 1, 317 | "changed_files": 1 318 | } 319 | }, 320 | "public": true, 321 | "type": "PullRequestEvent", 322 | "url": "https://github.com/raghothams/sharURL/pull/28", 323 | "actor": "raghothams", 324 | "actor_attributes": { 325 | "login": "raghothams", 326 | "type": "User", 327 | "gravatar_id": "4a1a459a7121d36bbd0ad15b59735b50", 328 | "name": "Raghotham S", 329 | "company": "SAP Labs Pvt. 
Ltd.", 330 | "location": "Bangalore", 331 | "email": "raghotham.s@gmail.com" 332 | }, 333 | "repository": { 334 | "id": 11090251, 335 | "name": "sharURL", 336 | "url": "https://github.com/raghothams/sharURL", 337 | "description": "Web app for sharing URL with group of people", 338 | "watchers": 1, 339 | "stargazers": 1, 340 | "forks": 1, 341 | "fork": false, 342 | "size": 1530, 343 | "owner": "raghothams", 344 | "private": false, 345 | "open_issues": 5, 346 | "has_issues": true, 347 | "has_downloads": true, 348 | "has_wiki": true, 349 | "language": "JavaScript", 350 | "created_at": "2013-07-01T02:24:30-07:00", 351 | "pushed_at": "2014-01-01T00:02:02-08:00", 352 | "master_branch": "master" 353 | } 354 | } -------------------------------------------------------------------------------- /events/push.json: -------------------------------------------------------------------------------- 1 | { 2 | "created_at": "2014-01-01T00:02:02-08:00", 3 | "payload": { 4 | "shas": [ 5 | [ 6 | "84d3daa33301d74a10cce1fc340e306b19108208", 7 | "shrayasr@gmail.com", 8 | "Clicking on link opens it in new tab now", 9 | "Shrayas", 10 | true 11 | ], 12 | [ 13 | "403f4884d3e1ae2651345304d989275c69eed65e", 14 | "raghotham.s@gmail.com", 15 | "Merge pull request #28 from shrayas/open-link-in-new-tab\n\nClicking on link opens it in new tab now", 16 | "Raghotham S", 17 | true 18 | ] 19 | ], 20 | "size": 2, 21 | "ref": "refs/heads/master", 22 | "head": "403f4884d3e1ae2651345304d989275c69eed65e" 23 | }, 24 | "public": true, 25 | "type": "PushEvent", 26 | "url": "https://github.com/raghothams/sharURL/compare/dea5599b6c...403f4884d3", 27 | "actor": "raghothams", 28 | "actor_attributes": { 29 | "login": "raghothams", 30 | "type": "User", 31 | "gravatar_id": "4a1a459a7121d36bbd0ad15b59735b50", 32 | "name": "Raghotham S", 33 | "company": "SAP Labs Pvt. 
Ltd.", 34 | "location": "Bangalore", 35 | "email": "raghotham.s@gmail.com" 36 | }, 37 | "repository": { 38 | "id": 11090251, 39 | "name": "sharURL", 40 | "url": "https://github.com/raghothams/sharURL", 41 | "description": "Web app for sharing URL with group of people", 42 | "watchers": 1, 43 | "stargazers": 1, 44 | "forks": 1, 45 | "fork": false, 46 | "size": 1530, 47 | "owner": "raghothams", 48 | "private": false, 49 | "open_issues": 5, 50 | "has_issues": true, 51 | "has_downloads": true, 52 | "has_wiki": true, 53 | "language": "JavaScript", 54 | "created_at": "2013-07-01T02:24:30-07:00", 55 | "pushed_at": "2014-01-01T00:02:02-08:00", 56 | "master_branch": "master" 57 | } 58 | } -------------------------------------------------------------------------------- /events/watch.json: -------------------------------------------------------------------------------- 1 | { 2 | "created_at": "2014-01-01T00:02:03-08:00", 3 | "payload": { 4 | "action": "started" 5 | }, 6 | "public": true, 7 | "type": "WatchEvent", 8 | "url": "https://github.com/segmentio/myth", 9 | "actor": "dasakigr", 10 | "actor_attributes": { 11 | "login": "dasakigr", 12 | "type": "User", 13 | "gravatar_id": "925bc9a8406c0dd12320fbdc6b52286b" 14 | }, 15 | "repository": { 16 | "id": 15221739, 17 | "name": "myth", 18 | "url": "https://github.com/segmentio/myth", 19 | "description": "A CSS preprocessor that acts like a polyfill for future versions of the spec.", 20 | "homepage": "myth.io", 21 | "watchers": 2501, 22 | "stargazers": 2501, 23 | "forks": 79, 24 | "fork": false, 25 | "size": 8099, 26 | "owner": "segmentio", 27 | "private": false, 28 | "open_issues": 4, 29 | "has_issues": true, 30 | "has_downloads": true, 31 | "has_wiki": true, 32 | "language": "CSS", 33 | "created_at": "2013-12-16T01:22:19-08:00", 34 | "pushed_at": "2013-12-25T11:48:06-08:00", 35 | "master_branch": "master", 36 | "organization": "segmentio" 37 | } 38 | } 
# --------------------------------------------------------------------------------
# /evttypes.txt:
# --------------------------------------------------------------------------------
# CommitCommentEvent
# CreateEvent
# DeleteEvent
# DownloadEvent
# FollowEvent
# ForkEvent
# ForkApplyEvent
# GistEvent
# GollumEvent
# IssueCommentEvent
# IssuesEvent
# MemberEvent
# PublicEvent
# PullRequestEvent
# PullRequestReviewCommentEvent
# PushEvent
# TeamAddEvent
# WatchEvent
# ReleaseEvent
#
# --------------------------------------------------------------------------------
# /handle.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import build_db as bd
import build_db_index as dbindex
import ml as ml
# parse_data defines get_minutes_counts_with_id() and get_month_total(); the
# functions below always referred to it as ``pd`` but it was never imported,
# so every call raised NameError.
import parse_data as pd


def simple_draw(label, y):
    """Plot ``y`` as a single labelled series in a new figure and show it.

    :param label: legend label for the plotted series.
    :param y: data handed unchanged to ``plt.plot``.
    """
    plt.figure(figsize=(8, 4))
    plt.plot(y, label=label)
    plt.legend()
    plt.show()


def draw_date(files):
    """Plot the per-minute event counts of one event archive.

    :param files: path passed unchanged to
        ``parse_data.get_minutes_counts_with_id``; it is also used as the
        legend label.
    """
    mwcs = pd.get_minutes_counts_with_id(files)
    # NOTE(review): ``mwcs`` is a list of (minute, count) pairs and is plotted
    # as is, exactly as before.  The separate x/y lists the original built
    # from it were never used and have been removed; confirm whether only the
    # counts were meant to be drawn.
    simple_draw(files, mwcs)


def draw_month():
    """Plot the daily event totals, one line per week, and show the figure.

    The totals come from ``parse_data.get_month_total``; they are printed
    before plotting.  Slices that run past the end of the list are simply
    shorter, so a partial third week is fine.
    """
    results = pd.get_month_total()
    print(results)

    plt.figure(figsize=(8, 4))
    # Plain slicing replaces list.__getslice__, which only exists on Python 2.
    plt.plot(results[0:7], label="first week")
    plt.plot(results[7:14], label="second week")
    plt.plot(results[14:21], label="third week")
    plt.legend()
    plt.show()


if __name__ == '__main__':
    # Single-argument print(...) produces the same output on Python 2 and 3.
    data = ml.get_vector("gmszone")
    print(data)
    gmszone = ml.get_neighbors("gmszone")
    print(gmszone)
    result1 = ml.get_points("gmszone")
    print(result1)
    alesdokshanin = ml.get_points("alesdokshanin")
    print(alesdokshanin)
# --------------------------------------------------------------------------------
# /index.h5:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/phodal/ml/cce4dabadb351ca57d322e99a7c21b42d91f9b58/index.h5
#
# --------------------------------------------------------------------------------
# /languages.txt:
# --------------------------------------------------------------------------------
# JavaScript
# Java
# Ruby
# Python
# PHP
# C
# C++
# Objective-C
# Shell
# C#
# Perl
# VimL
# CoffeeScript
# Scala
# Go
# Emacs Lisp
# Lua
# Haskell
# Clojure
# Erlang
# R
# ActionScript
# Groovy
# Puppet
# Matlab
# Rust
# OCaml
# D
# Julia
# Dart
# Common Lisp
#
# --------------------------------------------------------------------------------
# /ml.py:
# --------------------------------------------------------------------------------
import build_db as db
import redis
import pyflann
import h5py
import numpy as np


def _read_lines(path):
    """Return the stripped lines of the text file at ``path``."""
    # ``with`` closes the handle; the original left both files open.
    with open(path) as f:
        return [line.strip() for line in f]


evttypes = _read_lines("evttypes.txt")
langs = _read_lines("languages.txt")

index_filename = "index.h5"
points_filename = "points.h5"

nevts = len(evttypes)
nlangs = len(langs)
# Vector layout used by parse_vector():
#   1 total-activity slot + 7 day slots + one slot per event type
#   + 4 count slots + one slot per known language + 1 "unknown language" slot.
nvector = 1 + 7 + nevts + 1 + 1 + 1 + 1 + nlangs + 1


def get_format(key):
    """Return ``key`` prefixed with the ``osrc:`` Redis namespace."""
    return "{0}:{1}".format("osrc", key)


def parse_vector(results):
    """Turn the raw pipeline results of get_vector() into a feature vector.

    :param results: the 8-item list produced by get_vector(), in the order
        the commands are queued there: total score, per-day hash, event
        types with scores, contribution/connection/repo/language counts,
        languages with scores.
    :returns: a 1-D numpy array of length ``nvector``.
    :raises TypeError: if ``results[0]`` is ``None`` (get_neighbors() checks
        for ``None`` entries before calling this).
    :raises ZeroDivisionError: if the total score is 0 and there is any
        per-day, event or language entry to normalise.
    :raises ValueError: if an event type is not listed in ``evttypes``.
    """
    points = np.zeros(nvector)
    total = int(results[0])

    points[0] = 1.0 / (total + 1)

    # Week means.  Keys are used as offsets into the 7 day slots --
    # presumably weekday indices 0-6; verify against the code that fills
    # the ``user:<name>:day`` hash.
    # .items() works on Python 2 and 3 (iteritems is Python 2 only).
    for k, v in results[1].items():
        points[1 + int(k)] = float(v) / total

    # Event types.
    n = 8
    for k, v in results[2]:
        points[n + evttypes.index(k)] = float(v) / total

    # Number of contributions, connections, repos and languages.
    n += nevts
    points[n] = 1.0 / (float(results[3]) + 1)
    points[n + 1] = 1.0 / (float(results[4]) + 1)
    points[n + 2] = 1.0 / (float(results[5]) + 1)
    points[n + 3] = 1.0 / (float(results[6]) + 1)

    # Top languages.
    n += 4
    for k, v in results[7]:
        if k in langs:
            points[n + langs.index(k)] = float(v) / total
        else:
            # Unknown language.
            # NOTE(review): with several unknown languages only the last one
            # survives (assignment, not accumulation).  Left unchanged because
            # the stored points/index were presumably built the same way.
            points[-1] = float(v) / total

    return points


def get_points(usernames):
    """Return the feature vector for a single user.

    Despite the plural parameter name, ``usernames`` is one user name: it is
    passed straight to get_vector().  The unused Redis client, pipeline and
    throw-away ``np.zeros`` array of the original have been removed; the
    return value is unchanged.

    :param usernames: user name to look up.
    :returns: 1-D numpy array of length ``nvector`` (see parse_vector()).
    """
    return parse_vector(get_vector(usernames))


def get_vector(user, pipe=None):
    """Queue (and optionally run) the Redis commands describing ``user``.

    :param user: user name; it is lower-cased before building the keys.
    :param pipe: an existing Redis pipeline to queue the commands on.  When
        omitted, a pipeline on localhost:6379 db 0 is created and executed.
    :returns: the list of pipeline results when no ``pipe`` was supplied,
        otherwise ``None`` (the caller executes its own pipeline).
    """
    no_pipe = pipe is None
    if no_pipe:
        # Only build a client when we actually need our own pipeline.
        r = redis.StrictRedis(host='localhost', port=6379, db=0)
        pipe = r.pipeline()

    user = user.lower()
    pipe.zscore(get_format("user"), user)
    pipe.hgetall(get_format("user:{0}:day".format(user)))
    pipe.zrevrange(get_format("user:{0}:event".format(user)), 0, -1,
                   withscores=True)
    pipe.zcard(get_format("user:{0}:contribution".format(user)))
    pipe.zcard(get_format("user:{0}:connection".format(user)))
    pipe.zcard(get_format("user:{0}:repo".format(user)))
    pipe.zcard(get_format("user:{0}:lang".format(user)))
    pipe.zrevrange(get_format("user:{0}:lang".format(user)), 0, -1,
                   withscores=True)

    if no_pipe:
        return pipe.execute()
    return None


def get_neighbors(name, num=5):
    """Return the names of the ``num`` users closest to ``name``.

    :param name: user name to look up.
    :param num: number of neighbours to return.
    :returns: list of user names read from the ``names`` dataset of
        ``points_filename``; an empty list when any Redis lookup for
        ``name`` returned ``None``.
    """
    vector = get_vector(name)

    if any(v is None for v in vector):
        return []

    vector = parse_vector(vector)

    # Use the module-level file-name constants instead of repeating the
    # literals (same values as before).
    with h5py.File(points_filename, "r") as f:
        points = f["points"][...]
        usernames = f["names"][...]

    flann = pyflann.FLANN()
    flann.load_index(index_filename, points)

    # Ask for one extra neighbour so the user itself can be dropped.
    inds, _ = flann.nn_index(vector, num_neighbors=num + 1)
    inds = inds[0]
    # NOTE(review): get_vector() lower-cases the name but this comparison
    # uses it as given -- confirm how names are stored in points.h5.
    if usernames[inds[0]] == name:
        inds = inds[1:]
    else:
        inds = inds[:-1]
    return list(usernames[inds])

# --------------------------------------------------------------------------------
# /package.json:
# --------------------------------------------------------------------------------
# {
#   "dependencies": {
#     "redis":"0.10.0"
#   }
# }
#
# --------------------------------------------------------------------------------
# /parse_data.py:
# --------------------------------------------------------------------------------
__author__ = 'fdhuang'
__all__ = ["get_minutes_counts_with_id"]

import json
from collections import Counter

import dateutil.parser


def get_minutes_counts_with_id(jsonfile):
    """Count the events in ``jsonfile`` per minute-of-hour.

    :param jsonfile: path of a file with one JSON event per line.
    :returns: list of ``(minute, count)`` tuples, sorted by minute.
    """
    _, minutes = handle_json(jsonfile)
    # Counter makes a single pass; the original called list.count() once per
    # distinct minute.  Sorting makes the order deterministic.
    return sorted(Counter(minutes).items())


def handle_json(jsonfile):
    """Read a line-delimited JSON event file.

    :param jsonfile: path of a file with one JSON object per line, each
        carrying a ``created_at`` timestamp.
    :returns: ``(count, minutes)`` where ``minutes`` holds the minute field
        of every event's ``created_at`` and ``count`` is its length.
    """
    minutes = []
    # One handle, closed by ``with``.  The original opened the file twice
    # (iterating one handle while reading lines from the other) and never
    # closed the second.
    with open(jsonfile, "r") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            event = json.loads(line)
            # dateutil parsing dominates the runtime of this function in the
            # line_profiler output kept in analytics.txt.
            created = dateutil.parser.parse(event["created_at"])
            minutes.append(created.minute)
    return len(minutes), minutes


def get_minutes_count_num(jsonfile):
    """Return the number of events stored in ``jsonfile``."""
    datacount, _ = handle_json(jsonfile)
    return datacount


def get_month_total():
    """Return the event counts of ``data/2014-02-DD-0.json`` for days 1-19.

    :returns: list with one count per day, in day order.
    """
    # {0:02d} zero-pads the day, replacing the manual ``i < 10`` branch.
    return [
        get_minutes_count_num('data/2014-02-{0:02d}-0.json'.format(day))
        for day in range(1, 20)
    ]

# --------------------------------------------------------------------------------
# /points.h5:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/ml/cce4dabadb351ca57d322e99a7c21b42d91f9b58/points.h5 -------------------------------------------------------------------------------- /screenshots/2014-01-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/ml/cce4dabadb351ca57d322e99a7c21b42d91f9b58/screenshots/2014-01-01.png -------------------------------------------------------------------------------- /screenshots/feb-one-plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/ml/cce4dabadb351ca57d322e99a7c21b42d91f9b58/screenshots/feb-one-plot.png -------------------------------------------------------------------------------- /screenshots/feb-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/ml/cce4dabadb351ca57d322e99a7c21b42d91f9b58/screenshots/feb-results.png --------------------------------------------------------------------------------