├── .gitignore ├── LICENSE.md ├── README.md ├── app.py ├── fbapi.py ├── fetcher.py ├── graph.py ├── history.py ├── requirements.txt ├── sample_data.csv ├── static ├── css │ └── nv.d3.min.css └── js │ ├── nv.d3.min.js │ └── nv.d3.min.js.map ├── status.py └── templates ├── main.html └── sample_graph.html /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | SECRETS.txt 3 | .*.swp 4 | *.pyc 5 | generated_graphs/ 6 | log/ -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Stalky 2 | ===== 3 | 4 | Um hello I guess you're here because you want to look at the code for this or run it yourself. 5 | 6 | The code is up there ^^^^ so I guess here's how you run it yourself. 7 | 8 | What is this? 9 | ============= 10 | Oh, reading [the blog post](https://defaultnamehere.tumblr.com/post/139351766005/graphing-when-your-facebook-friends-are-awake) would really make that more clear. 11 | 12 | Installation 13 | ----------- 14 | 15 | Just run 16 | ```pip install -r requirements.txt``` 17 | 18 | (virtualenv is for suckers right now) 19 | 20 | You'll also need to supply some way of authenticating yourself to Facebook. 21 | 22 | Do this by creating a SECRETS.txt file with the following lines: 23 | 24 | ``` 25 | uid= 26 | cookie= 27 | client_id= 28 | excludes= 29 | ``` 30 | 31 | Download some data 32 | ------------------ 33 | 34 | ```python fetcher.py``` 35 | 36 | This will run indefinitely and create data in "log". 37 | Depending on the number of Facebook friends you have, and how active they are, you can expect around 50-100MB/day to be written to disk. 38 | 39 | Make some graphs 40 | ---------------- 41 | 42 | 1. Run `python graph.py` to convert all the raw log data into CSVs 43 | 2. Run `python app.py` to start the 100% CSS-free "webapp" 44 | 3. Go to `http://localhost:5000` to view the ultra-minimal "webapp" 45 | 4. Paste the Facebook user id that you want to graph into the box. 46 | 47 | You did it! 48 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | 2 | from flask import Flask, render_template, send_file 3 | 4 | 5 | app = Flask('stalky') 6 | 7 | @app.route('/') 8 | def index(): 9 | return render_template("main.html") 10 | 11 | @app.route('/data/') 12 | def get_data_for_uid(uid): 13 | return send_file("generated_graphs/csv/{uid}.csv".format(uid=uid)) 14 | 15 | if __name__ == '__main__': 16 | app.run(host='0.0.0.0', debug=True) 17 | -------------------------------------------------------------------------------- /fbapi.py: -------------------------------------------------------------------------------- 1 | 2 | import requests 3 | 4 | import fetcher 5 | 6 | 7 | def get_user_name(fbid): 8 | resp = requests.get("https://www.facebook.com/app_scoped_user_id/" + str(fbid), headers=fetcher.Fetcher.REQUEST_HEADERS, allow_redirects=True) 9 | return resp.url.split("/")[-1] 10 | 11 | -------------------------------------------------------------------------------- /fetcher.py: -------------------------------------------------------------------------------- 1 | import argparse as ap 2 | import json 3 | import os 4 | import time 5 | 6 | import requests 7 | 8 | import graph 9 | 10 | 11 | secrets = ap.Namespace() 12 | 13 | # Load the secrets from file so some scrublord like me doesn't accidentally commit them to git. 14 | with open("SECRETS.txt") as f: 15 | for line in f: 16 | vals = line.strip().split('=', 1) 17 | setattr(secrets, vals[0].lower(), vals[1]) 18 | 19 | 20 | SLEEP_TIME = 1 21 | 22 | OFFLINE_STATUS_JSON = """{"lat": "offline", "webStatus": "invisible", "fbAppStatus": "invisible", "otherStatus": "invisible", "status": "invisible", "messengerStatus": "invisible"}""" 23 | ACTIVE_STATUS_JSON = """{ "lat": "online", "webStatus": "invisible", "fbAppStatus": "invisible", "otherStatus": "invisible", "status": "active", "messengerStatus": "invisible"}""" 24 | 25 | class Fetcher(): 26 | # Headers to send with every request. 27 | REQUEST_HEADERS = { 28 | 'accept': '*/*', 29 | 'accept-encoding': 'gzip, deflate, sdch', 30 | 'accept-language': 'en-US,en;q=0.8,en-AU;q=0.6', 31 | 'cookie': secrets.cookie, 32 | 'dnt': '1', 33 | 'origin': 'https://www.facebook.com', 34 | 'referer': 'https://www.facebook.com/', 35 | 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36' 36 | } 37 | 38 | # Hey hey, Facebook puts this in front of all their JSON to prevent hijacking. But don't worry, we're ~verified secure~. 39 | JSON_PAYLOAD_PREFIX = "for (;;); " 40 | 41 | def __init__(self): 42 | if not os.path.exists(graph.LOG_DATA_DIR): 43 | os.makedirs(graph.LOG_DATA_DIR) 44 | self.reset_params() 45 | self.excludes = [] 46 | if hasattr(secrets, 'excludes'): 47 | self.excludes = secrets.excludes.split(',',1) 48 | 49 | def make_request(self): 50 | # Load balancing is for chumps. Facebook can take it. 51 | url = "https://5-edge-chat.facebook.com/pull" 52 | response_obj = requests.get(url, params=self.params, headers=self.REQUEST_HEADERS) 53 | 54 | try: 55 | raw_response = response_obj.text 56 | if not raw_response: 57 | return None 58 | if raw_response.startswith(self.JSON_PAYLOAD_PREFIX): 59 | data = raw_response[len(self.JSON_PAYLOAD_PREFIX) - 1:].strip() 60 | data = json.loads(data) 61 | else: 62 | # If it didn't start with for (;;); then something weird is happening. 63 | # Maybe it's unprotected JSON? 64 | data = json.loads(raw_response) 65 | except ValueError as e: 66 | print(str(e)) 67 | return None 68 | 69 | print("Response:" + str(data)) 70 | 71 | return data 72 | 73 | 74 | def _log_lat(self, uid, lat_time): 75 | if not uid in self.excludes: 76 | with open("log/{uid}.txt".format(uid=uid), "a") as f: 77 | # Now add an online status at the user's LAT. 78 | user_data = [] 79 | user_data.append(lat_time) 80 | user_data.append(ACTIVE_STATUS_JSON) 81 | f.write("|".join(user_data)) 82 | f.write("\n") 83 | 84 | # Assume the user is currently offline, since we got a lat for them. (This is guaranteed I think.) 85 | user_data = [] 86 | user_data.append(str(time.time())) 87 | user_data.append(OFFLINE_STATUS_JSON) 88 | f.write("|".join(user_data)) 89 | f.write("\n") 90 | 91 | 92 | 93 | def start_request(self): 94 | print(">") 95 | resp = self.make_request() 96 | if resp is None: 97 | print("Got error from request, restarting...") 98 | self.reset_params() 99 | return 100 | 101 | # We got info about which pool/sticky we should be using I think??? Something to do with load balancers? 102 | if "lb_info" in resp: 103 | self.params["sticky_pool"] = resp["lb_info"]["pool"] 104 | self.params["sticky_token"] = resp["lb_info"]["sticky"] 105 | 106 | if "seq" in resp: 107 | self.params["seq"] = resp["seq"] 108 | 109 | if "ms" in resp: 110 | for item in resp["ms"]: 111 | # The online/offline info we're looking for. 112 | 113 | if item["type"] == "buddylist_overlay": 114 | 115 | # Find the key with all the message details, that one is the UID. 116 | for key in item["overlay"]: 117 | if type(item["overlay"][key]) == dict: 118 | uid = key 119 | 120 | # Log the LAT in this message. 121 | self._log_lat(uid, str(item["overlay"][uid]["la"])) 122 | 123 | # Now log their current status. 124 | if "p" in item["overlay"][uid]: 125 | with open("log/{uid}.txt".format(uid=uid), "a") as f: 126 | user_data = [] 127 | user_data.append(str(time.time())) 128 | user_data.append(json.dumps(item["overlay"][uid]["p"])) 129 | f.write("|".join(user_data)) 130 | f.write("\n") 131 | 132 | # This list contains the last active times (lats) of users. 133 | if "buddyList" in item: 134 | for uid in item["buddyList"]: 135 | if "lat" in item["buddyList"][uid]: 136 | self._log_lat(uid, str(item["buddyList"][uid]["lat"])) 137 | 138 | 139 | 140 | def reset_params(self): 141 | self.params = { 142 | # No idea what this is. 143 | 'cap': '8', 144 | # No idea what this is. 145 | 'cb': '2qfi', 146 | # No idea what this is. 147 | 'channel': 'p_' + secrets.uid, 148 | 'clientid': secrets.client_id, 149 | 'format': 'json', 150 | # Is this my online status? 151 | 'idle': '0', 152 | # No idea what this is. 153 | 'isq': '173180', 154 | # Whether to stream the HTTP GET request. We don't want to! 155 | # 'mode': 'stream', 156 | # Is this how many messages we have got from Facebook in this session so far? 157 | # Previous value: 26 158 | 'msgs_recv': '0', 159 | # No idea what this is. 160 | 'partition': '-2', 161 | # No idea what this is. 162 | 'qp': 'y', 163 | # Set starting sequence number to 0. 164 | # This number doesn't seem to be necessary for getting the /pull content, since setting it to 0 every time still gets everything as far as I can tell. Maybe it's used for #webscale reasons. 165 | 'seq': '0', 166 | 'state': 'active', 167 | 'sticky_pool': 'atn2c06_chat-proxy', 168 | 'sticky_token': '0', 169 | 'uid': secrets.uid, 170 | 'viewer_uid': secrets.uid, 171 | 'wtc': '171%2C170%2C0.000%2C171%2C171' 172 | } 173 | 174 | 175 | if __name__ == "__main__": 176 | f = Fetcher() 177 | while True: 178 | try: 179 | f.start_request() 180 | time.sleep(SLEEP_TIME) 181 | except UnicodeDecodeError: 182 | f.reset_params() 183 | print("UnicodeDecodeError!") 184 | -------------------------------------------------------------------------------- /graph.py: -------------------------------------------------------------------------------- 1 | 2 | import datetime 3 | import os 4 | 5 | import history 6 | import fbapi 7 | import status 8 | 9 | 10 | LOG_DATA_DIR = "log" 11 | CSV_OUTPUT_DIR = "generated_graphs/csv" 12 | 13 | # LOL TIMEZONES TOO HARD 14 | UTC_OFFSET = 11 15 | 16 | ONE_DAY_SECONDS = 60 * 60 * 24 17 | 18 | class Grapher(): 19 | 20 | def __init__(self): 21 | if not os.path.exists(CSV_OUTPUT_DIR): 22 | os.makedirs(CSV_OUTPUT_DIR) 23 | 24 | def to_csv(self, uid, start_time, end_time): 25 | 26 | # The user's history. 27 | status_history = history.StatusHistory(uid) 28 | 29 | # Their Facebook username. 30 | #uname = fbapi.get_user_name(uid) 31 | 32 | 33 | # Generate a CSV from the multiple linear timeseries 34 | with open("generated_graphs/csv/{uid}.csv".format(uid=uid), "w") as f: 35 | 36 | f.write("time,") 37 | f.write(",".join(status.Status.statuses)) 38 | f.write("\n") 39 | 40 | # TODO preprocess sort and splice this instead of linear search. 41 | # UPDATE nahhhh I think I'll just commit it to github ;>_> 42 | for data_point in status_history.activity: 43 | if start_time < data_point.time < end_time: 44 | # Write the time. 45 | f.write(str(data_point.time) + ",") 46 | # Write the various statuses. 47 | # Sample line: