├── .gitignore ├── README.md └── overleaf.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.cache 2 | *output* 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # overleaf 2 | A script tool to access overleaf. 3 | 4 | ### Dependencies 5 | 6 | ``` 7 | Python3 - requests, websocket, json, bs4, argparse 8 | ``` 9 | 10 | ### Usage 11 | 12 | ``` 13 | liwz11@ubuntu:~/overleaf$ python3 overleaf.py -h 14 | usage: overleaf.py [-h] [--logout] [--projects] [--project PROJECT] [--docs] 15 | [--doc DOC] [--down DOWN] [--url URL] 16 | 17 | A script tool to access www.overleaf.com. 18 | 19 | optional arguments: 20 | -h, --help show this help message and exit 21 | --logout clear cookies and remove csrf token, and then exit 22 | --projects list all available projects, and then exit 23 | --project PROJECT specify a project id, default '' 24 | --docs list all .tex documents in the specified project, and then exit 25 | --doc DOC specify a document id, default null(meaning the main document) 26 | --down DOWN specify a file type and download the file, default 'pdf', 27 | options: 'zip', 'pdf', 'bbl', 'aux', 'out', 'log', 'blg', 'synctex.gz' 28 | --url URL specify a url to directly download the target file, default '' 29 | --compile force the project to be re-compiled 30 | ``` 31 | 32 | ### Example 1 - list all available projects 33 | 34 | ``` 35 | liwz11@ubuntu:~/overleaf$ python3 overleaf.py --projects 36 | 37 | [+] loading the project list... 38 | 39 | 5c349976c042023b1bd97751 A LaTeX Example 40 | 5e60cbcf1afbd8000150aec4 ****** 41 | 5e5612385b881f0001ba1023 ****** 42 | ...... 
43 | 44 | ``` 45 | 46 | ### Example 2 - list all documents in the specified project 47 | 48 | ``` 49 | liwz11@ubuntu:~/overleaf$ python3 overleaf.py --project 5e60cbcf1afbd8000150aec4 --docs 50 | 51 | [+] loading the document list... 52 | 53 | 5e60cbd01afbd8000150aede ******.tex 54 | 5e60cbd01afbd8000150aee0 ******.tex 55 | 5e60cbd11afbd8000150aee3 ******.tex 56 | ...... 57 | 58 | ``` 59 | 60 | ### Example 3 - download a target file by specifying the project id 61 | 62 | ``` 63 | liwz11@ubuntu:~/overleaf$ python3 overleaf.py --project 5e60cbcf1afbd8000150aec4 64 | 65 | [+] compiling the project... 66 | 67 | [+] downloading the target file to ./output.pdf 68 | 69 | ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 3.69MB 70 | 71 | Total Time: 2.69 s 72 | 73 | liwz11@ubuntu:~/overleaf$ python3 overleaf.py --project 5e60cbcf1afbd8000150aec4 --down bbl 74 | 75 | [+] this project was compiled within 10 minutes. 76 | [+] you can force it to be re-compiled with the option '--compile'. 77 | 78 | [+] downloading the target file to ./output.bbl 79 | 80 | ▇▇▇▇▇▇ 0.01MB 81 | 82 | Total Time: 0.01 s 83 | 84 | ``` 85 | 86 | ### Example 4 - download a target file by specifying the url 87 | 88 | ``` 89 | liwz11@ubuntu:~/overleaf$ python3 overleaf.py --url "https://www.overleaf.com/download/project/5e60cbcf1afbd8000150aec4/build/1711dbeca03-dbd3e44a305f01b0/output/output.pdf?compileGroup=standard&clsiserverid=clsi-pre-emp-n1-b-2565&popupDownload=true" 90 | 91 | [+] downloading the target file to ./output.pdf 92 | 93 | ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 3.79MB 94 | 95 | Total Time: 2.51 s 96 | 97 | ``` 98 | 99 | ### Acknowledgements 100 | 101 | The OverleafClient class contains code from Gabriel Pelouze’s overleaf_backup tool (https://github.com/gpelouze/overleaf_backup), which was adapted to work with Overleaf v2. 
#!/usr/bin/env python3
# Author: liwz11
# Acknowledgement: The OverleafClient class contains code from Gabriel Pelouze’s overleaf_backup tool (https://github.com/gpelouze/overleaf_backup), which was adapted to work with Overleaf v2.
"""A script tool to access www.overleaf.com: list projects/documents, compile, download."""

import getpass
import json
import math
import os
import pickle
import re
import sys
import time
from argparse import ArgumentParser

import requests
import websocket
from bs4 import BeautifulSoup


class OverleafClient(object):
    """Small client for www.overleaf.com.

    The session (cookie jar and CSRF token) and the URLs of the most recent
    compile outputs are cached in files in the current working directory
    (``cookies_file``, ``csrf_file`` and ``output_file``).
    """

    homepage = 'https://www.overleaf.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
    }
    cookies_file = '.ov.cookies.cache'
    csrf_file = '.ov.csrf.cache'
    output_file = '.ov.output.cache'

    def __init__(self):
        """Restore the cached session, or log in interactively when there is none.

        A missing, empty or unreadable cache falls back to an empty session,
        which then triggers the interactive login prompt.
        """
        try:
            self._load_session()
        except (FileNotFoundError, EOFError, TypeError, pickle.UnpicklingError):
            self.cookies = requests.cookies.RequestsCookieJar()
            self.csrf_token = ''

        if 'overleaf_session2' not in self.cookies.keys() or self.csrf_token == '':
            print('[+] login\n')
            self.login(input('Email: '), getpass.getpass())
            self._dump_session()

    def _load_session(self):
        """Load the cookie jar and the CSRF token from their cache files."""
        # NOTE(security): pickle.load executes whatever the cache file encodes.
        # The file is written by _dump_session; do not point cookies_file at a
        # file that somebody else may have written.
        with open(self.cookies_file, 'rb') as f:
            self.cookies = pickle.load(f)
            self.cookies.clear_expired_cookies()

        with open(self.csrf_file, 'r') as f:
            self.csrf_token = f.read()

    def _dump_session(self):
        """Write the cookie jar and the CSRF token to their cache files."""
        with open(self.cookies_file, 'wb') as f:
            pickle.dump(self.cookies, f)

        with open(self.csrf_file, 'w') as f:
            f.write(self.csrf_token)

    @staticmethod
    def logout():
        """Delete the cached cookies and CSRF token files, if they exist."""
        for path in (OverleafClient.cookies_file, OverleafClient.csrf_file):
            # Each file is removed independently so that a missing cookies
            # file does not leave a stale CSRF token behind.
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        print("Logout!\n")

    def login(self, email, password):
        """Log in with *email* and *password* and store the session cookies.

        Fetches the login page to obtain the ``_csrf`` form token, then posts
        the login form. On success ``self.csrf_token`` and ``self.cookies``
        are updated.

        Raises:
            Exception: on a non-200 response or when no CSRF token is found.
            ValueError: when the server answers with an error message.
        """
        print('')

        url = self.homepage + '/login'
        signin_get = requests.get(url, headers=self.headers, timeout=5)
        if signin_get.status_code != 200:
            err_msg = 'Status code %d when GET %s.' % (signin_get.status_code, url)
            raise Exception(err_msg)

        self.cookies.update(signin_get.cookies)

        # Start from an empty token so a stale cached value is never reused
        # when the page carries no (or an empty) _csrf input.
        csrf_token = ''
        soup = BeautifulSoup(signin_get.text, 'html.parser')
        for tag in soup.find_all('input'):
            if tag.get('name', None) == '_csrf':
                csrf_token = tag.get('value') or ''
                break

        if not csrf_token:
            raise Exception('CSRF token is empty.')
        self.csrf_token = csrf_token

        # send login form
        data = {'_csrf': self.csrf_token, 'email': email, 'password': password}
        signin_post = requests.post(url, headers=self.headers, data=data, cookies=signin_get.cookies, timeout=5)
        if signin_post.status_code != 200:
            err_msg = 'Status code %d when POST %s.' % (signin_post.status_code, url)
            raise Exception(err_msg)

        try:
            response = json.loads(signin_post.text)
        except json.JSONDecodeError:
            # Per the original author's note, a non-JSON body means the login
            # was successful.
            response = None

        # Only a JSON reply that explicitly carries an error message is a
        # failure; any other JSON shape is not treated as one.
        message = response.get('message') if isinstance(response, dict) else None
        if isinstance(message, dict) and message.get('type') == 'error':
            raise ValueError('Login failed: ' + str(message.get('text', '')))

        self.cookies.update(signin_post.cookies)

    def get_projects(self):
        """Fetch the project list and store it in ``self.projects``.

        The list is read from the JSON inside the ``<script id="data">`` tag
        of the ``/project`` page.

        Raises:
            Exception: on a non-200 response or when the page has no such tag.
        """
        print('[+] loading the project list...\n')

        url = self.homepage + '/project'
        projects_get = requests.get(url, headers=self.headers, cookies=self.cookies)
        if projects_get.status_code != 200:
            err_msg = 'Status code %d when GET %s.' % (projects_get.status_code, url)
            raise Exception(err_msg)

        soup = BeautifulSoup(projects_get.text, 'html.parser')
        for tag in soup.find_all('script'):
            if tag.get('id', None) == 'data':
                self.projects = json.loads(tag.get_text().strip())['projects']
                return

        # Without this, callers would fail later with an AttributeError on
        # self.projects. NOTE(review): presumably this happens when the cached
        # session is no longer valid - confirm.
        raise Exception('No project data found in the page %s.' % url)

    def get_documents(self, project_id):
        """Fetch the documents in the root folder of *project_id* into ``self.docs``.

        Obtains a socket.io token over HTTP, joins the project over a
        websocket and reads the ``rootFolder[0]['docs']`` list of the reply.

        Raises:
            Exception: on a non-200 token response or an unexpected reply.
        """
        print('[+] loading the document list...\n')

        # Millisecond timestamp; computed numerically so it does not depend on
        # how many digits str(float) happens to print.
        t = str(int(time.time() * 1000))
        url = self.homepage + '/socket.io/1/?t=' + t
        r = requests.get(url, headers=self.headers, cookies=self.cookies)
        if r.status_code != 200:
            err_msg = 'Status code %d when GET %s.' % (r.status_code, url)
            raise Exception(err_msg)
        websocket_token = r.text.split(':')[0]

        url = 'wss://www.overleaf.com/socket.io/1/websocket/' + websocket_token
        headers = {'User-Agent': self.headers['User-Agent']}
        gke_route = self.cookies.get('gke-route')
        if gke_route is not None:
            # Concatenating None would raise TypeError; send the cookie only
            # when the jar actually has it.
            headers['Cookie'] = 'gke-route=' + gke_route

        # json.dumps keeps the payload valid whatever project_id contains.
        payload = json.dumps(
            {'name': 'joinProject', 'args': [{'project_id': project_id}]},
            separators=(',', ':'),
        )

        # websocket.enableTrace(True)
        ws = websocket.WebSocket()
        try:
            ws.connect(url, header=headers)
            ws.recv()  # 1::
            ws.recv()  # 5:::{"name":"connectionAccepted"}
            ws.send('5:1+::' + payload)
            msg = ws.recv()  # 6:::1+[null,{"_id","name","rootDoc_id", "rootFolder"},"owner",2]
        finally:
            ws.close()

        parts = msg.split('6:::1+', 1)
        if len(parts) != 2:
            raise Exception('Unexpected reply when joining the project: ' + str(msg))

        project_info = json.loads(parts[1])
        # Only the documents directly inside the root folder are read here.
        self.docs = project_info[1]['rootFolder'][0]['docs']

    def compile(self, project_id, document_id, force_compile):
        """Compile the project and store the output URLs in ``self.outputs``.

        Results are cached in ``output_file`` under the key
        ``project_id + str(document_id)`` for 600 seconds. Unless
        *force_compile* is true, a cached entry that has not expired is
        reused and no request is sent.

        Args:
            project_id: id of the project to compile.
            document_id: id of the root document, or None.
            force_compile: when true, ignore the cache and always compile.

        Raises:
            Exception: on a non-200 response or a non-'success' compile status.
        """
        key = project_id + str(document_id)

        try:
            with open(self.output_file, 'r') as f:
                outputs_table = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            # No cache yet, or an unreadable one: start from scratch.
            outputs_table = {}

        if not force_compile and key in outputs_table:
            self.outputs = outputs_table[key]
            if time.time() < self.outputs['expired']:
                print('[+] the project and the document were compiled within 10 minutes.')
                print('[+] you can force it to be re-compiled with the option \'--compile\'.\n')
                return

        print('[+] compiling the project...\n')

        url = '%s/project/%s/compile' % (self.homepage, project_id)
        headers = {
            'User-Agent': self.headers['User-Agent'],
            'Referer': '%s/project/%s' % (self.homepage, project_id),
        }
        data = {'rootDoc_id': document_id, 'check': 'silent', 'incrementalCompilesEnabled': True, '_csrf': self.csrf_token}
        # Send the headers built above (including Referer); the original built
        # them but then passed self.headers instead.
        r = requests.post(url, headers=headers, data=data, cookies=self.cookies)
        if r.status_code != 200:
            err_msg = 'Status code %d when POST %s.' % (r.status_code, url)
            raise Exception(err_msg)

        res = json.loads(r.text)
        if res['status'] != 'success':
            raise Exception('Compiling failed - ' + res['status'])

        # Cached entries are considered fresh for 10 minutes (600 s).
        self.outputs = {'expired': time.time() + 600}
        for op in res['outputFiles']:
            self.outputs[op['type']] = op['url']

        outputs_table[key] = self.outputs
        with open(self.output_file, 'w') as f:
            f.write(json.dumps(outputs_table))

    def download(self, project_id, down_filetype, url=''):
        """Download a file to ``./output.<type>`` while printing progress.

        Args:
            project_id: id of the project (used for the 'zip' download).
            down_filetype: 'zip', or the type of a compile output such as
                'pdf' or 'synctex.gz'; ignored when *url* is given.
            url: when non-empty, download this URL directly; the file type is
                then taken from the last path segment of the URL.

        Raises:
            ValueError: when no file type can be derived from *url*.
            KeyError: when the requested compile output is not available.
            Exception: on a non-200 response.
        """
        if url != '':
            filename = url.split('?')[0].split('/')[-1]
            # Everything after the first dot, so 'output.synctex.gz' gives
            # 'synctex.gz'; a segment without a dot (e.g. '.../download/zip')
            # is used as the type itself.
            down_filetype = filename.partition('.')[2] or filename
            if not down_filetype:
                raise ValueError('Cannot infer a file type from the url: ' + url)
        elif down_filetype == 'zip':
            url = '%s/project/%s/download/zip' % (self.homepage, project_id)
        else:
            ftype = down_filetype.split('.')[-1]
            outputs = getattr(self, 'outputs', {})
            if ftype not in outputs:
                raise KeyError('No \'%s\' output is available for this project.' % ftype)
            url = '%s%s' % (self.homepage, outputs[ftype])

        print('[+] downloading the target file to ./output.' + down_filetype, '\n')

        r = requests.get(url, headers=self.headers, cookies=self.cookies, stream=True)
        try:
            if r.status_code != 200:
                err_msg = 'Status code %d when GET %s.' % (r.status_code, url)
                raise Exception(err_msg)

            with open('output.' + down_filetype, 'wb') as f:
                nbyte = 0
                t1 = time.time()
                # Larger chunks than the 128-byte default of iterating the
                # response directly: fewer writes and progress updates.
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

                    nbyte += len(chunk)
                    # Keep the bar on the original scale: one unit per 128
                    # bytes received, drawn logarithmically.
                    count = max(1, math.ceil(nbyte / 128))
                    print('\r' + '▇' * round(math.log2(count)) + " " + str(round(nbyte / 1048576, 2)) + 'MB ', end="")
                t2 = time.time()
                print('\n\nTotal Time:', round(t2 - t1, 2), 's\n')
        finally:
            r.close()


def main():
    """Parse the command line and run the requested action."""
    parser = ArgumentParser(description='A script tool to access www.overleaf.com.')
    parser.add_argument('--logout', action='store_true', help='clear cookies and remove csrf token, and then exit')
    parser.add_argument('--projects', action='store_true', help='list all available projects, and then exit')
    parser.add_argument('--project', type=str, default='', help='specify a project id, default \'\'')
    parser.add_argument('--docs', action='store_true', help='list all .tex documents in the specified project, and then exit')
    parser.add_argument('--doc', type=str, default=None, help='specify a document id, default null(the main document)')
    parser.add_argument('--down', type=str, default='pdf', help='specify a file type and download the file, default \'pdf\', options: \'zip\', \'pdf\', \'bbl\', \'aux\', \'out\', \'log\', \'blg\', \'synctex.gz\'')
    parser.add_argument('--url', type=str, default='', help='specify a url to directly download the target file, default \'\'')
    parser.add_argument('--compile', action='store_true', help='force the project to be re-compiled')
    args = parser.parse_args()

    project_id = args.project

    print('')

    # Each action returns normally instead of calling os._exit(), which would
    # end the process without flushing buffered output.
    if args.logout:
        OverleafClient.logout()
        return

    client = OverleafClient()

    if args.url != '':
        client.download('', '', url=args.url)
        return

    if args.projects:
        client.get_projects()
        for project in client.projects:
            if not project.get('trashed', False):
                print(project['id'], project['name'])
        print('\n')
        return

    if project_id == '':
        print('Please use the option \'--project\' to specify a project id.')
        print('Try \'python3 overleaf.py --projects\' to list all available projects.\n')
        return

    if args.docs:
        client.get_documents(project_id)
        for doc in client.docs:
            if doc['name'].endswith('.tex'):
                print(doc['_id'], doc['name'])
        print('\n')
        return

    # The zip archive is built from the sources; only compile outputs need a
    # compile first.
    if args.down != 'zip':
        client.compile(project_id, args.doc, args.compile)

    client.download(project_id, args.down)


if __name__ == '__main__':
    main()