├── setup.py ├── .gitignore ├── tests └── test_api.py ├── pyoverleaf ├── __init__.py ├── __main__.py ├── _io.py └── _webapi.py ├── pyproject.toml ├── .github └── workflows │ ├── release.yml │ └── run-tests.yml ├── LICENSE └── README.md /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | /build 3 | /dist 4 | *.egg-info/ 5 | /pyoverleaf/_version.py 6 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | # 2 | def test_import_api(): 3 | from pyoverleaf import Api, ProjectIO # pylint: disable=unused-import -------------------------------------------------------------------------------- /pyoverleaf/__init__.py: -------------------------------------------------------------------------------- 1 | from ._webapi import Api # pylint: disable=unused-import 2 | from ._webapi import User, Project, ProjectFolder, ProjectFile # pylint: disable=unused-import 3 | from ._io import ProjectIO, ProjectBytesIO # pylint: disable=unused-import 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | write_to = "pyoverleaf/_version.py" 7 | 8 | [project] 9 | name = "pyoverleaf" 10 | authors = [ 11 | {name = "Jonas Kulhanek", email = "jonas.kulhanek@live.com"}, 12 | ] 13 | description = "Overleaf API and simple CLI" 14 | readme = "README.md" 15 | requires-python = ">=3.8" 16 | keywords = 
["overleaf", "api"] 17 | license = {text = "MIT"} 18 | classifiers = [ 19 | "Programming Language :: Python :: 3", 20 | ] 21 | dependencies = [ 22 | "requests", 23 | "beautifulsoup4", 24 | "browsercookie", 25 | "websocket-client", 26 | ] 27 | dynamic = ["version"] 28 | 29 | [project.optional-dependencies] 30 | cli = ["click"] 31 | 32 | [project.scripts] 33 | pyoverleaf = "pyoverleaf.__main__:main" -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | branches: 6 | - "releases/**" 7 | tags: 8 | - "v*" 9 | workflow_dispatch: 10 | 11 | jobs: 12 | deploy: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: "3.x" 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install -e '.[cli]' 25 | pip install twine pytest build 26 | - name: Run tests 27 | run: | 28 | python -m pytest 29 | - name: Build and publish 30 | env: 31 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 32 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 33 | run: | 34 | python -m build 35 | twine upload dist/* 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jonas Kulhanek 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to 
the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/run-tests.yml: -------------------------------------------------------------------------------- 1 | name: run-tests 2 | on: 3 | push: 4 | pull_request: 5 | branches: 6 | - "master" 7 | workflow_dispatch: 8 | jobs: 9 | pytest: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: [3.8, 3.9, "3.10", "3.11"] 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Setup Python # Set Python version 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | # Install pip and pytest 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | pip install -e '.[cli]' 26 | pip install pytest pytest-cov 27 | - name: Test with pytest 28 | run: 29 | PYTHONHASHSEED=0 pytest --ignore docs --cov=pyoverleaf ${{ ((matrix.python-version == '3.7') && '--ignore-glob "tests/*_py38_test.py"') || '' }} --cov-report=xml --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}.xml 30 | # - name: Upload coverage to Codecov 31 | # uses: codecov/codecov-action@v2 32 | # with: 33 | # env_vars: OS 34 | # fail_ci_if_error: true 35 | # files: ./coverage.xml 36 | # flags: python-${{ matrix.python-version }},click-${{ 
def _login():
    """
    Create an API client authenticated with the cookies of the default browser.

    :return: A logged-in `Api` instance.
    """
    api = Api()
    api.login_from_browser()
    return api


def _split_project_path(path):
    """
    Split a CLI path of the form ``[/]<project>[/<path>]``.

    :param path: The path given on the command line. A single leading slash is ignored.
    :return: A tuple ``(project_name, path_inside_project)``; the second item is ``""``
        when only the project name was given.
    """
    if path.startswith("/"):
        path = path[1:]
    project, _, inner_path = path.partition("/")
    return project, inner_path


def _find_project_id(projects, project_name):
    """
    Look up the id of the first project with the given name.

    :param projects: The projects returned by ``api.get_projects()``.
    :param project_name: The project name to search for.
    :return: The id of the matching project.
    :raises FileNotFoundError: If no project has that name.
    """
    for candidate in projects:
        if candidate.name == project_name:
            return candidate.id
    raise FileNotFoundError(f"Project '{project_name}' not found.")


def _get_io_and_path(api, path):
    """
    Resolve a CLI path into a project IO object and the path inside that project.

    :param api: A logged-in `Api` instance.
    :param path: A path of the form ``<project>/<path>``.
    :return: A tuple ``(ProjectIO, path_inside_project)``.
    :raises click.BadParameter: If the path contains no ``/``.
    :raises FileNotFoundError: If the project does not exist.
    """
    if "/" not in path:
        raise click.BadParameter("Path must be in the format <project>/<path>.")
    project, inner_path = _split_project_path(path)
    project_id = _find_project_id(api.get_projects(), project)
    return ProjectIO(api, project_id), inner_path


@click.group()
def main():
    """Entry point of the ``pyoverleaf`` command line interface."""


@main.command("ls", help="List projects or files in a project")
@click.argument("path", type=str, default=".")
def list_projects_and_files(path):
    """Print the project names, or the entries of a folder inside a project."""
    api = _login()
    projects = api.get_projects()
    if not path or path in {".", "/"}:
        print("\n".join(project.name for project in projects))
        return
    # Unlike the other commands, `ls` also accepts a bare project name (no slash).
    project, inner_path = _split_project_path(path)
    io = ProjectIO(api, _find_project_id(projects, project))
    print("\n".join(io.listdir(inner_path)))


@main.command("mkdir", help="Create a directory in a project")
@click.option("-p", "--parents", is_flag=True, help="Create parent directories if they don't exist.")
@click.argument("path", type=str)
def make_directory(path, parents):
    """Create a folder; with ``-p`` missing parents are created and existing folders tolerated."""
    io, path = _get_io_and_path(_login(), path)
    io.mkdir(path, parents=parents, exist_ok=parents)


@main.command("read", help="Reads the file in a project and writes to the standard output")
@click.argument("path", type=str)
def read(path):
    """Copy the raw bytes of a project file to the standard output."""
    io, path = _get_io_and_path(_login(), path)
    with io.open(path, "rb") as f:
        shutil.copyfileobj(f, sys.stdout.buffer)


@main.command("write", help="Reads the standard input and writes to the file in a project")
@click.argument("path", type=str)
def write(path):
    """Copy the raw bytes of the standard input into a (new or overwritten) project file."""
    io, path = _get_io_and_path(_login(), path)
    with io.open(path, "wb+") as f:
        shutil.copyfileobj(sys.stdin.buffer, f)


@main.command("rm", help="Remove file or folder from a project")
@click.argument("path", type=str)
def remove(path):
    """Delete a file or folder from a project."""
    io, path = _get_io_and_path(_login(), path)
    io.remove(path)


@main.command("download-project", help="Download project as a zip file to the specified path.")
@click.argument("project", type=str)
@click.argument("output_path", type=str)
def download_project(project, output_path):
    """Download the named project as a zip archive to ``output_path``."""
    api = _login()
    project_id = _find_project_id(api.get_projects(), project)
    api.download_project(project_id, output_path)
    print("Project downloaded to " + output_path)


if __name__ == "__main__":
    main()
Unofficial Python API to access Overleaf. 3 | 4 | ## Tasks 5 | - [x] List projects 6 | - [x] Download project as zip 7 | - [x] List and download individual files/docs 8 | - [x] Upload new files/docs 9 | - [x] Delete files, create folders 10 | - [x] Python CLI interface to access project files 11 | - [ ] Move, rename files 12 | - [ ] Create, delete, archive, and rename projects 13 | - [ ] Access/update comments, perform live changes 14 | - [ ] Access/update profile details 15 | - [ ] Robust login 16 | 17 | ## Getting started 18 | Install the project by running the following: 19 | ```bash 20 | pip install 'pyoverleaf[cli]' 21 | ``` 22 | 23 | Before using the API, make sure you are logged into Overleaf in your default web browser. 24 | Currently, only Google Chrome and Mozilla Firefox are supported: https://github.com/richardpenman/browsercookie 25 | Test if everything is working by listing the projects: 26 | ```bash 27 | pyoverleaf ls 28 | ``` 29 | 30 | 31 | ## Python API 32 | The low-level Python API provides a way to access Overleaf projects from Python. 
33 | The main entrypoint is the class `pyoverleaf.Api`. 34 | 35 | ### Accessing projects 36 | ```python 37 | import pyoverleaf 38 | 39 | api = pyoverleaf.Api() 40 | api.login_from_browser() 41 | 42 | # List the projects 43 | projects = api.get_projects() 44 | 45 | # Download the project as a zip 46 | project_id = projects[0].id 47 | api.download_project(project_id, "project.zip") 48 | ``` 49 | 50 | ### Managing project files 51 | ```python 52 | import pyoverleaf 53 | 54 | api = pyoverleaf.Api() 55 | api.login_from_browser() 56 | # Choose a project (here: the first one) 57 | project_id = api.get_projects()[0].id 58 | 59 | # Get project files 60 | root_folder = api.project_get_files(project_id) 61 | 62 | # Create new folder 63 | new_folder = api.project_create_folder(project_id, root_folder.id, "new-folder") 64 | 65 | # Upload new file to the newly created folder 66 | file_bytes = open("test-image.jpg", "rb").read() 67 | new_file = api.project_upload_file(project_id, new_folder.id, "file-name.jpg", file_bytes) 68 | 69 | # Delete newly added folder containing the file 70 | api.project_delete_entity(project_id, new_folder) 71 | ``` 72 | 73 | ## Higher-level Python IO API 74 | The higher-level Python IO API allows users to access the project files in a Pythonic way. 
75 | The main entrypoint is the class `pyoverleaf.ProjectIO`. 76 | 77 | Here are some examples of how to use the API: 78 | ```python 79 | import pyoverleaf 80 | 81 | api = pyoverleaf.Api() 82 | api.login_from_browser() 83 | # Choose a project (here: the first one) 84 | project_id = api.get_projects()[0].id 85 | 86 | # Get project IO API 87 | io = pyoverleaf.ProjectIO(api, project_id) 88 | 89 | # Check if a path exists 90 | exists = io.exists("path/to/a/file/or/folder") 91 | 92 | # Create a directory 93 | io.mkdir("path/to/new/directory", parents=True, exist_ok=True) 94 | 95 | # Listing a directory (`listdir` returns the entry names as strings) 96 | for name in io.listdir("path/to/a/directory"): 97 | print(name) 98 | 99 | # Reading a file 100 | with io.open("path/to/a/file", "r") as f: 101 | print(f.read()) 102 | 103 | # Creating a new file 104 | with io.open("path/to/a/new/file", "w+") as f: 105 | f.write("new content") 106 | ``` 107 | 108 | 109 | ## Using the CLI 110 | The CLI provides a way to access Overleaf from the shell. 111 | To get started, run `pyoverleaf --help` to list available commands and their arguments. 
class ProjectBytesIO(io.BytesIO):
    """
    In-memory binary stream mirroring a single file of an Overleaf project.

    Unless the mode truncates (``"w"``), the current remote content is downloaded on construction.
    A writable stream pushes its content back through the ``update_file`` callback when it is
    flushed or closed, but only if the content differs from what is already known to be remote.
    """

    def __init__(self, api: "Api", project_id: str, file: Optional["ProjectFile"] = None, mode: str = "r", update_file=None):
        """
        :param api: The API client used to download the existing file content.
        :param project_id: The id of the project the file belongs to.
        :param file: The existing remote file, or None if the file does not exist yet.
        :param mode: An ``open()``-style mode string.
        :param update_file: Callback taking the full file content (bytes) and returning the uploaded file.
        """
        self._api = api
        self._project_id = project_id
        self._file = file
        self._mode = mode
        self._update_file = update_file
        self._prefix_bytes = None
        # Content known to be stored remotely. None means "nothing uploaded yet", which forces the
        # first flush of a writable stream to upload (creating or truncating the remote file).
        self._synced_bytes = None
        init_bytes = b""
        if file is not None and "w" not in mode:
            init_bytes = self._api.project_download_file(self._project_id, self._file)
            self._synced_bytes = init_bytes
        if "a" in mode:
            # Append mode: the existing content is kept aside and prepended again on upload.
            self._prefix_bytes = init_bytes
            init_bytes = b""
        super().__init__(init_bytes)

    def writable(self) -> bool:
        """Return True if the mode allows writing."""
        return "w" in self._mode or "a" in self._mode or "+" in self._mode

    def readable(self) -> bool:
        """Return True if the mode allows reading."""
        return "r" in self._mode or "+" in self._mode

    def flush(self) -> None:
        """
        Upload the buffer if the stream is writable and its content changed since the last upload.

        Comparing against the last uploaded content prevents the double upload that would otherwise
        happen when a wrapping ``TextIOWrapper`` flushes and then closes this stream.
        """
        super().flush()
        if not self.writable():
            return
        data = self.getvalue()
        if self._prefix_bytes is not None:
            data = self._prefix_bytes + data
        if data == self._synced_bytes:
            return
        self._file = self._update_file(data)
        self._synced_bytes = data

    def close(self) -> None:
        """Flush pending changes and close the stream. Closing twice is a no-op."""
        if self.closed:
            return
        try:
            self.flush()
        finally:
            super().close()


class ProjectIO:
    """
    File-system-like access to the files of one Overleaf project.

    The project tree is fetched lazily once and cached; operations performed through this object
    keep the cached tree up to date.
    """

    _OPEN_MODES = frozenset(["r", "w", "a", "r+", "w+", "a+", "rb", "wb", "ab", "rb+", "wb+", "ab+"])

    def __init__(self, api: "Api", project_id: str):
        """
        :param api: A logged-in API client.
        :param project_id: The id of the project to operate on.
        """
        self._api = api
        self._project_id = project_id
        self._cached_project_files = None

    def _project_files(self) -> "ProjectFolder":
        """Return the (cached) root folder of the project, fetching it on first use."""
        if self._cached_project_files is None:
            self._cached_project_files = self._api.project_get_files(self._project_id)
        return self._cached_project_files

    def _find(self, path: Union[pathlib.PurePath, str]) -> Union["ProjectFolder", "ProjectFile", None]:
        """
        Resolve a path to the folder or file it names.

        :param path: The path relative to the project root.
        :return: The matching entity, or None if any path component is missing.
        """
        node = self._project_files()
        for part in pathlib.PurePath(path).parts:
            # Files have no children, so a path that continues below a file resolves to None.
            children = getattr(node, "children", ())
            node = next((child for child in children if child.name == part), None)
            if node is None:
                return None
        return node

    def exists(self, path: Union[pathlib.PurePath, str]):
        """
        Check if a file exists in the project.

        :param path: The path to the file.
        :return: True if the file exists, else False.
        """
        return self._find(path) is not None

    def open(self, path: Union[pathlib.PurePath, str], mode: str = "r", encoding: Optional[str] = None) -> IO:
        """
        Open a file in the project.

        :param path: The path to the file.
        :param mode: The mode to open the file in.
        :param encoding: The encoding to use if the file is not opened in binary mode.
        :return: A file-like object.
        :raises ValueError: If the mode is not supported.
        :raises FileNotFoundError: If a parent folder is missing, or the file is missing in read-only mode.
        """
        if mode not in self._OPEN_MODES:
            raise ValueError("Invalid mode: " + repr(mode))
        binary = "b" in mode
        # Only the read-only modes require the file to exist; write, append and update modes
        # create it on the first upload.
        must_exist = mode in ("r", "rb")

        # Find the handles
        pure_path = pathlib.PurePath(path)
        folder = self._project_files()
        for part in pure_path.parent.parts:
            for child in folder.children:
                if child.name == part and child.type == "folder":
                    folder = child
                    break
            else:
                raise FileNotFoundError("No such file or directory: " + str(path))

        folder_id = folder.id
        filename = pure_path.name
        file = next((child for child in folder.children if child.name == filename and child.type != "folder"), None)
        if file is None and must_exist:
            raise FileNotFoundError("No such file or directory: " + str(path))

        def update_file(data):
            uploaded = self._api.project_upload_file(self._project_id, folder_id, filename, data)
            # Keep the cached tree in sync: the upload replaces any previous file of the same name.
            folder.children[:] = [
                child for child in folder.children
                if not (child.name == filename and child.type != "folder")
            ]
            folder.children.append(uploaded)
            return uploaded

        bytes_io = ProjectBytesIO(self._api, self._project_id, file, mode, update_file)
        if not binary:
            return io.TextIOWrapper(bytes_io, encoding=encoding)
        return bytes_io

    def mkdir(self, path: Union[pathlib.PurePath, str], exist_ok: bool = False, *, parents: bool = False) -> None:
        """
        Create a directory in the project.

        :param path: The path to the directory.
        :param exist_ok: If True, no exception will be raised if the directory already exists.
        :param parents: If True, all parent directories will be created if they don't exist.
        :raises FileExistsError: If the directory exists (and not exist_ok) or a path component is a file.
        :raises FileNotFoundError: If a parent directory is missing and parents is False.
        """
        path = pathlib.PurePath(path)
        current = self._project_files()
        last_index = len(path.parts) - 1
        for i, part in enumerate(path.parts):
            existing = next((child for child in current.children if child.name == part), None)
            if existing is None:
                if i < last_index and not parents:
                    raise FileNotFoundError("No such file or directory: " + str(path))
                created = self._api.project_create_folder(self._project_id, current.id, part)
                # Register the new folder in the cached tree so later calls can see it.
                current.children.append(created)
                current = created
                continue
            if existing.type != "folder":
                raise FileExistsError("Cannot create directory: " + str(path))
            if i == last_index and not exist_ok:
                raise FileExistsError("Cannot create directory: " + str(path))
            current = existing

    def listdir(self, path: Union[pathlib.PurePath, str]) -> List[str]:
        """
        List the contents of a directory in the project.

        :param path: The path to the directory.
        :return: A list of the contents of the directory.
        :raises FileNotFoundError: If the path does not exist.
        :raises NotADirectoryError: If the path names a file.
        """
        directory = self._find(path)
        if directory is None:
            raise FileNotFoundError("No such file or directory: " + str(path))
        if directory.type != "folder":
            raise NotADirectoryError("Not a directory: " + str(path))
        return [child.name for child in directory.children]

    def remove(self, path: Union[pathlib.PurePath, str], missing_ok: bool = False) -> None:
        """
        Remove a file/directory from the project.

        :param path: The path to the file.
        :param missing_ok: If True, a missing path is silently ignored.
        :raises FileNotFoundError: If the path does not exist and missing_ok is False.
        """
        entity = self._find(path)
        if entity is None:
            if missing_ok:
                return
            raise FileNotFoundError("No such file or directory: " + str(path))
        self._api.project_delete_entity(self._project_id, entity)
        # Drop the cached tree so the removed entity is not reported as existing anymore.
        self._cached_project_files = None
@dataclass
class ProjectFile:
    """A file (binary upload) or doc (editable text document) inside a project."""

    id: str
    name: str
    created: Optional[str]
    type: Literal["file", "doc"] = "file"

    @classmethod
    def from_data(cls, data: dict, file_type: str = "file"):
        """
        Build a file from the JSON data returned by Overleaf.

        :param data: A dictionary with the keys ``_id``, ``name`` and optionally ``created``.
        :param file_type: The entity type to assign, either ``"file"`` or ``"doc"``.
        :return: The parsed file.
        """
        return cls(
            id=data["_id"],
            name=data["name"],
            created=data.get("created", None),
            type=file_type,
        )

    def __str__(self):
        return self.name


@dataclass
class ProjectFolder:
    """A folder inside a project, holding sub-folders, files and docs as children."""

    id: str
    name: str
    children: List[Union[ProjectFile, "ProjectFolder"]] = field(default_factory=list)

    @classmethod
    def from_data(cls, data: dict):
        """
        Build a folder (recursively) from the JSON data returned by Overleaf.

        :param data: A dictionary with the keys ``_id`` and ``name`` and optionally the lists
            ``folders``, ``fileRefs`` and ``docs``; a missing list is treated as empty.
        :return: The parsed folder with children ordered as folders, files, docs.
        """
        out = cls(
            id=data["_id"],
            name=data["name"],
        )
        out.children.extend(ProjectFolder.from_data(child) for child in data.get("folders", ()))
        out.children.extend(ProjectFile.from_data(child) for child in data.get("fileRefs", ()))
        out.children.extend(ProjectFile.from_data(child, "doc") for child in data.get("docs", ()))
        return out

    def __str__(self):
        """Render the folder as a tree, one entry per line, each nesting level indented by one space."""
        rendered = [self.name + ":"]
        for child in self.children:
            rendered.append("".join(" " + line for line in str(child).splitlines(True)))
        return "\n".join(rendered)

    @property
    def type(self):
        """The entity type, always ``"folder"``."""
        return "folder"
def get_projects(self, *, trashed: bool = False, archived: bool = False) -> List["Project"]:
    """
    Get the full list of projects.

    The list is parsed from the metadata embedded in the Overleaf dashboard page.

    :param trashed: Whether to include trashed projects.
    :param archived: Whether to include archived projects.
    :return: A list of projects.
    :raises RuntimeError: If the dashboard page does not contain the project list
        (typically because the session is not logged in).
    """
    self._assert_session_initialized()
    r = self._get_session().get(f"https://{self._host}/", **self._request_kwargs)
    r.raise_for_status()
    content = BeautifulSoup(r.content, features="html.parser")

    # `find` returns None when the tag is missing, e.g. when the login page is served instead.
    projects_meta = content.find("meta", dict(name="ol-prefetchedProjectsBlob"))
    if projects_meta is None:
        raise RuntimeError("Could not find the project list on the Overleaf dashboard. Are you logged in?")
    data = json.loads(projects_meta.get("content"))
    projects = []
    for project_data in data["projects"]:
        proj = Project.from_data(project_data)
        if not trashed and proj.trashed:
            continue
        if not archived and proj.archived:
            continue
        projects.append(proj)

    # Add tags to projects. Tags are supplementary, so a page without them still yields the projects.
    tags_meta = content.find("meta", dict(name="ol-tags"))
    if tags_meta is None:
        return projects
    proj_map = {proj.id: proj for proj in projects}
    for tag_data in json.loads(tags_meta.get("content")):
        tag = Tag.from_data(tag_data)
        for project_id in tag_data["project_ids"]:
            project = proj_map.get(project_id)
            if project is None:
                # The tag refers to a project that was filtered out above.
                continue
            if project.tags is None:
                project.tags = []
            project.tags.append(tag)
    return projects
def project_get_files(self, project_id: str) -> "ProjectFolder":
    """
    Get the root directory of a project.

    The file tree is read from the ``joinProjectResponse`` message that Overleaf sends
    after the websocket for the project is opened.

    :param project_id: The id of the project.
    :return: The root directory of the project.
    :raises RuntimeError: If the server rejects the connection or sends an unexpected message.
    """
    socket = self._open_socket(project_id)
    try:
        while True:
            line = socket.recv()
            if line.startswith("7:"):
                # Unauthorized. TODO: handle this.
                raise RuntimeError("Could not get project files.")
            if line.startswith("5:"):
                break
    finally:
        # Only the first event message is needed; always release the connection.
        socket.close()
    data = json.loads(line[len("5:"):].lstrip(":"))

    # Parse the data
    if data.get("name") != "joinProjectResponse":
        raise RuntimeError("Could not get project files: unexpected message from the server.")
    root_folders = data["args"][0]["project"]["rootFolder"]
    if len(root_folders) != 1:
        raise RuntimeError("Could not get project files: expected exactly one root folder.")
    return ProjectFolder.from_data(root_folders[0])
def project_upload_file(self, project_id: str, folder_id: str, file_name: str, file_content: bytes) -> ProjectFile:
    """
    Upload a file to a project.

    The content is sent as a multipart form to the project's upload endpoint, always
    declared as ``application/octet-stream``.

    :param project_id: The id of the project.
    :param folder_id: The id of the folder to upload to.
    :param file_name: The name of the file.
    :param file_content: The content of the file.
    :return: The uploaded file, typed as reported by the server.
    """
    mime = "application/octet-stream"
    self._assert_session_initialized()
    session = self._get_session()
    upload_url = f"https://{self._host}/project/{project_id}/upload?folder_id={folder_id}"
    # A None filename makes the entry a plain form field; only "qqfile" carries the payload.
    form_fields = {
        "relativePath": (None, "null"),
        "name": (None, file_name),
        "type": (None, mime),
        "qqfile": (file_name, file_content, mime),
    }
    request_headers = {
        "Referer": f"https://{self._host}/project/{project_id}",
        "Accept": "application/json",
        "Cache-Control": "no-cache",
        "x-csrf-token": self._get_csrf_token(project_id),
    }
    r = session.post(upload_url, files=form_fields, **self._request_kwargs, headers=request_headers)
    r.raise_for_status()
    response = json.loads(r.content)
    return ProjectFile(
        response["entity_id"],
        name=file_name,
        created=None,
        type=response["entity_type"],
    )
def project_download_file(self, project_id: str, file: "ProjectFile", output_path: Optional[str] = None) -> Union[bytes, None]:
    """
    Download a file from a project.

    Binary files are fetched over HTTP; docs are pulled through the websocket and
    encoded as UTF-8.

    :param project_id: The id of the project.
    :param file: The file to download.
    :param output_path: The path to save the file to. If none, the file will be returned as bytes.
    :return: The file if output_path is None, else None.
    :raises ValueError: If the file has an unknown type.
    """
    self._assert_session_initialized()
    if file.type == "file":
        r = self._get_session().get(f"https://{self._host}/project/{project_id}/file/{file.id}", **self._request_kwargs)
        r.raise_for_status()
        content = r.content
    elif file.type == "doc":
        content = self._pull_doc_project_file_content(project_id, file.id).encode("utf-8")
    else:
        raise ValueError(f"Unknown file type: {file.type}")

    # Honour output_path for every entity type, docs included.
    if output_path is not None:
        with open(output_path, "wb") as f:
            f.write(content)
        return None
    return content
def login_from_browser(self):
    """
    Login to Overleaf using the default browser's cookies.

    The cookies are loaded with ``browsercookie`` and handed to `login_from_cookies`.
    """
    self.login_from_cookies(browsercookie.load())
371 | """ 372 | 373 | def login_from_cookies(self, cookies): 374 | dot_host = f".{self._host.removeprefix('www.')}" 375 | if not isinstance(cookies, cookielib.CookieJar): 376 | assert isinstance(cookies, dict) 377 | cookies_jar = cookielib.CookieJar() 378 | for name, value in cookies.items(): 379 | cookies_jar.set_cookie(requests.cookies.create_cookie(name, value, domain=dot_host)) 380 | cookies = cookies_jar 381 | 382 | assert isinstance(cookies, cookielib.CookieJar) 383 | self._cookies = cookielib.CookieJar() 384 | for cookie in cookies: 385 | if cookie.domain.endswith(dot_host): 386 | self._cookies.set_cookie(cookie) 387 | self._session_initialized = True 388 | 389 | def _pull_doc_project_file_content(self, project_id: str, file_id: str) -> str: 390 | socket = None 391 | try: 392 | socket = self._open_socket(project_id) 393 | 394 | # Initial waiting 395 | while True: 396 | line = socket.recv() 397 | if line.startswith("7:"): 398 | # Unauthorized. TODO: handle this. 399 | raise RuntimeError("Could not get project files.") 400 | if line.startswith("5:"): 401 | break 402 | socket.send('5:1+::{"name":"clientTracking.getConnectedUsers"}'.encode("utf-8")) 403 | 404 | # Join the doc 405 | socket.send(f'5:2+::{{"name": "joinDoc", "args": ["{file_id}", {{"encodeRanges": true}}]}}'.encode("utf-8")) 406 | while True: 407 | line = socket.recv() 408 | if line.startswith("7:"): 409 | # Unauthorized. TODO: handle this. 410 | raise RuntimeError("Could not get project files.") 411 | if line.startswith("6:::2+"): 412 | break 413 | data = line[6:] 414 | 415 | # Leave doc 416 | socket.send(f"5:3+::{{\"name\": \"leaveDoc\", \"args\": [\"{file_id}\"]}}".encode("utf-8")) 417 | while True: 418 | line = socket.recv() 419 | if line.startswith("7:"): 420 | # Unauthorized. TODO: handle this. 
421 | raise RuntimeError("Could not get project files.") 422 | if line.startswith("6:::3+"): 423 | break 424 | finally: 425 | if socket is not None: 426 | socket.close() 427 | socket = None 428 | return "\n".join(json.loads(data)[1]) 429 | 430 | def _get_session(self): 431 | self._assert_session_initialized() 432 | http_session = requests.Session() 433 | http_session.cookies = self._cookies 434 | http_session.proxies = self._proxies 435 | http_session.verify = self._ssl_verify 436 | return http_session 437 | 438 | def _assert_session_initialized(self): 439 | if not self._session_initialized: 440 | raise RuntimeError("Must call api.login_*() before using the api") 441 | 442 | def _get_csrf_token(self, project_id): 443 | self._assert_session_initialized() 444 | # First we pull the csrf token 445 | if self._csrf_cache is not None and self._csrf_cache[0] == project_id: 446 | return self._csrf_cache[1] 447 | r = self._get_session().get(f"https://{self._host}/project/{project_id}", **self._request_kwargs) 448 | r.raise_for_status() 449 | content = BeautifulSoup(r.content, features="html.parser") 450 | token = content.find("meta", dict(name="ol-csrfToken")).get("content") 451 | self._csrf_cache = (project_id, token) 452 | return token 453 | 454 | def _open_socket(self, project_id: str) -> bytes: 455 | self._assert_session_initialized() 456 | time_now = int(time.time() * 1000) 457 | session = self._get_session() # pylint: disable=protected-access 458 | r = session.get( 459 | f"https://{self._host}/socket.io/1/?projectId={project_id}&t={time_now}", **self._request_kwargs) # pylint: disable=protected-access 460 | r.raise_for_status() 461 | content = r.content.decode("utf-8") 462 | socket_id = content.split(":")[0] 463 | socket_url = f"wss://{self._host}/socket.io/1/websocket/{socket_id}?projectId={project_id}" 464 | kwargs = {} 465 | cookies = None 466 | 467 | dot_host = f".{self._host.removeprefix('www.')}" 468 | cookies = "; ".join([f"{c.name}={c.value}" for c in 
session.cookies if c.domain.endswith(dot_host)]) 469 | headers = dict(**session.headers) 470 | for header, value in headers.items(): 471 | if header.lower() == 'cookie': 472 | if cookies: 473 | cookies += '; ' 474 | cookies += value 475 | del headers[header] 476 | break 477 | 478 | # auth 479 | if 'Authorization' not in headers and session.auth is not None: 480 | if not isinstance(session.auth, tuple): # pragma: no cover 481 | raise ValueError('Only basic authentication is supported') 482 | basic_auth = f'{session.auth[0]}:{session.auth[1]}'.encode('utf-8') # pylint: disable=unsubscriptable-object 483 | basic_auth = b64encode(basic_auth).decode('utf-8') 484 | headers['Authorization'] = 'Basic ' + basic_auth 485 | 486 | # cert 487 | # this can be given as ('certfile', 'keyfile') or just 'certfile' 488 | if isinstance(session.cert, tuple): 489 | kwargs['sslopt'] = { 490 | 'certfile': session.cert[0], # pylint: disable=unsubscriptable-object 491 | 'keyfile': session.cert[1]} # pylint: disable=unsubscriptable-object 492 | elif session.cert: 493 | kwargs['sslopt'] = {'certfile': session.cert} 494 | 495 | # proxies 496 | if session.proxies: 497 | proxy_url = None 498 | if socket_url.startswith('ws://'): 499 | proxy_url = session.proxies.get( 500 | 'ws', session.proxies.get('http')) 501 | else: # wss:// 502 | proxy_url = session.proxies.get( 503 | 'wss', session.proxies.get('https')) 504 | if proxy_url: 505 | parsed_url = urllib.parse.urlparse( 506 | proxy_url if '://' in proxy_url 507 | else 'scheme://' + proxy_url) 508 | kwargs['http_proxy_host'] = parsed_url.hostname 509 | kwargs['http_proxy_port'] = parsed_url.port 510 | kwargs['http_proxy_auth'] = ( 511 | (parsed_url.username, parsed_url.password) 512 | if parsed_url.username or parsed_url.password 513 | else None) 514 | 515 | # verify 516 | if isinstance(session.verify, str): 517 | if 'sslopt' in kwargs: 518 | kwargs['sslopt']['ca_certs'] = session.verify 519 | else: 520 | kwargs['sslopt'] = {'ca_certs': 
session.verify} 521 | elif not session.verify: 522 | kwargs['sslopt'] = {"cert_reqs": ssl.CERT_NONE} 523 | 524 | # combine internally generated options with the ones supplied by the 525 | # caller. The caller's options take precedence. 526 | kwargs['header'] = headers 527 | kwargs['cookie'] = cookies 528 | kwargs['enable_multithread'] = True 529 | if 'timeout' in self._request_kwargs: 530 | kwargs['timeout'] = self._request_kwargs['timeout'] 531 | return create_connection(socket_url, **kwargs) 532 | --------------------------------------------------------------------------------