├── requirements.txt
├── README.md
├── .gitignore
└── canvas-files-downloader.py
/requirements.txt:
--------------------------------------------------------------------------------
certifi==2018.10.15
chardet==3.0.4
idna==2.7
pkg-resources==0.0.0
python-decouple==3.1
requests==2.20.1
urllib3==1.24.1
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# canvas-files-downloader

Downloads your Canvas course files, group files, and user submission files.

## Usage

* Requires Python 3
* Create a `.env` file in the project root
  * Generate an access token for testing:
    https://canvas.instructure.com/doc/api/file.oauth.html#manual-token-generation
  * Add this line to `.env`, putting your access token after the equals sign:

    ```
    ACCESS_TOKEN=
    ```
* Create a virtual environment and activate it
  * `python3 -m venv venv`
  * `source venv/bin/activate`
* Install dependencies
  * `pip install -r requirements.txt`
* Run the script
  * `python canvas-files-downloader.py`
* Files will be downloaded to `files/` (see the example layout below)
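
After a run, downloads end up organized roughly like this (the course, group, and folder names are placeholders, not literal output):

```
files/
├── courses/
│   └── <course name>/
├── groups/
│   └── <group name> <group id>/
│       └── <group name> <group id> users.txt
└── submissions/
    └── <folder name>/
```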
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/files/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/canvas-files-downloader.py:
--------------------------------------------------------------------------------
from decouple import config
import os
import requests
import urllib.request

ACCESS_TOKEN = config('ACCESS_TOKEN')

headers = {'Authorization': 'Bearer ' + ACCESS_TOKEN}

# TODO make psu.instructure.com changeable
# TODO refactor code

root_directory = os.getcwd()
files_directory = os.path.join(root_directory, 'files')

courses_directory = os.path.join(files_directory, 'courses')
if not os.path.exists(courses_directory):
    os.makedirs(courses_directory)

print('\n\n\n')

courses = []

# get courses, following the paginated Link header until there is no next page
next_url = 'https://psu.instructure.com/api/v1/courses'
while next_url:
    response = requests.get(next_url, headers=headers)
    if 'Link' not in response.headers:
        break
    links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
    next_url = None
    for link in links:
        if link['rel'] == 'next':
            next_url = link['url']
            break
    current_courses = response.json()
    for current_course in current_courses:
        courses.append(current_course)
        print(current_course['name'], current_course['id'])

print('\n\n\n')
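
# Each section below repeats the same Link-header pagination loop. As a sketch
# of the "refactor code" TODO above, a hypothetical helper like this could walk
# every page of any Canvas list endpoint; it is illustrative only and is not
# called by the rest of the script.
def fetch_all_pages(url, headers):
    """Yield every item from a paginated Canvas API list endpoint (sketch)."""
    while url:
        response = requests.get(url, headers=headers)
        for item in response.json():
            yield item
        # Canvas advertises the next page in the Link header, e.g.
        # <https://psu.instructure.com/api/v1/courses?page=2>; rel="next"
        url = None
        if 'Link' in response.headers:
            links = requests.utils.parse_header_links(
                response.headers['Link'].rstrip('>').replace('>,<', ',<'))
            for link in links:
                if link['rel'] == 'next':
                    url = link['url']
                    break
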
# download files for a course
for course in courses:
    next_url = 'https://psu.instructure.com/api/v1/courses/{}/files'.format(course['id'])
    while next_url:
        response = requests.get(next_url, headers=headers)
        if 'Link' not in response.headers:
            break
        links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
        next_url = None
        for link in links:
            if link['rel'] == 'next':
                next_url = link['url']
                break
        files = response.json()
        course_directory = os.path.join(courses_directory, course['name'])
        if not os.path.exists(course_directory):
            os.makedirs(course_directory)
        for f in files:
            print(f['display_name'], f['url'], f['id'])
            f_path = os.path.join(course_directory, f['display_name'])
            if not os.path.exists(f_path):
                urllib.request.urlretrieve(f['url'], f_path)

groups_directory = os.path.join(files_directory, 'groups')
if not os.path.exists(groups_directory):
    os.makedirs(groups_directory)

print('\n\n\n')

groups = []

# get groups
next_url = 'https://psu.instructure.com/api/v1/users/self/groups'
while next_url:
    response = requests.get(next_url, headers=headers)
    if 'Link' not in response.headers:
        break
    links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
    next_url = None
    for link in links:
        if link['rel'] == 'next':
            next_url = link['url']
            break
    current_groups = response.json()
    for current_group in current_groups:
        groups.append(current_group)
        print(current_group['name'], current_group['id'])

print('\n\n\n')

# download files for a group
for group in groups:
    next_url = 'https://psu.instructure.com/api/v1/groups/{}/files'.format(group['id'])
    while next_url:
        response = requests.get(next_url, headers=headers)
        if 'Link' not in response.headers:
            break
        links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
        next_url = None
        for link in links:
            if link['rel'] == 'next':
                next_url = link['url']
                break
        files = response.json()
        group_directory = os.path.join(groups_directory, '{} {}'.format(group['name'].replace('/', '-'), group['id']))
        if not os.path.exists(group_directory):
            os.makedirs(group_directory)
        for f in files:
            print(f['display_name'], f['url'], f['id'])
            f_path = os.path.join(group_directory, f['display_name'])
            if not os.path.exists(f_path):
                urllib.request.urlretrieve(f['url'], f_path)
    group_users = []
    next_group_users_url = 'https://psu.instructure.com/api/v1/groups/{}/users'.format(group['id'])
    while next_group_users_url:
        response = requests.get(next_group_users_url, headers=headers)
        if 'Link' not in response.headers:
            break
        links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
        next_group_users_url = None
        for link in links:
            if link['rel'] == 'next':
                next_group_users_url = link['url']
                break
        current_group_users = response.json()
        for current_group_user in current_group_users:
            group_users.append(current_group_user)
    group_users_file_path = os.path.join(group_directory, '{} {} users.txt'.format(group['name'].replace('/', '-'), group['id']))
    if not os.path.exists(group_users_file_path):
        group_users_file = open(group_users_file_path, 'w')
        for group_user in group_users:
            group_users_file.write('{} {}\n'.format(group_user['name'], group_user['id']))
        group_users_file.close()

submissions_directory = os.path.join(files_directory, 'submissions')
if not os.path.exists(submissions_directory):
    os.makedirs(submissions_directory)

print('\n\n\n')

submissions = []

# get the user's file folders; the script treats these as 'submissions'
next_url = 'https://psu.instructure.com/api/v1/users/self/folders'
while next_url:
    response = requests.get(next_url, headers=headers)
    if 'Link' not in response.headers:
        break
    links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
    next_url = None
    for link in links:
        if link['rel'] == 'next':
            next_url = link['url']
            break
    current_submissions = response.json()
    for current_submission in current_submissions:
        submissions.append(current_submission)
        print(current_submission['name'], current_submission['id'])

print('\n\n\n')

# download the files in each folder
for submission in submissions:
    next_url = 'https://psu.instructure.com/api/v1/folders/{}/files'.format(submission['id'])
    while next_url:
        response = requests.get(next_url, headers=headers)
        if 'Link' not in response.headers:
            break
        links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
        next_url = None
        for link in links:
            if link['rel'] == 'next':
                next_url = link['url']
                break
        files = response.json()
        submission_directory = os.path.join(submissions_directory, submission['name'])
        if not os.path.exists(submission_directory):
            os.makedirs(submission_directory)
        for f in files:
            print(f['display_name'], f['url'], f['id'])
            f_path = os.path.join(submission_directory, f['display_name'])
            if not os.path.exists(f_path):
                urllib.request.urlretrieve(f['url'], f_path)
--------------------------------------------------------------------------------