├── requirements.txt
├── README.md
├── .gitignore
└── canvas-files-downloader.py
/requirements.txt:
--------------------------------------------------------------------------------
certifi==2018.10.15
chardet==3.0.4
idna==2.7
pkg-resources==0.0.0
python-decouple==3.1
requests==2.20.1
urllib3==1.24.1
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# canvas-files-downloader

Downloads your Canvas course files, group files, and user submission files.

## Usage

* Requires Python 3
* Create a `.env` file in the project root
  * Generate an access token for testing:
    https://canvas.instructure.com/doc/api/file.oauth.html#manual-token-generation
  * Add this line to `.env`, putting your access token after the equals sign:

    ```
    ACCESS_TOKEN=
    ```
* Create a virtual environment and activate it
  * `python3 -m venv venv`
  * `source venv/bin/activate`
* Install dependencies
  * `pip install -r requirements.txt`
* Run the script
  * `python canvas-files-downloader.py`
* Files will be downloaded to `files/` (see the example layout below)
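
After a run, downloads end up organized roughly like this (the course, group, and folder names are placeholders, not literal output):

```
files/
├── courses/
│   └── <course name>/
├── groups/
│   └── <group name> <group id>/
│       └── <group name> <group id> users.txt
└── submissions/
    └── <folder name>/
```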
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/files/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/canvas-files-downloader.py:
--------------------------------------------------------------------------------
from decouple import config
import os
import requests
import urllib.request

ACCESS_TOKEN = config('ACCESS_TOKEN')

headers = {'Authorization': 'Bearer ' + ACCESS_TOKEN}

# TODO make psu.instructure.com changeable
# TODO refactor code

root_directory = os.getcwd()
files_directory = os.path.join(root_directory, 'files')

courses_directory = os.path.join(files_directory, 'courses')
if not os.path.exists(courses_directory):
    os.makedirs(courses_directory)

print('\n\n\n')

courses = []

# get courses, following the paginated Link header until there is no next page
next_url = 'https://psu.instructure.com/api/v1/courses'
while next_url:
    response = requests.get(next_url, headers=headers)
    if 'Link' not in response.headers:
        break
    links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
    next_url = None
    for link in links:
        if link['rel'] == 'next':
            next_url = link['url']
            break
    current_courses = response.json()
    for current_course in current_courses:
        courses.append(current_course)
        print(current_course['name'], current_course['id'])

print('\n\n\n')
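
# Each section below repeats the same Link-header pagination loop. As a sketch
# of the "refactor code" TODO above, a hypothetical helper like this could walk
# every page of any Canvas list endpoint; it is illustrative only and is not
# called by the rest of the script.
def fetch_all_pages(url, headers):
    """Yield every item from a paginated Canvas API list endpoint (sketch)."""
    while url:
        response = requests.get(url, headers=headers)
        for item in response.json():
            yield item
        # Canvas advertises the next page in the Link header, e.g.
        # <https://psu.instructure.com/api/v1/courses?page=2>; rel="next"
        url = None
        if 'Link' in response.headers:
            links = requests.utils.parse_header_links(
                response.headers['Link'].rstrip('>').replace('>,<', ',<'))
            for link in links:
                if link['rel'] == 'next':
                    url = link['url']
                    break
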
# download files for a course
for course in courses:
    next_url = 'https://psu.instructure.com/api/v1/courses/{}/files'.format(course['id'])
    while next_url:
        response = requests.get(next_url, headers=headers)
        if 'Link' not in response.headers:
            break
        links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
        next_url = None
        for link in links:
            if link['rel'] == 'next':
                next_url = link['url']
                break
        files = response.json()
        course_directory = os.path.join(courses_directory, course['name'])
        if not os.path.exists(course_directory):
            os.makedirs(course_directory)
        for f in files:
            print(f['display_name'], f['url'], f['id'])
            f_path = os.path.join(course_directory, f['display_name'])
            if not os.path.exists(f_path):
                urllib.request.urlretrieve(f['url'], f_path)

groups_directory = os.path.join(files_directory, 'groups')
if not os.path.exists(groups_directory):
    os.makedirs(groups_directory)

print('\n\n\n')

groups = []

# get groups
next_url = 'https://psu.instructure.com/api/v1/users/self/groups'
while next_url:
    response = requests.get(next_url, headers=headers)
    if 'Link' not in response.headers:
        break
    links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
    next_url = None
    for link in links:
        if link['rel'] == 'next':
            next_url = link['url']
            break
    current_groups = response.json()
    for current_group in current_groups:
        groups.append(current_group)
        print(current_group['name'], current_group['id'])

print('\n\n\n')

# download files for a group
for group in groups:
    next_url = 'https://psu.instructure.com/api/v1/groups/{}/files'.format(group['id'])
    while next_url:
        response = requests.get(next_url, headers=headers)
        if 'Link' not in response.headers:
            break
        links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
        next_url = None
        for link in links:
            if link['rel'] == 'next':
                next_url = link['url']
                break
        files = response.json()
        group_directory = os.path.join(groups_directory, '{} {}'.format(group['name'].replace('/', '-'), group['id']))
        if not os.path.exists(group_directory):
            os.makedirs(group_directory)
        for f in files:
            print(f['display_name'], f['url'], f['id'])
            f_path = os.path.join(group_directory, f['display_name'])
            if not os.path.exists(f_path):
                urllib.request.urlretrieve(f['url'], f_path)
    group_users = []
    next_group_users_url = 'https://psu.instructure.com/api/v1/groups/{}/users'.format(group['id'])
    while next_group_users_url:
        response = requests.get(next_group_users_url, headers=headers)
        if 'Link' not in response.headers:
            break
        links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
        next_group_users_url = None
        for link in links:
            if link['rel'] == 'next':
                next_group_users_url = link['url']
                break
        current_group_users = response.json()
        for current_group_user in current_group_users:
            group_users.append(current_group_user)
    group_users_file_path = os.path.join(group_directory, '{} {} users.txt'.format(group['name'].replace('/', '-'), group['id']))
    if not os.path.exists(group_users_file_path):
        group_users_file = open(group_users_file_path, 'w')
        for group_user in group_users:
            group_users_file.write('{} {}\n'.format(group_user['name'], group_user['id']))
        group_users_file.close()

submissions_directory = os.path.join(files_directory, 'submissions')
if not os.path.exists(submissions_directory):
    os.makedirs(submissions_directory)

print('\n\n\n')

submissions = []

# get the user's file folders; the script treats these as 'submissions'
next_url = 'https://psu.instructure.com/api/v1/users/self/folders'
while next_url:
    response = requests.get(next_url, headers=headers)
    if 'Link' not in response.headers:
        break
    links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
    next_url = None
    for link in links:
        if link['rel'] == 'next':
            next_url = link['url']
            break
    current_submissions = response.json()
    for current_submission in current_submissions:
        submissions.append(current_submission)
        print(current_submission['name'], current_submission['id'])

print('\n\n\n')

# download the files in each folder
for submission in submissions:
    next_url = 'https://psu.instructure.com/api/v1/folders/{}/files'.format(submission['id'])
    while next_url:
        response = requests.get(next_url, headers=headers)
        if 'Link' not in response.headers:
            break
        links = requests.utils.parse_header_links(response.headers['Link'].rstrip('>').replace('>,<', ',<'))
        next_url = None
        for link in links:
            if link['rel'] == 'next':
                next_url = link['url']
                break
        files = response.json()
        submission_directory = os.path.join(submissions_directory, submission['name'])
        if not os.path.exists(submission_directory):
            os.makedirs(submission_directory)
        for f in files:
            print(f['display_name'], f['url'], f['id'])
            f_path = os.path.join(submission_directory, f['display_name'])
            if not os.path.exists(f_path):
                urllib.request.urlretrieve(f['url'], f_path)
--------------------------------------------------------------------------------