├── README.md ├── .gitignore ├── LICENSE.md └── downloader.py /README.md: -------------------------------------------------------------------------------- 1 | # Stepic.org Video Downloader 2 | 3 | Example of OAuth2 application for Stepic.org. 4 | 5 | Downloads all video files from a module (week) of a course or the whole course. 6 | 7 | 1. Go to https://stepik.org/oauth2/applications/ 8 | 9 | 2. Register your application with settings: 10 | `Client type: confidential` 11 | `Authorization Grant Type: client-credentials` 12 | 13 | 3. Install requests module 14 | 15 | ``` 16 | pip install requests 17 | ``` 18 | 19 | 4. Run the script 20 | 21 | ``` 22 | python3 downloader.py [-h] --course_id=COURSE_ID --client_id=CLIENT_ID --client_secret=CLIENT_SECRET [--week_id=WEEK_ID] [--quality=360|720|1080] [--output_dir=.] 23 | ``` 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | 45 | # Translations 46 | *.mo 47 | *.pot 48 | 49 | # Django stuff: 50 | *.log 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # PyBuilder 56 | target/ 57 | 58 | # secret keys 59 | settings.py 60 | 61 | #videos 62 | *.mp4 63 | 64 | 65 | # Additional directories 66 | .idea/ 67 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013-2016 Stepic.Org 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
"""Download the video files of a Stepik course (or of a single week of it).

The script authenticates against the Stepik OAuth2 endpoint with the
client-credentials grant, walks course -> sections -> units -> lessons ->
steps through the REST API, and saves every video step as an ``.mp4`` file
under ``<output_dir>/<course_id>/week_<n>/``.
"""
import argparse
import json
import os
import sys
import urllib
import urllib.error
import urllib.request

import requests
from requests.auth import HTTPBasicAuth

# Always talk to the API over HTTPS: every request carries the Bearer token.
API_HOST = 'https://stepik.org'

# Seconds to wait for an API response; without a timeout `requests` can
# block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def _api_get(api_url, token):
    """Send an authorized GET request and return the decoded JSON body.

    :param api_url: full URL of the API resource.
    :param token: OAuth2 access token, sent as a Bearer token.
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(api_url,
                            headers={'Authorization': 'Bearer ' + token},
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return json.loads(response.text)


def get_course_page(api_url, token):
    """Return the decoded JSON document found at ``api_url``."""
    return _api_get(api_url, token)


def get_all_weeks(stepik_resp):
    """Return the list of section ids of the first course in ``stepik_resp``."""
    return stepik_resp['courses'][0]['sections']


def get_unit_list(section_list, token):
    """Return, for every section id in ``section_list``, its list of unit ids.

    The result is a list of lists, in the same order as ``section_list``.
    """
    return [
        _api_get(API_HOST + '/api/sections/' + str(section_id),
                 token)['sections'][0]['units']
        for section_id in section_list
    ]


def get_steps_list(units_list, week, token):
    """Return a flat list of the step ids that belong to one week.

    :param units_list: list of unit-id lists as built by ``get_unit_list``.
    :param week: 1-based week number; ``units_list[week - 1]`` is used.
    :param token: OAuth2 access token.
    """
    lesson_ids = [
        _api_get(API_HOST + '/api/units/' + str(unit_id),
                 token)['units'][0]['lesson']
        for unit_id in units_list[week - 1]
    ]
    step_ids = []
    for lesson_id in lesson_ids:
        lesson = _api_get(API_HOST + '/api/lessons/' + str(lesson_id), token)
        step_ids.extend(lesson['lessons'][0]['steps'])
    return step_ids


def get_only_video_steps(step_list, token):
    """Return the ``block`` dicts of those steps that carry a video.

    A step counts as a video step when its block has a truthy ``video``
    entry; a missing ``video`` key is treated the same as an empty one.
    """
    video_blocks = []
    for step_id in step_list:
        step = _api_get(API_HOST + '/api/steps/' + str(step_id), token)
        block = step['steps'][0]['block']
        if block.get('video'):
            video_blocks.append(block)
    print('Only video:', len(video_blocks))
    return video_blocks


def parse_arguments(argv=None):
    """Parse the command-line arguments with argparse.

    :param argv: optional list of argument strings; when ``None`` (the
        default) argparse reads ``sys.argv[1:]``.
    :return: the populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description='Stepik downloader')

    parser.add_argument('-c', '--client_id',
                        help='your client_id from https://stepik.org/oauth2/applications/',
                        required=True)

    parser.add_argument('-s', '--client_secret',
                        help='your client_secret from https://stepik.org/oauth2/applications/',
                        required=True)

    parser.add_argument('-i', '--course_id',
                        help='course id',
                        required=True)

    parser.add_argument('-w', '--week_id',
                        help='week id starts from 1 (if not set then it will download the whole course)',
                        type=int,
                        default=None)

    parser.add_argument('-q', '--quality',
                        help='quality of a video. Default is 720',
                        choices=['360', '720', '1080'],
                        default='720')

    parser.add_argument('-o', '--output_dir',
                        help='output directory. Default is the current folder',
                        default='.')

    return parser.parse_args(argv)


def reporthook(blocknum, blocksize, totalsize):
    """Progress callback for ``urllib.request.urlretrieve``.

    :param blocknum: number of blocks transferred so far.
    :param blocksize: size of one block in bytes.
    :param totalsize: total size of the file; not positive when unknown.
    """
    readsofar = blocknum * blocksize
    if totalsize > 0:
        # The last block is usually only partly filled, so the raw product
        # overshoots the real size; clamp it to keep the bar at <= 100%.
        readsofar = min(readsofar, totalsize)
        percent = readsofar * 1e2 / totalsize
        s = "\r%5.1f%% %*d / %d" % (percent, len(str(totalsize)), readsofar, totalsize)
        sys.stderr.write(s)
        if readsofar >= totalsize:  # finished
            sys.stderr.write("\n")
    else:  # total size is unknown
        sys.stderr.write("read %d\n" % (readsofar,))


def _quality_rank(url_entry):
    """Return the numeric quality of a video url entry, or -1 if not numeric."""
    quality = str(url_entry.get('quality', ''))
    return int(quality) if quality.isdigit() else -1


def _pick_video_url(urls, quality):
    """Choose the download link for one video.

    :param urls: list of dicts with ``quality`` and ``url`` keys.
    :param quality: requested quality as a string, e.g. ``'720'``.
    :return: ``(link, message)``; ``message`` is ``None`` when the requested
        quality was found, and ``link`` is ``None`` when ``urls`` is empty.
    """
    matching = [entry['url'] for entry in urls if entry['quality'] == quality]
    if matching:
        return matching[-1], None

    if not urls:
        return None, "No video urls are available for this step, skipping it"

    # Fall back to the best quality that is actually offered.  When no entry
    # has a numeric quality all ranks tie and max() returns the first entry.
    best = max(urls, key=_quality_rank)
    return best['url'], "The requested quality = {} is not available!".format(quality)


def _ensure_folder(folder_name):
    """Create ``folder_name`` (with parents) unless it already exists."""
    if os.path.isdir(folder_name):
        return
    try:
        os.makedirs(folder_name)
    except PermissionError:
        print("Run the script from admin")
        sys.exit(1)
    except FileExistsError:
        # Something that is not a directory already occupies the path.
        print("Please delete the folder " + folder_name)
        sys.exit(1)


def _remove_if_exists(filename):
    """Delete ``filename`` if it is an existing regular file."""
    if os.path.isfile(filename):
        os.remove(filename)


def _download_video(url, filename):
    """Download ``url`` to ``filename`` unless the file is already present."""
    if os.path.isfile(filename):
        print('File {} already exist'.format(filename))
        return
    try:
        print('Downloading file ', filename)
        urllib.request.urlretrieve(url, filename, reporthook)
        print('Done')
    except urllib.error.ContentTooShortError:
        # Drop the truncated file so that a later run downloads it again.
        _remove_if_exists(filename)
        print('Error while downloading. File {} deleted:'.format(filename))
    except KeyboardInterrupt:
        _remove_if_exists(filename)
        print('\nAborted')
        sys.exit(1)


def main():
    """Entry point: authenticate, collect the video links and download them."""
    args = parse_arguments()

    # Example how to receive a token from Stepik.org.
    # The token must also be added to every request header, e.g.
    # requests.get(api_url, headers={'Authorization': 'Bearer ' + token})
    auth = HTTPBasicAuth(args.client_id, args.client_secret)
    resp = requests.post(API_HOST + '/oauth2/token/',
                         data={'grant_type': 'client_credentials'},
                         auth=auth,
                         timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    token = json.loads(resp.text)['access_token']

    course_data = get_course_page(API_HOST + '/api/courses/' + args.course_id, token)

    section_ids = get_all_weeks(course_data)
    all_units = get_unit_list(section_ids, token)

    # Download the whole course, or only the week passed as --week_id.
    # Week numbers are 1-based, both on the command line and in this loop.
    if args.week_id is None:
        weeks = range(1, len(section_ids) + 1)
    elif 1 <= args.week_id <= len(section_ids):
        weeks = [args.week_id]
    else:
        print("Week {} does not exist: the course has {} week(s)".format(
            args.week_id, len(section_ids)))
        sys.exit(1)

    for week in weeks:
        all_steps = get_steps_list(all_units, week, token)
        only_video_steps = get_only_video_steps(all_steps, token)

        # One (link, message) pair per video step, in step order.
        downloads = [_pick_video_url(video_step['video']['urls'], args.quality)
                     for video_step in only_video_steps]

        # Every week gets its own folder inside the course folder.
        folder_name = os.path.join(args.output_dir, args.course_id, 'week_' + str(week))
        _ensure_folder(folder_name)
        print('Folder_name ', folder_name)

        for video_index, (video_link, msg) in enumerate(downloads):
            # Print a message if something is wrong.
            if msg:
                print("{}".format(msg))
            if video_link is None:
                continue

            filename = os.path.join(folder_name, 'Video_' + str(video_index) + '.mp4')
            _download_video(video_link, filename)

    print("All steps downloaded")


if __name__ == "__main__":
    main()