├── requirements.txt ├── .gitignore ├── docker-compose.yml ├── Dockerfile ├── code ├── example.py ├── from-arguments.py ├── .gitignore └── downloader.py ├── LICENSE.txt └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | python-slugify 2 | requests -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | .idea 3 | code/data/* 4 | code/houdinii.py -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | ssdl: 2 | build: . 3 | environment: 4 | FILE_PATH: '/data/' 5 | volumes: 6 | - ./code:/code 7 | - ./data:/data 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3-slim 2 | COPY ./requirements.txt /requirements.txt 3 | RUN pip install -r /requirements.txt 4 | COPY ./code /code 5 | WORKDIR /code 6 | # ENTRYPOINT ["python", "/code/from-arguments.py"] 7 | -------------------------------------------------------------------------------- /code/example.py: -------------------------------------------------------------------------------- 1 | from downloader import Downloader 2 | 3 | cookie = """ 4 | ADD YOUR COOKIE HERE 5 | """ 6 | 7 | dl = Downloader(cookie=cookie) 8 | 9 | # download by class URL: 10 | dl.download_course_by_url('https://www.skillshare.com/classes/Art-Fundamentals-in-One-Hour/189505397') 11 | 12 | # or by class ID: 13 | # dl.download_course_by_class_id(189505397) 14 | -------------------------------------------------------------------------------- /code/from-arguments.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | from downloader import Downloader 4 | 5 | cookie = sys.argv[1] 6 | dl = Downloader(cookie=cookie) 7 | 8 | if len(sys.argv) != 3: 9 | raise Exception('Invalid arguments. Usage : {program} '.format(program=sys.argv[0])) 10 | 11 | if re.match(r'^[0-9]+$', sys.argv[2]): 12 | dl.download_course_by_class_id(sys.argv[2]) 13 | 14 | else: 15 | dl.download_course_by_url(sys.argv[2]) 16 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Klas Källqvist 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Skillshare video downloader in python 2 | 3 | I needed offline access to some skillshare courses I wanted to take while on vacation. 4 | Video download is only available in the skillshare mobile apps and I didn't want to 5 | choose between shaky 3G streaming or watching on a tiny mobile screen so I put together a 6 | quick and dirty video downloader in python. 7 | 8 | ### Support your content creators, do NOT use this for piracy! 9 | 10 | You will need a skillshare premium account to access premium content. 11 | This script will not handle login for you. 12 | 13 | 1. Log-in to skillshare in your browser and open up the developer console. 14 | (cmd-shift-c for chrome on mac) 15 | 16 | 2. Use it to grab your cookie by typing: 17 | ``` 18 | document.cookie 19 | ``` 20 | 21 | 3. Copy-paste cookie from developer console (without " if present) into example script. 22 | 23 | #### Example: 24 | ``` 25 | from downloader import Downloader 26 | 27 | cookie = """ 28 | ADD YOUR COOKIE HERE 29 | """ 30 | 31 | dl = Downloader(cookie=cookie) 32 | 33 | # download by class URL: 34 | dl.download_course_by_url('https://www.skillshare.com/classes/Art-Fundamentals-in-One-Hour/189505397') 35 | 36 | # or by class ID: 37 | # dl.download_course_by_class_id(189505397) 38 | ``` 39 | 40 | 4. (Optionally) run with docker and docker-compose: 41 | ``` 42 | docker-compose build 43 | docker-compose run --rm ssdl python example.py 44 | ``` 45 | -------------------------------------------------------------------------------- /code/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | .static_storage/ 56 | .media/ 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /code/downloader.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import sys 4 | import re 5 | import os 6 | from slugify import slugify 7 | 8 | 9 | class Downloader(object): 10 | def __init__( 11 | self, 12 | cookie, 13 | download_path=os.environ.get('FILE_PATH', './data'), 14 | pk='BCpkADawqM2OOcM6njnM7hf9EaK6lIFlqiXB0iWjqGWUQjU7R8965xUvIQNqdQbnDTLz0IAO7E6Ir2rIbXJtFdzrGtitoee0n1XXRliD-RH9A-svuvNW9qgo3Bh34HEZjXjG4Nml4iyz3KqF', 15 | brightcove_account_id=3695997568001, 16 | ): 17 | self.cookie = cookie.strip().strip('"') 18 | self.download_path = download_path 19 | self.pk = pk.strip() 20 | self.brightcove_account_id = brightcove_account_id 21 | self.pythonversion = 3 if sys.version_info >= (3, 0) else 2 22 | 23 | def is_unicode_string(self, string): 24 | if (self.pythonversion == 3 and isinstance(string, str)) or (self.pythonversion == 2 and isinstance(string, unicode)): 25 | return True 26 | 27 | else: 28 | return False 29 | 30 | def download_course_by_url(self, url): 31 | m = re.match(r'https://www.skillshare.com/classes/.*?/(\d+)', url) 32 | 33 | if not m: 34 | raise Exception('Failed to parse class ID from URL') 35 | 36 | self.download_course_by_class_id(m.group(1)) 37 | 38 | def download_course_by_class_id(self, class_id): 39 | data = self.fetch_course_data_by_class_id(class_id=class_id) 40 | teacher_name = None 41 | 42 | if 'vanity_username' in data['_embedded']['teacher']: 43 | teacher_name = data['_embedded']['teacher']['vanity_username'] 44 | 45 | if not teacher_name: 46 | teacher_name = data['_embedded']['teacher']['full_name'] 47 | 48 | if not teacher_name: 49 | raise Exception('Failed to read teacher name from data') 50 | 51 | if self.is_unicode_string(teacher_name): 52 | teacher_name = teacher_name.encode('ascii', 'replace') 53 | 54 | title = data['title'] 55 | 56 | if self.is_unicode_string(title): 57 | title = title.encode('ascii', 'replace') # ignore any weird char 58 | 59 | base_path = os.path.abspath( 60 | os.path.join( 61 | self.download_path, 62 | slugify(teacher_name), 63 | slugify(title), 64 | ) 65 | ).rstrip('/') 66 | 67 | if not os.path.exists(base_path): 68 | os.makedirs(base_path) 69 | 70 | for u in data['_embedded']['units']['_embedded']['units']: 71 | for s in u['_embedded']['sessions']['_embedded']['sessions']: 72 | video_id = None 73 | 74 | if 'video_hashed_id' in s and s['video_hashed_id']: 75 | video_id = s['video_hashed_id'].split(':')[1] 76 | 77 | if not video_id: 78 | # NOTE: this happens sometimes... 79 | # seems random and temporary but might be some random 80 | # server-side check on user-agent etc? 81 | # ...think it's more stable now with those set to 82 | # emulate an android device 83 | raise Exception('Failed to read video ID from data') 84 | 85 | s_title = s['title'] 86 | 87 | if self.is_unicode_string(s_title): 88 | s_title = s_title.encode('ascii', 'replace') # ignore any weird char 89 | 90 | file_name = '{} - {}'.format( 91 | str(s['index'] + 1).zfill(2), 92 | slugify(s_title), 93 | ) 94 | 95 | self.download_video( 96 | fpath='{base_path}/{session}.mp4'.format( 97 | base_path=base_path, 98 | session=file_name, 99 | ), 100 | video_id=video_id, 101 | ) 102 | 103 | print('') 104 | 105 | def fetch_course_data_by_class_id(self, class_id): 106 | res = requests.get( 107 | url='https://api.skillshare.com/classes/{}'.format(class_id), 108 | headers={ 109 | 'Accept': 'application/vnd.skillshare.class+json;,version=0.8', 110 | 'User-Agent': 'Skillshare/5.3.0; Android 9.0.1', 111 | 'Host': 'api.skillshare.com', 112 | 'Referer': 'https://www.skillshare.com/', 113 | 'cookie': self.cookie, 114 | } 115 | ) 116 | 117 | if not res.status_code == 200: 118 | raise Exception('Fetch error, code == {}'.format(res.status_code)) 119 | 120 | return res.json() 121 | 122 | def download_video(self, fpath, video_id): 123 | meta_url = 'https://edge.api.brightcove.com/playback/v1/accounts/{account_id}/videos/{video_id}'.format( 124 | account_id=self.brightcove_account_id, 125 | video_id=video_id, 126 | ) 127 | 128 | meta_res = requests.get( 129 | meta_url, 130 | headers={ 131 | 'Accept': 'application/json;pk={}'.format(self.pk), 132 | 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0', 133 | 'Origin': 'https://www.skillshare.com' 134 | } 135 | ) 136 | 137 | if meta_res.status_code != 200: 138 | raise Exception('Failed to fetch video meta') 139 | 140 | for x in meta_res.json()['sources']: 141 | if 'container' in x: 142 | if x['container'] == 'MP4' and 'src' in x: 143 | dl_url = x['src'] 144 | break 145 | 146 | print('Downloading {}...'.format(fpath)) 147 | 148 | if os.path.exists(fpath): 149 | print('Video already downloaded, skipping...') 150 | return 151 | 152 | with open(fpath, 'wb') as f: 153 | response = requests.get(dl_url, allow_redirects=True, stream=True) 154 | total_length = response.headers.get('content-length') 155 | 156 | if not total_length: 157 | f.write(response.content) 158 | 159 | else: 160 | dl = 0 161 | total_length = int(total_length) 162 | 163 | for data in response.iter_content(chunk_size=4096): 164 | dl += len(data) 165 | f.write(data) 166 | done = int(50 * dl / total_length) 167 | sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done))) 168 | sys.stdout.flush() 169 | 170 | print('') 171 | --------------------------------------------------------------------------------