├── requirements.txt
├── .gitignore
├── docker-compose.yml
├── Dockerfile
├── code
    ├── example.py
    ├── from-arguments.py
    ├── .gitignore
    └── downloader.py
├── LICENSE.txt
└── README.md


/requirements.txt:
--------------------------------------------------------------------------------
1 | python-slugify
2 | requests


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | data/*
2 | .idea
3 | code/data/*
4 | code/houdinii.py


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | ssdl:
2 |   build: .
3 |   environment:
4 |     FILE_PATH: '/data/'
5 |   volumes:
6 |     - ./code:/code
7 |     - ./data:/data
8 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3-slim
2 | COPY ./requirements.txt /requirements.txt
3 | RUN pip install -r /requirements.txt
4 | COPY ./code /code
5 | WORKDIR /code
6 | # ENTRYPOINT ["python", "/code/from-arguments.py"]
7 | 


--------------------------------------------------------------------------------
/code/example.py:
--------------------------------------------------------------------------------
 1 | from downloader import Downloader
 2 | 
 3 | cookie = """
 4 | ADD YOUR COOKIE HERE
 5 | """
 6 | 
 7 | dl = Downloader(cookie=cookie)
 8 | 
 9 | # download by class URL:
10 | dl.download_course_by_url('https://www.skillshare.com/classes/Art-Fundamentals-in-One-Hour/189505397')
11 | 
12 | # or by class ID:
13 | # dl.download_course_by_class_id(189505397)
14 | 


--------------------------------------------------------------------------------
/code/from-arguments.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import re
 3 | from downloader import Downloader
 4 | 
 5 | cookie = sys.argv[1]
 6 | dl = Downloader(cookie=cookie)
 7 | 
 8 | if len(sys.argv) != 3:
 9 |     raise Exception('Invalid arguments. Usage : {program} <cookie> <url_or_class_id>'.format(program=sys.argv[0]))
10 | 
11 | if re.match(r'^[0-9]+$', sys.argv[2]):
12 | 	dl.download_course_by_class_id(sys.argv[2])
13 | 
14 | else:
15 | 	dl.download_course_by_url(sys.argv[2])
16 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Klas Källqvist
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Skillshare video downloader in python
 2 | 
 3 | I needed offline access to some Skillshare courses I wanted to take while on vacation.
 4 | Video download is only available in the Skillshare mobile apps, and I didn't want to
 5 | choose between shaky 3G streaming and watching on a tiny mobile screen, so I put together a
 6 | quick and dirty video downloader in Python.
 7 | 
 8 | ### Support your content creators, do NOT use this for piracy!
 9 | 
10 | You will need a skillshare premium account to access premium content.
11 | This script will not handle login for you.
12 | 
13 | 1. Log in to Skillshare in your browser and open up the developer console
14 | (cmd-shift-c for Chrome on Mac).
15 | 
16 | 2. Use it to grab your cookie by typing:
17 | ```
18 | document.cookie
19 | ```
20 | 
21 | 3. Copy and paste the cookie from the developer console (without the surrounding " characters, if present) into the example script (`code/example.py`).
22 | 
23 | #### Example:
24 | ```
25 | from downloader import Downloader
26 | 
27 | cookie = """
28 | ADD YOUR COOKIE HERE
29 | """
30 | 
31 | dl = Downloader(cookie=cookie)
32 | 
33 | # download by class URL:
34 | dl.download_course_by_url('https://www.skillshare.com/classes/Art-Fundamentals-in-One-Hour/189505397')
35 | 
36 | # or by class ID:
37 | # dl.download_course_by_class_id(189505397)
38 | ```
39 | 
40 | 4. (Optionally) run with docker and docker-compose:
41 | ```
42 | docker-compose build
43 | docker-compose run --rm ssdl python example.py
44 | ```
45 | 


--------------------------------------------------------------------------------
/code/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | .static_storage/
 56 | .media/
 57 | local_settings.py
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/code/downloader.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import json
  3 | import sys
  4 | import re
  5 | import os
  6 | from slugify import slugify
  7 | 
  8 | 
  9 | class Downloader(object):
 10 |     def __init__(
 11 |         self,
 12 |         cookie,
 13 |         download_path=os.environ.get('FILE_PATH', './data'),
 14 |         pk='BCpkADawqM2OOcM6njnM7hf9EaK6lIFlqiXB0iWjqGWUQjU7R8965xUvIQNqdQbnDTLz0IAO7E6Ir2rIbXJtFdzrGtitoee0n1XXRliD-RH9A-svuvNW9qgo3Bh34HEZjXjG4Nml4iyz3KqF',
 15 |         brightcove_account_id=3695997568001,
 16 |     ):
 17 |         self.cookie = cookie.strip().strip('"')
 18 |         self.download_path = download_path
 19 |         self.pk = pk.strip()
 20 |         self.brightcove_account_id = brightcove_account_id
 21 |         self.pythonversion = 3 if sys.version_info >= (3, 0) else 2
 22 | 
 23 |     def is_unicode_string(self, string):
 24 |         if (self.pythonversion == 3 and isinstance(string, str)) or (self.pythonversion == 2 and isinstance(string, unicode)):
 25 |             return True
 26 | 
 27 |         else:
 28 |             return False
 29 | 
 30 |     def download_course_by_url(self, url):
 31 |         m = re.match(r'https://www.skillshare.com/classes/.*?/(\d+)', url)
 32 | 
 33 |         if not m:
 34 |             raise Exception('Failed to parse class ID from URL')
 35 | 
 36 |         self.download_course_by_class_id(m.group(1))
 37 | 
 38 |     def download_course_by_class_id(self, class_id):
 39 |         data = self.fetch_course_data_by_class_id(class_id=class_id)
 40 |         teacher_name = None
 41 | 
 42 |         if 'vanity_username' in data['_embedded']['teacher']:
 43 |             teacher_name = data['_embedded']['teacher']['vanity_username']
 44 | 
 45 |         if not teacher_name:
 46 |             teacher_name = data['_embedded']['teacher']['full_name']
 47 | 
 48 |         if not teacher_name:
 49 |             raise Exception('Failed to read teacher name from data')
 50 | 
 51 |         if self.is_unicode_string(teacher_name):
 52 |             teacher_name = teacher_name.encode('ascii', 'replace')
 53 | 
 54 |         title = data['title']
 55 | 
 56 |         if self.is_unicode_string(title):
 57 |             title = title.encode('ascii', 'replace')  # ignore any weird char
 58 | 
 59 |         base_path = os.path.abspath(
 60 |             os.path.join(
 61 |                 self.download_path,
 62 |                 slugify(teacher_name),
 63 |                 slugify(title),
 64 |             )
 65 |         ).rstrip('/')
 66 | 
 67 |         if not os.path.exists(base_path):
 68 |             os.makedirs(base_path)
 69 | 
 70 |         for u in data['_embedded']['units']['_embedded']['units']:
 71 |             for s in u['_embedded']['sessions']['_embedded']['sessions']:
 72 |                 video_id = None
 73 | 
 74 |                 if 'video_hashed_id' in s and s['video_hashed_id']:
 75 |                     video_id = s['video_hashed_id'].split(':')[1]
 76 | 
 77 |                 if not video_id:
 78 |                     # NOTE: this happens sometimes...
 79 |                     # seems random and temporary but might be some random
 80 |                     # server-side check on user-agent etc?
 81 |                     # ...think it's more stable now with those set to
 82 |                     # emulate an android device
 83 |                     raise Exception('Failed to read video ID from data')
 84 | 
 85 |                 s_title = s['title']
 86 | 
 87 |                 if self.is_unicode_string(s_title):
 88 |                     s_title = s_title.encode('ascii', 'replace')  # ignore any weird char
 89 | 
 90 |                 file_name = '{} - {}'.format(
 91 |                     str(s['index'] + 1).zfill(2),
 92 |                     slugify(s_title),
 93 |                 )
 94 | 
 95 |                 self.download_video(
 96 |                     fpath='{base_path}/{session}.mp4'.format(
 97 |                         base_path=base_path,
 98 |                         session=file_name,
 99 |                     ),
100 |                     video_id=video_id,
101 |                 )
102 | 
103 |                 print('')
104 | 
105 |     def fetch_course_data_by_class_id(self, class_id):
106 |         res = requests.get(
107 |             url='https://api.skillshare.com/classes/{}'.format(class_id),
108 |             headers={
109 |                 'Accept': 'application/vnd.skillshare.class+json;,version=0.8',
110 |                 'User-Agent': 'Skillshare/5.3.0; Android 9.0.1',
111 |                 'Host': 'api.skillshare.com',
112 |                 'Referer': 'https://www.skillshare.com/',
113 |                 'cookie': self.cookie,
114 |             }
115 |         )
116 | 
117 |         if not res.status_code == 200:
118 |             raise Exception('Fetch error, code == {}'.format(res.status_code))
119 | 
120 |         return res.json()
121 | 
122 |     def download_video(self, fpath, video_id):
123 |         meta_url = 'https://edge.api.brightcove.com/playback/v1/accounts/{account_id}/videos/{video_id}'.format(
124 |             account_id=self.brightcove_account_id,
125 |             video_id=video_id,
126 |         )
127 | 
128 |         meta_res = requests.get(
129 |             meta_url,
130 |             headers={
131 |                 'Accept': 'application/json;pk={}'.format(self.pk),
132 |                 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
133 |                 'Origin': 'https://www.skillshare.com'
134 |             }
135 |         )
136 | 
137 |         if meta_res.status_code != 200:
138 |             raise Exception('Failed to fetch video meta')
139 | 
140 |         for x in meta_res.json()['sources']:
141 |             if 'container' in x:
142 |                 if x['container'] == 'MP4' and 'src' in x:
143 |                     dl_url = x['src']
144 |                     break
145 | 
146 |         print('Downloading {}...'.format(fpath))
147 | 
148 |         if os.path.exists(fpath):
149 |             print('Video already downloaded, skipping...')
150 |             return
151 | 
152 |         with open(fpath, 'wb') as f:
153 |             response = requests.get(dl_url, allow_redirects=True, stream=True)
154 |             total_length = response.headers.get('content-length')
155 | 
156 |             if not total_length:
157 |                 f.write(response.content)
158 | 
159 |             else:
160 |                 dl = 0
161 |                 total_length = int(total_length)
162 | 
163 |                 for data in response.iter_content(chunk_size=4096):
164 |                     dl += len(data)
165 |                     f.write(data)
166 |                     done = int(50 * dl / total_length)
167 |                     sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
168 |                     sys.stdout.flush()
169 | 
170 |             print('')
171 | 


--------------------------------------------------------------------------------