├── .gitignore ├── README ├── setup.py ├── udacity_courses.txt └── udacitydl ├── __init__.py ├── courses.py └── udacitydl.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | .DS_Store 3 | 4 | # C extensions 5 | *.so 6 | 7 | # Packages 8 | *.egg 9 | *.egg-info 10 | dist 11 | build 12 | eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | udacity-dl 2 | =========== 3 | 4 | A python package for archiving content from udacity.com (videos, 5 | lecture notes, quizzes, …) for offline reference. 
6 | 7 | ------------- 8 | Installation 9 | ------------- 10 | 11 | Make sure you have installed: 12 | Python 2.7 (http://www.python.org/download) 13 | mechanize 0.3.1 (https://pypi.python.org/pypi/mechanize/) 14 | 15 | With pip, installation is as simple as: 16 | `sudo pip install udacity-dl` 17 | 18 | If you prefer manual installation: 19 | - Clone [this repo](https://github.com/riteshpradhan/udacity-dl) locally 20 | - Then simply run: `python setup.py install` 21 | This will create a udacity-dl script in `/usr/local/bin` (linux) or 22 | `c:\\Python27\\Scripts` (windows) 23 | 24 | 25 | ------ 26 | Usage 27 | ------ 28 | For usage help, run: 29 | `udacity-dl -h` 30 | 31 | Example usage: 32 | `udacity-dl -d /my/destination/path/ course_name(s)` 33 | 34 | 35 | ------ 36 | Notes 37 | ------ 38 | For the link https://www.udacity.com/wiki/ST095/downloads, *ST095* is the course_name 39 | 40 | Usage: 41 | `udacity-dl -d /my/destination/path/ ST095 cs222` 42 | 43 | A few courses and their respective course names are listed in 44 | [udacity_courses](/udacity_courses.txt) 45 | 46 | 47 | --------- 48 | Features 49 | --------- 50 | 51 | * Downloads all available resources in the wiki 52 | * Skips already downloaded resources 53 | * Proper naming of course and class contents 54 | * No Udacity username or password needed 55 | * Updated courses list 56 | * Fixes class downloads with multiple sections per unit 57 | 58 | ======================= 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | # from distutils.core import setup 5 | 6 | setup(name="udacity-dl", 7 | version="1.2.2", 8 | description="Download udacity class videos and resources", 9 | long_description=open("README").read(), 10 | author="Ritesh Pradhan", 11 | author_email="ritesxz@gmail.com", 12 | 
url="https://github.com/riteshpradhan/udacity-dl", 13 | download_url="https://pypi.python.org/pypi/udacity-dl", 14 | packages=["udacitydl"], 15 | entry_points = { "console_scripts" : [ "udacity-dl = udacitydl.udacitydl:main"]}, 16 | install_requires=["mechanize","beautifulsoup4","argparse", "progressbar2"], 17 | ) 18 | -------------------------------------------------------------------------------- /udacity_courses.txt: -------------------------------------------------------------------------------- 1 | List of Courses 2 | ================ 3 | 4 | - 'cs101': 'Introduction to Computer Science', 5 | - 'PH100': 'Intro to Physics: Landmarks in Physics', 6 | - 'ST101': 'Intro to Statistics: Making Decisions Based on Data', 7 | - 'cs215': 'Algorithms: Crunching Social Networks', 8 | - 'cs222': 'Differential Equations in Action', 9 | - 'cs253': 'Web Development', 10 | - 'cs255': 'HTML5 Game Development', 11 | - 'cs258': 'Software Testing', 12 | - 'cs259': 'Software Debugging', 13 | - 'cs262': 'Programming Languages', 14 | - 'cs271': 'Introduction to Artificial Intelligence', 15 | - 'cs291': 'Interactive Rendering', 16 | - 'cs313': 'Introduction to Theoretical Computer Science', 17 | - 'ep245': 'How to Build a Startup', 18 | - 'cs212': 'Design of Computer Programs', 19 | - 'cs344': 'Introduction to Parallel Programming', 20 | - 'cs348': 'Functional Hardware Verification', 21 | - 'cs373': 'Artificial Intelligence for Robotics', 22 | - 'cs387': 'Applied Cryptography', 23 | - 'ma006': 'Visualizing Algebra', 24 | - 'ma008': 'College Algebra', 25 | - 'ud436': 'Computer Networking - Open', 26 | - 'UD015': 'Object-Oriented JavaScript', 27 | - 'UD032': 'Data Wrangling with MongoDB', 28 | - 'UD036': 'Programming Foundations with Python', 29 | - 'UD088': 'Full Stack Foundations', 30 | - 'UD110': 'Intro to AJAX', 31 | - 'UD120': 'Intro to Machine Learning', 32 | - 'UD162': 'Intro to Point and Click App development', 33 | - 'UD197': 'Intro to Relational Databases', 34 | - 'UD201': 
'Intro to Inferential Statistics', 35 | - 'UD245': 'Intro to jQuery', 36 | - 'UD292': 'HTML5 Canvas', 37 | - 'UD304': 'Intro to HTML and CSS', 38 | - 'UD359': 'Intro to Data Science', 39 | - 'UD381': 'Real-Time Analytics with Apache Storm', 40 | - 'UD436': 'Computer Networking - Open', 41 | - 'UD507': 'Data Visualization and D3.js', 42 | - 'UD585': 'Intro to iOS App Development with Swift', 43 | - 'UD617': 'Intro to Hadoop and MapReduce', 44 | - 'UD651': 'Data Analysis with R', 45 | - 'UD675': 'Machine Learning: Supervised Learning', 46 | - 'UD741': 'Machine Learning: Unsupervised Learning', 47 | - 'UD775': 'How to Use Git and GitHub', 48 | - 'UD804': 'JavaScript Basics', 49 | - 'UD805': 'Software Development Processes - Open', 50 | - 'UD820': 'Machine Learning: Reinforcement Learning', 51 | - 'UD827': 'Intro to Descriptive Statistics', 52 | - 'UD849': 'UX Design for Mobile Developers', 53 | - 'UD853': 'Developing Android Apps', 54 | - 'UD859': 'Developing Scalable Apps with Java', 55 | - 'UD884': 'Website Performance Optimization', 56 | - 'UD893': 'Responsive Web Design Fundamentals', 57 | - 'UD919': 'Model Building and Validation', 58 | - 'UD989': 'JavaScript Design Patterns', 59 | - 'CS046': 'Introduction to Programming in Java: Learning Java', 60 | - 'CS101': 'Intro to Computer Science: Building a Search Engine', 61 | - 'CS212': 'Design of Computer Programs: Programming Principles', 62 | - 'CS215': 'Algorithms: Crunching Social Networks', 63 | - 'CS222': 'Differential Equations: Making Math Matter', 64 | - 'CS253': 'Web Application Engineering: How to Build a Blog', 65 | - 'CS255': 'HTML5 Game Development: Building High Performance Web Applications', 66 | - 'CS256': 'Mobile Web Development', 67 | - 'CS258': 'Software Testing: How to Make Software Fail', 68 | - 'CS259': 'Software Debugging: Automating the Boring Tasks', 69 | - 'CS262': 'Programming Languages: Building a Web Browser', 70 | - 'CS271': 'Introduction to Artificial Intelligence: AI Class', 71 | - 
'CS291': 'Interactive 3D Graphics: Creating Virtual Worlds', 72 | - 'CS313': 'Introduction to Theoretical Computer Science', 73 | - 'CS344': 'Intro To Parallel Programming: Using CUDA to Harness the Power of GPUs', 74 | - 'CS348': 'Functional Hardware Verification: How to Verify Chips and Eliminate Bugs', 75 | - 'CS373': 'Artificial Intelligence for Robotics: Programming a Robotic Car', 76 | - 'CS387': 'Applied Cryptography: The Science of Secrets', 77 | - 'DESIGN101': 'Intro to the Design of Everyday Things', 78 | - 'EP245': 'How to Build a Startup: The Lean LaunchPad', 79 | - 'MA004': 'Introductory Algebra Review', 80 | - 'MA006': 'Visualizing Algebra: Problems and Patterns', 81 | - 'MA008': 'College Algebra: Animals, Architecture, and Innovation', 82 | - 'PS001': 'Introduction to Psychology: The Science of Thought and Behavior', 83 | - 'ST095': 'Statistics: The Science of Decisions' 84 | -------------------------------------------------------------------------------- /udacitydl/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: Ritesh Pradhan 3 | # @Date: 2016-05-03 13:39:08 4 | # @Last Modified by: Ritesh Pradhan 5 | # @Last Modified time: 2016-05-05 11:45:21 6 | -------------------------------------------------------------------------------- /udacitydl/courses.py: -------------------------------------------------------------------------------- 1 | 2 | COURSES_DICT = { 3 | 4 | 'cs101': 'Introduction to Computer Science', 5 | 'PH100': 'Intro to Physics: Landmarks in Physics', 6 | 'ST101': 'Intro to Statistics: Making Decisions Based on Data', 7 | 'cs215': 'Algorithms: Crunching Social Networks', 8 | 'cs222': 'Differential Equations in Action', 9 | 'cs253': 'Web Development', 10 | 'cs255': 'HTML5 Game Development', 11 | 'cs258': 'Software Testing', 12 | 'cs259': 'Software Debugging', 13 | 'cs262': 'Programming Languages', 14 | 'cs271': 'Introduction to Artificial Intelligence', 
15 | 'cs291': 'Interactive Rendering', 16 | 'cs313': 'Introduction to Theoretical Computer Science', 17 | 'ep245': 'How to Build a Startup', 18 | 'cs212': 'Design of Computer Programs', 19 | 'cs344': 'Introduction to Parallel Programming', 20 | 'cs348': 'Functional Hardware Verification', 21 | 'cs373': 'Artificial Intelligence for Robotics', 22 | 'cs387': 'Applied Cryptography', 23 | 'ma006': 'Visualizing Algebra', 24 | 'ma008': 'College Algebra', 25 | 'ud436': 'Computer Networking - Open', 26 | 'UD015': 'Object-Oriented JavaScript', 27 | 'UD032': 'Data Wrangling with MongoDB', 28 | 'UD036': 'Programming Foundations with Python', 29 | 'UD088': 'Full Stack Foundations', 30 | 'UD110': 'Intro to AJAX', 31 | 'UD120': 'Intro to Machine Learning', 32 | 'UD162': 'Intro to Point and Click App development', 33 | 'UD197': 'Intro to Relational Databases', 34 | 'UD201': 'Intro to Inferential Statistics', 35 | 'UD245': 'Intro to jQuery', 36 | 'UD292': 'HTML5 Canvas', 37 | 'UD304': 'Intro to HTML and CSS', 38 | 'UD359': 'Intro to Data Science', 39 | 'UD381': 'Real-Time Analytics with Apache Storm', 40 | 'UD436': 'Computer Networking - Open', 41 | 'UD507': 'Data Visualization and D3.js', 42 | 'UD585': 'Intro to iOS App Development with Swift', 43 | 'UD617': 'Intro to Hadoop and MapReduce', 44 | 'UD651': 'Data Analysis with R', 45 | 'UD675': 'Machine Learning: Supervised Learning', 46 | 'UD741': 'Machine Learning: Unsupervised Learning', 47 | 'UD775': 'How to Use Git and GitHub', 48 | 'UD804': 'JavaScript Basics', 49 | 'UD805': 'Software Development Processes - Open', 50 | 'UD820': 'Machine Learning: Reinforcement Learning', 51 | 'UD827': 'Intro to Descriptive Statistics', 52 | 'UD849': 'UX Design for Mobile Developers', 53 | 'UD853': 'Developing Android Apps', 54 | 'UD859': 'Developing Scalable Apps with Java', 55 | 'UD884': 'Website Performance Optimization', 56 | 'UD893': 'Responsive Web Design Fundamentals', 57 | 'UD919': 'Model Building and Validation', 58 | 'UD989': 
'JavaScript Design Patterns', 59 | 'CS046': 'Introduction to Programming in Java: Learning Java', 60 | 'CS101': 'Intro to Computer Science: Building a Search Engine', 61 | 'CS212': 'Design of Computer Programs: Programming Principles', 62 | 'CS215': 'Algorithms: Crunching Social Networks', 63 | 'CS222': 'Differential Equations: Making Math Matter', 64 | 'CS253': 'Web Application Engineering: How to Build a Blog', 65 | 'CS255': 'HTML5 Game Development: Building High Performance Web Applications', 66 | 'CS256': 'Mobile Web Development', 67 | 'CS258': 'Software Testing: How to Make Software Fail', 68 | 'CS259': 'Software Debugging: Automating the Boring Tasks', 69 | 'CS262': 'Programming Languages: Building a Web Browser', 70 | 'CS271': 'Introduction to Artificial Intelligence: AI Class', 71 | 'CS291': 'Interactive 3D Graphics: Creating Virtual Worlds', 72 | 'CS313': 'Introduction to Theoretical Computer Science', 73 | 'CS344': 'Intro To Parallel Programming: Using CUDA to Harness the Power of GPUs', 74 | 'CS348': 'Functional Hardware Verification: How to Verify Chips and Eliminate Bugs', 75 | 'CS373': 'Artificial Intelligence for Robotics: Programming a Robotic Car', 76 | 'CS387': 'Applied Cryptography: The Science of Secrets', 77 | 'DESIGN101': 'Intro to the Design of Everyday Things', 78 | 'EP245': 'How to Build a Startup: The Lean LaunchPad', 79 | 'MA004': 'Introductory Algebra Review', 80 | 'MA006': 'Visualizing Algebra: Problems and Patterns', 81 | 'MA008': 'College Algebra: Animals, Architecture, and Innovation', 82 | 'PS001': 'Introduction to Psychology: The Science of Thought and Behavior', 83 | 'ST095': 'Statistics: The Science of Decisions', 84 | 85 | 86 | } -------------------------------------------------------------------------------- /udacitydl/udacitydl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import argparse 4 | import requests 5 | import progressbar 6 | from mechanize import 
Browser  # completes the ``from mechanize import`` statement that ends the previous line
from bs4 import BeautifulSoup
from courses import COURSES_DICT


def _safe_name(name):
    """Return *name* made usable as a single path component.

    Section headings and link texts come from a remote page, so path
    separators and the special names "." / ".." are neutralised to keep
    every download inside its target directory.
    """
    cleaned = name
    for sep in (os.sep, os.altsep):
        if sep:
            cleaned = cleaned.replace(sep, "_")
    if cleaned in (".", ".."):
        cleaned = cleaned.replace(".", "_")
    return cleaned


class UdacityDownloader(object):
    """
    Class to download content (videos, lecture notes, ...) from udacity.com for
    use offline.
    """

    BASE_URL = 'http://udacity.com/wiki/%s'
    DOWNLOAD_URL = BASE_URL + '/downloads'

    def __init__(self):
        self.browser = Browser()
        # robots.txt handling is switched off on the mechanize browser
        self.browser.set_handle_robots(False)

    def get_course_name_from_url(self, course_url):
        """Given the course URL, return the name, e.g., cs212"""
        # 'http://udacity.com/wiki/<name>/downloads'.split('/') has <name> at index 4
        return course_url.split('/')[4]

    def get_download_url_from_name(self, course_name):
        """Given the name of a course, return the video lecture url"""
        return self.DOWNLOAD_URL % course_name

    def get_downloadable_content(self, course_url):
        """
        Collect the download links listed on a course's downloads page.

        Returns a dict that maps every section heading (``<h2>`` text) to a
        dict of ``{class_name: link}``, e.g.
        ``{"type": {"class_name": "link"}, "other_type": {"class_name": "link"}}``.
        Repeated headings or class names get "." appended until they are
        unique, so no entry overwrites another.  An empty dict is returned
        when the page has no downloads section; headings without a list and
        list items without a link are skipped.
        """
        print("* Collecting downloadable content from " + course_url)

        page = self.browser.open(course_url)
        soup = BeautifulSoup(page, "lxml")

        container = soup.find("div", {"class": "wtabs extl"})
        if container is None:
            print(" - no downloadable content found at " + course_url)
            return {}

        resources = {}
        for heading in container.find_all("h2"):
            listing = heading.find_next_sibling('ul')
            if listing is None:
                continue

            links = {}
            for item in listing.find_all('li'):
                anchor = item.find('a')
                if anchor is None or not anchor.get('href'):
                    continue
                name = anchor.text
                while name in links:
                    name += "."
                links[name] = anchor['href']

            title = heading.text
            while title in resources:
                title += "."
            resources[title] = links
        return resources

    def download_course(self, cname, dest_dir="."):
        """
        Download all the contents (quizzes, videos, lecture notes, ...) of the
        course to the given destination directory (defaults to .).

        The course gets its own directory (named after the long course name
        when ``COURSES_DICT`` knows it), with one sub-directory per section.
        A failing file is reported and skipped so the remaining files are
        still downloaded.
        """
        download_url = self.get_download_url_from_name(cname)
        print("* Need to download from  " + download_url)

        resource_dict = self.get_downloadable_content(download_url)

        long_cname = COURSES_DICT.get(cname, cname)
        print('* Got all downloadable content for ' + long_cname)

        course_dir = os.path.abspath(os.path.join(dest_dir, long_cname))

        # makedirs (not mkdir) so a destination directory that does not
        # exist yet is created as well
        if not os.path.exists(course_dir):
            os.makedirs(course_dir)

        print("* " + cname + " will be downloaded to " + course_dir)
        print(" - Downloading zipped/videos pages")

        for types, download_dict in resource_dict.items():
            resource_dir = os.path.join(course_dir, _safe_name(types))
            if not os.path.exists(resource_dir):
                os.makedirs(resource_dir)
            print(" -- Downloading  " + types)
            for fname, tfname in download_dict.items():
                try:
                    print(" * Downloading  " + fname + " ...")
                    download_file(tfname, resource_dir, _safe_name(fname))
                except Exception as e:
                    # deliberate best effort: report and go on with the next file
                    print(" - failed  %s %s" % (fname, e))


def download_file(url, path, fn, chunk_size=2 * 1024 ** 2):
    """
    Stream *url* into the file *fn* inside directory *path*.

    The download is skipped when a file of exactly the advertised
    Content-Length already exists.  Progress is shown in bytes; when the
    server sends no usable Content-Length the bar runs with an unknown
    maximum and the skip check is not possible.

    Raises ``requests.HTTPError`` for 4xx/5xx responses, so an error page is
    never stored as course material.
    """
    target = os.path.join(path, fn)
    # stream=True: the body is fetched chunk by chunk, not loaded into memory
    response = requests.get(url, stream=True)
    try:
        response.raise_for_status()

        try:
            total_length = int(response.headers.get('content-length'))
        except (TypeError, ValueError):
            total_length = None  # header missing or malformed

        if (total_length is not None and os.path.exists(target)
                and os.path.getsize(target) == total_length):
            # File downloaded, early return
            return

        if total_length is None:
            bar = progressbar.ProgressBar(max_value=progressbar.UnknownLength)
        else:
            bar = progressbar.ProgressBar(max_value=total_length)

        written = 0
        with open(target, 'wb') as out:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if not chunk:  # filter out keep-alive new chunks
                    continue
                out.write(chunk)
                written += len(chunk)
                if total_length is None:
                    bar.update(written)
                else:
                    # decoded data can exceed Content-Length; never pass the
                    # bar a value above its maximum
                    bar.update(min(written, total_length))
    finally:
        response.close()


def main():
    """Parse the command line and download every requested course."""
    parser = argparse.ArgumentParser(
        description='Download Udacity.com course videos/docs for offline use.')
    parser.add_argument(
        "-d", dest='dest_dir', type=str, default=".",
        help='destination directory where everything will be saved')
    parser.add_argument(
        'course_names', nargs="+", metavar='course_name', type=str,
        help='one or more course names (from the url)')
    args = parser.parse_args()

    downloader = UdacityDownloader()

    for course_name in args.course_names:
        downloader.download_course(course_name, dest_dir=args.dest_dir)

    print(" Download Complete.")


if __name__ == '__main__':
    main()