├── upodder ├── __init__.py ├── test │ ├── __init__.py │ └── test_upodder.py └── upodder.py ├── .gitignore ├── requirements.txt ├── .pypirc ├── Makefile ├── tox.ini ├── .bumpversion.cfg ├── circle.yml ├── setup.py ├── LICENSE.TXT ├── README.md └── README.rst /upodder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /upodder/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | upodder.egg-info 2 | .DS_Store 3 | dist 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | clint 2 | requests 3 | feedparser 4 | sqlobject 5 | tqdm 6 | -------------------------------------------------------------------------------- /.pypirc: -------------------------------------------------------------------------------- 1 | [upodder] 2 | index-servers = 3 | pypi 4 | 5 | [pypi] 6 | repository: https://upload.pypi.org/legacy/ 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | release: 5 | pandoc -s README.md -o README.rst 6 | python3 setup.py check register sdist upload 7 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27,py35,py36, 3 | skip_missing_interpreters = True 4 | 5 | [testenv] 6 | commands=nosetests 7 | deps = 8 | nose 9 | -rrequirements.txt 10 | 
-------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.6.13 3 | commit = True 4 | tag = True 5 | message = Bump version {current_version} > {new_version} [skip ci] 6 | 7 | [bumpversion:file:setup.py] 8 | 9 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | override: 3 | - git config --global user.email $GIT_EMAIL 4 | - git config --global user.name $GIT_NAME 5 | - git config --global push.default matching 6 | - pyenv global 2.7.9 3.5.0 3.4.3 7 | - pip install -U bumpversion twine tox urllib3[secure] 8 | 9 | test: 10 | override: 11 | - tox -e py27 12 | - rm -rf .tox 13 | - rm -rf ~/.cache/pip 14 | - tox -e py34,py35 15 | 16 | deployment: 17 | pypi_prod: 18 | branch: master 19 | commands: 20 | - python setup.py sdist bdist_egg 21 | - twine upload -u $PYPI_USERNAME -p $PYPI_PASSWORD --config-file .pypirc dist/* 22 | - bumpversion patch 23 | - git push origin master --tags 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import re 2 | import ast 3 | from setuptools import setup 4 | 5 | description = 'Command Line Podcast Downloader' 6 | 7 | setup( 8 | name='upodder', 9 | author='"Stan Vitkovskiy, Manuel Riel"', 10 | author_email='manu@snapdragon.cc', 11 | version='0.6.13', 12 | license='LICENSE.txt', 13 | url='https://github.com/m3nu/upodder', 14 | description=description, 15 | packages=['upodder', 'upodder.test'], 16 | entry_points={'console_scripts': ['upodder = upodder.upodder:main']}, 17 | long_description=open('README.rst').read(), 18 | install_requires=[ 19 | 'clint', 20 | 'requests', 21 | 'feedparser', 22 | 'sqlobject 
>=3.0.0a2dev-20151224', 23 | 'tqdm', 24 | 'listparser' 25 | ], 26 | classifiers=[ 27 | 'Development Status :: 4 - Beta', 28 | 'Environment :: Console', 29 | 'Intended Audience :: End Users/Desktop', 30 | 'License :: OSI Approved :: MIT License', 31 | 'Operating System :: Unix', 32 | 'Programming Language :: Python', 33 | 'Programming Language :: Python :: 2.7', 34 | 'Programming Language :: Python :: 3', 35 | 'Programming Language :: Python :: 3.4', 36 | 'Programming Language :: Python :: 3.5', 37 | 'Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary', 38 | ], 39 | ) 40 | -------------------------------------------------------------------------------- /LICENSE.TXT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 to 2015, Stan Vitkovskiy, Manuel Riel 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, this 11 | list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | * Neither the name of the {organization} nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | ------------------------------------------------------------------------------- 30 | 31 | This program also bundles with it python-tabulate 32 | (https://pypi.python.org/pypi/tabulate) library. This library is licensed under 33 | MIT License. 34 | 35 | ------------------------------------------------------------------------------- 36 | -------------------------------------------------------------------------------- /upodder/test/test_upodder.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import shutil 3 | 4 | from upodder import upodder 5 | 6 | BASEDIR = '/tmp/upodder_testing' 7 | 8 | class TestUpodder(unittest.TestCase): 9 | feeds = [ 10 | "https://www.relay.fm/clockwise/feed", 11 | "http://popupchinese.com/feeds/custom/sinica", 12 | "http://www.radiolab.org/feeds/podcast/", 13 | "http://99percentinvisible.org/feed/", 14 | "http://chaosradio.ccc.de/chaosradio-latest.rss", 15 | "http://djfm.ca/?feed=rss2", 16 | "http://feeds.feedburner.com/Sebastien-bHouseFromIbiza/", 17 | "http://alternativlos.org/ogg.rss", 18 | "http://www.sovereignman.com/feed/", 19 | "http://neusprech.org/feed/", 20 | "http://www.davidbarrkirtley.com/podcast/geeksguideshow.xml", 21 | "http://www.cbc.ca/cmlink/1.2919550", 22 | "http://feeds.feedburner.com/binaergewitter-podcast-opus", 23 | "http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml", 24 | 
"http://feeds.feedburner.com/uhhyeahdude/podcast", 25 | ] 26 | 27 | def setUp(self): 28 | upodder.args.no_download = True 29 | upodder.args.mark_seen = False 30 | upodder.args.oldness = 720 31 | upodder.args.basedir = BASEDIR 32 | upodder.init() 33 | 34 | def tearDown(cls): 35 | shutil.rmtree(BASEDIR); 36 | 37 | class TestFeedProcessing(TestUpodder): 38 | def test_feedparsing(self): 39 | for f in self.feeds: 40 | upodder.process_feed(f) 41 | 42 | def test_mark_seen(self): 43 | upodder.args.mark_seen = True 44 | for f in self.feeds: 45 | upodder.process_feed(f) 46 | 47 | self.assertGreater(upodder.SeenEntry.select().count(), 5) 48 | 49 | 50 | class TestFailingFeeds(TestUpodder): 51 | def test_failing_feed(self): 52 | upodder.process_feed('http://www.google.com') 53 | 54 | if __name__ == '__main__': 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | (Idea and first version from Stan Vitkovsky. Forked from https://code.google.com/p/upodder/) 2 | 3 | # Simple podcast downloader for the command line 4 | 5 | [![Circle CI](https://circleci.com/gh/m3nu/upodder.svg?style=svg)](https://circleci.com/gh/m3nu/upodder) 6 | 7 | A simple command-line podcast downloader. Can be run from cron. Simply add your RSS feeds in `~/.upodder/subscriptions` and watch your latest podcasts come in. Destination dir, filename, folder structures, etc can all be customized. 8 | 9 | Please report any bugs on [Github](https://github.com/manuelRiel/upodder). I will promptly fix them. 10 | 11 | ## Installation 12 | 13 | `pip install upodder` 14 | 15 | ## Usage 16 | 17 | After installation, run `upodder`. It will initialize `~/.upodder/` to keep your subscriptions and a small DB of seen files. After that simply enter you feeds in `~/.upodder/subscriptions`. 
18 | 19 | The next time you run `upodder`, it will go over each feed and download new entries to `~/Downloads/podcasts`. 20 | 21 | To view available options, run `upodder --help` 22 | 23 | ## History and motivation 24 | 25 | I've been using this script for several years and the project seems abandoned on Google Code. To keep it from disappearing, after Google Code is shut down, I've forked it here and refactored most parts. To quote the original author, Stan Vitkovsky and his motivation: 26 | 27 | "I needed a simple console podcast downloader. 28 | 29 | I did not find any one suitable for my needs (podracer lacked ATOM support, hpodder segfaulted from time to time and didn't understand ATOM as well. Both of them were unaware of entry IDs, only of mp3 file names, which are subject to change, as on rpod.ru). 30 | 31 | My usage scenario is to download unseen enclosures, place them in the folder with a name ~/podcasts/%d-%m-%Y/{somename}.mp3 (like podracer does) and then rsync them to my MP3 player. 32 | 33 | Also, I wrote a bash script, which mounts my player with pmount-hal, calls podracer, rsyncs my player and unmounts it safely." 34 | 35 | ## Further Contributors 36 | 37 | - *akira (gaspar0069)*: Add support for multiple file extensions and fix file move bug. 38 | 39 | 40 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | (Idea and first version from Stan Vitkovsky. Forked from 2 | https://code.google.com/p/upodder/) 3 | 4 | Simple podcast downloader 5 | ========================= 6 | 7 | A simple command-line podcast downloader. Can be run from cron. Simply 8 | add your RSS feeds in ``~/.upodder/subscriptions`` and watch your latest 9 | podcasts come in. Destination dir, filename, folder structures, etc can 10 | all be customized. 11 | 12 | Please report any bugs on 13 | `Github <https://github.com/manuelRiel/upodder>`__. I will promptly fix 14 | them. 
15 | 16 | Installation 17 | ------------ 18 | 19 | ``pip install upodder`` 20 | 21 | Usage 22 | ----- 23 | 24 | After installation, run ``upodder``. It will initialize ``~/.upodder/`` 25 | to keep your subscriptions and a small DB of seen files. After that 26 | simply enter your feeds in ``~/.upodder/subscriptions``. 27 | 28 | The next time you run ``upodder``, it will go over each feed and 29 | download new entries to ``~/Downloads/podcasts``. 30 | 31 | To view available options, run ``upodder --help`` 32 | 33 | History and motivation 34 | ---------------------- 35 | 36 | I've been using this script for several years and the project seems 37 | abandoned on Google Code. To keep it from disappearing, after Google 38 | Code is shut down, I've forked it here and refactored most parts. To 39 | quote the original author, Stan Vitkovsky and his motivation: 40 | 41 | "I needed a simple console podcast downloader. 42 | 43 | I did not find any one suitable for my needs (podracer lacked ATOM 44 | support, hpodder segfaulted from time to time and didn't understand ATOM 45 | as well. Both of them were unaware of entry IDs, only of mp3 file 46 | names, which are subject to change, as on rpod.ru). 47 | 48 | My usage scenario is to download unseen enclosures, place them in the 49 | folder with a name ~/podcasts/%d-%m-%Y/{somename}.mp3 (like podracer 50 | does) and then rsync them to my MP3 player. 51 | 52 | Also, I wrote a bash script, which mounts my player with pmount-hal, 53 | calls podracer, rsyncs my player and unmounts it safely." 54 | 55 | Further Contributors 56 | -------------------- 57 | 58 | - *akira (gaspar0069)*: Add support for multiple file extensions and 59 | fix file move bug. 
60 | -------------------------------------------------------------------------------- /upodder/upodder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | import feedparser 4 | import listparser 5 | import time 6 | import hashlib 7 | import os 8 | import sys 9 | from os.path import expanduser 10 | import logging 11 | import re 12 | import requests 13 | import argparse 14 | import shutil # To get around "cross-device" rename error when moving to dest. dir. 15 | from datetime import datetime as dt 16 | from sqlobject import SQLObject, sqlite, DateTimeCol, UnicodeCol 17 | from tqdm import tqdm 18 | 19 | # python2 compat 20 | try: input = raw_input 21 | except NameError: pass 22 | 23 | # TODO expanduser as action? https://gist.github.com/brantfaircloth/1252339 24 | parser = argparse.ArgumentParser(description='Download podcasts via the command line.') 25 | parser.add_argument('--no-download', action='store_true', 26 | help="Don't download any files. 
Just mark as read.") 27 | parser.add_argument('--podcastdir', '-p', default='~/Downloads/podcasts', 28 | help="Folder to download podcast files to.") 29 | parser.add_argument('--basedir', '-b', default='~/.upodder', 30 | help="Folder to store subscriptions and seen database.") 31 | parser.add_argument('--oldness', '-o', default=30, type=int, 32 | help="Skip entries older than X days.") 33 | parser.add_argument('--mark-seen', action='store_true', 34 | help="Just mark all entries as seen and exit.") 35 | parser.add_argument('--import-opml', '-i', dest='opmlpath', 36 | help='Import feeds from an OPML file.') 37 | parser.add_argument("--quiet", help="Only output errors.", 38 | action="store_true") 39 | parser.add_argument('--user-agent', '-A', default='upodder', 40 | help="Set custom User-Agent string.") 41 | args = parser.parse_args() 42 | 43 | YES = [1,"1","on","yes","Yes","YES","y","Y","true","True","TRUE","t","T"] 44 | CONFIGCOMMENT = ['#',';','$',':','"',"'"] 45 | BADFNCHARS = re.compile(r'[^\w]+') 46 | TEMPDIR = '/tmp/upodder' 47 | FILENAME = '{entry_title}.{filename_extension}' 48 | 49 | # Initializing logging 50 | if args.quiet: 51 | l = logging.Logger('upodder', logging.ERROR) 52 | else: 53 | l = logging.Logger('upodder', logging.DEBUG) 54 | stderrHandler = logging.StreamHandler() 55 | # stderrHandler.setFormatter(logging.Formatter('%(message)s')) 56 | l.addHandler(stderrHandler) 57 | 58 | # Dict of possible file types 59 | FILE_TYPES = { 60 | 'audio/mpeg': 'mp3', 61 | 'audio/x-m4a': 'm4a', 62 | 'video/x-m4v': 'm4v', 63 | 'audio/x-opus': 'opus', 64 | 'audio/x-ogg': 'ogg', 65 | 'audio/aac': 'aac', 66 | 'audio/mp4': 'm4a', 67 | 'audio/mp3': 'mp3' 68 | } 69 | 70 | class SeenEntry(SQLObject): 71 | "Represents a single feed item, seen before. Used to keep track of download status." 
72 | hashed = UnicodeCol() 73 | pub_date = DateTimeCol() 74 | 75 | class EntryProcessor(object): 76 | "Processes single feed entry" 77 | def __init__(self, entry, feed): 78 | self.hashed = hashlib.sha1(entry['title'].encode('ascii', 'ignore')).hexdigest() 79 | self.pub_date = dt.fromtimestamp(time.mktime(entry.published_parsed)) 80 | 81 | if args.mark_seen: 82 | SeenEntry(pub_date=self.pub_date, hashed=self.hashed) 83 | l.debug("Marking as seen: %s"%(entry['title'])) 84 | return 85 | 86 | # Let's check if we worked on this entry earlier... 87 | if SeenEntry.select(SeenEntry.q.hashed == self.hashed).count() > 0: 88 | l.debug("Already seen: %s"%(entry['title'])) 89 | return 90 | 91 | # Let's check the entry's date 92 | if (dt.now() - self.pub_date).days > args.oldness: 93 | l.debug("Too old for us: %s"%entry['title']) 94 | return 95 | 96 | # Search for mpeg enclosures 97 | for enclosure in filter(lambda x: x.get('type') in FILE_TYPES.keys() ,entry.get('enclosures',[])): 98 | # Work only with first found audio/mpeg or video/x-m4v enclosure (Bad Thing? maybe :( ) 99 | 100 | # copy enclosure.type to entry.type for generate_filename processing. 101 | entry['type'] = enclosure.get('type') 102 | 103 | if self._download_enclosure(enclosure, entry, feed, args.no_download): 104 | SeenEntry( pub_date=self.pub_date, hashed=self.hashed) 105 | break 106 | 107 | def _download_enclosure(self, enclosure, entry, feed, no_download=False): 108 | """Performs downloading of specified file. 
Returns True on success and False in other case""" 109 | 110 | downloadto = TEMPDIR + os.sep + self.hashed 111 | 112 | if no_download: 113 | l.debug("Would download %s from %s" % (entry['title'], enclosure['href'])) 114 | return True 115 | 116 | try: 117 | """Downloads URL to file, returns file name of download (from URL or Content-Disposition)""" 118 | if not os.path.exists(os.path.dirname(downloadto)): 119 | os.makedirs(os.path.dirname(downloadto)) 120 | 121 | l.debug("Downloading %s from %s" % (entry['title'], enclosure['href'])) 122 | 123 | headers = { 'User-Agent': args.user_agent } 124 | r = requests.get(enclosure['href'], stream=True, timeout=25, headers=headers) 125 | 126 | with open(downloadto, 'wb') as f: 127 | if 'content-length' in r.headers: 128 | total_length = int(r.headers['content-length']) 129 | with tqdm(total=total_length, 130 | unit="B", 131 | unit_scale=True, 132 | ncols=90) as pbar: 133 | for chunk in r.iter_content(1024): 134 | f.write(chunk) 135 | if chunk: 136 | pbar.update(len(chunk)) 137 | else: 138 | for chunk in r.iter_content(1024): 139 | if chunk: 140 | f.write(chunk) 141 | f.flush() 142 | 143 | # filename = cgi.parse_header(r.headers.get('content-disposition'))[1]['filename'] 144 | # if not filename: 145 | # filename = "Untitled.mp3" 146 | 147 | except KeyboardInterrupt: 148 | l.info("Download aborted by Ctrl+c") 149 | try: 150 | user_wish = input("Do you like to mark item as read? (y/n) or quit? 
(Ctrl+c): ") 151 | if user_wish in YES: 152 | return True 153 | else: 154 | return False 155 | except KeyboardInterrupt: 156 | print("\nQuitting") 157 | sys.exit() 158 | 159 | # Move downloaded file to its final destination 160 | moveto = expanduser(args.podcastdir) + os.sep + self._generate_filename(entry, feed) 161 | l.debug("Moving {%s} to {%s}"%(downloadto,moveto)) 162 | if not os.path.exists(os.path.dirname(moveto)): os.makedirs(os.path.dirname(moveto)) 163 | shutil.move(downloadto, moveto) 164 | return True 165 | 166 | def _generate_filename(self, entry, feed): 167 | """Generates file name for this enclosure based on config settins 168 | Added filename_extension dict mapping to handle different file types.""" 169 | (year,month,day,hour,minute,second,weekday,yearday,leap) = time.localtime() 170 | subst = { 171 | 'today': '%i-%02i-%02i'%(year,month,day), 172 | 'entry_date': self.pub_date.date().isoformat(), 173 | 'id': self.hashed, 174 | 'entry_title': re.sub(BADFNCHARS,'_',entry.get('title')), 175 | 'feed_href': re.sub(BADFNCHARS,'_',feed.href.split('://')[-1]), 176 | 'feed_title': re.sub(BADFNCHARS,'_',feed.feed.get('title',feed.href)), 177 | 'filename_extension': FILE_TYPES.get(entry.get('type')), 178 | } 179 | return FILENAME.format(**subst) 180 | 181 | def process_feed(url): 182 | l.info('Downloading feed: %s' % url) 183 | feed = feedparser.parse(url) 184 | 185 | # Not all bozo errors cause total failure 186 | if feed.bozo and isinstance(feed.bozo_exception, 187 | (type(feedparser.NonXMLContentType), type(feedparser.CharacterEncodingOverride))): 188 | l.error("Erroneous feed URL: %s (%s)"%(url, type(feed.bozo_exception))) 189 | return 190 | 191 | # When parsing a website or error message, title is missing. 
192 | if 'title' not in feed.feed: 193 | l.error("Erroneous feed URL: %s" % url) 194 | return 195 | 196 | l.info("Parsing feed: %s"%feed.feed.title) 197 | 198 | feed.entries.reverse() 199 | for entry in feed.entries: 200 | EntryProcessor(entry, feed) 201 | 202 | def import_opml(subscriptions, opml): 203 | """Import a list of subscriptions from an OPML file.""" 204 | subscribed_feeds = [] 205 | imported_feeds = listparser.parse(opml) 206 | # Load the list of currently subscribed feeds 207 | with open(subscriptions, 'r') as f: 208 | for line in f: 209 | feed = line.strip() 210 | if feed.startswith("#") or len(feed) == 0: 211 | continue 212 | subscribed_feeds.append(feed) 213 | # Import any feeds we're not already subscribed to 214 | with open(subscriptions, 'a') as f: 215 | for feed in imported_feeds.feeds: 216 | if not feed.url in subscribed_feeds: 217 | print("Importing " + feed.title + "...") 218 | subscribed_feeds.append(feed.url) 219 | f.write(feed.url + "\n") 220 | sys.exit() 221 | 222 | def init(): 223 | if not os.path.exists(expanduser(args.basedir)): 224 | l.info("Creating base dir %s"%args.basedir) 225 | os.makedirs(expanduser(args.basedir)) 226 | 227 | subscriptions = expanduser(args.basedir) + os.sep + 'subscriptions' 228 | if not os.path.exists(subscriptions): 229 | l.info("Creating empty subscriptions file %s"%subscriptions) 230 | open(subscriptions,'a').write("# Add your RSS/ATOM subscriptions here.\n\n") 231 | 232 | if args.opmlpath: 233 | import_opml(subscriptions, args.opmlpath) 234 | 235 | SeenEntry._connection = sqlite.builder()(expanduser(args.basedir + os.sep + 'seen.sqlite'), debug=False) 236 | SeenEntry.createTable(ifNotExists=True) 237 | 238 | def main(): 239 | init() 240 | 241 | for url in map(lambda x: x.strip(), open(expanduser(args.basedir) + os.sep + 'subscriptions')): 242 | if url and url[0] not in CONFIGCOMMENT: 243 | process_feed(url) 244 | 245 | l.info('Done updating feeds.') 246 | 247 | 248 | if __name__ == '__main__': 249 | main() 
250 | 251 | --------------------------------------------------------------------------------