├── podcastdownloader
│   ├── __init__.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_integration.py
│   │   ├── test_utility_functions.py
│   │   └── test_episode.py
│   ├── exceptions.py
│   ├── writer.py
│   ├── podcast.py
│   ├── utility_functions.py
│   ├── tag_engine.py
│   ├── episode.py
│   └── __main__.py
├── requirements.txt
├── setup.py
├── setup.cfg
├── LICENSE
├── .gitignore
└── README.md

--------------------------------------------------------------------------------
/podcastdownloader/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/podcastdownloader/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
aiohttp
click
feedparser
multidict
mutagen

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import setuptools

setuptools.setup(setup_requires=['pbr'], pbr=True)

--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[metadata]
name = podcastdownloader
author = Serene-Arc
author-email = serenical@gmail.com

[options]
packages = podcastdownloader

--------------------------------------------------------------------------------
/podcastdownloader/exceptions.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# coding=utf-8

class PodcastException(Exception):
    pass


class FeedException(PodcastException):
    pass


class EpisodeException(PodcastException):
    pass


class TagEngineError(PodcastException):
    pass

--------------------------------------------------------------------------------
/podcastdownloader/tests/test_integration.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# coding=utf-8

from pathlib import Path

import pytest
from click.testing import CliRunner

from podcastdownloader.__main__ import cli


@pytest.mark.parametrize('test_args', (
    [],
))
def test_download_no_feeds(test_args: list[str], tmp_path: Path):
    runner = CliRunner()
    result = runner.invoke(cli, ['download', '-vv', str(tmp_path)] + test_args)
    assert result.exit_code == 0
    assert 'No feeds have been provided' in result.output


@pytest.mark.parametrize('test_args', (
    ['-f', 'https://rss.art19.com/wecrashed'],
))
def test_download_single_feed(test_args: list[str], tmp_path: Path):
    runner = CliRunner()
    result = runner.invoke(cli, ['download', '-vv', str(tmp_path)] + test_args)
    assert result.exit_code == 0

--------------------------------------------------------------------------------
/podcastdownloader/writer.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# coding=utf-8

import logging
import pathlib

from podcastdownloader.podcast import Podcast

logger = logging.getLogger(__name__)


def _write_m3u(podcast: Podcast):
    if not podcast.episodes:
        logger.warning(f'No episodes in {podcast.name}, skipping playlist')
        return
    podcast_path = podcast.episodes[0].file_path.parent
    podcast_path.mkdir(parents=True, exist_ok=True)
    with open(pathlib.Path(podcast_path, 'episode_playlist.m3u'), 'w') as file:
        file.write('#EXTM3U\n')
        # Feeds list newest episodes first; reverse for chronological order
        for episode in reversed(podcast.episodes):
            try:
                file.write('./' + episode.file_path.name + '\n')
            except AttributeError:
                logger.warning(f'Could not write {episode.title} to playlist')
    logger.debug(f'M3U playlist for {podcast.name} written')


def write_episode_playlist(podcast: Podcast, write_choices: tuple[str, ...]):
    for format_choice in write_choices:
        if format_choice == 'm3u':
            _write_m3u(podcast)
        else:
            logger.error(f'Unknown playlist format type: {format_choice}')

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 Serene

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/podcastdownloader/tests/test_utility_functions.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# coding=utf-8

import pytest

import podcastdownloader.utility_functions as util


@pytest.mark.parametrize(('test_input_string', 'expected'), (
    ('', None),
    ('\n', None),
    (' \n', None),
    ('#test', None),
    ('# test', None),
    (' #test', None),
    (' # test', None),
))
def test_clean_text_line_non_feeds(test_input_string: str, expected: str):
    result = util._clean_text_line(test_input_string)
    assert result == expected


@pytest.mark.parametrize(('test_input_string', 'expected'), (
    ('https://www.example.com/test', 'https://www.example.com/test'),
    (' https://www.example.com/test', 'https://www.example.com/test'),
    ('https://www.example.com/test#random', 'https://www.example.com/test#random'),
    ('https://www.example.com/test/feed.rss # test comment', 'https://www.example.com/test/feed.rss'),
    (' https://www.example.com/test/feed.rss # test comment', 'https://www.example.com/test/feed.rss'),
    ('https://www.example.com/test/feed.rss\t # test comment', 'https://www.example.com/test/feed.rss'),
))
def test_clean_text_line_good(test_input_string: str, expected: str):
    result = util._clean_text_line(test_input_string)
    assert result == expected

--------------------------------------------------------------------------------
/podcastdownloader/podcast.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# coding=utf-8

import logging
from pathlib import Path
from typing import Optional

import aiohttp
import aiohttp.client_exceptions
import feedparser
import feedparser.exceptions

from podcastdownloader.episode import Episode
from podcastdownloader.exceptions import FeedException

logger = logging.getLogger(__name__)


class Podcast:
    def __init__(self, url: str):
        self.url = url
        self.feed: Optional[feedparser.FeedParserDict] = None
        self.name: Optional[str] = None
        self.location: Optional[Path] = None
        self.episodes: list[Episode] = []

    async def download_feed(self, session: aiohttp.ClientSession):
        try:
            async with session.get(self.url) as response:
                # Check the status before bothering to read the body
                if response.status != 200:
                    raise FeedException(f'Failed to download feed from {self.url}: Response code {response.status}')
                feed_data = await response.content.read()
        except aiohttp.client_exceptions.ClientError as e:
            raise FeedException(f'Failed to download feed from {self.url}: {e}')
        feed = feedparser.parse(feed_data)
        if feed['bozo']:
            raise FeedException(f'Feed from {self.url} was malformed')
        self.feed = feed
        self.name = feed['feed']['title']
        self.episodes = [Episode.parse_dict(entry, self.name) for entry in self.feed['entries']]

--------------------------------------------------------------------------------
/podcastdownloader/utility_functions.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# coding=utf-8

import logging
import re
import xml.etree.ElementTree as ElementTree
from pathlib import Path
from typing import Optional

from podcastdownloader.exceptions import FeedException

logger = logging.getLogger(__name__)


def _check_required_path(file_path: str) -> Path:
    # Expand the user directory before resolving, otherwise a leading '~'
    # is treated as a literal path segment
    result = Path(file_path).expanduser().resolve()
    return result


def load_feeds_from_text_file(feed_files: tuple[str, ...]) -> list[str]:
    result = []
    feed_files = [_check_required_path(file) for file in feed_files]
    for feed_file in feed_files:
        with open(Path(feed_file), 'r') as feed:
            for line in feed:
                if parsed_line := _clean_text_line(line):
                    result.append(parsed_line)
                    logger.debug(f'Feed {parsed_line} added')
    return result


def _clean_text_line(in_string: str) -> Optional[str]:
    # Blank lines and comment-only lines are not feeds
    non_feed_pattern = re.compile(r'^\s*(#.*)?$')
    if re.match(non_feed_pattern, in_string):
        return None
    # Strip leading whitespace and any trailing comment from the URL
    feed_pattern = re.compile(r'^\s*(.*?)(\s+#.*)?$')
    feed_match = re.match(feed_pattern, in_string)
    if feed_match:
        return feed_match.group(1)
    else:
        raise FeedException(f'Could not extract feed from {in_string.strip()}')


def load_feeds_from_opml(opml_files: tuple[str, ...]) -> list[str]:
    result = []
    opml_files = [_check_required_path(file) for file in opml_files]
    for opml_loc in opml_files:
        opml_tree = ElementTree.parse(Path(opml_loc))
        for opml_feed in opml_tree.getroot().iter('outline'):
            result.append(opml_feed.attrib['xmlUrl'])
            logger.debug(f'Feed {opml_feed.attrib["xmlUrl"]} added')
    return result

--------------------------------------------------------------------------------
/podcastdownloader/tag_engine.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# coding=utf-8

import logging

import mutagen
import mutagen.id3
import mutagen.mp3
import mutagen.mp4
from mutagen.id3 import PCST, TALB, TDES, TIT2

from podcastdownloader.episode import Episode
from podcastdownloader.exceptions import TagEngineError

logger = logging.getLogger(__name__)


class TagEngine:
    def __init__(self):
        pass

    @staticmethod
    def tag_episode(episode: Episode):
        tag_file = mutagen.File(episode.file_path)
        if tag_file is None:
            raise TagEngineError(f'Could not write tags to {episode.title} in {episode.podcast_name}')
        try:
            tag_file.add_tags()
        except mutagen.MutagenError:
            # The file already has a tag header
            pass
        if isinstance(tag_file.tags, mutagen.id3.ID3):
            TagEngine._write_id3_tags(episode, tag_file)
        elif isinstance(tag_file.tags, mutagen.mp4.MP4Tags):
            TagEngine._write_mp4_tags(episode, tag_file)
        else:
            raise TagEngineError(f'Tagging for type {type(tag_file).__name__} not supported')

    @staticmethod
    def _write_id3_tags(episode: Episode, tag_file: mutagen.FileType):
        tag_file.tags.add(PCST(value=True))  # Podcast flag
        tag_file.tags.add(TALB(encoding=3, text=episode.podcast_name))
        tag_file.tags.add(TDES(encoding=3, text=episode.feed.get('summary', '')))
        tag_file.tags.add(TIT2(encoding=3, text=episode.title))
        tag_file.save()

    @staticmethod
    def _write_mp4_tags(episode: Episode, tag_file: mutagen.FileType):
        tag_file.tags['\xa9nam'] = episode.title  # Episode title
        tag_file.tags['\xa9alb'] = episode.podcast_name  # Podcast name
        tag_file.tags['pcst'] = True  # Podcast bit
        tag_file.tags['desc'] = episode.feed.get('summary', '')
        tag_file.save()
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

--------------------------------------------------------------------------------
/podcastdownloader/tests/test_episode.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# coding=utf-8
import asyncio

import aiohttp
import pytest

from podcastdownloader.episode import Episode
from podcastdownloader.exceptions import EpisodeException


@pytest.fixture(scope='session')
def client_session() -> aiohttp.ClientSession:
    out = aiohttp.ClientSession()
    return out


@pytest.mark.parametrize(('test_link_dict', 'expected'), (
    ([
        {
            'rel': 'alternate',
            'type': 'text/html',
            'href': 'http://evenmorenewspodcast.com/usa-still-leaving-afghanistan-still-shooting'
                    '-unarmed-people-w-christopher-rivas-ep-142',
        },
        {
            'length': '54468321',
            'type': 'audio/mpeg',
            'href': 'https://dts.podtrac.com/redirect.mp3/chtbl.com/track/242FB3/'
                    'traffic.libsyn.com/secure/evenmorenews/EMN_Ep142.mp3?dest-id=695480',
            'rel': 'enclosure',
        },
    ],
        'https://dts.podtrac.com/redirect.mp3/chtbl.com/track/242FB3/'
        'traffic.libsyn.com/secure/evenmorenews/EMN_Ep142.mp3?dest-id=695480'),
))
def test_episode_find_url(test_link_dict: list[dict], expected: str):
    test_dict = {'links': test_link_dict, }
    result = Episode._find_url(test_dict)
    assert result == expected


@pytest.mark.parametrize('test_link_dict', (
    [{
        'rel': 'alternate',
        'type': 'text/html',
        'href': 'http://evenmorenewspodcast.com/usa-still-leaving-afghanistan-still-shooting'
                '-unarmed-people-w-christopher-rivas-ep-142',
    }],
))
def test_episode_find_url_bad(test_link_dict: list[dict]):
    test_dict = {'links': test_link_dict, }
    with pytest.raises(EpisodeException):
        Episode._find_url(test_dict)


@pytest.mark.parametrize(('test_url', 'expected'), (
    ('https://www.example.com/test.png', '.png'),
    ('https://www.example.com/test.mp3', '.mp3'),
    ('https://www.example.com/random/test.flac', '.flac'),
    ('https://www.example.com/test.mp3?test=value', '.mp3'),
    ('https://www.example.com/test.mp3?test=value#test', '.mp3'),
    ('https://www.example.com/test.aac', '.aac'),
))
def test_determine_file_extension_from_url(test_url: str, expected: str, client_session):
    result = asyncio.run(Episode._get_file_extension(test_url, client_session))
    assert result == expected


@pytest.mark.parametrize(('test_name', 'expected'), (
    ('test', 'test'),
    ('te/st', 'test'),
    ('test/test', 'testtest'),
    ('test\0', 'test'),
))
def test_clean_name(test_name: str, expected: str):
    result = Episode._clean_name(test_name)
    assert result == expected

--------------------------------------------------------------------------------
/podcastdownloader/episode.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# coding=utf-8

import logging
import mimetypes
import re
import urllib.parse
from pathlib import Path
from typing import Optional

import aiohttp
import aiohttp.client_exceptions
import mutagen
from multidict import CIMultiDictProxy

from podcastdownloader.exceptions import EpisodeException

logger = logging.getLogger(__name__)


class Episode:
    def __init__(self, title_name: str, episode_url: str, podcast_name: str, feed: dict):
        self.title = self._clean_name(title_name)
        self.url = episode_url
        self.podcast_name = podcast_name
        self.file_path: Optional[Path] = None
        self.feed = feed

    @staticmethod
    def parse_dict(feed_dict: dict, podcast_name: str) -> 'Episode':
        episode_url = Episode._find_url(feed_dict)
        result = Episode(
            feed_dict['title'],
            episode_url,
            podcast_name,
            feed_dict,
        )
        return result

    @staticmethod
    def _clean_name(name: str) -> str:
        # Strip characters that are not valid in file names
        name = re.sub(r'([\0/])', '', name)
        return name

    @staticmethod
    def _find_url(feed_dict: dict) -> str:
        mime_type_regex = re.compile(r'^audio.*')
        try:
            valid_urls = list(filter(lambda u: re.match(mime_type_regex, u['type']), feed_dict['links']))
        except KeyError:
            valid_urls = None
        if valid_urls:
            return valid_urls[0].get('href')
        else:
            raise EpisodeException(f'Could not find a valid link for episode {feed_dict["title"]}')

    @staticmethod
    async def _get_file_extension(url: str, session: aiohttp.ClientSession) -> str:
        # Guess from the URL path first to avoid a network round trip
        url_path = urllib.parse.urlsplit(url).path
        mime_type = mimetypes.guess_type(url_path)[0]
        if not mime_type:
            # Fall back to asking the server; the request needs the full URL,
            # not just its path component
            async with session.get(url) as response:
                mime_type = response.headers.get('Content-Type')
            if not mime_type:
                raise EpisodeException(f'Could not determine MIME type for URL {url}')
        result = mimetypes.guess_extension(mime_type)
        if result:
            return result
        else:
            raise EpisodeException(f'Could not determine file extension for download {url}')

    async def calculate_path(self, destination: Path, session: aiohttp.ClientSession):
        try:
            file_extension = await self._get_file_extension(self.url, session)
            file_name = self.title + file_extension
            self.file_path = Path(destination, self.podcast_name, file_name)
        except (aiohttp.client_exceptions.ClientError, EpisodeException) as e:
            raise EpisodeException(f'Failed to determine path for "{self.title}" from "{self.podcast_name}": {e}')

    async def download(self, session: aiohttp.ClientSession):
        if not self.file_path:
            raise EpisodeException('Episode has no calculated path')
        if self.file_path.exists():
            logger.debug(f'File already exists at {self.file_path}')
            return
        try:
            async with session.get(self.url) as response:
                data = await response.content.read()
            self.file_path.parent.mkdir(exist_ok=True, parents=True)
            with open(self.file_path, 'wb') as file:
                file.write(data)
            logger.info(f'Downloaded {self.title} in podcast {self.podcast_name}')
            try:
                # Imported here to avoid a circular import with tag_engine
                from podcastdownloader.tag_engine import TagEngine
                TagEngine.tag_episode(self)
            except mutagen.MutagenError as e:
                logger.error(f'Failed to tag episode {self.title}: {e}')
        except aiohttp.client_exceptions.ClientError as e:
            raise EpisodeException(f'Failed to download "{self.title}" from "{self.podcast_name}": {e}')

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# podcast-downloader

This is a simple tool for downloading all the available episodes in an RSS feed to disk, where they can be listened to offline.

Python 3 must be installed first, followed by the tool's requirements. These are documented in `requirements.txt` and can be installed via the command `python3 -m pip install -r requirements.txt`.

## Arguments

The following arguments can be supplied to the program:

- `destination` is the directory that the folder structure will be created in and the podcasts downloaded to
- `-f, --feed` is the URL for the RSS feed of the podcast
- `-o, --opml` is the location of an OPML file with podcast data
- `--file` is the location of a simple text file with an RSS feed URL on each line
- `-l, --limit` is the maximum number of episodes to try to download from the feed; if left blank, all episodes are attempted, but a small number is fastest for updating a feed
- `-m, --max-downloads` will limit the number of episodes to be downloaded to the specified integer
- `-w, --write-list` will write an ordered list of the episodes in the podcast in one of several formats, as specified:
  - `none`
  - `text`
  - `audacious`
  - `m3u`
- `-t, --threads` is the number of threads to run concurrently; defaults to 10
- `--max-attempts` will specify the number of reattempts for a failed or refused connection; see below for more details

The following arguments alter the functioning of the program in a major way, e.g. they do not download anything:

- `--skip-download` will do everything but download the files; useful for updating episode playlists without a lengthy download
- `--verify` will scan existing files for any with a file size outside a 2% tolerance and list them in `results.txt`
- `--update-tags` will download episode information and write tags to all episodes already downloaded

The following arguments alter the verbosity and logging behaviour:

- `-s, --suppress-progress` will disable all progress bars
- `-v, --verbose` will increase the verbosity of the information output to the console
- `--log` will log all messages at debug level (the equivalent of `-v`) to the specified file, appending if it already exists

The `--feed`, `--file`, and `--opml` flags can all be specified multiple times to aggregate feeds from multiple locations.

Only the destination is required; however, at least one feed, feed file, or OPML file must be provided, or the program will simply complete instantly.

### Maximum Reattempts

In some cases, particularly when downloading one or a few specific podcasts with a lot of episodes at once, the remote server will receive a number of simultaneous or consecutive requests. As this may appear to be atypical behaviour, the server may refuse or close incoming connections as a rate-limiting measure. This is normal behaviour for servers that do not want to be scraped.
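As an illustration of the sleep-and-retry behaviour described in the rest of this section, the pattern amounts to something like the following sketch. This is not the tool's actual retry code, and `fetch_with_backoff` is a hypothetical helper shown only to make the timing scheme concrete:

```python
import asyncio

import aiohttp


async def fetch_with_backoff(session: aiohttp.ClientSession, url: str,
                             max_attempts: int = 10, increment: int = 30) -> bytes:
    # Sleep 30s after the first refusal, 60s after the second, and so on,
    # up to `max_attempts` sleeps before giving up entirely.
    for attempt in range(max_attempts + 1):
        try:
            async with session.get(url) as response:
                response.raise_for_status()
                return await response.read()
        except aiohttp.ClientError:
            if attempt == max_attempts:
                raise
            await asyncio.sleep(increment * (attempt + 1))
```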
There are several countermeasures in the downloader for this behaviour, such as randomising the download list to avoid repeated calls to the same server in a short amount of time, but this may not work when there are only one or a few podcast feeds to download. The method of last resort is therefore a sleep function that waits until the server allows the download to continue. This is done in increasing increments of 30 seconds, with the maximum number of reattempts specified by the `--max-attempts` argument. For example, if left at the default of 10, the program will sleep for 30 seconds the first time the connection is refused. If it is refused again, it will sleep for 60 seconds before reattempting the download, and so on up to the 10th attempt, where it will sleep for 300 seconds, or five minutes. If the connection is refused after this, an error will occur and the download thread will move on to the next podcast episode.

The maximum number of reattempts may need to be changed in several cases. If you wish to download the episode regardless of anything else, you may want to increase the argument, though this may result in longer wait times for the downloads to complete. Conversely, a low value will make the program skip downloads that time out repeatedly, missing content but completing faster.

### Warnings

The `--write-list` option should not be used with the `--limit` option. The limit will be applied to the episode list in whatever format is chosen, and the result will overwrite any past episode list files. For example, if a `--limit` of 5 is chosen with `-w audacious`, the exported Audacious playlist will only be 5 items long. Thus the `-w` option should only be used when no limit is set.

## Tags

The downloader has basic tag-writing support. It will write ID3 tags to MP3 files and iTunes-compatible tags to M4A and MP4 files. The information written is as follows:

- The episode title
- The podcast title
- The publishing date and time of the episode
- The description accompanying the episode
- The episode number (if available)

## Example Command

The following is an example command to download a single feed to a podcasts folder:

`python3 -m podcastdownloader media/podcasts -f 'http://linustechtips.libsyn.com/wanshow' -o podcasts.opml`

## Podcast Feed Files

A feed file, for use with the `--file` option, is a simple text file with one RSS feed URL per line. The downloader ignores all empty lines and all lines beginning with a hash (#), allowing comments and a rudimentary structure if desired. Additionally, comments can be appended to the end of a line containing a feed URL: as long as there is whitespace between the end of the URL and the hash, the comment will be stripped when the file is parsed.
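For instance, a feed file could look like the following (the URLs here are placeholders):

```
# News podcasts
https://www.example.com/news/feed.rss
https://www.example.com/interviews/feed.rss  # updates weekly

# https://www.example.com/retired/feed.rss
```

The heading comment, the blank line, and the commented-out feed are all skipped; the trailing comment on the second feed is stripped, leaving only the URL.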
--------------------------------------------------------------------------------
/podcastdownloader/__main__.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import asyncio
import itertools
import logging
import random
import sys
from asyncio.queues import Queue
from pathlib import Path
from typing import Optional

import aiohttp
import click

import podcastdownloader.utility_functions as util
from podcastdownloader.exceptions import EpisodeException, PodcastException
from podcastdownloader.podcast import Podcast
from podcastdownloader.writer import write_episode_playlist

logger = logging.getLogger()


def _setup_logging(verbosity: int):
    logger.setLevel(1)
    stream = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s] - %(message)s')
    stream.setFormatter(formatter)
    logger.addHandler(stream)
    if verbosity >= 1:
        stream.setLevel(logging.DEBUG)
    else:
        stream.setLevel(logging.INFO)
    logging.getLogger('asyncio').setLevel(logging.CRITICAL)
    logging.getLogger('chardet').setLevel(logging.CRITICAL)


_common_options = [
    click.argument('destination', type=str),
    click.option('-v', '--verbose', default=0, count=True),
    click.option('-f', '--feed', type=str, multiple=True, default=[]),
    click.option('-F', '--file', type=str, multiple=True, default=[]),
    click.option('--opml', type=str, multiple=True, default=[]),
]


async def fill_individual_feed(in_queue: Queue, out_queue: Queue, destination: Path, session: aiohttp.ClientSession):
    # The queue is filled before any worker starts, so an empty queue means all work is done;
    # get_nowait avoids blocking forever if another worker drains the last item
    while True:
        try:
            podcast = in_queue.get_nowait()
        except asyncio.QueueEmpty:
            break
        logger.debug(f'Beginning retrieval for {podcast.url}')
        try:
            await podcast.download_feed(session)
            for episode in podcast.episodes:
                try:
                    await episode.calculate_path(destination, session)
                except TypeError:
                    logger.error(f'Failed to parse {episode.title} in {episode.podcast_name}')
        except PodcastException as e:
            logger.error(e)
        except Exception:
            logger.critical(f'Error with {podcast.url}')
            raise
        else:
            await out_queue.put(podcast)
            logger.info(f'Retrieved RSS for {podcast.name}')
        in_queue.task_done()


async def download_individual_episode(in_queue: Queue, session: aiohttp.ClientSession):
    while True:
        try:
            episode = in_queue.get_nowait()
        except asyncio.QueueEmpty:
            break
        logger.debug(f'Attempting download of episode {episode.title} in {episode.podcast_name}')
        try:
            await episode.download(session)
        except EpisodeException as e:
            logger.error(e)
        in_queue.task_done()


def add_common_options(func):
    for option in _common_options:
        func = option(func)
    return func


@click.group()
def cli():
    pass


@cli.command('download')
@add_common_options
@click.option('-l', '--limit', type=int, default=None)
@click.option('-t', '--threads', type=int, default=10)
@click.option('-w', '--write-playlist', type=click.Choice(('m3u',)), default=(), multiple=True)
def cli_download(
    destination: str,
    feed: tuple[str, ...],
    file: tuple[str, ...],
    limit: Optional[int],
    opml: tuple[str, ...],
    threads: int,
    verbose: int,
    write_playlist: tuple[str, ...],
):
    _setup_logging(verbose)
    destination = Path(destination).expanduser().resolve()
    if not destination.exists():
        logger.warning(f'Specified destination {destination} does not exist, creating it now')
        destination.mkdir(parents=True)

    all_feeds = set(itertools.chain(feed, util.load_feeds_from_text_file(file), util.load_feeds_from_opml(opml)))
    logger.info(f'{len(all_feeds)} feeds found')
    if all_feeds:
        asyncio.run(download_episodes(all_feeds, destination, threads, write_playlist, limit))
    else:
        logger.error('No feeds have been provided')
    logger.info('Program Complete')


async def download_episodes(
    all_feeds: set[str],
    destination: Path,
    threads: int,
    playlist_formats: tuple[str, ...],
    limit: Optional[int],
):
    unfilled_podcasts = Queue()
    filled_podcasts = Queue()
    episodes = Queue()
    for url in all_feeds:
        await unfilled_podcasts.put(Podcast(url))
    async with aiohttp.ClientSession() as session:
        # One worker task per requested thread
        feed_fillers = [asyncio.create_task(
            fill_individual_feed(unfilled_podcasts, filled_podcasts, destination, session)
        ) for _ in range(threads)]
        await asyncio.gather(*feed_fillers)
        await unfilled_podcasts.join()
        logger.info('All feeds filled')

        podcasts = []
        while not filled_podcasts.empty():
            podcast = filled_podcasts.get_nowait()
            write_episode_playlist(podcast, playlist_formats)
            podcasts.append(podcast)

        if limit:
            logger.info(f'Limiting episodes per podcast to {limit} entries')
            for podcast in podcasts:
                podcast.episodes = podcast.episodes[:limit]

        unfilled_episodes = list(filter(
            lambda e: not e.file_path or not e.file_path.exists(),
            [ep for pod in podcasts for ep in pod.episodes],
        ))
        logger.info(f'{len(unfilled_episodes)} episodes to download')

        # Shuffle so consecutive requests are spread across different servers
        random.shuffle(unfilled_episodes)

        for ep in unfilled_episodes:
            await episodes.put(ep)

        episode_downloaders = [asyncio.create_task(
            download_individual_episode(episodes, session)
        ) for _ in range(threads)]

        await asyncio.gather(*episode_downloaders)
        await episodes.join()


if __name__ == '__main__':
    cli()

--------------------------------------------------------------------------------