├── .gitignore
├── requirements.in
├── record_player.jpg
├── add_email_password
    ├── overcast_account_1.png
    ├── overcast_account_2.png
    ├── overcast_account_4.png
    ├── overcast_account_3a.png
    ├── overcast_account_3b.png
    └── README.md
├── download_podcasts.sh
├── LICENSE
├── requirements.txt
├── download_all_episodes_from_rss.py
├── download.py
├── README.md
└── download_overcast_podcasts.py


/.gitignore:
--------------------------------------------------------------------------------
1 | audiofiles
2 | *.pyc
3 | 


--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
1 | httpx
2 | lxml
3 | pip-tools
4 | smartypants
5 | tenacity
6 | urllib3
7 | 


--------------------------------------------------------------------------------
/record_player.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/overcast-downloader/main/record_player.jpg


--------------------------------------------------------------------------------
/add_email_password/overcast_account_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/overcast-downloader/main/add_email_password/overcast_account_1.png


--------------------------------------------------------------------------------
/add_email_password/overcast_account_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/overcast-downloader/main/add_email_password/overcast_account_2.png


--------------------------------------------------------------------------------
/add_email_password/overcast_account_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/overcast-downloader/main/add_email_password/overcast_account_4.png


--------------------------------------------------------------------------------
/add_email_password/overcast_account_3a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/overcast-downloader/main/add_email_password/overcast_account_3a.png


--------------------------------------------------------------------------------
/add_email_password/overcast_account_3b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/overcast-downloader/main/add_email_password/overcast_account_3b.png


--------------------------------------------------------------------------------
/download_podcasts.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Download the audio for every episode listed in the Overcast OPML export
# saved on the Desktop, storing it under the "/Volumes/Media (Sapphire)"
# backup folder.

# Stop at the first failing command, and treat unset variables as errors.
set -o errexit -o nounset

python3 download_overcast_podcasts.py \
  ~/Desktop/overcast.opml \
  --download_dir "/Volumes/Media (Sapphire)/backups/overcast/audiofiles"
# mv ~/Desktop/overcast.opml "/Volumes/Media (Sapphire)/backups/overcast/overcast.$(date +'%Y-%m-%d').xml"
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2019 Alex Chan
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a
 4 | copy of this software and associated documentation files (the "Software"),
 5 | to deal in the Software without restriction, including without limitation
 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
 7 | and/or sell copies of the Software, and to permit persons to whom the Software
 8 | is furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
17 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19 | OTHER DEALINGS IN THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is autogenerated by pip-compile with Python 3.12
 3 | # by the following command:
 4 | #
 5 | #    pip-compile
 6 | #
 7 | anyio==4.1.0
 8 |     # via httpx
 9 | build==1.0.3
10 |     # via pip-tools
11 | certifi==2023.11.17
12 |     # via
13 |     #   httpcore
14 |     #   httpx
15 | click==8.1.7
16 |     # via pip-tools
17 | h11==0.14.0
18 |     # via httpcore
19 | httpcore==1.0.2
20 |     # via httpx
21 | httpx==0.25.2
22 |     # via -r requirements.in
23 | idna==3.5
24 |     # via
25 |     #   anyio
26 |     #   httpx
27 | lxml==4.9.3
28 |     # via -r requirements.in
29 | packaging==23.2
30 |     # via build
31 | pip-tools==7.3.0
32 |     # via -r requirements.in
33 | pyproject-hooks==1.0.0
34 |     # via build
35 | smartypants==2.0.1
36 |     # via -r requirements.in
37 | sniffio==1.3.0
38 |     # via
39 |     #   anyio
40 |     #   httpx
41 | tenacity==8.2.3
42 |     # via -r requirements.in
43 | urllib3==2.1.0
44 |     # via -r requirements.in
45 | wheel==0.41.3
46 |     # via pip-tools
47 | 
48 | # The following packages are considered to be unsafe in a requirements file:
49 | # pip
50 | # setuptools
51 | 


--------------------------------------------------------------------------------
/add_email_password/README.md:
--------------------------------------------------------------------------------
 1 | # Create a username/password for your account
 2 | 
 3 | 1.  In the iOS app, tap the icon in the top left-hand corner to open the settings screen.
 4 | 
 5 |     ![Screenshot of Overcast, with an icon in the top-left corner highlighted with a red arrow.](overcast_account_1.png)
 6 | 
 7 | 2.  Tap "Account" to enter the account settings.
 8 | 
 9 |     ![An iOS settings screen, with a list of options. "Account" is highlighted in orange.](overcast_account_2.png)
10 | 
11 | 3.  If you already have an email/password set up, you'll see the email at the top of the account settings:
12 | 
13 |     ![Account settings. The email entry has "example@example.org" filled in.](overcast_account_3a.png)
14 | 
15 |     If you remember your password, you're done!
16 |     If not, tap "Change Password" to set a new password.
17 | 
18 |     If you don't have an email and password set up, tap "Add Email and Password":
19 | 
20 |     ![An iOS settings screen, with a list of options. "Add Email and Password" is highlighted in orange.](overcast_account_3b.png)
21 | 
22 | 4.  Enter your email address and set a password for your account:
23 | 
24 |     ![An "Add Email To Account" settings screen, with email and password fields.](overcast_account_4.png)
25 | 


--------------------------------------------------------------------------------
/download_all_episodes_from_rss.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | The main downloader script will also get a copy of the RSS feed.
 4 | 
 5 | If there are episodes in the RSS feeds that you haven't listened to in Overcast,
 6 | but you still want in your podcast archive (for example, if you listened to them
 7 | in a different podcast app), you can use this script to download them all.
 8 | """
 9 | 
10 | import glob
11 | import html
12 | import os
13 | import sys
14 | 
15 | from lxml import etree
16 | import smartypants
17 | 
18 | from download_overcast_podcasts import download_url, get_filename, logger
19 | 
20 | 
def download_files_for_xml(xml_path):
    """
    Download every episode in a saved RSS feed that isn't already on disk.

    ``xml_path`` is the path to an RSS feed file.  Audio files are saved
    in the same directory as the feed, using the name ``get_filename``
    derives from the episode title and the enclosure URL.

    Items that have no title, or no ``<enclosure>`` with a ``url``
    attribute, carry nothing we can download and are skipped.
    """
    # NOTE(review): ``download_url`` and ``logger`` are imported from
    # ``download_overcast_podcasts`` at the top of this file -- confirm
    # that module still provides both names.
    logger.info("Inspecting %r", xml_path)
    tree = etree.parse(xml_path)

    download_dir = os.path.dirname(xml_path)

    for item in tree.xpath(".//item"):
        title = item.findtext("title")
        logger.debug("Checking episode %r", title)

        # Feeds can contain items with no audio attached (or, rarely, no
        # title); looking up attributes on a missing element would crash
        # the whole run, so skip those items instead.
        enclosure = item.find("enclosure")
        audio_url = enclosure.get("url") if enclosure is not None else None

        if not title or not audio_url:
            logger.debug("No title or audio enclosure for this item, skipping")
            continue

        filename = get_filename(
            download_url=audio_url,
            # We have to replicate some of the processing done by Overcast's
            # title cleanups.
            title=html.unescape(smartypants.smartypants(title)),
        )
        download_path = os.path.join(download_dir, filename)

        if os.path.exists(download_path):
            logger.debug("This episode is already downloaded, skipping")
            continue

        logger.info("Downloading episode %r", title)

        download_url(url=audio_url, path=download_path, description="audio file")
48 | 
49 | 
if __name__ == "__main__":
    # The single command-line argument is the directory to scan for saved
    # ``feed.*.xml`` files.
    try:
        audiofile_dir = sys.argv[1]
    except IndexError:
        sys.exit(f"{__file__} <AUDIOFILE_DIR>")

    # Escape the directory before building the pattern: a folder name that
    # contains glob metacharacters (``[``, ``*``, ``?``) would otherwise be
    # interpreted as part of the pattern and silently match nothing.
    #
    # NOTE(review): only feeds directly inside AUDIOFILE_DIR are matched.
    # The main downloader saves feeds inside one folder per podcast, so
    # this presumably expects a single podcast's folder -- confirm.
    feed_pattern = os.path.join(glob.escape(audiofile_dir), "feed.*.xml")

    for xml_path in glob.iglob(feed_pattern):
        download_files_for_xml(xml_path)
57 |         download_files_for_xml(xml_path)
58 | 


--------------------------------------------------------------------------------
/download.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import uuid
 4 | 
 5 | import httpx
 6 | from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
 7 | import urllib3.exceptions
 8 | 
 9 | 
@retry(
    retry=(
        retry_if_exception_type(httpx.HTTPError)
        | retry_if_exception_type(urllib3.exceptions.HTTPError)
    ),
    stop=stop_after_attempt(10),
    wait=wait_fixed(60),
)
def download_file(*, url, path, client=None):
    """
    Atomically download a file from ``url`` to ``path``.

    If ``path`` already exists, the file will not be downloaded again.
    This means that different URLs should be saved to different paths.

    This function is meant to be used in cases where the contents of ``url``
    is immutable -- calling it more than once should always return the same bytes.

    ``client`` is an optional ``httpx.Client`` to use for the request.  If
    it is omitted, a client is created for this call and closed again
    before returning; a client passed in by the caller is left open.

    HTTP errors from httpx or urllib3 are retried up to 10 times, waiting
    60 seconds between attempts (see the ``@retry`` decorator).

    Returns the download path.

    """
    # If the URL has already been downloaded, we can skip downloading it again.
    if os.path.exists(path):
        return path

    if os.path.dirname(path):
        os.makedirs(os.path.dirname(path), exist_ok=True)

    # Remember whether we created the client, so we only close our own.
    # Without this, every retry attempt would leak another open client.
    owns_client = client is None
    if owns_client:
        client = httpx.Client()

    # Download to a temporary path first.  That way, we only get
    # something at the destination path if the download is successful.
    #
    # We download to a path in the same directory so we can do an
    # atomic ``os.rename()`` later -- atomic renames don't work
    # across filesystem boundaries.
    tmp_path = f"{path}.{uuid.uuid4()}.tmp"

    try:
        try:
            with client.stream("GET", url) as resp:
                resp.raise_for_status()

                # NOTE(review): ``iter_raw()`` yields the bytes as sent on
                # the wire; confirm that's what we want for responses
                # served with a Content-Encoding such as gzip.
                with open(tmp_path, "wb") as out_file:
                    for chunk in resp.iter_raw():
                        out_file.write(chunk)

        # If something goes wrong, it will probably be retried by tenacity.
        # Log the exception in case a programming bug has been introduced in
        # the ``try`` block or there's a persistent error.
        except Exception as exc:
            print(exc, file=sys.stderr)

            # Don't leave a partial download lying around; each attempt
            # uses a fresh temporary name, so these would pile up.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

            raise
    finally:
        if owns_client:
            client.close()

    os.rename(tmp_path, path)
    return path
66 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # overcast-downloader
 2 | 
 3 | This is a script that lets you download the audio files for every podcast you've listened to in Overcast.
 4 | 
 5 | I listen to a lot of podcasts, and I use [Overcast].
 6 | Sometimes I want to listen to a podcast I heard a long time ago, but links rot, websites break, and episodes go offline.
 7 | This script allows me to download the audio file of every episode I've listened to, so I have a local archive of podcast episodes.
 8 | 
 9 | [Overcast]: https://overcast.fm/
10 | 
11 | ![An old man sitting on a chair outdoors, listening to a record player.](record_player.jpg)
12 | 
13 | *Podcasts have a richer sound when you listen to them on vinyl. Image credit: <a href="https://wellcomecollection.org/works/n7cthawx">Wellcome Collection</a>. CC BY.*
14 | 
15 | 
16 | 
17 | ## Background
18 | 
19 | This is something I've wanted for a while (at least two podcasts I loved have completely disappeared from the web), but I was never sure how to do it.
20 | In a segment in [ATP episode 353][atp353], Marco mentioned an export with a list of every episode you'd ever listened to:
21 | 
22 | > I can tell you what Overcast saves in the database table that saves your episode progress, which is by far my biggest database table.
23 | > It saves, for each episode you've interacted with, the current timestamp, and whether it's been completed. […]
24 | >
25 | > If you go to the account section of the website, you can export OPML -- a custom, extended format that I implemented -- so you can actually export all your data. […]
26 | > I can tell you a list of episodes you've completed, or played in some way in Overcast.
27 | 
28 | I went digging, and it was exactly what I wanted -- and shortly after, I had a script that downloads every MP3 it refers to.
29 | 
30 | [atp353]: https://overcast.fm/+R7DWLpsnY/1:40:21
31 | 
32 | 
33 | 
34 | ## How to use this script
35 | 
36 | You need:
37 | 
38 | *   **An Overcast account with an email and password.**
39 |     You can create this in the Overcast iOS app.
40 |     If you haven't done this before, or you've forgotten your email/password, read [my instructions](add_email_password) for doing so.
41 | 
42 | *   **A working Python 3 installation.**
43 |     This script only works with Python 3.6 or later.
44 | 
45 | Steps:
46 | 
47 | 1.  **Get your Overcast OPML file.**
48 | 
49 |     Log in to the Overcast website at <https://overcast.fm/login> using your email address and password.
50 | 
51 |     Once you're logged in, navigate to <https://overcast.fm/account>.
52 |     Under "Export Your Data", click "All data".
53 |     This will download an OPML file, which includes a list of every podcast episode you've ever played.
54 | 
55 | 2.  **Download the Python script.**
56 | 
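57 |     Download the scripts [`download_overcast_podcasts.py`](download_overcast_podcasts.py) and [`download.py`](download.py), and save them in the same folder somewhere on your disk. Then install the dependencies listed in [`requirements.txt`](requirements.txt), for example by running `pip install -r requirements.txt`.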
58 | 
59 | 3.  **Run the script, passing the path to your OPML file as the first argument.**
60 |     For example, if the OPML file is in `~/Downloads/overcast.opml.xml`, run:
61 | 
62 |     ```console
63 |     $ python download_overcast_podcasts.py ~/Downloads/overcast.opml.xml
64 |     ```
65 | 
66 |     This will start downloading the audio files to a folder called `audiofiles`.
67 |     If you'd like to save them somewhere different, pass the `--download_dir` flag.
68 |     For example, if you wanted to save the files to `~/Documents/podcasts`, run:
69 | 
70 |     ```console
71 |     $ python download_overcast_podcasts.py ~/Downloads/overcast.opml.xml --download_dir ~/Documents/podcasts
72 |     ```
73 | 
74 | The initial download will be very slow, depending on how many podcasts you've listened to, and it uses a lot of disk space.
75 | (At time of writing, I have ~1200 episodes in my export, which take up 61 GB.)
76 | On subsequent runs, the script should only download files that it hasn't saved before, so it should be a lot faster.
77 | 
78 | 
79 | 
80 | ## License
81 | 
82 | MIT.
83 | 


--------------------------------------------------------------------------------
/download_overcast_podcasts.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- encoding: utf-8
  3 | """
  4 | Download podcast files based on your Overcast export.
  5 | 
  6 | If you have an Overcast account, you can download an OPML file with
  7 | a list of every episode you've played from https://overcast.fm/account.
  8 | 
  9 | This tool can read that OPML file, and save a local copy of the audio files
 10 | for every episode you've listened to.
 11 | """
 12 | 
 13 | import argparse
 14 | import datetime
 15 | import errno
 16 | import filecmp
 17 | import functools
 18 | import glob
 19 | import json
 20 | import os
 21 | import sqlite3
 22 | import sys
 23 | from urllib.parse import urlparse
 24 | import xml.etree.ElementTree as ET
 25 | 
 26 | from download import download_file
 27 | 
 28 | 
 29 | def parse_args(argv):
 30 |     """Parse command-line arguments."""
 31 |     parser = argparse.ArgumentParser(description=__doc__)
 32 | 
 33 |     parser.add_argument(
 34 |         "OPML_PATH",
 35 |         help="Path to an OPML file downloaded from https://overcast.fm/account",
 36 |     )
 37 | 
 38 |     parser.add_argument(
 39 |         "--download_dir",
 40 |         default="audiofiles",
 41 |         help="directory to save podcast information to to",
 42 |     )
 43 | 
 44 |     args = parser.parse_args(argv)
 45 | 
 46 |     return {
 47 |         "opml_path": os.path.abspath(args.OPML_PATH),
 48 |         "download_dir": os.path.abspath(args.download_dir),
 49 |     }
 50 | 
 51 | 
 52 | def get_episodes(xml_string):
 53 |     """
 54 |     Given the XML string of the Overcast OPML, generate a sequence of entries
 55 |     that represent a single, played podcast episode.
 56 |     """
 57 |     root = ET.fromstring(xml_string)
 58 | 
 59 |     # The Overcast OPML has the following form:
 60 |     #
 61 |     #   <?xml version="1.0" encoding="utf-8"?>
 62 |     #   <opml version="1.0">
 63 |     #       <head><title>Overcast Podcast Subscriptions</title></head>
 64 |     #       <body>
 65 |     #           <outline text="playlists">...</outline>
 66 |     #           <outline text="feeds">...</outline>
 67 |     #       </body>
 68 |     #   </opml>
 69 |     #
 70 |     # Within the <outline text="feeds"> block of XML, there's a list of feeds
 71 |     # with the following structure (some attributes omitted):
 72 |     #
 73 |     #   <outline type="rss"
 74 |     #            title="My Example Podcast"
 75 |     #            xmlUrl="https://example.org/podcast.xml">
 76 |     #       <outline type="podcast-episode"
 77 |     #                overcastId="12345"
 78 |     #                pubDate="2001-01-01T01:01:01-00:00"
 79 |     #                title="The first episode"
 80 |     #                url="https://example.net/podcast/1"
 81 |     #                overcastUrl="https://overcast.fm/+ABCDE"
 82 |     #                enclosureUrl="https://example.net/files/1.mp3"/>
 83 |     #       ...
 84 |     #   </outline>
 85 |     #
 86 |     # We use an XPath expression to find the <outline type="rss"> entries
 87 |     # (so we get the podcast metadata), and then find the individual
 88 |     # "podcast-episode" entries in that feed.
 89 | 
 90 |     for feed in root.findall("./body/outline[@text='feeds']/outline[@type='rss']"):
 91 |         podcast = {
 92 |             "title": feed.get("title"),
 93 |             "text": feed.get("text"),
 94 |             "xml_url": feed.get("xmlUrl"),
 95 |         }
 96 | 
 97 |         for episode_xml in feed.findall("./outline[@type='podcast-episode']"):
 98 |             episode = {
 99 |                 "published_date": episode_xml.get("pubDate"),
100 |                 "title": episode_xml.get("title"),
101 |                 "url": episode_xml.get("url"),
102 |                 "overcast_id": episode_xml.get("overcastId"),
103 |                 "overcast_url": episode_xml.get("overcastUrl"),
104 |                 "enclosure_url": episode_xml.get("enclosureUrl"),
105 |             }
106 | 
107 |             yield {
108 |                 "podcast": podcast,
109 |                 "episode": episode,
110 |             }
111 | 
112 | 
def has_episode_been_downloaded_already(episode, download_dir):
    """
    Report whether this episode is recorded as downloaded.

    ``episode`` is an entry from ``get_episodes``; its Overcast ID is
    looked up in the ``downloaded_episodes`` table of the ``overcast.db``
    SQLite database inside ``download_dir``.

    Returns ``False`` if the database can't be opened or doesn't have
    that table yet, which is the case before anything has been
    downloaded.  Any other SQLite error is re-raised.
    """
    try:
        conn = sqlite3.connect(os.path.join(download_dir, "overcast.db"))
    except sqlite3.OperationalError as err:
        if err.args[0] == "unable to open database file":
            return False
        else:
            raise

    # This is called once per episode, so make sure the connection is
    # closed on every path rather than left for the garbage collector.
    try:
        c = conn.cursor()
        c.execute(
            "SELECT * FROM downloaded_episodes WHERE overcast_id=?",
            (episode["episode"]["overcast_id"],),
        )
        return c.fetchone() is not None
    except sqlite3.OperationalError as err:
        if err.args[0] == "no such table: downloaded_episodes":
            return False
        else:
            raise
    finally:
        conn.close()
136 | 
137 | 
def mark_episode_as_downloaded(episode, download_dir):
    """
    Record that this episode has been downloaded.

    ``episode`` is an entry from ``get_episodes``; its Overcast ID is
    stored in the ``downloaded_episodes`` table of the ``overcast.db``
    SQLite database inside ``download_dir``.  The table is created on
    first use.

    Marking the same episode more than once is harmless: the ID is only
    stored once.
    """
    conn = sqlite3.connect(os.path.join(download_dir, "overcast.db"))

    # Close the connection even if one of the statements fails.
    try:
        # Let SQLite decide whether the table needs creating, rather than
        # attempting it every time and matching on the error message.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS downloaded_episodes "
            "(overcast_id text PRIMARY KEY)"
        )

        # OR IGNORE: an ID that's already recorded would otherwise violate
        # the primary key and raise.
        conn.execute(
            "INSERT OR IGNORE INTO downloaded_episodes VALUES (?)",
            (episode["episode"]["overcast_id"],),
        )
        conn.commit()
    finally:
        conn.close()
156 | 
157 | 
def _escape(s):
    """Return ``s`` with every ``:`` and ``/`` swapped for ``-``."""
    return s.translate(str.maketrans({":": "-", "/": "-"}))


def get_filename(*, download_url, title):
    """
    Build the filename for an episode's audio file.

    The name is the escaped ``title`` followed by whatever file extension
    appears on the path part of ``download_url`` (query strings are
    ignored; there may be no extension at all).
    """
    _, extension = os.path.splitext(urlparse(download_url).path)
    return _escape(title) + extension
169 | 
170 | 
def download_episode(episode, download_dir):
    """
    Given a blob of episode data from get_episodes, download the audio file
    and save the metadata to ``download_dir``.

    Episodes are grouped into one folder per podcast.  Alongside each audio
    file we write a JSON file with the episode metadata, save a copy of the
    podcast's RSS feed, and record the episode in the database so it is
    skipped on later runs.

    If another episode of the same podcast already uses this filename, the
    Overcast ID is added to the filename to tell the two apart.
    """
    if has_episode_been_downloaded_already(episode=episode, download_dir=download_dir):
        return

    # If the MP3 URL is https://example.net/mypodcast/podcast1.mp3 and the
    # title is "Episode 1: My Great Podcast", the filename is
    # ``Episode 1- My Great Podcast.mp3``.
    audio_url = episode["episode"]["enclosure_url"]

    filename = get_filename(download_url=audio_url, title=episode["episode"]["title"])

    # Within the download_dir, put the episodes for each podcast in the
    # same folder.
    podcast_dir = os.path.join(download_dir, _escape(episode["podcast"]["title"]))
    os.makedirs(podcast_dir, exist_ok=True)

    download_path = os.path.join(podcast_dir, filename)
    base_name = _escape(episode["episode"]["title"])
    json_path = os.path.join(podcast_dir, base_name + ".json")

    # If the audio file already exists, check to see if it's the same episode,
    # or if this podcast isn't using unique filenames.
    #
    # If a podcast has multiple episodes with the same filename in its feed,
    # append the Overcast ID to disambiguate.
    if os.path.exists(download_path):
        try:
            with open(json_path, "r") as infile:
                cached_metadata = json.load(infile)
        except (OSError, ValueError) as err:
            # Without readable metadata we can't tell whose audio this is,
            # so leave everything alone.
            print(err, json_path)
            return

        cached_overcast_id = cached_metadata["episode"]["overcast_id"]
        this_overcast_id = episode["episode"]["overcast_id"]

        if cached_overcast_id != this_overcast_id:
            # ``filename`` is ``base_name`` plus the extension, so put the
            # ID between the two.  This must work for any extension: if
            # the name came out unchanged, the "new" path would be the
            # existing file, and the duplicate check below would delete
            # the other episode's audio.
            extension = filename[len(base_name):]
            filename = "%s_%s%s" % (base_name, this_overcast_id, extension)
            old_download_path = download_path
            download_path = os.path.join(podcast_dir, filename)
            json_path = download_path + ".json"

            print(
                "Downloading %s: %s to %s"
                % (episode["podcast"]["title"], audio_url, filename)
            )
            download_file(url=audio_url, path=download_path)

            try:
                if filecmp.cmp(download_path, old_download_path, shallow=False):
                    print("Duplicates detected! %s" % download_path)
                    os.unlink(download_path)
                    download_path = old_download_path

                    # Keep the metadata pointing at the copy we kept,
                    # not the one we just deleted.
                    filename = os.path.basename(old_download_path)
            except FileNotFoundError:
                # This can occur if the download fails -- say, the episode is
                # in the Overcast catalogue, but no longer available from source.
                pass

        else:
            # Already downloaded and it's the same episode.
            pass

    # This episode has never been downloaded before, so we definitely have
    # to download it fresh.
    else:
        print(
            "Downloading %s: %s to %s"
            % (episode["podcast"]["title"], audio_url, filename)
        )
        download_file(url=audio_url, path=download_path)

    # Save a blob of JSON with some episode metadata
    episode["filename"] = filename

    json_string = json.dumps(episode, indent=2, sort_keys=True)

    with open(json_path, "w") as outfile:
        outfile.write(json_string)

    save_rss_feed(episode=episode, download_dir=download_dir)
    mark_episode_as_downloaded(episode=episode, download_dir=download_dir)
256 | 
257 | 
def save_rss_feed(*, episode, download_dir):
    """
    Save a copy of the RSS feed of the podcast that ``episode`` belongs to.

    ``episode`` is an entry from ``get_episodes``; only its podcast title
    and feed URL are used.
    """
    podcast = episode["podcast"]
    title, xml_url = podcast["title"], podcast["xml_url"]

    _save_rss_feed(title=title, xml_url=xml_url, download_dir=download_dir)
264 | 
265 | 
# Use caching so we only have to download this RSS feed once per run,
# however many episodes of the podcast we process.
@functools.lru_cache()
def _save_rss_feed(*, title, xml_url, download_dir):
    """
    Save today's copy of a podcast's RSS feed into the podcast's folder.

    The feed at ``xml_url`` is saved as ``feed.<YYYY-MM-DD>.xml`` in the
    folder for ``title`` inside ``download_dir``, unless a file with that
    name already exists.

    Afterwards, the newest saved feed is deleted for as long as it is
    byte-for-byte identical to the one before it, so an unchanged feed
    doesn't pile up dated copies.
    """
    podcast_dir = os.path.join(download_dir, _escape(title))

    today = datetime.datetime.now().strftime("%Y-%m-%d")

    rss_path = os.path.join(podcast_dir, f"feed.{today}.xml")

    if not os.path.exists(rss_path):
        print("Downloading RSS feed for %s" % title)
        download_file(url=xml_url, path=rss_path)

    # Escape the folder before building the pattern: podcast titles can
    # contain glob metacharacters such as ``[`` or ``?``, which would
    # otherwise stop the pattern matching the files we just saved.
    feed_pattern = os.path.join(glob.escape(podcast_dir), "feed.*.xml")

    # The dates in the filenames sort lexicographically, so the newest
    # feed is last.
    matching_feeds = sorted(glob.glob(feed_pattern))

    while len(matching_feeds) >= 2 and filecmp.cmp(
        matching_feeds[-2], matching_feeds[-1], shallow=False
    ):
        os.unlink(matching_feeds.pop())
286 | 
287 | 
if __name__ == "__main__":
    args = parse_args(argv=sys.argv[1:])

    opml_path = args["opml_path"]
    download_dir = args["download_dir"]

    # The Overcast export is UTF-8 XML, so say so explicitly: the default
    # encoding for ``open()`` depends on the platform, and podcast titles
    # routinely contain non-ASCII characters.
    try:
        with open(opml_path, encoding="utf-8") as infile:
            xml_string = infile.read()
    except FileNotFoundError:
        sys.exit("Could not find an OPML file at %s" % opml_path)

    # Download the episodes one at a time, in the order they appear
    # in the export.
    for episode in get_episodes(xml_string):
        download_episode(episode, download_dir=download_dir)
305 | 


--------------------------------------------------------------------------------