├── .flake8
├── .gitignore
├── .isort.cfg
├── LICENSE
├── README.md
├── bandcamp-dl.py
└── requirements.txt


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | extend-ignore = E203
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 98 | __pypackages__/
 99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 
134 | # pytype static type analyzer
135 | .pytype/
136 | 
137 | # Cython debug symbols
138 | cython_debug/
139 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | multi_line_output = 3
3 | include_trailing_comma = True
4 | force_grid_wrap = 0
5 | use_parentheses = True
6 | ensure_newline_before_comments = True
7 | line_length = 88
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT No Attribution
 2 | 
 3 | Copyright 2020 iliana etaoin <iliana@buttslol.net>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this
 6 | software and associated documentation files (the "Software"), to deal in the Software
 7 | without restriction, including without limitation the rights to use, copy, modify,
 8 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
 9 | permit persons to whom the Software is furnished to do so.
10 | 
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
12 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
13 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
14 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
15 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
16 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # bandcamp-dl
 2 | 
 3 | This is a Python script that will download your entire Bandcamp collection to your working directory. For albums, it downloads ZIP files, and for tracks, it downloads the track.
 4 | 
 5 | It downloads items in FLAC format by default. Use `--format` to change the format; known format identifiers are:
 6 | - `aac-hi`
 7 | - `aiff-lossless`
 8 | - `alac`
 9 | - `flac`
10 | - `mp3-320`
11 | - `mp3-v0`
12 | - `vorbis`
13 | - `wav`
14 | 
15 | If you have [browser-cookie3](https://pypi.org/project/browser-cookie3/) installed, the script will attempt to pull your bandcamp.com `identity` cookie and username from Firefox or Chrome.
16 | If this doesn't work for you, provide the value of your `identity` cookie as a raw or Base64 string to `--identity`.
17 | browser-cookie3 exceptions are visible with `-v`/`--verbose`.
18 | 
19 | This script only downloads items you've purchased.
20 | Because this uses undocumented and unsupported APIs, it may break at any time.
21 | **The author will not provide support for this script.**
22 | 


--------------------------------------------------------------------------------
/bandcamp-dl.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import argparse
  4 | import base64
  5 | import binascii
  6 | import glob
  7 | import html
  8 | import json
  9 | import logging
 10 | import os
 11 | import os.path
 12 | import sys
 13 | import urllib.parse
 14 | import urllib.request
 15 | import zipfile
 16 | from collections import namedtuple
 17 | 
 18 | try:
 19 |     import browser_cookie3
 20 | except ImportError:
 21 |     COOKIE_FN = []
 22 | else:
 23 |     COOKIE_FN = [browser_cookie3.chrome, browser_cookie3.firefox]
 24 | 
 25 | 
 26 | USER_AGENT = (
 27 |     "bandcamp-dl/0.0 (https://github.com/iliana/bandcamp-dl) Python-urllib/"
 28 |     + "{0}.{1}".format(*sys.version_info[:2])
 29 | )
 30 | CLEAR = "\033[K"
 31 | 
 32 | Item = namedtuple("Item", ["artist", "title", "id", "download_url", "tracks"])
 33 | 
 34 | 
 35 | def get_identity(identity):
 36 |     if identity:
 37 |         try:
 38 |             data = base64.b64decode(identity)
 39 |             if identity.encode("utf-8") != base64.b64encode(data):
 40 |                 data = identity
 41 |         except binascii.Error:
 42 |             data = identity
 43 |         return urllib.parse.quote(data, safe="")
 44 | 
 45 |     for cookie_fn in COOKIE_FN:
 46 |         try:
 47 |             cookies = cookie_fn(domain_name="bandcamp.com")
 48 |         except:  # noqa=E722
 49 |             logging.info("%s failed", cookie_fn, exc_info=sys.exc_info())
 50 |             continue
 51 |         for cookie in cookies:
 52 |             if cookie.name == "identity" and cookie.domain == ".bandcamp.com":
 53 |                 return cookie.value
 54 | 
 55 | 
 56 | def build_request(url, identity=None, *args, **kwargs):
 57 |     req = urllib.request.Request(url, *args, **kwargs)
 58 |     if identity:
 59 |         req.add_header("cookie", "identity={}".format(identity))
 60 |     req.add_header("user-agent", USER_AGENT)
 61 |     return req
 62 | 
 63 | 
 64 | def bc_json(path, identity, data=None):
 65 |     url = urllib.parse.urljoin("https://bandcamp.com/api/", path)
 66 |     logging.info("fetch {} as json".format(url))
 67 |     if data:
 68 |         data = json.dumps(data).encode("utf-8")
 69 |     with urllib.request.urlopen(build_request(url, identity, data)) as f:
 70 |         return json.loads(f.read().decode("utf-8"))
 71 | 
 72 | 
 73 | def bc_pagedata(url, identity):
 74 |     logging.info("fetch {} as html".format(url))
 75 |     with urllib.request.urlopen(build_request(url, identity)) as f:
 76 |         for line in f.readlines():
 77 |             line = line.decode(f.headers.get_content_charset())
 78 |             if "pagedata" in line and "data-blob" in line:
 79 |                 break
 80 |     return json.loads(html.unescape(line.split('data-blob="')[1].split('"')[0]))
 81 | 
 82 | 
 83 | class DownloadURIError(Exception):
 84 |     pass
 85 | 
 86 | 
 87 | class ExpiredDownloadError(DownloadURIError):
 88 |     pass
 89 | 
 90 | 
 91 | def bc_download(url, identity, format):
 92 |     items = bc_pagedata(url, identity)["digital_items"]
 93 |     assert len(items) == 1
 94 |     url = items[0]["downloads"][format]["url"]
 95 |     # munge the download URL to request the correct URL for the bcbits CDN
 96 |     split = urllib.parse.urlsplit(url.replace("/download/", "/statdownload/"))
 97 |     query = urllib.parse.parse_qsl(split.query)
 98 |     query.append((".vrs", 1))
 99 |     split = split._replace(query=urllib.parse.urlencode(query))
100 |     url = urllib.parse.urlunsplit(split)
101 |     # then fetch that to get the bcbits URL
102 |     req = build_request(url, identity)
103 |     req.add_header("accept", "application/json")
104 |     logging.info("fetch {} as json".format(url))
105 |     with urllib.request.urlopen(req) as f:
106 |         data = json.loads(f.read().decode("utf-8"))
107 |     if "errortype" in data:
108 |         # catch error
109 |         if data["errortype"] == "ExpirationError":
110 |             raise ExpiredDownloadError()
111 |         else:
112 |             raise DownloadURIError(data["errortype"])
113 | 
114 |     return data["download_url"]
115 | 
116 | 
def download_file(item, url):
    """Download ``url`` into the working directory, reporting progress.

    The file name is taken from the ``filename*=UTF-8''`` part of the
    response's Content-Disposition header, with ``" (<item.id>)"`` inserted
    before the extension. A partially written file is removed if the
    download is interrupted for any reason.

    Raises ``ValueError`` when the response carries no usable file name.
    """
    logging.info("download {}".format(url))
    with urllib.request.urlopen(build_request(url)) as f:
        filename = None
        for part in (f.headers["content-disposition"] or "").split(";"):
            part = part.strip()
            if part.startswith("filename*=UTF-8''"):
                filename = urllib.parse.unquote(part.split("''", 1)[1])
        if filename is not None:
            # the name is server-controlled: keep only the last path
            # component so it cannot point outside the working directory
            filename = os.path.basename(filename)
        if not filename:
            raise ValueError(
                "no usable filename in content-disposition for {}".format(url)
            )
        stem, dot, ext = filename.rpartition(".")
        if dot:
            filename = "{} ({}).{}".format(stem, item.id, ext)
        else:
            # no extension to preserve; just append the id
            filename = "{} ({})".format(filename, item.id)
        size = int(f.headers["content-length"])
        try:
            with open(filename, "wb") as t:
                at = 0
                while True:
                    buf = f.read(16 * 1024)
                    if not buf:
                        break
                    t.write(buf)
                    at += len(buf)
                    progress(filename, at=at, size=size)
        except BaseException:
            # deliberately includes KeyboardInterrupt: never leave a
            # truncated file behind. Ignore a failing removal (e.g. the file
            # was never created) so the original error is the one reported.
            try:
                os.remove(filename)
            except OSError:
                pass
            raise
140 | 
141 | 
def collection(identity):
    """Yield an ``Item`` for every entry in the fan's collection and hidden list.

    The first batch of each kind comes from the data embedded in the fan's
    profile page; further batches are fetched from the
    ``fancollection/1/<kind>_items`` API until it reports that no more are
    available.
    """
    summary = bc_json("fan/2/collection_summary", identity)["collection_summary"]
    pagedata = bc_pagedata(
        "https://bandcamp.com/{}".format(summary["username"]), identity
    )

    for kind in ("collection", "hidden"):
        yield from items(
            {
                "items": pagedata["item_cache"][kind].values(),
                "redownload_urls": pagedata["collection_data"]["redownload_urls"],
            }
        )
        data = {
            "fan_id": summary["fan_id"],
            "older_than_token": pagedata["{}_data".format(kind)]["last_token"],
        }
        if data["older_than_token"] is None:
            # no token to page from for this kind; move on to the next kind
            # rather than leaving the loop, so hidden items are still listed
            continue
        while True:
            res = bc_json("fancollection/1/{}_items".format(kind), identity, data)
            yield from items(res)
            if not res["more_available"]:
                break
            data["older_than_token"] = res["last_token"]
168 | 
169 | 
def items(data):
    """Yield an ``Item`` for each downloadable entry of ``data["items"]``.

    ``data`` must also provide ``redownload_urls``, a mapping keyed by the
    concatenated sale item type and id.
    """
    for entry in data["items"]:
        # some items are not actually downloadable! these can be detected with
        # a null featured_track
        if entry["featured_track"] is not None:
            sale_key = "{sale_item_type}{sale_item_id}".format(**entry)
            yield Item(
                entry["band_name"],
                entry["item_title"],
                entry["tralbum_id"],
                data["redownload_urls"][sale_key],
                entry["num_streamable_tracks"],
            )
184 | 
185 | 
def already_downloaded(item):
    """Return True if a file for ``item`` exists in the working directory.

    A file counts as belonging to the item when its name contains
    ``(<item.id>)``; only the first match is examined. If that file is a
    ``.zip`` holding fewer tracks than ``item.tracks``, it is deleted and
    False is returned so the item is fetched again.
    """
    matches = glob.glob("*({})*".format(item.id))
    if not matches:
        return False

    existing = matches[0]
    # redownload for pre-orders / albums with new tracks: if this is a zip,
    # get the track count and compare against the item's streamable tracks
    # count. if the former is less than the latter, delete and redownload.
    # (endswith also copes with names that contain no "." at all)
    if existing.endswith(".zip"):
        with zipfile.ZipFile(existing) as z:
            count = sum(1 for name in z.namelist() if is_track(name))
        if item.tracks > count:
            logging.info(
                "remove %s (%s tracks, now has %s)", existing, count, item.tracks
            )
            os.remove(existing)
            return False
    progress(existing, skip=True)
    return True
205 | 
206 | 
def is_track(filename):
    """Return True if ``filename`` ends in a known audio file extension.

    The comparison is case-sensitive. Names without any extension (such as
    directory entries inside a zip archive) are simply not tracks.
    """
    return filename.endswith((".flac", ".mp3", ".m4a", ".ogg", ".wav", ".aiff"))
212 | 
213 | 
def progress(item, skip=None, at=None, size=None):
    """Write a one-line status for ``item`` to stderr.

    ``item`` is either an ``Item`` (shown as "artist - title (id)") or a
    ready-made label such as a file name. The state shown is "already
    downloaded" when ``skip`` is truthy, a percentage of ``size`` when ``at``
    is truthy, and "starting..." otherwise. The line ends in a carriage
    return so the next status overwrites it; a newline follows once the item
    is skipped or complete.
    """
    if not isinstance(item, Item):
        label = item
    else:
        label = "{} - {}".format(item.artist, item.title)
        if item.id:
            label += " ({})".format(item.id)

    if skip:
        status = "already downloaded"
    elif at:
        status = "{}%".format(at * 100 // size)
    else:
        status = "starting..."

    line = "{}{}: {}".format(CLEAR, label, status)
    print(line, file=sys.stderr, end="\r", flush=True)

    finished = skip or (at and at == size)
    if finished:
        print(file=sys.stderr)
232 | 
233 | 
def main():
    """Parse the command line and download every item not yet on disk.

    Exits with status 1 when no identity cookie can be found, or when a
    download has expired and ``--ignore-expired`` was not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--format", help="Format to download (default: %(default)s)", default="flac"
    )
    parser.add_argument(
        "--identity", help='Value of the "identity" cookie (raw or Base64)'
    )
    parser.add_argument(
        "-v",
        "--verbose",
        help="Be verbose",
        action="store_const",
        dest="loglevel",
        const=logging.INFO,
    )
    parser.add_argument(
        "--ignore-expired",
        help="Warn but continue on expired URLs",
        action="store_true",
        dest="ignore_expired",
    )
    args = parser.parse_args()
    # loglevel is None without -v, which leaves logging at its default level
    logging.basicConfig(level=args.loglevel)

    identity = get_identity(args.identity)
    if identity is None:
        print("Failed to load identity cookie for bandcamp.com", file=sys.stderr)
        sys.exit(1)

    expired_help = "https://get.bandcamp.help/hc/en-us/articles/360046095574"
    for item in collection(identity):
        if already_downloaded(item):
            continue
        progress(item)
        try:
            download_file(item, bc_download(item.download_url, identity, args.format))
        except ExpiredDownloadError:
            # a warning, so it goes to stderr like every other status message
            print(
                "{} - {}: download expired. See {}".format(
                    item.artist, item.title, expired_help
                ),
                file=sys.stderr,
            )
            if not args.ignore_expired:
                sys.exit(1)


if __name__ == "__main__":
    main()
279 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | browser_cookie3
2 | 


--------------------------------------------------------------------------------