├── .gitattributes ├── .github ├── release-drafter.yml └── workflows │ ├── ci.yml │ └── release-drafter.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── README.md ├── brainzutils ├── __init__.py ├── cache.py ├── flask │ ├── __init__.py │ └── test │ │ ├── __init__.py │ │ ├── test_main.py │ │ └── test_ratelimit.py ├── mail.py ├── metrics.py ├── musicbrainz_db │ ├── __init__.py │ ├── artist.py │ ├── editor.py │ ├── event.py │ ├── exceptions.py │ ├── helpers.py │ ├── includes.py │ ├── label.py │ ├── models.py │ ├── place.py │ ├── recording.py │ ├── release.py │ ├── release_group.py │ ├── serialize.py │ ├── test_data.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_artist.py │ │ ├── test_editor.py │ │ ├── test_event.py │ │ ├── test_helper.py │ │ ├── test_label.py │ │ ├── test_place.py │ │ ├── test_recording.py │ │ ├── test_release.py │ │ ├── test_release_group.py │ │ ├── test_serialize.py │ │ └── test_work.py │ ├── utils.py │ └── work.py ├── ratelimit.py ├── sentry.py └── test │ ├── __init__.py │ ├── test_cache.py │ ├── test_mail.py │ └── test_metrics.py ├── conftest.py ├── docs ├── Makefile ├── _static │ └── .gitkeep ├── cache.rst ├── conf.py ├── flask.rst ├── index.rst ├── mail.rst ├── metrics.rst ├── musicbrainz_db │ ├── artist.rst │ ├── editor.rst │ ├── event.rst │ ├── index.rst │ ├── label.rst │ ├── place.rst │ ├── recording.rst │ ├── release.rst │ ├── release_group.rst │ └── work.rst ├── ratelimit.rst └── requirements.txt ├── pylintrc ├── pyproject.toml ├── pytest.ini ├── requirements.txt ├── requirements_dev.txt ├── test.sh └── test ├── Dockerfile ├── docker-compose.yml └── musicbrainz_db ├── Dockerfile ├── README.md └── scripts ├── create_test_db.sh ├── createdb.sh └── fetch-dump.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | template: | 2 | ## What’s Changed 3 | 4 | $CHANGES -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Login to Docker Hub 19 | run: echo ${{ secrets.DOCKER_HUB_PASSWORD }} | docker login -u ${{ secrets.DOCKER_HUB_USERNAME }} --password-stdin 20 | continue-on-error: true 21 | 22 | - name: Pull docker images 23 | run: docker compose -f test/docker-compose.yml pull 24 | 25 | - name: Build the Docker image 26 | run: docker compose -f test/docker-compose.yml -p brainzutils_test build 27 | 28 | - name: Bring up dependencies 29 | run: docker compose -f test/docker-compose.yml -p brainzutils_test up -d redis musicbrainz_db 30 | 31 | - name: Run tests 32 | run: docker compose -f test/docker-compose.yml -p brainzutils_test run --rm test 33 | 34 | - name: Bring down containers 35 | run: docker compose -f test/docker-compose.yml -p brainzutils_test down 36 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: 
Release Drafter
2 | 
3 | on:
4 | push:
5 | # branches to consider in the event; optional, defaults to all
6 | branches:
7 | - master
8 | 
9 | jobs:
10 | update_release_draft:
11 | runs-on: ubuntu-latest
12 | steps:
13 | # Drafts your next Release notes as Pull Requests are merged into "master"
14 | - uses: release-drafter/release-drafter@v5
15 | env:
16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
17 | 
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | 
5 | # Distribution / packaging
6 | .Python
7 | env/
8 | venv/
9 | build/
10 | develop-eggs/
11 | dist/
12 | downloads/
13 | eggs/
14 | .eggs/
15 | lib/
16 | lib64/
17 | parts/
18 | sdist/
19 | var/
20 | *.egg-info/
21 | .installed.cfg
22 | *.egg
23 | 
24 | # Docs
25 | docs/_build
26 | 
27 | # Test results
28 | htmlcov
29 | .coverage
30 | 
-------------------------------------------------------------------------------- /.readthedocs.yaml: --------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | build:
4 | os: ubuntu-20.04
5 | tools:
6 | python: "3.10"
7 | 
8 | sphinx:
9 | configuration: docs/conf.py
10 | 
11 | formats: all
12 | 
13 | python:
14 | install:
15 | - requirements: docs/requirements.txt
16 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # BrainzUtils for Python
2 | 
3 | This is a package with common utilities that are used throughout MetaBrainz
4 | projects that use the Python programming language.
5 | 
6 | Note that v1.18.* will be the last line of releases compatible with Python 2.
7 | 
8 | Please report issues at https://tickets.musicbrainz.org/browse/BU.
9 | 
10 | # Development
11 | To develop BrainzUtils itself, install it locally in editable mode with development
12 | dependencies by running: `pip install --group dev -e .`.
13 | 
14 | > [!NOTE]
15 | > Support for dependency groups (`--group`) was added to pip in 25.1.0.
16 | 
17 | ## Usage
18 | 
19 | You can include this line in a `requirements.txt` file:
20 | 
21 | git+https://github.com/metabrainz/brainzutils-python.git@<tag>
22 | 
23 | Replace `<tag>` with the tag that you want to reference.
24 | See https://github.com/metabrainz/brainzutils-python/releases.
25 | 
26 | ## Release process
27 | 
28 | For this project we are using [semantic versioning](http://semver.org/). If
29 | you want to make a new release:
30 | 
31 | 1. Create a new tag in git using the following format: `v<major>.<minor>.<patch>`.
32 | 
33 | git tag v1.x.0
34 | git push --tags
35 | 
36 | 2. Create a release on GitHub based on that tag. Specify changes that were made.
37 | https://github.com/metabrainz/brainzutils-python/releases/new
38 | 
39 | When updating underlying dependencies, keep in mind any breaking changes that they
40 | might introduce. Update the version of `brainzutils-python` accordingly.
41 | 
42 | ## License
43 | 
44 | ```
45 | brainzutils - Python utilities for MetaBrainz projects
46 | Copyright (C) 2018 MetaBrainz Foundation Inc.
47 | 
48 | This program is free software; you can redistribute it and/or modify
49 | it under the terms of the GNU General Public License as published by
50 | the Free Software Foundation; either version 2 of the License, or
51 | (at your option) any later version. 
52 | 
53 | This program is distributed in the hope that it will be useful,
54 | but WITHOUT ANY WARRANTY; without even the implied warranty of
55 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 | GNU General Public License for more details.
57 | 
58 | You should have received a copy of the GNU General Public License along
59 | with this program; if not, write to the Free Software Foundation, Inc.,
60 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
61 | ```
62 | 
-------------------------------------------------------------------------------- /brainzutils/__init__.py: --------------------------------------------------------------------------------
1 | from importlib.metadata import version, PackageNotFoundError
2 | 
3 | try:
4 | __version__ = version(__name__)
5 | except PackageNotFoundError:
6 | # package is not installed
7 | __version__ = "unknown"
8 | 
-------------------------------------------------------------------------------- /brainzutils/cache.py: --------------------------------------------------------------------------------
1 | # pylint: disable=invalid-name
2 | """
3 | This module serves as an interface for Redis.
4 | 
5 | The module needs to be initialized before use! See :meth:`init()`.
6 | 
7 | It is basically a wrapper around the redis package, with additional
8 | functionality and tweaks specific to our needs.
9 | 
10 | There's also support for namespacing, which simplifies management of different
11 | versions of data saved in the cache.
12 | 
13 | More information about Redis can be found at http://redis.io/.
14 | """
15 | import builtins
16 | import os
17 | import socket
18 | from functools import wraps
19 | import datetime
20 | import re
21 | from typing import Optional
22 | 
23 | import redis
24 | import msgpack
25 | 
26 | 
27 | _r: Optional[redis.StrictRedis] = None
28 | _glob_namespace: Optional[str] = None
29 | 
30 | NS_REGEX = re.compile('[a-zA-Z0-9_-]+$')
31 | CONTENT_ENCODING = "utf-8"
32 | ENCODING_ASCII = "ascii"
33 | 
34 | 
35 | def init(host: str = "localhost", port: int = 6379, db_number: int = 0,
36 | namespace: str = "", client_name: Optional[str] = None):
37 | """Initializes Redis client. Needs to be called before use.
38 | 
39 | All keys are prefixed with the global namespace given here.
40 | 
41 | Args:
42 | host: Redis server hostname.
43 | port: Redis port.
44 | db_number: Redis database number.
45 | namespace: Global namespace that will be prepended to all keys.
46 | client_name: The client name to assign to the redis connection. This value is used to identify which clients
47 | are connected to a server, and is only used for debugging purposes.
48 | """
49 | 
50 | # The first priority in setting the client name is to set the user specified
51 | # client_name as this can come in handy during testing and development. Otherwise,
52 | # we use the CONTAINER_NAME environment variable, which is always set in production.
53 | # Finally, we fall back to the host name, which is not as informative as the
54 | # container name, but something is better than nothing.
55 | if client_name is None:
56 | client_name = os.getenv("CONTAINER_NAME", None)
57 | if client_name is None:
58 | client_name = socket.gethostname()
59 | 
60 | global _r, _glob_namespace
61 | _r = redis.StrictRedis(
62 | host=host,
63 | port=port,
64 | db=db_number,
65 | client_name=client_name
66 | )
67 | 
68 | _glob_namespace = namespace + ":"
69 | 
70 | 
71 | def init_required(f):
72 | @wraps(f)
73 | def decorated(*args, **kwargs):
74 | if not _r:
75 | raise RuntimeError("Cache module needs to be initialized before
76 | "use! 
See documentation for more info.")
77 | return f(*args, **kwargs)
78 | 
79 | return decorated
80 | 
81 | 
82 | # pylint: disable=redefined-builtin
83 | @init_required
84 | def set(key, val, expirein, namespace=None, encode=True):
85 | """Set a key to a given value.
86 | 
87 | Args:
88 | key (str): Key of the item.
89 | val: Item's value.
90 | expirein (int): The time after which this value should expire, in seconds.
91 | namespace (str): Optional namespace in which key needs to be defined.
92 | encode: True if the value should be encoded with msgpack, False otherwise
93 | 
94 | Returns:
95 | True if stored successfully.
96 | """
97 | # Note that both key and value are encoded before insertion.
98 | return set_many(
99 | mapping={key: val},
100 | expirein=expirein,
101 | namespace=namespace,
102 | encode=encode
103 | )
104 | 
105 | 
106 | @init_required
107 | def get(key, namespace=None, decode=True):
108 | """Retrieve an item.
109 | 
110 | Args:
111 | key: Key of the item that needs to be retrieved.
112 | namespace: Optional namespace in which key was defined.
113 | decode (bool): True if value should be decoded with msgpack, False otherwise
114 | 
115 | Returns:
116 | Stored value or None if it's not found.
117 | """
118 | # Note that key is encoded before the retrieval request.
119 | return get_many([key], namespace, decode).get(key)
120 | 
121 | 
122 | @init_required
123 | def delete(key, namespace=None):
124 | """Delete an item.
125 | 
126 | Args:
127 | key: Key of the item that needs to be deleted.
128 | namespace: Optional namespace in which key was defined.
129 | 
130 | Returns:
131 | Number of keys that were deleted.
132 | """
133 | # Note that key is encoded before the deletion request.
134 | return delete_many([key], namespace)
135 | 
136 | 
137 | @init_required
138 | def expire(key, expirein, namespace=None):
139 | """Set the expiration time for an item.
140 | 
141 | Args:
142 | key: Key of the item whose expiration time should be set.
143 | expirein: the number of seconds after which the item should expire
144 | namespace: Optional namespace in which key was defined.
145 | 
146 | Returns:
147 | True if the timeout was set, False otherwise
148 | """
149 | # Note that key is encoded before the expire request.
150 | return _r.pexpire(_prep_key(key, namespace), expirein * 1000)
151 | 
152 | 
153 | @init_required
154 | def expireat(key, timeat, namespace=None):
155 | """Set the absolute expiration time for an item.
156 | 
157 | Args:
158 | key: Key of the item whose expiration time should be set.
159 | timeat: the number of seconds since the epoch when the item should expire
160 | namespace: Optional namespace in which key was defined.
161 | 
162 | Returns:
163 | True if the timeout was set, False otherwise
164 | """
165 | # Note that key is encoded before the expire request.
166 | return _r.pexpireat(_prep_key(key, namespace), timeat * 1000)
167 | 
168 | 
169 | @init_required
170 | def set_many(mapping, expirein, namespace=None, encode=True):
171 | """Set multiple keys doing just one query.
172 | 
173 | Args:
174 | mapping (dict): A dict of key/value pairs to set.
175 | expirein (int): The time after which this value should expire, in seconds.
176 | namespace (str): Namespace for the keys.
177 | encode: True if the values should be encoded with msgpack, False otherwise
178 | 
179 | Returns:
180 | True on success. 
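Example:
    A minimal usage sketch (illustrative; assumes the module has been
    initialized and a Redis server is reachable, and the host and
    namespace values are placeholders):

        from brainzutils import cache
        cache.init(host="localhost", port=6379, namespace="myapp")
        cache.set_many({"a": 1, "b": 2}, expirein=60)
        cache.get_many(["a", "b"])  # -> {"a": 1, "b": 2}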
181 | """
182 | # TODO: Fix return value
183 | result = _r.mset(_prep_dict(mapping, namespace, encode))
184 | if expirein:
185 | for key in list(mapping.keys()):
186 | expire(key, expirein, namespace)
187 | 
188 | return result
189 | 
190 | 
191 | @init_required
192 | def get_many(keys, namespace=None, decode=True):
193 | """Retrieve multiple keys doing just one query.
194 | 
195 | Args:
196 | keys (list): List of keys that need to be retrieved.
197 | namespace (str): Namespace for the keys.
198 | decode (bool): True if values should be decoded with msgpack, False otherwise
199 | 
200 | Returns:
201 | A dictionary of key/value pairs that were available.
202 | """
203 | result = {}
204 | for i, value in enumerate(_r.mget(_prep_keys_list(keys, namespace))):
205 | result[keys[i]] = _decode_val(value) if decode else value
206 | return result
207 | 
208 | 
209 | @init_required
210 | def delete_many(keys, namespace=None):
211 | """Delete multiple keys.
212 | 
213 | Returns:
214 | Number of keys that were deleted.
215 | """
216 | return _r.delete(*_prep_keys_list(keys, namespace))
217 | 
218 | 
219 | @init_required
220 | def increment(key, amount=1, namespace=None):
221 | """ Increment the value for a given key using the INCR command.
222 | 
223 | Args:
224 | key: Key of the item that needs to be incremented
225 | amount: the amount to increment the value by
226 | namespace: Namespace for the key
227 | 
228 | Returns:
229 | An integer equal to the value after increment
230 | """
231 | return _r.incr(_prep_keys_list([key], namespace)[0], amount=amount)
232 | 
233 | 
234 | @init_required
235 | def hincrby(name, key, amount, namespace=None):
236 | """Increment a key in a hash by a given amount using HINCRBY
237 | 
238 | Args:
239 | name: Name of the hash
240 | key: Key of the item in the hash to increment
241 | amount: the number to increment the key by
242 | namespace: Namespace for the name
243 | 
244 | Returns:
245 | An integer equal to the value after increment
246 | """
247 | return _r.hincrby(_prep_keys_list([name], namespace)[0], key, amount)
248 | 
249 | 
250 | @init_required
251 | def hgetall(name, namespace=None):
252 | """Get all keys and values for a hash using HGETALL
253 | 
254 | Args:
255 | name: Name of the hash
256 | namespace: Namespace for the name
257 | 
258 | Returns:
259 | A dictionary of {key: value} items for all keys in the hash
260 | """
261 | return _r.hgetall(_prep_keys_list([name], namespace)[0])
262 | 
263 | 
264 | @init_required
265 | def hkeys(name, namespace=None):
266 | """Get all keys for a hash using HKEYS
267 | 
268 | Args:
269 | name: Name of the hash
270 | namespace: Namespace for the name
271 | 
272 | Returns:
273 | A list of [key] values for all keys in the hash
274 | """
275 | return _r.hkeys(_prep_keys_list([name], namespace)[0])
276 | 
277 | 
278 | @init_required
279 | def hset(name, key, value, namespace=None):
280 | """Set the value of a key in a hash using HSET.
281 | If the key does not already exist in the hash, it is added;
282 | otherwise its value is updated.
283 | 
284 | Args:
285 | name: Name of the hash
286 | key: Key of the item in the hash to set
287 | value: value to set the item to
288 | namespace: Namespace for the name
289 | 
290 | Returns:
291 | the number of keys that were newly added to the hash
292 | """
293 | return _r.hset(_prep_keys_list([name], namespace)[0], key, value)
294 | 
295 | 
296 | @init_required
297 | def hdel(name, keys, namespace=None):
298 | """Delete the specified keys from a hash using HDEL. 
299 | Note that the ``keys`` argument must be a list. This differs from the underlying redis
300 | library's version of this command, which takes varargs.
301 | 
302 | Args:
303 | name: Name of the hash
304 | keys: a list of the keys to delete from the hash
305 | namespace: Namespace for the name
306 | 
307 | Returns:
308 | the number of keys deleted from the hash
309 | """
310 | if not isinstance(keys, list):
311 | keys = [keys]
312 | return _r.hdel(_prep_keys_list([name], namespace)[0], *keys)
313 | 
314 | 
315 | @init_required
316 | def sadd(name, keys, expirein, encode=True, namespace=None):
317 | """Add the specified keys to the set stored at name using SADD.
318 | Note that it is not possible to expire a single value stored in a set. The ``expirein``
319 | argument will set the expiration period of the entire set stored at ``name``. Therefore,
320 | any additions to a set will reset its expiry to the value of ``expirein`` passed in
321 | the last call.
322 | Args:
323 | name: Name of the set
324 | keys: keys to add to the set
325 | expirein: the number of seconds after which the item should expire
326 | namespace: namespace for the name
327 | encode: True if the value should be encoded with msgpack, False otherwise
328 | 
329 | Returns:
330 | the number of elements that were added to the set, not including the elements already present in the set.
331 | """
332 | prepared_name = _prep_key(name, namespace)
333 | if not isinstance(keys, list) and not isinstance(keys, builtins.set):
334 | keys = {keys}
335 | 
336 | if encode:
337 | keys = {_encode_val(key) for key in keys}
338 | 
339 | result = _r.sadd(prepared_name, *keys)
340 | expire(name, expirein, namespace)
341 | return result
342 | 
343 | 
344 | @init_required
345 | def smembers(name, decode=True, namespace=None):
346 | """Returns all the members of the set value stored at name.
347 | Args:
348 | name: Name of the set
349 | decode: True if value should be decoded with msgpack, False otherwise
350 | namespace: namespace for the name
351 | 
352 | Returns:
353 | all members of the set
354 | """
355 | keys = _r.smembers(_prep_key(name, namespace))
356 | if decode:
357 | keys = {_decode_val(key) for key in keys}
358 | return keys
359 | 
360 | 
361 | @init_required
362 | def flush_all():
363 | _r.flushdb()
364 | 
365 | 
366 | def gen_key(key, *attributes):
367 | """Helper function that generates a key with attached attributes.
368 | 
369 | Args:
370 | key: Original key.
371 | attributes: Attributes that will be appended to the key.
372 | 
373 | Returns:
374 | Key that can be used with cache. 
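Example:
    An illustrative sketch; the result follows from the implementation
    below (non-string arguments are converted, spaces become underscores):

        gen_key("artist", 123, "some name")  # -> "artist_123_some_name"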
375 | """ 376 | if not isinstance(key, str): 377 | key = str(key) 378 | key = key.encode(ENCODING_ASCII, errors='xmlcharrefreplace').decode(ENCODING_ASCII) 379 | 380 | for attr in attributes: 381 | if not isinstance(attr, str): 382 | attr = str(attr) 383 | key += '_' + attr.encode(ENCODING_ASCII, errors='xmlcharrefreplace').decode(ENCODING_ASCII) 384 | 385 | key = key.replace(' ', '_') # spaces are not allowed 386 | 387 | return key 388 | 389 | 390 | def _prep_dict(dictionary, namespace=None, encode=True): 391 | """Wrapper for _prep_key and _encode_val functions that works with dictionaries.""" 392 | return {_prep_key(key, namespace): _encode_val(value) if encode else value 393 | for key, value in dictionary.items()} 394 | 395 | 396 | def _prep_key(key, namespace=None): 397 | """Prepares a key for use with Redis.""" 398 | if namespace: 399 | key = "%s:%s" % (namespace, key) 400 | if not isinstance(key, bytes): 401 | key = key.encode(ENCODING_ASCII, errors='xmlcharrefreplace').decode(ENCODING_ASCII) 402 | return _glob_namespace + key 403 | 404 | 405 | def _prep_keys_list(l, namespace=None): 406 | """Wrapper for _prep_key function that works with lists. 407 | 408 | Returns: 409 | Prepared keys in the same order. 410 | """ 411 | return [_prep_key(k, namespace) for k in l] 412 | 413 | 414 | def _encode_val(value): 415 | if value is None: 416 | return value 417 | return msgpack.packb(value, use_bin_type=True, default=_msgpack_default) 418 | 419 | 420 | def _decode_val(value): 421 | if value is None: 422 | return value 423 | return msgpack.unpackb(value, raw=False, ext_hook=_msgpack_ext_hook) 424 | 425 | 426 | ############ 427 | # NAMESPACES 428 | ############ 429 | 430 | def validate_namespace(namespace): 431 | """Checks that namespace value is supported.""" 432 | if not NS_REGEX.match(namespace): 433 | raise ValueError("Invalid namespace. Must match regex /[a-zA-Z0-9_-]+$/.") 434 | 435 | 436 | ###################### 437 | # CUSTOM SERIALIZATION 438 | ###################### 439 | 440 | TYPE_DATETIME_CODE = 1 441 | 442 | 443 | def _msgpack_default(obj): 444 | if isinstance(obj, datetime.datetime): 445 | return msgpack.ExtType(TYPE_DATETIME_CODE, obj.isoformat().encode(CONTENT_ENCODING)) 446 | raise TypeError("Unknown type: %r" % (obj,)) 447 | 448 | 449 | def _msgpack_ext_hook(code, data): 450 | if code == TYPE_DATETIME_CODE: 451 | return datetime.datetime.fromisoformat(data.decode(CONTENT_ENCODING)) 452 | return msgpack.ExtType(code, data) 453 | -------------------------------------------------------------------------------- /brainzutils/flask/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask_debugtoolbar import DebugToolbarExtension 3 | 4 | 5 | class CustomFlask(Flask): 6 | """Custom version of Flask with our bells and whistles.""" 7 | 8 | def __init__(self, import_name, config_file=None, debug=None, 9 | *args, **kwargs): 10 | """Create an instance of Flask app. 11 | 12 | See original documentation for Flask. 13 | 14 | Arguments: 15 | import_name (str): Name of the application package. 16 | config_file (str): Path to a config file that needs to be loaded. 17 | Should be in a form of Python module. 18 | debug (bool): Override debug value. 
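Example:
    A minimal sketch (the config path is a placeholder):

        app = CustomFlask(__name__, config_file="/path/to/config.py", debug=True)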
19 | """ 20 | super(CustomFlask, self).__init__(import_name, *args, **kwargs) 21 | if config_file: 22 | self.config.from_pyfile(config_file) 23 | if debug is not None: 24 | self.debug = debug 25 | 26 | def init_debug_toolbar(self): 27 | """This method initializes the Flask-Debug extension toolbar for the 28 | Flask app. 29 | 30 | Note that the Flask-Debug extension requires app.debug be true 31 | and the SECRET_KEY be defined in app.config. 32 | """ 33 | if self.debug: 34 | DebugToolbarExtension(self) 35 | -------------------------------------------------------------------------------- /brainzutils/flask/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metabrainz/brainzutils-python/bf01c6da15d4a2426d64a31cf232c06bec3860f3/brainzutils/flask/test/__init__.py -------------------------------------------------------------------------------- /brainzutils/flask/test/test_main.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from brainzutils import flask 4 | 5 | class FlaskTestCase(unittest.TestCase): 6 | 7 | def test_create_app(self): 8 | app = flask.CustomFlask(__name__) 9 | self.assertIsNotNone(app) 10 | 11 | def test_debug_toolbar(self): 12 | """ Tests that debug toolbar loads if initialized correctly 13 | """ 14 | 15 | # create an app 16 | app = flask.CustomFlask(__name__) 17 | self.assertIsNotNone(app) 18 | app.debug = True 19 | app.config['SECRET_KEY'] = 'this is a totally secret key btw' 20 | app.init_debug_toolbar() 21 | 22 | # add a dummy route 23 | @app.route('/') 24 | def index(): 25 | return 'test' 26 | 27 | client = app.test_client() 28 | response = client.get('/') 29 | self.assertEqual(response.status_code, 200) 30 | self.assertIn('flDebug', str(response.data)) 31 | -------------------------------------------------------------------------------- /brainzutils/flask/test/test_ratelimit.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | from time import sleep 4 | 5 | from brainzutils import flask, cache 6 | from brainzutils.ratelimit import ratelimit, set_rate_limits, inject_x_rate_headers, set_user_validation_function 7 | 8 | valid_user = "41FB6EEB-636B-4F7C-B376-3A8613F1E69A" 9 | def validate_user(user): 10 | if user == valid_user: 11 | return True 12 | return False 13 | 14 | class RatelimitTestCase(unittest.TestCase): 15 | 16 | host = os.environ.get("REDIS_HOST", "localhost") 17 | port = 6379 18 | namespace = "NS_TEST" 19 | max_ip_requests = 3 20 | max_token_requests = 5 21 | ratelimit_window = 10 22 | 23 | def setUp(self): 24 | cache.init( 25 | host=self.host, 26 | port=self.port, 27 | namespace=self.namespace, 28 | ) 29 | # Making sure there are no items in cache before we run each test 30 | cache.flush_all() 31 | 32 | def test_create_app(self): 33 | app = flask.CustomFlask(__name__) 34 | self.assertIsNotNone(app) 35 | 36 | def test_ratelimit(self): 37 | """ Tests that the ratelimit decorator works 38 | """ 39 | 40 | # Set the limits as per defines in this class 41 | set_rate_limits(self.max_token_requests, self.max_ip_requests, self.ratelimit_window) 42 | 43 | # create an app 44 | app = flask.CustomFlask(__name__) 45 | self.assertIsNotNone(app) 46 | app.debug = True 47 | app.config['SECRET_KEY'] = 'this is a totally secret key btw' 48 | app.init_debug_toolbar() 49 | 50 | @app.after_request 51 | def after_request_callbacks(response): 52 | return 
inject_x_rate_headers(response)
53 | 
54 | # add a dummy route
55 | @app.route('/')
56 | @ratelimit()
57 | def index():
58 | return 'test'
59 | 
60 | def print_headers(response):
61 | print("X-RateLimit-Remaining", response.headers['X-RateLimit-Remaining'])
62 | print("X-RateLimit-Limit", response.headers['X-RateLimit-Limit'])
63 | print("X-RateLimit-Reset", response.headers['X-RateLimit-Reset'])
64 | print("X-RateLimit-Reset-In", response.headers['X-RateLimit-Reset-In'])
65 | print()
66 | 
67 | 
68 | def make_requests(client, nominal_num_requests, token=None):
69 | 
70 | print("===== make %d requests" % nominal_num_requests)
71 | # make one more than the allowed number of requests to catch the 429
72 | num_requests = nominal_num_requests + 1
73 | 
74 | # make a specified number of requests
75 | while True:
76 | reset_time = 0
77 | restart = False
78 | for i in range(num_requests):
79 | if token:
80 | response = client.get('/', headers={'Authorization': token})
81 | else:
82 | response = client.get('/')
83 | if reset_time == 0:
84 | reset_time = response.headers['X-RateLimit-Reset']
85 | 
86 | if reset_time != response.headers['X-RateLimit-Reset']:
87 | # Whoops, we didn't get our tests done before the window expired. Start over.
88 | restart = True
89 | 
90 | # when restarting we need to make one request fewer, since the current request counts towards the new window
91 | num_requests = nominal_num_requests
92 | break
93 | 
94 | if i == num_requests - 1:
95 | self.assertEqual(response.status_code, 429)
96 | else:
97 | self.assertEqual(response.status_code, 200)
98 | self.assertEqual(int(response.headers['X-RateLimit-Remaining']), num_requests - i - 2)
99 | print_headers(response)
100 | 
101 | sleep(1.1)
102 | 
103 | if not restart:
104 | break
105 | 
106 | client = app.test_client()
107 | 
108 | # Make a pile of requests based on IP address
109 | make_requests(client, self.max_ip_requests)
110 | 
111 | # Set a user token and make requests based on the token
112 | cache.flush_all()
113 | set_user_validation_function(validate_user)
114 | set_rate_limits(self.max_token_requests, self.max_ip_requests, self.ratelimit_window)
115 | make_requests(client, self.max_token_requests, token="Token %s" % valid_user)
116 | 
-------------------------------------------------------------------------------- /brainzutils/mail.py: --------------------------------------------------------------------------------
1 | """This module provides a way to send emails."""
2 | from email.mime.application import MIMEApplication
3 | from email.mime.multipart import MIMEMultipart
4 | from email.mime.text import MIMEText
5 | from typing import List
6 | import smtplib
7 | import socket
8 | 
9 | from flask import current_app
10 | 
11 | 
12 | def send_mail(subject: str, text: str, recipients: List[str], attachments=None,
13 | from_name="MetaBrainz Notifications",
14 | from_addr=None, boundary=None):
15 | """This function can be used as a foundation for sending email.
16 | 
17 | Args:
18 | subject: Subject of the message.
19 | text: The message itself.
20 | recipients: List of recipients.
21 | attachments: List of (file object, subtype, name) tuples. For example:
22 | (<file object>, 'pdf', 'receipt.pdf').
23 | from_name: Name of the sender.
24 | from_addr: Email address of the sender. 
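boundary: Optional MIME boundary to use for the multipart message.

Example:
    A minimal sketch (the addresses are placeholders; SMTP_SERVER and
    SMTP_PORT must be set in the Flask app config):

        send_mail(
            subject="Donation received",
            text="Thank you for your donation!",
            recipients=["user@example.org"],
        )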
25 | """ 26 | if not isinstance(recipients, list): 27 | raise ValueError("recipients must be a list of email addresses") 28 | 29 | if 'SMTP_SERVER' not in current_app.config or 'SMTP_PORT' not in current_app.config: 30 | raise ValueError("Flask current_app requires config items SMTP_SERVER and SMTP_PORT to be set") 31 | 32 | if attachments is None: 33 | attachments = [] 34 | if from_addr is None: 35 | from_addr = 'noreply@' + current_app.config['MAIL_FROM_DOMAIN'] 36 | 37 | if current_app.config['TESTING']: # Not sending any emails during the testing process 38 | return 39 | 40 | if not recipients: 41 | return 42 | 43 | message = MIMEMultipart() 44 | 45 | if boundary is not None: 46 | message = MIMEMultipart(boundary=boundary) 47 | 48 | message['To'] = ", ".join(recipients) 49 | message['Subject'] = subject 50 | message['From'] = "%s <%s>" % (from_name, from_addr) 51 | message.attach(MIMEText(text, _charset='utf-8')) 52 | 53 | for attachment in attachments: 54 | file_obj, subtype, name = attachment 55 | attachment = MIMEApplication(file_obj.read(), _subtype=subtype) 56 | file_obj.close() # FIXME(roman): This feels kind of hacky. Maybe there's a better way? 57 | attachment.add_header('content-disposition', 'attachment', filename=name) 58 | message.attach(attachment) 59 | try: 60 | smtp_server = smtplib.SMTP(current_app.config['SMTP_SERVER'], current_app.config['SMTP_PORT']) 61 | except (socket.error, smtplib.SMTPException) as e: 62 | current_app.logger.error('Error while sending email: %s', e, exc_info=True) 63 | raise MailException(e) 64 | smtp_server.sendmail(from_addr, recipients, message.as_string()) 65 | smtp_server.quit() 66 | 67 | 68 | class MailException(Exception): 69 | pass 70 | -------------------------------------------------------------------------------- /brainzutils/metrics.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | import os 3 | import socket 4 | import logging 5 | from time import time_ns 6 | from typing import Dict 7 | 8 | from brainzutils import cache 9 | 10 | REDIS_METRICS_KEY = "metrics:influx_data" 11 | _metrics_project_name = None 12 | 13 | 14 | def init(project): 15 | global _metrics_project_name 16 | _metrics_project_name = project 17 | 18 | 19 | def metrics_init_required(f): 20 | @wraps(f) 21 | def decorated(*args, **kwargs): 22 | if not _metrics_project_name: 23 | raise RuntimeError("Metrics module needs to be initialized before use") 24 | return f(*args, **kwargs) 25 | return decorated 26 | 27 | 28 | @cache.init_required 29 | @metrics_init_required 30 | def set(metric_name: str, tags: Dict[str, str] = None, timestamp: int = None, **fields): 31 | """ 32 | Submit a metric to be read by the MetaBrainz influx datastore for graphing/monitoring 33 | purposes. These metrics are stored in redis in the influxdb line protocol format: 34 | https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/ 35 | 36 | Args: 37 | metric_name: The name of the metric to record. 38 | tags: Additional influx tags to write with the metric. (optional) 39 | timestamp: A nanosecond timestamp to use for this metric. If not provided 40 | the current time is used. 41 | fields: The key, value pairs to store with this metric. 
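Example:
    An illustrative sketch (assumes cache.init() and metrics.init("myproject")
    were called first; the metric and field names are placeholders):

        set("thread_count", tags={"type": "worker"}, count=12, alive=True)

    This is stored as a line protocol entry similar to:

        thread_count,type=worker,dc=hetzner,server=<host>,project=myproject count=12i,alive=t <timestamp>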
42 | """ 43 | 44 | # Add types to influx data 45 | try: 46 | host = os.environ['PRIVATE_IP'] 47 | except KeyError: 48 | host = socket.gethostname() 49 | 50 | if tags is None: 51 | tags = {} 52 | 53 | tags["dc"] = "hetzner" 54 | tags["server"] = host 55 | tags["project"] = _metrics_project_name 56 | tag_string = ",".join([ "%s=%s" % (k, v) for k, v in tags.items() ]) 57 | 58 | fields_list = [] 59 | for k, v in fields.items(): 60 | if type(v) == int: 61 | fields_list.append("%s=%di" % (k, v)) 62 | elif type(v) == float: 63 | fields_list.append('%s=%f' % (k, v)) 64 | elif type(v) == bool: 65 | val = "t" if v else "f" 66 | fields_list.append("%s=%s" % (k, val)) 67 | elif type(fields[k]) == str: 68 | fields_list.append('%s="%s"' % (k, v)) 69 | else: 70 | fields_list.append("%s=%s" % (k, str(v))) 71 | 72 | fields = ",".join(fields_list) 73 | 74 | if timestamp is None: 75 | timestamp = time_ns() 76 | 77 | metric = "%s,%s %s %d" % (metric_name, tag_string, fields, timestamp) 78 | try: 79 | cache._r.rpush(REDIS_METRICS_KEY, metric) 80 | except Exception: 81 | logging.error("Cannot set redis metric:", exc_info=True) 82 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/__init__.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from sqlalchemy import create_engine 3 | from sqlalchemy.orm import sessionmaker, scoped_session, Session 4 | from sqlalchemy.pool import NullPool 5 | 6 | 7 | engine = None 8 | 9 | 10 | def init_db_engine(connect_str): 11 | global engine, Session 12 | engine = create_engine(connect_str, poolclass=NullPool) 13 | Session = scoped_session( 14 | sessionmaker(bind=engine) 15 | ) 16 | 17 | 18 | @contextmanager 19 | def mb_session(): 20 | session = Session() 21 | try: 22 | yield session 23 | finally: 24 | session.close() 25 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/artist.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from sqlalchemy.orm import joinedload 3 | from mbdata import models 4 | from brainzutils.musicbrainz_db import mb_session 5 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 6 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 7 | from brainzutils.musicbrainz_db.serialize import serialize_artists 8 | from brainzutils.musicbrainz_db.includes import check_includes 9 | 10 | 11 | def get_artist_by_mbid(mbid, includes=None): 12 | """Get artist with MusicBrainz ID. 13 | Args: 14 | mbid (uuid): MBID(gid) of the artist. 15 | includes (list): List of values to be included. 16 | For list of possible values see includes.py. 17 | Returns: 18 | Dictionary containing the artist information, or None if the artist doesn't exist. 19 | """ 20 | if includes is None: 21 | includes = [] 22 | 23 | return fetch_multiple_artists( 24 | [mbid], 25 | includes=includes, 26 | ).get(mbid) 27 | 28 | 29 | def fetch_multiple_artists(mbids, includes=None): 30 | """Get info related to multiple artists using their MusicBrainz IDs. 31 | Args: 32 | mbids (list): List of MBIDs of artists. 33 | includes (list): List of information to be included. 34 | Returns: 35 | A dictionary containing info of multiple artists keyed by their MBID. 36 | If an MBID doesn't exist in the database, it isn't returned. 
37 | If an MBID is a redirect, the dictionary key will be the MBID given as an argument, 38 | but the returned object will contain the new MBID in the 'mbid' key. 39 | """ 40 | 41 | if includes is None: 42 | includes = [] 43 | includes_data = defaultdict(dict) 44 | check_includes('artist', includes) 45 | 46 | with mb_session() as db: 47 | query = db.query(models.Artist).options(joinedload(models.Artist.type)) 48 | 49 | artists = get_entities_by_gids( 50 | query=query, 51 | entity_type='artist', 52 | mbids=mbids, 53 | ) 54 | 55 | artist_ids = [artist.id for artist in artists.values()] 56 | 57 | if 'artist-rels' in includes: 58 | get_relationship_info( 59 | db=db, 60 | target_type='artist', 61 | source_type='artist', 62 | source_entity_ids=artist_ids, 63 | includes_data=includes_data, 64 | ) 65 | if 'url-rels' in includes: 66 | get_relationship_info( 67 | db=db, 68 | target_type='url', 69 | source_type='artist', 70 | source_entity_ids=artist_ids, 71 | includes_data=includes_data, 72 | ) 73 | 74 | artists = {str(mbid): serialize_artists(artist, includes_data[artist.id]) for mbid, artist in artists.items()} 75 | return artists 76 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/editor.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from sqlalchemy.orm import joinedload 3 | from mbdata import models 4 | from brainzutils.musicbrainz_db import mb_session 5 | from brainzutils.musicbrainz_db.utils import get_entities_by_ids 6 | from brainzutils.musicbrainz_db.serialize import serialize_editor 7 | from brainzutils.musicbrainz_db.includes import check_includes 8 | 9 | 10 | def get_editor_by_id(editor_id, includes=None): 11 | """Get editor with editor ID. 12 | Args: 13 | editor_id (int): ID of the editor. 14 | Returns: 15 | Dictionary containing the editor information 16 | """ 17 | if includes is None: 18 | includes = [] 19 | 20 | return fetch_multiple_editors( 21 | [editor_id], 22 | includes=includes, 23 | ).get(editor_id) 24 | 25 | 26 | def fetch_multiple_editors(editor_ids, includes=None): 27 | """Get info related to multiple editors using their editor IDs. 28 | Args: 29 | editor_ids (list): List of IDs of editors. 30 | includes (list): List of information to be included. 31 | Returns: 32 | Dictionary containing info of multiple editors keyed by their editor_id. 
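Example:
    An illustrative sketch (the editor IDs are placeholders):

        editors = fetch_multiple_editors([2323, 2324])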
33 | """ 34 | if includes is None: 35 | includes = [] 36 | 37 | includes_data = defaultdict(dict) 38 | check_includes('editor', includes) 39 | with mb_session() as db: 40 | query = db.query(models.Editor) 41 | editors = get_entities_by_ids( 42 | query=query, 43 | entity_type='editor', 44 | ids=editor_ids, 45 | ) 46 | editor_ids = [editor.id for editor in editors.values()] 47 | editors = {editor_id: serialize_editor(editors[editor_id], includes_data) for editor_id in editor_ids} 48 | 49 | return editors 50 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/event.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import List 3 | from uuid import UUID 4 | 5 | from mbdata import models 6 | from sqlalchemy import or_, nullslast 7 | from sqlalchemy.orm import contains_eager, joinedload 8 | 9 | from brainzutils.musicbrainz_db import mb_session 10 | import brainzutils.musicbrainz_db.exceptions as mb_exceptions 11 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 12 | from brainzutils.musicbrainz_db.includes import check_includes 13 | from brainzutils.musicbrainz_db.serialize import serialize_events 14 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 15 | 16 | def get_mapped_event_types(event_types: list) -> list: 17 | """ Get event types mapped to their case sensitive name in musicbrainz. 18 | event_type table in the database. 19 | 20 | Args: 21 | event_types (list): List of event types. 22 | Returns: 23 | List of mapped event types. 24 | 25 | """ 26 | event_types = [event_type.lower() for event_type in event_types] 27 | mapped_event_types = [] 28 | with mb_session() as db: 29 | supported_types = [event_type.name for event_type in db.query(models.EventType).all()] 30 | event_type_mapping = {supported_type.lower(): supported_type for supported_type in supported_types} 31 | 32 | for event_type in event_types: 33 | if event_type in event_type_mapping: 34 | mapped_event_types.append(event_type_mapping[event_type]) 35 | else: 36 | raise mb_exceptions.InvalidTypeError("Bad event_type: {etype} is not supported".format(etype = event_type)) 37 | 38 | return mapped_event_types 39 | 40 | 41 | def get_event_by_mbid(mbid, includes=None): 42 | """Get event with the MusicBrainz ID. 43 | 44 | Args: 45 | mbid (uuid): MBID(gid) of the event. 46 | Returns: 47 | Dictionary containing the event information, or None if the event doesn't exist. 48 | """ 49 | if includes is None: 50 | includes = [] 51 | 52 | return fetch_multiple_events( 53 | [mbid], 54 | includes=includes, 55 | ).get(mbid) 56 | 57 | 58 | def fetch_multiple_events(mbids, includes=None): 59 | """Get info related to multiple events using their MusicBrainz IDs. 60 | 61 | Args: 62 | mbids (list): List of MBIDs of events. 63 | includes (list): List of information to be included. 64 | 65 | Returns: 66 | A dictionary containing info of multiple events keyed by their MBID. 67 | If an MBID doesn't exist in the database, it isn't returned. 68 | If an MBID is a redirect, the dictionary key will be the MBID given as an argument, 69 | but the returned object will contain the new MBID in the 'mbid' key. 
70 | """
71 | if includes is None:
72 | includes = []
73 | includes_data = defaultdict(dict)
74 | check_includes('event', includes)
75 | with mb_session() as db:
76 | query = db.query(models.Event).options(joinedload(models.Event.type))
77 | events = get_entities_by_gids(
78 | query=query,
79 | entity_type='event',
80 | mbids=mbids,
81 | )
82 | event_ids = [event.id for event in events.values()]
83 | 
84 | if 'artist-rels' in includes:
85 | get_relationship_info(
86 | db=db,
87 | target_type='artist',
88 | source_type='event',
89 | source_entity_ids=event_ids,
90 | includes_data=includes_data,
91 | )
92 | if 'place-rels' in includes:
93 | get_relationship_info(
94 | db=db,
95 | target_type='place',
96 | source_type='event',
97 | source_entity_ids=event_ids,
98 | includes_data=includes_data,
99 | )
100 | if 'series-rels' in includes:
101 | get_relationship_info(
102 | db=db,
103 | target_type='series',
104 | source_type='event',
105 | source_entity_ids=event_ids,
106 | includes_data=includes_data,
107 | )
108 | if 'url-rels' in includes:
109 | get_relationship_info(
110 | db=db,
111 | target_type='url',
112 | source_type='event',
113 | source_entity_ids=event_ids,
114 | includes_data=includes_data,
115 | )
116 | if 'release-group-rels' in includes:
117 | get_relationship_info(
118 | db=db,
119 | target_type='release_group',
120 | source_type='event',
121 | source_entity_ids=event_ids,
122 | includes_data=includes_data,
123 | )
124 | 
125 | return {str(mbid): serialize_events(event, includes_data[event.id]) for mbid, event in events.items()}
126 | 
127 | 
128 | def get_events_for_place(place_id: UUID, event_types: List[str] = [], include_null_type: bool = True, limit: int = None, offset: int = None) -> tuple:
129 | """Get all events that occurred at a place.
130 | 
131 | Args:
132 | place_id: MBID of the place.
133 | event_types: List of types of events to be fetched. The supported event_types are
134 | 'Concert', 'Festival', 'Convention/Expo', 'Launch event', 'Award ceremony', 'Stage performance', and 'Masterclass/Clinic'.
135 | include_null_type: Whether to include events with no type.
136 | limit: Max number of events to return.
137 | offset: Offset that can be used in conjunction with the limit.
138 | 
139 | Returns:
140 | Tuple containing the list of dictionaries of events and the total count of the events.
141 | The list of dictionaries of events is ordered by event begin year, begin month, begin date,
142 | begin time, and event name. In case one of these is set to NULL, it will be ordered last. 
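Example:
    An illustrative sketch (the place MBID is a placeholder):

        events, count = get_events_for_place(
            place_id="4352063b-a833-421b-a420-e7fb295dece0",
            event_types=["Concert", "Festival"],
            limit=10,
            offset=0,
        )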
143 | """ 144 | 145 | place_id = str(place_id) 146 | event_types = get_mapped_event_types(event_types) 147 | 148 | with mb_session() as db: 149 | event_query = db.query(models.Event).outerjoin(models.EventType).\ 150 | options(contains_eager(models.Event.type)).\ 151 | join(models.LinkEventPlace, models.Event.id == models.LinkEventPlace.entity0_id).\ 152 | join(models.Place, models.LinkEventPlace.entity1_id == models.Place.id).\ 153 | filter(models.Place.gid == place_id) 154 | 155 | if include_null_type and event_types: 156 | event_query = event_query.filter(or_(models.Event.type == None, models.EventType.name.in_(event_types))) 157 | elif event_types: 158 | event_query = event_query.filter(models.EventType.name.in_(event_types)) 159 | 160 | event_query = event_query.order_by( 161 | nullslast(models.Event.begin_date_year.desc()), 162 | nullslast(models.Event.begin_date_month.desc()), 163 | nullslast(models.Event.begin_date_day.desc()), 164 | nullslast(models.Event.time.desc()), 165 | nullslast(models.Event.name.asc()) 166 | ) 167 | count = event_query.count() 168 | events = event_query.limit(limit).offset(offset).all() 169 | 170 | return ([serialize_events(event) for event in events], count) 171 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/exceptions.py: -------------------------------------------------------------------------------- 1 | class MBDatabaseException(Exception): 2 | """Base exception for all exceptions related to MusicBrainz database""" 3 | pass 4 | 5 | 6 | class InvalidTypeError(MBDatabaseException): 7 | """Exception related to wrong type in present functions""" 8 | pass 9 | 10 | 11 | class InvalidIncludeError(MBDatabaseException): 12 | """Exception related to wrong includes in present functions""" 13 | pass 14 | 15 | 16 | class NoDataFoundException(MBDatabaseException): 17 | """Exception to be raised when no data has been found""" 18 | pass 19 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/helpers.py: -------------------------------------------------------------------------------- 1 | from mbdata.utils.models import get_link_model 2 | from mbdata.models import Tag, Link 3 | from sqlalchemy.orm import joinedload 4 | from sqlalchemy import func 5 | from brainzutils.musicbrainz_db.models import ENTITY_MODELS 6 | 7 | 8 | def get_relationship_info(db, target_type, source_type, source_entity_ids, includes_data): 9 | """Get information related to relationships between different entities. 10 | 11 | Keep in mind that includes_data (dict) is altered to contain the relationship objects 12 | keyed by the source entity MBIDs. 13 | 14 | Args: 15 | db (Session object): Session object. 16 | target_type (str): Type of target entity. 17 | source_type (str): Type of source entity. 18 | source_entity_ids (list): IDs of the source entity. 19 | includes_data (dict): Dictionary containing includes data of entities. 20 | """ 21 | source_model = ENTITY_MODELS[source_type] 22 | target_model = ENTITY_MODELS[target_type] 23 | relation = get_link_model(source_model, target_model) 24 | 25 | query = db.query(relation).\ 26 | options( 27 | joinedload(relation.link, innerjoin=True). 
28 | joinedload(Link.link_type, innerjoin=True) 29 | ) 30 | if relation.entity0.property.mapper.class_ == relation.entity1.property.mapper.class_: 31 | _relationship_link_helper(relation, query, "entity0", "entity1", target_type, source_entity_ids, includes_data) 32 | _relationship_link_helper(relation, query, "entity1", "entity0", target_type, source_entity_ids, includes_data) 33 | else: 34 | if source_model == relation.entity0.property.mapper.class_: 35 | _relationship_link_helper(relation, query, "entity0", "entity1", target_type, source_entity_ids, includes_data) 36 | else: 37 | _relationship_link_helper(relation, query, "entity1", "entity0", target_type, source_entity_ids, includes_data) 38 | 39 | 40 | def _relationship_link_helper(relation, query, source_attr, target_attr, target_type, source_entity_ids, includes_data): 41 | """Get relationship links between two entities. 42 | 43 | Keep in mind that includes_data (dict) is altered to contain the relationship objects 44 | keyed by the source entity MBIDs. 45 | 46 | Args: 47 | relation (mbdata.model): Model relating the two entities. 48 | query (Session.query): Query object. 49 | source_attr (str): 'entity0' or 'entity1' based on which represents source model in relation table. 50 | target_attr (str): 'entity0' or 'entity1' based on which represents target model in relation table. 51 | target_type (str): Type of the target entity. 52 | source_entity_ids (list): IDs of the source entity. 53 | includes_data (dict): Dictionary containing the includes data of entities. 54 | """ 55 | source_id_attr = source_attr + "_id" 56 | query = query.filter(getattr(relation, source_id_attr).in_(source_entity_ids)) 57 | query = query.options(joinedload(getattr(relation, target_attr), innerjoin=True)) 58 | relation_type = target_type + "-rels" 59 | for link in query: 60 | includes_data[getattr(link, source_id_attr)].setdefault('relationship_objs', {}).\ 61 | setdefault(relation_type, []).append(link) 62 | 63 | 64 | def get_tags(db, entity_model, tag_model, foreign_tag_id, entity_ids): 65 | """Get tags associated with entities. 66 | 67 | Args: 68 | db (Session object): Session object. 69 | entity_model (mbdata.models): Model of the entity. 70 | tag_model (mbdata.models): Tag of the model. 71 | foreign_tag_id (tag_model.foreign_key): Foreign ID that joins the tag model and entity model 72 | entity_ids (list): IDs of the entity whose tags are to be fetched 73 | 74 | Returns: 75 | List of tuples containing the entity_ids and the list of associated tags. 
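Example of the returned shape (illustrative; the IDs and tag names are
placeholders):

    [(42, ['rock', 'seen live']), (43, ['jazz'])]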
76 | """ 77 | tags = db.query(entity_model.id, func.array_agg(Tag.name)).\ 78 | join(tag_model, entity_model.id == foreign_tag_id).\ 79 | join(Tag).\ 80 | filter(entity_model.id.in_(entity_ids)).\ 81 | group_by(entity_model.id).\ 82 | all() 83 | return tags -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/includes.py: -------------------------------------------------------------------------------- 1 | import brainzutils.musicbrainz_db.exceptions as mb_exceptions 2 | 3 | 4 | RELATABLE_TYPES = [ 5 | 'area', 6 | 'artist', 7 | 'label', 8 | 'place', 9 | 'event', 10 | 'recording', 11 | 'release', 12 | 'release-group', 13 | 'series', 14 | 'url', 15 | 'work', 16 | 'instrument' 17 | ] 18 | 19 | RELATION_INCLUDES = [entity + '-rels' for entity in RELATABLE_TYPES] 20 | 21 | TAG_INCLUDES = ["tags"] 22 | 23 | VALID_INCLUDES = { 24 | 'place': ["aliases", "annotation"] + RELATION_INCLUDES + TAG_INCLUDES, 25 | 'event': ["aliases"] + RELATION_INCLUDES + TAG_INCLUDES, 26 | 'recording': ["artist", "artists", "isrc"] + TAG_INCLUDES + RELATION_INCLUDES, 27 | 'release_group': ["artists", "media", "releases"] + TAG_INCLUDES + RELATION_INCLUDES, 28 | 'release': [ 29 | "artists", "labels", "recordings", "release-groups", "media", "annotation", "aliases" 30 | ] + TAG_INCLUDES + RELATION_INCLUDES, 31 | 'artist': ["recordings", "releases", "media", "aliases", "annotation"] + RELATION_INCLUDES + TAG_INCLUDES, 32 | 'label': ["area", "aliases", "annotation"] + RELATION_INCLUDES + TAG_INCLUDES, 33 | 'work': ["artists", "recordings", "aliases", "annotation"] + RELATION_INCLUDES + TAG_INCLUDES, 34 | 'editor': [], # TODO: List includes here (BU-18) 35 | } 36 | 37 | 38 | def check_includes(entity, includes): 39 | """Check if includes specified for an entity are valid includes.""" 40 | for include in includes: 41 | if include not in VALID_INCLUDES[entity]: 42 | raise mb_exceptions.InvalidIncludeError("Bad includes: {inc} is not a valid include".format(inc=include)) 43 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/label.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from mbdata import models 3 | from sqlalchemy.orm import joinedload 4 | from brainzutils.musicbrainz_db import mb_session 5 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 6 | from brainzutils.musicbrainz_db.includes import check_includes 7 | from brainzutils.musicbrainz_db.serialize import serialize_labels 8 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 9 | 10 | 11 | def get_label_by_mbid(mbid, includes=None): 12 | """Get label with the MusicBrainz ID. 13 | 14 | Args: 15 | mbid (uuid): MBID(gid) of the label. 16 | Returns: 17 | Dictionary containing the label information, or None if the label doesn't exist. 18 | """ 19 | if includes is None: 20 | includes = [] 21 | 22 | return fetch_multiple_labels( 23 | [mbid], 24 | includes=includes, 25 | ).get(mbid) 26 | 27 | 28 | def fetch_multiple_labels(mbids, includes=None): 29 | """Get info related to multiple labels using their MusicBrainz IDs. 30 | 31 | Args: 32 | mbids (list): List of MBIDs of labels. 33 | includes (list): List of information to be included. 34 | Returns: 35 | A dictionary containing info of multiple labels keyed by their MBID. 36 | If an MBID doesn't exist in the database, it isn't returned. 
37 | If an MBID is a redirect, the dictionary key will be the MBID given as an argument, 38 | but the returned object will contain the new MBID in the 'mbid' key. 39 | """ 40 | if includes is None: 41 | includes = [] 42 | includes_data = defaultdict(dict) 43 | check_includes('label', includes) 44 | with mb_session() as db: 45 | query = db.query(models.Label).\ 46 | options(joinedload(models.Label.type)).\ 47 | options(joinedload(models.Label.area)) 48 | labels = get_entities_by_gids( 49 | query=query, 50 | entity_type='label', 51 | mbids=mbids, 52 | ) 53 | label_ids = [label.id for label in labels.values()] 54 | 55 | if 'artist-rels' in includes: 56 | get_relationship_info( 57 | db=db, 58 | target_type='artist', 59 | source_type='label', 60 | source_entity_ids=label_ids, 61 | includes_data=includes_data, 62 | ) 63 | 64 | if 'url-rels' in includes: 65 | get_relationship_info( 66 | db=db, 67 | target_type='url', 68 | source_type='label', 69 | source_entity_ids=label_ids, 70 | includes_data=includes_data, 71 | ) 72 | 73 | return {str(mbid): serialize_labels(label, includes_data[label.id]) for mbid, label in labels.items()} 74 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/models.py: -------------------------------------------------------------------------------- 1 | from mbdata import models 2 | 3 | 4 | # Entity models 5 | ENTITY_MODELS = { 6 | 'artist': models.Artist, 7 | 'place': models.Place, 8 | 'release_group': models.ReleaseGroup, 9 | 'release': models.Release, 10 | 'event': models.Event, 11 | 'label': models.Label, 12 | 'series': models.Series, 13 | 'url': models.URL, 14 | 'recording': models.Recording, 15 | 'work': models.Work, 16 | 'editor': models.Editor, 17 | } 18 | 19 | 20 | # Redirect models 21 | REDIRECT_MODELS = { 22 | 'place': models.PlaceGIDRedirect, 23 | 'artist': models.ArtistGIDRedirect, 24 | 'release': models.ReleaseGIDRedirect, 25 | 'release_group': models.ReleaseGroupGIDRedirect, 26 | 'event': models.EventGIDRedirect, 27 | 'label': models.LabelGIDRedirect, 28 | 'recording': models.RecordingGIDRedirect, 29 | 'work': models.WorkGIDRedirect, 30 | } 31 | 32 | 33 | # Meta models 34 | META_MODELS = { 35 | 'label': models.LabelMeta, 36 | 'release_group': models.ReleaseGroupMeta, 37 | 'event': models.EventMeta, 38 | 'work': models.WorkMeta, 39 | 'artist': models.ArtistMeta, 40 | 'recording': models.RecordingMeta, 41 | } 42 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/place.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from mbdata import models 3 | from sqlalchemy.orm import joinedload 4 | from brainzutils.musicbrainz_db import mb_session 5 | from brainzutils.musicbrainz_db.includes import check_includes 6 | from brainzutils.musicbrainz_db.serialize import serialize_places 7 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 8 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 9 | 10 | 11 | def get_place_by_mbid(mbid, includes=None): 12 | """Get place with the MusicBrainz ID. 13 | 14 | Args: 15 | mbid (uuid): MBID(gid) of the place. 16 | Returns: 17 | Dictionary containing the place information, or None if the place doesn't exist. 
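Example:
    An illustrative sketch (the MBID is a placeholder):

        place = get_place_by_mbid("4352063b-a833-421b-a420-e7fb295dece0")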
18 | """ 19 | if includes is None: 20 | includes = [] 21 | 22 | return fetch_multiple_places( 23 | [mbid], 24 | includes=includes, 25 | ).get(mbid) 26 | 27 | 28 | def fetch_multiple_places(mbids, includes=None): 29 | """Get info related to multiple places using their MusicBrainz IDs. 30 | 31 | Args: 32 | mbids (list): List of MBIDs of places. 33 | includes (list): List of information to be included. 34 | 35 | Returns: 36 | A dictionary containing info of multiple places keyed by their MBID. 37 | If an MBID doesn't exist in the database, it isn't returned. 38 | If an MBID is a redirect, the dictionary key will be the MBID given as an argument, 39 | but the returned object will contain the new MBID in the 'mbid' key. 40 | """ 41 | if includes is None: 42 | includes = [] 43 | includes_data = defaultdict(dict) 44 | check_includes('place', includes) 45 | with mb_session() as db: 46 | query = db.query(models.Place).\ 47 | options(joinedload(models.Place.area)).\ 48 | options(joinedload(models.Place.type)) 49 | places = get_entities_by_gids( 50 | query=query, 51 | entity_type='place', 52 | mbids=mbids, 53 | ) 54 | place_ids = [place.id for place in places.values()] 55 | 56 | if 'artist-rels' in includes: 57 | get_relationship_info( 58 | db=db, 59 | target_type='artist', 60 | source_type='place', 61 | source_entity_ids=place_ids, 62 | includes_data=includes_data, 63 | ) 64 | if 'place-rels' in includes: 65 | get_relationship_info( 66 | db=db, 67 | target_type='place', 68 | source_type='place', 69 | source_entity_ids=place_ids, 70 | includes_data=includes_data, 71 | ) 72 | if 'url-rels' in includes: 73 | get_relationship_info( 74 | db=db, 75 | target_type='url', 76 | source_type='place', 77 | source_entity_ids=place_ids, 78 | includes_data=includes_data, 79 | ) 80 | 81 | places = {str(mbid): serialize_places(place, includes_data[place.id]) for mbid, place in places.items()} 82 | return places 83 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/recording.py: -------------------------------------------------------------------------------- 1 | from brainzutils.musicbrainz_db import mb_session 2 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 3 | from brainzutils.musicbrainz_db.includes import check_includes 4 | from brainzutils.musicbrainz_db.serialize import serialize_recording 5 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 6 | from collections import defaultdict 7 | from mbdata.models import Recording, ArtistCredit, ArtistCreditName 8 | from sqlalchemy.orm import joinedload, subqueryload 9 | 10 | 11 | def get_recording_by_mbid(mbid, includes=None): 12 | """ Get recording with MusicBrainz ID. 13 | 14 | Args: 15 | mbid (uuid): MBID(gid) of the recording. 16 | includes (list): List of values to be included. 17 | For list of possible values visit https://bitbucket.org/lalinsky/mbdata/wiki/API/v1/includes#!recording 18 | Returns: 19 | Dictionary containing the recording information, or None if the recording doesn't exist. 20 | """ 21 | if includes is None: 22 | includes = [] 23 | return fetch_multiple_recordings( 24 | [mbid], 25 | includes=includes, 26 | ).get(mbid) 27 | 28 | 29 | def get_many_recordings_by_mbid(mbids, includes=None): 30 | """ Get multiple recordings with MusicBrainz IDs. It fetches recordings 31 | using fetch_multiple_recordings. 32 | 33 | Args: 34 | mbids (list): list of uuid (MBID(gid)) of the recordings. 35 | includes (list): List of values to be included. 
 36 |             For list of possible values visit https://bitbucket.org/lalinsky/mbdata/wiki/API/v1/includes#!recording
 37 |     Returns:
 38 |         A dictionary containing the recording's information with MBIDs as keys.
 39 |         If an MBID doesn't exist in the database, it isn't returned.
 40 |         If an MBID is a redirect, the dictionary key will be the MBID given as an argument,
 41 |         but the returned object will contain the new MBID in the 'mbid' key.
 42 |     """
 43 |     if includes is None:
 44 |         includes = []
 45 | 
 46 |     return fetch_multiple_recordings(
 47 |         mbids,
 48 |         includes,
 49 |     )
 50 | 
 51 | 
 52 | def fetch_multiple_recordings(mbids, includes=None):
 53 |     """ Fetch multiple recordings with MusicBrainz IDs.
 54 | 
 55 |     Args:
 56 |         mbids (list): list of uuid (MBID(gid)) of the recordings.
 57 |         includes (list): List of values to be included.
 58 |             For list of possible values visit https://bitbucket.org/lalinsky/mbdata/wiki/API/v1/includes#!recording
 59 |     Returns:
 60 |         Dictionary containing the recording information with MBIDs as keys.
 61 |             - id: Recording mbid
 62 |             - name: Name of the recording
 63 |             - length: length of the recording
 64 |             - artists:
 65 |                 - artist information: id, name, credited_name and join_phrase
 66 |     """
 67 |     if includes is None:
 68 |         includes = []
 69 |     includes_data = defaultdict(dict)
 70 |     check_includes('recording', includes)
 71 | 
 72 |     with mb_session() as db:
 73 |         query = db.query(Recording)
 74 | 
 75 |         if 'artist' in includes:
 76 |             query = query.options(joinedload(Recording.artist_credit, innerjoin=True))
 77 | 
 78 |         if 'artists' in includes:
 79 |             query = query.options(
 80 |                 joinedload(Recording.artist_credit, innerjoin=True).
 81 |                 joinedload(ArtistCredit.artists).
 82 |                 joinedload(ArtistCreditName.artist)
 83 |             )
 84 | 
 85 |         recordings = get_entities_by_gids(
 86 |             query=query,
 87 |             entity_type='recording',
 88 |             mbids=mbids,
 89 |         )
 90 | 
 91 |         recording_ids = [recording.id for recording in recordings.values()]
 92 | 
 93 |         if 'artist' in includes:
 94 |             for recording in recordings.values():
 95 |                 includes_data[recording.id]['artist'] = recording.artist_credit
 96 | 
 97 |         if 'artists' in includes:
 98 |             for recording in recordings.values():
 99 |                 includes_data[recording.id]['artists'] = recording.artist_credit.artists
100 |                 includes_data[recording.id]['artist-credit-phrase'] = recording.artist_credit.name
101 | 
102 |         if 'url-rels' in includes:
103 |             get_relationship_info(
104 |                 db=db,
105 |                 target_type='url',
106 |                 source_type='recording',
107 |                 source_entity_ids=recording_ids,
108 |                 includes_data=includes_data,
109 |             )
110 | 
111 |         if 'work-rels' in includes:
112 |             get_relationship_info(
113 |                 db=db,
114 |                 target_type='work',
115 |                 source_type='recording',
116 |                 source_entity_ids=recording_ids,
117 |                 includes_data=includes_data,
118 |             )
119 | 
120 |         serial_recordings = {str(mbid): serialize_recording(recording, includes_data[recording.id])
121 |                              for mbid, recording in recordings.items()}
122 | 
123 |     return serial_recordings
124 | 
--------------------------------------------------------------------------------
/brainzutils/musicbrainz_db/release.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | from mbdata.models import Release, ReleaseGroup, Medium, Track, Recording, ArtistCredit, ArtistCreditName
  3 | from sqlalchemy.orm import joinedload
  4 | from brainzutils.musicbrainz_db import exceptions as mb_exceptions
  5 | from brainzutils.musicbrainz_db import mb_session
  6 | from brainzutils.musicbrainz_db.includes import check_includes
  7 | from brainzutils.musicbrainz_db.serialize import serialize_releases
  8 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids
  9 | from brainzutils.musicbrainz_db.helpers import get_relationship_info
 10 | from brainzutils.musicbrainz_db import recording
 11 | 
 12 | 
 13 | def get_release_by_mbid(mbid, includes=None):
 14 |     """Get release with the MusicBrainz ID.
 15 |     Args:
 16 |         mbid (uuid): MBID(gid) of the release.
 17 |         includes (list): List of values to be included.
 18 |             For list of possible values see includes.py.
 19 |     Returns:
 20 |         Dictionary containing the release information, or None if the release doesn't exist.
 21 |     """
 22 |     if includes is None:
 23 |         includes = []
 24 | 
 25 |     return fetch_multiple_releases(
 26 |         [mbid],
 27 |         includes=includes,
 28 |     ).get(mbid)
 29 | 
 30 | 
 31 | def fetch_multiple_releases(mbids, includes=None):
 32 |     """Get info related to multiple releases using their MusicBrainz IDs.
 33 |     Args:
 34 |         mbids (list): List of MBIDs of releases.
 35 |         includes (list): List of information to be included.
 36 |     Returns:
 37 |         A dictionary containing info of multiple releases keyed by their MBID.
 38 |         If an MBID doesn't exist in the database, it isn't returned.
 39 |         If an MBID is a redirect, the dictionary key will be the MBID given as an argument,
 40 |         but the returned object will contain the new MBID in the 'mbid' key.
 41 |     """
 42 |     if includes is None:
 43 |         includes = []
 44 |     includes_data = defaultdict(dict)
 45 |     check_includes('release', includes)
 46 |     with mb_session() as db:
 47 |         query = db.query(Release)
 48 |         if 'release-groups' in includes:
 49 |             query = query.options(joinedload(Release.release_group))
 50 |         if 'artists' in includes:
 51 |             query = query.options(
 52 |                 joinedload(Release.artist_credit).
 53 |                 joinedload(ArtistCredit.artists).
 54 |                 joinedload(ArtistCreditName.artist)
 55 |             )
 56 |         if 'media' in includes:
 57 |             # Fetch media with tracks
 58 |             query = query\
 59 |                 .options(
 60 |                     joinedload(Release.mediums)
 61 |                     .options(
 62 |                         joinedload(Medium.format),
 63 |                         joinedload(Medium.tracks).
 64 |                         joinedload(Track.recording).
 65 |                         joinedload(Recording.artist_credit).
 66 |                         joinedload(ArtistCredit.artists).
 67 |                         joinedload(ArtistCreditName.artist))
 68 |                 )
 69 |         releases = get_entities_by_gids(
 70 |             query=query,
 71 |             entity_type='release',
 72 |             mbids=mbids,
 73 |         )
 74 |         release_ids = [release.id for release in releases.values()]
 75 | 
 76 |         if 'release-groups' in includes:
 77 |             for release in releases.values():
 78 |                 includes_data[release.id]['release-groups'] = release.release_group
 79 | 
 80 |         if 'artists' in includes:
 81 |             for release in releases.values():
 82 |                 artist_credit_names = release.artist_credit.artists
 83 |                 includes_data[release.id]['artist-credit-names'] = artist_credit_names
 84 |                 includes_data[release.id]['artist-credit-phrase'] = release.artist_credit.name
 85 | 
 86 |         if 'media' in includes:
 87 |             for release in releases.values():
 88 |                 includes_data[release.id]['media'] = release.mediums
 89 | 
 90 |         if 'url-rels' in includes:
 91 |             get_relationship_info(
 92 |                 db=db,
 93 |                 target_type='url',
 94 |                 source_type='release',
 95 |                 source_entity_ids=release_ids,
 96 |                 includes_data=includes_data,
 97 |             )
 98 | 
 99 |         releases = {str(mbid): serialize_releases(release, includes_data[release.id])
100 |                     for mbid, release in releases.items()}
101 |     return releases
102 | 
103 | 
104 | def browse_releases(release_group_id, includes=None):
105 |     """Get all the releases by a certain release group.
106 |     You need to provide the Release Group's MusicBrainz ID.
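    Example (illustrative sketch; ``<release-group-mbid>`` is a placeholder
    for a release group MBID that is assumed to exist in the connected
    database)::

        releases = browse_releases('<release-group-mbid>', includes=['url-rels'])
        for mbid, release in releases.items():
            print(mbid, release['name'])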
107 | """ 108 | if includes is None: 109 | includes = [] 110 | with mb_session() as db: 111 | release_ids = db.query(Release.gid).\ 112 | join(ReleaseGroup).\ 113 | filter(ReleaseGroup.gid == release_group_id).all() 114 | release_ids = [release_id[0] for release_id in release_ids] 115 | releases = fetch_multiple_releases(release_ids, includes=includes) 116 | return releases 117 | 118 | 119 | def get_url_rels_from_releases(releases): 120 | """Returns all url-rels for a list of releases in a single list (of url-rel dictionaries) 121 | Typical usage with browse_releases() 122 | """ 123 | all_url_rels = [] 124 | for release_gid in releases.keys(): 125 | if 'url-rels' in releases[release_gid]: 126 | all_url_rels.extend([url_rel for url_rel in releases[release_gid]['url-rels']]) 127 | return all_url_rels 128 | 129 | 130 | def get_releases_using_recording_mbid(recording_mbid): 131 | """Returns a list of releases that contain the recording with 132 | the given recording MBID. 133 | 134 | Args: 135 | recording_mbid (UUID): recording MBID for which releases are to be fetched. 136 | 137 | Returns: 138 | serial_releases (list): list with dictionary elements of following format:: 139 | 140 | { 141 | 'id': , 142 | 'name': , 143 | } 144 | """ 145 | 146 | # First fetch the recording so that redirects don't create any problem 147 | recording_redirect = recording.get_recording_by_mbid(recording_mbid) 148 | recording_mbid = recording_redirect['mbid'] 149 | with mb_session() as db: 150 | releases = db.query(Release).\ 151 | join(Medium).\ 152 | join(Track).\ 153 | join(Recording).\ 154 | filter(Recording.gid == recording_mbid).all() 155 | 156 | serial_releases = [serialize_releases(release) for release in releases] 157 | if not serial_releases: 158 | raise mb_exceptions.NoDataFoundException("Couldn't find release for recording with MBID: %s." % str(recording_mbid)) 159 | 160 | return serial_releases 161 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/release_group.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from mbdata import models 3 | from sqlalchemy import nullslast, or_ 4 | from sqlalchemy.orm import contains_eager, joinedload 5 | from brainzutils.musicbrainz_db import mb_session 6 | import brainzutils.musicbrainz_db.exceptions as mb_exceptions 7 | from brainzutils.musicbrainz_db.includes import check_includes 8 | from brainzutils.musicbrainz_db.serialize import serialize_release_groups 9 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 10 | from brainzutils.musicbrainz_db.helpers import get_relationship_info, get_tags 11 | 12 | def get_mapped_release_types(release_types): 13 | """Get release types mapped to their case sensitive name in musicbrainz. 14 | release_group_primary_type table. 15 | 16 | Args: 17 | release_types (list): List of release types. 18 | Returns: 19 | List of mapped release types. 
20 | """ 21 | 22 | release_types = [release_type.lower() for release_type in release_types] 23 | mapped_release_types = [] 24 | with mb_session() as db: 25 | supported_types = [release_group_type.name for release_group_type in db.query(models.ReleaseGroupPrimaryType)] 26 | release_type_mapping = {supported_type.lower(): supported_type for supported_type in supported_types} 27 | 28 | for release_type in release_types: 29 | if release_type not in release_type_mapping: 30 | raise mb_exceptions.InvalidTypeError("Bad release_types: {rtype} is not supported".format(rtype = release_type)) 31 | else: 32 | mapped_release_types.append(release_type_mapping[release_type]) 33 | 34 | return mapped_release_types 35 | 36 | 37 | def get_release_group_by_mbid(mbid, includes=None): 38 | """Get release group with the MusicBrainz ID. 39 | Args: 40 | mbid (uuid): MBID(gid) of the release group. 41 | Returns: 42 | Dictionary containing the release group information, or None if the release group doesn't exist. 43 | """ 44 | if includes is None: 45 | includes = [] 46 | 47 | return fetch_multiple_release_groups( 48 | [mbid], 49 | includes=includes, 50 | ).get(mbid) 51 | 52 | 53 | def fetch_multiple_release_groups(mbids, includes=None): 54 | """Get info related to multiple release groups using their MusicBrainz IDs. 55 | Args: 56 | mbids (list): List of MBIDs of releases groups. 57 | includes (list): List of information to be included. 58 | Returns: 59 | A dictionary containing info of multiple release groups keyed by their MBID. 60 | If an MBID doesn't exist in the database, it isn't returned. 61 | If an MBID is a redirect, the dictionary key will be the MBID given as an argument, 62 | but the returned object will contain the new MBID in the 'mbid' key. 63 | """ 64 | if includes is None: 65 | includes = [] 66 | includes_data = defaultdict(dict) 67 | check_includes('release_group', includes) 68 | with mb_session() as db: 69 | # Join table meta which contains release date for a release group 70 | query = db.query(models.ReleaseGroup).options(joinedload(models.ReleaseGroup.meta)).\ 71 | options(joinedload(models.ReleaseGroup.type)) 72 | 73 | if 'artists' in includes: 74 | query = query.\ 75 | options( 76 | joinedload(models.ReleaseGroup.artist_credit). 77 | joinedload(models.ArtistCredit.artists). 
78 | joinedload(models.ArtistCreditName.artist) 79 | ) 80 | 81 | release_groups = get_entities_by_gids( 82 | query=query, 83 | entity_type='release_group', 84 | mbids=mbids, 85 | ) 86 | release_group_ids = [release_group.id for release_group in release_groups.values()] 87 | 88 | if 'artists' in includes: 89 | for release_group in release_groups.values(): 90 | artist_credit_names = release_group.artist_credit.artists 91 | includes_data[release_group.id]['artist-credit-names'] = artist_credit_names 92 | includes_data[release_group.id]['artist-credit-phrase'] = release_group.artist_credit.name 93 | 94 | if 'releases' in includes: 95 | query = db.query(models.Release).filter(getattr(models.Release, "release_group_id").in_(release_group_ids)) 96 | for release in query: 97 | includes_data[release.release_group_id].setdefault('releases', []).append(release) 98 | 99 | if 'release-group-rels' in includes: 100 | get_relationship_info( 101 | db=db, 102 | target_type='release_group', 103 | source_type='release_group', 104 | source_entity_ids=release_group_ids, 105 | includes_data=includes_data, 106 | ) 107 | 108 | if 'url-rels' in includes: 109 | get_relationship_info( 110 | db=db, 111 | target_type='url', 112 | source_type='release_group', 113 | source_entity_ids=release_group_ids, 114 | includes_data=includes_data, 115 | ) 116 | 117 | if 'work-rels' in includes: 118 | get_relationship_info( 119 | db=db, 120 | target_type='work', 121 | source_type='release_group', 122 | source_entity_ids=release_group_ids, 123 | includes_data=includes_data, 124 | ) 125 | 126 | if 'tags' in includes: 127 | release_group_tags = get_tags( 128 | db=db, 129 | entity_model=models.ReleaseGroup, 130 | tag_model=models.ReleaseGroupTag, 131 | foreign_tag_id=models.ReleaseGroupTag.release_group_id, 132 | entity_ids=release_group_ids, 133 | ) 134 | for release_group_id, tags in release_group_tags: 135 | includes_data[release_group_id]['tags'] = tags 136 | 137 | for release_group in release_groups.values(): 138 | includes_data[release_group.id]['meta'] = release_group.meta 139 | release_groups = {str(mbid): serialize_release_groups(release_group, includes_data[release_group.id]) 140 | for mbid, release_group in release_groups.items()} 141 | return release_groups 142 | 143 | 144 | def get_release_groups_for_artist(artist_id, release_types=None, limit=None, offset=None): 145 | """Get all release groups linked to an artist. 146 | 147 | Args: 148 | artist_id (uuid): MBID of the artist. 149 | release_types (list): List of types of release groups to be fetched. 150 | limit (int): Max number of release groups to return. 151 | offset (int): Offset that can be used in conjunction with the limit. 152 | 153 | Returns: 154 | Tuple containing the list of dictionaries of release groups ordered by release year 155 | and the total count of the release groups. 
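    Example (illustrative sketch; the artist MBID comes from this package's
    test data and is assumed to exist in the connected database)::

        release_groups, count = get_release_groups_for_artist(
            artist_id='f59c5520-5f46-4d2c-b2c4-822eabf53419',
            release_types=['album', 'single'],
            limit=10,
            offset=0,
        )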
156 | """ 157 | artist_id = str(artist_id) 158 | includes_data = defaultdict(dict) 159 | if release_types is None: 160 | release_types = [] 161 | release_types = [release_type.lower() for release_type in release_types] 162 | # map release types to their case sensitive name in musicbrainz.release_group_primary_type table in the database 163 | release_types_mapping = { 164 | 'album': 'Album', 165 | 'single': 'Single', 166 | 'ep': 'EP', 167 | 'broadcast': 'Broadcast', 168 | 'other': 'Other' 169 | } 170 | release_types = [release_types_mapping[release_type] for release_type in release_types] 171 | with mb_session() as db: 172 | release_groups_query = _get_release_groups_for_artist_query(db, artist_id, release_types) 173 | count = release_groups_query.count() 174 | release_groups = release_groups_query.order_by( 175 | nullslast(models.ReleaseGroupMeta.first_release_date_year.desc()) 176 | ).limit(limit).offset(offset).all() 177 | 178 | for release_group in release_groups: 179 | includes_data[release_group.id]['meta'] = release_group.meta 180 | release_groups = ([serialize_release_groups(release_group, includes_data[release_group.id]) 181 | for release_group in release_groups], count) 182 | return release_groups 183 | 184 | 185 | def _get_release_groups_for_artist_query(db, artist_id, release_types): 186 | return db.query(models.ReleaseGroup).\ 187 | options(joinedload(models.ReleaseGroup.meta)).\ 188 | join(models.ReleaseGroupPrimaryType).join(models.ReleaseGroupMeta).\ 189 | join(models.ArtistCreditName, models.ArtistCreditName.artist_credit_id == models.ReleaseGroup.artist_credit_id).\ 190 | join(models.Artist, models.Artist.id == models.ArtistCreditName.artist_id).\ 191 | filter(models.Artist.gid == artist_id).filter(models.ReleaseGroupPrimaryType.name.in_(release_types)) 192 | 193 | 194 | def get_release_groups_for_label(label_mbid, release_types=None, limit=None, offset=None): 195 | """Get all release groups linked to a label. 196 | 197 | Args: 198 | label_id (uuid): MBID of the label. 199 | release_types (list): List of types of release groups to be fetched. The supported release_types are 200 | 'album', 'single', 'ep', 'broadcast', and 'other'. 201 | limit (int): Max number of release groups to return. 202 | offset (int): Offset that can be used in conjunction with the limit. 203 | 204 | Returns: 205 | Tuple containing the list of dictionaries of release groups and the total count of the release groups. 206 | The list of dictionaries of release groups is ordered by release year, release month, 207 | release date, and release name. In case one of these is set to NULL, it will be ordered last. 208 | List also contains release groups with null type if 'Other' is in the list of release types. 
209 | """ 210 | label_mbid = str(label_mbid) 211 | includes_data = defaultdict(dict) 212 | if release_types is None: 213 | release_types = [] 214 | release_types = get_mapped_release_types(release_types) 215 | include_null_type = True if "Other" in release_types else False 216 | with mb_session() as db: 217 | release_groups_query = _get_release_groups_for_label_query(db, label_mbid, release_types, include_null_type) 218 | count = release_groups_query.count() 219 | release_groups = release_groups_query.order_by( 220 | nullslast(models.ReleaseGroupMeta.first_release_date_year.desc()), 221 | nullslast(models.ReleaseGroupMeta.first_release_date_month.desc()), 222 | nullslast(models.ReleaseGroupMeta.first_release_date_day.desc()), 223 | nullslast(models.ReleaseGroup.name.asc()) 224 | ).limit(limit).offset(offset).all() 225 | 226 | for release_group in release_groups: 227 | includes_data[release_group.id]['meta'] = release_group.meta 228 | release_groups = [serialize_release_groups(release_group, includes_data[release_group.id]) 229 | for release_group in release_groups] 230 | return release_groups, count 231 | 232 | 233 | def _get_release_groups_for_label_query(db, label_mbid, release_types, include_null_type=False): 234 | release_groups = db.query(models.ReleaseGroup).\ 235 | outerjoin(models.ReleaseGroupPrimaryType).join(models.ReleaseGroupMeta).\ 236 | options(contains_eager(models.ReleaseGroup.meta)).\ 237 | options(contains_eager(models.ReleaseGroup.type)).\ 238 | join(models.Release, models.Release.release_group_id == models.ReleaseGroup.id).\ 239 | join(models.ReleaseLabel, models.ReleaseLabel.release_id == models.Release.id).\ 240 | join(models.Label, models.Label.id == models.ReleaseLabel.label_id).\ 241 | filter(models.Label.gid == label_mbid).\ 242 | group_by(models.ReleaseGroup, models.ReleaseGroupMeta, models.ReleaseGroupPrimaryType) 243 | 244 | if include_null_type and release_types: 245 | release_groups = release_groups.filter(or_(models.ReleaseGroup.type == None, models.ReleaseGroupPrimaryType.name.in_(release_types))) 246 | elif release_types: 247 | release_groups = release_groups.filter(models.ReleaseGroupPrimaryType.name.in_(release_types)) 248 | 249 | return release_groups 250 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/serialize.py: -------------------------------------------------------------------------------- 1 | from brainzutils.musicbrainz_db.models import ENTITY_MODELS 2 | from mbdata.utils.models import get_link_target 3 | 4 | 5 | def serialize_begin_end(entity): 6 | begin_date = entity.begin_date 7 | end_date = entity.end_date 8 | begin = [] 9 | end = [] 10 | if begin_date and begin_date.year: 11 | begin.append(f'{begin_date.year:04}') 12 | if begin_date.month: 13 | begin.append(f'{begin_date.month:02}') 14 | if begin_date.day: 15 | begin.append(f'{begin_date.day:02}') 16 | 17 | if end_date and end_date.year: 18 | end.append(f'{end_date.year:04}') 19 | if end_date.month: 20 | end.append(f'{end_date.month:02}') 21 | if end_date.day: 22 | end.append(f'{end_date.day:02}') 23 | 24 | data = {} 25 | if begin: 26 | data["begin"] = "-".join(begin) 27 | if end: 28 | data["end"] = "-".join(end) 29 | return data 30 | 31 | 32 | def serialize_areas(area, includes=None): 33 | if includes is None: 34 | includes = {} 35 | data = { 36 | 'mbid': str(area.gid), 37 | 'name': area.name, 38 | } 39 | 40 | if area.comment: 41 | data['comment'] = area.comment 42 | 43 | dates = serialize_begin_end(area) 44 | if dates: 45 | 
data['life-span'] = dates 46 | 47 | if 'relationship_objs' in includes: 48 | serialize_relationships(data, area, includes['relationship_objs']) 49 | return data 50 | 51 | 52 | def serialize_relationships(data, source_obj, relationship_objs): 53 | """Convert relationship objects to dictionaries. 54 | 55 | Args: 56 | data (dict): Dictionary containing info of source object. 57 | source_obj (mbdata.models): object of source entity. 58 | relationship_objs (dict): Dictionary containing list of objects of different relations. 59 | 60 | Returns: 61 | Dictionary containing lists of dictionaries of related entities. 62 | """ 63 | 64 | for entity_type in ENTITY_MODELS: 65 | relation = '{0}-rels'.format(entity_type) 66 | if relation in relationship_objs: 67 | data[relation] = [] 68 | for obj in relationship_objs[relation]: 69 | link_data = { 70 | 'type': obj.link.link_type.name, 71 | 'type-id': str(obj.link.link_type.gid), 72 | 'begin-year': obj.link.begin_date_year, 73 | 'end-year': obj.link.end_date_year, 74 | } 75 | link_data['direction'] = 'forward' if source_obj.id == obj.entity0_id else 'backward' 76 | if obj.link.ended: 77 | link_data['ended'] = True 78 | link_data[entity_type] = SERIALIZE_ENTITIES[entity_type](get_link_target(obj, source_obj)) 79 | data[relation].append(link_data) 80 | 81 | 82 | def serialize_artist_credit(artist_credit): 83 | """Convert artist_credit object into a list of artist credits.""" 84 | data = [] 85 | for artist_credit_name in artist_credit.artists: 86 | artist_credit_data = { 87 | 'mbid': str(artist_credit_name.artist.gid), 88 | 'name': artist_credit_name.artist.name, 89 | } 90 | 91 | if artist_credit_name.name != artist_credit_name.artist.name: 92 | artist_credit_data['credited_name'] = artist_credit_name.name 93 | 94 | if artist_credit_name.join_phrase: 95 | artist_credit_data['join_phrase'] = artist_credit_name.join_phrase 96 | 97 | data.append(artist_credit_data) 98 | 99 | return data 100 | 101 | 102 | def serialize_recording(recording, includes=None): 103 | """Convert recording objects into dictionary.""" 104 | if includes is None: 105 | includes = {} 106 | data = { 107 | 'mbid': str(recording.gid), 108 | 'name': recording.name, 109 | } 110 | 111 | if recording.comment: 112 | data['comment'] = recording.comment 113 | 114 | if recording.length: 115 | # Divide recording length by 1000 to convert milliseconds into seconds 116 | data['length'] = recording.length / 1000.0 117 | 118 | if recording.video: 119 | data['video'] = True 120 | 121 | if getattr(recording, 'rating', None): 122 | data['rating'] = recording.rating 123 | 124 | if 'artist' in includes: 125 | data['artist'] = recording.artist_credit.name 126 | elif 'artists' in includes: 127 | data['artists'] = serialize_artist_credit(recording.artist_credit) 128 | data['artist-credit-phrase'] = includes['artist-credit-phrase'] 129 | 130 | if 'isrc' in includes: 131 | data['isrcs'] = [isrc.isrc for isrc in recording.isrcs] 132 | 133 | return data 134 | 135 | 136 | def serialize_places(place, includes=None): 137 | if includes is None: 138 | includes = {} 139 | data = { 140 | 'mbid': str(place.gid), 141 | 'name': place.name, 142 | 'address': place.address, 143 | } 144 | 145 | if place.comment: 146 | data['comment'] = place.comment 147 | 148 | if place.type: 149 | data['type'] = place.type.name 150 | 151 | if place.area: 152 | data['area'] = serialize_areas(place.area) 153 | 154 | if place.coordinates: 155 | data['coordinates'] = { 156 | 'latitude': place.coordinates[0], 157 | 'longitude': place.coordinates[1], 158 
| } 159 | 160 | dates = serialize_begin_end(place) 161 | if dates: 162 | data['life-span'] = dates 163 | 164 | if 'relationship_objs' in includes: 165 | serialize_relationships(data, place, includes['relationship_objs']) 166 | return data 167 | 168 | 169 | def serialize_labels(label, includes=None): 170 | if includes is None: 171 | includes = {} 172 | data = { 173 | 'mbid': str(label.gid), 174 | 'name': label.name, 175 | } 176 | 177 | if label.comment: 178 | data['comment'] = label.comment 179 | 180 | dates = serialize_begin_end(label) 181 | if dates: 182 | data['life-span'] = dates 183 | 184 | if label.type: 185 | data['type'] = label.type.name 186 | 187 | if label.area: 188 | data['area'] = label.area.name 189 | 190 | if getattr(label, 'rating', None): 191 | data['rating'] = label.rating 192 | 193 | if 'relationship_objs' in includes: 194 | serialize_relationships(data, label, includes['relationship_objs']) 195 | 196 | return data 197 | 198 | 199 | def serialize_artists(artist, includes=None): 200 | if includes is None: 201 | includes = {} 202 | data = { 203 | 'mbid': str(artist.gid), 204 | 'name': artist.name, 205 | 'sort_name': artist.sort_name, 206 | } 207 | 208 | if artist.comment: 209 | data['comment'] = artist.comment 210 | 211 | dates = serialize_begin_end(artist) 212 | if dates: 213 | data['life-span'] = dates 214 | 215 | if artist.type: 216 | data['type'] = artist.type.name 217 | 218 | if getattr(artist, 'rating', None): 219 | data['rating'] = artist.rating 220 | 221 | if 'relationship_objs' in includes: 222 | serialize_relationships(data, artist, includes['relationship_objs']) 223 | 224 | return data 225 | 226 | 227 | def serialize_artist_credit_names(artist_credit_name): 228 | data = { 229 | 'name': artist_credit_name.name, 230 | 'artist': serialize_artists(artist_credit_name.artist), 231 | } 232 | if artist_credit_name.join_phrase: 233 | data['join_phrase'] = artist_credit_name.join_phrase 234 | return data 235 | 236 | 237 | def serialize_release_groups(release_group, includes=None): 238 | if includes is None: 239 | includes = {} 240 | 241 | data = { 242 | 'mbid': str(release_group.gid), 243 | 'title': release_group.name, 244 | } 245 | 246 | if release_group.comment: 247 | data['comment'] = release_group.comment 248 | 249 | if release_group.type: 250 | data['type'] = release_group.type.name 251 | 252 | if getattr(release_group, 'rating', None): 253 | data['rating'] = release_group.rating 254 | 255 | if 'artist-credit-phrase' in includes: 256 | data['artist-credit-phrase'] = includes['artist-credit-phrase'] 257 | 258 | if 'meta' in includes and includes['meta'].first_release_date_year: 259 | data['first-release-year'] = includes['meta'].first_release_date_year 260 | 261 | if 'artist-credit-names' in includes: 262 | data['artist-credit'] = [serialize_artist_credit_names(artist_credit_name) 263 | for artist_credit_name in includes['artist-credit-names']] 264 | 265 | if 'releases' in includes: 266 | data['release-list'] = [serialize_releases(release) for release in includes['releases']] 267 | 268 | if 'relationship_objs' in includes: 269 | serialize_relationships(data, release_group, includes['relationship_objs']) 270 | 271 | if 'tags' in includes: 272 | data['tag-list'] = includes['tags'] 273 | return data 274 | 275 | 276 | def serialize_medium(medium, includes=None): 277 | if includes is None: 278 | includes = {} 279 | data = { 280 | 'name': medium.name, 281 | 'track_count': medium.track_count, 282 | 'position': medium.position, 283 | } 284 | if medium.format: 285 | 
data['format'] = medium.format.name 286 | 287 | if 'tracks' in includes and includes['tracks']: 288 | data['track-list'] = [serialize_track(track) for track in includes['tracks']] 289 | return data 290 | 291 | 292 | def serialize_track(track): 293 | return { 294 | 'mbid': str(track.gid), 295 | 'name': track.name, 296 | 'number': track.number, 297 | 'position': track.position, 298 | 'length': track.length, 299 | 'recording_id': str(track.recording.gid), 300 | 'recording_title': track.recording.name, 301 | 'artist-credit': [serialize_artist_credit_names(artist_credit_name) 302 | for artist_credit_name in track.recording.artist_credit.artists], 303 | 'artist-credit-phrase': track.recording.artist_credit.name 304 | } 305 | 306 | 307 | def serialize_releases(release, includes=None): 308 | if includes is None: 309 | includes = {} 310 | 311 | data = { 312 | 'mbid': str(release.gid), 313 | 'name': release.name, 314 | } 315 | 316 | if 'relationship_objs' in includes: 317 | serialize_relationships(data, release, includes['relationship_objs']) 318 | 319 | if 'release-groups' in includes: 320 | data['release-group'] = serialize_release_groups(includes['release-groups']) 321 | 322 | if 'artist-credit-phrase' in includes: 323 | data['artist-credit-phrase'] = includes['artist-credit-phrase'] 324 | 325 | if 'artist-credit-names' in includes: 326 | data['artist-credit'] = [serialize_artist_credit_names(artist_credit_name) 327 | for artist_credit_name in includes['artist-credit-names']] 328 | 329 | if 'media' in includes: 330 | data['medium-list'] = [serialize_medium(medium, includes={'tracks': medium.tracks}) 331 | for medium in includes['media']] 332 | 333 | if release.comment: 334 | data['comment'] = release.comment 335 | 336 | return data 337 | 338 | 339 | def serialize_events(event, includes=None): 340 | if includes is None: 341 | includes = {} 342 | data = { 343 | 'mbid': str(event.gid), 344 | 'name': event.name, 345 | } 346 | 347 | if event.comment: 348 | data['comment'] = event.comment 349 | 350 | dates = serialize_begin_end(event) 351 | if dates: 352 | data['life-span'] = dates 353 | 354 | if event.type: 355 | data['type'] = event.type.name 356 | 357 | if getattr(event, 'rating', None): 358 | data['rating'] = event.rating 359 | 360 | if 'relationship_objs' in includes: 361 | serialize_relationships(data, event, includes['relationship_objs']) 362 | return data 363 | 364 | 365 | def serialize_url(url, includes=None): 366 | if includes is None: 367 | includes = {} 368 | data = { 369 | 'mbid': str(url.gid), 370 | 'url': url.url, 371 | } 372 | 373 | if 'relationship_objs' in includes: 374 | serialize_relationships(data, url, includes['relationship_objs']) 375 | return data 376 | 377 | 378 | def serialize_works(work, includes=None): 379 | if includes is None: 380 | includes = {} 381 | data = { 382 | 'mbid': str(work.gid), 383 | 'name': work.name, 384 | } 385 | 386 | if work.comment: 387 | data['comment'] = work.comment 388 | 389 | if work.type: 390 | data['type'] = work.type.name 391 | 392 | if getattr(work, 'rating', None): 393 | data['rating'] = work.rating 394 | 395 | if 'relationship_objs' in includes: 396 | serialize_relationships(data, work, includes['relationship_objs']) 397 | 398 | return data 399 | 400 | 401 | def serialize_editor(editor, includes=None): 402 | # TODO: Add includes to data here (BU-18) 403 | data = { 404 | "id": editor.id, 405 | "name": editor.name, 406 | "privs": editor.privs, 407 | "email": editor.email, 408 | "website": editor.website, 409 | "bio": editor.bio, 410 | 
"member_since": editor.member_since, 411 | "email_confirm_date": editor.email_confirm_date, 412 | "last_login_date": editor.last_login_date, 413 | "last_updated": editor.last_updated, 414 | "birth_date": editor.birth_date, 415 | "deleted": editor.deleted, 416 | "gender": editor.gender, 417 | "area": None 418 | } 419 | if editor.area: 420 | data["area"] = serialize_areas(editor.area) 421 | return data 422 | 423 | 424 | def serialize_series(series, includes=None): 425 | if includes is None: 426 | includes = {} 427 | 428 | data = { 429 | 'mbid': str(series.gid), 430 | 'name': series.name, 431 | } 432 | 433 | if series.comment: 434 | data['comment'] = series.comment 435 | 436 | if 'relationship_objs' in includes: 437 | serialize_relationships(data, series, includes['relationship_objs']) 438 | 439 | return data 440 | 441 | 442 | SERIALIZE_ENTITIES = { 443 | 'artist': serialize_artists, 444 | 'release_group': serialize_release_groups, 445 | 'release': serialize_releases, 446 | 'medium': serialize_medium, 447 | 'url': serialize_url, 448 | 'editor': serialize_editor, 449 | 'recording': serialize_recording, 450 | 'place': serialize_places, 451 | 'area': serialize_areas, 452 | 'event': serialize_events, 453 | 'series': serialize_series, 454 | } 455 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metabrainz/brainzutils-python/bf01c6da15d4a2426d64a31cf232c06bec3860f3/brainzutils/musicbrainz_db/tests/__init__.py -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_artist.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import artist as mb_artist 4 | 5 | 6 | @pytest.mark.database 7 | class TestArtist: 8 | 9 | def test_get_artist_by_mbid(self, engine): 10 | artist = mb_artist.get_artist_by_mbid("f59c5520-5f46-4d2c-b2c4-822eabf53419") 11 | assert artist == { 12 | "mbid": "f59c5520-5f46-4d2c-b2c4-822eabf53419", 13 | "name": "Linkin Park", 14 | "sort_name": "Linkin Park", 15 | "comment": "American rock band", 16 | "life-span": {"begin": "1999"}, 17 | "rating": 85, 18 | "type": "Group", 19 | } 20 | 21 | def test_get_artist_by_mbid_redirect(self, engine): 22 | """Using an MBID which is a redirect will return the "canonical" id""" 23 | artist = mb_artist.get_artist_by_mbid("b3d01315-d52a-4f3a-908b-0618315c1ef2") 24 | assert artist == { 25 | "mbid": "79239441-bfd5-4981-a70c-55c3f15c1287", 26 | "name": "Madonna", 27 | "sort_name": "Madonna", 28 | "comment": "“Queen of Pop”", 29 | "life-span": {"begin": "1958-08-16"}, 30 | "rating": 88, 31 | "type": "Person", 32 | } 33 | 34 | def test_fetch_multiple_artists(self, engine): 35 | artists = mb_artist.fetch_multiple_artists([ 36 | "f59c5520-5f46-4d2c-b2c4-822eabf53419", 37 | "f82bcf78-5b69-4622-a5ef-73800768d9ac", 38 | ]) 39 | assert artists["f82bcf78-5b69-4622-a5ef-73800768d9ac"] == { 40 | "mbid": "f82bcf78-5b69-4622-a5ef-73800768d9ac", 41 | "name": "JAY‐Z", 42 | "sort_name": "JAY‐Z", 43 | "type": "Person", 44 | "comment": "US rapper", 45 | "life-span": {"begin": "1969-12-04"}, 46 | "rating": 71, 47 | } 48 | assert artists["f59c5520-5f46-4d2c-b2c4-822eabf53419"] == { 49 | "mbid": "f59c5520-5f46-4d2c-b2c4-822eabf53419", 50 | "name": "Linkin Park", 51 | "sort_name": "Linkin Park", 52 | "type": "Group", 53 | "comment": 
"American rock band", 54 | "life-span": {"begin": "1999"}, 55 | "rating": 85, 56 | } 57 | 58 | def test_fetch_multiple_artists_redirect(self, engine): 59 | """Artist with a redirect uses redirected mbid in dictionary key, but canonical id in returned data""" 60 | artists = mb_artist.fetch_multiple_artists(["fe008f22-07be-46f0-9206-7cab2d26e89d"]) 61 | assert len(artists) == 1 62 | assert artists["fe008f22-07be-46f0-9206-7cab2d26e89d"] == { 63 | "mbid": "f59c5520-5f46-4d2c-b2c4-822eabf53419", 64 | "name": "Linkin Park", 65 | "sort_name": "Linkin Park", 66 | "comment": "American rock band", 67 | "life-span": {"begin": "1999"}, 68 | "rating": 85, 69 | "type": "Group" 70 | } 71 | 72 | def test_fetch_multiple_artists_missing(self, engine): 73 | """If an artist id doesn't exist, don't fetch it""" 74 | artists = mb_artist.fetch_multiple_artists(["f59c5520-5f46-4d2c-b2c4-822eabf53419", 75 | "f59c5520-aaaa-aaaa-b2c4-822eabf53419"], 76 | includes=['artist-rels', 'url-rels']) 77 | assert list(artists.keys()) == ["f59c5520-5f46-4d2c-b2c4-822eabf53419"] 78 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_editor.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import pytest 4 | from mbdata.models import Editor 5 | from psycopg2.tz import FixedOffsetTimezone 6 | 7 | from brainzutils.musicbrainz_db import editor as mb_editor 8 | 9 | 10 | @pytest.mark.database 11 | class TestEditor: 12 | editor_dt = datetime(2014, 12, 1, 14, 6, 42, 321443, tzinfo=FixedOffsetTimezone(offset=0, name=None)) 13 | 14 | editor_1 = dict(id=2323, name="Editor 1", privs=0, member_since=editor_dt, email_confirm_date=editor_dt, 15 | last_login_date=editor_dt, last_updated=editor_dt, deleted=False, password="{CLEARTEXT}pass", 16 | ha1="3f3edade87115ce351d63f42d92a1834") 17 | expected_editor_1 = { 18 | 'area': None, 19 | 'bio': None, 20 | 'birth_date': None, 21 | 'deleted': False, 22 | 'email': None, 23 | 'email_confirm_date': editor_dt, 24 | 'gender': None, 25 | 'id': 2323, 26 | 'last_login_date': editor_dt, 27 | 'last_updated': editor_dt, 28 | 'member_since': editor_dt, 29 | 'name': 'Editor 1', 30 | 'privs': 0, 31 | 'website': None 32 | } 33 | 34 | editor_2 = dict(id=2324, name="Editor 2", privs=3, email="editor@example.com", website="example.com", 35 | bio="Random\neditor", member_since=editor_dt, email_confirm_date=editor_dt, 36 | last_login_date=editor_dt, last_updated=editor_dt, deleted=False, area=None, 37 | password="$2b$12$2odiKUAGktuwM2J.tp/uZ.54bniapSMjCln3J1TfC6zx74QFuawQ6", 38 | ha1="3f3edade87115ce351d63f42d92a1834") 39 | expected_editor_2 = { 40 | "id": 2324, 41 | "name": "Editor 2", 42 | "privs": 3, 43 | "email": "editor@example.com", 44 | "website": "example.com", 45 | "bio": "Random\neditor", 46 | "member_since": editor_dt, 47 | "email_confirm_date": editor_dt, 48 | "last_login_date": editor_dt, 49 | "last_updated": editor_dt, 50 | "birth_date": None, 51 | "deleted": False, 52 | "gender": None, 53 | "area": None, 54 | } 55 | 56 | def test_get_by_id(self, session): 57 | # Manually adding and deleting data in tests can get tedious. However, we have only two tests for which this is 58 | # needed. In case in future we need to add more tests where the test database needs to be modified, we should 59 | # explore other alternatives to ease the process. 
 60 |         with session as db:
 61 |             # The editors table in test database has many empty columns and fields like last_login_date may change with
 62 |             # new dump.
 63 |             insert_editor_1 = Editor(**TestEditor.editor_1)
 64 |             db.add(insert_editor_1)
 65 |             db.commit()
 66 |             try:
 67 |                 editor = mb_editor.get_editor_by_id(2323)
 68 |                 assert editor == TestEditor.expected_editor_1
 69 |             finally:
 70 |                 # regardless whether the assertion fails or passes, delete the inserted editor to prevent side effects
 71 |                 # on subsequent tests
 72 |                 db.delete(insert_editor_1)
 73 |                 db.commit()
 74 | 
 75 |     def test_fetch_multiple_editors(self, session):
 76 |         # Manually adding and deleting data in tests can get tedious. However, we have only two tests for which this is
 77 |         # needed. In case in future we need to add more tests where the test database needs to be modified, we should
 78 |         # explore other alternatives to ease the process.
 79 |         with session as db:
 80 |             # The editors table in test database has many empty columns and fields like last_login_date may change with
 81 |             # new dump.
 82 |             insert_editor_1 = Editor(**TestEditor.editor_1)
 83 |             insert_editor_2 = Editor(**TestEditor.editor_2)
 84 |             db.add(insert_editor_1)
 85 |             db.add(insert_editor_2)
 86 |             db.commit()
 87 |             try:
 88 |                 editors = mb_editor.fetch_multiple_editors([2323, 2324])
 89 |                 assert editors[2323] == TestEditor.expected_editor_1
 90 |                 assert editors[2324] == TestEditor.expected_editor_2
 91 |             finally:
 92 |                 # regardless whether the assertion fails or passes, delete the inserted editor to prevent side effects
 93 |                 # on subsequent tests
 94 |                 db.delete(insert_editor_1)
 95 |                 db.delete(insert_editor_2)
 96 |                 db.commit()
 97 | 
 98 |     def test_fetch_multiple_editors_empty(self, engine):
 99 |         editors = mb_editor.fetch_multiple_editors(
100 |             [2323, 2324],
101 |         )
102 |         assert editors == {}
103 | 
--------------------------------------------------------------------------------
/brainzutils/musicbrainz_db/tests/test_event.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from brainzutils.musicbrainz_db import event as mb_event
 4 | 
 5 | 
 6 | @pytest.mark.database
 7 | class TestEvent:
 8 | 
 9 |     def test_get_event_by_mbid(self, engine):
10 |         event = mb_event.get_event_by_mbid('d4921d43-bf92-464e-aef4-bba8540fc5bd')
11 |         assert event == {
12 |             'mbid': 'd4921d43-bf92-464e-aef4-bba8540fc5bd',
13 |             'name': 'Butterfly Whirl 2015',
14 |             'life-span': {'begin': '2015-05-22', 'end': '2015-05-25'},
15 |             'type': 'Festival'
16 |         }
17 | 
18 |     def test_get_event_by_mbid_redirect(self, engine):
19 |         """If using an id that is redirected, return the "canonical" id"""
20 |         event = mb_event.get_event_by_mbid('b8528315-ef77-46e2-bff9-d1b00d84dc3f')
21 |         assert event == {
22 |             'mbid': '499559c8-b84b-422e-8ad7-b746d48c21aa',
23 |             'name': '1995-10-11: Riverport Amphitheatre, Maryland Heights, Missouri',
24 |             'life-span': {'begin': '1995-10-11', 'end': '1995-10-11'},
25 |             'rating': 100,
26 |             'type': 'Concert',
27 |         }
28 | 
29 |     def test_get_event_by_mbid_with_includes(self, engine):
30 |         event = mb_event.get_event_by_mbid('b8528315-ef77-46e2-bff9-d1b00d84dc3f',
31 |                                            includes=['artist-rels'])
32 |         assert event['mbid'] == '499559c8-b84b-422e-8ad7-b746d48c21aa'
33 |         assert len(event['artist-rels']) == 1
34 |         assert event['artist-rels'][0]['type-id'] == '936c7c95-3156-3889-a062-8a0cd57f8946'
35 | 
36 |     def test_fetch_multiple_events(self, engine):
37 |         events = mb_event.fetch_multiple_events(
38 |             ['d4921d43-bf92-464e-aef4-bba8540fc5bd', 'b335b093-b3a0-411f-9f3d-7f680a4992d6'],
39 | ) 40 | assert events['d4921d43-bf92-464e-aef4-bba8540fc5bd']['name'] == 'Butterfly Whirl 2015' 41 | assert events['b335b093-b3a0-411f-9f3d-7f680a4992d6']['name'] == 'KISS in Atlanta' 42 | 43 | def test_fetch_multiple_events_redirect(self, engine): 44 | """""" 45 | events = mb_event.fetch_multiple_events( 46 | ['b8528315-ef77-46e2-bff9-d1b00d84dc3f'], 47 | ) 48 | assert events == {'b8528315-ef77-46e2-bff9-d1b00d84dc3f': { 49 | 'mbid': '499559c8-b84b-422e-8ad7-b746d48c21aa', 50 | 'name': '1995-10-11: Riverport Amphitheatre, Maryland Heights, Missouri', 51 | 'life-span': {'begin': '1995-10-11', 'end': '1995-10-11'}, 52 | 'rating': 100, 53 | 'type': 'Concert', 54 | }} 55 | 56 | def test_fetch_multiple_events_empty(self, engine): 57 | """If an event id doesn't exist, don't return it in the list""" 58 | events = mb_event.fetch_multiple_events([ 59 | 'd4921d43-bf92-464e-aef4-bba8540fc5bd', 60 | '40e6153d-4444-4444-4444-b0a47e3825ce' 61 | ], 62 | includes=['artist-rels', 'place-rels', 'series-rels', 'url-rels', 'release-group-rels']) 63 | assert list(events.keys()) == ['d4921d43-bf92-464e-aef4-bba8540fc5bd'] 64 | 65 | def test_get_events_for_place(self, engine): 66 | events = mb_event.get_events_for_place( 67 | place_id='4352063b-a833-421b-a420-e7fb295dece0', 68 | event_types=['Concert', 'Festival'], 69 | include_null_type=False, 70 | ) 71 | assert events[0][0] == { 72 | "life-span": { 73 | "begin": "2015-07-17", 74 | "end": "2015-09-12" 75 | }, 76 | "mbid": "00d6449e-c6d2-42f1-a09e-c01668af1dd7", 77 | "name": "The Proms 2015", 78 | "type": "Festival" 79 | } 80 | 81 | assert events[1] == 5 82 | assert len(events[0]) == 5 83 | 84 | events2 = mb_event.get_events_for_place( 85 | place_id='06e5431e-ef98-424c-a43a-4b7a3cf26327', 86 | event_types=[], 87 | include_null_type=True, 88 | ) 89 | 90 | # first item doesn't have a 'type' key 91 | assert events2[0][0] == { 92 | "life-span": { 93 | "begin": "2015-12-19", 94 | "end": "2015-12-19" 95 | }, 96 | "mbid": "6cc3999a-2f19-433e-b760-f2ff2a6bc86b", 97 | "name": "2015-12-19: Studio 8H, GE Building, Rockefeller Center, New York City, NY, USA", 98 | 'comment': 'Saturday Night Live', 99 | } 100 | 101 | assert events2[1] == 5 102 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_helper.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | import pytest 4 | from mbdata import models 5 | 6 | from brainzutils.musicbrainz_db.serialize import serialize_relationships 7 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 8 | import brainzutils.musicbrainz_db as mb 9 | from brainzutils.musicbrainz_db.helpers import get_tags 10 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 11 | 12 | 13 | @pytest.mark.database 14 | class TestHelpers: 15 | 16 | def test_get_tags(self, engine): 17 | data = defaultdict(dict) 18 | with mb.mb_session() as db: 19 | release_group_tags = get_tags( 20 | db=db, 21 | entity_model=models.ReleaseGroup, 22 | tag_model=models.ReleaseGroupTag, 23 | foreign_tag_id=models.ReleaseGroupTag.release_group_id, 24 | entity_ids=[253487], 25 | ) 26 | for release_group_id, tags in release_group_tags: 27 | data[release_group_id]['tags'] = tags 28 | expected_data = { 29 | 253487: { 30 | 'tags': ['classical', 'ballet'] 31 | } 32 | } 33 | assert dict(data) == expected_data 34 | 35 | def test_get_relationship_info(self, engine): 36 | data = {} 37 | includes_data = 
defaultdict(dict) 38 | with mb.mb_session() as db: 39 | gid = "3185e028-9a08-448b-83e3-873dfda40476" 40 | place = get_entities_by_gids( 41 | query=db.query(models.Place), 42 | entity_type='place', 43 | mbids=[gid], 44 | )[gid] 45 | get_relationship_info( 46 | db=db, 47 | target_type='url', 48 | source_type='place', 49 | source_entity_ids=[place.id], 50 | includes_data=includes_data, 51 | ) 52 | serialize_relationships(data, place, includes_data[place.id]['relationship_objs']) 53 | expected_data = { 54 | 'url-rels': [ 55 | { 56 | 'type': 'wikidata', 57 | 'type-id': 'e6826618-b410-4b8d-b3b5-52e29eac5e1f', 58 | 'begin-year': None, 59 | 'end-year': None, 60 | 'direction': 'forward', 61 | 'url': { 62 | 'mbid': '86d64bb6-bcee-4cda-b1f8-050394664671', 63 | 'url': 'https://www.wikidata.org/wiki/Q2489904' 64 | } 65 | }, 66 | { 67 | 'type': 'discogs', 68 | 'type-id': '1c140ac8-8dc2-449e-92cb-52c90d525640', 69 | 'begin-year': None, 70 | 'end-year': None, 71 | 'direction': 'forward', 72 | 'url': { 73 | 'mbid': '06332787-5aac-4e4c-95b9-75cf729ae308', 74 | 'url': 'https://www.discogs.com/label/266610' 75 | } 76 | } 77 | ] 78 | } 79 | assert data == expected_data 80 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_label.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import label as mb_label 4 | 5 | 6 | @pytest.mark.database 7 | class TestLabel: 8 | 9 | def test_get_label_by_mbid(self, engine): 10 | label = mb_label.get_label_by_mbid('4cccc72a-0bd0-433a-905e-dad87871397d') 11 | assert label == { 12 | "mbid": "4cccc72a-0bd0-433a-905e-dad87871397d", 13 | "name": "Roc‐A‐Fella Records", 14 | "type": "Original Production", 15 | "area": "United States", 16 | "life-span": {"begin": "1996", "end": "2013"}, 17 | "rating": 100, 18 | } 19 | 20 | def test_get_label_by_mbid_redirect(self, engine): 21 | label = mb_label.get_label_by_mbid('67cf4cad-c039-4f01-bc84-f8dab7791ed7') 22 | assert label == { 23 | "mbid": "50c384a2-0b44-401b-b893-8181173339c7", 24 | "name": "Atlantic", 25 | "type": "Imprint", 26 | "area": "United States", 27 | "comment": "Warner Music imprint", 28 | "life-span": {"begin": "1947"}, 29 | "rating": 100, 30 | } 31 | 32 | def test_fetch_multiple_labels(self, engine): 33 | labels = mb_label.fetch_multiple_labels([ 34 | 'c595c289-47ce-4fba-b999-b87503e8cb71', 35 | '4cccc72a-0bd0-433a-905e-dad87871397d' 36 | ]) 37 | assert len(labels) == 2 38 | assert labels["c595c289-47ce-4fba-b999-b87503e8cb71"] == { 39 | "mbid": "c595c289-47ce-4fba-b999-b87503e8cb71", 40 | "name": "Warner Bros. 
Records", 41 | "comment": '1958–2019; “WB” logo, with or without “records” beneath or on banner across', 42 | "type": "Imprint", 43 | "area": "United States", 44 | "life-span": {"begin": "1958-03-19", "end": "2019-05-28"}, 45 | } 46 | assert labels["4cccc72a-0bd0-433a-905e-dad87871397d"] == { 47 | "mbid": "4cccc72a-0bd0-433a-905e-dad87871397d", 48 | "name": "Roc‐A‐Fella Records", 49 | "type": "Original Production", 50 | "area": "United States", 51 | "life-span": {"begin": "1996", "end": "2013"}, 52 | "rating": 100 53 | } 54 | 55 | def test_fetch_multiple_labels_redirect(self, engine): 56 | labels = mb_label.fetch_multiple_labels([ 57 | '67cf4cad-c039-4f01-bc84-f8dab7791ed7' 58 | ]) 59 | assert len(labels) == 1 60 | assert labels["67cf4cad-c039-4f01-bc84-f8dab7791ed7"] == { 61 | "mbid": "50c384a2-0b44-401b-b893-8181173339c7", 62 | "name": "Atlantic", 63 | "type": "Imprint", 64 | "area": "United States", 65 | "comment": "Warner Music imprint", 66 | "life-span": {"begin": "1947"}, 67 | "rating": 100, 68 | } 69 | 70 | def test_fetch_multiple_labels_missing(self, engine): 71 | labels = mb_label.fetch_multiple_labels([ 72 | '50c384a2-0b44-401b-b893-8181173339c7', 73 | '50c384a2-0000-0000-0000-8181173339c7' 74 | ], includes=['artist-rels', 'url-rels']) 75 | assert list(labels.keys()) == ['50c384a2-0b44-401b-b893-8181173339c7'] 76 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_place.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import place as mb_place 4 | 5 | 6 | @pytest.mark.database 7 | class TestPlace: 8 | 9 | def test_get_place_by_mbid(self, engine): 10 | place = mb_place.get_place_by_mbid('4352063b-a833-421b-a420-e7fb295dece0') 11 | assert place['name'] == 'Royal Albert Hall' 12 | assert place['type'] == 'Venue' 13 | assert place['coordinates'] == { 14 | 'latitude': 51.50105, 15 | 'longitude': -0.17748 16 | } 17 | assert place['area'] == { 18 | 'mbid': 'b9576171-3434-4d1b-8883-165ed6e65d2f', 19 | 'name': 'Kensington and Chelsea', 20 | } 21 | 22 | def test_get_place_by_mbid_redirect(self, engine): 23 | place = mb_place.get_place_by_mbid('b1690ae6-5a37-46d7-99ae-b7e2d790485f') 24 | assert place == { 25 | 'address': 'Herbert-von-Karajan-Straße 1, 10785 Berlin, Germany', 26 | 'area': {'mbid': 'c9ac1239-e832-41bc-9930-e252a1fd1105', 'name': 'Berlin'}, 27 | 'coordinates': {'latitude': 52.51, 'longitude': 13.37}, 28 | 'mbid': 'bea135c0-a32e-49be-85fd-9234c73fa0a8', 29 | 'name': 'Berliner Philharmonie', 30 | 'type': 'Venue', 31 | 'life-span': {'begin': '1963'}, 32 | } 33 | 34 | def test_fetch_multiple_places(self, engine): 35 | places = mb_place.fetch_multiple_places( 36 | ['4352063b-a833-421b-a420-e7fb295dece0', '2056ad56-cea9-4536-9f2d-58765a38829c'] 37 | ) 38 | assert places['4352063b-a833-421b-a420-e7fb295dece0']['name'] == 'Royal Albert Hall' 39 | assert places['2056ad56-cea9-4536-9f2d-58765a38829c']['name'] == 'Finnvox' 40 | 41 | def test_fetch_multiple_places_redirect(self, engine): 42 | places = mb_place.fetch_multiple_places( 43 | ['4352063b-a833-421b-a420-e7fb295dece0', 'b1690ae6-5a37-46d7-99ae-b7e2d790485f'] 44 | ) 45 | assert len(places) == 2 46 | assert places['b1690ae6-5a37-46d7-99ae-b7e2d790485f']['mbid'] == 'bea135c0-a32e-49be-85fd-9234c73fa0a8' 47 | assert places['b1690ae6-5a37-46d7-99ae-b7e2d790485f']['name'] == 'Berliner Philharmonie' 48 | 49 | def test_fetch_multiple_places_empty(self, engine): 50 | places = 
mb_place.fetch_multiple_places( 51 | ['bea135c0-a32e-49be-85fd-9234c73fa0a8', 'bea135c0-3333-3333-3333-9234c73fa0a8'], 52 | includes=['artist-rels', 'place-rels', 'url-rels'] 53 | ) 54 | assert list(places.keys()) == ['bea135c0-a32e-49be-85fd-9234c73fa0a8'] 55 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_recording.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import recording as mb_recording 4 | 5 | 6 | @pytest.mark.database 7 | class TestRecording: 8 | 9 | def test_get_recording_by_mbid(self, engine): 10 | """ Tests if appropriate recording is returned for a given MBID. """ 11 | self.maxDiff = None 12 | recording = mb_recording.get_recording_by_mbid('daccb724-8023-432a-854c-e0accb6c8678', includes=['artists']) 13 | 14 | assert recording == { 15 | 'mbid': 'daccb724-8023-432a-854c-e0accb6c8678', 16 | 'name': 'Numb / Encore', 17 | 'comment': 'explicit', 18 | 'length': 205.28, 19 | 'rating': 78, 20 | 'artist-credit-phrase': 'Jay‐Z / Linkin Park', 21 | 'artists': [ 22 | { 23 | 'mbid': 'f82bcf78-5b69-4622-a5ef-73800768d9ac', 24 | 'name': 'JAY‐Z', 25 | 'credited_name': 'Jay‐Z', 26 | 'join_phrase': ' / ', 27 | }, 28 | { 29 | 'mbid': 'f59c5520-5f46-4d2c-b2c4-822eabf53419', 30 | 'name': 'Linkin Park' 31 | } 32 | ] 33 | } 34 | 35 | def test_get_recording_by_mbid_redirect(self, engine): 36 | recording = mb_recording.get_recording_by_mbid('e00d4dce-097e-4098-bbb3-77db884566f3') 37 | assert recording == { 38 | 'mbid': 'fbe3d0b9-3990-4a76-bddb-12f4a0447a2c', 39 | 'name': 'The Perfect Drug (Nine Inch Nails)', 40 | 'length': 499, 41 | 'rating': 60, 42 | } 43 | 44 | def test_fetch_multiple_recordings(self, engine): 45 | """ Tests if appropriate recordings are returned for a given list of MBIDs. 
""" 46 | self.maxDiff = None 47 | 48 | mbids = ['daccb724-8023-432a-854c-e0accb6c8678', 'ae83579c-5f33-4a35-83f3-89206c44a426'] 49 | recordings = mb_recording.fetch_multiple_recordings(mbids, includes=['artists']) 50 | 51 | assert recordings == { 52 | 'daccb724-8023-432a-854c-e0accb6c8678': { 53 | 'mbid': 'daccb724-8023-432a-854c-e0accb6c8678', 54 | 'name': 'Numb / Encore', 55 | 'comment': 'explicit', 56 | 'length': 205.28, 57 | 'rating': 78, 58 | 'artist-credit-phrase': 'Jay‐Z / Linkin Park', 59 | 'artists': [ 60 | { 61 | 'mbid': 'f82bcf78-5b69-4622-a5ef-73800768d9ac', 62 | 'name': 'JAY‐Z', 63 | 'credited_name': 'Jay‐Z', 64 | 'join_phrase': ' / ', 65 | }, 66 | { 67 | 'mbid': 'f59c5520-5f46-4d2c-b2c4-822eabf53419', 68 | 'name': 'Linkin Park' 69 | } 70 | ] 71 | }, 72 | 'ae83579c-5f33-4a35-83f3-89206c44a426': { 73 | 'mbid': 'ae83579c-5f33-4a35-83f3-89206c44a426', 74 | 'name': "I'm a Stranger Here Myself", 75 | 'length': 344.0, 76 | 'artist-credit-phrase': 'Charlie Byrd & The Washington Guitar Quintet', 77 | 'artists': [ 78 | { 79 | 'mbid': '9d99c378-247c-47a3-94ea-753efa330023', 80 | 'name': 'Charlie Byrd', 81 | 'join_phrase': ' & ' 82 | }, 83 | { 84 | 'mbid': 'c805fb7e-c8ff-49e0-b74f-61d638444fad', 85 | 'name': 'The Washington Guitar Quintet' 86 | } 87 | ] 88 | } 89 | } 90 | 91 | def test_fetch_multiple_recordings_redirect(self, engine): 92 | recordings = mb_recording.fetch_multiple_recordings([ 93 | 'e00d4dce-097e-4098-bbb3-77db884566f3' 94 | ]) 95 | assert recordings == { 96 | 'e00d4dce-097e-4098-bbb3-77db884566f3': { 97 | 'mbid': 'fbe3d0b9-3990-4a76-bddb-12f4a0447a2c', 98 | 'name': 'The Perfect Drug (Nine Inch Nails)', 99 | 'length': 499, 100 | 'rating': 60, 101 | } 102 | } 103 | 104 | def test_fetch_multiple_recordings_missing(self, engine): 105 | """ Tests if appropriate recordings are returned for a given list of MBIDs. 
""" 106 | 107 | recordings = mb_recording.fetch_multiple_recordings( 108 | ['e00d4dce-097e-4098-bbb3-77db884566f3', 'e00d4dce-0000-0000-0000-77db884566f3'] 109 | ) 110 | 111 | assert list(recordings.keys()) == ['e00d4dce-097e-4098-bbb3-77db884566f3'] 112 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_release.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import release as mb_release 4 | 5 | 6 | @pytest.mark.database 7 | class TestRelease: 8 | 9 | def test_get_release_by_mbid(self, engine): 10 | release = mb_release.get_release_by_mbid('fed37cfc-2a6d-4569-9ac0-501a7c7598eb', 11 | includes=['media', 'release-groups']) 12 | assert release["name"] == "Master of Puppets" 13 | assert len(release["medium-list"][0]["track-list"]) == 8 14 | assert release["medium-list"][0]["track-list"] == [ 15 | { 16 | 'mbid': '58c97804-bd98-3bc6-b8c7-5234db05bc2e', 17 | 'name': 'Battery', 18 | 'number': '1', 19 | 'position': 1, 20 | 'length': 312373, 21 | 'recording_id': '3bfda26a-49fa-4bc4-a4d6-8bbfa0767ab7', 22 | 'recording_title': 'Battery', 23 | 'artist-credit': [ 24 | { 25 | 'name': 'Metallica', 26 | 'artist': { 27 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 28 | 'name': 'Metallica', 29 | 'sort_name': 'Metallica', 30 | 'life-span': {'begin': '1981-10-28'}, 31 | 'type': 'Group', 32 | } 33 | } 34 | ], 35 | 'artist-credit-phrase': 'Metallica' 36 | }, 37 | { 38 | 'mbid': '51b179fa-8e72-383b-9549-0ae9a6dd9cfb', 39 | 'name': 'Master of Puppets', 40 | 'number': '2', 41 | 'position': 2, 42 | 'length': 515226, 43 | 'recording_id': '0151d8a4-50c8-4036-b824-4a4f4b140e8e', 44 | 'recording_title': 'Master of Puppets', 45 | 'artist-credit': [ 46 | { 47 | 'name': 'Metallica', 48 | 'artist': { 49 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 50 | 'name': 'Metallica', 51 | 'sort_name': 'Metallica', 52 | 'life-span': {'begin': '1981-10-28'}, 53 | 'type': 'Group', 54 | } 55 | } 56 | ], 57 | 'artist-credit-phrase': 'Metallica' 58 | }, 59 | { 60 | 'mbid': '052e25d8-373e-3a5a-bced-bd47eb209dc5', 61 | 'name': 'The Thing That Should Not Be', 62 | 'number': '3', 63 | 'position': 3, 64 | 'length': 396200, 65 | 'recording_id': 'f5267fe1-5cb6-47f7-8df2-e6e8f09fa7ad', 66 | 'recording_title': 'The Thing That Should Not Be', 67 | 'artist-credit': [ 68 | { 69 | 'name': 'Metallica', 70 | 'artist': { 71 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 72 | 'name': 'Metallica', 73 | 'sort_name': 'Metallica', 74 | 'life-span': {'begin': '1981-10-28'}, 75 | 'type': 'Group', 76 | } 77 | } 78 | ], 79 | 'artist-credit-phrase': 'Metallica'}, 80 | { 81 | 'mbid': '00367246-d956-3a44-af4b-bc3cfd34ec49', 82 | 'name': 'Welcome Home (Sanitarium)', 83 | 'number': '4', 84 | 'position': 4, 85 | 'length': 386866, 86 | 'recording_id': 'a20860e9-7636-422b-a9cd-2da671b242a8', 87 | 'recording_title': 'Welcome Home (Sanitarium)', 88 | 'artist-credit': [ 89 | { 90 | 'name': 'Metallica', 91 | 'artist': { 92 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 93 | 'name': 'Metallica', 94 | 'sort_name': 'Metallica', 95 | 'life-span': {'begin': '1981-10-28'}, 96 | 'type': 'Group', 97 | } 98 | } 99 | ], 100 | 'artist-credit-phrase': 'Metallica' 101 | }, 102 | { 103 | 'mbid': '77fac948-8223-3077-a25e-50d9512142f0', 104 | 'name': 'Disposable Heroes', 105 | 'number': '5', 106 | 'position': 5, 107 | 'length': 496640, 108 | 'recording_id': '93ae3251-d9b5-46ee-9849-7b16d5e57d8b', 109 | 
'recording_title': 'Disposable Heroes', 110 | 'artist-credit': [ 111 | { 112 | 'name': 'Metallica', 113 | 'artist': { 114 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 115 | 'name': 'Metallica', 116 | 'sort_name': 'Metallica', 117 | 'life-span': {'begin': '1981-10-28'}, 118 | 'type': 'Group', 119 | } 120 | } 121 | ], 122 | 'artist-credit-phrase': 'Metallica'}, 123 | { 124 | 'mbid': '7f97a9e0-89ec-37ed-a3d7-5a7390ffa43b', 125 | 'name': 'Leper Messiah', 126 | 'number': '6', 127 | 'position': 6, 128 | 'length': 339866, 129 | 'recording_id': '2d9a5b40-f5e6-4499-ab7a-567fe3b42ab9', 130 | 'recording_title': 'Leper Messiah', 131 | 'artist-credit': [ 132 | { 133 | 'name': 'Metallica', 134 | 'artist': { 135 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 136 | 'name': 'Metallica', 137 | 'sort_name': 'Metallica', 138 | 'life-span': {'begin': '1981-10-28'}, 139 | 'type': 'Group', 140 | } 141 | } 142 | ], 143 | 'artist-credit-phrase': 'Metallica' 144 | }, 145 | { 146 | 'mbid': 'b7e772d3-3a9b-32ad-8e5c-e8c079d5e4f4', 147 | 'name': 'Orion', 148 | 'number': '7', 149 | 'position': 7, 150 | 'length': 507426, 151 | 'recording_id': 'b6cbe414-8b21-4600-8588-f6a80fd7043a', 152 | 'recording_title': 'Orion', 153 | 'artist-credit': [ 154 | { 155 | 'name': 'Metallica', 156 | 'artist': { 157 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 158 | 'name': 'Metallica', 159 | 'sort_name': 'Metallica', 160 | 'life-span': {'begin': '1981-10-28'}, 161 | 'type': 'Group', 162 | } 163 | } 164 | ], 165 | 'artist-credit-phrase': 'Metallica' 166 | }, 167 | { 168 | 'mbid': '0949ef68-edef-39a1-a3a0-dc666920f629', 169 | 'name': 'Damage, Inc.', 170 | 'number': '8', 171 | 'position': 8, 172 | 'length': 330933, 173 | 'recording_id': '01ea1189-e0d2-48a0-9dc2-c615785a5ae0', 174 | 'recording_title': 'Damage, Inc.', 175 | 'artist-credit': [ 176 | { 177 | 'name': 'Metallica', 178 | 'artist': { 179 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 180 | 'name': 'Metallica', 181 | 'sort_name': 'Metallica', 182 | 'life-span': {'begin': '1981-10-28'}, 183 | 'type': 'Group', 184 | } 185 | } 186 | ], 187 | 'artist-credit-phrase': 'Metallica' 188 | } 189 | ] 190 | 191 | def test_get_release_by_mbid_redirect(self, engine): 192 | release = mb_release.get_release_by_mbid('fb2031ae-4e2a-4d2c-9819-32568f9e5e17') 193 | assert release == { 194 | 'mbid': 'a6949d8e-c1eb-4eee-a670-680d28dd80e6', 195 | 'name': 'The College Dropout' 196 | } 197 | 198 | def test_fetch_multiple_releases(self, engine): 199 | releases = mb_release.fetch_multiple_releases( 200 | mbids=['e327da6d-717b-4eb3-b396-bbce6b9466bc', 'b1bb026c-e813-407f-ba7b-db7466cdc56c'], 201 | ) 202 | assert len(releases) == 2 203 | assert releases['e327da6d-717b-4eb3-b396-bbce6b9466bc']['name'] == 'Without a Sound' 204 | assert releases['b1bb026c-e813-407f-ba7b-db7466cdc56c']['name'] == 'War All the Time' 205 | 206 | def test_fetch_multiple_releases_redirect(self, engine): 207 | releases = mb_release.fetch_multiple_releases( 208 | mbids=['fb2031ae-4e2a-4d2c-9819-32568f9e5e17'], 209 | ) 210 | assert releases == { 211 | 'fb2031ae-4e2a-4d2c-9819-32568f9e5e17': { 212 | 'mbid': 'a6949d8e-c1eb-4eee-a670-680d28dd80e6', 213 | 'name': 'The College Dropout' 214 | } 215 | } 216 | 217 | def test_fetch_multiple_releases_missing(self, engine): 218 | releases = mb_release.fetch_multiple_releases( 219 | mbids=['a6949d8e-c1eb-4eee-a670-680d28dd80e6', 'a6949d8e-cccc-cccc-cccc-680d28dd80e6'], 220 | ) 221 | assert list(releases.keys()) == ['a6949d8e-c1eb-4eee-a670-680d28dd80e6'] 222 | 223 | def 
test_get_releases_using_recording_mbid(self, engine): 224 | """Tests if releases are fetched correctly for a given recording MBID""" 225 | self.maxDiff = None 226 | 227 | releases = mb_release.get_releases_using_recording_mbid('5465ca86-3881-4349-81b2-6efbd3a59451') 228 | 229 | assert releases == [ 230 | {'mbid': 'cb48685f-beea-4ca6-93f3-49ef4d8cbf28', 'name': 'The Blueprint²: The Gift & The Curse'}, 231 | {'mbid': '4c9bd72b-dae9-44bf-a052-9b2f6c0d50de', 'name': 'Back to Bey-Sic', 'comment': 'deluxe edition'}, 232 | {'mbid': '89f64145-2f75-41d1-831a-517b785ed75a', 'name': 'The Blueprint Collector’s Edition'}, 233 | {'mbid': 'f1183a86-36d2-4f1f-ab8f-6f965dc0b033', 'name': 'The Hits Collection Volume One'}, 234 | {'mbid': '7c77ca4d-d84b-4b67-8705-e6afe9eb5878', 'name': 'The Blueprint²: The Gift & The Curse', 'comment': 'MQA, explicit'}, 235 | {'mbid': '77a74b85-0ae0-338f-aaca-4f36cd394f88', 'name': 'Blueprint 2.1'}, 236 | {'mbid': 'cb180855-979d-4d5d-9024-3bc97c64d19c', 'name': 'The Blueprint²: The Gift & The Curse', 'comment': 'explicit'}, 237 | {'mbid': 'b207b569-6323-4426-801b-3d5dbaf28d49', 'name': 'The Blueprint²: The Gift & The Curse', 'comment': 'explicit'}, 238 | {'mbid': '7111c8bc-8549-4abc-8ab9-db13f65b4a55', 'name': 'Blueprint 2.1'}, 239 | {'mbid': '3c535d03-2fcc-467a-8d47-34b3250b8211', 'name': 'The Hits Collection Volume One', 'comment': 'explicit'}, 240 | {'mbid': 'c84d8fa8-6f8d-42c9-87cc-b726e859b41d', 'name': 'The Hits Collection Volume One', 'comment': 'edited version'}, 241 | {'mbid': '8d51f750-7ee9-4937-8907-0243efc2f6df', 'name': 'The Blueprint²: The Gift & The Curse', 'comment': 'explicit'}, 242 | {'mbid': '4f41108c-db36-4616-8614-f504fdef287a', 'name': 'Blueprint 2.1'}, 243 | {'mbid': 'b0075ce9-58c8-47e2-8a72-5f783314a97e', 'name': 'The Hits Collection Volume One', 'comment': 'explicit'}, 244 | {'mbid': '4a441628-2e4d-4032-825f-6bdf4aee382e', 'name': 'The Hits Collection, Volume 1'}, 245 | {'mbid': '5e782ae3-602b-48b7-99be-de6bcffa4aba', 'name': 'The Hits Collection, Volume 1', 'comment': 'Deluxe edition'}, 246 | {'mbid': '7ebaaa95-e316-3b20-8819-7e4ca648c135', 'name': 'The Hits Collection, Volume 1'}, 247 | {'mbid': '240f52cd-9120-452d-98de-8df087e389e8', 'name': 'The Real Best of Both Worlds'} 248 | ] 249 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_release_group.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import release_group as mb_release_group 4 | 5 | 6 | @pytest.mark.database 7 | class TestReleaseGroup: 8 | 9 | def test_get_release_group_by_mbid(self, engine): 10 | release_group = mb_release_group.get_release_group_by_mbid('0f18ec88-aa87-38a9-8a65-f03d81763560', 11 | includes=['artists', 'releases', 12 | 'release-group-rels', 'url-rels', 'tags']) 13 | 14 | assert release_group['mbid'] == '0f18ec88-aa87-38a9-8a65-f03d81763560' 15 | assert release_group['title'] == 'Led Zeppelin' 16 | # Check if multiple artists are properly fetched 17 | assert release_group['artist-credit-phrase'] == 'Led Zeppelin' 18 | assert release_group['artist-credit'][0] == { 19 | 'name': 'Led Zeppelin', 20 | 'artist': { 21 | 'mbid': '678d88b2-87b0-403b-b63d-5da7465aecc3', 22 | 'name': 'Led Zeppelin', 23 | 'sort_name': 'Led Zeppelin', 24 | 'life-span': {'begin': '1968', 'end': '1980-09-25'}, 25 | 'type': 'Group', 26 | } 27 | } 28 | 29 | def test_get_release_group_by_mbid_redirect(self, engine): 30 | release_group = 
mb_release_group.get_release_group_by_mbid('358bbed4-1717-3e1c-ba8e-af54d2d3a5d6') 31 | assert release_group == { 32 | 'mbid': '8a01217e-6947-3927-a39b-6691104694f1', 33 | 'title': 'The College Dropout', 34 | 'first-release-year': 2003, 35 | 'type': 'Album', 36 | 'rating': 88, 37 | } 38 | 39 | def test_fetch_release_groups(self, engine): 40 | release_groups = mb_release_group.fetch_multiple_release_groups( 41 | mbids=['0f18ec88-aa87-38a9-8a65-f03d81763560', '1b36a363-eec6-35ba-b0ed-34a1f2f2cd82'], 42 | ) 43 | assert len(release_groups) == 2 44 | assert release_groups['0f18ec88-aa87-38a9-8a65-f03d81763560']['title'] == 'Led Zeppelin' 45 | assert release_groups['1b36a363-eec6-35ba-b0ed-34a1f2f2cd82']['title'] == 'Cosmic Thing' 46 | 47 | def test_fetch_release_groups_redirect(self, engine): 48 | release_groups = mb_release_group.fetch_multiple_release_groups( 49 | mbids=['358bbed4-1717-3e1c-ba8e-af54d2d3a5d6'], 50 | ) 51 | assert release_groups == { 52 | '358bbed4-1717-3e1c-ba8e-af54d2d3a5d6': { 53 | 'mbid': '8a01217e-6947-3927-a39b-6691104694f1', 54 | 'title': 'The College Dropout', 55 | 'first-release-year': 2003, 56 | 'type': 'Album', 57 | 'rating': 88, 58 | } 59 | } 60 | 61 | def test_fetch_release_groups_missing(self, engine): 62 | release_groups = mb_release_group.fetch_multiple_release_groups( 63 | mbids=['358bbed4-1717-3e1c-ba8e-af54d2d3a5d6', '358bbed4-1111-1111-1111-af54d2d3a5d6'], 64 | ) 65 | assert list(release_groups.keys()) == ['358bbed4-1717-3e1c-ba8e-af54d2d3a5d6'] 66 | 67 | def test_fetch_get_release_groups_for_artist(self, engine): 68 | release_groups = mb_release_group.get_release_groups_for_artist( 69 | artist_id='074e3847-f67f-49f9-81f1-8c8cea147e8e', 70 | release_types=['Single', 'EP'], 71 | ) 72 | assert release_groups[0] == [ 73 | { 74 | 'mbid': '07f5e633-8846-3fe7-8e68-472b54dba159', 75 | 'title': 'This Is What the Edge of Your Seat Was Made For', 76 | 'first-release-year': 2004, 77 | 'type': 'EP', 78 | } 79 | ] 80 | assert release_groups[1] == 1 81 | 82 | def test_fetch_get_release_groups_for_label(self, engine): 83 | release_groups = mb_release_group.get_release_groups_for_label( 84 | label_mbid='4cccc72a-0bd0-433a-905e-dad87871397d', 85 | release_types=['Album'], 86 | ) 87 | assert release_groups[0][0] == { 88 | 'mbid': 'a96597aa-93b4-4e14-9e6e-03892ab24979', 89 | 'title': 'Watch the Throne', 90 | 'first-release-year': 2011, 91 | 'type': 'Album', 92 | } 93 | 94 | assert release_groups[1] == 19 95 | assert len(release_groups[0]) == 19 96 | 97 | # Test release group with null type 98 | release_groups_1 = mb_release_group.get_release_groups_for_label( 99 | label_mbid='d835e36a-78ee-48ba-ac04-b46fb37df41f', 100 | release_types=['Other'], 101 | ) 102 | assert release_groups_1[0][0] == { 103 | 'mbid': '39d08e6e-b877-4c64-aef9-ce79a19f6075', 104 | 'title': 'American Songbook: The American Music Collection, Vol. 
III', 105 | 'first-release-year': 1996, 106 | } 107 | 108 | assert release_groups_1[1] == 2 109 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_serialize.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, date 2 | 3 | from brainzutils.musicbrainz_db.serialize import serialize_recording, serialize_artist_credit, serialize_editor 4 | from brainzutils.musicbrainz_db.test_data import recording_numb_encore_explicit, artistcredit_jay_z_linkin_park, \ 5 | editor_2 6 | from unittest import TestCase 7 | 8 | 9 | class SerializeTestCase(TestCase): 10 | def test_serialize_recording(self): 11 | """Tests that recordings are serialized properly.""" 12 | # Without any includes 13 | recording = serialize_recording(recording_numb_encore_explicit) 14 | self.assertDictEqual(recording, 15 | { 16 | 'length': 205.28, 17 | 'mbid': 'daccb724-8023-432a-854c-e0accb6c8678', 18 | 'name': 'Numb/Encore (explicit)', 19 | } 20 | ) 21 | 22 | # With artists included 23 | artists = recording_numb_encore_explicit.artist_credit.artists 24 | recording = serialize_recording( 25 | recording_numb_encore_explicit, 26 | includes={'artists': artists, 'artist-credit-phrase': 'Jay-Z/Linkin Park'} 27 | ) 28 | self.assertDictEqual(recording, 29 | { 30 | 'mbid': 'daccb724-8023-432a-854c-e0accb6c8678', 31 | 'name': 'Numb/Encore (explicit)', 32 | 'length': 205.28, 33 | 'artist-credit-phrase': 'Jay-Z/Linkin Park', 34 | 'artists': [ 35 | { 36 | 'mbid': 'f82bcf78-5b69-4622-a5ef-73800768d9ac', 37 | 'name': 'JAY Z', 38 | 'credited_name': 'Jay-Z', 39 | 'join_phrase': '/' 40 | }, 41 | { 42 | 'mbid': 'f59c5520-5f46-4d2c-b2c4-822eabf53419', 43 | 'name': 'Linkin Park' 44 | } 45 | ] 46 | } 47 | ) 48 | 49 | def test_serialize_artist_credits(self): 50 | """Test that artist_credits are serialized properly.""" 51 | artist_credits = serialize_artist_credit(artistcredit_jay_z_linkin_park) 52 | self.assertListEqual(artist_credits, 53 | [ 54 | { 55 | 'mbid': 'f82bcf78-5b69-4622-a5ef-73800768d9ac', 56 | 'name': 'JAY Z', 57 | 'credited_name': 'Jay-Z', 58 | 'join_phrase': '/' 59 | }, 60 | { 61 | 'mbid': 'f59c5520-5f46-4d2c-b2c4-822eabf53419', 62 | 'name': 'Linkin Park' 63 | } 64 | ] 65 | ) 66 | 67 | def test_serialize_editor(self): 68 | """Test that sensitive information is removed, everything else is covered in test_editor.""" 69 | editor = serialize_editor(editor_2) 70 | self.assertNotIn("password", editor) 71 | self.assertNotIn("ha1", editor) 72 | self.assertEqual(editor, { 73 | 'id': 2324, 74 | 'name': 'Editor 2', 75 | 'privs': 3, 76 | 'email': 'editor@example.com', 77 | 'website': 'example.com', 78 | 'bio': 'Random\neditor', 79 | 'member_since': datetime(2014, 12, 1, 14, 6, 42, 321443), 80 | 'email_confirm_date': datetime(2014, 12, 1, 14, 6, 42, 321443), 81 | 'last_login_date': datetime(2014, 12, 1, 14, 6, 42, 321443), 82 | 'last_updated': datetime(2014, 12, 1, 14, 6, 42, 321443), 83 | 'birth_date': date(1999, 1, 1), 84 | 'deleted': False, 85 | 'gender': None, 86 | 'area': { 87 | "mbid": "4479c385-74d8-4a2b-bdab-f48d1e6969ba", 88 | "name": "Hämeenlinna" 89 | } 90 | }) 91 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_work.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import work as mb_work 4 | 5 | 6 | @pytest.mark.database 7 | class TestWork: 8 | 
def test_get_work_by_mbid(self, engine): 9 | work = mb_work.get_work_by_mbid('d35f8fb8-52ab-4a12-b1c8-f2054d10cf88') 10 | assert work == { 11 | "mbid": "d35f8fb8-52ab-4a12-b1c8-f2054d10cf88", 12 | "name": "Apple Bush", 13 | "type": "Song", 14 | } 15 | 16 | def test_get_work_by_mbid_redirect(self, engine): 17 | work = mb_work.get_work_by_mbid('4531bed5-073c-37a8-9500-70de8583c0a1') 18 | assert work == { 19 | "mbid": "36e33f94-ef5f-36b5-97b0-c1ed9c5a542f", 20 | "name": "Jesus Walks", 21 | "type": "Song", 22 | } 23 | 24 | def test_get_work_by_mbid_with_includes(self, engine): 25 | work = mb_work.get_work_by_mbid('4531bed5-073c-37a8-9500-70de8583c0a1', 26 | includes=['artist-rels', 'recording-rels']) 27 | assert work["mbid"] == "36e33f94-ef5f-36b5-97b0-c1ed9c5a542f" 28 | assert len(work["artist-rels"]) == 4 29 | assert len(work["recording-rels"]) == 55 30 | 31 | def test_fetch_multiple_works(self, engine): 32 | works = mb_work.fetch_multiple_works([ 33 | 'd35f8fb8-52ab-4a12-b1c8-f2054d10cf88', 34 | '1deb7377-f980-4adb-8f0f-a36355461f38' 35 | ]) 36 | assert works["d35f8fb8-52ab-4a12-b1c8-f2054d10cf88"] == { 37 | "mbid": "d35f8fb8-52ab-4a12-b1c8-f2054d10cf88", 38 | "name": "Apple Bush", 39 | "type": "Song", 40 | } 41 | assert works["1deb7377-f980-4adb-8f0f-a36355461f38"] == { 42 | "mbid": "1deb7377-f980-4adb-8f0f-a36355461f38", 43 | "name": "Fields of Regret", 44 | "type": "Song", 45 | } 46 | 47 | def test_fetch_multiple_works_redirect(self, engine): 48 | works = mb_work.fetch_multiple_works([ 49 | '4531bed5-073c-37a8-9500-70de8583c0a1', 50 | ]) 51 | assert works == { 52 | '4531bed5-073c-37a8-9500-70de8583c0a1': { 53 | "mbid": "36e33f94-ef5f-36b5-97b0-c1ed9c5a542f", 54 | "name": "Jesus Walks", 55 | "type": "Song", 56 | } 57 | } 58 | 59 | def test_fetch_multiple_works_missing(self, engine): 60 | works = mb_work.fetch_multiple_works([ 61 | '36e33f94-ef5f-36b5-97b0-c1ed9c5a542f', 62 | '36e33f94-eeee-eeee-eeee-c1ed9c5a542f' 63 | ]) 64 | assert list(works.keys()) == ['36e33f94-ef5f-36b5-97b0-c1ed9c5a542f'] 65 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/utils.py: -------------------------------------------------------------------------------- 1 | from brainzutils.musicbrainz_db.models import ENTITY_MODELS, META_MODELS, REDIRECT_MODELS 2 | import brainzutils.musicbrainz_db.exceptions as mb_exceptions 3 | 4 | 5 | def get_entities_by_gids(query, entity_type, mbids): 6 | """Get entities using their MBIDs. 7 | 8 | An entity can have multiple MBIDs. This function may be passed another 9 | MBID of an entity, in which case, it is redirected to the original entity. 10 | 11 | Note that the query may be modified before passing it to this 12 | function in order to save queries made to the database. 13 | 14 | Args: 15 | query (Query): SQLAlchemy Query object. 16 | entity_type (str): Type of entity being queried. 17 | mbids (list): IDs of the target entities. 18 | 19 | Returns: 20 | Dictionary of objects of target entities keyed by their MBID. 
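        For a redirected MBID, the key is the MBID that was passed in, not the
        entity's canonical MBID, so callers can always look up results using the
        identifiers they supplied.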
21 | """ 22 | entity_model = ENTITY_MODELS[entity_type] 23 | if entity_type in META_MODELS: 24 | meta_model = META_MODELS[entity_type] 25 | query = query.add_entity(meta_model).join(meta_model) 26 | 27 | results = query.filter(entity_model.gid.in_(mbids)).all() 28 | entity_gids = set() 29 | entities = {} 30 | if entity_type in META_MODELS: 31 | for entity, entity_meta in results: 32 | entities[entity.gid] = entity 33 | entities[entity.gid].rating = entity_meta.rating 34 | entity_gids.add(entity.gid) 35 | else: 36 | entities = {str(entity.gid): entity for entity in results} 37 | entity_gids = {entity.gid for entity in results} 38 | 39 | remaining_gids = list(set(mbids) - entity_gids) 40 | if remaining_gids: 41 | redirect_model = REDIRECT_MODELS[entity_type] 42 | query = query.add_entity(redirect_model).join(redirect_model) 43 | results = query.filter(redirect_model.gid.in_(remaining_gids)) 44 | 45 | redirect_gids = set() 46 | if entity_type in META_MODELS: 47 | for entity, entity_meta, redirect_obj in results: 48 | entities[redirect_obj.gid] = entity 49 | entities[redirect_obj.gid].rating = entity_meta.rating 50 | redirect_gids.add(redirect_obj.gid) 51 | else: 52 | for entity, redirect_obj in results: 53 | entities[redirect_obj.gid] = entity 54 | redirect_gids.add(redirect_obj.gid) 55 | 56 | return entities 57 | 58 | 59 | def get_entities_by_ids(query, entity_type, ids): 60 | """Get entities using their IDs. 61 | 62 | Note that the query may be modified before passing it to this 63 | function in order to save queries made to the database. 64 | 65 | Args: 66 | query (Query): SQLAlchemy Query object. 67 | entity_type (str): Type of entity being queried. 68 | ids (list): IDs of the target entities. 69 | 70 | Returns: 71 | Dictionary of objects of target entities keyed by their ID. 72 | """ 73 | entity_model = ENTITY_MODELS[entity_type] 74 | results = query.filter(entity_model.id.in_(ids)).all() 75 | entities = {entity.id: entity for entity in results} 76 | 77 | return entities 78 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/work.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from mbdata import models 3 | from sqlalchemy.orm import joinedload 4 | from brainzutils.musicbrainz_db import mb_session 5 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 6 | from brainzutils.musicbrainz_db.includes import check_includes 7 | from brainzutils.musicbrainz_db.serialize import serialize_works 8 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 9 | 10 | 11 | def get_work_by_mbid(mbid, includes=None): 12 | """Get work with the MusicBrainz ID. 13 | 14 | Args: 15 | mbid (uuid): MBID(gid) of the work. 16 | Returns: 17 | Dictionary containing the work information, or None if the work doesn't exist. 18 | """ 19 | if includes is None: 20 | includes = [] 21 | 22 | return fetch_multiple_works( 23 | [mbid], 24 | includes=includes, 25 | ).get(mbid) 26 | 27 | 28 | def fetch_multiple_works(mbids, includes=None): 29 | """Get info related to multiple works using their MusicBrainz IDs. 30 | 31 | Args: 32 | mbids (list): List of MBIDs of works. 33 | includes (list): List of information to be included. 34 | 35 | Returns: 36 | A dictionary containing info of multiple works keyed by their MBID. 37 | If an MBID doesn't exist in the database, it isn't returned. 
38 |         If an MBID is a redirect, the dictionary key will be the MBID given as an argument,
39 |         but the returned object will contain the new MBID in the 'mbid' key.
40 |     """
41 |     if includes is None:
42 |         includes = []
43 |     includes_data = defaultdict(dict)
44 |     check_includes('work', includes)
45 |     with mb_session() as db:
46 |         query = db.query(models.Work).options(joinedload(models.Work.type))
47 | 
48 |         works = get_entities_by_gids(
49 |             query=query,
50 |             entity_type='work',
51 |             mbids=mbids,
52 |         )
53 |         work_ids = [work.id for work in works.values()]
54 | 
55 |         if 'artist-rels' in includes:
56 |             get_relationship_info(
57 |                 db=db,
58 |                 target_type='artist',
59 |                 source_type='work',
60 |                 source_entity_ids=work_ids,
61 |                 includes_data=includes_data,
62 |             )
63 | 
64 |         if 'recording-rels' in includes:
65 |             get_relationship_info(
66 |                 db=db,
67 |                 target_type='recording',
68 |                 source_type='work',
69 |                 source_entity_ids=work_ids,
70 |                 includes_data=includes_data,
71 |             )
72 | 
73 |     return {str(mbid): serialize_works(work, includes_data[work.id]) for mbid, work in works.items()}
74 | 
--------------------------------------------------------------------------------
/brainzutils/ratelimit.py:
--------------------------------------------------------------------------------
1 | # The original version of this code was written by Armin Ronacher:
2 | #
3 | # This snippet by Armin Ronacher can be used freely for anything you like. Consider it public domain.
4 | #
5 | # http://flask.pocoo.org/snippets/70/
6 | #
7 | import time
8 | from functools import update_wrapper
9 | 
10 | from flask import request, g
11 | from werkzeug.exceptions import TooManyRequests
12 | 
13 | from brainzutils import cache
14 | 
15 | # g key for the timeout when limits must be refreshed from cache
16 | ratelimit_refresh = 60 # in seconds
17 | ratelimit_timeout = "rate_limits_timeout"
18 | 
19 | # Defaults
20 | ratelimit_per_token_default = 50
21 | ratelimit_per_ip_default = 30
22 | ratelimit_window_default = 10
23 | 
24 | # keys
25 | ratelimit_per_token_key = "rate_limit_per_token_limit"
26 | ratelimit_per_ip_key = "rate_limit_per_ip_limit"
27 | ratelimit_window_key = "rate_limit_window"
28 | ratelimit_cache_namespace = "rate_limit"
29 | 
30 | # external functions
31 | ratelimit_user_validation = None
32 | 
33 | 
34 | class RateLimit(object):
35 |     """
36 |     This RateLimit object is created when a request is started (via the ratelimit decorator)
37 |     and is stored in the flask request context so that the results can be injected into
38 |     the response headers before the request is over.
39 | 
40 |     HOW TO USE THIS MODULE:
41 | 
42 |     This module defines a set of functions that allow you to add rate limiting to your
43 |     flask app. There are three values to know and set:
44 | 
45 |     per_token_limit - The number of requests that are allowed for a caller who is
46 |     setting an::
47 | 
48 |         Authorization: Token <token>
49 | 
50 |     header. This limit can be different than the limit for rate limiting on an IP basis.
51 | 
52 |     per_ip_limit - The number of requests that are allowed for a caller who is not
53 |     providing an Authorization header and is rate limited on their IP address.
54 | 
55 |     ratelimit_window - The window, in seconds, for which the limits
56 |     above are applied.
57 | 
58 |     To add ratelimit capabilities to your flask app, follow these steps:
59 | 
60 |     1. During app creation add these lines::
61 | 
62 |        from brainzutils.ratelimit import ratelimit, inject_x_rate_headers
63 | 
64 |        @app.after_request
65 |        def after_request_callbacks(response):
66 |            return inject_x_rate_headers(response)
67 | 
68 |     2. Then apply the ratelimit() decorator to any function that should be rate limited::
69 | 
70 |        @app.route('/')
71 |        @ratelimit()
72 |        def index():
73 |            return 'test'
74 | 
75 |     3. The default rate limits are defined above (see the Defaults comment). If you want to set different
76 |        rate limits, which can also be done dynamically without restarting the application, call
77 |        the set_rate_limits function::
78 | 
79 |        from brainzutils.ratelimit import set_rate_limits
80 | 
81 |        set_rate_limits(per_token_limit, per_ip_limit, rate_limit_window)
82 | 
83 |     4. To enable token-based rate limiting, callers need to pass the Authorization header (see above)
84 |        and the application needs to provide a user validation function::
85 | 
86 |        from brainzutils.ratelimit import set_user_validation_function
87 | 
88 |        def validate_user(user):
89 |            if user == valid_user:
90 |                return True
91 |            return False
92 | 
93 |        set_user_validation_function(validate_user)
94 | 
95 |     """
96 | 
97 |     # From the docs:
98 |     # We also give the key extra expiration_window seconds to expire in cache so that badly
99 |     # synchronized clocks between the workers and the cache server do not cause problems.
100 |     expiration_window = 10
101 | 
102 |     def __init__(self, key_prefix, limit, per):
103 |         current_time = int(time.time())
104 |         self.reset = (current_time // per) * per + per
105 |         self.seconds_before_reset = self.reset - current_time
106 |         self.key = key_prefix + str(self.reset)
107 |         self.limit = limit
108 |         self.per = per
109 |         self.current = cache.increment(self.key, namespace=ratelimit_cache_namespace)
110 |         cache.expireat(self.key, self.reset + self.expiration_window, namespace=ratelimit_cache_namespace)
111 | 
112 |     remaining = property(lambda x: max(x.limit - x.current, 0))
113 |     over_limit = property(lambda x: x.current > x.limit)
114 | 
115 | 
116 | def set_user_validation_function(func):
117 |     '''
118 |     The function passed to this method should accept one argument, the Authorization header contents,
119 |     and return True if that user is a valid user, False otherwise.
120 |     '''
121 | 
122 |     global ratelimit_user_validation
123 |     ratelimit_user_validation = func
124 | 
125 | 
126 | def set_rate_limits(per_token, per_ip, window):
127 |     '''
128 |     Update the current rate limits. This will affect all new rate limiting windows; existing windows will not be changed.
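    For example, to allow 100 requests per token and 20 per IP over a 60 second window
    (the numbers here are purely illustrative)::

        set_rate_limits(100, 20, 60)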
129 | ''' 130 | cache.set(ratelimit_per_token_key, per_token, expirein=0, namespace=ratelimit_cache_namespace) 131 | cache.set(ratelimit_per_ip_key, per_ip, expirein=0, namespace=ratelimit_cache_namespace) 132 | cache.set(ratelimit_window_key, window, expirein=0, namespace=ratelimit_cache_namespace) 133 | 134 | 135 | def inject_x_rate_headers(response): 136 | ''' 137 | Add rate limit headers to responses 138 | ''' 139 | limit = get_view_rate_limit() 140 | if limit: 141 | h = response.headers 142 | h.add('Access-Control-Expose-Headers', 'X-RateLimit-Remaining,X-RateLimit-Limit,X-RateLimit-Reset,X-RateLimit-Reset-In') 143 | h.add('X-RateLimit-Remaining', str(limit.remaining)) 144 | h.add('X-RateLimit-Limit', str(limit.limit)) 145 | h.add('X-RateLimit-Reset', str(limit.reset)) 146 | h.add('X-RateLimit-Reset-In', str(limit.seconds_before_reset)) 147 | return response 148 | 149 | 150 | def get_view_rate_limit(): 151 | ''' 152 | Helper function to fetch the ratelimit limits from the flask context 153 | ''' 154 | return getattr(g, '_view_rate_limit', None) 155 | 156 | 157 | def on_over_limit(limit): 158 | ''' 159 | Set a nice and readable error message for over the limit requests. 160 | ''' 161 | raise TooManyRequests( 162 | 'You have exceeded your rate limit. See the X-RateLimit-* response headers for more ' \ 163 | 'information on your current rate limit.') 164 | 165 | 166 | def check_limit_freshness(): 167 | ''' 168 | This function checks to see if the values we have cached in the current request context 169 | are still fresh enough. If they've existed longer than the timeout value, refresh from 170 | the cache. This allows us to not check the limits for each request, saving cache traffic. 171 | ''' 172 | 173 | limits_timeout = getattr(g, '_' + ratelimit_timeout, 0) 174 | if time.time() <= limits_timeout: 175 | return 176 | 177 | value = int(cache.get(ratelimit_per_token_key, namespace=ratelimit_cache_namespace) or '0') 178 | if not value: 179 | cache.set(ratelimit_per_token_key, ratelimit_per_token_default, expirein=0, namespace=ratelimit_cache_namespace) 180 | value = ratelimit_per_token_default 181 | setattr(g, '_' + ratelimit_per_token_key, value) 182 | 183 | value = int(cache.get(ratelimit_per_ip_key, namespace=ratelimit_cache_namespace) or '0') 184 | if not value: 185 | cache.set(ratelimit_per_ip_key, ratelimit_per_ip_default, expirein=0, namespace=ratelimit_cache_namespace) 186 | value = ratelimit_per_ip_default 187 | setattr(g, '_' + ratelimit_per_ip_key, value) 188 | 189 | value = int(cache.get(ratelimit_window_key, namespace=ratelimit_cache_namespace) or '0') 190 | if not value: 191 | cache.set(ratelimit_window_key, ratelimit_window_default, expirein=0, namespace=ratelimit_cache_namespace) 192 | value = ratelimit_window_default 193 | setattr(g, '_' + ratelimit_window_key, value) 194 | 195 | setattr(g, '_' + ratelimit_timeout, int(time.time()) + ratelimit_refresh) 196 | 197 | 198 | def get_per_ip_limits(): 199 | ''' 200 | Fetch the per IP limits from context/cache 201 | ''' 202 | check_limit_freshness() 203 | return { 204 | 'limit': getattr(g, '_' + ratelimit_per_ip_key), 205 | 'window' : getattr(g, '_' + ratelimit_window_key), 206 | } 207 | 208 | 209 | def get_per_token_limits(): 210 | ''' 211 | Fetch the per token limits from context/cache 212 | ''' 213 | check_limit_freshness() 214 | return { 215 | 'limit': getattr(g, '_' + ratelimit_per_token_key), 216 | 'window' : getattr(g, '_' + ratelimit_window_key), 217 | } 218 | 219 | 220 | def get_rate_limit_data(request): 221 | '''Fetch 
the rate limit key and limits for the given request. If an Authorization header is provided,
222 |     the caller will get a better, personalized rate limit. If no header is provided,
223 |     the caller will be rate limited by IP, which gets an overall lower rate limit.
224 |     This should encourage callers to always provide the Authorization token.
225 |     '''
226 | 
227 |     # If a user verification function is provided, parse the Authorization header and try to look up that user
228 |     if ratelimit_user_validation:
229 |         auth_header = request.headers.get('Authorization')
230 |         if auth_header:
231 |             auth_token = auth_header[6:]
232 |             is_valid = ratelimit_user_validation(auth_token)
233 |             if is_valid:
234 |                 values = get_per_token_limits()
235 |                 values['key'] = auth_token
236 |                 return values
237 | 
238 | 
239 |     # No valid auth token provided. Look for a remote addr header provided by the proxy,
240 |     # or, if that isn't available, use the IP address of the request.
241 |     ip = request.environ.get('REMOTE_ADDR', None)
242 |     if not ip:
243 |         ip = request.remote_addr
244 | 
245 |     values = get_per_ip_limits()
246 |     values['key'] = ip
247 |     return values
248 | 
249 | 
250 | def ratelimit():
251 |     '''
252 |     This is the decorator that should be applied to all view functions that should be
253 |     rate limited.
254 |     '''
255 |     def decorator(f):
256 |         def rate_limited(*args, **kwargs):
257 |             data = get_rate_limit_data(request)
258 |             rlimit = RateLimit(data['key'], data['limit'], data['window'])
259 |             g._view_rate_limit = rlimit
260 |             if rlimit.over_limit:
261 |                 return on_over_limit(rlimit)
262 |             return f(*args, **kwargs)
263 |         return update_wrapper(rate_limited, f)
264 |     return decorator
265 | 
--------------------------------------------------------------------------------
/brainzutils/sentry.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | 
4 | import sentry_sdk
5 | from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
6 | from sentry_sdk.integrations.logging import LoggingIntegration
7 | from sentry_sdk.integrations.redis import RedisIntegration
8 | from sentry_sdk.integrations.flask import FlaskIntegration
9 | 
10 | 
11 | def init_sentry(dsn, level=logging.WARNING, **options):
12 |     """Adds Sentry event logging.
13 | 
14 |     Sentry is a realtime event logging and aggregation platform.
15 |     By default, we add integrations for the Python logger, Flask, Redis, and SQLAlchemy.
16 | 
17 |     Arguments:
18 |         dsn: The sentry DSN to connect to
19 |         level: the logging level at which logging messages are sent as events to sentry
20 |         options: Any other arguments to be passed to sentry_sdk.init.
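            For example, standard sentry_sdk.init options such as ``environment``
            or ``traces_sample_rate`` can be passed through here.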
21 | See https://docs.sentry.io/platforms/python/configuration/options/ 22 | """ 23 | sentry_sdk.init(dsn, integrations=[LoggingIntegration(level=level), FlaskIntegration(), RedisIntegration(), 24 | SqlalchemyIntegration()], 25 | **options) 26 | # This env variable is set in the MetaBrainz production infrastructure and is unique per container 27 | container_name = os.getenv("CONTAINER_NAME") 28 | if container_name: 29 | sentry_sdk.set_tag("container_name", container_name) 30 | -------------------------------------------------------------------------------- /brainzutils/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metabrainz/brainzutils-python/bf01c6da15d4a2426d64a31cf232c06bec3860f3/brainzutils/test/__init__.py -------------------------------------------------------------------------------- /brainzutils/test/test_cache.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=protected-access 2 | 3 | import datetime 4 | import os 5 | import unittest 6 | from time import sleep, time 7 | 8 | from unittest import mock 9 | import redis 10 | 11 | from brainzutils import cache 12 | 13 | 14 | class CacheTestCase(unittest.TestCase): 15 | """Testing our custom wrapper for redis.""" 16 | host = os.environ.get("REDIS_HOST", "localhost") 17 | port = 6379 18 | namespace = "NS_TEST" 19 | 20 | def setUp(self): 21 | cache.init( 22 | host=self.host, 23 | port=self.port, 24 | namespace=self.namespace, 25 | ) 26 | # Making sure there are no items in cache before we run each test 27 | cache.flush_all() 28 | 29 | def test_no_init(self): 30 | cache._r = None 31 | with self.assertRaises(RuntimeError): 32 | cache.set("test", "testing", expirein=0) 33 | with self.assertRaises(RuntimeError): 34 | cache.get("test") 35 | 36 | def test_single(self): 37 | self.assertTrue(cache.set("test2", "Hello!", expirein=0)) 38 | self.assertEqual(cache.get("test2"), "Hello!") 39 | 40 | def test_single_no_encode(self): 41 | self.assertTrue(cache.set("no encode", 1, expirein=0, encode=False)) 42 | self.assertEqual(cache.get("no encode", decode=False), b"1") 43 | 44 | def test_single_with_namespace(self): 45 | self.assertTrue(cache.set("test", 42, namespace="testing", expirein=0)) 46 | self.assertEqual(cache.get("test", namespace="testing"), 42) 47 | 48 | def test_single_fancy(self): 49 | self.assertTrue(cache.set("test3", u"Привет!", expirein=0)) 50 | self.assertEqual(cache.get("test3"), u"Привет!") 51 | 52 | def test_single_dict(self): 53 | dictionary = { 54 | "fancy": "yeah", 55 | "wow": 11, 56 | } 57 | self.assertTrue(cache.set('some_dict', dictionary, expirein=0)) 58 | self.assertEqual(cache.get('some_dict'), dictionary) 59 | 60 | def test_single_dict_fancy(self): 61 | dictionary = { 62 | "fancy": u"Да", 63 | "тест": 11, 64 | } 65 | cache.set('some_dict', dictionary, expirein=0) 66 | self.assertEqual(cache.get('some_dict'), dictionary) 67 | 68 | def test_datetime(self): 69 | self.assertTrue(cache.set('some_time', datetime.datetime.now(), expirein=0)) 70 | self.assertEqual(type(cache.get('some_time')), datetime.datetime) 71 | 72 | dictionary = { 73 | "id": 1, 74 | "created": datetime.datetime.now(), 75 | } 76 | self.assertTrue(cache.set('some_other_time', dictionary, expirein=0)) 77 | self.assertEqual(cache.get('some_other_time'), dictionary) 78 | 79 | def test_delete(self): 80 | key = "testing" 81 | self.assertTrue(cache.set(key, u"Пример", expirein=0)) 82 | self.assertEqual(cache.get(key), 
u"Пример") 83 | self.assertEqual(cache.delete(key), 1) 84 | self.assertIsNone(cache.get(key)) 85 | 86 | def test_delete_with_namespace(self): 87 | key = "testing" 88 | namespace = "spaaaaaaace" 89 | self.assertTrue(cache.set(key, u"Пример", namespace=namespace, expirein=0)) 90 | self.assertEqual(cache.get(key, namespace=namespace), u"Пример") 91 | self.assertEqual(cache.delete(key, namespace=namespace), 1) 92 | self.assertIsNone(cache.get(key, namespace=namespace)) 93 | 94 | def test_many(self): 95 | # With namespace 96 | mapping = { 97 | "test1": "Hello", 98 | "test2": "there", 99 | } 100 | self.assertTrue(cache.set_many(mapping, namespace="testing-1", expirein=0)) 101 | self.assertEqual(cache.get_many(list(mapping.keys()), namespace="testing-1"), mapping) 102 | 103 | # With another namespace 104 | test = cache.get_many(list(mapping.keys()), namespace="testing-2") 105 | for key, val in test.items(): 106 | self.assertIn(key, mapping) 107 | self.assertIsNone(val) 108 | 109 | # Without a namespace 110 | mapping = { 111 | "test1": "What's", 112 | "test2": "good", 113 | } 114 | self.assertTrue(cache.set_many(mapping, expirein=0)) 115 | self.assertEqual(cache.get_many(list(mapping.keys())), mapping) 116 | 117 | def test_increment(self): 118 | cache.set("a", 1, encode=False, expirein=0) 119 | self.assertEqual(cache.increment("a"), 2) 120 | 121 | def test_increment_invalid_value(self): 122 | cache.set("a", "not a number", expirein=0) 123 | with self.assertRaises(redis.exceptions.ResponseError): 124 | cache.increment("a") 125 | 126 | def test_expire(self): 127 | cache.set("a", 1, expirein=100) 128 | self.assertEqual(cache.expire("a", 1), True) 129 | sleep(1.1) 130 | self.assertEqual(cache.get("a"), None) 131 | 132 | def test_expireat(self): 133 | cache.set("a", 1, expirein=100) 134 | self.assertEqual(cache.expireat("a", int(time() + 1)), True) 135 | sleep(1.1) 136 | self.assertEqual(cache.get("a"), None) 137 | 138 | def test_sadd(self): 139 | cache.sadd("myset", {"a", "b", "c"}, expirein=1000) 140 | cache.sadd("myset", ["a", "f", "d"], expirein=1000) 141 | cache.sadd("myset", "z", expirein=1000) 142 | self.assertEqual({"a", "b", "c", "d", "f", "z"}, cache.smembers("myset")) 143 | 144 | 145 | class CacheKeyTestCase(unittest.TestCase): 146 | namespace = "NS_TEST" 147 | 148 | @mock.patch('brainzutils.cache.redis.StrictRedis', autospec=True) 149 | def test_set_key(self, mock_redis): 150 | """Test setting a bytes value""" 151 | cache.init(host='host', port=2, namespace=self.namespace) 152 | cache.set('key', u'value'.encode('utf-8'), expirein=0) 153 | 154 | # Keys are encoded into bytes always 155 | expected_key = 'NS_TEST:key' 156 | # msgpack encoded value 157 | expected_value = b'\xc4\x05value' 158 | mock_redis.return_value.mset.assert_called_with({expected_key: expected_value}) 159 | mock_redis.return_value.pexpire.assert_not_called() 160 | 161 | @mock.patch('brainzutils.cache.redis.StrictRedis', autospec=True) 162 | def test_set_key_unicode(self, mock_redis): 163 | """Test setting a unicode value""" 164 | cache.init(host='host', port=2, namespace=self.namespace) 165 | cache.set('key', u'value', expirein=0) 166 | 167 | expected_key = 'NS_TEST:key' 168 | # msgpack encoded value 169 | expected_value = b'\xa5value' 170 | mock_redis.return_value.mset.assert_called_with({expected_key: expected_value}) 171 | mock_redis.return_value.pexpire.assert_not_called() 172 | 173 | @mock.patch('brainzutils.cache.redis.StrictRedis', autospec=True) 174 | def test_key_expire(self, mock_redis): 175 | 
cache.init(host='host', port=2, namespace=self.namespace) 176 | cache.set('key', u'value'.encode('utf-8'), expirein=30) 177 | expected_key = 'NS_TEST:key' 178 | # msgpack encoded value 179 | expected_value = b'\xc4\x05value' 180 | mock_redis.return_value.mset.assert_called_with({expected_key: expected_value}) 181 | mock_redis.return_value.pexpire.assert_called_with(expected_key, 30000) 182 | -------------------------------------------------------------------------------- /brainzutils/test/test_mail.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import smtplib 3 | from unittest import mock 4 | 5 | from email.mime.multipart import MIMEMultipart 6 | from email.mime.text import MIMEText 7 | from brainzutils import flask 8 | from brainzutils import mail 9 | 10 | class MailTestCase(unittest.TestCase): 11 | 12 | def test_send_email_missing_config(self): 13 | app = flask.CustomFlask(__name__) 14 | with app.app_context(): 15 | with self.assertRaises(ValueError) as err: 16 | mail.send_mail( 17 | subject='ListenBrainz Spotify Importer Error', 18 | text='It is a test mail', 19 | recipients=[], 20 | attachments=None, 21 | from_name='ListenBrainz', 22 | from_addr='noreply@metabrainz.org', 23 | boundary='b' 24 | ) 25 | assert "Flask current_app requires config items" in str(err.exception) 26 | 27 | def test_send_email_string_recipients(self): 28 | app = flask.CustomFlask(__name__) 29 | with app.app_context(): 30 | with self.assertRaises(ValueError) as err: 31 | mail.send_mail( 32 | subject='ListenBrainz Spotify Importer Error', 33 | text='It is a test mail', 34 | recipients='wrongemail@metabrainz.org', 35 | attachments=None, 36 | from_name='ListenBrainz', 37 | from_addr='noreply@metabrainz.org', 38 | boundary='b' 39 | ) 40 | assert str(err.exception) == "recipients must be a list of email addresses" 41 | 42 | @mock.patch('smtplib.SMTP') 43 | def test_send_email(self, mock_smtp): 44 | app = flask.CustomFlask(__name__) 45 | app.config['SMTP_SERVER'] = 'localhost' 46 | app.config['SMTP_PORT'] = 25 47 | 48 | with app.app_context(): 49 | from_address = 'noreply@metabrainz.org' 50 | recipients = ['musicbrainz@metabrainz.org', 'listenbrainz@metabrainz.org'] 51 | text = 'It is a test mail' 52 | from_name = 'ListenBrainz' 53 | subject = 'ListenBrainz Spotify Importer Error' 54 | boundary = '===============2220963697271485568==' 55 | message = MIMEMultipart(boundary=boundary) 56 | message['To'] = "musicbrainz@metabrainz.org, listenbrainz@metabrainz.org" 57 | message['Subject'] = subject 58 | message['From'] = '%s <%s>' % (from_name, from_address) 59 | message.attach(MIMEText(text, _charset='utf-8')) 60 | 61 | mail.send_mail( 62 | subject='ListenBrainz Spotify Importer Error', 63 | text='It is a test mail', 64 | recipients=recipients, 65 | attachments=None, 66 | from_name='ListenBrainz', 67 | from_addr='noreply@metabrainz.org', 68 | boundary=boundary 69 | ) 70 | 71 | mock_smtp.return_value.sendmail.assert_called_once_with(from_address, recipients, message.as_string()) 72 | -------------------------------------------------------------------------------- /brainzutils/test/test_metrics.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import mock, TestCase 3 | 4 | from brainzutils import cache 5 | from brainzutils import metrics 6 | 7 | 8 | class MetricsTestCase(TestCase): 9 | 10 | def setUp(self): 11 | cache.init('redis') 12 | 13 | def tearDown(self): 14 | metrics._metrics_project_name = 
None 15 | 16 | @mock.patch('brainzutils.metrics.cache._r.rpush') 17 | def test_set(self, rpush): 18 | metrics.init('listenbrainz.org') 19 | os.environ["PRIVATE_IP"] = "127.0.0.1" 20 | metrics.set("my_metric", timestamp=1619629462352960742, test_i=2, test_fl=.3, test_t=True, test_f=False, test_s="gobble") 21 | rpush.assert_called_with(metrics.REDIS_METRICS_KEY, 22 | 'my_metric,dc=hetzner,server=127.0.0.1,project=listenbrainz.org test_i=2i,test_fl=0.300000,test_t=t,test_f=f,test_s="gobble" 1619629462352960742') 23 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import init_db_engine, mb_session 4 | 5 | 6 | @pytest.fixture(scope="session") 7 | def engine(): 8 | init_db_engine("postgresql://musicbrainz@musicbrainz_db/musicbrainz_db") 9 | 10 | 11 | @pytest.fixture(scope="function") 12 | def session(engine): 13 | return mb_session() 14 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metabrainz/brainzutils-python/bf01c6da15d4a2426d64a31cf232c06bec3860f3/docs/_static/.gitkeep -------------------------------------------------------------------------------- /docs/cache.rst: -------------------------------------------------------------------------------- 1 | Cache 2 | ===== 3 | 4 | The cache module provides an interface to redis to store items temporarily 5 | 6 | .. automodule:: brainzutils.cache 7 | :members: -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'BrainzUtils'
21 | copyright = '2020, MetaBrainz Foundation'
22 | author = 'MetaBrainz Foundation'
23 | 
24 | 
25 | # -- General configuration ---------------------------------------------------
26 | 
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx_rtd_theme']
31 | 
32 | # Add any paths that contain templates here, relative to this directory.
33 | templates_path = ['_templates']
34 | 
35 | # List of patterns, relative to source directory, that match files and
36 | # directories to ignore when looking for source files.
37 | # This pattern also affects html_static_path and html_extra_path.
38 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
39 | 
40 | 
41 | # -- Options for HTML output -------------------------------------------------
42 | 
43 | # The theme to use for HTML and HTML Help pages. See the documentation for
44 | # a list of builtin themes.
45 | #
46 | html_theme = 'sphinx_rtd_theme'
47 | 
48 | # Add any paths that contain custom static files (such as style sheets) here,
49 | # relative to this directory. They are copied after the builtin static files,
50 | # so a file named "default.css" will overwrite the builtin "default.css".
51 | html_static_path = ['_static']
52 | 
--------------------------------------------------------------------------------
/docs/flask.rst:
--------------------------------------------------------------------------------
1 | Flask
2 | =====
3 | 
4 | The Flask module provides a Flask application with a few sensible defaults for MetaBrainz projects.
5 | 
6 | .. automodule:: brainzutils.flask
7 |    :members:
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | BrainzUtils
2 | ===========
3 | 
4 | BrainzUtils is a set of Python tools used in projects by the MetaBrainz Foundation.
5 | 
6 | .. toctree::
7 |    :maxdepth: 2
8 |    :caption: Contents:
9 | 
10 |    cache
11 |    flask
12 |    metrics
13 |    mail
14 |    musicbrainz_db/index
15 |    ratelimit
16 | 
17 | Indices and tables
18 | ==================
19 | 
20 | * :ref:`genindex`
21 | * :ref:`modindex`
22 | * :ref:`search`
23 | 
--------------------------------------------------------------------------------
/docs/mail.rst:
--------------------------------------------------------------------------------
1 | Mail
2 | ====
3 | 
4 | The mail module provides tools for sending emails.
5 | 
6 | .. automodule:: brainzutils.mail
7 |    :members:
--------------------------------------------------------------------------------
/docs/metrics.rst:
--------------------------------------------------------------------------------
1 | Metrics
2 | =======
3 | 
4 | The metrics module provides a way of recording numerical values that can be stored in a statistics database.
5 | 
6 | .. automodule:: brainzutils.metrics
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/artist.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Artist
2 | ==================
3 | 
4 | For loading an artist from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.artist
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/editor.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Editor
2 | ==================
3 | 
4 | For loading an editor from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.editor
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/event.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Event
2 | =================
3 | 
4 | For loading an event from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.event
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/index.rst:
--------------------------------------------------------------------------------
1 | Direct MusicBrainz access
2 | =========================
3 | 
4 | The musicbrainz_db module provides wrappers around mbdata to load musicbrainz entities
5 | directly from a musicbrainz database.
6 | 
7 | .. toctree::
8 |    :maxdepth: 2
9 |    :caption: Contents:
10 | 
11 |    artist
12 |    editor
13 |    event
14 |    label
15 |    place
16 |    recording
17 |    release
18 |    release_group
19 |    work
20 | 
--------------------------------------------------------------------------------
/docs/musicbrainz_db/label.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Label
2 | =================
3 | 
4 | For loading a label from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.label
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/place.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Place
2 | =================
3 | 
4 | For loading a place from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.place
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/recording.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Recording
2 | =====================
3 | 
4 | For loading a recording from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.recording
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/release.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Release
2 | ===================
3 | 
4 | For loading a release from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.release
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/release_group.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Release Group
2 | =========================
3 | 
4 | For loading a release group from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.release_group
automodule:: brainzutils.musicbrainz_db.release_group 7 | :members: -------------------------------------------------------------------------------- /docs/musicbrainz_db/work.rst: -------------------------------------------------------------------------------- 1 | MusicBrainz Work 2 | ================ 3 | 4 | For loading a work from MusicBrainz. 5 | 6 | .. automodule:: brainzutils.musicbrainz_db.work 7 | :members: -------------------------------------------------------------------------------- /docs/ratelimit.rst: -------------------------------------------------------------------------------- 1 | Ratelimit 2 | ========= 3 | 4 | The ratelimit module provides tools for limiting access to an API per IP address over a given 5 | window of time. The limits are stored in Redis. 6 | 7 | .. automodule:: brainzutils.ratelimit 8 | :members: -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==3.5.1 2 | sphinx_rtd_theme==0.5.1 3 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | 3 | # Disable the message, report, category or checker with the given id(s). You 4 | # can either give multiple identifiers separated by comma (,) or put this 5 | # option multiple times (only on the command line, not in the configuration 6 | # file where it should appear only once). You can also use "--disable=all" to 7 | # disable everything first and then reenable specific checks. For example, if 8 | # you want to run only the similarities checker, you can use "--disable=all 9 | # --enable=similarities". If you want to run only the classes checker, but have 10 | # no Warning level messages displayed, use "--disable=all --enable=classes 11 | # --disable=W" 12 | disable=missing-docstring,too-many-arguments,fixme,invalid-name,global-statement 13 | 14 | 15 | [REPORTS] 16 | 17 | # Set the output format. Available formats are text, parseable, colorized, msvs 18 | # (visual studio) and html. You can also give a reporter class, eg 19 | # mypackage.mymodule.MyReporterClass. 20 | output-format=parseable 21 | 22 | # Put messages in a separate file for each module / package specified on the 23 | # command line instead of printing them on stdout. Reports (if any) will be 24 | # written in a file name "pylint_global.[txt|html]". This option is deprecated 25 | # and it will be removed in Pylint 2.0. 26 | files-output=no 27 | 28 | 29 | [FORMAT] 30 | 31 | # Maximum number of characters on a single line. 
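# (Pylint's built-in default is 100; this project allows longer lines.)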
32 | max-line-length=130 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "brainzutils" 3 | description = "Python tools for MetaBrainz projects" 4 | authors = [ 5 | { name = "MetaBrainz Foundation", email = "support@metabrainz.org" } 6 | ] 7 | dynamic = ["version"] 8 | requires-python = ">=3.10" 9 | dependencies = [ 10 | "Flask>=3.1.0", 11 | "Jinja2>=3.1.6", 12 | "itsdangerous>=2.2.0", 13 | "click>=8.1.8", 14 | "Werkzeug>=3.1.3", 15 | "Flask-DebugToolbar", 16 | "sentry-sdk[flask]>=2.27.0", 17 | "redis>=5.2.1", 18 | "msgpack>=1.1.0", 19 | "requests>=2.32.3", 20 | "SQLAlchemy>=2.0.40", 21 | "mbdata@git+https://github.com/metabrainz/mbdata.git@v30.0.0" 22 | ] 23 | 24 | [tool.setuptools] 25 | packages = ["brainzutils"] 26 | 27 | [tool.setuptools_scm] 28 | 29 | [build-system] 30 | requires = ["setuptools>=80", "setuptools-scm>=8"] 31 | build-backend = "setuptools.build_meta" 32 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = brainzutils 3 | addopts = --cov-report html --cov=brainzutils -W always::DeprecationWarning -W error::sqlalchemy.exc.Base20DeprecationWarning 4 | 5 | markers = 6 | database: requires access to the MusicBrainz sample database -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask>=3.1.0 2 | Jinja2>=3.1.6 3 | itsdangerous>=2.2.0 4 | click>=8.1.8 5 | Werkzeug>=3.1.3 6 | Flask-DebugToolbar 7 | sentry-sdk[flask]>=2.27.0 8 | redis>=5.2.1 9 | msgpack>=1.1.0 10 | requests>=2.32.3 11 | SQLAlchemy>=2.0.40 12 | mbdata@git+https://github.com/metabrainz/mbdata.git@v29.0.0 13 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | psycopg2-binary==2.9.10 2 | freezegun==1.5.1 3 | pytest==8.3.5 4 | pytest-cov==6.1.1 5 | pylint==3.3.6 -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | 5 | # GitHub Actions automatically sets the CI environment variable. We use this variable to detect whether the script is running 6 | # inside a CI environment and modify its execution as needed. 
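# (To exercise this code path outside of CI you can run, e.g.: CI=true ./test.sh)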
7 | if [ "$CI" == "true" ] ; then 8 | echo "Running in CI mode" 9 | fi 10 | 11 | # UNIT TESTS 12 | # ./test.sh build unit test containers, bring up, make database, test, bring down 13 | # for development: 14 | # ./test.sh -u build unit test containers, bring up in background and load database if needed 15 | # ./test.sh [params] run unit tests, passing optional params to inner test 16 | # ./test.sh -s stop unit test containers without removing 17 | # ./test.sh -d clean unit test containers 18 | 19 | COMPOSE_FILE_LOC=test/docker-compose.yml 20 | COMPOSE_PROJECT_NAME=brainzutils_test 21 | 22 | echo "Checking docker compose version" 23 | if docker compose version &> /dev/null; then 24 | DOCKER_COMPOSE_CMD="docker compose" 25 | else 26 | DOCKER_COMPOSE_CMD="docker-compose" 27 | fi 28 | 29 | function invoke_docker_compose { 30 | $DOCKER_COMPOSE_CMD \ 31 | -f $COMPOSE_FILE_LOC \ 32 | -p $COMPOSE_PROJECT_NAME \ 33 | "$@" 34 | } 35 | 36 | function docker_compose_run { 37 | invoke_docker_compose run --rm --user `id -u`:`id -g` "$@" 38 | } 39 | 40 | function build_unit_containers { 41 | invoke_docker_compose build 42 | } 43 | 44 | function bring_up_unit_db { 45 | invoke_docker_compose up -d redis musicbrainz_db 46 | } 47 | 48 | function is_unit_db_running { 49 | # Check if the database container is running 50 | containername="${COMPOSE_PROJECT_NAME}_musicbrainz_db_1" 51 | res=`docker ps --filter "name=$containername" --filter "status=running" -q` 52 | if [ -n "$res" ]; then 53 | return 0 54 | else 55 | return 1 56 | fi 57 | } 58 | 59 | function is_unit_db_exists { 60 | containername="${COMPOSE_PROJECT_NAME}_musicbrainz_db_1" 61 | res=`docker ps --filter "name=$containername" --filter "status=exited" -q` 62 | if [ -n "$res" ]; then 63 | return 0 64 | else 65 | return 1 66 | fi 67 | } 68 | 69 | # Exit immediately if a command exits with a non-zero status. 70 | # set -e 71 | # trap cleanup EXIT # Cleanup after tests finish running 72 | 73 | 74 | if [ "$1" == "-s" ]; then 75 | echo "Stopping unit test containers" 76 | invoke_docker_compose stop 77 | exit 0 78 | fi 79 | 80 | if [ "$1" == "-d" ]; then 81 | echo "Bringing down unit test containers" 82 | invoke_docker_compose down 83 | exit 0 84 | fi 85 | 86 | # if -u flag, bring up db, run setup, quit 87 | if [ "$1" == "-u" ]; then 88 | is_unit_db_exists 89 | DB_EXISTS=$? 90 | is_unit_db_running 91 | DB_RUNNING=$? 92 | if [ $DB_EXISTS -eq 0 -o $DB_RUNNING -eq 0 ]; then 93 | echo "Database is already up, doing nothing" 94 | else 95 | echo "Building containers" 96 | invoke_docker_compose build 97 | echo "Bringing up DB" 98 | bring_up_unit_db 99 | fi 100 | exit 0 101 | fi 102 | 103 | is_unit_db_exists 104 | DB_EXISTS=$? 105 | is_unit_db_running 106 | DB_RUNNING=$? 107 | if [ $DB_EXISTS -eq 1 -a $DB_RUNNING -eq 1 ]; then 108 | # If no containers, build them, run setup, then run tests, then bring down 109 | invoke_docker_compose build 110 | bring_up_unit_db 111 | echo "Running tests" 112 | docker_compose_run test "$@" 113 | RET=$? 114 | invoke_docker_compose down 115 | exit $RET 116 | else 117 | # Else, we have containers, just run tests 118 | echo "Running tests" 119 | docker_compose_run test "$@" 120 | exit $? 
121 | fi 122 | -------------------------------------------------------------------------------- /test/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM metabrainz/python:3.13-20250313 2 | 3 | ENV DOCKERIZE_VERSION v0.6.1 4 | RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \ 5 | && tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz 6 | 7 | RUN mkdir /code 8 | WORKDIR /code 9 | 10 | # Python dependencies 11 | RUN apt-get update \ 12 | && apt-get install -y --no-install-recommends \ 13 | build-essential \ 14 | git 15 | 16 | COPY requirements.txt /code/requirements.txt 17 | COPY requirements_dev.txt /code/requirements_dev.txt 18 | RUN pip install -r requirements.txt 19 | RUN pip install -r requirements_dev.txt 20 | 21 | COPY . /code/ 22 | 23 | ENV REDIS_HOST "redis" 24 | 25 | ENTRYPOINT ["dockerize", "-wait", "tcp://redis:6379", "-timeout", "10s", \ 26 | "dockerize", "-wait", "tcp://musicbrainz_db:5432", "-timeout", "10s", \ 27 | "pytest", "--junitxml=reports/test_results.xml"] 28 | -------------------------------------------------------------------------------- /test/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | test: 4 | build: 5 | context: .. 6 | dockerfile: ./test/Dockerfile 7 | volumes: 8 | - ../:/code 9 | environment: 10 | PYTHONDONTWRITEBYTECODE: 1 11 | SQLALCHEMY_WARN_20: 1 12 | depends_on: 13 | - redis 14 | - musicbrainz_db 15 | 16 | redis: 17 | image: redis:3.2.1 18 | 19 | musicbrainz_db: 20 | image: metabrainz/brainzutils-mb-sample-database:schema-27-2022-05-20.0 21 | environment: 22 | POSTGRES_HOST_AUTH_METHOD: trust 23 | ports: 24 | - "5430:5432" 25 | -------------------------------------------------------------------------------- /test/musicbrainz_db/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM metabrainz/musicbrainz-test-database:beta 2 | 3 | RUN apt-get update && apt-get install -y wget 4 | 5 | RUN mkdir /home/musicbrainz/musicbrainz-server/setup_db 6 | COPY scripts/* /home/musicbrainz/musicbrainz-server/setup_db/ 7 | RUN chmod +x /home/musicbrainz/musicbrainz-server/setup_db/* 8 | 9 | RUN mkdir -p /media/dbdump 10 | RUN chown postgres /media/dbdump 11 | 12 | RUN rm -f /docker-entrypoint-initdb.d/create_test_db.sh 13 | RUN ln -s /home/musicbrainz/musicbrainz-server/setup_db/create_test_db.sh /docker-entrypoint-initdb.d/ -------------------------------------------------------------------------------- /test/musicbrainz_db/README.md: -------------------------------------------------------------------------------- 1 | # MusicBrainz sample database for testing 2 | 3 | This is a Postgres Docker image that contains a copy of the MusicBrainz database, useful 4 | for testing. 5 | 6 | It's based on the https://hub.docker.com/r/metabrainz/musicbrainz-test-database image, but includes 7 | some extra scripts from [musicbrainz-docker](https://github.com/metabrainz/musicbrainz-docker) in order 8 | to download and set up a sample database. The MusicBrainz sample database is a very small subset of the 9 | MusicBrainz database, but contains real data. This makes it useful for testing against the database 10 | without importing everything. 
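Once the container is up you can sanity-check the imported data from the host. The credentials here are an assumption based on the standard MusicBrainz database setup (user `musicbrainz`, database `musicbrainz_db`), and 5430 is the host port mapped in the example below:

    psql -h localhost -p 5430 -U musicbrainz musicbrainz_db -c 'SELECT count(*) FROM musicbrainz.artist;'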
11 | 12 | This image can be run in a `docker-compose.yml` file like this: 13 | 14 | ```yaml 15 | musicbrainz_db: 16 | build: 17 | context: musicbrainz_db 18 | dockerfile: Dockerfile 19 | environment: 20 | PGDATA: /var/lib/postgresql/data/pgdata 21 | POSTGRES_HOST_AUTH_METHOD: trust 22 | ports: 23 | - "5430:5432" 24 | ``` 25 | 26 | However, this will cause the sample database to be downloaded and installed every time the container 27 | starts up. This takes between 5 and 10 minutes, depending on the speed of your computer. 28 | 29 | ### Image with built-in data 30 | We also build an image with the MusicBrainz sample data already imported, in order to have a container that 31 | can start up immediately with all data present. 32 | 33 | This image is hosted at https://hub.docker.com/r/metabrainz/brainzutils-mb-sample-database 34 | 35 | The steps to create a new version are manual, but should only need to be done each time 36 | the MusicBrainz schema changes. 37 | 38 | Build the image: 39 | 40 | docker build -t musicbrainz_db_sample . 41 | 42 | Start the container running bash, so that we can do the import and perform some cleanups. 43 | We choose a different PGDATA location because `/var/lib/postgresql/data` is configured as 44 | a volume by default and we don't want the data to be put in a temporary location. 45 | 46 | docker run -ti --rm --name musicbrainz_db_sample -e PGDATA=/var/lib/postgresql-musicbrainz/data -e POSTGRES_HOST_AUTH_METHOD=trust musicbrainz_db_sample bash 47 | 48 | Inside the running container, run these commands: 49 | 50 | # Start up postgres, running the entrypoint which imports the database 51 | /docker-entrypoint.sh postgres 52 | # Once the import finishes and postgres starts up, quit it with ^C 53 | # Remove some intermediate data and our custom entrypoint 54 | rm -r /media/dbdump 55 | rm /docker-entrypoint-initdb.d/create_test_db.sh 56 | grep DB_SCHEMA_SEQUENCE /home/musicbrainz/musicbrainz-server/lib/DBDefs.pm # note the schema sequence number for the image tag 57 | 58 | Without quitting the container, in another terminal on the host, make a new docker commit to build 59 | the new image: 60 | 61 | docker commit --change='CMD ["postgres"]' musicbrainz_db_sample metabrainz/brainzutils-mb-sample-database:schema-25-2021-04-04.0 62 | 63 | The first argument is the container name (set with `--name` in `docker run`) and the second argument 64 | is the name of the image to create. We include the database schema number from the grep command. 
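The tag appears to follow the pattern `schema-<sequence>-<dump date>.<revision>`, where the sequence comes from the grep above. Before pushing, it's worth smoke-testing the committed image; it should accept connections almost immediately, since the data is already imported:

    docker run --rm -d -p 5430:5432 -e POSTGRES_HOST_AUTH_METHOD=trust metabrainz/brainzutils-mb-sample-database:schema-25-2021-04-04.0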
65 | 66 | Once built, this image can be pushed to Docker Hub by an approved user: 67 | 68 | docker push metabrainz/brainzutils-mb-sample-database:schema-25-2021-04-04.0 69 | -------------------------------------------------------------------------------- /test/musicbrainz_db/scripts/create_test_db.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # During the entrypoint stage, postgres is only listening on a socket; 4 | # force it to listen on localhost in order to perform the data load 5 | pg_ctl -o "-c listen_addresses='localhost'" -w restart 6 | 7 | cd /home/musicbrainz/musicbrainz-server 8 | carton exec -- ./setup_db/createdb.sh -sample -fetch 9 | -------------------------------------------------------------------------------- /test/musicbrainz_db/scripts/createdb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -o pipefail -u 4 | 5 | FTP_MB=ftp://ftp.eu.metabrainz.org/pub/musicbrainz 6 | IMPORT="fullexport" 7 | FETCH_DUMPS="" 8 | WGET_OPTIONS="" 9 | 10 | HELP=$(cat <<EOH 11 | Usage: $0 [-wget-opts <list of options>] [-sample] [-fetch] [MUSICBRAINZ_FTP_URL] 12 | 13 | Options: 14 | -fetch Fetch latest dump from MusicBrainz FTP 15 | -sample Load sample data instead of full data 16 | -wget-opts Pass additional space-separated options list (should be 17 | a single argument, escape spaces if necessary) to wget 18 | 19 | Default MusicBrainz FTP URL: $FTP_MB 20 | EOH 21 | ) 22 | 23 | if [ $# -gt 4 ]; then 24 | echo "$0: too many arguments" 25 | echo "$HELP" 26 | exit 1 27 | fi 28 | 29 | while [ $# -gt 0 ]; do 30 | case "$1" in 31 | -wget-opts ) 32 | shift 33 | WGET_OPTIONS=$1 34 | ;; 35 | -sample ) 36 | IMPORT="sample" 37 | ;; 38 | -fetch ) 39 | FETCH_DUMPS="$1" 40 | ;; 41 | -* ) 42 | echo "$0: unrecognized option '$1'" 43 | echo "$HELP" 44 | exit 1 45 | ;; 46 | * ) 47 | FTP_MB="$1" 48 | ;; 49 | esac 50 | shift 51 | done 52 | 53 | TMP_DIR=/media/dbdump/tmp 54 | 55 | case "$IMPORT" in 56 | fullexport ) 57 | DUMP_FILES=( 58 | mbdump.tar.bz2 59 | mbdump-cdstubs.tar.bz2 60 | mbdump-cover-art-archive.tar.bz2 61 | mbdump-derived.tar.bz2 62 | mbdump-stats.tar.bz2 63 | mbdump-wikidocs.tar.bz2 64 | );; 65 | sample ) 66 | DUMP_FILES=( 67 | mbdump-sample.tar.xz 68 | );; 69 | esac 70 | 71 | if [[ $FETCH_DUMPS == "-fetch" ]]; then 72 | FETCH_OPTIONS=("${IMPORT/fullexport/replica}" --base-ftp-url "$FTP_MB") 73 | if [[ -n "$WGET_OPTIONS" ]]; then 74 | FETCH_OPTIONS+=(--wget-options "$WGET_OPTIONS") 75 | fi 76 | `dirname "$0"`/fetch-dump.sh "${FETCH_OPTIONS[@]}" 77 | fi 78 | 79 | if [[ -a /media/dbdump/"${DUMP_FILES[0]}" ]]; then 80 | echo "found existing dumps" 81 | 82 | mkdir -p $TMP_DIR 83 | #cd /media/dbdump 84 | 85 | INITDB_OPTIONS='--echo --import' 86 | if ! 
/home/musicbrainz/musicbrainz-server/script/database_exists MAINTENANCE; then 87 | INITDB_OPTIONS="--createdb $INITDB_OPTIONS" 88 | fi 89 | # shellcheck disable=SC2086 90 | ./admin/InitDb.pl --createdb --database READWRITE --import /media/dbdump/mbdump*.tar.xz --echo 91 | #/home/musicbrainz/musicbrainz-server/admin/InitDb.pl $INITDB_OPTIONS -- --skip-editor --tmp-dir $TMP_DIR "${DUMP_FILES[@]}" 92 | else 93 | echo "no dumps found or dumps are incomplete" 94 | fi 95 | -------------------------------------------------------------------------------- /test/musicbrainz_db/scripts/fetch-dump.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -o pipefail -u 4 | 5 | DB_DUMP_DIR=/media/dbdump 6 | SEARCH_DUMP_DIR=/media/searchdump 7 | BASE_FTP_URL='ftp://ftp.eu.metabrainz.org/pub/musicbrainz' 8 | TARGET='' 9 | WGET_CMD=(wget) 10 | 11 | SCRIPT_NAME=$(basename "$0") 12 | HELP=$(cat <<EOH 13 | Usage: $SCRIPT_NAME [<option>] <target> 14 | 15 | Fetch dump files of the MusicBrainz database and/or search indexes. 16 | 17 | Targets: 18 | both Fetch latest search dump with replica dump of the same day. 19 | replica Fetch latest database's replicated tables only. 20 | sample Fetch latest database's sample only. 21 | search Fetch latest search indexes only. 22 | 23 | Options: 24 | --base-ftp-url Specify URL to MetaBrainz/MusicBrainz FTP directory. 25 | (Default: '$BASE_FTP_URL') 26 | --wget-options Specify additional options to be passed to wget; 27 | these should be separated with whitespace, and 28 | the list should be a single argument 29 | (escape whitespace if needed). 30 | 31 | -h, --help Print this help message. 32 | EOH 33 | ) 34 | 35 | # Parse arguments 36 | 37 | while [[ $# -gt 0 ]] 38 | do 39 | case "$1" in 40 | both | replica | sample | search ) 41 | if [[ -n $TARGET ]] 42 | then 43 | echo >&2 "$SCRIPT_NAME: only one target argument can be given" 44 | echo >&2 "Try '$SCRIPT_NAME --help' for usage." 45 | exit 64 # EX_USAGE 46 | fi 47 | TARGET=$1 48 | ;; 49 | --base-ftp-url ) 50 | shift 51 | BASE_FTP_URL="$1" 52 | ;; 53 | --wget-options ) 54 | shift 55 | IFS=' ' read -r -a WGET_OPTIONS <<< "$1" 56 | WGET_CMD+=("${WGET_OPTIONS[@]}") 57 | unset WGET_OPTIONS 58 | ;; 59 | -h | --help ) 60 | echo "$HELP" 61 | exit 0 # EX_OK 62 | ;; 63 | -* ) 64 | echo >&2 "$SCRIPT_NAME: unrecognized option '$1'" 65 | echo >&2 "Try '$SCRIPT_NAME --help' for usage." 66 | exit 64 # EX_USAGE 67 | ;; 68 | * ) 69 | echo >&2 "$SCRIPT_NAME: unrecognized argument '$1'" 70 | echo >&2 "Try '$SCRIPT_NAME --help' for usage." 71 | exit 64 # EX_USAGE 72 | ;; 73 | esac 74 | shift 75 | done 76 | 77 | if [[ -z $TARGET ]] 78 | then 79 | echo >&2 "$SCRIPT_NAME: no dump type has been specified" 80 | echo >&2 "Try '$SCRIPT_NAME --help' for usage." 81 | exit 64 # EX_USAGE 82 | fi 83 | 84 | # Fetch latest search indexes 85 | 86 | if [[ $TARGET =~ ^(both|search)$ ]] 87 | then 88 | echo "$(date): Fetching search indexes dump..." 89 | cd "$SEARCH_DUMP_DIR" && find . 
-delete && cd - 90 | "${WGET_CMD[@]}" -nd -nH -P "$SEARCH_DUMP_DIR" \ 91 | "$BASE_FTP_URL/data/search-indexes/LATEST" 92 | DUMP_TIMESTAMP=$(cat /media/searchdump/LATEST) 93 | "${WGET_CMD[@]}" -nd -nH -r -P "$SEARCH_DUMP_DIR" \ 94 | "$BASE_FTP_URL/data/search-indexes/$DUMP_TIMESTAMP/" 95 | cd "$SEARCH_DUMP_DIR" && md5sum -c MD5SUMS && cd - 96 | if [[ $TARGET == search ]] 97 | then 98 | echo 'Done fetching search indexes dump' 99 | exit 0 # EX_OK 100 | fi 101 | fi 102 | 103 | # Prepare to fetch database dump 104 | 105 | if [[ $TARGET != search ]] 106 | then 107 | echo "$(date): Fetching database dump..." 108 | 109 | rm -rf "${DB_DUMP_DIR:?}"/* 110 | fi 111 | 112 | case "$TARGET" in 113 | both | replica ) 114 | DB_DUMP_REMOTE_DIR=data/fullexport 115 | DB_DUMP_FILES=( 116 | mbdump.tar.bz2 117 | mbdump-cdstubs.tar.bz2 118 | mbdump-cover-art-archive.tar.bz2 119 | mbdump-derived.tar.bz2 120 | mbdump-stats.tar.bz2 121 | mbdump-wikidocs.tar.bz2 122 | ) 123 | ;; 124 | sample ) 125 | DB_DUMP_REMOTE_DIR=data/sample 126 | DB_DUMP_FILES=( 127 | mbdump-sample.tar.xz 128 | ) 129 | ;; 130 | esac 131 | 132 | if [[ $TARGET == both ]] 133 | then 134 | # Find latest database dump corresponding to search indexes 135 | 136 | SEARCH_DUMP_DAY="${DUMP_TIMESTAMP/-*}" 137 | "${WGET_CMD[@]}" --spider --no-remove-listing -P "$DB_DUMP_DIR" \ 138 | "$BASE_FTP_URL/$DB_DUMP_REMOTE_DIR" 139 | DUMP_TIMESTAMP=$( 140 | grep -E "\\s${SEARCH_DUMP_DAY}-\\d*" "$DB_DUMP_DIR/.listing" \ 141 | | sed -e 's/\s*$//' -e 's/.*\s//' 142 | ) 143 | rm -f "$DB_DUMP_DIR/.listing" 144 | echo "$DUMP_TIMESTAMP" >> "$DB_DUMP_DIR/LATEST-WITH-SEARCH-INDEXES" 145 | elif [[ $TARGET != search ]] 146 | then 147 | # Just find latest database dump 148 | 149 | "${WGET_CMD[@]}" -nd -nH -P "$DB_DUMP_DIR" \ 150 | "$BASE_FTP_URL/$DB_DUMP_REMOTE_DIR/LATEST" 151 | DUMP_TIMESTAMP=$(cat /media/dbdump/LATEST) 152 | fi 153 | 154 | # Actually fetch database dump 155 | 156 | if [[ $TARGET =~ ^(both|replica)$ ]] 157 | then 158 | for F in MD5SUMS "${DB_DUMP_FILES[@]}" 159 | do 160 | "${WGET_CMD[@]}" -P "$DB_DUMP_DIR" \ 161 | "$BASE_FTP_URL/$DB_DUMP_REMOTE_DIR/$DUMP_TIMESTAMP/$F" 162 | done 163 | cd "$DB_DUMP_DIR" 164 | for F in "${DB_DUMP_FILES[@]}" 165 | do 166 | MD5SUM=$(md5sum -b "$F") 167 | grep -Fqx "$MD5SUM" MD5SUMS || { 168 | echo >&2 "$0: unmatched MD5 checksum: $MD5SUM *$F" && 169 | exit 70 # EX_SOFTWARE 170 | } 171 | done 172 | cd - 173 | elif [[ $TARGET == sample ]] 174 | then 175 | for F in "${DB_DUMP_FILES[@]}" 176 | do 177 | "${WGET_CMD[@]}" -P "$DB_DUMP_DIR" \ 178 | "$BASE_FTP_URL/$DB_DUMP_REMOTE_DIR/$DUMP_TIMESTAMP/$F" 179 | done 180 | fi 181 | 182 | echo "$(date): Done fetching dump files." 183 | # vi: set noexpandtab softtabstop=0: 184 | --------------------------------------------------------------------------------