├── .gitattributes ├── .github ├── release-drafter.yml └── workflows │ ├── ci.yml │ └── release-drafter.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── README.md ├── brainzutils ├── __init__.py ├── cache.py ├── flask │ ├── __init__.py │ └── test │ │ ├── __init__.py │ │ ├── test_main.py │ │ └── test_ratelimit.py ├── mail.py ├── metrics.py ├── musicbrainz_db │ ├── __init__.py │ ├── artist.py │ ├── editor.py │ ├── event.py │ ├── exceptions.py │ ├── helpers.py │ ├── includes.py │ ├── label.py │ ├── models.py │ ├── place.py │ ├── recording.py │ ├── release.py │ ├── release_group.py │ ├── serialize.py │ ├── test_data.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_artist.py │ │ ├── test_editor.py │ │ ├── test_event.py │ │ ├── test_helper.py │ │ ├── test_label.py │ │ ├── test_place.py │ │ ├── test_recording.py │ │ ├── test_release.py │ │ ├── test_release_group.py │ │ ├── test_serialize.py │ │ └── test_work.py │ ├── utils.py │ └── work.py ├── ratelimit.py ├── sentry.py └── test │ ├── __init__.py │ ├── test_cache.py │ ├── test_mail.py │ └── test_metrics.py ├── conftest.py ├── docs ├── Makefile ├── _static │ └── .gitkeep ├── cache.rst ├── conf.py ├── flask.rst ├── index.rst ├── mail.rst ├── metrics.rst ├── musicbrainz_db │ ├── artist.rst │ ├── editor.rst │ ├── event.rst │ ├── index.rst │ ├── label.rst │ ├── place.rst │ ├── recording.rst │ ├── release.rst │ ├── release_group.rst │ └── work.rst ├── ratelimit.rst └── requirements.txt ├── pylintrc ├── pyproject.toml ├── pytest.ini ├── requirements.txt ├── requirements_dev.txt ├── test.sh └── test ├── Dockerfile ├── docker-compose.yml └── musicbrainz_db ├── Dockerfile ├── README.md └── scripts ├── create_test_db.sh ├── createdb.sh └── fetch-dump.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | template: | 2 | ## What’s Changed 3 | 4 | $CHANGES -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Login to Docker Hub 19 | run: echo ${{ secrets.DOCKER_HUB_PASSWORD }} | docker login -u ${{ secrets.DOCKER_HUB_USERNAME }} --password-stdin 20 | continue-on-error: true 21 | 22 | - name: Pull docker images 23 | run: docker compose -f test/docker-compose.yml pull 24 | 25 | - name: Build the Docker image 26 | run: docker compose -f test/docker-compose.yml -p brainzutils_test build 27 | 28 | - name: Bring up dependencies 29 | run: docker compose -f test/docker-compose.yml -p brainzutils_test up -d redis musicbrainz_db 30 | 31 | - name: Run tests 32 | run: docker compose -f test/docker-compose.yml -p brainzutils_test run --rm test 33 | 34 | - name: Bring down containers 35 | run: docker compose -f test/docker-compose.yml -p brainzutils_test down 36 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: 
Release Drafter
2 | 
3 | on:
4 | push:
5 | # branches to consider in the event; optional, defaults to all
6 | branches:
7 | - master
8 | 
9 | jobs:
10 | update_release_draft:
11 | runs-on: ubuntu-latest
12 | steps:
13 | # Drafts your next Release notes as Pull Requests are merged into "master"
14 | - uses: release-drafter/release-drafter@v5
15 | env:
16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
17 | 
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | 
5 | # Distribution / packaging
6 | .Python
7 | env/
8 | venv/
9 | build/
10 | develop-eggs/
11 | dist/
12 | downloads/
13 | eggs/
14 | .eggs/
15 | lib/
16 | lib64/
17 | parts/
18 | sdist/
19 | var/
20 | *.egg-info/
21 | .installed.cfg
22 | *.egg
23 | 
24 | # Docs
25 | docs/_build
26 | 
27 | # Test results
28 | htmlcov
29 | .coverage
30 | 
-------------------------------------------------------------------------------- /.readthedocs.yaml: --------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | build:
4 | os: ubuntu-20.04
5 | tools:
6 | python: "3.10"
7 | 
8 | sphinx:
9 | configuration: docs/conf.py
10 | 
11 | formats: all
12 | 
13 | python:
14 | install:
15 | - requirements: docs/requirements.txt
16 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # BrainzUtils for Python
2 | 
3 | This is a package with common utilities that are used throughout MetaBrainz
4 | projects that use the Python programming language.
5 | 
6 | Note that v1.18.* will be the last line of releases compatible with Python 2.
7 | 
8 | Please report issues at https://tickets.musicbrainz.org/browse/BU.
9 | 
10 | # Development
11 | To develop BrainzUtils itself, install it locally in editable mode with development
12 | dependencies by running: `pip install --group dev -e .`.
13 | 
14 | > [!NOTE]
15 | > Support for dependency groups (`--group`) was added to pip in 25.1.0.
16 | 
17 | ## Usage
18 | 
19 | You can include this line in a `requirements.txt` file:
20 | 
21 | git+https://github.com/metabrainz/brainzutils-python.git@<tag>
22 | 
23 | Replace `<tag>` with the tag that you want to reference.
24 | See https://github.com/metabrainz/brainzutils-python/releases.
25 | 
26 | ## Release process
27 | 
28 | For this project we are using [semantic versioning](http://semver.org/). If
29 | you want to make a new release:
30 | 
31 | 1. Create a new tag in git using the following format: `v<major>.<minor>.<patch>`.
32 | 
33 | git tag v1.x.0
34 | git push --tags
35 | 
36 | 2. Create a release on GitHub based on that tag. Specify changes that were made.
37 | https://github.com/metabrainz/brainzutils-python/releases/new
38 | 
39 | When updating underlying dependencies, keep in mind any breaking changes that they
40 | might introduce. Update the version of `brainzutils-python` accordingly.
41 | 
42 | ## License
43 | 
44 | ```
45 | brainzutils - Python utilities for MetaBrainz projects
46 | Copyright (C) 2018 MetaBrainz Foundation Inc.
47 | 
48 | This program is free software; you can redistribute it and/or modify
49 | it under the terms of the GNU General Public License as published by
50 | the Free Software Foundation; either version 2 of the License, or
51 | (at your option) any later version. 
52 | 
53 | This program is distributed in the hope that it will be useful,
54 | but WITHOUT ANY WARRANTY; without even the implied warranty of
55 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 | GNU General Public License for more details.
57 | 
58 | You should have received a copy of the GNU General Public License along
59 | with this program; if not, write to the Free Software Foundation, Inc.,
60 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
61 | ```
62 | 
-------------------------------------------------------------------------------- /brainzutils/__init__.py: --------------------------------------------------------------------------------
1 | from importlib.metadata import version, PackageNotFoundError
2 | 
3 | try:
4 | __version__ = version(__name__)
5 | except PackageNotFoundError:
6 | # package is not installed
7 | __version__ = "unknown"
8 | 
-------------------------------------------------------------------------------- /brainzutils/cache.py: --------------------------------------------------------------------------------
1 | # pylint: disable=invalid-name
2 | """
3 | This module serves as an interface for Redis.
4 | 
5 | The module needs to be initialized before use! See :meth:`init()`.
6 | 
7 | It is basically a wrapper around the redis package, with additional
8 | functionality and tweaks specific to our needs.
9 | 
10 | There's also support for namespacing, which simplifies management of different
11 | versions of data saved in the cache.
12 | 
13 | More information about Redis can be found at http://redis.io/.
14 | """
15 | import builtins
16 | import os
17 | import socket
18 | from functools import wraps
19 | import datetime
20 | import re
21 | from typing import Optional
22 | 
23 | import redis
24 | import msgpack
25 | 
26 | 
27 | _r: Optional[redis.StrictRedis] = None
28 | _glob_namespace: Optional[str] = None
29 | 
30 | NS_REGEX = re.compile('[a-zA-Z0-9_-]+$')
31 | CONTENT_ENCODING = "utf-8"
32 | ENCODING_ASCII = "ascii"
33 | 
34 | 
35 | def init(host: str = "localhost", port: int = 6379, db_number: int = 0,
36 | namespace: str = "", client_name: Optional[str] = None):
37 | """Initializes Redis client. Needs to be called before use.
38 | 
39 | All keys are prefixed with the global namespace given here.
40 | 
41 | Args:
42 | host: Redis server hostname.
43 | port: Redis port.
44 | db_number: Redis database number.
45 | namespace: Global namespace that will be prepended to all keys.
46 | client_name: The client name to assign to the redis connection. This value is used to identify which clients
47 | are connected to a server, and is only used for debugging purposes.
48 | """
49 | 
50 | # The first priority in setting the client name is to set the user specified
51 | # client_name as this can come in handy during testing and development. Otherwise,
52 | # we use the CONTAINER_NAME environment variable, which is always set in production.
53 | # Finally, we fall back to the host name, which is not as informative as the
54 | # container name, but something is better than nothing.
55 | if client_name is None:
56 | client_name = os.getenv("CONTAINER_NAME", None)
57 | if client_name is None:
58 | client_name = socket.gethostname()
59 | 
60 | global _r, _glob_namespace
61 | _r = redis.StrictRedis(
62 | host=host,
63 | port=port,
64 | db=db_number,
65 | client_name=client_name
66 | )
67 | 
68 | _glob_namespace = namespace + ":"
69 | 
70 | 
71 | def init_required(f):
72 | @wraps(f)
73 | def decorated(*args, **kwargs):
74 | if not _r:
75 | raise RuntimeError("Cache module needs to be initialized before
76 | "use! 
See documentation for more info.")
77 | return f(*args, **kwargs)
78 | 
79 | return decorated
80 | 
81 | 
82 | # pylint: disable=redefined-builtin
83 | @init_required
84 | def set(key, val, expirein, namespace=None, encode=True):
85 | """Set a key to a given value.
86 | 
87 | Args:
88 | key (str): Key of the item.
89 | val: Item's value.
90 | expirein (int): The time after which this value should expire, in seconds.
91 | namespace (str): Optional namespace in which key needs to be defined.
92 | encode: True if the value should be encoded with msgpack, False otherwise
93 | 
94 | Returns:
95 | True if stored successfully.
96 | """
97 | # Note that both key and value are encoded before insertion.
98 | return set_many(
99 | mapping={key: val},
100 | expirein=expirein,
101 | namespace=namespace,
102 | encode=encode
103 | )
104 | 
105 | 
106 | @init_required
107 | def get(key, namespace=None, decode=True):
108 | """Retrieve an item.
109 | 
110 | Args:
111 | key: Key of the item that needs to be retrieved.
112 | namespace: Optional namespace in which key was defined.
113 | decode (bool): True if value should be decoded with msgpack, False otherwise
114 | 
115 | Returns:
116 | Stored value or None if it's not found.
117 | """
118 | # Note that key is encoded before the retrieval request.
119 | return get_many([key], namespace, decode).get(key)
120 | 
121 | 
122 | @init_required
123 | def delete(key, namespace=None):
124 | """Delete an item.
125 | 
126 | Args:
127 | key: Key of the item that needs to be deleted.
128 | namespace: Optional namespace in which key was defined.
129 | 
130 | Returns:
131 | Number of keys that were deleted.
132 | """
133 | # Note that key is encoded before the deletion request.
134 | return delete_many([key], namespace)
135 | 
136 | 
137 | @init_required
138 | def expire(key, expirein, namespace=None):
139 | """Set the expiration time for an item.
140 | 
141 | Args:
142 | key: Key of the item whose expiration time should be set.
143 | expirein: the number of seconds after which the item should expire
144 | namespace: Optional namespace in which key was defined.
145 | 
146 | Returns:
147 | True if the timeout was set, False otherwise
148 | """
149 | # Note that key is encoded before the expire request.
150 | return _r.pexpire(_prep_key(key, namespace), expirein * 1000)
151 | 
152 | 
153 | @init_required
154 | def expireat(key, timeat, namespace=None):
155 | """Set the absolute expiration time for an item.
156 | 
157 | Args:
158 | key: Key of the item whose expiration time should be set.
159 | timeat: the number of seconds since the epoch when the item should expire
160 | namespace: Optional namespace in which key was defined.
161 | 
162 | Returns:
163 | True if the timeout was set, False otherwise
164 | """
165 | # Note that key is encoded before the expire request.
166 | return _r.pexpireat(_prep_key(key, namespace), timeat * 1000)
167 | 
168 | 
169 | @init_required
170 | def set_many(mapping, expirein, namespace=None, encode=True):
171 | """Set multiple keys doing just one query.
172 | 
173 | Args:
174 | mapping (dict): A dict of key/value pairs to set.
175 | expirein (int): The time after which this value should expire, in seconds.
176 | namespace (str): Namespace for the keys.
177 | encode: True if the values should be encoded with msgpack, False otherwise
178 | 
179 | Returns:
180 | True on success. 
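Example:
    A minimal usage sketch (illustrative; assumes the module has been
    initialized and a Redis server is reachable, and the host and
    namespace values are placeholders):

        from brainzutils import cache
        cache.init(host="localhost", port=6379, namespace="myapp")
        cache.set_many({"a": 1, "b": 2}, expirein=60)
        cache.get_many(["a", "b"])  # -> {"a": 1, "b": 2}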
181 | """
182 | # TODO: Fix return value
183 | result = _r.mset(_prep_dict(mapping, namespace, encode))
184 | if expirein:
185 | for key in list(mapping.keys()):
186 | expire(key, expirein, namespace)
187 | 
188 | return result
189 | 
190 | 
191 | @init_required
192 | def get_many(keys, namespace=None, decode=True):
193 | """Retrieve multiple keys doing just one query.
194 | 
195 | Args:
196 | keys (list): List of keys that need to be retrieved.
197 | namespace (str): Namespace for the keys.
198 | decode (bool): True if values should be decoded with msgpack, False otherwise
199 | 
200 | Returns:
201 | A dictionary of key/value pairs that were available.
202 | """
203 | result = {}
204 | for i, value in enumerate(_r.mget(_prep_keys_list(keys, namespace))):
205 | result[keys[i]] = _decode_val(value) if decode else value
206 | return result
207 | 
208 | 
209 | @init_required
210 | def delete_many(keys, namespace=None):
211 | """Delete multiple keys.
212 | 
213 | Returns:
214 | Number of keys that were deleted.
215 | """
216 | return _r.delete(*_prep_keys_list(keys, namespace))
217 | 
218 | 
219 | @init_required
220 | def increment(key, amount=1, namespace=None):
221 | """ Increment the value for a given key using the INCR command.
222 | 
223 | Args:
224 | key: Key of the item that needs to be incremented
225 | amount: the amount to increment the value by
226 | namespace: Namespace for the key
227 | 
228 | Returns:
229 | An integer equal to the value after increment
230 | """
231 | return _r.incr(_prep_keys_list([key], namespace)[0], amount=amount)
232 | 
233 | 
234 | @init_required
235 | def hincrby(name, key, amount, namespace=None):
236 | """Increment a key in a hash by a given amount using HINCRBY
237 | 
238 | Args:
239 | name: Name of the hash
240 | key: Key of the item in the hash to increment
241 | amount: the number to increment the key by
242 | namespace: Namespace for the name
243 | 
244 | Returns:
245 | An integer equal to the value after increment
246 | """
247 | return _r.hincrby(_prep_keys_list([name], namespace)[0], key, amount)
248 | 
249 | 
250 | @init_required
251 | def hgetall(name, namespace=None):
252 | """Get all keys and values for a hash using HGETALL
253 | 
254 | Args:
255 | name: Name of the hash
256 | namespace: Namespace for the name
257 | 
258 | Returns:
259 | A dictionary of {key: value} items for all keys in the hash
260 | """
261 | return _r.hgetall(_prep_keys_list([name], namespace)[0])
262 | 
263 | 
264 | @init_required
265 | def hkeys(name, namespace=None):
266 | """Get all keys for a hash using HKEYS
267 | 
268 | Args:
269 | name: Name of the hash
270 | namespace: Namespace for the name
271 | 
272 | Returns:
273 | A list of [key] values for all keys in the hash
274 | """
275 | return _r.hkeys(_prep_keys_list([name], namespace)[0])
276 | 
277 | 
278 | @init_required
279 | def hset(name, key, value, namespace=None):
280 | """Set the value of a key in a hash using HSET.
281 | If the key does not already exist in the hash, it is added;
282 | otherwise its value is updated.
283 | 
284 | Args:
285 | name: Name of the hash
286 | key: Key of the item in the hash to set
287 | value: value to set the item to
288 | namespace: Namespace for the name
289 | 
290 | Returns:
291 | the number of keys that were newly added to the hash
292 | """
293 | return _r.hset(_prep_keys_list([name], namespace)[0], key, value)
294 | 
295 | 
296 | @init_required
297 | def hdel(name, keys, namespace=None):
298 | """Delete the specified keys from a hash using HDEL. 
299 | Note that the ``keys`` argument must be a list. This differs from the underlying redis
300 | library's version of this command, which takes varargs.
301 | 
302 | Args:
303 | name: Name of the hash
304 | keys: a list of the keys to delete from the hash
305 | namespace: Namespace for the name
306 | 
307 | Returns:
308 | the number of keys deleted from the hash
309 | """
310 | if not isinstance(keys, list):
311 | keys = [keys]
312 | return _r.hdel(_prep_keys_list([name], namespace)[0], *keys)
313 | 
314 | 
315 | @init_required
316 | def sadd(name, keys, expirein, encode=True, namespace=None):
317 | """Add the specified keys to the set stored at name using SADD.
318 | Note that it is not possible to expire a single value stored in a set. The ``expirein``
319 | argument will set the expiration period of the entire set stored at ``name``. Therefore,
320 | any additions to a set will reset its expiry to the value of ``expirein`` passed in
321 | the last call.
322 | Args:
323 | name: Name of the set
324 | keys: keys to add to the set
325 | expirein: the number of seconds after which the item should expire
326 | namespace: namespace for the name
327 | encode: True if the value should be encoded with msgpack, False otherwise
328 | 
329 | Returns:
330 | the number of elements that were added to the set, not including the elements already present in the set.
331 | """
332 | prepared_name = _prep_key(name, namespace)
333 | if not isinstance(keys, list) and not isinstance(keys, builtins.set):
334 | keys = {keys}
335 | 
336 | if encode:
337 | keys = {_encode_val(key) for key in keys}
338 | 
339 | result = _r.sadd(prepared_name, *keys)
340 | expire(name, expirein, namespace)
341 | return result
342 | 
343 | 
344 | @init_required
345 | def smembers(name, decode=True, namespace=None):
346 | """Returns all the members of the set value stored at name.
347 | Args:
348 | name: Name of the set
349 | decode: True if value should be decoded with msgpack, False otherwise
350 | namespace: namespace for the name
351 | 
352 | Returns:
353 | all members of the set
354 | """
355 | keys = _r.smembers(_prep_key(name, namespace))
356 | if decode:
357 | keys = {_decode_val(key) for key in keys}
358 | return keys
359 | 
360 | 
361 | @init_required
362 | def flush_all():
363 | _r.flushdb()
364 | 
365 | 
366 | def gen_key(key, *attributes):
367 | """Helper function that generates a key with attached attributes.
368 | 
369 | Args:
370 | key: Original key.
371 | attributes: Attributes that will be appended to the key.
372 | 
373 | Returns:
374 | Key that can be used with cache. 
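Example:
    An illustrative sketch; the result follows from the implementation
    below (non-string arguments are converted, spaces become underscores):

        gen_key("artist", 123, "some name")  # -> "artist_123_some_name"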
375 | """ 376 | if not isinstance(key, str): 377 | key = str(key) 378 | key = key.encode(ENCODING_ASCII, errors='xmlcharrefreplace').decode(ENCODING_ASCII) 379 | 380 | for attr in attributes: 381 | if not isinstance(attr, str): 382 | attr = str(attr) 383 | key += '_' + attr.encode(ENCODING_ASCII, errors='xmlcharrefreplace').decode(ENCODING_ASCII) 384 | 385 | key = key.replace(' ', '_') # spaces are not allowed 386 | 387 | return key 388 | 389 | 390 | def _prep_dict(dictionary, namespace=None, encode=True): 391 | """Wrapper for _prep_key and _encode_val functions that works with dictionaries.""" 392 | return {_prep_key(key, namespace): _encode_val(value) if encode else value 393 | for key, value in dictionary.items()} 394 | 395 | 396 | def _prep_key(key, namespace=None): 397 | """Prepares a key for use with Redis.""" 398 | if namespace: 399 | key = "%s:%s" % (namespace, key) 400 | if not isinstance(key, bytes): 401 | key = key.encode(ENCODING_ASCII, errors='xmlcharrefreplace').decode(ENCODING_ASCII) 402 | return _glob_namespace + key 403 | 404 | 405 | def _prep_keys_list(l, namespace=None): 406 | """Wrapper for _prep_key function that works with lists. 407 | 408 | Returns: 409 | Prepared keys in the same order. 410 | """ 411 | return [_prep_key(k, namespace) for k in l] 412 | 413 | 414 | def _encode_val(value): 415 | if value is None: 416 | return value 417 | return msgpack.packb(value, use_bin_type=True, default=_msgpack_default) 418 | 419 | 420 | def _decode_val(value): 421 | if value is None: 422 | return value 423 | return msgpack.unpackb(value, raw=False, ext_hook=_msgpack_ext_hook) 424 | 425 | 426 | ############ 427 | # NAMESPACES 428 | ############ 429 | 430 | def validate_namespace(namespace): 431 | """Checks that namespace value is supported.""" 432 | if not NS_REGEX.match(namespace): 433 | raise ValueError("Invalid namespace. Must match regex /[a-zA-Z0-9_-]+$/.") 434 | 435 | 436 | ###################### 437 | # CUSTOM SERIALIZATION 438 | ###################### 439 | 440 | TYPE_DATETIME_CODE = 1 441 | 442 | 443 | def _msgpack_default(obj): 444 | if isinstance(obj, datetime.datetime): 445 | return msgpack.ExtType(TYPE_DATETIME_CODE, obj.isoformat().encode(CONTENT_ENCODING)) 446 | raise TypeError("Unknown type: %r" % (obj,)) 447 | 448 | 449 | def _msgpack_ext_hook(code, data): 450 | if code == TYPE_DATETIME_CODE: 451 | return datetime.datetime.fromisoformat(data.decode(CONTENT_ENCODING)) 452 | return msgpack.ExtType(code, data) 453 | -------------------------------------------------------------------------------- /brainzutils/flask/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask_debugtoolbar import DebugToolbarExtension 3 | 4 | 5 | class CustomFlask(Flask): 6 | """Custom version of Flask with our bells and whistles.""" 7 | 8 | def __init__(self, import_name, config_file=None, debug=None, 9 | *args, **kwargs): 10 | """Create an instance of Flask app. 11 | 12 | See original documentation for Flask. 13 | 14 | Arguments: 15 | import_name (str): Name of the application package. 16 | config_file (str): Path to a config file that needs to be loaded. 17 | Should be in a form of Python module. 18 | debug (bool): Override debug value. 
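Example:
    A minimal sketch (the config path is a placeholder):

        app = CustomFlask(__name__, config_file="/path/to/config.py", debug=True)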
19 | """ 20 | super(CustomFlask, self).__init__(import_name, *args, **kwargs) 21 | if config_file: 22 | self.config.from_pyfile(config_file) 23 | if debug is not None: 24 | self.debug = debug 25 | 26 | def init_debug_toolbar(self): 27 | """This method initializes the Flask-Debug extension toolbar for the 28 | Flask app. 29 | 30 | Note that the Flask-Debug extension requires app.debug be true 31 | and the SECRET_KEY be defined in app.config. 32 | """ 33 | if self.debug: 34 | DebugToolbarExtension(self) 35 | -------------------------------------------------------------------------------- /brainzutils/flask/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metabrainz/brainzutils-python/bf01c6da15d4a2426d64a31cf232c06bec3860f3/brainzutils/flask/test/__init__.py -------------------------------------------------------------------------------- /brainzutils/flask/test/test_main.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from brainzutils import flask 4 | 5 | class FlaskTestCase(unittest.TestCase): 6 | 7 | def test_create_app(self): 8 | app = flask.CustomFlask(__name__) 9 | self.assertIsNotNone(app) 10 | 11 | def test_debug_toolbar(self): 12 | """ Tests that debug toolbar loads if initialized correctly 13 | """ 14 | 15 | # create an app 16 | app = flask.CustomFlask(__name__) 17 | self.assertIsNotNone(app) 18 | app.debug = True 19 | app.config['SECRET_KEY'] = 'this is a totally secret key btw' 20 | app.init_debug_toolbar() 21 | 22 | # add a dummy route 23 | @app.route('/') 24 | def index(): 25 | return 'test' 26 | 27 | client = app.test_client() 28 | response = client.get('/') 29 | self.assertEqual(response.status_code, 200) 30 | self.assertIn('flDebug', str(response.data)) 31 | -------------------------------------------------------------------------------- /brainzutils/flask/test/test_ratelimit.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | from time import sleep 4 | 5 | from brainzutils import flask, cache 6 | from brainzutils.ratelimit import ratelimit, set_rate_limits, inject_x_rate_headers, set_user_validation_function 7 | 8 | valid_user = "41FB6EEB-636B-4F7C-B376-3A8613F1E69A" 9 | def validate_user(user): 10 | if user == valid_user: 11 | return True 12 | return False 13 | 14 | class RatelimitTestCase(unittest.TestCase): 15 | 16 | host = os.environ.get("REDIS_HOST", "localhost") 17 | port = 6379 18 | namespace = "NS_TEST" 19 | max_ip_requests = 3 20 | max_token_requests = 5 21 | ratelimit_window = 10 22 | 23 | def setUp(self): 24 | cache.init( 25 | host=self.host, 26 | port=self.port, 27 | namespace=self.namespace, 28 | ) 29 | # Making sure there are no items in cache before we run each test 30 | cache.flush_all() 31 | 32 | def test_create_app(self): 33 | app = flask.CustomFlask(__name__) 34 | self.assertIsNotNone(app) 35 | 36 | def test_ratelimit(self): 37 | """ Tests that the ratelimit decorator works 38 | """ 39 | 40 | # Set the limits as per defines in this class 41 | set_rate_limits(self.max_token_requests, self.max_ip_requests, self.ratelimit_window) 42 | 43 | # create an app 44 | app = flask.CustomFlask(__name__) 45 | self.assertIsNotNone(app) 46 | app.debug = True 47 | app.config['SECRET_KEY'] = 'this is a totally secret key btw' 48 | app.init_debug_toolbar() 49 | 50 | @app.after_request 51 | def after_request_callbacks(response): 52 | return 
inject_x_rate_headers(response)
53 | 
54 | # add a dummy route
55 | @app.route('/')
56 | @ratelimit()
57 | def index():
58 | return 'test'
59 | 
60 | def print_headers(response):
61 | print("X-RateLimit-Remaining", response.headers['X-RateLimit-Remaining'])
62 | print("X-RateLimit-Limit", response.headers['X-RateLimit-Limit'])
63 | print("X-RateLimit-Reset", response.headers['X-RateLimit-Reset'])
64 | print("X-RateLimit-Reset-In", response.headers['X-RateLimit-Reset-In'])
65 | print()
66 | 
67 | 
68 | def make_requests(client, nominal_num_requests, token=None):
69 | 
70 | print("===== make %d requests" % nominal_num_requests)
71 | # make one more than the allowed number of requests to catch the 429
72 | num_requests = nominal_num_requests + 1
73 | 
74 | # make a specified number of requests
75 | while True:
76 | reset_time = 0
77 | restart = False
78 | for i in range(num_requests):
79 | if token:
80 | response = client.get('/', headers={'Authorization': token})
81 | else:
82 | response = client.get('/')
83 | if reset_time == 0:
84 | reset_time = response.headers['X-RateLimit-Reset']
85 | 
86 | if reset_time != response.headers['X-RateLimit-Reset']:
87 | # Whoops, we didn't get our tests done before the window expired. Start over.
88 | restart = True
89 | 
90 | # when restarting we need to make one request fewer, since the current request counts towards the new window
91 | num_requests = nominal_num_requests
92 | break
93 | 
94 | if i == num_requests - 1:
95 | self.assertEqual(response.status_code, 429)
96 | else:
97 | self.assertEqual(response.status_code, 200)
98 | self.assertEqual(int(response.headers['X-RateLimit-Remaining']), num_requests - i - 2)
99 | print_headers(response)
100 | 
101 | sleep(1.1)
102 | 
103 | if not restart:
104 | break
105 | 
106 | client = app.test_client()
107 | 
108 | # Make a pile of requests based on IP address
109 | make_requests(client, self.max_ip_requests)
110 | 
111 | # Set a user token and make requests based on the token
112 | cache.flush_all()
113 | set_user_validation_function(validate_user)
114 | set_rate_limits(self.max_token_requests, self.max_ip_requests, self.ratelimit_window)
115 | make_requests(client, self.max_token_requests, token="Token %s" % valid_user)
116 | 
-------------------------------------------------------------------------------- /brainzutils/mail.py: --------------------------------------------------------------------------------
1 | """This module provides a way to send emails."""
2 | from email.mime.application import MIMEApplication
3 | from email.mime.multipart import MIMEMultipart
4 | from email.mime.text import MIMEText
5 | from typing import List
6 | import smtplib
7 | import socket
8 | 
9 | from flask import current_app
10 | 
11 | 
12 | def send_mail(subject: str, text: str, recipients: List[str], attachments=None,
13 | from_name="MetaBrainz Notifications",
14 | from_addr=None, boundary=None):
15 | """This function can be used as a foundation for sending email.
16 | 
17 | Args:
18 | subject: Subject of the message.
19 | text: The message itself.
20 | recipients: List of recipients.
21 | attachments: List of (file object, subtype, name) tuples. For example:
22 | (<file object>, 'pdf', 'receipt.pdf').
23 | from_name: Name of the sender.
24 | from_addr: Email address of the sender. 
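boundary: Optional MIME boundary to use for the multipart message.

Example:
    A minimal sketch (the addresses are placeholders; SMTP_SERVER and
    SMTP_PORT must be set in the Flask app config):

        send_mail(
            subject="Donation received",
            text="Thank you for your donation!",
            recipients=["user@example.org"],
        )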
25 | """ 26 | if not isinstance(recipients, list): 27 | raise ValueError("recipients must be a list of email addresses") 28 | 29 | if 'SMTP_SERVER' not in current_app.config or 'SMTP_PORT' not in current_app.config: 30 | raise ValueError("Flask current_app requires config items SMTP_SERVER and SMTP_PORT to be set") 31 | 32 | if attachments is None: 33 | attachments = [] 34 | if from_addr is None: 35 | from_addr = 'noreply@' + current_app.config['MAIL_FROM_DOMAIN'] 36 | 37 | if current_app.config['TESTING']: # Not sending any emails during the testing process 38 | return 39 | 40 | if not recipients: 41 | return 42 | 43 | message = MIMEMultipart() 44 | 45 | if boundary is not None: 46 | message = MIMEMultipart(boundary=boundary) 47 | 48 | message['To'] = ", ".join(recipients) 49 | message['Subject'] = subject 50 | message['From'] = "%s <%s>" % (from_name, from_addr) 51 | message.attach(MIMEText(text, _charset='utf-8')) 52 | 53 | for attachment in attachments: 54 | file_obj, subtype, name = attachment 55 | attachment = MIMEApplication(file_obj.read(), _subtype=subtype) 56 | file_obj.close() # FIXME(roman): This feels kind of hacky. Maybe there's a better way? 57 | attachment.add_header('content-disposition', 'attachment', filename=name) 58 | message.attach(attachment) 59 | try: 60 | smtp_server = smtplib.SMTP(current_app.config['SMTP_SERVER'], current_app.config['SMTP_PORT']) 61 | except (socket.error, smtplib.SMTPException) as e: 62 | current_app.logger.error('Error while sending email: %s', e, exc_info=True) 63 | raise MailException(e) 64 | smtp_server.sendmail(from_addr, recipients, message.as_string()) 65 | smtp_server.quit() 66 | 67 | 68 | class MailException(Exception): 69 | pass 70 | -------------------------------------------------------------------------------- /brainzutils/metrics.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | import os 3 | import socket 4 | import logging 5 | from time import time_ns 6 | from typing import Dict 7 | 8 | from brainzutils import cache 9 | 10 | REDIS_METRICS_KEY = "metrics:influx_data" 11 | _metrics_project_name = None 12 | 13 | 14 | def init(project): 15 | global _metrics_project_name 16 | _metrics_project_name = project 17 | 18 | 19 | def metrics_init_required(f): 20 | @wraps(f) 21 | def decorated(*args, **kwargs): 22 | if not _metrics_project_name: 23 | raise RuntimeError("Metrics module needs to be initialized before use") 24 | return f(*args, **kwargs) 25 | return decorated 26 | 27 | 28 | @cache.init_required 29 | @metrics_init_required 30 | def set(metric_name: str, tags: Dict[str, str] = None, timestamp: int = None, **fields): 31 | """ 32 | Submit a metric to be read by the MetaBrainz influx datastore for graphing/monitoring 33 | purposes. These metrics are stored in redis in the influxdb line protocol format: 34 | https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/ 35 | 36 | Args: 37 | metric_name: The name of the metric to record. 38 | tags: Additional influx tags to write with the metric. (optional) 39 | timestamp: A nanosecond timestamp to use for this metric. If not provided 40 | the current time is used. 41 | fields: The key, value pairs to store with this metric. 
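Example:
    An illustrative sketch (assumes cache.init() and metrics.init("myproject")
    were called first; the metric and field names are placeholders):

        set("thread_count", tags={"type": "worker"}, count=12, alive=True)

    This is stored as a line protocol entry similar to:

        thread_count,type=worker,dc=hetzner,server=<host>,project=myproject count=12i,alive=t <timestamp>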
42 | """ 43 | 44 | # Add types to influx data 45 | try: 46 | host = os.environ['PRIVATE_IP'] 47 | except KeyError: 48 | host = socket.gethostname() 49 | 50 | if tags is None: 51 | tags = {} 52 | 53 | tags["dc"] = "hetzner" 54 | tags["server"] = host 55 | tags["project"] = _metrics_project_name 56 | tag_string = ",".join([ "%s=%s" % (k, v) for k, v in tags.items() ]) 57 | 58 | fields_list = [] 59 | for k, v in fields.items(): 60 | if type(v) == int: 61 | fields_list.append("%s=%di" % (k, v)) 62 | elif type(v) == float: 63 | fields_list.append('%s=%f' % (k, v)) 64 | elif type(v) == bool: 65 | val = "t" if v else "f" 66 | fields_list.append("%s=%s" % (k, val)) 67 | elif type(fields[k]) == str: 68 | fields_list.append('%s="%s"' % (k, v)) 69 | else: 70 | fields_list.append("%s=%s" % (k, str(v))) 71 | 72 | fields = ",".join(fields_list) 73 | 74 | if timestamp is None: 75 | timestamp = time_ns() 76 | 77 | metric = "%s,%s %s %d" % (metric_name, tag_string, fields, timestamp) 78 | try: 79 | cache._r.rpush(REDIS_METRICS_KEY, metric) 80 | except Exception: 81 | logging.error("Cannot set redis metric:", exc_info=True) 82 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/__init__.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from sqlalchemy import create_engine 3 | from sqlalchemy.orm import sessionmaker, scoped_session, Session 4 | from sqlalchemy.pool import NullPool 5 | 6 | 7 | engine = None 8 | 9 | 10 | def init_db_engine(connect_str): 11 | global engine, Session 12 | engine = create_engine(connect_str, poolclass=NullPool) 13 | Session = scoped_session( 14 | sessionmaker(bind=engine) 15 | ) 16 | 17 | 18 | @contextmanager 19 | def mb_session(): 20 | session = Session() 21 | try: 22 | yield session 23 | finally: 24 | session.close() 25 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/artist.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from sqlalchemy.orm import joinedload 3 | from mbdata import models 4 | from brainzutils.musicbrainz_db import mb_session 5 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 6 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 7 | from brainzutils.musicbrainz_db.serialize import serialize_artists 8 | from brainzutils.musicbrainz_db.includes import check_includes 9 | 10 | 11 | def get_artist_by_mbid(mbid, includes=None): 12 | """Get artist with MusicBrainz ID. 13 | Args: 14 | mbid (uuid): MBID(gid) of the artist. 15 | includes (list): List of values to be included. 16 | For list of possible values see includes.py. 17 | Returns: 18 | Dictionary containing the artist information, or None if the artist doesn't exist. 19 | """ 20 | if includes is None: 21 | includes = [] 22 | 23 | return fetch_multiple_artists( 24 | [mbid], 25 | includes=includes, 26 | ).get(mbid) 27 | 28 | 29 | def fetch_multiple_artists(mbids, includes=None): 30 | """Get info related to multiple artists using their MusicBrainz IDs. 31 | Args: 32 | mbids (list): List of MBIDs of artists. 33 | includes (list): List of information to be included. 34 | Returns: 35 | A dictionary containing info of multiple artists keyed by their MBID. 36 | If an MBID doesn't exist in the database, it isn't returned. 
37 | If an MBID is a redirect, the dictionary key will be the MBID given as an argument, 38 | but the returned object will contain the new MBID in the 'mbid' key. 39 | """ 40 | 41 | if includes is None: 42 | includes = [] 43 | includes_data = defaultdict(dict) 44 | check_includes('artist', includes) 45 | 46 | with mb_session() as db: 47 | query = db.query(models.Artist).options(joinedload(models.Artist.type)) 48 | 49 | artists = get_entities_by_gids( 50 | query=query, 51 | entity_type='artist', 52 | mbids=mbids, 53 | ) 54 | 55 | artist_ids = [artist.id for artist in artists.values()] 56 | 57 | if 'artist-rels' in includes: 58 | get_relationship_info( 59 | db=db, 60 | target_type='artist', 61 | source_type='artist', 62 | source_entity_ids=artist_ids, 63 | includes_data=includes_data, 64 | ) 65 | if 'url-rels' in includes: 66 | get_relationship_info( 67 | db=db, 68 | target_type='url', 69 | source_type='artist', 70 | source_entity_ids=artist_ids, 71 | includes_data=includes_data, 72 | ) 73 | 74 | artists = {str(mbid): serialize_artists(artist, includes_data[artist.id]) for mbid, artist in artists.items()} 75 | return artists 76 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/editor.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from sqlalchemy.orm import joinedload 3 | from mbdata import models 4 | from brainzutils.musicbrainz_db import mb_session 5 | from brainzutils.musicbrainz_db.utils import get_entities_by_ids 6 | from brainzutils.musicbrainz_db.serialize import serialize_editor 7 | from brainzutils.musicbrainz_db.includes import check_includes 8 | 9 | 10 | def get_editor_by_id(editor_id, includes=None): 11 | """Get editor with editor ID. 12 | Args: 13 | editor_id (int): ID of the editor. 14 | Returns: 15 | Dictionary containing the editor information 16 | """ 17 | if includes is None: 18 | includes = [] 19 | 20 | return fetch_multiple_editors( 21 | [editor_id], 22 | includes=includes, 23 | ).get(editor_id) 24 | 25 | 26 | def fetch_multiple_editors(editor_ids, includes=None): 27 | """Get info related to multiple editors using their editor IDs. 28 | Args: 29 | editor_ids (list): List of IDs of editors. 30 | includes (list): List of information to be included. 31 | Returns: 32 | Dictionary containing info of multiple editors keyed by their editor_id. 
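Example:
    An illustrative sketch (the editor IDs are placeholders):

        editors = fetch_multiple_editors([2323, 2324])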
33 | """ 34 | if includes is None: 35 | includes = [] 36 | 37 | includes_data = defaultdict(dict) 38 | check_includes('editor', includes) 39 | with mb_session() as db: 40 | query = db.query(models.Editor) 41 | editors = get_entities_by_ids( 42 | query=query, 43 | entity_type='editor', 44 | ids=editor_ids, 45 | ) 46 | editor_ids = [editor.id for editor in editors.values()] 47 | editors = {editor_id: serialize_editor(editors[editor_id], includes_data) for editor_id in editor_ids} 48 | 49 | return editors 50 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/event.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import List 3 | from uuid import UUID 4 | 5 | from mbdata import models 6 | from sqlalchemy import or_, nullslast 7 | from sqlalchemy.orm import contains_eager, joinedload 8 | 9 | from brainzutils.musicbrainz_db import mb_session 10 | import brainzutils.musicbrainz_db.exceptions as mb_exceptions 11 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 12 | from brainzutils.musicbrainz_db.includes import check_includes 13 | from brainzutils.musicbrainz_db.serialize import serialize_events 14 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 15 | 16 | def get_mapped_event_types(event_types: list) -> list: 17 | """ Get event types mapped to their case sensitive name in musicbrainz. 18 | event_type table in the database. 19 | 20 | Args: 21 | event_types (list): List of event types. 22 | Returns: 23 | List of mapped event types. 24 | 25 | """ 26 | event_types = [event_type.lower() for event_type in event_types] 27 | mapped_event_types = [] 28 | with mb_session() as db: 29 | supported_types = [event_type.name for event_type in db.query(models.EventType).all()] 30 | event_type_mapping = {supported_type.lower(): supported_type for supported_type in supported_types} 31 | 32 | for event_type in event_types: 33 | if event_type in event_type_mapping: 34 | mapped_event_types.append(event_type_mapping[event_type]) 35 | else: 36 | raise mb_exceptions.InvalidTypeError("Bad event_type: {etype} is not supported".format(etype = event_type)) 37 | 38 | return mapped_event_types 39 | 40 | 41 | def get_event_by_mbid(mbid, includes=None): 42 | """Get event with the MusicBrainz ID. 43 | 44 | Args: 45 | mbid (uuid): MBID(gid) of the event. 46 | Returns: 47 | Dictionary containing the event information, or None if the event doesn't exist. 48 | """ 49 | if includes is None: 50 | includes = [] 51 | 52 | return fetch_multiple_events( 53 | [mbid], 54 | includes=includes, 55 | ).get(mbid) 56 | 57 | 58 | def fetch_multiple_events(mbids, includes=None): 59 | """Get info related to multiple events using their MusicBrainz IDs. 60 | 61 | Args: 62 | mbids (list): List of MBIDs of events. 63 | includes (list): List of information to be included. 64 | 65 | Returns: 66 | A dictionary containing info of multiple events keyed by their MBID. 67 | If an MBID doesn't exist in the database, it isn't returned. 68 | If an MBID is a redirect, the dictionary key will be the MBID given as an argument, 69 | but the returned object will contain the new MBID in the 'mbid' key. 
70 | """
71 | if includes is None:
72 | includes = []
73 | includes_data = defaultdict(dict)
74 | check_includes('event', includes)
75 | with mb_session() as db:
76 | query = db.query(models.Event).options(joinedload(models.Event.type))
77 | events = get_entities_by_gids(
78 | query=query,
79 | entity_type='event',
80 | mbids=mbids,
81 | )
82 | event_ids = [event.id for event in events.values()]
83 | 
84 | if 'artist-rels' in includes:
85 | get_relationship_info(
86 | db=db,
87 | target_type='artist',
88 | source_type='event',
89 | source_entity_ids=event_ids,
90 | includes_data=includes_data,
91 | )
92 | if 'place-rels' in includes:
93 | get_relationship_info(
94 | db=db,
95 | target_type='place',
96 | source_type='event',
97 | source_entity_ids=event_ids,
98 | includes_data=includes_data,
99 | )
100 | if 'series-rels' in includes:
101 | get_relationship_info(
102 | db=db,
103 | target_type='series',
104 | source_type='event',
105 | source_entity_ids=event_ids,
106 | includes_data=includes_data,
107 | )
108 | if 'url-rels' in includes:
109 | get_relationship_info(
110 | db=db,
111 | target_type='url',
112 | source_type='event',
113 | source_entity_ids=event_ids,
114 | includes_data=includes_data,
115 | )
116 | if 'release-group-rels' in includes:
117 | get_relationship_info(
118 | db=db,
119 | target_type='release_group',
120 | source_type='event',
121 | source_entity_ids=event_ids,
122 | includes_data=includes_data,
123 | )
124 | 
125 | return {str(mbid): serialize_events(event, includes_data[event.id]) for mbid, event in events.items()}
126 | 
127 | 
128 | def get_events_for_place(place_id: UUID, event_types: List[str] = [], include_null_type: bool = True, limit: int = None, offset: int = None) -> tuple:
129 | """Get all events that occurred at a place.
130 | 
131 | Args:
132 | place_id: MBID of the place.
133 | event_types: List of types of events to be fetched. The supported event_types are
134 | 'Concert', 'Festival', 'Convention/Expo', 'Launch event', 'Award ceremony', 'Stage performance', and 'Masterclass/Clinic'.
135 | include_null_type: Whether to include events with no type.
136 | limit: Max number of events to return.
137 | offset: Offset that can be used in conjunction with the limit.
138 | 
139 | Returns:
140 | Tuple containing the list of dictionaries of events and the total count of the events.
141 | The list of dictionaries of events is ordered by event begin year, begin month, begin date,
142 | begin time, and event name. In case one of these is set to NULL, it will be ordered last. 
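Example:
    An illustrative sketch (the place MBID is a placeholder):

        events, count = get_events_for_place(
            place_id="4352063b-a833-421b-a420-e7fb295dece0",
            event_types=["Concert", "Festival"],
            limit=10,
            offset=0,
        )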
143 | """ 144 | 145 | place_id = str(place_id) 146 | event_types = get_mapped_event_types(event_types) 147 | 148 | with mb_session() as db: 149 | event_query = db.query(models.Event).outerjoin(models.EventType).\ 150 | options(contains_eager(models.Event.type)).\ 151 | join(models.LinkEventPlace, models.Event.id == models.LinkEventPlace.entity0_id).\ 152 | join(models.Place, models.LinkEventPlace.entity1_id == models.Place.id).\ 153 | filter(models.Place.gid == place_id) 154 | 155 | if include_null_type and event_types: 156 | event_query = event_query.filter(or_(models.Event.type == None, models.EventType.name.in_(event_types))) 157 | elif event_types: 158 | event_query = event_query.filter(models.EventType.name.in_(event_types)) 159 | 160 | event_query = event_query.order_by( 161 | nullslast(models.Event.begin_date_year.desc()), 162 | nullslast(models.Event.begin_date_month.desc()), 163 | nullslast(models.Event.begin_date_day.desc()), 164 | nullslast(models.Event.time.desc()), 165 | nullslast(models.Event.name.asc()) 166 | ) 167 | count = event_query.count() 168 | events = event_query.limit(limit).offset(offset).all() 169 | 170 | return ([serialize_events(event) for event in events], count) 171 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/exceptions.py: -------------------------------------------------------------------------------- 1 | class MBDatabaseException(Exception): 2 | """Base exception for all exceptions related to MusicBrainz database""" 3 | pass 4 | 5 | 6 | class InvalidTypeError(MBDatabaseException): 7 | """Exception related to wrong type in present functions""" 8 | pass 9 | 10 | 11 | class InvalidIncludeError(MBDatabaseException): 12 | """Exception related to wrong includes in present functions""" 13 | pass 14 | 15 | 16 | class NoDataFoundException(MBDatabaseException): 17 | """Exception to be raised when no data has been found""" 18 | pass 19 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/helpers.py: -------------------------------------------------------------------------------- 1 | from mbdata.utils.models import get_link_model 2 | from mbdata.models import Tag, Link 3 | from sqlalchemy.orm import joinedload 4 | from sqlalchemy import func 5 | from brainzutils.musicbrainz_db.models import ENTITY_MODELS 6 | 7 | 8 | def get_relationship_info(db, target_type, source_type, source_entity_ids, includes_data): 9 | """Get information related to relationships between different entities. 10 | 11 | Keep in mind that includes_data (dict) is altered to contain the relationship objects 12 | keyed by the source entity MBIDs. 13 | 14 | Args: 15 | db (Session object): Session object. 16 | target_type (str): Type of target entity. 17 | source_type (str): Type of source entity. 18 | source_entity_ids (list): IDs of the source entity. 19 | includes_data (dict): Dictionary containing includes data of entities. 20 | """ 21 | source_model = ENTITY_MODELS[source_type] 22 | target_model = ENTITY_MODELS[target_type] 23 | relation = get_link_model(source_model, target_model) 24 | 25 | query = db.query(relation).\ 26 | options( 27 | joinedload(relation.link, innerjoin=True). 
28 | joinedload(Link.link_type, innerjoin=True) 29 | ) 30 | if relation.entity0.property.mapper.class_ == relation.entity1.property.mapper.class_: 31 | _relationship_link_helper(relation, query, "entity0", "entity1", target_type, source_entity_ids, includes_data) 32 | _relationship_link_helper(relation, query, "entity1", "entity0", target_type, source_entity_ids, includes_data) 33 | else: 34 | if source_model == relation.entity0.property.mapper.class_: 35 | _relationship_link_helper(relation, query, "entity0", "entity1", target_type, source_entity_ids, includes_data) 36 | else: 37 | _relationship_link_helper(relation, query, "entity1", "entity0", target_type, source_entity_ids, includes_data) 38 | 39 | 40 | def _relationship_link_helper(relation, query, source_attr, target_attr, target_type, source_entity_ids, includes_data): 41 | """Get relationship links between two entities. 42 | 43 | Keep in mind that includes_data (dict) is altered to contain the relationship objects 44 | keyed by the source entity MBIDs. 45 | 46 | Args: 47 | relation (mbdata.model): Model relating the two entities. 48 | query (Session.query): Query object. 49 | source_attr (str): 'entity0' or 'entity1' based on which represents source model in relation table. 50 | target_attr (str): 'entity0' or 'entity1' based on which represents target model in relation table. 51 | target_type (str): Type of the target entity. 52 | source_entity_ids (list): IDs of the source entity. 53 | includes_data (dict): Dictionary containing the includes data of entities. 54 | """ 55 | source_id_attr = source_attr + "_id" 56 | query = query.filter(getattr(relation, source_id_attr).in_(source_entity_ids)) 57 | query = query.options(joinedload(getattr(relation, target_attr), innerjoin=True)) 58 | relation_type = target_type + "-rels" 59 | for link in query: 60 | includes_data[getattr(link, source_id_attr)].setdefault('relationship_objs', {}).\ 61 | setdefault(relation_type, []).append(link) 62 | 63 | 64 | def get_tags(db, entity_model, tag_model, foreign_tag_id, entity_ids): 65 | """Get tags associated with entities. 66 | 67 | Args: 68 | db (Session object): Session object. 69 | entity_model (mbdata.models): Model of the entity. 70 | tag_model (mbdata.models): Tag of the model. 71 | foreign_tag_id (tag_model.foreign_key): Foreign ID that joins the tag model and entity model 72 | entity_ids (list): IDs of the entity whose tags are to be fetched 73 | 74 | Returns: 75 | List of tuples containing the entity_ids and the list of associated tags. 
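Example of the returned shape (illustrative; the IDs and tag names are
placeholders):

    [(42, ['rock', 'seen live']), (43, ['jazz'])]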
76 | """ 77 | tags = db.query(entity_model.id, func.array_agg(Tag.name)).\ 78 | join(tag_model, entity_model.id == foreign_tag_id).\ 79 | join(Tag).\ 80 | filter(entity_model.id.in_(entity_ids)).\ 81 | group_by(entity_model.id).\ 82 | all() 83 | return tags -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/includes.py: -------------------------------------------------------------------------------- 1 | import brainzutils.musicbrainz_db.exceptions as mb_exceptions 2 | 3 | 4 | RELATABLE_TYPES = [ 5 | 'area', 6 | 'artist', 7 | 'label', 8 | 'place', 9 | 'event', 10 | 'recording', 11 | 'release', 12 | 'release-group', 13 | 'series', 14 | 'url', 15 | 'work', 16 | 'instrument' 17 | ] 18 | 19 | RELATION_INCLUDES = [entity + '-rels' for entity in RELATABLE_TYPES] 20 | 21 | TAG_INCLUDES = ["tags"] 22 | 23 | VALID_INCLUDES = { 24 | 'place': ["aliases", "annotation"] + RELATION_INCLUDES + TAG_INCLUDES, 25 | 'event': ["aliases"] + RELATION_INCLUDES + TAG_INCLUDES, 26 | 'recording': ["artist", "artists", "isrc"] + TAG_INCLUDES + RELATION_INCLUDES, 27 | 'release_group': ["artists", "media", "releases"] + TAG_INCLUDES + RELATION_INCLUDES, 28 | 'release': [ 29 | "artists", "labels", "recordings", "release-groups", "media", "annotation", "aliases" 30 | ] + TAG_INCLUDES + RELATION_INCLUDES, 31 | 'artist': ["recordings", "releases", "media", "aliases", "annotation"] + RELATION_INCLUDES + TAG_INCLUDES, 32 | 'label': ["area", "aliases", "annotation"] + RELATION_INCLUDES + TAG_INCLUDES, 33 | 'work': ["artists", "recordings", "aliases", "annotation"] + RELATION_INCLUDES + TAG_INCLUDES, 34 | 'editor': [], # TODO: List includes here (BU-18) 35 | } 36 | 37 | 38 | def check_includes(entity, includes): 39 | """Check if includes specified for an entity are valid includes.""" 40 | for include in includes: 41 | if include not in VALID_INCLUDES[entity]: 42 | raise mb_exceptions.InvalidIncludeError("Bad includes: {inc} is not a valid include".format(inc=include)) 43 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/label.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from mbdata import models 3 | from sqlalchemy.orm import joinedload 4 | from brainzutils.musicbrainz_db import mb_session 5 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 6 | from brainzutils.musicbrainz_db.includes import check_includes 7 | from brainzutils.musicbrainz_db.serialize import serialize_labels 8 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 9 | 10 | 11 | def get_label_by_mbid(mbid, includes=None): 12 | """Get label with the MusicBrainz ID. 13 | 14 | Args: 15 | mbid (uuid): MBID(gid) of the label. 16 | Returns: 17 | Dictionary containing the label information, or None if the label doesn't exist. 18 | """ 19 | if includes is None: 20 | includes = [] 21 | 22 | return fetch_multiple_labels( 23 | [mbid], 24 | includes=includes, 25 | ).get(mbid) 26 | 27 | 28 | def fetch_multiple_labels(mbids, includes=None): 29 | """Get info related to multiple labels using their MusicBrainz IDs. 30 | 31 | Args: 32 | mbids (list): List of MBIDs of labels. 33 | includes (list): List of information to be included. 34 | Returns: 35 | A dictionary containing info of multiple labels keyed by their MBID. 36 | If an MBID doesn't exist in the database, it isn't returned. 
37 | If an MBID is a redirect, the dictionary key will be the MBID given as an argument, 38 | but the returned object will contain the new MBID in the 'mbid' key. 39 | """ 40 | if includes is None: 41 | includes = [] 42 | includes_data = defaultdict(dict) 43 | check_includes('label', includes) 44 | with mb_session() as db: 45 | query = db.query(models.Label).\ 46 | options(joinedload(models.Label.type)).\ 47 | options(joinedload(models.Label.area)) 48 | labels = get_entities_by_gids( 49 | query=query, 50 | entity_type='label', 51 | mbids=mbids, 52 | ) 53 | label_ids = [label.id for label in labels.values()] 54 | 55 | if 'artist-rels' in includes: 56 | get_relationship_info( 57 | db=db, 58 | target_type='artist', 59 | source_type='label', 60 | source_entity_ids=label_ids, 61 | includes_data=includes_data, 62 | ) 63 | 64 | if 'url-rels' in includes: 65 | get_relationship_info( 66 | db=db, 67 | target_type='url', 68 | source_type='label', 69 | source_entity_ids=label_ids, 70 | includes_data=includes_data, 71 | ) 72 | 73 | return {str(mbid): serialize_labels(label, includes_data[label.id]) for mbid, label in labels.items()} 74 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/models.py: -------------------------------------------------------------------------------- 1 | from mbdata import models 2 | 3 | 4 | # Entity models 5 | ENTITY_MODELS = { 6 | 'artist': models.Artist, 7 | 'place': models.Place, 8 | 'release_group': models.ReleaseGroup, 9 | 'release': models.Release, 10 | 'event': models.Event, 11 | 'label': models.Label, 12 | 'series': models.Series, 13 | 'url': models.URL, 14 | 'recording': models.Recording, 15 | 'work': models.Work, 16 | 'editor': models.Editor, 17 | } 18 | 19 | 20 | # Redirect models 21 | REDIRECT_MODELS = { 22 | 'place': models.PlaceGIDRedirect, 23 | 'artist': models.ArtistGIDRedirect, 24 | 'release': models.ReleaseGIDRedirect, 25 | 'release_group': models.ReleaseGroupGIDRedirect, 26 | 'event': models.EventGIDRedirect, 27 | 'label': models.LabelGIDRedirect, 28 | 'recording': models.RecordingGIDRedirect, 29 | 'work': models.WorkGIDRedirect, 30 | } 31 | 32 | 33 | # Meta models 34 | META_MODELS = { 35 | 'label': models.LabelMeta, 36 | 'release_group': models.ReleaseGroupMeta, 37 | 'event': models.EventMeta, 38 | 'work': models.WorkMeta, 39 | 'artist': models.ArtistMeta, 40 | 'recording': models.RecordingMeta, 41 | } 42 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/place.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from mbdata import models 3 | from sqlalchemy.orm import joinedload 4 | from brainzutils.musicbrainz_db import mb_session 5 | from brainzutils.musicbrainz_db.includes import check_includes 6 | from brainzutils.musicbrainz_db.serialize import serialize_places 7 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 8 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 9 | 10 | 11 | def get_place_by_mbid(mbid, includes=None): 12 | """Get place with the MusicBrainz ID. 13 | 14 | Args: 15 | mbid (uuid): MBID(gid) of the place. 16 | Returns: 17 | Dictionary containing the place information, or None if the place doesn't exist. 
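Example:
    An illustrative sketch (the MBID is a placeholder):

        place = get_place_by_mbid("4352063b-a833-421b-a420-e7fb295dece0")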
18 | """ 19 | if includes is None: 20 | includes = [] 21 | 22 | return fetch_multiple_places( 23 | [mbid], 24 | includes=includes, 25 | ).get(mbid) 26 | 27 | 28 | def fetch_multiple_places(mbids, includes=None): 29 | """Get info related to multiple places using their MusicBrainz IDs. 30 | 31 | Args: 32 | mbids (list): List of MBIDs of places. 33 | includes (list): List of information to be included. 34 | 35 | Returns: 36 | A dictionary containing info of multiple places keyed by their MBID. 37 | If an MBID doesn't exist in the database, it isn't returned. 38 | If an MBID is a redirect, the dictionary key will be the MBID given as an argument, 39 | but the returned object will contain the new MBID in the 'mbid' key. 40 | """ 41 | if includes is None: 42 | includes = [] 43 | includes_data = defaultdict(dict) 44 | check_includes('place', includes) 45 | with mb_session() as db: 46 | query = db.query(models.Place).\ 47 | options(joinedload(models.Place.area)).\ 48 | options(joinedload(models.Place.type)) 49 | places = get_entities_by_gids( 50 | query=query, 51 | entity_type='place', 52 | mbids=mbids, 53 | ) 54 | place_ids = [place.id for place in places.values()] 55 | 56 | if 'artist-rels' in includes: 57 | get_relationship_info( 58 | db=db, 59 | target_type='artist', 60 | source_type='place', 61 | source_entity_ids=place_ids, 62 | includes_data=includes_data, 63 | ) 64 | if 'place-rels' in includes: 65 | get_relationship_info( 66 | db=db, 67 | target_type='place', 68 | source_type='place', 69 | source_entity_ids=place_ids, 70 | includes_data=includes_data, 71 | ) 72 | if 'url-rels' in includes: 73 | get_relationship_info( 74 | db=db, 75 | target_type='url', 76 | source_type='place', 77 | source_entity_ids=place_ids, 78 | includes_data=includes_data, 79 | ) 80 | 81 | places = {str(mbid): serialize_places(place, includes_data[place.id]) for mbid, place in places.items()} 82 | return places 83 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/recording.py: -------------------------------------------------------------------------------- 1 | from brainzutils.musicbrainz_db import mb_session 2 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 3 | from brainzutils.musicbrainz_db.includes import check_includes 4 | from brainzutils.musicbrainz_db.serialize import serialize_recording 5 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 6 | from collections import defaultdict 7 | from mbdata.models import Recording, ArtistCredit, ArtistCreditName 8 | from sqlalchemy.orm import joinedload, subqueryload 9 | 10 | 11 | def get_recording_by_mbid(mbid, includes=None): 12 | """ Get recording with MusicBrainz ID. 13 | 14 | Args: 15 | mbid (uuid): MBID(gid) of the recording. 16 | includes (list): List of values to be included. 17 | For list of possible values visit https://bitbucket.org/lalinsky/mbdata/wiki/API/v1/includes#!recording 18 | Returns: 19 | Dictionary containing the recording information, or None if the recording doesn't exist. 20 | """ 21 | if includes is None: 22 | includes = [] 23 | return fetch_multiple_recordings( 24 | [mbid], 25 | includes=includes, 26 | ).get(mbid) 27 | 28 | 29 | def get_many_recordings_by_mbid(mbids, includes=None): 30 | """ Get multiple recordings with MusicBrainz IDs. It fetches recordings 31 | using fetch_multiple_recordings. 32 | 33 | Args: 34 | mbids (list): list of uuid (MBID(gid)) of the recordings. 35 | includes (list): List of values to be included. 
 36 |             For list of possible values visit https://bitbucket.org/lalinsky/mbdata/wiki/API/v1/includes#!recording
 37 |     Returns:
 38 |         A dictionary containing the recording's information with MBIDs as keys.
 39 |         If an MBID doesn't exist in the database, it isn't returned.
 40 |         If an MBID is a redirect, the dictionary key will be the MBID given as an argument,
 41 |         but the returned object will contain the new MBID in the 'mbid' key.
 42 |     """
 43 |     if includes is None:
 44 |         includes = []
 45 | 
 46 |     return fetch_multiple_recordings(
 47 |         mbids,
 48 |         includes,
 49 |     )
 50 | 
 51 | 
 52 | def fetch_multiple_recordings(mbids, includes=None):
 53 |     """ Fetch multiple recordings with MusicBrainz IDs.
 54 | 
 55 |     Args:
 56 |         mbids (list): list of uuid (MBID(gid)) of the recordings.
 57 |         includes (list): List of values to be included.
 58 |             For list of possible values visit https://bitbucket.org/lalinsky/mbdata/wiki/API/v1/includes#!recording
 59 |     Returns:
 60 |         Dictionary containing the recording information with MBIDs as keys.
 61 |             - id: Recording mbid
 62 |             - name: Name of the recording
 63 |             - length: length of the recording
 64 |             - artists:
 65 |                 - artist information: id, name, credited_name and join_phrase
 66 |     """
 67 |     if includes is None:
 68 |         includes = []
 69 |     includes_data = defaultdict(dict)
 70 |     check_includes('recording', includes)
 71 | 
 72 |     with mb_session() as db:
 73 |         query = db.query(Recording)
 74 | 
 75 |         if 'artist' in includes:
 76 |             query = query.options(joinedload(Recording.artist_credit, innerjoin=True))
 77 | 
 78 |         if 'artists' in includes:
 79 |             query = query.options(
 80 |                 joinedload(Recording.artist_credit, innerjoin=True).
 81 |                 joinedload(ArtistCredit.artists).
 82 |                 joinedload(ArtistCreditName.artist)
 83 |             )
 84 | 
 85 |         recordings = get_entities_by_gids(
 86 |             query=query,
 87 |             entity_type='recording',
 88 |             mbids=mbids,
 89 |         )
 90 | 
 91 |         recording_ids = [recording.id for recording in recordings.values()]
 92 | 
 93 |         if 'artist' in includes:
 94 |             for recording in recordings.values():
 95 |                 includes_data[recording.id]['artist'] = recording.artist_credit
 96 | 
 97 |         if 'artists' in includes:
 98 |             for recording in recordings.values():
 99 |                 includes_data[recording.id]['artists'] = recording.artist_credit.artists
100 |                 includes_data[recording.id]['artist-credit-phrase'] = recording.artist_credit.name
101 | 
102 |         if 'url-rels' in includes:
103 |             get_relationship_info(
104 |                 db=db,
105 |                 target_type='url',
106 |                 source_type='recording',
107 |                 source_entity_ids=recording_ids,
108 |                 includes_data=includes_data,
109 |             )
110 | 
111 |         if 'work-rels' in includes:
112 |             get_relationship_info(
113 |                 db=db,
114 |                 target_type='work',
115 |                 source_type='recording',
116 |                 source_entity_ids=recording_ids,
117 |                 includes_data=includes_data,
118 |             )
119 | 
120 |         serial_recordings = {str(mbid): serialize_recording(recording, includes_data[recording.id])
121 |                              for mbid, recording in recordings.items()}
122 | 
123 |     return serial_recordings
124 | 
--------------------------------------------------------------------------------
/brainzutils/musicbrainz_db/release.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | from mbdata.models import Release, ReleaseGroup, Medium, Track, Recording, ArtistCredit, ArtistCreditName
  3 | from sqlalchemy.orm import joinedload
  4 | from brainzutils.musicbrainz_db import exceptions as mb_exceptions
  5 | from brainzutils.musicbrainz_db import mb_session
  6 | from brainzutils.musicbrainz_db.includes import check_includes
  7 | from brainzutils.musicbrainz_db.serialize import serialize_releases
  8 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids
  9 | from brainzutils.musicbrainz_db.helpers import get_relationship_info
 10 | from brainzutils.musicbrainz_db import recording
 11 | 
 12 | 
 13 | def get_release_by_mbid(mbid, includes=None):
 14 |     """Get release with the MusicBrainz ID.
 15 |     Args:
 16 |         mbid (uuid): MBID(gid) of the release.
 17 |         includes (list): List of values to be included.
 18 |             For list of possible values see includes.py.
 19 |     Returns:
 20 |         Dictionary containing the release information, or None if the release doesn't exist.
 21 |     """
 22 |     if includes is None:
 23 |         includes = []
 24 | 
 25 |     return fetch_multiple_releases(
 26 |         [mbid],
 27 |         includes=includes,
 28 |     ).get(mbid)
 29 | 
 30 | 
 31 | def fetch_multiple_releases(mbids, includes=None):
 32 |     """Get info related to multiple releases using their MusicBrainz IDs.
 33 |     Args:
 34 |         mbids (list): List of MBIDs of releases.
 35 |         includes (list): List of information to be included.
 36 |     Returns:
 37 |         A dictionary containing info of multiple releases keyed by their MBID.
 38 |         If an MBID doesn't exist in the database, it isn't returned.
 39 |         If an MBID is a redirect, the dictionary key will be the MBID given as an argument,
 40 |         but the returned object will contain the new MBID in the 'mbid' key.
 41 |     """
 42 |     if includes is None:
 43 |         includes = []
 44 |     includes_data = defaultdict(dict)
 45 |     check_includes('release', includes)
 46 |     with mb_session() as db:
 47 |         query = db.query(Release)
 48 |         if 'release-groups' in includes:
 49 |             query = query.options(joinedload(Release.release_group))
 50 |         if 'artists' in includes:
 51 |             query = query.options(
 52 |                 joinedload(Release.artist_credit).
 53 |                 joinedload(ArtistCredit.artists).
 54 |                 joinedload(ArtistCreditName.artist)
 55 |             )
 56 |         if 'media' in includes:
 57 |             # Fetch media with tracks
 58 |             query = query\
 59 |                 .options(
 60 |                     joinedload(Release.mediums)
 61 |                     .options(
 62 |                         joinedload(Medium.format),
 63 |                         joinedload(Medium.tracks).
 64 |                         joinedload(Track.recording).
 65 |                         joinedload(Recording.artist_credit).
 66 |                         joinedload(ArtistCredit.artists).
 67 |                         joinedload(ArtistCreditName.artist))
 68 |                 )
 69 |         releases = get_entities_by_gids(
 70 |             query=query,
 71 |             entity_type='release',
 72 |             mbids=mbids,
 73 |         )
 74 |         release_ids = [release.id for release in releases.values()]
 75 | 
 76 |         if 'release-groups' in includes:
 77 |             for release in releases.values():
 78 |                 includes_data[release.id]['release-groups'] = release.release_group
 79 | 
 80 |         if 'artists' in includes:
 81 |             for release in releases.values():
 82 |                 artist_credit_names = release.artist_credit.artists
 83 |                 includes_data[release.id]['artist-credit-names'] = artist_credit_names
 84 |                 includes_data[release.id]['artist-credit-phrase'] = release.artist_credit.name
 85 | 
 86 |         if 'media' in includes:
 87 |             for release in releases.values():
 88 |                 includes_data[release.id]['media'] = release.mediums
 89 | 
 90 |         if 'url-rels' in includes:
 91 |             get_relationship_info(
 92 |                 db=db,
 93 |                 target_type='url',
 94 |                 source_type='release',
 95 |                 source_entity_ids=release_ids,
 96 |                 includes_data=includes_data,
 97 |             )
 98 | 
 99 |         releases = {str(mbid): serialize_releases(release, includes_data[release.id])
100 |                     for mbid, release in releases.items()}
101 |     return releases
102 | 
103 | 
104 | def browse_releases(release_group_id, includes=None):
105 |     """Get all the releases by a certain release group.
106 |     You need to provide the Release Group's MusicBrainz ID.
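    Example (illustrative sketch; ``<release-group-mbid>`` is a placeholder
    for a release group MBID that is assumed to exist in the connected
    database)::

        releases = browse_releases('<release-group-mbid>', includes=['url-rels'])
        for mbid, release in releases.items():
            print(mbid, release['name'])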
107 | """ 108 | if includes is None: 109 | includes = [] 110 | with mb_session() as db: 111 | release_ids = db.query(Release.gid).\ 112 | join(ReleaseGroup).\ 113 | filter(ReleaseGroup.gid == release_group_id).all() 114 | release_ids = [release_id[0] for release_id in release_ids] 115 | releases = fetch_multiple_releases(release_ids, includes=includes) 116 | return releases 117 | 118 | 119 | def get_url_rels_from_releases(releases): 120 | """Returns all url-rels for a list of releases in a single list (of url-rel dictionaries) 121 | Typical usage with browse_releases() 122 | """ 123 | all_url_rels = [] 124 | for release_gid in releases.keys(): 125 | if 'url-rels' in releases[release_gid]: 126 | all_url_rels.extend([url_rel for url_rel in releases[release_gid]['url-rels']]) 127 | return all_url_rels 128 | 129 | 130 | def get_releases_using_recording_mbid(recording_mbid): 131 | """Returns a list of releases that contain the recording with 132 | the given recording MBID. 133 | 134 | Args: 135 | recording_mbid (UUID): recording MBID for which releases are to be fetched. 136 | 137 | Returns: 138 | serial_releases (list): list with dictionary elements of following format:: 139 | 140 | { 141 | 'id': , 142 | 'name': , 143 | } 144 | """ 145 | 146 | # First fetch the recording so that redirects don't create any problem 147 | recording_redirect = recording.get_recording_by_mbid(recording_mbid) 148 | recording_mbid = recording_redirect['mbid'] 149 | with mb_session() as db: 150 | releases = db.query(Release).\ 151 | join(Medium).\ 152 | join(Track).\ 153 | join(Recording).\ 154 | filter(Recording.gid == recording_mbid).all() 155 | 156 | serial_releases = [serialize_releases(release) for release in releases] 157 | if not serial_releases: 158 | raise mb_exceptions.NoDataFoundException("Couldn't find release for recording with MBID: %s." % str(recording_mbid)) 159 | 160 | return serial_releases 161 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/release_group.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from mbdata import models 3 | from sqlalchemy import nullslast, or_ 4 | from sqlalchemy.orm import contains_eager, joinedload 5 | from brainzutils.musicbrainz_db import mb_session 6 | import brainzutils.musicbrainz_db.exceptions as mb_exceptions 7 | from brainzutils.musicbrainz_db.includes import check_includes 8 | from brainzutils.musicbrainz_db.serialize import serialize_release_groups 9 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 10 | from brainzutils.musicbrainz_db.helpers import get_relationship_info, get_tags 11 | 12 | def get_mapped_release_types(release_types): 13 | """Get release types mapped to their case sensitive name in musicbrainz. 14 | release_group_primary_type table. 15 | 16 | Args: 17 | release_types (list): List of release types. 18 | Returns: 19 | List of mapped release types. 
20 | """ 21 | 22 | release_types = [release_type.lower() for release_type in release_types] 23 | mapped_release_types = [] 24 | with mb_session() as db: 25 | supported_types = [release_group_type.name for release_group_type in db.query(models.ReleaseGroupPrimaryType)] 26 | release_type_mapping = {supported_type.lower(): supported_type for supported_type in supported_types} 27 | 28 | for release_type in release_types: 29 | if release_type not in release_type_mapping: 30 | raise mb_exceptions.InvalidTypeError("Bad release_types: {rtype} is not supported".format(rtype = release_type)) 31 | else: 32 | mapped_release_types.append(release_type_mapping[release_type]) 33 | 34 | return mapped_release_types 35 | 36 | 37 | def get_release_group_by_mbid(mbid, includes=None): 38 | """Get release group with the MusicBrainz ID. 39 | Args: 40 | mbid (uuid): MBID(gid) of the release group. 41 | Returns: 42 | Dictionary containing the release group information, or None if the release group doesn't exist. 43 | """ 44 | if includes is None: 45 | includes = [] 46 | 47 | return fetch_multiple_release_groups( 48 | [mbid], 49 | includes=includes, 50 | ).get(mbid) 51 | 52 | 53 | def fetch_multiple_release_groups(mbids, includes=None): 54 | """Get info related to multiple release groups using their MusicBrainz IDs. 55 | Args: 56 | mbids (list): List of MBIDs of releases groups. 57 | includes (list): List of information to be included. 58 | Returns: 59 | A dictionary containing info of multiple release groups keyed by their MBID. 60 | If an MBID doesn't exist in the database, it isn't returned. 61 | If an MBID is a redirect, the dictionary key will be the MBID given as an argument, 62 | but the returned object will contain the new MBID in the 'mbid' key. 63 | """ 64 | if includes is None: 65 | includes = [] 66 | includes_data = defaultdict(dict) 67 | check_includes('release_group', includes) 68 | with mb_session() as db: 69 | # Join table meta which contains release date for a release group 70 | query = db.query(models.ReleaseGroup).options(joinedload(models.ReleaseGroup.meta)).\ 71 | options(joinedload(models.ReleaseGroup.type)) 72 | 73 | if 'artists' in includes: 74 | query = query.\ 75 | options( 76 | joinedload(models.ReleaseGroup.artist_credit). 77 | joinedload(models.ArtistCredit.artists). 
78 | joinedload(models.ArtistCreditName.artist) 79 | ) 80 | 81 | release_groups = get_entities_by_gids( 82 | query=query, 83 | entity_type='release_group', 84 | mbids=mbids, 85 | ) 86 | release_group_ids = [release_group.id for release_group in release_groups.values()] 87 | 88 | if 'artists' in includes: 89 | for release_group in release_groups.values(): 90 | artist_credit_names = release_group.artist_credit.artists 91 | includes_data[release_group.id]['artist-credit-names'] = artist_credit_names 92 | includes_data[release_group.id]['artist-credit-phrase'] = release_group.artist_credit.name 93 | 94 | if 'releases' in includes: 95 | query = db.query(models.Release).filter(getattr(models.Release, "release_group_id").in_(release_group_ids)) 96 | for release in query: 97 | includes_data[release.release_group_id].setdefault('releases', []).append(release) 98 | 99 | if 'release-group-rels' in includes: 100 | get_relationship_info( 101 | db=db, 102 | target_type='release_group', 103 | source_type='release_group', 104 | source_entity_ids=release_group_ids, 105 | includes_data=includes_data, 106 | ) 107 | 108 | if 'url-rels' in includes: 109 | get_relationship_info( 110 | db=db, 111 | target_type='url', 112 | source_type='release_group', 113 | source_entity_ids=release_group_ids, 114 | includes_data=includes_data, 115 | ) 116 | 117 | if 'work-rels' in includes: 118 | get_relationship_info( 119 | db=db, 120 | target_type='work', 121 | source_type='release_group', 122 | source_entity_ids=release_group_ids, 123 | includes_data=includes_data, 124 | ) 125 | 126 | if 'tags' in includes: 127 | release_group_tags = get_tags( 128 | db=db, 129 | entity_model=models.ReleaseGroup, 130 | tag_model=models.ReleaseGroupTag, 131 | foreign_tag_id=models.ReleaseGroupTag.release_group_id, 132 | entity_ids=release_group_ids, 133 | ) 134 | for release_group_id, tags in release_group_tags: 135 | includes_data[release_group_id]['tags'] = tags 136 | 137 | for release_group in release_groups.values(): 138 | includes_data[release_group.id]['meta'] = release_group.meta 139 | release_groups = {str(mbid): serialize_release_groups(release_group, includes_data[release_group.id]) 140 | for mbid, release_group in release_groups.items()} 141 | return release_groups 142 | 143 | 144 | def get_release_groups_for_artist(artist_id, release_types=None, limit=None, offset=None): 145 | """Get all release groups linked to an artist. 146 | 147 | Args: 148 | artist_id (uuid): MBID of the artist. 149 | release_types (list): List of types of release groups to be fetched. 150 | limit (int): Max number of release groups to return. 151 | offset (int): Offset that can be used in conjunction with the limit. 152 | 153 | Returns: 154 | Tuple containing the list of dictionaries of release groups ordered by release year 155 | and the total count of the release groups. 
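    Example (illustrative sketch; the artist MBID comes from this package's
    test data and is assumed to exist in the connected database)::

        release_groups, count = get_release_groups_for_artist(
            artist_id='f59c5520-5f46-4d2c-b2c4-822eabf53419',
            release_types=['album', 'single'],
            limit=10,
            offset=0,
        )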
156 | """ 157 | artist_id = str(artist_id) 158 | includes_data = defaultdict(dict) 159 | if release_types is None: 160 | release_types = [] 161 | release_types = [release_type.lower() for release_type in release_types] 162 | # map release types to their case sensitive name in musicbrainz.release_group_primary_type table in the database 163 | release_types_mapping = { 164 | 'album': 'Album', 165 | 'single': 'Single', 166 | 'ep': 'EP', 167 | 'broadcast': 'Broadcast', 168 | 'other': 'Other' 169 | } 170 | release_types = [release_types_mapping[release_type] for release_type in release_types] 171 | with mb_session() as db: 172 | release_groups_query = _get_release_groups_for_artist_query(db, artist_id, release_types) 173 | count = release_groups_query.count() 174 | release_groups = release_groups_query.order_by( 175 | nullslast(models.ReleaseGroupMeta.first_release_date_year.desc()) 176 | ).limit(limit).offset(offset).all() 177 | 178 | for release_group in release_groups: 179 | includes_data[release_group.id]['meta'] = release_group.meta 180 | release_groups = ([serialize_release_groups(release_group, includes_data[release_group.id]) 181 | for release_group in release_groups], count) 182 | return release_groups 183 | 184 | 185 | def _get_release_groups_for_artist_query(db, artist_id, release_types): 186 | return db.query(models.ReleaseGroup).\ 187 | options(joinedload(models.ReleaseGroup.meta)).\ 188 | join(models.ReleaseGroupPrimaryType).join(models.ReleaseGroupMeta).\ 189 | join(models.ArtistCreditName, models.ArtistCreditName.artist_credit_id == models.ReleaseGroup.artist_credit_id).\ 190 | join(models.Artist, models.Artist.id == models.ArtistCreditName.artist_id).\ 191 | filter(models.Artist.gid == artist_id).filter(models.ReleaseGroupPrimaryType.name.in_(release_types)) 192 | 193 | 194 | def get_release_groups_for_label(label_mbid, release_types=None, limit=None, offset=None): 195 | """Get all release groups linked to a label. 196 | 197 | Args: 198 | label_id (uuid): MBID of the label. 199 | release_types (list): List of types of release groups to be fetched. The supported release_types are 200 | 'album', 'single', 'ep', 'broadcast', and 'other'. 201 | limit (int): Max number of release groups to return. 202 | offset (int): Offset that can be used in conjunction with the limit. 203 | 204 | Returns: 205 | Tuple containing the list of dictionaries of release groups and the total count of the release groups. 206 | The list of dictionaries of release groups is ordered by release year, release month, 207 | release date, and release name. In case one of these is set to NULL, it will be ordered last. 208 | List also contains release groups with null type if 'Other' is in the list of release types. 
209 | """ 210 | label_mbid = str(label_mbid) 211 | includes_data = defaultdict(dict) 212 | if release_types is None: 213 | release_types = [] 214 | release_types = get_mapped_release_types(release_types) 215 | include_null_type = True if "Other" in release_types else False 216 | with mb_session() as db: 217 | release_groups_query = _get_release_groups_for_label_query(db, label_mbid, release_types, include_null_type) 218 | count = release_groups_query.count() 219 | release_groups = release_groups_query.order_by( 220 | nullslast(models.ReleaseGroupMeta.first_release_date_year.desc()), 221 | nullslast(models.ReleaseGroupMeta.first_release_date_month.desc()), 222 | nullslast(models.ReleaseGroupMeta.first_release_date_day.desc()), 223 | nullslast(models.ReleaseGroup.name.asc()) 224 | ).limit(limit).offset(offset).all() 225 | 226 | for release_group in release_groups: 227 | includes_data[release_group.id]['meta'] = release_group.meta 228 | release_groups = [serialize_release_groups(release_group, includes_data[release_group.id]) 229 | for release_group in release_groups] 230 | return release_groups, count 231 | 232 | 233 | def _get_release_groups_for_label_query(db, label_mbid, release_types, include_null_type=False): 234 | release_groups = db.query(models.ReleaseGroup).\ 235 | outerjoin(models.ReleaseGroupPrimaryType).join(models.ReleaseGroupMeta).\ 236 | options(contains_eager(models.ReleaseGroup.meta)).\ 237 | options(contains_eager(models.ReleaseGroup.type)).\ 238 | join(models.Release, models.Release.release_group_id == models.ReleaseGroup.id).\ 239 | join(models.ReleaseLabel, models.ReleaseLabel.release_id == models.Release.id).\ 240 | join(models.Label, models.Label.id == models.ReleaseLabel.label_id).\ 241 | filter(models.Label.gid == label_mbid).\ 242 | group_by(models.ReleaseGroup, models.ReleaseGroupMeta, models.ReleaseGroupPrimaryType) 243 | 244 | if include_null_type and release_types: 245 | release_groups = release_groups.filter(or_(models.ReleaseGroup.type == None, models.ReleaseGroupPrimaryType.name.in_(release_types))) 246 | elif release_types: 247 | release_groups = release_groups.filter(models.ReleaseGroupPrimaryType.name.in_(release_types)) 248 | 249 | return release_groups 250 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/serialize.py: -------------------------------------------------------------------------------- 1 | from brainzutils.musicbrainz_db.models import ENTITY_MODELS 2 | from mbdata.utils.models import get_link_target 3 | 4 | 5 | def serialize_begin_end(entity): 6 | begin_date = entity.begin_date 7 | end_date = entity.end_date 8 | begin = [] 9 | end = [] 10 | if begin_date and begin_date.year: 11 | begin.append(f'{begin_date.year:04}') 12 | if begin_date.month: 13 | begin.append(f'{begin_date.month:02}') 14 | if begin_date.day: 15 | begin.append(f'{begin_date.day:02}') 16 | 17 | if end_date and end_date.year: 18 | end.append(f'{end_date.year:04}') 19 | if end_date.month: 20 | end.append(f'{end_date.month:02}') 21 | if end_date.day: 22 | end.append(f'{end_date.day:02}') 23 | 24 | data = {} 25 | if begin: 26 | data["begin"] = "-".join(begin) 27 | if end: 28 | data["end"] = "-".join(end) 29 | return data 30 | 31 | 32 | def serialize_areas(area, includes=None): 33 | if includes is None: 34 | includes = {} 35 | data = { 36 | 'mbid': str(area.gid), 37 | 'name': area.name, 38 | } 39 | 40 | if area.comment: 41 | data['comment'] = area.comment 42 | 43 | dates = serialize_begin_end(area) 44 | if dates: 45 | 
data['life-span'] = dates 46 | 47 | if 'relationship_objs' in includes: 48 | serialize_relationships(data, area, includes['relationship_objs']) 49 | return data 50 | 51 | 52 | def serialize_relationships(data, source_obj, relationship_objs): 53 | """Convert relationship objects to dictionaries. 54 | 55 | Args: 56 | data (dict): Dictionary containing info of source object. 57 | source_obj (mbdata.models): object of source entity. 58 | relationship_objs (dict): Dictionary containing list of objects of different relations. 59 | 60 | Returns: 61 | Dictionary containing lists of dictionaries of related entities. 62 | """ 63 | 64 | for entity_type in ENTITY_MODELS: 65 | relation = '{0}-rels'.format(entity_type) 66 | if relation in relationship_objs: 67 | data[relation] = [] 68 | for obj in relationship_objs[relation]: 69 | link_data = { 70 | 'type': obj.link.link_type.name, 71 | 'type-id': str(obj.link.link_type.gid), 72 | 'begin-year': obj.link.begin_date_year, 73 | 'end-year': obj.link.end_date_year, 74 | } 75 | link_data['direction'] = 'forward' if source_obj.id == obj.entity0_id else 'backward' 76 | if obj.link.ended: 77 | link_data['ended'] = True 78 | link_data[entity_type] = SERIALIZE_ENTITIES[entity_type](get_link_target(obj, source_obj)) 79 | data[relation].append(link_data) 80 | 81 | 82 | def serialize_artist_credit(artist_credit): 83 | """Convert artist_credit object into a list of artist credits.""" 84 | data = [] 85 | for artist_credit_name in artist_credit.artists: 86 | artist_credit_data = { 87 | 'mbid': str(artist_credit_name.artist.gid), 88 | 'name': artist_credit_name.artist.name, 89 | } 90 | 91 | if artist_credit_name.name != artist_credit_name.artist.name: 92 | artist_credit_data['credited_name'] = artist_credit_name.name 93 | 94 | if artist_credit_name.join_phrase: 95 | artist_credit_data['join_phrase'] = artist_credit_name.join_phrase 96 | 97 | data.append(artist_credit_data) 98 | 99 | return data 100 | 101 | 102 | def serialize_recording(recording, includes=None): 103 | """Convert recording objects into dictionary.""" 104 | if includes is None: 105 | includes = {} 106 | data = { 107 | 'mbid': str(recording.gid), 108 | 'name': recording.name, 109 | } 110 | 111 | if recording.comment: 112 | data['comment'] = recording.comment 113 | 114 | if recording.length: 115 | # Divide recording length by 1000 to convert milliseconds into seconds 116 | data['length'] = recording.length / 1000.0 117 | 118 | if recording.video: 119 | data['video'] = True 120 | 121 | if getattr(recording, 'rating', None): 122 | data['rating'] = recording.rating 123 | 124 | if 'artist' in includes: 125 | data['artist'] = recording.artist_credit.name 126 | elif 'artists' in includes: 127 | data['artists'] = serialize_artist_credit(recording.artist_credit) 128 | data['artist-credit-phrase'] = includes['artist-credit-phrase'] 129 | 130 | if 'isrc' in includes: 131 | data['isrcs'] = [isrc.isrc for isrc in recording.isrcs] 132 | 133 | return data 134 | 135 | 136 | def serialize_places(place, includes=None): 137 | if includes is None: 138 | includes = {} 139 | data = { 140 | 'mbid': str(place.gid), 141 | 'name': place.name, 142 | 'address': place.address, 143 | } 144 | 145 | if place.comment: 146 | data['comment'] = place.comment 147 | 148 | if place.type: 149 | data['type'] = place.type.name 150 | 151 | if place.area: 152 | data['area'] = serialize_areas(place.area) 153 | 154 | if place.coordinates: 155 | data['coordinates'] = { 156 | 'latitude': place.coordinates[0], 157 | 'longitude': place.coordinates[1], 158 
| } 159 | 160 | dates = serialize_begin_end(place) 161 | if dates: 162 | data['life-span'] = dates 163 | 164 | if 'relationship_objs' in includes: 165 | serialize_relationships(data, place, includes['relationship_objs']) 166 | return data 167 | 168 | 169 | def serialize_labels(label, includes=None): 170 | if includes is None: 171 | includes = {} 172 | data = { 173 | 'mbid': str(label.gid), 174 | 'name': label.name, 175 | } 176 | 177 | if label.comment: 178 | data['comment'] = label.comment 179 | 180 | dates = serialize_begin_end(label) 181 | if dates: 182 | data['life-span'] = dates 183 | 184 | if label.type: 185 | data['type'] = label.type.name 186 | 187 | if label.area: 188 | data['area'] = label.area.name 189 | 190 | if getattr(label, 'rating', None): 191 | data['rating'] = label.rating 192 | 193 | if 'relationship_objs' in includes: 194 | serialize_relationships(data, label, includes['relationship_objs']) 195 | 196 | return data 197 | 198 | 199 | def serialize_artists(artist, includes=None): 200 | if includes is None: 201 | includes = {} 202 | data = { 203 | 'mbid': str(artist.gid), 204 | 'name': artist.name, 205 | 'sort_name': artist.sort_name, 206 | } 207 | 208 | if artist.comment: 209 | data['comment'] = artist.comment 210 | 211 | dates = serialize_begin_end(artist) 212 | if dates: 213 | data['life-span'] = dates 214 | 215 | if artist.type: 216 | data['type'] = artist.type.name 217 | 218 | if getattr(artist, 'rating', None): 219 | data['rating'] = artist.rating 220 | 221 | if 'relationship_objs' in includes: 222 | serialize_relationships(data, artist, includes['relationship_objs']) 223 | 224 | return data 225 | 226 | 227 | def serialize_artist_credit_names(artist_credit_name): 228 | data = { 229 | 'name': artist_credit_name.name, 230 | 'artist': serialize_artists(artist_credit_name.artist), 231 | } 232 | if artist_credit_name.join_phrase: 233 | data['join_phrase'] = artist_credit_name.join_phrase 234 | return data 235 | 236 | 237 | def serialize_release_groups(release_group, includes=None): 238 | if includes is None: 239 | includes = {} 240 | 241 | data = { 242 | 'mbid': str(release_group.gid), 243 | 'title': release_group.name, 244 | } 245 | 246 | if release_group.comment: 247 | data['comment'] = release_group.comment 248 | 249 | if release_group.type: 250 | data['type'] = release_group.type.name 251 | 252 | if getattr(release_group, 'rating', None): 253 | data['rating'] = release_group.rating 254 | 255 | if 'artist-credit-phrase' in includes: 256 | data['artist-credit-phrase'] = includes['artist-credit-phrase'] 257 | 258 | if 'meta' in includes and includes['meta'].first_release_date_year: 259 | data['first-release-year'] = includes['meta'].first_release_date_year 260 | 261 | if 'artist-credit-names' in includes: 262 | data['artist-credit'] = [serialize_artist_credit_names(artist_credit_name) 263 | for artist_credit_name in includes['artist-credit-names']] 264 | 265 | if 'releases' in includes: 266 | data['release-list'] = [serialize_releases(release) for release in includes['releases']] 267 | 268 | if 'relationship_objs' in includes: 269 | serialize_relationships(data, release_group, includes['relationship_objs']) 270 | 271 | if 'tags' in includes: 272 | data['tag-list'] = includes['tags'] 273 | return data 274 | 275 | 276 | def serialize_medium(medium, includes=None): 277 | if includes is None: 278 | includes = {} 279 | data = { 280 | 'name': medium.name, 281 | 'track_count': medium.track_count, 282 | 'position': medium.position, 283 | } 284 | if medium.format: 285 | 
data['format'] = medium.format.name 286 | 287 | if 'tracks' in includes and includes['tracks']: 288 | data['track-list'] = [serialize_track(track) for track in includes['tracks']] 289 | return data 290 | 291 | 292 | def serialize_track(track): 293 | return { 294 | 'mbid': str(track.gid), 295 | 'name': track.name, 296 | 'number': track.number, 297 | 'position': track.position, 298 | 'length': track.length, 299 | 'recording_id': str(track.recording.gid), 300 | 'recording_title': track.recording.name, 301 | 'artist-credit': [serialize_artist_credit_names(artist_credit_name) 302 | for artist_credit_name in track.recording.artist_credit.artists], 303 | 'artist-credit-phrase': track.recording.artist_credit.name 304 | } 305 | 306 | 307 | def serialize_releases(release, includes=None): 308 | if includes is None: 309 | includes = {} 310 | 311 | data = { 312 | 'mbid': str(release.gid), 313 | 'name': release.name, 314 | } 315 | 316 | if 'relationship_objs' in includes: 317 | serialize_relationships(data, release, includes['relationship_objs']) 318 | 319 | if 'release-groups' in includes: 320 | data['release-group'] = serialize_release_groups(includes['release-groups']) 321 | 322 | if 'artist-credit-phrase' in includes: 323 | data['artist-credit-phrase'] = includes['artist-credit-phrase'] 324 | 325 | if 'artist-credit-names' in includes: 326 | data['artist-credit'] = [serialize_artist_credit_names(artist_credit_name) 327 | for artist_credit_name in includes['artist-credit-names']] 328 | 329 | if 'media' in includes: 330 | data['medium-list'] = [serialize_medium(medium, includes={'tracks': medium.tracks}) 331 | for medium in includes['media']] 332 | 333 | if release.comment: 334 | data['comment'] = release.comment 335 | 336 | return data 337 | 338 | 339 | def serialize_events(event, includes=None): 340 | if includes is None: 341 | includes = {} 342 | data = { 343 | 'mbid': str(event.gid), 344 | 'name': event.name, 345 | } 346 | 347 | if event.comment: 348 | data['comment'] = event.comment 349 | 350 | dates = serialize_begin_end(event) 351 | if dates: 352 | data['life-span'] = dates 353 | 354 | if event.type: 355 | data['type'] = event.type.name 356 | 357 | if getattr(event, 'rating', None): 358 | data['rating'] = event.rating 359 | 360 | if 'relationship_objs' in includes: 361 | serialize_relationships(data, event, includes['relationship_objs']) 362 | return data 363 | 364 | 365 | def serialize_url(url, includes=None): 366 | if includes is None: 367 | includes = {} 368 | data = { 369 | 'mbid': str(url.gid), 370 | 'url': url.url, 371 | } 372 | 373 | if 'relationship_objs' in includes: 374 | serialize_relationships(data, url, includes['relationship_objs']) 375 | return data 376 | 377 | 378 | def serialize_works(work, includes=None): 379 | if includes is None: 380 | includes = {} 381 | data = { 382 | 'mbid': str(work.gid), 383 | 'name': work.name, 384 | } 385 | 386 | if work.comment: 387 | data['comment'] = work.comment 388 | 389 | if work.type: 390 | data['type'] = work.type.name 391 | 392 | if getattr(work, 'rating', None): 393 | data['rating'] = work.rating 394 | 395 | if 'relationship_objs' in includes: 396 | serialize_relationships(data, work, includes['relationship_objs']) 397 | 398 | return data 399 | 400 | 401 | def serialize_editor(editor, includes=None): 402 | # TODO: Add includes to data here (BU-18) 403 | data = { 404 | "id": editor.id, 405 | "name": editor.name, 406 | "privs": editor.privs, 407 | "email": editor.email, 408 | "website": editor.website, 409 | "bio": editor.bio, 410 | 
"member_since": editor.member_since, 411 | "email_confirm_date": editor.email_confirm_date, 412 | "last_login_date": editor.last_login_date, 413 | "last_updated": editor.last_updated, 414 | "birth_date": editor.birth_date, 415 | "deleted": editor.deleted, 416 | "gender": editor.gender, 417 | "area": None 418 | } 419 | if editor.area: 420 | data["area"] = serialize_areas(editor.area) 421 | return data 422 | 423 | 424 | def serialize_series(series, includes=None): 425 | if includes is None: 426 | includes = {} 427 | 428 | data = { 429 | 'mbid': str(series.gid), 430 | 'name': series.name, 431 | } 432 | 433 | if series.comment: 434 | data['comment'] = series.comment 435 | 436 | if 'relationship_objs' in includes: 437 | serialize_relationships(data, series, includes['relationship_objs']) 438 | 439 | return data 440 | 441 | 442 | SERIALIZE_ENTITIES = { 443 | 'artist': serialize_artists, 444 | 'release_group': serialize_release_groups, 445 | 'release': serialize_releases, 446 | 'medium': serialize_medium, 447 | 'url': serialize_url, 448 | 'editor': serialize_editor, 449 | 'recording': serialize_recording, 450 | 'place': serialize_places, 451 | 'area': serialize_areas, 452 | 'event': serialize_events, 453 | 'series': serialize_series, 454 | } 455 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metabrainz/brainzutils-python/bf01c6da15d4a2426d64a31cf232c06bec3860f3/brainzutils/musicbrainz_db/tests/__init__.py -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_artist.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import artist as mb_artist 4 | 5 | 6 | @pytest.mark.database 7 | class TestArtist: 8 | 9 | def test_get_artist_by_mbid(self, engine): 10 | artist = mb_artist.get_artist_by_mbid("f59c5520-5f46-4d2c-b2c4-822eabf53419") 11 | assert artist == { 12 | "mbid": "f59c5520-5f46-4d2c-b2c4-822eabf53419", 13 | "name": "Linkin Park", 14 | "sort_name": "Linkin Park", 15 | "comment": "American rock band", 16 | "life-span": {"begin": "1999"}, 17 | "rating": 85, 18 | "type": "Group", 19 | } 20 | 21 | def test_get_artist_by_mbid_redirect(self, engine): 22 | """Using an MBID which is a redirect will return the "canonical" id""" 23 | artist = mb_artist.get_artist_by_mbid("b3d01315-d52a-4f3a-908b-0618315c1ef2") 24 | assert artist == { 25 | "mbid": "79239441-bfd5-4981-a70c-55c3f15c1287", 26 | "name": "Madonna", 27 | "sort_name": "Madonna", 28 | "comment": "“Queen of Pop”", 29 | "life-span": {"begin": "1958-08-16"}, 30 | "rating": 88, 31 | "type": "Person", 32 | } 33 | 34 | def test_fetch_multiple_artists(self, engine): 35 | artists = mb_artist.fetch_multiple_artists([ 36 | "f59c5520-5f46-4d2c-b2c4-822eabf53419", 37 | "f82bcf78-5b69-4622-a5ef-73800768d9ac", 38 | ]) 39 | assert artists["f82bcf78-5b69-4622-a5ef-73800768d9ac"] == { 40 | "mbid": "f82bcf78-5b69-4622-a5ef-73800768d9ac", 41 | "name": "JAY‐Z", 42 | "sort_name": "JAY‐Z", 43 | "type": "Person", 44 | "comment": "US rapper", 45 | "life-span": {"begin": "1969-12-04"}, 46 | "rating": 71, 47 | } 48 | assert artists["f59c5520-5f46-4d2c-b2c4-822eabf53419"] == { 49 | "mbid": "f59c5520-5f46-4d2c-b2c4-822eabf53419", 50 | "name": "Linkin Park", 51 | "sort_name": "Linkin Park", 52 | "type": "Group", 53 | "comment": 
"American rock band", 54 | "life-span": {"begin": "1999"}, 55 | "rating": 85, 56 | } 57 | 58 | def test_fetch_multiple_artists_redirect(self, engine): 59 | """Artist with a redirect uses redirected mbid in dictionary key, but canonical id in returned data""" 60 | artists = mb_artist.fetch_multiple_artists(["fe008f22-07be-46f0-9206-7cab2d26e89d"]) 61 | assert len(artists) == 1 62 | assert artists["fe008f22-07be-46f0-9206-7cab2d26e89d"] == { 63 | "mbid": "f59c5520-5f46-4d2c-b2c4-822eabf53419", 64 | "name": "Linkin Park", 65 | "sort_name": "Linkin Park", 66 | "comment": "American rock band", 67 | "life-span": {"begin": "1999"}, 68 | "rating": 85, 69 | "type": "Group" 70 | } 71 | 72 | def test_fetch_multiple_artists_missing(self, engine): 73 | """If an artist id doesn't exist, don't fetch it""" 74 | artists = mb_artist.fetch_multiple_artists(["f59c5520-5f46-4d2c-b2c4-822eabf53419", 75 | "f59c5520-aaaa-aaaa-b2c4-822eabf53419"], 76 | includes=['artist-rels', 'url-rels']) 77 | assert list(artists.keys()) == ["f59c5520-5f46-4d2c-b2c4-822eabf53419"] 78 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_editor.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import pytest 4 | from mbdata.models import Editor 5 | from psycopg2.tz import FixedOffsetTimezone 6 | 7 | from brainzutils.musicbrainz_db import editor as mb_editor 8 | 9 | 10 | @pytest.mark.database 11 | class TestEditor: 12 | editor_dt = datetime(2014, 12, 1, 14, 6, 42, 321443, tzinfo=FixedOffsetTimezone(offset=0, name=None)) 13 | 14 | editor_1 = dict(id=2323, name="Editor 1", privs=0, member_since=editor_dt, email_confirm_date=editor_dt, 15 | last_login_date=editor_dt, last_updated=editor_dt, deleted=False, password="{CLEARTEXT}pass", 16 | ha1="3f3edade87115ce351d63f42d92a1834") 17 | expected_editor_1 = { 18 | 'area': None, 19 | 'bio': None, 20 | 'birth_date': None, 21 | 'deleted': False, 22 | 'email': None, 23 | 'email_confirm_date': editor_dt, 24 | 'gender': None, 25 | 'id': 2323, 26 | 'last_login_date': editor_dt, 27 | 'last_updated': editor_dt, 28 | 'member_since': editor_dt, 29 | 'name': 'Editor 1', 30 | 'privs': 0, 31 | 'website': None 32 | } 33 | 34 | editor_2 = dict(id=2324, name="Editor 2", privs=3, email="editor@example.com", website="example.com", 35 | bio="Random\neditor", member_since=editor_dt, email_confirm_date=editor_dt, 36 | last_login_date=editor_dt, last_updated=editor_dt, deleted=False, area=None, 37 | password="$2b$12$2odiKUAGktuwM2J.tp/uZ.54bniapSMjCln3J1TfC6zx74QFuawQ6", 38 | ha1="3f3edade87115ce351d63f42d92a1834") 39 | expected_editor_2 = { 40 | "id": 2324, 41 | "name": "Editor 2", 42 | "privs": 3, 43 | "email": "editor@example.com", 44 | "website": "example.com", 45 | "bio": "Random\neditor", 46 | "member_since": editor_dt, 47 | "email_confirm_date": editor_dt, 48 | "last_login_date": editor_dt, 49 | "last_updated": editor_dt, 50 | "birth_date": None, 51 | "deleted": False, 52 | "gender": None, 53 | "area": None, 54 | } 55 | 56 | def test_get_by_id(self, session): 57 | # Manually adding and deleting data in tests can get tedious. However, we have only two tests for which this is 58 | # needed. In case in future we need to add more tests where the test database needs to be modified, we should 59 | # explore other alternatives to ease the process. 
 60 |         with session as db:
 61 |             # The editors table in test database has many empty columns and fields like last_login_date may change with
 62 |             # new dump.
 63 |             insert_editor_1 = Editor(**TestEditor.editor_1)
 64 |             db.add(insert_editor_1)
 65 |             db.commit()
 66 |             try:
 67 |                 editor = mb_editor.get_editor_by_id(2323)
 68 |                 assert editor == TestEditor.expected_editor_1
 69 |             finally:
 70 |                 # regardless whether the assertion fails or passes, delete the inserted editor to prevent side effects
 71 |                 # on subsequent tests
 72 |                 db.delete(insert_editor_1)
 73 |                 db.commit()
 74 | 
 75 |     def test_fetch_multiple_editors(self, session):
 76 |         # Manually adding and deleting data in tests can get tedious. However, we have only two tests for which this is
 77 |         # needed. In case in future we need to add more tests where the test database needs to be modified, we should
 78 |         # explore other alternatives to ease the process.
 79 |         with session as db:
 80 |             # The editors table in test database has many empty columns and fields like last_login_date may change with
 81 |             # new dump.
 82 |             insert_editor_1 = Editor(**TestEditor.editor_1)
 83 |             insert_editor_2 = Editor(**TestEditor.editor_2)
 84 |             db.add(insert_editor_1)
 85 |             db.add(insert_editor_2)
 86 |             db.commit()
 87 |             try:
 88 |                 editors = mb_editor.fetch_multiple_editors([2323, 2324])
 89 |                 assert editors[2323] == TestEditor.expected_editor_1
 90 |                 assert editors[2324] == TestEditor.expected_editor_2
 91 |             finally:
 92 |                 # regardless whether the assertion fails or passes, delete the inserted editor to prevent side effects
 93 |                 # on subsequent tests
 94 |                 db.delete(insert_editor_1)
 95 |                 db.delete(insert_editor_2)
 96 |                 db.commit()
 97 | 
 98 |     def test_fetch_multiple_editors_empty(self, engine):
 99 |         editors = mb_editor.fetch_multiple_editors(
100 |             [2323, 2324],
101 |         )
102 |         assert editors == {}
103 | 
--------------------------------------------------------------------------------
/brainzutils/musicbrainz_db/tests/test_event.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from brainzutils.musicbrainz_db import event as mb_event
 4 | 
 5 | 
 6 | @pytest.mark.database
 7 | class TestEvent:
 8 | 
 9 |     def test_get_event_by_mbid(self, engine):
10 |         event = mb_event.get_event_by_mbid('d4921d43-bf92-464e-aef4-bba8540fc5bd')
11 |         assert event == {
12 |             'mbid': 'd4921d43-bf92-464e-aef4-bba8540fc5bd',
13 |             'name': 'Butterfly Whirl 2015',
14 |             'life-span': {'begin': '2015-05-22', 'end': '2015-05-25'},
15 |             'type': 'Festival'
16 |         }
17 | 
18 |     def test_get_event_by_mbid_redirect(self, engine):
19 |         """If using an id that is redirected, return the "canonical" id"""
20 |         event = mb_event.get_event_by_mbid('b8528315-ef77-46e2-bff9-d1b00d84dc3f')
21 |         assert event == {
22 |             'mbid': '499559c8-b84b-422e-8ad7-b746d48c21aa',
23 |             'name': '1995-10-11: Riverport Amphitheatre, Maryland Heights, Missouri',
24 |             'life-span': {'begin': '1995-10-11', 'end': '1995-10-11'},
25 |             'rating': 100,
26 |             'type': 'Concert',
27 |         }
28 | 
29 |     def test_get_event_by_mbid_with_includes(self, engine):
30 |         event = mb_event.get_event_by_mbid('b8528315-ef77-46e2-bff9-d1b00d84dc3f',
31 |                                            includes=['artist-rels'])
32 |         assert event['mbid'] == '499559c8-b84b-422e-8ad7-b746d48c21aa'
33 |         assert len(event['artist-rels']) == 1
34 |         assert event['artist-rels'][0]['type-id'] == '936c7c95-3156-3889-a062-8a0cd57f8946'
35 | 
36 |     def test_fetch_multiple_events(self, engine):
37 |         events = mb_event.fetch_multiple_events(
38 |             ['d4921d43-bf92-464e-aef4-bba8540fc5bd', 'b335b093-b3a0-411f-9f3d-7f680a4992d6'],
39 | ) 40 | assert events['d4921d43-bf92-464e-aef4-bba8540fc5bd']['name'] == 'Butterfly Whirl 2015' 41 | assert events['b335b093-b3a0-411f-9f3d-7f680a4992d6']['name'] == 'KISS in Atlanta' 42 | 43 | def test_fetch_multiple_events_redirect(self, engine): 44 | """""" 45 | events = mb_event.fetch_multiple_events( 46 | ['b8528315-ef77-46e2-bff9-d1b00d84dc3f'], 47 | ) 48 | assert events == {'b8528315-ef77-46e2-bff9-d1b00d84dc3f': { 49 | 'mbid': '499559c8-b84b-422e-8ad7-b746d48c21aa', 50 | 'name': '1995-10-11: Riverport Amphitheatre, Maryland Heights, Missouri', 51 | 'life-span': {'begin': '1995-10-11', 'end': '1995-10-11'}, 52 | 'rating': 100, 53 | 'type': 'Concert', 54 | }} 55 | 56 | def test_fetch_multiple_events_empty(self, engine): 57 | """If an event id doesn't exist, don't return it in the list""" 58 | events = mb_event.fetch_multiple_events([ 59 | 'd4921d43-bf92-464e-aef4-bba8540fc5bd', 60 | '40e6153d-4444-4444-4444-b0a47e3825ce' 61 | ], 62 | includes=['artist-rels', 'place-rels', 'series-rels', 'url-rels', 'release-group-rels']) 63 | assert list(events.keys()) == ['d4921d43-bf92-464e-aef4-bba8540fc5bd'] 64 | 65 | def test_get_events_for_place(self, engine): 66 | events = mb_event.get_events_for_place( 67 | place_id='4352063b-a833-421b-a420-e7fb295dece0', 68 | event_types=['Concert', 'Festival'], 69 | include_null_type=False, 70 | ) 71 | assert events[0][0] == { 72 | "life-span": { 73 | "begin": "2015-07-17", 74 | "end": "2015-09-12" 75 | }, 76 | "mbid": "00d6449e-c6d2-42f1-a09e-c01668af1dd7", 77 | "name": "The Proms 2015", 78 | "type": "Festival" 79 | } 80 | 81 | assert events[1] == 5 82 | assert len(events[0]) == 5 83 | 84 | events2 = mb_event.get_events_for_place( 85 | place_id='06e5431e-ef98-424c-a43a-4b7a3cf26327', 86 | event_types=[], 87 | include_null_type=True, 88 | ) 89 | 90 | # first item doesn't have a 'type' key 91 | assert events2[0][0] == { 92 | "life-span": { 93 | "begin": "2015-12-19", 94 | "end": "2015-12-19" 95 | }, 96 | "mbid": "6cc3999a-2f19-433e-b760-f2ff2a6bc86b", 97 | "name": "2015-12-19: Studio 8H, GE Building, Rockefeller Center, New York City, NY, USA", 98 | 'comment': 'Saturday Night Live', 99 | } 100 | 101 | assert events2[1] == 5 102 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_helper.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | import pytest 4 | from mbdata import models 5 | 6 | from brainzutils.musicbrainz_db.serialize import serialize_relationships 7 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 8 | import brainzutils.musicbrainz_db as mb 9 | from brainzutils.musicbrainz_db.helpers import get_tags 10 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 11 | 12 | 13 | @pytest.mark.database 14 | class TestHelpers: 15 | 16 | def test_get_tags(self, engine): 17 | data = defaultdict(dict) 18 | with mb.mb_session() as db: 19 | release_group_tags = get_tags( 20 | db=db, 21 | entity_model=models.ReleaseGroup, 22 | tag_model=models.ReleaseGroupTag, 23 | foreign_tag_id=models.ReleaseGroupTag.release_group_id, 24 | entity_ids=[253487], 25 | ) 26 | for release_group_id, tags in release_group_tags: 27 | data[release_group_id]['tags'] = tags 28 | expected_data = { 29 | 253487: { 30 | 'tags': ['classical', 'ballet'] 31 | } 32 | } 33 | assert dict(data) == expected_data 34 | 35 | def test_get_relationship_info(self, engine): 36 | data = {} 37 | includes_data = 
defaultdict(dict) 38 | with mb.mb_session() as db: 39 | gid = "3185e028-9a08-448b-83e3-873dfda40476" 40 | place = get_entities_by_gids( 41 | query=db.query(models.Place), 42 | entity_type='place', 43 | mbids=[gid], 44 | )[gid] 45 | get_relationship_info( 46 | db=db, 47 | target_type='url', 48 | source_type='place', 49 | source_entity_ids=[place.id], 50 | includes_data=includes_data, 51 | ) 52 | serialize_relationships(data, place, includes_data[place.id]['relationship_objs']) 53 | expected_data = { 54 | 'url-rels': [ 55 | { 56 | 'type': 'wikidata', 57 | 'type-id': 'e6826618-b410-4b8d-b3b5-52e29eac5e1f', 58 | 'begin-year': None, 59 | 'end-year': None, 60 | 'direction': 'forward', 61 | 'url': { 62 | 'mbid': '86d64bb6-bcee-4cda-b1f8-050394664671', 63 | 'url': 'https://www.wikidata.org/wiki/Q2489904' 64 | } 65 | }, 66 | { 67 | 'type': 'discogs', 68 | 'type-id': '1c140ac8-8dc2-449e-92cb-52c90d525640', 69 | 'begin-year': None, 70 | 'end-year': None, 71 | 'direction': 'forward', 72 | 'url': { 73 | 'mbid': '06332787-5aac-4e4c-95b9-75cf729ae308', 74 | 'url': 'https://www.discogs.com/label/266610' 75 | } 76 | } 77 | ] 78 | } 79 | assert data == expected_data 80 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_label.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import label as mb_label 4 | 5 | 6 | @pytest.mark.database 7 | class TestLabel: 8 | 9 | def test_get_label_by_mbid(self, engine): 10 | label = mb_label.get_label_by_mbid('4cccc72a-0bd0-433a-905e-dad87871397d') 11 | assert label == { 12 | "mbid": "4cccc72a-0bd0-433a-905e-dad87871397d", 13 | "name": "Roc‐A‐Fella Records", 14 | "type": "Original Production", 15 | "area": "United States", 16 | "life-span": {"begin": "1996", "end": "2013"}, 17 | "rating": 100, 18 | } 19 | 20 | def test_get_label_by_mbid_redirect(self, engine): 21 | label = mb_label.get_label_by_mbid('67cf4cad-c039-4f01-bc84-f8dab7791ed7') 22 | assert label == { 23 | "mbid": "50c384a2-0b44-401b-b893-8181173339c7", 24 | "name": "Atlantic", 25 | "type": "Imprint", 26 | "area": "United States", 27 | "comment": "Warner Music imprint", 28 | "life-span": {"begin": "1947"}, 29 | "rating": 100, 30 | } 31 | 32 | def test_fetch_multiple_labels(self, engine): 33 | labels = mb_label.fetch_multiple_labels([ 34 | 'c595c289-47ce-4fba-b999-b87503e8cb71', 35 | '4cccc72a-0bd0-433a-905e-dad87871397d' 36 | ]) 37 | assert len(labels) == 2 38 | assert labels["c595c289-47ce-4fba-b999-b87503e8cb71"] == { 39 | "mbid": "c595c289-47ce-4fba-b999-b87503e8cb71", 40 | "name": "Warner Bros. 
Records", 41 | "comment": '1958–2019; “WB” logo, with or without “records” beneath or on banner across', 42 | "type": "Imprint", 43 | "area": "United States", 44 | "life-span": {"begin": "1958-03-19", "end": "2019-05-28"}, 45 | } 46 | assert labels["4cccc72a-0bd0-433a-905e-dad87871397d"] == { 47 | "mbid": "4cccc72a-0bd0-433a-905e-dad87871397d", 48 | "name": "Roc‐A‐Fella Records", 49 | "type": "Original Production", 50 | "area": "United States", 51 | "life-span": {"begin": "1996", "end": "2013"}, 52 | "rating": 100 53 | } 54 | 55 | def test_fetch_multiple_labels_redirect(self, engine): 56 | labels = mb_label.fetch_multiple_labels([ 57 | '67cf4cad-c039-4f01-bc84-f8dab7791ed7' 58 | ]) 59 | assert len(labels) == 1 60 | assert labels["67cf4cad-c039-4f01-bc84-f8dab7791ed7"] == { 61 | "mbid": "50c384a2-0b44-401b-b893-8181173339c7", 62 | "name": "Atlantic", 63 | "type": "Imprint", 64 | "area": "United States", 65 | "comment": "Warner Music imprint", 66 | "life-span": {"begin": "1947"}, 67 | "rating": 100, 68 | } 69 | 70 | def test_fetch_multiple_labels_missing(self, engine): 71 | labels = mb_label.fetch_multiple_labels([ 72 | '50c384a2-0b44-401b-b893-8181173339c7', 73 | '50c384a2-0000-0000-0000-8181173339c7' 74 | ], includes=['artist-rels', 'url-rels']) 75 | assert list(labels.keys()) == ['50c384a2-0b44-401b-b893-8181173339c7'] 76 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_place.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import place as mb_place 4 | 5 | 6 | @pytest.mark.database 7 | class TestPlace: 8 | 9 | def test_get_place_by_mbid(self, engine): 10 | place = mb_place.get_place_by_mbid('4352063b-a833-421b-a420-e7fb295dece0') 11 | assert place['name'] == 'Royal Albert Hall' 12 | assert place['type'] == 'Venue' 13 | assert place['coordinates'] == { 14 | 'latitude': 51.50105, 15 | 'longitude': -0.17748 16 | } 17 | assert place['area'] == { 18 | 'mbid': 'b9576171-3434-4d1b-8883-165ed6e65d2f', 19 | 'name': 'Kensington and Chelsea', 20 | } 21 | 22 | def test_get_place_by_mbid_redirect(self, engine): 23 | place = mb_place.get_place_by_mbid('b1690ae6-5a37-46d7-99ae-b7e2d790485f') 24 | assert place == { 25 | 'address': 'Herbert-von-Karajan-Straße 1, 10785 Berlin, Germany', 26 | 'area': {'mbid': 'c9ac1239-e832-41bc-9930-e252a1fd1105', 'name': 'Berlin'}, 27 | 'coordinates': {'latitude': 52.51, 'longitude': 13.37}, 28 | 'mbid': 'bea135c0-a32e-49be-85fd-9234c73fa0a8', 29 | 'name': 'Berliner Philharmonie', 30 | 'type': 'Venue', 31 | 'life-span': {'begin': '1963'}, 32 | } 33 | 34 | def test_fetch_multiple_places(self, engine): 35 | places = mb_place.fetch_multiple_places( 36 | ['4352063b-a833-421b-a420-e7fb295dece0', '2056ad56-cea9-4536-9f2d-58765a38829c'] 37 | ) 38 | assert places['4352063b-a833-421b-a420-e7fb295dece0']['name'] == 'Royal Albert Hall' 39 | assert places['2056ad56-cea9-4536-9f2d-58765a38829c']['name'] == 'Finnvox' 40 | 41 | def test_fetch_multiple_places_redirect(self, engine): 42 | places = mb_place.fetch_multiple_places( 43 | ['4352063b-a833-421b-a420-e7fb295dece0', 'b1690ae6-5a37-46d7-99ae-b7e2d790485f'] 44 | ) 45 | assert len(places) == 2 46 | assert places['b1690ae6-5a37-46d7-99ae-b7e2d790485f']['mbid'] == 'bea135c0-a32e-49be-85fd-9234c73fa0a8' 47 | assert places['b1690ae6-5a37-46d7-99ae-b7e2d790485f']['name'] == 'Berliner Philharmonie' 48 | 49 | def test_fetch_multiple_places_empty(self, engine): 50 | places = 
mb_place.fetch_multiple_places( 51 | ['bea135c0-a32e-49be-85fd-9234c73fa0a8', 'bea135c0-3333-3333-3333-9234c73fa0a8'], 52 | includes=['artist-rels', 'place-rels', 'url-rels'] 53 | ) 54 | assert list(places.keys()) == ['bea135c0-a32e-49be-85fd-9234c73fa0a8'] 55 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_recording.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import recording as mb_recording 4 | 5 | 6 | @pytest.mark.database 7 | class TestRecording: 8 | 9 | def test_get_recording_by_mbid(self, engine): 10 | """ Tests if appropriate recording is returned for a given MBID. """ 11 | self.maxDiff = None 12 | recording = mb_recording.get_recording_by_mbid('daccb724-8023-432a-854c-e0accb6c8678', includes=['artists']) 13 | 14 | assert recording == { 15 | 'mbid': 'daccb724-8023-432a-854c-e0accb6c8678', 16 | 'name': 'Numb / Encore', 17 | 'comment': 'explicit', 18 | 'length': 205.28, 19 | 'rating': 78, 20 | 'artist-credit-phrase': 'Jay‐Z / Linkin Park', 21 | 'artists': [ 22 | { 23 | 'mbid': 'f82bcf78-5b69-4622-a5ef-73800768d9ac', 24 | 'name': 'JAY‐Z', 25 | 'credited_name': 'Jay‐Z', 26 | 'join_phrase': ' / ', 27 | }, 28 | { 29 | 'mbid': 'f59c5520-5f46-4d2c-b2c4-822eabf53419', 30 | 'name': 'Linkin Park' 31 | } 32 | ] 33 | } 34 | 35 | def test_get_recording_by_mbid_redirect(self, engine): 36 | recording = mb_recording.get_recording_by_mbid('e00d4dce-097e-4098-bbb3-77db884566f3') 37 | assert recording == { 38 | 'mbid': 'fbe3d0b9-3990-4a76-bddb-12f4a0447a2c', 39 | 'name': 'The Perfect Drug (Nine Inch Nails)', 40 | 'length': 499, 41 | 'rating': 60, 42 | } 43 | 44 | def test_fetch_multiple_recordings(self, engine): 45 | """ Tests if appropriate recordings are returned for a given list of MBIDs. 
""" 46 | self.maxDiff = None 47 | 48 | mbids = ['daccb724-8023-432a-854c-e0accb6c8678', 'ae83579c-5f33-4a35-83f3-89206c44a426'] 49 | recordings = mb_recording.fetch_multiple_recordings(mbids, includes=['artists']) 50 | 51 | assert recordings == { 52 | 'daccb724-8023-432a-854c-e0accb6c8678': { 53 | 'mbid': 'daccb724-8023-432a-854c-e0accb6c8678', 54 | 'name': 'Numb / Encore', 55 | 'comment': 'explicit', 56 | 'length': 205.28, 57 | 'rating': 78, 58 | 'artist-credit-phrase': 'Jay‐Z / Linkin Park', 59 | 'artists': [ 60 | { 61 | 'mbid': 'f82bcf78-5b69-4622-a5ef-73800768d9ac', 62 | 'name': 'JAY‐Z', 63 | 'credited_name': 'Jay‐Z', 64 | 'join_phrase': ' / ', 65 | }, 66 | { 67 | 'mbid': 'f59c5520-5f46-4d2c-b2c4-822eabf53419', 68 | 'name': 'Linkin Park' 69 | } 70 | ] 71 | }, 72 | 'ae83579c-5f33-4a35-83f3-89206c44a426': { 73 | 'mbid': 'ae83579c-5f33-4a35-83f3-89206c44a426', 74 | 'name': "I'm a Stranger Here Myself", 75 | 'length': 344.0, 76 | 'artist-credit-phrase': 'Charlie Byrd & The Washington Guitar Quintet', 77 | 'artists': [ 78 | { 79 | 'mbid': '9d99c378-247c-47a3-94ea-753efa330023', 80 | 'name': 'Charlie Byrd', 81 | 'join_phrase': ' & ' 82 | }, 83 | { 84 | 'mbid': 'c805fb7e-c8ff-49e0-b74f-61d638444fad', 85 | 'name': 'The Washington Guitar Quintet' 86 | } 87 | ] 88 | } 89 | } 90 | 91 | def test_fetch_multiple_recordings_redirect(self, engine): 92 | recordings = mb_recording.fetch_multiple_recordings([ 93 | 'e00d4dce-097e-4098-bbb3-77db884566f3' 94 | ]) 95 | assert recordings == { 96 | 'e00d4dce-097e-4098-bbb3-77db884566f3': { 97 | 'mbid': 'fbe3d0b9-3990-4a76-bddb-12f4a0447a2c', 98 | 'name': 'The Perfect Drug (Nine Inch Nails)', 99 | 'length': 499, 100 | 'rating': 60, 101 | } 102 | } 103 | 104 | def test_fetch_multiple_recordings_missing(self, engine): 105 | """ Tests if appropriate recordings are returned for a given list of MBIDs. 
""" 106 | 107 | recordings = mb_recording.fetch_multiple_recordings( 108 | ['e00d4dce-097e-4098-bbb3-77db884566f3', 'e00d4dce-0000-0000-0000-77db884566f3'] 109 | ) 110 | 111 | assert list(recordings.keys()) == ['e00d4dce-097e-4098-bbb3-77db884566f3'] 112 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_release.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import release as mb_release 4 | 5 | 6 | @pytest.mark.database 7 | class TestRelease: 8 | 9 | def test_get_release_by_mbid(self, engine): 10 | release = mb_release.get_release_by_mbid('fed37cfc-2a6d-4569-9ac0-501a7c7598eb', 11 | includes=['media', 'release-groups']) 12 | assert release["name"] == "Master of Puppets" 13 | assert len(release["medium-list"][0]["track-list"]) == 8 14 | assert release["medium-list"][0]["track-list"] == [ 15 | { 16 | 'mbid': '58c97804-bd98-3bc6-b8c7-5234db05bc2e', 17 | 'name': 'Battery', 18 | 'number': '1', 19 | 'position': 1, 20 | 'length': 312373, 21 | 'recording_id': '3bfda26a-49fa-4bc4-a4d6-8bbfa0767ab7', 22 | 'recording_title': 'Battery', 23 | 'artist-credit': [ 24 | { 25 | 'name': 'Metallica', 26 | 'artist': { 27 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 28 | 'name': 'Metallica', 29 | 'sort_name': 'Metallica', 30 | 'life-span': {'begin': '1981-10-28'}, 31 | 'type': 'Group', 32 | } 33 | } 34 | ], 35 | 'artist-credit-phrase': 'Metallica' 36 | }, 37 | { 38 | 'mbid': '51b179fa-8e72-383b-9549-0ae9a6dd9cfb', 39 | 'name': 'Master of Puppets', 40 | 'number': '2', 41 | 'position': 2, 42 | 'length': 515226, 43 | 'recording_id': '0151d8a4-50c8-4036-b824-4a4f4b140e8e', 44 | 'recording_title': 'Master of Puppets', 45 | 'artist-credit': [ 46 | { 47 | 'name': 'Metallica', 48 | 'artist': { 49 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 50 | 'name': 'Metallica', 51 | 'sort_name': 'Metallica', 52 | 'life-span': {'begin': '1981-10-28'}, 53 | 'type': 'Group', 54 | } 55 | } 56 | ], 57 | 'artist-credit-phrase': 'Metallica' 58 | }, 59 | { 60 | 'mbid': '052e25d8-373e-3a5a-bced-bd47eb209dc5', 61 | 'name': 'The Thing That Should Not Be', 62 | 'number': '3', 63 | 'position': 3, 64 | 'length': 396200, 65 | 'recording_id': 'f5267fe1-5cb6-47f7-8df2-e6e8f09fa7ad', 66 | 'recording_title': 'The Thing That Should Not Be', 67 | 'artist-credit': [ 68 | { 69 | 'name': 'Metallica', 70 | 'artist': { 71 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 72 | 'name': 'Metallica', 73 | 'sort_name': 'Metallica', 74 | 'life-span': {'begin': '1981-10-28'}, 75 | 'type': 'Group', 76 | } 77 | } 78 | ], 79 | 'artist-credit-phrase': 'Metallica'}, 80 | { 81 | 'mbid': '00367246-d956-3a44-af4b-bc3cfd34ec49', 82 | 'name': 'Welcome Home (Sanitarium)', 83 | 'number': '4', 84 | 'position': 4, 85 | 'length': 386866, 86 | 'recording_id': 'a20860e9-7636-422b-a9cd-2da671b242a8', 87 | 'recording_title': 'Welcome Home (Sanitarium)', 88 | 'artist-credit': [ 89 | { 90 | 'name': 'Metallica', 91 | 'artist': { 92 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 93 | 'name': 'Metallica', 94 | 'sort_name': 'Metallica', 95 | 'life-span': {'begin': '1981-10-28'}, 96 | 'type': 'Group', 97 | } 98 | } 99 | ], 100 | 'artist-credit-phrase': 'Metallica' 101 | }, 102 | { 103 | 'mbid': '77fac948-8223-3077-a25e-50d9512142f0', 104 | 'name': 'Disposable Heroes', 105 | 'number': '5', 106 | 'position': 5, 107 | 'length': 496640, 108 | 'recording_id': '93ae3251-d9b5-46ee-9849-7b16d5e57d8b', 109 | 
'recording_title': 'Disposable Heroes', 110 | 'artist-credit': [ 111 | { 112 | 'name': 'Metallica', 113 | 'artist': { 114 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 115 | 'name': 'Metallica', 116 | 'sort_name': 'Metallica', 117 | 'life-span': {'begin': '1981-10-28'}, 118 | 'type': 'Group', 119 | } 120 | } 121 | ], 122 | 'artist-credit-phrase': 'Metallica'}, 123 | { 124 | 'mbid': '7f97a9e0-89ec-37ed-a3d7-5a7390ffa43b', 125 | 'name': 'Leper Messiah', 126 | 'number': '6', 127 | 'position': 6, 128 | 'length': 339866, 129 | 'recording_id': '2d9a5b40-f5e6-4499-ab7a-567fe3b42ab9', 130 | 'recording_title': 'Leper Messiah', 131 | 'artist-credit': [ 132 | { 133 | 'name': 'Metallica', 134 | 'artist': { 135 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 136 | 'name': 'Metallica', 137 | 'sort_name': 'Metallica', 138 | 'life-span': {'begin': '1981-10-28'}, 139 | 'type': 'Group', 140 | } 141 | } 142 | ], 143 | 'artist-credit-phrase': 'Metallica' 144 | }, 145 | { 146 | 'mbid': 'b7e772d3-3a9b-32ad-8e5c-e8c079d5e4f4', 147 | 'name': 'Orion', 148 | 'number': '7', 149 | 'position': 7, 150 | 'length': 507426, 151 | 'recording_id': 'b6cbe414-8b21-4600-8588-f6a80fd7043a', 152 | 'recording_title': 'Orion', 153 | 'artist-credit': [ 154 | { 155 | 'name': 'Metallica', 156 | 'artist': { 157 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 158 | 'name': 'Metallica', 159 | 'sort_name': 'Metallica', 160 | 'life-span': {'begin': '1981-10-28'}, 161 | 'type': 'Group', 162 | } 163 | } 164 | ], 165 | 'artist-credit-phrase': 'Metallica' 166 | }, 167 | { 168 | 'mbid': '0949ef68-edef-39a1-a3a0-dc666920f629', 169 | 'name': 'Damage, Inc.', 170 | 'number': '8', 171 | 'position': 8, 172 | 'length': 330933, 173 | 'recording_id': '01ea1189-e0d2-48a0-9dc2-c615785a5ae0', 174 | 'recording_title': 'Damage, Inc.', 175 | 'artist-credit': [ 176 | { 177 | 'name': 'Metallica', 178 | 'artist': { 179 | 'mbid': '65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab', 180 | 'name': 'Metallica', 181 | 'sort_name': 'Metallica', 182 | 'life-span': {'begin': '1981-10-28'}, 183 | 'type': 'Group', 184 | } 185 | } 186 | ], 187 | 'artist-credit-phrase': 'Metallica' 188 | } 189 | ] 190 | 191 | def test_get_release_by_mbid_redirect(self, engine): 192 | release = mb_release.get_release_by_mbid('fb2031ae-4e2a-4d2c-9819-32568f9e5e17') 193 | assert release == { 194 | 'mbid': 'a6949d8e-c1eb-4eee-a670-680d28dd80e6', 195 | 'name': 'The College Dropout' 196 | } 197 | 198 | def test_fetch_multiple_releases(self, engine): 199 | releases = mb_release.fetch_multiple_releases( 200 | mbids=['e327da6d-717b-4eb3-b396-bbce6b9466bc', 'b1bb026c-e813-407f-ba7b-db7466cdc56c'], 201 | ) 202 | assert len(releases) == 2 203 | assert releases['e327da6d-717b-4eb3-b396-bbce6b9466bc']['name'] == 'Without a Sound' 204 | assert releases['b1bb026c-e813-407f-ba7b-db7466cdc56c']['name'] == 'War All the Time' 205 | 206 | def test_fetch_multiple_releases_redirect(self, engine): 207 | releases = mb_release.fetch_multiple_releases( 208 | mbids=['fb2031ae-4e2a-4d2c-9819-32568f9e5e17'], 209 | ) 210 | assert releases == { 211 | 'fb2031ae-4e2a-4d2c-9819-32568f9e5e17': { 212 | 'mbid': 'a6949d8e-c1eb-4eee-a670-680d28dd80e6', 213 | 'name': 'The College Dropout' 214 | } 215 | } 216 | 217 | def test_fetch_multiple_releases_missing(self, engine): 218 | releases = mb_release.fetch_multiple_releases( 219 | mbids=['a6949d8e-c1eb-4eee-a670-680d28dd80e6', 'a6949d8e-cccc-cccc-cccc-680d28dd80e6'], 220 | ) 221 | assert list(releases.keys()) == ['a6949d8e-c1eb-4eee-a670-680d28dd80e6'] 222 | 223 | def 
test_get_releases_using_recording_mbid(self, engine): 224 | """Tests if releases are fetched correctly for a given recording MBID""" 225 | self.maxDiff = None 226 | 227 | releases = mb_release.get_releases_using_recording_mbid('5465ca86-3881-4349-81b2-6efbd3a59451') 228 | 229 | assert releases == [ 230 | {'mbid': 'cb48685f-beea-4ca6-93f3-49ef4d8cbf28', 'name': 'The Blueprint²: The Gift & The Curse'}, 231 | {'mbid': '4c9bd72b-dae9-44bf-a052-9b2f6c0d50de', 'name': 'Back to Bey-Sic', 'comment': 'deluxe edition'}, 232 | {'mbid': '89f64145-2f75-41d1-831a-517b785ed75a', 'name': 'The Blueprint Collector’s Edition'}, 233 | {'mbid': 'f1183a86-36d2-4f1f-ab8f-6f965dc0b033', 'name': 'The Hits Collection Volume One'}, 234 | {'mbid': '7c77ca4d-d84b-4b67-8705-e6afe9eb5878', 'name': 'The Blueprint²: The Gift & The Curse', 'comment': 'MQA, explicit'}, 235 | {'mbid': '77a74b85-0ae0-338f-aaca-4f36cd394f88', 'name': 'Blueprint 2.1'}, 236 | {'mbid': 'cb180855-979d-4d5d-9024-3bc97c64d19c', 'name': 'The Blueprint²: The Gift & The Curse', 'comment': 'explicit'}, 237 | {'mbid': 'b207b569-6323-4426-801b-3d5dbaf28d49', 'name': 'The Blueprint²: The Gift & The Curse', 'comment': 'explicit'}, 238 | {'mbid': '7111c8bc-8549-4abc-8ab9-db13f65b4a55', 'name': 'Blueprint 2.1'}, 239 | {'mbid': '3c535d03-2fcc-467a-8d47-34b3250b8211', 'name': 'The Hits Collection Volume One', 'comment': 'explicit'}, 240 | {'mbid': 'c84d8fa8-6f8d-42c9-87cc-b726e859b41d', 'name': 'The Hits Collection Volume One', 'comment': 'edited version'}, 241 | {'mbid': '8d51f750-7ee9-4937-8907-0243efc2f6df', 'name': 'The Blueprint²: The Gift & The Curse', 'comment': 'explicit'}, 242 | {'mbid': '4f41108c-db36-4616-8614-f504fdef287a', 'name': 'Blueprint 2.1'}, 243 | {'mbid': 'b0075ce9-58c8-47e2-8a72-5f783314a97e', 'name': 'The Hits Collection Volume One', 'comment': 'explicit'}, 244 | {'mbid': '4a441628-2e4d-4032-825f-6bdf4aee382e', 'name': 'The Hits Collection, Volume 1'}, 245 | {'mbid': '5e782ae3-602b-48b7-99be-de6bcffa4aba', 'name': 'The Hits Collection, Volume 1', 'comment': 'Deluxe edition'}, 246 | {'mbid': '7ebaaa95-e316-3b20-8819-7e4ca648c135', 'name': 'The Hits Collection, Volume 1'}, 247 | {'mbid': '240f52cd-9120-452d-98de-8df087e389e8', 'name': 'The Real Best of Both Worlds'} 248 | ] 249 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_release_group.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import release_group as mb_release_group 4 | 5 | 6 | @pytest.mark.database 7 | class TestReleaseGroup: 8 | 9 | def test_get_release_group_by_mbid(self, engine): 10 | release_group = mb_release_group.get_release_group_by_mbid('0f18ec88-aa87-38a9-8a65-f03d81763560', 11 | includes=['artists', 'releases', 12 | 'release-group-rels', 'url-rels', 'tags']) 13 | 14 | assert release_group['mbid'] == '0f18ec88-aa87-38a9-8a65-f03d81763560' 15 | assert release_group['title'] == 'Led Zeppelin' 16 | # Check if multiple artists are properly fetched 17 | assert release_group['artist-credit-phrase'] == 'Led Zeppelin' 18 | assert release_group['artist-credit'][0] == { 19 | 'name': 'Led Zeppelin', 20 | 'artist': { 21 | 'mbid': '678d88b2-87b0-403b-b63d-5da7465aecc3', 22 | 'name': 'Led Zeppelin', 23 | 'sort_name': 'Led Zeppelin', 24 | 'life-span': {'begin': '1968', 'end': '1980-09-25'}, 25 | 'type': 'Group', 26 | } 27 | } 28 | 29 | def test_get_release_group_by_mbid_redirect(self, engine): 30 | release_group = 
mb_release_group.get_release_group_by_mbid('358bbed4-1717-3e1c-ba8e-af54d2d3a5d6') 31 | assert release_group == { 32 | 'mbid': '8a01217e-6947-3927-a39b-6691104694f1', 33 | 'title': 'The College Dropout', 34 | 'first-release-year': 2003, 35 | 'type': 'Album', 36 | 'rating': 88, 37 | } 38 | 39 | def test_fetch_release_groups(self, engine): 40 | release_groups = mb_release_group.fetch_multiple_release_groups( 41 | mbids=['0f18ec88-aa87-38a9-8a65-f03d81763560', '1b36a363-eec6-35ba-b0ed-34a1f2f2cd82'], 42 | ) 43 | assert len(release_groups) == 2 44 | assert release_groups['0f18ec88-aa87-38a9-8a65-f03d81763560']['title'] == 'Led Zeppelin' 45 | assert release_groups['1b36a363-eec6-35ba-b0ed-34a1f2f2cd82']['title'] == 'Cosmic Thing' 46 | 47 | def test_fetch_release_groups_redirect(self, engine): 48 | release_groups = mb_release_group.fetch_multiple_release_groups( 49 | mbids=['358bbed4-1717-3e1c-ba8e-af54d2d3a5d6'], 50 | ) 51 | assert release_groups == { 52 | '358bbed4-1717-3e1c-ba8e-af54d2d3a5d6': { 53 | 'mbid': '8a01217e-6947-3927-a39b-6691104694f1', 54 | 'title': 'The College Dropout', 55 | 'first-release-year': 2003, 56 | 'type': 'Album', 57 | 'rating': 88, 58 | } 59 | } 60 | 61 | def test_fetch_release_groups_missing(self, engine): 62 | release_groups = mb_release_group.fetch_multiple_release_groups( 63 | mbids=['358bbed4-1717-3e1c-ba8e-af54d2d3a5d6', '358bbed4-1111-1111-1111-af54d2d3a5d6'], 64 | ) 65 | assert list(release_groups.keys()) == ['358bbed4-1717-3e1c-ba8e-af54d2d3a5d6'] 66 | 67 | def test_fetch_get_release_groups_for_artist(self, engine): 68 | release_groups = mb_release_group.get_release_groups_for_artist( 69 | artist_id='074e3847-f67f-49f9-81f1-8c8cea147e8e', 70 | release_types=['Single', 'EP'], 71 | ) 72 | assert release_groups[0] == [ 73 | { 74 | 'mbid': '07f5e633-8846-3fe7-8e68-472b54dba159', 75 | 'title': 'This Is What the Edge of Your Seat Was Made For', 76 | 'first-release-year': 2004, 77 | 'type': 'EP', 78 | } 79 | ] 80 | assert release_groups[1] == 1 81 | 82 | def test_fetch_get_release_groups_for_label(self, engine): 83 | release_groups = mb_release_group.get_release_groups_for_label( 84 | label_mbid='4cccc72a-0bd0-433a-905e-dad87871397d', 85 | release_types=['Album'], 86 | ) 87 | assert release_groups[0][0] == { 88 | 'mbid': 'a96597aa-93b4-4e14-9e6e-03892ab24979', 89 | 'title': 'Watch the Throne', 90 | 'first-release-year': 2011, 91 | 'type': 'Album', 92 | } 93 | 94 | assert release_groups[1] == 19 95 | assert len(release_groups[0]) == 19 96 | 97 | # Test release group with null type 98 | release_groups_1 = mb_release_group.get_release_groups_for_label( 99 | label_mbid='d835e36a-78ee-48ba-ac04-b46fb37df41f', 100 | release_types=['Other'], 101 | ) 102 | assert release_groups_1[0][0] == { 103 | 'mbid': '39d08e6e-b877-4c64-aef9-ce79a19f6075', 104 | 'title': 'American Songbook: The American Music Collection, Vol. 
III', 105 | 'first-release-year': 1996, 106 | } 107 | 108 | assert release_groups_1[1] == 2 109 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_serialize.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, date 2 | 3 | from brainzutils.musicbrainz_db.serialize import serialize_recording, serialize_artist_credit, serialize_editor 4 | from brainzutils.musicbrainz_db.test_data import recording_numb_encore_explicit, artistcredit_jay_z_linkin_park, \ 5 | editor_2 6 | from unittest import TestCase 7 | 8 | 9 | class SerializeTestCase(TestCase): 10 | def test_serialize_recording(self): 11 | """Tests that recordings are serialized properly.""" 12 | # Without any includes 13 | recording = serialize_recording(recording_numb_encore_explicit) 14 | self.assertDictEqual(recording, 15 | { 16 | 'length': 205.28, 17 | 'mbid': 'daccb724-8023-432a-854c-e0accb6c8678', 18 | 'name': 'Numb/Encore (explicit)', 19 | } 20 | ) 21 | 22 | # With artists included 23 | artists = recording_numb_encore_explicit.artist_credit.artists 24 | recording = serialize_recording( 25 | recording_numb_encore_explicit, 26 | includes={'artists': artists, 'artist-credit-phrase': 'Jay-Z/Linkin Park'} 27 | ) 28 | self.assertDictEqual(recording, 29 | { 30 | 'mbid': 'daccb724-8023-432a-854c-e0accb6c8678', 31 | 'name': 'Numb/Encore (explicit)', 32 | 'length': 205.28, 33 | 'artist-credit-phrase': 'Jay-Z/Linkin Park', 34 | 'artists': [ 35 | { 36 | 'mbid': 'f82bcf78-5b69-4622-a5ef-73800768d9ac', 37 | 'name': 'JAY Z', 38 | 'credited_name': 'Jay-Z', 39 | 'join_phrase': '/' 40 | }, 41 | { 42 | 'mbid': 'f59c5520-5f46-4d2c-b2c4-822eabf53419', 43 | 'name': 'Linkin Park' 44 | } 45 | ] 46 | } 47 | ) 48 | 49 | def test_serialize_artist_credits(self): 50 | """Test that artist_credits are serialized properly.""" 51 | artist_credits = serialize_artist_credit(artistcredit_jay_z_linkin_park) 52 | self.assertListEqual(artist_credits, 53 | [ 54 | { 55 | 'mbid': 'f82bcf78-5b69-4622-a5ef-73800768d9ac', 56 | 'name': 'JAY Z', 57 | 'credited_name': 'Jay-Z', 58 | 'join_phrase': '/' 59 | }, 60 | { 61 | 'mbid': 'f59c5520-5f46-4d2c-b2c4-822eabf53419', 62 | 'name': 'Linkin Park' 63 | } 64 | ] 65 | ) 66 | 67 | def test_serialize_editor(self): 68 | """Test that sensitive information is removed, everything else is covered in test_editor.""" 69 | editor = serialize_editor(editor_2) 70 | self.assertNotIn("password", editor) 71 | self.assertNotIn("ha1", editor) 72 | self.assertEqual(editor, { 73 | 'id': 2324, 74 | 'name': 'Editor 2', 75 | 'privs': 3, 76 | 'email': 'editor@example.com', 77 | 'website': 'example.com', 78 | 'bio': 'Random\neditor', 79 | 'member_since': datetime(2014, 12, 1, 14, 6, 42, 321443), 80 | 'email_confirm_date': datetime(2014, 12, 1, 14, 6, 42, 321443), 81 | 'last_login_date': datetime(2014, 12, 1, 14, 6, 42, 321443), 82 | 'last_updated': datetime(2014, 12, 1, 14, 6, 42, 321443), 83 | 'birth_date': date(1999, 1, 1), 84 | 'deleted': False, 85 | 'gender': None, 86 | 'area': { 87 | "mbid": "4479c385-74d8-4a2b-bdab-f48d1e6969ba", 88 | "name": "Hämeenlinna" 89 | } 90 | }) 91 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/tests/test_work.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import work as mb_work 4 | 5 | 6 | @pytest.mark.database 7 | class TestWork: 8 | 
def test_get_work_by_mbid(self, engine): 9 | work = mb_work.get_work_by_mbid('d35f8fb8-52ab-4a12-b1c8-f2054d10cf88') 10 | assert work == { 11 | "mbid": "d35f8fb8-52ab-4a12-b1c8-f2054d10cf88", 12 | "name": "Apple Bush", 13 | "type": "Song", 14 | } 15 | 16 | def test_get_work_by_mbid_redirect(self, engine): 17 | work = mb_work.get_work_by_mbid('4531bed5-073c-37a8-9500-70de8583c0a1') 18 | assert work == { 19 | "mbid": "36e33f94-ef5f-36b5-97b0-c1ed9c5a542f", 20 | "name": "Jesus Walks", 21 | "type": "Song", 22 | } 23 | 24 | def test_get_work_by_mbid_with_includes(self, engine): 25 | work = mb_work.get_work_by_mbid('4531bed5-073c-37a8-9500-70de8583c0a1', 26 | includes=['artist-rels', 'recording-rels']) 27 | assert work["mbid"] == "36e33f94-ef5f-36b5-97b0-c1ed9c5a542f" 28 | assert len(work["artist-rels"]) == 4 29 | assert len(work["recording-rels"]) == 55 30 | 31 | def test_fetch_multiple_works(self, engine): 32 | works = mb_work.fetch_multiple_works([ 33 | 'd35f8fb8-52ab-4a12-b1c8-f2054d10cf88', 34 | '1deb7377-f980-4adb-8f0f-a36355461f38' 35 | ]) 36 | assert works["d35f8fb8-52ab-4a12-b1c8-f2054d10cf88"] == { 37 | "mbid": "d35f8fb8-52ab-4a12-b1c8-f2054d10cf88", 38 | "name": "Apple Bush", 39 | "type": "Song", 40 | } 41 | assert works["1deb7377-f980-4adb-8f0f-a36355461f38"] == { 42 | "mbid": "1deb7377-f980-4adb-8f0f-a36355461f38", 43 | "name": "Fields of Regret", 44 | "type": "Song", 45 | } 46 | 47 | def test_fetch_multiple_works_redirect(self, engine): 48 | works = mb_work.fetch_multiple_works([ 49 | '4531bed5-073c-37a8-9500-70de8583c0a1', 50 | ]) 51 | assert works == { 52 | '4531bed5-073c-37a8-9500-70de8583c0a1': { 53 | "mbid": "36e33f94-ef5f-36b5-97b0-c1ed9c5a542f", 54 | "name": "Jesus Walks", 55 | "type": "Song", 56 | } 57 | } 58 | 59 | def test_fetch_multiple_works_missing(self, engine): 60 | works = mb_work.fetch_multiple_works([ 61 | '36e33f94-ef5f-36b5-97b0-c1ed9c5a542f', 62 | '36e33f94-eeee-eeee-eeee-c1ed9c5a542f' 63 | ]) 64 | assert list(works.keys()) == ['36e33f94-ef5f-36b5-97b0-c1ed9c5a542f'] 65 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/utils.py: -------------------------------------------------------------------------------- 1 | from brainzutils.musicbrainz_db.models import ENTITY_MODELS, META_MODELS, REDIRECT_MODELS 2 | import brainzutils.musicbrainz_db.exceptions as mb_exceptions 3 | 4 | 5 | def get_entities_by_gids(query, entity_type, mbids): 6 | """Get entities using their MBIDs. 7 | 8 | An entity can have multiple MBIDs. This function may be passed another 9 | MBID of an entity, in which case, it is redirected to the original entity. 10 | 11 | Note that the query may be modified before passing it to this 12 | function in order to save queries made to the database. 13 | 14 | Args: 15 | query (Query): SQLAlchemy Query object. 16 | entity_type (str): Type of entity being queried. 17 | mbids (list): IDs of the target entities. 18 | 19 | Returns: 20 | Dictionary of objects of target entities keyed by their MBID. 
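        For a redirected MBID, the key is the MBID that was passed in, not the
        entity's canonical MBID, so callers can always look up results using the
        identifiers they supplied.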
21 | """ 22 | entity_model = ENTITY_MODELS[entity_type] 23 | if entity_type in META_MODELS: 24 | meta_model = META_MODELS[entity_type] 25 | query = query.add_entity(meta_model).join(meta_model) 26 | 27 | results = query.filter(entity_model.gid.in_(mbids)).all() 28 | entity_gids = set() 29 | entities = {} 30 | if entity_type in META_MODELS: 31 | for entity, entity_meta in results: 32 | entities[entity.gid] = entity 33 | entities[entity.gid].rating = entity_meta.rating 34 | entity_gids.add(entity.gid) 35 | else: 36 | entities = {str(entity.gid): entity for entity in results} 37 | entity_gids = {entity.gid for entity in results} 38 | 39 | remaining_gids = list(set(mbids) - entity_gids) 40 | if remaining_gids: 41 | redirect_model = REDIRECT_MODELS[entity_type] 42 | query = query.add_entity(redirect_model).join(redirect_model) 43 | results = query.filter(redirect_model.gid.in_(remaining_gids)) 44 | 45 | redirect_gids = set() 46 | if entity_type in META_MODELS: 47 | for entity, entity_meta, redirect_obj in results: 48 | entities[redirect_obj.gid] = entity 49 | entities[redirect_obj.gid].rating = entity_meta.rating 50 | redirect_gids.add(redirect_obj.gid) 51 | else: 52 | for entity, redirect_obj in results: 53 | entities[redirect_obj.gid] = entity 54 | redirect_gids.add(redirect_obj.gid) 55 | 56 | return entities 57 | 58 | 59 | def get_entities_by_ids(query, entity_type, ids): 60 | """Get entities using their IDs. 61 | 62 | Note that the query may be modified before passing it to this 63 | function in order to save queries made to the database. 64 | 65 | Args: 66 | query (Query): SQLAlchemy Query object. 67 | entity_type (str): Type of entity being queried. 68 | ids (list): IDs of the target entities. 69 | 70 | Returns: 71 | Dictionary of objects of target entities keyed by their ID. 72 | """ 73 | entity_model = ENTITY_MODELS[entity_type] 74 | results = query.filter(entity_model.id.in_(ids)).all() 75 | entities = {entity.id: entity for entity in results} 76 | 77 | return entities 78 | -------------------------------------------------------------------------------- /brainzutils/musicbrainz_db/work.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from mbdata import models 3 | from sqlalchemy.orm import joinedload 4 | from brainzutils.musicbrainz_db import mb_session 5 | from brainzutils.musicbrainz_db.utils import get_entities_by_gids 6 | from brainzutils.musicbrainz_db.includes import check_includes 7 | from brainzutils.musicbrainz_db.serialize import serialize_works 8 | from brainzutils.musicbrainz_db.helpers import get_relationship_info 9 | 10 | 11 | def get_work_by_mbid(mbid, includes=None): 12 | """Get work with the MusicBrainz ID. 13 | 14 | Args: 15 | mbid (uuid): MBID(gid) of the work. 16 | Returns: 17 | Dictionary containing the work information, or None if the work doesn't exist. 18 | """ 19 | if includes is None: 20 | includes = [] 21 | 22 | return fetch_multiple_works( 23 | [mbid], 24 | includes=includes, 25 | ).get(mbid) 26 | 27 | 28 | def fetch_multiple_works(mbids, includes=None): 29 | """Get info related to multiple works using their MusicBrainz IDs. 30 | 31 | Args: 32 | mbids (list): List of MBIDs of works. 33 | includes (list): List of information to be included. 34 | 35 | Returns: 36 | A dictionary containing info of multiple works keyed by their MBID. 37 | If an MBID doesn't exist in the database, it isn't returned. 
38 |         If an MBID is a redirect, the dictionary key will be the MBID given as an argument,
39 |         but the returned object will contain the new MBID in the 'mbid' key.
40 |     """
41 |     if includes is None:
42 |         includes = []
43 |     includes_data = defaultdict(dict)
44 |     check_includes('work', includes)
45 |     with mb_session() as db:
46 |         query = db.query(models.Work).options(joinedload(models.Work.type))
47 | 
48 |         works = get_entities_by_gids(
49 |             query=query,
50 |             entity_type='work',
51 |             mbids=mbids,
52 |         )
53 |         work_ids = [work.id for work in works.values()]
54 | 
55 |         if 'artist-rels' in includes:
56 |             get_relationship_info(
57 |                 db=db,
58 |                 target_type='artist',
59 |                 source_type='work',
60 |                 source_entity_ids=work_ids,
61 |                 includes_data=includes_data,
62 |             )
63 | 
64 |         if 'recording-rels' in includes:
65 |             get_relationship_info(
66 |                 db=db,
67 |                 target_type='recording',
68 |                 source_type='work',
69 |                 source_entity_ids=work_ids,
70 |                 includes_data=includes_data,
71 |             )
72 | 
73 |     return {str(mbid): serialize_works(work, includes_data[work.id]) for mbid, work in works.items()}
74 | 
--------------------------------------------------------------------------------
/brainzutils/ratelimit.py:
--------------------------------------------------------------------------------
1 | # The original version of this code was written by Armin Ronacher:
2 | #
3 | # This snippet by Armin Ronacher can be used freely for anything you like. Consider it public domain.
4 | #
5 | # http://flask.pocoo.org/snippets/70/
6 | #
7 | import time
8 | from functools import update_wrapper
9 | 
10 | from flask import request, g
11 | from werkzeug.exceptions import TooManyRequests
12 | 
13 | from brainzutils import cache
14 | 
15 | # g key for the timeout when limits must be refreshed from cache
16 | ratelimit_refresh = 60 # in seconds
17 | ratelimit_timeout = "rate_limits_timeout"
18 | 
19 | # Defaults
20 | ratelimit_per_token_default = 50
21 | ratelimit_per_ip_default = 30
22 | ratelimit_window_default = 10
23 | 
24 | # keys
25 | ratelimit_per_token_key = "rate_limit_per_token_limit"
26 | ratelimit_per_ip_key = "rate_limit_per_ip_limit"
27 | ratelimit_window_key = "rate_limit_window"
28 | ratelimit_cache_namespace = "rate_limit"
29 | 
30 | # external functions
31 | ratelimit_user_validation = None
32 | 
33 | 
34 | class RateLimit(object):
35 |     """
36 |     This RateLimit object is created when a request is started (via the ratelimit decorator)
37 |     and is stored in the flask request context so that the results can be injected into
38 |     the response headers before the request is over.
39 | 
40 |     HOW TO USE THIS MODULE:
41 | 
42 |     This module defines a set of functions that allow you to add rate limiting to your
43 |     flask app. There are three values to know and set:
44 | 
45 |     per_token_limit - The number of requests that are allowed for a caller who is
46 |     setting an::
47 | 
48 |         Authorization: Token <token>
49 | 
50 |     header. This limit can be different than the limit for rate limiting on an IP basis.
51 | 
52 |     per_ip_limit - The number of requests that are allowed for a caller who is not
53 |     providing an Authorization header and is rate limited on their IP address.
54 | 
55 |     ratelimit_window - The window, in seconds, for which the limits
56 |     above are applied.
57 | 
58 |     To add ratelimit capabilities to your flask app, follow these steps:
59 | 
60 |     1. During app creation add these lines::
61 | 
62 |        from brainzutils.ratelimit import ratelimit, inject_x_rate_headers
63 | 
64 |        @app.after_request
65 |        def after_request_callbacks(response):
66 |            return inject_x_rate_headers(response)
67 | 
68 |     2. Then apply the ratelimit() decorator to any function that should be rate limited::
69 | 
70 |        @app.route('/')
71 |        @ratelimit()
72 |        def index():
73 |            return 'test'
74 | 
75 |     3. The default rate limits are defined above (see the Defaults comment). If you want to set different
76 |        rate limits, which can also be done dynamically without restarting the application, call
77 |        the set_rate_limits function::
78 | 
79 |        from brainzutils.ratelimit import set_rate_limits
80 | 
81 |        set_rate_limits(per_token_limit, per_ip_limit, rate_limit_window)
82 | 
83 |     4. To enable token-based rate limiting, callers need to pass the Authorization header (see above)
84 |        and the application needs to provide a user validation function::
85 | 
86 |        from brainzutils.ratelimit import set_user_validation_function
87 | 
88 |        def validate_user(user):
89 |            if user == valid_user:
90 |                return True
91 |            return False
92 | 
93 |        set_user_validation_function(validate_user)
94 | 
95 |     """
96 | 
97 |     # From the docs:
98 |     # We also give the key extra expiration_window seconds to expire in cache so that badly
99 |     # synchronized clocks between the workers and the cache server do not cause problems.
100 |     expiration_window = 10
101 | 
102 |     def __init__(self, key_prefix, limit, per):
103 |         current_time = int(time.time())
104 |         self.reset = (current_time // per) * per + per
105 |         self.seconds_before_reset = self.reset - current_time
106 |         self.key = key_prefix + str(self.reset)
107 |         self.limit = limit
108 |         self.per = per
109 |         self.current = cache.increment(self.key, namespace=ratelimit_cache_namespace)
110 |         cache.expireat(self.key, self.reset + self.expiration_window, namespace=ratelimit_cache_namespace)
111 | 
112 |     remaining = property(lambda x: max(x.limit - x.current, 0))
113 |     over_limit = property(lambda x: x.current > x.limit)
114 | 
115 | 
116 | def set_user_validation_function(func):
117 |     '''
118 |     The function passed to this method should accept one argument, the Authorization header contents,
119 |     and return True if that user is a valid user, False otherwise.
120 |     '''
121 | 
122 |     global ratelimit_user_validation
123 |     ratelimit_user_validation = func
124 | 
125 | 
126 | def set_rate_limits(per_token, per_ip, window):
127 |     '''
128 |     Update the current rate limits. This will affect all new rate limiting windows; existing windows will not be changed.
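    For example, to allow 100 requests per token and 20 per IP over a 60 second window
    (the numbers here are purely illustrative)::

        set_rate_limits(100, 20, 60)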
129 | ''' 130 | cache.set(ratelimit_per_token_key, per_token, expirein=0, namespace=ratelimit_cache_namespace) 131 | cache.set(ratelimit_per_ip_key, per_ip, expirein=0, namespace=ratelimit_cache_namespace) 132 | cache.set(ratelimit_window_key, window, expirein=0, namespace=ratelimit_cache_namespace) 133 | 134 | 135 | def inject_x_rate_headers(response): 136 | ''' 137 | Add rate limit headers to responses 138 | ''' 139 | limit = get_view_rate_limit() 140 | if limit: 141 | h = response.headers 142 | h.add('Access-Control-Expose-Headers', 'X-RateLimit-Remaining,X-RateLimit-Limit,X-RateLimit-Reset,X-RateLimit-Reset-In') 143 | h.add('X-RateLimit-Remaining', str(limit.remaining)) 144 | h.add('X-RateLimit-Limit', str(limit.limit)) 145 | h.add('X-RateLimit-Reset', str(limit.reset)) 146 | h.add('X-RateLimit-Reset-In', str(limit.seconds_before_reset)) 147 | return response 148 | 149 | 150 | def get_view_rate_limit(): 151 | ''' 152 | Helper function to fetch the ratelimit limits from the flask context 153 | ''' 154 | return getattr(g, '_view_rate_limit', None) 155 | 156 | 157 | def on_over_limit(limit): 158 | ''' 159 | Set a nice and readable error message for over the limit requests. 160 | ''' 161 | raise TooManyRequests( 162 | 'You have exceeded your rate limit. See the X-RateLimit-* response headers for more ' \ 163 | 'information on your current rate limit.') 164 | 165 | 166 | def check_limit_freshness(): 167 | ''' 168 | This function checks to see if the values we have cached in the current request context 169 | are still fresh enough. If they've existed longer than the timeout value, refresh from 170 | the cache. This allows us to not check the limits for each request, saving cache traffic. 171 | ''' 172 | 173 | limits_timeout = getattr(g, '_' + ratelimit_timeout, 0) 174 | if time.time() <= limits_timeout: 175 | return 176 | 177 | value = int(cache.get(ratelimit_per_token_key, namespace=ratelimit_cache_namespace) or '0') 178 | if not value: 179 | cache.set(ratelimit_per_token_key, ratelimit_per_token_default, expirein=0, namespace=ratelimit_cache_namespace) 180 | value = ratelimit_per_token_default 181 | setattr(g, '_' + ratelimit_per_token_key, value) 182 | 183 | value = int(cache.get(ratelimit_per_ip_key, namespace=ratelimit_cache_namespace) or '0') 184 | if not value: 185 | cache.set(ratelimit_per_ip_key, ratelimit_per_ip_default, expirein=0, namespace=ratelimit_cache_namespace) 186 | value = ratelimit_per_ip_default 187 | setattr(g, '_' + ratelimit_per_ip_key, value) 188 | 189 | value = int(cache.get(ratelimit_window_key, namespace=ratelimit_cache_namespace) or '0') 190 | if not value: 191 | cache.set(ratelimit_window_key, ratelimit_window_default, expirein=0, namespace=ratelimit_cache_namespace) 192 | value = ratelimit_window_default 193 | setattr(g, '_' + ratelimit_window_key, value) 194 | 195 | setattr(g, '_' + ratelimit_timeout, int(time.time()) + ratelimit_refresh) 196 | 197 | 198 | def get_per_ip_limits(): 199 | ''' 200 | Fetch the per IP limits from context/cache 201 | ''' 202 | check_limit_freshness() 203 | return { 204 | 'limit': getattr(g, '_' + ratelimit_per_ip_key), 205 | 'window' : getattr(g, '_' + ratelimit_window_key), 206 | } 207 | 208 | 209 | def get_per_token_limits(): 210 | ''' 211 | Fetch the per token limits from context/cache 212 | ''' 213 | check_limit_freshness() 214 | return { 215 | 'limit': getattr(g, '_' + ratelimit_per_token_key), 216 | 'window' : getattr(g, '_' + ratelimit_window_key), 217 | } 218 | 219 | 220 | def get_rate_limit_data(request): 221 | '''Fetch 
the rate limit key and limits for the given request. If an Authorization header is provided,
222 |     the caller will get a better, personalized rate limit. If no header is provided,
223 |     the caller will be rate limited by IP, which gets an overall lower rate limit.
224 |     This should encourage callers to always provide the Authorization token.
225 |     '''
226 | 
227 |     # If a user verification function is provided, parse the Authorization header and try to look up that user
228 |     if ratelimit_user_validation:
229 |         auth_header = request.headers.get('Authorization')
230 |         if auth_header:
231 |             auth_token = auth_header[6:]
232 |             is_valid = ratelimit_user_validation(auth_token)
233 |             if is_valid:
234 |                 values = get_per_token_limits()
235 |                 values['key'] = auth_token
236 |                 return values
237 | 
238 | 
239 |     # No valid auth token provided. Look for a remote addr header provided by the proxy,
240 |     # or, if that isn't available, use the IP address of the request.
241 |     ip = request.environ.get('REMOTE_ADDR', None)
242 |     if not ip:
243 |         ip = request.remote_addr
244 | 
245 |     values = get_per_ip_limits()
246 |     values['key'] = ip
247 |     return values
248 | 
249 | 
250 | def ratelimit():
251 |     '''
252 |     This is the decorator that should be applied to all view functions that should be
253 |     rate limited.
254 |     '''
255 |     def decorator(f):
256 |         def rate_limited(*args, **kwargs):
257 |             data = get_rate_limit_data(request)
258 |             rlimit = RateLimit(data['key'], data['limit'], data['window'])
259 |             g._view_rate_limit = rlimit
260 |             if rlimit.over_limit:
261 |                 return on_over_limit(rlimit)
262 |             return f(*args, **kwargs)
263 |         return update_wrapper(rate_limited, f)
264 |     return decorator
265 | 
--------------------------------------------------------------------------------
/brainzutils/sentry.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | 
4 | import sentry_sdk
5 | from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
6 | from sentry_sdk.integrations.logging import LoggingIntegration
7 | from sentry_sdk.integrations.redis import RedisIntegration
8 | from sentry_sdk.integrations.flask import FlaskIntegration
9 | 
10 | 
11 | def init_sentry(dsn, level=logging.WARNING, **options):
12 |     """Adds Sentry event logging.
13 | 
14 |     Sentry is a realtime event logging and aggregation platform.
15 |     By default, we add integrations for the Python logger, Flask, Redis, and SQLAlchemy.
16 | 
17 |     Arguments:
18 |         dsn: The sentry DSN to connect to
19 |         level: the logging level at which logging messages are sent as events to sentry
20 |         options: Any other arguments to be passed to sentry_sdk.init.
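            For example, standard sentry_sdk.init options such as ``environment``
            or ``traces_sample_rate`` can be passed through here.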
21 | See https://docs.sentry.io/platforms/python/configuration/options/ 22 | """ 23 | sentry_sdk.init(dsn, integrations=[LoggingIntegration(level=level), FlaskIntegration(), RedisIntegration(), 24 | SqlalchemyIntegration()], 25 | **options) 26 | # This env variable is set in the MetaBrainz production infrastructure and is unique per container 27 | container_name = os.getenv("CONTAINER_NAME") 28 | if container_name: 29 | sentry_sdk.set_tag("container_name", container_name) 30 | -------------------------------------------------------------------------------- /brainzutils/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metabrainz/brainzutils-python/bf01c6da15d4a2426d64a31cf232c06bec3860f3/brainzutils/test/__init__.py -------------------------------------------------------------------------------- /brainzutils/test/test_cache.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=protected-access 2 | 3 | import datetime 4 | import os 5 | import unittest 6 | from time import sleep, time 7 | 8 | from unittest import mock 9 | import redis 10 | 11 | from brainzutils import cache 12 | 13 | 14 | class CacheTestCase(unittest.TestCase): 15 | """Testing our custom wrapper for redis.""" 16 | host = os.environ.get("REDIS_HOST", "localhost") 17 | port = 6379 18 | namespace = "NS_TEST" 19 | 20 | def setUp(self): 21 | cache.init( 22 | host=self.host, 23 | port=self.port, 24 | namespace=self.namespace, 25 | ) 26 | # Making sure there are no items in cache before we run each test 27 | cache.flush_all() 28 | 29 | def test_no_init(self): 30 | cache._r = None 31 | with self.assertRaises(RuntimeError): 32 | cache.set("test", "testing", expirein=0) 33 | with self.assertRaises(RuntimeError): 34 | cache.get("test") 35 | 36 | def test_single(self): 37 | self.assertTrue(cache.set("test2", "Hello!", expirein=0)) 38 | self.assertEqual(cache.get("test2"), "Hello!") 39 | 40 | def test_single_no_encode(self): 41 | self.assertTrue(cache.set("no encode", 1, expirein=0, encode=False)) 42 | self.assertEqual(cache.get("no encode", decode=False), b"1") 43 | 44 | def test_single_with_namespace(self): 45 | self.assertTrue(cache.set("test", 42, namespace="testing", expirein=0)) 46 | self.assertEqual(cache.get("test", namespace="testing"), 42) 47 | 48 | def test_single_fancy(self): 49 | self.assertTrue(cache.set("test3", u"Привет!", expirein=0)) 50 | self.assertEqual(cache.get("test3"), u"Привет!") 51 | 52 | def test_single_dict(self): 53 | dictionary = { 54 | "fancy": "yeah", 55 | "wow": 11, 56 | } 57 | self.assertTrue(cache.set('some_dict', dictionary, expirein=0)) 58 | self.assertEqual(cache.get('some_dict'), dictionary) 59 | 60 | def test_single_dict_fancy(self): 61 | dictionary = { 62 | "fancy": u"Да", 63 | "тест": 11, 64 | } 65 | cache.set('some_dict', dictionary, expirein=0) 66 | self.assertEqual(cache.get('some_dict'), dictionary) 67 | 68 | def test_datetime(self): 69 | self.assertTrue(cache.set('some_time', datetime.datetime.now(), expirein=0)) 70 | self.assertEqual(type(cache.get('some_time')), datetime.datetime) 71 | 72 | dictionary = { 73 | "id": 1, 74 | "created": datetime.datetime.now(), 75 | } 76 | self.assertTrue(cache.set('some_other_time', dictionary, expirein=0)) 77 | self.assertEqual(cache.get('some_other_time'), dictionary) 78 | 79 | def test_delete(self): 80 | key = "testing" 81 | self.assertTrue(cache.set(key, u"Пример", expirein=0)) 82 | self.assertEqual(cache.get(key), 
u"Пример") 83 | self.assertEqual(cache.delete(key), 1) 84 | self.assertIsNone(cache.get(key)) 85 | 86 | def test_delete_with_namespace(self): 87 | key = "testing" 88 | namespace = "spaaaaaaace" 89 | self.assertTrue(cache.set(key, u"Пример", namespace=namespace, expirein=0)) 90 | self.assertEqual(cache.get(key, namespace=namespace), u"Пример") 91 | self.assertEqual(cache.delete(key, namespace=namespace), 1) 92 | self.assertIsNone(cache.get(key, namespace=namespace)) 93 | 94 | def test_many(self): 95 | # With namespace 96 | mapping = { 97 | "test1": "Hello", 98 | "test2": "there", 99 | } 100 | self.assertTrue(cache.set_many(mapping, namespace="testing-1", expirein=0)) 101 | self.assertEqual(cache.get_many(list(mapping.keys()), namespace="testing-1"), mapping) 102 | 103 | # With another namespace 104 | test = cache.get_many(list(mapping.keys()), namespace="testing-2") 105 | for key, val in test.items(): 106 | self.assertIn(key, mapping) 107 | self.assertIsNone(val) 108 | 109 | # Without a namespace 110 | mapping = { 111 | "test1": "What's", 112 | "test2": "good", 113 | } 114 | self.assertTrue(cache.set_many(mapping, expirein=0)) 115 | self.assertEqual(cache.get_many(list(mapping.keys())), mapping) 116 | 117 | def test_increment(self): 118 | cache.set("a", 1, encode=False, expirein=0) 119 | self.assertEqual(cache.increment("a"), 2) 120 | 121 | def test_increment_invalid_value(self): 122 | cache.set("a", "not a number", expirein=0) 123 | with self.assertRaises(redis.exceptions.ResponseError): 124 | cache.increment("a") 125 | 126 | def test_expire(self): 127 | cache.set("a", 1, expirein=100) 128 | self.assertEqual(cache.expire("a", 1), True) 129 | sleep(1.1) 130 | self.assertEqual(cache.get("a"), None) 131 | 132 | def test_expireat(self): 133 | cache.set("a", 1, expirein=100) 134 | self.assertEqual(cache.expireat("a", int(time() + 1)), True) 135 | sleep(1.1) 136 | self.assertEqual(cache.get("a"), None) 137 | 138 | def test_sadd(self): 139 | cache.sadd("myset", {"a", "b", "c"}, expirein=1000) 140 | cache.sadd("myset", ["a", "f", "d"], expirein=1000) 141 | cache.sadd("myset", "z", expirein=1000) 142 | self.assertEqual({"a", "b", "c", "d", "f", "z"}, cache.smembers("myset")) 143 | 144 | 145 | class CacheKeyTestCase(unittest.TestCase): 146 | namespace = "NS_TEST" 147 | 148 | @mock.patch('brainzutils.cache.redis.StrictRedis', autospec=True) 149 | def test_set_key(self, mock_redis): 150 | """Test setting a bytes value""" 151 | cache.init(host='host', port=2, namespace=self.namespace) 152 | cache.set('key', u'value'.encode('utf-8'), expirein=0) 153 | 154 | # Keys are encoded into bytes always 155 | expected_key = 'NS_TEST:key' 156 | # msgpack encoded value 157 | expected_value = b'\xc4\x05value' 158 | mock_redis.return_value.mset.assert_called_with({expected_key: expected_value}) 159 | mock_redis.return_value.pexpire.assert_not_called() 160 | 161 | @mock.patch('brainzutils.cache.redis.StrictRedis', autospec=True) 162 | def test_set_key_unicode(self, mock_redis): 163 | """Test setting a unicode value""" 164 | cache.init(host='host', port=2, namespace=self.namespace) 165 | cache.set('key', u'value', expirein=0) 166 | 167 | expected_key = 'NS_TEST:key' 168 | # msgpack encoded value 169 | expected_value = b'\xa5value' 170 | mock_redis.return_value.mset.assert_called_with({expected_key: expected_value}) 171 | mock_redis.return_value.pexpire.assert_not_called() 172 | 173 | @mock.patch('brainzutils.cache.redis.StrictRedis', autospec=True) 174 | def test_key_expire(self, mock_redis): 175 | 
cache.init(host='host', port=2, namespace=self.namespace) 176 | cache.set('key', u'value'.encode('utf-8'), expirein=30) 177 | expected_key = 'NS_TEST:key' 178 | # msgpack encoded value 179 | expected_value = b'\xc4\x05value' 180 | mock_redis.return_value.mset.assert_called_with({expected_key: expected_value}) 181 | mock_redis.return_value.pexpire.assert_called_with(expected_key, 30000) 182 | -------------------------------------------------------------------------------- /brainzutils/test/test_mail.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import smtplib 3 | from unittest import mock 4 | 5 | from email.mime.multipart import MIMEMultipart 6 | from email.mime.text import MIMEText 7 | from brainzutils import flask 8 | from brainzutils import mail 9 | 10 | class MailTestCase(unittest.TestCase): 11 | 12 | def test_send_email_missing_config(self): 13 | app = flask.CustomFlask(__name__) 14 | with app.app_context(): 15 | with self.assertRaises(ValueError) as err: 16 | mail.send_mail( 17 | subject='ListenBrainz Spotify Importer Error', 18 | text='It is a test mail', 19 | recipients=[], 20 | attachments=None, 21 | from_name='ListenBrainz', 22 | from_addr='noreply@metabrainz.org', 23 | boundary='b' 24 | ) 25 | assert "Flask current_app requires config items" in str(err.exception) 26 | 27 | def test_send_email_string_recipients(self): 28 | app = flask.CustomFlask(__name__) 29 | with app.app_context(): 30 | with self.assertRaises(ValueError) as err: 31 | mail.send_mail( 32 | subject='ListenBrainz Spotify Importer Error', 33 | text='It is a test mail', 34 | recipients='wrongemail@metabrainz.org', 35 | attachments=None, 36 | from_name='ListenBrainz', 37 | from_addr='noreply@metabrainz.org', 38 | boundary='b' 39 | ) 40 | assert str(err.exception) == "recipients must be a list of email addresses" 41 | 42 | @mock.patch('smtplib.SMTP') 43 | def test_send_email(self, mock_smtp): 44 | app = flask.CustomFlask(__name__) 45 | app.config['SMTP_SERVER'] = 'localhost' 46 | app.config['SMTP_PORT'] = 25 47 | 48 | with app.app_context(): 49 | from_address = 'noreply@metabrainz.org' 50 | recipients = ['musicbrainz@metabrainz.org', 'listenbrainz@metabrainz.org'] 51 | text = 'It is a test mail' 52 | from_name = 'ListenBrainz' 53 | subject = 'ListenBrainz Spotify Importer Error' 54 | boundary = '===============2220963697271485568==' 55 | message = MIMEMultipart(boundary=boundary) 56 | message['To'] = "musicbrainz@metabrainz.org, listenbrainz@metabrainz.org" 57 | message['Subject'] = subject 58 | message['From'] = '%s <%s>' % (from_name, from_address) 59 | message.attach(MIMEText(text, _charset='utf-8')) 60 | 61 | mail.send_mail( 62 | subject='ListenBrainz Spotify Importer Error', 63 | text='It is a test mail', 64 | recipients=recipients, 65 | attachments=None, 66 | from_name='ListenBrainz', 67 | from_addr='noreply@metabrainz.org', 68 | boundary=boundary 69 | ) 70 | 71 | mock_smtp.return_value.sendmail.assert_called_once_with(from_address, recipients, message.as_string()) 72 | -------------------------------------------------------------------------------- /brainzutils/test/test_metrics.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import mock, TestCase 3 | 4 | from brainzutils import cache 5 | from brainzutils import metrics 6 | 7 | 8 | class MetricsTestCase(TestCase): 9 | 10 | def setUp(self): 11 | cache.init('redis') 12 | 13 | def tearDown(self): 14 | metrics._metrics_project_name = 
None 15 | 16 | @mock.patch('brainzutils.metrics.cache._r.rpush') 17 | def test_set(self, rpush): 18 | metrics.init('listenbrainz.org') 19 | os.environ["PRIVATE_IP"] = "127.0.0.1" 20 | metrics.set("my_metric", timestamp=1619629462352960742, test_i=2, test_fl=.3, test_t=True, test_f=False, test_s="gobble") 21 | rpush.assert_called_with(metrics.REDIS_METRICS_KEY, 22 | 'my_metric,dc=hetzner,server=127.0.0.1,project=listenbrainz.org test_i=2i,test_fl=0.300000,test_t=t,test_f=f,test_s="gobble" 1619629462352960742') 23 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brainzutils.musicbrainz_db import init_db_engine, mb_session 4 | 5 | 6 | @pytest.fixture(scope="session") 7 | def engine(): 8 | init_db_engine("postgresql://musicbrainz@musicbrainz_db/musicbrainz_db") 9 | 10 | 11 | @pytest.fixture(scope="function") 12 | def session(engine): 13 | return mb_session() 14 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metabrainz/brainzutils-python/bf01c6da15d4a2426d64a31cf232c06bec3860f3/docs/_static/.gitkeep -------------------------------------------------------------------------------- /docs/cache.rst: -------------------------------------------------------------------------------- 1 | Cache 2 | ===== 3 | 4 | The cache module provides an interface to redis to store items temporarily 5 | 6 | .. automodule:: brainzutils.cache 7 | :members: -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'BrainzUtils'
21 | copyright = '2020, MetaBrainz Foundation'
22 | author = 'MetaBrainz Foundation'
23 | 
24 | 
25 | # -- General configuration ---------------------------------------------------
26 | 
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx_rtd_theme']
31 | 
32 | # Add any paths that contain templates here, relative to this directory.
33 | templates_path = ['_templates']
34 | 
35 | # List of patterns, relative to source directory, that match files and
36 | # directories to ignore when looking for source files.
37 | # This pattern also affects html_static_path and html_extra_path.
38 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
39 | 
40 | 
41 | # -- Options for HTML output -------------------------------------------------
42 | 
43 | # The theme to use for HTML and HTML Help pages. See the documentation for
44 | # a list of builtin themes.
45 | #
46 | html_theme = 'sphinx_rtd_theme'
47 | 
48 | # Add any paths that contain custom static files (such as style sheets) here,
49 | # relative to this directory. They are copied after the builtin static files,
50 | # so a file named "default.css" will overwrite the builtin "default.css".
51 | html_static_path = ['_static']
52 | 
--------------------------------------------------------------------------------
/docs/flask.rst:
--------------------------------------------------------------------------------
1 | Flask
2 | =====
3 | 
4 | The Flask module provides a Flask application with a few sensible defaults for MetaBrainz projects.
5 | 
6 | .. automodule:: brainzutils.flask
7 |    :members:
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | BrainzUtils
2 | ===========
3 | 
4 | BrainzUtils is a set of Python tools used in projects by the MetaBrainz Foundation.
5 | 
6 | .. toctree::
7 |    :maxdepth: 2
8 |    :caption: Contents:
9 | 
10 |    cache
11 |    flask
12 |    metrics
13 |    mail
14 |    musicbrainz_db/index
15 |    ratelimit
16 | 
17 | Indices and tables
18 | ==================
19 | 
20 | * :ref:`genindex`
21 | * :ref:`modindex`
22 | * :ref:`search`
23 | 
--------------------------------------------------------------------------------
/docs/mail.rst:
--------------------------------------------------------------------------------
1 | Mail
2 | ====
3 | 
4 | The mail module provides tools for sending emails.
5 | 
6 | .. automodule:: brainzutils.mail
7 |    :members:
--------------------------------------------------------------------------------
/docs/metrics.rst:
--------------------------------------------------------------------------------
1 | Metrics
2 | =======
3 | 
4 | The metrics module provides a way of recording numerical values that can be stored in a statistics database.
5 | 
6 | .. automodule:: brainzutils.metrics
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/artist.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Artist
2 | ==================
3 | 
4 | For loading an artist from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.artist
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/editor.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Editor
2 | ==================
3 | 
4 | For loading an editor from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.editor
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/event.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Event
2 | =================
3 | 
4 | For loading an event from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.event
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/index.rst:
--------------------------------------------------------------------------------
1 | Direct MusicBrainz access
2 | =========================
3 | 
4 | The musicbrainz_db module provides wrappers around mbdata to load musicbrainz entities
5 | directly from a musicbrainz database.
6 | 
7 | .. toctree::
8 |    :maxdepth: 2
9 |    :caption: Contents:
10 | 
11 |    artist
12 |    editor
13 |    event
14 |    label
15 |    place
16 |    recording
17 |    release
18 |    release_group
19 |    work
20 | 
--------------------------------------------------------------------------------
/docs/musicbrainz_db/label.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Label
2 | =================
3 | 
4 | For loading a label from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.label
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/place.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Place
2 | =================
3 | 
4 | For loading a place from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.place
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/recording.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Recording
2 | =====================
3 | 
4 | For loading a recording from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.recording
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/release.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Release
2 | ===================
3 | 
4 | For loading a release from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.release
7 |    :members:
--------------------------------------------------------------------------------
/docs/musicbrainz_db/release_group.rst:
--------------------------------------------------------------------------------
1 | MusicBrainz Release Group
2 | =========================
3 | 
4 | For loading a release group from musicbrainz
5 | 
6 | .. automodule:: brainzutils.musicbrainz_db.release_group
automodule:: brainzutils.musicbrainz_db.release_group 7 | :members: -------------------------------------------------------------------------------- /docs/musicbrainz_db/work.rst: -------------------------------------------------------------------------------- 1 | MusicBrainz Work 2 | ================ 3 | 4 | For loading a work from MusicBrainz. 5 | 6 | .. automodule:: brainzutils.musicbrainz_db.work 7 | :members: -------------------------------------------------------------------------------- /docs/ratelimit.rst: -------------------------------------------------------------------------------- 1 | Ratelimit 2 | ========= 3 | 4 | The ratelimit module provides tools for limiting access to an API per IP address over a given 5 | window of time. The limits are stored in Redis. 6 | 7 | .. automodule:: brainzutils.ratelimit 8 | :members: -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==3.5.1 2 | sphinx_rtd_theme==0.5.1 3 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | 3 | # Disable the message, report, category or checker with the given id(s). You 4 | # can either give multiple identifiers separated by comma (,) or put this 5 | # option multiple times (only on the command line, not in the configuration 6 | # file where it should appear only once). You can also use "--disable=all" to 7 | # disable everything first and then reenable specific checks. For example, if 8 | # you want to run only the similarities checker, you can use "--disable=all 9 | # --enable=similarities". If you want to run only the classes checker, but have 10 | # no Warning level messages displayed, use "--disable=all --enable=classes 11 | # --disable=W" 12 | disable=missing-docstring,too-many-arguments,fixme,invalid-name,global-statement 13 | 14 | 15 | [REPORTS] 16 | 17 | # Set the output format. Available formats are text, parseable, colorized, msvs 18 | # (visual studio) and html. You can also give a reporter class, eg 19 | # mypackage.mymodule.MyReporterClass. 20 | output-format=parseable 21 | 22 | # Put messages in a separate file for each module / package specified on the 23 | # command line instead of printing them on stdout. Reports (if any) will be 24 | # written in a file name "pylint_global.[txt|html]". This option is deprecated 25 | # and it will be removed in Pylint 2.0. 26 | files-output=no 27 | 28 | 29 | [FORMAT] 30 | 31 | # Maximum number of characters on a single line. 
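# (Pylint's built-in default is 100; this project allows longer lines.)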
32 | max-line-length=130 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "brainzutils" 3 | description = "Python tools for MetaBrainz projects" 4 | authors = [ 5 | { name = "MetaBrainz Foundation", email = "support@metabrainz.org" } 6 | ] 7 | dynamic = ["version"] 8 | requires-python = ">=3.10" 9 | dependencies = [ 10 | "Flask>=3.1.0", 11 | "Jinja2>=3.1.6", 12 | "itsdangerous>=2.2.0", 13 | "click>=8.1.8", 14 | "Werkzeug>=3.1.3", 15 | "Flask-DebugToolbar", 16 | "sentry-sdk[flask]>=2.27.0", 17 | "redis>=5.2.1", 18 | "msgpack>=1.1.0", 19 | "requests>=2.32.3", 20 | "SQLAlchemy>=2.0.40", 21 | "mbdata@git+https://github.com/metabrainz/mbdata.git@v30.0.0" 22 | ] 23 | 24 | [tool.setuptools] 25 | packages = ["brainzutils"] 26 | 27 | [tool.setuptools_scm] 28 | 29 | [build-system] 30 | requires = ["setuptools>=80", "setuptools-scm>=8"] 31 | build-backend = "setuptools.build_meta" 32 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = brainzutils 3 | addopts = --cov-report html --cov=brainzutils -W always::DeprecationWarning -W error::sqlalchemy.exc.Base20DeprecationWarning 4 | 5 | markers = 6 | database: requires access to the MusicBrainz sample database -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask>=3.1.0 2 | Jinja2>=3.1.6 3 | itsdangerous>=2.2.0 4 | click>=8.1.8 5 | Werkzeug>=3.1.3 6 | Flask-DebugToolbar 7 | sentry-sdk[flask]>=2.27.0 8 | redis>=5.2.1 9 | msgpack>=1.1.0 10 | requests>=2.32.3 11 | SQLAlchemy>=2.0.40 12 | mbdata@git+https://github.com/metabrainz/mbdata.git@v29.0.0 13 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | psycopg2-binary==2.9.10 2 | freezegun==1.5.1 3 | pytest==8.3.5 4 | pytest-cov==6.1.1 5 | pylint==3.3.6 -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | 5 | # GitHub Actions automatically sets the CI environment variable. We use this variable to detect whether the script is running 6 | # inside a CI environment and modify its execution as needed. 
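# (To exercise this code path outside of CI you can run, e.g.: CI=true ./test.sh)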
7 | if [ "$CI" == "true" ] ; then 8 | echo "Running in CI mode" 9 | fi 10 | 11 | # UNIT TESTS 12 | # ./test.sh build unit test containers, bring up, make database, test, bring down 13 | # for development: 14 | # ./test.sh -u build unit test containers, bring up in background and load database if needed 15 | # ./test.sh [params] run unit tests, passing optional params to inner test 16 | # ./test.sh -s stop unit test containers without removing 17 | # ./test.sh -d clean unit test containers 18 | 19 | COMPOSE_FILE_LOC=test/docker-compose.yml 20 | COMPOSE_PROJECT_NAME=brainzutils_test 21 | 22 | echo "Checking docker compose version" 23 | if docker compose version &> /dev/null; then 24 | DOCKER_COMPOSE_CMD="docker compose" 25 | else 26 | DOCKER_COMPOSE_CMD="docker-compose" 27 | fi 28 | 29 | function invoke_docker_compose { 30 | $DOCKER_COMPOSE_CMD \ 31 | -f $COMPOSE_FILE_LOC \ 32 | -p $COMPOSE_PROJECT_NAME \ 33 | "$@" 34 | } 35 | 36 | function docker_compose_run { 37 | invoke_docker_compose run --rm --user `id -u`:`id -g` "$@" 38 | } 39 | 40 | function build_unit_containers { 41 | invoke_docker_compose build 42 | } 43 | 44 | function bring_up_unit_db { 45 | invoke_docker_compose up -d redis musicbrainz_db 46 | } 47 | 48 | function is_unit_db_running { 49 | # Check if the database container is running 50 | containername="${COMPOSE_PROJECT_NAME}_musicbrainz_db_1" 51 | res=`docker ps --filter "name=$containername" --filter "status=running" -q` 52 | if [ -n "$res" ]; then 53 | return 0 54 | else 55 | return 1 56 | fi 57 | } 58 | 59 | function is_unit_db_exists { 60 | containername="${COMPOSE_PROJECT_NAME}_musicbrainz_db_1" 61 | res=`docker ps --filter "name=$containername" --filter "status=exited" -q` 62 | if [ -n "$res" ]; then 63 | return 0 64 | else 65 | return 1 66 | fi 67 | } 68 | 69 | # Exit immediately if a command exits with a non-zero status. 70 | # set -e 71 | # trap cleanup EXIT # Cleanup after tests finish running 72 | 73 | 74 | if [ "$1" == "-s" ]; then 75 | echo "Stopping unit test containers" 76 | invoke_docker_compose stop 77 | exit 0 78 | fi 79 | 80 | if [ "$1" == "-d" ]; then 81 | echo "Bringing down unit test containers" 82 | invoke_docker_compose down 83 | exit 0 84 | fi 85 | 86 | # if -u flag, bring up db, run setup, quit 87 | if [ "$1" == "-u" ]; then 88 | is_unit_db_exists 89 | DB_EXISTS=$? 90 | is_unit_db_running 91 | DB_RUNNING=$? 92 | if [ $DB_EXISTS -eq 0 -o $DB_RUNNING -eq 0 ]; then 93 | echo "Database is already up, doing nothing" 94 | else 95 | echo "Building containers" 96 | invoke_docker_compose build 97 | echo "Bringing up DB" 98 | bring_up_unit_db 99 | fi 100 | exit 0 101 | fi 102 | 103 | is_unit_db_exists 104 | DB_EXISTS=$? 105 | is_unit_db_running 106 | DB_RUNNING=$? 107 | if [ $DB_EXISTS -eq 1 -a $DB_RUNNING -eq 1 ]; then 108 | # If no containers, build them, run setup, then run tests, then bring down 109 | invoke_docker_compose build 110 | bring_up_unit_db 111 | echo "Running tests" 112 | docker_compose_run test "$@" 113 | RET=$? 114 | invoke_docker_compose down 115 | exit $RET 116 | else 117 | # Else, we have containers, just run tests 118 | echo "Running tests" 119 | docker_compose_run test "$@" 120 | exit $? 
121 | fi 122 | -------------------------------------------------------------------------------- /test/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM metabrainz/python:3.13-20250313 2 | 3 | ENV DOCKERIZE_VERSION v0.6.1 4 | RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \ 5 | && tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz 6 | 7 | RUN mkdir /code 8 | WORKDIR /code 9 | 10 | # Python dependencies 11 | RUN apt-get update \ 12 | && apt-get install -y --no-install-recommends \ 13 | build-essential \ 14 | git 15 | 16 | COPY requirements.txt /code/requirements.txt 17 | COPY requirements_dev.txt /code/requirements_dev.txt 18 | RUN pip install -r requirements.txt 19 | RUN pip install -r requirements_dev.txt 20 | 21 | COPY . /code/ 22 | 23 | ENV REDIS_HOST "redis" 24 | 25 | ENTRYPOINT ["dockerize", "-wait", "tcp://redis:6379", "-timeout", "10s", \ 26 | "dockerize", "-wait", "tcp://musicbrainz_db:5432", "-timeout", "10s", \ 27 | "pytest", "--junitxml=reports/test_results.xml"] 28 | -------------------------------------------------------------------------------- /test/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | test: 4 | build: 5 | context: .. 6 | dockerfile: ./test/Dockerfile 7 | volumes: 8 | - ../:/code 9 | environment: 10 | PYTHONDONTWRITEBYTECODE: 1 11 | SQLALCHEMY_WARN_20: 1 12 | depends_on: 13 | - redis 14 | - musicbrainz_db 15 | 16 | redis: 17 | image: redis:3.2.1 18 | 19 | musicbrainz_db: 20 | image: metabrainz/brainzutils-mb-sample-database:schema-27-2022-05-20.0 21 | environment: 22 | POSTGRES_HOST_AUTH_METHOD: trust 23 | ports: 24 | - "5430:5432" 25 | -------------------------------------------------------------------------------- /test/musicbrainz_db/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM metabrainz/musicbrainz-test-database:beta 2 | 3 | RUN apt-get update && apt-get install -y wget 4 | 5 | RUN mkdir /home/musicbrainz/musicbrainz-server/setup_db 6 | COPY scripts/* /home/musicbrainz/musicbrainz-server/setup_db/ 7 | RUN chmod +x /home/musicbrainz/musicbrainz-server/setup_db/* 8 | 9 | RUN mkdir -p /media/dbdump 10 | RUN chown postgres /media/dbdump 11 | 12 | RUN rm -f /docker-entrypoint-initdb.d/create_test_db.sh 13 | RUN ln -s /home/musicbrainz/musicbrainz-server/setup_db/create_test_db.sh /docker-entrypoint-initdb.d/ -------------------------------------------------------------------------------- /test/musicbrainz_db/README.md: -------------------------------------------------------------------------------- 1 | # MusicBrainz sample database for testing 2 | 3 | This is a Postgres Docker image that contains a copy of the MusicBrainz database, useful 4 | for testing. 5 | 6 | It's based on the https://hub.docker.com/r/metabrainz/musicbrainz-test-database image, but includes 7 | some extra scripts from [musicbrainz-docker](https://github.com/metabrainz/musicbrainz-docker) in order 8 | to download and set up a sample database. The MusicBrainz sample database is a very small subset of the 9 | MusicBrainz database, but contains real data. This makes it useful for testing against the database 10 | without importing everything. 
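Once the container is up you can sanity-check the imported data from the host. The credentials here are an assumption based on the standard MusicBrainz database setup (user `musicbrainz`, database `musicbrainz_db`), and 5430 is the host port mapped in the example below:

    psql -h localhost -p 5430 -U musicbrainz musicbrainz_db -c 'SELECT count(*) FROM musicbrainz.artist;'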
11 | 12 | This image can be run in a `docker-compose.yml` file like this: 13 | 14 | ```yaml 15 | musicbrainz_db: 16 | build: 17 | context: musicbrainz_db 18 | dockerfile: Dockerfile 19 | environment: 20 | PGDATA: /var/lib/postgresql/data/pgdata 21 | POSTGRES_HOST_AUTH_METHOD: trust 22 | ports: 23 | - "5430:5432" 24 | ``` 25 | 26 | However, this will cause the sample database to be downloaded and installed every time the container 27 | starts up. This takes between 5 and 10 minutes, depending on the speed of your computer. 28 | 29 | ### Image with built-in data 30 | We also build an image with the MusicBrainz sample data already imported, in order to have a container that 31 | can start up immediately with all data present. 32 | 33 | This image is hosted at https://hub.docker.com/r/metabrainz/brainzutils-mb-sample-database 34 | 35 | The steps to create a new version are manual, but should only need to be done each time 36 | the MusicBrainz schema changes. 37 | 38 | Build the image: 39 | 40 | docker build -t musicbrainz_db_sample . 41 | 42 | Start the container running bash, so that we can do the import and perform some cleanups. 43 | We choose a different PGDATA location because `/var/lib/postgresql/data` is configured as 44 | a volume by default and we don't want the data to be put in a temporary location. 45 | 46 | docker run -ti --rm --name musicbrainz_db_sample -e PGDATA=/var/lib/postgresql-musicbrainz/data -e POSTGRES_HOST_AUTH_METHOD=trust musicbrainz_db_sample bash 47 | 48 | Inside the running container, run these commands: 49 | 50 | # Start up postgres, running the entrypoint which imports the database 51 | /docker-entrypoint.sh postgres 52 | # Once the import finishes and postgres starts up, quit it with ^C 53 | # Remove some intermediate data and our custom entrypoint 54 | rm -r /media/dbdump 55 | rm /docker-entrypoint-initdb.d/create_test_db.sh 56 | grep DB_SCHEMA_SEQUENCE /home/musicbrainz/musicbrainz-server/lib/DBDefs.pm # note the schema sequence number for the image tag 57 | 58 | Without quitting the container, in another terminal on the host, make a new docker commit to build 59 | the new image: 60 | 61 | docker commit --change='CMD ["postgres"]' musicbrainz_db_sample metabrainz/brainzutils-mb-sample-database:schema-25-2021-04-04.0 62 | 63 | The first argument is the container name (set with `--name` in `docker run`) and the second argument 64 | is the name of the image to create. We include the database schema number from the grep command. 
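The tag appears to follow the pattern `schema-<sequence>-<dump date>.<revision>`, where the sequence comes from the grep above. Before pushing, it's worth smoke-testing the committed image; it should accept connections almost immediately, since the data is already imported:

    docker run --rm -d -p 5430:5432 -e POSTGRES_HOST_AUTH_METHOD=trust metabrainz/brainzutils-mb-sample-database:schema-25-2021-04-04.0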
65 | 66 | Once built, this image can be pushed to Docker Hub by an approved user: 67 | 68 | docker push metabrainz/brainzutils-mb-sample-database:schema-25-2021-04-04.0 69 | -------------------------------------------------------------------------------- /test/musicbrainz_db/scripts/create_test_db.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # During the entrypoint stage, postgres is only listening on a socket; 4 | # force it to listen on localhost in order to perform the data load 5 | pg_ctl -o "-c listen_addresses='localhost'" -w restart 6 | 7 | cd /home/musicbrainz/musicbrainz-server 8 | carton exec -- ./setup_db/createdb.sh -sample -fetch 9 | -------------------------------------------------------------------------------- /test/musicbrainz_db/scripts/createdb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -o pipefail -u 4 | 5 | FTP_MB=ftp://ftp.eu.metabrainz.org/pub/musicbrainz 6 | IMPORT="fullexport" 7 | FETCH_DUMPS="" 8 | WGET_OPTIONS="" 9 | 10 | HELP=$(cat <<EOH 11 | Usage: $0 [-wget-opts <list of options>] [-sample] [-fetch] [MUSICBRAINZ_FTP_URL] 12 | 13 | Options: 14 | -fetch Fetch latest dump from MusicBrainz FTP 15 | -sample Load sample data instead of full data 16 | -wget-opts Pass additional space-separated options list (should be 17 | a single argument, escape spaces if necessary) to wget 18 | 19 | Default MusicBrainz FTP URL: $FTP_MB 20 | EOH 21 | ) 22 | 23 | if [ $# -gt 4 ]; then 24 | echo "$0: too many arguments" 25 | echo "$HELP" 26 | exit 1 27 | fi 28 | 29 | while [ $# -gt 0 ]; do 30 | case "$1" in 31 | -wget-opts ) 32 | shift 33 | WGET_OPTIONS=$1 34 | ;; 35 | -sample ) 36 | IMPORT="sample" 37 | ;; 38 | -fetch ) 39 | FETCH_DUMPS="$1" 40 | ;; 41 | -* ) 42 | echo "$0: unrecognized option '$1'" 43 | echo "$HELP" 44 | exit 1 45 | ;; 46 | * ) 47 | FTP_MB="$1" 48 | ;; 49 | esac 50 | shift 51 | done 52 | 53 | TMP_DIR=/media/dbdump/tmp 54 | 55 | case "$IMPORT" in 56 | fullexport ) 57 | DUMP_FILES=( 58 | mbdump.tar.bz2 59 | mbdump-cdstubs.tar.bz2 60 | mbdump-cover-art-archive.tar.bz2 61 | mbdump-derived.tar.bz2 62 | mbdump-stats.tar.bz2 63 | mbdump-wikidocs.tar.bz2 64 | );; 65 | sample ) 66 | DUMP_FILES=( 67 | mbdump-sample.tar.xz 68 | );; 69 | esac 70 | 71 | if [[ $FETCH_DUMPS == "-fetch" ]]; then 72 | FETCH_OPTIONS=("${IMPORT/fullexport/replica}" --base-ftp-url "$FTP_MB") 73 | if [[ -n "$WGET_OPTIONS" ]]; then 74 | FETCH_OPTIONS+=(--wget-options "$WGET_OPTIONS") 75 | fi 76 | `dirname "$0"`/fetch-dump.sh "${FETCH_OPTIONS[@]}" 77 | fi 78 | 79 | if [[ -a /media/dbdump/"${DUMP_FILES[0]}" ]]; then 80 | echo "found existing dumps" 81 | 82 | mkdir -p $TMP_DIR 83 | #cd /media/dbdump 84 | 85 | INITDB_OPTIONS='--echo --import' 86 | if ! 
/home/musicbrainz/musicbrainz-server/script/database_exists MAINTENANCE; then 87 | INITDB_OPTIONS="--createdb $INITDB_OPTIONS" 88 | fi 89 | # shellcheck disable=SC2086 90 | ./admin/InitDb.pl --createdb --database READWRITE --import /media/dbdump/mbdump*.tar.xz --echo 91 | #/home/musicbrainz/musicbrainz-server/admin/InitDb.pl $INITDB_OPTIONS -- --skip-editor --tmp-dir $TMP_DIR "${DUMP_FILES[@]}" 92 | else 93 | echo "no dumps found or dumps are incomplete" 94 | fi 95 | -------------------------------------------------------------------------------- /test/musicbrainz_db/scripts/fetch-dump.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -o pipefail -u 4 | 5 | DB_DUMP_DIR=/media/dbdump 6 | SEARCH_DUMP_DIR=/media/searchdump 7 | BASE_FTP_URL='ftp://ftp.eu.metabrainz.org/pub/musicbrainz' 8 | TARGET='' 9 | WGET_CMD=(wget) 10 | 11 | SCRIPT_NAME=$(basename "$0") 12 | HELP=$(cat <<EOH 13 | Usage: $SCRIPT_NAME [<option>] <target> 14 | 15 | Fetch dump files of the MusicBrainz database and/or search indexes. 16 | 17 | Targets: 18 | both Fetch latest search dump with replica dump of the same day. 19 | replica Fetch latest database's replicated tables only. 20 | sample Fetch latest database's sample only. 21 | search Fetch latest search indexes only. 22 | 23 | Options: 24 | --base-ftp-url Specify URL to MetaBrainz/MusicBrainz FTP directory. 25 | (Default: '$BASE_FTP_URL') 26 | --wget-options Specify additional options to be passed to wget; 27 | these should be separated with whitespace, and 28 | the list should be a single argument 29 | (escape whitespace if needed). 30 | 31 | -h, --help Print this help message. 32 | EOH 33 | ) 34 | 35 | # Parse arguments 36 | 37 | while [[ $# -gt 0 ]] 38 | do 39 | case "$1" in 40 | both | replica | sample | search ) 41 | if [[ -n $TARGET ]] 42 | then 43 | echo >&2 "$SCRIPT_NAME: only one target argument can be given" 44 | echo >&2 "Try '$SCRIPT_NAME --help' for usage." 45 | exit 64 # EX_USAGE 46 | fi 47 | TARGET=$1 48 | ;; 49 | --base-ftp-url ) 50 | shift 51 | BASE_FTP_URL="$1" 52 | ;; 53 | --wget-options ) 54 | shift 55 | IFS=' ' read -r -a WGET_OPTIONS <<< "$1" 56 | WGET_CMD+=("${WGET_OPTIONS[@]}") 57 | unset WGET_OPTIONS 58 | ;; 59 | -h | --help ) 60 | echo "$HELP" 61 | exit 0 # EX_OK 62 | ;; 63 | -* ) 64 | echo >&2 "$SCRIPT_NAME: unrecognized option '$1'" 65 | echo >&2 "Try '$SCRIPT_NAME --help' for usage." 66 | exit 64 # EX_USAGE 67 | ;; 68 | * ) 69 | echo >&2 "$SCRIPT_NAME: unrecognized argument '$1'" 70 | echo >&2 "Try '$SCRIPT_NAME --help' for usage." 71 | exit 64 # EX_USAGE 72 | ;; 73 | esac 74 | shift 75 | done 76 | 77 | if [[ -z $TARGET ]] 78 | then 79 | echo >&2 "$SCRIPT_NAME: no dump type has been specified" 80 | echo >&2 "Try '$SCRIPT_NAME --help' for usage." 81 | exit 64 # EX_USAGE 82 | fi 83 | 84 | # Fetch latest search indexes 85 | 86 | if [[ $TARGET =~ ^(both|search)$ ]] 87 | then 88 | echo "$(date): Fetching search indexes dump..." 89 | cd "$SEARCH_DUMP_DIR" && find . 
-delete && cd - 90 | "${WGET_CMD[@]}" -nd -nH -P "$SEARCH_DUMP_DIR" \ 91 | "$BASE_FTP_URL/data/search-indexes/LATEST" 92 | DUMP_TIMESTAMP=$(cat /media/searchdump/LATEST) 93 | "${WGET_CMD[@]}" -nd -nH -r -P "$SEARCH_DUMP_DIR" \ 94 | "$BASE_FTP_URL/data/search-indexes/$DUMP_TIMESTAMP/" 95 | cd "$SEARCH_DUMP_DIR" && md5sum -c MD5SUMS && cd - 96 | if [[ $TARGET == search ]] 97 | then 98 | echo 'Done fetching search indexes dump' 99 | exit 0 # EX_OK 100 | fi 101 | fi 102 | 103 | # Prepare to fetch database dump 104 | 105 | if [[ $TARGET != search ]] 106 | then 107 | echo "$(date): Fetching database dump..." 108 | 109 | rm -rf "${DB_DUMP_DIR:?}"/* 110 | fi 111 | 112 | case "$TARGET" in 113 | both | replica ) 114 | DB_DUMP_REMOTE_DIR=data/fullexport 115 | DB_DUMP_FILES=( 116 | mbdump.tar.bz2 117 | mbdump-cdstubs.tar.bz2 118 | mbdump-cover-art-archive.tar.bz2 119 | mbdump-derived.tar.bz2 120 | mbdump-stats.tar.bz2 121 | mbdump-wikidocs.tar.bz2 122 | ) 123 | ;; 124 | sample ) 125 | DB_DUMP_REMOTE_DIR=data/sample 126 | DB_DUMP_FILES=( 127 | mbdump-sample.tar.xz 128 | ) 129 | ;; 130 | esac 131 | 132 | if [[ $TARGET == both ]] 133 | then 134 | # Find latest database dump corresponding to search indexes 135 | 136 | SEARCH_DUMP_DAY="${DUMP_TIMESTAMP/-*}" 137 | "${WGET_CMD[@]}" --spider --no-remove-listing -P "$DB_DUMP_DIR" \ 138 | "$BASE_FTP_URL/$DB_DUMP_REMOTE_DIR" 139 | DUMP_TIMESTAMP=$( 140 | grep -E "\\s${SEARCH_DUMP_DAY}-\\d*" "$DB_DUMP_DIR/.listing" \ 141 | | sed -e 's/\s*$//' -e 's/.*\s//' 142 | ) 143 | rm -f "$DB_DUMP_DIR/.listing" 144 | echo "$DUMP_TIMESTAMP" >> "$DB_DUMP_DIR/LATEST-WITH-SEARCH-INDEXES" 145 | elif [[ $TARGET != search ]] 146 | then 147 | # Just find latest database dump 148 | 149 | "${WGET_CMD[@]}" -nd -nH -P "$DB_DUMP_DIR" \ 150 | "$BASE_FTP_URL/$DB_DUMP_REMOTE_DIR/LATEST" 151 | DUMP_TIMESTAMP=$(cat /media/dbdump/LATEST) 152 | fi 153 | 154 | # Actually fetch database dump 155 | 156 | if [[ $TARGET =~ ^(both|replica)$ ]] 157 | then 158 | for F in MD5SUMS "${DB_DUMP_FILES[@]}" 159 | do 160 | "${WGET_CMD[@]}" -P "$DB_DUMP_DIR" \ 161 | "$BASE_FTP_URL/$DB_DUMP_REMOTE_DIR/$DUMP_TIMESTAMP/$F" 162 | done 163 | cd "$DB_DUMP_DIR" 164 | for F in "${DB_DUMP_FILES[@]}" 165 | do 166 | MD5SUM=$(md5sum -b "$F") 167 | grep -Fqx "$MD5SUM" MD5SUMS || { 168 | echo >&2 "$0: unmatched MD5 checksum: $MD5SUM *$F" && 169 | exit 70 # EX_SOFTWARE 170 | } 171 | done 172 | cd - 173 | elif [[ $TARGET == sample ]] 174 | then 175 | for F in "${DB_DUMP_FILES[@]}" 176 | do 177 | "${WGET_CMD[@]}" -P "$DB_DUMP_DIR" \ 178 | "$BASE_FTP_URL/$DB_DUMP_REMOTE_DIR/$DUMP_TIMESTAMP/$F" 179 | done 180 | fi 181 | 182 | echo "$(date): Done fetching dump files." 183 | # vi: set noexpandtab softtabstop=0: 184 | --------------------------------------------------------------------------------