├── .env.example ├── .gitignore ├── .idea ├── .gitignore ├── dataSources.xml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── music-downloader.iml └── vcs.xml ├── .vscode ├── launch.json └── settings.json ├── .woodpecker.yml ├── LICENSE ├── README.md ├── assets ├── element_logo.jpeg ├── element_logo.png ├── logo.jpg ├── logo.svg ├── logo_cropped.jpg ├── logo_wireframe.svg └── room_pictures │ ├── development.png │ └── development.svg ├── build ├── contribute.md ├── development ├── actual_donwload.py └── objects_collection.py ├── documentation ├── config.md ├── connections.md ├── html │ ├── bandcamp │ │ └── artist_page.html │ ├── metal-archives │ │ ├── album.html │ │ ├── artist.html │ │ ├── artist_sources.html │ │ ├── discography.html │ │ └── overview.md │ ├── musify │ │ ├── album_overview.html │ │ ├── details.md │ │ └── song_details.html │ ├── youtube-music │ │ ├── index.html │ │ └── search │ │ │ ├── 01-search-request.json │ │ │ ├── general-result.json │ │ │ └── search.md │ └── youtube │ │ ├── channel_api.md │ │ ├── channel_api_request.md │ │ └── youtube.md ├── objects.md ├── old_implementation.md ├── program_structure.md └── shell.md ├── music_kraken ├── __init__.py ├── __main__.py ├── audio │ ├── __init__.py │ ├── codec.py │ └── metadata.py ├── cli │ ├── __init__.py │ ├── informations │ │ ├── __init__.py │ │ └── paths.py │ ├── main_downloader.py │ ├── options │ │ ├── __init__.py │ │ ├── cache.py │ │ ├── first_config.py │ │ ├── frontend.py │ │ └── settings.py │ └── utils.py ├── connection │ ├── __init__.py │ ├── cache.py │ ├── connection.py │ └── rotating.py ├── download │ ├── __init__.py │ ├── page_attributes.py │ └── results.py ├── objects │ ├── __init__.py │ ├── artwork.py │ ├── collection.py │ ├── contact.py │ ├── country.py │ ├── formatted_text.py │ ├── lint_default_factories.py │ ├── lyrics.py │ ├── metadata.py │ ├── option.py │ ├── parents.py │ ├── song.py │ ├── source.py │ └── target.py ├── pages │ ├── __init__.py │ ├── 
abstract.py │ ├── bandcamp.py │ ├── encyclopaedia_metallum.py │ ├── genius.py │ ├── musicbrainz.py │ ├── musify.py │ ├── youtube.py │ └── youtube_music │ │ ├── __init__.py │ │ ├── _list_render.py │ │ ├── _music_object_render.py │ │ ├── super_youtube.py │ │ └── youtube_music.py └── utils │ ├── __init__.py │ ├── config │ ├── __init__.py │ ├── attributes │ │ ├── __init__.py │ │ ├── attribute.py │ │ └── special_attributes.py │ ├── config.py │ ├── config_files │ │ ├── __init__.py │ │ ├── logging_config.py │ │ ├── main_config.py │ │ └── youtube_config.py │ └── utils.py │ ├── enums │ ├── __init__.py │ ├── album.py │ ├── colors.py │ └── contact.py │ ├── exception │ ├── __init__.py │ ├── config.py │ ├── download.py │ └── objects.py │ ├── hacking.py │ ├── path_manager │ ├── __init__.py │ ├── config_directory.py │ ├── locations.py │ └── music_directory.py │ ├── shared.py │ ├── string_processing.py │ └── support_classes │ ├── __init__.py │ ├── download_result.py │ └── query.py ├── notes.md ├── pyproject.toml ├── requirements-dev.txt └── tests ├── __init__.py ├── test_collection.py └── test_hash_url.py /.env.example: -------------------------------------------------------------------------------- 1 | STAGE=dev -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | *.mp3 7 | *.cache* 8 | 9 | /dist/* 10 | 11 | /build/* 12 | !/build/build.sh 13 | 14 | # Virtual Environment 15 | venv 16 | .idea/aws.xml 17 | 18 | windows 19 | 20 | .env 21 | 22 | # setuptools_scm 23 | _version.py 24 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | 
/httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.idea/dataSources.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | sqlite.xerial 6 | true 7 | org.sqlite.JDBC 8 | jdbc:sqlite:/tmp/music-downloader/metadata.db 9 | $ProjectFileDir$ 10 | 11 | 12 | sqlite.xerial 13 | true 14 | org.sqlite.JDBC 15 | jdbc:sqlite:$PROJECT_DIR$/src/test.db 16 | $ProjectFileDir$ 17 | 18 | 19 | file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.39.2/sqlite-jdbc-3.39.2.jar 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 10 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.idea/music-downloader.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.vscode/launch.json: 
-------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python Debugger: Current File", 9 | "type": "debugpy", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal" 13 | }, 14 | { 15 | "name": "Python Debugger: Download script", 16 | "type": "debugpy", 17 | "request": "launch", 18 | "program": "development/actual_donwload.py", 19 | "console": "integratedTerminal" 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.analysis.extraPaths": [ 3 | "./src" 4 | ], 5 | "python.testing.unittestArgs": [ 6 | "-v", 7 | "-s", 8 | "./src", 9 | "-p", 10 | "test*.py" 11 | ], 12 | "python.testing.pytestEnabled": false, 13 | "python.testing.unittestEnabled": true, 14 | "[python]": { 15 | "editor.defaultFormatter": "ms-python.autopep8" 16 | }, 17 | "python.formatting.provider": "none", 18 | "cSpell.words": [ 19 | "albumsort", 20 | "APIC", 21 | "Bandcamp", 22 | "bitrate", 23 | "DEEZER", 24 | "dotenv", 25 | "encyclopaedia", 26 | "ENDC", 27 | "Gitea", 28 | "iframe", 29 | "isrc", 30 | "itemprop", 31 | "levenshtein", 32 | "metallum", 33 | "MUSICBRAINZ", 34 | "musify", 35 | "OKBLUE", 36 | "OKGREEN", 37 | "pathvalidate", 38 | "Referer", 39 | "sponsorblock", 40 | "tracklist", 41 | "tracksort", 42 | "translit", 43 | "unmap", 44 | "youtube", 45 | "youtubei" 46 | ] 47 | } -------------------------------------------------------------------------------- /.woodpecker.yml: -------------------------------------------------------------------------------- 1 | labels: 2 | platform: linux/amd64 3 | 4 | 
clone: 5 | git: 6 | image: woodpeckerci/plugin-git 7 | settings: 8 | tags: true 9 | 10 | steps: 11 | build-stable: 12 | image: python 13 | commands: 14 | - python -m pip install -r requirements-dev.txt 15 | - python3 -m build 16 | environment: 17 | - SETUPTOOLS_SCM_PRETEND_VERSION=${CI_COMMIT_TAG} 18 | when: 19 | - event: tag 20 | 21 | build-dev: 22 | image: python 23 | commands: 24 | - python -m pip install -r requirements-dev.txt 25 | - python3 -m build 26 | when: 27 | - event: manual 28 | - event: push 29 | branch: experimental 30 | 31 | publish-gitea: 32 | image: gitea.elara.ws/music-kraken/plugin-twine 33 | settings: 34 | repository_url: "https://gitea.elara.ws/api/packages/music-kraken/pypi" 35 | username: 36 | from_secret: gitea_username 37 | password: 38 | from_secret: gitea_password 39 | when: 40 | - event: manual 41 | - event: tag 42 | - event: push 43 | branch: experimental 44 | 45 | publish-pypi: 46 | image: gitea.elara.ws/music-kraken/plugin-twine 47 | settings: 48 | username: 49 | from_secret: pypi_username 50 | password: 51 | from_secret: pypi_password 52 | when: 53 | - event: manual 54 | - event: tag 55 | - event: push 56 | branch: experimental 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Music Kraken 2 | 3 | [![Woodpecker CI Status](https://ci.elara.ws/api/badges/59/status.svg)](https://ci.elara.ws/repos/59) 4 | 5 | music kraken logo 6 | 7 | - [Installation](#installation) 8 | - [Quick-Guide](#quick-guide) 9 | - [How to search properly](#query) 10 | - [Matrix Space](#matrix-space) 11 | 12 | If you want to use this a library or contribute, check out [the wiki](https://gitea.elara.ws/music-kraken/music-kraken-core/wiki) for more information. 
13 | 14 | --- 15 | 16 | ## Installation 17 | 18 | You can find and get this project from either [PyPI](https://pypi.org/project/music-kraken/) as a Python-Package, 19 | or simply the source code from [Gitea](https://gitea.elara.ws/music-kraken/music-kraken-core). ** 20 | 21 | **NOTES** 22 | 23 | - Even though everything **SHOULD** work cross-platform, I have only tested it on Ubuntu. 24 | - If you enjoy this project, feel free to give it a star on GitHub. 25 | 26 | ### From source 27 | 28 | ```sh 29 | git clone https://gitea.elara.ws/music-kraken/music-kraken-core.git 30 | python3 -m pip install -e music-kraken-core/ 31 | ``` 32 | 33 | To update the program, if installed like this, go into the `music-kraken-core` directory and run `git pull`. 34 | 35 | ### Get it running on other Systems 36 | 37 | Here are the collected issues, that are related to running the program on different systems. If you have any issues, feel free to open a new one. 38 | 39 | #### Windows + WSL 40 | 41 | Add ` ~/.local/bin` to your `$PATH`. [#2][i2] 42 | 43 | ## Quick-Guide 44 | 45 | The **Genre** you define at the start, is the folder my program will download the files into, as well as the value of the ID3 genre field. 46 | 47 | When it drops you into the **shell** 2 main things are important: 48 | 49 | 1. You search with `s: ` 50 | 2. You choose an option with just the index number of the option 51 | 3. You download with `d: `, where the options are comma separated 52 | 53 | ### Query 54 | 55 | The syntax for the query is really simple. 56 | 57 | ```mk 58 | > s: #a 59 | searches for the artist 60 | 61 | > s: #a #r 62 | searches for the release (album) by the artist 63 | 64 | > s: #r Me #t 65 | searches for the track from the release 66 | ``` 67 | 68 | The escape character is as usual `\`. 69 | 70 | --- 71 | 72 | ## Matrix Space 73 | 74 | music-kraken logo 75 | 76 | I decided against creating a discord server, due to various communities get often banned from discord. 
A good and free Alternative are Matrix Spaces. I recommend the use of the Client [Element](https://element.io/download). It is completely open source. 77 | 78 | **Click [this invitation](https://matrix.to/#/#music-kraken:matrix.org) _([https://matrix.to/#/#music-kraken:matrix.org](https://matrix.to/#/#music-kraken:matrix.org))_ to join.** 79 | 80 | [i10]: https://github.com/HeIIow2/music-downloader/issues/10 81 | [i2]: https://github.com/HeIIow2/music-downloader/issues/2 82 | -------------------------------------------------------------------------------- /assets/element_logo.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kur01234/music-downloader/665bf0e47517071450b6f25dffbb81fe21944ead/assets/element_logo.jpeg -------------------------------------------------------------------------------- /assets/element_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kur01234/music-downloader/665bf0e47517071450b6f25dffbb81fe21944ead/assets/element_logo.png -------------------------------------------------------------------------------- /assets/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kur01234/music-downloader/665bf0e47517071450b6f25dffbb81fe21944ead/assets/logo.jpg -------------------------------------------------------------------------------- /assets/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 16 | 19 | 22 | 25 | 28 | 31 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /assets/logo_cropped.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kur01234/music-downloader/665bf0e47517071450b6f25dffbb81fe21944ead/assets/logo_cropped.jpg 
-------------------------------------------------------------------------------- /assets/logo_wireframe.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 7 | 8 | 10 | 33 | 37 | 42 | 46 | 51 | 54 | 69 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /assets/room_pictures/development.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kur01234/music-downloader/665bf0e47517071450b6f25dffbb81fe21944ead/assets/room_pictures/development.png -------------------------------------------------------------------------------- /assets/room_pictures/development.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 39 | 55 | 56 | 58 | 62 | 69 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /build: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | test=false 4 | version_bump="minor" 5 | 6 | while getopts ":b:t" opt; do 7 | case ${opt} in 8 | b ) 9 | version_bump=$OPTARG 10 | ;; 11 | t ) 12 | test=true 13 | ;; 14 | \? ) 15 | echo "Invalid option: $OPTARG" 1>&2 16 | exit 1 17 | ;; 18 | : ) 19 | echo "Invalid option: $OPTARG requires an argument" 1>&2 20 | exit 1 21 | ;; 22 | esac 23 | done 24 | shift $((OPTIND -1)) 25 | 26 | # install dev dependencies 27 | echo "installing dev dependencies..." 28 | python3 -m pip install -r requirements-dev.txt 29 | 30 | # hatch version ${version_bump} 31 | # git add "music_kraken/__init__.py" 32 | # git commit -m "bump: ${version_bump}" 33 | 34 | # build the wheels 35 | python3 -m build 36 | 37 | # install the newest version 38 | # python3 -m pip install . 
39 | 40 | if [ "$test" = true ]; 41 | then 42 | echo "just a test" 43 | twine upload --repository testpypi dist/music_kraken* 44 | python3 -m pip install -i https://test.pypi.org/simple/ music-kraken -U 45 | exit 46 | fi 47 | 48 | twine upload dist/music_kraken* 49 | -------------------------------------------------------------------------------- /contribute.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | I am always happy about pull requests. 4 | 5 | If something is missing, like attributes for an object feel free to either add it yourself or open an issue, if you choose to just change it, beware that something may have to change. :3 6 | 7 | So here is a List of what you can do: 8 | 9 | 1. [implement a new page like e.g. Soundcloud](#add-a-new-page) 10 | 2. [help securing this programm](#find-a-security-vulnerability) 11 | 12 | ## Add a new Page 13 | 14 | The audio and Metadata Sources all inherit from the class `Page`, which can be found in [abstract.py](src/music_kraken/pages/abstract.py). 15 | 16 | You can create a subclass of this class for, for example `YouTube` or `Musify` or whatever. 17 | 18 | 1. Just create a new file with the name `your_page.py` 19 | in the [page module](src/music_kraken/pages). 20 | 2. Then you can simply copy the contents of the [preset](src/music_kraken/pages/preset.py) over to your file. 21 | 3. All the functions you need to implement, can be found in the [preset](src/music_kraken/pages/preset.py). 22 | 23 | ### Important notes 24 | 25 | - There is no need to check if you for example added a source of a song twice. I do much post-processing to the data you scrape in the page classes. You can see what exactly I do in [abstract.py](src/music_kraken/pages/abstract.py). 26 | - Use the connection class how it is laid out in the preset to make the request. This will take care of retrying requests, rotating proxies, consistent use of tor (if selected in the config). 
You have: 27 | - `connection.get()` 28 | - `connection.post()` 29 | - Look at the code of the pages I already have implemented. Namely: 30 | - [musify.club](src/music_kraken/pages/musify.py) _(heavily making use of web scraping)_ 31 | - [YouTube](src/music_kraken/pages/youtube.py) _(using both invidious and piped)_ 32 | - [Metal Archives](src/music_kraken/pages/youtube.py) 33 | 34 | ## Find a security vulnerability 35 | 36 | I take security seriously. Really. 37 | 38 | If you find a vulnerability that is rather critical, [write me on matrix](https://matrix.to/#/@hellow_2:matrix.org). 39 | Under vulnerability counts: 40 | 41 | - If there is a bug, which makes music_kraken ignore a proxy/tor setting. 42 | 43 | BUT... There could be more stuff, that falls under security. 44 | -------------------------------------------------------------------------------- /development/actual_donwload.py: -------------------------------------------------------------------------------- 1 | import music_kraken 2 | 3 | import logging 4 | print("Setting logging-level to DEBUG") 5 | logging.getLogger().setLevel(logging.DEBUG) 6 | 7 | if __name__ == "__main__": 8 | commands = [ 9 | "s: #a Crystal F", 10 | "10", 11 | "1", 12 | "3", 13 | ] 14 | 15 | 16 | music_kraken.cli.download(genre="test", command_list=commands, process_metadata_anyway=True) 17 | _ = "debug" -------------------------------------------------------------------------------- /development/objects_collection.py: -------------------------------------------------------------------------------- 1 | import music_kraken 2 | from music_kraken.objects import Song, Album, Artist, Collection 3 | 4 | if __name__ == "__main__": 5 | song_1 = Song( 6 | title="song", 7 | feature_artist_list=[Artist( 8 | name="main_artist" 9 | )] 10 | ) 11 | 12 | other_artist = Artist(name="other_artist") 13 | 14 | song_2 = Song( 15 | title = "song", 16 | artist_list=[other_artist] 17 | ) 18 | 19 | other_artist.name = "main_artist" 20 | 21 | 
song_1.merge(song_2) 22 | 23 | print("#" * 120) 24 | print("main", *song_1.artist_collection) 25 | print("feat", *song_1.feature_artist_collection) 26 | -------------------------------------------------------------------------------- /documentation/config.md: -------------------------------------------------------------------------------- 1 | > This is bs, ima use dynaconf 2 | 3 | # Concept 4 | 5 | The core concept is, to have instances of dataclasses that hold all values. On programm start the values are just overridden by those in the file. 6 | 7 | ## Dataclass Structure 8 | 9 | You have one [File](#file) class, that contains a list of [Section](#section) classes. 10 | Every [Section](#section) class contains a list of [SectionElement](#section-elements) classes. 11 | 12 | # Classes 13 | 14 | ## File 15 | 16 | `File` classes have one name, with whom the path will be generated: 17 | 18 | ``` 19 | {CONFIG_DIR}/{file_name}.conf 20 | ``` 21 | 22 | I also pass in the config direcory in the constructor, such that the module can be pretty independently used. Though it's default value is the default config director from `utils.path_manager`. 23 | 24 | 25 | They contain a list of [ConfigElement](#config-elements)s, arguably the most important ones. 
26 | 27 | ## Config Elements 28 | 29 | # Config Syntax 30 | 31 | - every line is stripped from all whitespaces at the beginning and end 32 | 33 | ``` 34 | # a comment 35 | 36 | config_name=some_value 37 | 38 | # list 39 | [config_name.list.start] 40 | config_name=one list item 41 | config_name=another list item 42 | [config_name.list.end] 43 | 44 | # dict 45 | [config_name.dict.start] 46 | one_key=one value item 47 | another_key=another value item 48 | [config_name.dict.end] 49 | ``` 50 | 51 | - empty lines will be ignored 52 | - If `#` is at the beginning of the line, it will be ignored 53 | - if there is neither a `\[.*\]` or a `=` in a line, it will raise a warning, but will be ignored 54 | -------------------------------------------------------------------------------- /documentation/connections.md: -------------------------------------------------------------------------------- 1 | # Connections 2 | 3 | ## Functions 4 | 5 | A class, that gives me the options, to make web request 6 | 7 | 8 | -------------------------------------------------------------------------------- /documentation/html/bandcamp/artist_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Music | Only Smile 9 | 10 | 11 | 12 | 13 | 14 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 44 | 45 |
46 |
47 | 48 |
49 |
50 | 51 |
52 | 53 | 83 | 84 | 85 |
86 | 87 | 88 |
89 |

about

90 | 91 |

92 | Only Smile 93 | Russia 94 |

95 | 96 | 97 | 104 |
105 | 106 |

107 | 108 | Contact Only Smile 109 | 110 |

111 | 112 | 113 |

114 | 115 | Streaming and 116 |
117 | Download help 118 |
119 |

120 |
121 |
122 |
123 |
124 | 125 | 126 | -------------------------------------------------------------------------------- /documentation/html/metal-archives/artist_sources.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 |
7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 19 | 20 | 21 | 23 | 24 | 25 | 27 | 28 | 29 | 31 | 32 | 33 | 35 | 36 | 37 | 39 | 40 | 41 | 42 | 43 | 44 | 46 | 47 | 48 | 51 | 52 | 53 | 55 | 56 | 57 | 59 | 60 | 61 | 62 | 63 | 64 | 66 | 67 |
Official
Bandcamp 13 |
Deezer
Facebook
Instagram
Spotify
Twitter
YouTube
Official merchandise
All In Merchandise
Amazon
Direct Merch
iTunes
Labels
Nuclear Blast
68 |
69 | -------------------------------------------------------------------------------- /documentation/html/metal-archives/discography.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 16 | 17 | 18 | 21 | 22 | 23 | 24 | 25 | 26 | 29 | 30 | 31 | 32 | 33 | 34 | 37 | 38 | 39 | 40 | 41 | 42 | 45 | 46 | 47 | 49 | 50 | 51 | 54 | 55 | 56 | 58 | 59 | 60 | 63 | 64 | 65 |
NameTypeYearReviews
Ghost Bath 15 | EP2013 19 | 2 (29%) 20 |
FuneralFull-length2014 27 | 5 (66%) 28 |
BurialSingle2014 35 |   36 |
MoonloverFull-length2015 43 | 9 (54%) 44 |
Starmourner 48 | Full-length2017 52 | 3 (37%) 53 |
Self 57 | LoatherFull-length2021 61 | 2 (68%) 62 |
-------------------------------------------------------------------------------- /documentation/html/metal-archives/overview.md: -------------------------------------------------------------------------------- 1 | # Metal Archives 2 | https://www.metal-archives.com/ 3 | 4 | - [Artist page (https://www.metal-archives.com/)](artist.html) -------------------------------------------------------------------------------- /documentation/html/musify/details.md: -------------------------------------------------------------------------------- 1 | title | url 2 | --- | --- 3 | song | https://musify.club/track/linkin-park-numb-210765 -------------------------------------------------------------------------------- /documentation/html/youtube-music/search/search.md: -------------------------------------------------------------------------------- 1 | # Search 2 | 3 | ## Files 4 | 5 | what is it | query | file 6 | ---|---|--- 7 | general search response | `psychonaut 4` | [general-result.json](general-result.json) 8 | 9 | ## A general search yields 10 | 11 | - **Top Result** 12 | - The top Artist 13 | - The most popular songs of said artist 14 | - **Songs** (3) excluding the top songs 15 | - Videos (3) 16 | - **Albums** (3) 17 | - Community playlists (3) 18 | - **Artists** (3) excluding the top artist 19 | - if you search for an artist, it might return artists that are similar in style, not in name 20 | 21 | ### Different Renderers 22 | 23 | #### `runs` 24 | 25 | Throughout the response, this should consistently be parsable into a list of Music Elements. 26 | 27 | `runs` usually is a list. If an element of the list has the key `navigationEndpoint`, it represents a music element in the following manner: 28 | 29 | - `text` the name 30 | - `navigationEndpoint` -> `browseEndpoint` 31 | - `browseId` the id of the artist/song/album...
32 | - `browseEndpointContextSupportedConfigs` -> `browseEndpointContextMusicConfig` -> `pageType` the type of the header-like element 33 | 34 | #### musicCardShelfRenderer 35 | 36 | Used by e.g. the `Top Results`. 37 | 38 | Contains: 39 | 40 | - One Main-Element (a header-like music object) | consists of these keys: 41 | - `thumbnail` the image of the header 42 | - `title` -> `runs` 43 | - for details look [here](#runs). 44 | 45 | 46 | ### Details 47 | 48 | You can get the contents (a list of [renderers](#musiccardshelfrenderer)) this way: 49 | 50 | ```python 51 | data = r.json().get("contents", {}).get("tabbedSearchResultsRenderer", {}).get("tabs", [{}])[0].get("tabRenderer").get("content", {}).get("sectionListRenderer", {}).get("contents", []) 52 | ``` 53 | 54 | The list then contains the following items, in this order: 55 | 56 | 1. _About these results_ (an infobutton) 57 | 2. The **Top result** 58 | 3. The **Songs** [_musicShelfRenderer_] 59 | 4. ... 60 | -------------------------------------------------------------------------------- /documentation/html/youtube/youtube.md: -------------------------------------------------------------------------------- 1 | # YouTube 2 | 3 | I can get data from YouTube by calling either: 4 | 5 | - invidious 6 | - youtube 7 | 8 | I will only get the structured data from YouTube Music => `{artist} - Topic`. Maybe other stuff will be implemented 9 | 10 | ## Functionality 11 | 12 | Every search result whose channel name doesn't end with ` - Topic` will be ignored.
13 | 14 | ### ISRC search 15 | 16 | ### YT music 17 | 18 | If searching for an artist, the query should be: 19 | `{artist} - Topic` 20 | -------------------------------------------------------------------------------- /documentation/objects.md: -------------------------------------------------------------------------------- 1 | # music_kraken.objects 2 | 3 | ## DatabaseObject 4 | 5 | [music_kraken.objects.DatabaseObject](../src/music_kraken/objects/parents.py) 6 | 7 | This is a parent object, which most Music-Objects inherit from. It provides the **functionality** to: 8 | 9 | - autogenerate id's *(UUID)*, if not passed in the constructor. 10 | - [merge](#databaseobjectmerge) the data of another instance of the same type into self. 11 | - Check if two different instances of the same type represent the same data, using `__eq__`. 12 | 13 | Additionally it provides an **Interface** to: 14 | 15 | - define the attributes used to [merge](#databaseobjectmerge). 16 | - define the attributes and values used to check for equal data. *(used in `__eq__` and in the merge)* 17 | - get the id3 [metadata](#metadata). 18 | - get all [options](#options) *(used in searching from e.g. the command line)* 19 | 20 | ### DatabaseObject.merge() 21 | 22 | To merge the data of two instances of the same type, the attributes defined in `DatabaseObject.COLLECTION_STRING_ATTRIBUTES` and `SIMPLE_STRING_ATTRIBUTES` are used. 23 | 24 | The simple attributes just get carried from the other instance, to the self instance. 25 | 26 | The collection attributes append all elements from other.collection to self.collection, but ofc [check if they already exist](#collection). 27 | 28 | ## Collection 29 | 30 | [music_kraken.objects.Collection](../src/music_kraken/objects/collection.py) 31 | 32 | This is an object, which acts as a list. You can save instances of a subclass of [DatabaseObject](#databaseobject). 33 | 34 | Then you can for example append a new Object.
The difference to a normal list is, that if you have two different objects that both represent the same data, it doesn't get added, but all data gets [merged](#databaseobjectmerge) into the existing Object instead. 35 | 36 | For example, you have two different Artist-Objects, where both have one source in common. The one Artist-Object already is in the Collection. The other artist object is passed in the append command. 37 | In this case it doesn't simply add the artist object to the collection, but modifies the already existing Artist-Object, adding all attributes the new artist object has, and then discards the other object. 38 | 39 | ```python 40 | artist_collection = Collection(element_type=Artist) 41 | 42 | # adds the artist to the list (len 1) 43 | artist_collection.append(artist_1) 44 | 45 | # detects artist 2 has a mutual source 46 | # thus not adding but merging (len 1) 47 | artist_collection.append(artist_2) 48 | ``` 49 | 50 | Function | Explanation 51 | ---|--- 52 | `append()` | appends an object to the collection 53 | `extend()` | appends a list of objects to the collection 54 | `__len__()` | gets the amount of objects in collection 55 | `shallow_list` | gets a shallow copy of the list `_data` the objects are contained in 56 | `sort()` | takes the same arguments as `list.sort`, and does the same 57 | `__iter__()` | allows you to use collections in e.g. a for loop 58 | 59 | ### Appending and Merging data 60 | 61 | If you want to append for example a Song to an Album, you obviously need to check beforehand if the Song already exists in the Album, and if so, you need to merge their data in one Song object, to not lose any Information. 62 | 63 | This is how I solve this problem: 64 | 65 | ```mermaid 66 | --- 67 | title: "Collection.append(music_object: MusicObject)" 68 | --- 69 | flowchart TD 70 | exist(""" 71 | Check if music_object already exists. 72 |
73 | Gets all indexing values with music_object.indexing_values. 74 | If any returned value exists in Collection._attribute_to_object_map, 75 | the music_object exists 76 | """) 77 | 78 | subgraph merge["Merging"] 79 | 80 | _merge("""merges the passed in object in the already 81 | existing with existing.merge(new)""") 82 | 83 | _map("""In case a new source or something similar 84 | has been added, it maps the existing object again. 85 | """) 86 | 87 | _merge --> _map 88 | 89 | end 90 | 91 | subgraph add["Adding"] 92 | 93 | __map("""map the values from music_object.indexing_values 94 | to Collection._attribute_to_object_map by writing 95 | those values in the map as keys, and the class I wanna add as values. 96 | """) 97 | 98 | _add("""add the new music object to _data""") 99 | 100 | __map --> _add 101 | 102 | end 103 | 104 | exist-->|"if it doesn't exist"|add --> return 105 | exist-->|"if already exists"|merge --> return 106 | ``` 107 | 108 | This is Implemented in [music_kraken.objects.Collection.append()](documentation/objects.md#collection). The merging which is mentioned in the flowchart is explained in the documentation of [DatabaseObject.merge()](documentation/objects.md#databaseobjectmerge). 109 | 110 | The indexing values are defined in the superclass [DatabaseObject](documentation/objects.md#databaseobject) and get implemented for each Object separately. I will just give as example its implementation for the `Song` class: 111 | 112 | ```python 113 | @property 114 | def indexing_values(self) -> List[Tuple[str, object]]: 115 | return [ 116 | ('id', self.id), 117 | ('title', self.unified_title), 118 | ('barcode', self.barcode), 119 | *[('url', source.url) for source in self.source_collection] 120 | ] 121 | ``` 122 | 123 | ## Song 124 | 125 | This object inherits from [DatabaseObject](#databaseobject) and implements all its interfaces. 126 | 127 | It has a handful of attributes, where half of them are self-explanatory, like `title` or `genre`.
The ones like `isrc` are only relevant to you if you know what they are, so I won't elaborate on them. 128 | 129 | Interesting is the `date`. It uses a custom class. More on that [here](#id3timestamp). 130 | 131 | ## ID3Timestamp 132 | 133 | For multiple reasons I don't use the default `datetime.datetime` class. 134 | 135 | The most important reason is that you need to pass in at least year, month and day. For all other values there are default values that are indistinguishable from values that are directly passed in. But I need optional values. The ID3 standard allows default values. Additionally `datetime.datetime` is immutable, thus I can't inherit all the methods. Sorry. 136 | 137 | Anyway you can create those custom objects easily. 138 | 139 | ```python 140 | from music_kraken import ID3Timestamp 141 | 142 | # returns an instance of ID3Timestamp with the current time 143 | ID3Timestamp.now() 144 | 145 | # returns an instance of ID3Timestamp with the given values 146 | # all values are optional if unknown 147 | ID3Timestamp(year=1986, month=3, day=1, hour=12, minute=30, second=6) 148 | ``` 149 | -------------------------------------------------------------------------------- /documentation/program_structure.md: -------------------------------------------------------------------------------- 1 | # Downloading 2 | 3 | ## Query 4 | 5 | - parsing query into music objects 6 | 7 | # Pages 8 | 9 | ## from music objects to query 10 | 11 | ``` 12 | song: artist1, artist2 13 | 14 | 1. artist1 - song 15 | 2. artist2 - song 16 | ``` 17 | 18 | ``` 19 | artist: song1, song2 20 | 21 | 1. 
artist 22 | ``` 23 | -------------------------------------------------------------------------------- /documentation/shell.md: -------------------------------------------------------------------------------- 1 | # Shell 2 | 3 | ## Searching 4 | 5 | ```mkshell 6 | > s: {query or url} 7 | 8 | # examples 9 | > s: https://musify.club/release/some-random-release-183028492 10 | > s: r: #a an Artist #r some random Release 11 | ``` 12 | 13 | Searches for a URL or a query. 14 | 15 | ### Query Syntax 16 | 17 | ``` 18 | #a {artist} #r {release} #t {track} 19 | ``` 20 | 21 | You can escape stuff like `#` by doing this: `\#` 22 | 23 | ## Downloading 24 | 25 | To download something, you either need a direct link, or you need to have already searched for options. 26 | 27 | ```mkshell 28 | > d: {option ids or direct url} 29 | 30 | # examples 31 | > d: 0, 3, 4 32 | > d: 1 33 | > d: https://musify.club/release/some-random-release-183028492 34 | ``` 35 | 36 | ## Results 37 | 38 | If options are printed in **bold** they can be downloaded. Otherwise they may or may not be downloadable. 39 | 40 | ## Misc 41 | 42 | ### Exit 43 | 44 | ```mkshell 45 | > q 46 | > quit 47 | > exit 48 | > abort 49 | ``` 50 | 51 | ### Current Options 52 | 53 | ```mkshell 54 | > . 55 | ``` 56 | 57 | ### Previous Options 58 | 59 | ``` 60 | > .. 
def init_logging():
    """Rotate the previous log file and set up the root logger.

    When the file named by ``main_settings['log_file']`` already exists, its
    content is copied into a sibling file called ``prev.<name>``. The root
    logger is then configured with two handlers: a file handler recording
    everything from DEBUG upwards, and a rich console handler whose level is
    ``logging_settings['log_level']`` (or DEBUG when ``DEBUG_LOGGING`` is set).
    """
    current_log = main_settings['log_file']

    if current_log.is_file():
        # keep one generation of history: the last run ends up in "prev.<name>"
        previous_log = Path(current_log.parent, "prev." + current_log.name)

        with current_log.open("r", encoding="utf-8") as source:
            with previous_log.open("w", encoding="utf-8") as destination:
                destination.write(source.read())

    console_level = logging.DEBUG if DEBUG_LOGGING else logging_settings['log_level']
    console_handler = RichHandler(rich_tracebacks=True, console=console)
    console_handler.setLevel(console_level)

    # the log file always receives the full debug output
    log_file_handler = logging.FileHandler(current_log)
    log_file_handler.setLevel(logging.DEBUG)

    # configure logger default
    logging.basicConfig(
        level=logging.DEBUG,
        format=logging_settings['logging_format'],
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[
            log_file_handler,
            console_handler,
        ]
    )
def cli():
    """Parse the command line and run the requested music-kraken action.

    Project imports happen after argument parsing, so ``--help`` and argument
    errors return before any project module is loaded. Maintenance options
    (settings, paths, frontend, cache) are dispatched first; if none of them
    ends the process, the interactive downloader is started.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="A simple yet powerful cli to download music with music-kraken.",
        epilog="This is a cli for the developers, and it is shipped with music-krakens core.\n"
               "While it is a nice and solid cli it will lack some features.\n"
               "The proper cli and other frontends will be made or already have been made.\n"
               "To see all current frontends check the docs at: https://github.com/HeIIow2/music-downloader"
    )

    # arguments for debug purposes
    parser.add_argument(
        '-v', '--verbose',
        action="store_true",
        help="Sets the logging level to debug."
    )

    parser.add_argument(
        '-m', '--force-post-process',
        action="store_true",
        help="If a to downloaded thing is skipped due to being found on disc,\nit will still update the metadata accordingly."
    )

    parser.add_argument(
        '-t', '--test',
        action="store_true",
        # -t only enables verbose logging and forces the genre; it does not imply -p
        help="For the sake of testing. Equals: '-v -g test'"
    )

    # general arguments
    parser.add_argument(
        '-a', '--all',
        action="store_true",
        help="If set it will download EVERYTHING the music downloader can find.\n"
             "For example weird compilations from musify."
    )

    parser.add_argument(
        '-g', '--genre',
        help="Specifies the genre. (Will be overwritten by -t)"
    )

    parser.add_argument(
        '-u', '--url',
        help="Downloads the content of given url."
    )

    parser.add_argument(
        '--settings',
        help="Opens a menu to modify the settings",
        action="store_true"
    )

    parser.add_argument(
        '-s',
        '--setting',
        help="Modifies a setting directly.",
        nargs=2
    )

    parser.add_argument(
        "--paths",
        "-p",
        help="Prints an overview over all music-kraken paths.",
        action="store_true"
    )

    parser.add_argument(
        "-r",
        help="Resets the config file to the default one.",
        action="store_true"
    )

    parser.add_argument(
        "--frontend",
        "-f",
        help="Set a good and fast invidious/piped instance from your homecountry, to reduce the latency.",
        action="store_true"
    )

    parser.add_argument(
        "--clear-cache",
        help="Deletes the cache.",
        action="store_true"
    )

    parser.add_argument(
        "--clean-cache",
        help="Deletes the outdated cache. (all expired cached files, and not indexed files)",
        action="store_true"
    )

    arguments = parser.parse_args()

    if arguments.verbose or arguments.test:
        import logging
        print("Setting logging-level to DEBUG")
        logging.getLogger().setLevel(logging.DEBUG)

    # NOTE: this local name shadows the enclosing function inside its own body only
    from . import cli
    from .utils.config import read_config
    from .utils import shared

    if arguments.r:
        # reset: remove every file in the config directory, then let
        # read_config() run again on the now-empty directory
        for file in shared.CONFIG_DIRECTORY.iterdir():
            if file.is_file():
                print(f"Deleting {file}....")
                file.unlink()
        read_config()

        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``)
        sys.exit()

    read_config()

    if arguments.setting is not None:
        cli.settings(*arguments.setting)

    if arguments.settings:
        cli.settings()

    if arguments.paths:
        cli.print_paths()

    if arguments.frontend:
        cli.set_frontend(silent=False)

    if arguments.clear_cache:
        from .cli.options import cache
        cache.clear_cache()

    if arguments.clean_cache:
        from .cli.options import cache
        cache.clean_cache()

    # getting the genre
    genre: str = arguments.genre
    if arguments.test:
        genre = "test"

    cli.download(
        genre=genre,
        download_all=arguments.all,
        direct_download_url=arguments.url,
        # NOTE(review): the original passed ``True or arguments.test``, which is
        # always True, so -m/--force-post-process has no effect. The behaviour
        # is kept; confirm whether the flag should be wired in here.
        process_metadata_anyway=True
    )
def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], audio_format: str = main_settings["audio_format"], skip_intervals: List[Tuple[float, float]] = None):
    """Re-encode ``target`` in place with ffmpeg, optionally cutting out intervals.

    The file is converted into a temporary sibling ``<target>.<audio_format>``,
    whose content is then copied back over ``target`` before the temporary file
    is deleted. Nothing happens (besides a warning) if ``target`` doesn't exist.

    :param target: the audio file to convert
    :param bitrate_kb: target bitrate; going by the name, in kilobits per second
    :param audio_format: file extension that selects the output format for ffmpeg
    :param skip_intervals: ``(start, end)`` pairs in seconds which get removed
        from the audio; ``None`` keeps everything
    """
    if not target.exists:
        LOGGER.warning(f"Target doesn't exist: {target.file_path}")
        return

    skip_intervals = skip_intervals or []

    # ffmpeg takes the bitrate in bits per second, so kilobits have to be
    # multiplied; the original divided, which rounded typical values to 0
    bitrate_b = int(bitrate_kb * 1024)

    output_target = Target(
        file_path=Path(str(target.file_path) + "." + audio_format)
    )

    # get the select thingie
    # https://stackoverflow.com/questions/50594412/cut-multiple-parts-of-a-video-with-ffmpeg
    aselect_list: List[str] = []

    # every skipped interval ends one kept range and starts the next one
    start = 0
    next_start = 0
    for end, next_start in skip_intervals:
        aselect_list.append(f"between(t,{start},{end})")
        start = next_start
    # keep everything after the last skipped interval
    aselect_list.append(f"gte(t,{next_start})")

    select = f"aselect='{'+'.join(aselect_list)}',asetpts=N/SR/TB"

    # build the ffmpeg command
    ffmpeg_command = [
        str(main_settings["ffmpeg_binary"]),
        "-i", str(target.file_path),
        "-af", select,
        # a bare "-b" is ambiguous to ffmpeg; this is audio, so address the audio stream
        "-b:a", str(bitrate_b),
        str(output_target.file_path)
    ]

    # run the ffmpeg command with a progressbar
    ff = FfmpegProgress(ffmpeg_command)
    with tqdm(total=100, desc="processing") as pbar:
        for progress in ff.run_command_with_progress():
            # tqdm wants increments, ffmpeg reports absolute percent
            pbar.update(progress-pbar.n)

    LOGGER.debug(ff.stderr)

    output_target.copy_content(target)
    output_target.delete()
add_metadata(self, metadata: Metadata): 29 | for value in metadata: 30 | """ 31 | https://www.programcreek.com/python/example/84797/mutagen.id3.ID3 32 | """ 33 | if value is None: 34 | continue 35 | self.frames.add(value) 36 | 37 | def add_song_metadata(self, song: Song): 38 | self.add_metadata(song.metadata) 39 | 40 | def save(self, file_location: Path = None): 41 | LOGGER.debug(f"saving following frames: {self.frames.pprint()}") 42 | 43 | if file_location is not None: 44 | self.file_location = file_location 45 | 46 | if self.file_location is None: 47 | raise Exception("no file target provided to save the data to") 48 | self.frames.save(self.file_location, v2_version=4) 49 | 50 | def set_file_location(self, file_location: Path): 51 | # try loading the data from the given file. if it doesn't succeed the frame remains empty 52 | try: 53 | self.frames.load(file_location, v2_version=4) 54 | LOGGER.debug(f"loaded following from \"{file_location}\"\n{self.frames.pprint()}") 55 | except mutagen.MutagenError: 56 | LOGGER.warning(f"couldn't find any metadata at: \"{self.file_location}\"") 57 | self._file_location = file_location 58 | 59 | file_location = property(fget=lambda self: self._file_location, fset=set_file_location) 60 | 61 | 62 | def write_metadata_to_target(metadata: Metadata, target: Target, song: Song): 63 | if not target.exists: 64 | LOGGER.warning(f"file {target.file_path} not found") 65 | return 66 | 67 | id3_object = AudioMetadata(file_location=target.file_path) 68 | 69 | LOGGER.info(str(metadata)) 70 | 71 | if song.artwork.best_variant is not None: 72 | best_variant = song.artwork.best_variant 73 | 74 | r = artwork_connection.get( 75 | url=best_variant["url"], 76 | name=song.artwork.get_variant_name(best_variant), 77 | ) 78 | 79 | temp_target: Target = Target.temp() 80 | with temp_target.open("wb") as f: 81 | f.write(r.content) 82 | 83 | converted_target: Target = Target.temp(name=f"{song.title.replace('/', '_')}") 84 | with 
def write_metadata(song: Song, ignore_file_not_found: bool = True):
    """Write ``song.metadata`` into every existing target file of ``song``.

    :param song: the song whose targets should be tagged
    :param ignore_file_not_found: if True, targets that don't exist on disk are
        skipped silently; if False the first missing target raises
    :raises ValueError: if a target is missing and ``ignore_file_not_found`` is False
    """
    target: Target
    for target in song.target:
        if not target.exists:
            if ignore_file_not_found:
                continue
            # name the target that is missing; ``song.target`` is the iterated
            # collection and was wrongly used for the message before
            raise ValueError(f"{target.file_path} not found")

        write_metadata_to_target(metadata=song.metadata, target=target, song=song)
@cli_function
def print_paths():
    """Print one ``<name>:<TAB><path>`` line for every entry of ``all_paths()``."""
    paths = all_paths()
    for label in paths:
        location = paths[label]
        print(f"{label}:\t{location}")
@dataclass
class Instance:
    """
    One selectable frontend instance.

    ``users`` is the attribute used to rank instances against each other;
    it stays 0 when no user count is given.
    """
    name: str
    uri: str
    regions: List[Country]
    users: int = 0

    def __str__(self) -> str:
        return "{} with {} users.".format(self.name, self.users)
28 | 29 | 30 | class FrontendInstance: 31 | SETTING_NAME = "placeholder" 32 | 33 | def __init__(self) -> None: 34 | self.region_instances: Dict[Country, List[Instance]] = defaultdict(list) 35 | self.all_instances: List[Instance] = [] 36 | 37 | def add_instance(self, instance: Instance): 38 | self.all_instances.append(instance) 39 | 40 | youtube_lists = youtube_settings["youtube_url"] 41 | existing_netlocs = set(tuple(url.netloc for url in youtube_lists)) 42 | 43 | parsed_instance = urlparse(instance.uri) 44 | instance_netloc = parsed_instance.netloc 45 | 46 | if instance_netloc not in existing_netlocs: 47 | youtube_lists.append(parsed_instance) 48 | youtube_settings.__setitem__("youtube_url", youtube_lists, is_parsed=True) 49 | 50 | for region in instance.regions: 51 | self.region_instances[region].append(instance) 52 | 53 | def fetch(self, silent: bool = False): 54 | if not silent: 55 | print(f"Downloading {type(self).__name__} instances...") 56 | 57 | def set_instance(self, instance: Instance): 58 | youtube_settings.__setitem__(self.SETTING_NAME, instance.uri) 59 | 60 | def _choose_country(self) -> List[Instance]: 61 | print("Input the country code, an example would be \"US\"") 62 | print('\n'.join(f'{region.name} ({region.alpha_2})' for region in self.region_instances)) 63 | print() 64 | 65 | 66 | available_instances = set(i.alpha_2 for i in self.region_instances) 67 | 68 | chosen_region = "" 69 | 70 | while chosen_region not in available_instances: 71 | chosen_region = input("nearest country: ").strip().upper() 72 | 73 | return self.region_instances[Country.by_alpha_2(chosen_region)] 74 | 75 | def choose(self, silent: bool = False): 76 | instances = self.all_instances if silent else self._choose_country() 77 | instances.sort(key=lambda x: x.users, reverse=True) 78 | 79 | if silent: 80 | self.set_instance(instances[0]) 81 | return 82 | 83 | # output the options 84 | print("Choose your instance (input needs to be a digit):") 85 | for i, instance in 
class Invidious(FrontendInstance):
    """Collects the public Invidious instances offered by api.invidious.io."""
    SETTING_NAME = "invidious_instance"

    def __init__(self) -> None:
        self.connection = Connection(host="https://api.invidious.io/")
        self.endpoint = "https://api.invidious.io/instances.json"

        super().__init__()

    def _process_instance(self, all_instance_data: list):
        """Register one entry of the instance list if it is usable.

        :param all_instance_data: a ``[name, data]`` pair as it is indexed
            below; only https instances with an enabled api are added
        """
        instance_data = all_instance_data[1]

        if not instance_data["api"]:
            return
        if instance_data["type"] != "https":
            return

        # the payload is third-party json: tolerate a missing or null stats
        # tree instead of crashing the whole fetch, and rank such an instance last
        stats = instance_data.get("stats") or {}
        usage = stats.get("usage") or {}
        users = (usage.get("users") or {}).get("total") or 0

        region = instance_data["region"]

        instance = Instance(
            name=all_instance_data[0],
            uri=instance_data["uri"],
            regions=[Country.by_alpha_2(region)],
            users=users
        )

        self.add_instance(instance)

    def fetch(self, silent: bool = False):
        """Download the instance list and register every usable instance.

        ``silent`` defaults to False like in the base class, so all frontends
        can be fetched the same way. Nothing is added if the request fails.
        """
        r = self.connection.get(self.endpoint)
        if r is None:
            return

        for instance in r.json():
            self._process_instance(all_instance_data=instance)
def modify_setting_by_index(index: int) -> bool:
    """Interactively ask for a new value of the setting at ``index``.

    The prompt is repeated until ``modify_setting`` accepts the input. This
    single definition replaces two identical copies; it loops instead of
    recursing, so repeated invalid input can't exhaust the recursion limit.

    :param index: position of the setting when iterating ``config``
    :return: always True, once a value has been accepted
    """
    while True:
        attribute = list(config)[index]

        print()
        print(attribute)

        input__ = input(f"{attribute.name}=")
        if modify_setting(attribute.name, input__.strip()):
            return True
def print_cute_message():
    """Print a random message, falling back to plain ASCII if needed.

    Some terminals can't encode every character of the message; in that case
    all characters outside of the ASCII range are dropped and the rest is printed.
    """
    message = get_random_message()
    try:
        print(message)
    except UnicodeEncodeError:
        # join the filtered characters: str() on the generator would only
        # yield its repr ("<generator object ...>") instead of the text
        message = "".join(c for c in message if 0 < ord(c) < 127)
        print(message)
"index.json") 51 | 52 | if not self.index.is_file(): 53 | with self.index.open("w") as i: 54 | i.write(json.dumps([])) 55 | 56 | self.cached_attributes: List[CacheAttribute] = [] 57 | self._id_to_attribute = {} 58 | 59 | self._time_fields = {"created", "expires"} 60 | with self.index.open("r") as i: 61 | try: 62 | for c in json.loads(i.read()): 63 | for key in self._time_fields: 64 | c[key] = datetime.fromisoformat(c[key]) 65 | 66 | ca = CacheAttribute(**c) 67 | self.cached_attributes.append(ca) 68 | self._id_to_attribute[ca.id] = ca 69 | except json.JSONDecodeError: 70 | pass 71 | 72 | @lru_cache() 73 | def _init_module(self, module: str) -> Path: 74 | """ 75 | :param module: 76 | :return: the module path 77 | """ 78 | r = Path(self._dir, module) 79 | r.mkdir(exist_ok=True, parents=True) 80 | return r 81 | 82 | def _write_index(self, indent: int = 4): 83 | _json = [] 84 | for c in self.cached_attributes: 85 | d = c.__dict__ 86 | for key in self._time_fields: 87 | if not isinstance(d[key], str): 88 | d[key] = d[key].isoformat() 89 | 90 | _json.append(d) 91 | 92 | with self.index.open("w") as f: 93 | f.write(json.dumps(_json, indent=indent)) 94 | 95 | def _write_attribute(self, cached_attribute: CacheAttribute, write: bool = True) -> bool: 96 | existing_attribute: Optional[CacheAttribute] = self._id_to_attribute.get(cached_attribute.id) 97 | if existing_attribute is not None: 98 | # the attribute exists 99 | if existing_attribute == cached_attribute: 100 | return True 101 | 102 | if existing_attribute.is_valid: 103 | return False 104 | 105 | existing_attribute.__dict__ = cached_attribute.__dict__ 106 | else: 107 | self.cached_attributes.append(cached_attribute) 108 | self._id_to_attribute[cached_attribute.id] = cached_attribute 109 | 110 | if write: 111 | self._write_index() 112 | 113 | return True 114 | 115 | def set(self, content: bytes, name: str, expires_in: float = 10, module: str = "", additional_info: dict = None): 116 | """ 117 | :param content: 118 | :param 
module: 119 | :param name: 120 | :param expires_in: the unit is days 121 | :return: 122 | """ 123 | if name == "": 124 | return 125 | 126 | additional_info = additional_info or {} 127 | module = self.module if module == "" else module 128 | 129 | module_path = self._init_module(module) 130 | 131 | cache_attribute = CacheAttribute( 132 | module=module, 133 | name=name, 134 | created=datetime.now(), 135 | expires=datetime.now() + timedelta(days=expires_in), 136 | additional_info=additional_info, 137 | ) 138 | self._write_attribute(cache_attribute) 139 | 140 | cache_path = fit_to_file_system(Path(module_path, name.replace("/", "_")), hidden_ok=True) 141 | with cache_path.open("wb") as content_file: 142 | self.logger.debug(f"writing cache to {cache_path}") 143 | content_file.write(content) 144 | 145 | def get(self, name: str) -> Optional[CacheResult]: 146 | path = fit_to_file_system(Path(self._dir, self.module, name.replace("/", "_")), hidden_ok=True) 147 | 148 | if not path.is_file(): 149 | return None 150 | 151 | # check if it is outdated 152 | if f"{self.module}_{name}" not in self._id_to_attribute: 153 | path.unlink() 154 | return 155 | existing_attribute: CacheAttribute = self._id_to_attribute[f"{self.module}_{name}"] 156 | if not existing_attribute.is_valid: 157 | return 158 | 159 | with path.open("rb") as f: 160 | return CacheResult(content=f.read(), attribute=existing_attribute) 161 | 162 | def clean(self): 163 | keep = set() 164 | 165 | for ca in self.cached_attributes.copy(): 166 | if ca.name == "": 167 | continue 168 | 169 | file = fit_to_file_system(Path(self._dir, ca.module, ca.name.replace("/", "_")), hidden_ok=True) 170 | 171 | if not ca.is_valid: 172 | self.logger.debug(f"deleting cache {ca.id}") 173 | file.unlink() 174 | self.cached_attributes.remove(ca) 175 | del self._id_to_attribute[ca.id] 176 | 177 | else: 178 | keep.add(file) 179 | 180 | # iterate through every module (folder) 181 | for module_path in self._dir.iterdir(): 182 | if not 
module_path.is_dir(): 183 | continue 184 | 185 | # delete all files not in keep 186 | for path in module_path.iterdir(): 187 | if path not in keep: 188 | self.logger.info(f"Deleting cache {path}") 189 | path.unlink() 190 | 191 | # delete all empty directories 192 | for path in module_path.iterdir(): 193 | if path.is_dir() and not list(path.iterdir()): 194 | self.logger.debug(f"Deleting cache directory {path}") 195 | path.rmdir() 196 | 197 | self._write_index() 198 | 199 | def clear(self): 200 | """ 201 | delete every file in the cache directory 202 | :return: 203 | """ 204 | 205 | for path in self._dir.iterdir(): 206 | if path.is_dir(): 207 | for file in path.iterdir(): 208 | output(f"Deleting file {file}", color=BColors.GREY) 209 | file.unlink() 210 | output(f"Deleting folder {path}", color=BColors.HEADER) 211 | path.rmdir() 212 | else: 213 | output(f"Deleting folder {path}", color=BColors.HEADER) 214 | path.unlink() 215 | 216 | self.cached_attributes.clear() 217 | self._id_to_attribute.clear() 218 | 219 | self._write_index() 220 | 221 | def __repr__(self): 222 | return f"" 223 | -------------------------------------------------------------------------------- /music_kraken/connection/rotating.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import requests 4 | 5 | 6 | class RotatingObject: 7 | """ 8 | This will be used for RotatingProxies and invidious instances. 
9 | """ 10 | def __init__(self, object_list: list): 11 | self._object_list: list = object_list 12 | 13 | if len(self._object_list) <= 0: 14 | raise ValueError("There needs to be at least one item in a Rotating structure.") 15 | 16 | self._current_index = 0 17 | 18 | @property 19 | def object(self): 20 | return self._object_list[self._current_index] 21 | 22 | def __len__(self): 23 | return len(self._object_list) 24 | 25 | @property 26 | def next(self): 27 | self._current_index = (self._current_index + 1) % len(self._object_list) 28 | 29 | return self._object_list[self._current_index] 30 | 31 | 32 | class RotatingProxy(RotatingObject): 33 | def __init__(self, proxy_list: List[Dict[str, str]]): 34 | super().__init__( 35 | proxy_list if len(proxy_list) > 0 else [None] 36 | ) 37 | 38 | def rotate(self) -> Dict[str, str]: 39 | return self.next 40 | 41 | @property 42 | def current_proxy(self) -> Dict[str, str]: 43 | return super().object 44 | -------------------------------------------------------------------------------- /music_kraken/download/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Set 3 | 4 | from ..utils.config import main_settings 5 | from ..utils.enums.album import AlbumType 6 | 7 | 8 | @dataclass 9 | class FetchOptions: 10 | download_all: bool = False 11 | album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) 12 | 13 | 14 | @dataclass 15 | class DownloadOptions: 16 | download_all: bool = False 17 | album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) 18 | 19 | download_again_if_found: bool = False 20 | process_audio_if_found: bool = False 21 | process_metadata_if_found: bool = True 22 | -------------------------------------------------------------------------------- 
/music_kraken/download/results.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Type, Dict, List, Generator, Union 2 | from dataclasses import dataclass 3 | 4 | from ..objects import DatabaseObject 5 | from ..pages import Page, EncyclopaediaMetallum, Musify 6 | 7 | 8 | @dataclass 9 | class Option: 10 | index: int 11 | music_object: DatabaseObject 12 | 13 | 14 | class Results: 15 | def __init__(self, max_items_per_page: int = 10, **kwargs) -> None: 16 | self._by_index: Dict[int, DatabaseObject] = dict() 17 | self._page_by_index: Dict[int: Type[Page]] = dict() 18 | 19 | self.max_items_per_page = max_items_per_page 20 | 21 | def __iter__(self) -> Generator[DatabaseObject, None, None]: 22 | for option in self.formatted_generator(): 23 | if isinstance(option, Option): 24 | yield option.music_object 25 | 26 | def formatted_generator(self) -> Generator[Union[Type[Page], Option], None, None]: 27 | self._by_index = dict() 28 | self._page_by_index = dict() 29 | 30 | def __len__(self) -> int: 31 | return max(self._by_index.keys()) 32 | 33 | def __getitem__(self, index: int): 34 | return self._by_index[index] 35 | 36 | 37 | class SearchResults(Results): 38 | def __init__( 39 | self, 40 | pages: Tuple[Type[Page], ...] 
= None, 41 | **kwargs, 42 | ) -> None: 43 | super().__init__(**kwargs) 44 | 45 | self.pages = pages or [] 46 | # this would initialize a list for every page, which I don't think I want 47 | # self.results = Dict[Type[Page], List[DatabaseObject]] = {page: [] for page in self.pages} 48 | self.results: Dict[Type[Page], List[DatabaseObject]] = {} 49 | 50 | def add(self, page: Type[Page], search_result: List[DatabaseObject]): 51 | """ 52 | adds a list of found music objects to the according page 53 | WARNING: if a page already has search results, they are just gonna be overwritten 54 | """ 55 | 56 | self.results[page] = search_result 57 | 58 | def get_page_results(self, page: Type[Page]) -> "PageResults": 59 | return PageResults(page, self.results.get(page, [])) 60 | 61 | def __len__(self) -> int: 62 | return sum(min(self.max_items_per_page, len(results)) for results in self.results.values()) 63 | 64 | def formatted_generator(self): 65 | super().formatted_generator() 66 | i = 0 67 | 68 | for page in self.results: 69 | yield page 70 | 71 | j = 0 72 | for option in self.results[page]: 73 | yield Option(i, option) 74 | self._by_index[i] = option 75 | self._page_by_index[i] = page 76 | i += 1 77 | j += 1 78 | 79 | if j >= self.max_items_per_page: 80 | break 81 | 82 | 83 | class GoToResults(Results): 84 | def __init__(self, results: List[DatabaseObject], **kwargs): 85 | self.results: List[DatabaseObject] = results 86 | 87 | super().__init__(**kwargs) 88 | 89 | def __getitem__(self, index: int): 90 | return self.results[index] 91 | 92 | def __len__(self) -> int: 93 | return len(self.results) 94 | 95 | def formatted_generator(self): 96 | yield from (Option(i, o) for i, o in enumerate(self.results)) 97 | 98 | 99 | 100 | class PageResults(Results): 101 | def __init__(self, page: Type[Page], results: List[DatabaseObject], **kwargs) -> None: 102 | super().__init__(**kwargs) 103 | 104 | self.page: Type[Page] = page 105 | self.results: List[DatabaseObject] = results 106 | 107 | 108 
| def formatted_generator(self, max_items_per_page: int = 10): 109 | super().formatted_generator() 110 | i = 0 111 | 112 | yield self.page 113 | 114 | for option in self.results: 115 | yield Option(i, option) 116 | self._by_index[i] = option 117 | self._page_by_index[i] = self.page 118 | i += 1 119 | 120 | def __len__(self) -> int: 121 | return len(self.results) 122 | -------------------------------------------------------------------------------- /music_kraken/objects/__init__.py: -------------------------------------------------------------------------------- 1 | from typing_extensions import TypeVar 2 | from .option import Options 3 | 4 | from .metadata import Metadata, Mapping as ID3Mapping, ID3Timestamp 5 | 6 | from .source import Source, SourceType 7 | 8 | from .song import ( 9 | Song, 10 | Album, 11 | Artist, 12 | Target, 13 | Lyrics, 14 | Label 15 | ) 16 | 17 | from .formatted_text import FormattedText 18 | from .collection import Collection 19 | 20 | from .country import Country 21 | from .contact import Contact 22 | 23 | from .parents import OuterProxy 24 | 25 | from .artwork import Artwork 26 | 27 | DatabaseObject = OuterProxy 28 | -------------------------------------------------------------------------------- /music_kraken/objects/artwork.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import List, Optional, Dict, Tuple, Type, Union, TypedDict 4 | 5 | from .collection import Collection 6 | from .metadata import ( 7 | Mapping as id3Mapping, 8 | ID3Timestamp, 9 | Metadata 10 | ) 11 | from ..utils.string_processing import unify, hash_url 12 | 13 | from .parents import OuterProxy as Base 14 | 15 | from ..utils.config import main_settings 16 | 17 | 18 | class ArtworkVariant(TypedDict): 19 | url: str 20 | width: int 21 | height: int 22 | deviation: float 23 | 24 | 25 | class Artwork: 26 | def __init__(self, *variants: List[ArtworkVariant]) -> None: 27 | 
self._variant_mapping: Dict[str, ArtworkVariant] = {} 28 | 29 | for variant in variants: 30 | self.append(**variant) 31 | 32 | @staticmethod 33 | def _calculate_deviation(*dimensions: List[int]) -> float: 34 | return sum(abs(d - main_settings["preferred_artwork_resolution"]) for d in dimensions) / len(dimensions) 35 | 36 | def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None: 37 | if url is None: 38 | return 39 | 40 | self._variant_mapping[hash_url(url=url)] = { 41 | "url": url, 42 | "width": width, 43 | "height": height, 44 | "deviation": self._calculate_deviation(width, height), 45 | } 46 | 47 | @property 48 | def best_variant(self) -> ArtworkVariant: 49 | if len(self._variant_mapping.keys()) <= 0: 50 | return None 51 | return min(self._variant_mapping.values(), key=lambda x: x["deviation"]) 52 | 53 | def get_variant_name(self, variant: ArtworkVariant) -> str: 54 | return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" 55 | 56 | def __merge__(self, other: Artwork, **kwargs) -> None: 57 | for key, value in other._variant_mapping.items(): 58 | if key not in self._variant_mapping: 59 | self._variant_mapping[key] = value 60 | 61 | def __eq__(self, other: Artwork) -> bool: 62 | if not isinstance(other, Artwork): 63 | return False 64 | return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) 65 | -------------------------------------------------------------------------------- /music_kraken/objects/contact.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Tuple 2 | 3 | from ..utils.enums.contact import ContactMethod 4 | from .parents import OuterProxy 5 | 6 | 7 | class Contact(OuterProxy): 8 | COLLECTION_STRING_ATTRIBUTES = tuple() 9 | SIMPLE_STRING_ATTRIBUTES = { 10 | "contact_method": None, 11 | 
"value": None, 12 | } 13 | 14 | @property 15 | def indexing_values(self) -> List[Tuple[str, object]]: 16 | return [ 17 | ('id', self.id), 18 | ('value', self.value), 19 | ] 20 | 21 | def __init__(self, contact_method: ContactMethod, value: str, **kwargs) -> None: 22 | super().__init__(**kwargs) 23 | self.contact_method: ContactMethod = contact_method 24 | self.value: str = value 25 | 26 | @classmethod 27 | def match_url(cls, url: str) -> Optional["Contact"]: 28 | url = url.strip() 29 | 30 | if url.startswith("mailto:"): 31 | return cls(ContactMethod.EMAIL, url.replace("mailto:", "", 1)) 32 | 33 | if url.startswith("tel:"): 34 | return cls(ContactMethod.PHONE, url.replace("tel:", "", 1)) 35 | 36 | if url.startswith("fax:"): 37 | return cls(ContactMethod.FAX, url.replace("fax:", "", 1)) 38 | 39 | -------------------------------------------------------------------------------- /music_kraken/objects/formatted_text.py: -------------------------------------------------------------------------------- 1 | import mistune 2 | from markdownify import markdownify as md 3 | 4 | 5 | def plain_to_markdown(plain: str) -> str: 6 | return plain.replace("\n", " \n") 7 | 8 | 9 | class FormattedText: 10 | html = "" 11 | 12 | def __init__( 13 | self, 14 | markdown: str = None, 15 | html: str = None, 16 | plain: str = None, 17 | ) -> None: 18 | if html is not None: 19 | self.html = html 20 | elif markdown is not None: 21 | self.html = mistune.markdown(markdown) 22 | elif plain is not None: 23 | self.html = mistune.markdown(plain_to_markdown(plain)) 24 | 25 | @property 26 | def is_empty(self) -> bool: 27 | return self.html == "" 28 | 29 | def __eq__(self, other) -> False: 30 | if type(other) != type(self): 31 | return False 32 | if self.is_empty and other.is_empty: 33 | return True 34 | 35 | return self.html == other.html 36 | 37 | @property 38 | def markdown(self) -> str: 39 | return md(self.html).strip() 40 | 41 | @markdown.setter 42 | def markdown(self, value: str) -> None: 43 | 
self.html = mistune.markdown(value) 44 | 45 | @property 46 | def plain(self) -> str: 47 | md = self.markdown 48 | return md.replace("\n\n", "\n") 49 | 50 | @plain.setter 51 | def plain(self, value: str) -> None: 52 | self.html = mistune.markdown(plain_to_markdown(value)) 53 | 54 | def __str__(self) -> str: 55 | return self.markdown 56 | 57 | plaintext = plain 58 | 59 | -------------------------------------------------------------------------------- /music_kraken/objects/lint_default_factories.py: -------------------------------------------------------------------------------- 1 | from typing import List, TypeVar, Type 2 | 3 | from .country import Language 4 | from .lyrics import Lyrics 5 | from .parents import OuterProxy 6 | from .song import Song, Album, Artist, Label 7 | from .source import Source 8 | from .target import Target 9 | 10 | T = TypeVar('T', bound=OuterProxy) 11 | ALL_CLASSES: List[Type[T]] = [Song, Album, Artist, Label, Source, Target, Lyrics] 12 | 13 | 14 | def print_lint_res(missing_values: dict): 15 | print("_default_factories = {") 16 | for key, value in missing_values.items(): 17 | print(f'\t"{key}": {value},') 18 | print("}") 19 | 20 | # def __init__(self, foo: str, bar) -> None: ... 
def _guess_default_factory(value) -> str:
    """
    Guess the source text of a default factory for one annotation.

    :param value: the annotation, either a string (forward reference) or the annotated object
    :return: source text of the factory, "lambda: None" if nothing better is found
    """
    if isinstance(value, str):
        # a forward reference can only be judged by its name
        if value == "SourceCollection":
            return "SourceCollection"
        if "collection" in value.lower():
            return "Collection"
        if value.istitle():
            return value
        return "lambda: None"

    if value is Language:
        return 'Language.by_alpha_2("en")'

    # a type, that can be called without arguments, is its own factory
    try:
        value()
    except TypeError:
        return "lambda: None"
    return value.__name__


def lint_type(cls: "Type[T]"):
    """
    Prints the default factories that are missing in cls._default_factories,
    and a generated __init__ matching the annotations of cls.

    :param cls: the class to check, it needs __annotations__ and _default_factories
    """
    missing_values: dict = {}

    for key, value in cls.__annotations__.items():
        if value is None:
            continue

        # constants and private attributes (this includes dunder names) need no factory
        if (not key.islower()) or key.startswith("_"):
            continue

        if key in cls._default_factories:
            continue

        missing_values[key] = _guess_default_factory(value)

    if missing_values:
        print(f"{cls.__name__}:")
        print_lint_res(missing_values)
        print()
    else:
        print(f"Everything is fine at {cls.__name__}")

    p = []
    s = []
    for key, value in cls.__annotations__.items():
        has_default = key in cls._default_factories

        if not isinstance(value, str):
            # not every annotation object has a __name__, fall back to its repr then
            value = getattr(value, "__name__", repr(value))

        if key.endswith("_collection"):
            key = key.replace("_collection", "_list")

        if value.startswith("Collection[") and value.endswith("]"):
            value = value.replace("Collection", "List")

        if has_default:
            value = value + " = None"

        p.append(f'{key}: {value}')
        s.append(f'{key}={key}')
    p.append("**kwargs")
    s.append("**kwargs")

    print("# This is automatically generated")
    print(f"def __init__(self, {', '.join(p)}) -> None:")
    print(f"\tsuper().__init__({', '.join(s)})")
    print()


def lint():
    """lints every class in ALL_CLASSES"""
    for i in ALL_CLASSES:
        lint_type(i)

    print()
-------------------------------------------------------------------------------- /music_kraken/objects/lyrics.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from collections import defaultdict 3 | import pycountry 4 | 5 | from .parents import OuterProxy 6 | from .source import Source, SourceCollection 7 | from .formatted_text import FormattedText 8 | from .country import Language 9 | from .metadata import ( 10 | Mapping as id3Mapping, 11 | ID3Timestamp, 12 | Metadata 13 | ) 14 | 15 | 16 | class Lyrics(OuterProxy): 17 | text: FormattedText 18 | language: Language 19 | 20 | source_collection: SourceCollection 21 | 22 | _default_factories = { 23 | "text": FormattedText, 24 | "language": lambda: Language.by_alpha_2("en"), 25 | 26 | "source_collection": SourceCollection, 27 | } 28 | 29 | # This is automatically generated 30 | def __init__(self, text: FormattedText = None, language: Language = None, source_list: SourceCollection = None, 31 | **kwargs) -> None: 32 | super().__init__(text=text, language=language, source_list=source_list, **kwargs) 33 | 34 | @property 35 | def metadata(self) -> Metadata: 36 | return Metadata({ 37 | id3Mapping.UNSYNCED_LYRICS: [self.text.plaintext] 38 | }) 39 | 40 | -------------------------------------------------------------------------------- /music_kraken/objects/option.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, List, Iterable 2 | 3 | if TYPE_CHECKING: 4 | from .parents import DatabaseObject 5 | 6 | 7 | class Options: 8 | def __init__(self, option_list: List['DatabaseObject'] = None): 9 | self._data: List['DatabaseObject'] = option_list or list() 10 | 11 | def __str__(self): 12 | return "\n".join(f"{i:02d}: {database_object.option_string}" for i, database_object in enumerate(self._data)) 13 | 14 | def __iter__(self): 15 | for database_object in self._data: 16 | yield database_object 17 | 18 
| def append(self, element: 'DatabaseObject'): 19 | self._data.append(element) 20 | 21 | def extend(self, iterable: Iterable['DatabaseObject']): 22 | for element in iterable: 23 | self.append(element) 24 | 25 | def get_next_options(self, index: int) -> 'Options': 26 | if index >= len(self._data): 27 | raise ValueError("Index out of bounds") 28 | 29 | return self._data[index].options 30 | 31 | def __getitem__(self, item: int) -> 'DatabaseObject': 32 | if type(item) != int: 33 | raise TypeError("Key needs to be an Integer") 34 | if item >= len(self._data): 35 | raise ValueError("Index out of bounds") 36 | 37 | return self._data[item] 38 | 39 | def __len__(self) -> int: 40 | return len(self._data) 41 | -------------------------------------------------------------------------------- /music_kraken/objects/target.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import Path 4 | from typing import List, Tuple, TextIO, Union, Optional 5 | import logging 6 | import random 7 | import requests 8 | from tqdm import tqdm 9 | 10 | from .parents import OuterProxy 11 | from ..utils.shared import HIGHEST_ID 12 | from ..utils.config import main_settings, logging_settings 13 | from ..utils.string_processing import fit_to_file_system 14 | 15 | 16 | LOGGER = logging.getLogger("target") 17 | 18 | 19 | class Target(OuterProxy): 20 | """ 21 | create somehow like that 22 | ```python 23 | # I know path is pointless, and I will change that (don't worry about backwards compatibility there) 24 | Target(file="song.mp3", path="~/Music/genre/artist/album") 25 | ``` 26 | """ 27 | 28 | file_path: Path 29 | 30 | _default_factories = { 31 | } 32 | 33 | @classmethod 34 | def temp(cls, name: str = str(random.randint(0, HIGHEST_ID)), file_extension: Optional[str] = None) -> P: 35 | if file_extension is not None: 36 | name = f"{name}.{file_extension}" 37 | 38 | return cls(main_settings["temp_directory"] / name) 
39 | 40 | # This is automatically generated 41 | def __init__(self, file_path: Union[Path, str], relative_to_music_dir: bool = False, **kwargs) -> None: 42 | if not isinstance(file_path, Path): 43 | file_path = Path(file_path) 44 | 45 | if relative_to_music_dir: 46 | file_path = Path(main_settings["music_directory"], file_path) 47 | 48 | super().__init__(file_path=fit_to_file_system(file_path), **kwargs) 49 | 50 | self.is_relative_to_music_dir: bool = relative_to_music_dir 51 | 52 | def __repr__(self) -> str: 53 | return str(self.file_path) 54 | 55 | @property 56 | def indexing_values(self) -> List[Tuple[str, object]]: 57 | return [('filepath', self.file_path)] 58 | 59 | @property 60 | def exists(self) -> bool: 61 | return self.file_path.is_file() 62 | 63 | @property 64 | def size(self) -> int: 65 | """ 66 | returns the size the downloaded audio takes up in bytes 67 | returns 0 if the file doesn't exist 68 | """ 69 | if not self.exists: 70 | return 0 71 | 72 | return self.file_path.stat().st_size 73 | 74 | def create_path(self): 75 | self.file_path.parent.mkdir(parents=True, exist_ok=True) 76 | 77 | def copy_content(self, copy_to: Target): 78 | if not self.exists: 79 | LOGGER.warning(f"No file exists at: {self.file_path}") 80 | return 81 | 82 | with open(self.file_path, "rb") as read_from: 83 | copy_to.create_path() 84 | with open(copy_to.file_path, "wb") as write_to: 85 | write_to.write(read_from.read()) 86 | 87 | def stream_into(self, r: requests.Response, desc: str = None) -> bool: 88 | if r is None: 89 | return False 90 | 91 | self.create_path() 92 | 93 | total_size = int(r.headers.get('content-length')) 94 | 95 | with open(self.file_path, 'wb') as f: 96 | try: 97 | """ 98 | https://en.wikipedia.org/wiki/Kilobyte 99 | > The internationally recommended unit symbol for the kilobyte is kB. 
100 | """ 101 | with tqdm(total=total_size, unit='B', unit_scale=True, unit_divisor=1024, desc=desc) as t: 102 | 103 | for chunk in r.iter_content(chunk_size=main_settings["chunk_size"]): 104 | size = f.write(chunk) 105 | t.update(size) 106 | return True 107 | 108 | except requests.exceptions.Timeout: 109 | logging_settings["download_logger"].error("Stream timed out.") 110 | return False 111 | 112 | def open(self, file_mode: str, **kwargs) -> TextIO: 113 | return self.file_path.open(file_mode, **kwargs) 114 | 115 | def delete(self): 116 | self.file_path.unlink(missing_ok=True) 117 | 118 | def read_bytes(self) -> bytes: 119 | return self.file_path.read_bytes() 120 | -------------------------------------------------------------------------------- /music_kraken/pages/__init__.py: -------------------------------------------------------------------------------- 1 | from .encyclopaedia_metallum import EncyclopaediaMetallum 2 | from .musify import Musify 3 | from .musicbrainz import Musicbrainz 4 | from .youtube import YouTube 5 | from .youtube_music import YoutubeMusic 6 | from .bandcamp import Bandcamp 7 | from .genius import Genius 8 | 9 | from .abstract import Page, INDEPENDENT_DB_OBJECTS 10 | -------------------------------------------------------------------------------- /music_kraken/pages/abstract.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | import re 4 | from copy import copy 5 | from pathlib import Path 6 | from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict 7 | from string import Formatter 8 | from dataclasses import dataclass, field 9 | 10 | import requests 11 | from bs4 import BeautifulSoup 12 | 13 | from ..connection import Connection 14 | from ..objects import ( 15 | Song, 16 | Source, 17 | Album, 18 | Artist, 19 | Target, 20 | DatabaseObject, 21 | Options, 22 | Collection, 23 | Label, 24 | ) 25 | from ..utils.enums import SourceType 26 | from 
..utils.enums.album import AlbumType 27 | from ..audio import write_metadata_to_target, correct_codec 28 | from ..utils.config import main_settings 29 | from ..utils.support_classes.query import Query 30 | from ..utils.support_classes.download_result import DownloadResult 31 | from ..utils.string_processing import fit_to_file_system 32 | from ..utils import trace, output, BColors 33 | 34 | INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song] 35 | INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]] 36 | 37 | @dataclass 38 | class FetchOptions: 39 | download_all: bool = False 40 | album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) 41 | 42 | @dataclass 43 | class DownloadOptions: 44 | download_all: bool = False 45 | album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) 46 | 47 | process_audio_if_found: bool = False 48 | process_metadata_if_found: bool = True 49 | 50 | class Page: 51 | SOURCE_TYPE: SourceType 52 | LOGGER: logging.Logger 53 | 54 | def __new__(cls, *args, **kwargs): 55 | cls.LOGGER = logging.getLogger(cls.__name__) 56 | 57 | return super().__new__(cls) 58 | 59 | def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): 60 | self.SOURCE_TYPE.register_page(self) 61 | 62 | self.download_options: DownloadOptions = download_options or DownloadOptions() 63 | self.fetch_options: FetchOptions = fetch_options or FetchOptions() 64 | 65 | def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None): 66 | """ 67 | Perform a regex search on the given string, using a single or a list of 68 | patterns returning the first matching group. 69 | In case of failure return a default value or raise a WARNING or a 70 | RegexNotFoundError, depending on fatal, specifying the field name. 
71 | """ 72 | 73 | if isinstance(pattern, str): 74 | mobj = re.search(pattern, string, flags) 75 | else: 76 | for p in pattern: 77 | mobj = re.search(p, string, flags) 78 | if mobj: 79 | break 80 | 81 | if mobj: 82 | if group is None: 83 | # return the first matching group 84 | return next(g for g in mobj.groups() if g is not None) 85 | elif isinstance(group, (list, tuple)): 86 | return tuple(mobj.group(g) for g in group) 87 | else: 88 | return mobj.group(group) 89 | 90 | return default 91 | 92 | def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: 93 | return None 94 | 95 | def get_soup_from_response(self, r: requests.Response) -> BeautifulSoup: 96 | return BeautifulSoup(r.content, "html.parser") 97 | 98 | # to search stuff 99 | def search(self, query: Query) -> List[DatabaseObject]: 100 | music_object = query.music_object 101 | 102 | search_functions = { 103 | Song: self.song_search, 104 | Album: self.album_search, 105 | Artist: self.artist_search, 106 | Label: self.label_search 107 | } 108 | 109 | if type(music_object) in search_functions: 110 | r = search_functions[type(music_object)](music_object) 111 | if r is not None and len(r) > 0: 112 | return r 113 | 114 | r = [] 115 | for default_query in query.default_search: 116 | for single_option in self.general_search(default_query): 117 | r.append(single_option) 118 | 119 | return r 120 | 121 | def general_search(self, search_query: str) -> List[DatabaseObject]: 122 | return [] 123 | 124 | def label_search(self, label: Label) -> List[Label]: 125 | return [] 126 | 127 | def artist_search(self, artist: Artist) -> List[Artist]: 128 | return [] 129 | 130 | def album_search(self, album: Album) -> List[Album]: 131 | return [] 132 | 133 | def song_search(self, song: Song) -> List[Song]: 134 | return [] 135 | 136 | # to fetch stuff 137 | def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: 138 | return Song() 139 | 140 | def fetch_album(self, source: Source, stop_at_level: int = 1) 
-> Album: 141 | return Album() 142 | 143 | def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: 144 | return Artist() 145 | 146 | def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: 147 | return Label() 148 | 149 | # to download stuff 150 | def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]: 151 | return [] 152 | 153 | def post_process_hook(self, song: Song, temp_target: Target, **kwargs): 154 | pass 155 | 156 | def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: 157 | return DownloadResult() 158 | -------------------------------------------------------------------------------- /music_kraken/pages/musicbrainz.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from dataclasses import dataclass 3 | from enum import Enum 4 | from typing import List, Optional, Type, Union, Generator, Dict, Any 5 | from urllib.parse import urlparse 6 | 7 | import pycountry 8 | import musicbrainzngs 9 | from bs4 import BeautifulSoup 10 | 11 | from ..connection import Connection 12 | from .abstract import Page 13 | from ..utils.enums import SourceType, ALL_SOURCE_TYPES 14 | from ..utils.enums.album import AlbumType, AlbumStatus 15 | from ..objects import ( 16 | Artist, 17 | Source, 18 | Song, 19 | Album, 20 | ID3Timestamp, 21 | FormattedText, 22 | Label, 23 | Target, 24 | DatabaseObject, 25 | Lyrics, 26 | Artwork 27 | ) 28 | from ..utils.config import logging_settings, main_settings 29 | from ..utils import string_processing, shared 30 | from ..utils.string_processing import clean_song_title 31 | from ..utils.support_classes.query import Query 32 | from ..utils.support_classes.download_result import DownloadResult 33 | 34 | 35 | 36 | class Musicbrainz(Page): 37 | SOURCE_TYPE = ALL_SOURCE_TYPES.MUSICBRAINZ 38 | 39 | HOST = "https://musicbrainz.org" 40 | 41 | def __init__(self, *args, 
**kwargs): 42 | musicbrainzngs.set_useragent("mk", "1") 43 | 44 | super().__init__(*args, **kwargs) 45 | 46 | def general_search(self, search_query: str) -> List[DatabaseObject]: 47 | search_results = [] 48 | 49 | #Artist 50 | search_results += self.artist_search(search_query).copy() 51 | 52 | #Album 53 | search_results += self.album_search(search_query).copy() 54 | 55 | #Song 56 | search_results += self.song_search(search_query).copy() 57 | 58 | return search_results 59 | 60 | def artist_search(self, search_query: str) -> List[Artist]: 61 | artist_list = [] 62 | 63 | #Artist 64 | artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list'] 65 | artist_source_list: List[Source] = [] 66 | for artist_dict in artist_dict_list: 67 | artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id'])) 68 | artist_list.append(Artist( 69 | name=artist_dict['name'], 70 | source_list=artist_source_list 71 | )) 72 | 73 | return artist_list 74 | 75 | def song_search(self, search_query: str) -> List[Song]: 76 | song_list = [] 77 | 78 | #Song 79 | song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list'] 80 | song_source_list: List[Source] = [] 81 | for song_dict in song_dict_list: 82 | song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id'])) 83 | song_list.append(Song( 84 | title=song_dict['title'], 85 | source_list=song_source_list 86 | )) 87 | 88 | return song_list 89 | 90 | def album_search(self, search_query: str) -> List[Album]: 91 | album_list = [] 92 | 93 | #Album 94 | album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list'] 95 | album_source_list: List[Source] = [] 96 | for album_dict in album_dict_list: 97 | album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id'])) 98 | album_list.append(Album( 99 | title=album_dict['title'], 100 | source_list=album_source_list 101 | )) 
102 | 103 | return album_list 104 | 105 | 106 | def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: 107 | album_list = [] 108 | 109 | #Album 110 | album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list'] 111 | album_source_list: List[Source] = [] 112 | for album_dict in album_dict_list: 113 | album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id'])) 114 | album_list.append(Album( 115 | title=album_dict['title'], 116 | source_list=album_source_list 117 | )) 118 | 119 | def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: 120 | artist_list = [] 121 | 122 | #Artist 123 | artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list'] 124 | artist_source_list: List[Source] = [] 125 | for artist_dict in artist_dict_list: 126 | artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id'])) 127 | artist_list.append(Artist( 128 | name=artist_dict['name'], 129 | source_list=artist_source_list, 130 | )) 131 | 132 | def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: 133 | song_list = [] 134 | 135 | #Song 136 | song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list'] 137 | song_source_list: List[Source] = [] 138 | for song_dict in song_dict_list: 139 | song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id'])) 140 | song_list.append(Song( 141 | title=song_dict['title'], 142 | source_list=song_source_list 143 | )) 144 | 145 | -------------------------------------------------------------------------------- /music_kraken/pages/youtube_music/__init__.py: -------------------------------------------------------------------------------- 1 | from .youtube_music import YoutubeMusic -------------------------------------------------------------------------------- /music_kraken/pages/youtube_music/_list_render.py: 
def music_card_shelf_renderer(renderer: dict) -> List[DatabaseObject]:
    """
    Parse a card shelf: first the runs of its title, then every sub renderer in ``contents``.

    :param renderer: the card shelf renderer; ``title`` and ``contents`` are both optional
    :return: the objects of the title followed by the objects of the contents
    """
    title_runs = renderer.get("title", {}).get("runs", [])
    collected = parse_run_list(title_runs)

    for content in renderer.get("contents", []):
        collected.extend(parse_renderer(content))

    return collected
def music_shelf_renderer(renderer: dict) -> List[DatabaseObject]:
    """
    Parse every sub renderer listed under ``contents`` of a shelf renderer.

    :param renderer: the shelf renderer; a missing or empty ``contents`` yields an empty list
    :return: the objects parsed from all sub renderers, in order
    """
    result: List[DatabaseObject] = []

    # "or []" covers a missing key as well as an explicit null,
    # both of which would otherwise fail when iterated.
    for subrenderer in renderer.get("contents") or []:
        result.extend(parse_renderer(subrenderer))

    return result
def parse_run_element(run_element: dict) -> Optional[DatabaseObject]:
    """
    Turn one "run" of a YouTube Music response into a Song, Artist or Album.

    The kind of object is read from the navigation endpoint of the run: the
    ``musicVideoType`` of a watch endpoint, or the ``pageType`` of a browse endpoint.
    User generated videos, official music videos, user channels and playlists are
    only turned into objects if the setting ``youtube_music_clean_data`` is off.

    :param run_element: the run; needs ``navigationEndpoint`` and ``text`` to produce a result
    :return: the parsed object, or None if the run can't be mapped to one
    """
    if "navigationEndpoint" not in run_element:
        return

    navigation = run_element.get("navigationEndpoint", {})
    is_video = "watchEndpoint" in navigation
    endpoint = navigation.get("watchEndpoint", navigation.get("browseEndpoint", {}))

    def _config_value(supported_configs: str, music_config: str, key: str):
        return endpoint.get(supported_configs, {}).get(music_config, {}).get(key, "")

    def _unfiltered() -> bool:
        # looked up lazily, so the setting is only read for the types it matters for
        return not youtube_settings["youtube_music_clean_data"]

    page_type_string = _config_value("watchEndpointMusicSupportedConfigs", "watchEndpointMusicConfig", "musicVideoType")
    if not is_video:
        page_type_string = _config_value("browseEndpointContextSupportedConfigs", "browseEndpointContextMusicConfig", "pageType")

    try:
        element_type = PageType(page_type_string)
    except ValueError:
        # a type this parser doesn't know
        return

    element_id = endpoint.get("videoId", endpoint.get("browseId"))
    element_text = run_element.get("text")

    if element_id is None or element_text is None:
        LOGGER.warning("Couldn't find either the id or text of a Youtube music element.")
        return

    is_song = element_type == PageType.SONG
    if is_song or (element_type in (PageType.VIDEO, PageType.OFFICIAL_MUSIC_VIDEO) and _unfiltered()):
        song_source = Source(SOURCE_PAGE, f"https://music.youtube.com/watch?v={element_id}")
        return Song(
            title=clean_song_title(element_text),
            source_list=[song_source]
        )

    is_artist = element_type == PageType.ARTIST
    if is_artist or (element_type == PageType.CHANNEL and _unfiltered()):
        artist_source = Source(SOURCE_PAGE, f"https://music.youtube.com/channel/{element_id}")
        return Artist(name=element_text, source_list=[artist_source])

    is_album = element_type == PageType.ALBUM
    if is_album or (element_type == PageType.PLAYLIST and _unfiltered()):
        album_source = Source(SOURCE_PAGE, f"https://music.youtube.com/playlist?list={element_id}")
        return Album(title=element_text, source_list=[album_source])

    LOGGER.debug(f"Type {page_type_string} wasn't implemented.")
class YouTubeUrl:
    """
    Parses a YouTube or Invidious url into its type (channel, playlist, video) and its id.

    Urls that can't be recognised, or that lack the id, end up with the type
    ``YouTubeUrlType.NONE``; the constructor doesn't raise for them.

    Artist
    https://yt.artemislena.eu/channel/UCV0Ntl3lVR7xDXKoCU6uUXA
    https://www.youtube.com/channel/UCV0Ntl3lVR7xDXKoCU6uUXA

    Release
    https://yt.artemislena.eu/playlist?list=OLAK5uy_nEg5joAyFjHBPwnS_ADHYtgSqAjFMQKLw
    https://www.youtube.com/playlist?list=OLAK5uy_nEg5joAyFjHBPwnS_ADHYtgSqAjFMQKLw

    Track
    https://yt.artemislena.eu/watch?v=SULFl39UjgY&list=OLAK5uy_nEg5joAyFjHBPwnS_ADHYtgSqAjFMQKLw&index=1
    https://www.youtube.com/watch?v=SULFl39UjgY
    """

    def __init__(self, url: str) -> None:
        """
        :param url: the url to parse
        """
        self.SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE

        self.id = ""
        parsed = urlparse(url=url)

        self.url_type: YouTubeUrlType

        # the first path segment decides the type: /channel/..., /playlist, /watch
        type_frag_list = parsed.path.split("/")
        if len(type_frag_list) < 2:
            self.url_type = YouTubeUrlType.NONE
        else:
            try:
                self.url_type = YouTubeUrlType(type_frag_list[1].strip())
            except ValueError:
                self.url_type = YouTubeUrlType.NONE

        if self.url_type == YouTubeUrlType.CHANNEL:
            # the channel id is part of the path
            if len(type_frag_list) < 3:
                self.couldnt_find_id(url)
            else:
                self.id = type_frag_list[2]

        elif self.url_type == YouTubeUrlType.PLAYLIST:
            self._read_id_from_query(url, parsed.query, "list")

        elif self.url_type == YouTubeUrlType.VIDEO:
            self._read_id_from_query(url, parsed.query, "v")

    def _read_id_from_query(self, url: str, query: str, key: str) -> None:
        """Takes the id from the query parameter ``key``, or marks the url as unusable."""
        query_stuff = parse_qs(query)
        if key not in query_stuff:
            self.couldnt_find_id(url)
        else:
            self.id = query_stuff[key][0]

    def couldnt_find_id(self, url: str):
        """Logs that ``url`` has no id and resets the type to ``YouTubeUrlType.NONE``."""
        logging_settings["youtube_logger"].warning(f"The id is missing: {url}")
        self.url_type = YouTubeUrlType.NONE

    @property
    def api(self) -> str:
        """The Invidious api url for this object; the bare instance url if the type is unknown."""
        if self.url_type == YouTubeUrlType.CHANNEL:
            return get_invidious_url(path=f"/api/v1/channels/playlists/{self.id}")

        if self.url_type == YouTubeUrlType.PLAYLIST:
            # has to be self.id, a bare "id" would format the builtin function into the url
            return get_invidious_url(path=f"/api/v1/playlists/{self.id}")

        if self.url_type == YouTubeUrlType.VIDEO:
            return get_invidious_url(path=f"/api/v1/videos/{self.id}")

        return get_invidious_url()

    @property
    def normal(self) -> str:
        """The Invidious frontend url for this object; the bare instance url if the type is unknown."""
        # Compare against the members. "self.url_type.CHANNEL" is just the member
        # itself and thus always truthy, no matter what the actual type is.
        if self.url_type == YouTubeUrlType.CHANNEL:
            return get_invidious_url(path=f"/channel/{self.id}")

        if self.url_type == YouTubeUrlType.PLAYLIST:
            return get_invidious_url(path="/playlist", query=f"list={self.id}")

        if self.url_type == YouTubeUrlType.VIDEO:
            return get_invidious_url(path="/watch", query=f"v={self.id}")

        return get_invidious_url()
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
    """
    Map the url of ``source`` to the kind of object it points to.

    :param source: the source whose url gets parsed
    :return: Artist for channels, Album for playlists, Song for videos, else None
    """
    object_type_by_url_type = {
        YouTubeUrlType.CHANNEL: Artist,
        YouTubeUrlType.PLAYLIST: Album,
        YouTubeUrlType.VIDEO: Song,
    }

    url_type = YouTubeUrl(source.url).url_type
    return object_type_by_url_type.get(url_type)
def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
    """
    Ask SponsorBlock which parts of the video behind ``source`` should be skipped.

    :param song: not used by this implementation
    :param source: the source of the video; has to be a video url
    :return: a list of (start, end) tuples, one per segment; empty if SponsorBlock
        is disabled in the settings, the url is no video url, or the lookup failed
    """
    if not youtube_settings["use_sponsor_block"]:
        return []

    parsed = YouTubeUrl(source.url)
    if parsed.url_type != YouTubeUrlType.VIDEO:
        self.LOGGER.warning(f"{source.url} is no video url.")
        return []

    segments = []
    try:
        segments = self.sponsorblock.get_segments(parsed.id)
    # NOTE(review): NotFoundException and HTTPException are not imported in the import
    # block of this file, so these except clauses would raise a NameError as soon as
    # get_segments raises anything. Presumably they live in python_sponsorblock;
    # confirm the module path and import them.
    except NotFoundException:
        self.LOGGER.debug(f"No sponsor found for the video {parsed.id}.")
    except HTTPException as e:
        self.LOGGER.warning(f"{e}")

    # each segment exposes its boundaries as segment.segment[0] and segment.segment[1]
    return [(segment.segment[0], segment.segment[1]) for segment in segments]
def dump_to_file(file_name: str, payload: str, is_json: bool = False, exit_after_dump: bool = False):
    """
    Write ``payload`` into a file in the temp directory, for debugging.

    Does nothing unless ``DEBUG_DUMP`` is set.

    :param file_name: name of the file inside ``LOCATIONS.TEMP_DIRECTORY``
    :param payload: the text to dump; anything that isn't a string is serialised as indented json
    :param is_json: if set, a string payload is parsed as json first, so that it gets pretty printed
    :param exit_after_dump: if set, the program exits after the file was written
    """
    if not DEBUG_DUMP:
        return

    path = Path(LOCATIONS.TEMP_DIRECTORY, file_name)
    logging.warning(f"dumping {file_name} to: \"{path}\"")

    if is_json and isinstance(payload, str):
        payload = json.loads(payload)

    # json.loads can return any json value, not only an object, so everything
    # that isn't text yet has to be serialised before it can be written.
    if not isinstance(payload, str):
        payload = json.dumps(payload, indent=4)

    # explicit encoding, so the dump doesn't depend on the locale of the platform
    with path.open("w", encoding="utf-8") as f:
        f.write(payload)

    if exit_after_dump:
        exit()
def traverse_json_path(data, path: Union[str, List[str]], default=None):
    """
    Follow ``path`` through nested dicts and lists and return what it leads to.

    Path parts are concatenated with . or wrapped with [""] for object keys and wrapped in [] for array indices.

    :param data: the structure to walk through
    :param path: either the path as string, or the already split path parts
    :param default: returned as soon as a part can't be resolved, or resolves to None
    :return: the value at the end of the path; ``data`` itself for an empty path
    """
    if isinstance(path, str):
        normalized = path
        for token in ('["', '"]', "[", "]"):
            normalized = normalized.replace(token, ".")
        path = [part for part in normalized.split(".") if len(part) > 0]

    if len(path) <= 0:
        return data

    node = data
    for part in path:
        found = None

        if isinstance(node, dict):
            found = node.get(part)
        elif isinstance(node, list):
            try:
                found = node[int(part)]
            except (IndexError, ValueError):
                pass

        if found is None:
            return default

        node = found

    return node
def read_config():
    """
    Read every config section, then derive the settings that depend on other settings.

    If ``tor`` is enabled, ``proxies`` is pointed at the local socks proxy on ``tor_port``.
    """
    for config_section in _sections:
        config_section.read()

    # special cases
    if not main_settings['tor']:
        return

    tor_proxy = f'socks5h://127.0.0.1:{main_settings["tor_port"]}'
    main_settings['proxies'] = {
        'http': tor_proxy,
        'https': tor_proxy
    }
class Attribute:
    """
    One setting of a config file: its name, its current (parsed) value and its description.

    Subclasses override ``parse_simple_value`` and ``unparse_simple_value`` to convert
    between the representation in the toml file and the one used by the program.
    Both are applied to every leaf of nested dicts, lists and tuples.
    """

    def __init__(
        self,
        name: str,
        default_value: any,
        description: Optional[str] = None,
    ):
        """
        :param name: the key of the setting
        :param default_value: the unparsed default; it is parsed right away
        :param description: written as comment above the setting
        """
        # the name has to be set first, parsing may need it for error messages
        self.name = name

        self.value = self._recursive_parse_object(default_value, self.parse_simple_value)

        self.description: Optional[str] = description
        # set by initialize_from_config
        self.loaded_settings: Optional[dict] = None

    def initialize_from_config(self, loaded_settings: dict):
        """Attaches the settings dict of the config and stores the current value in it."""
        self.loaded_settings = loaded_settings
        self.loaded_settings.__setitem__(self.name, self.value, True)

    def unparse_simple_value(self, value: any) -> any:
        """Converts one parsed leaf value back into what is written to the file."""
        return value

    def parse_simple_value(self, value: any) -> any:
        """Converts one leaf value from the file into what the program uses."""
        return value

    def _recursive_parse_object(self, __object, callback: Callable):
        """
        Applies ``callback`` to every leaf of ``__object`` and returns the result.

        The passed object isn't modified; dicts and lists are shallow copied on every level.

        :param __object: a leaf value, or a dict, list or tuple of such
        :param callback: called with every leaf value
        :return: the same structure with every leaf replaced by the return value of ``callback``
        """
        __object = copy(__object)

        if isinstance(__object, dict):
            for key, value in __object.items():
                __object[key] = self._recursive_parse_object(value, callback)

            return __object

        if isinstance(__object, list):
            for i, item in enumerate(__object):
                __object[i] = self._recursive_parse_object(item, callback)
            return __object

        # A ParseResult is a tuple as well, but it is one value and thus handled as leaf.
        if isinstance(__object, tuple) and not isinstance(__object, ParseResult):
            # tuples can't be assigned to, so a new one is built
            return tuple(self._recursive_parse_object(item, callback) for item in __object)

        return callback(__object)

    def parse(self, unparsed_value):
        """Parses ``unparsed_value``, stores it as the current value and returns it."""
        self.value = self._recursive_parse_object(unparsed_value, self.parse_simple_value)
        return self.value

    def unparse(self, parsed_value):
        """Returns ``parsed_value`` in the representation that is written to the file."""
        return self._recursive_parse_object(parsed_value, self.unparse_simple_value)

    def load_toml(self, loaded_toml: dict) -> bool:
        """
        Takes the value of this setting from the loaded toml data.

        :param loaded_toml: the content of the whole config file
        :return: True if the value was found and valid; False if the setting is
            missing (the current value is kept) or the value was rejected
        """

        if self.name not in loaded_toml:
            LOGGER.warning(f"No setting by the name {self.name} found in the settings file.")
            self.loaded_settings.__setitem__(self.name, self.value, True)
            return False

        try:
            self.parse(loaded_toml[self.name])
        except SettingValueError as settings_error:
            LOGGER.warning(settings_error)
            return False

        self.loaded_settings.__setitem__(self.name, self.value, True)

        return True

    @property
    def toml_string(self) -> str:
        """The setting as toml, with the description as comment above it."""
        string = ""

        if self.description is not None:
            string += comment(self.description) + "\n"

        string += toml.dumps({self.name: self.unparse(self.value)})

        return string

    def __str__(self):
        return f"{self.description}\n{self.name}={self.value}"
class SelectAttribute(Attribute):
    """A setting whose value has to be one of a fixed set of options."""

    def __init__(self, name: str, default_value: any, options: tuple, description: Optional[str] = None, ignore_options_for_description = False):
        """
        :param name: the key of the setting
        :param default_value: the default; has to be one of ``options``
        :param options: the allowed values
        :param description: written as comment above the setting
        :param ignore_options_for_description: if set, the options aren't appended to the description
        """
        # has to be set before the parent constructor runs, because that one parses the default value
        self.options: tuple = options

        description_lines = []
        if description is not None:
            description_lines.append(description)

        if not ignore_options_for_description:
            description_lines.append(f"{{{', '.join(map(str, self.options))}}}")

        # None instead of an empty description, so that no empty comment is written
        super().__init__(name, default_value, "\n".join(description_lines) if description_lines else None)

    def parse_simple_value(self, value: any) -> any:
        """
        :raises SettingValueError: if ``value`` is none of the options
        """
        if value in self.options:
            return value

        raise SettingValueError(
            setting_name=self.name,
            setting_value=value,
            rule=f"has to be in the options: {{{', '.join(map(str, self.options))}}}."
        )

    def unparse_simple_value(self, value: any) -> any:
        return value
class AudioFormatAttribute(Attribute):
    """A setting that holds the name of an audio format which supports id3 metadata."""

    def __init__(self, name: str, default_value: any, description: Optional[str] = None, ignore_options_for_description = False):
        """
        :param name: the key of the setting
        :param default_value: the default; has to be a supported audio format
        :param description: written as comment above the setting
        :param ignore_options_for_description: if set, the supported formats aren't appended to the description
        """
        description_lines = []
        if description is not None:
            description_lines.append(description)

        if not ignore_options_for_description:
            # the sorted lists, so the generated comment is the same on every run
            description_lines.append(f"ID3.2: {{{', '.join(_sorted_id3_2_formats)}}}")
            description_lines.append(f"ID3.1: {{{', '.join(_sorted_id3_1_formats)}}}")

        # None instead of an empty description, so that no empty comment is written
        super().__init__(name, default_value, "\n".join(description_lines) if description_lines else None)

    def parse_simple_value(self, value: any) -> any:
        """
        Normalises the format name to stripped lower case.

        :raises SettingValueError: if ``value`` is no string or no supported format
        """
        if not isinstance(value, str):
            raise SettingValueError(
                setting_name=self.name,
                setting_value=value,
                rule="has to be a valid audio format, supporting id3 metadata"
            )

        value = value.strip().lower()
        if value in ID3_2_FILE_FORMATS:
            return value
        if value in ID3_1_FILE_FORMATS:
            logging.debug(f"setting audio format to a format that only supports ID3.1: {value}")
            return value

        raise SettingValueError(
            setting_name=self.name,
            setting_value=value,
            rule="has to be a valid audio format, supporting id3 metadata"
        )

    def unparse_simple_value(self, value: any) -> any:
        return value
def write(self):
    """
    Write all components as toml into the config file, replacing its previous content.
    """
    # Goes through logging instead of print, because this runs on every change
    # of a setting and would otherwise clutter the output of the program.
    logging.debug(f"writing config to: \"{self.config_file}\"")

    with self.config_file.open("w", encoding="utf-8") as conf_file:
        conf_file.write(self.toml_string)
import Logger 4 | from pathlib import Path 5 | import logging 6 | 7 | from ...path_manager import LOCATIONS 8 | from ..config import Config 9 | from ..attributes.attribute import Attribute, EmptyLine 10 | from ..attributes.special_attributes import ( 11 | IntegerSelect, 12 | LoggerAttribute 13 | ) 14 | 15 | 16 | config = Config([ 17 | Attribute(name="logging_format", default_value="%(levelname)s:%(name)s:%(message)s", description="""Logging settings for the actual logging: 18 | Reference for the logging formats: https://docs.python.org/3/library/logging.html#logrecord-attributes"""), 19 | IntegerSelect( 20 | name="log_level", 21 | default_value=logging.INFO, 22 | options={ 23 | "CRITICAL": 50, 24 | "ERROR": 40, 25 | "WARNING": 30, 26 | "INFO": 20, 27 | "DEBUG": 10, 28 | "NOTSET": 0 29 | } 30 | ), 31 | 32 | LoggerAttribute( 33 | name="download_logger", 34 | description="The logger for downloading.", 35 | default_value="download" 36 | ), 37 | LoggerAttribute( 38 | name="tagging_logger", 39 | description="The logger for tagging id3 containers.", 40 | default_value="tagging" 41 | ), 42 | LoggerAttribute( 43 | name="codex_logger", 44 | description="The logger for streaming the audio into an uniform codex.", 45 | default_value="codex" 46 | ), 47 | LoggerAttribute( 48 | name="object_logger", 49 | description="The logger for creating Data-Objects.", 50 | default_value="object" 51 | ), 52 | LoggerAttribute( 53 | name="database_logger", 54 | description="The logger for Database operations.", 55 | default_value="database" 56 | ), 57 | LoggerAttribute( 58 | name="musify_logger", 59 | description="The logger for the musify scraper.", 60 | default_value="musify" 61 | ), 62 | LoggerAttribute( 63 | name="musicbrainz_logger", 64 | description="The logger for the musicbrainz scraper.", 65 | default_value="musicbrainz" 66 | ), 67 | LoggerAttribute( 68 | name="youtube_logger", 69 | description="The logger for the youtube scraper.", 70 | default_value="youtube" 71 | ), 72 | 
class SettingsStructure(TypedDict):
    """Typed view of the settings declared in the logging config.

    Every key mirrors the ``name`` of one attribute in ``config``; the
    ``*_logger`` entries are declared as ``LoggerAttribute``.
    """
    # logging
    logging_format: str
    log_level: int
    download_logger: Logger
    tagging_logger: Logger
    codex_logger: Logger
    object_logger: Logger
    database_logger: Logger
    musify_logger: Logger
    # Declared in the config above but previously missing from this structure.
    musicbrainz_logger: Logger
    youtube_logger: Logger
    youtube_music_logger: Logger
    metal_archives_logger: Logger
    genius_logger: Logger
    bandcamp_logger: Logger
Attribute(name="bitrate", default_value=125, description="Streams the audio with given bitrate [kB/s]. Can't stream with a higher Bitrate, than the audio source provides."), 18 | AudioFormatAttribute(name="audio_format", default_value="mp3", description="""Music Kraken will stream the audio into this format. 19 | You can use Audio formats which support ID3.2 and ID3.1, 20 | but you will have cleaner Metadata using ID3.2."""), 21 | 22 | Attribute(name="result_history", default_value=True, description="""If enabled, you can go back to the previous results. 23 | The consequence is a higher meory consumption, because every result is saved."""), 24 | Attribute(name="history_length", default_value=8, description="""You can choose how far back you can go in the result history. 25 | The further you choose to be able to go back, the higher the memory usage. 26 | '-1' removes the Limit entirely."""), 27 | 28 | EmptyLine(), 29 | 30 | Attribute(name="preferred_artwork_resolution", default_value=1000), 31 | 32 | EmptyLine(), 33 | 34 | Attribute(name="sort_by_date", default_value=True, description="If this is set to true, it will set the albumsort attribute such that,\nthe albums are sorted by date"), 35 | Attribute(name="sort_album_by_type", default_value=True, description="""If this is set to true, it will set the albumsort attribute such that, 36 | the albums are put into categories before being sorted. 
37 | This means for example, the Studio Albums and EP's are always in front of Singles, and Compilations are in the back."""), 38 | Attribute(name="download_path", default_value="{genre}/{artist}/{album}", description="""There are multiple fields, you can use for the path and file name: 39 | - genre 40 | - label 41 | - artist 42 | - album 43 | - song 44 | - album_type 45 | The folder music kraken should put the songs into."""), 46 | Attribute(name="download_file", default_value="{song}.{audio_format}", description="The filename of the audio file."), 47 | SelectAttribute(name="album_type_blacklist", default_value=[ 48 | "Compilation Album", 49 | "Live Album", 50 | "Mixtape" 51 | ], options=("Studio Album", "EP (Extended Play)", "Single", "Live Album", "Compilation Album", "Mixtape", "Demo", "Other"), description="""Music Kraken ignores all albums of those types. 52 | Following album types exist in the programm:"""), 53 | Attribute(name="refresh_after", default_value=161, description="The time in seconds, after which a song/album/artist/label is newly fetched."), 54 | 55 | EmptyLine(), 56 | 57 | Attribute(name="proxies", default_value=[], description="This is a dictionary."), 58 | Attribute(name="tor", default_value=False, description="""Route ALL traffic through Tor. 59 | If you use Tor, make sure the Tor browser is installed, and running.I can't guarantee maximum security though!"""), 60 | Attribute(name="tor_port", default_value=9150, description="The port, tor is listening. 
If tor is already working, don't change it."), 61 | 62 | Attribute(name="chunk_size", default_value=1024, description="Size of the chunks that are streamed.\nHere could be some room for improvement."), 63 | Attribute(name="show_download_errors_threshold", default_value=0.3, description="""If the percentage of failed downloads goes over this threshold, 64 | all the error messages are shown."""), 65 | 66 | Attribute( 67 | name="language", 68 | default_value="en-US,en;q=0.6", 69 | description="The language of the program. This will be used to translate the program in the future.\n" 70 | "Currently it just sets the Accept-Language header.\n" 71 | "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language" 72 | ), 73 | Attribute( 74 | name="user_agent", 75 | default_value="Mozilla/5.0 (X11; Linux x86_64; rv:90.0) Gecko/20100101 Firefox/90.0", 76 | description="The user agent of the program. This will be used to translate the program in the future.\n" 77 | "Currently it just sets the User-Agent header.\n" 78 | "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent" 79 | ), 80 | Attribute( 81 | name="tries_per_proxy", 82 | default_value=2, 83 | description="The retries it should do. 
These can be overridden by the program, at certain places, and they have to be.", 84 | ), 85 | 86 | EmptyLine(), 87 | 88 | PathAttribute(name="music_directory", default_value=LOCATIONS.MUSIC_DIRECTORY.resolve(), description="The directory, all the music will be downloaded to."), 89 | PathAttribute(name="temp_directory", default_value=LOCATIONS.TEMP_DIRECTORY.resolve(), description="All temporary stuff is gonna be dumped in this directory."), 90 | PathAttribute(name="log_file", default_value=LOCATIONS.get_log_file("download_logs.log").resolve()), 91 | PathAttribute(name="ffmpeg_binary", default_value=LOCATIONS.FFMPEG_BIN.resolve(), description="Set the path to the ffmpeg binary."), 92 | PathAttribute(name="cache_directory", default_value=LOCATIONS.CACHE_DIRECTORY.resolve(), description="Set the path of the cache directory."), 93 | Attribute( 94 | name="not_a_genre_regex", 95 | description="These regular expressions tell music-kraken, which sub-folders of the music-directory\n" 96 | "it should ignore, and not count to genres", 97 | default_value=[ 98 | r'^\.' # is hidden/starts with a "." 99 | ] 100 | ), 101 | 102 | EmptyLine(), 103 | 104 | Attribute(name="happy_messages", default_value=[ 105 | "Support the artist.", 106 | "Star Me: https://github.com/HeIIow2/music-downloader", 107 | "🏳️‍⚧️🏳️‍⚧️ Trans rights are human rights. 🏳️‍⚧️🏳️‍⚧️", 108 | "🏳️‍⚧️🏳️‍⚧️ Trans women are women, trans men are men, and enbies are enbies. 🏳️‍⚧️🏳️‍⚧️", 109 | "🏴‍☠️🏴‍☠️ Unite under one flag, fck borders. 🏴‍☠️🏴‍☠️", 110 | "Join my Matrix Space: https://matrix.to/#/#music-kraken:matrix.org", 111 | "BPJM does cencorship.", 112 | "🏳️‍⚧️🏳️‍⚧️ Protect trans youth. 🏳️‍⚧️🏳️‍⚧️", 113 | "Klassenkampf", 114 | "Rise Proletarians!!" 115 | ], description="""Just some nice and wholesome messages. 116 | If your mindset has traits of a [file corruption], you might not agree. 117 | But anyways... 
Freedom of thought, so go ahead and change the messages."""), 118 | Attribute(name="modify_gc", default_value=True), 119 | Attribute(name="id_bits", default_value=64, description="I really dunno why I even made this a setting.. Modifying this is a REALLY dumb idea."), 120 | Description("🏳️‍⚧️🏳️‍⚧️ Protect trans youth. 🏳️‍⚧️🏳️‍⚧️\n"), 121 | 122 | ), LOCATIONS.get_config_file("main")) 123 | 124 | 125 | class SettingsStructure(TypedDict): 126 | hasnt_yet_started: bool 127 | result_history: bool 128 | history_length: int 129 | happy_messages: List[str] 130 | modify_gc: bool 131 | id_bits: int 132 | refresh_after: int 133 | 134 | # audio 135 | bitrate: int 136 | audio_format: str 137 | sort_by_date: bool 138 | sort_album_by_type: bool 139 | download_path: str 140 | download_file: str 141 | album_type_blacklist: List[str] 142 | 143 | # connection 144 | proxies: List[dict[str, str]] 145 | tries_per_proxy: int 146 | tor: bool 147 | tor_port: int 148 | chunk_size: int 149 | show_download_errors_threshold: float 150 | language: str 151 | user_agent: str 152 | 153 | # artwork 154 | preferred_artwork_resolution: int 155 | 156 | # paths 157 | music_directory: Path 158 | temp_directory: Path 159 | log_file: Path 160 | not_a_genre_regex: List[str] 161 | ffmpeg_binary: Path 162 | cache_directory: Path 163 | -------------------------------------------------------------------------------- /music_kraken/utils/config/config_files/youtube_config.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict, List 2 | from urllib.parse import ParseResult 3 | from logging import Logger 4 | from pathlib import Path 5 | 6 | from ...path_manager import LOCATIONS 7 | from ..config import Config 8 | from ..attributes.attribute import Attribute 9 | from ..attributes.special_attributes import SelectAttribute, PathAttribute, UrlAttribute 10 | 11 | 12 | config = Config(( 13 | Attribute(name="use_youtube_alongside_youtube_music", 
default_value=False, description="""If set to true, it will search youtube through invidious and piped, 14 | despite a direct wrapper for the youtube music INNERTUBE api being implemented. 15 | I my INNERTUBE api wrapper doesn't work, set this to true."""), 16 | UrlAttribute(name="invidious_instance", default_value="https://yt.artemislena.eu", description="""This is an attribute, where you can define the invidious instances, 17 | the youtube downloader should use. 18 | Here is a list of active ones: https://docs.invidious.io/instances/ 19 | Instances that use cloudflare or have source code changes could cause issues. 20 | Hidden instances (.onion) will only work, when setting 'tor=true'."""), 21 | UrlAttribute(name="piped_instance", default_value="https://piped-api.privacy.com.de", description="""This is an attribute, where you can define the pioed instances, 22 | the youtube downloader should use. 23 | Here is a list of active ones: https://github.com/TeamPiped/Piped/wiki/Instances 24 | Instances that use cloudflare or have source code changes could cause issues. 25 | Hidden instances (.onion) will only work, when setting 'tor=true"""), 26 | Attribute(name="sleep_after_youtube_403", default_value=30, description="The time to wait, after youtube returned 403 (in seconds)"), 27 | Attribute(name="youtube_music_api_key", default_value="AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30", description="""This is the API key used by YouTube-Music internally. 28 | Dw. 
if it is empty, Rachel will fetch it automatically for you <333 29 | (she will also update outdated api keys/those that don't work)"""), 30 | Attribute(name="youtube_music_clean_data", default_value=True, description="If set to true, it exclusively fetches artists/albums/songs, not things like user channels etc."), 31 | UrlAttribute(name="youtube_url", default_value=[ 32 | "https://www.youtube.com/", 33 | "https://www.youtu.be/", 34 | "https://music.youtube.com/", 35 | ], description="""This is used to detect, if an url is from youtube, or any alternativ frontend. 36 | If any instance seems to be missing, run music kraken with the -f flag."""), 37 | Attribute(name="use_sponsor_block", default_value=True, description="Use sponsor block to remove adds or simmilar from the youtube videos."), 38 | 39 | Attribute(name="player_url", default_value="https://music.youtube.com/s/player/80b90bfd/player_ias.vflset/en_US/base.js", description=""" 40 | This is needed to fetch videos without invidious 41 | """), 42 | Attribute(name="youtube_music_consent_cookies", default_value={ 43 | "CONSENT": "PENDING+258" 44 | }, description="The cookie with the key CONSENT says to what stuff you agree. 
Per default you decline all cookies, but it honestly doesn't matter."), 45 | 46 | Attribute(name="youtube_music_innertube_context", default_value={ 47 | "client": { 48 | "hl": "en", 49 | "gl": "DE", 50 | "remoteHost": "87.123.241.77", 51 | "deviceMake": "", 52 | "deviceModel": "", 53 | "visitorData": "CgtiTUxaTHpoXzk1Zyia59WlBg%3D%3D", 54 | "userAgent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36", 55 | "clientName": "WEB_REMIX", 56 | "clientVersion": "1.20230710.01.00", 57 | "osName": "X11", 58 | "osVersion": "", 59 | "originalUrl": "https://music.youtube.com/", 60 | "platform": "DESKTOP", 61 | "clientFormFactor": "UNKNOWN_FORM_FACTOR", 62 | "configInfo": { 63 | "appInstallData": "", 64 | "coldConfigData": "", 65 | "coldHashData": "", 66 | "hotHashData": "" 67 | }, 68 | "userInterfaceTheme": "USER_INTERFACE_THEME_DARK", 69 | "timeZone": "Atlantic/Jan_Mayen", 70 | "browserName": "Firefox", 71 | "browserVersion": "115.0", 72 | "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", 73 | "deviceExperimentId": "ChxOekkxTmpnek16UTRNVFl4TkRrek1ETTVOdz09EJrn1aUGGJrn1aUG", 74 | "screenWidthPoints": 584, 75 | "screenHeightPoints": 939, 76 | "screenPixelDensity": 1, 77 | "screenDensityFloat": 1, 78 | "utcOffsetMinutes": 120, 79 | "musicAppInfo": { 80 | "pwaInstallabilityStatus": "PWA_INSTALLABILITY_STATUS_UNKNOWN", 81 | "webDisplayMode": "WEB_DISPLAY_MODE_BROWSER", 82 | "storeDigitalGoodsApiSupportStatus": { 83 | "playStoreDigitalGoodsApiSupportStatus": "DIGITAL_GOODS_API_SUPPORT_STATUS_UNSUPPORTED" 84 | } 85 | } 86 | }, 87 | "user": { "lockedSafetyMode": False }, 88 | "request": { 89 | "useSsl": True, 90 | "internalExperimentFlags": [], 91 | "consistencyTokenJars": [] 92 | }, 93 | "adSignalsInfo": { 94 | "params": [] 95 | } 96 | }, description="Don't bother about this. It is something technical, but if you wanna change the innertube requests... 
@dataclass
class SourceType:
    """Describes one kind of source (a website or service) by name.

    Hashing uses only ``name``. A page object can be attached later through
    ``register_page``.
    """
    name: str
    homepage: Optional[str] = None
    download_priority: int = 0

    page_type: Type[Page] = None
    page: Page = None

    def __hash__(self):
        return hash(self.name)

    @property
    def value(self) -> str:
        # kept for backwards compatibility: ``.value`` is an alias for ``.name``
        return self.name

    @property
    def has_page(self) -> bool:
        """Whether a page has been registered for this source type."""
        return not (self.page is None)

    def register_page(self, page: Page):
        """Attach ``page`` to this source type."""
        self.page = page


class ALL_SOURCE_TYPES:
    """Namespace holding the predefined ``SourceType`` instances."""

    YOUTUBE = SourceType(
        name="youtube",
        homepage="https://music.youtube.com/",
    )
    BANDCAMP = SourceType(
        name="bandcamp",
        homepage="https://bandcamp.com/",
        download_priority=10,
    )
    MUSIFY = SourceType(
        name="musify",
        homepage="https://musify.club/",
        download_priority=7,
    )

    GENIUS = SourceType(name="genius", homepage="https://genius.com/")
    MUSICBRAINZ = SourceType(name="musicbrainz", homepage="https://musicbrainz.org/")
    ENCYCLOPAEDIA_METALLUM = SourceType(name="encyclopaedia metallum")
    DEEZER = SourceType(name="deezer", homepage="https://www.deezer.com/")
    SPOTIFY = SourceType(name="spotify", homepage="https://open.spotify.com/")

    # Not audio related, but bands can have a presence on these
    WIKIPEDIA = SourceType(name="wikipedia", homepage="https://en.wikipedia.org/wiki/Main_Page")
    INSTAGRAM = SourceType(name="instagram", homepage="https://www.instagram.com/")
    FACEBOOK = SourceType(name="facebook", homepage="https://www.facebook.com/")
    TWITTER = SourceType(name="twitter", homepage="https://twitter.com/")
    # This ancient site is still linked everywhere
    MYSPACE = SourceType(name="myspace", homepage="https://myspace.com/")

    MANUAL = SourceType(name="manual")

    PRESET = SourceType(name="preset")
class MKBaseException(Exception):
    """Base class of the exception hierarchy defined in this module."""

    def __init__(self, message: str = None, **kwargs) -> None:
        """
        :param message: text of the error, also available as ``self.message``.
        :param kwargs: extra context, kept on ``self.kwargs``. It is not passed
            on, because ``Exception.__init__`` rejects keyword arguments and
            forwarding them raised a ``TypeError``.
        """
        self.message = message
        self.kwargs = kwargs
        super().__init__(message)


# Downloading
class MKDownloadException(MKBaseException):
    pass


class MKMissingNameException(MKDownloadException):
    pass


# Frontend
class MKFrontendException(MKBaseException):
    pass


class MKInvalidInputException(MKFrontendException):
    pass
class SettingException(Exception):
    """Common base of the setting related exceptions below."""


class SettingNotFound(SettingException):
    """A setting was requested under a name that is not known."""

    def __init__(self, setting_name: str):
        self.setting_name = setting_name

    def __str__(self):
        return f"Setting '{self.setting_name}' not found."


class SettingValueError(SettingException):
    """A value was given that the setting's rule does not allow."""

    def __init__(self, setting_name: str, setting_value: str, rule: str):
        """
        ``rule`` is worded so that this sentence reads naturally:
        {name} {rule}, not '{value}'

        :param setting_name: name of the affected setting
        :param setting_value: the rejected value
        :param rule: what the value has to look like
        """
        self.rule = rule
        self.setting_value = setting_value
        self.setting_name = setting_name

    def __str__(self):
        return f"{self.setting_name} {self.rule}, not '{self.setting_value}'."
class Locations:
    """Resolves, and where needed creates, the directories and files used by the application."""

    @staticmethod
    def _get_env(key: str, default: Path, default_for_windows: bool = True) -> Optional[Path]:
        """Look up a directory by key.

        Sources are tried in this order:

        1. the environment variable ``key.upper()``
        2. ``user-dirs.dirs`` inside ``$XDG_CONFIG_HOME`` (or ``~/.config``)
        3. ``/etc/xdg/user-dirs.defaults``
        4. ``default``

        :param key: name of the variable / entry to look for.
        :param default: fallback if none of the sources has the key.
        :param default_for_windows: if false and the platform is linux,
            ``None`` is returned instead of ``default``.
        :return: the resolved path, or ``None`` in the case described above.
        """
        res = os.environ.get(key.upper())
        if res is not None:
            # Always hand out a Path, as the annotation promises.
            return Path(res)

        # XDG_CONFIG_HOME names a directory; the file lives inside it.
        xdg_user_dirs_file = Path(os.environ.get("XDG_CONFIG_HOME") or Path(Path.home(), ".config"), "user-dirs.dirs")
        xdg_user_dirs_default_file = Path("/etc/xdg/user-dirs.defaults")

        def get_dir_from_xdg_file(xdg_file_path: os.PathLike) -> Optional[Path]:
            """Read ``key`` from one xdg user-dirs file; ``None`` if unavailable."""
            try:
                with open(xdg_file_path, 'r', encoding="utf-8") as f:
                    # The file has no section header, configparser requires one.
                    data = "[XDG_USER_DIRS]\n" + f.read()
                config = configparser.ConfigParser(allow_no_value=True)
                config.read_string(data)
                xdg_config = config['XDG_USER_DIRS']

                raw_value = xdg_config[key.lower()]
                if raw_value is None:
                    # entry without a value (allow_no_value=True)
                    raise KeyError(key)

                return Path(expandvars(raw_value.strip('"')))

            # Best effort lookup: an unreadable, undecodable or malformed file
            # is treated like a missing one.
            except (OSError, KeyError, ValueError, configparser.Error) as e:
                logging.warning(
                    f"Missing file or No entry found for \"{key}\" in: \"{xdg_file_path}\".\n"
                )
                logging.debug(str(e))

            return None

        res = get_dir_from_xdg_file(xdg_user_dirs_file)
        if res is not None:
            return res

        res = get_dir_from_xdg_file(xdg_user_dirs_default_file)
        if res is not None:
            return res

        logging.warning(f"couldn't find a {key}, falling back to: {default}")

        if not default_for_windows and platform == "linux":
            return None

        return default

    def __init__(self, application_name: os.PathLike = "music-kraken"):
        """Compute all locations and create the temp, config and cache directories."""
        self.FILE_ENCODING: str = "utf-8"

        self.TEMP_DIRECTORY = Path(tempfile.gettempdir(), application_name)
        self.TEMP_DIRECTORY.mkdir(exist_ok=True, parents=True)

        self.MUSIC_DIRECTORY = get_music_directory()

        self.CONFIG_DIRECTORY = get_config_directory(str(application_name))
        self.CONFIG_DIRECTORY.mkdir(exist_ok=True, parents=True)
        self.CONFIG_FILE = Path(self.CONFIG_DIRECTORY, f"{application_name}.conf")
        # NOTE(review): identical to CONFIG_FILE; confirm whether another file name was intended.
        self.LEGACY_CONFIG_FILE = Path(self.CONFIG_DIRECTORY, f"{application_name}.conf")

        self.CACHE_DIRECTORY = self._get_env("XDG_CACHE_HOME", Path(Path.home(), ".cache"))
        if self.CACHE_DIRECTORY is None:
            logging.warning(f"Could not find a cache dir. Falling back to the temp dir: {self.TEMP_DIRECTORY}")
            self.CACHE_DIRECTORY = self.TEMP_DIRECTORY
        else:
            self.CACHE_DIRECTORY = Path(self.CACHE_DIRECTORY, application_name)
        self.CACHE_DIRECTORY.mkdir(parents=True, exist_ok=True)

        self.FFMPEG_BIN = Path(FFmpeg(enable_log=False).get_ffmpeg_bin())

    def get_config_file(self, config_name: str) -> Path:
        """Return the path of ``<config_name>.toml`` inside the config directory."""
        return Path(self.CONFIG_DIRECTORY, f"{config_name}.toml")

    def get_log_file(self, file_name: os.PathLike) -> Path:
        """Return the path of ``file_name`` inside the temp directory."""
        return Path(self.TEMP_DIRECTORY, file_name)
def get_music_directory() -> Path:
    """Return the music directory for the current platform.

    On linux the result of ``get_xdg_music_directory`` is used; every other
    platform gets ``DEFAULT_MUSIC_DIRECTORY``.
    """
    on_linux = platform == "linux"
    return get_xdg_music_directory() if on_linux else DEFAULT_MUSIC_DIRECTORY
# regex pattern
URL_PATTERN = r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+"
INT_PATTERN = r"^\d*$"
# Digits, "," and "." only. Inside a character class "|" is a literal pipe,
# not an alternation, so the former "[\d|\,|\.]" also accepted "1|5".
FLOAT_PATTERN = r"^[\d,.]*$"
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
    "(official video)",
)
OPEN_BRACKETS = "(["
CLOSE_BRACKETS = ")]"
DISALLOWED_SUBSTRING_IN_BRACKETS = ("official", "video", "audio", "lyrics", "prod", "remix", "ft", "feat", "ft.", "feat.")


@lru_cache
def unify(string: str) -> Optional[str]:
    """
    Return a unified str, to make comparisons easy.

    A unified string has the following attributes:
    - is lowercase
    - is transliterated to Latin characters from e.g. Cyrillic
    - has its punctuation replaced by ``unify_punctuation`` and the
      surrounding whitespace stripped

    ``None`` is passed through unchanged.
    """
    if string is None:
        return None

    try:
        string = translit(string, reversed=True)
    except LanguageDetectionError:
        # the language could not be detected, the string is kept as it is
        pass

    string = unify_punctuation(string)
    return string.lower().strip()


def fit_to_file_system(string: Union[str, Path], hidden_ok: bool = False) -> Union[str, Path]:
    """
    Make a file name, or every part of a path, safe for the file system.

    :param string: a single name, or a ``Path`` whose parts are cleaned one by one
    :param hidden_ok: if False, leading dots are removed from each name
    :return: the cleaned value, of the same kind (str or Path) as the input
    """

    def fit_string(name: str) -> str:
        # the root of a path has to survive untouched
        if name == "/":
            return "/"
        name = name.strip()

        if not hidden_ok:
            name = name.lstrip(".")

        # Nothing left to clean. The original indexed name[0] before checking
        # the length, so "", "." and ".." raised an IndexError here.
        if not name:
            return name

        name = name.replace("/", "_").replace("\\", "_")

        try:
            name = translit(name, reversed=True)
        except LanguageDetectionError:
            # the language could not be detected, the name is kept as it is
            pass

        return sanitize_filename(name)

    if isinstance(string, Path):
        return Path(*(fit_string(part) for part in string.parts))
    return fit_string(string)


@lru_cache(maxsize=128)
def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) -> str:
    """
    This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos

    cleans:

    - `artist - song` -> `song`
    - `song (Official Video)` -> `song`
    - ` song` -> `song`
    - `song (prod. some producer)`

    :param raw_song_title: the title as it was scraped
    :param artist_name: if given, it is removed from the start of the title,
        provided one of the separators ``-``, ``–``, ``:`` or ``|`` follows it
    :return: the cleaned and stripped title
    """
    raw_song_title = raw_song_title.strip()

    # Clean official Video appendix
    for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:
        if raw_song_title.lower().endswith(dirty_appendix):
            raw_song_title = raw_song_title[:-len(dirty_appendix)].strip()

    # remove brackets and their content if they contain disallowed substrings
    for open_bracket, close_bracket in zip(OPEN_BRACKETS, CLOSE_BRACKETS):
        if open_bracket not in raw_song_title or close_bracket not in raw_song_title:
            continue

        start = 0

        while True:
            try:
                open_bracket_index = raw_song_title.index(open_bracket, start)
                close_bracket_index = raw_song_title.index(close_bracket, open_bracket_index + 1)
            except ValueError:
                # no further complete bracket pair of this kind
                break

            substring = raw_song_title[open_bracket_index + 1:close_bracket_index].lower()
            if any(disallowed_substring in substring for disallowed_substring in DISALLOWED_SUBSTRING_IN_BRACKETS):
                # the title got shorter, so the search resumes from the same start
                raw_song_title = raw_song_title[:open_bracket_index] + raw_song_title[close_bracket_index + 1:]
            else:
                start = close_bracket_index + 1

    # everything that requires the artist name
    if artist_name is not None:
        artist_name = artist_name.strip()

        # Remove artist from the start of the title.
        # An empty artist name is a prefix of every title, so it is skipped;
        # otherwise a leading separator would be cut off for no reason.
        if artist_name and raw_song_title.lower().startswith(artist_name.lower()):
            possible_new_name = raw_song_title[len(artist_name):].strip()

            # all separators are exactly one character long
            if possible_new_name.startswith(("-", "–", ":", "|")):
                raw_song_title = possible_new_name[1:].strip()

    return raw_song_title.strip()


def comment(uncommented_string: str) -> str:
    """Prefix every line of the given string with ``# ``."""
    return "\n".join("# " + frag for frag in uncommented_string.split("\n"))


# comparisons
TITLE_THRESHOLD_LEVENSHTEIN = 1
UNIFY_TO = " " 139 | 140 | ALLOWED_LENGTH_DISTANCE = 20 141 | 142 | 143 | def unify_punctuation(to_unify: str, unify_to: str = UNIFY_TO) -> str: 144 | for char in string.punctuation: 145 | to_unify = to_unify.replace(char, unify_to) 146 | return to_unify 147 | 148 | @lru_cache(maxsize=128) 149 | def hash_url(url: Union[str, ParseResult]) -> str: 150 | if isinstance(url, str): 151 | url = urlparse(url) 152 | 153 | unify_to = "-" 154 | 155 | def unify_part(part: str) -> str: 156 | nonlocal unify_to 157 | return unify_punctuation(part.lower(), unify_to=unify_to).strip(unify_to) 158 | 159 | # netloc 160 | netloc = unify_part(url.netloc) 161 | if netloc.startswith("www" + unify_to): 162 | netloc = netloc[3 + len(unify_to):] 163 | 164 | # query 165 | query = url.query 166 | query_dict: Optional[dict] = None 167 | try: 168 | query_dict: dict = parse_qs(url.query, strict_parsing=True) 169 | except ValueError: 170 | # the query couldn't be parsed 171 | pass 172 | 173 | if isinstance(query_dict, dict): 174 | # sort keys alphabetically 175 | query = "" 176 | for key, value in sorted(query_dict.items(), key=lambda i: i[0]): 177 | query += f"{key.strip()}-{''.join(i.strip() for i in value)}" 178 | 179 | r = f"{netloc}_{unify_part(url.path)}_{unify_part(query)}" 180 | r = r.lower().strip() 181 | return r 182 | 183 | 184 | def remove_feature_part_from_track(title: str) -> str: 185 | if ")" != title[-1]: 186 | return title 187 | if "(" not in title: 188 | return title 189 | 190 | return title[:title.index("(")] 191 | 192 | 193 | def modify_title(to_modify: str) -> str: 194 | to_modify = to_modify.strip() 195 | to_modify = to_modify.lower() 196 | to_modify = remove_feature_part_from_track(to_modify) 197 | to_modify = unify_punctuation(to_modify) 198 | return to_modify 199 | 200 | 201 | def match_titles(title_1: str, title_2: str): 202 | title_1, title_2 = modify_title(title_1), modify_title(title_2) 203 | distance = jellyfish.levenshtein_distance(title_1, title_2) 204 | return 
distance > TITLE_THRESHOLD_LEVENSHTEIN, distance 205 | 206 | 207 | def match_artists(artist_1, artist_2: str): 208 | if type(artist_1) == list: 209 | distances = [] 210 | 211 | for artist_1_ in artist_1: 212 | match, distance = match_titles(artist_1_, artist_2) 213 | if not match: 214 | return match, distance 215 | 216 | distances.append(distance) 217 | return True, min(distances) 218 | return match_titles(artist_1, artist_2) 219 | 220 | def match_length(length_1: int | None, length_2: int | None) -> bool: 221 | # returning true if either one is Null, because if one value is not known, 222 | # then it shouldn't be an attribute which could reject an audio source 223 | if length_1 is None or length_2 is None: 224 | return True 225 | return abs(length_1 - length_2) <= ALLOWED_LENGTH_DISTANCE 226 | 227 | def shorten_display_url(url: str, max_length: int = 150, chars_at_end: int = 4, shorten_string: str = "[...]") -> str: 228 | if len(url) <= max_length + chars_at_end + len(shorten_string): 229 | return url 230 | 231 | return url[:max_length] + shorten_string + url[-chars_at_end:] 232 | -------------------------------------------------------------------------------- /music_kraken/utils/support_classes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kur01234/music-downloader/665bf0e47517071450b6f25dffbb81fe21944ead/music_kraken/utils/support_classes/__init__.py -------------------------------------------------------------------------------- /music_kraken/utils/support_classes/download_result.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import List, Tuple 3 | 4 | from ...utils.config import main_settings, logging_settings 5 | from ...utils.enums.colors import BColors 6 | from ...objects import Target 7 | 8 | UNIT_PREFIXES: List[str] = ["", "k", "m", "g", "t"] 9 | UNIT_DIVISOR = 1024 10 | 11 | 
LOGGER = logging_settings["download_logger"]


@dataclass
class DownloadResult:
    """
    Collects the outcome of one or many downloads.

    Results are combined with ``merge``. A result with an ``error_message``
    counts as fatal and is merged as exactly one failed download.
    """
    total: int = 0
    fail: int = 0
    sponsor_segments: int = 0
    error_message: str = None
    # Without an annotation this was a plain class attribute and no dataclass
    # field, so it was missing from the repr and the comparison.
    # init=False keeps the constructor signature exactly as it was.
    total_size: int = field(default=0, init=False)
    found_on_disk: int = 0

    _error_message_list: List[str] = field(default_factory=list)

    @property
    def success(self) -> int:
        return self.total - self.fail

    @property
    def success_percentage(self) -> float:
        if self.total == 0:
            return 0
        return self.success / self.total

    @property
    def failure_percentage(self) -> float:
        # no downloads at all are reported as a complete failure
        if self.total == 0:
            return 1
        return self.fail / self.total

    @property
    def is_fatal_error(self) -> bool:
        return self.error_message is not None

    @property
    def is_mild_failure(self) -> bool:
        if self.is_fatal_error:
            return True

        return self.failure_percentage > main_settings["show_download_errors_threshold"]

    def _size_val_unit_pref_ind(self, val: float, ind: int) -> Tuple[float, int]:
        """
        Scale ``val`` down by ``UNIT_DIVISOR`` until it fits the unit prefix.

        :param val: the size to scale
        :param ind: the index in ``UNIT_PREFIXES`` that ``val`` currently belongs to
        :return: the scaled value and the index of its unit prefix
        """
        # Stop at the LAST prefix. The original stopped one step later and
        # returned an index past the end of UNIT_PREFIXES, which made
        # formated_size raise an IndexError for very large sizes.
        while val >= UNIT_DIVISOR and ind < len(UNIT_PREFIXES) - 1:
            val /= UNIT_DIVISOR
            ind += 1
        return val, ind

    @property
    def formated_size(self) -> str:
        total_size, prefix_index = self._size_val_unit_pref_ind(self.total_size, 0)
        return f"{total_size:.2f} {UNIT_PREFIXES[prefix_index]}B"

    def add_target(self, target: Target):
        self.total_size += target.size

    def merge(self, other: "DownloadResult"):
        """Add the counters of ``other`` to this result."""
        if other.is_fatal_error:
            # a fatal result is counted as one failed download,
            # its own counters are ignored
            LOGGER.debug(other.error_message)
            self._error_message_list.append(other.error_message)
            self.total += 1
            self.fail += 1
        else:
            self.total += other.total
            self.fail += other.fail
            self._error_message_list.extend(other._error_message_list)

        self.sponsor_segments += other.sponsor_segments
        self.total_size += other.total_size
        self.found_on_disk += other.found_on_disk

    def __str__(self):
        if self.is_fatal_error:
            return self.error_message
        head = f"{self.fail} from {self.total} downloads failed:\n" \
               f"success-rate:\t{int(self.success_percentage * 100)}%\n" \
               f"fail-rate:\t{int(self.failure_percentage * 100)}%\n" \
               f"total size:\t{self.formated_size}\n" \
               f"skipped segments:\t{self.sponsor_segments}\n" \
               f"found on disc:\t{self.found_on_disk}"

        # the single error messages are only shown above the configured threshold
        if not self.is_mild_failure:
            return head

        _lines = [head]
        _lines.extend(BColors.FAIL.value + s + BColors.ENDC.value for s in self._error_message_list)
        return "\n".join(_lines)
-------------------------------------------------------------------------------- 1 | # Which "Modules" do I have 2 | 3 | ## Overview 4 | 5 | - fetching of metadata 6 | - creating the target paths/filenames 7 | - fetching of the download sources 8 | - downloading of the mp3 9 | - fetching of the lyrics 10 | 11 | ## Naming and Structure of Modules 12 | 13 | - utils 14 | - shared (equivalent to global variables and constants) 15 | - config 16 | - database 17 | - some static methods that are in general useful for me 18 | - tagging 19 | - song (within a class Song used to get and set the metadata of mp3 files) 20 | - metadata 21 | - search 22 | - fetch 23 | - target 24 | - audio_source 25 | - fetch_source 26 | - fetch_audio 27 | - sources 28 | - musify 29 | - youtube 30 | - lyrics 31 | - lyrics 32 | - genius (will eventually be moved into a folder with lyric sources) 33 | 34 | # Wrong Audio 35 | - Crystal F - Hanging Man 36 | 37 | # Did not find whole release 38 | - Crystal F - Trail Mix 2 (Vollmond) 39 | - Crystal F - Was ist blos mit Hauke los 40 | - Psychonaut 4 - Neurasthenia (Sweet Decadence) 41 | 42 | # Audio Formats 43 | 44 | [Wikipedia](https://en.wikipedia.org/wiki/Audio_file_format) 45 | 46 | > It is important to distinguish between the audio coding format, the container containing the raw audio data, and an audio codec. 47 | 48 | ## audio coding format 49 | 50 | [Wikipedia](https://en.wikipedia.org/wiki/Audio_coding_format) 51 | 52 | The audio coding format is a format, which tries to store audio in a minimal space, while still allowing a decent quality. 53 | 54 | There are two types: 55 | 1. lossless compression 56 | 2. lossy compression 57 | 58 | 59 | [Here is a comparison of audio coding formats.](https://en.wikipedia.org/wiki/Comparison_of_audio_coding_formats) 60 | 61 | The two criteria which NEED to be met are: 62 | 1. open source 63 | 2. 
the encoder as well as the player need to be free to use 64 | 65 | ## container 66 | 67 | [Wikipedia](https://en.wikipedia.org/wiki/Container_format) 68 | 69 | A container embeds the metadata in a file. In this case I am interested in containers regarding audio. 70 | 71 | ### considerations 72 | 73 | Important differences between containers, that I need to consider when choosing one, *cuz I only implement this shit once*, are: 74 | 1. advanced content, meaning which fields there are, for example title, length, ... 75 | 2. popularity *(the more popular it is, the better will be the support between different software)* 76 | 3. support for different [codec features](#audio-codec) *(older codecs might not support newer frames)* 77 | 4. overhead *(different file size with different data, though this certainly is less of an issue with audio)* 78 | 5. support streaming the media 79 | 80 | These distinctions are sorted from top to bottom. 81 | 82 | 83 | ## audio codec 84 | 85 | [Wikipedia](https://en.wikipedia.org/wiki/Audio_codec) 86 | 87 | The audio codec is simply software/hardware, to convert audio from an [audio coding format](#audio-coding-format) to playable audio and vice versa. 88 | 89 | # ID3 90 | 91 | [ID3](https://en.wikipedia.org/wiki/ID3) is a metadata container format for audio. I am going for [ID3v2.4](https://en.wikipedia.org/wiki/ID3#ID3v2) 92 | 93 | An Application can define its own types of frames. 94 | 95 | There are standard frames for containing cover art, BPM, copyright and license, lyrics, and arbitrary text and URL data, as well as other things. 96 | 97 | Version 2.4 of the specification prescribes that all text fields (the fields that start with a T, except for TXXX) can contain multiple values separated by a null character. The null character varies by [character encoding](https://en.wikipedia.org/wiki/Character_encoding). 
98 | 99 | [id3 fields docs](https://docs.puddletag.net/source/id3.html) 100 | 101 | [forum](https://hydrogenaud.io/index.php/topic,51504.0.html) 102 | > Hence, for the best possible compatibility I recommend writing ID3v2.3 tags in the ISO 8859 format. 103 | 104 | [ID3 tag mapping](https://wiki.hydrogenaud.io/index.php?title=Foobar2000:ID3_Tag_Mapping) 105 | 106 | ## Frames 107 | 108 | A frame Name is composed from 4 capital letters $XXXX$ 109 | 110 | The first letter of text frames is $TXXX$ 111 | 112 | --- 113 | 114 | # TODO 115 | 116 | - Add pprint to the song objects 117 | - DOCUMENTATION 118 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-requirements-txt", "hatch-vcs"] 3 | build-backend = "hatchling.build" 4 | 5 | [tool.hatch.build] 6 | directory = "dist" 7 | 8 | [tool.hatch.build.targets.sdist] 9 | include = ["music_kraken/*.py", "music_kraken/**/*.py" ] 10 | 11 | [tool.hatch.build.targets.wheel] 12 | packages = ["music_kraken"] 13 | 14 | [project.scripts] 15 | music-kraken = "music_kraken.__main__:cli" 16 | 17 | [tool.hatch.version] 18 | source = "vcs" 19 | path = "music_kraken/_version.py" 20 | fallback-version = "0.0.0" 21 | 22 | [tool.hatch.version.raw-options] 23 | local_scheme = "no-local-version" 24 | 25 | [tool.hatch.build.hooks.vcs] 26 | version-file = "music_kraken/_version.py" 27 | 28 | [project] 29 | name = "music-kraken" 30 | description = "An extensive music downloader crawling the internet. It gets its metadata from a couple of metadata providers, and it scrapes the audiofiles." 
31 | authors = [{ name = "Hellow2", email = "hazel_is_cute@proton.me" }] 32 | license = "AGPL-3.0-or-later" 33 | readme = "README.md" 34 | repository = "https://github.com/HeIIow2/music-downloader" 35 | requires-python = ">=3.10" 36 | classifiers = [ 37 | "Development Status :: 4 - Beta", 38 | "Environment :: Console", 39 | "Intended Audience :: End Users/Desktop", 40 | "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", 41 | "Natural Language :: English", 42 | "Operating System :: OS Independent", 43 | "Programming Language :: Python :: 3.10", 44 | "Topic :: Multimedia :: Sound/Audio", 45 | "Topic :: Utilities", 46 | ] 47 | dependencies = [ 48 | "requests~=2.31.0", 49 | "responses~=0.24.1", 50 | "beautifulsoup4~=4.11.1", 51 | 52 | "pyffmpeg~=2.4.2.18.1", 53 | "ffmpeg-progress-yield~=0.7.8", 54 | "mutagen~=1.46.0", 55 | "pillow~=10.3.0", 56 | 57 | "rich~=13.7.1", 58 | "mistune~=3.0.2", 59 | "markdownify~=0.12.1", 60 | "html2markdown~=0.1.7", 61 | "jellyfish~=0.9.0", 62 | "transliterate~=1.10.2", 63 | "pycountry~=23.12.11", 64 | 65 | "python-dotenv~=1.0.1", 66 | "tqdm~=4.65.0", 67 | "platformdirs~=4.2.0", 68 | "pathvalidate~=2.5.2", 69 | "toml~=0.10.2", 70 | "typing_extensions~=4.7.1", 71 | 72 | "python-sponsorblock~=0.1", 73 | "youtube_dl", 74 | ] 75 | dynamic = [ 76 | "version" 77 | ] 78 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | build 2 | twine 3 | hatch 4 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kur01234/music-downloader/665bf0e47517071450b6f25dffbb81fe21944ead/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_collection.py: 
-------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from music_kraken.objects import Song, Album, Artist, Collection, Country 4 | 5 | class TestCollection(unittest.TestCase): 6 | def test_song_contains_album(self): 7 | """ 8 | Tests that every song contains the album it is added to in its album_collection 9 | """ 10 | 11 | a_1 = Album( 12 | title="album", 13 | song_list= [ 14 | Song(title="song"), 15 | ] 16 | ) 17 | a_2 = a_1.song_collection[0].album_collection[0] 18 | self.assertTrue(a_1.id == a_2.id) 19 | 20 | def test_album_contains_song(self): 21 | """ 22 | Tests that every album contains the song it is added to in its song_collection 23 | """ 24 | s_1 = Song( 25 | title="song", 26 | album_list=[ 27 | Album(title="album"), 28 | ] 29 | ) 30 | s_2 = s_1.album_collection[0].song_collection[0] 31 | self.assertTrue(s_1.id == s_2.id) 32 | 33 | 34 | def test_auto_add_artist_to_album_feature_artist(self): 35 | """ 36 | Tests that every artist is added to the album's feature_artist_collection per default 37 | """ 38 | 39 | a_1 = Artist( 40 | name="artist", 41 | album_list=[ 42 | Album(title="album") 43 | ] 44 | ) 45 | a_2 = a_1.album_collection[0].feature_artist_collection[0] 46 | 47 | self.assertTrue(a_1.id == a_2.id) 48 | 49 | def test_auto_add_artist_to_album_feature_artist_push(self): 50 | """ 51 | Tests that every artist is added to the album's feature_artist_collection per default but pulled into the album's artist_collection if a merge exitst 52 | """ 53 | 54 | a_1 = Artist( 55 | name="artist", 56 | album_list=[ 57 | Album( 58 | title="album", 59 | artist_list=[ 60 | Artist(name="artist"), 61 | ] 62 | ) 63 | ] 64 | ) 65 | a_2 = a_1.album_collection[0].artist_collection[0] 66 | 67 | self.assertTrue(a_1.id == a_2.id) 68 | 69 | 70 | def test_artist_artist_relation(self): 71 | """ 72 | Tests the proper syncing between album.artist_collection and song.artist_collection 73 | """ 74 | 75 | album = Album( 76 | 
import unittest

from music_kraken.utils.string_processing import hash_url


class TestCollection(unittest.TestCase):
    """Tests for ``hash_url`` from ``music_kraken.utils.string_processing``."""

    def test_remove_schema(self):
        """
        The schema of the url is not part of the hash.
        """
        # every url is checked against its OWN schema; checking all of them
        # against "https" would never fail for ftp, sftp and http
        for schema in ("https", "ftp", "sftp", "http"):
            hashed = hash_url(f"{schema}://www.youtube.com/watch?v=3jZ_D3ELwOQ")
            self.assertFalse(hashed.startswith(schema))

    def test_no_punctuation(self):
        """
        Punctuation of the url doesn't survive in the hash.
        """
        # assertNotIn takes (member, container). The arguments were swapped,
        # so it was checked that the whole hash isn't part of the short
        # literal, which passes regardless of what hash_url returns.
        self.assertNotIn("you_tube", hash_url("https://www.you_tube.com/watch?v=3jZ_D3ELwOQ"))
        self.assertNotIn(".", hash_url("https://docs.gitea.com/next/install.ation/comparison"))

    def test_three_parts(self):
        """
        The url is parsed into three parts [netloc; path; query]
        Which are then appended to each other with an underscore between.
        """

        self.assertTrue(hash_url("https://duckduckgo.com/?t=h_&q=dfasf&ia=web").count("_") == 2)

    def test_sort_query(self):
        """
        The query is sorted alphabetically
        """
        hashed = hash_url("https://duckduckgo.com/?t=h_&q=dfasf&ia=web")
        sorted_keys = ["ia-", "q-", "t-"]

        self.assertTrue(hashed.index(sorted_keys[0]) < hashed.index(sorted_keys[1]) < hashed.index(sorted_keys[2]))


if __name__ == "__main__":
    unittest.main()