> p & 1 for x, y in zip(samples[::2], samples[1::2]))
86 | for p in range(16, 24)
87 | )
88 |
89 |     if any(MAGIC in s for s in streams):
90 |         return True
91 |     return False
92 |
--------------------------------------------------------------------------------
/salmon/checks/upconverts.py:
--------------------------------------------------------------------------------
1 | import math
2 | import os
3 | import re
4 | import subprocess
5 |
6 | import click
7 | import mutagen
8 |
9 | from salmon.errors import NotAValidInputFile
10 |
11 |
12 | def upload_upconvert_test(path):
13 | any_upconverts = test_upconverted(path)
14 | if any_upconverts:
15 | if click.confirm(
16 | click.style(
17 | "Possible upconverts detected. Would you like to quit uploading?",
18 | fg="red",
19 | ),
20 | default=True,
21 | ):
22 | raise click.Abort
23 | else:
24 | click.secho(
25 | click.style(
26 |                 "No upconverts detected (this is not a 100 percent accurate process).",
27 | fg="green",
28 | ),
29 | )
30 |
31 |
32 | def test_upconverted(path):
33 | if os.path.isfile(path):
34 | return _upconvert_check_handler(path)
35 | elif os.path.isdir(path):
36 | any_upconverts = False
37 | for root, _, figles in os.walk(path):
38 | for f in figles:
39 | if f.lower().endswith(".flac"):
40 | filepath = os.path.join(root, f)
41 | click.secho(f"\nChecking {filepath}...", fg="cyan")
42 | if _upconvert_check_handler(filepath):
43 | any_upconverts = True
44 | return any_upconverts
45 |
46 |
47 | def _upconvert_check_handler(filepath):
48 | try:
49 | upconv, wasted_bits, bitdepth = check_upconvert(filepath)
50 | except NotAValidInputFile as e:
51 | click.secho(str(e), fg="yellow")
52 | else:
53 | if upconv:
54 | click.secho(
55 |                 "This file was likely upconverted from a lower bit depth. "
56 | f"Wasted bits: {wasted_bits}/{bitdepth}",
57 | fg="red",
58 | bold=True,
59 | )
60 | else:
61 | click.secho(
62 | f"This file does not have a high number of wasted bits. "
63 | f"Wasted bits: {wasted_bits}/{bitdepth}",
64 | fg="green",
65 | )
66 | return upconv
67 |
68 |
69 | def check_upconvert(filepath):
70 | try:
71 | mut = mutagen.File(filepath)
72 | bitdepth = mut.info.bits_per_sample
73 | except AttributeError:
74 | raise NotAValidInputFile("This is not a FLAC file.")
75 |
76 | if bitdepth == 16:
77 | raise NotAValidInputFile("This is a 16bit FLAC file.")
78 |
79 | with open(os.devnull, "w") as devnull:
80 | response = subprocess.check_output(
81 | ["flac", "-ac", filepath], stderr=devnull
82 | ).decode("utf-8")
83 |
84 | wasted_bits_list = []
85 | for line in response.split("\n"):
86 | r = re.search(r"wasted_bits=(\d+)", line)
87 | if r:
88 | wasted_bits_list.append(int(r[1]))
89 |     # Some inputs yield no wasted_bits lines; avoid a ZeroDivisionError.
90 |     wasted_bits = math.ceil(sum(wasted_bits_list) / len(wasted_bits_list)) if wasted_bits_list else 0
91 | if wasted_bits >= 8:
92 | return True, wasted_bits, bitdepth
93 | else:
94 | return False, wasted_bits, bitdepth
95 |
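The detection relies on flac's analysis mode: `flac -a -c` (spelled `-ac` above) prints per-subframe statistics, and each `wasted_bits=N` field counts low-order bits that carry no signal. A minimal standalone sketch of the same heuristic, assuming the `flac` binary is on PATH (the track path is hypothetical):

    import re
    import subprocess

    def average_wasted_bits(filepath):
        # Run flac's analysis mode and collect every wasted_bits field.
        out = subprocess.check_output(
            ["flac", "-ac", filepath], stderr=subprocess.DEVNULL
        ).decode("utf-8")
        found = [int(m) for m in re.findall(r"wasted_bits=(\d+)", out)]
        return sum(found) / len(found) if found else 0.0

    # A 24bit file averaging >= 8 wasted bits was likely padded up from 16bit.
    print(average_wasted_bits("track.flac"))
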
--------------------------------------------------------------------------------
/salmon/common/__init__.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import sys
3 |
4 | import click
5 | from requests import RequestException
6 |
7 | from salmon.common.aliases import AliasedCommands # noqa: F401
8 | from salmon.common.constants import RE_FEAT # noqa: F401
9 | from salmon.common.figles import ( # noqa: F401
10 | compress,
11 | create_relative_path,
12 | get_audio_files,
13 | alac_to_flac,
14 | )
15 | from salmon.common.regexes import ( # noqa: F401
16 | parse_copyright,
17 | re_split,
18 | re_strip,
19 | )
20 | from salmon.common.strings import ( # noqa: F401
21 | fetch_genre,
22 | less_uppers,
23 | make_searchstrs,
24 | normalize_accents,
25 | strip_template_keys,
26 | truncate,
27 | format_size,
28 | )
29 | from salmon.errors import ScrapeError
30 |
31 | loop = asyncio.get_event_loop()
32 |
33 |
34 | @click.group(
35 | context_settings=dict(help_option_names=["-h", "--help"]), cls=AliasedCommands
36 | )
37 | def commandgroup():
38 | pass
39 |
40 |
41 | class Prompt:
42 | # https://stackoverflow.com/a/35514777
43 |
44 | def __init__(self):
45 | self.q = asyncio.Queue()
46 | self.reader_added = False
47 |
48 | def got_input(self):
49 | asyncio.create_task(self.q.put(sys.stdin.readline()))
50 |
51 | async def __call__(self, msg, end="\n", flush=False):
52 | if not self.reader_added:
53 | loop.add_reader(sys.stdin, self.got_input)
54 | self.reader_added = True
55 | print(msg, end=end, flush=flush)
56 | return (await self.q.get()).rstrip("\n")
57 |
58 |
59 | prompt_async = Prompt()
60 |
61 |
62 | def flush_stdin():
63 | try:
64 | from termios import tcflush, TCIOFLUSH
65 |
66 | tcflush(sys.stdin, TCIOFLUSH)
67 | except: # noqa E722
68 | try:
69 | import msvcrt
70 |
71 | while msvcrt.kbhit():
72 | msvcrt.getch()
73 | except: # noqa E722
74 | pass
75 |
76 |
77 | def str_to_int_if_int(string, zpad=False):
78 | if string.isdigit():
79 | if zpad:
80 | return f"{int(string):02d}"
81 | return int(string)
82 | return string
83 |
84 |
85 | async def handle_scrape_errors(task, mute=False):
86 | try:
87 | return await task
88 | except (ScrapeError, RequestException, KeyError) as e:
89 | if not mute:
90 | click.secho(f"Error message: {e}", fg="red", bold=True)
91 |
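`Prompt` registers stdin with the event loop via `add_reader`, so awaiting user input does not block other coroutines. A minimal usage sketch, assuming a Unix terminal (the coroutine itself is hypothetical):

    from salmon.common import loop, prompt_async

    async def greet():
        # Prints the message, then awaits a full stdin line asynchronously.
        name = await prompt_async("Name? ", end="", flush=True)
        print(f"Hello, {name}")

    loop.run_until_complete(greet())
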
--------------------------------------------------------------------------------
/salmon/common/aliases.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 | COMMAND_ALIASES = {
4 | "list": "ls",
5 | "upl": "up",
6 | "upload": "up",
7 | "down": "dl",
8 | "download": "dl",
9 | "delete": "rm",
10 | "del": "rm",
11 | "remove": "rm",
12 | }
13 |
14 |
15 | class AliasedCommands(click.Group):
16 | def get_command(self, ctx, cmd_name):
17 | rv = click.Group.get_command(self, ctx, cmd_name)
18 | if rv is not None:
19 | return rv
20 | try:
21 | return click.Group.get_command(self, ctx, COMMAND_ALIASES[cmd_name])
22 | except KeyError:
23 | return None
24 |
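`get_command` first tries the typed name, then falls back to `COMMAND_ALIASES`, so `delete`, `del` and `remove` all reach `rm`. A small sketch (the `rm` command body is hypothetical):

    import click

    from salmon.common.aliases import AliasedCommands

    @click.group(cls=AliasedCommands)
    def cli():
        pass

    @cli.command(name="rm")
    def rm():
        # Reachable as `rm`, `delete`, `del` or `remove`.
        click.echo("removed")

    if __name__ == "__main__":
        cli()
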
--------------------------------------------------------------------------------
/salmon/common/constants.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | SPLIT_CHARS = (
4 | r" \ ",
5 | "/",
6 | "; ",
7 | " & ",
8 | ", ",
9 | )
10 |
11 | COPYRIGHT_SEARCHES = (
12 | r"marketed by (.+?) under",
13 | r"(?:, )?under(?: exclusive)? licen(?:s|c)e to ([^,]+)",
14 | r"d/b/a (.+)",
15 | )
16 |
17 | COPYRIGHT_SUBS = (
18 | r".*(℗|©|\([pc]\))+",
19 | r"^(19|20)\d{2}",
20 | r"(, )?a division of.+",
21 | r"(, )?a .+company.+",
22 | r"all rights reserved.*",
23 | r"(,? )?llc",
24 | r"(,? )ltd",
25 | r"distributed by.+",
26 | r" inc.+$",
27 | r", a division of.+",
28 | r" +for the.+",
29 | r"[,\.]$",
30 | r"^ *, *",
31 | r"^Copyright ",
32 | r"(- )?(an )?imprint of.+",
33 | r"\d+ records dk2?",
34 | )
35 |
36 |
37 | RE_FEAT = re.compile(
38 | r" [\(\[\{]?(?:f(?:ea)?t(?:uring)?\.?|with\.) ([^\)\]\}]+)[\)\]\}]?",
39 | flags=re.IGNORECASE,
40 | )
41 | _RE_SPLIT = re.compile("|".join(re.escape(s) for s in SPLIT_CHARS))
42 |
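For illustration, a few title shapes `RE_FEAT` is built to capture; the examples are assumptions, not project fixtures:

    from salmon.common.constants import RE_FEAT

    for title in ("Song (feat. Guest)", "Song [ft. Guest]", "Song featuring Guest"):
        match = RE_FEAT.search(title)
        print(match[1] if match else None)  # -> "Guest" in all three cases
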
--------------------------------------------------------------------------------
/salmon/common/figles.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 |
4 | from salmon import config
5 |
6 |
7 | def get_audio_files(path):
8 | """
9 | Iterate over a path and return all the files that match the allowed
10 | audio file extensions.
11 | """
12 | files = []
13 | for root, folders, files_ in os.walk(path):
14 | files += [
15 | create_relative_path(root, path, f)
16 | for f in files_
17 | if os.path.splitext(f.lower())[1] in {".flac", ".mp3", ".m4a"}
18 | ]
19 | return sorted(files)
20 |
21 |
22 | def create_relative_path(root, path, filename):
23 | """
24 | Create a relative path to a filename. For example, given:
25 | root = '/home/xxx/Tidal/Album/Disc 1'
26 | path = '/home/xxx/Tidal/Album'
27 | filename = '01. Track.flac'
28 | 'Disc 1/01. Track.flac' would be returned.
29 | """
30 | return os.path.join(
31 | root.split(path, 1)[1][1:], filename
32 | ) # [1:] to get rid of the slash.
33 |
34 |
35 | def compress(filepath):
36 | """Re-compress a .flac file with the configured level."""
37 | with open(os.devnull, "w") as devnull:
38 | subprocess.call(
39 | [
40 | "flac",
41 | f"-{config.FLAC_COMPRESSION_LEVEL}",
42 | filepath,
43 | "-o",
44 | f"{filepath}.new",
45 | "--delete-input-file",
46 | ],
47 | stdout=devnull,
48 | stderr=devnull,
49 | )
50 | os.rename(f"{filepath}.new", filepath)
51 |
52 |
53 | def alac_to_flac(filepath):
54 |     """Convert an ALAC file to FLAC, replacing the original file in place."""
55 | with open(os.devnull, "w") as devnull:
56 | subprocess.call(
57 | [
58 | "ffmpeg",
59 | # "-y",
60 | "-i",
61 | filepath,
62 | "-acodec",
63 | "flac",
64 | f"{filepath}.flac",
65 | # "--delete-input-file",
66 | ],
67 | stdout=devnull,
68 | stderr=devnull,
69 | )
70 |     os.rename(f"{filepath}.flac", filepath)  # Overwrite the original, keeping its name.
71 |
--------------------------------------------------------------------------------
/salmon/common/regexes.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from salmon.common.constants import (
4 | _RE_SPLIT,
5 | COPYRIGHT_SEARCHES,
6 | COPYRIGHT_SUBS,
7 | )
8 |
9 |
10 | def re_strip(*strs, filter_nonscrape=True):
11 |     """Return a joined, lowercased string with separators and (optionally) non-scrape characters stripped."""
12 | str_ = " ".join(re.sub(r"[/\-\\,]", " ", (s or "").lower()) for s in strs)
13 | while " " in str_:
14 | str_ = str_.replace(" ", " ")
15 | if filter_nonscrape:
16 | return re.sub(r"[\.\(\)]", "", str_)
17 | return str_
18 |
19 |
20 | def re_split(stri):
21 | """
22 | Return a list of strings split based on characters commonly utilized
23 | as separators stored in a constant.
24 | """
25 | return [s.strip() for s in _RE_SPLIT.split(stri) if s.strip()]
26 |
27 |
28 | def parse_copyright(copyright):
29 | """
30 | Filter out a bunch of shit from the copyright fields provided on iTunes
31 | and Tidal pages. Their copyright info does not always accurately represent
32 | the label, but it's the best we can do.
33 | """
34 | if not copyright:
35 | return ""
36 | for search in COPYRIGHT_SEARCHES:
37 | res = re.search(search, copyright, flags=re.IGNORECASE)
38 | if res:
39 | copyright = res[1]
40 | for sub in COPYRIGHT_SUBS:
41 | copyright = re.sub(sub, "", copyright, flags=re.IGNORECASE).strip()
42 | # In case labels are being combined with /, take only the first one.
43 | if "/" in copyright:
44 | copyright = copyright.split("/")[0].strip()
45 | return copyright or None
46 |
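An assumed example of the copyright cleanup: one of the `COPYRIGHT_SEARCHES` keeps only the licensee, then the `COPYRIGHT_SUBS` strip the corporate suffix:

    from salmon.common.regexes import parse_copyright

    print(parse_copyright("℗ 2019 Indie Ltd, under exclusive license to Big Label LLC"))
    # -> "Big Label" (the license search captures the licensee; subs drop "LLC")
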
--------------------------------------------------------------------------------
/salmon/common/strings.py:
--------------------------------------------------------------------------------
1 | import re
2 | import unicodedata
3 |
4 | from salmon.common.regexes import re_strip
5 | from salmon.constants import GENRE_LIST
6 | from salmon.errors import GenreNotInWhitelist
7 |
8 |
9 | def make_searchstrs(artists, album, normalize=False):
10 | artists = [a for a, i in artists if i == "main"]
11 | album = album or ""
12 |     album = re.sub(r" ?(- )? ?(EP|Single)$", "", album)
13 | album = re.sub(r"\(?[Ff]eat(\.|uring)? [^\)]+\)?", "", album)
14 |
15 | if len(artists) > 3 or (artists and any("Various" in a for a in artists)):
16 | search = re_strip(album, filter_nonscrape=False)
17 | elif len(artists) == 1:
18 | search = re_strip(artists[0], album, filter_nonscrape=False)
19 | elif len(artists) <= 3:
20 | search = [re_strip(art, album, filter_nonscrape=False) for art in artists]
21 | return normalize_accents(*search) if normalize else search
22 | return [normalize_accents(search) if normalize else search]
23 |
24 |
25 | def normalize_accents(*strs):
26 | return_strings = []
27 | for str_ in strs:
28 | nkfd_form = unicodedata.normalize("NFKD", str_)
29 | return_strings.append(
30 | "".join(c for c in nkfd_form if not unicodedata.combining(c))
31 | )
32 | if not return_strings:
33 | return ""
34 | return return_strings if len(return_strings) > 1 else return_strings[0]
35 |
36 |
37 | def less_uppers(one, two):
38 |     """Return whichever string has more lowercase letters."""
39 | one_count = sum(1 for c in one if c.islower())
40 | two_count = sum(1 for c in two if c.islower())
41 | return one if one_count >= two_count else two
42 |
43 |
44 | def strip_template_keys(template, key):
45 | """Strip all unused brackets from the folder name."""
46 | folder = re.sub(r" *[\[{\(]*{" + key + r"}[\]}\)]* *", " ", template).strip()
47 | return re.sub(r" *- *$", "", folder)
48 |
49 |
50 | def fetch_genre(genre):
51 | key_search = re.sub(r"[^a-z]", "", genre.lower().replace("&", "and"))
52 | try:
53 | return GENRE_LIST[key_search]
54 | except KeyError:
55 | raise GenreNotInWhitelist
56 |
57 |
58 | def truncate(string, length):
59 |     if len(string) <= length:
60 | return string
61 | return f"{string[:length - 3]}..."
62 |
63 |
64 | def format_size(num, suffix='B'):
65 | for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
66 | if abs(num) < 1024.0:
67 | return "%3.1f %s%s" % (num, unit, suffix)
68 | num /= 1024.0
69 | return "%.1f %s%s" % (num, 'Yi', suffix)
70 |
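Assumed quick examples of the helpers above:

    from salmon.common.strings import format_size, normalize_accents, truncate

    print(normalize_accents("Beyoncé"))               # -> "Beyonce"
    print(truncate("A fairly long album title", 15))  # -> "A fairly lon..."
    print(format_size(123_456_789))                   # -> "117.7 MiB"
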
--------------------------------------------------------------------------------
/salmon/converter/__init__.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 | from salmon.common import commandgroup
4 | from salmon.converter.downconverting import convert_folder
5 | from salmon.converter.transcoding import transcode_folder
6 |
7 | VALID_TRANSCODE_BITRATES = ["V0", "320"]
8 |
9 |
10 | def validate_bitrate(ctx, param, value):
11 | if value.upper() in VALID_TRANSCODE_BITRATES:
12 | return value.upper()
13 | else:
14 | raise click.BadParameter(
15 | f"{value} is not a valid bitrate. Valid bitrates are: "
16 | + ", ".join(VALID_TRANSCODE_BITRATES)
17 | )
18 |
19 |
20 | @commandgroup.command()
21 | @click.argument(
22 | "path", type=click.Path(exists=True, file_okay=False, resolve_path=True), nargs=1
23 | )
24 | @click.option(
25 | "--bitrate",
26 | "-b",
27 | type=click.STRING,
28 | callback=validate_bitrate,
29 | required=True,
30 | help=f'Bitrate to transcode to ({", ".join(VALID_TRANSCODE_BITRATES)})',
31 | )
32 | def transcode(path, bitrate):
33 | """Transcode a dir of FLACs into "perfect" MP3"""
34 | transcode_folder(path, bitrate)
35 |
36 |
37 | @commandgroup.command()
38 | @click.argument(
39 | "path", type=click.Path(exists=True, file_okay=False, resolve_path=True), nargs=1
40 | )
41 | def downconv(path):
42 | """Downconvert a dir of 24bit FLACs to 16bit"""
43 | convert_folder(path)
44 |
--------------------------------------------------------------------------------
/salmon/converter/downconverting.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import shlex
4 | import subprocess
5 | import time
6 | from copy import copy
7 | from shutil import copyfile
8 |
9 | import click
10 |
11 | from salmon import config
12 | from salmon.errors import InvalidSampleRate
13 | from salmon.tagger.audio_info import gather_audio_info
14 |
15 | THREADS = [None] * config.SIMULTANEOUS_CONVERSIONS
16 | COMMAND = "sox {input_} -G -b 16 {output} rate -v -L {rate} dither"
17 | FLAC_FOLDER_REGEX = re.compile(r"(24 ?bit )?FLAC", flags=re.IGNORECASE)
18 |
19 |
20 | def convert_folder(path):
21 | new_path = _generate_conversion_path_name(path)
22 | if os.path.isdir(new_path):
23 | return click.secho(
24 | f"{new_path} already exists, please delete it to re-convert.", fg="red"
25 | )
26 |
27 | files_convert, files_copy = _determine_files_actions(path)
28 | _convert_files(path, new_path, files_convert, files_copy)
29 |
30 |
31 | def _determine_files_actions(path):
32 | convert_files = []
33 | copy_files = [os.path.join(r, f) for r, _, files in os.walk(path) for f in files]
34 | audio_info = gather_audio_info(path)
35 | for figle in copy(copy_files):
36 | for info_figle, figle_info in audio_info.items():
37 | if figle.endswith(info_figle) and figle_info["precision"] == 24:
38 | convert_files.append((figle, figle_info["sample rate"]))
39 | copy_files.remove(figle)
40 | return convert_files, copy_files
41 |
42 |
43 | def _generate_conversion_path_name(path):
44 | foldername = os.path.basename(path)
45 | if re.search("24 ?bit FLAC", foldername, flags=re.IGNORECASE):
46 | foldername = re.sub("24 ?bit FLAC", "FLAC", foldername, flags=re.IGNORECASE)
47 | elif re.search("FLAC", foldername, flags=re.IGNORECASE):
48 | foldername = re.sub("FLAC", "16bit FLAC", foldername, flags=re.IGNORECASE)
49 | else:
50 | foldername += " [FLAC]"
51 |
52 | return os.path.join(os.path.dirname(path), foldername)
53 |
54 |
55 | def _convert_files(old_path, new_path, files_convert, files_copy):
56 | files_left = len(files_convert) - 1
57 | files = iter(files_convert)
58 |
59 | for file_ in files_copy:
60 | output = file_.replace(old_path, new_path)
61 | _create_path(output)
62 | copyfile(file_, output)
63 | click.secho(f"Copied {os.path.basename(file_)}")
64 |
65 | converting = True
66 |     while converting or any(t and t.poll() is None for t in THREADS):  # drain running workers too
67 | converting = False
68 | for i, thread in enumerate(THREADS):
69 | if thread and thread.poll() is not None:
70 | if thread.poll() != 0:
71 | click.secho(
72 | f"Error downconverting a file, error {thread.poll()}:", fg="red"
73 | )
74 | click.secho(thread.communicate()[1].decode("utf-8", "ignore"))
75 | raise click.Abort
76 | try:
77 | thread.kill()
78 | except: # noqa: E722
79 | pass
80 |
81 | if not thread or thread.poll() is not None:
82 | try:
83 | file_, sample_rate = next(files)
84 | except StopIteration:
85 | break
86 |
87 | output = file_.replace(old_path, new_path)
88 | THREADS[i] = _convert_single_file(
89 | file_, output, sample_rate, files_left
90 | )
91 | files_left -= 1
92 | converting = True
93 | time.sleep(0.1)
94 |
95 |
96 | def _convert_single_file(file_, output, sample_rate, files_left):
97 | click.echo(f"Converting {os.path.basename(file_)} [{files_left} left to convert]")
98 | _create_path(output)
99 | command = COMMAND.format(
100 | input_=shlex.quote(file_),
101 | output=shlex.quote(output),
102 | rate=_get_final_sample_rate(sample_rate),
103 | )
104 | return subprocess.Popen(
105 | command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
106 | )
107 |
108 |
109 | def _create_path(filepath):
110 | p = os.path.dirname(filepath)
111 | if not os.path.isdir(p):
112 | try:
113 | os.makedirs(p)
114 | except FileExistsError:
115 | pass
116 |
117 |
118 | def _get_final_sample_rate(sample_rate):
119 | if sample_rate % 44100 == 0:
120 | return 44100
121 | elif sample_rate % 48000 == 0:
122 | return 48000
123 | raise InvalidSampleRate
124 |
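For reference, `COMMAND` expands to an ordinary sox pipeline. For a hypothetical 24 bit/88.2 kHz file the worker runs roughly the following; `-G` guards against clipping, `-b 16` requantizes, and the `rate`/`dither` chain resamples to the 44.1k/48k family chosen by `_get_final_sample_rate`:

    from salmon.converter.downconverting import COMMAND

    print(COMMAND.format(input_="'in.flac'", output="'out.flac'", rate=44100))
    # -> sox 'in.flac' -G -b 16 'out.flac' rate -v -L 44100 dither
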
--------------------------------------------------------------------------------
/salmon/converter/transcoding.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import shlex
4 | import subprocess
5 | import time
6 | from shutil import copyfile
7 |
8 | import click
9 | import mutagen
10 |
11 | from salmon import config
12 |
13 | THREADS = [None] * config.SIMULTANEOUS_CONVERSIONS
14 | COMMANDS = {
15 | "320": "ffmpeg -i {input_} -acodec libmp3lame -ab 320k {output}",
16 | "V0": "flac --decode --stdout {input_} | lame -V 0 -q --add-id3v2 "
17 | "--tt {tt} --ta {ta} --ty {ty} --tn {tn} --tl {tl} --tc {tc} --tg {tg} "
18 | "--tv TPUB={label} "
19 | "- {output}",
20 | }
21 | FLAC_FOLDER_REGEX = re.compile(r"(24 ?bit )?FLAC", flags=re.IGNORECASE)
22 | LOSSLESS_FOLDER_REGEX = re.compile(r"Lossless", flags=re.IGNORECASE)
23 | LOSSY_EXTENSION_LIST = {
24 | ".mp3",
25 | ".m4a", # Fuck ALAC.
26 | ".ogg",
27 | ".opus",
28 | }
29 |
30 |
31 | def transcode_folder(path, bitrate):
32 | _validate_folder_is_lossless(path)
33 | new_path = _generate_transcode_path_name(path, bitrate)
34 | if os.path.isdir(new_path):
35 | return click.secho(
36 | f"{new_path} already exists, please delete it to re-transcode.", fg="red"
37 | )
38 | _transcode_files(path, new_path, bitrate)
39 |
40 |
41 | def _validate_folder_is_lossless(path):
42 | for root, _, files in os.walk(path):
43 | for f in files:
44 | ext = os.path.splitext(f)[1].lower()
45 | if ext in LOSSY_EXTENSION_LIST:
46 | click.secho(f"A lossy file was found in the folder ({f}).", fg="red")
47 | raise click.Abort
48 |
49 |
50 | def _get_files_to_handle(path):
51 | files_to_handle = []
52 | for root, _, files in os.walk(path):
53 | for f in files:
54 | files_to_handle.append(os.path.join(root, f))
55 | return files_to_handle
56 |
57 |
58 | def _generate_transcode_path_name(path, bitrate):
59 | to_append = []
60 | foldername = os.path.basename(path)
61 | if FLAC_FOLDER_REGEX.search(foldername):
62 | foldername = FLAC_FOLDER_REGEX.sub("MP3", foldername)
63 | else:
64 | to_append.append("MP3")
65 | if LOSSLESS_FOLDER_REGEX.search(foldername):
66 | foldername = LOSSLESS_FOLDER_REGEX.sub(bitrate, foldername)
67 | else:
68 | to_append.append(bitrate)
69 |
70 | if to_append:
71 | foldername += f' [{" ".join(to_append)}]'
72 |
73 | return os.path.join(os.path.dirname(path), foldername)
74 |
75 |
76 | def _transcode_files(old_path, new_path, bitrate):
77 | files = _get_files_to_handle(old_path)
78 | files_left = len([f for f in files if f.lower().endswith(".flac")]) - 1
79 | files = iter(sorted(files))
80 |
81 | transcoding = True
82 |     while transcoding or any(t and t.poll() is None for t in THREADS):  # drain running workers too
83 | transcoding = False
84 | for i, thread in enumerate(THREADS):
85 | if thread and thread.poll() is not None:
86 | if thread.poll() != 0:
87 | click.secho(
88 | f"Error transcoding a file, error {thread.poll()}:", fg="red"
89 | )
90 | click.secho(thread.communicate()[1].decode("utf-8", "ignore"))
91 | raise click.Abort
92 | try:
93 | thread.kill()
94 | except: # noqa: E722
95 | pass
96 |
97 | if not thread or thread.poll() is not None:
98 | try:
99 | file_ = next(files)
100 | except StopIteration:
101 | break
102 |
103 | output = file_.replace(old_path, new_path)
104 | if file_.lower().endswith(".flac"):
105 |                     output = re.sub(r"\.flac$", ".mp3", output, flags=re.IGNORECASE)
106 | THREADS[i] = _transcode_single_file(
107 | file_, output, bitrate, files_left
108 | )
109 | files_left -= 1
110 | else:
111 | _create_path(output)
112 | copyfile(file_, output)
113 | click.secho(f"Copied {os.path.basename(file_)}")
114 | transcoding = True
115 | time.sleep(0.1)
116 |
117 |
118 | def _transcode_single_file(file_, output, bitrate, files_left):
119 | click.echo(
120 | f"Transcoding {os.path.basename(file_)} [{files_left} left to transcode]"
121 | )
122 | _create_path(output)
123 |     try:  # "320" has no tag placeholders; "V0" raises KeyError and is formatted with tags.
124 | command = COMMANDS[bitrate].format(
125 | input_=shlex.quote(file_), output=shlex.quote(output)
126 | )
127 | except KeyError:
128 | command = COMMANDS[bitrate].format(
129 | input_=shlex.quote(file_), output=shlex.quote(output), **_get_tags(file_)
130 | )
131 | return subprocess.Popen(
132 | command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
133 | )
134 |
135 |
136 | def _create_path(filepath):
137 | p = os.path.dirname(filepath)
138 | if not os.path.isdir(p):
139 | try:
140 | os.makedirs(p)
141 | except FileExistsError:
142 | pass
143 |
144 |
145 | def _get_tags(file_):
146 | tags = {}
147 | tag_assignments = {
148 | "tt": ["title"],
149 | "ta": ["artist"],
150 | "tl": ["album"],
151 | "ty": ["date", "year"],
152 | "tn": ["tracknumber"],
153 | "tc": ["comment"],
154 | "tg": ["genre"],
155 | "label": ["label"],
156 | }
157 |
158 | track = mutagen.File(file_)
159 | for key, tag_keys in tag_assignments.items():
160 | for tag_key in tag_keys:
161 | try:
162 | tags[key] = shlex.quote(track.tags[tag_key][0])
163 | except (KeyError, IndexError):
164 |                 if key not in tags:
165 | tags[key] = "''"
166 | return tags
167 |
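The V0 template decodes with flac and pipes into lame; with `_get_tags` output substituted in, the shell line comes out roughly as below (paths and tag values are hypothetical):

    from salmon.converter.transcoding import COMMANDS

    print(COMMANDS["V0"].format(
        input_="'in.flac'", output="'out.mp3'",
        tt="'Title'", ta="'Artist'", ty="'2020'", tn="'01'",
        tl="'Album'", tc="''", tg="'Electronic'", label="'Label'",
    ))
    # flac --decode --stdout 'in.flac' | lame -V 0 ... --tt 'Title' ... - 'out.mp3'
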
--------------------------------------------------------------------------------
/salmon/database.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | from os import listdir, path
3 |
4 | import click
5 |
6 | from salmon.common import commandgroup
7 |
8 | DB_PATH = path.abspath(path.join(path.dirname(path.dirname(__file__)), "smoked.db"))
9 | MIG_DIR = path.abspath(path.join(path.dirname(path.dirname(__file__)), "migrations"))
10 |
11 |
12 | @commandgroup.command()
13 | @click.option(
14 | "--list", "-l", is_flag=True, help="List migrations instead of migrating."
15 | )
16 | def migrate(list):
17 | """Migrate database to newest version"""
18 | if list:
19 | list_migrations()
20 | return
21 |
22 | current_version = get_current_version()
23 | ran_once = False
24 | with sqlite3.connect(DB_PATH) as conn:
25 | for migration in sorted(f for f in listdir(MIG_DIR) if f.endswith(".sql")):
26 | try:
27 | mig_version = int(migration[:4])
28 |             except ValueError:
29 | click.secho(
30 | f"\n{migration} is improperly named. It must start with "
31 | "a four digit integer.",
32 | fg="red",
33 | )
34 | raise click.Abort
35 |
36 | if mig_version > current_version:
37 | ran_once = True
38 | click.secho(f"Running {migration}...")
39 | cursor = conn.cursor()
40 | with open(path.join(MIG_DIR, migration), "r") as mig_file:
41 | cursor.executescript(mig_file.read())
42 | cursor.execute(
43 | "INSERT INTO version (id) VALUES (?)", (mig_version,)
44 | )
45 | conn.commit()
46 | cursor.close()
47 |
48 | if not ran_once:
49 | click.secho("You are already caught up with all migrations.", fg="green")
50 |
51 |
52 | def list_migrations():
53 | """List migration history and current status"""
54 | current_version = get_current_version()
55 | for migration in sorted(f for f in listdir(MIG_DIR) if f.endswith(".sql")):
56 | try:
57 | mig_version = int(migration[:4])
58 |         except ValueError:
59 | click.secho(
60 | f"\n{migration} is improperly named. It must start with a "
61 | "four digit integer.",
62 | fg="red",
63 | )
64 | raise click.Abort
65 |
66 | if mig_version == current_version:
67 | click.secho(f"{migration} (CURRENT)", fg="cyan", bold=True)
68 | else:
69 | click.echo(migration)
70 |
71 | if not current_version:
72 | click.secho(
73 |             "\nYou have not yet run a migration. Catch your database up with "
74 | "./run.py migrate",
75 | fg="magenta",
76 | bold=True,
77 | )
78 |
79 |
80 | def get_current_version():
81 | if not path.isfile(DB_PATH):
82 | return 0
83 | with sqlite3.connect(DB_PATH) as conn:
84 | cursor = conn.cursor()
85 | try:
86 | cursor.execute("SELECT MAX(id) from version")
87 | except sqlite3.OperationalError:
88 | return 0
89 | return cursor.fetchone()[0]
90 |
91 |
92 | def check_if_migration_is_needed():
93 | current_version = get_current_version()
94 |     most_recent_mig = sorted(f for f in listdir(MIG_DIR) if f.endswith(".sql"))[-1]
95 | try:
96 | mig_version = int(most_recent_mig[:4])
97 |     except ValueError:
98 | click.secho(
99 | f"\n{most_recent_mig} is improperly named. It must start with a "
100 | "four digit integer.",
101 | fg="red",
102 | )
103 | raise click.Abort
104 | if mig_version > current_version:
105 | click.secho(
106 | "The database needs updating. Please run `salmon migrate`.\n",
107 | fg="red",
108 | bold=True,
109 | )
110 |
111 |
112 | check_if_migration_is_needed()
113 |
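Migrations are plain SQL scripts whose filenames begin with a four digit version; `migrate` runs every script newer than `MAX(id)` in the `version` table, in sorted order. A sketch of the naming contract (filenames hypothetical):

    # migrations/0001_initial.sql, migrations/0002_add_uploads.sql, ...
    for name in ("0001_initial.sql", "0002_add_uploads.sql"):
        print(int(name[:4]), name)  # the four digit prefix is the schema version
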
--------------------------------------------------------------------------------
/salmon/errors.py:
--------------------------------------------------------------------------------
1 | class ScrapeError(Exception):
2 | def __init__(self, message, payload=None):
3 | self.payload = payload
4 |         super().__init__(message)
5 |
6 |
7 | class AbortAndDeleteFolder(Exception):
8 | pass
9 |
10 |
11 | class DownloadError(Exception):
12 | pass
13 |
14 |
15 | class UploadError(Exception):
16 | pass
17 |
18 |
19 | class FilterError(Exception):
20 | pass
21 |
22 |
23 | class TrackCombineError(Exception):
24 | pass
25 |
26 |
27 | class SourceNotFoundError(Exception):
28 | pass
29 |
30 |
31 | class InvalidMetadataError(Exception):
32 | pass
33 |
34 |
35 | class ImageUploadFailed(Exception):
36 | pass
37 |
38 |
39 | class InvalidSampleRate(Exception):
40 | pass
41 |
42 |
43 | class GenreNotInWhitelist(Exception):
44 | pass
45 |
46 |
47 | class NotAValidInputFile(Exception):
48 | pass
49 |
50 |
51 | class NoncompliantFolderStructure(Exception):
52 | pass
53 |
54 |
55 | class WebServerIsAlreadyRunning(Exception):
56 | pass
57 |
58 |
59 | class RequestError(Exception):
60 | pass
61 |
62 |
63 | class RateLimitError(RequestError):
64 | pass
65 |
66 |
67 | class RequestFailedError(RequestError):
68 | pass
69 |
70 |
71 | class LoginError(RequestError):
72 | pass
73 |
--------------------------------------------------------------------------------
/salmon/images/base.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import contextlib
3 | import mimetypes
4 | import os
5 | from random import choice
6 |
7 | import requests
8 | from bs4 import BeautifulSoup
9 | from salmon.constants import UAGENTS
10 | from salmon.errors import ImageUploadFailed
11 |
12 | mimetypes.init()
13 | loop = asyncio.get_event_loop()
14 |
15 |
16 | class BaseImageUploader:
17 | def upload_file(self, filename):
18 | # The ExitStack closes files for us when the with block exits
19 | with contextlib.ExitStack() as stack:
20 | open_file = stack.enter_context(open(filename, "rb"))
21 | mime_type, _ = mimetypes.guess_type(filename)
22 | if not mime_type or mime_type.split("/")[0] != "image":
23 | raise ValueError("Unknown image file type {}".format(mime_type))
24 | ext = os.path.splitext(filename)[1]
25 | return self._perform((filename, open_file, mime_type), ext)
26 | # Do we need to strip filenames?
27 | # return self._perform((f"filename{ext}", open_file, mime_type), ext)
28 |
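Concrete backends (catbox, emp, ptpimg below) subclass this and implement only `_perform`, which receives the `(filename, open file, mime type)` tuple built here. A skeletal uploader against a placeholder endpoint:

    import requests

    from salmon.errors import ImageUploadFailed
    from salmon.images.base import BaseImageUploader

    class ImageUploader(BaseImageUploader):
        def _perform(self, file_, ext):
            resp = requests.post("https://img.example/api/upload", files={"file": file_})
            if resp.status_code != requests.codes.ok:
                raise ImageUploadFailed(f"Failed. Status {resp.status_code}:\n{resp.content}")
            return resp.text, None  # (image url, optional deletion url)
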
--------------------------------------------------------------------------------
/salmon/images/catbox.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 | from salmon import config
4 | from salmon.errors import ImageUploadFailed
5 | from salmon.images.base import BaseImageUploader
6 |
7 | from random import choice
8 |
9 | from bs4 import BeautifulSoup
10 | from salmon.constants import UAGENTS
11 |
12 |
13 | HEADERS = {
14 | "User-Agent": choice(UAGENTS),
15 |     "Referer": "https://catbox.moe/",
16 | }
17 |
18 |
19 | class ImageUploader(BaseImageUploader):
20 | def _perform(self, file_, ext):
21 | data = {
22 | "reqtype": "fileupload",
23 | 'userhash': '',
24 | }
25 | url = "https://catbox.moe/user/api.php"
26 | files = {"fileToUpload": file_}
27 | resp = requests.post(url, headers=HEADERS, data=data, files=files)
28 | if resp.status_code == requests.codes.ok:
29 | try:
30 | return resp.text, None
31 | except ValueError as e:
32 | raise ImageUploadFailed(
33 | f"Failed decoding body:\n{e}\n{resp.content}"
34 | ) from e
35 | else:
36 | raise ImageUploadFailed(
37 | f"Failed. Status {resp.status_code}:\n{resp.content}"
38 | )
39 |
--------------------------------------------------------------------------------
/salmon/images/emp.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import contextlib
3 | import mimetypes
4 | import os
5 | from random import choice
6 |
7 | import requests
8 | from bs4 import BeautifulSoup
9 | from salmon.constants import UAGENTS
10 | from salmon.errors import ImageUploadFailed
11 | from salmon.images.base import BaseImageUploader
12 |
13 | mimetypes.init()
14 | loop = asyncio.get_event_loop()
15 |
16 | HEADERS = {
17 | "User-Agent": choice(UAGENTS),
18 |     "Referer": "https://jerking.empornium.ph/",
19 | "Accept": "application/json",
20 | "Linx-Expiry": "0",
21 | }
22 | AUTH_TOKEN = None
23 | cookies = {"AGREE_CONSENT": "1", "PHPSESSID": "45onca6s8hi8oi07ljqla31gfu"}
24 |
25 |
26 | class ImageUploader(BaseImageUploader):
27 | def __init__(self):
28 |         """Fetch and cache an auth token the first time the class is used."""
29 | global AUTH_TOKEN
30 | if not AUTH_TOKEN:
31 | resp = requests.get('https://jerking.empornium.ph', cookies=cookies)
32 | soup = BeautifulSoup(resp.text, "html.parser")
33 | AUTH_TOKEN = soup.find(attrs={"name": "auth_token"})['value']
34 | self.auth_token = AUTH_TOKEN
35 | if not self.auth_token:
36 | raise ImageUploadFailed
37 |
38 | def upload_file(self, filename):
39 | # The ExitStack closes files for us when the with block exits
40 | with contextlib.ExitStack() as stack:
41 | open_file = stack.enter_context(open(filename, "rb"))
42 | mime_type, _ = mimetypes.guess_type(filename)
43 | if not mime_type or mime_type.split("/")[0] != "image":
44 | raise ValueError("Unknown image file type {}".format(mime_type))
45 | ext = os.path.splitext(filename)[1]
46 | return self._perform((filename, open_file, mime_type), ext)
47 |
48 | def _perform(self, file_, ext):
49 | url = "https://jerking.empornium.ph/json"
50 | files = {"source": file_}
51 | data = {
52 | "action": "upload",
53 | "type": "file",
54 | "auth_token": self.auth_token,
55 | }
56 |
57 | resp = requests.post(
58 | url, headers=HEADERS, data=data, cookies=cookies, files=files
59 | )
60 | # print(resp.json())
61 | if resp.status_code == requests.codes.ok:
62 | try:
63 | resp_data = resp.json()
64 | return resp_data["image"]["url"], None
65 | except ValueError as e:
66 | raise ImageUploadFailed(
67 | f"Failed decoding body:\n{e}\n{resp.content}"
68 | ) from e
69 | else:
70 | raise ImageUploadFailed(
71 | f"Failed. Status {resp.status_code}:\n{resp.content}"
72 | )
73 |
--------------------------------------------------------------------------------
/salmon/images/imgur.py:
--------------------------------------------------------------------------------
1 | from pyimgurapi import ImgurAPI
2 |
3 | from salmon import config
4 | from salmon.errors import ImageUploadFailed
5 |
6 | CLIENT = ImgurAPI(
7 | refresh_token=config.IMGUR_REFRESH_TOKEN,
8 | client_id=config.IMGUR_CLIENT_ID,
9 | client_secret=config.IMGUR_CLIENT_SECRET,
10 | )
11 |
12 |
13 | class ImageUploader:
14 | def upload_file(self, filename):
15 | try:
16 | CLIENT.auth()
17 | with open(filename, "rb") as f:
18 | url = CLIENT.image.upload(
19 | f,
20 | filename
21 | ).data
22 | return url.link, f"https://imgur.com/delete/{url.deletehash}"
23 | except Exception as e:
24 | raise ImageUploadFailed from e
25 |
--------------------------------------------------------------------------------
/salmon/images/ptpimg.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 | from salmon import config
4 | from salmon.errors import ImageUploadFailed
5 | from salmon.images.base import BaseImageUploader
6 |
7 |
8 | HEADERS = {"referer": "https://ptpimg.me/index.php", "User-Agent": config.USER_AGENT}
9 |
10 |
11 | class ImageUploader(BaseImageUploader):
12 | def _perform(self, file_, ext):
13 | data = {"api_key": config.PTPIMG_KEY}
14 | url = "https://ptpimg.me/upload.php"
15 | files = {"file-upload[0]": file_}
16 | resp = requests.post(url, headers=HEADERS, data=data, files=files)
17 | if resp.status_code == requests.codes.ok:
18 | try:
19 | r = resp.json()[0]
20 | return f"https://ptpimg.me/{r['code']}.{r['ext']}", None
21 | except ValueError as e:
22 | raise ImageUploadFailed(
23 | f"Failed decoding body:\n{e}\n{resp.content}"
24 | ) from e
25 | else:
26 | raise ImageUploadFailed(f"Failed. Status {resp.status_code}:\n{resp.content}")
27 |
--------------------------------------------------------------------------------
/salmon/play.py:
--------------------------------------------------------------------------------
1 | import time
2 | from random import choice
3 |
4 | import click
5 |
6 | from salmon.common import commandgroup
7 |
8 | COLORS = ["magenta", "green", "cyan", "yellow", "red", "blue"]
9 |
10 | DESPACITO = """
11 | Ay
12 | Fonsi
13 | DY
14 | Oh
15 | Oh no, oh no
16 | Oh yeah
17 | Diridiri, dirididi Daddy
18 | Go
19 |
20 | Sí, sabes que ya llevo un rato mirándote
21 | Tengo que bailar contigo hoy (DY)
22 | Vi que tu mirada ya estaba llamándome
23 | Muéstrame el camino que yo voy (Oh)
24 |
25 | Tú, tú eres el imán y yo soy el metal
26 | Me voy acercando y voy armando el plan
27 | Solo con pensarlo se acelera el pulso (Oh yeah)
28 |
29 | Ya, ya me está gustando más de lo normal
30 | Todos mis sentidos van pidiendo más
31 | Esto hay que tomarlo sin ningún apuro
32 |
33 | Despacito
34 | Quiero respirar tu cuello despacito
35 | Deja que te diga cosas al oído
36 | Para que te acuerdes si no estás conmigo
37 |
38 | Despacito
39 | Quiero desnudarte a besos despacito
40 | Firmo en las paredes de tu laberinto
41 | Y hacer de tu cuerpo todo un manuscrito (sube, sube, sube)
42 | (Sube, sube)
43 |
44 | Quiero ver bailar tu pelo
45 | Quiero ser tu ritmo
46 | Que le enseñes a mi boca
47 | Tus lugares favoritos (favoritos, favoritos baby)
48 |
49 | Déjame sobrepasar tus zonas de peligro
50 | Hasta provocar tus gritos
51 | Y que olvides tu apellido (Diridiri, dirididi Daddy)
52 |
53 | Si te pido un beso ven dámelo
54 | Yo sé que estás pensándolo
55 | Llevo tiempo intentándolo
56 | Mami, esto es dando y dándolo
57 | Sabes que tu corazón conmigo te hace bom, bom
58 | Sabes que esa beba está buscando de mi bom, bom
59 | Ven prueba de mi boca para ver cómo te sabe
60 | Quiero, quiero, quiero ver cuánto amor a ti te cabe
61 | Yo no tengo prisa, yo me quiero dar el viaje
62 | Empecemos lento, después salvaje
63 |
64 | Pasito a pasito, suave suavecito
65 | Nos vamos pegando poquito a poquito
66 | Cuando tú me besas con esa destreza
67 | Veo que eres malicia con delicadeza
68 |
69 | Pasito a pasito, suave suavecito
70 | Nos vamos pegando, poquito a poquito
71 | Y es que esa belleza es un rompecabezas
72 | Pero pa montarlo aquí tengo la pieza
73 |
74 | Despacito
75 | Quiero respirar tu cuello despacito
76 | Deja que te diga cosas al oído
77 | Para que te acuerdes si no estás conmigo
78 |
79 | Despacito
80 | Quiero desnudarte a besos despacito
81 | Firmo en las paredes de tu laberinto
82 | Y hacer de tu cuerpo todo un manuscrito (sube, sube, sube)
83 | (Sube, sube)
84 |
85 | Quiero ver bailar tu pelo
86 | Quiero ser tu ritmo
87 | Que le enseñes a mi boca
88 | Tus lugares favoritos (favoritos, favoritos baby)
89 |
90 | Déjame sobrepasar tus zonas de peligro
91 | Hasta provocar tus gritos
92 | Y que olvides tu apellido
93 |
94 | Despacito
95 | Vamos a hacerlo en una playa en Puerto Rico
96 | Hasta que las olas griten "¡ay, bendito!"
97 | Para que mi sello se quede contigo
98 |
99 | Pasito a pasito, suave suavecito
100 | Nos vamos pegando, poquito a poquito
101 | Que le enseñes a mi boca
102 | Tus lugares favoritos (favoritos, favoritos baby)
103 |
104 | Pasito a pasito, suave suavecito
105 | Nos vamos pegando, poquito a poquito
106 | Hasta provocar tus gritos
107 | Y que olvides tu apellido (DY)
108 | Despacito
109 | """
110 |
111 |
112 | @commandgroup.group()
113 | def play():
114 | """This is so sad Alexa play something"""
115 | pass
116 |
117 |
118 | @play.command()
119 | def despacito():
120 | """This is so sad Alexa play Despacito"""
121 | for line in DESPACITO.split("\n"):
122 | for word in line.split():
123 | click.secho(f"{word} ", fg=choice(COLORS), nl=False)
124 | time.sleep(0.2)
125 | click.echo()
126 |
--------------------------------------------------------------------------------
/salmon/search/__init__.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import re
3 | from itertools import chain
4 |
5 | import click
6 |
7 | from salmon import config
8 | from salmon.common import (
9 | commandgroup,
10 | handle_scrape_errors,
11 | normalize_accents,
12 | re_split,
13 | re_strip,
14 | )
15 | from salmon.search import (
16 | bandcamp,
17 | beatport,
18 | deezer,
19 | discogs,
20 | itunes,
21 | junodownload,
22 | musicbrainz,
23 | tidal,
24 | )
25 |
26 | SEARCHSOURCES = {
27 | "Bandcamp": bandcamp,
28 | "MusicBrainz": musicbrainz,
29 | "iTunes": itunes,
30 | "Junodownload": junodownload,
31 | "Discogs": discogs,
32 | "Beatport": beatport,
33 | "Tidal": tidal,
34 | "Deezer": deezer,
35 | }
36 |
37 | loop = asyncio.get_event_loop()
38 |
39 |
40 | @commandgroup.command()
41 | @click.argument("searchstr", nargs=-1, required=True)
42 | @click.option("--track-count", "-t", type=click.INT)
43 | @click.option("--limit", "-l", type=click.INT, default=config.SEARCH_LIMIT)
44 | def metas(searchstr, track_count, limit):
45 | """Search for releases from metadata providers"""
46 | searchstr = " ".join(searchstr)
47 | click.secho(f'Searching {", ".join(SEARCHSOURCES)}', fg="cyan", bold=True)
48 | results = run_metasearch([searchstr], limit=limit, track_count=track_count)
49 | not_found = []
50 |     source_errors = SEARCHSOURCES.keys() - results.keys()
51 | for source, releases in results.items():
52 | if releases:
53 | click.secho(f"\nResults from {source}:", fg="yellow", bold=True)
54 | for rls_id, release in releases.items():
55 | rls_name = release[0][1]
56 | url = SEARCHSOURCES[source].Searcher.format_url(rls_id, rls_name)
57 | click.echo(f"> {release[1]} {url}")
58 | elif source:
59 | not_found.append(source)
60 |
61 | click.echo()
62 | for source in not_found:
63 | click.secho(f"No results found from {source}.", fg="red")
64 | if source_errors:
65 | click.secho(f'Failed to scrape {", ".join(source_errors)}.', fg="red")
66 |
67 |
68 | def run_metasearch(
69 | searchstrs,
70 | limit=config.SEARCH_LIMIT,
71 | sources=None,
72 | track_count=None,
73 | artists=None,
74 | album=None,
75 | filter=True,
76 | ):
77 | """
78 | Run a search for releases matching the searchstr. Specify the artists and albums
79 | kwargs to have stronger filtering of results.
80 | """
81 | sources = (
82 | SEARCHSOURCES
83 | if not sources
84 | else {k: m for k, m in SEARCHSOURCES.items() if k in sources}
85 | )
86 | results = {}
87 | tasks = [
88 | handle_scrape_errors(s.Searcher().search_releases(search, limit))
89 | for search in searchstrs
90 | for s in sources.values()
91 | ]
92 | task_responses = loop.run_until_complete(asyncio.gather(*tasks))
93 | for source, result in [r or (None, None) for r in task_responses]:
94 |         if source is not None:  # None means the scrape task errored out.
95 | if filter:
96 | result = filter_results(result, artists, album)
97 | if track_count:
98 | result = filter_by_track_count(result, track_count)
99 | results[source] = result
100 | return results
101 |
102 |
103 | def filter_results(results, artists, album):
104 | filtered = {}
105 | for rls_id, result in (results or {}).items():
106 | if artists:
107 | split_artists = []
108 | for a in artists:
109 | split_artists += re_split(re_strip(normalize_accents(a)))
110 | stripped_rls_artist = re_strip(normalize_accents(result[0].artist))
111 |
112 | if "Various" in result[0].artist:
113 | if len(artists) == 1:
114 | continue
115 | elif not any(a in stripped_rls_artist for a in split_artists):
116 | continue
117 | elif not any(
118 | a in stripped_rls_artist.split()
119 | for a in chain.from_iterable([a.split() for a in split_artists])
120 | ):
121 | continue
122 | if album:
123 | if not _compare_albums(album, result[0].album):
124 | continue
125 | filtered[rls_id] = result
126 | return filtered
127 |
128 |
129 | def filter_by_track_count(results, track_count):
130 | filtered = {}
131 | for rls_id, (ident_data, res_str) in results.items():
132 | if not ident_data.track_count or abs(ident_data.track_count - track_count) <= 1:
133 | filtered[rls_id] = (ident_data, res_str)
134 | return filtered
135 |
136 |
137 | def _compare_albums(one, two):
138 | one, two = normalize_accents(one, two)
139 | if re_strip(one) == re_strip(two):
140 | return True
141 | elif re_strip(
142 | re.sub(r" \(?(mix|feat|with|incl|prod).+", "", one, flags=re.IGNORECASE)
143 | ) == re_strip(
144 | re.sub(r" \(?(mix|feat|with|incl|prod).+", "", two, flags=re.IGNORECASE)
145 | ):
146 | return True
147 | return False
148 |
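`run_metasearch` can also be driven programmatically; an assumed example restricting the scrapers and filtering by track count:

    from salmon.search import run_metasearch

    results = run_metasearch(
        ["Artist Album"], sources=["Deezer", "Discogs"], track_count=12
    )
    for source, releases in results.items():
        print(source, len(releases))  # releases within +/-1 track of 12
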
--------------------------------------------------------------------------------
/salmon/search/bandcamp.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from salmon.errors import ScrapeError
4 | from salmon.search.base import IdentData, SearchMixin
5 | from salmon.sources import BandcampBase
6 |
7 |
8 | class Searcher(BandcampBase, SearchMixin):
9 | async def search_releases(self, searchstr, limit):
10 | releases = {}
11 | soup = await self.create_soup(
12 | self.search_url, params={"q": searchstr}, allow_redirects=False
13 | )
14 | for meta in soup.select(".result-items .searchresult.data-search .result-info"):
15 | try:
16 | re_url = self.regex.search(meta.select(".itemurl a")[0].string)
17 | rls_url = re.sub(r"\?.+", "", re_url[1])
18 | rls_id = re_url[2]
19 | title = meta.select(".heading a")[0].string.strip()
20 | title = title if len(title) < 100 else f"{title[:98]}.."
21 | artists = re.search("by (.+)", meta.select(".subhead")[0].text)[
22 | 1
23 | ].strip()
24 | track_count = int(
25 | re.search(r"(\d+) tracks?", meta.select(".length")[0].text)[1]
26 | )
27 |
28 | releaser = rls_url.split(".bandcamp.com")[0]
29 |
30 | date = meta.select(".released")[0].text.strip()
31 | year = re.search(r"(\d{4})", date)[1]
32 |
33 | releases[(rls_url, rls_id)] = (
34 | IdentData(artists, title, year, track_count, "WEB"),
35 | self.format_result(
36 | artists, title, f"{year} {releaser}", track_count=track_count
37 | ),
38 | )
39 | except (TypeError, IndexError) as e:
40 | raise ScrapeError("Failed to parse scraped search results.") from e
41 | if len(releases) == limit:
42 | break
43 | return "Bandcamp", releases
44 |
--------------------------------------------------------------------------------
/salmon/search/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from collections import namedtuple
3 |
4 | import click
5 |
6 | IdentData = namedtuple(
7 | "IdentData", ["artist", "album", "year", "track_count", "source"]
8 | )
9 | ArtistRlsData = namedtuple(
10 | "ArtistRlsData", ["url", "quality", "year", "artist", "album", "label", "explicit"]
11 | )
12 | LabelRlsData = namedtuple(
13 | "LabelRlsData", ["url", "quality", "year", "artist", "album", "type", "explicit"]
14 | )
15 |
16 |
17 | class SearchMixin(ABC):
18 | @abstractmethod
19 | async def search_releases(self, searchstr, limit):
20 | """
21 | Search the metadata site for a release string and return a dictionary
22 | of release IDs and search results strings.
23 | """
24 | pass
25 |
26 | @staticmethod
27 | def format_result(
28 | artists,
29 | title,
30 | edition,
31 | track_count=None,
32 | ed_title=None,
33 | country_code=None,
34 | explicit=False,
35 | clean=False,
36 | ):
37 | """
38 | Take the attributes of a search result and format them into a
39 | string with ANSI bells and whistles.
40 | """
41 | artists = click.style(artists, fg="yellow")
42 | title = click.style(title, fg="yellow", bold=True)
43 | result = f"{artists} - {title}"
44 |
45 | if track_count:
46 | result += f" {{Tracks: {click.style(str(track_count), fg='green')}}}"
47 | if ed_title:
48 | result += f" {{{click.style(ed_title, fg='yellow')}}}"
49 | if edition:
50 | result += f" {click.style(edition, fg='green')}"
51 | if explicit:
52 | result = click.style("[E] ", fg="red", bold=True) + result
53 | if clean:
54 | result = click.style("[C] ", fg="cyan", bold=True) + result
55 | if country_code:
56 | result = f"[{country_code}] " + result
57 |
58 | return result
59 |
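Since `format_result` is a staticmethod it can be exercised directly; an assumed example:

    from salmon.search.base import SearchMixin

    print(SearchMixin.format_result(
        "Artist", "Album", "2020 Label CAT123", track_count=10, explicit=True
    ))
    # -> "[E] Artist - Album {Tracks: 10} 2020 Label CAT123" (ANSI-colored)
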
--------------------------------------------------------------------------------
/salmon/search/beatport.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from salmon import config
4 | from salmon.errors import ScrapeError
5 | from salmon.search.base import IdentData, SearchMixin
6 | from salmon.sources import BeatportBase
7 |
8 |
9 | class Searcher(BeatportBase, SearchMixin):
10 | async def search_releases(self, searchstr, limit):
11 | releases = {}
12 | soup = await self.create_soup(self.search_url, params={"q": searchstr})
13 | for meta in soup.select(".bucket-items.ec-bucket li .release-meta"):
14 | try:
15 | rls_id = int(
16 | re.search(r"/release/.+?/(\d+)$", meta.find("a")["href"])[1]
17 | )
18 | ar_li = [
19 | a.string for a in meta.select(".release-artists a") if a.string
20 | ]
21 | title = next(
22 | t.string for t in meta.select(".release-title a") if t.string
23 | )
24 | artists = (
25 | ", ".join(ar_li) if len(ar_li) < 4 else config.VARIOUS_ARTIST_WORD
26 | )
27 | label = meta.select(".release-label a")[0].string
28 | if label.lower() not in config.SEARCH_EXCLUDED_LABELS:
29 | releases[rls_id] = (
30 | IdentData(artists, title, None, None, "WEB"),
31 | self.format_result(artists, title, label),
32 | )
33 | except (TypeError, IndexError) as e:
34 | raise ScrapeError("Failed to parse scraped search results.") from e
35 | if len(releases) == limit:
36 | break
37 | return "Beatport", releases
38 |
--------------------------------------------------------------------------------
/salmon/search/deezer.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import re
3 | from itertools import chain
4 |
5 | from salmon.search.base import ArtistRlsData, LabelRlsData, IdentData, SearchMixin
6 | from salmon.sources import DeezerBase
7 | from ratelimit import limits, sleep_and_retry
8 |
9 |
10 | class Searcher(DeezerBase, SearchMixin):
11 | async def search_releases(self, searchstr, limit):
12 | releases = {}
13 | resp = await self.get_json("/search/album", params={"q": searchstr})
14 | # print(resp)
15 | for rls in resp["data"]:
16 | releases[rls["id"]] = (
17 | IdentData(
18 | rls["artist"]["name"], rls["title"], None, rls["nb_tracks"], "WEB"
19 | ),
20 | self.format_result(
21 | rls["artist"]["name"],
22 | rls["title"],
23 | None,
24 | track_count=rls["nb_tracks"],
25 | ),
26 | )
27 | if len(releases) == limit:
28 | break
29 | return "Deezer", releases
30 |
31 | async def get_artist_releases(self, artiststr):
32 | """
33 | Get the releases of an artist on Deezer. Find their artist page and request
34 | all their releases.
35 | """
36 | artist_ids = await self._get_artist_ids(artiststr)
37 | tasks = [
38 | self._get_artist_albums(artist_id, artiststr) for artist_id in artist_ids
39 | ]
40 | return "Deezer", list(chain.from_iterable(await asyncio.gather(*tasks)))
41 |
42 | async def _get_artist_ids(self, artiststr):
43 | resp = await self.get_json("/search/artist", params={"q": artiststr})
44 | return [a["id"] for a in resp["data"] if a["name"].lower() == artiststr.lower()]
45 |
46 | async def _get_artist_albums(self, artist_id, artist_name):
47 | resp = await self.get_json(f"/artist/{artist_id}/albums")
48 | return [
49 | ArtistRlsData(
50 | url=rls["link"],
51 | quality="LOSSLESS", # Cannot determine.
52 | year=self._parse_year(rls["release_date"]),
53 | artist=artist_name,
54 | album=rls["title"],
55 | label="",
56 | explicit=rls["explicit_lyrics"],
57 | )
58 | for rls in resp["data"]
59 | ]
60 |
61 | async def get_label_releases(self, labelstr, maximum=0, year=None):
62 | """Gets all the albums released by a label up to a total number.
63 | Year filtering doesn't actually work."""
64 | if year:
65 | yearstr = "year='" + year + "'"
66 | else:
67 | yearstr = ""
68 | url_str = f"/search/album&q=label:'{labelstr}' {yearstr}/albums"
69 | resp = await self.get_json(url_str)
70 | albums = []
71 | i = 0
72 | while i < maximum or maximum == 0:
73 | i += 25
74 | for rls in resp["data"]:
75 | album = await self.get_json(f"/album/{rls['id']}")
76 | albums.append(
77 | LabelRlsData(
78 | url=rls['link'],
79 | quality="LOSSLESS", # Cannot determine.
80 | year=str(self._parse_year(album["release_date"])),
81 | artist=rls['artist']['name'],
82 | album=rls["title"],
83 | type=album['record_type'],
84 | explicit=rls["explicit_lyrics"],
85 | )
86 | )
87 | if maximum > 0 and len(albums) >= maximum:
88 | return "Deezer", albums
89 | if "next" in resp.keys():
90 | resp = await self.get_json(url_str, params={"index": i})
91 | else:
92 | return "Deezer", albums
93 | return "Deezer", albums
94 |
95 | @staticmethod
96 | def _parse_year(date):
97 | try:
98 | return int(re.search(r"(\d{4})", date)[0])
99 | except (ValueError, IndexError, TypeError):
100 | return None
101 |
--------------------------------------------------------------------------------
/salmon/search/discogs.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from salmon.search.base import IdentData, SearchMixin
4 | from salmon.sources import DiscogsBase
5 |
6 | SOURCES = {
7 | "Vinyl": "Vinyl",
8 | "File": "WEB",
9 | "CD": "CD",
10 | }
11 |
12 |
13 | class Searcher(DiscogsBase, SearchMixin):
14 | async def search_releases(self, searchstr, limit):
15 | releases = {}
16 | resp = await self.get_json(
17 | "/database/search",
18 | params={"q": searchstr, "type": "release", "perpage": 50},
19 | )
20 | for rls in resp["results"]:
21 | artists, title = rls["title"].split(" - ", 1)
22 | year = rls["year"] if "year" in rls else None
23 | source = parse_source(rls["format"])
24 | ed_title = ", ".join(set(rls["format"]))
25 |
26 | edition = f"{year} {source}"
27 | if rls["label"] and rls["label"][0] != "Not On Label":
28 | edition += f" {rls['label'][0]} {rls['catno']}"
29 | else:
30 | edition += " Not On Label"
31 |
32 | releases[rls["id"]] = (
33 | IdentData(artists, title, year, None, source),
34 | self.format_result(artists, title, edition, ed_title=ed_title),
35 | )
36 | if len(releases) == limit:
37 | break
38 | return "Discogs", releases
39 |
40 |
41 | def sanitize_artist_name(name):
42 | """
43 |     Remove parenthetical number disambiguation bullshit from artist names,
44 | as well as the asterisk stuff.
45 | """
46 | name = re.sub(r" \(\d+\)$", "", name)
47 | return re.sub(r"\*+$", "", name)
48 |
49 |
50 | def parse_source(formats):
51 | """
52 | Take the list of format strings provided by Discogs and iterate over them
53 | to find a possible source for the release.
54 | """
55 | for format_s, source in SOURCES.items():
56 | if any(format_s in f for f in formats):
57 | return source
58 |
--------------------------------------------------------------------------------
/salmon/search/itunes.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from salmon.common import parse_copyright
4 | from salmon.search.base import IdentData, SearchMixin
5 | from salmon.sources import iTunesBase
6 |
7 |
8 | class Searcher(iTunesBase, SearchMixin):
9 | async def search_releases(self, searchstr, limit):
10 | releases = {}
11 | resp = await self.get_json(
12 | "/search",
13 | params={
14 | "media": "music",
15 | "entity": "album",
16 | "limit": limit if limit < 25 else 25,
17 | "term": searchstr,
18 | },
19 | )
20 | results = resp["results"]
21 | for rls in results:
22 | artists = rls["artistName"]
23 | title = rls["collectionName"]
24 | track_count = rls["trackCount"]
25 | date = rls["releaseDate"][:10]
26 | year = int(re.search(r"(\d{4})", date)[1])
27 | copyright = (
28 | parse_copyright(rls["copyright"]) if "copyright" in rls else None
29 | )
30 | explicit = rls["collectionExplicitness"] == "explicit"
31 | clean = rls["collectionExplicitness"] == "cleaned"
32 |
33 | releases[rls["collectionId"]] = (
34 | IdentData(artists, title, year, track_count, "WEB"),
35 | self.format_result(
36 | artists,
37 | title,
38 | f"{year} {copyright}",
39 | track_count=track_count,
40 | explicit=explicit,
41 | clean=clean,
42 | ),
43 | )
44 | return "iTunes", releases
45 |
--------------------------------------------------------------------------------
/salmon/search/junodownload.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from salmon import config
4 | from salmon.errors import ScrapeError
5 | from salmon.search.base import IdentData, SearchMixin
6 | from salmon.sources import JunodownloadBase
7 |
8 |
9 | class Searcher(JunodownloadBase, SearchMixin):
10 | async def search_releases(self, searchstr, limit):
11 | releases = {}
12 | soup = await self.create_soup(
13 | self.search_url,
14 | params={
15 | "submit-search": "SEARCH",
16 | "solrorder": "relevancy",
17 | "q[all][]": [searchstr],
18 | },
19 | allow_redirects=False,
20 | )
21 | for meta in soup.find_all(
22 | 'div',
23 | attrs={
24 | 'class': 'row gutters-sm jd-listing-item',
25 | 'data-ua_location': 'release',
26 | },
27 | ):
28 | try:
29 | su_title = meta.find('a', attrs={'class': 'juno-title'})
30 | rls_id = re.search(r"/products/[^/]+/([\d-]+)", su_title["href"])[1]
31 | title = su_title.string
32 |
33 | right_blob = meta.find('div', attrs={'class': 'text-sm mb-3 mb-lg-3'})
34 |
35 | right_blob_elements_count = len(
36 | right_blob.get_text(separator="|").strip().split("|")
37 | )
38 | if right_blob_elements_count != 3:
39 | # skip item missing one or more of: catno, date or genre
40 | continue
41 |
42 | date = right_blob.find('br').next_sibling.strip()
43 | year = int(date[-2:])
44 |
45 | if 40 <= year <= 99:
46 | year = 1900 + year
47 | else:
48 | year = 2000 + year
49 |
50 | catno = right_blob.find('br').previous_sibling.strip().replace(' ', '')
51 |
52 | ar_blob = meta.find('div', attrs={'class': 'col juno-artist'})
53 |
54 | ar_li = [a.string.title() for a in ar_blob.find_all('a') if a.string]
55 | artists = (
56 | ", ".join(ar_li)
57 | if ar_li and len(ar_li) < 5
58 | else config.VARIOUS_ARTIST_WORD
59 | )
60 |
61 | label_blob = meta.find('a', attrs={'class': 'juno-label'})
62 | label = label_blob.text.strip()
63 |
64 | if label.lower() not in config.SEARCH_EXCLUDED_LABELS:
65 | releases[rls_id] = (
66 | IdentData(artists, title, year, None, "WEB"),
67 | self.format_result(artists, title, f"{year} {label} {catno}"),
68 | )
69 | except (TypeError, IndexError, AttributeError) as e:
70 | raise ScrapeError("Failed to parse scraped search results.") from e
71 | if len(releases) == limit:
72 | break
73 | return "Junodownload", releases
74 |
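75 |
76 | # The two-digit year pivot above, in isolation: Junodownload lists years
77 | # like "98", so values 40-99 map into the 1900s and everything else into
78 | # the 2000s. A quick illustrative check:
79 | if __name__ == "__main__":
80 |     for two_digit, expected in ((95, 1995), (7, 2007), (24, 2024)):
81 |         year = 1900 + two_digit if 40 <= two_digit <= 99 else 2000 + two_digit
82 |         assert year == expected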
--------------------------------------------------------------------------------
/salmon/search/musicbrainz.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | import musicbrainzngs
4 |
5 | from salmon import config
6 | from salmon.errors import ScrapeError
7 | from salmon.search.base import IdentData, SearchMixin
8 | from salmon.sources import MusicBrainzBase
9 |
10 | loop = asyncio.get_event_loop()
11 |
12 |
13 | class Searcher(MusicBrainzBase, SearchMixin):
14 | async def search_releases(self, searchstr, limit):
15 | releases = {}
16 | soup = await loop.run_in_executor(
17 | None, musicbrainzngs.search_releases, searchstr, 10
18 | )
19 | for rls in soup["release-list"]:
20 | try:
21 | artists = rls["artist-credit-phrase"]
22 | try:
23 | track_count = rls["medium-track-count"]
24 | except KeyError:
25 | track_count = None
26 | label = catno = ""
27 | if (
28 | "label-info-list" in rls
29 | and rls["label-info-list"]
30 | and "label" in rls["label-info-list"][0]
31 | and "name" in rls["label-info-list"][0]["label"]
32 | ):
33 | label = rls["label-info-list"][0]["label"]["name"]
34 | if "catalog_number" in rls["label-info-list"][0]:
35 | catno = rls["label-info-list"][0]["catalog_number"]
36 |
37 | try:
38 | source = rls["medium-list"][0]["format"]
39 | except KeyError:
40 | source = None
41 |
42 | edition = ""
43 | if label:
44 | edition += label
45 | if catno:
46 | edition += " " + catno
47 |
48 | if label.lower() not in config.SEARCH_EXCLUDED_LABELS:
49 | releases[rls["id"]] = (
50 | IdentData(artists, rls["title"], None, track_count, source),
51 | self.format_result(
52 | artists,
53 | rls["title"],
54 | edition,
55 | ed_title=source,
56 | track_count=track_count,
57 | ),
58 | )
59 | except (TypeError, IndexError) as e:
60 | raise ScrapeError("Failed to parse scraped search results.") from e
61 | if len(releases) == limit:
62 | break
63 | return "MusicBrainz", releases
64 |
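65 |
66 | # musicbrainzngs is a blocking library, so the searcher above pushes it
67 | # onto the default executor; the same pattern in miniature (the search
68 | # string is only an example):
69 | if __name__ == "__main__":
70 |     result = loop.run_until_complete(
71 |         loop.run_in_executor(None, musicbrainzngs.search_releases, "ok computer", 5)
72 |     )
73 |     print(len(result["release-list"]))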
--------------------------------------------------------------------------------
/salmon/sources/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from salmon.sources.bandcamp import BandcampBase
3 | from salmon.sources.beatport import BeatportBase
4 | from salmon.sources.deezer import DeezerBase
5 | from salmon.sources.discogs import DiscogsBase
6 | from salmon.sources.itunes import iTunesBase
7 | from salmon.sources.junodownload import JunodownloadBase
8 | from salmon.sources.musicbrainz import MusicBrainzBase
9 | from salmon.sources.tidal import TidalBase
10 |
11 | SOURCE_ICONS = {
12 | "Bandcamp": "https://ptpimg.me/1b382r.png",
13 | "Beatport": "https://ptpimg.me/26k503.png",
14 | "Deezer": "https://ptpimg.me/6pm93g.png",
15 | "Discogs": "https://ptpimg.me/nj126r.png",
16 | "iTunes": "https://ptpimg.me/5d47fv.png",
17 | "Junodownload": "https://ptpimg.me/2852h1.png",
18 | "MusicBrainz": "https://ptpimg.me/y87lp2.png",
19 | "Tidal": "https://ptpimg.me/dhyvs6.png",
20 | }
21 |
--------------------------------------------------------------------------------
/salmon/sources/bandcamp.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from salmon.sources.base import BaseScraper
4 |
5 |
6 | class BandcampBase(BaseScraper):
7 |
8 | search_url = "https://bandcamp.com/search/"
9 | regex = re.compile(r"^https?://([^/]+)/album/([^/]+)/?")
10 | release_format = "https://{rls_url}/album/{rls_id}"
11 |
12 | @classmethod
13 | def format_url(cls, rls_id, rls_name=None):
14 | return cls.release_format.format(rls_url=rls_id[0], rls_id=rls_id[1])
15 |
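16 |
17 | # Bandcamp release IDs are (subdomain, slug) pairs, so format_url joins
18 | # the two parts instead of using a single numeric ID (values are made up):
19 | if __name__ == "__main__":
20 |     url = BandcampBase.format_url(("artist.bandcamp.com", "some-album"))
21 |     assert url == "https://artist.bandcamp.com/album/some-album"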
--------------------------------------------------------------------------------
/salmon/sources/base.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 | import re
4 | from collections import namedtuple
5 | from random import choice
6 | from string import Formatter
7 |
8 | import requests
9 | from bs4 import BeautifulSoup
10 |
11 | from salmon.constants import UAGENTS
12 | from salmon.errors import ScrapeError
13 |
14 | HEADERS = {"User-Agent": choice(UAGENTS)}
15 |
16 | IdentData = namedtuple(
17 | "IdentData", ["artist", "album", "year", "track_count", "source"]
18 | )
19 |
20 | loop = asyncio.get_event_loop()
21 |
22 |
23 | class BaseScraper:
24 |
25 | url = NotImplementedError
26 | site_url = NotImplementedError
27 | regex = NotImplementedError
28 | release_format = NotImplementedError
29 | get_params = {}
30 |
31 | @classmethod
32 | def format_url(cls, rls_id, rls_name=None):
33 | """
34 | Format the URL for a scraped release. The ``release_format``
35 | attribute of the scraper is processed and populated by the rls_id
36 | and rls_name. The rls_name is only relevant when back-filling
37 | into the sources that include release name in the URL. Those stores
38 | do not require the release name to reach the webpage, but re-adding
39 | something resembling the link doesn't harm us.
40 | """
41 | keys = [fn for _, fn, _, _ in Formatter().parse(cls.release_format) if fn]
42 | if "rls_name" in keys:
43 | rls_name = rls_name or "a"
44 | return cls.site_url + cls.release_format.format(
45 | rls_id=rls_id, rls_name=cls.url_format_rls_name(rls_name)
46 | )
47 | return cls.site_url + cls.release_format.format(rls_id=rls_id)
48 |
49 | async def get_json(self, url, params=None, headers=None):
50 | """
51 |         Run an asynchronous GET request to a JSON API maintained by
52 | a metadata source.
53 | """
54 | return await loop.run_in_executor(
55 | None, lambda: self.get_json_sync(url, params, headers)
56 | )
57 |
58 | def get_json_sync(self, url, params=None, headers=None):
59 | """Make a synchronius get request, usually called by the async get_json."""
60 | params = {**(params or {}), **(self.get_params)}
61 | headers = {**(headers or {}), **HEADERS}
62 | try:
63 | result = requests.get(
64 | self.url + url, params=params, headers=headers, timeout=7
65 | )
66 | if result.status_code != 200:
67 | raise ScrapeError(f"Status code {result.status_code}.", result.json())
68 | return result.json()
69 | except json.decoder.JSONDecodeError as e:
70 | raise ScrapeError("Did not receive JSON from API.") from e
71 |
72 | async def create_soup(self, url, params=None, headers=None, **kwargs):
73 | """
74 |         Asynchronously run a webpage scrape and return a BeautifulSoup
75 | object containing the scraped HTML.
76 | """
77 | params = params or {}
78 | r = await loop.run_in_executor(
79 | None,
80 | lambda: requests.get(
81 | url, params=params, headers=HEADERS, timeout=7, **kwargs
82 | ),
83 | )
84 | if r.status_code != 200:
85 | raise ScrapeError(
86 | f"Failed to successfully scrape page. Status code: {r.status_code}"
87 | )
88 | return BeautifulSoup(r.text, "html.parser")
89 |
90 | @staticmethod
91 | def url_format_rls_name(rls_name):
92 | """
93 | Format the URL release name from the actual release name. This
94 | is not accurate to how the web stores do it; it is merely a
95 | convenience for user readability.
96 | """
97 | url = re.sub(r"[^\-a-z\d]", "", rls_name.lower().replace(" ", "-"))
98 | return re.sub("-+", "-", url)
99 |
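100 |
101 | # A rough illustration of the slug helper above (the input is made up):
102 | if __name__ == "__main__":
103 |     slug = BaseScraper.url_format_rls_name("Songs, Vol. 2 -- Deluxe!")
104 |     assert slug == "songs-vol-2-deluxe"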
--------------------------------------------------------------------------------
/salmon/sources/beatport.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from salmon.sources.base import BaseScraper
4 |
5 |
6 | class BeatportBase(BaseScraper):
7 |
8 | url = site_url = "https://beatport.com"
9 | search_url = "https://beatport.com/search/releases"
10 | release_format = "/release/{rls_name}/{rls_id}"
11 | regex = re.compile(
12 | r"^https?://(?:(?:www|classic)\.)?beatport\.com/release/.+?/(\d+)/?$"
13 | )
14 |
--------------------------------------------------------------------------------
/salmon/sources/deezer.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 | import re
4 | from json.decoder import JSONDecodeError
5 | from random import choice
6 |
7 | import requests
8 |
9 | from salmon.constants import UAGENTS
10 | from salmon.errors import ScrapeError
11 | from salmon.sources.base import BaseScraper
12 |
13 | loop = asyncio.get_event_loop()
14 |
15 | HEADERS = {
16 | "User-Agent": choice(UAGENTS),
17 | "Content-Language": "en-US",
18 | "Cache-Control": "max-age=0",
19 | "Accept": "*/*",
20 | "Accept-Charset": "utf-8,ISO-8859-1;q=0.7,*;q=0.3",
21 | "Accept-Language": "de-DE,de;q=0.8,en-US;q=0.6,en;q=0.4",
22 | }
23 |
24 |
25 | class DeezerBase(BaseScraper):
26 |
27 | url = "https://api.deezer.com"
28 | site_url = "https://www.deezer.com"
29 | regex = re.compile(
30 | r"^https*:\/\/.*?deezer\.com.*?\/(?:[a-z]+\/)?(album|playlist|track)\/([0-9]+)"
31 | )
32 | release_format = "/album/{rls_id}"
33 |
34 | def __init__(self):
35 | self.country_code = None
36 | super().__init__()
37 |
38 | self._csrf_token = None
39 | self._login_csrf_token = None
40 | self._session = None
41 |
42 | @property
43 | def sesh(self):
44 | if self._session:
45 | return self._session
46 |
47 | self._session = requests.Session()
48 | try:
49 | from plugins.downloader.accounts import ACCOUNTS
50 | except ImportError:
51 | return self._session
52 |
53 | self.api_token # Create login csrf token
54 | try:
55 | data = {
56 | "type": "login",
57 | "mail": list(ACCOUNTS["Deezer"]["IDK"].keys())[0],
58 | "password": list(ACCOUNTS["Deezer"]["IDK"].values())[0]["password"],
59 | "checkFormLogin": self._login_csrf_token,
60 | }
61 | cookie = list(ACCOUNTS["Deezer"]["IDK"].values())[0]["cookie"]
62 | except (KeyError, IndexError):
63 | return self._session
64 |
65 | cookies = {"arl": cookie}
66 | if cookie is None:
67 | response = self._session.post(
68 | "https://www.deezer.com/ajax/action.php", headers=HEADERS, data=data
69 | )
70 | if response.status_code != 200 or "success" not in response.text:
71 | return self._session
72 |
73 | response = self._session.get(
74 | "https://www.deezer.com/account", headers=HEADERS, cookies=cookies
75 | )
76 | re_token = re.search(r'"?api(?:_k|K)ey"?: ?["\']([^"\',]*)', response.text)
77 | if response.url == "https://www.deezer.com/account" and re_token:
78 | self._csrf_token = re_token[1]
79 | return self._session
80 |
81 | @property
82 | def api_token(self):
83 | if self._csrf_token:
84 | return self._csrf_token
85 |
86 | params = {"api_version": "1.0", "api_token": "null", "input": "3"}
87 | response = self.sesh.get(
88 | "https://www.deezer.com/ajax/gw-light.php",
89 | params={"method": "deezer.getUserData", **params},
90 | headers=HEADERS,
91 | )
92 | try:
93 | check_data = json.loads(response.text)
94 | self._csrf_token = check_data["results"]["checkForm"]
95 | self._login_csrf_token = check_data["results"]["checkFormLogin"]
96 | except (JSONDecodeError, KeyError):
97 | pass
98 | return self._csrf_token
99 |
100 | @classmethod
101 | def parse_release_id(cls, url):
102 | return cls.regex.search(url)[2]
103 |
104 | async def create_soup(self, url, params=None):
105 | """Run a GET request to Deezer's JSON API for album data."""
106 | params = params or {}
107 | album_id = self.parse_release_id(url)
108 | try:
109 | data = await self.get_json(f"/album/{album_id}", params=params)
110 | internal_data = await self.get_internal_api_data(
111 | f"/album/{album_id}", params
112 | )
113 | data["tracklist"] = self.get_tracks(internal_data)
114 | data["cover_xl"] = self.get_cover(internal_data)
115 | return data
116 | except json.decoder.JSONDecodeError as e:
117 | raise ScrapeError("Deezer page did not return valid JSON.") from e
118 | except (KeyError, ScrapeError) as e:
119 | raise ScrapeError(f"Failed to grab metadata for {url}.") from e
120 |
121 | async def get_internal_api_data(self, url, params=None):
122 | """Deezer puts some things in an api that isn't public facing.
123 | Like track information and album art before a release is available.
124 | """
125 | track_data = await loop.run_in_executor(
126 | None, lambda: self.sesh.get(self.site_url + url, params=(params or {}))
127 | )
128 | r = re.search(
129 | r"window.__DZR_APP_STATE__ = ({.*?}})",
130 | track_data.text.replace("\n", ""),
131 | )
132 | if not r:
133 | raise ScrapeError("Failed to scrape track data.")
134 | raw = re.sub(r"{(\s*)type\: +\'([^\']+)\'", r'{\1type: "\2"', r[1])
135 | raw = re.sub("\t+([^:]+): ", r'"\1":', raw)
136 | return json.loads(raw)
137 |
138 | def get_tracks(self, internal_data):
139 | return internal_data["SONGS"]["data"]
140 |
141 | def get_cover(self, internal_data):
142 | "This uses a hardcoded url. Hope the dns url doesn't change."
143 | artwork_code = internal_data["DATA"]["ALB_PICTURE"]
144 | return f'https://e-cdns-images.dzcdn.net/images/cover/{artwork_code}/1000x1000-000000-100-0-0.jpg'
145 |
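146 |
147 | # The JSON-repair step from get_internal_api_data, shown on a tiny
148 | # synthetic blob: the embedded app state uses unquoted keys and single
149 | # quotes, which the substitutions normalize before json.loads.
150 | if __name__ == "__main__":
151 |     raw = re.sub(r"{(\s*)type\: +\'([^\']+)\'", r'{\1type: "\2"', "{type: 'album'}")
152 |     print(raw)  # -> {type: "album"}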
--------------------------------------------------------------------------------
/salmon/sources/discogs.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 |
4 | from salmon import config
5 | from salmon.errors import ScrapeError
6 | from salmon.sources.base import BaseScraper
7 |
8 |
9 | class DiscogsBase(BaseScraper):
10 |
11 | url = "https://api.discogs.com"
12 | site_url = "https://www.discogs.com"
13 | regex = re.compile(r"^https?://(?:www\.)?discogs\.com/(?:.+?/)?release/(\d+)/?$")
14 | release_format = "/release/{rls_id}"
15 | get_params = {"token": config.DISCOGS_TOKEN}
16 |
17 | async def create_soup(self, url, params=None):
18 | try:
19 | return await self.get_json(
20 | f"/releases/{self.regex.match(url)[1]}", params=params
21 | )
22 | except json.decoder.JSONDecodeError as e:
23 | raise ScrapeError("Discogs page did not return valid JSON.") from e
24 |
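25 |
26 | # The regex above captures the numeric release ID that create_soup feeds
27 | # to the /releases endpoint (the ID here is made up):
28 | if __name__ == "__main__":
29 |     match = DiscogsBase.regex.match("https://www.discogs.com/release/249504")
30 |     assert match[1] == "249504"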
--------------------------------------------------------------------------------
/salmon/sources/itunes.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from .base import BaseScraper
4 |
5 |
6 | class iTunesBase(BaseScraper):
7 |
8 | url = site_url = "https://itunes.apple.com"
9 | search_url = "https://itunes.apple.com/search"
10 | regex = re.compile(
11 | "^https?://itunes\.apple\.com/(?:(\w{2,4})/)?album/(?:[^/]*/)?([^\?]+)"
12 | )
13 | release_format = "/us/album/{rls_id}"
14 |
--------------------------------------------------------------------------------
/salmon/sources/junodownload.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from salmon.sources.base import BaseScraper
4 |
5 |
6 | class JunodownloadBase(BaseScraper):
7 |
8 | url = site_url = "https://junodownload.com"
9 | search_url = "https://www.junodownload.com/search/"
10 | regex = re.compile(
11 | r"^https?://(?:(?:www|secure)\.)?junodownload\.com/products/[^/]+/([^/]*)/?"
12 | )
13 |
14 | release_format = "/products/{rls_name}/{rls_id}"
15 |
--------------------------------------------------------------------------------
/salmon/sources/musicbrainz.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import musicbrainzngs
4 |
5 | from .base import BaseScraper
6 |
7 | musicbrainzngs.set_useragent("salmon", "1.0", "noreply@salm.on")
8 |
9 |
10 | class MusicBrainzBase(BaseScraper):
11 |
12 | url = site_url = "https://musicbrainz.org"
13 | release_format = "/release/{rls_id}"
14 | regex = re.compile("^https?://(?:www\.)?musicbrainz.org/release/([a-z0-9\-]+)$")
15 |
16 | async def create_soup(self, url):
17 | rls_id = re.search(r"/release/([a-f0-9\-]+)$", url)[1]
18 | return musicbrainzngs.get_release_by_id(
19 | rls_id,
20 | [
21 | "artists",
22 | "labels",
23 | "recordings",
24 | "release-groups",
25 | "media",
26 | "artist-credits",
27 | "artist-rels",
28 | "recording-level-rels",
29 | ],
30 | )["release"]
31 |
--------------------------------------------------------------------------------
/salmon/sources/tidal.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 |
4 | from salmon import config
5 | from salmon.errors import ScrapeError
6 | from salmon.sources.base import BaseScraper
7 |
8 |
9 | class TidalBase(BaseScraper):
10 |
11 | url = "https://api.tidalhifi.com/v1"
12 | site_url = "https://listen.tidal.com"
13 | image_url = "https://resources.tidal.com/images/{album_id}/1280x1280.jpg"
14 | regex = re.compile(
15 | r"^https*:\/\/.*?(?:tidal|wimpmusic)\.com.*?\/(album|track|playlist)\/([0-9a-z\-]+)"
16 | )
17 | release_format = "/album/{rls_id}"
18 | get_params = {"token": config.TIDAL_TOKEN}
19 |
20 | def __init__(self):
21 | self.country_code = None
22 | super().__init__()
23 |
24 | @classmethod
25 | def format_url(cls, rls_id, rls_name=None):
26 | return cls.site_url + cls.release_format.format(rls_id=rls_id[1])
27 |
28 | @classmethod
29 | def parse_release_id(cls, url):
30 | return cls.regex.search(url)[2]
31 |
32 | async def create_soup(self, url, params=None):
33 | """Run a GET request to Tidal's JSON API for album data."""
34 | params = params or {}
35 | album_id = self.parse_release_id(url)
36 | for cc in get_tidal_regions_to_fetch():
37 | try:
38 | self.country_code = cc
39 | params["countrycode"] = cc
40 | data = await self.get_json(f"/albums/{album_id}", params=params)
41 | tracklist = await self.get_json(
42 | f"/albums/{album_id}/tracks", params=params
43 | )
44 | data["tracklist"] = tracklist["items"]
45 | return data
46 | except json.decoder.JSONDecodeError as e:
47 | raise ScrapeError("Tidal page did not return valid JSON.") from e
48 | except (KeyError, ScrapeError):
49 | pass
50 | raise ScrapeError(f"Failed to grab metadata for {url}.")
51 |
52 |
53 | def get_tidal_regions_to_fetch():
54 | if config.TIDAL_FETCH_REGIONS:
55 | return config.TIDAL_FETCH_REGIONS
56 | else:
57 | try:
58 | from plugins.downloader.accounts import ACCOUNTS
59 |
60 | if "Tidal" in ACCOUNTS:
61 | return [k for k, v in ACCOUNTS["Tidal"].items() if v]
62 | except ImportError:
63 | pass
64 | raise ScrapeError("No regions defined for Tidal to grab from")
65 |
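66 |
67 | # create_soup retries the same album across regions until one returns
68 | # usable JSON; parse_release_id is the part that can be exercised without
69 | # network access (the album ID is made up):
70 | if __name__ == "__main__":
71 |     url = "https://listen.tidal.com/album/79914998"
72 |     assert TidalBase.parse_release_id(url) == "79914998"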
--------------------------------------------------------------------------------
/salmon/tagger/__init__.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from itertools import chain
3 | from pprint import pprint
4 |
5 | import click
6 |
7 | from salmon.common import commandgroup
8 | from salmon.constants import (
9 | ARTIST_IMPORTANCES,
10 | RELEASE_TYPES,
11 | SOURCES,
12 | TAG_ENCODINGS,
13 | )
14 | from salmon.errors import InvalidMetadataError, ScrapeError
15 | from salmon.tagger.audio_info import gather_audio_info
16 | from salmon.tagger.cover import download_cover_if_nonexistent
17 | from salmon.tagger.foldername import rename_folder
18 | from salmon.tagger.folderstructure import check_folder_structure
19 | from salmon.tagger.metadata import get_metadata
20 | from salmon.tagger.pre_data import construct_rls_data
21 | from salmon.tagger.retagger import rename_files, tag_files
22 | from salmon.tagger.review import review_metadata
23 | from salmon.tagger.sources import run_metadata
24 | from salmon.tagger.tags import check_tags, gather_tags, standardize_tags
25 |
26 | loop = asyncio.get_event_loop()
27 |
28 |
29 | def validate_source(ctx, param, value):
30 | try:
31 | return SOURCES[value.lower()]
32 | except KeyError:
33 | raise click.BadParameter(f"{value} is not a valid source.")
34 | except AttributeError:
35 | raise click.BadParameter(
36 | "You must provide a source. Possible sources are: "
37 | + ", ".join(SOURCES.values())
38 | )
39 |
40 |
41 | def validate_encoding(ctx, param, value):
42 | try:
43 | return TAG_ENCODINGS[value.upper()]
44 | except KeyError:
45 | raise click.BadParameter(f"{value} is not a valid encoding.")
46 | except AttributeError:
47 | return None, None
48 |
49 |
50 | @commandgroup.command()
51 | @click.argument(
52 | "path", type=click.Path(exists=True, file_okay=False, resolve_path=True)
53 | )
54 | @click.option(
55 | "--source",
56 | "-s",
57 | type=click.STRING,
58 | callback=validate_source,
59 | help=f'Source of files ({"/".join(SOURCES.values())})',
60 | )
61 | @click.option(
62 | "--encoding",
63 | "-e",
64 | type=click.STRING,
65 | callback=validate_encoding,
66 | help="You must specify one of the following encodings if files aren't lossless: "
67 | + ", ".join(TAG_ENCODINGS.keys()),
68 | )
69 | @click.option(
70 | "--overwrite",
71 | "-ow",
72 | is_flag=True,
73 | help="Whether or not to use the original metadata.",
74 | )
75 | def tag(path, source, encoding, overwrite):
76 | """Interactively tag an album"""
77 | click.secho(f"\nProcessing {path}", fg="cyan", bold=True)
78 | standardize_tags(path)
79 | tags = gather_tags(path)
80 | audio_info = gather_audio_info(path)
81 | rls_data = construct_rls_data(
82 | tags, audio_info, source, encoding, overwrite=overwrite
83 | )
84 |
85 | metadata = get_metadata(path, tags, rls_data)
86 | metadata = review_metadata(metadata, metadata_validator_base)
87 | tag_files(path, tags, metadata)
88 |
89 | download_cover_if_nonexistent(path, metadata["cover"])
90 | tags = check_tags(path)
91 | path = rename_folder(path, metadata)
92 | rename_files(path, tags, metadata)
93 | check_folder_structure(path)
94 | click.secho(f"\nProcessed {path}", fg="cyan", bold=True)
95 |
96 |
97 | @commandgroup.command()
98 | @click.argument("url")
99 | def meta(url):
100 | """Scrape metadata from release link"""
101 | try:
102 | metadata = loop.run_until_complete(run_metadata(url))
103 | for key in ["encoding", "media", "encoding_vbr", "source"]:
104 | del metadata[key]
105 | click.echo()
106 | pprint(metadata)
107 | except ScrapeError as e:
108 | click.secho(f"Scrape failed: {e}", fg="red")
109 |
110 |
111 | def metadata_validator_base(metadata):
112 | """Validate that the provided metadata is not an issue."""
113 | artist_importances = set(i for _, i in metadata["artists"])
114 | if "main" not in artist_importances:
115 | raise InvalidMetadataError("You must have at least one main artist.")
116 | for track in chain.from_iterable([d.values() for d in metadata["tracks"].values()]):
117 | if "main" not in set(i for _, i in track["artists"]):
118 | raise InvalidMetadataError(
119 | "You must have at least one main artist per track."
120 | )
121 | if not all(i in ARTIST_IMPORTANCES for i in artist_importances):
122 | raise InvalidMetadataError(
123 | "Invalid artist importance detected: {}.".format(
124 | ", ".join(
125 | i
126 |                     for i in artist_importances.difference(ARTIST_IMPORTANCES)
127 | )
128 | )
129 | )
130 | try:
131 | metadata["year"] = int(metadata["year"])
132 | except (ValueError, TypeError):
133 | raise InvalidMetadataError("Year is not an integer.")
134 | if metadata["rls_type"] not in RELEASE_TYPES:
135 | raise InvalidMetadataError("Invalid release type.")
136 | if not metadata["genres"]:
137 | raise InvalidMetadataError("You must specify at least one genre.")
138 | if metadata["source"] == "CD" and metadata["year"] < 1982:
139 | raise InvalidMetadataError("You cannot have a CD upload from before 1982.")
140 | if metadata["source"] not in SOURCES.values():
141 | raise InvalidMetadataError(f'{metadata["source"]} is not a valid source.')
142 | if metadata["label"] and (
143 | len(metadata["label"]) < 2 or len(metadata["label"]) > 80
144 | ):
145 | raise InvalidMetadataError("Label must be over 2 and under 80 characters.")
146 | if metadata["catno"] and (
147 | len(metadata["catno"]) < 2 or len(metadata["catno"]) > 80
148 | ):
149 | raise InvalidMetadataError("Catno must be over 2 and under 80 characters.")
150 |
151 | return metadata
152 |
--------------------------------------------------------------------------------
/salmon/tagger/audio_info.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import click
4 | import mutagen
5 |
6 | from salmon.common import compress, get_audio_files
7 | from salmon.errors import UploadError
8 |
9 |
10 | def gather_audio_info(path):
11 | """
12 | Iterate over all audio files in the directory and parse the technical
13 | information about the files into a dictionary.
14 | """
15 | files = get_audio_files(path)
16 | if not files:
17 | raise UploadError("No audio files found.")
18 |
19 | audio_info = {}
20 | for filename in files:
21 | mut = mutagen.File(os.path.join(path, filename))
22 | audio_info[filename] = _parse_audio_info(mut.info)
23 | return audio_info
24 |
25 |
26 | def _parse_audio_info(streaminfo):
27 | return {
28 | "channels": streaminfo.channels,
29 | "sample rate": streaminfo.sample_rate,
30 | "bit rate": streaminfo.bitrate,
31 | "precision": getattr(streaminfo, "bits_per_sample", None),
32 | "duration": int(streaminfo.length),
33 | }
34 |
35 |
36 | def check_hybrid(tags):
37 | """Check whether or not the release has mixed precisions/sample rate."""
38 | first_tag = next(iter(tags.values()))
39 | if not all(
40 | t["precision"] == first_tag["precision"]
41 | and t["sample rate"] == first_tag["sample rate"]
42 | for t in tags.values()
43 | ):
44 | click.secho(
45 | "Release has mixed bit depths / sample rates. Flagging as hybrid.",
46 | fg="yellow",
47 | )
48 | return True
49 | return False
50 |
51 |
52 | def recompress_path(path):
53 | """Recompress all flacs in the directory to the configured compression level."""
54 | files = get_audio_files(path)
55 | if not files or not all(".flac" in f for f in files):
56 | return click.secho("No flacs found to recompress. Skipping...", fg="red")
57 | for filename in files:
58 | filepath = os.path.join(path, filename)
59 | compress(filepath)
60 |
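61 |
62 | # check_hybrid only inspects the parsed info dicts, so it is easy to
63 | # exercise with synthetic values:
64 | if __name__ == "__main__":
65 |     fake_info = {
66 |         "01.flac": {"precision": 16, "sample rate": 44100},
67 |         "02.flac": {"precision": 24, "sample rate": 44100},
68 |     }
69 |     assert check_hybrid(fake_info) is True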
--------------------------------------------------------------------------------
/salmon/tagger/combine.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from itertools import chain
3 |
4 | from salmon.common import re_strip
5 | from salmon.errors import TrackCombineError
6 | from salmon.tagger.sources import METASOURCES
7 | from salmon.tagger.sources.base import generate_artists
8 |
9 | PREFERENCES = [
10 | "Tidal",
11 | "Deezer",
12 | "Bandcamp",
13 | "MusicBrainz",
14 | "iTunes",
15 | "Junodownload",
16 | "Discogs",
17 | "Beatport",
18 | ]
19 |
20 |
21 | def get_source_from_link(url):
22 | for name, source in METASOURCES.items():
23 | if source.Scraper.regex.match(url):
24 | return name
25 |
26 |
27 | def combine_metadatas(*metadatas, base=None): # noqa: C901
28 | """
29 | This function takes a bunch of chosen metadata and splices
30 | together values to form one unified metadata dictionary.
31 | It runs through them in the order of the sources specified in
32 | the PREFERENCES list. Nonexistent data is replaced by existing
33 | data, and some are combined, like release comments. Due to this,
34 | it's fairly important that the base metadata contain the correct
35 | number of tracks.
36 | """
37 | url_sources = set()
38 | if base and base.get("url", False):
39 | url_sources.add(get_source_from_link(base["url"]))
40 |
41 | sources = sort_metadatas(metadatas)
42 | for pref in PREFERENCES:
43 | for metadata in sources[pref]:
44 | if not base:
45 | base = metadata
46 | if base.get("url", False):
47 | url_sources.add(get_source_from_link(base["url"]))
48 | continue
49 |
50 | base["genres"] += metadata["genres"]
51 |
52 | try:
53 | base["tracks"] = combine_tracks(base["tracks"], metadata["tracks"])
54 | except TrackCombineError:
55 | pass
56 |
57 | if (
58 | (not base["catno"] or not base["label"])
59 | and metadata["label"]
60 | and metadata["catno"]
61 | and (
62 | not base["label"]
63 | or any(w in metadata["label"] for w in base["label"].split())
64 | )
65 | ):
66 | base["label"] = metadata["label"]
67 | base["catno"] = metadata["catno"]
68 |
69 | if metadata["comment"]:
70 | if not base["comment"]:
71 | base["comment"] = metadata["comment"]
72 | else:
73 | base["comment"] += f'\n\n{"-"*32}\n\n' + metadata["comment"]
74 |
75 | if not base["cover"]:
76 | base["cover"] = metadata["cover"]
77 | if not base["edition_title"]:
78 | base["edition_title"] = metadata["edition_title"]
79 | if not base["year"]:
80 | base["year"] = metadata["year"]
81 | if not base["group_year"] or (
82 | str(metadata["group_year"]).isdigit()
83 | and int(metadata["group_year"]) < int(base["group_year"])
84 | ):
85 | base["group_year"] = metadata["group_year"]
86 | if not base["date"]:
87 | base["date"] = metadata["date"]
88 | base["year"] = metadata["year"]
89 | base["group_year"] = metadata["group_year"]
90 | if not base["rls_type"] or base["rls_type"] == "Album":
91 | base["rls_type"] = metadata["rls_type"]
92 | if not base["upc"]:
93 | base["upc"] = metadata["upc"]
94 |
95 | if sources[pref] and "url" in sources[pref][0]:
96 | link_source = get_source_from_link(sources[pref][0]["url"])
97 | if link_source not in url_sources:
98 | base["urls"].append(sources[pref][0]["url"])
99 | url_sources.add(link_source)
100 |
101 | if "url" in base:
102 | del base["url"]
103 |
104 | base["artists"], base["tracks"] = generate_artists(base["tracks"])
105 | base["genres"] = list(set(base["genres"]))
106 | return base
107 |
108 |
109 | def sort_metadatas(metadatas):
110 | """Split the metadatas by source."""
111 | sources = defaultdict(list)
112 | for source, md in metadatas:
113 | sources[source].append(md)
114 | return sources
115 |
116 |
117 | def combine_tracks(base, meta):
118 | """Combine the metadata for the tracks of two different sources."""
119 | btracks = iter(chain.from_iterable([d.values() for d in base.values()]))
120 | for disc, tracks in meta.items():
121 | for num, track in tracks.items():
122 | try:
123 | btrack = next(btracks)
124 | except StopIteration:
125 | raise TrackCombineError(f"Disc {disc} track {num} does not exist.")
126 |
127 | if re_strip(track["title"]) != re_strip(btrack["title"]):
128 | continue
129 | base_artists = {(re_strip(a[0]), a[1]) for a in btrack["artists"]}
130 | btrack["artists"] = list(btrack["artists"])
131 | for a in track["artists"]:
132 | if (re_strip(a[0]), a[1]) not in base_artists:
133 | btrack["artists"].append(a)
134 | btrack["artists"] = check_for_artist_fragments(btrack["artists"])
135 | if track["explicit"]:
136 | btrack["explicit"] = True
137 | if not btrack["format"]:
138 | btrack["format"] = track["format"]
139 | if not btrack["isrc"]:
140 | btrack["isrc"] = track["isrc"]
141 | if not btrack["replay_gain"]:
142 | btrack["replay_gain"] = track["replay_gain"]
143 | btrack["title"] = track["title"]
144 | if track["tracktotal"] and track["disctotal"]:
145 | btrack["tracktotal"] = track["tracktotal"]
146 | btrack["disctotal"] = track["disctotal"]
147 | base[btrack["disc#"]][btrack["track#"]] = btrack
148 | return base
149 |
150 |
151 | def check_for_artist_fragments(artists):
152 | """Check for artists that may be a fragment of another artist in the release."""
153 | artist_set = {a for a, _ in artists}
154 | for a, i in artists.copy():
155 | for artist in artist_set:
156 | if a != artist and a in artist and len(a) > 1 and (a, i) in artists:
157 | artists.remove((a, i))
158 | return artists
159 |
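160 |
161 | # sort_metadatas groups (source, metadata) pairs so combine_metadatas can
162 | # walk them in PREFERENCES order; a toy illustration:
163 | if __name__ == "__main__":
164 |     grouped = sort_metadatas([("Tidal", {"year": 2020}), ("Deezer", {"year": 2019})])
165 |     assert grouped["Tidal"] == [{"year": 2020}]
166 |     assert grouped["Deezer"] == [{"year": 2019}]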
--------------------------------------------------------------------------------
/salmon/tagger/cover.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 |
4 | import click
5 | import requests
6 |
7 | from salmon import config
8 |
9 |
10 | def download_cover_if_nonexistent(path, cover_url):
11 | for filename in os.listdir(path):
12 | if re.match(r"^(cover|folder)\.(jpe?g|png)$", filename, flags=re.IGNORECASE):
13 | return
14 | if cover_url:
15 | click.secho("\nDownloading cover image...", fg="yellow")
16 | _download_cover(path, cover_url)
17 |
18 |
19 | def _download_cover(path, cover_url):
20 | ext = os.path.splitext(cover_url)[1]
21 | c = "c" if config.LOWERCASE_COVER else "C"
22 | stream = requests.get(cover_url, stream=True)
23 | with open(os.path.join(path, f"{c}over{ext}"), "wb") as f:
24 | for chunk in stream.iter_content(chunk_size=5096):
25 | if chunk:
26 | f.write(chunk)
27 |
--------------------------------------------------------------------------------
/salmon/tagger/foldername.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import shutil
4 | from copy import copy
5 | from string import Formatter
6 |
7 | import click
8 |
9 | from salmon import config
10 | from salmon.common import strip_template_keys
11 | from salmon.constants import (
12 | BLACKLISTED_CHARS,
13 | BLACKLISTED_FULLWIDTH_REPLACEMENTS,
14 | )
15 | from salmon.errors import UploadError
16 |
17 |
18 | def rename_folder(path, metadata, check=True):
19 | """
20 | Create a revised folder name from the new metadata and present it to the
21 | user. Have them decide whether or not to accept the folder name.
22 | Then offer them the ability to edit the folder name in a text editor
23 | before the renaming occurs.
24 | """
25 | old_base = os.path.basename(path)
26 | new_base = generate_folder_name(metadata)
27 |
28 | if check:
29 | click.secho("\nRenaming folder...", fg="cyan", bold=True)
30 | click.echo(f"Old folder name : {old_base}")
31 | click.echo(f"New pending folder name: {new_base}")
32 | if not click.confirm(
33 | click.style(
34 | "\nWould you like to replace the original folder name?",
35 | fg="magenta",
36 | bold=True,
37 | ),
38 | default=True,
39 | ):
40 | return path
41 |
42 | new_base = _edit_folder_interactive(new_base)
43 |
44 | new_path = os.path.join(os.path.dirname(path), new_base)
45 | if os.path.isdir(new_path) and old_base != new_base:
46 | if not check or click.confirm(
47 | click.style(
48 | "A folder already exists with the new folder name, would you like to replace it?",
49 | fg="magenta",
50 | bold=True,
51 | ),
52 | default=True,
53 | ):
54 | shutil.rmtree(new_path)
55 | else:
56 | raise UploadError("New folder name already exists.")
57 | new_path_dirname = os.path.dirname(new_path)
58 | if not os.path.exists(new_path_dirname):
59 | os.makedirs(new_path_dirname)
60 | os.rename(path, new_path)
61 | click.secho(f"Renamed folder to {new_base}.", fg="yellow")
62 | return new_path
63 |
64 |
65 | def generate_folder_name(metadata):
66 | """
67 | Fill in the values from the folder template using the metadata, then strip
68 | away the unnecessary keys.
69 | """
70 | metadata = {**metadata, **{"artists": _compile_artist_str(metadata["artists"])}}
71 | template = config.FOLDER_TEMPLATE
72 | keys = [fn for _, fn, _, _ in Formatter().parse(template) if fn]
73 | for k in keys.copy():
74 | if not metadata.get(k):
75 | template = strip_template_keys(template, k)
76 | keys.remove(k)
77 | sub_metadata = _fix_format(metadata, keys)
78 | return template.format(
79 | **{k: _sub_illegal_characters(sub_metadata[k]) for k in keys}
80 | )
81 |
82 |
83 | def _compile_artist_str(artist_data):
84 | """Create a string to represent the main artists of the release."""
85 | artists = [a[0] for a in artist_data if a[1] == "main"]
86 | if len(artists) > config.VARIOUS_ARTIST_THRESHOLD:
87 | return config.VARIOUS_ARTIST_WORD
88 | c = ", " if len(artists) > 2 or "&" in "".join(artists) else " & "
89 | return c.join(sorted(artists))
90 |
91 |
92 | def _sub_illegal_characters(stri):
93 | if config.FULLWIDTH_REPLACEMENTS:
94 | for char, sub in BLACKLISTED_FULLWIDTH_REPLACEMENTS.items():
95 | stri = str(stri).replace(char, sub)
96 | return re.sub(BLACKLISTED_CHARS, config.BLACKLISTED_SUBSTITUTION, str(stri))
97 |
98 |
99 | def _fix_format(metadata, keys):
100 | """
101 | Add abbreviated encoding to format key when the format is not 'FLAC'.
102 | Helpful for 24 bit FLAC and MP3 320/V0 stuff.
103 |
104 |     So far only 24 bit FLAC is supported; when I fix the script for MP3, I will add MP3 encodings.
105 | """
106 | sub_metadata = copy(metadata)
107 | if "format" in keys:
108 | if metadata["format"] == "FLAC" and metadata["encoding"] == "24bit Lossless":
109 | sub_metadata["format"] = "24bit FLAC"
110 | elif metadata["format"] == "MP3":
111 | enc = re.sub(r" \(VBR\)", "", str(metadata["encoding"]))
112 | sub_metadata["format"] = f"MP3 {enc}"
113 | if metadata["encoding_vbr"]:
114 | sub_metadata["format"] += " (VBR)"
115 | elif metadata["format"] == "AAC":
116 | enc = re.sub(r" \(VBR\)", "", metadata["encoding"])
117 | sub_metadata["format"] = f"AAC {enc}"
118 | if metadata["encoding_vbr"]:
119 | sub_metadata["format"] += " (VBR)"
120 | return sub_metadata
121 |
122 |
123 | def _edit_folder_interactive(foldername):
124 | """Allow the user to edit the pending folder name in a text editor."""
125 | if not click.confirm(
126 | click.style(
127 | "Is the new folder name acceptable? ([n] to edit)", fg="magenta", bold=True
128 | ),
129 | default=True,
130 | ):
131 | newname = click.edit(foldername)
132 | while True:
133 | if newname is None:
134 | return foldername
135 | elif re.search(BLACKLISTED_CHARS, newname):
136 | if not click.confirm(
137 | click.style(
138 | "Folder name contains invalid characters, retry?",
139 | fg="magenta",
140 | bold=True,
141 | ),
142 | default=True,
143 | ):
144 | exit()
145 | else:
146 | return newname.strip().replace("\n", "")
147 | newname = click.edit(foldername)
148 | return foldername
149 |
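150 |
151 | # generate_folder_name relies on string.Formatter to discover which keys
152 | # a template wants; the extraction step in isolation (template is made up):
153 | if __name__ == "__main__":
154 |     tpl = "{artists} - {title} ({year})"
155 |     keys = [fn for _, fn, _, _ in Formatter().parse(tpl) if fn]
156 |     assert keys == ["artists", "title", "year"]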
--------------------------------------------------------------------------------
/salmon/tagger/folderstructure.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import click
4 |
5 | from salmon import config
6 | from salmon.constants import ALLOWED_EXTENSIONS
7 | from salmon.errors import NoncompliantFolderStructure
8 |
9 |
10 | def check_folder_structure(path):
11 | """
12 | Run through every filesystem check that causes uploads to violate the rules
13 |     or be rejected on the upload form. Verify that path lengths are <=180,
14 |     that there are no zero length folders, and that the file extensions are valid.
15 | """
16 | while True:
17 | click.secho("\nChecking folder structure...", fg="cyan", bold=True)
18 | try:
19 | _check_path_lengths(path)
20 | _check_zero_len_folder(path)
21 | _check_extensions(path)
22 | return
23 | except NoncompliantFolderStructure:
24 | click.confirm(
25 | click.style(
26 | "You need to manually fix the issues present in the upload's folder? "
27 | "Send a [Y] once you have done so, or a [N] to abort.",
28 | fg="magenta",
29 | bold=True,
30 | ),
31 | default=False,
32 | abort=True,
33 | )
34 |
35 |
36 | def _check_path_lengths(path):
37 | """Verify that all path lenghts are <=180 characters."""
38 | offending_files, really_offending_files = [], []
39 | root_len = len(config.DOWNLOAD_DIRECTORY) + 1
40 | for root, _, files in os.walk(path):
41 | if len(os.path.abspath(root)) - root_len > 180:
42 | click.secho("A subfolder has a path length of >180 characters.", fg="red")
43 | raise NoncompliantFolderStructure
44 | for f in files:
45 | filepath = os.path.abspath(os.path.join(root, f))
46 | filepathlen = len(filepath) - root_len
47 | if filepathlen > 180:
48 |             if filepathlen >= 200:
49 | really_offending_files.append(filepath)
50 | else:
51 | offending_files.append(filepath)
52 |
53 | if really_offending_files:
54 | click.secho(
55 | "The following files exceed 180 characters in length, but cannot "
56 | "be safely truncated:",
57 | fg="red",
58 | bold=True,
59 | )
60 | for f in really_offending_files:
61 | click.echo(f" >> {f}")
62 | raise NoncompliantFolderStructure
63 |
64 | if not offending_files:
65 | return click.secho("No paths exceed 180 characters in length.", fg="green")
66 |
67 | click.secho(
68 | "The following exceed 180 characters in length, truncating...", fg="red"
69 | )
70 | for filepath in sorted(offending_files):
71 | filename, ext = os.path.splitext(filepath)
72 | newpath = filepath[: 178 - len(filename) - len(ext) + root_len] + ".." + ext
73 | os.rename(filepath, newpath)
74 | click.echo(f" >> {newpath}")
75 |
76 |
77 | def _check_zero_len_folder(path):
78 | """Verify that a zero length folder does not exist."""
79 | for root, _, files in os.walk(path):
80 | for filename in files:
81 | foldlist = os.path.join(root, filename)
82 | if "//" in foldlist:
83 | click.secho("A zero length folder exists in this directory.", fg="red")
84 | raise NoncompliantFolderStructure
85 | click.secho("No zero length folders were found.", fg="green")
86 |
87 |
88 | def _check_extensions(path):
89 | """Validate that all file extensions are valid."""
90 | mp3, aac, flac = [], [], []
91 | for root, _, files in os.walk(path):
92 | for fln in files:
93 | _, ext = os.path.splitext(fln.lower())
94 | if ext == ".mp3":
95 | mp3.append(fln)
96 | elif ext == ".flac":
97 | flac.append(fln)
98 | elif ext == ".m4a":
99 | aac.append(fln)
100 | elif ext not in ALLOWED_EXTENSIONS:
101 | _handle_bad_extension(os.path.join(root, fln))
102 |
103 | if len([li for li in [mp3, flac, aac] if li]) > 1:
104 | _handle_multiple_audio_exts()
105 | else:
106 | click.secho("File extensions have been validated.", fg="green")
107 |
108 |
109 | def _handle_bad_extension(filepath):
110 | while True:
111 | resp = click.prompt(
112 | f"{filepath} does not have an approved file extension. "
113 | "[D]elete, [A]bort, or [C]ontinue?",
114 | default="D",
115 | ).lower()
116 | if resp[0].lower() == "d":
117 | return os.remove(filepath)
118 | elif resp[0].lower() == "a":
119 | raise click.Abort
120 | elif resp[0].lower() == "c":
121 | return
122 |
123 |
124 | def _handle_multiple_audio_exts():
125 | while True:
126 | resp = click.prompt(
127 | "There are multiple audio codecs in this folder. " "[A]bort or [C]ontinue?",
128 | default="A",
129 | ).lower()
130 | if resp[0] == "a":
131 | raise click.Abort
132 | if resp[0] == "c":
133 | return
134 |
--------------------------------------------------------------------------------
/salmon/tagger/pre_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | from collections import defaultdict
4 | from copy import deepcopy
5 |
6 | import click
7 |
8 | from salmon.common import RE_FEAT, re_split
9 | from salmon.constants import FORMATS, TAG_ENCODINGS
10 |
11 | EMPTY_METADATA = {
12 | "artists": [],
13 | "title": None,
14 | "group_year": None,
15 | "year": None,
16 | "date": None,
17 | "edition_title": None,
18 | "label": None,
19 | "catno": None,
20 | "rls_type": None,
21 | "genres": [],
22 | "format": None,
23 | "encoding": None,
24 | "encoding_vbr": None,
25 | "source": None,
26 | "cover": None,
27 | "upc": None,
28 | "comment": None,
29 | "urls": [],
30 | "tracks": {},
31 | }
32 |
33 |
34 | def construct_rls_data(
35 | tags,
36 | audio_info,
37 | source,
38 | encoding,
39 | existing=None,
40 | overwrite=False,
41 | prompt_encoding=False,
42 | ):
43 | """Create the default release metadata from the tags."""
44 | if not existing:
45 | metadata = deepcopy(EMPTY_METADATA)
46 | tag_track = next(iter(tags.values()))
47 | metadata["title"] = tag_track.album or "None"
48 | if not overwrite:
49 | metadata["artists"] = construct_artists_li(tags)
50 | try:
51 | metadata["year"] = re.search(r"(\d{4})", str(tag_track.date))[1]
52 | except (ValueError, IndexError, TypeError):
53 | pass
54 | metadata["group_year"] = metadata["year"]
55 | metadata["upc"] = tag_track.upc
56 | metadata["label"] = tag_track.label
57 | metadata["catno"] = tag_track.catno
58 | metadata["genres"] = split_genres(tag_track.genre)
59 | metadata["tracks"] = create_track_list(tags, overwrite)
60 | else:
61 | metadata = {"artists": existing["artists"]}
62 | del existing["artists"]
63 | metadata = {**metadata, **existing}
64 | metadata["source"] = source
65 | metadata["format"] = parse_format(next(iter(tags.keys())))
66 |
67 | audio_track = next(iter(audio_info.values()))
68 | metadata["encoding"], metadata["encoding_vbr"] = parse_encoding(
69 | metadata["format"], audio_track, encoding, prompt_encoding
70 | )
71 | return metadata
72 |
73 |
74 | def construct_artists_li(tags):
75 | """Create a list of artists from the artist string."""
76 | artists = []
77 | for track in tags.values():
78 | if track.artist:
79 | artists += parse_artists(track.artist)
80 | return list(set(artists))
81 |
82 |
83 | def split_genres(genres_list):
84 | """Create a list of genres from splitting the string."""
85 | genres = set()
86 | if genres_list:
87 | for g in genres_list:
88 | for genre in re_split(g):
89 | genres.add(genre.strip())
90 | return list(genres)
91 |
92 |
93 | def parse_format(filename):
94 | return FORMATS[os.path.splitext(filename)[1].lower()]
95 |
96 |
97 | def parse_encoding(format_, track, supplied_encoding, prompt_encoding):
98 | """Get the encoding from the FLAC files, otherwise require the user to specify it."""
99 | if format_ == "FLAC":
100 | if track["precision"] == 16:
101 | return "Lossless", False
102 | elif track["precision"] == 24:
103 | return "24bit Lossless", False
104 | if supplied_encoding and list(supplied_encoding) != [None, None]:
105 | return supplied_encoding
106 | if prompt_encoding:
107 | return _prompt_encoding()
108 | click.secho(
109 | "An encoding must be specified if the files are not lossless.", fg="red"
110 | )
111 | raise click.Abort
112 |
113 |
114 | def create_track_list(tags, overwrite):
115 | """Generate the track data from each track tag."""
116 | tracks = defaultdict(dict)
117 | trackindex = 0
118 | for _, track in sorted(tags.items(), key=lambda k: k):
119 | trackindex += 1
120 | discnumber = track.discnumber or "1"
121 | tracknumber = track.tracknumber or str(trackindex)
122 | tracks[discnumber][tracknumber] = {
123 | "track#": tracknumber,
124 | "disc#": discnumber,
125 | "tracktotal": track.tracktotal,
126 | "disctotal": track.disctotal,
127 | "artists": parse_artists(track.artist),
128 | "title": track.title,
129 | "replay_gain": track.replay_gain,
130 | "peak": track.peak,
131 | "isrc": track.isrc,
132 | "explicit": None,
133 | "format": None,
134 | "streamable": None,
135 | }
136 | if overwrite:
137 | tracks[track.discnumber][track.tracknumber]["artists"] = []
138 | tracks[track.discnumber][track.tracknumber]["replay_gain"] = None
139 | tracks[track.discnumber][track.tracknumber]["peak"] = None
140 | tracks[track.discnumber][track.tracknumber]["isrc"] = None
141 | return dict(tracks)
142 |
143 |
144 | def parse_artists(artist_list):
145 | """Split the artists by common split characters, and aso accomodate features."""
146 | artists = []
147 | if not artist_list:
148 | artist_list = "none"
149 | if isinstance(artist_list, str):
150 | artist_list = [artist_list]
151 | for artist in artist_list:
152 | feat = RE_FEAT.search(artist)
153 | if feat:
154 | for a in re_split(feat[1]):
155 | artists.append((a, "guest"))
156 | artist = artist.replace(feat[0], "")
157 | remix = re.search(r" \(?remix(?:\.|ed|ed by)? ([^\)]+)\)?", artist)
158 | if remix:
159 | for a in re_split(remix[1]):
160 | artists.append((a, "remixer"))
161 | artist = artist.replace(remix[0], "")
162 | for a in re_split(artist):
163 | artists.append((a, "main"))
164 | return artists
165 |
166 |
167 | def _prompt_encoding():
168 | click.echo(f'\nValid encodings: {", ".join(TAG_ENCODINGS.keys())}')
169 | while True:
170 | enc = click.prompt(
171 | click.style(
172 | "What is the encoding of this release? [a]bort",
173 | fg="magenta",
174 | bold=True,
175 | ),
176 | default="",
177 | )
178 | try:
179 | return TAG_ENCODINGS[enc.upper()]
180 | except KeyError:
181 | if enc.lower().startswith("a"):
182 | raise click.Abort
183 | click.secho(f"{enc} is not a valid encoding.", fg="red")
184 |
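185 |
186 | # A sketch of the lossless-encoding inference above, assuming FORMATS
187 | # maps ".flac" to "FLAC" as the rest of the module implies:
188 | if __name__ == "__main__":
189 |     fmt = parse_format("01 - Intro.flac")
190 |     assert parse_encoding(fmt, {"precision": 24}, None, False) == ("24bit Lossless", False)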
--------------------------------------------------------------------------------
/salmon/tagger/sources/__init__.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | import click
4 |
5 | from salmon.errors import ScrapeError
6 | from salmon.tagger.sources import (
7 | bandcamp,
8 | beatport,
9 | deezer,
10 | discogs,
11 | itunes,
12 | junodownload,
13 | musicbrainz,
14 | tidal,
15 | )
16 |
17 | METASOURCES = {
18 | "MusicBrainz": musicbrainz,
19 | "iTunes": itunes,
20 | "Junodownload": junodownload,
21 | "Tidal": tidal,
22 | "Deezer": deezer,
23 | "Discogs": discogs,
24 | "Beatport": beatport,
25 | "Bandcamp": bandcamp, # Must be last due to the catch-all nature of its URLs.
26 | }
27 |
28 | loop = asyncio.get_event_loop()
29 |
30 |
31 | async def run_metadata(url, sources=None, return_source_name=False):
32 | """Run a scrape for the metadata of a URL"""
33 | sources = (
34 | METASOURCES
35 | if not sources
36 | else {name: source for name, source in METASOURCES.items() if name in sources}
37 | )
38 | for name, source in sources.items():
39 | if source.Scraper.regex.match(url):
40 | click.secho(f"Getting metadata from {name}.", fg="cyan")
41 | if return_source_name:
42 | return await source.Scraper().scrape_release(url), name
43 | return await source.Scraper().scrape_release(url)
44 | raise ScrapeError("URL did not match a scraper.")
45 |
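46 |
47 | # Which scraper claims a URL is decided purely by regex, so dispatch can
48 | # be checked without any network traffic (the URL is illustrative):
49 | if __name__ == "__main__":
50 |     url = "https://www.deezer.com/en/album/302127"
51 |     matches = [n for n, src in METASOURCES.items() if src.Scraper.regex.match(url)]
52 |     print(matches)  # expected: ['Deezer']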
--------------------------------------------------------------------------------
/salmon/tagger/sources/bandcamp.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 | from datetime import datetime
4 |
5 | from salmon.common import RE_FEAT, fetch_genre, re_split
6 | from salmon.errors import GenreNotInWhitelist, ScrapeError
7 | from salmon.sources import BandcampBase
8 | from salmon.tagger.sources.base import MetadataMixin
9 |
10 |
11 | class Scraper(BandcampBase, MetadataMixin):
12 | def parse_release_title(self, soup):
13 | try:
14 | return soup.select("#name-section .trackTitle")[0].string.strip()
15 | except (TypeError, IndexError) as e:
16 | raise ScrapeError("Failed to parse scraped title.") from e
17 |
18 | def parse_cover_url(self, soup):
19 | try:
20 | return soup.select("#tralbumArt img")[0]["src"]
21 | except (TypeError, IndexError) as e:
22 | raise ScrapeError("Could not parse cover URL.") from e
23 |
24 | def parse_genres(self, soup):
25 | genres = set()
26 | try:
27 | for a in soup.select(".tralbumData.tralbum-tags a"):
28 | try:
29 | genres |= fetch_genre(a.string)
30 | except GenreNotInWhitelist:
31 | pass
32 | return genres
33 | except TypeError as e:
34 | raise ScrapeError("Could not parse genres.") from e
35 |
36 | def parse_release_year(self, soup):
37 | try:
38 | return int(re.search(r"(\d{4})", self.parse_release_date(soup))[1])
39 | except TypeError as e:
40 | raise ScrapeError("Could not parse release year.") from e
41 |
42 | def parse_release_date(self, soup):
43 | try:
44 | date = re.search(
45 | r"release(?:d|s) ([^\d]+ \d+, \d{4})",
46 | soup.select(".tralbumData.tralbum-credits")[0].text,
47 | )[1]
48 | return datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d")
49 | except (TypeError, IndexError) as e:
50 | raise ScrapeError("Could not parse release date.") from e
51 |
52 | def parse_release_label(self, soup):
53 | try:
54 | artist = soup.select('#name-section span[itemprop="byArtist"] a')[0].string
55 | label = soup.select("#band-name-location .title")[0].string
56 | if artist != label:
57 | return label
58 | except IndexError as e:
59 | raise ScrapeError("Could not parse record label.") from e
60 |
61 | def parse_tracks(self, soup):
62 | tracks = defaultdict(dict)
63 | artist = soup.select('#name-section span[itemprop="byArtist"] a')[0].string
64 | tracklist_scrape = soup.select("#track_table tr.track_row_view")
65 | various = all(
66 | " - " in t.select('.title-col span[itemprop="name"]')[0].string
67 | for t in tracklist_scrape
68 | )
69 | for track in tracklist_scrape:
70 | try:
71 | num = track.select(".track-number-col .track_number")[0].text.rstrip(
72 | "."
73 | )
74 | title = track.select('.title-col span[itemprop="name"]')[0].string
75 | tracks["1"][num] = self.generate_track(
76 | trackno=int(num),
77 | discno=1,
78 | artists=parse_artists(artist, title),
79 | title=parse_title(title, various=various),
80 | )
81 | except (ValueError, IndexError, TypeError) as e:
82 | raise ScrapeError("Could not parse tracks.") from e
83 | return dict(tracks)
84 |
85 |
86 | def parse_artists(artist, title):
87 | """
88 | Parse guest artists from the title and add them to the list
89 | of artists as guests.
90 | """
91 | feat_artists = RE_FEAT.search(title)
92 | artists = []
93 | if feat_artists:
94 | artists = [(a, "guest") for a in re_split(feat_artists[1])]
95 | try:
96 | if " - " not in title:
97 | raise IndexError
98 | track_artists = title.split(" - ", 1)[0]
99 | artists += [(a, "main") for a in re_split(track_artists)]
100 | except (IndexError, TypeError):
101 | if "various" not in artist.lower():
102 | artists += [(a, "main") for a in re_split(artist)]
103 | return artists
104 |
105 |
106 | def parse_title(title, various):
107 | """Strip featuring artists from title; they belong with artists."""
108 | if various and " - " in title:
109 | title = title.split(" - ", 1)[1]
110 | return RE_FEAT.sub("", title).rstrip()
111 |
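112 |
113 | # On various-artists Bandcamp pages each track is titled "Artist - Title",
114 | # which parse_title strips back down, assuming RE_FEAT only matches
115 | # explicit feat. credits (the strings are made up):
116 | if __name__ == "__main__":
117 |     assert parse_title("Some Artist - Some Song", various=True) == "Some Song"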
--------------------------------------------------------------------------------
/salmon/tagger/sources/beatport.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 |
4 | from salmon.errors import ScrapeError
5 | from salmon.sources import BeatportBase
6 | from salmon.tagger.sources.base import MetadataMixin
7 |
8 | SPLIT_GENRES = {
9 | "Leftfield House & Techno": {"Leftfield House", "Techno"},
10 | "Melodic House & Techno": {"Melodic House", "Techno"},
11 | "Electronica / Downtempo": {"Electronic", "Downtempo"},
12 | "Funk / Soul / Disco": {"Funk", "Soul", "Disco"},
13 | "Trap / Future Bass": {"Trap", "Future Bass"},
14 | "Indie Dance / Nu Disco": {"Indie Dance", "Nu Disco"},
15 | "Hardcore / Hard Techno": {"Hard Techno"},
16 | "Funky / Groove / Jackin' House": {"Funky", "Groove", "Jackin' House"},
17 | "Hip-Hop / R&B": {"Hip-Hop", "Rhythm & Blues"},
18 | "Minimal / Deep Tech": {"Minimal", "Deep Tech"},
19 | "Garage / Bassline / Grime": {"Garage", "Bassline", "Grime"},
20 | "Reggae / Dancehall / Dub": {"Reggae", "Dancehall", "Dub"},
21 | }
22 |
23 |
24 | class Scraper(BeatportBase, MetadataMixin):
25 | def parse_release_title(self, soup):
26 | return soup.h1.string
27 |
28 | def parse_cover_url(self, soup):
29 | res = soup.select("img.interior-release-chart-artwork")
30 | try:
31 | return res[0]["src"]
32 | except IndexError as e:
33 | raise ScrapeError("Could not parse cover self.url.") from e
34 |
35 | def parse_genres(self, soup):
36 | genres = {"Electronic"}
37 | tracks_sc = soup.select(
38 | ".bucket.tracks.interior-release-tracks .bucket-item.ec-item.track"
39 | )
40 | for track in tracks_sc:
41 | for a in track.select(".buk-track-genre a"):
42 | try:
43 | genres |= SPLIT_GENRES[a.string]
44 | except KeyError:
45 | genres.add(a.string)
46 | return genres
47 |
48 | def parse_release_year(self, soup):
49 | date = self.parse_release_date(soup)
50 | try:
51 | return int(re.search(r"(\d{4})", date)[1])
52 | except (TypeError, IndexError) as e:
53 | raise ScrapeError("Could not parse release year.") from e
54 |
55 | def parse_release_date(self, soup):
56 | ul = soup.select(".interior-release-chart-content-item--desktop li")
57 | try:
58 | return ul[0].select("span.value")[0].string
59 | except IndexError as e:
60 | raise ScrapeError("Could not parse release date.") from e
61 |
62 | def parse_release_label(self, soup):
63 | ul = soup.select(".interior-release-chart-content-item--desktop li")
64 | try:
65 | return ul[1].select("a")[0].string
66 | except IndexError as e:
67 | raise ScrapeError("Could not parse record label.") from e
68 |
69 | def parse_release_catno(self, soup):
70 | ul = soup.select(".interior-release-chart-content-item--desktop li")
71 | try:
72 | return ul[2].select("span.value")[0].string
73 | except IndexError as e:
74 | raise ScrapeError("Could not parse catalog number.") from e
75 |
76 | def parse_comment(self, soup):
77 | try:
78 | return soup.select(".interior-expandable-wrapper .interior-expandable")[
79 | 0
80 | ].text.strip()
81 | except IndexError:
82 | return None # Comment does not exist.
83 |
84 | def parse_tracks(self, soup):
85 | tracks = defaultdict(dict)
86 | cur_disc = 1
87 | tracks_sc = soup.select(
88 |             ".bucket.tracks.interior-release-tracks .bucket-item.ec-item.track"
89 | )
90 | for track in tracks_sc:
91 | try:
92 | track_num = track.select(".buk-track-num")[0].string
93 | tracks[str(cur_disc)][track_num] = self.generate_track(
94 | trackno=track_num,
95 | discno=cur_disc,
96 | artists=parse_artists(track),
97 | title=parse_title(track),
98 | )
99 | except (ValueError, IndexError) as e:
100 | raise ScrapeError("Could not parse tracks.") from e
101 | return dict(tracks)
102 |
103 |
104 | def parse_title(track):
105 | """Add the remix string to the track title, as long as it's not OM."""
106 | title = track.select(".buk-track-primary-title")[0].string
107 | remix = track.select(".buk-track-remixed")
108 | if remix and remix[0].string != "Original Mix": # yw pootsu
109 | title += (
110 | f" ({remix[0].string})" # TODO: Move this into base class along with Tidal
111 | )
112 | return title
113 |
114 |
115 | def parse_artists(track):
116 | """Parse remixers and main artists; return a list of them."""
117 | artists, remixers = [], []
118 | for artist in [e.string for e in track.select(".buk-track-artists a")]:
119 | for split in re.split(" & |; | / ", artist):
120 | artists.append(split)
121 | for remixer in [e.string for e in track.select(".buk-track-remixers a")]:
122 | for split in re.split(" & |; | / ", remixer):
123 | remixers.append(split)
124 |
125 | return [
126 | *((name, "main") for name in artists),
127 | *((name, "remixer") for name in remixers),
128 | ]
129 |
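
A standalone sketch of the genre splitting in `parse_genres` above, with a trimmed-down table and hypothetical scraped strings; the `.get` fallback mirrors the try/except KeyError:

    SPLIT_GENRES = {"Hip-Hop / R&B": {"Hip-Hop", "Rhythm & Blues"}}
    genres = {"Electronic"}
    for scraped in ("Hip-Hop / R&B", "Techno"):  # hypothetical genre tags
        genres |= SPLIT_GENRES.get(scraped, {scraped})
    print(sorted(genres))  # ['Electronic', 'Hip-Hop', 'Rhythm & Blues', 'Techno']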
--------------------------------------------------------------------------------
/salmon/tagger/sources/deezer.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 | from html import unescape
4 |
5 | from salmon.common import RE_FEAT, parse_copyright, re_split
6 | from salmon.sources import DeezerBase
7 | from salmon.tagger.sources.base import MetadataMixin
8 |
9 | RECORD_TYPES = {
10 | "album": "Album",
11 | "ep": "EP",
12 | "single": "Single",
13 | }
14 |
15 |
16 | class Scraper(DeezerBase, MetadataMixin):
17 | def parse_release_title(self, soup):
18 | return RE_FEAT.sub("", soup["title"])
19 |
20 | def parse_cover_url(self, soup):
21 | return soup["cover_xl"]
22 |
23 | def parse_release_year(self, soup):
24 | try:
25 | return int(re.search(r"(\d{4})", soup["release_date"])[1])
26 | except TypeError:
27 | return None
28 | # raise ScrapeError('Could not parse release year.') from e
29 |
30 | def parse_release_date(self, soup):
31 | return soup["release_date"]
32 |
33 | def parse_release_label(self, soup):
34 | return parse_copyright(soup["label"])
35 |
36 | def parse_genres(self, soup):
37 | return {g["name"] for g in soup["genres"]["data"]}
38 |
39 | def parse_release_type(self, soup):
40 | try:
41 | return RECORD_TYPES[soup["record_type"]]
42 | except KeyError:
43 | return None
44 |
45 | def parse_upc(self, soup):
46 | return soup["upc"]
47 |
48 | def parse_tracks(self, soup):
49 | tracks = defaultdict(dict)
50 | for track in soup["tracklist"]:
51 | tracks[str(track["DISK_NUMBER"])][
52 | str(track["TRACK_NUMBER"])
53 | ] = self.generate_track(
54 | trackno=track["TRACK_NUMBER"],
55 | discno=track["DISK_NUMBER"],
56 | artists=self.parse_artists(
57 | track["SNG_CONTRIBUTORS"], track["ARTISTS"], track["SNG_TITLE"]
58 | ),
59 | title=self.parse_title(track["SNG_TITLE"], track.get("VERSION", None)),
60 | isrc=track["ISRC"],
61 | explicit=track["EXPLICIT_LYRICS"],
62 | stream_id=track["SNG_ID"],
63 | md5_origin=track.get("MD5_ORIGIN"),
64 | media_version=track.get("MEDIA_VERSION"),
65 | lossless=True,
66 | mp3_320=True,
67 | )
68 | return dict(tracks)
69 |
70 | def process_label(self, data):
71 | if isinstance(data["label"], str):
72 | if any(
73 | data["label"].lower() == a.lower() and i == "main"
74 | for a, i in data["artists"]
75 | ):
76 | return "Self-Released"
77 | return data["label"]
78 |
79 | def parse_artists(self, artists, default_artists, title):
80 | """
81 | Iterate over all artists and roles, returning a compliant list of
82 | artist tuples.
83 | """
84 | result = []
85 |
86 | feat = RE_FEAT.search(title)
87 | if feat:
88 | for artist in re_split(feat[1]):
89 | result.append((unescape(artist), "guest"))
90 |
91 | if artists:
92 | for a in artists.get("mainartist") or artists.get("main_artist", []):
93 | for b in re_split(a):
94 | if (b, "main") not in result:
95 | result.append((b, "main"))
96 | for a in artists.get("featuredartist", []):
97 | for b in re_split(a):
98 | if (b, "guest") not in result:
99 | result.append((b, "guest"))
100 | else:
101 | for artist in default_artists:
102 | for b in re_split(artist["ART_NAME"]):
103 | if (b, "main") not in result:
104 | result.append((b, "main"))
105 |
106 | return result
107 |
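
A minimal standalone sketch of the role-tuple dedupe in `parse_artists` above (hypothetical names; `re_split` here is a simplified stand-in for the salmon.common helper):

    def re_split(s):  # stand-in for salmon.common.re_split
        return [p.strip() for p in s.split(",")]

    result = []
    for a in ["Artist A, Artist B", "Artist A"]:  # e.g. "mainartist" values
        for b in re_split(a):
            if (b, "main") not in result:
                result.append((b, "main"))
    print(result)  # [('Artist A', 'main'), ('Artist B', 'main')]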
--------------------------------------------------------------------------------
/salmon/tagger/sources/discogs.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 |
4 | from salmon.sources import DiscogsBase
5 | from salmon.tagger.sources.base import MetadataMixin
6 |
7 | VALID_EDITION_TITLES = {
8 | "Remastered",
9 | "Reissue",
10 | "Repress",
11 | "Club Edition",
12 | "Deluxe Edition",
13 | "Enhanced",
14 | "Limited Edition",
15 | "Mixed",
16 | "Partially Mixed",
17 | "Promo",
18 | "Special Edition",
19 | "Mono",
20 | "Quadraphonic",
21 | "Ambisonic",
22 | "Unofficial Release",
23 | }
24 |
25 | ROLES = {
26 | "Composed By": "composer",
27 | "Producer": "producer",
28 | "Featuring": "guest",
29 | "Vocals": "guest",
30 | "Featuring [Vocals]": "guest",
31 | "Remix": "remixer",
32 | }
33 |
34 | RELEASE_TYPES = {
35 | "Album": "Album",
36 | "Mini-Album": "Album",
37 | "EP": "EP",
38 | "Sampler": "EP",
39 | "Single": "Single",
40 | "Maxi-Single": "Single",
41 | "Compilation": "Compilation",
42 | "Mixtape": "Mixtape",
43 | }
44 |
45 | SOURCES = {
46 | "Vinyl": "Vinyl",
47 | "File": "WEB",
48 | "CD": "CD",
49 | }
50 |
51 |
52 | class Scraper(DiscogsBase, MetadataMixin):
53 | def parse_release_title(self, soup):
54 | return soup["title"]
55 |
56 | def parse_cover_url(self, soup):
57 | try:
58 | return soup["images"][0]["resource_url"]
59 | except (KeyError, IndexError):
60 | return None
61 |
62 | def parse_genres(self, soup):
63 | return set(soup["genres"])
64 |
65 | def parse_release_year(self, soup):
66 | return int(soup["year"])
67 |
68 | def parse_release_date(self, soup):
69 | if "released" in soup and re.match(r"\d{4}-\d{2}-\d{2}", soup["released"]):
70 | return soup["released"]
71 |
72 | def parse_edition_title(self, soup):
73 | if soup["formats"] and "descriptions" in soup["formats"][0]:
74 | return (
75 | " / ".join(
76 | [
77 | w
78 | for w in soup["formats"][0]["descriptions"]
79 | if any(v in w for v in VALID_EDITION_TITLES)
80 | ]
81 | )
82 | or None
83 | )
84 |
85 | def parse_release_label(self, soup):
86 | if soup["labels"]:
87 | return sanitize_artist_name(soup["labels"][0]["name"])
88 | return "Not On Label"
89 |
90 | def parse_release_catno(self, soup):
91 | if soup["labels"] and soup["labels"][0]["catno"] != "none":
92 | return soup["labels"][0]["catno"]
93 |
94 | def parse_release_type(self, soup):
95 | if "formats" in soup and soup["formats"]:
96 | if "descriptions" in soup["formats"][0]:
97 | try:
98 | return next(
99 | iter(
100 | RELEASE_TYPES[f]
101 | for f in soup["formats"][0]["descriptions"]
102 | if f in RELEASE_TYPES
103 | )
104 | )
105 | except StopIteration:
106 | return
107 |
108 | def parse_tracks(self, soup):
109 | tracks = defaultdict(dict)
110 | cur_disc = 1
111 | for track in soup["tracklist"]:
112 | if track["type_"] == "heading" and tracks:
113 | cur_disc += 1
114 | elif track["type_"] == "track":
115 | track_num = track["position"].upper()
116 | tracks[str(cur_disc)][track_num] = self.generate_track(
117 | trackno=track_num,
118 | discno=cur_disc,
119 | artists=parse_artists(soup["artists"], track),
120 | title=track["title"],
121 | )
122 | return dict(tracks)
123 |
124 |
125 | def parse_artists(artist_soup, track):
126 | """
127 | Generate the artists list from the artist dictionary provided with
128 | each track.
129 | """
130 | if "artists" in track:
131 | artists = [
132 | *((sanitize_artist_name(art["name"]), "main") for art in track["artists"])
133 | ]
134 | else:
135 | artists = [
136 | *(
137 | (sanitize_artist_name(art["name"]), "main")
138 | for art in artist_soup
139 | if art["name"] != "Various"
140 | )
141 | ]
142 | if "extraartists" in track:
143 | artists += [
144 | *(
145 | (sanitize_artist_name(art["name"]), ROLES[art["role"]])
146 | for art in track["extraartists"]
147 | if art["role"] in ROLES
148 | )
149 | ]
150 | for a, i in [
151 | (a, i) for a, i in artists if i != "main" and (a, "main") in artists
152 | ]:
153 | artists.remove((a, "main"))
154 | return artists
155 |
156 |
157 | def sanitize_artist_name(name):
158 | """
159 | Remove parenthentical number disambiguation bullshit from artist names,
160 | as well as the asterisk stuff.
161 | """
162 | name = re.sub(r" \(\d+\)$", "", name)
163 | return re.sub(r"\*+$", "", name)
164 |
165 |
166 | def parse_source(formats):
167 | """
168 | Take the list of format strings provided by Discogs and iterate over them
169 | to find a possible source for the release.
170 | """
171 | for format_s, source in SOURCES.items():
172 | if any(format_s in f for f in formats):
173 | return source
174 |
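
A quick standalone demo of `sanitize_artist_name` on hypothetical Discogs-style names:

    import re

    def sanitize_artist_name(name):  # same regexes as above
        name = re.sub(r" \(\d+\)$", "", name)
        return re.sub(r"\*+$", "", name)

    print(sanitize_artist_name("Artist (2)"))  # Artist
    print(sanitize_artist_name("Artist**"))    # Artist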
--------------------------------------------------------------------------------
/salmon/tagger/sources/junodownload.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 | from datetime import datetime
4 |
5 | from salmon.common import re_split
6 | from salmon.errors import ScrapeError
7 | from salmon.sources import JunodownloadBase
8 | from salmon.tagger.sources.base import MetadataMixin
9 |
10 |
11 | class Scraper(JunodownloadBase, MetadataMixin):
12 | def parse_release_title(self, soup):
13 | try:
14 | return soup.select(".product-title a")[0].string
15 | except (TypeError, IndexError) as e:
16 | raise ScrapeError("Failed to parse scraped title.") from e
17 |
18 | def parse_cover_url(self, soup):
19 | try:
20 | return (
21 | soup.select(".img-release img")[0]["src"][::-1]
22 | .replace("MED"[::-1], "BIG"[::-1], 1)[::-1]
23 | .replace("/300/", "/full/")
24 | )
25 | except (TypeError, IndexError) as e:
26 | raise ScrapeError("Could not parse cover URL.") from e
27 |
28 | def parse_genres(self, soup):
29 | try:
30 | genre_str = re.sub(
31 | r"[^A-Za-z]+$", "", soup.select('meta[itemprop="genre"]')[0]["content"]
32 | )
33 | return {"Electronic", *(set(genre_str.split("/")))}
34 | except TypeError as e:
35 | raise ScrapeError("Could not parse genres.") from e
36 |
37 | def parse_release_year(self, soup):
38 | try:
39 | return int(re.search(r"(\d{4})", self.parse_release_date(soup))[1])
40 | except TypeError as e:
41 | raise ScrapeError("Could not parse release year.") from e
42 |
43 | def parse_release_date(self, soup):
44 | try:
45 | date = soup.select('span[itemprop="datePublished"]')[0].string.strip()
46 | return datetime.strptime(date, "%d %B, %Y").strftime("%Y-%m-%d")
47 | except (IndexError, AttributeError) as e:
48 | raise ScrapeError("Could not parse release date.") from e
49 |
50 | def parse_release_label(self, soup):
51 | try:
52 | return soup.select(".product-label a")[0].string
53 | except IndexError as e:
54 | raise ScrapeError("Could not parse record label.") from e
55 |
56 | def parse_release_catno(self, soup):
57 | try:
58 | catblob = soup.find_all('div', attrs={'class': 'mb-3'})[1]
59 | return (
60 | catblob.find('strong', text='Cat:')
61 | .next_sibling.strip()
62 | .replace(" ", "")
63 | )
64 | except IndexError as e:
65 | raise ScrapeError("Could not parse catalog number.") from e
66 |
67 | def parse_comment(self, soup):
68 | try:
69 | return soup.select('#product_release_note span[itemprop="reviewBody"]')[
70 | 0
71 | ].string
72 | except IndexError:
73 | return None
74 |
75 | def parse_tracks(self, soup):
76 | tracks = defaultdict(dict)
77 | cur_disc = 1
78 | for track in soup.find_all(
79 | 'div',
80 | attrs={
81 | 'class': 'row gutters-sm align-items-center product-tracklist-track'
82 | },
83 | ):
84 | try:
85 | num = track.text.strip().split(".", 1)[0]
86 | tobj = track.find('div', attrs={'class': 'col track-title'})
87 | title = tobj.find('a').text
88 | tracks[str(cur_disc)][num] = self.generate_track(
89 | trackno=(num),
90 | discno=cur_disc,
91 | artists=parse_artists(soup, track, title),
92 | title=parse_title(title, track),
93 | )
94 | except (ValueError, IndexError) as e:
95 | raise ScrapeError("Could not parse tracks.") from e
96 | return dict(tracks)
97 |
98 |
99 | def parse_title(title, track):
100 | """Parse the track title from the HTML."""
101 | try:
102 | artist = track.select('meta[itemprop="byArtist"]')[0]["content"]
103 | title = title.split(artist, 1)[1].lstrip(" -")
104 | except (TypeError, IndexError):
105 | pass
106 | # A bit convoluted so we can have `(feat artist - Club edit)` --> `- Club edit`
107 | return (
108 | re.sub(
109 | r"( -)? \(?(original mix|feat [^((?! - ).)]+|album mix)\)?",
110 | "",
111 | title,
112 | flags=re.IGNORECASE,
113 | ).strip()
114 | # .rstrip(")") Why was this here?
115 | )
116 |
117 |
118 | def parse_artists(soup, track, title):
119 |     """Parse the per-track artists from the tracks or the header."""
120 |
121 | raw_rls_arts = [
122 | s.string
123 | for s in soup.select("#topbar_bread h1 a")
124 | if "/artists/" in s["href"] and s.string
125 | ] or [s.string.title() for s in soup.select("#product_heading_artist a")]
126 |
127 | artists = []
128 | for art in raw_rls_arts:
129 | for split in re_split(art):
130 | artists.append(split)
131 |
132 | try:
133 | artists = split_artists(
134 | track.select('meta[itemprop="byArtist"]')[0]["content"], artists
135 | )
136 | except (TypeError, IndexError):
137 | artists = [(a, "main") for a in artists]
138 |
139 | guests = re.search(r"[Ff]eat\.? ([^\)]+)", title)
140 | if guests:
141 | artists += [
142 | (re.sub(r"( -)? .+? (mix|edit)", "", a, flags=re.IGNORECASE), "guest")
143 | for a in re_split(guests[1])
144 | ]
145 | return artists
146 |
147 |
148 | def split_artists(artist, rls_artists):
149 | """
150 | Split an artist string by known delimiter characters and compare them
151 | to the album artists. If any release artists match the split artists,
152 | return the release artists that matched.
153 | """
154 |     art_li = [a.strip() for a in re_split(artist) if a]
155 |     rls_artists = [a.lower() for a in rls_artists]
156 |     # Prefer the split artists that match a release artist; else keep them all.
157 |     matched = [a for a in art_li if a.lower() in rls_artists]
158 |     return [(r, "main") for r in (matched or art_li)]
159 |
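
The double-reverse in `parse_cover_url` is worth a note: `str.replace` has no "replace the last occurrence" mode, so the URL is reversed, the first (i.e. originally last) "MED" is swapped for "BIG", and the result is reversed back. A standalone sketch with a hypothetical URL:

    url = "https://example.com/MED/artwork-MED.jpg"  # hypothetical cover URL
    big = url[::-1].replace("MED"[::-1], "BIG"[::-1], 1)[::-1]
    print(big)  # https://example.com/MED/artwork-BIG.jpg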
--------------------------------------------------------------------------------
/salmon/tagger/sources/musicbrainz.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 |
4 | import musicbrainzngs
5 |
6 | from salmon.errors import ScrapeError
7 | from salmon.sources import MusicBrainzBase
8 | from salmon.tagger.sources.base import MetadataMixin
9 |
10 | RELEASE_TYPES = {
11 | "Album": "Album",
12 | "Single": "Single",
13 | "EP": "EP",
14 | "Compilation": "Compilation",
15 | "Soundtrack": "Soundtrack",
16 | "Interview": "Interview",
17 | "Live": "Live album",
18 | "Remix": "Remix",
19 | "DJ-mix": "DJ Mix",
20 | "Mixtape/Street": "Mixtape",
21 | }
22 |
23 |
24 | musicbrainzngs.set_useragent("salmon", "1.0", "noreply@salm.on")
25 |
26 |
27 | class Scraper(MusicBrainzBase, MetadataMixin):
28 | def parse_release_title(self, soup):
29 | return soup["title"]
30 |
31 | def parse_cover_url(self, soup):
32 | if soup["cover-art-archive"] and soup["cover-art-archive"]["front"] == "true":
33 | try:
34 | r = musicbrainzngs.get_image_list(soup["id"])
35 | except musicbrainzngs.musicbrainz.ResponseError:
36 | return None
37 |
38 | for image in r["images"]:
39 | if image["approved"] and image["front"]:
40 | return image["image"]
41 | return None
42 |
43 | def parse_release_year(self, soup):
44 | date = self.parse_release_date(soup)
45 | try:
46 | return int(re.search(r"(\d{4})", date)[1])
47 | except (TypeError, IndexError):
48 | return None
49 |
50 | def parse_release_date(self, soup):
51 | try:
52 | return soup["release-event-list"][0]["date"]
53 | except (KeyError, IndexError):
54 | return None
55 |
56 | def parse_release_group_year(self, soup):
57 | try:
58 | return re.search(r"(\d{4})", soup["release-group"]["first-release-date"])[1]
59 | except (KeyError, IndexError, TypeError):
60 | return self.parse_release_year(soup)
61 |
62 | def parse_release_label(self, soup):
63 | try:
64 | return soup["label-info-list"][0]["label"]["name"]
65 | except (KeyError, IndexError):
66 | return None
67 |
68 | def parse_release_catno(self, soup):
69 | try:
70 | return soup["label-info-list"][0]["catalog-number"]
71 | except (KeyError, IndexError):
72 | return None
73 |
74 | def parse_release_type(self, soup):
75 | try:
76 | return RELEASE_TYPES[soup["release-group"]["type"]]
77 | except KeyError:
78 | return None
79 |
80 | def parse_tracks(self, soup):
81 | tracks = defaultdict(dict)
82 | for disc in soup["medium-list"]:
83 | for track in disc["track-list"]:
84 | try:
85 | tracks[str(disc["position"])][
86 | str(track["number"])
87 | ] = self.generate_track(
88 | trackno=track["number"],
89 | discno=disc["position"],
90 | artists=parse_artists(track["recording"]["artist-credit"]),
91 | title=track["recording"]["title"],
92 | )
93 | except (ValueError, IndexError) as e:
94 | raise ScrapeError("Could not parse tracks.") from e
95 | return dict(tracks)
96 |
97 |
98 | def parse_artists(artist_credits):
99 | """
100 | Create the artists list from the given list of artists. MusicBrainz does
101 | some weird bullshit for guests, where it will separate the big list of
102 | artists with the string ' feat. ', after which point all of the artists are guests.
103 | """
104 | artists = []
105 | is_guest = False
106 | for artist in artist_credits:
107 | if artist == " feat. ":
108 | is_guest = True
109 | elif isinstance(artist, dict):
110 | artists.append((artist["artist"]["name"], "guest" if is_guest else "main"))
111 | return artists
112 |
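
A standalone sketch of the credit walking in `parse_artists` above, with a hypothetical artist-credit payload:

    credits = [
        {"artist": {"name": "Artist A"}},
        " feat. ",  # the join string that flips subsequent roles to guest
        {"artist": {"name": "Artist B"}},
    ]
    artists, is_guest = [], False
    for credit in credits:
        if credit == " feat. ":
            is_guest = True
        elif isinstance(credit, dict):
            artists.append((credit["artist"]["name"], "guest" if is_guest else "main"))
    print(artists)  # [('Artist A', 'main'), ('Artist B', 'guest')]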
--------------------------------------------------------------------------------
/salmon/tagger/sources/tidal.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 | from html import unescape
4 |
5 | from salmon.common import RE_FEAT, parse_copyright, re_split
6 | from salmon.errors import ScrapeError
7 | from salmon.sources import TidalBase
8 | from salmon.tagger.sources.base import MetadataMixin
9 |
10 | ROLES = {
11 | "MAIN": "main",
12 | "FEATURED": "guest",
13 | }
14 |
15 |
16 | class Scraper(TidalBase, MetadataMixin):
17 |
18 | regex = re.compile(r"^https?://.*(?:tidal|wimpmusic)\.com.*\/(album)\/([0-9]+)")
19 |
20 | def parse_release_title(self, soup):
21 | return RE_FEAT.sub("", soup["title"])
22 |
23 | def parse_cover_url(self, soup):
24 | if not soup["cover"]:
25 | return None
26 | return self.image_url.format(album_id=soup["cover"].replace("-", "/"))
27 |
28 | def parse_release_year(self, soup):
29 | try:
30 | return int(re.search(r"(\d{4})", soup["releaseDate"])[1])
31 | except TypeError:
32 | return None
33 |
34 | def parse_release_date(self, soup):
35 | date = soup["releaseDate"]
36 |         if not date or (date.endswith("01-01") and int(date[:4]) < 2013):
37 | return None
38 | return date
39 |
40 | def parse_release_label(self, soup):
41 | return parse_copyright(soup["copyright"])
42 |
43 | def parse_upc(self, soup):
44 | return soup["upc"]
45 |
46 | def parse_tracks(self, soup):
47 | tracks = defaultdict(dict)
48 | for track in soup["tracklist"]:
49 | tracks[str(track["volumeNumber"])][
50 | str(track["trackNumber"])
51 | ] = self.generate_track(
52 | trackno=track["trackNumber"],
53 | discno=track["volumeNumber"],
54 | artists=self.parse_artists(
55 | track["artists"], track["title"], track["id"]
56 | ),
57 | title=self.parse_title(track["title"], track["version"]),
58 | replay_gain=track["replayGain"],
59 | peak=track["peak"],
60 | isrc=track["isrc"],
61 | explicit=track["explicit"],
62 | format_=track["audioQuality"],
63 | stream_id=track["id"],
64 | streamable=track["allowStreaming"],
65 | )
66 | return dict(tracks)
67 |
68 | def process_label(self, data):
69 | if isinstance(data["label"], str):
70 | if any(
71 | data["label"].lower() == a.lower() and i == "main"
72 | for a, i in data["artists"]
73 | ):
74 | return "Self-Released"
75 | return data["label"]
76 |
77 | def parse_artists(self, artists, title, track_id): # noqa: C901
78 | """
79 | Iterate over all artists and roles, returning a compliant list of
80 | artist tuples.
81 | """
82 | result = []
83 | artist_set = set()
84 |
85 | feat = RE_FEAT.search(title)
86 | if feat:
87 | for artist in re_split(feat[1]):
88 | result.append((unescape(artist), "guest"))
89 | artist_set.add(unescape(artist).lower())
90 |
91 | remix_str = ""
92 | remixer_str = re.search(r" \((.*) [Rr]emix\)", title)
93 | if remixer_str:
94 | remix_str = unescape(remixer_str[1]).lower()
95 |
96 | all_guests = all(a["type"] == "FEATURED" for a in artists)
97 | for artist in artists:
98 | for a in re_split(artist["name"]):
99 | feat = RE_FEAT.search(a)
100 | if feat:
101 | for artist_ in re_split(feat[1]):
102 | result.append((unescape(artist_), "guest"))
103 | artist_set.add(unescape(artist_).lower())
104 |                 a = re.sub(re.escape(feat[0]) + "$", "", a).rstrip()
105 | if artist["type"] in ROLES and unescape(a).lower() not in artist_set:
106 | if unescape(a).lower() in remix_str:
107 | result.append((unescape(a), "remixer"))
108 | elif all_guests:
109 | result.append((unescape(a), "main"))
110 | else:
111 | result.append((unescape(a), ROLES[artist["type"]]))
112 | artist_set.add(unescape(a).lower())
113 |
114 | if "mix" in title.lower(): # Get contributors for (re)mixes.
115 | attempts = 0
116 | while True:
117 | try:
118 | artists = self.get_json_sync(
119 | f"/tracks/{track_id}/contributors",
120 | params={"countryCode": self.country_code, "limit": 25},
121 | )["items"]
122 | break
123 | except ScrapeError:
124 | attempts += 1
125 | if attempts > 3:
126 | break
127 | for artist in artists:
128 | if (
129 |                     artist.get("role") == "Remixer"  # fallback artists lack "role"
130 | and artist["name"].lower() not in artist_set
131 | ):
132 | result.append((unescape(artist["name"]), "remixer"))
133 | artist_set.add(artist["name"].lower())
134 |
135 | # In case something is fucked, have a failsafe of returning all artists.
136 | return result if result else [(unescape(a["name"]), "main") for a in artists]
137 |
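
A standalone sketch of the remixer detection used above; the parenthesised remixer name is matched case-insensitively against each credited artist (hypothetical title):

    import re
    from html import unescape

    title = "Track Title (Artist B Remix)"  # hypothetical
    match = re.search(r" \((.*) [Rr]emix\)", title)
    remix_str = unescape(match[1]).lower() if match else ""
    print("artist b" in remix_str)  # True -> "Artist B" is credited as remixer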
--------------------------------------------------------------------------------
/salmon/tagger/tagfile.py:
--------------------------------------------------------------------------------
1 | import click
2 | import mutagen
3 | from mutagen import id3
4 |
5 | TAG_FIELDS = {
6 | "FLAC": {
7 | "album": "album",
8 | "date": "date",
9 | "upc": "upc",
10 | "label": "label",
11 | "catno": "catalognumber",
12 | "genre": "genre",
13 | "tracknumber": "tracknumber",
14 | "discnumber": "discnumber",
15 | "tracktotal": "tracktotal",
16 | "disctotal": "disctotal",
17 | "artist": "artist",
18 | "title": "title",
19 | "replay_gain": "replaygain_track_gain",
20 | "peak": "replaygain_track_peak",
21 | "isrc": "isrc",
22 | "comment": "comment",
23 | "albumartist": "albumartist",
24 | },
25 | "MP3": {
26 | "album": ["TALB"],
27 | "date": ["TDRC", "TYER"],
28 | "label": ["TPUB"],
29 | "genre": ["TCON"],
30 | "tracknumber": ["TRCK"], # Special
31 | "tracktotal": ["TRCK"],
32 | "discnumber": ["TPOS"],
33 | "disctotal": ["TPOS"],
34 | "artist": ["TPE1"],
35 | "title": ["TIT2"],
36 | "isrc": ["TSRC"],
37 | "comment": ["COMM"],
38 | "albumartist": ["TPE2"],
39 | },
40 | "AAC": {
41 | "album": ["\xa9alb"],
42 | "date": ["\xa9day"],
43 | "genre": ["\xa9gen"],
44 | "tracknumber": ["trkn"],
45 | "tracktotal": ["trkn"],
46 | "discnumber": ["disk"],
47 | "disctotal": ["disk"],
48 | "artist": ["\xa9ART"],
49 | "title": ["\xa9nam"],
50 | "comment": ["\xa9cmt"],
51 | "albumartist": ["aART"],
52 | },
53 | }
54 |
55 |
56 | class TagFile:
57 | def __init__(self, filepath):
58 | super().__setattr__("mut", mutagen.File(filepath))
59 |
60 | def __getattr__(self, attr):
61 | try:
62 | if isinstance(self.mut, mutagen.flac.FLAC):
63 | if attr in {"artist", "genre"}:
64 | return list(self.mut[TAG_FIELDS["FLAC"][attr]]) or []
65 | return "; ".join(self.mut[TAG_FIELDS["FLAC"][attr]]) or None
66 | elif isinstance(self.mut, mutagen.mp3.MP3):
67 | return self.parse_tag(attr, "MP3")
68 | elif isinstance(self.mut, mutagen.mp4.MP4):
69 | tag = self.parse_tag(attr, "AAC")
70 | return tag
71 | except KeyError:
72 | return None
73 |
74 | def parse_tag(self, attr, format):
75 | fields = TAG_FIELDS[format][attr]
76 | for field in fields:
77 | try:
78 | if attr in {"tracknumber", "tracktotal", "discnumber", "disctotal"}:
79 | try:
80 | val = str(self.mut.tags[field].text[0])
81 | if "number" in attr:
82 | return val.split("/")[0]
83 | elif "total" in attr and "/" in val:
84 | return val.split("/")[1]
85 | except (AttributeError, KeyError):
86 | number, total = self.mut.tags[field][0]
87 | return (number if "number" in attr else total) or None
88 | try:
89 | if attr in {"artist", "genre"}:
90 | try:
91 | return list(self.mut.tags[field].text) or []
92 | except AttributeError:
93 | return list(self.mut.tags[field]) or []
94 | try:
95 | return "; ".join(self.mut.tags[field].text) or None
96 | except AttributeError:
97 | return self.mut.tags[field][0] or None
98 | except TypeError:
99 | return self.mut.tags[field].text[0].get_text()
100 | except KeyError:
101 | pass
102 | return None
103 |
104 | def __setattr__(self, key, value):
105 | try:
106 | if isinstance(self.mut, mutagen.flac.FLAC):
107 | self.mut.tags[TAG_FIELDS["FLAC"][key]] = value
108 | elif isinstance(self.mut, mutagen.mp3.MP3):
109 | self.set_mp3_tag(key, value)
110 | elif isinstance(self.mut, mutagen.mp4.MP4):
111 | self.set_aac_tag(key, value)
112 | except KeyError:
113 | return super().__setattr__(key, value)
114 |
115 | def set_mp3_tag(self, key, value):
116 | if not self.mut.tags:
117 | self.mut.tags = mutagen.id3.ID3()
118 | if key in {"tracknumber", "discnumber"}:
119 | tag_key = TAG_FIELDS["MP3"][key][0]
120 | try:
121 | _, total = self.mut.tags[tag_key].text[0].split("/")
122 | value = f"{value}/{total}"
123 | except (ValueError, KeyError):
124 | pass
125 | frame = getattr(id3, tag_key)(text=value)
126 | self.mut.tags.delall(tag_key)
127 | self.mut.tags.add(frame)
128 | elif key in {"tracktotal", "disctotal"}:
129 | tag_key = TAG_FIELDS["MP3"][key][0]
130 | try:
131 | track, _ = self.mut.tags[tag_key].text[0].split("/")
132 | except ValueError:
133 | track = self.mut.tags[tag_key].text[0]
134 | except KeyError: # Well fuck...
135 | return
136 | frame = getattr(id3, tag_key)(text=f"{track}/{value}")
137 | self.mut.tags.delall(tag_key)
138 | self.mut.tags.add(frame)
139 | else:
140 | try:
141 | tag_key, desc = TAG_FIELDS["MP3"][key][0].split(":")
142 | frame = getattr(id3, tag_key)(desc=desc, text=value)
143 | self.mut.tags.add(frame)
144 | except ValueError:
145 | tag_key = TAG_FIELDS["MP3"][key][0]
146 | frame = getattr(id3, tag_key)(text=value)
147 | self.mut.tags.delall(tag_key)
148 | self.mut.tags.add(frame)
149 |
150 | def set_aac_tag(self, key, value):
151 | tag_key = TAG_FIELDS["AAC"][key][0]
152 | if key in {"tracknumber", "discnumber"}:
153 | try:
154 | _, total = self.mut.tags[tag_key][0]
155 | except (ValueError, KeyError):
156 | total = 0
157 | try:
158 | self.mut.tags[tag_key] = [(int(value), int(total))]
159 | except ValueError as e:
160 | click.secho("Can't have non-numeric AAC number tags, sorry!")
161 | raise e
162 | elif key in {"tracktotal", "disctotal"}:
163 | try:
164 | track, _ = self.mut.tags[tag_key][0]
165 | except (ValueError, KeyError): # fack
166 | return
167 | try:
168 | self.mut.tags[tag_key] = [(int(track), int(value))]
169 | except ValueError as e:
170 | click.secho("Can't have non-numeric AAC number tags, sorry!")
171 | raise e
172 | else:
173 | self.mut.tags[tag_key] = value
174 |
175 | def save(self):
176 | self.mut.save()
177 |
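
The MP3/MP4 number tags above follow the ID3 "number/total" convention; a standalone sketch of the split `parse_tag` performs (hypothetical values):

    def split_pos(val):
        # "3/12" -> ("3", "12"); a bare "3" -> ("3", None), as in parse_tag.
        number, _, total = val.partition("/")
        return number, total or None

    print(split_pos("3/12"))  # ('3', '12')
    print(split_pos("3"))     # ('3', None)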
--------------------------------------------------------------------------------
/salmon/tagger/tags.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 |
4 | import click
5 | import mutagen
6 |
7 | from salmon import config
8 | from salmon.common import get_audio_files
9 | from salmon.tagger.tagfile import TagFile
10 |
11 | STANDARDIZED_TAGS = {
12 | "date": ["year"],
13 | "label": ["recordlabel", "organization", "publisher"],
14 | "catalognumber": ["labelno", "catalog #", "catno"],
15 | }
16 |
17 |
18 | def check_tags(path):
19 | """Get and then check the tags for problems. Offer user way to edit tags."""
20 | click.secho("\nChecking tags...", fg="yellow", bold=True)
21 | tags = gather_tags(path)
22 | if not tags:
23 | raise IndexError("No tracks were found.")
24 |
25 | check_required_tags(tags)
26 |
27 | if config.PROMPT_PUDDLETAG:
28 | print_a_tag(next(iter(tags.values())))
29 | if prompt_editor(path):
30 | tags = gather_tags(path)
31 |
32 | return tags
33 |
34 |
35 | def gather_tags(path):
36 | """Get the tags of each file."""
37 | tags = {}
38 | for filename in get_audio_files(path):
39 | tags[filename] = TagFile(os.path.join(path, filename))
40 | return tags
41 |
42 |
43 | def check_required_tags(tags):
44 | """Verify that every track has the required tag fields."""
45 | offending_files = []
46 | for fln, tags in tags.items():
47 |         missing = []
48 |         for t in ["title", "artist", "album", "tracknumber"]:
49 |             if not getattr(tags, t, False):
50 |                 missing.append(t)
51 |         if missing:
52 |             offending_files.append(f'{fln} ({", ".join(missing)})')
53 |
54 | if offending_files:
55 | click.secho(
56 | "The following files do not contain all the required tags: {}.".format(
57 | ", ".join(offending_files)
58 | ),
59 | fg="red",
60 | )
61 | else:
62 | click.secho("Verified that all files contain the required tags.", fg="green")
63 |
64 |
65 | def print_a_tag(tags):
66 | """Print all tags in a tag set."""
67 | for key, value in tags.items():
68 | click.echo(f"> {key}: {value}")
69 |
70 |
71 | def prompt_editor(path):
72 | """Ask user whether or not to open the files in a tag editor."""
73 | if not click.confirm(
74 | click.style(
75 | "\nAre the above tags acceptable? ([n] to open in tag editor)",
76 | fg="magenta",
77 | bold=True,
78 | ),
79 | default=True,
80 | ):
81 | with open(os.devnull, "w") as devnull:
82 | subprocess.call(["puddletag", path], stdout=devnull, stderr=devnull)
83 | return True
84 | return False
85 |
86 |
87 | def standardize_tags(path):
88 | """
89 | Change ambiguously defined tags field values into the fields I arbitrarily
90 | decided are the ones this script will use.
91 | """
92 | for filename in get_audio_files(path):
93 | mut = mutagen.File(os.path.join(path, filename))
94 | found_aliased = set()
95 | for tag, aliases in STANDARDIZED_TAGS.items():
96 | for alias in aliases:
97 | if alias in mut.tags:
98 | mut.tags[tag] = mut.tags[alias]
99 | del mut.tags[alias]
100 | found_aliased.add(alias)
101 | if found_aliased:
102 | mut.save()
103 | click.secho(
104 | f"Unaliased the following tags for {filename}: "
105 | + ", ".join(found_aliased)
106 | )
107 |
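
A standalone sketch of the aliasing performed by `standardize_tags`, using a plain dict in place of a mutagen tag object (hypothetical values):

    STANDARDIZED_TAGS = {
        "date": ["year"],
        "label": ["recordlabel", "organization", "publisher"],
    }
    tags = {"year": ["2020"], "publisher": ["Hypothetical Label"]}
    for tag, aliases in STANDARDIZED_TAGS.items():
        for alias in aliases:
            if alias in tags:
                tags[tag] = tags.pop(alias)  # move value to the canonical field
    print(tags)  # {'date': ['2020'], 'label': ['Hypothetical Label']}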
--------------------------------------------------------------------------------
/salmon/trackers/__init__.py:
--------------------------------------------------------------------------------
1 | import click
2 | from urllib import parse
3 |
4 | from salmon import ConfigError, config
5 | from salmon.trackers import red, ops
6 |
7 | # Hard-coded, as it needs to reflect the imports anyway.
8 | tracker_classes = {'RED': red.RedApi, 'OPS': ops.OpsApi}
9 | tracker_url_code_map = {'redacted.ch': 'RED', 'orpheus.network': 'OPS'}
10 |
11 | # tracker_list is used to offer the user choices. Generated if not specified in the config.
12 | if hasattr(config, 'TRACKER_LIST'):
13 | tracker_list = config.TRACKER_LIST
14 | else:
15 | tracker_list = []
16 | if hasattr(config, 'RED_SESSION'):
17 | tracker_list.append('RED')
18 | if hasattr(config, 'OPS_SESSION'):
19 | tracker_list.append('OPS')
20 | if len(tracker_list) == 0:
21 | raise ConfigError("You need a tracker session cookie in your config!")
22 |
23 |
24 | def get_class(site_code):
25 | "Returns the api class from the tracker string."
26 | return tracker_classes[site_code]
27 |
28 |
29 | def choose_tracker(choices):
30 | """Allows the user to choose a tracker from choices."""
31 | while True:
32 | # Loop until we have chosen a tracker or aborted.
33 | tracker_input = click.prompt(
34 | click.style(
35 |                 f'Your choices are {", ".join(choices)} or [a]bort.',
36 | fg="magenta",
37 | bold=True,
38 | ),
39 | default=choices[0],
40 | )
41 | tracker_input = tracker_input.strip().upper()
42 | if tracker_input in choices:
43 | click.secho(f"Using tracker: {tracker_input}", fg="green")
44 | return tracker_input
45 | # this part allows input of the first letter of the tracker.
46 | elif tracker_input in [choice[0] for choice in choices]:
47 | for choice in choices:
48 | if tracker_input == choice[0]:
49 | click.secho(f"Using tracker: {choice}", fg="green")
50 | return choice
51 | elif tracker_input.lower().startswith("a"):
52 | click.secho("\nDone with this release.", fg="green")
53 | raise click.Abort
54 |
55 |
56 | def choose_tracker_first_time(question="Which tracker would you like to upload to?"):
57 |     """Specific logic for the first time a tracker choice is offered.
58 |     Uses the default if one is set, or the only tracker if just one is configured."""
59 | choices = tracker_list
60 | if len(choices) == 1:
61 | click.secho(f"Using tracker: {choices[0]}")
62 | return choices[0]
63 | if config.DEFAULT_TRACKER:
64 | click.secho(f"Using tracker: {config.DEFAULT_TRACKER}", fg="green")
65 | return config.DEFAULT_TRACKER
66 | click.secho(question, fg="magenta", bold=True)
67 | tracker = choose_tracker(choices)
68 | return tracker
69 |
70 |
71 | def validate_tracker(ctx, param, value):
72 |     """Only allow trackers that are in the config tracker dict.
73 |     If the value isn't there, prompt the user to choose.
74 | """
75 | try:
76 | if value is None:
77 | return choose_tracker_first_time()
78 | if value.upper() in tracker_list:
79 | click.secho(f"Using tracker: {value.upper()}", fg="green")
80 | return value.upper()
81 | else:
82 | click.secho(f"{value} is not a tracker in your config.", fg="red")
83 | return choose_tracker(tracker_list)
84 | except AttributeError:
85 | raise click.BadParameter(
86 | "This flag requires a tracker. Possible sources are: "
87 | + ", ".join(tracker_list)
88 | )
89 |
90 |
91 | def validate_request(gazelle_site, request):
92 |     """Check that the request is a URL or an ID number, and return the number.
93 |     Should it check more? Currently it does not verify the tracker is the right one.
94 | """
95 | try:
96 | if request is None:
97 | return None
98 | if request.strip().isdigit():
99 | pass
100 | elif (
101 | request.strip().lower().startswith(gazelle_site.base_url + "/requests.php")
102 | ):
103 | request = parse.parse_qs(parse.urlparse(request).query)['id'][0]
104 | click.secho(
105 | f"Attempting to fill {gazelle_site.base_url}/requests.php?action=view&id={request}",
106 | fg="green",
107 | )
108 | return request
109 | except (KeyError, AttributeError):
110 | raise click.BadParameter("This flag requires a request, either as a url or ID")
111 |
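
A standalone sketch of the request-URL handling in `validate_request` (hypothetical URL):

    from urllib import parse

    url = "https://redacted.ch/requests.php?action=view&id=12345"  # hypothetical
    request_id = parse.parse_qs(parse.urlparse(url).query)["id"][0]
    print(request_id)  # 12345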
--------------------------------------------------------------------------------
/salmon/trackers/ops.py:
--------------------------------------------------------------------------------
1 | from salmon.trackers.base import BaseGazelleApi
2 |
3 | from salmon import config
4 | import click
5 | import requests
6 | from requests.exceptions import ConnectTimeout, ReadTimeout
7 |
8 |
9 | class OpsApi(BaseGazelleApi):
10 | def __init__(self):
11 | self.headers = {
12 | "Connection": "keep-alive",
13 | "Cache-Control": "max-age=0",
14 | "User-Agent": config.USER_AGENT,
15 | }
16 | self.site_code = 'OPS'
17 | self.base_url = 'https://orpheus.network'
18 | self.tracker_url = 'https://home.opsfet.ch'
19 | self.site_string = 'OPS'
20 | if config.OPS_DOTTORRENTS_DIR:
21 | self.dot_torrents_dir = config.OPS_DOTTORRENTS_DIR
22 | else:
23 | self.dot_torrents_dir = config.DOTTORRENTS_DIR
24 |
25 | self.cookie = config.OPS_SESSION
26 |
27 | self.session = requests.Session()
28 | self.session.headers.update(self.headers)
29 |
30 | self.authkey = None
31 | self.passkey = None
32 | self.authenticate()
33 |
--------------------------------------------------------------------------------
/salmon/trackers/red.py:
--------------------------------------------------------------------------------
1 | import click
2 | import requests
3 | import asyncio
4 | from requests.exceptions import ConnectTimeout, ReadTimeout
5 |
6 | from salmon.trackers.base import BaseGazelleApi
7 | from salmon import config
8 | from salmon.errors import (
9 | LoginError,
10 | RateLimitError,
11 | RequestError,
12 | RequestFailedError,
13 | )
14 |
15 | loop = asyncio.get_event_loop()
16 |
17 |
18 | class RedApi(BaseGazelleApi):
19 | def __init__(self):
20 | self.headers = {
21 | "Connection": "keep-alive",
22 | "Cache-Control": "max-age=0",
23 | "User-Agent": config.USER_AGENT,
24 | }
25 | self.site_code = 'RED'
26 | self.base_url = 'https://redacted.ch'
27 | self.tracker_url = 'https://flacsfor.me'
28 | self.site_string = 'RED'
29 | self.cookie = config.RED_SESSION
30 | if config.RED_API_KEY:
31 | self.api_key = config.RED_API_KEY
32 |
33 | if config.RED_DOTTORRENTS_DIR:
34 | self.dot_torrents_dir = config.RED_DOTTORRENTS_DIR
35 | else:
36 | self.dot_torrents_dir = config.DOTTORRENTS_DIR
37 |
38 | self.session = requests.Session()
39 | self.session.headers.update(self.headers)
40 |
41 | self.authkey = None
42 | self.passkey = None
43 | self.authenticate()
44 |
45 | async def report_lossy_master(self, torrent_id, comment, source):
46 | """Automagically report a torrent for lossy master/web approval.
47 | Use LWA if the torrent is web, otherwise LMA."""
48 |
49 | url = self.base_url + "/reportsv2.php"
50 | params = {"action": "takereport"}
51 | type_ = "lossywebapproval" if source == "WEB" else "lossyapproval"
52 | data = {
53 | "auth": self.authkey,
54 | "torrentid": torrent_id,
55 | "categoryid": 1,
56 | "type": type_,
57 | "extra": comment,
58 | "submit": True,
59 | }
60 | r = await loop.run_in_executor(
61 | None,
62 | lambda: self.session.post(
63 | url, params=params, data=data, headers=self.headers
64 | ),
65 | )
66 | if "torrents.php" in r.url:
67 | return True
68 | raise RequestError(
69 | f"Failed to report the torrent for lossy master, code {r.status_code}."
70 | )
71 |
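
`report_lossy_master` wraps the blocking `requests` call in `run_in_executor` so it can be awaited; a minimal standalone sketch of that pattern, with a stand-in for the post call:

    import asyncio

    def blocking_post():  # stand-in for session.post(...)
        return "https://redacted.ch/torrents.php?id=1"  # hypothetical redirect

    async def main():
        loop = asyncio.get_event_loop()
        url = await loop.run_in_executor(None, blocking_post)
        print("torrents.php" in url)  # True -> the report went through

    asyncio.get_event_loop().run_until_complete(main())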
--------------------------------------------------------------------------------
/salmon/uploader/preassumptions.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from html import unescape
3 |
4 | import click
5 |
6 | from salmon import config
7 | from salmon.errors import UploadError
8 |
9 | from salmon.errors import RequestError
10 |
11 | loop = asyncio.get_event_loop()
12 |
13 |
14 | def print_preassumptions(
15 | gazelle_site, path, group_id, source, lossy, spectrals, encoding, spectrals_after
16 | ):
17 | """Print what all the passed CLI options will do."""
18 | click.secho(f"\nProcessing {path}", fg="cyan", bold=True)
19 | second = []
20 | if source:
21 | second.append(f"from {source}")
22 | if list(encoding) != [None, None]:
23 | text = f"as {encoding[0]}"
24 | if encoding[1]:
25 | text += " (VBR)"
26 | second.append(text)
27 | if lossy is not None:
28 | second.append(f"with lossy master status as {lossy}")
29 | if second:
30 | click.secho(f'Uploading {" ".join(second)}.', fg="cyan")
31 | if spectrals:
32 | if spectrals == (0,):
33 | click.secho("Uploading no spectrals.", fg="yellow")
34 | else:
35 | click.secho(
36 | f'Uploading spectrals {", ".join(str(s) for s in spectrals)}.',
37 | fg="yellow",
38 | )
39 | if spectrals_after:
40 | click.secho(
41 | 'Assessing spectrals after upload.', fg="yellow",
42 | )
43 |
44 | if lossy and not spectrals:
45 | raise UploadError(
46 | "\nYou cannot report a torrent for lossy master without spectrals."
47 | )
48 |
49 | if group_id:
50 | print_group_info(gazelle_site, group_id, source)
51 | click.confirm(
52 | click.style(
53 | "\nWould you like to continue to upload to this group?",
54 | fg="magenta",
55 | bold=True,
56 | ),
57 | default=True,
58 | abort=True,
59 | )
60 |
61 |
62 | def print_group_info(gazelle_site, group_id, source):
63 | """
64 | Print information about the torrent group that was passed as a CLI argument.
65 | Also print all the torrents that are in that group.
66 | """
67 | try:
68 | group = loop.run_until_complete(gazelle_site.torrentgroup(group_id))
69 | except RequestError:
70 |         raise UploadError("Could not get torrent group info from the site.")
71 |
72 | artists = [a["name"] for a in group["group"]["musicInfo"]["artists"]]
73 | artists = ", ".join(artists) if len(artists) < 4 else config.VARIOUS_ARTIST_WORD
74 | click.secho(
75 | f"\nTorrents matching source {source} in (Group {group_id}) "
76 | f'{artists} - {group["group"]["name"]}:',
77 | fg="yellow",
78 | bold=True,
79 | )
80 |
81 | for t in group["torrents"]:
82 | if t["media"] == source:
83 | if t["remastered"]:
84 | click.echo(
85 | unescape(
86 | f"> {t['remasterYear']} / {t['remasterRecordLabel']} / "
87 | f"{t['remasterCatalogueNumber']} / {t['format']} / "
88 | f"{t['encoding']}"
89 | )
90 | )
91 | if not t["remastered"]:
92 | click.echo(
93 | unescape(
94 | f"> OR / {group['group']['recordLabel']} / "
95 | f"{group['group']['catalogueNumber']} / {t['format']} / "
96 | f"{t['encoding']}"
97 | )
98 | )
99 |
--------------------------------------------------------------------------------
/salmon/web/__init__.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from os.path import dirname, join
3 |
4 | import aiohttp
5 | import aiohttp_jinja2
6 | import click
7 | import jinja2
8 | from aiohttp_jinja2 import render_template
9 |
10 | from salmon import config
11 | from salmon.common import commandgroup
12 | from salmon.errors import WebServerIsAlreadyRunning
13 | from salmon.web import spectrals
14 |
15 | loop = asyncio.get_event_loop()
16 |
17 |
18 | @commandgroup.command()
19 | def web():
20 | """Start the salmon web server"""
21 | app = create_app() # noqa: F841
22 | click.secho(f"Running webserver on http://127.0.0.1:{config.WEB_PORT}", fg="cyan")
23 | loop.run_forever()
24 |
25 |
26 | def create_app():
27 | app = aiohttp.web.Application()
28 | add_routes(app)
29 | aiohttp_jinja2.setup(
30 | app, loader=jinja2.FileSystemLoader(join(dirname(__file__), "templates"))
31 | )
32 | return loop.run_until_complete(
33 | loop.create_server(app.make_handler(), host="127.0.0.1", port=config.WEB_PORT)
34 | )
35 |
36 |
37 | async def create_app_async():
38 | app = aiohttp.web.Application()
39 | add_routes(app)
40 | aiohttp_jinja2.setup(
41 | app, loader=jinja2.FileSystemLoader(join(dirname(__file__), "templates"))
42 | )
43 | runner = aiohttp.web.AppRunner(app)
44 | await runner.setup()
45 | site = aiohttp.web.TCPSite(runner, "127.0.0.1", config.WEB_PORT)
46 | try:
47 | await site.start()
48 | except OSError:
49 | raise WebServerIsAlreadyRunning
50 | return runner
51 |
52 |
53 | def add_routes(app):
54 | app.router.add_static(
55 | "/static", join(dirname(__file__), "static"), follow_symlinks=True
56 | )
57 | app.router.add_route("GET", "/", handle_index)
58 | app.router.add_route("GET", "/spectrals", spectrals.handle_spectrals)
59 | app["static_root_url"] = config.WEB_STATIC_ROOT_URL
60 |
61 |
62 | def handle_index(request, **kwargs):
63 | return render_template("index.html", request, {})
64 |
--------------------------------------------------------------------------------
/salmon/web/spectrals.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | from itertools import chain
3 |
4 | import aiohttp
5 | from aiohttp_jinja2 import render_template
6 |
7 | from salmon.database import DB_PATH
8 |
9 |
10 | async def handle_spectrals(request, **kwargs):
11 | active_spectrals = get_active_spectrals()
12 | if active_spectrals:
13 | return render_template("spectrals.html", request, active_spectrals)
14 |     return aiohttp.web.HTTPNotFound()
15 |
16 |
17 | def set_active_spectrals(spectrals):
18 | with sqlite3.connect(DB_PATH) as conn:
19 | conn.row_factory = sqlite3.Row
20 | cursor = conn.cursor()
21 | cursor.execute("DELETE FROM spectrals")
22 | cursor.execute(
23 | "INSERT INTO spectrals (id, filename) VALUES "
24 | + ", ".join("(?, ?)" for _ in range(len(spectrals))),
25 | tuple(chain.from_iterable(list(spectrals.items()))),
26 | )
27 | conn.commit()
28 |
29 |
30 | def get_active_spectrals():
31 | with sqlite3.connect(DB_PATH) as conn:
32 | conn.row_factory = sqlite3.Row
33 | cursor = conn.cursor()
34 | cursor.execute("SELECT id, filename FROM spectrals ORDER BY ID ASC")
35 | return {"spectrals": {r["id"]: r["filename"] for r in cursor.fetchall()}}
36 |
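
A standalone sketch of the bulk INSERT built by `set_active_spectrals`, run against an in-memory database instead of DB_PATH (hypothetical filenames):

    import sqlite3
    from itertools import chain

    spectrals = {1: "01 Track.flac", 2: "02 Track.flac"}
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE spectrals (id INTEGER, filename TEXT)")
    conn.execute(
        "INSERT INTO spectrals (id, filename) VALUES "
        + ", ".join("(?, ?)" for _ in spectrals),  # one (?, ?) per row
        tuple(chain.from_iterable(spectrals.items())),
    )
    print(conn.execute("SELECT id, filename FROM spectrals").fetchall())
    # [(1, '01 Track.flac'), (2, '02 Track.flac')]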
--------------------------------------------------------------------------------
/salmon/web/static/css/main.css:
--------------------------------------------------------------------------------
1 | /* rich black #04080f
2 | * cyan azure #507dbc
3 | * baby blue eyes #a1c6ea
4 | * pale aqua #bbd1ea
5 | * platinum #dae3e5
6 | **/
7 |
8 | * {
9 | margin: 0;
10 | padding: 0;
11 | color: #04080f;
12 | font-family: "Lucida Console", Monaco, monospace;
13 | }
14 |
15 | body {
16 | background-color: #dae3e5;
17 | font-family: "Lucida Console", Monaco, monospace;
18 | }
19 |
20 | header {
21 | height: 50px;
22 | width: calc(100% - 40px);
23 | max-width: 1400px;
24 | margin: 0 auto;
25 | padding: 0 20px;
26 | font-family: "Lucida Console", Monaco, monospace;
27 | }
28 |
29 | .headerbg {
30 | background-color: #bbd1ea;
31 | height: 50px;
32 | width: 100%;
33 | position: absolute;
34 | top: 0;
35 | left: 0;
36 | z-index: -1;
37 | }
38 |
39 | header .logo {
40 | height: 100%;
41 | width: auto;
42 | }
43 |
44 | header .logo img {
45 | max-height: 100%;
46 | }
47 |
48 | div.body {
49 | max-width: 1400px;
50 | padding: 40px 20px;
51 | margin: 0 auto;
52 | font-family: "Lucida Console", Monaco, monospace;
53 | }
54 |
55 | footer {
56 | width: 95%;
57 | max-width: 1340px;
58 | padding: 10px 0;
59 | margin: 0 auto;
60 | border-top: 1px solid #507dbc;
61 | font-size: 11px;
62 | font-family: "Lucida Console", Monaco, monospace;
63 | }
64 | p {
65 | font-family: "Lucida Console", Monaco, monospace;
66 | }
67 |
68 | .centered {
69 | display: block;
70 | text-align: center;
71 | }
72 |
73 | .message {
74 | margin-bottom: 30px;
75 | font-family: "Lucida Console", Monaco, monospace;
76 | }
77 |
78 | .single_message {
79 | font-family: "Lucida Console", Monaco, monospace;
80 | }
81 |
82 | .track_spectrals {
83 | display: flex;
84 |     flex-flow: row nowrap;
85 | }
86 |
87 | .full_spectral_image {
88 | width: 86%;
89 | }
90 |
91 | .zoom_spectral_image {
92 | width: 13.85%;
93 | margin-left: 0.15%;
94 | }
95 |
96 | .full_spectral_image img, .zoom_spectral_image img, #lightbox img {
97 | max-width: 100%;
98 | max-height: 100%;
99 | }
100 |
101 | #lightbox {
102 | position: fixed;
103 | top: 0;
104 | left: 0;
105 | width: 100vw;
106 | height: 100vh;
107 | background: rgba(4, 8, 15, 0.8);
108 | transition: all 0.2s linear;
109 | }
110 |
111 | #lightbox img {
112 | display: block;
113 | max-height: 90vh;
114 | max-width: 90%;
115 | margin: 5vh auto;
116 | }
117 |
--------------------------------------------------------------------------------
/salmon/web/static/images/apple-touch-icon-114x114.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/apple-touch-icon-114x114.png
--------------------------------------------------------------------------------
/salmon/web/static/images/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/apple-touch-icon-120x120.png
--------------------------------------------------------------------------------
/salmon/web/static/images/apple-touch-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/apple-touch-icon-144x144.png
--------------------------------------------------------------------------------
/salmon/web/static/images/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/apple-touch-icon-152x152.png
--------------------------------------------------------------------------------
/salmon/web/static/images/apple-touch-icon-57x57.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/apple-touch-icon-57x57.png
--------------------------------------------------------------------------------
/salmon/web/static/images/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/apple-touch-icon-60x60.png
--------------------------------------------------------------------------------
/salmon/web/static/images/apple-touch-icon-72x72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/apple-touch-icon-72x72.png
--------------------------------------------------------------------------------
/salmon/web/static/images/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/apple-touch-icon-76x76.png
--------------------------------------------------------------------------------
/salmon/web/static/images/favicon-128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/favicon-128.png
--------------------------------------------------------------------------------
/salmon/web/static/images/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/favicon-16x16.png
--------------------------------------------------------------------------------
/salmon/web/static/images/favicon-196x196.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/favicon-196x196.png
--------------------------------------------------------------------------------
/salmon/web/static/images/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/favicon-32x32.png
--------------------------------------------------------------------------------
/salmon/web/static/images/favicon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/favicon-96x96.png
--------------------------------------------------------------------------------
/salmon/web/static/images/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/favicon.ico
--------------------------------------------------------------------------------
/salmon/web/static/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/logo.png
--------------------------------------------------------------------------------
/salmon/web/static/images/mstile-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/mstile-144x144.png
--------------------------------------------------------------------------------
/salmon/web/static/images/mstile-150x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/mstile-150x150.png
--------------------------------------------------------------------------------
/salmon/web/static/images/mstile-310x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/mstile-310x150.png
--------------------------------------------------------------------------------
/salmon/web/static/images/mstile-310x310.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/mstile-310x310.png
--------------------------------------------------------------------------------
/salmon/web/static/images/mstile-70x70.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ghoto/smoked-salmon/b148e3140a103cfcb0c6ebb179343e36892d1b18/salmon/web/static/images/mstile-70x70.png
--------------------------------------------------------------------------------
/salmon/web/static/scripts/lightbox.js:
--------------------------------------------------------------------------------
1 | function zero_pad (nr) {
2 | return String(nr).length === 1 ? `0${nr}` : nr;
3 | }
4 |
5 | function clear_lightbox() {
6 | var lightbox = document.getElementById('lightbox');
7 | while (lightbox.firstChild) {
8 | lightbox.removeChild(lightbox.firstChild);
9 | }
10 | lightbox.style.display = 'none';
11 | }
12 |
13 |
14 | function show_lightbox(spec_id) {
15 | clear_lightbox();
16 | var lightbox = document.getElementById('lightbox');
17 |
18 | var static_dir = document.getElementById('static_dir').innerHTML;
19 | var url = `${static_dir}specs/${zero_pad(spec_id)} Zoom.png`;
20 | var img = document.createElement('img');
21 | img.setAttribute('src', url);
22 | img.setAttribute('id', 'lightbox_img');
23 |
24 | lightbox.appendChild(img);
25 | lightbox.style.display = 'block';
26 | lightbox.onclick = clear_lightbox;
27 | }
28 |
--------------------------------------------------------------------------------
/salmon/web/templates/index.html:
--------------------------------------------------------------------------------
1 | {% extends "layout.html" %}
2 | {% block title %}Index{% endblock %}
3 |
4 | {% block content %}
5 | Welcome to salmon's web interface!
6 | {% endblock %}
7 |
--------------------------------------------------------------------------------
/salmon/web/templates/layout.html:
--------------------------------------------------------------------------------
[HTML markup stripped during extraction; the surviving fragments indicate a base Jinja2 layout: a "{% block head %}" whose page title reads "{% block title %}{% endblock %} :: Salmon" and which links the favicons and stylesheet, a header with the site logo, a body wrapper around "{% block content %}{% endblock %}", a footer, and the #lightbox container with its script, with asset paths built via {{ static('') }}.]
--------------------------------------------------------------------------------
/salmon/web/templates/spectrals.html:
--------------------------------------------------------------------------------
1 | {% extends "layout.html" %}
2 | {% block title %}Spectrals{% endblock %}
3 |
4 | {% block head %}
5 | {{ super() }}
6 |
7 | {% endblock %}
8 |
9 | {% block content %}
10 | Click any spectral to view an enlarged version of its zoom.
11 |
12 | {% for id, filename in spectrals.items() %}
[markup stripped during extraction; the surviving fragments indicate that each iteration renders {{ filename }} above a div.track_spectrals row containing the "{{ '%02d' | format(id) }} Full.png" and "{{ '%02d' | format(id) }} Zoom.png" spectral images.]
26 | {% endfor %}
27 |
28 |
29 |
30 | {% endblock %}
31 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length: 88
3 | max-complexity: 16
4 | exclude: migrations
5 | ignore = E203 W503
6 |
7 | [isort]
8 | skip = migrations
9 | not_skip = __init__.py
10 | line_length = 79
11 | include_trailing_comma = true
12 | wrap_length = 1
13 | multi_line_output = 3
14 |
--------------------------------------------------------------------------------