├── .gitignore ├── requirements.txt ├── template.env ├── README.md └── main.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .vscode/ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.11.1 2 | eyed3==0.9.6 3 | python-dotenv==0.20.0 4 | requests==2.27.1 5 | colorist==1.8.3 6 | -------------------------------------------------------------------------------- /template.env: -------------------------------------------------------------------------------- 1 | GENIUS_ACCESS_TOKEN="client access token goes here" 2 | 3 | HEADER="go to https://whatmyuseragent.com/ and copy the top field here." 4 | 5 | I_WANT_SYNCED_LYRICS=if this is not "True" then Lyricsify searches will not run. 6 | 7 | STATIC_WORKING_DIR="if you want to always run in the same folder, put the full path here. You can override this by manually entering a path at runtime." 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AutoLyricize 2 | 3 | > Automatically find and embed song lyrics. 4 | 5 | This script scans a specified directory for audio files, and for each file, finds lyrics from Lyricsify.com or Genius.com (as a fallback), and saves them to the file's metadata. 6 | 7 | Perfect for use with [Spotiflyer](https://github.com/Shabinder/SpotiFlyer/), [Retro Music Player](https://github.com/RetroMusicPlayer/RetroMusicPlayer), and [Syncthing](https://github.com/syncthing/syncthing) to reduce your dependence on music streaming services. 8 | 9 | ## Setup 10 | 11 | 1. Install [Python](https://www.python.org/). 12 | 2. Install dependencies by running `pip install -r requirements.txt`. 13 | 3. 
"""
This script scans a specified directory for audio files, and for each file,
finds lyrics from Lyricsify.com or Genius.com (as a fallback),
and saves them to the file's metadata.

Setup:
    Copy `template.env` to a new file called `.env` and add a valid Genius.com
    (https://docs.genius.com/) access token to it (or set the appropriate
    environment variable some other way).
    - If no token is provided, only Lyricsify.com (https://www.lyricsify.com/)
      will be used as a data source.
    - Other useful fields and instructions can be found in the same file.

Usage:
    python main.py "path/to/folder"
    Tip: you can drag a folder into the terminal window to paste its full path.

Limitations:
    - Inexact search uses Genius's search system, which often returns results
      that have nothing to do with the original track. If you use inexact
      search with instrumental tracks or albums, you will get garbage data.
    - Only works with mp3 files due to a limitation in the eyed3 library.
    - Only supports one lyrics field in the metadata! The script will DELETE
      all the others (e.g. lyrics fields for different languages).
    - The original author reports a little under 3 hours for a 5000-song
      library.
"""

import json
import os
import re
import sys
import urllib.parse

import eyed3
import requests
from bs4 import BeautifulSoup
from colorist import Color
from dotenv import load_dotenv

load_dotenv()


# Seconds to wait for any single HTTP request before giving up, so one stalled
# connection cannot hang a multi-hour run forever.
REQUEST_TIMEOUT = 30

# The site obfuscates(?) the div name but we can bypass this with regex.
_LYRICSIFY_DIV_ID = re.compile(r"lyrics_.*_details")

# NOTE(review): the literal that was stripped from the scraped markup was lost
# in the copy of the source this was reviewed from (it rendered as a line
# break), so it was presumably a <br> tag - confirm against the repository.
_BR_TAG = re.compile(r"<br\s*/?>")

# Removes anything in parentheses - used for "(feat. ...)" as this improves
# search results.
_PARENTHESISED = re.compile(r" \([^)]+\)")

# Ordered clean-up of stray line breaks in scraped Genius lyrics. Order matters:
# each replacement runs on the output of the previous one.
_GENIUS_LINE_FIXES = (
    ("(\n", "("),
    ("\n)", ")"),
    (" \n", " "),
    ("\n ", " "),
    ("\n]", "]"),
    ("\n,", ","),
    ("\n'\n", "\n'"),
    ("\n\n[", "\n["),
    ("\n[", "\n\n["),
)

# State shared between the lookup functions and the main loop.
inexact = 0         # 1 when the last Genius hit did not match artist and title
inexact_url = ""    # Genius URL of that inexact hit
requireexact = "y"  # "y" rejects inexact Genius hits; replaced by the user's answer


def lyricsify_find_song_lyrics(query):
    """
    Return song lyrics from Lyricsify.com for the page derived from the provided search string.
    If not found, return None.

    The query ("artist - title") is lowercased and turned into a URL slug
    ("artist/title" with spaces replaced by dashes). The returned text starts
    at the "[ar: " marker found in the scraped lyrics block.
    """
    global inexact
    inexact = 0  # a Lyricsify hit comes from a direct URL, never a fuzzy search

    slug = query.lower().replace(" - ", "/").replace(" ", "-")
    response = requests.get(
        # Quote the slug so characters such as "?" or "#" in a title stay part
        # of the path instead of starting a query string or fragment.
        url="https://www.lyricsify.com/lyrics/" + urllib.parse.quote(slug, safe="/"),
        headers={"User-Agent": os.getenv("HEADER")},
        timeout=REQUEST_TIMEOUT,
    )
    page = BeautifulSoup(response.text, "html.parser")
    divs = page.find_all("div", id=_LYRICSIFY_DIV_ID)

    # find_all returns an empty list (never None) when nothing matches
    if not divs:
        return None

    # Drop the first and last line of the div's markup (its own tags)
    inner_lines = str(divs[0]).split("\n")[1:-1]
    song_html = _BR_TAG.sub("", "\n".join(inner_lines))

    # str.find returns -1 when the marker is missing; slicing from -1 would
    # return the last character of the page as "lyrics".
    start = song_html.find("[ar: ")
    if start == -1:
        return None
    return song_html[start:]


def genius_find_song_lyrics(query, access_token):
    """
    Return song lyrics from Genius.com for the first song found using the provided search string.
    If not found, return None. Return "inst" when the song page contains no lyrics text.
    Requires a Genius.com access token.

    Sets the module-level `inexact` / `inexact_url` when the first hit's title
    or primary artist does not appear in the query. Such a hit is rejected
    (None) when the module-level `requireexact` is "y".

    Raises RuntimeError when the song page is a Cloudflare challenge.
    """
    global inexact
    global inexact_url

    headers = {
        "User-Agent": os.getenv("HEADER"),
        "Authorization": "Bearer " + access_token,
    }
    search = requests.get(
        url="https://api.genius.com/search?q=" + urllib.parse.quote(query),
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    hits = json.loads(search.text)["response"]["hits"]
    if not hits:
        return None
    song = hits[0]["result"]
    query_lower = query.lower()

    # Use Genius's fuzzy search result only if allowed and there's no exact match.
    # Membership test rather than `find(...) <= 0`: a match at index 0 (where
    # the artist sits in "artist - title") is still a match.
    inexact = 0
    title = song["title"].lower()
    artist = song["primary_artist"]["name"].lower()
    if title not in query_lower or artist not in query_lower:
        if requireexact == "y":
            return None
        inexact = 1
        inexact_url = song["url"]

    # Scrape the song URL for the lyrics text
    page = requests.get(song["url"], headers=headers, timeout=REQUEST_TIMEOUT)
    html = BeautifulSoup(page.text, "html.parser")
    if html.find("div", id="cloudflare_content"):
        raise RuntimeError(
            f"{Color.RED}Scraping encountered Cloudflare and cannot continue.{Color.OFF}")
    target_divs = html.find_all("div", {"data-lyrics-container": "true"})

    final_lyrics = "\n".join("\n".join(div.strings) for div in target_divs)
    if final_lyrics == "":
        inexact = 0
        return "inst"

    # Removing unwanted line breaks
    for old, new in _GENIUS_LINE_FIXES:
        final_lyrics = final_lyrics.replace(old, new)
    return final_lyrics


# Hyperlink on inexact match
def link(uri, label=None):
    """Return `label` (default: the URI itself) wrapped in an OSC 8 terminal hyperlink to `uri`."""
    if label is None:
        label = uri
    parameters = ''
    # OSC 8 ; params ; URI ST <name> OSC 8 ;; ST
    escape_mask = '\033]8;{};{}\033\\{}\033]8;;\033\\'
    return escape_mask.format(parameters, uri, label)


def _resolve_song_dir():
    """Return the song directory from argv[1], else from STATIC_WORKING_DIR; raise NameError if neither is set."""
    if len(sys.argv) >= 2:
        return sys.argv[1]
    static_dir = os.getenv("STATIC_WORKING_DIR")
    if not static_dir:
        raise NameError(
            "The song directory path has not been provided as a parameter.")
    return static_dir


def _collect_tracks(song_dir):
    """Return a list of (full path, file name) for every file below `song_dir`."""
    # Kept in memory instead of scratch files in the working directory, which
    # used to delete any unrelated current.txt / short.txt found there.
    return [
        (os.path.join(folder, name), name)
        for folder, _subs, files in os.walk(song_dir)
        for name in files
    ]


def _ask_options():
    """Prompt for the run options and return (overwrite, evenifunsynced, requireexact) as "y"/"n" strings."""
    yesno = ("y", "n")

    overwrite = input("Overwrite current lyrics? y/N ").strip().lower()
    if overwrite not in yesno:
        print(f"{Color.YELLOW}Interpreting unknown response as no{Color.OFF}")
        overwrite = "n"

    # Always bound, whatever path is taken below.
    evenifunsynced = "n"
    if overwrite == "y":
        if os.getenv("I_WANT_SYNCED_LYRICS") == "True":
            evenifunsynced = input("Even if the new ones are unsynced? y/N ").strip().lower()
            if evenifunsynced not in yesno:
                print(f"{Color.YELLOW}Interpreting unknown response as no{Color.OFF}")
                evenifunsynced = "n"
        else:
            # Lyricsify (the synced source) is disabled, so overwriting can
            # only ever mean overwriting with unsynced Genius lyrics.
            evenifunsynced = "y"

    exact = input("Require exact artist and title? (Recommended with large folders!!!) Y/n ").strip().lower()
    if exact not in yesno:
        print(f"{Color.YELLOW}Interpreting unknown response as yes{Color.OFF}")
        exact = "y"
    return overwrite, evenifunsynced, exact


def _report(index, total, status, detail, name):
    """Print one progress line: "<index> of <total> : <status> : <detail> : <file name>"."""
    print(f"{index}\tof {total} : {status} : {detail} : {name}")


def main():
    """Walk the song directory and embed lyrics into every loadable file."""
    global inexact
    global requireexact

    song_dir = _resolve_song_dir()

    # Tallying all the tracks
    tracks = _collect_tracks(song_dir)
    total = len(tracks)

    # Environment variables and user input
    # An unset variable (None) and an empty one both mean "no token".
    genius_access_token = os.getenv("GENIUS_ACCESS_TOKEN") or None
    if genius_access_token is None:
        print(f"{Color.YELLOW}The GENIUS_ACCESS_TOKEN environment variable has not been defined. Genius searches will not be conducted.{Color.OFF}")

    if total == 0:
        print("Directory is empty or does not exist.")
        return

    overwrite, evenifunsynced, requireexact = _ask_options()
    synced_wanted = os.getenv("I_WANT_SYNCED_LYRICS") == "True"
    print("\n")

    # To suppress CRC check failed warnings - as a pre-existing CRC issue should not affect lyrics
    eyed3.log.setLevel("ERROR")

    failed = f"{Color.RED}Failed{Color.OFF}"
    for index, (path, name) in enumerate(tracks, start=1):
        try:
            audio_file = eyed3.load(path)
        except Exception:
            _report(index, total, failed, "File does not appear to exist", name)
            continue
        if audio_file is None:
            _report(index, total, failed, "Unsupported file format", name)
            continue
        if audio_file.tag is None:
            # File without an ID3 tag: create an in-memory tag so the lyrics
            # can be attached. Nothing is written unless lyrics are found.
            audio_file.initTag()

        try:
            existing_lyrics = "".join(lyric.text for lyric in audio_file.tag.lyrics)
        except (AttributeError, TypeError):
            existing_lyrics = ""
        if existing_lyrics and overwrite != "y":
            _report(index, total, f"{Color.YELLOW}Skipped{Color.OFF}",
                    "File already has lyrics", name)
            continue

        artist = audio_file.tag.artist
        title = audio_file.tag.title
        if artist and title:
            query = _PARENTHESISED.sub("", artist + " - " + title)
        else:
            # splitext handles extensions of any length
            query = _PARENTHESISED.sub("", os.path.splitext(name)[0])
            _report(index, total, f"{Color.YELLOW}Warning{Color.OFF}",
                    "No info, setting query to filename", name)

        # Reset per-file state so a result from the previous file can never
        # be written into this one.
        lyrics = None
        site_used = ""
        inexact = 0

        # Calling Lyricsify script
        if synced_wanted:
            site_used = "Lyricsify"
            try:
                lyrics = lyricsify_find_song_lyrics(query)
            except Exception:
                print(f"{Color.RED}Error getting Lyricsify lyrics for: " + name + f"{Color.OFF}")
                raise

        # Calling Genius script
        if lyrics is None and genius_access_token is not None and (
                not existing_lyrics or evenifunsynced == "y"):
            site_used = "Genius "
            try:
                lyrics = genius_find_song_lyrics(query, genius_access_token)
            except Exception:
                print(f"{Color.RED}Error getting Genius lyrics for: " + name + f"{Color.OFF}")
                raise

        # Instrumental: nothing to write, and existing lyrics are left alone.
        if lyrics == "inst":
            _report(index, total, f"{Color.YELLOW}Success{Color.OFF}",
                    "Genius says song is an instrumental", name)
            continue

        # Saving tags and logging success
        if lyrics is not None:
            # Dealing with double lyrics tags: drop every existing lyrics
            # frame and save before adding the new one.
            # If this throws an error you should run print(audio_file.tag.frame_set.keys()) to check what tag to use instead
            # USLT is lyrics. b'USLT' means it's stored in bytes instead of as a string
            if b'USLT' in audio_file.tag.frame_set:
                del audio_file.tag.frame_set[b'USLT']
                audio_file.tag.save()
            audio_file.tag.lyrics.set(lyrics)
            audio_file.tag.save()

            success = f"{Color.GREEN}Success{Color.OFF}"
            if inexact == 1:
                # Success with inexact search
                marker = link(inexact_url, "(i)")
                _report(index, total, success,
                        "Lyrics from " + site_used + f" saved to {Color.YELLOW}" + marker + f"{Color.OFF}",
                        name)
            else:
                # Success with perfect match
                _report(index, total, success,
                        "Lyrics from " + site_used + " saved to", name)

        # Logging failures
        elif evenifunsynced != "y" and existing_lyrics:
            # No synced results when only synced results are allowed
            _report(index, total, f"{Color.YELLOW}Failed{Color.OFF}",
                    "No synced lyrics found, preserving", name)
        else:
            # No lyrics found at all
            _report(index, total, failed, "Lyrics not found for", name)


if __name__ == "__main__":
    main()


# To generate lrc files from AutoLyricize-processed audio files if needed (bash script, requires exiftool):
# for f in *; do lrc="$(exiftool -lyrics "$f" | tail -c +35 | sed 's/\.\./\n/g' | sed 's/\.\[/\n[/g')"; if [ -n "$lrc" ]; then echo "$lrc" > "${f%.*}".lrc; fi; done