├── .gitignore
├── requirements.txt
├── template.env
├── README.md
└── main.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | .vscode/


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.11.1
2 | eyed3==0.9.6
3 | python-dotenv==0.20.0
4 | requests==2.27.1
5 | colorist==1.8.3
6 | 


--------------------------------------------------------------------------------
/template.env:
--------------------------------------------------------------------------------
1 | GENIUS_ACCESS_TOKEN="client access token goes here"
2 | 
3 | HEADER="go to https://whatmyuseragent.com/ and copy the top field here."
4 | 
5 | I_WANT_SYNCED_LYRICS=if this is not "True" then Lyricsify searches will not run.
6 | 
7 | STATIC_WORKING_DIR="if you want to always run in the same folder, put the full path here. You can override this by manually entering a path at runtime."
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AutoLyricize
 2 | 
 3 | > Automatically find and embed song lyrics.
 4 | 
 5 | This script scans a specified directory for audio files, and for each file, finds lyrics from Lyricsify.com or Genius.com (as a fallback), and saves them to the file's metadata.
 6 | 
 7 | Perfect for use with [Spotiflyer](https://github.com/Shabinder/SpotiFlyer/), [Retro Music Player](https://github.com/RetroMusicPlayer/RetroMusicPlayer), and [Syncthing](https://github.com/syncthing/syncthing) to reduce your dependence on music streaming services.
 8 | 
 9 | ## Setup
10 | 
11 | 1. Install [Python](https://www.python.org/).
12 | 2. Install dependencies by running `pip install -r requirements.txt`.
13 | 3. Copy `template.env` to a new file called `.env` and add a valid [Genius.com](https://docs.genius.com/) access token to it (or set the appropriate environment variable some other way).
14 |     - If no token is provided, only [Lyricsify.com](https://www.lyricsify.com/) will be used as a data source.
15 |     - Other useful fields and instructions can be found in the same file.
16 | 
17 | ## Usage
18 | 
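19 | Run `python main.py "path/to/folder"`. If no path is given, the script falls back to the `STATIC_WORKING_DIR` value from your `.env` file.
20 | *Useful tip for noobs: you can drag a folder into the terminal window to paste its full path.* 👍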
21 | 
22 | ## Limitations
23 | 
24 | - Inexact search uses Genius's search system, which loves to give you results that have absolutely nothing to do with the original track. If you use inexact search with instrumental tracks or albums, you are going to get garbage data.
25 | - Only works with mp3 files due to a limitation in the eyed3 library.
26 | - Only supports one lyrics field in the metadata! The script will DELETE all the others. If you have multiple lyrics fields for different languages, kiss them goodbye.
27 | 
28 | This script took a little under 3 hours to run for my 5000-song library.
29 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This script scans a specified directory for audio files, and for each file, 
  3 | finds lyrics from Lyricsify.com or Genius.com (as a fallback), 
  4 | and saves them to the file's metadata.
  5 | """
  6 | 
  7 | import sys
  8 | import urllib
  9 | import json
 10 | from bs4 import BeautifulSoup
 11 | import requests
 12 | import os
 13 | import re
 14 | import eyed3
 15 | from colorist import Color
 16 | from dotenv import load_dotenv
 17 | load_dotenv()
 18 | 
 19 | 
 20 | 
 21 | 
 22 | 
 23 | 
 24 | def lyricsify_find_song_lyrics(query):
 25 |     """
 26 |     Return song lyrics from Lyricsify.com for the first song found using the provided search string.
 27 |     If not found, return None.
 28 |     """
 29 |     # Search Lyricsify for the song using web scraping
 30 |     global inexact
 31 |     inexact = 0
 32 |     link = BeautifulSoup(
 33 |         requests.get(url="https://www.lyricsify.com/lyrics/" +
 34 |                      query.lower().replace(
 35 |                          " - ", "/").replace(" ", "-"),
 36 |                      headers={
 37 |                          "User-Agent": os.getenv("HEADER")
 38 |                      }).text,
 39 |         "html.parser")
 40 |     divs = link.find_all("div", id=re.compile(r"lyrics_.*_details"))# The site obfuscates(?) the div name but we can bypass this with the power of regex
 41 |     
 42 |     # If not found, return None
 43 |     if divs is None:
 44 |         return None
 45 |     # Scrape the song html for the lyrics text
 46 |     try: song_html=str('\n'.join(str(divs[0]).split('\n')[1:-1]).replace("<br/>",""))
 47 |     except:
 48 |         return None
 49 |     return(song_html[song_html.find("[ar: "):])
 50 | 
 51 | 
 52 | 
 53 | 
 54 | 
 55 | inexact_url=""
 56 | def genius_find_song_lyrics(query, access_token):
 57 |     """
 58 |     Return song lyrics from Genius.com for the first song found using the provided search string.
 59 |     If not found, return None.
 60 |     Requires a Genius.com access token.
 61 |     """
 62 |     headers = {
 63 |         "User-Agent": os.getenv("HEADER"),
 64 |         "Authorization": "Bearer " + access_token,
 65 |     }
 66 |     results = json.loads(requests.get(url="https://api.genius.com/search?q=" + urllib.parse.quote(query), headers={
 67 |         "Authorization": "Bearer " + access_token,
 68 |         "User-Agent": os.getenv("HEADER")
 69 |     }).text)
 70 |     if len(results["response"]["hits"]) <= 0:
 71 |         return None
 72 |     song = results["response"]["hits"][0]["result"]
 73 |     query_lower = query.lower()
 74 |     
 75 |     # Use Genius sucky search if you can and there's no exact match
 76 |     # Also sets variables to make this more transparent
 77 |     global inexact
 78 |     global inexact_url
 79 |     inexact = 0
 80 |     if query_lower.find(song["title"].lower()) <= 0 or query_lower.find(song["primary_artist"]["name"].lower()) <= 0:
 81 |         if requireexact == "y":
 82 |             return None
 83 |         inexact = 1
 84 |         inexact_url = song["url"]
 85 |  
 86 |     # Scrape the song URL for the lyrics text
 87 |     page = requests.get(song["url"], headers=headers)
 88 |     html = BeautifulSoup(page.text, "html.parser")
 89 |     if (html.find("div", id="cloudflare_content")):
 90 |         raise Exception(
 91 |             f"{Color.RED}Scraping encountered Cloudflare and cannot continue.{Color.OFF}")
 92 |     target_divs = html.find_all("div", {'data-lyrics-container': "true"})
 93 |     lyrics = []
 94 |     
 95 |     # Processing the fetched data
 96 |     for div in target_divs:    
 97 |         if div is None:
 98 |             return None
 99 |         else:
100 |             lyrics = "\n".join("\n".join(div.strings) for div in target_divs).split("\n")
101 |     final_lyrics = "\n".join(lyrics)
102 |     if final_lyrics == "":
103 |         inexact = 0
104 |         return "inst"
105 |     final_lyrics = final_lyrics.replace("(\n","(").replace("\n)",")").replace(" \n"," ").replace("\n "," ").replace("\n]","]").replace("\n,",",").replace("\n'\n","\n'").replace("\n\n[","\n[").replace("\n[","\n\n[") 
106 |     # Removing unwanted line breaks lol
107 |     return final_lyrics
108 | 
109 | # Hyperlink on inexact match
def link(uri, label=None):
    """
    Build a terminal hyperlink (OSC 8 escape sequence) that points at ``uri``.

    ``label`` is the visible text; when it is None the URI itself is shown.
    The sequence has the form: OSC 8 ; params ; URI ST <label> OSC 8 ;; ST,
    with an empty params field.
    """
    visible_text = uri if label is None else label
    params = ''
    osc8 = '\033]8;'   # start of an OSC 8 hyperlink command
    st = '\033\\'      # string terminator
    return f'{osc8}{params};{uri}{st}{visible_text}{osc8};{st}'
117 | 
118 | 
119 | 
120 | 
# Start of the main script
if (len(sys.argv) < 2) and not os.getenv("STATIC_WORKING_DIR"):
    raise NameError(
        "The song directory path has not been provided as a parameter.")
# A path given on the command line overrides STATIC_WORKING_DIR
if len(sys.argv) >= 2:
    song_dir = sys.argv[1]
else:
    song_dir = os.getenv("STATIC_WORKING_DIR")

# Set by the lookup functions: 1 when the last Genius hit was an inexact match
inexact = 0


# Tallying all the tracks
# Kept in memory rather than in temporary files in the working directory, so
# nothing is left open or left behind, and the walk cannot pick the lists up.
song_paths = []
song_names = []
for folder, _subs, files in os.walk(song_dir):
    for file in files:
        song_paths.append(os.path.join(folder, file))
        song_names.append(file)
total = len(song_paths)


def _report(index, message, name):
    """Print one progress line for the file at zero-based position ``index``."""
    print(str(index + 1) + "\tof " + str(total) + message + name)


# Environment variables and user input
# "or None" covers both an unset variable and an empty string
genius_access_token = os.getenv("GENIUS_ACCESS_TOKEN") or None
if genius_access_token is None:
    print(f"{Color.YELLOW}The GENIUS_ACCESS_TOKEN environment variable has not been defined. Genius searches will not be conducted.{Color.OFF}")

want_synced = os.getenv("I_WANT_SYNCED_LYRICS") == "True"

# Defaults so every setting is defined whatever path the prompts take
overwrite = "n"
evenifunsynced = "n"
requireexact = "y"

if total == 0:
    print("Directory is empty or does not exist.")
else:
    yesno = ["y", "n"]
    overwrite = input("Overwrite current lyrics? y/N ").strip().lower()
    if overwrite not in yesno:
        print(f"{Color.YELLOW}Interpreting unknown response as no{Color.OFF}")
        overwrite = "n"
    if overwrite == "y":
        if want_synced:
            answer = input("Even if the new ones are unsynced? y/N ").strip().lower()
            if answer not in yesno:
                print(f"{Color.YELLOW}Intepreting unknown response as no{Color.OFF}")
            evenifunsynced = "y" if answer == "y" else "n"
        else:
            # Lyricsify is disabled, so Genius (unsynced) is the only source;
            # overwriting necessarily means accepting unsynced lyrics.
            evenifunsynced = "y"
    requireexact = input("Require exact artist and title? (Recommended with large folders!!!) Y/n ").strip().lower()
    if requireexact not in yesno:
        print(f"{Color.YELLOW}Interpreting unknown response as yes{Color.OFF}")
        requireexact = "y"
    print("\n")

# To suppress CRC check failed warnings - as a pre-existing CRC issue should not affect lyrics
eyed3.log.setLevel("ERROR")

# Matches a parenthesised group such as " (feat. ...)"; removing it improves search results
parenthesised = re.compile(r" \([^)]+\)")

for i, (song_path, song_name) in enumerate(zip(song_paths, song_names)):
    # Reset per-file state so nothing carries over from the previous file
    lyrics = None
    site_used = ""
    inexact = 0

    try:
        audio_file = eyed3.load(song_path)
    except Exception:
        _report(i, f" : {Color.RED}Failed{Color.OFF}  : File does not appear to exist        : ", song_name)
        continue
    if audio_file is None:
        _report(i, f" : {Color.RED}Failed{Color.OFF}  : Unsupported file format              : ", song_name)
        continue

    # A file may have no tag at all, in which case there are no lyrics to read
    tag = audio_file.tag
    try:
        existing_lyrics = "".join(lyric.text for lyric in tag.lyrics) if tag is not None else ""
    except Exception:
        # Best effort: unreadable lyrics frames are treated as "no lyrics"
        existing_lyrics = ""
    if len(existing_lyrics) > 0 and overwrite != 'y':
        _report(i, f" : {Color.YELLOW}Skipped{Color.OFF} : File already has lyrics              : ", song_name)
        continue

    artist = tag.artist if tag is not None else None
    title = tag.title if tag is not None else None
    if artist is not None and title is not None:
        query = parenthesised.sub("", artist + " - " + title)
    else:
        # No usable tag info: search on the file name without its extension
        query = parenthesised.sub("", os.path.splitext(song_name)[0])
        _report(i, f" : {Color.YELLOW}Warning{Color.OFF} : No info, setting query to filename   : ", song_name)

    # Calling Lyricsify script
    if want_synced:
        site_used = "Lyricsify"
        try:
            lyrics = lyricsify_find_song_lyrics(query)
        except Exception:
            print(f"{Color.RED}Error getting Lyricsify lyrics for: " + song_name + f"{Color.OFF}")
            raise

    # Calling Genius script
    if lyrics is None and genius_access_token is not None and (len(existing_lyrics) == 0 or evenifunsynced == "y"):
        site_used = "Genius   "
        try:
            lyrics = genius_find_song_lyrics(query, genius_access_token)
        except Exception:
            print(f"{Color.RED}Error getting Genius lyrics for: " + song_name + f"{Color.OFF}")
            raise

    # Logging failures
    if lyrics is None:
        if evenifunsynced != "y" and len(existing_lyrics) > 0:
            # No synced results when only synced results are allowed
            _report(i, f" : {Color.YELLOW}Failed{Color.OFF}  : No synced lyrics found, preserving   : ", song_name)
        else:
            # No lyrics found at all
            _report(i, f" : {Color.RED}Failed{Color.OFF}  : Lyrics not found for                 : ", song_name)
        continue

    # Files without any tag need one created before lyrics can be stored
    if audio_file.tag is None:
        audio_file.initTag()

    # Dealing with double lyrics tags. These were a pain in the ass
    # NOTE(review): this also runs for the "inst" result below, so existing
    # lyrics are removed when Genius reports an instrumental - confirm intended.
    if b'USLT' in audio_file.tag.frame_set:
        del audio_file.tag.frame_set[b'USLT']
        audio_file.tag.save()  # Utterly villainous way to delete the previous lyrics
        # If this throws an error you should run print(audio_file.tag.frame_set.keys()) to check what tag to use instead
        # USLT is lyrics. b'USLT' means it's stored in bytes instead of as a string

    # Instrumental
    if lyrics == "inst":
        _report(i, f" : {Color.YELLOW}Success{Color.OFF} : Genius says song is an instrumental  : ", song_name)
        continue

    # Saving tags and logging success
    audio_file.tag.lyrics.set(lyrics)
    audio_file.tag.save()
    if inexact == 1:
        # Success with inexact search
        _report(i, f" : {Color.GREEN}Success{Color.OFF} : Lyrics from " + site_used + f" saved to  {Color.YELLOW}" + link(inexact_url, "(i)") + f"{Color.OFF}  : ", song_name)
    else:
        # Success with perfect match
        _report(i, f" : {Color.GREEN}Success{Color.OFF} : Lyrics from " + site_used + " saved to       : ", song_name)
273 | 
274 | 
275 | # To generate lrc files from AutoLyricize-processed audio files if needed (bash script, requires exiftool):
276 | # for f in *; do lrc="$(exiftool -lyrics "$f" | tail -c +35 | sed 's/\.\./\n/g' | sed 's/\.\[/\n[/g')"; if [ -n "$lrc" ]; then echo "$lrc" > "${f%.*}".lrc; fi; done
277 | 
278 | # This file really didn't need this many comments did it
279 | 


--------------------------------------------------------------------------------