├── lyrico ├── __init__.py ├── lyrico_sources │ ├── __init__.py │ ├── build_requests.py │ ├── lyrics_helper.py │ ├── lyricsmode.py │ ├── musix_match.py │ └── az_lyrics.py ├── __main__.py ├── config.ini ├── audio_format_keys.py ├── helper.py ├── lyrico.py ├── song_helper.py ├── config.py ├── song.py └── docopt.py ├── tests ├── __init__.py ├── lyrico_sources │ ├── __init__.py │ ├── test_az_lyrics.py │ ├── test_lyricsmode.py │ └── test_musix_match.py └── dummy.py ├── .gitattributes ├── setup.cfg ├── LICENSE ├── MANIFEST.in ├── .gitignore ├── requirements.txt ├── .editorconfig ├── lyrico-runner.py ├── setup.py └── README.rst /lyrico/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text eol=lf -------------------------------------------------------------------------------- /lyrico/lyrico_sources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/lyrico_sources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This project is in the public domain. 
class DummySong:
    """Minimal stand-in for a song object, used by the lyrics-source tests.

    It carries only the attributes the tests set up or inspect: ``artist``
    and ``title`` are supplied by the caller, while ``album``, ``lyrics``
    and ``error`` all start out as ``None``.
    """

    def __init__(self, artist, title):
        # Identifying metadata comes straight from the test case.
        self.artist, self.title = artist, title
        # Nothing has been looked up or downloaded yet.
        self.album = self.lyrics = self.error = None
class TestAzLyrics(unittest.TestCase):
    """Live check of the AZLyrics source against one known song."""

    def test_download_from_az_lyrics(self):
        # Expected opening and closing text of the downloaded lyrics.
        opening = 'You could go anywhere'
        closing = '\nAnd not make a sound'

        song = DummySong(u'Azure Ray', u'Don\'t Make A Sound')
        download_from_az_lyrics(song)

        self.assertIsNone(song.error)
        self.assertIsNotNone(song.lyrics)
        self.assertEqual(song.lyrics[:len(opening)], opening)
        self.assertEqual(song.lyrics[-len(closing):], closing)
# Tag names used by the Vorbis-comment based formats.
VORBIS_COMMENTS_KEYS = {
    'artist': 'artist',
    'title': 'title',
    'album': 'album',
    'lyrics': 'LYRICS',
}

# Atom names used by the MP4 container formats.
MP4_KEYS = {
    'artist': '\xa9ART',
    'title': '\xa9nam',
    'album': '\xa9alb',
    'lyrics': '\xa9lyr',
}


# Maps a file extension to the tag keys for that format. Extensions that
# share a tagging scheme point at the very same dictionary object.
FORMAT_KEYS = {
    # ID3 TAGS
    'mp3': {
        'artist': 'TPE1',
        'title': 'TIT2',
        'album': 'TALB',
        'lyrics': 'USLT',
    },
}
FORMAT_KEYS.update(dict.fromkeys(('mp4', 'm4a'), MP4_KEYS))
FORMAT_KEYS.update(
    dict.fromkeys(('flac', 'ogg', 'oga', 'opus'), VORBIS_COMMENTS_KEYS))
FORMAT_KEYS['wma'] = {
    'artist': 'Author',
    'title': 'Title',
    'album': 'WM/AlbumTitle',
    'lyrics': 'WM/Lyrics',
}
def get_lyrico_headers(site_name=None):
    """Return a new request-header dict with a randomly picked User-Agent.

    The module-level ``request_headers`` are deep-copied so that every
    source module gets its own dictionary and cannot modify the shared
    base headers.

    ``site_name`` is accepted for backward compatibility; this function
    does not use it.
    """
    headers_copy = copy.deepcopy(request_headers)
    # random.choice picks uniformly from the list, replacing the manual
    # randint(0, len - 1) index arithmetic.
    headers_copy['User-Agent'] = random.choice(user_agents)
    return headers_copy
def remove_accents(input_str):
    """
    Return ``input_str`` with accented characters replaced by plain ones.

    The text is decomposed with NFKD normalization and every combining
    character produced by the decomposition is then dropped.
    http://stackoverflow.com/a/517974/2426469
    """

    decomposed = unicodedata.normalize('NFKD', input_str)

    kept_chars = []
    for char in decomposed:
        # Combining marks (accents, diaereses, ...) have a non-zero class.
        if unicodedata.combining(char):
            continue
        kept_chars.append(char)

    return u"".join(kept_chars)
class BadConfigError(Exception):
    """Error carrying an error number and a value describing the problem.

    ``errno`` and ``value`` are stored as given. The string form of the
    exception is the ``repr`` of ``value``.
    """

    def __init__(self, errno, value):
        self.errno, self.value = errno, value

    def __str__(self):
        described = repr(self.value)
        return described
def sanitize_data(s):
    """Strip a string and collapse each run of white-space to one space.

    Returns ``None`` when ``s`` is empty, ``None`` or white-space only;
    otherwise returns the cleaned string.
    """
    if not s or s.isspace():
        return None

    # Trim both ends first, then squeeze inner spaces, tabs and newlines.
    return re.sub(r'\s+', ' ', s.strip())


def write_default_config(config_path):
    """Write a configuration file holding lyrico's default settings.

    ``config_path`` is the full path of the file to create. Any missing
    parent directory is created first, so the call also works when the
    directory that should hold the file does not exist yet. An existing
    file at ``config_path`` is overwritten.

    Raises ``OSError``/``IOError`` when the directory or the file cannot
    be created.
    """
    # Import ConfigParser
    try:
        # >3.2
        from configparser import ConfigParser
    except ImportError:
        # python27
        # Refer to the older SafeConfigParser as ConfigParser
        from ConfigParser import SafeConfigParser as ConfigParser

    # Sections and options are listed in the order they are written.
    # NOTE(review): 'lyrics_n_music' is not listed in the packaged
    # config.ini; confirm it is still read before removing it here.
    defaults = (
        ('actions', (
            ('save_to_file', 'True'),
            ('save_to_tag', 'False'),
            ('overwrite', 'False'),
        )),
        ('paths', (
            ('source_dir', 'None'),
            ('lyrics_dir', 'None'),
        )),
        ('sources', (
            ('lyrics_n_music', 'True'),
            ('musix_match', 'True'),
            ('lyricsmode', 'True'),
            ('az_lyrics', 'False'),
        )),
    )

    config = ConfigParser()
    for section, options in defaults:
        config.add_section(section)
        for option, value in options:
            config.set(section, option, value)

    # open() does not create directories. Create the parent ourselves and
    # tolerate another process creating it at the same moment
    # (os.makedirs has no exist_ok on Python 2.7).
    config_dir = os.path.dirname(config_path)
    if config_dir and not os.path.isdir(config_dir):
        try:
            os.makedirs(config_dir)
        except OSError:
            if not os.path.isdir(config_dir):
                raise

    with open(config_path, 'w') as configfile:
        config.write(configfile)
# Install dependencies from requirements.txt (install_requires is not working)
# With requirements.txt, win-unicode-console will only be installed for Windows users.
class MyInstall(install):
    """``install`` command that pip-installs requirements.txt first.

    The dependency step is best effort: it only works when
    ``requirements.txt`` is present in the current directory (installing
    from an sdist), and a failing pip run does not stop lyrico itself
    from being installed.
    """

    def run(self):
        # Run pip through the interpreter that is executing setup.py. A
        # bare 'pip' is whichever one comes first on PATH and may install
        # into a different Python than the one lyrico is installed for.
        status = call(
            [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'])
        if status != 0:
            sys.stderr.write(
                'warning: installing requirements.txt failed '
                '(pip exit status %s)\n' % status)

        # Run 'install' to install lyrico
        install.run(self)
include_package_data = True, 74 | package_data = { 75 | # If any package contains *.ini files, include them: 76 | '': ['*.ini'], 77 | }, 78 | 79 | ) 80 | -------------------------------------------------------------------------------- /lyrico/lyrico_sources/lyricsmode.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | """ 5 | This module downloads lyrics from LYRICSMODE. The URL format is: 6 | 7 | http://www.lyricsmode.com/lyrics/<first_char>/<artist>/<title>.html 8 | 9 | LYRICSMODE only uses lowercase alphanumeric ASCII in its URLs. It replaces spaces with 10 | underscores. It removes every non-alphanumeric except '-' from artist names. 11 | 12 | LYRICSMODE also replaces some accented characters in artist with non-accented. 13 | This module uses a correction mapping for known exceptions to artist names. 14 | """ 15 | 16 | from __future__ import print_function 17 | from __future__ import unicode_literals 18 | 19 | import re 20 | import string 21 | import requests 22 | 23 | try: 24 | from string import ascii_lowercase as LOWERCASE_CHARS 25 | except ImportError: 26 | # Python27 27 | from string import lowercase as LOWERCASE_CHARS 28 | 29 | from requests import ConnectionError, HTTPError, Timeout 30 | from bs4 import BeautifulSoup 31 | 32 | from .build_requests import get_lyrico_headers 33 | from .lyrics_helper import remove_accents, test_lyrics 34 | 35 | 36 | # Defining 'request_headers' outside download function makes a single profile 37 | # per lyrico operation and not a new profile per each download in an operation. 38 | request_headers = get_lyrico_headers() 39 | 40 | # This correction mapping is only valid for top approx 3000 artists which LYRICSMODE 41 | # displays as lists.
def download_from_lyricsmode(song=None):

    """
    Try to download the lyrics for ``song`` from LYRICSMODE.

    Takes a reference to the song object as input. On success the lyrics
    are stored in ``song.lyrics``, ``song.source`` is set to 'LrMOD' and
    ``song.error`` is cleared. On failure an error string is stored in
    ``song.error``; an error that is already present is kept when the page
    was fetched but held no usable lyrics.

    The function returns nothing; results are reported through the song
    object only.
    """

    # temp var to hold value for final checking
    lyrics = None
    not_found = 'Lyrics not found. Check artist or title name.'

    # Match everything except lowercase alphabets, numbers, spaces and dashes
    regex_non_alphanumeric = re.compile(r'[^a-z0-9\s\-]+')

    # Replace accented characters by non-accented before parsing regex
    artist = regex_non_alphanumeric.sub('', remove_accents(song.artist).lower())
    title = regex_non_alphanumeric.sub('', song.title.lower())

    # Match runs of spaces or dashes and replace them by underscores
    regex_underscores = re.compile(r'[\s\-]+')
    artist = regex_underscores.sub('_', artist)
    title = regex_underscores.sub('_', title)

    # Check for corrections
    artist = LYRICSMODE_CORRECTION.get(artist, artist)

    # Names written entirely in characters outside a-z/0-9 are reduced to
    # an empty string above. No URL can be built from them, and indexing
    # the first character of an empty artist would raise IndexError.
    if not artist or not title:
        song.error = not_found
        return

    # If the first char of artist is not an alphabet, use '0-9'
    first_artist_char = artist[0]
    if first_artist_char not in LOWERCASE_CHARS:
        first_artist_char = '0-9'

    lyricsmode_url = 'http://www.lyricsmode.com/lyrics/%s/%s/%s.html' % (first_artist_char, artist, title)
    try:
        print('\tTrying LYRICSMODE:', lyricsmode_url)

        res = requests.get(lyricsmode_url, headers = request_headers)
        res.raise_for_status()

    # Catch network errors
    except (ConnectionError, Timeout):
        song.error = 'No network connectivity.'
    except HTTPError:
        song.error = not_found

    # No exceptions raised and the HTML for lyrics page was fetched
    else:
        soup = BeautifulSoup(res.text, 'html.parser')

        # For lyricsmode, the lyrics are present in a div with id 'lyrics_text'.
        # find() returns None when the page has no such element, so check
        # before touching the tag.
        lyrics_text = soup.find(id='lyrics_text')
        if lyrics_text is not None:
            # Empty the nested divs so their text is not part of the lyrics.
            for tag in lyrics_text.find_all('div'):
                tag.clear()

            lyrics = lyrics_text.get_text().strip()

    # Final check
    if test_lyrics(lyrics):
        song.lyrics = lyrics
        song.source = 'LrMOD'
        song.error = None
    else:
        # Don't overwrite any previous errors
        if not song.error:
            song.error = not_found
def _mxm_track_data(payload):
    """Return the props.pageProps.data.trackInfo.data dict of a page payload, or {}."""
    node = payload
    for key in ('props', 'pageProps', 'data', 'trackInfo', 'data'):
        if not isinstance(node, dict):
            return {}
        node = node.get(key)
    return node if isinstance(node, dict) else {}


def _mxm_text(value):
    """Return ``value`` stripped when it is a string, otherwise an empty string."""
    strip = getattr(value, 'strip', None)
    return strip() if callable(strip) else ''


def download_from_musix_match(song):

    """
    Try to download the lyrics for ``song`` from musixmatch.

    Takes a reference to the song object as input. On success the lyrics
    are stored in ``song.lyrics``, ``song.source`` is set to 'mXm' and
    ``song.error`` is cleared. On failure an error string is stored in
    ``song.error``; an error that is already present is kept when the page
    was fetched but held no usable lyrics.

    The function returns nothing; results are reported through the song
    object only.
    """

    # temp var to hold value for final checking
    lyrics = None

    # Replace apostrophes with dashes '-'
    artist = song.artist.replace("'", '-')
    title = song.title.replace("'", '-')

    # some special characters found in songs
    title = title.replace('‐', '-')
    title = title.replace('’', '-')

    # This regex matches anything other than alphanumerics, spaces and dashes
    # and removes them. '+' (not '*') so that it never matches the empty string.
    # Make regex unicode aware 're.UNICODE' for Python27. It is redundant for Python3.
    regex_non_alphanum = re.compile(r'[^\w\s\-]+', re.UNICODE)
    artist = regex_non_alphanum.sub('', artist)
    title = regex_non_alphanum.sub('', title)

    # Replace spaces with dashes to improve URL logging.
    regex_spaces = re.compile(r'[\s]+', re.UNICODE)
    artist = regex_spaces.sub('-', artist)
    title = regex_spaces.sub('-', title)

    # On Python 2, quote() needs encoded byte strings.
    if sys.version_info[0] < 3:
        artist = artist.encode('utf-8')
        title = title.encode('utf-8')

    mxm_url = 'https://www.musixmatch.com/lyrics/%s/%s' % (quote(artist), quote(title))
    mxm_url = mxm_url.replace('--', '-')

    try:
        print('\tTrying musixmatch:', mxm_url)

        res = requests.get(mxm_url, headers = request_headers)
        res.raise_for_status()

    # Catch network errors
    except (ConnectionError, Timeout):
        song.error = 'No network connectivity.'
    except HTTPError as e:
        print(e)
        song.error = 'Lyrics not found. Check artist or title name.'

    # No exceptions raised and the HTML for lyrics page was fetched
    else:
        soup = BeautifulSoup(res.text, 'html.parser')

        lyric_text = ""
        for json_tag in soup.find_all(type='application/json'):
            try:
                payload = json.loads(json_tag.get_text())
            except ValueError:
                # Not parsable JSON; it cannot hold lyrics.
                continue

            # Any level of the payload may be missing, null or not a dict.
            track_data = _mxm_track_data(payload)

            if _mxm_text(track_data.get('type')) == 'restricted':
                song.error = 'Musixmatch may not show the lyrics'
                continue

            lyrics_info = track_data.get('lyrics')
            if not isinstance(lyrics_info, dict):
                continue

            body = _mxm_text(lyrics_info.get('body'))
            if body:
                # Only real text is appended, so JSON blocks without lyrics
                # cannot pile up bare newlines that pass the final check.
                lyric_text += body + "\n"

        lyrics = lyric_text if lyric_text else None

    # Final check
    if test_lyrics(lyrics):
        song.lyrics = lyrics
        song.source = 'mXm'
        song.error = None
    else:
        # Don't overwrite any previous errors
        if not song.error:
            song.error = 'Lyrics not found. Check artist or title name.'
def _run_lyrico(args):
    """Carry out the action selected by the parsed command line ``args``.

    ``args`` is the dictionary produced by docopt from the usage text in
    the module docstring. The function either shows or changes settings,
    or downloads lyrics for the songs in the source directory.
    """
    Config.load_config()

    if args['--settings']:
        # show current settings
        Config.show_settings()
        return

    if args['set']:
        # setting 'lyrics_dir' or 'source_dir'

        # This general try catch block is intended for os.makedirs call if
        # it raises OSError which is not due to directory already existing or
        # some other error than OSError
        try:
            Config.set_dir(args['<dir_type>'], args['<full_path_to_dir>'])
            Config.save()
        except Exception as e:
            print(e)
        return

    if args['enable'] or args['disable']:
        # setting 'save_to_file', 'save_to_tag' or 'overwrite'.
        # detect whether user wants to enable or disable a lyrico action
        update_type = 'enable' if args['enable'] else 'disable'
        Config.update_lyrico_actions(args['<lyrico_action>'], update_type)
        Config.save()
        return

    # User wants to download lyrics.

    if args['<source_dir>']:
        # if lyrico <source_dir> invocation is used:
        # update user's "source_dir" in config
        # update Config class' 'source_dir' class variable

        # Same reason for the general except as in the 'set' branch above.
        try:
            set_dir_success = Config.set_dir('source_dir', args['<source_dir>'])
        except Exception as e:
            print(e)
            # Don't go ahead with execution since user gave a bad path
            return

        # For this usage if user provides a non-existing dir, return by using
        # the boolean return value of Config.set_dir
        if not set_dir_success:
            return

    # settings changes are done, we need a valid config now
    if not Config.check():
        return

    song_list = [Song(song_path) for song_path in get_song_list(Config.source_dir)]
    print(len(song_list), 'songs detected.')
    print('Metadata extracted for', (str(Song.valid_metadata_count) + '/' + str(len(song_list))), 'songs.')
    for song in song_list:
        # Only download lyrics if 'title' and 'artist' is present
        # Error str is already present in song.error
        if song.artist and song.title:
            song.download_lyrics()

        # Show immediate log in console
        elif song.title:
            # If title was present, use that
            print(song.title, 'was ignored.', song.error)
        else:
            # else use audio file path
            print(song.path, 'was ignored.', song.error)

    print('\nBuilding log...')
    Song.log_results(song_list)
    print(
        '{songs} songs, {tagged} tagged, {files} lyric files, {existing} existing, {errors} errors'.format(
            songs = len(song_list),
            tagged = Song.lyrics_saved_to_tag_count,
            files = Song.lyrics_saved_to_file_count,
            existing = Song.lyrics_existing_count,
            errors = Song.lyrics_errors_count
        )
    )
    print('FINISHED')


def main():
    """Entry point of the ``lyrico`` command.

    Parses the command line with docopt and runs the requested action.
    On Windows the unicode console is enabled first and is always disabled
    again on the way out, including on early returns and when docopt exits
    for ``--help``/``--version``.
    """
    on_windows = platform.system() == 'Windows'

    # Fix console for windows users
    if on_windows:
        import win_unicode_console
        win_unicode_console.enable()

    try:
        args = docopt(__doc__, version = ('lyrico ' + __version__))
        _run_lyrico(args)
    finally:
        # Disable windows unicode console on every exit path
        if on_windows:
            win_unicode_console.disable()
request_headers = get_lyrico_headers()

# Holds corrections for artist names.
# key(artist name built from our song metadata): value(corresponding value used by AZLyrics)
AZLyrics_CORRECTION = {
    'the': 'thethe'
}


def download_from_az_lyrics(song):

    """
    Try to download the lyrics of 'song' from AZLyrics.

    Takes a reference to the song object as input. 'song.artist' and
    'song.title' are used to build the AZLyrics URL.

    On success 'song.lyrics' is set, 'song.error' is cleared and
    'song.source' is set to 'AZLr'. On failure only 'song.error' is set to a
    string describing the problem. Nothing is returned.
    """

    # temp var to hold value for final checking
    lyrics = None

    # Assume this won't work. Be a realist.
    error = 'Lyrics not found. Check artist or title name.'

    artist = song.artist
    title = song.title

    # A leading 'The' followed by exactly one space and a non-space character
    # is removed from the artist name ('The ' is 4 characters long).
    if re.match(r'The[ ]{1}\S', artist, re.IGNORECASE):
        artist = artist[4:]

    # Convert artist and title to lower case and strip off any
    # non-alphanumeric characters and '_'.
    # The pattern must be Unicode UNAWARE: Python 2 behaves that way by
    # default, Python 3 needs re.ASCII (which does not exist on Python 2).
    regex_url = re.compile(r'[\W_]+', getattr(re, 'ASCII', 0))

    artist = regex_url.sub('', artist.lower())
    title = regex_url.sub('', title.lower())

    # Apply the correction for the artist name if lyrico knows one
    artist = AZLyrics_CORRECTION.get(artist, artist)

    azlyrics_url = 'http://www.azlyrics.com/lyrics/%s/%s.html' % (artist, title)
    try:
        print('\tTrying AZLyrics:', azlyrics_url)

        # Without a timeout 'requests' waits forever on a stalled connection
        # and the 'Timeout' handler below could never run.
        res = requests.get(azlyrics_url, headers=request_headers, timeout=30)
        res.raise_for_status()

        # 'requests' was guessing the encoding from azlyrics as ISO-8859-1.
        # AZLyrics sends 'UTF-8' in its meta tag.
        # Force 'UTF-8'; it is used when 'res.text' is read to build 'soup'.
        res.encoding = 'utf-8'

    # Catch network errors
    except (ConnectionError, Timeout) as e:
        print(e)
        error = 'No network connectivity.'
    except HTTPError:
        # Already carrying error string
        pass
    except requests.RequestException as e:
        # Any other request failure (e.g. too many redirects) must not abort
        # the whole run; keep the default error string.
        print(e)

    # No exceptions raised and the HTML for lyrics was downloaded
    else:
        soup = BeautifulSoup(res.text, 'html.parser')
        lyric_tag = soup.find('div', class_=None, id=None)

        # 'find' returns None when the page has no matching <div>
        if lyric_tag is not None:
            lyrics = lyric_tag.get_text().strip()

    # Final check
    if test_lyrics(lyrics):
        song.lyrics = lyrics
        song.error = None
        song.source = 'AZLr'
    else:
        song.error = error
def check_siblings(sib, title, regex):

    """
    Check the conditions under which the buggy parsing of AZLyrics' HTML
    seems to work. Returns True only if the parsing conditions are the same
    as when tested during development, False otherwise.

    'sib' is the list of the 'lyricsh' div's siblings.
    'title' and 'regex' are the ones used to build the AZLyrics URL.

    Siblings that do not look like tags (no 'attrs' / 'get_text') make the
    function return False instead of raising AttributeError.
    """

    # The siblings list of 'lyricsh' should contain following structure:
    #   i : 'name'  'class'
    #
    #   0 : div     ['ringtone']
    #   1 : b       None
    #   2 : br      None
    #   3 : div     ['col-lg-2', 'text-center', 'hidden-md', 'hidden-sm', 'hidden-xs', 'noprint']
    #
    # The third member should be the buggy 'br' tag which contains the lyrics

    # At least 4 siblings are needed
    if not sib or len(sib) < 4:
        return False

    title_tag, lyrics_tag, jump_div = sib[1], sib[2], sib[3]
    if not (title_tag and lyrics_tag and jump_div):
        return False

    if getattr(lyrics_tag, 'name', None) != 'br':
        return False

    # Extract the class list of the <div> to which BeautifulSoup jumps
    # due to the buggy <br> tag
    jump_div_attrs = getattr(jump_div, 'attrs', None) or {}
    jump_div_class_list = jump_div_attrs.get('class')
    if not jump_div_class_list:
        return False

    # Check for required keywords in the class list (substring match on the
    # joined string, so 'hidden' also matches e.g. 'hidden-md')
    jump_div_class = ' '.join(jump_div_class_list)
    for keyword in ('noprint', 'hidden', 'col-lg-2'):
        if keyword not in jump_div_class:
            return False

    # sib[1] is a <b> tag which contains the title of the song.
    # Test it against the one used to build the URL using the same regex
    get_text = getattr(title_tag, 'get_text', None)
    if get_text is None:
        return False

    title_extracted = get_text()
    if title_extracted:
        title_extracted = regex.sub('', title_extracted.lower())

    # If all conditions are met the lyrics can be extracted out of the <br> tag
    return title_extracted == title
def get_key(tag, key, format):
    """
    Look up 'key' in the tag object 'tag' and return its sanitized value.

    'tag' is the dictionary-like object built by mutagen for the audio file
    (or None when the file could not be read).
    'key' is the format specific name of the property.
    'format' is the lowercase audio format ('mp3', 'wma', 'm4a', 'mp4',
    'flac', 'ogg', 'oga' or 'opus').

    Returns None when 'tag' is empty/None or, for mp3, when no frame exists
    for 'key'. Otherwise returns sanitize_data() of the value found (or of
    None when nothing was found).
    """

    if not tag:
        return None

    # extra keys to read from FLAC and ogg formats
    lyrics_keys = ['LYRICS', 'UNSYNCEDLYRICS', 'UNSYNCED LYRICS', 'SYNCED LYRICS']

    # data stores the raw result of the key lookup (a list or None),
    # result is the single value extracted from it.
    data = None
    result = None

    if format == 'mp3':
        # 'get' for mp3 tags is not fetching lyrics(None). Using getall instead.
        data = tag.getall(key)
        if not data:
            return None

        text = data[0].text
        if key == 'USLT':
            # for USLT(lyrics frame) only return lyrics if they exist
            result = text if text else None
        else:
            # for TPE1, TIT2, TALB frames the text field is a list itself,
            # so we look one list deeper. An empty list means "no value".
            result = text[0] if text else None

    elif format == 'wma':
        # For ASF frames key lookups are lists of ASFUnicodeAttribute objects
        # instead of Unicode objects; the Unicode value is in '.value'.
        data = tag.get(key)
        result = data[0].value if data else None

    else:
        # mp4, m4a, flac, ogg
        # For all these formats, the tag maps keys to lists.

        if format == 'm4a' or format == 'mp4':
            # For python27, encoding the key (a unicode object due to the
            # futures import) to 'latin-1' fixes the fetch from the dictionary.
            # mp4 standard uses latin-1 encoding for these tag names.
            # \xa9 is the copyright symbol in that encoding.
            if sys.version_info[0] < 3:
                key = key.encode('latin-1')
            data = tag.get(key)

        if format in ('flac', 'ogg', 'oga', 'opus'):
            if key == FORMAT_KEYS[format]['lyrics']:
                # Lyrics have no standard key in these formats: try the
                # known candidates (upper and lower case) and stop at the
                # first one holding a value.
                # 'LYRICS' is the standard used by 'lyrico'.
                for lr_key in lyrics_keys:
                    data = tag.get(lr_key) or tag.get(lr_key.lower())
                    if data:
                        break
            else:
                # Normal lookup for other properties
                data = tag.get(key)

        # data is a list containing the value, or None. Safely look it up.
        result = data[0] if data else None

    # return sanitized value of result
    return sanitize_data(result)
def extract_ogg_tag(path):

    """
    Read tags out of .ogg/.oga files encoded with different codecs.

    'path' is the path to the audio file.
    Returns a tuple (tag, error): 'tag' is the object built by the first
    reader that accepts the file and 'error' is None, or 'tag' is None and
    'error' is a message for the user when no reader accepts the file.
    """

    # Try ogg-Vorbis first, then ogg-FLAC
    for reader in (OggVorbis, OggFLAC):
        try:
            # Success is "the reader did not raise", not the truthiness of
            # the object. NOTE(review): mutagen file objects appear to be
            # dict-like and so falsy when they hold no tags; confirm.
            return (reader(path), None)
        except Exception:
            # Deliberately best-effort: move to next codec type
            continue

    # log error for user to see
    error = 'Unable to read metadata from the .ogg/.oga file. Only Vorbis and FLAC are supported.'
    return (None, error)
def get_song_data(path):

    """
    Extract song artist, album, title and lyrics, if present, from an
    audio file.

    This method is called by the constructor of the Song class, which uses
    the dict returned to instantiate song objects.

    'path' is the absolute path to the audio file.

    Returns a dict with the keys 'tag', 'artist', 'title', 'album', 'format',
    'lyrics_file_name', 'lyrics_file_path', 'lyrics_file_present',
    'lyrics_tag_present' and 'error'. Values that could not be determined
    are None (False for the two '*_present' flags).
    """

    tag = None
    artist = None
    title = None
    album = None
    lyrics = None

    lyrics_file_name = None
    lyrics_file_path = None

    error = None

    # format is the part of the string after the last '.' character
    # only use lowercase for formats
    song_format = path[path.rfind('.') + 1:].lower()

    try:
        if song_format == 'mp3':
            tag = ID3(path)
        elif song_format in ('mp4', 'm4a'):
            tag = MP4(path)
        elif song_format == 'flac':
            tag = FLAC(path)
        elif song_format == 'opus':
            tag = OggOpus(path)
        elif song_format == 'wma':
            tag = ASF(path)
        elif song_format in ('ogg', 'oga'):
            tag, error = extract_ogg_tag(path)
    except IOError:
        error = 'Unable to locate the file. Could have been moved during operation.'
    except MutagenError:
        error = 'Unable to read metadata. Unsupported codec or tag does not exist.'
    except Exception as e:
        error = str(e)
        print(e)
    else:
        # This only runs if reading tags creates no exceptions
        try:
            keys = FORMAT_KEYS[song_format]
        except KeyError:
            # No key table for this extension: report it instead of crashing
            error = 'Unsupported audio format: %s' % song_format
        else:
            artist = get_key(tag, keys['artist'], song_format)
            title = get_key(tag, keys['title'], song_format)
            album = get_key(tag, keys['album'], song_format)
            lyrics = get_key(tag, keys['lyrics'], song_format)

    # build filename and filepath
    # If the tag is not read, or either of artist name or title is not
    # present, those properties of the Song object stay None
    if artist and title:
        lyrics_file_name = '%s - %s.txt' % (artist, title)
        lyrics_file_path = os.path.join(Config.lyrics_dir, lyrics_file_name)
    elif not error:
        # Only log this error if the tags have been read correctly but
        # artist or title was simply not present in the tag.
        # Else the pre-existing error from reading the tags is kept.
        error = 'Artist name or song title not found.'

    # check if lyrics file already exists in LYRICS_DIR
    # ('lyric_files_in_dir' is None until the configuration has been loaded)
    existing_lyric_files = Config.lyric_files_in_dir or ()
    lyrics_file_present = lyrics_file_path in existing_lyric_files

    # check if lyrics already embedded in tag
    lyrics_tag_present = bool(lyrics)

    return {
        'tag': tag,
        'artist': artist,
        'title': title,
        'album': album,
        'format': song_format,

        'lyrics_file_name': lyrics_file_name,
        'lyrics_file_path': lyrics_file_path,

        'lyrics_file_present': lyrics_file_present,
        'lyrics_tag_present': lyrics_tag_present,

        'error': error,
    }
def get_song_list(path):

    """
    Return the list of paths to all valid audio files in the directory
    located at 'path', including files in inner directories.

    Valid audio formats are taken from Config.audio_formats. Each path is
    listed once, in the order it was found.
    """

    song_list = []
    seen = set()

    # Windows is case-insensitive towards extensions, so the lowercase
    # pattern already detects ex. .ogg and .OGG there. Elsewhere pattern
    # matching is case-sensitive (not only on Linux), so uppercase
    # extensions need their own pattern.
    match_uppercase = platform.system() != 'Windows'

    for ext in Config.audio_formats:
        patterns = ['**/*.' + ext]
        if match_uppercase:
            patterns.append('**/*.' + ext.upper())

        for pattern in patterns:
            for song_path in glob2.glob(os.path.join(path, pattern)):
                # never report the same file twice
                if song_path not in seen:
                    seen.add(song_path)
                    song_list.append(song_path)

    return song_list
class Config():

    """
    Class wrapper built around user settings loaded from
    config.ini

    All settings are stored as class variables and all methods are
    static methods.

    A Config object is never instantiated, only the class is imported
    into other modules to access class variables and methods.

    """

    # Audio formats supported are not loaded from config.ini

    # This list is used by the 'glob2' module to scan 'source_dir' for audio files.
    audio_formats = ['mp3', 'flac', 'm4a', 'mp4', 'ogg', 'oga', 'opus', 'wma']

    lyrics_dir = None
    source_dir = None

    save_to_file = True
    save_to_tag = False

    overwrite = False
    lyric_files_in_dir = None

    # Sources. Declared here so that check() and save() never hit a missing
    # attribute when load_config() has not run or has failed.
    musix_match = True
    lyricsmode = True
    az_lyrics = False

    # Parser object and path of config.ini, set by load_config()
    conf = None
    config_path = None

    @staticmethod
    def check():
        """
        Check if the configuration is valid.

        Prints what is wrong and returns False for the first problem found,
        returns True when the configuration can be used.
        """
        # This forces user to set dirs before running the app for first time.
        # 'not' also covers None (config never loaded), not just ''.
        if not Config.lyrics_dir:
            print('lyrics_dir is not set.')
            print('Please use the "set" command to set lyrics_dir.')
            print('use "lyrico --help" to view commands.')
            return False

        if not Config.source_dir:
            print('source_dir is not set.')
            print('Please use the "set" command to set source_dir or pass it as parameter.')
            print('use "lyrico --help" to view commands.')
            return False

        # if user disables both saving modes. Notify & force user to correct on next run.
        if not Config.save_to_file and not Config.save_to_tag:
            print('Both "save_to_file" and "save_to_tag" modes are disabled. Please enable one.')
            print('use "lyrico --help" to view commands.')
            return False

        # if user disables all sources. Notify & force user to enable one.
        if not (Config.az_lyrics or Config.musix_match or Config.lyricsmode):
            print('All lyrics sources are disabled. Please enable one.')
            print('use "lyrico --help" to view commands.')
            return False
        return True

    @staticmethod
    def load_config():
        """
        Called only once by main to read user settings from config.ini
        and save them to the class variables.

        Errors while reading or parsing the file are printed; settings read
        before the error are kept, the remaining ones keep their defaults.
        """
        try:
            conf = ConfigParser()

            config_path = get_config_path()
            conf.read(config_path)

            # save references to conf, and config_path in class variables
            Config.config_path = config_path
            Config.conf = conf

            Config.source_dir = conf.get('paths', 'source_dir')
            Config.lyrics_dir = conf.get('paths', 'lyrics_dir')

            Config.save_to_file = conf.getboolean('actions', 'save_to_file')
            Config.save_to_tag = conf.getboolean('actions', 'save_to_tag')

            Config.overwrite = conf.getboolean('actions', 'overwrite')

            # Load all the sources
            Config.musix_match = conf.getboolean('sources', 'musix_match')
            Config.lyricsmode = conf.getboolean('sources', 'lyricsmode')
            Config.az_lyrics = conf.getboolean('sources', 'az_lyrics')

            # Loading this with user config, we need to call the load_config only once at start.
            Config.lyric_files_in_dir = glob2.glob(os.path.join(Config.lyrics_dir, '**/*.txt'))

        # Catch file handling errors, missing sections/options (config parser
        # errors, as in save()) and values that are not valid booleans.
        except (IOError, BaseConfigParserError, ValueError) as e:
            print('Unable to load config.')
            print(e)

    @staticmethod
    def save():
        """
        Save the current settings to the configuration file.

        Returns True on success, False (after printing the error) otherwise.
        """
        if Config.conf is None:
            # load_config() never ran: there is nothing to write to
            print('Unable to save settings to config.')
            return False

        try:
            # paths
            Config.conf.set('paths', 'source_dir', Config.source_dir)
            Config.conf.set('paths', 'lyrics_dir', Config.lyrics_dir)

            # actions
            Config.setBool('actions', 'save_to_file', Config.save_to_file)
            Config.setBool('actions', 'save_to_tag', Config.save_to_tag)
            # 'overwrite' is loaded by load_config() and changed by
            # update_lyrico_actions(), so it has to be written back as well.
            Config.setBool('actions', 'overwrite', Config.overwrite)

            # sources
            Config.setBool('sources', 'musix_match', Config.musix_match)
            Config.setBool('sources', 'lyricsmode', Config.lyricsmode)
            Config.setBool('sources', 'az_lyrics', Config.az_lyrics)

            with open(Config.config_path, 'w') as configfile:
                Config.conf.write(configfile)
            return True

        # Catch all config parser errors and file handling errors
        except (BaseConfigParserError, IOError) as e:
            print('Unable to save settings to config.')
            print(e)
            return False

    @staticmethod
    def setBool(section, option, value):
        """Store 'value' as the string 'True' or 'False' under section/option."""
        Config.conf.set(section, option, 'True' if value else 'False')

    @staticmethod
    def set_dir(dir_type, path):

        """
        Takes an absolute path and saves it as 'source_dir' or 'lyrics_dir'
        class variable. 'path' is user input from the cmdline.

        A 'source_dir' must already exist. A 'lyrics_dir' is created when it
        does not exist; an OSError from that creation is re-raised unless the
        directory exists.

        Returns True when the directory was updated, False otherwise.
        """

        if dir_type not in ('source_dir', 'lyrics_dir'):
            print('Invalid "dir_type". Only "source_dir" or "lyrics_dir" are valid types.')
            print('You gave "dir_type":', dir_type)
            print('use "lyrico --help" to view commands.')
            return False

        if dir_type == 'source_dir':
            # Return if the path provided does not exist.
            # This improves the usage - lyrico <source_dir>
            if not os.path.isdir(path):
                print('"source_dir" does not exist. ', end="")
                print('You gave "source_dir":', path)
                print('Please enter path to an existing folder.')
                return False
            Config.source_dir = path
        else:
            # make directory if user is setting "lyrics_dir" and it does not exist.
            # Refer http://stackoverflow.com/a/14364249/2426469
            try:
                os.makedirs(path)
                print('Directory does not exist. Creating new one.')
            except OSError:
                if not os.path.isdir(path):
                    # this exception is handled by function calling set_dir
                    raise
            Config.lyrics_dir = path

        print(dir_type, 'updated.')
        if dir_type == 'source_dir':
            print('lyrico will scan the following folder for audio files:')
        else:
            print('lyrico will save lyrics files in the following folder:')
        print(' ', path)
        return True

    @staticmethod
    def update_lyrico_actions(target, update_type):
        """
        Enable or disable the action or source named 'target'.

        'target' must be a key of LYRICO_ACTIONS. 'update_type' equal to
        'enable' switches the target on, any other value switches it off.
        The matching class variable is updated and the change is printed.
        """

        if target not in LYRICO_ACTIONS:
            print('Invalid lyrico action change attempted')
            print('"save_to_file", "save_to_tag" and "overwrite" are the only settings that can be enabled or disabled.')
            print('"musix_match", "lyricsmode" and "az_lyrics" are the only sources that can be enabled or disabled.')
            print('You attempted to change:', target)
            print('use "lyrico --help" to view commands.')
            return

        # User is updating valid action/source
        enabled = update_type == 'enable'
        log_str = '' if enabled else 'not '

        setattr(Config, target, enabled)
        print(target, (update_type + 'd'))

        if target == 'save_to_file':
            print('lyrico will %ssave the downloaded lyrics to text files.' % log_str)

        elif target == 'save_to_tag':
            print('lyrico will %sembed the downloaded lyrics into song tags.' % log_str)

        elif target == 'overwrite':
            if update_type == 'disable':
                print('lyrico will detect the songs that already have lyrics, and will ignore them.')
            else:
                print('lyrico will download lyrics for all songs detected in "source_dir" and overwrite lyrics if already present.')
        else:
            # Action is to enable/disable a source.
            print('lyrico will %suse %s as a source for lyrics.' % (log_str, SOURCE_STR_MAP[target]))

    @staticmethod
    def show_settings():
        """Print every section of the loaded configuration with its items."""

        print('Your current settings:\n')
        # get list of sections in config
        for section in Config.conf.sections():
            # for each section get list of items.
            # items are returned as list of tuples of type (key, value)
            print(section.upper())
            for item in Config.conf.items(section):
                print(' ', item[0], '=', item[1])
            print('\n')
class Song():
    """Container objects representing each song globbed from source_dir"""

    # holds count for songs with valid metadata
    valid_metadata_count = 0

    # Count for songs whose lyrics are successfully saved to file.
    lyrics_saved_to_file_count = 0

    # Count for songs whose lyrics are successfully saved to tag.
    lyrics_saved_to_tag_count = 0

    # Number of errors during download or tagging
    lyrics_errors_count = 0

    # Number of songs that already had lyrics
    lyrics_existing_count = 0

    def __init__(self, path):
        """Build the song from the metadata of the audio file at 'path'."""

        self.path = path

        # extract data from song
        data = get_song_data(path)

        # Initialize instance variables from data extracted
        self.tag = data['tag']
        self.artist = data['artist']
        self.title = data['title']
        self.album = data['album']
        self.format = data['format']

        self.lyrics_file_name = data['lyrics_file_name']
        self.lyrics_file_path = data['lyrics_file_path']

        # If the required lyrics file is already present in LYRICS_DIR
        self.lyrics_file_present = data['lyrics_file_present']

        # If the required lyrics is already embedded in tag
        self.lyrics_tag_present = data['lyrics_tag_present']

        # Holds the downloaded lyrics
        self.lyrics = None

        # Final status to build log
        self.saved_to_tag = False
        self.saved_to_file = False
        self.source = None
        self.error = data['error']

        # Set to "ok" once the lyrics file has been written. Initialised
        # here so the attribute exists on every song.
        self.download_status = None

        # As the songs are read from the files, update the class variable.
        # This is count of songs that have valid artist and title.
        if self.title and self.artist:
            Song.valid_metadata_count += 1

    def download_lyrics(self):

        """
        Only called when song has artist and title.
        Tries the enabled sources in turn until one provides lyrics and
        calls self.save_lyrics to save them.

        """

        if not self.download_required():
            Song.lyrics_existing_count += 1
            print('\nSkipping', self.artist, '-', self.title)
            print('Lyrics already present.')
            return

        # At this point there is nothing in self.error
        print('\nDownloading:', self.artist, '-', self.title)

        # Only try other sources if required
        if not self.lyrics and Config.musix_match:
            download_from_musix_match(self)

        if not self.lyrics and Config.lyricsmode:
            download_from_lyricsmode(self)

        if not self.lyrics and Config.az_lyrics:
            download_from_az_lyrics(self)

        self.save_lyrics()

    def save_lyrics(self):

        """
        Called by self.download_lyrics to save lyrics according to
        Config.save_to_file, Config.save_to_tag settings.

        Handles the case if lyrics is not found. Logs errors to console
        and Song object.

        """

        if not self.lyrics:
            Song.lyrics_errors_count += 1
            print('Failed:', self.error)
            return

        if Config.save_to_file:
            self._save_lyrics_to_file()

        if Config.save_to_tag:
            self._save_lyrics_to_tag()

    def _save_lyrics_to_file(self):
        """Write artist, title, album and lyrics to self.lyrics_file_path."""
        try:
            with open(self.lyrics_file_path, 'w', encoding='utf-8') as f:
                f.write('Artist - ' + self.artist + '\n')
                f.write('Title - ' + self.title + '\n')
                f.write('Album - ' + (self.album or 'Unknown'))
                f.write('\n\n')
                f.write(self.lyrics)

            # update class variable
            Song.lyrics_saved_to_file_count += 1

            # update the Song instance flag
            self.saved_to_file = True

            self.download_status = "ok"
            print('Success: Lyrics saved to file.')

        except IOError as e:
            # Friendlier messages for the error numbers seen in practice;
            # anything else is reported as is.
            known_errors = {
                22: 'Cannot save lyrics to file. Unable to create file with song metadata.',
                13: 'Cannot save lyrics to file. The file is opened or in use.',
                2: '"lyrics_dir" does not exist. Please set a "lyrics_dir" which exists.',
            }
            err_str = known_errors.get(e.errno, str(e))

            self.error = err_str
            Song.lyrics_errors_count += 1
            print('Failed:', err_str)

    def _save_lyrics_to_tag(self):
        """Embed self.lyrics into the tag of the audio file and save it."""
        lyrics_key = FORMAT_KEYS[self.format]['lyrics']
        try:
            if self.format == 'mp3':
                # encoding = 3 for UTF-8
                self.tag.add(USLT(encoding=3, lang=u'eng', text=self.lyrics))

            elif self.format == 'm4a' or self.format == 'mp4':
                # lyrics_key = '\xa9lyr'
                if sys.version_info[0] < 3:
                    lyrics_key = lyrics_key.encode('latin-1')
                self.tag[lyrics_key] = self.lyrics

            # flac, opus and ogg/oga(Vorbis & FLAC) are all being read/written as
            # Vorbis Comments. Vorbis Comments don't have a standard 'lyrics' tag.
            # The 'LYRICS' tag is the most common non-standard tag used for lyrics.
            elif self.format in ('flac', 'ogg', 'oga', 'opus'):
                self.tag[lyrics_key] = self.lyrics

            elif self.format == 'wma':
                # ASF Format uses ASFUnicodeAttribute objects instead of Python's Unicode
                self.tag[lyrics_key] = ASFUnicodeAttribute(self.lyrics)

            self.tag.save()
            self.saved_to_tag = True
            Song.lyrics_saved_to_tag_count += 1

            print('Success: Lyrics saved to tag.')

        except MutagenError:
            err_str = 'Cannot save lyrics to tag. Codec/Format not supported'
            self.error = err_str
            Song.lyrics_errors_count += 1
            print('Failed:', err_str)

        except IOError:
            err_str = 'Cannot save lyrics to tag. The file is opened or in use.'
            self.error = err_str
            Song.lyrics_errors_count += 1
            print('Failed:', err_str)

    def download_required(self):
        """
        Checks if lyrics are required to be downloaded.
        Uses Config.save_to_file, Config.save_to_tag and Config.overwrite settings
        and returns True when download is required.

        """
        if Config.overwrite:
            # If user wants to overwrite existing lyrics, always download
            # and save according to Config.save_to_file, Config.save_to_tag settings
            return True

        # The user wants a lyrics file and it is not present in LYRICS_DIR
        file_required = Config.save_to_file and not self.lyrics_file_present

        # The user wants the lyrics in the tag and the tag has none saved
        tag_required = Config.save_to_tag and not self.lyrics_tag_present

        # If either is required, we need to make the download request.
        # Data is then saved accordingly to the settings.
        return file_required or tag_required

    def _save_status(self, enabled, saved):
        """Return the log status for one saving mode (file or tag)."""
        if not enabled:
            return 'Ignored'
        if not self.download_required():
            return 'Present'
        return 'Saved' if saved else 'Failed'

    def get_log_string(self):
        """
        returns the log string of the song which is used in final log.

        """
        template = '. \t{file}\t{tag}\t{source}\t\t{song}\t\t{error}\n'

        # The file and tag status each have 4 possible values
        # 'Saved' - File or tag was saved successfully
        # 'Failed' - Download or save failed. Show error.
        # 'Ignored' - Ignored according to Config.save_to_file, Config.save_to_tag setting by user.
        # 'Present' - Detected tag or file and download skipped by lyrico as per Config.overwrite setting.
        file_status = self._save_status(Config.save_to_file, self.saved_to_file)
        tag_status = self._save_status(Config.save_to_tag, self.saved_to_tag)

        # avoid exceptions raised for concatenating Unicode and None types
        if self.artist and self.title:
            song_name = self.artist + ' - ' + self.title
        else:
            song_name = self.path

        return template.format(
            file=file_status,
            tag=tag_status,
            source=self.source,
            song=song_name,
            error=self.error
        )

    @staticmethod
    def log_results(song_list):
        """
        Write 'log.txt' into Config.lyrics_dir.

        The log holds the counters of the run, one row for every Song object
        in 'song_list' and the keys explaining the status and source columns.
        An IOError while writing is reported on the console.
        """

        try:
            log_date = time.strftime("%H:%M:%S %d/%m/%y")
            log_file_name = 'log.txt'
            with open(os.path.join(Config.lyrics_dir, log_file_name), 'w', encoding='utf-8') as f:

                f.write('\t\t\t\tlyrico\n\n')

                f.write('Log Date ' + log_date + '\n')
                f.write('\n')

                f.write('Audio files detected: ' + str(len(song_list)))
                f.write('\n')

                f.write('Metadata extracted for: ' + str(Song.valid_metadata_count))
                f.write('\n')

                f.write('Lyrics files saved: ' + str(Song.lyrics_saved_to_file_count))
                f.write('\n')

                f.write('Tags saved: ' + str(Song.lyrics_saved_to_tag_count))
                f.write('\n\n')

                table_header = ' \t[FILE]\t[TAG]\t[SOURCE]\t\t\t[ARTIST-TITLE]\t\t\t\t[ERROR]\n'
                table_border = '='*100 + '\n'

                f.write(table_header)
                f.write(table_border)

                # write individual song log strings, numbered from 1
                for index_number, song in enumerate(song_list, 1):
                    f.write(str(index_number))
                    f.write(song.get_log_string())

                # Add STATUS KEY to log
                f.write('\n\n\t**** STATUS KEY ****\n')

                f.write("\t# 'Saved' - File or tag was saved successfully.")
                f.write("\n")

                f.write("\t# 'Failed' - Download or save failed. See error.")
                f.write("\n")

                f.write("\t# 'Ignored' - Ignored according to 'save_to_file', 'save_to_tag' setting.")
                f.write("\n")

                f.write("\t# 'Present' - Detected tag or file and skipped download as per 'overwrite' setting.")
                f.write("\n")

                # Add source key to log
                f.write('\n\n\t**** SOURCE KEY ****\n')

                f.write("\t# 'mXm' - musiXmatch")
                f.write("\n")

                f.write("\t# 'LrMOD' - LYRICSMODE")
                f.write("\n")

                f.write("\t# 'AZLr' - AZLyrics")
                f.write("\n\n")

                # Add credits
                f.write(table_border)

                f.write("'lyrico' has been built and is maintained by Abhimanyu Pathania.")
                f.write("\n\n")

                f.write('If you encounter a bug, please raise an issue on GitHub.')
                f.write("\n")

                f.write('\thttps://github.com/abhimanyuPathania/lyrico/issues')
                f.write("\n")

                f.write('Or you can mail me: abpindia1944@gmail.com')
                f.write("\n\n")

                f.write('Cheers!')
                f.write('\n\n\n\n')

        except IOError:
            print('Unable to build log.')
            print('"lyrics_dir" does not exist. Please set "lyrics_dir" to a folder which exists.')
12 | 13 | Support 14 | ========= 15 | 16 | - **Audio Formats** - mp3, flac, m4a, mp4, opus, wma, ogg/oga (Vorbis and FLAC). 17 | 18 | - **Python** - Python 2.7 and Python 3 (tested on Python 3.5 and Python 3.4) 19 | 20 | - **OS** - Windows, Linux (tested on Ubuntu). 21 | 22 | 23 | Installation 24 | ============= 25 | Use the standard ``pip`` install:: 26 | 27 | pip install lyrico 28 | 29 | This will also install the dependencies. Hence, it is recommended to install ``lyrico`` in a separate `virtual environment <https://pypi.python.org/pypi/virtualenv>`_. 30 | 31 | You can test if ``lyrico`` was installed correctly by running the 'lyrico' command, which now should be available:: 32 | 33 | lyrico 34 | 35 | This would give the following output:: 36 | 37 | source_dir is not set. Please use the "set" command to set source_dir. 38 | use "lyrico --help" to view commands. 39 | Your current settings: 40 | 41 | ACTIONS 42 | save_to_file = True 43 | save_to_tag = False 44 | overwrite = False 45 | 46 | 47 | PATHS 48 | source_dir = None 49 | lyrics_dir = None 50 | 51 | 52 | SOURCES 53 | musix_match = True 54 | lyricsmode = True 55 | az_lyrics = False 56 | 57 | If you get this screen, that means ``lyrico`` and its dependencies were installed correctly. 58 | 59 | 60 | If you see an error like ``ImportError: No module named mutagen.id3``, this means that the dependencies were not installed for some reason. In that case you can install them very easily with a single command. Here's what you do: 61 | 62 | 1. Go to ``lyrico``'s `GitHub page <https://github.com/abhimanyuPathania/lyrico>`_. 63 | 2. Download the repository as a ZIP and extract the ``requirements.txt`` file from it. It is in the root directory of the repository. This is the only file you need. 64 | 3.
Open command prompt in directory containing the ``requirements.txt`` and run following command (if you're using a virtual environment, activate it before running the command):: 65 | 66 | pip install -r requirements.txt 67 | 68 | This will install all of the ``lyrico``'s dependencies and now you can try testing with the 'lyrico' command. It should give no errors. 69 | 70 | 71 | Running ``lyrico`` 72 | ===================== 73 | ``lyrico`` operates using two directories (folders): 74 | 75 | - Source Directory (``source_dir``): This is the directory which ``lyrico`` scans for audio files. The scan also includes all the directories contained within. 76 | 77 | - Lyrics Directory (``lyrics_dir``): This is where ``lyrico`` will save the lyrics' text files. 78 | 79 | Before running ``lyrico`` you must set these using the ``set`` command. Values must be absolute paths to the directories. Once set, ``lyrico`` will remember your settings (which can be changed easily at any time). So this has to be done only for the first time. 80 | 81 | This is how an example first-run would look like on Windows. 82 | 83 | 1. Set the ``source_dir``:: 84 | 85 | lyrico set source_dir D:\test\Music 86 | 87 | This logs the following message:: 88 | 89 | source_dir updated. 90 | lyrico will scan the following folder for audio files: 91 | D:\test\Music 92 | 93 | When setting ``source_dir``, the directory must exist beforehand. ``lyrico`` will **not create** the ``source_dir`` for you. 94 | 95 | 2. Set the ``lyrics_dir``:: 96 | 97 | lyrico set lyrics_dir D:\test\Lyrics 98 | 99 | This logs the following in command prompt:: 100 | 101 | Directory does not exist. Creating new one. 102 | lyrics_dir updated. 103 | lyrico will save lyrics files in the following folder: 104 | D:\test\Lyrics 105 | 106 | Unlike ``source_dir``, when setting the ``lyrics_dir`` to folder that does not exist (as in this example); ``lyrico`` **will** create it for you. 107 | 108 | 3. 
Run lyrico:: 109 | 110 | lyrico 111 | 112 | This will start the application and it will start downloading the lyrics for songs that it detects in the ``source_dir``. You will be able to see the status (song name, lyrics URL) in the command prompt as it downloads, one at a time, the lyrics for each song. 113 | 114 | Finally it builds the log of the whole operation and saves it in the ``log.txt`` file. ``log.txt`` is located in your ``lyrics_dir``. 115 | 116 | 117 | Other Settings and Commands 118 | ============================= 119 | 120 | Basic settings like ``source_dir`` and ``lyrics_dir`` can be repeatedly changed using the ``set`` command as described in the example above. There are a few more settings that are available to control ``lyrico``'s actions. These actions can be either disabled or enabled. 121 | 122 | - ``save_to_file`` - When enabled, ``lyrico`` will save the lyrics downloaded to a text file and put it in the ``lyrics_dir``. The naming convention of the file is as follows: 123 | 124 | [artist name] - [title].txt 125 | 126 | where [artist name] and [title] are extracted from the song's metadata. If either of these is not found, lyrics won't be downloaded and you will see that in the final ``log.txt``. This naming convention in the current version cannot be changed. 127 | 128 | **enabled by default** 129 | 130 | - ``save_to_tag`` - When enabled, ``lyrico`` will embed the lyrics downloaded into song tags. ``lyrico`` uses the standard lyrics tags for different formats. This means, as long as your music player can read standard lyrics tags from the song's metadata, it should display them. 131 | 132 | **disabled by default** 133 | 134 | - ``overwrite`` - When enabled, ``lyrico`` will always download the lyrics for a song, ignoring that they might already be present in the lyrics tag or in the ``lyrics_dir`` as a text file. After the download, it overwrites any existing lyrics in the tag or the text file.
135 | 136 | This setting is meant to avoid repetitive download of lyrics. For example, if there is a song 'ABC' in the ``source_dir``. And ``overwrite`` is **disabled**. When ``lyrico`` is run, it will first look into ``lyrics_dir`` if it already has lyrics. If yes, then it would ignore the song. 137 | 138 | ``overwrite`` takes into account, the ``save_to_file`` and ``save_to_tag`` settings to decide what to do. For ``save_to_file``, it looks in ``lyrics_dir`` and for ``save_to_tag`` it searches for existing lyrics in songs's metadata. Whenever there is a void, download happens and old lyrics will be replaced by downloaded ones in both, text file and song metadata as per your settings. 139 | 140 | **disabled by default** 141 | 142 | The above three settings can be changed using ``enable`` and ``disable`` commands. This is how you will enable ``save_to_tag`` from its default 'disabled' setting:: 143 | 144 | lyrico enable save_to_tag 145 | 146 | This would log:: 147 | 148 | save_to_tag enabled 149 | lyrico will embed the downloaded lyrics into song tags. 150 | 151 | Similarly to disable ``save_to_file``:: 152 | 153 | lyrico disable save_to_file 154 | 155 | This gives following message in command prompt:: 156 | 157 | save_to_file disabled 158 | lyrico will not save the downloaded lyrics to text files. 159 | 160 | 161 | - *Viewing current settings* - To view current settings use the following command:: 162 | 163 | lyrico --settings 164 | 165 | - *Help* - You can always view all the commands by asking for the help screen:: 166 | 167 | lyrico --help 168 | 169 | - ``lyrico`` **quick invocation** - you can supply ``source_dir`` along with ``lyrico`` command. The following command:: 170 | 171 | lyrico full_path_to_source_dir 172 | 173 | is same as running the two commands:: 174 | 175 | lyrico set source_dir full_path_to_source_dir 176 | lyrico 177 | 178 | However this won't work for the very first run. 
When running ``lyrico`` for the first time after installation, the ``source_dir`` must be set explicitly using the ``set`` command. 179 | 180 | Lyrics Sources 181 | ================ 182 | ``lyrico`` uses the following sources from which it downloads the lyrics: 183 | 184 | 1. `musiXmatch <https://www.musixmatch.com/>`_ : ``musix_match`` 185 | 186 | 2. `LYRICSMODE <http://www.lyricsmode.com/>`_ : ``lyricsmode`` 187 | 188 | 3. `AZLyrics <http://www.azlyrics.com/>`_ : ``az_lyrics`` (**disabled by default**) 189 | 190 | The search order is the same as enumerated above and cannot be changed. You can, however, disable or enable any of the sources using the same ``enable`` and ``disable`` commands. When a source is disabled, it is simply skipped during the search. 191 | 192 | For example, to enable AZLyrics:: 193 | 194 | lyrico enable az_lyrics 195 | 196 | Use the command line name for the source, which is mentioned after the link to the source in the above list. This logs the following message indicating that ``az_lyrics`` will be used as a source:: 197 | 198 | az_lyrics enabled 199 | lyrico will use AZLyrics as a source for lyrics. 200 | 201 | Or to disable AZLyrics:: 202 | 203 | lyrico disable az_lyrics 204 | 205 | This logs the following message:: 206 | 207 | az_lyrics disabled 208 | lyrico will not use AZLyrics as a source for lyrics.
209 | 210 | 211 | Audio Formats and Tags 212 | ======================= 213 | Below is the table of supported audio formats and their supported tags: 214 | 215 | +--------------------------------------------+----------------------------------------------+ 216 | | Audio Format | Tag | 217 | +============================================+==============================================+ 218 | | flac | Vorbis Comments | 219 | +--------------------------------------------+----------------------------------------------+ 220 | | m4a, mp4 | MP4 Tags (iTunes metadata) | 221 | +--------------------------------------------+----------------------------------------------+ 222 | | mp3 | ID3 Tags | 223 | +--------------------------------------------+----------------------------------------------+ 224 | | ogg, oga | Vorbis Comments | 225 | +--------------------------------------------+----------------------------------------------+ 226 | | opus | Vorbis Comments | 227 | +--------------------------------------------+----------------------------------------------+ 228 | | wma | ASF | 229 | +--------------------------------------------+----------------------------------------------+ 230 | 231 | ``lyrico`` goodness 232 | ===================== 233 | 234 | Here are somethings that ``lyrico`` does well: 235 | 236 | - **No junk** - ``lyrico`` will not insert junk text into your lyrics files or audio tags. It won't create blank files or blank lyrics tags. Neither it would create lyrics files or tags containing errors etc. 237 | 238 | - **Language** - Since ``lyrico`` uses your song's artist name and title to construct the URLs; so as long as they are correct and the source has the lyrics, it would work no matter which language. 239 | 240 | - **foobar2000** - The poor performance of the `Lyric Show Panel 3 <https://www.foobar2000.org/components/view/foo_uie_lyrics3>`_ component was main reason I wrote this application. It simply won't work for me. 
``lyrico`` plays nicely with 'Lyric Show Panel'. ``lyrico``'s file-naming convention matches 'Lyric Show Panel's default settings. Just point 'Lyric Show Panel' to your ``lyrics_dir`` and you are done. 241 | 242 | I recommend simply removing all of the 'Lyric Show Panel' online sources and using offline mode (Tag search, Files search, Associations search) with ``lyrico``. It is the next best thing to automatic search, because 'Lyric Show Panel', on failure, embeds errors in lyrics files and tags! 243 | 244 | Even if you don't use foobar2000 or your music player cannot read lyrics from text files like that, you can always embed lyrics into tags which should work with any decent music player including **iTunes**. 245 | 246 | - **log.txt** - ``log.txt`` created at the end of every ``lyrico`` run is a nice way to see what you have fetched. It shows a list of every song present in ``source_dir`` along with the status of the download or any errors that happened. 247 | 248 | ``lyrico`` gotchas 249 | ==================== 250 | 251 | Here are a few points you should know before using ``lyrico``: 252 | 253 | - **Your tags** - ``lyrico`` uses metadata in your tags for building URLs. Hence your songs should be tagged with correct 'artist', 'title' information. 254 | 255 | ``lyrico`` also assumes that you're using standard tags for each format (container) of your songs. For example, ``lyrico`` assumes that your ``.mp3`` files are using the standard ``ID3`` tags and only reads metadata for those. If you are using something like an ``APEv2`` tag with an ``.mp3`` file, ``lyrico`` won't be able to read it and would log the pertinent error in the ``log.txt``. 256 | 257 | You don't need to be concerned about this unless you have forcibly embedded non-standard tags in your songs with some other software. *Table of supported tags for audio formats is given above.* 258 | 259 | - **ID3 tag versions** - ``lyrico`` will convert any old ID3 tag to ID3v2.4 if ``save_to_tag`` is enabled.
This is the default behavior of *mutagen*, the underlying dependency used by ``lyrico`` to read ID3 tags. 260 | 261 | This has never caused any problem for me to date. And from my understanding you should be using ID3v2.4 tags anyway. I have used ``lyrico`` on hundreds of mp3 files and had no issues. You can always test ``lyrico`` on a few songs and check. Or you can just disable ``save_to_tag``. 262 | 263 | - **Song metadata** - Lyrics are fetched using a URL generated from the song's artist name and title. This means that if the song has titles like: 264 | 265 | - ABC(acoustic) 266 | - ABC(live version) 267 | 268 | or an artist like: 269 | 270 | - XYZ(feat. Blah) 271 | 272 | the download might fail. Sometimes the artist name or title contains characters like '?'. For this, Windows won't be able to create the text file as it is a restricted character. But the lyrics will be downloaded anyway and saved to tag if ``save_to_tag`` is enabled. 273 | 274 | - **windows console** - If you are using Windows, like me, you must use some other font than the default 'raster fonts' in the command prompt to view in-prompt logging for songs using other characters than English in their metadata. 275 | 276 | But the problem does not end here. Even after enabling other allowed fonts like ``Consolas`` or ``Lucida Console``, you still won't be able to see in-prompt logging (you will see question marks or boxes) for Asian languages like Mandarin, Japanese, Korean etc. European languages, though, are displayed correctly. 277 | 278 | Despite any issues with windows console display, ``lyrico`` downloads and saves the lyrics correctly to files and tags. 279 | 280 | 281 | Dependencies 282 | ================ 283 | ``lyrico`` uses and thanks the following python packages: 284 | 285 | - `glob2 <https://pypi.python.org/pypi/glob2>`_: to allow simple recursive directory search in Python 2.7. 286 | 287 | - `requests <https://pypi.python.org/pypi/requests>`_: HTTP for Humans.
288 | 289 | - `mutagen <https://pypi.python.org/pypi/mutagen>`_: to read tags from audio files and embed lyrics in tags for multiple audio formats. 290 | 291 | - `beautifulsoup4 <https://pypi.python.org/pypi/beautifulsoup4>`_: to extract the lyrics. 292 | 293 | - `win_unicode_console <https://pypi.python.org/pypi/win_unicode_console>`_: because Python 2.7, Unicode and command prompt are a nightmare. 294 | 295 | 296 | - `docopt <https://pypi.python.org/pypi/docopt>`_: to create beautiful command-line interfaces. 297 | 298 | 299 | A note on mass downloading 300 | =========================== 301 | 302 | Since ``lyrico`` is simply scraping lyrics off the HTML pages of the sources, please don't set ``source_dir`` to a folder having thousands of songs. 303 | 304 | They might ban your bot. ``az_lyrics`` sometimes bans your IP (not sure if permanent) if you hit them with too many failed requests. Though, refreshing your IP by restarting your router or using a VPN solves that. Hence, ``az_lyrics`` as a source is disabled by default. Only use it if you are looking for recent lyrics. 305 | 306 | Also, downloading 1000s of lyrics will be slow since ``lyrico`` does not batch-download. It sends one request to one source at a time. This is by design. 307 | 308 | I personally use it on one or two albums at a time and keep checking for any errors in ``log.txt``.
309 | 310 | Integration tests 311 | ================= 312 | Run them:: 313 | 314 | $ python3 -m unittest discover 315 | 316 | Run a single test:: 317 | 318 | $ python3 -m unittest tests/lyrico_sources/test_musix_match.py -k test_download 319 | 320 | 321 | Changelog 322 | ========== 323 | - 0.7.0 2024-05 324 | 325 | - python3 compatibility 326 | - store configuration in correct folder depending on operating system 327 | - remove LYRICSnMUSIC (service shutdown) 328 | - remove LyricsWikia (service shutdown) 329 | - fixes for Musixmatch 330 | - fixes for AZLyrics 331 | - 0.6.0 2016-08 332 | 333 | - Added support for ``oga`` audio format. 334 | - Detect uppercase extensions in Linux. 335 | - 0.5.0 2016-02 336 | 337 | - Added musiXmatch and LYRICSMODE to sources. 338 | - Include detection for licensing errors. 339 | - 0.4.0 Added LYRICSnMUSIC and AZLyrics as sources. Expanded the command line interface to control sources. Added `requests <https://pypi.python.org/pypi/requests>`_ to dependencies. 340 | - 0.3.0 Added support for ``ogg`` and ``wma`` audio formats. Replaced ``UNSYNCED LYRICS`` with ``LYRICS`` tags to embed lyrics in Vorbis Comments. 341 | - 0.2.0 Added documentation and tutorial. 342 | - 0.1.0 Initial release. 343 | -------------------------------------------------------------------------------- /lyrico/docopt.py: -------------------------------------------------------------------------------- 1 | """Pythonic command-line interface parser that will make you smile. 
class Pattern(object):

    """Common base of all pattern-tree nodes.

    Two patterns compare equal, and hash alike, when their ``repr()``
    strings are identical.
    """

    def __eq__(self, other):
        """Return True when both objects have the same ``repr()``."""
        mine, theirs = repr(self), repr(other)
        return mine == theirs

    def __hash__(self):
        """Hash of the ``repr()`` string, consistent with ``__eq__``."""
        text = repr(self)
        return hash(text)

    def fix(self):
        """Run both fix-up passes on this pattern and return it."""
        self.fix_identities()
        self.fix_repeating_arguments()
        return self

    def fix_identities(self, uniq=None):
        """Make pattern-tree tips point to same object if they are equal.

        ``uniq`` is the list of canonical leaves; when omitted it is built
        from ``self.flat()``. Returns ``self`` for a node without
        ``children``; otherwise the children are rewritten in place and
        nothing is returned.
        """
        if not hasattr(self, 'children'):
            return self
        if uniq is None:
            uniq = list(set(self.flat()))
        for position, node in enumerate(self.children):
            if hasattr(node, 'children'):
                # Inner node: recurse, sharing the same canonical list.
                node.fix_identities(uniq)
                continue
            assert node in uniq
            self.children[position] = uniq[uniq.index(node)]

    def fix_repeating_arguments(self):
        """Fix elements that should accumulate/increment values.

        For every alternative of ``transform(self)``, a leaf occurring more
        than once gets a list value (Argument, or Option with an argument)
        or a zero counter (Command, or Option without an argument).
        Returns ``self``.
        """
        alternatives = [list(branch.children)
                        for branch in transform(self).children]
        for alternative in alternatives:
            repeated = [leaf for leaf in alternative
                        if alternative.count(leaf) > 1]
            for leaf in repeated:
                kind = type(leaf)
                if kind is Argument or (kind is Option and leaf.argcount):
                    if leaf.value is None:
                        leaf.value = []
                    elif type(leaf.value) is not list:
                        leaf.value = leaf.value.split()
                if kind is Command or (kind is Option and leaf.argcount == 0):
                    leaf.value = 0
        return self
class LeafPattern(Pattern):

    """Leaf/terminal node of a pattern tree.

    A leaf carries a ``name`` and a ``value``. Subclasses supply
    ``single_match(left)``, which ``match`` relies on.
    """

    def __init__(self, name, value=None):
        self.name = name
        self.value = value

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%r, %r)' % (class_name, self.name, self.value)

    def flat(self, *types):
        """Return ``[self]``, or ``[]`` when ``types`` is given and excludes this type."""
        if types and type(self) not in types:
            return []
        return [self]

    def match(self, left, collected=None):
        """Try to consume one element of ``left`` for this leaf.

        Returns ``(matched, left, collected)``. On failure the inputs are
        handed back unchanged. On success the matched element is removed
        from ``left``; it is appended to ``collected``, or, when this leaf's
        own value is an int or a list, merged into the already collected
        entry of the same name.
        """
        if collected is None:
            collected = []
        index, found = self.single_match(left)
        if found is None:
            return False, left, collected
        remaining = left[:index] + left[index + 1:]
        previous = [item for item in collected if item.name == self.name]
        if type(self.value) not in (int, list):
            # Plain leaf: every match is collected as its own entry.
            return True, remaining, collected + [found]
        # Counting (int) or accumulating (list) leaf.
        if type(self.value) is int:
            step = 1
        elif type(found.value) is str:
            step = [found.value]
        else:
            step = found.value
        if previous:
            previous[0].value += step
            return True, remaining, collected
        found.value = step
        return True, remaining, collected + [found]
class Required(BranchPattern):

    """Branch that matches only when each child matches, one after another."""

    def match(self, left, collected=None):
        """Match all children in order, threading ``left``/``collected`` through.

        Returns ``(True, left, collected)`` as left by the last child, or
        ``(False, left, collected)`` with the values this call started with
        as soon as one child fails.
        """
        if collected is None:
            collected = []
        remaining, gathered = left, collected
        for child in self.children:
            ok, remaining, gathered = child.match(remaining, gathered)
            if not ok:
                return False, left, collected
        return True, remaining, gathered
class Tokens(list):

    """List of string tokens with a cursor-like API and an error class.

    ``error`` is the exception type the parsing functions raise for this
    token stream: ``DocoptExit`` by default, ``DocoptLanguageError`` for
    streams built by ``from_pattern``.
    """

    def __init__(self, source, error=DocoptExit):
        """Fill the list from ``source``.

        ``source`` is split on whitespace when it has a ``split`` method
        (a string); otherwise its items are used as they are.
        """
        self += source.split() if hasattr(source, 'split') else source
        self.error = error

    @staticmethod
    def from_pattern(source):
        """Tokenize a usage pattern string.

        Brackets, parentheses, ``|`` and ``...`` are padded with spaces so
        they become tokens of their own; the text is then split on
        whitespace while ``<...>`` groups are kept together.
        """
        source = re.sub(r'([\[\]\(\)\|]|\.\.\.)', r' \1 ', source)
        # Raw string: '\s' and '\S' are invalid escapes in a normal literal
        # (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
        source = [s for s in re.split(r'\s+|(\S*<.*?>)', source) if s]
        return Tokens(source, error=DocoptLanguageError)

    def move(self):
        """Remove and return the first token, or None when empty."""
        return self.pop(0) if len(self) else None

    def current(self):
        """Return the first token without removing it, or None when empty."""
        return self[0] if len(self) else None
def parse_shorts(tokens, options):
    """shorts ::= '-' ( chars )* [ [ ' ' ] chars ] ;

    Consume one token of stacked short options (e.g. ``-abc``) from
    ``tokens`` and return the list of ``Option`` objects it stands for.

    ``options`` is the list of known options; an unknown short option is
    appended to it. Raises ``tokens.error`` when a short name is declared
    more than once in ``options`` or when an option that takes an argument
    has none available.
    """
    token = tokens.move()
    assert token.startswith('-') and not token.startswith('--')
    left = token.lstrip('-')
    parsed = []
    # Peel one character at a time off the stacked option string.
    while left != '':
        short, left = '-' + left[0], left[1:]
        similar = [o for o in options if o.short == short]
        if len(similar) > 1:
            raise tokens.error('%s is specified ambiguously %d times' %
                               (short, len(similar)))
        elif len(similar) < 1:
            # Unknown option: register it as a flag without argument.
            o = Option(short, None, 0)
            options.append(o)
            if tokens.error is DocoptExit:
                # A separate object with value True is returned, so the
                # registered option keeps its default value.
                o = Option(short, None, 0, True)
        else:
            # Work on a copy so the known option in `options` is not
            # modified. (Original author's note: why copying is necessary
            # here?)
            o = Option(short, similar[0].long,
                       similar[0].argcount, similar[0].value)
            value = None
            if o.argcount != 0:
                if left == '':
                    # Argument is expected in the next token.
                    if tokens.current() in [None, '--']:
                        raise tokens.error('%s requires argument' % short)
                    value = tokens.move()
                else:
                    # The rest of this token is the argument (-ovalue).
                    value = left
                    left = ''
            if tokens.error is DocoptExit:
                o.value = value if value is not None else True
        parsed.append(o)
    return parsed
def parse_atom(tokens, options):
    """atom ::= '(' expr ')' | '[' expr ']' | 'options'
             | long | shorts | argument | command ;

    Parse a single atom starting at the current token and return it as a
    list of patterns. Raises ``tokens.error`` when an opening ``(`` or
    ``[`` is not followed by its closing counterpart.
    """
    token = tokens.current()

    # Group: '(' ... ')' is Required, '[' ... ']' is Optional.
    if token in '([':
        tokens.move()
        closing, group_type = {'(': (')', Required), '[': (']', Optional)}[token]
        group = group_type(*parse_expr(tokens, options))
        if tokens.move() != closing:
            raise tokens.error("unmatched '%s'" % token)
        return [group]

    if token == 'options':
        tokens.move()
        return [OptionsShortcut()]

    if token.startswith('--') and token != '--':
        return parse_long(tokens, options)

    if token.startswith('-') and token not in ('-', '--'):
        return parse_shorts(tokens, options)

    # <angle-bracketed> or UPPERCASE words are arguments, the rest commands.
    if (token.startswith('<') and token.endswith('>')) or token.isupper():
        leaf_type = Argument
    else:
        leaf_type = Command
    return [leaf_type(tokens.move())]
430 | 431 | If options_first: 432 | argv ::= [ long | shorts ]* [ argument ]* [ '--' [ argument ]* ] ; 433 | else: 434 | argv ::= [ long | shorts | argument ]* [ '--' [ argument ]* ] ; 435 | 436 | """ 437 | parsed = [] 438 | while tokens.current() is not None: 439 | if tokens.current() == '--': 440 | return parsed + [Argument(None, v) for v in tokens] 441 | elif tokens.current().startswith('--'): 442 | parsed += parse_long(tokens, options) 443 | elif tokens.current().startswith('-') and tokens.current() != '-': 444 | parsed += parse_shorts(tokens, options) 445 | elif options_first: 446 | return parsed + [Argument(None, v) for v in tokens] 447 | else: 448 | parsed.append(Argument(None, tokens.move())) 449 | return parsed 450 | 451 | 452 | def parse_defaults(doc): 453 | defaults = [] 454 | for s in parse_section('options:', doc): 455 | # FIXME corner case "bla: options: --foo" 456 | _, _, s = s.partition(':') # get rid of "options:" 457 | split = re.split('\n[ \t]*(-\S+?)', '\n' + s)[1:] 458 | split = [s1 + s2 for s1, s2 in zip(split[::2], split[1::2])] 459 | options = [Option.parse(s) for s in split if s.startswith('-')] 460 | defaults += options 461 | return defaults 462 | 463 | 464 | def parse_section(name, source): 465 | pattern = re.compile('^([^\n]*' + name + '[^\n]*\n?(?:[ \t].*?(?:\n|$))*)', 466 | re.IGNORECASE | re.MULTILINE) 467 | return [s.strip() for s in pattern.findall(source)] 468 | 469 | 470 | def formal_usage(section): 471 | _, _, section = section.partition(':') # drop "usage:" 472 | pu = section.split() 473 | return '( ' + ' '.join(') | (' if s == pu[0] else s for s in pu[1:]) + ' )' 474 | 475 | 476 | def extras(help, version, options, doc): 477 | if help and any((o.name in ('-h', '--help')) and o.value for o in options): 478 | print(doc.strip("\n")) 479 | sys.exit() 480 | if version and any(o.name == '--version' and o.value for o in options): 481 | print(version) 482 | sys.exit() 483 | 484 | 485 | class Dict(dict): 486 | def __repr__(self): 487 | 
return '{%s}' % ',\n '.join('%r: %r' % i for i in sorted(self.items())) 488 | 489 | 490 | def docopt(doc, argv=None, help=True, version=None, options_first=False): 491 | """Parse `argv` based on command-line interface described in `doc`. 492 | 493 | `docopt` creates your command-line interface based on its 494 | description that you pass as `doc`. Such description can contain 495 | --options, <positional-argument>, commands, which could be 496 | [optional], (required), (mutually | exclusive) or repeated... 497 | 498 | Parameters 499 | ---------- 500 | doc : str 501 | Description of your command-line interface. 502 | argv : list of str, optional 503 | Argument vector to be parsed. sys.argv[1:] is used if not 504 | provided. 505 | help : bool (default: True) 506 | Set to False to disable automatic help on -h or --help 507 | options. 508 | version : any object 509 | If passed, the object will be printed if --version is in 510 | `argv`. 511 | options_first : bool (default: False) 512 | Set to True to require options precede positional arguments, 513 | i.e. to forbid options and positional arguments intermix. 514 | 515 | Returns 516 | ------- 517 | args : dict 518 | A dictionary, where keys are names of command-line elements 519 | such as e.g. "--verbose" and "<path>", and values are the 520 | parsed values of those elements. 521 | 522 | Example 523 | ------- 524 | >>> from docopt import docopt 525 | >>> doc = ''' 526 | ... Usage: 527 | ... my_program tcp <host> <port> [--timeout=<seconds>] 528 | ... my_program serial <port> [--baud=<n>] [--timeout=<seconds>] 529 | ... my_program (-h | --help | --version) 530 | ... 531 | ... Options: 532 | ... -h, --help Show this screen and exit. 533 | ... --baud=<n> Baudrate [default: 9600] 534 | ... 
''' 535 | >>> argv = ['tcp', '127.0.0.1', '80', '--timeout', '30'] 536 | >>> docopt(doc, argv) 537 | {'--baud': '9600', 538 | '--help': False, 539 | '--timeout': '30', 540 | '--version': False, 541 | '<host>': '127.0.0.1', 542 | '<port>': '80', 543 | 'serial': False, 544 | 'tcp': True} 545 | 546 | See also 547 | -------- 548 | * For video introduction see http://docopt.org 549 | * Full documentation is available in README.rst as well as online 550 | at https://github.com/docopt/docopt#readme 551 | 552 | """ 553 | argv = sys.argv[1:] if argv is None else argv 554 | 555 | usage_sections = parse_section('usage:', doc) 556 | if len(usage_sections) == 0: 557 | raise DocoptLanguageError('"usage:" (case-insensitive) not found.') 558 | if len(usage_sections) > 1: 559 | raise DocoptLanguageError('More than one "usage:" (case-insensitive).') 560 | DocoptExit.usage = usage_sections[0] 561 | 562 | options = parse_defaults(doc) 563 | pattern = parse_pattern(formal_usage(DocoptExit.usage), options) 564 | # [default] syntax for argument is disabled 565 | #for a in pattern.flat(Argument): 566 | # same_name = [d for d in arguments if d.name == a.name] 567 | # if same_name: 568 | # a.value = same_name[0].value 569 | argv = parse_argv(Tokens(argv), list(options), options_first) 570 | pattern_options = set(pattern.flat(Option)) 571 | for options_shortcut in pattern.flat(OptionsShortcut): 572 | doc_options = parse_defaults(doc) 573 | options_shortcut.children = list(set(doc_options) - pattern_options) 574 | #if any_options: 575 | # options_shortcut.children += [Option(o.short, o.long, o.argcount) 576 | # for o in argv if type(o) is Option] 577 | extras(help, version, argv, doc) 578 | matched, left, collected = pattern.fix().match(argv) 579 | if matched and left == []: # better error message if left? 580 | return Dict((a.name, a.value) for a in (pattern.flat() + collected)) 581 | raise DocoptExit() 582 | --------------------------------------------------------------------------------