├── lyrico
    ├── __init__.py
    ├── lyrico_sources
    │   ├── __init__.py
    │   ├── build_requests.py
    │   ├── lyrics_helper.py
    │   ├── lyricsmode.py
    │   ├── musix_match.py
    │   └── az_lyrics.py
    ├── __main__.py
    ├── config.ini
    ├── audio_format_keys.py
    ├── helper.py
    ├── lyrico.py
    ├── song_helper.py
    ├── config.py
    ├── song.py
    └── docopt.py
├── tests
    ├── __init__.py
    ├── lyrico_sources
    │   ├── __init__.py
    │   ├── test_az_lyrics.py
    │   ├── test_lyricsmode.py
    │   └── test_musix_match.py
    └── dummy.py
├── .gitattributes
├── setup.cfg
├── LICENSE
├── MANIFEST.in
├── .gitignore
├── requirements.txt
├── .editorconfig
├── lyrico-runner.py
├── setup.py
└── README.rst


/lyrico/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text eol=lf


--------------------------------------------------------------------------------
/lyrico/lyrico_sources/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/lyrico_sources/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This project is in the public domain. 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include lyrico/lyrico_sources/*.py
3 | include lyrico/*.ini
4 | include requirements.txt


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # general things to ignore
 2 | build/
 3 | dist/
 4 | *.egg-info/
 5 | *.egg
 6 | *.eggs/
 7 | *.py[cod]
 8 | __pycache__/
 9 | *.so
10 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | glob2
2 | requests>=2.9.1
3 | mutagen>=1.31
4 | beautifulsoup4>=4.4.1
5 | win-unicode-console>=0.4; sys_platform == 'win32'
6 | appdirs>=1.4.3
7 | 


--------------------------------------------------------------------------------
/lyrico/__main__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | 
4 | """lyrico.__main__: executed when lyrico directory is called as script."""
5 | 
6 | 
7 | from .lyrico import main
8 | main()
9 | 


--------------------------------------------------------------------------------
/tests/dummy.py:
--------------------------------------------------------------------------------
class DummySong:
	"""Minimal stand-in for a song object, used by the lyric-source tests."""

	def __init__(self, artist, title):
		# Metadata supplied by the test case.
		self.artist, self.title = artist, title
		# Everything else starts out unset.
		self.album = self.lyrics = self.error = None
8 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # http://editorconfig.org/
 2 | root = true
 3 | 
 4 | [*]
 5 | end_of_line = lf
 6 | insert_final_newline = true
 7 | 
 8 | [*.py]
 9 | charset = utf-8
10 | indent_style = tab
11 | tab_width = 4
12 | 


--------------------------------------------------------------------------------
/lyrico/config.ini:
--------------------------------------------------------------------------------
 1 | [actions]
 2 | save_to_file = True
 3 | save_to_tag = False
 4 | overwrite = False
 5 | 
 6 | [paths]
 7 | source_dir = None
 8 | lyrics_dir = None
 9 | 
10 | [sources]
11 | musix_match = True
12 | lyricsmode = True
13 | az_lyrics = False
14 | 


--------------------------------------------------------------------------------
/lyrico-runner.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | 
 5 | """Convenience wrapper for running lyrico directly from source tree."""
 6 | 
 7 | import sys
 8 | from lyrico.lyrico import main
 9 | 
10 | 
11 | if __name__ == '__main__':
12 |     main()
13 | 


--------------------------------------------------------------------------------
/tests/lyrico_sources/test_az_lyrics.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from tests.dummy import DummySong
 3 | from lyrico.lyrico_sources.az_lyrics import download_from_az_lyrics
 4 | 
 5 | class TestAzLyrics(unittest.TestCase):
 6 | 
 7 | 	def test_download_from_az_lyrics(self):
 8 | 		song = DummySong(u'Azure Ray', u'Don\'t Make A Sound')
 9 | 		download_from_az_lyrics(song)
10 | 		self.assertIsNone(song.error)
11 | 		self.assertIsNotNone(song.lyrics)
12 | 		self.assertEqual(song.lyrics[0:21], 'You could go anywhere')
13 | 		self.assertEqual(song.lyrics[-21:], '\nAnd not make a sound')
14 | 


--------------------------------------------------------------------------------
/tests/lyrico_sources/test_lyricsmode.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from tests.dummy import DummySong
 3 | from lyrico.lyrico_sources.lyricsmode import download_from_lyricsmode
 4 | 
 5 | class TestLyricsmode(unittest.TestCase):
 6 | 
 7 | 	def test_download_from_lyricsmode(self):
 8 | 		song = DummySong(u'Azure Ray', u'4th of july')
 9 | 		download_from_lyricsmode(song)
10 | 		self.assertIsNone(song.error)
11 | 		self.assertIsNotNone(song.lyrics)
12 | 		self.assertEqual(song.lyrics[0:24], 'We met on that wednesday')
13 | 		self.assertEqual(song.lyrics[-31:], 'I know this love will never die')
14 | 


--------------------------------------------------------------------------------
/lyrico/audio_format_keys.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | 	This module only holds the keys used to extract data from
 4 | 	mutagen's tag objects for the supported audio formats.
 5 | """
 6 | 
 7 | from __future__ import print_function
 8 | from __future__ import unicode_literals
 9 | 
10 | 
# Tag keys shared by every format listed under VORBIS_COMMENTS_KEYS below.
VORBIS_COMMENTS_KEYS = {
	'artist': 'artist',
	'title': 'title',
	'album': 'album',
	'lyrics': 'LYRICS',
}

# Tag keys shared by the 'mp4' and 'm4a' extensions.
MP4_KEYS = {
	'artist': '\xa9ART',
	'title': '\xa9nam',
	'album': '\xa9alb',
	'lyrics': '\xa9lyr',
}

# ID3 tags (mp3 only).
_MP3_KEYS = {
	'artist': 'TPE1',
	'title': 'TIT2',
	'album': 'TALB',
	'lyrics': 'USLT',
}

# wma only.
_WMA_KEYS = {
	'artist': 'Author',
	'title': 'Title',
	'album': 'WM/AlbumTitle',
	'lyrics': 'WM/Lyrics',
}

# Maps a file extension to the tag keys used for that format.
# Extensions of the same family point at the very same dict object.
FORMAT_KEYS = {'mp3': _MP3_KEYS}
FORMAT_KEYS.update(dict.fromkeys(('mp4', 'm4a'), MP4_KEYS))
FORMAT_KEYS.update(dict.fromkeys(('flac', 'ogg', 'oga', 'opus'), VORBIS_COMMENTS_KEYS))
FORMAT_KEYS['wma'] = _WMA_KEYS
52 | 


--------------------------------------------------------------------------------
/lyrico/lyrico_sources/build_requests.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from __future__ import print_function
 3 | from __future__ import unicode_literals
 4 | 
 5 | import copy
 6 | import random
 7 | 
 8 | 
 9 | user_agents = [
10 | 	'Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0',
11 | 	'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
12 | ]
13 | 
14 | request_headers = {
15 | 	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
16 | 	'Accept-Encoding': 'gzip, deflate',
17 | 	'Accept-Language': 'en-GB,en-US;q=0.8,en;q=0.6',
18 | 	'DNT': '1',
19 | }
20 | 
21 | # randint inculdes both upper and lower bounds
22 | 
23 | def get_lyrico_headers(site_name=None):
24 | 
25 | 	# Since each module requesting from different souce uses the same
26 | 	# request headers for a lyrico operation, make deep copies of base headers
27 | 	# before giving it to modules.
28 | 
29 | 	headers_copy = copy.deepcopy(request_headers)
30 | 	headers_copy['User-Agent'] = user_agents[random.randint(0, (len(user_agents) - 1))]
31 | 	return headers_copy
32 | 
def test_req_dic():
	"""Debug helper: print the shared base request headers to stdout."""
	base_headers = request_headers
	print(base_headers)
35 | 


--------------------------------------------------------------------------------
/tests/lyrico_sources/test_musix_match.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from tests.dummy import DummySong
 3 | from lyrico.lyrico_sources.musix_match import download_from_musix_match
 4 | 
 5 | class TestMusixMatch(unittest.TestCase):
 6 | 
 7 | 	def test_download_from_musix_match(self):
 8 | 		song = DummySong('Sarah Connor', 'Unendlich')
 9 | 		download_from_musix_match(song)
10 | 		self.assertIsNone(song.error)
11 | 		self.assertIsNotNone(song.lyrics)
12 | 		self.assertEqual(song.lyrics[0:21], 'Immer wenn ich tiefer')
13 | 		self.assertEqual(song.lyrics[-12:], ', unendlich\n')
14 | 
15 | 	def test_download_from_musix_match_single_quote_end_of_word(self):
16 | 		song = DummySong('Ronan Keating', "Lovin' Each Day")
17 | 		download_from_musix_match(song)
18 | 		self.assertIsNone(song.error)
19 | 		self.assertIsNotNone(song.lyrics)
20 | 		self.assertEqual(song.lyrics[0:15], 'Ah c′mon, yeah\n')
21 | 		self.assertEqual(song.lyrics[-26:], 'Oh, baby, I need you here\n')
22 | 
23 | 	def test_download_from_musix_match_eminem_unauthorized(self):
24 | 		song = DummySong('Eminem', 'The Real Slim Shady')
25 | 		download_from_musix_match(song)
26 | 		self.assertEqual(song.error, 'Musixmatch may not show the lyrics')
27 | 


--------------------------------------------------------------------------------
/lyrico/lyrico_sources/lyrics_helper.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from __future__ import print_function
 5 | from __future__ import unicode_literals
 6 | 
 7 | import unicodedata
 8 | 
 9 | 
def remove_accents(input_str):

	"""
		Return input_str with accented characters replaced by their
		non-accented base characters.

		The string is NFKD-normalized and every combining character is dropped.
		http://stackoverflow.com/a/517974/2426469
	"""

	decomposed = unicodedata.normalize('NFKD', input_str)
	base_chars = (ch for ch in decomposed if unicodedata.combining(ch) == 0)
	return u"".join(base_chars)
19 | 
def test_lyrics(lyrics):

	"""
		Decide whether downloaded text looks like usable lyrics.

		Returns False when:
		  - lyrics is None or empty,
		  - it contains the licensing-restriction notice
		    'We are not in a position to display these lyrics due to
		    licensing restrictions. Sorry for the inconvinience.'
		    (in any of its single-line or two-line forms), or
		  - it has fewer than 4 newline characters.

		Returns True otherwise.
	"""

	if not lyrics:
		return False

	# Core phrase of the licensing notice. Every longer variant of the notice
	# contains this phrase, so one substring test covers all of them.
	license_notice = 'display these lyrics due to licensing restrictions'
	if license_notice in lyrics:
		return False

	# Text with fewer than 4 line breaks is rejected as well.
	return lyrics.count('\n') >= 4


# Despite its 'test_' prefix this is a runtime helper, not a test case.
# Tell pytest not to collect it when a test module imports this name.
test_lyrics.__test__ = False
45 | 


--------------------------------------------------------------------------------
/lyrico/helper.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | 	Contains general helper functions and Error classes.
 5 | """
 6 | 
 7 | from __future__ import print_function
 8 | from __future__ import unicode_literals
 9 | 
10 | import sys
11 | import re
12 | import os
13 | from appdirs import *
14 | 
15 | 
class BadConfigError(Exception):
	"""
		Error carrying an error number and a value describing a bad config.

		Attributes:
		  errno -- the error number passed by the raiser.
		  value -- the value passed by the raiser; str(error) is repr(value).
	"""

	def __init__(self, errno, value):
		# Initialise the base class so that .args is populated; without this
		# the exception has empty args and an uninformative repr.
		Exception.__init__(self, errno, value)
		self.value = value
		self.errno = errno

	def __str__(self):
		return repr(self.value)
23 | 
24 | 
def get_config_path():

	"""
		Return the path of the per-user lyrico config file.

		The path is user_config_dir("lyrico") with ".ini" appended, i.e. a file
		named after the config directory rather than a file inside it.
		If no file exists at that path, the parent directory is created when
		missing and a config file with default settings is written there.
	"""
	config_path = user_config_dir("lyrico") + ".ini"
	if not os.path.isfile(config_path):
		# The parent directory may not exist yet; opening the file for
		# writing would fail in that case.
		parent_dir = os.path.dirname(config_path)
		if parent_dir and not os.path.isdir(parent_dir):
			os.makedirs(parent_dir)
		write_default_config(config_path)

	return config_path
37 | 
def sanitize_data(s):
	"""
		Normalise white-space in a string.

		Returns None for a falsy or all-white-space input. Otherwise returns
		the string with leading/trailing white-space removed and every run of
		white-space collapsed to a single space.
	"""

	if s and not s.isspace():
		return re.sub(r'\s+', ' ', s.strip())

	return None
52 | 
def write_default_config(config_path):
	"""
		Write a config file containing lyrico's default settings.

		config_path -- path of the file to create (or overwrite).
	"""

	try:
		# Python 3
		from configparser import ConfigParser
	except ImportError:
		# Python 2.7: use the older SafeConfigParser under the same name
		from ConfigParser import SafeConfigParser as ConfigParser

	# (section, ((option, value), ...)) in the order they are written.
	defaults = (
		('actions', (
			('save_to_file', 'True'),
			('save_to_tag', 'False'),
			('overwrite', 'False'),
		)),
		('paths', (
			('source_dir', 'None'),
			('lyrics_dir', 'None'),
		)),
		('sources', (
			('lyrics_n_music', 'True'),
			('musix_match', 'True'),
			('lyricsmode', 'True'),
			('az_lyrics', 'False'),
		)),
	)

	config = ConfigParser()
	for section, options in defaults:
		config.add_section(section)
		for option, value in options:
			config.set(section, option, value)

	with open(config_path, 'w') as configfile:
		config.write(configfile)
85 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | """setup.py: setuptools control. Always build with Python3"""
 5 | 
 6 | 
 7 | import sys
 8 | import re
 9 | from setuptools import setup, find_packages
10 | from subprocess import call
11 | from setuptools.command.install import install
12 | 
13 | 
# Read __version__ out of lyrico/lyrico.py as text, without importing the
# package. The pattern is a raw string so that '\s' reaches the regex engine
# instead of being treated as an (invalid) string escape.
with open('lyrico/lyrico.py') as version_file:
    version = re.search(
        r'^__version__\s*=\s*"(.*)"',
        version_file.read(),
        re.M
        ).group(1)


# http://rst.ninjs.org/?n=86de1f4d5843b454098745d4a6026376&theme=basic
with open("README.rst", "rb") as f:
    long_descr = f.read().decode("utf-8")
24 |     
25 | 
26 | # Install dependencies from requirements.txt (install_requires is not working)
27 | # With requirements.txt, win-unicode-console will only be installed for Windows users.
class MyInstall(install):
    """Install command that first installs requirements.txt via pip."""

    def run(self):
        # Run pip as a module of the interpreter executing setup.py, so the
        # dependencies land in the same environment as lyrico itself (a bare
        # 'pip' on PATH may belong to a different Python).
        # Only works when the user installs from an sdist.
        # The exit status is ignored, as before: this is a best-effort step.
        call([sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'])

        # Run the regular 'install' to install lyrico
        install.run(self)
36 | 
37 | 
setup(
    name = "lyrico",
    # Discover 'lyrico' and its subpackages (e.g. lyrico.lyrico_sources)
    # instead of listing only the top-level package; the test suite is not
    # shipped.
    packages = find_packages(exclude=['tests', 'tests.*']),
    entry_points = {
        "console_scripts": ['lyrico = lyrico.lyrico:main']
        },

    cmdclass={'install': MyInstall},

    version = version,
    description = "A simple command-line lyrics downloader.",
    long_description = long_descr,
    keywords='lyrics audio foobar2000 tags mp3',
    classifiers=[
        'Development Status :: 3 - Alpha',

        'Intended Audience :: End Users/Desktop',
        'Topic :: Multimedia :: Sound/Audio',

        # NOTE(review): the metadata declares MIT; confirm this matches the
        # text of the LICENSE file shipped with the project.
        'License :: OSI Approved :: MIT License',
        'Natural Language :: English',

        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',

        'Operating System :: Microsoft',
        'Operating System :: Unix',
    ],

    author = "Abhimanyu Pathania",
    author_email = "abpindia1944@gmail.com",
    url = "https://github.com/abhimanyuPathania/lyrico",
    license='MIT',

    include_package_data = True,
    package_data = {
        # If any package contains *.ini files, include them:
        '': ['*.ini'],
    },

    )
80 | 


--------------------------------------------------------------------------------
/lyrico/lyrico_sources/lyricsmode.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | 
  4 | """
  5 | 	This module downloads lyrics from LYRICSMODE. The URL format is:
  6 | 
  7 | 		http://www.lyricsmode.com/lyrics/<first char of artist>/<artist>/<title>.html
  8 | 
  9 | 	LYRICSMODE only uses lowercase alphanumeric ascii in its urls. It replaces spaces with
 10 | 	underscores. It removes every non-alphanumeric except '-' from artist names.
 11 | 	
 12 | 	LYRICSMODE also replaces some accented characters in artist with non-accented.
 13 | 	Uses correction mapping for known exception to artist names.
 14 | """
 15 | 
 16 | from __future__ import print_function
 17 | from __future__ import unicode_literals
 18 | 
 19 | import re
 20 | import string
 21 | import requests
 22 | 
 23 | try:
 24 | 	from string  import ascii_lowercase as LOWERCASE_CHARS
 25 | except ImportError:
 26 | 	# Python27
 27 | 	from string  import lowercase as LOWERCASE_CHARS
 28 | 
 29 | from requests import ConnectionError, HTTPError, Timeout
 30 | from bs4 import BeautifulSoup
 31 | 
 32 | from .build_requests import get_lyrico_headers
 33 | from .lyrics_helper import remove_accents, test_lyrics
 34 | 
 35 | 
# Defining 'request_headers' at module level (outside the download function)
# makes a single header profile per lyrico operation, not a new profile for
# each download in an operation.
request_headers = get_lyrico_headers()

# Maps the artist slug built by download_from_lyricsmode (key) to the slug
# that is used in the LYRICSMODE URL instead (value).
# This correction mapping is only valid for the top approx. 3000 artists,
# which LYRICSMODE displays as lists.
LYRICSMODE_CORRECTION = {
	'the_all_american_rejects': 'all_american_rejects',
	'acdc':'ac_dc',
	'die_arzte': 'die_rzte',
	'gilbert_becaud': 'gilbert_bcaud',
	'yo': 'y'
}
 49 | 
 50 | def download_from_lyricsmode(song=None):
 51 | 	
 52 | 	"""
 53 | 		Takes reference to the song object as input and
 54 | 		adds lyrics to self.lyrics or add error string to self.error
 55 | 		property of the song object. 
 56 | 	"""
 57 | 
 58 | 
 59 | 	# temp var to hold value for final checking
 60 | 	lyrics = None
 61 | 
 62 | 	# Match everything accept lowercase alphabets, numbers, spaces and dashes
 63 | 	regex_non_alphanumeric = re.compile(r'[^a-z0-9\s\-]+')
 64 | 
 65 | 	# Replace accented characters by non-accented before parsing regex
 66 | 	artist = regex_non_alphanumeric.sub('', remove_accents(song.artist).lower())
 67 | 	title = regex_non_alphanumeric.sub('', song.title.lower())
 68 | 
 69 | 	# Match multiple spaces or dashes and replace them by underscores 
 70 | 	regex_underscores = re.compile(r'[\s|\-]+')
 71 | 	artist = regex_underscores.sub('_', artist)
 72 | 	title = regex_underscores.sub('_', title)
 73 | 
 74 | 	# Check for corrections
 75 | 	if artist in LYRICSMODE_CORRECTION:
 76 | 		artist = LYRICSMODE_CORRECTION[artist]
 77 | 
 78 | 	# If the first char of artist is not a alphabet, use '0-9'
 79 | 	first_artist_char = artist[0]
 80 | 	if first_artist_char not in LOWERCASE_CHARS:
 81 | 		first_artist_char = '0-9'
 82 | 
 83 | 	lyricsmode_url = 'http://www.lyricsmode.com/lyrics/%s/%s/%s.html' % (first_artist_char, artist, title)
 84 | 	try:
 85 | 		print('\tTrying LYRICSMODE:', lyricsmode_url)
 86 | 
 87 | 		res = requests.get(lyricsmode_url, headers = request_headers)
 88 | 		res.raise_for_status()
 89 | 		
 90 | 	# Catch network errors
 91 | 	except (ConnectionError, Timeout):
 92 | 		song.error = 'No network connectivity.'
 93 | 	except HTTPError as e:
 94 | 		song.error = 'Lyrics not found. Check artist or title name.'
 95 | 	
 96 | 	# No exceptions raised and the HTML for lyrics page was fetched		
 97 | 	else:
 98 | 		soup = BeautifulSoup(res.text, 'html.parser')
 99 | 
100 | 		# For lyricsmode, the lyrics are present in a div with id 'lyrics_text'
101 | 		lyrics_text = soup.find(id='lyrics_text')
102 | 		for tag in lyrics_text.find_all('div'):
103 | 			tag.clear()
104 | 
105 | 		lyrics = lyrics_text.get_text().strip() if lyrics_text else None
106 | 
107 | 	# Final check
108 | 	if test_lyrics(lyrics):
109 | 		song.lyrics = lyrics
110 | 		song.source = 'LrMOD'
111 | 		song.error = None
112 | 	else:
113 | 		# Don't overwrite and previous errors
114 | 		if not song.error:
115 | 			song.error = 'Lyrics not found. Check artist or title name.'
116 | 


--------------------------------------------------------------------------------
/lyrico/lyrico_sources/musix_match.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | 
  4 | """
  5 | 	This module downloads lyrics from musixmatch. The URL structure is:
  6 | 
  7 | 	https://www.musixmatch.com/lyrics/<artist>/<title>
  8 | 
  9 | 	musixmatch uses dashes, '-', for spaces and removes every other non-alphanumeric characters.
 10 | 	It also replaces apostrophes with dashes. So "Don't" becomes "Don-t". There are few
 11 | 	exceptions but the server seems to be a bit flexible with URLs.
 12 | """
 13 | 
 14 | from __future__ import print_function
 15 | from __future__ import unicode_literals
 16 | 
 17 | import json
 18 | import re
 19 | import sys
 20 | import requests
 21 | 
 22 | try:
 23 | 	from urllib.parse  import quote
 24 | except ImportError:
 25 | 	# Python27
 26 | 	from urllib import quote
 27 | 
 28 | from requests import ConnectionError, HTTPError, Timeout
 29 | from bs4 import BeautifulSoup
 30 | 
 31 | from .build_requests import get_lyrico_headers
 32 | from .lyrics_helper import test_lyrics
 33 | 
 34 | # Defining 'request_headers' outside download function makes a single profile
 35 | # per lyrico operation and not a new profile per each download in an operation.
 36 | request_headers = get_lyrico_headers()
 37 | 
 38 | 
 39 | def download_from_musix_match(song):
 40 | 
 41 | 	"""
 42 | 		Takes reference to the song object as input and
 43 | 		adds lyrics to self.lyrics or add error string to self.error
 44 | 		property of the song object.
 45 | 	"""
 46 | 
 47 | 
 48 | 	# temp var to hold value for final checking
 49 | 	lyrics = None
 50 | 
 51 | 	# Replace upper(apostrophe) commas with dashes '-'
 52 | 	artist = song.artist.replace("'", '-')
 53 | 	title = song.title.replace("'", '-')
 54 | 
 55 | 	# some special characters found in songs
 56 | 	title = title.replace('‐', '-')
 57 | 	title = title.replace('’', '-')
 58 | 
 59 | 	# This regex mathches anything other than Alphanumeric, spaces and dashes
 60 | 	# and removes them.
 61 | 	# Make regex unicode aware 're.UNICODE' for Python27. It is redundant for Python3.
 62 | 	regex_non_alphanum = re.compile(r'[^\w\s\-]*', re.UNICODE)
 63 | 	artist = regex_non_alphanum.sub('', artist)
 64 | 	title = regex_non_alphanum.sub('', title)
 65 | 
 66 | 	# Replace spaces with dashes to imporve URL logging.
 67 | 	regex_spaces = re.compile(r'[\s]+', re.UNICODE)
 68 | 	artist = regex_spaces.sub('-', artist)
 69 | 	title = regex_spaces.sub('-', title)
 70 | 
 71 | 	# See lyric_wikia module for comments on manual encoding
 72 | 	if sys.version_info[0] < 3:
 73 | 		artist = artist.encode('utf-8')
 74 | 		title = title.encode('utf-8')
 75 | 
 76 | 	mxm_url = 'https://www.musixmatch.com/lyrics/%s/%s' % (quote(artist), quote(title))
 77 | 	mxm_url = mxm_url.replace('--', '-')
 78 | 
 79 | 	try:
 80 | 		print('\tTrying musixmatch:', mxm_url)
 81 | 
 82 | 		res = requests.get(mxm_url, headers = request_headers)
 83 | 		res.raise_for_status()
 84 | 
 85 | 	# Catch network errors
 86 | 	except (ConnectionError, Timeout) as e:
 87 | 		song.error = 'No network connectivity.'
 88 | 	except HTTPError as e:
 89 | 		print(e)
 90 | 		song.error = 'Lyrics not found. Check artist or title name.'
 91 | 
 92 | 	# No exceptions raised and the HTML for lyrics page was fetched
 93 | 	else:
 94 | 		soup = BeautifulSoup(res.text, 'html.parser')
 95 | 
 96 | 		lyric_text = ""
 97 | 		lyric_jsons = soup.find_all(type='application/json')
 98 | 		for jsonTag in lyric_jsons:
 99 | 			lyric_json = json.loads(jsonTag.get_text())
100 | 			lyric_type = lyric_json.get('props', {}).get('pageProps', {}).get('data', {}).get('trackInfo', {}).get('data', {}).get('type', {}).strip()
101 | 			if lyric_type == 'restricted':
102 | 				song.error = 'Musixmatch may not show the lyrics'
103 | 				continue
104 | 
105 | 			lyric_text += lyric_json.get('props', {}).get('pageProps', {}).get('data', {}).get('trackInfo', {}).get('data', {}).get('lyrics', {}).get('body', '').strip() + "\n"
106 | 
107 | 		lyrics = lyric_text if lyric_text else None
108 | 
109 | 	# Final check
110 | 	if test_lyrics(lyrics):
111 | 		song.lyrics = lyrics
112 | 		song.source = 'mXm'
113 | 		song.error = None
114 | 	else:
115 | 		# Don't overwrite and previous errors
116 | 		if not song.error:
117 | 			song.error = 'Lyrics not found. Check artist or title name.'
118 | 


--------------------------------------------------------------------------------
/lyrico/lyrico.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """lyrico
  4 | 
  5 | Usage:
  6 |   lyrico [<source_dir>]
  7 |   lyrico (enable | disable) (<lyrico_action>)
  8 |   lyrico set (<dir_type>) (<full_path_to_dir>)
  9 |   lyrico (-h | --help)
 10 |   lyrico --version
 11 |   lyrico --settings
 12 | 
 13 | Options:
 14 |   -h --help     Show this screen.
 15 |   --version     Show version.
 16 |   --settings    Show current settings.
 17 | """
 18 | 
 19 | from __future__ import print_function
 20 | from __future__ import unicode_literals
 21 | 
 22 | import platform
 23 | 
 24 | from .docopt import docopt
 25 | 
 26 | from .song import Song
 27 | from .song_helper import get_song_list
 28 | from .config import Config
 29 | 
 30 | # testpypi 0.7.0
 31 | __version__ = "0.7.0"
 32 | 
 33 | 
 34 | def main():
 35 | 
 36 | 	# Fix console for windows users
 37 | 	if platform.system() == 'Windows':
 38 | 		import win_unicode_console
 39 | 		win_unicode_console.enable()
 40 | 
 41 | 	args = docopt(__doc__, version = ('lyrico ' + __version__))
 42 | 
 43 | 	Config.load_config()
 44 | 
 45 | 	if args['--settings']:
 46 | 		# show current settings
 47 | 		Config.show_settings()
 48 | 		return
 49 | 
 50 | 	if args['set']:
 51 | 		# setting 'lyrics_dir' or 'source_dir'
 52 | 
 53 | 		# This general try catch block is intended for os.makedirs call if
 54 | 		# it raises OSError which is not due to directory already existing or
 55 | 		# some other error than OSError
 56 | 		try:
 57 | 			Config.set_dir(args['<dir_type>'], args['<full_path_to_dir>'])
 58 | 			Config.save()
 59 | 		except Exception as e:
 60 | 			print(e)
 61 | 		return
 62 | 
 63 | 	if args['enable'] or args['disable']:
 64 | 		# setting 'save_to_file', 'save_to_tag' or 'overwrite'.
 65 | 		# detect wether user wants to enable or disable a lyrico action
 66 | 		update_type = 'enable' if args['enable'] else 'disable'
 67 | 		Config.update_lyrico_actions(args['<lyrico_action>'], update_type)
 68 | 		Config.save()
 69 | 		return
 70 | 
 71 | 	# User wants to download lyrics.
 72 | 
 73 | 	if args['<source_dir>']:
 74 | 		# if lyrico <source_dir> invocation is used:
 75 | 		# update user's "source_dir" in config
 76 | 		# update Config class' 'source_dir' class variable
 77 | 
 78 | 		# This general try catch block is intended for os.makedirs call if
 79 | 		# it raises OSError which is not due to directory already existing or
 80 | 		# some other error than OSError
 81 | 		try:
 82 | 			set_dir_success = Config.set_dir('source_dir', args['<source_dir>'])
 83 | 		except Exception as e:
 84 | 			print(e)
 85 | 			# Don't go ahead with excution since user gave bad path or might have
 86 | 			# correct system settings?
 87 | 			return
 88 | 
 89 | 		# For this usage if user provides non existing dir, return by using boolean
 90 | 		# return value of Config.set_dir
 91 | 		if not set_dir_success:
 92 | 			return
 93 | 
 94 | 	#settings changes are done, we need a valid config now
 95 | 	if not Config.check():
 96 | 		return
 97 | 
 98 | 	song_list = [Song(song_path) for song_path in get_song_list(Config.source_dir)]
 99 | 	print(len(song_list), 'songs detected.')
100 | 	print('Metadata extracted for', (str(Song.valid_metadata_count) + '/' + str(len(song_list))), 'songs.')
101 | 	for song in song_list:
102 | 		# Only download lyrics if 'title' and 'artist' is present
103 | 		# Error str is already present in song.error
104 | 		if song.artist and song.title:
105 | 			song.download_lyrics()
106 | 
107 | 		# Show immidiate log in console
108 | 		else:
109 | 			# If title was present, use that
110 | 			if song.title:
111 | 				print(song.title, 'was ignored.', song.error)
112 | 			# else use audio file path
113 | 			else:
114 | 				print(song.path, 'was ignored.', song.error)
115 | 
116 | 
117 | 	print('\nBuilding log...')
118 | 	Song.log_results(song_list)
119 | 	print(
120 | 		'{songs} songs, {tagged} tagged, {files} lyric files, {existing} existing, {errors} errors'.format(
121 | 			songs = len(song_list),
122 | 			tagged = Song.lyrics_saved_to_tag_count,
123 | 			files = Song.lyrics_saved_to_file_count,
124 | 			existing = Song.lyrics_existing_count,
125 | 			errors = Song.lyrics_errors_count
126 | 		)
127 | 	)
128 | 	print('FINISHED')
129 | 
130 | 	# Disable windows unicode console anyways
131 | 	if platform.system() == 'Windows':
132 | 		win_unicode_console.disable()
133 | 


--------------------------------------------------------------------------------
/lyrico/lyrico_sources/az_lyrics.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | 
  4 | """
  5 | 	This module downloads lyrics from AZLyrics by scraping them off its HTML pages.
  6 | 	The url structure used is:
  7 | 
  8 | 		http://www.azlyrics.com/lyrics/<artist>/<title>.html
  9 | 
 10 | 	AZLyrics only allows lowercase alphanumeric(no '_') URLs.
 11 | 
 12 | 	This source is least accurate since BeautifulSoup is not able to parse the HTML pages
 13 | 	correctly and the module depends on regular expressions.
 14 | 
 15 | 	AZLyrics also hates 'The' in artists name for some reason and removes it. Yet there are some
 16 | 	exceptions to this rule. 'lyrico' uses the AZLyrics_CORRECTION mapping for this.
 17 | 
 18 | """
 19 | 
 20 | 
 21 | from __future__ import print_function
 22 | from __future__ import unicode_literals
 23 | 
 24 | import re
 25 | import sys
 26 | import requests
 27 | 
 28 | from requests import ConnectionError, HTTPError, Timeout
 29 | from bs4 import BeautifulSoup
 30 | 
 31 | from .build_requests import get_lyrico_headers
 32 | from .lyrics_helper import test_lyrics
 33 | 
 34 | 
# The request headers are built once, when this module is imported, so every
# AZLyrics download in a single lyrico run shares the same header profile
# instead of getting a new profile per download.
request_headers = get_lyrico_headers()

# Holds corrections for artist names.
# key: artist name as built from our song metadata (after the leading 'The ' has
#      been removed and the name has been lower-cased and stripped)
# value: the corresponding artist name used by AZLyrics in its URLs
AZLyrics_CORRECTION = {
	'the': 'thethe'
}
 44 | 
 45 | def download_from_az_lyrics(song):
 46 | 
 47 | 	"""
 48 | 		Takes reference to the song object as input and
 49 | 		adds lyrics to self.lyrics or add error string to self.error
 50 | 		property of the song object.
 51 | 	"""
 52 | 
 53 | 
 54 | 	# temp var to hold value for final checking
 55 | 	lyrics = None
 56 | 
 57 | 	# Assume this won't work. Be a realist.
 58 | 	error = 'Lyrics not found. Check artist or title name.'
 59 | 
 60 | 	artist = song.artist
 61 | 	title = song.title
 62 | 
 63 | 	# This looks for 'The' followed by a 'space' which is followed by any non-space(\s) char.
 64 | 	# Caret(^) forces to find it only from beginning
 65 | 	# If true then remove the 'The' from the artist name.
 66 | 	regex_the = re.compile(r'^The[ ]{1}\S', re.IGNORECASE)
 67 | 	match_the = re.search(regex_the, artist)
 68 | 	if match_the:
 69 | 		# Remove 'The '
 70 | 	    artist = artist[4:]
 71 | 
 72 | 
 73 | 	# Convert artist and title to lower case and strip off any
 74 | 	# non-alphanumeric characters and '_'. '\W' Equivalent to set [^a-zA-Z0-9_]
 75 | 	# Make regex Unicode UNAWARE
 76 | 	if sys.version_info[0] < 3:
 77 | 		# Python27
 78 | 		# By default ignores Unicode.
 79 | 	    regex_url = re.compile('[\W_]+')
 80 | 	else:
 81 | 		# Use re.ASCII flag to extract ASCII characters only
 82 | 	    regex_url = re.compile('[\W_]+', re.ASCII)
 83 | 
 84 | 	artist = regex_url.sub('', artist.lower())
 85 | 	title = regex_url.sub('', title.lower())
 86 | 
 87 | 	# Check if correction for artist is present in lyrico
 88 | 	if artist in AZLyrics_CORRECTION:
 89 | 		artist = AZLyrics_CORRECTION[artist]
 90 | 
 91 | 	azlyrics_url = 'http://www.azlyrics.com/lyrics/%s/%s.html' % (artist, title)
 92 | 	try:
 93 | 		print('\tTrying AZLyrics:', azlyrics_url)
 94 | 
 95 | 		res = requests.get(azlyrics_url, headers = request_headers)
 96 | 		res.raise_for_status()
 97 | 		# 'requests' was guessing the encoding from azlyrics as ISO-8859-1.
 98 | 		# AZLyrics sends 'UTF-8' in its meta tag
 99 | 
100 | 		# Force request to use 'UTF-8'. This is used when 'res.text' is read to get 'soup'
101 | 		res.encoding = 'utf-8'
102 | 
103 | 	# Catch network errors
104 | 	except (ConnectionError, Timeout) as e:
105 | 		print(e)
106 | 		error = 'No network connectivity.'
107 | 	except HTTPError:
108 | 		# Already carrying error string
109 | 		pass
110 | 
111 | 	# No exceptions raised and the HTML for lyrics was downloaded
112 | 	else:
113 | 		soup = BeautifulSoup(res.text, 'html.parser')
114 | 		lyric_tag = soup.find('div', class_=None, id=None)
115 | 		lyrics = lyric_tag.get_text().strip()
116 | 
117 | 	# Final check
118 | 	if test_lyrics(lyrics):
119 | 		song.lyrics = lyrics
120 | 		song.error = None
121 | 		song.source = 'AZLr'
122 | 	else:
123 | 		song.error = error
124 | 
125 | 
def check_siblings(sib, title, regex):

	"""
		Tell whether the siblings of the 'lyricsh' div look exactly like they
		did when the buggy-parse workaround for AZLyrics was developed.

		'sib' is list of 'lyricsh' div's siblings.
		'title' and 'regex' are the one used to build AZLyrics' URL.

		Expected layout of 'sib' (index : tag name, class list):

			0 : div ['ringtone']
			1 : b   None    (holds the song title)
			2 : br  None    (the buggy tag which contains the lyrics)
			3 : div ['col-lg-2', 'text-center', 'hidden-md', 'hidden-sm',
			         'hidden-xs', 'noprint']

		Returns True only when every condition holds, False otherwise.
	"""

	# Need a non-empty list with at least the four members described above
	if not sib or len(sib) < 4:
		return False

	title_tag, lyrics_tag, jump_div = sib[1], sib[2], sib[3]
	if not (title_tag and lyrics_tag and jump_div):
		return False

	# The third member has to be the <br> tag
	if lyrics_tag.name != 'br':
		return False

	# Class list of the <div> to which BeautifulSoup jumps due to buggy <br>
	class_names = jump_div.attrs.get('class')
	if not class_names:
		return False

	# Every required keyword must appear somewhere in the joined class string
	joined_classes = ' '.join(class_names)
	for keyword in ('noprint', 'hidden', 'col-lg-2'):
		if keyword not in joined_classes:
			return False

	# The <b> tag holds the song title. Normalize it the same way the URL
	# title was normalized before comparing the two.
	found_title = title_tag.get_text()
	if found_title:
		found_title = regex.sub('', found_title.lower())

	return found_title == title
179 | 


--------------------------------------------------------------------------------
/lyrico/song_helper.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | 	Contains helper functions specific to instantiate Song class.
  5 | """
  6 | 
  7 | from __future__ import print_function
  8 | from __future__ import unicode_literals
  9 | 
 10 | import sys
 11 | import os
 12 | import glob2
 13 | import platform
 14 | 
 15 | try:
 16 | 	from urllib.parse  import quote
 17 | except ImportError:
 18 | 	# Python27
 19 | 	from urllib import quote
 20 | 
 21 | from mutagen.id3 import ID3
 22 | from mutagen.mp4 import MP4
 23 | from mutagen.flac import FLAC
 24 | from mutagen.oggopus import OggOpus
 25 | from mutagen.oggvorbis import OggVorbis
 26 | from mutagen.oggflac import OggFLAC
 27 | from mutagen.asf import ASF
 28 | from mutagen import MutagenError
 29 | 
 30 | from .config import Config
 31 | from .helper import sanitize_data
 32 | from .audio_format_keys import FORMAT_KEYS
 33 | 
 34 | 
 35 | def get_key(tag, key, format):
 36 | 	# data stores the result of key lookup from the dictionary like object
 37 | 	# returned by mutagen. The results of key lookups are lists or None when it does not exist.
 38 | 	data = None
 39 | 
 40 | 	# result is the final value returned by get_key function.
 41 | 	result = None
 42 | 
 43 | 	if not tag:
 44 | 		return result
 45 | 
 46 | 	# extra keys to read from FLAC and ogg formats
 47 | 	lyrics_keys = ['LYRICS', 'UNSYNCEDLYRICS', 'UNSYNCED LYRICS', 'SYNCED LYRICS']
 48 | 
 49 | 	if format == 'mp3':
 50 | 		## 'get' for mp3 tags is not fetching lyrics(None). Using getall instead.
 51 | 		data = tag.getall(key)
 52 | 		if not len(data):
 53 | 			return result
 54 | 
 55 | 		# for USLT(lyrics frame) only return lyrics if exist
 56 | 		if key == 'USLT':
 57 | 			result = data[0].text if len(data[0].text) else None
 58 | 		else:
 59 | 			# for TPE1, TIT2, TALB frames, the text field is a list itself
 60 | 			# so we look one list deeper
 61 | 			result = data[0].text[0]
 62 | 
 63 | 	elif format == 'wma':
 64 | 		# For ASF Frames key lookups are lists containing ASFUnicodeAttribute type
 65 | 		# type objects instead of Unicode objects
 66 | 		data = tag.get(key)
 67 | 
 68 | 		# Safely extract the Unicode 'value' from ASFUnicodeAttribute object
 69 | 		result = tag.get(key)[0].value if data else None
 70 | 	else:
 71 | 		# mp4, m4a, flac, ogg
 72 | 
 73 | 		# For all these formats, the data object is a simple dictionary
 74 | 		# with keys mapping to lists.
 75 | 
 76 | 		if format == 'm4a' or format == 'mp4':
 77 | 
 78 | 			# For python27 encoding key(which is a unicode object due to futures import)
 79 | 			# to 'latin-1' fixes the fetch from dictionary
 80 | 
 81 | 			# mp4 standard uses latin-1 encoding for these tag names.
 82 | 			# \xa9 is copyright symbol in that encoding.
 83 | 			if sys.version_info[0] < 3:
 84 | 				key = key.encode('latin-1')
 85 | 
 86 | 			# Python3 is able to handle it internally due to implicit encoding(?)
 87 | 			data = tag.get(key)
 88 | 
 89 | 		if format == 'flac' or format == 'ogg' or format == 'oga' or format == 'opus':
 90 | 
 91 | 			if key == FORMAT_KEYS[format]['lyrics']:
 92 | 
 93 | 				# separately treat lookup of lyrics in these formats
 94 | 
 95 | 				# Loop through different keys to look for lyrics.
 96 | 
 97 | 				# 'LYRICS' will be used as standard for 'lyrico' for Vorbis Comments
 98 | 				# This includes .flac, .ogg(Vorbis and FLAC) files
 99 | 				for lr_key in lyrics_keys:
100 | 					# also try lowercases
101 | 					data = tag.get(lr_key) or tag.get(lr_key.lower())
102 | 
103 | 					# if we find lyrics, stop looping
104 | 					if data:
105 | 						break
106 | 			else:
107 | 				# Normal lookup for other properties
108 | 				data = tag.get(key)
109 | 
110 | 		# till here the data ( for mp4, m4a, flac, ogg) will be a list
111 | 		# containing the value or None. Safely lookup in list
112 | 		result = data[0] if data else None
113 | 
114 | 	# return sanitized value of result
115 | 	return sanitize_data(result)
116 | 
117 | 
def extract_ogg_tag(path):

	"""
		Read tags out of .ogg files encoded with different codecs.

		The file at 'path' is first opened as Ogg Vorbis and, if that fails,
		as Ogg FLAC.

		Returns a tuple (tag, error). 'tag' is the object built by the first
		reader that succeeded and 'error' is None; when no reader succeeds
		'tag' is None and 'error' is a message for the user.
	"""
	ogg_tag = None
	error = None

	# NOTE: success is detected with 'is None' and not with truthiness. The
	# tag objects are dict-like, so a file that was read fine but carries no
	# tags may evaluate as falsy and must not be reported as unreadable.

	try:
		# Try to read ogg-Vorbis files
		ogg_tag = OggVorbis(path)
	except Exception:
		# Deliberate best effort: whatever went wrong, move to next codec type
		pass

	if ogg_tag is None:
		try:
			# Try to read ogg-FLAC files
			ogg_tag = OggFLAC(path)
		except Exception:
			# No more codecs to try, the error is reported below
			pass

	if ogg_tag is None:
		# log error for user to see
		error = 'Unable to read metadata from the .ogg/.oga file. Only Vorbis and FLAC are supported.'

	return (ogg_tag, error)
153 | 
def get_song_data(path):

	"""
		Read artist, album, title and lyrics (if present) out of the audio
		file at 'path' and work out where its lyrics file would be stored.

		Called by the constructor of the Song class, which initializes the
		song object from the returned dict.

		'path' is the absolute path to the audio file.

		The returned dict has the keys 'tag', 'artist', 'title', 'album',
		'format', 'lyrics_file_name', 'lyrics_file_path',
		'lyrics_file_present', 'lyrics_tag_present' and 'error'.
	"""

	tag = None
	error = None
	artist = title = album = lyrics = None

	# format is the part of the string after the last '.' character,
	# always in lowercase
	song_format = path[ path.rfind('.') + 1 : ].lower()

	try:
		if song_format in ('ogg', 'oga'):
			# .ogg/.oga can hold several codecs and report their own error
			tag, error = extract_ogg_tag(path)
		else:
			reader = {
				'mp3': ID3,
				'mp4': MP4,
				'm4a': MP4,
				'flac': FLAC,
				'opus': OggOpus,
				'wma': ASF,
			}.get(song_format)
			if reader is not None:
				tag = reader(path)
	except IOError:
		error = 'Unable to locate the file. Could have been moved during operation.'
	except MutagenError:
		error = 'Unable to read metadata. Unsupported codec or tag does not exist.'
	except Exception as e:
		error = str(e)
		print(e)
	else:
		# This only runs if reading tags creates no exceptions
		keys = FORMAT_KEYS[song_format]
		artist = get_key(tag, keys['artist'], song_format)
		title = get_key(tag, keys['title'], song_format)
		album = get_key(tag, keys['album'], song_format)
		lyrics = get_key(tag, keys['lyrics'], song_format)

	# File name and path of the lyrics file stay None unless both the artist
	# name and the title are known.
	lyrics_file_name = None
	lyrics_file_path = None
	if artist and title:
		lyrics_file_name = '%s - %s.txt' % (artist, title)
		lyrics_file_path = os.path.join(Config.lyrics_dir, lyrics_file_name)
	elif not error:
		# The tags were read correctly but artist or title was simply not
		# there. An error from reading the tags takes precedence over this.
		error = 'Artist name or song title not found.'

	return {
		'tag': tag,
		'artist': artist,
		'title': title,
		'album': album,
		'format': song_format,

		'lyrics_file_name': lyrics_file_name,
		'lyrics_file_path': lyrics_file_path,

		# is the lyrics file already there in LYRICS_DIR
		'lyrics_file_present': lyrics_file_path in Config.lyric_files_in_dir,
		# are lyrics already embedded in the tag
		'lyrics_tag_present': bool(lyrics),

		'error': error,
	}
252 | 
def get_song_list(path):

	""" Return list of paths to all valid audio files in dir located at path.
		Valid audio formats are taken from Config.audio_formats.
		Inner directories are searched as well."""

	# Windows is case-insensitive towards extensions, so the glob2 module
	# detects ex. .ogg and .OGG alike. In Linux the extensions are
	# case-sensitive and uppercase extensions need a second search.
	also_uppercase = platform.system() == 'Linux'

	found = []
	for ext in Config.audio_formats:
		found.extend(glob2.glob(os.path.join(path, '**/*.' + ext)))

		if also_uppercase:
			found.extend(glob2.glob(os.path.join(path, '**/*.' + ext.upper())))

	return found
277 | 


--------------------------------------------------------------------------------
/lyrico/config.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | 
  4 | from __future__ import print_function
  5 | from __future__ import unicode_literals
  6 | 
  7 | import os
  8 | import glob2
  9 | 
 10 | 
 11 | try:
 12 | 	# Import the base class for all configparser errors as BaseConfigParserError
 13 | 	# >3.2
 14 | 	from configparser import ConfigParser
 15 | 	from configparser import Error as BaseConfigParserError
 16 | except ImportError:
 17 | 	# python27
 18 | 	# Refer to the older SafeConfigParser as ConfigParser
 19 | 	from ConfigParser import SafeConfigParser as ConfigParser
 20 | 	from ConfigParser import Error as BaseConfigParserError
 21 | 
 22 | from .helper import get_config_path
 23 | from .helper import BadConfigError
 24 | 
# Maintain a dict of lyrico actions to check the target in update_lyrico_actions().
# Each key is a setting that can be enabled or disabled; its value is the name
# of the config.ini section that setting is stored in.

LYRICO_ACTIONS = {
	'save_to_file': 'actions',
	'save_to_tag': 'actions',
	'overwrite': 'actions',

	'musix_match': 'sources',
	'lyricsmode' : 'sources',
	'az_lyrics': 'sources',
}

# Display names of the sources, used to print commandline logging when a
# source is enabled/disabled
SOURCE_STR_MAP = {
	'musix_match': 'musiXmatch',
	'lyricsmode': 'LYRICSMODE',
	'az_lyrics': 'AZLyrics',
}
 44 | 
 45 | class Config():
 46 | 
 47 | 	"""
 48 | 		Class wrapper build around user settings loaded from
 49 | 		config.ini
 50 | 
 51 | 		All setting are stored are class variables and all methods are
 52 | 		static methods.
 53 | 
 54 | 		A Config object is never instantiated, only the class is imported
 55 | 		into other modules to access class variables and methods.
 56 | 
 57 | 	"""
 58 | 
 59 | 	# Audio formats supported are not loaded from config.ini
 60 | 
 61 | 	# This list is used by the 'glob2' module to scan 'source_dir' for audio files.
 62 | 	audio_formats = ['mp3', 'flac', 'm4a', 'mp4', 'ogg', 'oga', 'opus', 'wma']
 63 | 
 64 | 	lyrics_dir = None
 65 | 	source_dir = None
 66 | 
 67 | 	save_to_file = True
 68 | 	save_to_tag = False
 69 | 
 70 | 	overwrite = False
 71 | 	lyric_files_in_dir = None
 72 | 
 73 | 	@staticmethod
 74 | 	def check():
 75 | 		"""
 76 | 		Check if the configuration is valid
 77 | 		"""
 78 | 		# This forces user to set dirs before running the app for first time.
 79 | 		if len(Config.lyrics_dir) == 0:
 80 | 			# see which directory in not set and raise BadConfigError with that as value
 81 | 			print('lyrics_dir is not set.')
 82 | 			print('Please use the "set" command to set lyrics_dir.')
 83 | 			print('use "lyrico --help" to view commands.')
 84 | 			return False
 85 | 
 86 | 		if len(Config.source_dir) == 0:
 87 | 			# see which directory in not set and raise BadConfigError with that as value
 88 | 			print('source_dir is not set.')
 89 | 			print('Please use the "set" command to set source_dir or pass it as parameter.')
 90 | 			print('use "lyrico --help" to view commands.')
 91 | 			return False
 92 | 
 93 | 		# if user disable both saving mode. Notify & force user to correct on next run.
 94 | 		if not Config.save_to_file and not Config.save_to_tag:
 95 | 			print('Both "save_to_file" and "save_to_tag" modes are disabled. Please enable one.')
 96 | 			print('use "lyrico --help" to view commands.')
 97 | 			return False
 98 | 
 99 | 		# if user disables all sources. Notify & force user to enable one.
100 | 		if (not Config.az_lyrics
101 | 		    and not Config.musix_match
102 | 		    and not Config.lyricsmode):
103 | 			print('All lyrics sources are disabled. Please enable one.')
104 | 			print('use "lyrico --help" to view commands.')
105 | 			return False
106 | 		return True
107 | 
108 | 	@staticmethod
109 | 	def load_config():
110 | 		"""
111 | 		Called only once by main to read user settings from config.ini
112 | 		and save them to the class variables.
113 | 		"""
114 | 		try:
115 | 			conf = ConfigParser()
116 | 
117 | 			config_path = get_config_path()
118 | 			conf.read(config_path)
119 | 
120 | 			# save references to conf, and config_path in class variables
121 | 			Config.config_path = config_path
122 | 			Config.conf = conf
123 | 
124 | 			Config.source_dir = conf.get('paths', 'source_dir')
125 | 			Config.lyrics_dir = conf.get('paths', 'lyrics_dir')
126 | 
127 | 			Config.save_to_file = conf.getboolean('actions', 'save_to_file')
128 | 			Config.save_to_tag = conf.getboolean('actions', 'save_to_tag')
129 | 
130 | 			Config.overwrite = conf.getboolean('actions', 'overwrite')
131 | 
132 | 			# Load all the sources
133 | 			Config.musix_match = conf.getboolean('sources', 'musix_match')
134 | 			Config.lyricsmode = conf.getboolean('sources', 'lyricsmode')
135 | 			Config.az_lyrics = conf.getboolean('sources', 'az_lyrics')
136 | 
137 | 			# Loading this with user config, we need to call the load_config only once at start.
138 | 			Config.lyric_files_in_dir = glob2.glob(os.path.join(Config.lyrics_dir, '**/*.txt'))
139 | 
140 | 
141 | 		# Catch file handling errors
142 | 		except IOError as e:
143 | 			print('Unable to load config.')
144 | 			print(e)
145 | 
146 | 	@staticmethod
147 | 	def save():
148 | 		"""
149 | 		Save configuration file contents
150 | 		"""
151 | 		try:
152 | 			#paths
153 | 			Config.conf.set('paths', 'source_dir', Config.source_dir)
154 | 			Config.conf.set('paths', 'lyrics_dir', Config.lyrics_dir)
155 | 
156 | 			#actions
157 | 			Config.setBool('actions', 'save_to_file', Config.save_to_file)
158 | 			Config.setBool('actions', 'save_to_tag', Config.save_to_tag)
159 | 
160 | 			#sources
161 | 			Config.setBool('sources', 'musix_match', Config.musix_match)
162 | 			Config.setBool('sources', 'lyricsmode', Config.lyricsmode)
163 | 			Config.setBool('sources', 'az_lyrics', Config.az_lyrics)
164 | 
165 | 			with open(Config.config_path, 'w') as configfile:
166 | 				Config.conf.write(configfile)
167 | 			return True
168 | 
169 | 		# Catch all config parser errors
170 | 		except BaseConfigParserError as e:
171 | 			print('Unable to save settings to config.')
172 | 			print(e)
173 | 			return False
174 | 
175 | 		# Catch file handling errors
176 | 		except IOError as e:
177 | 			print('Unable to save settings to config.')
178 | 			print(e)
179 | 			return False
180 | 
181 | 	@staticmethod
182 | 	def setBool(section, option, value):
183 | 		svalue = 'True' if value == True else 'False'
184 | 		Config.conf.set(section, option, svalue)
185 | 
186 | 
187 | 	@staticmethod
188 | 	def set_dir(dir_type, path):
189 | 
190 | 		"""
191 | 			Takes an absolute path as saves it as 'source_dir' or 'lyrics_dir'
192 | 			in config.ini.
193 | 			path is user input from the cmdline.
194 | 		"""
195 | 
196 | 		if dir_type != 'source_dir' and dir_type != 'lyrics_dir':
197 | 			print('Invalid "dir_type". Only "source_dir" or "lyrics_dir" are valid types.')
198 | 			print('You gave "dir_type":', dir_type)
199 | 			print('use "lyrico --help" to view commands.')
200 | 			return False
201 | 
202 | 		# If user is setting "source_dir", return if the path provided does not exist.
203 | 		# This improves the usage - lyrico <source_dir>
204 | 		if dir_type == 'source_dir':
205 | 			if not os.path.isdir(path):
206 | 				print('"source_dir" does not exist. ', end="")
207 | 				print('You gave "source_dir":', path)
208 | 				print('Please enter path to an existing folder.')
209 | 				return False
210 | 			Config.source_dir = path
211 | 		# make directory if user is setting "lyrics_dir" and it does not exists.
212 | 		# Refer http://stackoverflow.com/a/14364249/2426469
213 | 		elif dir_type == 'lyrics_dir':
214 | 			try:
215 | 				os.makedirs(path)
216 | 				print('Directory does not exist. Creating new one.')
217 | 			except OSError:
218 | 				if not os.path.isdir(path):
219 | 					# this exception is handled by function calling set_dir
220 | 					raise
221 | 			Config.lyrics_dir = path
222 | 
223 | 		print(dir_type, 'updated.')
224 | 		if dir_type == 'source_dir':
225 | 			print('lyrico will scan the following folder for audio files:')
226 | 		else:
227 | 			print('lyrico will save lyrics files in the following folder:')
228 | 		print('    ', path)
229 | 		return True
230 | 
231 | 	@staticmethod
232 | 	def update_lyrico_actions(target, update_type):
233 | 
234 | 		if target not in LYRICO_ACTIONS:
235 | 			print('Invalid lyrico action change attempted')
236 | 			print('''"save_to_file", "save_to_tag" and "overwrite" are the only settings that can be enabled or disabled.''')
237 | 			print('''"musix_match", "lyricsmode" and "az_lyrics" are the only sources that can be enabled or disabled.''')
238 | 			print('You attempted to change:', target)
239 | 			print('use "lyrico --help" to view commands.')
240 | 			return
241 | 
242 | 		# User is updating valid action/source
243 | 		bval = True if update_type == 'enable' else False
244 | 		log_str = '' if update_type == 'enable' else 'not '
245 | 
246 | 		setattr(Config, target, bval)
247 | 		print(target, (update_type + 'd'))
248 | 
249 | 		if target == 'save_to_file':
250 | 			print('lyrico will %ssave the downloaded lyrics to text files.' % log_str)
251 | 
252 | 		elif target == 'save_to_tag':
253 | 			print('lyrico will %sembed the downloaded lyrics into song tags.' % log_str)
254 | 
255 | 		elif target == 'overwrite':
256 | 			if update_type == 'disable':
257 | 				print('lyrico will detect the songs that already have lyrics, and will ignore them.')
258 | 			else:
259 | 				print('''lyrico will download lyrics for all songs detected in "source_dir" and overwrite lyrics if already present.''')
260 | 		else:
261 | 			# Action is to enable/disable a source.
262 | 			print('lyrico will %suse %s as a source for lyrics.' % (log_str, SOURCE_STR_MAP[target]))
263 | 
264 | 	@staticmethod
265 | 	def show_settings():
266 | 
267 | 		print('Your current settings:\n')
268 | 		# get list of section in config
269 | 		for section in Config.conf.sections():
270 | 			# for each section get list items.
271 | 			# items are returned as list of tuples of type (key, value)
272 | 			print(section.upper())
273 | 			for item in Config.conf.items(section):
274 | 				print('   ', item[0], '=', item[1])
275 | 			print('\n')
276 | 


--------------------------------------------------------------------------------
/lyrico/song.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from __future__ import print_function
  5 | from __future__ import unicode_literals
  6 | 
  7 | 
  8 | import time
  9 | import sys
 10 | import os
 11 | 
 12 | from mutagen.id3 import USLT
 13 | from mutagen.asf import ASFUnicodeAttribute
 14 | from mutagen import MutagenError
 15 | from bs4 import BeautifulSoup
 16 | 
 17 | # Import all the sources modules
 18 | from .lyrico_sources.az_lyrics import download_from_az_lyrics
 19 | from .lyrico_sources.musix_match import download_from_musix_match
 20 | from .lyrico_sources.lyricsmode import download_from_lyricsmode
 21 | 
 22 | from .song_helper import get_song_data, get_song_list
 23 | from .config import Config
 24 | from .audio_format_keys import FORMAT_KEYS
 25 | 
# If we are using python27, import the 'codecs' module and replace the native
# 'open' with 'codecs.open' for this module. This lets the code below pass
# 'encoding' to 'open' when writing unicode strings to file.

if sys.version_info[0] < 3:
    import codecs
    open = codecs.open
 32 | 
 33 | 
 34 | class Song():
 35 | 	"""Container objects repersenting each song globbed from source_dir"""
 36 | 
 37 | 	# holds count for songs for valid metadata
 38 | 	valid_metadata_count = 0
 39 | 
 40 | 	# Count for songs whose lyrics are successfully saved to file.
 41 | 	lyrics_saved_to_file_count = 0
 42 | 
 43 | 	# Count for songs whose lyrics are successfully saved to tag.
 44 | 	lyrics_saved_to_tag_count = 0
 45 | 
 46 | 	# Number of errors during download or tagging
 47 | 	lyrics_errors_count = 0
 48 | 
 49 | 	# Number of songs that already had lyrics
 50 | 	lyrics_existing_count = 0
 51 | 
	def __init__(self, path):
		"""
			Build the song object for the audio file at 'path' out of the
			data that get_song_data() extracts from it.
		"""

		self.path = path

		# extract data from song
		extracted = get_song_data(path)

		# These instance variables are copied over under the same name.
		# 'lyrics_file_present' tells if the required lyrics file is already
		# in LYRICS_DIR, 'lyrics_tag_present' if the lyrics are already
		# embedded in the tag. 'error' belongs to the final status used to
		# build the log.
		copied_fields = (
			'tag', 'artist', 'title', 'album', 'format',
			'lyrics_file_name', 'lyrics_file_path',
			'lyrics_file_present', 'lyrics_tag_present',
			'error',
		)
		for field in copied_fields:
			setattr(self, field, extracted[field])

		# Holds the downloaded lyrics
		self.lyrics = None

		# Rest of the final status to build log
		self.saved_to_tag = False
		self.saved_to_file = False
		self.source = None

		# As the songs are read from the files, update the class variable.
		# This is count of songs that have valid artist and title.
		if self.title and self.artist:
			Song.valid_metadata_count += 1
 89 | 
	def download_lyrics(self):

		"""
			Only called when song has artist and title.

			Skips the song when self.download_required() says so. Otherwise
			asks the enabled sources one after the other until one of them
			delivers lyrics, then calls self.save_lyrics to save them.
		"""

		if not self.download_required():
			Song.lyrics_existing_count += 1
			print('\nSkipping', self.artist, '-', self.title)
			print('Lyrics already present.')
			return

		# At this point there is nothing in self.error
		print('\nDownloading:', self.artist, '-', self.title)

		# (name of the Config switch, download function) in the order tried
		sources = (
			('musix_match', download_from_musix_match),
			('lyricsmode', download_from_lyricsmode),
			('az_lyrics', download_from_az_lyrics),
		)
		for switch, download in sources:
			# Only try other sources if required
			if self.lyrics:
				break
			if getattr(Config, switch):
				download(self)

		self.save_lyrics()
119 | 
120 | 	def save_lyrics(self):
121 | 
122 | 		"""
123 | 			Called by self.download_lyrics to save lyrics according to
124 | 			Config.save_to_file, Config.save_to_tag settings.
125 | 
126 | 			Handles the case if lyrics is not found. Logs errors to console
127 | 			and Song object.
128 | 
129 | 		"""
130 | 
131 | 		if not self.lyrics:
132 | 			Song.lyrics_errors_count += 1
133 | 			print('Failed:', self.error)
134 | 			return
135 | 
136 | 		if self.lyrics and Config.save_to_file:
137 | 			try:
138 | 				with open(self.lyrics_file_path, 'w', encoding='utf-8') as f:
139 | 					f.write('Artist - ' + self.artist + '\n')
140 | 					f.write('Title - ' + self.title + '\n')
141 | 
142 | 					album_str = 'Album - Unkown'
143 | 					if self.album:
144 | 						album_str = 'Album - ' + self.album
145 | 					f.write(album_str)
146 | 					f.write('\n\n')
147 | 
148 | 					f.write(self.lyrics)
149 | 
150 | 				# update class variable
151 | 				Song.lyrics_saved_to_file_count += 1
152 | 
153 | 				# update the Song instance flag
154 | 				self.saved_to_file = True
155 | 
156 | 				self.download_status = "ok"
157 | 				print('Success: Lyrics saved to file.')
158 | 
159 | 			except IOError as e:
160 | 				err_str = str(e)
161 | 				if e.errno == 22:
162 | 					err_str = 'Cannot save lyrics to file. Unable to create file with song metadata.'
163 | 				if e.errno == 13:
164 | 					err_str = 'Cannot save lyrics to file. The file is opened or in use.'
165 | 				if e.errno == 2:
166 | 					err_str = '"lyrics_dir" does not exist. Please set a "lyrics_dir" which exists.'
167 | 
168 | 				self.error = err_str
169 | 				Song.lyrics_errors_count += 1
170 | 				print('Failed:', err_str)
171 | 
172 | 		if self.lyrics and Config.save_to_tag:
173 | 			lyrics_key = FORMAT_KEYS[self.format]['lyrics']
174 | 			try:
175 | 				if self.format == 'mp3':
176 | 					# encoding = 3 for UTF-8
177 | 					self.tag.add(USLT(encoding=3, lang = u'eng',
178 | 									text=self.lyrics))
179 | 
180 | 				if self.format == 'm4a' or self.format == 'mp4':
181 | 					# lyrics_key = '\xa9lyr'
182 | 
183 | 					if sys.version_info[0] < 3:
184 | 						lyrics_key = lyrics_key.encode('latin-1')
185 | 					self.tag[lyrics_key] = self.lyrics
186 | 
187 | 				# Both flac, opus and ogg/oga(Vorbis & FLAC), are being read/write as Vorbis Comments.
188 | 				# Vorbis Comments don't have a standard 'lyrics' tag. The 'LYRICS' tag is
189 | 				# most common non-standard tag used for lyrics.
190 | 				if self.format == 'flac' or self.format == 'ogg' or self.format == 'oga' or self.format == 'opus':
191 | 					self.tag[lyrics_key] = self.lyrics
192 | 
193 | 				if self.format == 'wma':
194 | 					# ASF Format uses ASFUnicodeAttribute objects instead of Python's Unicode
195 | 					self.tag[lyrics_key] = ASFUnicodeAttribute(self.lyrics)
196 | 
197 | 				self.tag.save()
198 | 				self.saved_to_tag = True
199 | 				Song.lyrics_saved_to_tag_count += 1
200 | 
201 | 				print('Success: Lyrics saved to tag.')
202 | 
203 | 			except MutagenError:
204 | 				err_str = 'Cannot save lyrics to tag. Codec/Format not supported'
205 | 				self.error = err_str
206 | 				Song.lyrics_errors_count += 1
207 | 				print('Failed:', err_str)
208 | 
209 | 			except IOError as e:
210 | 				err_str = 'Cannot save lyrics to tag. The file is opened or in use.'
211 | 				self.error = err_str
212 | 				Song.lyrics_errors_count += 1
213 | 				print('Failed:', err_str)
214 | 
215 | 	def download_required(self):
216 | 		"""
217 | 		Checks if a lyrics are required to be download.
218 | 		Uses Config.save_to_file, Config.save_to_tag and Config.overwrite settings
219 | 		and returns True when download is required.
220 | 
221 | 		"""
222 | 		if Config.overwrite:
223 | 			# If user wants to overwite existing lyrics, always download
224 | 			# and save according to Config.save_to_file, Config.save_to_tag settings
225 | 			return True
226 | 		else:
227 | 
228 | 			# Do we need to download lyrics and save to file
229 | 			file_required = False
230 | 
231 | 			# Do we need to download lyrics and save to tag
232 | 			tag_required = False
233 | 
234 | 			if Config.save_to_file and not self.lyrics_file_present:
235 | 				# if user wants to save to file and the file is not
236 | 				# present in the set LYRICS_DIR, the we need
237 | 				# to download it and save to the file.
238 | 				file_required = True
239 | 
240 | 			if Config.save_to_tag and not self.lyrics_tag_present:
241 | 				# if user wants to save to tag and the tag does not
242 | 				# has lyrics field saved, then we need
243 | 				# to download it and save to the tag.
244 | 				tag_required = True
245 | 
246 | 			# If either is required, we need to make the download request.
247 | 			# Data is then saved accordingly to the settings.
248 | 			return file_required or tag_required
249 | 
250 | 	def get_log_string(self):
251 | 		"""
252 | 		returns the log string of the song which is used in final log.
253 | 
254 | 		"""
255 | 		template = '. \t{file}\t{tag}\t{source}\t\t{song}\t\t{error}\n'
256 | 		log = {}
257 | 
258 | 		# file_status and tag each have 4 possible values
259 | 			# 'Saved' - File or tag was saved successfully
260 | 			# 'Failed' - Download or save failed. Show error.
261 | 			# 'Ignored' - Ignored according to Config.save_to_file, Config.save_to_tag setting by user.
262 | 			# 'Present' - Detected tag or file and skipped download skipped by lyrico as per Config.overwrite setting.
263 | 
264 | 		if Config.save_to_file:
265 | 			if not self.download_required():
266 | 				file_status = 'Present'
267 | 			else:
268 | 				if self.saved_to_file:
269 | 					file_status = 'Saved'
270 | 				else:
271 | 					file_status = 'Failed'
272 | 		else:
273 | 			file_status = 'Ignored'
274 | 
275 | 		if Config.save_to_tag:
276 | 			if not self.download_required():
277 | 				tag = 'Present'
278 | 			else:
279 | 				if self.saved_to_tag:
280 | 					tag = 'Saved'
281 | 				else:
282 | 					tag = 'Failed'
283 | 		else:
284 | 			tag = 'Ignored'
285 | 
286 | 		# avoid exceptions raised for concatinating Unicode and None types
287 | 		if self.artist and self.title:
288 | 			log['song'] = self.artist + ' - ' + self.title
289 | 		else:
290 | 			log['song'] = self.path
291 | 
292 | 		log['error'] = self.error
293 | 
294 | 		log['file'] = file_status
295 | 		log['tag'] = tag
296 | 		log['source'] = self.source
297 | 
298 | 		return template.format(**log)
299 | 
300 | 	@staticmethod
301 | 	def log_results(song_list):
302 | 
303 | 		try:
304 | 			log_date = time.strftime("%H:%M:%S  %d/%m/%y")
305 | 			log_file_name = 'log.txt'
306 | 			with open(os.path.join(Config.lyrics_dir, log_file_name), 'w', encoding='utf-8') as f:
307 | 
308 | 				f.write('\t\t\t\tlyrico\n\n')
309 | 
310 | 				f.write('Log Date ' + log_date + '\n')
311 | 				f.write('\n')
312 | 
313 | 				f.write('Audio files detected: ' + str(len(song_list)))
314 | 				f.write('\n')
315 | 
316 | 				f.write('Metadata extracted for: ' + str(Song.valid_metadata_count))
317 | 				f.write('\n')
318 | 
319 | 				f.write('Lyrics files saved: ' + str(Song.lyrics_saved_to_file_count))
320 | 				f.write('\n')
321 | 
322 | 				f.write('Tags saved: ' + str(Song.lyrics_saved_to_tag_count))
323 | 				f.write('\n\n')
324 | 
325 | 				table_header = '  \t[FILE]\t[TAG]\t[SOURCE]\t\t\t[ARTIST-TITLE]\t\t\t\t[ERROR]\n'
326 | 				table_border = '='*100 + '\n'
327 | 
328 | 				f.write(table_header)
329 | 				f.write(table_border)
330 | 
331 | 				# write individual song log strings
332 | 				index_number = 1
333 | 				for song in song_list:
334 | 					f.write(str(index_number))
335 | 					f.write(song.get_log_string())
336 | 					index_number += 1
337 | 
338 | 				# Add STATUS KEY to log
339 | 				f.write('\n\n\t**** STATUS KEY ****\n')
340 | 
341 | 				f.write("\t# 'Saved' - File or tag was saved successfully.")
342 | 				f.write("\n")
343 | 
344 | 				f.write("\t# 'Failed' - Download or save failed. See error.")
345 | 				f.write("\n")
346 | 
347 | 				f.write("\t# 'Ignored' - Ignored according to 'save_to_file', 'save_to_tag' setting.")
348 | 				f.write("\n")
349 | 
350 | 				f.write("\t# 'Present' - Detected tag or file and skipped download as per 'overwrite' setting.")
351 | 				f.write("\n")
352 | 
353 | 				# Add source key to log
354 | 				f.write('\n\n\t**** SOURCE KEY  ****\n')
355 | 
356 | 				f.write("\t# 'mXm' - musiXmatch")
357 | 				f.write("\n")
358 | 
359 | 				f.write("\t# 'LrMOD' - LYRICSMODE")
360 | 				f.write("\n")
361 | 
362 | 				f.write("\t# 'AZLr' - AZLyrics")
363 | 				f.write("\n\n")
364 | 
365 | 				# Add credits
366 | 				f.write(table_border)
367 | 
368 | 				f.write("'lyrico' has been built and is maintained by Abhimanyu Pathania.")
369 | 				f.write("\n\n")
370 | 
371 | 				f.write('If you encounter a bug, please raise an issue on GitHub.')
372 | 				f.write("\n")
373 | 
374 | 				f.write('\thttps://github.com/abhimanyuPathania/lyrico/issues')
375 | 				f.write("\n")
376 | 
377 | 				f.write('Or you can mail me: abpindia1944@gmail.com')
378 | 				f.write("\n\n")
379 | 
380 | 				f.write('Cheers!')
381 | 				f.write('\n\n\n\n')
382 | 
383 | 		except IOError as e:
384 | 			print('Unable to build log.')
385 | 			print('"lyrics_dir" does not exist. Please set "lyrics_dir" to a folder which exists.')
386 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | lyrico
  2 | ========
  3 | 
  4 | ``lyrico`` is a command line application which downloads lyrics for your songs. When given a folder, ``lyrico`` will:
  5 | 
  6 | - scan it, and all inner folders, for audio files
  7 | - read the metadata for all songs that it detects
  8 | - download the lyrics for each song
  9 | - embed the lyrics downloaded into the song (as standard lyrics tag) and also save it to a text file
 10 | 
 11 | Current version of ``lyrico`` supports only **unsynced lyrics**.
 12 | 
 13 | Support
 14 | =========
 15 | 
 16 | - **Audio Formats** - mp3, flac, m4a, mp4, opus, wma, ogg/oga (Vorbis and FLAC).
 17 | 
  18 | - **Python** - Python 2.7 and Python 3 (tested on Python 3.5 and Python 3.4)
 19 | 
 20 | - **OS** - Windows, Linux (tested on Ubuntu).
 21 | 
 22 | 
 23 | Installation
 24 | =============
 25 | Use the standard ``pip`` install::
 26 | 
 27 |     pip install lyrico
 28 | 
 29 | This will also install the dependencies. Hence, it is recommended to install ``lyrico`` on a separate `virtual environment <https://pypi.python.org/pypi/virtualenv>`_.
 30 | 
 31 | You can test if ``lyrico`` was installed correctly by running the 'lyrico' command, which now should be available::
 32 | 
 33 |     lyrico
 34 | 
 35 | This would give the following output::
 36 | 
 37 |     source_dir is not set. Please use the "set" command to set source_dir.
 38 |     use "lyrico --help" to view commands.
 39 |     Your current settings:
 40 | 
 41 |     ACTIONS
 42 |         save_to_file = True
 43 |         save_to_tag = False
 44 |         overwrite = False
 45 | 
 46 | 
 47 |     PATHS
 48 |         source_dir = None
 49 |         lyrics_dir = None
 50 | 
 51 | 
 52 |     SOURCES
 53 |         musix_match = True
 54 |         lyricsmode = True
 55 |         az_lyrics = False
 56 | 
 57 | If you get this screen, that means ``lyrico`` and its dependencies were installed correctly.
 58 | 
 59 | 
 60 | If you see an error like ``ImportError: No module named mutagen.id3``, this means that the dependencies were not installed for some reason. In that case you can install them very easily with single command. Here's what you do:
 61 | 
 62 | 1. Go to ``lyrico``'s `GitHub page <https://github.com/abhimanyuPathania/lyrico>`_.
 63 | 2. Download repository as ZIP and extract the ``requirements.txt`` file from it. It is in the root directory of repository. This is the only file you need.
 64 | 3. Open command prompt in directory containing the ``requirements.txt`` and run following command (if you're using a virtual environment, activate it before running the command)::
 65 | 
 66 |     pip install -r requirements.txt
 67 | 
 68 |    This will install all of the ``lyrico``'s dependencies and now you can try testing with the 'lyrico' command. It should give no errors.
 69 | 
 70 | 
 71 | Running ``lyrico``
 72 | =====================
 73 | ``lyrico`` operates using two directories (folders):
 74 | 
 75 | - Source Directory (``source_dir``): This is the directory which ``lyrico`` scans for audio files. The scan also includes all the directories contained within.
 76 | 
 77 | - Lyrics Directory (``lyrics_dir``): This is where ``lyrico`` will save the lyrics' text files.
 78 | 
 79 | Before running ``lyrico`` you must set these using the ``set`` command. Values must be absolute paths to the directories. Once set, ``lyrico`` will remember your settings (which can be changed easily at any time). So this has to be done only for the first time.
 80 | 
  81 | This is what an example first run would look like on Windows.
 82 | 
 83 | 1. Set the ``source_dir``::
 84 | 
 85 |     lyrico set source_dir D:\test\Music
 86 | 
 87 |    This logs the following message::
 88 | 
 89 |        source_dir updated.
 90 |        lyrico will scan the following folder for audio files:
 91 |            D:\test\Music
 92 | 
 93 |    When setting ``source_dir``, the directory must exist beforehand. ``lyrico`` will **not create** the ``source_dir`` for you.
 94 | 
 95 | 2. Set the ``lyrics_dir``::
 96 | 
 97 |     lyrico set lyrics_dir D:\test\Lyrics
 98 | 
 99 |    This logs the following in command prompt::
100 | 
101 |        Directory does not exist. Creating new one.
102 |        lyrics_dir updated.
103 |        lyrico will save lyrics files in the following folder:
104 |            D:\test\Lyrics
105 | 
106 |    Unlike ``source_dir``, when setting the ``lyrics_dir`` to folder that does not exist (as in this example); ``lyrico`` **will** create it for you.
107 | 
108 | 3. Run lyrico::
109 | 
110 |     lyrico
111 | 
112 |    This will start the application and it will start downloading the lyrics for songs that it detects in the ``source_dir``. You will be able to see the status (song name, lyrics URL) in the command prompt as it downloads, one at a time, the lyrics for each song.
113 | 
114 |    Finally it builds the log of whole operation and saves it in the ``log.txt`` file. ``log.txt`` is located in your ``lyrics_dir``.
115 | 
116 | 
117 | Other Settings and Commands
118 | =============================
119 | 
120 | Basic settings like ``source_dir`` and ``lyrics_dir`` can be repeatedly changed using the ``set`` command as described in the example above. There are few more settings that are available to control ``lyrico``'s actions. These actions can be either disabled or enabled.
121 | 
122 | - ``save_to_file`` - When enabled, ``lyrico`` will save the lyrics downloaded to a text file and put it in the ``lyrics_dir``. The naming convention of file is as follows:
123 | 
124 |    [artist name] - [title].txt
125 | 
 126 |   where [artist name] and [title] are extracted from the song's metadata. If either of these is not found, lyrics won't be downloaded and you will see that in the final ``log.txt``. This naming convention cannot be changed in the current version.
127 | 
128 |   **enabled by default**
129 | 
130 | - ``save_to_tag`` - When enabled, ``lyrico`` will embed the lyrics downloaded into song tags. ``lyrico`` uses the standard lyrics tags for different formats. This means, as long as your music player can read standard lyrics tags from the song's metadata, it should display them.
131 | 
132 |   **disabled by default**
133 | 
134 | - ``overwrite`` - When enabled, ``lyrico`` will always download the lyrics for a song ignoring they might already be present in the lyrics tag or in the ``lyrics_dir`` as a text file. After the download, it overwrites any existing lyrics in the tag or the text file.
135 | 
136 |   This setting is meant to avoid repetitive download of lyrics. For example, if there is a song 'ABC' in the ``source_dir``. And ``overwrite`` is **disabled**. When ``lyrico`` is run, it will first look into ``lyrics_dir`` if it already has lyrics. If yes, then it would ignore the song.
137 | 
 138 |   ``overwrite`` takes into account the ``save_to_file`` and ``save_to_tag`` settings to decide what to do. For ``save_to_file``, it looks in ``lyrics_dir`` and for ``save_to_tag`` it searches for existing lyrics in the song's metadata. Whenever lyrics are missing from either place, the download happens and any old lyrics are replaced by the downloaded ones in both the text file and the song metadata, as per your settings.
139 | 
140 |   **disabled by default**
141 | 
142 | The above three settings can be changed using ``enable`` and ``disable`` commands. This is how you will enable ``save_to_tag`` from its default 'disabled' setting::
143 | 
144 |     lyrico enable save_to_tag
145 | 
146 | This would log::
147 | 
148 |     save_to_tag enabled
149 |     lyrico will embed the downloaded lyrics into song tags.
150 | 
151 | Similarly to disable ``save_to_file``::
152 | 
153 |     lyrico disable save_to_file
154 | 
155 | This gives following message in command prompt::
156 | 
157 |     save_to_file disabled
158 |     lyrico will not save the downloaded lyrics to text files.
159 | 
160 | 
161 | - *Viewing current settings* - To view current settings use the following command::
162 | 
163 |    lyrico --settings
164 | 
165 | - *Help* - You can always view all the commands by asking for the help screen::
166 | 
167 |     lyrico --help
168 | 
169 | - ``lyrico`` **quick invocation** - you can supply ``source_dir`` along with ``lyrico`` command. The following command::
170 | 
171 |    lyrico full_path_to_source_dir
172 | 
173 |   is same as running the two commands::
174 | 
175 |     lyrico set source_dir full_path_to_source_dir
176 |     lyrico
177 | 
178 |   However this won't work for the very first run. When running ``lyrico`` for the first time after installation, the ``source_dir`` must be set explicitly using the ``set`` command.
179 | 
180 | Lyrics Sources
181 | ================
182 | ``lyrico`` uses the following sources from where it downloads the lyrics:
183 | 
184 | 1. `musiXmatch <https://www.musixmatch.com/>`_ : ``musix_match``
185 | 
186 | 2. `LYRICSMODE <http://www.lyricsmode.com/>`_ : ``lyricsmode``
187 | 
188 | 3. `AZLyrics <http://www.azlyrics.com/>`_ : ``az_lyrics`` (**disabled by default**)
189 | 
190 | The search order is same as enumerated above and cannot be changed. You can, however, disable or enable any of the sources using the same ``enable`` and ``disable`` commands. When a source is disabled, it is simply skipped during the search.
191 | 
192 | For example, to enable AZLyrics::
193 | 
194 |     lyrico enable az_lyrics
195 | 
196 | Use the command line name for the source, which is mentioned after the link to the source in the above list. This logs the following message indicating that ``az_lyrics`` will be used as a source::
197 | 
198 |     az_lyrics enabled
199 |     lyrico will use AZLyrics as a source for lyrics.
200 | 
201 | Or to disable AZLyrics::
202 | 
 203 |     lyrico disable az_lyrics
204 | 
205 | This logs the following message::
206 | 
207 |     az_lyrics disabled
208 |     lyrico will not use AZLyrics as a source for lyrics.
209 | 
210 | 
211 | Audio Formats and Tags
212 | =======================
213 | Below is the table of supported audio formats and their supported tags:
214 | 
215 | +--------------------------------------------+----------------------------------------------+
216 | | Audio Format                               | Tag                                          |
217 | +============================================+==============================================+
218 | | flac                                       | Vorbis Comments                              |
219 | +--------------------------------------------+----------------------------------------------+
220 | | m4a, mp4                                   | MP4 Tags (iTunes metadata)                   |
221 | +--------------------------------------------+----------------------------------------------+
222 | | mp3                                        | ID3 Tags                                     |
223 | +--------------------------------------------+----------------------------------------------+
224 | | ogg, oga                                   | Vorbis Comments                              |
225 | +--------------------------------------------+----------------------------------------------+
226 | | opus                                       | Vorbis Comments                              |
227 | +--------------------------------------------+----------------------------------------------+
228 | | wma                                        | ASF                                          |
229 | +--------------------------------------------+----------------------------------------------+
230 | 
231 | ``lyrico`` goodness
232 | =====================
233 | 
 234 | Here are some things that ``lyrico`` does well:
235 | 
 236 | - **No junk** - ``lyrico`` will not insert junk text into your lyrics files or audio tags. It won't create blank files or blank lyrics tags. Nor will it create lyrics files or tags containing errors, etc.
237 | 
238 | - **Language** - Since ``lyrico`` uses your song's artist name and title to construct the URLs; so as long as they are correct and the source has the lyrics, it would work no matter which language.
239 | 
240 | - **foobar2000** - The poor performance of the `Lyric Show Panel 3 <https://www.foobar2000.org/components/view/foo_uie_lyrics3>`_ component was main reason I wrote this application. It simply won't work for me. ``lyrico`` plays nicely with 'Lyric Show Panel'. ``lyrico``'s file-naming convention matches 'Lyric Show Panel's default settings. Just point 'Lyric Show Panel' to your ``lyrics_dir`` and done.
241 | 
242 |   I recommend simply removing all of 'Lyric Show Panel' online sources and use offline mode (Tag search, Files search, Associations search) with ``lyrico``. It is the next best thing to automatic search. Because 'Lyric Show Panel' on failure embeds errors in lyrics files and tags!
243 | 
244 |   Even if you don't use foobar2000 or your music player cannot read lyrics from text files like that, you can always embed lyrics into tags which should work with any decent music player including **iTunes**.
245 | 
 246 | - **log.txt** - The ``log.txt`` created at the end of every ``lyrico`` run is a nice way to see what you have fetched. It shows a list of every song present in ``source_dir`` along with the status of the download or any errors that happened.
247 | 
248 | ``lyrico`` gotchas
249 | ====================
250 | 
251 | Here are few points you should know before using ``lyrico``:
252 | 
253 | - **Your tags** - ``lyrico`` uses metadata in your tags for building URLs. Hence your songs should be tagged with correct 'artist', 'title' information.
254 | 
255 |   ``lyrico`` also assumes that you're using standard tags for each format (container) of your songs. For example, ``lyrico`` assumes that your ``.mp3`` files are using the standard ``ID3`` tags and only reads metadata for those. If you are using something like an ``APEv2`` tag with an ``.mp3`` file,  ``lyrico`` won't be able to read it and would log the pertinent error in the ``log.txt``.
256 | 
257 |   You don't need to be concerned about this unless you have forcibly embedded non-standard tags in your songs with some other software. *Table of supported tags for audio formats is given above.*
258 | 
259 | - **ID3 tag versions** - ``lyrico`` will convert any old ID3 tag to ID3v2.4 if ``save_to_tag`` is enabled. This is the default behavior of *mutagen*; the underlying dependency used by ``lyrico`` to read ID3 tags.
260 | 
261 |   This has never caused any problem for me till date. And from my understanding you should be using ID3v2.4 tags anyways. I have used ``lyrico`` on hundreds of mp3 files and had no issues. You can always test ``lyrico`` on few songs and check. Or you can just disable ``save_to_tag``.
262 | 
263 | - **Song metadata** - Lyrics are fetched using a URL generated using song's artist name and title. This means that if the song has titles like:
264 | 
265 |   - ABC(acoustic)
266 |   - ABC(live version)
267 | 
268 |   or an artist like:
269 | 
270 |   - XYZ(feat. Blah)
271 | 
272 |   the download might fail. Sometimes artist-name or title contain characters like '?'.  For this, Windows won't be able to create the text file as it is a restricted character. But the lyrics will be downloaded anyways and saved to tag if ``save_to_tag`` is enabled.
273 | 
274 | - **windows console** - If you are using Windows, like me, you must use some other font than the default 'raster fonts' in the command prompt to view in-prompt logging for songs using other characters than English in their metadata.
275 | 
 276 |   But the problem does not end here. Even after enabling other allowed fonts like ``Consolas`` or ``Lucida Console``, you still won't be able to see in-prompt logging (you will see question marks or boxes) for Asian languages like Mandarin, Japanese, Korean, etc. European languages, however, are displayed correctly.
277 | 
278 |   Despite any issues with windows console display, ``lyrico`` downloads and saves the lyrics correctly to files and tags.
279 | 
280 | 
281 | Dependencies
282 | ================
283 | ``lyrico`` uses and thanks the following python packages:
284 | 
 285 | - `glob2 <https://pypi.python.org/pypi/glob2>`_: to allow simple recursive directory search in Python 2.7.
286 | 
287 | - `requests <https://pypi.python.org/pypi/requests>`_: HTTP for Humans.
288 | 
289 | - `mutagen <https://pypi.python.org/pypi/mutagen>`_: to read tags from audio files and embed lyrics in tags for multiple audio formats.
290 | 
291 | - `beautifulsoup4 <https://pypi.python.org/pypi/beautifulsoup4>`_: to extract the lyrics.
292 | 
 293 | - `win_unicode_console <https://pypi.python.org/pypi/win_unicode_console>`_: because Python 2.7, Unicode and the command prompt are a nightmare.
294 | 
295 | 
296 | - `docopt <https://pypi.python.org/pypi/docopt>`_: to create beautiful command-line interfaces.
297 | 
298 | 
299 | A note on mass downloading
300 | ===========================
301 | 
302 | Since ``lyrico`` is simply scraping lyrics off the HTML pages of the sources, please don't set ``source_dir`` to a folder having thousands of songs.
303 | 
304 | They might ban your bot. ``az_lyrics`` sometimes bans your IP (not sure if permanent) if you hit them with too many failed requests. Though, refreshing your IP by restarting your router or using a VPN solves that. Hence, ``az_lyrics`` as a source is disabled by default. Only use it if you are looking for recent lyrics.
305 | 
306 | Also, downloading 1000s of lyrics will be slow since ``lyrico`` does not batch-download. It sends one request to one source at a time. This is by design.
307 | 
 308 | I personally use it on one or two albums at a time and keep checking for any errors in ``log.txt``.
309 | 
310 | Integration tests
311 | =================
312 | Run them::
313 | 
314 |     $ python3 -m unittest discover
315 | 
316 | Run a single test::
317 | 
318 |     $ python3 -m unittest tests/lyrico_sources/test_musix_match.py -k test_download
319 | 
320 | 
321 | Changelog
322 | ==========
323 | - 0.7.0 2024-05
324 | 
325 |   - python3 compatibility
326 |   - store configuration in correct folder depending on operating system
327 |   - remove LYRICSnMUSIC (service shutdown)
328 |   - remove LyricsWikia (service shutdown)
329 |   - fixes for Musixmatch
330 |   - fixes for AZLyrics
331 | - 0.6.0 2016-08
332 | 
333 |   - Added support for ``oga`` audio format.
334 |   - Detect uppercase extensions in Linux.
335 | - 0.5.0 2016-02
336 | 
337 |   - Added musiXmatch and LYRICSMODE to sources.
338 |   - Include detection for licensing errors.
339 | - 0.4.0 Added LYRICSnMUSIC and AZLyrics as sources. Expanded the command line interface to control sources. Added `requests <https://pypi.python.org/pypi/requests>`_ to dependencies.
340 | - 0.3.0 Added support for ``ogg`` and ``wma`` audio formats. Replaced ``UNSYNCED LYRICS`` with ``LYRICS`` tags to embed lyrics in Vorbis Comments.
341 | - 0.2.0 Added documentation and tutorial.
342 | - 0.1.0 Initial release.
343 | 


--------------------------------------------------------------------------------
/lyrico/docopt.py:
--------------------------------------------------------------------------------
  1 | """Pythonic command-line interface parser that will make you smile.
  2 | 
  3 |  * http://docopt.org
  4 |  * Repository and issue-tracker: https://github.com/docopt/docopt
  5 |  * Licensed under terms of MIT license (see LICENSE-MIT)
  6 |  * Copyright (c) 2013 Vladimir Keleshev, vladimir@keleshev.com
  7 | 
  8 | """
  9 | import sys
 10 | import re
 11 | 
 12 | 
 13 | __all__ = ['docopt']
 14 | __version__ = '0.6.1'
 15 | 
 16 | 
 17 | class DocoptLanguageError(Exception):
 18 | 
 19 |     """Error in construction of usage-message by developer."""
 20 | 
 21 | 
 22 | class DocoptExit(SystemExit):
 23 | 
 24 |     """Exit in case user invoked program with incorrect arguments."""
 25 | 
 26 |     usage = ''
 27 | 
 28 |     def __init__(self, message=''):
 29 |         SystemExit.__init__(self, (message + '\n' + self.usage).strip())
 30 | 
 31 | 
 32 | class Pattern(object):
 33 | 
 34 |     def __eq__(self, other):
 35 |         return repr(self) == repr(other)
 36 | 
 37 |     def __hash__(self):
 38 |         return hash(repr(self))
 39 | 
 40 |     def fix(self):
 41 |         self.fix_identities()
 42 |         self.fix_repeating_arguments()
 43 |         return self
 44 | 
 45 |     def fix_identities(self, uniq=None):
 46 |         """Make pattern-tree tips point to same object if they are equal."""
 47 |         if not hasattr(self, 'children'):
 48 |             return self
 49 |         uniq = list(set(self.flat())) if uniq is None else uniq
 50 |         for i, child in enumerate(self.children):
 51 |             if not hasattr(child, 'children'):
 52 |                 assert child in uniq
 53 |                 self.children[i] = uniq[uniq.index(child)]
 54 |             else:
 55 |                 child.fix_identities(uniq)
 56 | 
 57 |     def fix_repeating_arguments(self):
 58 |         """Fix elements that should accumulate/increment values."""
 59 |         either = [list(child.children) for child in transform(self).children]
 60 |         for case in either:
 61 |             for e in [child for child in case if case.count(child) > 1]:
 62 |                 if type(e) is Argument or type(e) is Option and e.argcount:
 63 |                     if e.value is None:
 64 |                         e.value = []
 65 |                     elif type(e.value) is not list:
 66 |                         e.value = e.value.split()
 67 |                 if type(e) is Command or type(e) is Option and e.argcount == 0:
 68 |                     e.value = 0
 69 |         return self
 70 | 
 71 | 
 72 | def transform(pattern):
 73 |     """Expand pattern into an (almost) equivalent one, but with single Either.
 74 | 
 75 |     Example: ((-a | -b) (-c | -d)) => (-a -c | -a -d | -b -c | -b -d)
 76 |     Quirks: [-a] => (-a), (-a...) => (-a -a)
 77 | 
 78 |     """
 79 |     result = []
 80 |     groups = [[pattern]]
 81 |     while groups:
 82 |         children = groups.pop(0)
 83 |         parents = [Required, Optional, OptionsShortcut, Either, OneOrMore]
 84 |         if any(t in map(type, children) for t in parents):
 85 |             child = [c for c in children if type(c) in parents][0]
 86 |             children.remove(child)
 87 |             if type(child) is Either:
 88 |                 for c in child.children:
 89 |                     groups.append([c] + children)
 90 |             elif type(child) is OneOrMore:
 91 |                 groups.append(child.children * 2 + children)
 92 |             else:
 93 |                 groups.append(child.children + children)
 94 |         else:
 95 |             result.append(children)
 96 |     return Either(*[Required(*e) for e in result])
 97 | 
 98 | 
 99 | class LeafPattern(Pattern):
100 | 
101 |     """Leaf/terminal node of a pattern tree."""
102 | 
103 |     def __init__(self, name, value=None):
104 |         self.name, self.value = name, value
105 | 
106 |     def __repr__(self):
107 |         return '%s(%r, %r)' % (self.__class__.__name__, self.name, self.value)
108 | 
109 |     def flat(self, *types):
110 |         return [self] if not types or type(self) in types else []
111 | 
112 |     def match(self, left, collected=None):
113 |         collected = [] if collected is None else collected
114 |         pos, match = self.single_match(left)
115 |         if match is None:
116 |             return False, left, collected
117 |         left_ = left[:pos] + left[pos + 1:]
118 |         same_name = [a for a in collected if a.name == self.name]
119 |         if type(self.value) in (int, list):
120 |             if type(self.value) is int:
121 |                 increment = 1
122 |             else:
123 |                 increment = ([match.value] if type(match.value) is str
124 |                              else match.value)
125 |             if not same_name:
126 |                 match.value = increment
127 |                 return True, left_, collected + [match]
128 |             same_name[0].value += increment
129 |             return True, left_, collected
130 |         return True, left_, collected + [match]
131 | 
132 | 
class BranchPattern(Pattern):
    """Inner node of a pattern tree; keeps its children in a list."""

    def __init__(self, *children):
        self.children = list(children)

    def __repr__(self):
        rendered = ', '.join(repr(a) for a in self.children)
        return '%s(%s)' % (self.__class__.__name__, rendered)

    def flat(self, *types):
        """Return this node if its type is in *types*, else its flattened children."""
        if type(self) in types:
            return [self]
        gathered = []
        for child in self.children:
            gathered = gathered + child.flat(*types)
        return gathered
148 | 
149 | 
class Argument(LeafPattern):
    """Positional-argument leaf of a pattern tree (for example ``<file>``)."""

    def single_match(self, left):
        """Find the first ``Argument`` in *left*.

        Returns ``(index, Argument(self.name, value))`` carrying the value
        of the element found, or ``(None, None)`` when *left* holds no
        ``Argument``.
        """
        for n, pattern in enumerate(left):
            if type(pattern) is Argument:
                return n, Argument(self.name, pattern.value)
        return None, None

    @classmethod
    def parse(class_, source):
        """Build an instance from a description such as ``<x> [default: 1]``.

        The name is the first ``<...>`` token found in *source*; the value
        is the text of a ``[default: ...]`` marker (matched
        case-insensitively) or ``None`` when there is no such marker.
        Raises ``IndexError`` when *source* contains no ``<...>`` token.
        """
        # Raw strings keep the regex escapes (\S, \[, \]) from being read as
        # invalid string escape sequences, which newer Python versions warn
        # about; the compiled patterns are unchanged.
        name = re.findall(r'(<\S*?>)', source)[0]
        value = re.findall(r'\[default: (.*)\]', source, flags=re.I)
        return class_(name, value[0] if value else None)
163 | 
164 | 
class Command(Argument):

    """Leaf pattern for a literal command word."""

    def __init__(self, name, value=False):
        self.name = name
        self.value = value

    def single_match(self, left):
        """Match only if the first positional argument in `left` equals
        this command's name; return ``(index, Command(name, True))`` on
        success and ``(None, None)`` otherwise."""
        for index, candidate in enumerate(left):
            if type(candidate) is not Argument:
                continue
            # Only the first positional argument is ever considered.
            if candidate.value == self.name:
                return index, Command(self.name, True)
            break
        return None, None
178 | 
179 | 
class Option(LeafPattern):

    """Leaf pattern for an option with a short and/or long spelling."""

    def __init__(self, short=None, long=None, argcount=0, value=False):
        """Create an option.

        short, long -- the '-x' / '--name' spellings (either may be None).
        argcount    -- 0 for a flag, 1 if the option takes an argument.
        value       -- the option's value; ``False`` on an option that
                       takes an argument is stored as ``None``.
        """
        assert argcount in (0, 1)
        self.short, self.long, self.argcount = short, long, argcount
        self.value = None if value is False and argcount else value

    @classmethod
    def parse(class_, option_description):
        """Build an option from one option-description entry.

        The spellings are separated from the description text by two
        spaces; ',' and '=' between spellings are treated as whitespace.
        Any word that does not start with '-' marks the option as taking
        an argument, and only then is ``[default: ...]`` (matched
        case-insensitively) read from the description.
        """
        short, long, argcount, value = None, None, 0, False
        options, _, description = option_description.strip().partition('  ')
        options = options.replace(',', ' ').replace('=', ' ')
        for s in options.split():
            if s.startswith('--'):
                long = s
            elif s.startswith('-'):
                short = s
            else:
                argcount = 1
        if argcount:
            # Raw string: '\[' and '\]' are not valid string escapes.
            matched = re.findall(r'\[default: (.*)\]', description,
                                 flags=re.I)
            value = matched[0] if matched else None
        return class_(short, long, argcount, value)

    def single_match(self, left):
        """Return ``(index, item)`` for the first item in `left` whose
        name equals this option's name, or ``(None, None)``."""
        for n, pattern in enumerate(left):
            if self.name == pattern.name:
                return n, pattern
        return None, None

    @property
    def name(self):
        """The long spelling if set, otherwise the short one."""
        return self.long or self.short

    def __repr__(self):
        return 'Option(%r, %r, %r, %r)' % (self.short, self.long,
                                           self.argcount, self.value)
217 | 
218 | 
class Required(BranchPattern):

    """Branch pattern that matches only when every child matches in turn."""

    def match(self, left, collected=None):
        """Feed `left`/`collected` through each child in order.

        Returns ``(True, remaining, gathered)`` when all children match;
        on the first failure returns ``(False, left, collected)`` with the
        values that were passed in.
        """
        if collected is None:
            collected = []
        remaining, gathered = left, collected
        for child in self.children:
            ok, remaining, gathered = child.match(remaining, gathered)
            if not ok:
                return False, left, collected
        return True, remaining, gathered
230 | 
231 | 
class Optional(BranchPattern):

    """Branch pattern whose children may or may not match."""

    def match(self, left, collected=None):
        """Let every child try to match; always report success.

        Each child's returned `left`/`collected` is passed on to the next
        child regardless of whether that child matched.
        """
        if collected is None:
            collected = []
        for child in self.children:
            _, left, collected = child.match(left, collected)
        return True, left, collected
239 | 
240 | 
class OptionsShortcut(Optional):

    """Placeholder node standing in for the ``[options]`` usage shortcut."""
244 | 
245 | 
class OneOrMore(BranchPattern):

    """Branch pattern that applies its single child repeatedly."""

    def match(self, left, collected=None):
        """Match the only child as many times as it keeps making progress.

        Returns ``(True, remaining, gathered)`` if the child matched at
        least once, else ``(False, left, collected)`` as passed in.
        """
        assert len(self.children) == 1
        if collected is None:
            collected = []
        remaining, gathered = left, collected
        previous = None
        successes = 0
        while True:
            # could it be that something didn't match but changed the state?
            ok, remaining, gathered = self.children[0].match(remaining,
                                                             gathered)
            if ok:
                successes += 1
            # Stop as soon as an attempt leaves the remaining input unchanged.
            if previous == remaining:
                break
            previous = remaining
            if not ok:
                break
        if successes >= 1:
            return True, remaining, gathered
        return False, left, collected
266 | 
267 | 
class Either(BranchPattern):

    """Branch pattern for alternatives."""

    def match(self, left, collected=None):
        """Try every child against the same input and pick one result.

        Among the children that match, the result leaving the fewest
        unmatched items is returned (the earliest one on ties). If none
        match, returns ``(False, left, collected)``.
        """
        if collected is None:
            collected = []
        attempts = [child.match(left, collected) for child in self.children]
        successes = [attempt for attempt in attempts if attempt[0]]
        if not successes:
            return False, left, collected
        return min(successes, key=lambda attempt: len(attempt[1]))
280 | 
281 | 
class Tokens(list):

    """List of tokens plus the exception class to raise on parse errors."""

    def __init__(self, source, error=DocoptExit):
        # Anything with a .split() method is split on whitespace; any
        # other iterable is taken as already tokenized.
        self.extend(source.split() if hasattr(source, 'split') else source)
        self.error = error

    @staticmethod
    def from_pattern(source):
        """Tokenize a usage pattern; errors raise DocoptLanguageError.

        Brackets, parentheses, '|' and '...' are padded with spaces so
        they become separate tokens; text ending in ``<...>`` is kept
        as a single token even if it contains whitespace.
        """
        source = re.sub(r'([\[\]\(\)\|]|\.\.\.)', r' \1 ', source)
        # Raw string: '\s' and '\S' are not valid string escapes.
        source = [s for s in re.split(r'\s+|(\S*<.*?>)', source) if s]
        return Tokens(source, error=DocoptLanguageError)

    def move(self):
        """Remove and return the first token, or None when empty."""
        return self.pop(0) if len(self) else None

    def current(self):
        """Return the first token without removing it, or None when empty."""
        return self[0] if len(self) else None
299 | 
300 | 
def parse_long(tokens, options):
    """long ::= '--' chars [ ( ' ' | '=' ) chars ] ;

    Consume one long option from `tokens` and return it as a one-item
    list. When `tokens.error` is DocoptExit, a prefix of a known long
    option is accepted if no exact match exists, and the returned
    option carries the parsed value. An option that is not in
    `options` is appended to it. Errors are raised as `tokens.error`.
    """
    name, separator, argument = tokens.move().partition('=')
    assert name.startswith('--')
    if separator == '' and argument == '':
        argument = None
    from_argv = tokens.error is DocoptExit

    candidates = [opt for opt in options if opt.long == name]
    if from_argv and candidates == []:  # if no exact match
        candidates = [opt for opt in options
                      if opt.long and opt.long.startswith(name)]

    if len(candidates) > 1:  # might be simply specified ambiguously 2+ times?
        raise tokens.error('%s is not a unique prefix: %s?' %
                           (name, ', '.join(opt.long for opt in candidates)))

    if len(candidates) < 1:
        # Unknown option: register it, guessing argcount from the '='.
        argcount = 1 if separator == '=' else 0
        option = Option(None, name, argcount)
        options.append(option)
        if from_argv:
            option = Option(None, name, argcount,
                            argument if argcount else True)
        return [option]

    known = candidates[0]
    option = Option(known.short, known.long, known.argcount, known.value)
    if option.argcount == 0:
        if argument is not None:
            raise tokens.error('%s must not have an argument' % option.long)
    elif argument is None:
        # The argument was not attached with '=': take the next token.
        if tokens.current() in [None, '--']:
            raise tokens.error('%s requires argument' % option.long)
        argument = tokens.move()
    if from_argv:
        option.value = True if argument is None else argument
    return [option]
332 | 
333 | 
def parse_shorts(tokens, options):
    """shorts ::= '-' ( chars )* [ [ ' ' ] chars ] ;

    Consume one token of stacked short options and return the parsed
    options as a list. An option that takes an argument uses the rest
    of the token as its value, or the next token if nothing is left.
    A short option that is not in `options` is appended to it. Errors
    are raised as `tokens.error`.
    """
    token = tokens.move()
    assert token.startswith('-') and not token.startswith('--')
    from_argv = tokens.error is DocoptExit
    rest = token.lstrip('-')
    result = []
    while rest:
        flag = '-' + rest[0]
        rest = rest[1:]
        known = [opt for opt in options if opt.short == flag]
        if len(known) > 1:
            raise tokens.error('%s is specified ambiguously %d times' %
                               (flag, len(known)))
        if not known:
            option = Option(flag, None, 0)
            options.append(option)
            if from_argv:
                option = Option(flag, None, 0, True)
        else:  # why copying is necessary here?
            declared = known[0]
            option = Option(flag, declared.long,
                            declared.argcount, declared.value)
            value = None
            if option.argcount != 0:
                if rest:
                    # Remainder of the token is the argument, e.g. -ofile.
                    value, rest = rest, ''
                else:
                    if tokens.current() in [None, '--']:
                        raise tokens.error('%s requires argument' % flag)
                    value = tokens.move()
            if from_argv:
                option.value = True if value is None else value
        result.append(option)
    return result
367 | 
368 | 
def parse_pattern(source, options):
    """Parse a formal usage string into a Required pattern tree.

    Raises `tokens.error` if tokens remain after the expression has
    been parsed.
    """
    tokens = Tokens.from_pattern(source)
    children = parse_expr(tokens, options)
    if tokens.current() is None:
        return Required(*children)
    raise tokens.error('unexpected ending: %r' % ' '.join(tokens))
375 | 
376 | 
def parse_expr(tokens, options):
    """expr ::= seq ( '|' seq )* ;

    Returns a list of patterns. Without any '|' this is the plain
    sequence; otherwise each multi-item alternative is wrapped in
    Required and, if more than one pattern results, all are wrapped in
    a single Either.
    """
    pending = parse_seq(tokens, options)
    if tokens.current() != '|':
        return pending
    alternatives = []
    while True:
        if len(pending) > 1:
            alternatives.append(Required(*pending))
        else:
            alternatives.extend(pending)
        if tokens.current() != '|':
            break
        tokens.move()  # drop the '|'
        pending = parse_seq(tokens, options)
    if len(alternatives) > 1:
        return [Either(*alternatives)]
    return alternatives
388 | 
389 | 
def parse_seq(tokens, options):
    """seq ::= ( atom [ '...' ] )* ;

    Parse atoms until the input ends or a ']', ')' or '|' token is
    reached; an atom followed by '...' is wrapped in OneOrMore.
    """
    terminators = (None, ']', ')', '|')
    sequence = []
    while tokens.current() not in terminators:
        parsed = parse_atom(tokens, options)
        if tokens.current() == '...':
            tokens.move()
            parsed = [OneOrMore(*parsed)]
        sequence.extend(parsed)
    return sequence
400 | 
401 | 
def parse_atom(tokens, options):
    """atom ::= '(' expr ')' | '[' expr ']' | 'options'
             | long | shorts | argument | command ;

    Parse a single atom starting at the current token and return it as
    a list of patterns. Raises `tokens.error` when a '(' or '[' group
    is not closed by the matching bracket.
    """
    token = tokens.current()
    # Compare against the exact bracket tokens: a substring test such as
    # `token in '(['` would also accept '' and '(['.
    if token in ('(', '['):
        tokens.move()
        matching, pattern = {'(': [')', Required], '[': [']', Optional]}[token]
        result = pattern(*parse_expr(tokens, options))
        if tokens.move() != matching:
            raise tokens.error("unmatched '%s'" % token)
        return [result]
    elif token == 'options':
        tokens.move()
        return [OptionsShortcut()]
    elif token.startswith('--') and token != '--':
        return parse_long(tokens, options)
    elif token.startswith('-') and token not in ('-', '--'):
        return parse_shorts(tokens, options)
    elif token.startswith('<') and token.endswith('>') or token.isupper():
        # <angle-bracketed> or ALL-CAPS words are positional arguments.
        return [Argument(tokens.move())]
    else:
        return [Command(tokens.move())]
426 | 
427 | 
def parse_argv(tokens, options, options_first=False):
    """Parse command-line argument vector.

    If options_first:
        argv ::= [ long | shorts ]* [ argument ]* [ '--' [ argument ]* ] ;
    else:
        argv ::= [ long | shorts | argument ]* [ '--' [ argument ]* ] ;

    Returns the parsed options and positional arguments as one list.
    """
    items = []
    while True:
        head = tokens.current()
        if head is None:
            break
        if head == '--':
            # Everything from '--' on (the '--' included) is positional.
            items.extend(Argument(None, rest) for rest in tokens)
            break
        if head.startswith('--'):
            items.extend(parse_long(tokens, options))
        elif head.startswith('-') and head != '-':
            items.extend(parse_shorts(tokens, options))
        elif options_first:
            # First positional reached: the remainder is all positional.
            items.extend(Argument(None, rest) for rest in tokens)
            break
        else:
            items.append(Argument(None, tokens.move()))
    return items
450 | 
451 | 
def parse_defaults(doc):
    """Return the options described in every "options:" section of `doc`.

    Each section is cut into entries at lines whose first non-blank
    character is '-', and every entry is parsed with `Option.parse`.
    """
    defaults = []
    for section in parse_section('options:', doc):
        # FIXME corner case "bla: options: --foo"
        _, _, body = section.partition(':')  # get rid of "options:"
        # The capture group keeps each entry's leading '-x' token as its
        # own list item, so items are re-joined pairwise below.
        # Raw string: '\S' is not a valid string escape.
        pieces = re.split(r'\n[ \t]*(-\S+?)', '\n' + body)[1:]
        entries = [head + tail
                   for head, tail in zip(pieces[::2], pieces[1::2])]
        defaults += [Option.parse(entry) for entry in entries
                     if entry.startswith('-')]
    return defaults
462 | 
463 | 
def parse_section(name, source):
    """Return every section of `source` whose first line contains `name`.

    A section is the line containing `name` (case-insensitive) plus all
    directly following lines that start with a space or tab; each
    section is returned stripped of surrounding whitespace.
    """
    regex = re.compile(
        ''.join(('^([^\n]*', name, '[^\n]*\n?(?:[ \t].*?(?:\n|$))*)')),
        re.MULTILINE | re.IGNORECASE)
    return [found.group(1).strip() for found in regex.finditer(source)]
468 | 
469 | 
def formal_usage(section):
    """Turn a usage section into one pattern string of alternatives.

    The text up to the first ':' is dropped; the first remaining word
    is taken as the program name, and each later occurrence of it
    starts a new ``( ... )`` alternative joined with ``|``.
    """
    words = section.partition(':')[2].split()  # drop "usage:"
    pieces = []
    for word in words[1:]:
        pieces.append(') | (' if word == words[0] else word)
    return ''.join(['( ', ' '.join(pieces), ' )'])
474 | 
475 | 
def extras(help, version, options, doc):
    """Handle the built-in help and version options.

    If `help` is truthy and `options` holds a '-h'/'--help' item with a
    truthy value, print `doc` (outer newlines stripped) and exit. If
    `version` is truthy and a '--version' item has a truthy value,
    print `version` and exit.
    """
    if help:
        for opt in options:
            if opt.name in ('-h', '--help') and opt.value:
                print(doc.strip("\n"))
                sys.exit()
    if version:
        for opt in options:
            if opt.name == '--version' and opt.value:
                print(version)
                sys.exit()
483 | 
484 | 
class Dict(dict):

    """dict whose repr lists the items sorted, one per line."""

    def __repr__(self):
        entries = ['%r: %r' % (key, value)
                   for key, value in sorted(self.items())]
        return '{' + ',\n '.join(entries) + '}'
488 | 
489 | 
def docopt(doc, argv=None, help=True, version=None, options_first=False):
    """Parse `argv` against the command-line interface described by `doc`.

    `doc` is the help text of your program. Its "usage:" section
    defines the accepted patterns, which may contain --options,
    <positional-argument>, commands, and elements that are
    [optional], (required), (mutually | exclusive) or repeated...

    Parameters
    ----------
    doc : str
        Description of your command-line interface.
    argv : list of str, optional
        Argument vector to be parsed. Defaults to sys.argv[1:].
    help : bool (default: True)
        When true, -h or --help prints `doc` and exits. Pass False
        to handle these options yourself.
    version : any object
        When given, it is printed (and the program exits) if
        --version is in `argv`.
    options_first : bool (default: False)
        When True, options must precede positional arguments, i.e.
        options and positional arguments may not be intermixed.

    Returns
    -------
    args : dict
        Maps the names of command-line elements, e.g. "--verbose"
        and "<path>", to their parsed values.

    Raises
    ------
    DocoptLanguageError
        If `doc` has no "usage:" section or more than one.
    DocoptExit
        If `argv` does not match the usage pattern.

    Example
    -------
    >>> from docopt import docopt
    >>> doc = '''
    ... Usage:
    ...     my_program tcp <host> <port> [--timeout=<seconds>]
    ...     my_program serial <port> [--baud=<n>] [--timeout=<seconds>]
    ...     my_program (-h | --help | --version)
    ...
    ... Options:
    ...     -h, --help  Show this screen and exit.
    ...     --baud=<n>  Baudrate [default: 9600]
    ... '''
    >>> argv = ['tcp', '127.0.0.1', '80', '--timeout', '30']
    >>> docopt(doc, argv)
    {'--baud': '9600',
     '--help': False,
     '--timeout': '30',
     '--version': False,
     '<host>': '127.0.0.1',
     '<port>': '80',
     'serial': False,
     'tcp': True}

    See also
    --------
    * For video introduction see http://docopt.org
    * Full documentation is available in README.rst as well as online
      at https://github.com/docopt/docopt#readme

    """
    if argv is None:
        argv = sys.argv[1:]

    sections = parse_section('usage:', doc)
    if not sections:
        raise DocoptLanguageError('"usage:" (case-insensitive) not found.')
    if len(sections) > 1:
        raise DocoptLanguageError('More than one "usage:" (case-insensitive).')
    # The usage text is stored on the DocoptExit class itself.
    DocoptExit.usage = sections[0]

    declared = parse_defaults(doc)
    pattern = parse_pattern(formal_usage(DocoptExit.usage), declared)
    # NOTE: the [default] syntax for positional arguments is disabled.
    parsed_argv = parse_argv(Tokens(argv), list(declared), options_first)

    in_pattern = set(pattern.flat(Option))
    for shortcut in pattern.flat(OptionsShortcut):
        # Fill each [options] placeholder with the documented options
        # that are not already part of the usage pattern.
        documented = set(parse_defaults(doc))
        shortcut.children = list(documented - in_pattern)

    extras(help, version, parsed_argv, doc)
    matched, left, collected = pattern.fix().match(parsed_argv)
    if not matched or left != []:  # better error message if left?
        raise DocoptExit()
    pairs = [(leaf.name, leaf.value) for leaf in pattern.flat() + collected]
    return Dict(pairs)
582 | 


--------------------------------------------------------------------------------