├── .gitignore
├── README.md
├── interface.py
└── lyrics.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AZlyrics
 2 | A simple CLI tool for searching lyrics from http://azlyrics.com/
 3 | 
 4 | ## Dependencies
 5 | It requires `python3` and `beautifulsoup`.  
 6 | To install beautifulsoup, you need `pip3` for python3.
 7 | 
 8 | ```bash
 9 | pip3 install beautifulsoup4
10 | ```
11 | 
12 | 
13 | ## Usage
14 | You can directly run the script **lyrics.py** with the queries supplied from the command line.  
15 | 
16 | Example:
17 | 
18 | ```bash
19 | python3 lyrics.py american idiot green day
20 | ```
21 | 


--------------------------------------------------------------------------------
/interface.py:
--------------------------------------------------------------------------------
  1 | from tkinter import *
  2 | from tkinter.messagebox import showinfo
  3 | import re
  4 | from bs4 import BeautifulSoup
  5 | import urllib.request, urllib.error, urllib.parse
  6 | 
# Base address of the AZLyrics search endpoint; search2() appends
# "?q=<encoded query>" to it.
url = "http://search.azlyrics.com/search.php"
# Results of the most recent search2() call, one list per result row.
# search2() stores the link at index 0, the bold texts after it and the
# <small> text at index 3; the UI reads indices 1 and 2 as the two halves of
# the displayed title and index 3 as the preview text.
# NOTE(review): presumably index 1 is the song and index 2 the artist --
# confirm against the live page markup.
song_and_name = []
  9 | 
 10 | def urlencode(text):
 11 |     """
 12 |         Url encode the text
 13 |     """
 14 |     q = {}
 15 |     encoded = ""
 16 |     if(text):
 17 |         q['q'] = text
 18 |         encoded = urllib.parse.urlencode(q)
 19 |         encoded = encoded[2::]
 20 |     return encoded
 21 | 
 22 | def search2(query):
 23 |     """
 24 |         Search the possible songs for this query.
 25 |         Returns the list of url for the song.
 26 |     """
 27 | 
 28 |     query = urlencode(query.lower())
 29 |     url_query = "?q={}".format(query)
 30 |     url_search = url + url_query
 31 |     response = urllib.request.urlopen(url_search)
 32 |     extractor = BeautifulSoup(response.read(), "html.parser")
 33 | 
 34 |     anchors = []
 35 |     links = []
 36 |     global song_and_name
 37 |     song_and_name = []
 38 | 
 39 |     panels = extractor.find_all('div', {'class' : 'panel'})
 40 | 
 41 |     to_extract = ""
 42 |     for panel in panels:
 43 |         if re.search("song results", panel.text, re.IGNORECASE):
 44 |             to_extract = panel
 45 |             break
 46 | 
 47 |     if not to_extract:
 48 |         links = []
 49 |         song_and_name = []
 50 | 
 51 |     else:
 52 |         table = to_extract.find_all("table", {'class' : 'table'})[0]
 53 |         rows = table.find_all('tr')
 54 |         for row in rows:
 55 |             pair = []
 56 |             for elem in row.find('td'):
 57 |                 if "<b>" in str(elem):
 58 |                     pair.append(elem.getText())
 59 |                 if "<small>" in str(elem):
 60 |                     pair.insert(3,elem.getText())
 61 |                 if "href" in str(elem):
 62 |                     pair.insert(0,elem.get('href'))
 63 |             song_and_name.append(pair)
 64 |             
 65 |         song_and_name = [ anchor for anchor in song_and_name if not url_query in anchor[0] ]
 66 | 
 67 | def lyrics_full(url):
 68 |     response = urllib.request.urlopen(url)
 69 |     read_lyrics = response.read()
 70 |     soup = BeautifulSoup(read_lyrics, "html.parser")
 71 |     lyrics = soup.find_all("div", attrs={"class": None, "id": None})
 72 |     lyrics = [x.getText() for x in lyrics][0]
 73 |     return lyrics
 74 | 
 75 | def get_lyrics(index):
 76 |     url = song_and_name[index][0]
 77 |     return [song_and_name[index][1] + " - " + song_and_name[index][2], lyrics_full(url)]
 78 | 
 79 | class Application(Frame):
 80 |     """
 81 |         Interface for the user
 82 |     """
 83 | 
 84 |     def __init__(self, master):
 85 |         """
 86 |             Initializes the Window
 87 |         """
 88 | 
 89 |         self.master = master
 90 |         self.frame = Frame(self.master)
 91 |         self.frame.winfo_toplevel().title("Lyric finder")
 92 |         self.label_info = Label(self.frame, text="Type the keyword")
 93 |         self.label_info.grid(row=0,column=0,padx=10,pady=10)
 94 |         self.entry_text = Entry(self.frame,width=30)
 95 |         self.entry_text.grid(row=0,column=1,padx=10,pady=10)
 96 |         self.button_bonus = Button(self.frame, width=20, text="Get songs",
 97 |         command=lambda: [f for f in [search2(self.entry_text.get()), self.update_ListBox()]])
 98 |         self.button_bonus.grid(row=0,column=2,padx=10,pady=10)
 99 | 
100 |         self.selectsong = Listbox(self.frame,selectmode="simple")
101 |         self.selectsong.config(width=50)
102 |         self.selectsong.grid(row=1,column=0,padx=10,pady=10,rowspan=2,columnspan=2)
103 |         self.lyrics_label = Label(self.frame, text="")
104 |         self.lyrics_label.config(width=50,wraplength=250)
105 |         self.lyrics_label.grid(row=1,column=2,padx=10,pady=10,rowspan=2,columnspan=2)
106 | 
107 |         self.button_showinfo = Button(self.frame, width=20, text="Show Song",
108 |         command=self.new_window, state="disabled")
109 |         self.button_showinfo.grid(row=0,column=3,padx=10,pady=10)
110 | 
111 |         self.update_lyricsPreview()
112 |         self.frame.pack()
113 | 
114 |     def update_ListBox(self):
115 |         self.selectsong.delete(0,END)
116 |         for x in reversed(song_and_name):
117 |             self.selectsong.insert(0,x[1] + "-" + x[2])
118 | 
119 |     def update_lyricsPreview(self):
120 |         if self.selectsong.curselection():
121 |             self.button_showinfo.config(state="normal")
122 |             self.lyrics_label.configure(text=song_and_name[self.selectsong.curselection()[0]][3])
123 |         else:
124 |             self.button_showinfo.config(state="disabled")
125 |             self.lyrics_label.configure(text="")
126 |         self.frame.after(1000, self.update_lyricsPreview)
127 | 
128 |     def new_window(self):
129 |         song, lyrics = get_lyrics(self.selectsong.curselection()[0])
130 |         self.newWindow = Toplevel(self.master)
131 |         self.app = LyricWindow(self.newWindow, song, lyrics)
132 | 
class LyricWindow:
    """
        Secondary window that shows the lyrics of one song next to a
        vertical scrollbar.
    """

    def __init__(self, master, song, lyrics):
        """
            Build the window content on ``master``; ``song`` becomes the
            window title and ``lyrics`` the displayed text.
        """

        frame = Frame(master)
        frame.winfo_toplevel().title(song)

        canvas = Canvas(frame)
        scrollbar = Scrollbar(frame, command=canvas.yview, orient="vertical")
        scrollbar.pack(side=RIGHT, fill=Y)

        text_label = Label(canvas, text=lyrics)
        text_label.pack(padx=10, pady=10)
        text_label.config(wraplength=0)

        # Embed the label in the canvas and make the whole content scrollable.
        canvas.create_window((0, 0), window=text_label, anchor='nw')
        content_box = canvas.bbox('all')
        canvas.configure(scrollregion=content_box, yscrollcommand=scrollbar.set)
        canvas.pack(fill='both', expand=True, side=LEFT)

        frame.pack()

        self.master = master
        self.frame = frame
        self.canvas = canvas
        self.scrollbary = scrollbar
        self.lyrics_output = text_label

    def close_windows(self):
        """
            Destroy the window this instance was built on.
        """
        self.master.destroy()
164 | 
def main():
    """
        Create the Tk root window, build the application on it and run the
        event loop until the window is closed.
    """
    window = Tk()
    # Held in a local for as long as the event loop runs.
    ui = Application(window)
    window.mainloop()


if __name__ == "__main__":
    main()
174 | 


--------------------------------------------------------------------------------
/lyrics.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import re
  4 | from bs4 import BeautifulSoup
  5 | import urllib.request, urllib.error, urllib.parse
  6 | import sys
  7 | 
  8 | class ManualError(Exception):
  9 |     def __init__(self, args):
 10 |         self.args = args
 11 |     def display(self):
 12 |         print(' '.join(self.args))
 13 | 
 14 | class LyricFinder:
 15 | 
 16 |     def search(self, query):
 17 |         return self._search_lyricswikia(query)
 18 |         # return self._search_azlyrics(query)
 19 | 
 20 |     def _search_lyricswikia(self, query):
 21 |         print("Searching lyrics.wikia.com")
 22 |         query = remove_multiple_spaces(query).lower()
 23 |         tokens1 = query.split()
 24 |         query = urlencode(query.lower())
 25 |         url = "http://lyrics.wikia.com/wiki/Special:Search?query={}".format(query)
 26 |         response = urllib.request.urlopen(url)
 27 |         extractor = BeautifulSoup(response.read(), "html.parser")
 28 |         divs = extractor.find_all("li", {'class' : 'result'})
 29 |         matches = []
 30 |         for div in divs:
 31 |             anchor = div.findAll('a')[0]
 32 |             title = anchor.text
 33 |             title = remove_multiple_spaces(remove_punct(title)).lower()
 34 |             tokens2 = title.split()
 35 |             link = anchor.attrs['href']
 36 |             dist = compute_jaccard(tokens1, tokens2)
 37 |             matches.append((title, link, dist))
 38 |         matches = sorted(matches, key = lambda x : x[2], reverse=True)
 39 |         if not matches:
 40 |             return ""
 41 | 
 42 |         url_full = matches[0][1]
 43 |         response = urllib.request.urlopen(url_full)
 44 |         extractor = BeautifulSoup(response.read(), "html.parser")
 45 |         div = extractor.find('div', {'class' : 'lyricbox'})
 46 |         return "" if not div else div.get_text('\n').strip()
 47 | 
 48 |     def _search_azlyrics(self, query):
 49 |         print("Searching azlyrics.com")
 50 |         links = self._get_links_azlyrics(query)
 51 |         print(links)
 52 |         if links:
 53 |             return self._get_links_azlyrics(links[0])
 54 |         else:
 55 |             return ''
 56 | 
 57 |     def _get_links_azlyrics(self, query):
 58 |         """
 59 |             Search the possible songs for this query.
 60 |             Returns the list of url for the song.
 61 |         """
 62 | 
 63 |         # first encode
 64 |         url = "http://search.azlyrics.com/search.php"
 65 |         query = urlencode(query.lower())
 66 |         url_query = "?q={}".format(query)
 67 |         url_search = url + url_query
 68 |         response = urllib.request.urlopen(url_search)
 69 |         extractor = BeautifulSoup(response.read(), "html.parser")
 70 | 
 71 |         anchors = []
 72 |         links = []
 73 | 
 74 |         # extract all the panels -> album, song, artist
 75 |         # since the search can give 3 type of div (panel)
 76 |         panels = extractor.find_all('div', {'class' : 'panel'})
 77 | 
 78 |         # now find the panel containing list of all the songs
 79 |         to_extract = ""
 80 |         for panel in panels:
 81 |             if re.search("song results", panel.text, re.IGNORECASE):
 82 |                 to_extract = panel
 83 |                 break
 84 | 
 85 |         # if nothing found
 86 |         if not to_extract:
 87 |             links = []
 88 |         else:
 89 |             table = to_extract.find_all("table", {'class' : 'table'})[0]
 90 |             rows = table.find_all('tr')
 91 |             anchors = [ row.find('td').find('a').get('href')  for row in rows ]
 92 | 
 93 |             # discard if the link/anchor is just a pagination link
 94 |             links = [ anchor for anchor in anchors if not url_query in anchor ]
 95 |         return links
 96 | 
 97 |     def _get_from_url_azlyric(self, url):
 98 |         response = urllib.request.urlopen(url)
 99 |         read_lyrics = response.read()
100 |         soup = BeautifulSoup(read_lyrics, "html.parser")
101 |         lyrics = soup.find_all("div", attrs={"class": None, "id": None})
102 |         lyrics = [x.getText() for x in lyrics][0]
103 |         return lyrics
104 | 
105 | 
106 | 
def urlencode(text):
    """
        Percent-encode ``text`` so it can be used as a query-string value.

        Falsy input (empty string, ``None``) yields an empty string.
    """
    if not text:
        return ""
    # urllib encodes key/value pairs only, so encode a throwaway "q" key and
    # cut its "q=" prefix off again.
    encoded_pair = urllib.parse.urlencode({'q': text})
    return encoded_pair[len("q="):]
118 | 
def compute_jaccard(tokens1, tokens2):
    """
        Return the Jaccard similarity of two token collections: the number
        of distinct shared tokens divided by the number of distinct tokens
        overall.

        The result lies between 0.0 and 1.0. Two empty collections have
        nothing in common and score 0.0 instead of dividing by zero.
    """
    first, second = set(tokens1), set(tokens2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
125 | 
def remove_multiple_spaces(string):
    """Collapse every run of whitespace in ``string`` into a single space."""
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub(' ', string)
128 | 
def remove_punct(string):
    """
        Strip apostrophes from ``string`` and turn each run of the other
        handled punctuation characters into a single space.
    """
    apostrophes = re.compile(r"[']+")
    separators = re.compile(r"[-:_!,/.()#?]+")
    without_apostrophes = apostrophes.sub('', string)
    return separators.sub(' ', without_apostrophes)
132 | 
133 | 
def main():
    """
        Command-line entry point: join all arguments into one query, search
        for its lyrics and print them, or print a notice when nothing was
        found (which is also the case when no argument was given).
    """
    finder = LyricFinder()
    words = sys.argv[1:]
    found = ''
    if words:
        print("Searching...\nHave patience and be an awesome potato...")
        found = finder.search(' '.join(words))

    print(found if found else "No songs found... -_-")


if __name__ == "__main__":
    main()
151 | 
152 | 


--------------------------------------------------------------------------------