├── .gitignore ├── README.md ├── interface.py └── lyrics.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AZlyrics 2 | A simple CLI tool for searching lyrics from http://azlyrics.com/ 3 | 4 | ## Dependencies 5 | It requires `python3` and `beautifulsoup`. 
6 | To install beautifulsoup, you need `pip3` for python3. 7 | 8 | ```bash 9 | pip3 install beautifulsoup4 10 | ``` 11 | 12 | 13 | ## Usage 14 | You can directly run the script **lyrics.py** with the queries supplied from the command line. 15 | 16 | Example: 17 | 18 | ```bash 19 | python3 lyrics.py american idiot green day 20 | ``` 21 | -------------------------------------------------------------------------------- /interface.py: -------------------------------------------------------------------------------- 1 | from tkinter import * 2 | from tkinter.messagebox import showinfo 3 | import re 4 | from bs4 import BeautifulSoup 5 | import urllib.request, urllib.error, urllib.parse 6 | 7 | url = "http://search.azlyrics.com/search.php" 8 | song_and_name = [] 9 | 10 | def urlencode(text): 11 | """ 12 | Url encode the text 13 | """ 14 | q = {} 15 | encoded = "" 16 | if(text): 17 | q['q'] = text 18 | encoded = urllib.parse.urlencode(q) 19 | encoded = encoded[2::] 20 | return encoded 21 | 22 | def search2(query): 23 | """ 24 | Search the possible songs for this query. 25 | Returns the list of url for the song. 
def lyrics_full(url):
    """
    Download a song page and return the text of its lyrics container.

    The lyrics are taken from the first <div> on the page that carries
    neither a ``class`` nor an ``id`` attribute.

    url -- address of the song page to fetch.

    Returns the text of that <div>, or an empty string when the page has no
    such element (previously this case raised IndexError).
    Network failures from urllib still propagate to the caller.
    """
    # The context manager closes the HTTP response even if read() fails,
    # so the socket is not leaked.
    with urllib.request.urlopen(url) as response:
        page = response.read()

    soup = BeautifulSoup(page, "html.parser")
    candidates = soup.find_all("div", attrs={"class": None, "id": None})
    if not candidates:
        return ""
    return candidates[0].getText()
def update_lyricsPreview(self):
    """
    Sync the preview label and the "Show Song" button with the selection.

    With a song selected in the list box, the button is enabled and the
    label shows the fourth field of the matching ``song_and_name`` row
    (presumably the lyric snippet from the search page -- confirm against
    search2). Without a selection the button is disabled and the label is
    cleared.

    The method re-arms itself to run again after 1000 ms. That happens in a
    ``finally`` clause so a malformed row or stale index cannot stop the
    polling for the rest of the session.
    """
    try:
        selection = self.selectsong.curselection()
        if selection:
            self.button_showinfo.config(state="normal")
            row = song_and_name[selection[0]]
            # Rows are built field by field and may hold fewer than four
            # entries; show an empty preview instead of raising IndexError.
            preview = row[3] if len(row) > 3 else ""
            self.lyrics_label.configure(text=preview)
        else:
            self.button_showinfo.config(state="disabled")
            self.lyrics_label.configure(text="")
    finally:
        self.frame.after(1000, self.update_lyricsPreview)
class ManualError(Exception):
    """
    Error carrying one or more message fragments.

    args -- either a single message string or an iterable of fragments.
            A plain string is kept as one fragment; assigning it directly
            to ``Exception.args`` would split it into single characters.
    """

    def __init__(self, args):
        if isinstance(args, str):
            args = (args,)
        # Let Exception store the fragments so str(), repr() and pickling
        # behave like any other exception.
        super().__init__(*args)

    def display(self):
        """
        Print the message fragments joined by single spaces.
        """
        print(' '.join(str(part) for part in self.args))
29 | matches = [] 30 | for div in divs: 31 | anchor = div.findAll('a')[0] 32 | title = anchor.text 33 | title = remove_multiple_spaces(remove_punct(title)).lower() 34 | tokens2 = title.split() 35 | link = anchor.attrs['href'] 36 | dist = compute_jaccard(tokens1, tokens2) 37 | matches.append((title, link, dist)) 38 | matches = sorted(matches, key = lambda x : x[2], reverse=True) 39 | if not matches: 40 | return "" 41 | 42 | url_full = matches[0][1] 43 | response = urllib.request.urlopen(url_full) 44 | extractor = BeautifulSoup(response.read(), "html.parser") 45 | div = extractor.find('div', {'class' : 'lyricbox'}) 46 | return "" if not div else div.get_text('\n').strip() 47 | 48 | def _search_azlyrics(self, query): 49 | print("Searching azlyrics.com") 50 | links = self._get_links_azlyrics(query) 51 | print(links) 52 | if links: 53 | return self._get_links_azlyrics(links[0]) 54 | else: 55 | return '' 56 | 57 | def _get_links_azlyrics(self, query): 58 | """ 59 | Search the possible songs for this query. 60 | Returns the list of url for the song. 
def urlencode(text):
    """
    Percent-encode text for use as a URL query-string value.

    Spaces become ``+`` and reserved characters are escaped. Falsy input
    (empty string, None) yields an empty string.
    """
    if not text:
        return ""
    if not isinstance(text, (str, bytes)):
        # urllib.parse.urlencode stringified non-text values; keep doing so.
        text = str(text)
    # quote_plus is the per-value encoder urllib.parse.urlencode applies by
    # default, so this gives the same result without building "q=<value>"
    # and slicing the "q=" prefix off again.
    return urllib.parse.quote_plus(text)


def compute_jaccard(tokens1, tokens2):
    """
    Return the Jaccard similarity of two token collections.

    The result is |A & B| / |A | B| over the sets of distinct tokens, in the
    range 0.0 to 1.0. Two empty collections have no tokens to compare and
    score 0.0; the original raised ZeroDivisionError here.
    """
    first, second = set(tokens1), set(tokens2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)


def remove_multiple_spaces(string):
    """
    Collapse every run of whitespace in the string into a single space.
    """
    return re.sub(r'\s+', ' ', string)


def remove_punct(string):
    """
    Strip punctuation from the string.

    Apostrophes are deleted outright so contractions stay one word; each run
    of the other listed punctuation marks is replaced by one space.
    """
    without_apostrophes = re.sub(r"[']+", '', string)
    return re.sub(r"[-:_!,/.()#?]+", ' ', without_apostrophes)