├── .gitignore ├── README.md ├── requirements.txt ├── run.py ├── server ├── __init__.py ├── parser.py ├── tab.py ├── tab_parser.py └── views.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Virtual Environment 7 | venv/ 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ultimate-API 2 | 3 | :guitar: *An API for ultimate-guitar.com* 4 | 5 | ![Python-Version](https://img.shields.io/badge/Python-3.6.1-blue.svg) 6 | 7 | ## Setup 8 | 1. Install python3 from https://www.python.org/downloads/ 9 | 10 | 1. Create a virtual environment of python3: 11 | 12 | ```Python 13 | # Install virtualenv: 14 | # pip install virtualenv 15 | virtualenv -p /usr/local/bin/python3 venv 16 | source venv/bin/activate 17 | ``` 18 | 19 | 1. Install dependancies: 20 | 21 | ```Python 22 | pip install -r requirements.txt 23 | ``` 24 | 25 | 1. Usage: 26 | 27 | ```Python 28 | export FLASK_DEBUG=1 // Export for debug 29 | python run.py 30 | ``` 31 | 32 | ## Endpoints 33 | 34 | | Method | Endpoint | Parameters | Result | 35 | | ------ | -------- | ---------- | ------ | 36 | | `GET` | `/tab` | `url`: A full (including protocol) url for an ultimate-guitar.com tab. | JSON response containing tab info as well as each tab line 37 | 38 | ## Running Tests 39 | To run the full test suite execute the following from the top level directory. 40 | ```Python 41 | python test.py 42 | ``` 43 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.6.0 2 | bs4==0.0.1 3 | certifi==2017.7.27.1 4 | chardet==3.0.4 5 | click==6.7 6 | Flask==0.12.2 7 | idna==2.5 8 | itsdangerous==0.24 9 | Jinja2==2.9.6 10 | MarkupSafe==1.0 11 | requests==2.18.3 12 | urllib3==1.22 13 | Werkzeug==0.12.2 14 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | from server import app 2 | 3 | if __name__ == '__main__': 4 | app.run() 5 | -------------------------------------------------------------------------------- /server/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | app = Flask(__name__) 3 | 4 | # Import views 5 | import server.views 6 | -------------------------------------------------------------------------------- /server/parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | from bs4 import BeautifulSoup 3 | from .tab import UltimateTab, UltimateTabInfo 4 | import re 5 | 6 | def _tab_info_from_soup(soup: BeautifulSoup) -> UltimateTabInfo: 7 | ''' 8 | Returns a populated UltimateTabInfo object based on the provided soup. 9 | Parses based on UG site construction as of 9/3/17. 10 | 11 | Parameters: 12 | - soup: A BeautifulSoup for a Ultimate Guitar tab's html (or html body) 13 | ''' 14 | # Get song title and artist 15 | try: 16 | song_title = soup.find(attrs={'itemprop': 'name'}).text 17 | song_title = re.compile(re.escape('chords'), re.IGNORECASE).sub(r'', song_title).strip() # Remove the word 'chords' 18 | except: 19 | song_title = "UNKNOWN" 20 | 21 | try: 22 | artist_name = soup.find(attrs={'class': 't_autor'}).text.replace('\n', '') 23 | artist_name = re.compile(re.escape('by'), re.IGNORECASE).sub(r'', artist_name).strip()# Remove the word 'by' 24 | except: 25 | artist_name = "UNKNOWN" 26 | 27 | # Get info - author, capo, tuning, etc. 28 | author = "UNKNOWN" 29 | difficulty = None 30 | key = None 31 | capo = None 32 | tuning = None 33 | try: 34 | info_header_text = soup.find(attrs={'class': 't_dt'}).text.replace('\n', '') 35 | info_headers = [x.lower() for x in info_header_text.split(' ') if x] # Split string and make lowercase 36 | info_header_values = soup.findAll(attrs={'class': 't_dtde'}) 37 | 38 | for index, header in enumerate(info_headers): 39 | try: 40 | if header == 'author': 41 | author = info_header_values[index].a.text 42 | elif header == 'difficulty': 43 | difficulty = info_header_values[index].text.strip() 44 | elif header == 'key': 45 | key = info_header_values[index].text.strip() 46 | elif header == 'capo': 47 | capo = info_header_values[index].text.strip() 48 | elif header == 'tuning': 49 | tuning = info_header_values[index].text.strip() 50 | except: 51 | continue 52 | except: 53 | pass 54 | 55 | tab_info = UltimateTabInfo(song_title, artist_name, author, difficulty, key, capo, tuning) 56 | return tab_info 57 | 58 | 59 | def html_tab_to_json_dict(html_body: str, pre_class_tags: [str]) -> json: 60 | ''' 61 | Returns a json form of a 'pre' tag in an untimate guitar html tabs body. 62 | 63 | Parameters: 64 | - html_body: The full html body of an ultimate guitar tab site 65 | - pre_class_tags: An array of strings for the class names of a 'pre' tag where the chords are located to parse 66 | ''' 67 | soup = BeautifulSoup(html_body, "html.parser") 68 | 69 | # Get UltimateTabInfo object from soup html for artist, title, etc. 70 | tab_info = _tab_info_from_soup(soup) 71 | 72 | # Get tab's content from html (lyrics + chords) 73 | tabs_html_content = soup.find('pre', attrs={'class': pre_class_tags}) 74 | 75 | # Strip `pre` tag and convert to string to parse 76 | formatted_tab_string = ''.join(map(str, tabs_html_content.contents)) 77 | 78 | # Parse each line of the string into json 79 | tab = UltimateTab() 80 | for tab_line in formatted_tab_string.split('\n'): 81 | re_span_tag = re.compile(r']*>|<\/span[^>]*>') 82 | 83 | if not tab_line: # Line is blank 84 | tab.append_blank_line() 85 | elif re_span_tag.search(tab_line): # Line contains chords 86 | sanitized_tab_line = re_span_tag.sub(r' ', tab_line) 87 | tab.append_chord_line(sanitized_tab_line) 88 | else: # Line contains lyrics/string 89 | #tab_line = tab_line.encode('ascii', 'replace') # Encode as ascii 90 | tab.append_lyric_line(tab_line) 91 | 92 | # Construct full json object 93 | json = { 94 | 'title': tab_info.title, 95 | 'artist_name': tab_info.artist, 96 | 'author': tab_info.author 97 | } 98 | 99 | # add tab info if it exists 100 | if tab_info.difficulty is not None: 101 | json['difficulty'] = tab_info.difficulty 102 | if tab_info.key is not None: 103 | json['key'] = tab_info.key 104 | if tab_info.capo is not None: 105 | json['capo'] = tab_info.capo 106 | if tab_info.tuning is not None: 107 | json['tuning'] = tab_info.tuning 108 | 109 | json['lines'] = tab.as_json_dictionary()['lines'] 110 | 111 | # Return constructed json under a single tag 112 | return {'tab': json} 113 | -------------------------------------------------------------------------------- /server/tab.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | # tab { 4 | # title: "tab name", 5 | # artist_name: "", 6 | # author: "", 7 | # capo: "" (can be null), 8 | # Tuning: "" (can be null), 9 | # 10 | # lines: [ 11 | # { 12 | # type: "chord" (OR "lyrics", "blank"), 13 | # chords: [ 14 | # { 15 | # note: "G", 16 | # pre_spaces: 10 17 | # }, 18 | # { 19 | # note: "Em", 20 | # pre_spaces: 8 21 | # } 22 | # ] 23 | # }, 24 | # { 25 | # type: "lyrics", 26 | # lyrics: "I found a love for me" 27 | # }, 28 | # { 29 | # type: "blank" 30 | # } 31 | # ] 32 | # } 33 | 34 | class UltimateTabInfo(object): 35 | ''' 36 | Represents the info of an ultimate guitar tab. Does not contain any lyrics or chords 37 | ''' 38 | 39 | def __init__(self, title: str, artist: str, author: str, difficulty: str = None, key: str = None, capo: str = None, tuning: str = None): 40 | self.title = title 41 | self.artist = artist 42 | self.author = author 43 | # Optionals: 44 | self.difficulty = difficulty 45 | self.key = key 46 | self.capo = capo 47 | self.tuning = tuning 48 | 49 | 50 | class UltimateTab(object): 51 | ''' 52 | Represents an ultimate guitar tab containing Lyrics and Chords 53 | 54 | A `queue-like` object which will append lines to object 55 | and can be parsed to formatted json. 56 | ''' 57 | 58 | JSON_CONTAINER_NAME = 'lines' 59 | JSON_KEY_CHORD_ARRAY = 'chords' 60 | JSON_KEY_NOTE = 'note' 61 | JSON_KEY_LYRIC = 'lyric' 62 | JSON_KEY_BLANK = 'blank' 63 | JSON_KEY_TYPE = 'type' 64 | JOSN_KEY_LEAD_SPACES = 'pre_spaces' 65 | 66 | def __init__(self): 67 | self.lines = [] 68 | 69 | 70 | def _append_new_line(self, type: str, content_tag: str, content: Any) -> None: 71 | line = {'type': type} 72 | if content_tag is not None: 73 | line[content_tag] = content 74 | 75 | self.lines.append(line) 76 | 77 | 78 | def append_chord_line(self, chords_line: str) -> None: 79 | ''' 80 | Appends a chord line to the tab. 81 | 82 | Parameters: 83 | - chords_line: A single-line string containing leading spaces and guitar chords (i.e. G, Em, etc.) 84 | ''' 85 | chords = [] # Array of dictionary of chords 86 | 87 | leading_spaces = 0 88 | for c in chords_line.split(' '): 89 | if not c: # A space character recognized 90 | leading_spaces += 1 91 | else: 92 | chord = { 93 | self.JSON_KEY_NOTE: c, 94 | self.JOSN_KEY_LEAD_SPACES: leading_spaces 95 | } 96 | chords.append(chord) 97 | leading_spaces = 1 # reset for next chord to read in - resets to 1 to compensate for `split` 98 | 99 | self._append_new_line(self.JSON_KEY_CHORD_ARRAY, self.JSON_KEY_CHORD_ARRAY, chords) 100 | 101 | def append_lyric_line(self, lyric_line: str) -> None: 102 | ''' 103 | Appends a lyric line to the tab. 104 | 105 | Parameters: 106 | - lyric_line: A single-line string containing lyrics (and any leading spaces needed) 107 | ''' 108 | self._append_new_line(self.JSON_KEY_LYRIC, self.JSON_KEY_LYRIC, lyric_line) 109 | 110 | def append_blank_line(self) -> None: 111 | ''' 112 | Appends a blank line to the tab. 113 | ''' 114 | self._append_new_line(self.JSON_KEY_BLANK, None, None) 115 | 116 | def as_json_dictionary(self) -> dict: 117 | ''' 118 | Returns a dictionary representation of the tab object. 119 | Properly formatted for use as a json object. 120 | ''' 121 | return {self.JSON_CONTAINER_NAME: self.lines} 122 | -------------------------------------------------------------------------------- /server/tab_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import requests 4 | from .parser import html_tab_to_json_dict 5 | 6 | def dict_from_ultimate_tab(url: str) -> json: 7 | ''' 8 | Given a Ultimate Guitar tab url, will return a dictionary representing the 9 | song along with the song info 10 | ''' 11 | html = requests.get(url).content 12 | ug_tags = ['js-tab-content', 'js-copy-content'] # tags the tabs are contained in 13 | tab_dict = html_tab_to_json_dict(html, ug_tags) 14 | return tab_dict 15 | 16 | 17 | def json_from_ultimate_tab(url: str) -> json: 18 | ''' 19 | Given a Ultimate Guitar tab url, will return a json object representing the 20 | song along with the song info 21 | ''' 22 | tab_dict = dict_from_ultimate_tab(url) 23 | data = json.dumps(tab_dict, ensure_ascii=False) 24 | return data 25 | 26 | 27 | if __name__ == '__main__': 28 | try: 29 | url = sys.argv[1] 30 | except: 31 | print('INCORRECT USAGE\n') 32 | print(' Usage:') 33 | print(' python %s {url}' % sys.argv[0]) 34 | sys.exit() 35 | 36 | json_data = json_from_ultimate_tab(url) 37 | 38 | pretty_format_json = json.dumps(json.loads(json_data), indent=4, sort_keys=True) 39 | print(pretty_format_json) 40 | -------------------------------------------------------------------------------- /server/views.py: -------------------------------------------------------------------------------- 1 | from server import app 2 | from flask import request, jsonify 3 | from urllib.parse import urlparse 4 | from .tab_parser import dict_from_ultimate_tab 5 | 6 | 7 | SUPPORTED_UG_URI = 'tabs.ultimate-guitar.com' 8 | 9 | @app.route('/') 10 | def index(): 11 | return 'hi' 12 | 13 | @app.route('/tab') 14 | def tab(): 15 | try: 16 | ultimate_url = request.args.get('url') 17 | 18 | # Ensure sanitized url 19 | parsed_url = urlparse(ultimate_url) 20 | location = parsed_url.netloc 21 | if location != SUPPORTED_UG_URI: 22 | raise Exception('unsupported url scheme') 23 | except Exception as e: 24 | return jsonify({'error': str(e)}), 500 25 | 26 | tab_dict = dict_from_ultimate_tab(ultimate_url) 27 | return jsonify(tab_dict) 28 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | ## ToDo: 2 | - [ ] Add Unit Tests 3 | - Test for Ed Sheeran - Perfect 4 | - Test for Jason Mraz - I'm Yours (because of the title parsing) 5 | - Test for Passenger - Let Her Go 6 | --------------------------------------------------------------------------------