├── doug-mckenzie-jazz ├── codes │ ├── log.txt │ ├── DMjazzCrawler.py │ └── DMjazzCrawler.ipynb └── archive │ └── A Sleepin' Bee.mid ├── vgmdb ├── utils │ ├── log.txt │ ├── VGMCrawler.py │ └── VGMCrawler.ipynb └── archive │ └── 3do │ └── 3do │ └── 37245_Super-Street-Fighter-2--Guile.mid ├── theorytab ├── utils │ ├── requirements.txt │ ├── youtube_crawler.py │ ├── theorytab_crawler.py │ └── theorytab_crawler.ipynb ├── archive │ ├── archive_artist.json │ └── a │ │ └── aage-aleksandersen │ │ └── fire-pils-og-en-pizza │ │ ├── video.mp4 │ │ ├── song_info.json │ │ ├── video_info.json │ │ ├── verse.xml │ │ ├── intro.xml │ │ └── chorus.xml └── README.md ├── docs ├── hey_jude_chorus.PNG └── 5-track_pianoroll.PNG ├── piano-e-competition ├── utils │ ├── log.txt │ └── EPcompCrawler.py └── archive │ └── 2004 │ ├── midi │ └── ADIG01.mid │ └── zip │ └── Adigezalzade_M_ESEQ.zip ├── 5-track-pianoroll ├── sample │ ├── 51901.png │ └── 51901_test_round.mid ├── compile.py ├── readme.md └── parser.py ├── hymnal ├── archive │ └── children │ │ └── 1 │ │ ├── all.mid │ │ ├── audio.mp3 │ │ ├── ls_text.pdf │ │ ├── melody.mid │ │ ├── ls_guitar.pdf │ │ ├── ls_paino.pdf │ │ ├── song_metadata.json │ │ └── lyric.xml └── utils │ ├── log.txt │ ├── hymnCrawler.py │ └── hymnCrawler.ipynb └── README.md /doug-mckenzie-jazz/codes/log.txt: -------------------------------------------------------------------------------- 1 | Total: 297 2 | -------------------------------------------------------------------------------- /vgmdb/utils/log.txt: -------------------------------------------------------------------------------- 1 | total: 28419 songs 2 | Elapsed time: 12:26:37 3 | -------------------------------------------------------------------------------- /theorytab/utils/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | requests 3 | pafy 4 | ffmpy 5 | youtube-dl 6 | lxml 
-------------------------------------------------------------------------------- /docs/hey_jude_chorus.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/docs/hey_jude_chorus.PNG -------------------------------------------------------------------------------- /piano-e-competition/utils/log.txt: -------------------------------------------------------------------------------- 1 | Total midi files 1573 2 | Total zip files 964 3 | 4 | Elapsed time: 23:59:52 5 | -------------------------------------------------------------------------------- /docs/5-track_pianoroll.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/docs/5-track_pianoroll.PNG -------------------------------------------------------------------------------- /5-track-pianoroll/sample/51901.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/5-track-pianoroll/sample/51901.png -------------------------------------------------------------------------------- /hymnal/archive/children/1/all.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/all.mid -------------------------------------------------------------------------------- /hymnal/archive/children/1/audio.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/audio.mp3 -------------------------------------------------------------------------------- /hymnal/archive/children/1/ls_text.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/ls_text.pdf -------------------------------------------------------------------------------- /hymnal/archive/children/1/melody.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/melody.mid -------------------------------------------------------------------------------- /hymnal/archive/children/1/ls_guitar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/ls_guitar.pdf -------------------------------------------------------------------------------- /hymnal/archive/children/1/ls_paino.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/ls_paino.pdf -------------------------------------------------------------------------------- /5-track-pianoroll/sample/51901_test_round.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/5-track-pianoroll/sample/51901_test_round.mid -------------------------------------------------------------------------------- /doug-mckenzie-jazz/archive/A Sleepin' Bee.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/doug-mckenzie-jazz/archive/A Sleepin' Bee.mid -------------------------------------------------------------------------------- /piano-e-competition/archive/2004/midi/ADIG01.mid: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/piano-e-competition/archive/2004/midi/ADIG01.mid -------------------------------------------------------------------------------- /theorytab/archive/archive_artist.json: -------------------------------------------------------------------------------- 1 | { 2 | "a": { 3 | "aage-aleksandersen": [ 4 | "fire-pils-og-en-pizza", 5 | "norge-mitt-norge" 6 | ] 7 | } 8 | } -------------------------------------------------------------------------------- /piano-e-competition/archive/2004/zip/Adigezalzade_M_ESEQ.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/piano-e-competition/archive/2004/zip/Adigezalzade_M_ESEQ.zip -------------------------------------------------------------------------------- /vgmdb/archive/3do/3do/37245_Super-Street-Fighter-2--Guile.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/vgmdb/archive/3do/3do/37245_Super-Street-Fighter-2--Guile.mid -------------------------------------------------------------------------------- /theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/video.mp4 -------------------------------------------------------------------------------- /hymnal/archive/children/1/song_metadata.json: -------------------------------------------------------------------------------- 1 | {"Category": "Praise of the Lord", "Subcategory": "His Love", "Music": "Carey Bonner\u00a0(1859-1938)", "Key": "C Major", "Time": "2/4", "Hymn Code": "1135671653", "title": "Praise Him, praise Him, all ye little children"} 
-------------------------------------------------------------------------------- /theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/song_info.json: -------------------------------------------------------------------------------- 1 | {"genres": ["Rock"], "song_url": "https://www.hooktheory.com/theorytab/view/aage-aleksandersen/fire-pils-og-en-pizza", "pk": ["205007", "205011", "205014"], "section": ["intro", "verse", "chorus"], "wikiid": "9479"} -------------------------------------------------------------------------------- /theorytab/README.md: -------------------------------------------------------------------------------- 1 | ## Theorytab 2 | The code here is out of date. Visitors can check out my other repo: 3 | * [Lead Sheet Dataset](https://github.com/wayne391/Lead-Sheet-Dataset) 4 | 5 | It's also crawled from [Theorytab](https://www.hooktheory.com/site). Furthermore, I wrote a set of tools to generate various formats. 6 | 7 | 8 | -------------------------------------------------------------------------------- /theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/video_info.json: -------------------------------------------------------------------------------- 1 | {"crawl_time": "2017-11-07 20:56:41", "length": 244, "author": "Skytebas", "duration": "00:04:04", "dislikes": 26, "YouTubeID": "-I_zKOfTKIM", "description": "Fire pils og en pizza", "viewcount": 430507, "likes": 570, "title": "\u00c5ge Aleksandersen -Fire pils og en pizza", "rating": 4.8255033493} -------------------------------------------------------------------------------- /hymnal/archive/children/1/lyric.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 9 | 12 | 13 | 14 | 15 | 18 | 19 | 20 |
4 |
1
5 |
Praise Him, praise Him, all ye little children,
God is love, God is love;
Praise Him, praise Him, all ye little children,
God is love, God is love.
10 |
2
11 |
Love Him, love Him, all ye little children,
    God is love, God is love;
Love Him, love Him, all ye little children,
    God is love, God is love.
16 |
3
17 |
Thank Him, thank Him, all ye little children,
    God is love, God is love;
Thank Him, thank Him, all ye little children,
    God is love, God is love.
-------------------------------------------------------------------------------- /5-track-pianoroll/compile.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | print('[*] loading...') 4 | proc_list = np.load('segments.npy') 5 | print('[*] processing...') 6 | num_item = len(proc_list) 7 | print(num_item) 8 | compiled_list = [] 9 | for lidx in range(num_item): 10 | multi_track = proc_list[lidx] 11 | pianorolls = [] 12 | 13 | for tracks in multi_track.tracks: 14 | pianorolls.append(tracks.pianoroll[:, :, np.newaxis]) 15 | 16 | pianoroll_compiled = np.reshape(np.concatenate(pianorolls, axis=2)[:, 24:108, :], (8, 48, 84, 5)) 17 | pianoroll_compiled = pianoroll_compiled[np.newaxis, :] > 0 18 | compiled_list.append(pianoroll_compiled.astype(bool)) 19 | 20 | final = np.concatenate(compiled_list, axis=0) 21 | print(final.shape) 22 | print('[*] saving...') 23 | np.save('x_lpd_5_phr.npy', final) 24 | print('Done!!') -------------------------------------------------------------------------------- /theorytab/utils/youtube_crawler.py: -------------------------------------------------------------------------------- 1 | import pafy 2 | import ffmpy 3 | from time import gmtime, strftime 4 | import json 5 | import os 6 | 7 | def video_crawler(y_id, filepath=''): 8 | 9 | try: 10 | video = pafy.new(y_id) 11 | 12 | video_info = dict() 13 | video_info['YouTubeID'] = y_id 14 | video_info['title'] = video.title 15 | video_info['rating'] = video.rating 16 | video_info['viewcount'] = video.viewcount 17 | video_info['author'] = video.author 18 | video_info['length'] = video.length 19 | video_info['duration'] = video.duration 20 | video_info['likes'] = video.likes 21 | video_info['dislikes'] = video.dislikes 22 | video_info['crawl_time'] = strftime("%Y-%m-%d %H:%M:%S", gmtime()) 23 | video_info['description'] = video.description 24 | 25 | with open(os.path.join(filepath,'video_info.json'), 'w') as f: 26 | json.dump(video_info, f) 
27 | 28 | best = video.getbest(preftype="mp4") 29 | best.download(quiet=True, filepath=os.path.join(filepath,'video.mp4')) 30 | 31 | return True 32 | except: 33 | print('Download Failed: %s ' % y_id) 34 | return False 35 | 36 | 37 | 38 | if __name__ == '__main__': 39 | y_id = 'n1BtExxkP0M' 40 | video_crawler(y_id, '') 41 | video_crawler(y_id, '') -------------------------------------------------------------------------------- /5-track-pianoroll/readme.md: -------------------------------------------------------------------------------- 1 | # 5-track pianoroll dataset 2 | 3 | This repository contains pre-processing code and processed datasets derived from the [LPD](https://github.com/salu133445/lakh-pianoroll-dataset) dataset. 4 | 5 | ## Source Codes for Pre-processing 6 | 7 | 1. Download 'lpd_cleansed' from [here](https://github.com/salu133445/lakh-pianoroll-dataset) 8 | 9 | 2. run *parser.py* 10 | 3. run *compile.py* 11 | 12 | You can change the settings to customize your own piano-roll dataset. 13 | 14 | ## Processed 5-track Piano-roll Datasets 15 | 16 | #### Latest Version (ver.2) 17 | 18 | * num of tracks: **5** 19 | * *Bass, Drum, Guitar, String and Piano* 20 | * time resolution: **48** 21 | * pitch range: **84** 22 | * num of bar: **8** 23 | * num of phrases: **34126** 24 | * the shape of the tensor is **34126 x 8 x 48 x 84 x 5** 25 | * 5.12 GB 26 | * select segments with higher quality. One instrument for one track. 27 | * [Download](https://drive.google.com/file/d/17FBw7c_vrK33_mEgsA919GTSlHoJ7M6T/view?usp=sharing) 28 | 29 | #### Old Version for MuseGAN (ver.1) 30 | 31 | * num of tracks: **5** 32 | * *Bass, Drum, Guitar, String and Piano* 33 | * time resolution: **96** 34 | * pitch range: **84** 35 | * num of bar: **4** 36 | * num of phrases: **50266** 37 | * the shape of the tensor is **50266 x 384 x 84 x 5** 38 | * 7.54 GB 39 | * Compress instruments in the same midi family into one track.
See [here](https://github.com/salu133445/musegan/tree/master/v1/training) 40 | * [Download](https://drive.google.com/file/d/1yj-5CsAwSoj1LHk4QwEQ09VB5fS69Vnq/view?usp=sharing) 41 | 42 | Generally, version 2 has richer but clear textures. 43 | 44 | -------------- 45 | Sample image of 5-track Piano-roll Datasets (ver.2): 46 | 47 | ![image](https://github.com/wayne391/List-of-Symbolic-Musical-Datasets/blob/master/docs/5-track_pianoroll.PNG) 48 | 49 | The generated samples of version 2 on MuseGAN is available [here](sample). 50 | -------------------------------------------------------------------------------- /hymnal/utils/log.txt: -------------------------------------------------------------------------------- 1 | ================================================= 2 | https://www.hymnal.net/en/song-index/h/A 3 | https://www.hymnal.net/en/song-index/h/B 4 | https://www.hymnal.net/en/song-index/h/C 5 | https://www.hymnal.net/en/song-index/h/D 6 | https://www.hymnal.net/en/song-index/h/E 7 | https://www.hymnal.net/en/song-index/h/F 8 | https://www.hymnal.net/en/song-index/h/G 9 | https://www.hymnal.net/en/song-index/h/H 10 | https://www.hymnal.net/en/song-index/h/I 11 | https://www.hymnal.net/en/song-index/h/J 12 | https://www.hymnal.net/en/song-index/h/K 13 | https://www.hymnal.net/en/song-index/h/L 14 | https://www.hymnal.net/en/song-index/h/M 15 | https://www.hymnal.net/en/song-index/h/N 16 | https://www.hymnal.net/en/song-index/h/O 17 | https://www.hymnal.net/en/song-index/h/P 18 | https://www.hymnal.net/en/song-index/h/R 19 | https://www.hymnal.net/en/song-index/h/S 20 | https://www.hymnal.net/en/song-index/h/T 21 | https://www.hymnal.net/en/song-index/h/U 22 | https://www.hymnal.net/en/song-index/h/V 23 | https://www.hymnal.net/en/song-index/h/W 24 | https://www.hymnal.net/en/song-index/h/Y 25 | https://www.hymnal.net/en/song-index/nt 26 | https://www.hymnal.net/en/song-index/ns/1 27 | https://www.hymnal.net/en/song-index/ns/2 28 | 
class DMjazzCrawler():
    """Crawler for the Doug McKenzie jazz MIDI listing.

    The crawler reads ``BASE_URL + '/midi.htm'``, collects every anchor whose
    ``href`` contains ``.mid``, and downloads each file into ``ROOT``.

    Args:
        sleep_time: Seconds to sleep after every HTTP request.
        log: When true, ``_log_print`` also appends to ``log.txt`` in the
            current working directory.
    """
    BASE_URL = 'http://www.bushgrafts.com/jazz'
    ROOT = 'archive'

    def __init__(self, sleep_time=0.1, log=True):
        self.sleep_time = sleep_time
        self.log = log

    def _request_url(self, url, doctype='html'):
        """Fetch ``url`` and return it in the form selected by ``doctype``.

        Args:
            url: Address to request.
            doctype: ``'html'`` returns a parsed ``BeautifulSoup`` tree,
                ``'content'`` returns the raw response bytes, and any other
                value returns the response object itself.
        """
        # Ask intermediaries not to serve a cached copy.
        response = requests.get(url, headers={"Cache-Control": "max-age=0"})

        # Throttle: pause after every request.
        time.sleep(self.sleep_time)

        if doctype == 'html':
            return BeautifulSoup(response.text, 'html.parser')
        if doctype == 'content':
            return response.content
        return response

    def _log_print(self, log, quite=False):
        """Print ``log`` (unless ``quite``) and append it to ``log.txt`` if logging is on."""
        if not quite:
            print(log)

        if self.log:
            with open("log.txt", "a") as f:
                print(log, file=f)

    @staticmethod
    def _clean_song_name(text):
        """Drop CRLF pairs, collapse whitespace runs to one space, trim spaces."""
        return re.sub(r'\s+', ' ', text.replace('\r\n', '')).strip(' ')

    def fetch_song(self):
        """Scrape the MIDI index page.

        Returns:
            dict mapping MIDI file name -> song title, built from every
            anchor that links to a ``.mid`` file and has non-empty text.
        """
        self.soup = self._request_url(self.BASE_URL + '/midi.htm')
        # The original read the page through the global ``dmc`` created in
        # the ``__main__`` guard, which raises NameError for any other
        # caller; the instance's own attribute is what was meant.
        a_list = self.soup.find_all('a')
        midi_list = []
        name_list = []

        cnt = 0
        for idx, a in enumerate(a_list):
            href = a.get('href')
            # NOTE(review): midi_list holds bare file names while href still
            # carries its directory, so this membership test looks like it
            # rarely filters anything; duplicate file names collapse when the
            # dict is built below. Kept as-is so the logged total is unchanged.
            if not href or href in midi_list or '.mid' not in href:
                continue

            song_name = self._clean_song_name(a.text)
            if not song_name:
                continue

            # NOTE(review): assumes hrefs look like '<dir>/<file>.mid'.
            midi_fn = href.split('/')[1]
            midi_list.append(midi_fn)
            name_list.append(song_name)
            print('%3d | %-40s %s' % (idx, song_name, midi_fn))
            cnt += 1

        self._log_print('Total: %d' % cnt)

        return dict(zip(midi_list, name_list))

    def crawl_song(self, song_dict):
        """Download every file named by the keys of ``song_dict`` into ``ROOT``."""
        for idx, file_name in enumerate(song_dict):
            url = self.BASE_URL + '/Midi%20site/' + file_name
            print('%3d %s' % (idx, url))
            content = self._request_url(url, doctype='content')

            with open(os.path.join(self.ROOT, file_name), "wb") as f:
                f.write(content)

    def run(self):
        """Fetch the song index, save it as ``archive.json``, then download all files."""
        song_dict = self.fetch_song()

        # exist_ok avoids the check-then-create race of the original.
        os.makedirs(self.ROOT, exist_ok=True)
        with open(os.path.join(self.ROOT, 'archive.json'), "w") as f:
            json.dump(song_dict, f)

        self.crawl_song(song_dict)
DMjazzCrawler() 87 | dmc.run() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # List of Symbolic Musical Datasets 2 | 3 | This repository aims to collect accessible symbolic musical datasets on the Net. 4 | Generally, each dataset is organized in the following way: 5 | * archive: samples from the dataset 6 | * utils: code for crawling or processing 7 | 8 | ## Contents 9 | * Piano-roll 10 | * Lead Sheet 11 | * MIDI 12 | * MISC 13 | 14 | --- 15 | 16 | ## Piano Roll 17 | ### 5 track piano-roll dataset 18 | ![image](https://github.com/wayne391/List-of-Symbolic-Musical-Datasets/blob/master/docs/5-track_pianoroll.PNG) 19 | 20 | This dataset is derived from [LPD](https://github.com/salu133445/lakh-pianoroll-dataset) with a new pre-processing policy. 21 | 22 | ### lead sheet dataset 23 | ![image](https://github.com/wayne391/List-of-Symbolic-Musical-Datasets/blob/master/docs/hey_jude_chorus.PNG) 24 | 25 | This dataset is derived from [Theorytab]. However, it also has the potential to incorporate other lead sheet datasets. For further understanding, please refer to this [repo](https://github.com/wayne391/Lead-Sheet-Analysis/tree/master/lead_sheet_dataset).
26 | 27 | --- 28 | 29 | ## Lead Sheets 30 | one melody track accompanied with one chord track 31 | 32 | ### Crawled Datasets 33 | | Source | Genre | Format | Chord | Melody | Songs | Src | 34 | |-----------------------|:----------:|:------:|:-----:|:------:|:------:|:----:| 35 | | [Theorytab] | pop | XML | V | V | 10148 | [O](https://drive.google.com/file/d/13AEVD9xaZIaicEgd8tF1l6aOiRTymJxL/view?usp=sharing) 36 | | [Wikifonia] | pop | XML | V | V | 6675 | [O](https://drive.google.com/file/d/155FZ9Uq7QLySv9y2bAtk5LD37XZDo0DF/view?usp=sharing) 37 | | [Hymnal] | hymn | MIDI | Δ | V | 3358 | [O](https://drive.google.com/drive/folders/1fP9OmQa9amz-nwaaaITggCEWs3ewz1_8?usp=sharing) 38 | 39 | 40 | #### Links 41 | 42 | * WJazzD: http://jazzomat.hfm-weimar.de/dbformat/dboverview.html 43 | * MIDI format of Theorytab is now available: [Link](https://drive.google.com/file/d/1K1t8L9IRTHnQ1ozRIMRGEyxk_yhN6kLr/view?usp=sharing). 44 | -------------- 45 | 46 | ## Midi 47 | ### Crawled Datasets 48 | | Source | Genre | Multi-track | Format |Songs | src | 49 | |-----------------------|:----------:|:-----------:|:------:|:------:|:---:| 50 | | [VGMdb] | game | V | MIDI | 28419 | [O](https://drive.google.com/drive/folders/1IW83MmH-RJ81yog6sbOUOTHimobE4FuK?usp=sharing) 51 | | [Doug McKenzie Jazz] | jazz | V | MIDI | 297 | [O](https://drive.google.com/drive/folders/1wVVDpcov5VV6Govhn1-CT0BOifqoF-Od?usp=sharing) 52 | | [Piano-e-Competition] | classical | | MIDI | 1573 | [O](https://drive.google.com/drive/folders/17yAGt3AR6txSZv8DBcbAbT3luTMkrkIb?usp=sharing) 53 | 54 | ### Online Resources 55 | #### Jazz 56 | * [profesordepiano](http://www.profesordepiano.com/Real%20Book/Realbook.htm?fbclid=IwAR09XcuMD6PMEyUFq0gXAIVFsJVPw8uQSXq5s-o46JFv7OlYVQnwArFOmSk) 57 | * [minor9](http://bhs.minor9.com) 58 | 59 | #### Drum 60 | * [Groove MIDI Dataset (Magenta)](https://magenta.tensorflow.org/datasets/groove) 61 | 62 | ### MIDI MAN (on reddit) 63 | * [Midi 
Man](https://www.reddit.com/r/WeAreTheMusicMakers/comments/3anwu8/the_drum_percussion_midi_archive_800k/) 64 | https://www.reddit.com/r/WeAreTheMusicMakers/comments/3ajwe4/the_largest_midi_collection_on_the_internet/ 65 | 66 | #### full-scale 67 | * [midiworld](http://www.midiworld.com) 68 | * [Lakh MIDI dataset](http://colinraffel.com/projects/lmd/) 69 | 70 | 71 | --- 72 | 73 | ## MISC 74 | ### Unchecked 75 | * http://www.musicstudents.com/jam.html (backing track and chord charts) 76 | * https://www.cs.hmc.edu/~keller/jazz/ 77 | * http://www.ralphpatt.com/Song.html 78 | * http://www.saxuet.qc.ca/TheSaxyPage/midi.htm 79 | * http://www.thejazzpage.de/index1.html 80 | * http://cjam.lassecollin.se/ 81 | * http://www.jazzpla.net/jazznote3000.htm 82 | 83 | 84 | [Theorytab]: https://www.hooktheory.com/theorytab 85 | [Hymnal]: https://www.hymnal.net/en/home 86 | [Wikifonia]: http://www.wikifonia.org/ 87 | [Piano-e-Competition]: http://www.piano-e-competition.com 88 | [VGMdb]: https://www.vgmusic.com 89 | [Doug McKenzie Jazz]: http://bushgrafts.com/wp/ 90 | -------------------------------------------------------------------------------- /vgmdb/utils/VGMCrawler.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import os 4 | import sys 5 | import time 6 | import json 7 | 8 | class VGMCrawler(): 9 | BASE_URL = 'https://www.vgmusic.com/music/console/' 10 | archive_dir = 'archive' 11 | 12 | def __init__(self, sleep_time=0.1, log=True): 13 | self.sleep_time = sleep_time 14 | self.log = log 15 | self.count = 0 16 | 17 | def _request_url(self, url, doctype='html'): 18 | # set header 19 | response = requests.get(url) 20 | 21 | # sleep 22 | time.sleep(self.sleep_time) 23 | 24 | # return 25 | if doctype =='html': 26 | soup = BeautifulSoup(response.text, 'html.parser') 27 | return soup 28 | elif doctype =='content': 29 | return response.content 30 | else: 31 | return response 32 | 33 | def 
_log_print(self, log, quite=False): 34 | if not quite: 35 | print(log) 36 | 37 | if self.log: 38 | with open("log.txt", "a") as f: 39 | print(log, file=f) 40 | 41 | def fetch_dirs(self, url): 42 | soup = self._request_url(url) 43 | tr_list = soup.find('table').find_all('tr') 44 | dir_list = [] 45 | 46 | for i in range(3, len(tr_list)-1): 47 | dir_list.append(tr_list[i].find_all('td')[1].text) 48 | return dir_list 49 | 50 | def fetch_songs(self, url): 51 | soup = self._request_url(url) 52 | tr_list = soup.find('table').find_all('tr') 53 | song_list = [] 54 | 55 | if len(tr_list) == 4: 56 | return None 57 | for i in range(2, len(tr_list)): 58 | now_tr = tr_list[i] 59 | if now_tr.get("class") == ['header']: 60 | album_name = now_tr.text.strip('\n') 61 | else: 62 | if not now_tr.td.get('colspan'): 63 | 64 | # get info 65 | info = now_tr.td.text.split('\n') 66 | song_midi = now_tr.a['href'] 67 | song_name = info[0] 68 | song_size = info[1] 69 | song_author = info[3] 70 | song_list.append({ 71 | 'filename':song_midi, 72 | 'song_name':song_name, 73 | 'song_size':song_size, 74 | 'song_author':song_author, 75 | 'album_name':album_name}) 76 | print(' |%30s |%30s |%13s |%10s |%s '%(album_name, song_name, song_size, song_author, song_midi )) 77 | 78 | 79 | return song_list 80 | 81 | def crawl_songs(self, url, song_list, dir_path): 82 | if not os.path.exists(dir_path): 83 | os.makedirs(dir_path) 84 | 85 | for idx, s in enumerate(song_list): 86 | sys.stdout.write('%d/%d - total: %d\n' % (idx, len(song_list), self.count)) 87 | sys.stdout.flush() 88 | midi_url = url + s['filename'] 89 | content = self._request_url(midi_url, doctype='content') 90 | fn = os.path.join(dir_path, s['filename']) 91 | with open(fn, "wb") as f: 92 | f.write(content) 93 | 94 | self.count += 1 95 | 96 | def crawl_archive(self): 97 | dir_list = self.fetch_dirs(self.BASE_URL) 98 | 99 | if not os.path.exists(self.archive_dir,): 100 | os.makedirs(self.archive_dir,) 101 | 102 | info = dict() 103 | for d in 
class EPcompCrawler():
    """Crawler for the MIDI and ZIP files linked from the Piano-e-Competition pages.

    For each page in ``YEARS`` the crawler collects the ``.mid`` and ``.zip``
    links, downloads them under ``ROOT/<year>/midi`` and ``ROOT/<year>/zip``,
    and finally writes the collected index to ``ROOT/archive.json``.

    Args:
        sleep_time: Seconds to sleep after every HTTP request.
        log: When true, ``_log_print`` also appends to ``log.txt`` in the
            current working directory.
    """
    BASE_URL = 'http://www.piano-e-competition.com'
    ROOT = 'archive'
    YEARS = ['/midi_2002.asp', '/midi_2004.asp', '/midi_2006.asp',
             '/midi_2008.asp', '/midi_2009.asp', '/midi_2011.asp']

    def __init__(self, sleep_time=0.1, log=True):
        self.sleep_time = sleep_time
        self.log = log
        self.mid_cnt = 0
        self.zip_cnt = 0

    def _request_url(self, url, doctype='html'):
        """Fetch ``url`` and return it in the form selected by ``doctype``.

        ``'html'`` returns a parsed ``BeautifulSoup`` tree, ``'content'`` the
        raw response bytes, and any other value the response object itself.
        """
        response = requests.get(url)

        # Throttle: pause after every request.
        time.sleep(self.sleep_time)

        if doctype == 'html':
            return BeautifulSoup(response.text, 'html.parser')
        if doctype == 'content':
            return response.content
        return response

    def _log_print(self, log, quite=False):
        """Print ``log`` (unless ``quite``) and append it to ``log.txt`` if logging is on."""
        if not quite:
            print(log)

        if self.log:
            with open("log.txt", "a") as f:
                print(log, file=f)

    @staticmethod
    def _format_elapsed(seconds):
        """Format a duration in seconds as ``HH:MM:SS`` (hours may exceed 23)."""
        total = max(int(seconds), 0)
        hours, remainder = divmod(total, 3600)
        minutes, secs = divmod(remainder, 60)
        return '%02d:%02d:%02d' % (hours, minutes, secs)

    @staticmethod
    def _strip_relative_prefix(url):
        """Remove leading ``../``, ``./`` and ``/`` path prefixes from ``url``.

        ``str.lstrip('../')`` strips any leading run of the characters ``.``
        and ``/``, which would also eat the dot of a name such as
        ``.hidden``; only whole prefixes are removed here.
        """
        while True:
            for prefix in ('../', './', '/'):
                if url.startswith(prefix):
                    url = url[len(prefix):]
                    break
            else:
                return url

    @staticmethod
    def _performer_from_url(url):
        """Return the non-digit prefix of the file name in ``url``.

        Falls back to the file name without its extension when the name has
        no "letters followed by digits" shape, instead of raising.
        """
        file_name = url.split('/')[-1]
        match = re.search(r'(\D+?)(\d+?).', file_name)
        if match:
            return match.group(1)
        return os.path.splitext(file_name)[0]

    @staticmethod
    def _find_composer(anchor):
        """Return the text of the first ``td`` found 2 to 4 ancestors above ``anchor``.

        Returns an empty string when no such cell exists; the value is only
        used for the progress print-out.
        """
        node = anchor
        for depth in range(4):
            node = getattr(node, 'parent', None)
            if node is None:
                break
            if depth == 0:
                # The direct parent is skipped; the search starts at the grandparent.
                continue
            cell = getattr(node, 'td', None)
            if cell is not None:
                return cell.text.strip()
        return ''

    def fetch_year_songs(self, year_url):
        """Collect the MIDI and ZIP links of one year page.

        Returns:
            ``(midi_list, zip_list)`` where ``midi_list`` holds
            ``(performer, url, song_name)`` tuples and ``zip_list`` holds
            ``(performer, zip_path)`` tuples. The performer attached to a ZIP
            is the one of the most recent MIDI link, or ``None`` if no MIDI
            link has been seen yet.
        """
        soup = self._request_url(year_url)
        a_list = soup.find_all('a')
        midi_list = []
        zip_list = []
        print(len(a_list))

        # Bound up front so a ZIP link that precedes every MIDI link no
        # longer raises UnboundLocalError.
        now_performer = None
        for a in a_list:
            url = a.get('href')
            if not url:
                continue

            if ('.MID' in url) or ('.mid' in url):
                now_performer = self._performer_from_url(url)
                song_name = re.sub(
                    r'\s+', ' ',
                    a.text.replace('\r', '').replace('\n', '')).strip()
                composer = self._find_composer(a)
                print('%-10s |%-40s |[%s | %s]' % (now_performer, url, song_name, composer))
                midi_list.append((now_performer, url, song_name))

            if ('.ZIP' in url) or ('.zip' in url):
                zip_fn = self._strip_relative_prefix(url)
                print('%-10s %s' % (now_performer, zip_fn))
                zip_list.append((now_performer, zip_fn))
        return midi_list, zip_list

    def crawl_year_songs(self, midi_list, zip_list, dir_path):
        """Download the listed MIDI and ZIP files into ``dir_path/midi`` and ``dir_path/zip``."""
        path_midi = os.path.join(dir_path, 'midi')
        path_zip = os.path.join(dir_path, 'zip')

        os.makedirs(path_midi, exist_ok=True)
        os.makedirs(path_zip, exist_ok=True)

        print('=================midi=================')
        for idx, entry in enumerate(midi_list):
            m_url = entry[1]

            # Links without the '/ecompetition' segment are relative to it.
            if '/ecompetition' not in m_url:
                m_url = self.BASE_URL + '/ecompetition/' + m_url
            else:
                m_url = self.BASE_URL + m_url

            print(idx, m_url)
            content = self._request_url(m_url, doctype='content')
            fn = m_url.split('/')[-1]

            with open(os.path.join(path_midi, fn), "wb") as f:
                f.write(content)

        print('=================zip=================')
        for idx, entry in enumerate(zip_list):
            z_url = self.BASE_URL + '/' + entry[1]
            print(idx, z_url)
            content = self._request_url(z_url, doctype='content')
            fn = z_url.split('/')[-1]

            with open(os.path.join(path_zip, fn), "wb") as f:
                f.write(content)

    def crawl_archive(self):
        """Crawl every page in ``YEARS`` and write the index to ``ROOT/archive.json``."""
        os.makedirs(self.ROOT, exist_ok=True)

        archive_dict = dict()

        for y in self.YEARS:
            ml, zl = self.fetch_year_songs(self.BASE_URL + y)
            year = re.search(r'(\d{4}).', y).group(1)
            path_year = os.path.join(self.ROOT, year)
            os.makedirs(path_year, exist_ok=True)

            print('{%s}' % year)

            self.mid_cnt += len(ml)
            self.zip_cnt += len(zl)

            archive_dict[year] = {'mid': ml, 'zip': zl}

            self.crawl_year_songs(ml, zl, path_year)

        with open(os.path.join(self.ROOT, 'archive.json'), "w") as f:
            json.dump(archive_dict, f)

    def run(self):
        """Run the full crawl and log the file totals and the elapsed time."""
        start = time.time()
        self.crawl_archive()
        # The original computed start - end, a negative duration that
        # gmtime() wrapped to just under 24 hours.
        elapsed = time.time() - start

        self._log_print('Total midi files %d' % self.mid_cnt)
        self._log_print('Total zip files %d' % self.zip_cnt)
        self._log_print('\nElapsed time: ' + self._format_elapsed(elapsed))
0.11 12 | alphabet_list = string.ascii_lowercase 13 | 14 | 15 | def song_retrieval(artist, song, path_song): 16 | 17 | song_url = 'https://www.hooktheory.com/theorytab/view/' + artist + '/' + song 18 | response_song = requests.get(song_url) 19 | 20 | soup = BeautifulSoup(response_song.text, 'html.parser') 21 | li_list = soup.findAll("li", {"role": "presentation"}) 22 | 23 | section_list = [] 24 | pk_list = [] 25 | 26 | # section 27 | for i in range(len(li_list)-1): 28 | sec = li_list[i].text.strip().lower().replace(" ", "-") 29 | section_list.append(sec) 30 | pk_list.append(soup.findAll("div", {"role": "tabpanel", "id": sec})[0].contents[0]['id']) 31 | 32 | # save xml 33 | for idx, pk in enumerate(pk_list): 34 | req_url = 'https://www.hooktheory.com/songs/getXmlByPk?pk=' + str(pk) 35 | response_info = requests.get(req_url) 36 | content = response_info.text 37 | 38 | with open(os.path.join(path_song, section_list[idx] + ".xml"), "w", encoding="utf-8") as f: 39 | f.write(content) 40 | time.sleep(0.08) 41 | 42 | # get genre 43 | wikiid = soup.findAll("multiselect", {"items": "genres"})[0]['wikiid'] 44 | response_genre = requests.get('https://www.hooktheory.com/wiki/' + str(wikiid) + '/genres') 45 | genre_act_list = json.loads(response_genre.text) 46 | genres = [] 47 | for g in genre_act_list: 48 | if g['active']: 49 | genres.append(g['name']) 50 | 51 | # saving 52 | info = {'section': section_list, 'pk': pk_list, 'song_url': song_url, 53 | 'genres': genres, 'wikiid': wikiid} 54 | 55 | with open(os.path.join(path_song, 'song_info.json'), "w") as f: 56 | json.dump(info, f) 57 | 58 | 59 | def get_song_list(url_artist, quite=False): 60 | response_tmp = requests.get(website + url_artist) 61 | soup = BeautifulSoup(response_tmp.text, 'html.parser') 62 | item_list = soup.find_all("li", {"class": "grid-item"}) 63 | 64 | song_name_list = [] 65 | for item in item_list: 66 | song_name = item.find_all("a", {"class": "a-tab-cover"})[0]['href'].split('/')[-1] 67 | 
song_name_list.append(song_name) 68 | if not quite: 69 | print(' > %s' % song_name) 70 | return song_name_list 71 | 72 | 73 | def traverse_website(): 74 | ''' 75 | Retrieve all urls of artists and songs from the website 76 | ''' 77 | 78 | list_pages = [] 79 | archive_artist = dict() 80 | artist_count = 0 81 | song_count = 0 82 | 83 | for ch in alphabet_list: 84 | time.sleep(sleep_time) 85 | url = base_url + ch 86 | response_tmp = requests.get(url) 87 | soup = BeautifulSoup(response_tmp.text, 'html.parser') 88 | page_count = 0 89 | 90 | print('==[%c]=================================================' % ch) 91 | 92 | # get artists list by pages 93 | url_artist_list = [] 94 | for page in range(1, 9999): 95 | url = 'https://www.hooktheory.com/theorytab/artists/'+ch+'?page=' + str(page) 96 | 97 | time.sleep(sleep_time) 98 | response_tmp = requests.get(url) 99 | soup = BeautifulSoup(response_tmp.text, 'html.parser') 100 | item_list = soup.find_all("li", {"class": "overlay-trigger"}) 101 | 102 | if item_list: 103 | print(url) 104 | page_count += 1 105 | else: 106 | break 107 | 108 | for item in item_list: 109 | url_artist_list.append(item.find_all("a", {"class": "a-no-decoration"})[0]['href']) 110 | 111 | print('Total:', len(url_artist_list)) 112 | 113 | print('----') 114 | 115 | if not page_count: 116 | page_count = 1 117 | 118 | # get song of artists 119 | artist_song_dict = dict() 120 | 121 | for url_artist in url_artist_list: 122 | artist_count += 1 123 | time.sleep(sleep_time) 124 | artist_name = url_artist.split('/')[-1] 125 | print(artist_name) 126 | song_name_list = get_song_list(url_artist) 127 | song_count += len(song_name_list) 128 | artist_song_dict[artist_name] = song_name_list 129 | 130 | archive_artist[ch] = artist_song_dict 131 | list_pages.append(page_count) 132 | 133 | print('=======================================================') 134 | print(list_pages) 135 | print('Artists:', artist_count) 136 | print('Songs:', song_count) 137 | 138 | 
archive_artist['num_song'] = song_count 139 | archive_artist['num_artist'] = artist_count 140 | 141 | with open('archive_artist.json', "w") as f: 142 | json.dump(archive_artist, f) 143 | 144 | 145 | if __name__ == '__main__': 146 | 147 | traverse_website() 148 | 149 | # root for crawled dataset 150 | root_dir = 'archive' 151 | with open('archive_artist.json', "r") as f: 152 | archive_artist = json.load(f) 153 | 154 | count_ok = 0 155 | song_count = archive_artist['num_song'] 156 | 157 | for ch in alphabet_list: 158 | path_ch = os.path.join(root_dir, ch) 159 | print('==[%c]=================================================' % ch) 160 | 161 | if not os.path.exists(path_ch): 162 | os.makedirs(path_ch) 163 | 164 | for a_name in archive_artist[ch].keys(): 165 | for s_name in archive_artist[ch][a_name]: 166 | 167 | try: 168 | print('(%3d/%3d) %s %s' % (count_ok, song_count, a_name, s_name)) 169 | path_song = os.path.join(path_ch, a_name, s_name) 170 | 171 | if not os.path.exists(path_song): 172 | os.makedirs(path_song) 173 | 174 | time.sleep(sleep_time) 175 | song_retrieval(a_name, s_name, path_song) 176 | 177 | count_ok += 1 178 | 179 | except Exception as e: 180 | print(e) 181 | 182 | print('total:', count_ok) 183 | -------------------------------------------------------------------------------- /5-track-pianoroll/parser.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from pypianoroll import Multitrack, Track 4 | import json 5 | import pickle 6 | 7 | family_name=[ 8 | 'drum', 9 | 'bass', 10 | 'guitar', 11 | 'string', 12 | 'piano', 13 | ] 14 | 15 | family_thres = [ 16 | (2, 24), # drum 17 | (1, 96), # bass 18 | (2, 156), # guitar 19 | (2, 156), # string, 20 | (2, 156), # piano 21 | ] 22 | 23 | 24 | def findall_endswith(root): 25 | """Traverse `root` recursively and yield all files ending with `postfix`""" 26 | for dirpath, _, filenames in os.walk(root): 27 | for filename in filenames: 28 | if 
filename.endswith('.npz'): 29 | yield os.path.join(dirpath, filename) 30 | 31 | def check_which_family(track): 32 | is_piano = lambda program, is_drum: not is_drum and ((program >= 0 and program <= 7) 33 | or (program >= 16 and program <= 23)) 34 | is_guitar = lambda program: program >= 24 and program <= 31 35 | is_bass = lambda program: program >= 32 and program <= 39 36 | is_string = lambda program: program >=40 and program <= 51 37 | 38 | # drum, bass, guitar, string, piano 39 | is_instr_act = lambda program, is_drum: np.array([is_drum, is_bass(program), is_guitar(program), 40 | is_string(program), is_piano(program, is_drum)]) 41 | 42 | instr_act = is_instr_act(track.program, track.is_drum) 43 | return instr_act 44 | 45 | def check_instr_act(multitrack): 46 | instr_act_all = np.zeros(5) 47 | for track in multitrack.tracks: 48 | instr_act = check_which_family(track) 49 | instr_act_all += instr_act 50 | instr_act_cnt = sum(instr_act_all > 0) 51 | return instr_act_all, instr_act_cnt 52 | 53 | def segment_quality(pianoroll, thres_pitch, thres_beats): 54 | pitch_sum = sum(np.sum(pianoroll, axis=0) > 0) 55 | beat_sum = sum(np.sum(pianoroll, axis=1) > 0) 56 | score = pitch_sum + beat_sum 57 | return (pitch_sum >= thres_pitch) and (beat_sum >= thres_beats), (pitch_sum, beat_sum) 58 | 59 | def proc_instr_intersection_list(npz_list): 60 | cnt_ok = 0 61 | list_ok = [] 62 | thres_instr_num = 5 63 | for nidx in range(len(npz_list)): #len(npz_list) 64 | if nidx % 500 is 0: 65 | print(nidx, '-', cnt_ok) 66 | npz_file = npz_list[nidx] 67 | multitrack = Multitrack(npz_file) 68 | 69 | if len(multitrack.tracks) < 5: 70 | continue 71 | 72 | instr_act_all, instr_act_cnt = check_instr_act(multitrack) 73 | 74 | if instr_act_cnt != 5: 75 | continue 76 | 77 | list_ok.append(npz_file) 78 | cnt_ok += 1 79 | 80 | print(cnt_ok) 81 | return list_ok 82 | 83 | 84 | if __name__ == '__main__': 85 | # root = 'lpd_cleansed' 86 | # npz_list = list(findall_endswith(root)) 87 | 88 | # with 
open('npz_list.pickle', 'wb') as f: 89 | # pickle.dump(npz_list, f, protocol=pickle.HIGHEST_PROTOCOL) 90 | # with open('npz_list.pickle', 'rb') as f: 91 | # npz_list = pickle.load(f) 92 | 93 | # list_ok = proc_instr_intersection_list(npz_list) 94 | # with open('list_ok.pickle', 'wb') as f: 95 | # pickle.dump(list_ok, f, protocol=pickle.HIGHEST_PROTOCOL) 96 | with open('list_ok.pickle', 'rb') as f: 97 | list_ok = pickle.load(f) 98 | 99 | 100 | 101 | num_consecutive_bar = 8 102 | resol = 96 103 | down_sample = 2 104 | cnt_totall_segments = 0 105 | cnt_augmented = 0 106 | ok_segment_list = [] 107 | hop_size = (num_consecutive_bar / 4) 108 | 109 | num_list_ok = len(list_ok) 110 | for oid in range(len(list_ok)): 111 | print('==', oid, '/', num_list_ok,'===============') 112 | npz_ok = list_ok[oid] 113 | multitrack = Multitrack(npz_ok) 114 | downbeat = multitrack.downbeat 115 | 116 | num_bar = len(downbeat) // resol 117 | hop_iter = 0 118 | 119 | song_ok_segments = [] 120 | for bidx in range(num_bar-num_consecutive_bar): 121 | if hop_iter > 0: 122 | hop_iter -= 1 123 | continue 124 | 125 | 126 | st = bidx * resol 127 | ed = st + num_consecutive_bar * resol 128 | 129 | best_instr = [None] * 5 130 | best_score = [-1] * 5 131 | second_act = [False] * 5 132 | second_instr = [None] * 5 133 | is_all_ok = [False] * 5 134 | for tidx, track in enumerate(multitrack.tracks): 135 | # track[st:ed].plot() 136 | tmp_map = check_which_family(track) 137 | in_family = np.where(tmp_map)[0] 138 | 139 | if not len(in_family): 140 | continue 141 | family = in_family[0] 142 | 143 | tmp_pianoroll = track[st:ed:down_sample].pianoroll 144 | is_ok, score = segment_quality(tmp_pianoroll, family_thres[family][0], family_thres[family][1]) 145 | 146 | if is_ok and sum(score) > best_score[family]: 147 | track.name = family_name[family] 148 | best_instr[family] = track[st:ed:down_sample] 149 | best_score[family] = sum(score) 150 | is_all_ok[family] = True 151 | 152 | if sum(is_all_ok) == 5: 153 | # 
print(bidx) 154 | hop_iter = np.random.randint(0, 1) + hop_size 155 | song_ok_segments.append(Multitrack(tracks=best_instr, 156 | downbeat=list(range(0, 383, 48)), beat_resolution=12)) 157 | 158 | cnt_ok_segment = len(song_ok_segments) 159 | if cnt_ok_segment > 6: 160 | seed = (6, cnt_ok_segment//2) 161 | if cnt_ok_segment > 11: 162 | seed = (11, cnt_ok_segment//3) 163 | if cnt_ok_segment > 15: 164 | seed = (15, cnt_ok_segment//4) 165 | 166 | rand_idx = np.random.permutation(cnt_ok_segment)[:max(seed)] 167 | song_ok_segments = [song_ok_segments[ridx] for ridx in rand_idx] 168 | ok_segment_list.extend(song_ok_segments) 169 | cnt_ok_segment = len(rand_idx) 170 | else: 171 | ok_segment_list.extend(song_ok_segments) 172 | 173 | cnt_totall_segments += len(song_ok_segments) 174 | print('cur:%d | acc:%d'%(cnt_ok_segment, cnt_totall_segments)) 175 | 176 | print('---') 177 | print(cnt_totall_segments) 178 | print(len(ok_segment_list)) 179 | np.save('segments.npy', ok_segment_list) -------------------------------------------------------------------------------- /hymnal/utils/hymnCrawler.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import os 4 | import time 5 | import json 6 | import string 7 | import random 8 | from lxml import etree 9 | 10 | class HymnCrawler(): 11 | BASE_URL = 'https://www.hymnal.net' 12 | 13 | def __init__(self, sleep_time = 0.1, log=True): 14 | self.sleep_time =sleep_time 15 | self.meta_category = {'classic': self.BASE_URL + '/en/song-index/h', 16 | 'new_tunes': self.BASE_URL + '/en/song-index/nt', 17 | 'new_songs': self.BASE_URL + '/en/song-index/ns', 18 | 'children': self.BASE_URL + '/en/song-index/c'} 19 | 20 | self.log = log 21 | self.metadata = None 22 | 23 | def _request_url(self, url, doctype='html'): 24 | response = requests.get(url) 25 | time.sleep(self.sleep_time) 26 | if doctype =='html': 27 | soup = BeautifulSoup(response.text, 'html.parser') 28 
| return soup 29 | elif doctype =='content': 30 | return response.content 31 | else: 32 | pass 33 | 34 | def _log_print(self, log): 35 | print(log) 36 | if self.log: 37 | with open("log.txt", "a") as f: 38 | print(log, file=f) 39 | 40 | def fetch_page_list(self, url): 41 | soup = self._request_url(url) 42 | tag_list = soup.find_all('div', {'class':'list-group'})[0].find_all('a', {'class':'list-group-item'}) 43 | return [t['href'] for t in tag_list] 44 | 45 | def fetch_category_list(self, url): 46 | song_list = [] 47 | soup = self._request_url(url) 48 | try: 49 | tag_list = soup.find_all('div', {'class':'letters'})[0].findAll('a') 50 | alphabet_list = [t.text for t in tag_list] 51 | for ch in alphabet_list: 52 | page_url = url+ '/' + ch 53 | self._log_print(page_url) 54 | song_list.extend(self.fetch_page_list(page_url)) 55 | except: 56 | self._log_print(url) 57 | song_list.extend(self.fetch_page_list(url)) 58 | return song_list 59 | 60 | def fetch_song(self, url, song_dir): 61 | 62 | soup = self._request_url(url) 63 | 64 | # (url, extension, filename) 65 | data_list = [('/f=mid', '.mid', 'all'), ('/f=mp3', '.mp3', 'audio'), ('/f=tune', '.mid', 'melody'), 66 | ('/f=ppdf', '.pdf', 'ls_paino'), ('/f=pdf', '.pdf','ls_guitar'), ('/f=gtpdf', '.pdf', 'ls_text')] 67 | 68 | # save download files 69 | for d in data_list: 70 | r = requests.get(url+ d[0]) 71 | 72 | if song_dir: 73 | with open(os.path.join(song_dir,d[2] + d[1]), 'wb') as f: 74 | f.write(r.content) 75 | 76 | # metadata 77 | content_list = [] 78 | tag_list = soup.find_all('div', {'class':'row common-panel'})[0].find_all('div', {'class':'col-xs-7 col-sm-8 no-padding'}) 79 | 80 | for t in tag_list: 81 | content_list.append(t.text.strip()) 82 | label_list = [] 83 | tag_list = soup.find_all('div', {'class':'row common-panel'})[0].find_all('label', {'class':'col-xs-5 col-sm-4'}) 84 | for t in tag_list: 85 | label_list.append(t.text.replace(':','')) 86 | metadata = dict(zip(label_list, content_list)) 87 | 88 | # title 
89 | title = soup.find('h1', {'class':"text-center"}).text.strip() 90 | metadata['title'] = title 91 | 92 | # lyric table 93 | lyric_xml = soup.find_all('div', {'class':'col-xs-12 lyrics'})[0].find('table') 94 | 95 | if song_dir: 96 | with open( os.path.join(song_dir, 'song_metadata.json'), "w") as f: 97 | json.dump(metadata , f) 98 | 99 | with open( os.path.join(song_dir, 'lyric.xml'), "w", encoding='utf-8') as f: 100 | f.write(str(lyric_xml)) 101 | 102 | return lyric_xml, metadata 103 | 104 | def craw_archive(self, archive_dir='archive'): 105 | metadata = dict() 106 | for k in self.meta_category.keys(): 107 | category_url = self.meta_category[k] 108 | metadata[k] = self.fetch_category_list(category_url) 109 | 110 | # saving 111 | if archive_dir: 112 | if not os.path.exists(archive_dir): 113 | os.makedirs(archive_dir) 114 | 115 | with open(os.path.join(archive_dir, 'archive_metadata.json'), "w") as f: 116 | json.dump(metadata , f) 117 | 118 | return metadata 119 | 120 | def craw_songs(self, metadata, archive_dir='archive'): 121 | count = 0 122 | count_success = 0 123 | for k in list(metadata): 124 | self._log_print('> %s'%k) 125 | category_dir = os.path.join(archive_dir, k) 126 | if not os.path.exists(category_dir): 127 | os.makedirs(category_dir) 128 | 129 | song_list = metadata[k] 130 | 131 | numOfSongs = len(song_list) 132 | for i in range(numOfSongs): 133 | song_url = self.BASE_URL + song_list[i] 134 | song_id = song_url.split('/')[-1] 135 | self._log_print(' (%d/%d) %s'%(i+1, numOfSongs, song_url)) 136 | song_dir = os.path.join(category_dir, song_id) 137 | 138 | if not os.path.exists(song_dir): 139 | os.makedirs(song_dir) 140 | 141 | try: 142 | self.fetch_song(song_url, song_dir) 143 | metadata['err'] = False 144 | count_success += 1 145 | except: 146 | self._log_print('error!!') 147 | metadata['err'] = True 148 | 149 | count += 1 150 | self._log_print('total: %d songs'%count) 151 | 152 | return metadata 153 | 154 | def reload(self, archive_dir='archive'): 
155 | with open(os.path.join(archive_dir, 'archive_metadata.json'), "r") as f: 156 | self.metadata =json.load(f) 157 | 158 | def run(self, archive_dir='archive', reload=False): 159 | 160 | self._log_print("=================================================") 161 | 162 | if not reload: 163 | self.metadata = self.craw_archive(archive_dir=archive_dir) 164 | else: 165 | self.reload(archive_dir=archive_dir) 166 | 167 | self.metadata = self.craw_songs(self.metadata, archive_dir=archive_dir) 168 | 169 | with open(os.path.join(archive_dir, 'archive_metadata.json'), "w") as f: 170 | json.dump(self.metadata, f) 171 | 172 | if __name__ == '__main__': 173 | 174 | hc = HymnCrawler() 175 | s = time.time() 176 | hc.run() 177 | e = time.time() 178 | time.strftime("\nElapsed time: %H:%M:%S", time.gmtime(s-e)) 179 | 180 | -------------------------------------------------------------------------------- /vgmdb/utils/VGMCrawler.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests\n", 12 | "from bs4 import BeautifulSoup\n", 13 | "import os\n", 14 | "import sys\n", 15 | "import time\n", 16 | "import json" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 190, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "class VGMCrawler():\n", 28 | " BASE_URL = 'https://www.vgmusic.com/music/console/'\n", 29 | " archive_dir = 'archive'\n", 30 | " \n", 31 | " def __init__(self, sleep_time=0.1, log=True):\n", 32 | " self.sleep_time = sleep_time\n", 33 | " self.log = log\n", 34 | " self.count = 0\n", 35 | " \n", 36 | " def _request_url(self, url, doctype='html'):\n", 37 | " # set header\n", 38 | " response = requests.get(url)\n", 39 | "\n", 40 | " # sleep\n", 41 | " time.sleep(self.sleep_time)\n", 42 | "\n", 43 | " # 
return\n", 44 | " if doctype =='html':\n", 45 | " soup = BeautifulSoup(response.text, 'html.parser')\n", 46 | " return soup\n", 47 | " elif doctype =='content':\n", 48 | " return response.content\n", 49 | " else:\n", 50 | " return response\n", 51 | "\n", 52 | " def _log_print(self, log, quite=False):\n", 53 | " if not quite:\n", 54 | " print(log)\n", 55 | "\n", 56 | " if self.log:\n", 57 | " with open(\"log.txt\", \"a\") as f:\n", 58 | " print(log, file=f)\n", 59 | " \n", 60 | " def fetch_dirs(self, url):\n", 61 | " soup = self._request_url(url)\n", 62 | " tr_list = soup.find('table').find_all('tr')\n", 63 | " dir_list = []\n", 64 | "\n", 65 | " for i in range(3, len(tr_list)-1):\n", 66 | " dir_list.append(tr_list[i].find_all('td')[1].text)\n", 67 | " return dir_list\n", 68 | " \n", 69 | " def fetch_songs(self, url):\n", 70 | " soup = self._request_url(url)\n", 71 | " tr_list = soup.find('table').find_all('tr')\n", 72 | " song_list = []\n", 73 | " \n", 74 | " if len(tr_list) == 4:\n", 75 | " return None\n", 76 | " for i in range(2, len(tr_list)):\n", 77 | " now_tr = tr_list[i]\n", 78 | " if now_tr.get(\"class\") == ['header']:\n", 79 | " album_name = now_tr.text.strip('\\n')\n", 80 | " else:\n", 81 | " if not now_tr.td.get('colspan'):\n", 82 | "\n", 83 | " # get info\n", 84 | " info = now_tr.td.text.split('\\n')\n", 85 | " song_midi = now_tr.a['href']\n", 86 | " song_name = info[0]\n", 87 | " song_size = info[1]\n", 88 | " song_author = info[3]\n", 89 | " song_list.append({\n", 90 | " 'filename':song_midi, \n", 91 | " 'song_name':song_name,\n", 92 | " 'song_size':song_size,\n", 93 | " 'song_author':song_author,\n", 94 | " 'album_name':album_name})\n", 95 | " print(' |%30s |%30s |%13s |%10s |%s '%(album_name, song_name, song_size, song_author, song_midi ))\n", 96 | " \n", 97 | " \n", 98 | " return song_list\n", 99 | " \n", 100 | " def crawl_songs(self, url, song_list, dir_path):\n", 101 | " if not os.path.exists(dir_path):\n", 102 | " os.makedirs(dir_path)\n", 103 | 
" \n", 104 | " for idx, s in enumerate(song_list):\n", 105 | " sys.stdout.write('%d/%d - total: %d\\n' % (idx, len(song_list), self.count))\n", 106 | " sys.stdout.flush()\n", 107 | " midi_url = url + s['filename']\n", 108 | " content = self._request_url(midi_url, doctype='content')\n", 109 | " fn = os.path.join(dir_path, s['filename'])\n", 110 | " with open(fn, \"wb\") as f:\n", 111 | " f.write(content)\n", 112 | " \n", 113 | " self.count += 1\n", 114 | " \n", 115 | " def crawl_archive(self):\n", 116 | " dir_list = self.fetch_dirs(self.BASE_URL)\n", 117 | " \n", 118 | " if not os.path.exists(self.archive_dir,):\n", 119 | " os.makedirs(self.archive_dir,)\n", 120 | " \n", 121 | " info = dict()\n", 122 | " for d in dir_list:\n", 123 | " root_dir = d.strip('/')\n", 124 | " print('{{%s}}' % root_dir)\n", 125 | " subdir_url = self.BASE_URL + d\n", 126 | " subdir_list = self.fetch_dirs(subdir_url)\n", 127 | " \n", 128 | " tmp_dict = dict()\n", 129 | " for sd in subdir_list:\n", 130 | " root_subdir = sd.strip('/')\n", 131 | " print('[%s]'%root_subdir)\n", 132 | " page_url = subdir_url + sd\n", 133 | " sl = self.fetch_songs(page_url)\n", 134 | " if sl:\n", 135 | " self.crawl_songs(page_url, sl, os.path.join(self.archive_dir, root_dir, root_subdir))\n", 136 | " tmp_dict[sd] = sl\n", 137 | "\n", 138 | " info[d] = tmp_dict\n", 139 | " \n", 140 | " with open(os.path.join(self.archive_dir,'archive.json'), \"w\") as f:\n", 141 | " json.dump(info, f) \n", 142 | " \n", 143 | " def run(self): \n", 144 | " s = time.time()\n", 145 | " self.crawl_archive()\n", 146 | " e = time.time()\n", 147 | " self._log_print(time.strftime(\"\\nElapsed time: %H:%M:%S\", time.gmtime(s-e)))\n", 148 | " self._log_print('Total %d Songs'&self.count)\n", 149 | " \n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "if __name__ == '__main__':\n", 161 | " vc = VGMCrawler()\n", 
162 | " vc.run()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 191, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "vc = VGMCrawler()\n", 174 | "vc.fetch_songs('https://www.vgmusic.com/music/console/magnavox/odyssey/')" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "Python [mir]", 181 | "language": "python", 182 | "name": "Python [mir]" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.5.2" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 0 199 | } 200 | -------------------------------------------------------------------------------- /hymnal/utils/hymnCrawler.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests\n", 12 | "from bs4 import BeautifulSoup\n", 13 | "import os\n", 14 | "import time\n", 15 | "import json\n", 16 | "import string\n", 17 | "import random\n", 18 | "from lxml import etree" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "class HymnCraler():\n", 30 | " BASE_URL = 'https://www.hymnal.net'\n", 31 | " \n", 32 | " def __init__(self, sleep_time = 0.1, log=True):\n", 33 | " self.sleep_time =sleep_time\n", 34 | " self.meta_category = {'classic': self.BASE_URL + '/en/song-index/h', \n", 35 | " 'new_tunes': self.BASE_URL + '/en/song-index/nt', \n", 36 | " 'new_songs': self.BASE_URL + '/en/song-index/ns', \n", 37 | " 'children': 
self.BASE_URL + '/en/song-index/c'}\n", 38 | " \n", 39 | " self.log = log\n", 40 | " self.metadata = None \n", 41 | "\n", 42 | " def _request_url(self, url, doctype='html'):\n", 43 | " response = requests.get(url)\n", 44 | " if doctype =='html':\n", 45 | " soup = BeautifulSoup(response.text, 'html.parser')\n", 46 | " return soup\n", 47 | " elif doctype =='content':\n", 48 | " return response.content\n", 49 | " else:\n", 50 | " pass\n", 51 | "\n", 52 | " def _log_print(self, log):\n", 53 | " print(log)\n", 54 | " if self.log:\n", 55 | " with open(\"log.txt\", \"a\") as f:\n", 56 | " print(log, file=f)\n", 57 | "\n", 58 | " def fetch_page_list(self, url):\n", 59 | " soup = self._request_url(url)\n", 60 | " tag_list = soup.find_all('div', {'class':'list-group'})[0].find_all('a', {'class':'list-group-item'})\n", 61 | " return [t['href'] for t in tag_list]\n", 62 | "\n", 63 | " def fetch_category_list(self, url):\n", 64 | " song_list = []\n", 65 | " soup = self._request_url(url)\n", 66 | " try:\n", 67 | " tag_list = soup.find_all('div', {'class':'letters'})[0].findAll('a')\n", 68 | " alphabet_list = [t.text for t in tag_list]\n", 69 | " for ch in alphabet_list:\n", 70 | " page_url = url+ '/' + ch\n", 71 | " self._log_print(page_url)\n", 72 | " song_list.extend(self.fetch_page_list(page_url))\n", 73 | " except:\n", 74 | " self._log_print(url)\n", 75 | " song_list.extend(self.fetch_page_list(url))\n", 76 | " return song_list\n", 77 | "\n", 78 | " def fetch_song(self, url, song_dir):\n", 79 | "\n", 80 | " soup = self._request_url(url)\n", 81 | " \n", 82 | " # (url, extension, filename)\n", 83 | " data_list = [('/f=mid', '.mid', 'all'), ('/f=mp3', '.mp3', 'audio'), ('/f=tune', '.mid', 'melody'),\n", 84 | " ('/f=ppdf', '.pdf', 'ls_paino'), ('/f=pdf', '.pdf','ls_guitar'), ('/f=gtpdf', '.pdf', 'ls_text')]\n", 85 | "\n", 86 | " # save download files\n", 87 | " for d in data_list:\n", 88 | " r = requests.get(url+ d[0])\n", 89 | "\n", 90 | " if song_dir:\n", 91 | " with 
open(os.path.join(song_dir,d[2] + d[1]), 'wb') as f:\n", 92 | " f.write(r.content)\n", 93 | "\n", 94 | " # metadata\n", 95 | " content_list = [] \n", 96 | " tag_list = soup.find_all('div', {'class':'row common-panel'})[0].find_all('div', {'class':'col-xs-7 col-sm-8 no-padding'})\n", 97 | "\n", 98 | " for t in tag_list:\n", 99 | " content_list.append(t.text.strip())\n", 100 | " label_list = []\n", 101 | " tag_list = soup.find_all('div', {'class':'row common-panel'})[0].find_all('label', {'class':'col-xs-5 col-sm-4'})\n", 102 | " for t in tag_list:\n", 103 | " label_list.append(t.text.replace(':',''))\n", 104 | " metadata = dict(zip(label_list, content_list))\n", 105 | " \n", 106 | " # title\n", 107 | " title = soup.find('h1', {'class':\"text-center\"}).text.strip()\n", 108 | " metadata['title'] = title\n", 109 | " \n", 110 | " # lyric table\n", 111 | " lyric_xml = soup.find_all('div', {'class':'col-xs-12 lyrics'})[0].find('table')\n", 112 | "\n", 113 | " if song_dir:\n", 114 | " with open( os.path.join(song_dir, 'song_metadata.json'), \"w\") as f:\n", 115 | " json.dump(metadata , f)\n", 116 | "\n", 117 | " with open( os.path.join(song_dir, 'lyric.xml'), \"w\", encoding='utf-8') as f:\n", 118 | " f.write(str(lyric_xml))\n", 119 | "\n", 120 | " return lyric_xml, metadata\n", 121 | "\n", 122 | " def craw_archive(self, archive_dir='archive'):\n", 123 | " metadata = dict()\n", 124 | " for k in self.meta_category.keys():\n", 125 | " category_url = self.meta_category[k]\n", 126 | " metadata[k] = self.fetch_category_list(category_url)\n", 127 | "\n", 128 | " # saving\n", 129 | " if archive_dir:\n", 130 | " if not os.path.exists(archive_dir):\n", 131 | " os.makedirs(archive_dir)\n", 132 | "\n", 133 | " with open(os.path.join(archive_dir, 'archive_metadata.json'), \"w\") as f:\n", 134 | " json.dump(metadata , f)\n", 135 | "\n", 136 | " return metadata\n", 137 | "\n", 138 | " def craw_songs(self, metadata, archive_dir='archive'):\n", 139 | " count = 0\n", 140 | " count_success 
= 0\n", 141 | " for k in list(metadata):\n", 142 | " self._log_print('> %s'%k)\n", 143 | " category_dir = os.path.join(archive_dir, k)\n", 144 | " if not os.path.exists(category_dir):\n", 145 | " os.makedirs(category_dir)\n", 146 | "\n", 147 | " song_list = metadata[k]\n", 148 | "\n", 149 | " numOfSongs = len(song_list)\n", 150 | " for i in range(numOfSongs):\n", 151 | " song_url = self.BASE_URL + song_list[i]\n", 152 | " song_id = song_url.split('/')[-1]\n", 153 | " self._log_print(' (%d/%d) %s'%(i+1, numOfSongs, song_url))\n", 154 | " song_dir = os.path.join(category_dir, song_id)\n", 155 | "\n", 156 | " if not os.path.exists(song_dir):\n", 157 | " os.makedirs(song_dir)\n", 158 | " \n", 159 | " try:\n", 160 | " self.fetch_song(song_url, song_dir)\n", 161 | " metadata['err'] = False\n", 162 | " count_success += 1\n", 163 | " except:\n", 164 | " self._log_print('error!!')\n", 165 | " metadata['err'] = True\n", 166 | " \n", 167 | " count += 1\n", 168 | " self._log_print('total: %d songs'%count)\n", 169 | " \n", 170 | " return metadata\n", 171 | " \n", 172 | " def reload(self, archive_dir='archive'):\n", 173 | " with open(os.path.join(archive_dir, 'archive_metadata.json'), \"r\") as f:\n", 174 | " self.metadata =json.load(f)\n", 175 | " \n", 176 | " def run(self, archive_dir='archive', reload=False): \n", 177 | " \n", 178 | " self._log_print(\"=================================================\")\n", 179 | " \n", 180 | " if not reload:\n", 181 | " self.metadata = self.craw_archive(archive_dir=archive_dir)\n", 182 | " else:\n", 183 | " self.reload(archive_dir=archive_dir)\n", 184 | "\n", 185 | " self.metadata = self.craw_songs(self.metadata, archive_dir=archive_dir)\n", 186 | " \n", 187 | " with open(os.path.join(archive_dir, 'archive_metadata.json'), \"w\") as f:\n", 188 | " json.dump(self.metadata, f)\n", 189 | " \n" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | 
"outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "> new_songs\n", 204 | " (1/1070) https://www.hymnal.net/en/hymn/ns/528\n", 205 | " (2/1070) https://www.hymnal.net/en/hymn/ns/595\n", 206 | " (3/1070) https://www.hymnal.net/en/hymn/ns/584\n", 207 | " (4/1070) https://www.hymnal.net/en/hymn/ns/524\n", 208 | " (5/1070) https://www.hymnal.net/en/hymn/ns/501\n", 209 | " (6/1070) https://www.hymnal.net/en/hymn/lb/27\n", 210 | " (7/1070) https://www.hymnal.net/en/hymn/ns/550\n", 211 | " (8/1070) https://www.hymnal.net/en/hymn/ns/378\n", 212 | " (9/1070) https://www.hymnal.net/en/hymn/ns/309\n", 213 | "error!!\n", 214 | " (10/1070) https://www.hymnal.net/en/hymn/ns/398\n" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "if __name__ == '__main__':\n", 220 | " \n", 221 | " hc = HymnCraler()\n", 222 | " \n", 223 | " s = time.time()\n", 224 | " hc.run(reload=True)\n", 225 | " e = time.time()\n", 226 | " time.strftime(\"\\nElapsed time: %H:%M:%S\", time.gmtime(s-e))\n", 227 | " \n" 228 | ] 229 | } 230 | ], 231 | "metadata": { 232 | "anaconda-cloud": {}, 233 | "kernelspec": { 234 | "display_name": "Python [mir]", 235 | "language": "python", 236 | "name": "Python [mir]" 237 | }, 238 | "language_info": { 239 | "codemirror_mode": { 240 | "name": "ipython", 241 | "version": 3 242 | }, 243 | "file_extension": ".py", 244 | "mimetype": "text/x-python", 245 | "name": "python", 246 | "nbconvert_exporter": "python", 247 | "pygments_lexer": "ipython3", 248 | "version": "3.5.2" 249 | } 250 | }, 251 | "nbformat": 4, 252 | "nbformat_minor": 0 253 | } 254 | -------------------------------------------------------------------------------- /theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/verse.xml: -------------------------------------------------------------------------------- 1 | 2 | 1.2 3 | 4 | 5 | 4 pils 2 6 | 4 7 | 121 8 | C 9 | -I_zKOfTKIM 10 | 6 11 | 12 | 13 | 14 | Piano 15 | 0.8 16 | 0 17 | false 18 | 19 | 20 | 21 | 22 | 23 | 
24 | 25 | Piano 1/4s 26 | 0.8 27 | 0 28 | false 29 | 30 | 31 | 32 | 33 | Piano Bass Dotted 34 | 0.8 35 | 0 36 | false 37 | 38 | 39 | 40 | 41 | 30.48 42 | 19.92 43 | 1.99 44 | 17.93 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 0 54 | 1 55 | 1 56 | 1 57 | rest 58 | 0 59 | 1 60 | 61 | 62 | 1 63 | 1 64 | 2 65 | 0.5 66 | 1 67 | 0 68 | 0 69 | 70 | 71 | 1.5 72 | 1 73 | 2.5 74 | 0.5 75 | 1 76 | 0 77 | 0 78 | 79 | 80 | 2 81 | 1 82 | 3 83 | 1 84 | 1 85 | 0 86 | 0 87 | 88 | 89 | 3 90 | 1 91 | 4 92 | 0.5 93 | 2 94 | 0 95 | 0 96 | 97 | 98 | 3.5 99 | 1 100 | 4.5 101 | 0.5 102 | 3 103 | 0 104 | 0 105 | 106 | 107 | 4 108 | 2 109 | 1 110 | 0.5 111 | 4 112 | 0 113 | 0 114 | 115 | 116 | 4.5 117 | 2 118 | 1.5 119 | 1 120 | 2 121 | 0 122 | 0 123 | 124 | 125 | 5.5 126 | 2 127 | 2.5 128 | 1 129 | rest 130 | 0 131 | 1 132 | 133 | 134 | 6.5 135 | 2 136 | 3.5 137 | 1 138 | rest 139 | 0 140 | 1 141 | 142 | 143 | 7.5 144 | 2 145 | 4.5 146 | 0.5 147 | rest 148 | 0 149 | 1 150 | 151 | 152 | 8 153 | 3 154 | 1 155 | 0.5 156 | rest 157 | 0 158 | 1 159 | 160 | 161 | 8.5 162 | 3 163 | 1.5 164 | 0.5 165 | 2 166 | 0 167 | 0 168 | 169 | 170 | 9 171 | 3 172 | 2 173 | 0.5 174 | 2 175 | 0 176 | 0 177 | 178 | 179 | 9.5 180 | 3 181 | 2.5 182 | 0.5 183 | 2 184 | 0 185 | 0 186 | 187 | 188 | 10 189 | 3 190 | 3 191 | 0.5 192 | 2 193 | 0 194 | 0 195 | 196 | 197 | 10.5 198 | 3 199 | 3.5 200 | 0.5 201 | 2 202 | 0 203 | 0 204 | 205 | 206 | 11 207 | 3 208 | 4 209 | 0.5 210 | 3 211 | 0 212 | 0 213 | 214 | 215 | 11.5 216 | 3 217 | 4.5 218 | 0.5 219 | 2 220 | 0 221 | 0 222 | 223 | 224 | 12 225 | 4 226 | 1 227 | 1 228 | 1 229 | 0 230 | 0 231 | 232 | 233 | 13 234 | 4 235 | 2 236 | 1 237 | rest 238 | 0 239 | 1 240 | 241 | 242 | 14 243 | 4 244 | 3 245 | 1 246 | rest 247 | 0 248 | 1 249 | 250 | 251 | 15 252 | 4 253 | 4 254 | 1 255 | rest 256 | 0 257 | 1 258 | 259 | 260 | 16 261 | 5 262 | 1 263 | 1 264 | rest 265 | 0 266 | 1 267 | 268 | 269 | 17 270 | 5 271 | 2 272 | 0.5 273 | 1 274 | 0 275 | 0 276 | 277 | 278 | 17.5 279 | 5 
280 | 2.5 281 | 0.5 282 | 1 283 | 0 284 | 0 285 | 286 | 287 | 18 288 | 5 289 | 3 290 | 0.5 291 | 1 292 | 0 293 | 0 294 | 295 | 296 | 18.5 297 | 5 298 | 3.5 299 | 0.5 300 | 1 301 | 0 302 | 0 303 | 304 | 305 | 19 306 | 5 307 | 4 308 | 0.5 309 | 2 310 | 0 311 | 0 312 | 313 | 314 | 19.5 315 | 5 316 | 4.5 317 | 0.5 318 | 3 319 | 0 320 | 0 321 | 322 | 323 | 20 324 | 6 325 | 1 326 | 0.5 327 | 4 328 | 0 329 | 0 330 | 331 | 332 | 20.5 333 | 6 334 | 1.5 335 | 0.5 336 | 2 337 | 0 338 | 0 339 | 340 | 341 | 21 342 | 6 343 | 2 344 | 1 345 | rest 346 | 0 347 | 1 348 | 349 | 350 | 22 351 | 6 352 | 3 353 | 1 354 | rest 355 | 0 356 | 1 357 | 358 | 359 | 23 360 | 6 361 | 4 362 | 0.5 363 | rest 364 | 0 365 | 1 366 | 367 | 368 | 23.5 369 | 6 370 | 4.5 371 | 0.5 372 | 3 373 | 0 374 | 0 375 | 376 | 377 | 24 378 | 7 379 | 1 380 | 0.5 381 | 3 382 | 0 383 | 0 384 | 385 | 386 | 24.5 387 | 7 388 | 1.5 389 | 0.5 390 | 3 391 | 0 392 | 0 393 | 394 | 395 | 25 396 | 7 397 | 2 398 | 1 399 | rest 400 | 0 401 | 1 402 | 403 | 404 | 26 405 | 7 406 | 3 407 | 0.5 408 | rest 409 | 0 410 | 1 411 | 412 | 413 | 26.5 414 | 7 415 | 3.5 416 | 0.5 417 | 5 418 | 0 419 | 0 420 | 421 | 422 | 27 423 | 7 424 | 4 425 | 0.5 426 | 5 427 | 0 428 | 0 429 | 430 | 431 | 27.5 432 | 7 433 | 4.5 434 | 0.5 435 | 3 436 | 0 437 | 0 438 | 439 | 440 | 28 441 | 8 442 | 1 443 | 1 444 | 4 445 | 0 446 | 0 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 1 466 | 467 | 468 | 469 | 470 | 471 | 472 | 4 473 | 1 474 | 1 475 | 0 476 | 0 477 | 478 | 479 | 7 480 | 481 | 482 | 483 | 484 | 485 | 486 | 4 487 | 2 488 | 1 489 | 4 490 | 0 491 | 492 | 493 | 7 494 | 495 | 496 | 497 | 498 | 499 | 500 | 4 501 | 3 502 | 1 503 | 8 504 | 0 505 | 506 | 507 | 1 508 | 509 | 510 | 511 | 512 | 513 | 514 | 4 515 | 4 516 | 1 517 | 12 518 | 0 519 | 520 | 521 | 1 522 | 523 | 524 | 525 | 526 | 527 | 528 | 4 529 | 5 530 | 1 531 | 16 532 | 0 533 | 534 | 535 | 7 536 | 537 | 538 | 539 | 540 | 541 | 542 | 4 
543 | 6 544 | 1 545 | 20 546 | 0 547 | 548 | 549 | 3 550 | 551 | 552 | 553 | 554 | 555 | 556 | 4 557 | 7 558 | 1 559 | 24 560 | 0 561 | 562 | 563 | 7 564 | 565 | 566 | 567 | 568 | 569 | 570 | 2 571 | 8 572 | 1 573 | 28 574 | 0 575 | 576 | 577 | 5 578 | 579 | 580 | 581 | 582 | 583 | 0 584 | 2 585 | 8 586 | 3 587 | 30 588 | 0 589 | 590 | 591 | 8 592 | 593 | 594 | -------------------------------------------------------------------------------- /theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/intro.xml: -------------------------------------------------------------------------------- 1 | 2 | 1.2 3 | 4 | 5 | 4 pils intro 6 | 4 7 | 124 8 | C 9 | -I_zKOfTKIM 10 | 6 11 | 12 | 13 | 14 | Piano 15 | 0.8 16 | 0 17 | false 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | Piano 1/4s 26 | 0.8 27 | 0 28 | false 29 | 30 | 31 | 32 | 33 | Piano Bass Dotted 34 | 0.8 35 | 0 36 | false 37 | 38 | 39 | 40 | 41 | 15.14 42 | 19.4 43 | 1.94 44 | 17.46 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 0 54 | 1 55 | 1 56 | 0.5 57 | 3 58 | 0 59 | 0 60 | 61 | 62 | 0.5 63 | 1 64 | 1.5 65 | 0.5 66 | rest 67 | 0 68 | 1 69 | 70 | 71 | 1 72 | 1 73 | 2 74 | 0.5 75 | 1 76 | 0 77 | 0 78 | 79 | 80 | 1.5 81 | 1 82 | 2.5 83 | 0.5 84 | 1 85 | 0 86 | 0 87 | 88 | 89 | 2 90 | 1 91 | 3 92 | 2 93 | rest 94 | 0 95 | 1 96 | 97 | 98 | 4 99 | 2 100 | 1 101 | 0.5 102 | 4 103 | 0 104 | 0 105 | 106 | 107 | 4.5 108 | 2 109 | 1.5 110 | 0.5 111 | rest 112 | 0 113 | 1 114 | 115 | 116 | 5 117 | 2 118 | 2 119 | 0.5 120 | 2 121 | 0 122 | 0 123 | 124 | 125 | 5.5 126 | 2 127 | 2.5 128 | 0.5 129 | 2 130 | 0 131 | 0 132 | 133 | 134 | 6 135 | 2 136 | 3 137 | 2 138 | rest 139 | 0 140 | 1 141 | 142 | 143 | 8 144 | 3 145 | 1 146 | 0.5 147 | 5 148 | 0 149 | 0 150 | 151 | 152 | 8.5 153 | 3 154 | 1.5 155 | 0.5 156 | rest 157 | 0 158 | 1 159 | 160 | 161 | 9 162 | 3 163 | 2 164 | 0.5 165 | 3 166 | 0 167 | 0 168 | 169 | 170 | 9.5 171 | 3 172 | 2.5 173 | 0.5 174 | 3 175 | 0 176 | 0 177 | 178 | 179 | 10 180 | 3 181 | 3 182 | 2 183 | rest 184 | 
0 185 | 1 186 | 187 | 188 | 12 189 | 4 190 | 1 191 | 0.5 192 | 4 193 | 0 194 | 0 195 | 196 | 197 | 12.5 198 | 4 199 | 1.5 200 | 0.5 201 | rest 202 | 0 203 | 1 204 | 205 | 206 | 13 207 | 4 208 | 2 209 | 0.5 210 | 2 211 | 0 212 | 0 213 | 214 | 215 | 13.5 216 | 4 217 | 2.5 218 | 0.5 219 | 2 220 | 0 221 | 0 222 | 223 | 224 | 14 225 | 4 226 | 3 227 | 0.5 228 | rest 229 | 0 230 | 1 231 | 232 | 233 | 14.5 234 | 4 235 | 3.5 236 | 0.5 237 | rest 238 | 0 239 | 1 240 | 241 | 242 | 15 243 | 4 244 | 4 245 | 0.5 246 | rest 247 | 0 248 | 1 249 | 250 | 251 | 15.5 252 | 4 253 | 4.5 254 | 0.5 255 | rest 256 | 0 257 | 1 258 | 259 | 260 | 16 261 | 5 262 | 1 263 | 0.5 264 | 3 265 | 0 266 | 0 267 | 268 | 269 | 16.5 270 | 5 271 | 1.5 272 | 0.5 273 | rest 274 | 0 275 | 1 276 | 277 | 278 | 17 279 | 5 280 | 2 281 | 0.5 282 | 1 283 | 0 284 | 0 285 | 286 | 287 | 17.5 288 | 5 289 | 2.5 290 | 0.5 291 | 1 292 | 0 293 | 0 294 | 295 | 296 | 18 297 | 5 298 | 3 299 | 1 300 | rest 301 | 0 302 | 1 303 | 304 | 305 | 19 306 | 5 307 | 4 308 | 1 309 | rest 310 | 0 311 | 1 312 | 313 | 314 | 20 315 | 6 316 | 1 317 | 0.5 318 | 4 319 | 0 320 | 0 321 | 322 | 323 | 20.5 324 | 6 325 | 1.5 326 | 0.5 327 | rest 328 | 0 329 | 1 330 | 331 | 332 | 21 333 | 6 334 | 2 335 | 0.5 336 | 2 337 | 0 338 | 0 339 | 340 | 341 | 21.5 342 | 6 343 | 2.5 344 | 0.5 345 | 2 346 | 0 347 | 0 348 | 349 | 350 | 22 351 | 6 352 | 3 353 | 1 354 | rest 355 | 0 356 | 1 357 | 358 | 359 | 23 360 | 6 361 | 4 362 | 1 363 | rest 364 | 0 365 | 1 366 | 367 | 368 | 24 369 | 7 370 | 1 371 | 0.5 372 | 5 373 | 0 374 | 0 375 | 376 | 377 | 24.5 378 | 7 379 | 1.5 380 | 0.5 381 | rest 382 | 0 383 | 1 384 | 385 | 386 | 25 387 | 7 388 | 2 389 | 0.5 390 | 3 391 | 0 392 | 0 393 | 394 | 395 | 25.5 396 | 7 397 | 2.5 398 | 0.5 399 | 3 400 | 0 401 | 0 402 | 403 | 404 | 26 405 | 7 406 | 3 407 | 1 408 | rest 409 | 0 410 | 1 411 | 412 | 413 | 27 414 | 7 415 | 4 416 | 1 417 | rest 418 | 0 419 | 1 420 | 421 | 422 | 28 423 | 8 424 | 1 425 | 0.5 426 | 4 427 | 0 428 | 0 
429 | 430 | 431 | 28.5 432 | 8 433 | 1.5 434 | 0.5 435 | rest 436 | 0 437 | 1 438 | 439 | 440 | 29 441 | 8 442 | 2 443 | 0.5 444 | 2 445 | 0 446 | 0 447 | 448 | 449 | 29.5 450 | 8 451 | 2.5 452 | 0.5 453 | 2 454 | 0 455 | 0 456 | 457 | 458 | 30 459 | 8 460 | 3 461 | 2 462 | rest 463 | 0 464 | 1 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 1 484 | 485 | 486 | 487 | 488 | 489 | 490 | 4 491 | 1 492 | 1 493 | 0 494 | 0 495 | 496 | 497 | 7 498 | 499 | 500 | 501 | 502 | 503 | 504 | 4 505 | 2 506 | 1 507 | 4 508 | 0 509 | 510 | 511 | 3 512 | 513 | 514 | 515 | 516 | 517 | 518 | 4 519 | 3 520 | 1 521 | 8 522 | 0 523 | 524 | 525 | 7 526 | 527 | 528 | 529 | 530 | 531 | 532 | 2 533 | 4 534 | 1 535 | 12 536 | 0 537 | 538 | 539 | 5 540 | 541 | 542 | 543 | 544 | 545 | 0 546 | 2 547 | 4 548 | 3 549 | 14 550 | 0 551 | 552 | 553 | 1 554 | 555 | 556 | 557 | 558 | 559 | 560 | 4 561 | 5 562 | 1 563 | 16 564 | 0 565 | 566 | 567 | 7 568 | 569 | 570 | 571 | 572 | 573 | 574 | 4 575 | 6 576 | 1 577 | 20 578 | 0 579 | 580 | 581 | 3 582 | 583 | 584 | 585 | 586 | 587 | 588 | 4 589 | 7 590 | 1 591 | 24 592 | 0 593 | 594 | 595 | 7 596 | 597 | 598 | 599 | 600 | 601 | 602 | 2 603 | 8 604 | 1 605 | 28 606 | 0 607 | 608 | 609 | 5 610 | 611 | 612 | 613 | 614 | 615 | 0 616 | 2 617 | 8 618 | 3 619 | 30 620 | 0 621 | 622 | 623 | 8 624 | 625 | 626 | -------------------------------------------------------------------------------- /theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/chorus.xml: -------------------------------------------------------------------------------- 1 | 2 | 1.2 3 | 4 | 5 | 4 pils r 6 | 4 7 | 121 8 | C 9 | -I_zKOfTKIM 10 | 6 11 | 12 | 13 | 14 | Piano 15 | 0.8 16 | 0 17 | false 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | Piano 1/4s 26 | 0.8 27 | 0 28 | false 29 | 30 | 31 | 32 | 33 | Piano Bass Dotted 34 | 0.8 35 | 0 36 | false 37 | 38 | 39 | 40 | 41 | 46.82 42 | 19.83 43 | 1.98 44 | 17.84 45 | 46 | 47 | 48 | 49 
| 50 | 51 | 52 | 53 | 0 54 | 1 55 | 1 56 | 1 57 | rest 58 | 0 59 | 1 60 | 61 | 62 | 1 63 | 1 64 | 2 65 | 0.5 66 | 1 67 | 0 68 | 0 69 | 70 | 71 | 1.5 72 | 1 73 | 2.5 74 | 0.5 75 | 1 76 | 0 77 | 0 78 | 79 | 80 | 2 81 | 1 82 | 3 83 | 1 84 | 1 85 | 0 86 | 0 87 | 88 | 89 | 3 90 | 1 91 | 4 92 | 0.5 93 | 2 94 | 0 95 | 0 96 | 97 | 98 | 3.5 99 | 1 100 | 4.5 101 | 0.5 102 | 3 103 | 0 104 | 0 105 | 106 | 107 | 4 108 | 2 109 | 1 110 | 0.5 111 | 4 112 | 0 113 | 0 114 | 115 | 116 | 4.5 117 | 2 118 | 1.5 119 | 0.5 120 | 2 121 | 0 122 | 0 123 | 124 | 125 | 5 126 | 2 127 | 2 128 | 0.5 129 | rest 130 | 0 131 | 1 132 | 133 | 134 | 5.5 135 | 2 136 | 2.5 137 | 0.5 138 | rest 139 | 0 140 | 1 141 | 142 | 143 | 6 144 | 2 145 | 3 146 | 0.5 147 | rest 148 | 0 149 | 1 150 | 151 | 152 | 6.5 153 | 2 154 | 3.5 155 | 0.5 156 | rest 157 | 0 158 | 1 159 | 160 | 161 | 7 162 | 2 163 | 4 164 | 0.5 165 | rest 166 | 0 167 | 1 168 | 169 | 170 | 7.5 171 | 2 172 | 4.5 173 | 0.5 174 | rest 175 | 0 176 | 1 177 | 178 | 179 | 8 180 | 3 181 | 1 182 | 0.5 183 | rest 184 | 0 185 | 1 186 | 187 | 188 | 8.5 189 | 3 190 | 1.5 191 | 0.5 192 | 2 193 | 0 194 | 0 195 | 196 | 197 | 9 198 | 3 199 | 2 200 | 0.5 201 | 2 202 | 0 203 | 0 204 | 205 | 206 | 9.5 207 | 3 208 | 2.5 209 | 0.5 210 | 2 211 | 0 212 | 0 213 | 214 | 215 | 10 216 | 3 217 | 3 218 | 0.5 219 | 2 220 | 0 221 | 0 222 | 223 | 224 | 10.5 225 | 3 226 | 3.5 227 | 0.5 228 | 2 229 | 0 230 | 0 231 | 232 | 233 | 11 234 | 3 235 | 4 236 | 0.5 237 | 3 238 | 0 239 | 0 240 | 241 | 242 | 11.5 243 | 3 244 | 4.5 245 | 0.5 246 | 2 247 | 0 248 | 0 249 | 250 | 251 | 12 252 | 4 253 | 1 254 | 0.5 255 | 1 256 | 0 257 | 0 258 | 259 | 260 | 12.5 261 | 4 262 | 1.5 263 | 0.5 264 | rest 265 | 0 266 | 1 267 | 268 | 269 | 13 270 | 4 271 | 2 272 | 0.5 273 | rest 274 | 0 275 | 1 276 | 277 | 278 | 13.5 279 | 4 280 | 2.5 281 | 0.5 282 | rest 283 | 0 284 | 1 285 | 286 | 287 | 14 288 | 4 289 | 3 290 | 0.5 291 | rest 292 | 0 293 | 1 294 | 295 | 296 | 14.5 297 | 4 298 | 3.5 299 | 0.5 300 | rest 
301 | 0 302 | 1 303 | 304 | 305 | 15 306 | 4 307 | 4 308 | 0.5 309 | rest 310 | 0 311 | 1 312 | 313 | 314 | 15.5 315 | 4 316 | 4.5 317 | 0.5 318 | rest 319 | 0 320 | 1 321 | 322 | 323 | 16 324 | 5 325 | 1 326 | 0.5 327 | rest 328 | 0 329 | 1 330 | 331 | 332 | 16.5 333 | 5 334 | 1.5 335 | 0.5 336 | 5 337 | 0 338 | 0 339 | 340 | 341 | 17 342 | 5 343 | 2 344 | 0.5 345 | 5 346 | 0 347 | 0 348 | 349 | 350 | 17.5 351 | 5 352 | 2.5 353 | 0.5 354 | 5 355 | 0 356 | 0 357 | 358 | 359 | 18 360 | 5 361 | 3 362 | 0.5 363 | 5 364 | 0 365 | 0 366 | 367 | 368 | 18.5 369 | 5 370 | 3.5 371 | 0.5 372 | 5 373 | 0 374 | 0 375 | 376 | 377 | 19 378 | 5 379 | 4 380 | 0.5 381 | 5 382 | 0 383 | 0 384 | 385 | 386 | 19.5 387 | 5 388 | 4.5 389 | 0.5 390 | 5 391 | 0 392 | 0 393 | 394 | 395 | 20 396 | 6 397 | 1 398 | 0.5 399 | 4 400 | 0 401 | 0 402 | 403 | 404 | 20.5 405 | 6 406 | 1.5 407 | 0.5 408 | 2 409 | 0 410 | 0 411 | 412 | 413 | 21 414 | 6 415 | 2 416 | 0.5 417 | rest 418 | 0 419 | 1 420 | 421 | 422 | 21.5 423 | 6 424 | 2.5 425 | 0.5 426 | rest 427 | 0 428 | 1 429 | 430 | 431 | 22 432 | 6 433 | 3 434 | 0.5 435 | rest 436 | 0 437 | 1 438 | 439 | 440 | 22.5 441 | 6 442 | 3.5 443 | 0.5 444 | rest 445 | 0 446 | 1 447 | 448 | 449 | 23 450 | 6 451 | 4 452 | 0.5 453 | rest 454 | 0 455 | 1 456 | 457 | 458 | 23.5 459 | 6 460 | 4.5 461 | 0.5 462 | rest 463 | 0 464 | 1 465 | 466 | 467 | 24 468 | 7 469 | 1 470 | 0.5 471 | rest 472 | 0 473 | 1 474 | 475 | 476 | 24.5 477 | 7 478 | 1.5 479 | 0.5 480 | 3 481 | 0 482 | 0 483 | 484 | 485 | 25 486 | 7 487 | 2 488 | 0.5 489 | 3 490 | 0 491 | 0 492 | 493 | 494 | 25.5 495 | 7 496 | 2.5 497 | 0.5 498 | 4 499 | 0 500 | 0 501 | 502 | 503 | 26 504 | 7 505 | 3 506 | 1 507 | 5 508 | 0 509 | 0 510 | 511 | 512 | 27 513 | 7 514 | 4 515 | 1 516 | 7 517 | -1 518 | 0 519 | 520 | 521 | 28 522 | 8 523 | 1 524 | 1 525 | 4 526 | 0 527 | 0 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 1 547 | 548 | 549 | 550 | 
551 | 552 | 553 | 4 554 | 1 555 | 1 556 | 0 557 | 0 558 | 559 | 560 | 7 561 | 562 | 563 | 564 | 565 | 566 | 567 | 4 568 | 2 569 | 1 570 | 4 571 | 0 572 | 573 | 574 | 7 575 | 576 | 577 | 578 | 579 | 580 | 581 | 4 582 | 3 583 | 1 584 | 8 585 | 0 586 | 587 | 588 | 1 589 | 590 | 591 | 592 | 593 | 594 | 595 | 2 596 | 4 597 | 1 598 | 12 599 | 0 600 | 601 | 602 | 3 603 | 604 | 605 | 606 | 607 | 608 | 609 | 1 610 | 4 611 | 3 612 | 14 613 | 0 614 | 615 | 616 | 2 617 | 618 | 619 | 620 | 621 | 622 | 0 623 | 1 624 | 4 625 | 4 626 | 15 627 | 0 628 | 629 | 630 | 1 631 | 632 | 633 | 634 | 635 | 636 | 637 | 4 638 | 5 639 | 1 640 | 16 641 | 0 642 | 643 | 644 | 7 645 | 646 | 647 | 648 | 649 | 650 | 651 | 4 652 | 6 653 | 1 654 | 20 655 | 0 656 | 657 | 658 | 3 659 | 660 | 661 | 662 | 663 | 664 | 665 | 4 666 | 7 667 | 1 668 | 24 669 | 0 670 | 671 | 672 | 7 673 | 674 | 675 | 676 | 677 | 678 | 679 | 2 680 | 8 681 | 1 682 | 28 683 | 0 684 | 685 | 686 | 5 687 | 688 | 689 | 690 | 691 | 692 | 0 693 | 2 694 | 8 695 | 3 696 | 30 697 | 0 698 | 699 | 700 | 8 701 | 702 | 703 | -------------------------------------------------------------------------------- /doug-mckenzie-jazz/codes/DMjazzCrawler.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 76, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests\n", 12 | "from bs4 import BeautifulSoup\n", 13 | "import os\n", 14 | "import sys\n", 15 | "import time\n", 16 | "import json\n", 17 | "import re" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 153, 23 | "metadata": { 24 | "collapsed": false 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "class DMjazzCrawler():\n", 29 | " BASE_URL = 'http://www.bushgrafts.com/jazz'\n", 30 | " ROOT = 'archive'\n", 31 | " \n", 32 | " def __init__(self, sleep_time=0.1, log=True):\n", 33 | " self.sleep_time = sleep_time\n", 34 | " 
self.log = log\n", 35 | "\n", 36 | " def _request_url(self, url, doctype='html'):\n", 37 | " # set header\n", 38 | " response = requests.get(url, headers = {\"Cache-Control\":\"max-age=0\"})\n", 39 | "\n", 40 | " # sleep\n", 41 | " time.sleep(self.sleep_time)\n", 42 | "\n", 43 | " # return\n", 44 | " if doctype =='html':\n", 45 | " soup = BeautifulSoup(response.text, 'html.parser')\n", 46 | " return soup\n", 47 | " elif doctype =='content':\n", 48 | " return response.content\n", 49 | " else:\n", 50 | " return response\n", 51 | "\n", 52 | " def _log_print(self, log, quite=False):\n", 53 | " if not quite:\n", 54 | " print(log)\n", 55 | "\n", 56 | " if self.log:\n", 57 | " with open(\"log.txt\", \"a\") as f:\n", 58 | " print(log, file=f)\n", 59 | " \n", 60 | " def fetch_song(self):\n", 61 | " self.soup = self._request_url(self.BASE_URL+'/midi.htm')\n", 62 | " a_list = dmc.soup.find_all('a')\n", 63 | " midi_list = []\n", 64 | " name_list = []\n", 65 | "\n", 66 | " cnt = 0\n", 67 | " for idx, a in enumerate(a_list):\n", 68 | " str_ = a.get('href')\n", 69 | " if str_ and (str_ not in midi_list) and ('.mid' in str_):\n", 70 | " song_name = re.sub( '\\s+', ' ' , a.text.replace( '\\r\\n' , '' )).strip(' ')\n", 71 | " if song_name:\n", 72 | " midi_fn = str_.split('/')[1]\n", 73 | " midi_list.append(midi_fn )\n", 74 | " name_list.append(song_name)\n", 75 | " print('%3d | %-40s %s'%(idx, song_name, midi_fn))\n", 76 | " cnt += 1\n", 77 | "\n", 78 | " self._log_print('Total: %d'%cnt)\n", 79 | " \n", 80 | " return dict(zip(midi_list, name_list))\n", 81 | " \n", 82 | " def crawl_song(self, song_dict):\n", 83 | " for idx, k in enumerate(song_dict.keys()):\n", 84 | " \n", 85 | " url = self.BASE_URL + '/Midi%20site/' + k\n", 86 | " print('%3d %s' %(idx, url))\n", 87 | " content = self._request_url(url, doctype='content')\n", 88 | " \n", 89 | " with open(os.path.join(self.ROOT,k), \"wb\") as f:\n", 90 | " f.write(content)\n", 91 | " \n", 92 | " def run(self):\n", 93 | " \n", 94 | " 
song_dict = self.fetch_song()\n", 95 | " \n", 96 | " if not os.path.exists(self.ROOT):\n", 97 | " os.makedirs(self.ROOT)\n", 98 | " with open(os.path.join(self.ROOT, 'archive.json'), \"w\") as f:\n", 99 | " json.dump(song_dict, f)\n", 100 | " \n", 101 | " self.crawl_song(song_dict)\n" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 152, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | " 33 | A Fine Romance - take 1 afine-1.mid\n", 116 | " 35 | A Fine Romance - take 2 afine-2.mid\n", 117 | " 36 | A Ghost Of A Chance Aghostofachance.mid\n", 118 | " 37 | A House Is Not A Home AHouseis.mid\n", 119 | " 38 | A Nightingale Sang... Anighting.mid\n", 120 | " 39 | A Remark You Made ARemarkYouMade.mid\n", 121 | " 40 | A Sleepin' Bee A Sleepin' Bee.mid\n", 122 | " 41 | After You've Gone AfterYou.mid\n", 123 | " 42 | Alfie alfiepno.mid\n", 124 | " 43 | Alice In Wonderland AliceInWonderland.mid\n", 125 | " 44 | All The Things You Are AllTheThings V2.mid\n", 126 | " 45 | All The Things You Are/2 All The Things You Are.mid\n", 127 | " 46 | All The Things Reharmonized AllTheThings Reharmonized.mid\n", 128 | " 47 | Alone Together (trio) Alone Together.mid\n", 129 | " 49 | Ask Me Now (Monk) Ask Me Now 2.mid\n", 130 | " 50 | Ave Maria Bach Prelude No 1 (Ave Maria).mid\n", 131 | " 51 | Autumn In New York Autumn In NY.mid\n", 132 | " 52 | Autumn Leaves AutumnLeaves.mid\n", 133 | " 55 | Baby It's Cold Outside Baby its Cold outside.mid\n", 134 | " 57 | Beautiful Love Beautiful Love (Doug McKenzie).mid\n", 135 | " 58 | Beethoven Pathetique Pathetique.mid\n", 136 | " 59 | Begin The Beguine Begin The Beguine.mid\n", 137 | " 60 | Bess You Is... 
Bess You Is.mid\n", 138 | " 61 | Blackbird Blackbird (Brad Mehldau).mid\n", 139 | " 62 | Blame It On My Youth Blameiton.mid\n", 140 | " 63 | Blue Bossa (tk1) BlueBossa1GM.mid\n", 141 | " 64 | Blue Bossa (tk2) BlueBossa3GM.mid\n", 142 | " 65 | Blue Room Blue room midi file.mid\n", 143 | " 66 | Brazilian Like Brazillike.mid\n", 144 | " 67 | Brazilian Suite BrazilianSuite.mid\n", 145 | " 68 | Broadway broadway.mid\n", 146 | " 69 | But Beautiful But Beautifulsolo.mid\n", 147 | " 70 | By Myself Bymyself.mid\n", 148 | " 71 | By Myself /2 By Myself3.mid\n", 149 | " 73 | By The Time I Get... By The Time.mid\n", 150 | " 76 | Cantabile Cantabile 2.mid\n", 151 | " 77 | Caravan (trio) Caravan2.mid\n", 152 | " 78 | Carnival (Black Orpheus) Carnival.mid\n", 153 | " 80 | Cast Your Fate... Cast Your Fate.mid\n", 154 | " 81 | Chelsea Bridge Chelsea Bridge.mid\n", 155 | " 82 | Chopin Waltz Chopin WaltzAb.mid\n", 156 | " 83 | Cinema Paradiso cinema.mid\n", 157 | " 84 | Close Your Eyes Close your eyes.mid\n", 158 | " 85 | Come Rain Or Come Shine (solo) Come Rain or Come Shine V1.mid\n", 159 | " 86 | Come Rain Or Come Shine (duet) Come Rain or Come Shine duo.mid\n", 160 | " 87 | Come Sunday comesun.mid\n", 161 | " 88 | Cry Me A River Cry me a river.mid\n", 162 | " 89 | Cubano Chant Cubano Chant 2.mid\n", 163 | " 92 | Dancing On The Ceiling DancingontheCeiling.mid\n", 164 | " 93 | Danny Boy Dannyboy.mid\n", 165 | " 94 | Day Dream Day Dream.mid\n", 166 | " 95 | Days Of Wine And Roses DaysofWine.mid\n", 167 | " 96 | Dearly Beloved dearlybeloved.mid\n", 168 | " 97 | Deep Purple DeepPurple.mid\n", 169 | " 98 | Desafinado Desafinado.mid\n", 170 | "100 | Desire desire.mid\n", 171 | "101 | Detour Ahead Detour ahead.mid\n", 172 | "102 | Do I Love You Because... 
Do I Love You Because you're Beautiful.mid\n", 173 | "103 | Dolphin Dance dolphindance3.mid\n", 174 | "104 | Don't Explain Don't Explain solo.mid\n", 175 | "105 | Dreamsville Dreamsville.mid\n", 176 | "108 | Easy Does It [M] Easy does it.mid\n", 177 | "109 | Easy Living [M] Easy Living 3.mid\n", 178 | "110 | Easy To Love EasytoLove2.mid\n", 179 | "111 | Effendi Effendi - McCoy Tyner.mid\n", 180 | "112 | Eleanor Rigby Eleanor Rigby.mid\n", 181 | "114 | Emily emily.mid\n", 182 | "116 | Estate estate.mid\n", 183 | "117 | Exactly Like You Exactly Like You.mid\n", 184 | "121 | Falling Grace Falling Grace .mid\n", 185 | "122 | Falling In Love With Love (trio) Falling in Love with Love trio.mid\n", 186 | "124 | For Sentimental Reasons ForSentimentalReasons.mid\n", 187 | "127 | Gaviota Gaviota trio 2.mid\n", 188 | "128 | Georgia On My Mind Georgia.mid\n", 189 | "129 | Girl Talk GirlTalk.mid\n", 190 | "130 | Give Me The Simple Life simplelife.mid\n", 191 | "131 | Goin' Home Goin'_Home_(Real_Time)_(Antonin_Dvorak).mid\n", 192 | "132 | Good Bait goodbait.mid\n", 193 | "133 | Good Bait GoodBaitPiano.mid\n", 194 | "134 | Good Bait goodbaitGM.mid\n", 195 | "135 | Gone With The Wind gonewind.mid\n", 196 | "136 | Gone With The Wind (II) [M] Gone with the wind.mid\n", 197 | "137 | Green Dolphin Street Green Dolph solo.mid\n", 198 | "138 | Gymnopedie/Medley Gymnopedie-It Never Entered My Mind.mid\n", 199 | "141 | Have You Met Miss Jones (duet) Have You Met - duet.mid\n", 200 | "142 | Have You Met Miss Jones (latin) Have You Met Miss Jones - latin.mid\n", 201 | "143 | How Come You Like Me... 
HowkumU.mid\n", 202 | "145 | How Deep Is The Ocean (old) How Deep is the Ocean (Doug McKenzie).mid\n", 203 | "146 | How Deep Is The Ocean (solo) howdsolo.mid\n", 204 | "147 | How Deep Is The Ocean (trio) hwdptrio.mid\n", 205 | "148 | Hymn To Freedom Hymn To Freedom.mid\n", 206 | "151 | I'm Old Fashioned (trio) [M] oldfashionedtrio.mid\n", 207 | "152 | I Can't Get Started Icantgetstarted.mid\n", 208 | "153 | I Could Write A Book couldwrite.mid\n", 209 | "154 | I Concentrate On You I Concentrate on You.mid\n", 210 | "155 | I Cover The Waterfront I Cover the Waterfront - solo.mid\n", 211 | "156 | I Fall In Love Too Easily I fall in Love Too Easily.mid\n", 212 | "157 | I Fall In Love Too Easily I Fall in Love v2.mid\n", 213 | "158 | I Hear A Rhapsody I Hear a Rhapsody.mid\n", 214 | "159 | I'll Close My Eyes (trio) I'll Close My Eyes 2.mid\n", 215 | "161 | I Love's You Porgy /1 [M] porgy1.mid\n", 216 | "162 | I Love's You Porgy /2 [M] porgy2.mid\n", 217 | "163 | I Never Knew INeverKnew.mid\n", 218 | "164 | I Remember Clifford (tk1) Clifford1.mid\n", 219 | "165 | I Remember Clifford (tk 2) Clifford2.mid\n", 220 | "166 | I Remember You (solo) [M] Irememberyousolo.mid\n", 221 | "167 | I Should Care McKenzie-Ishouldcare2.mid\n", 222 | "168 | I Should Care / take 2 ishouldcare2.mid\n", 223 | "169 | I Thought About You I Thought About You (Doug McKenzie).mid\n", 224 | "170 | I Want To Be Happy IWantToBeHappyXG.mid\n", 225 | "171 | If Ever I Should Leave You If Ever I should leave You.mid\n", 226 | "172 | If I Loved You if i loved you.mid\n", 227 | "173 | If I Should Leave You IfIshouldLeaveYou.mid\n", 228 | "174 | If I Were A Bell IfIwere.mid\n", 229 | "175 | I'm Confessin' I'm Confessin' - solo.mid\n", 230 | "176 | I'm Old Fashioned I'mOldFash.mid\n", 231 | "177 | I'm Old Fashioned (tk 2) ImOldFash.mid\n", 232 | "178 | In A Sentimental Mood Inasent.mid\n", 233 | "180 | Indiana Indiana.mid\n", 234 | "181 | In Your Own Sweet Way (tk1) SweetWay1.mid\n", 235 | "182 | In Your Own 
Sweet Way (tk2) SweetWay2.mid\n", 236 | "183 | In Your Own Sweet Way (tk3) SweetWay3.mid\n", 237 | "184 | Isfahan (Elf) Isfahan (Elf).mid\n", 238 | "185 | Isn't It Romantic [M] Isn_t_it_Romantic.mid\n", 239 | "186 | It Could Happen To You It Could Happen V2.mid\n", 240 | "187 | It Could Happen To You (trio) [M] It Could Happen - piano trio.mid\n", 241 | "188 | It Could Happen To You ItCouldHappenGM.mid\n", 242 | "189 | It Don't Mean A Thing... [M] Itdon'tmeanathing.mid\n", 243 | "190 | It's Easy To Remember Its Easy to Remember.mid\n", 244 | "191 | It Might As Well Be Spring It Might as well be Spring v2.mid\n", 245 | "193 | It's The Talk Of The Town Its theTalk Of The Town - solo.mid\n", 246 | "194 | I've Grown Accustomed... accustomed.mid\n", 247 | "197 | Just A Gigolo Just a Gigolo.mid\n", 248 | "198 | Just Friends justfrien solo.mid\n", 249 | "199 | Just Friends (trio) Just Friends Trio.mid\n", 250 | "200 | Just You, Just Me justyou_mark.mid\n", 251 | "203 | Ladies In Mercedes Ladies in Mercedes.mid\n", 252 | "204 | Lady Be Good ladybegood.mid\n", 253 | "205 | Laura laura.mid\n", 254 | "206 | Les Grelots (Petrucciani) grelots.mid\n", 255 | "208 | Like Being In Love It'sAlmostLikeBeingInLove.mid\n", 256 | "209 | Little Girl Blue Little Girl Blue.mid\n", 257 | "210 | Long Ago And Far Away [M] Long Ago and Far away - 3 versions.mid\n", 258 | "211 | Look For The Silver Lining [M] LookfortheSilverLining.mid\n", 259 | "212 | Louisiana louisiana.mid\n", 260 | "213 | Love Letters love letters.mid\n", 261 | "214 | Love Walked In Love walked In.mid\n", 262 | "215 | Love Walked In LoveWalked.mid\n", 263 | "216 | Lover Come Back... 
lovercome piano.mid\n", 264 | "217 | Lullaby Lullaby.mid\n", 265 | "219 | Lush Life LushLife trio.mid\n", 266 | "222 | Mad About The Boy Mad About the Boy.mid\n", 267 | "223 | Maiden Voyage maidenvoyage.mid\n", 268 | "224 | Maid(en)s Of Cadiz Maids of Cadiz 2.mid\n", 269 | "225 | Make Someone Happy Make Someone Happy.mid\n", 270 | "226 | Manteca manteca.mid\n", 271 | "227 | Mean To Me Mean to Me (Doug McKenzie).mid\n", 272 | "228 | Medley \"The King And I\" KingandI.mid\n", 273 | "229 | Memories Of Paris Memories of Paris.mid\n", 274 | "230 | Merry Christmas merryxmas.mid\n", 275 | "231 | Mike Pee Mike Pee 2.mid\n", 276 | "233 | Milestones milestones.mid\n", 277 | "234 | Miyako Miyako.mid\n", 278 | "235 | Mood Indigo Mood Indigo - solo.mid\n", 279 | "236 | Moon And Sand Moon and Sand.mid\n", 280 | "237 | Moon River Moon River 3.mid\n", 281 | "239 | Moonlight In Vermont moonlightinvermont.mid\n", 282 | "240 | My Foolish Heart MyFoolishHeart.mid\n", 283 | "241 | My Funny Valentine MyFunny3.mid\n", 284 | "242 | My Funny Valentine (solo) funny val solo.mid\n", 285 | "244 | My Heart Stood Still myheartstoodstill edited a bit.mid\n", 286 | "245 | My Man's Gone Now ManGone.mid\n", 287 | "247 | My One And Only Love Myoneand.mid\n", 288 | "248 | My Romance My Romance.mid\n", 289 | "249 | My Shining Hour My Shining Hour.mid\n", 290 | "250 | My Shining Hour (solo) MyShiningHoursolo.mid\n", 291 | "251 | My Ship My ship solo.mid\n", 292 | "254 | Nardis Nardis.mid\n", 293 | "256 | New Orleans NewOrleans.mid\n", 294 | "257 | Nocturne In Eb NocturneEbChopin.mid\n", 295 | "260 | Old Folks Old Folks.mid\n", 296 | "261 | Old Man River Old Man River.mid\n", 297 | "262 | Once Upon A Summertime Once upon a summertime.mid\n", 298 | "263 | On Green Dolphin Street On Green Dolphin 4.mid\n", 299 | "264 | Our Love Is Here To Stay (trio) loveishere to stay.mid\n", 300 | "265 | Out Of Nowhere (1) Out of Nowhere 1.mid\n", 301 | "266 | Out Of Nowhere (2) Out of Nowhere 2.mid\n", 302 | "267 | Out 
Of Nowhere (3) Out of Nowhere 3.mid\n", 303 | "268 | Over The Rainbow (trio) Over the rain trio.mid\n", 304 | "269 | Over The Rainbow (GM) OverTheRainbowGM.mid\n", 305 | "270 | Over The Rainbow (XG) OverTheRainbowXG.mid\n", 306 | "274 | Pent Up House pentupHouse.mid\n", 307 | "275 | Penthouse Serenade penthouse s.mid\n", 308 | "276 | People Will Say We're In Love People Will Say We're in Love2.mid\n", 309 | "277 | Periscope periscope.mid\n", 310 | "278 | Pick Yourself Pick yourself.mid\n", 311 | "279 | Poinciana poinciana.mid\n", 312 | "281 | Prelude To A Kiss Prelude to a kiss.mid\n", 313 | "282 | Pure Imagination Pure Imagination.mid\n", 314 | "285 | Rachid Rachid.mid\n", 315 | "286 | Rain Waltz Rainwalt.mid\n", 316 | "287 | Recardo Bossa Nova Recardo.mid\n", 317 | "288 | Recordame Recordame.mid\n", 318 | "289 | Remember Remember - solo jazz piano.mid\n", 319 | "290 | Renewal [M] Renewaltrio.mid\n", 320 | "293 | Sammy Walked In SammyWalked.mid\n", 321 | "294 | St Patrick's Day Special Danny Boy.mid\n", 322 | "295 | Secret Love McKenzie-secret love.mid\n", 323 | "296 | Send In The Clowns send in the clowns.mid\n", 324 | "297 | Serenata Serenata solo.mid\n", 325 | "298 | Shenandoah Shenandoah.mid\n", 326 | "299 | Shiny Stockings shinystockings.mid\n", 327 | "300 | Since We Met Since We Met Jazz Kit.mid\n", 328 | "301 | Sixteen Going On Seventeen sixteengoing.mid\n", 329 | "302 | Skylark Skylark 2.mid\n", 330 | "303 | Strollin' Strollin'.mid\n", 331 | "304 | So In Love (solo) SoinLovesolo.mid\n", 332 | "305 | So In Love (trio) SoInlove - trio version.mid\n", 333 | "306 | Some Day My Prince... 
Some day My Prince.mid\n", 334 | "308 | Some Other Time Some Other Time.mid\n", 335 | "309 | Something Wonderful something wonderful.mid\n", 336 | "310 | Sometime Ago Sometimeago.mid\n", 337 | "311 | Sonnymoon For 2 sonnymoo.mid\n", 338 | "313 | Soon soon.mid\n", 339 | "314 | Sophisticated Lady SophisticatedLady.mid\n", 340 | "315 | Soul Eyes SoulEyessolo.mid\n", 341 | "316 | Spain / 2 Spain_2_(Chick_Corea).mid\n", 342 | "317 | Spain / 3 spain-3.mid\n", 343 | "318 | Theme From Spartacus Spartacus- 2 pianos.mid\n", 344 | "319 | Speak Low Speak Low (Doug McKenzie).mid\n", 345 | "320 | Spring Is Here Spring Is Here - Bill Evans chords.mid\n", 346 | "321 | Spring Is Here /2 Spring is Here 2 (K Barron).mid\n", 347 | "323 | Spring Is Here /3 Spring Is Here2-Bill Evans chords.mid\n", 348 | "327 | Star Eyes star eyes.mid\n", 349 | "328 | Stars Fell On Alabama (duet) Alabama.mid\n", 350 | "329 | Stella By Starlight (solo) Stella solo.mid\n", 351 | "331 | Stormy Weather stormyweather.mid\n", 352 | "332 | Strange Meadow Lark StrangeMeadowlark.mid\n", 353 | "333 | Surrey/Beautiful Medley Surrey medley.mid\n", 354 | "334 | Sweet And Lovely SweetAndLovely.mid\n", 355 | "335 | Sweet And Lovely (2) Sweet1.mid\n", 356 | "336 | Sweet Lorraine Sweetlorraine.mid\n", 357 | "339 | Take The A Train taketheatrain.mid\n", 358 | "340 | Taking A Chance (on Love) TakinACh.mid\n", 359 | "341 | Taking A Chance On Love (solo) Taking a Chance On Love - Solo piano.mid\n", 360 | "342 | Tea For Two [M] Tea for two.mid\n", 361 | "343 | That Old Devil Called Love That Ole Devil Called Love.mid\n", 362 | "344 | The Duke TheDuke.mid\n", 363 | "345 | The End Of A Love Affair The End of a Love Affair.mid\n", 364 | "347 | The Folks Who Live... 
The folks who live on the hill.mid\n", 365 | "348 | The Man That Got Away TheManThatGotAway.mid\n", 366 | "349 | The Masquerade Is Over The Masquerade is Over.mid\n", 367 | "350 | The More I See You moreicu.mid\n", 368 | "352 | The Peacocks McKenzie-ThePeacocks.mid\n", 369 | "353 | The Peanut Vendor Peanut Vendor (El Manisero).mid\n", 370 | "354 | The Song Is You (trio) [M] TheSongisYoutrio.mid\n", 371 | "355 | The Summer Wind Summer wind 2.mid\n", 372 | "356 | The Way You Look Tonight (trio) The Way You look trio.mid\n", 373 | "358 | There Is No Greater Love thereisnogreaterlove.mid\n", 374 | "359 | These Foolish Things These Foolish Things.mid\n", 375 | "360 | Things Ain't What They Used To Be Thingsaintwhattheyusedtobe.mid\n", 376 | "361 | This Nearly Was Mine This nearly was mine.mid\n", 377 | "362 | This Time The Dreams On Me This Time the Dreams on Me.mid\n", 378 | "363 | There Will Never Be Another You There will never be another you.mid\n", 379 | "364 | They Say Its Wonderful They say its wonderful.mid\n", 380 | "365 | Time After Time Time after Time 2.mid\n", 381 | "367 | Time Remembered /1 timerem2.mid\n", 382 | "368 | Time Remembered /2 Time remembered.mid\n", 383 | "369 | Too Young To Go Steady TooYoungtogoSteady.mid\n", 384 | "370 | Try A Little Tenderness Tryalittle21.mid\n", 385 | "371 | Try To Remember Try To Remember.mid\n", 386 | "372 | Two For The Road Two For The Road 5.mid\n", 387 | "374 | Turn Out The Stars turnoutthestars.mid\n", 388 | "377 | Up With The Lark upwiththelark.mid\n", 389 | "380 | Very Early (solo) Very Early solo piano.mid\n", 390 | "383 | Waltz For Dave Waltse For Dave.mid\n", 391 | "384 | Warm Valley Warmvalleysolo.mid\n", 392 | "385 | We Will Meet Again We Will Meet Again.mid\n", 393 | "386 | What Is There To Say What is there to say.mid\n", 394 | "388 | What Is This Thing... 
What is this thing - solo.mid\n", 395 | "389 | When I Fall In Love When I Fall in Love.mid\n", 396 | "390 | Where Is Love Where is Love - solo.mid\n", 397 | "391 | While We're Young Whilewereyoung.mid\n", 398 | "392 | Witchcraft Witchcraft.mid\n", 399 | "394 | Who Can I Turn To? WhoCanI.mid\n", 400 | "395 | Why Did I Choose You whydidi.mid\n", 401 | "396 | Why Do I Love You WhyDoILoveYou.mid\n", 402 | "399 | Yesterday Yesterday.mid\n", 403 | "400 | Yesterdays Yesterdays1.mid\n", 404 | "401 | You Are So Beautiful You Are So Beautiful The Grand.mid\n", 405 | "402 | You Do Something To Me You do something to Me.mid\n", 406 | "404 | You Don't Know What ... youdontknow trio.mid\n", 407 | "405 | You Stepped Out Of... You Stepped Out of a Dream (Doug McKenzie).mid\n", 408 | "406 | You Took Advantage... youtookadvantageofme.mid\n", 409 | "407 | You've Changed youhavechanged.mid\n", 410 | "408 | Young And Foolish Young and Foolish.mid\n", 411 | "412 | Zingaro Zingaro - Jobim.mid\n", 412 | "Total: 297\n", 413 | " 0 http://www.bushgrafts.com/jazz//Midi%20site/afine-1.mid\n", 414 | " 1 http://www.bushgrafts.com/jazz//Midi%20site/afine-2.mid\n", 415 | " 2 http://www.bushgrafts.com/jazz//Midi%20site/Aghostofachance.mid\n", 416 | " 3 http://www.bushgrafts.com/jazz//Midi%20site/AHouseis.mid\n", 417 | " 4 http://www.bushgrafts.com/jazz//Midi%20site/Anighting.mid\n", 418 | " 5 http://www.bushgrafts.com/jazz//Midi%20site/ARemarkYouMade.mid\n", 419 | " 6 http://www.bushgrafts.com/jazz//Midi%20site/A Sleepin' Bee.mid\n" 420 | ] 421 | }, 422 | { 423 | "ename": "KeyboardInterrupt", 424 | "evalue": "", 425 | "output_type": "error", 426 | "traceback": [ 427 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 428 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 429 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdmc\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mDMjazzCrawler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdmc\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 430 | "\u001b[0;32m\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0mjson\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msong_dict\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 74\u001b[0;31m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcrawl_song\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msong_dict\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 431 | "\u001b[0;32m\u001b[0m in \u001b[0;36mcrawl_song\u001b[0;34m(self, song_dict)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0murl\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mBASE_URL\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m'/Midi%20site/'\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'%3d %s'\u001b[0m \u001b[1;33m%\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0midx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 60\u001b[0;31m \u001b[0mcontent\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_request_url\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdoctype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'content'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 61\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m 
\u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mROOT\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"wb\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 432 | "\u001b[0;32m\u001b[0m in \u001b[0;36m_request_url\u001b[0;34m(self, url, doctype)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[1;31m# sleep\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msleep_time\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[1;31m# return\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 433 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 434 | ] 435 | } 436 | ], 437 | "source": [ 438 | "dmc = DMjazzCrawler()\n", 439 | "dmc.run()" 440 | ] 441 | } 442 | ], 443 | "metadata": { 444 | "kernelspec": { 445 | "display_name": "Python 3", 446 | "language": "python", 447 | "name": "python3" 448 | }, 449 | "language_info": { 450 | "codemirror_mode": { 451 | "name": "ipython", 452 | "version": 3 453 | }, 454 | "file_extension": ".py", 455 | "mimetype": "text/x-python", 456 | "name": "python", 457 | "nbconvert_exporter": "python", 458 | "pygments_lexer": "ipython3", 459 | "version": "3.6.0" 460 | } 461 | }, 462 | "nbformat": 4, 463 | "nbformat_minor": 2 464 | } 465 | -------------------------------------------------------------------------------- /theorytab/utils/theorytab_crawler.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests\n", 12 | "from bs4 import BeautifulSoup\n", 13 | "import os\n", 14 | "import time\n", 15 | "import re\n", 16 | "import json\n", 17 | "import string\n", 18 | "from youtube_crawler import video_crawler\n", 19 | "from lxml import etree" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 9, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "def song_retrieval(artist, song, path_song):\n", 31 | "\n", 32 | " song_url = 'https://www.hooktheory.com/theorytab/view/' + artist + '/' + song \n", 33 | " response_song = requests.get(song_url)\n", 34 | "\n", 35 | " soup = BeautifulSoup(response_song.text, 'html.parser')\n", 36 | " li_list = soup.findAll(\"li\", { \"role\":\"presentation\"})\n", 37 | "\n", 38 | " section_list = []\n", 39 | " pk_list = []\n", 40 | " \n", 41 | " ## section \n", 42 | " for i in range(len(li_list)-1):\n", 43 | " sec = li_list[i].text.strip().lower().replace(\" \", \"-\")\n", 44 | " section_list.append(sec)\n", 45 | " pk_list.append(soup.findAll(\"div\", { \"role\":\"tabpanel\", \"id\":sec})[0].contents[0]['id'])\n", 46 | "\n", 47 | " ## save xml\n", 48 | " for idx, pk in enumerate(pk_list):\n", 49 | " req_url = 'https://www.hooktheory.com/songs/getXmlByPk?pk=' + str(pk) ##\n", 50 | " response_info = requests.get(req_url)\n", 51 | " content = response_info.text\n", 52 | " \n", 53 | " with open(os.path.join(path_song, section_list[idx] + \".xml\"), \"w\", encoding=\"utf-8\") as f:\n", 54 | " f.write(content)\n", 55 | " time.sleep(0.08)\n", 56 | " \n", 57 | " ## get genre\n", 58 | " wikiid = soup.findAll(\"multiselect\", { \"items\":\"genres\"})[0]['wikiid']\n", 59 | " response_genre = 
requests.get('https://www.hooktheory.com/wiki/'+ str(wikiid) +'/genres')\n", 60 | " genre_act_list = json.loads(response_genre.text)\n", 61 | " genres = []\n", 62 | " for g in genre_act_list:\n", 63 | " if g['active']:\n", 64 | " genres.append(g['name']) \n", 65 | " ## saving\n", 66 | " info = {'section': section_list, 'pk':pk_list, 'song_url':song_url,\n", 67 | " 'genres': genres, 'wikiid':wikiid}\n", 68 | "\n", 69 | " with open(os.path.join(path_song, 'song_info.json'), \"w\") as f:\n", 70 | " json.dump(info, f)\n", 71 | "\n", 72 | " ## youtube\n", 73 | " parser = etree.XMLParser(recover=True)\n", 74 | " root = etree.fromstring(content, parser=parser)\n", 75 | " y_id = root.find('meta').find('YouTubeID').text\n", 76 | "\n", 77 | " video_crawler(y_id, path_song)\n", 78 | " \n", 79 | " \n", 80 | "def get_song_list(url_artist, quite=False):\n", 81 | " response_tmp = requests.get(website + url_artist)\n", 82 | " soup = BeautifulSoup(response_tmp.text, 'html.parser')\n", 83 | " item_list = soup.find_all(\"li\", { \"class\":\"grid-item\"})\n", 84 | "\n", 85 | " song_name_list = []\n", 86 | " for item in item_list:\n", 87 | " song_name = item.find_all(\"a\", { \"class\":\"a-tab-cover\"})[0]['href'].split('/')[-1]\n", 88 | " song_name_list.append(song_name)\n", 89 | " if not quite:\n", 90 | " print(' > %s' % song_name)\n", 91 | " return song_name_list \n", 92 | " " 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### Retrive urls of all artists and songs" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 16, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "==[w]=================================================\n", 114 | "https://www.hooktheory.com/theorytab/artists/w?page=1\n", 115 | "Total: 77\n", 116 | "----\n", 117 | "waka-flocka-flame\n", 118 | " > no-hands\n", 119 | 
"wakabayashi-mitsuru\n", 120 | " > this-game\n", 121 | "walden\n", 122 | " > ciaco\n", 123 | "walk-off-the-earth\n", 124 | " > lightning-bolt\n", 125 | "walk-the-moon\n", 126 | " > shut-up-and-dance\n", 127 | "wallpaper\n", 128 | " > best-song-everrr\n", 129 | "wally-badarou\n", 130 | " > the-dachstein-angels\n", 131 | "walter-jack-rollins-and-steve-nelson\n", 132 | " > frosty-the-snowman\n", 133 | "walter-buckley\n", 134 | " > crazy-little-thing-called-love\n", 135 | "walter-murphy\n", 136 | " > american-dad---theme-song\n", 137 | " > family-guy---theme-song\n", 138 | " > family-guy-theme-song\n", 139 | " > the-freaking-fcc\n", 140 | "wan-wan\n", 141 | " > divas-device\n", 142 | " > life-guiding-song---prospect-mirai\n", 143 | " > sky-filling-departed-spirit---lightning-word\n", 144 | " > surpass-the-gate-of-the-silver-key\n", 145 | " > the-princess-that-was-erased-from-history\n", 146 | " > world-yamataizer\n", 147 | "wang-chung\n", 148 | " > everybody-have-fun-tonight\n", 149 | "wanting\n", 150 | " > jar-of-love\n", 151 | "war\n", 152 | " > low-rider\n", 153 | " > why-cant-we-be-friends\n", 154 | "warpaint\n", 155 | " > baby\n", 156 | " > shadows\n", 157 | "warrant\n", 158 | " > i-saw-red\n", 159 | "warren-zevon\n", 160 | " > accidentally-like-a-martyr\n", 161 | " > please-stay\n", 162 | " > things-to-do-in-denver-when-youre-dead\n", 163 | " > werewolves-of-london\n", 164 | "warsongs\n", 165 | " > piercing-light\n", 166 | "washed-out\n", 167 | " > new-theory\n", 168 | "wave-master\n", 169 | " > deja-vu-canals\n", 170 | " > time-square\n", 171 | "wave-racer\n", 172 | " > flash-drive\n", 173 | " > streamers\n", 174 | "wavetraxx\n", 175 | " > beach-stringz---lennox-remix\n", 176 | "wavves\n", 177 | " > afraid-of-heights\n", 178 | " > dreams-of-grandeur\n", 179 | " > nine-is-god\n", 180 | " > pony\n", 181 | " > super-soaker\n", 182 | " > thats-on-me\n", 183 | " > way-too-much\n", 184 | "we-lost-the-sea\n", 185 | " > bogatyri\n", 186 | "we-the-kings\n", 187 | " > 
check-yes-juliet\n", 188 | " > stay-young\n", 189 | "we-were-evergreen\n", 190 | " > baby-blue\n", 191 | " > best-thing\n", 192 | "weather-report\n", 193 | " > birdland\n", 194 | "weeabophone\n", 195 | " > beeeeees\n", 196 | "weebl\n", 197 | " > amazing-horse\n", 198 | " > donkeys\n", 199 | " > fat-labrador\n", 200 | " > owls\n", 201 | "ween\n", 202 | " > freedom-of-76\n", 203 | " > happy-colored-marbles\n", 204 | " > object\n", 205 | " > ocean-man\n", 206 | " > roses-are-free\n", 207 | " > the-hiv-song\n", 208 | " > transdermal-celebration\n", 209 | " > what-deaner-was-talking-about\n", 210 | "weezer\n", 211 | " > buddy-holly\n", 212 | " > dreamin\n", 213 | " > el-scorcho\n", 214 | " > fall-together-\n", 215 | " > falling-for-you\n", 216 | " > i-want-you-to\n", 217 | " > island-in-the-sun\n", 218 | " > king-of-the-world\n", 219 | " > my-name-is-jonas\n", 220 | " > no-one-else\n", 221 | " > say-it-aint-so\n", 222 | " > the-greatest-man-that-ever-lived\n", 223 | "weird-al-yankovic\n", 224 | " > do-i-creep-you-out\n", 225 | " > dont-download-this-song\n", 226 | " > everything-you-know-is-wrong\n", 227 | " > hardware-store\n", 228 | " > midnight-star\n", 229 | " > ringtone\n", 230 | " > since-youve-been-gone\n", 231 | " > skipper-dan\n", 232 | " > stop-forwarding-that-crap-to-me\n", 233 | " > the-night-santa-went-crazy\n", 234 | " > virus-alert\n", 235 | "wellenrausch\n", 236 | " > citylights\n", 237 | "wendell192\n", 238 | " > robots\n", 239 | "wet-wet-wet\n", 240 | " > love-is-all-around\n", 241 | "wg-snuffy-walden\n", 242 | " > west-wing-suite\n", 243 | "wham\n", 244 | " > freedom\n", 245 | " > last-christmas\n", 246 | "white-town\n", 247 | " > once-i-flew\n", 248 | " > your-woman\n", 249 | "whiteflame\n", 250 | " > senbonzakura\n", 251 | "whitesnake\n", 252 | " > here-i-go-again\n", 253 | " > is-this-love\n", 254 | "whitney-houston\n", 255 | " > all-the-man-that-i-need\n", 256 | " > greatest-love-of-all\n", 257 | " > how-will-i-know\n", 258 | " > 
i-have-nothing\n", 259 | " > i-wanna-dance-with-somebody\n", 260 | " > i-will-always-love-you\n", 261 | " > im-your-baby-tonight\n", 262 | " > its-not-right-but-its-okay---remix\n", 263 | " > one-moment-in-time\n", 264 | " > saving-all-my-love-for-you\n", 265 | " > so-emotional\n", 266 | "wii-sports\n", 267 | " > tennis-results\n", 268 | "wiktoria\n", 269 | " > save-me\n", 270 | "wilco\n", 271 | " > elt\n", 272 | " > heavy-metal-drummer\n", 273 | " > i-am-trying-to-break-your-heart\n", 274 | " > nothingsevergonnastandinmyway\n", 275 | " > poor-places\n", 276 | " > shes-a-jar\n", 277 | "wild-nothing\n", 278 | " > shadow\n", 279 | "will-b-and-steve-allen\n", 280 | " > alchera---firestorm-and-steve-allen-remix\n", 281 | "will-i-am\n", 282 | " > thatpower-ft-justin-bieber\n", 283 | " > this-is-love\n", 284 | "will-i-am-and-britney-spears\n", 285 | " > scream-and-shout\n", 286 | "will-young\n", 287 | " > evergreen\n", 288 | "willie-nelson\n", 289 | " > blue-eyes-cryin\n", 290 | " > on-the-road-again\n", 291 | "willym\n", 292 | " > happy\n", 293 | "wilson-phillips\n", 294 | " > hold-on\n", 295 | "wintergatan\n", 296 | " > marble-machine\n", 297 | " > starmachine2000\n", 298 | "wire\n", 299 | " > outdoor-miner\n", 300 | "with-me\n", 301 | " > destinys-child\n", 302 | "within-temptation\n", 303 | " > a-demons-fate\n", 304 | " > iron\n", 305 | "wiz-khalifa\n", 306 | " > black-and-yellow\n", 307 | " > no-sleep\n", 308 | "wiz-khalifa-ft-charlie-puth\n", 309 | " > see-you-again\n", 310 | "wizard\n", 311 | " > fleet\n", 312 | "wizet\n", 313 | " > maplestory---cash-shop\n", 314 | "wjsn\n", 315 | " > i-wish\n", 316 | " > momomo\n", 317 | "wolfgang-amadeus-mozart\n", 318 | " > a-little-night-music\n", 319 | " > canzonetta-sullaria---che-soave-zeffiretto\n", 320 | " > confutatis-from-requiem\n", 321 | " > contrapuntal-sketch-no2-in-c-minor---kv-deest\n", 322 | " > der-holle-rache-kocht-in-meinem-herzen\n", 323 | " > k387-string-quartet-no14-in-g-major\n", 324 | " > 
kyrie-from-mass-in-c-minor-k-427\n", 325 | " > lacrimosa-from-requiem\n", 326 | " > piano-sonata-no-16-in-c-major\n", 327 | " > rondo-alla-turca\n", 328 | " > string-quartet-no-19-in-c-major-k-465\n", 329 | " > symphony-25-in-g-minor-movement-i\n", 330 | " > symphony-40-in-g-minor-movement-i\n", 331 | " > symphony-no-40-in-g-minor---i\n", 332 | "wolfgang-gartner\n", 333 | " > fire-power\n", 334 | " > illmerica\n", 335 | " > love-and-war\n", 336 | " > space-junk\n", 337 | " > the-way-it-was\n", 338 | " > undertaker\n", 339 | "wolfgang-gartner-and-will-i-am\n", 340 | " > forever\n", 341 | "wolfgun\n", 342 | " > firmament\n", 343 | " > lights\n", 344 | "womack-and-womack\n", 345 | " > teardrops\n", 346 | "woodentoaster\n", 347 | " > beyond-her-garden\n", 348 | " > nightmare-night\n", 349 | " > rainbow-factory\n", 350 | "woodkid\n", 351 | " > brooklyn\n", 352 | " > i-love-you\n", 353 | " > iron\n", 354 | "woody-guthrie\n", 355 | " > this-land-is-your-land\n", 356 | "working-for-a-nuclear-free-city\n", 357 | " > je-suis-le-vent\n", 358 | "wretch-32\n", 359 | " > alright-with-me\n", 360 | "wrld\n", 361 | " > chase-it-ft-savoi\n", 362 | " > drift-away\n", 363 | "wstr\n", 364 | " > nail-the-casket\n", 365 | "wwe\n", 366 | " > glorious-domination\n", 367 | " > real-american\n", 368 | "wwewewe\n", 369 | " > wwe\n", 370 | "wyclef-jean\n", 371 | " > divine-sorrow\n", 372 | "wyd-krakow-2016\n", 373 | " > blogoslawieni-milosierni\n", 374 | "==[x]=================================================\n", 375 | "https://www.hooktheory.com/theorytab/artists/x?page=1\n", 376 | "Total: 6\n", 377 | "----\n", 378 | "x-ambassadors\n", 379 | " > unsteady\n", 380 | "xi\n", 381 | " > freedom-dive\n", 382 | "xilent\n", 383 | " > animation\n", 384 | " > choose-me\n", 385 | " > disconnect\n", 386 | " > synthony\n", 387 | " > the-place\n", 388 | "xxanaxx\n", 389 | " > give-u-the-world\n", 390 | " > got-u-under---spisek-jednego-remix\n", 391 | " > story\n", 392 | "xxxtentacion\n", 393 | " > 
orlando\n", 394 | "xyconstant\n", 395 | " > white-noise\n", 396 | "=======================================================\n", 397 | "[1, 1]\n", 398 | "Artists: 83\n", 399 | "Songs: 192\n" 400 | ] 401 | } 402 | ], 403 | "source": [ 404 | "list_pages = []\n", 405 | "archive_artist = dict()\n", 406 | "sleep_time = 0.11\n", 407 | "alphabet_list = string.ascii_lowercase[-4:-2]\n", 408 | "\n", 409 | "website = 'https://www.hooktheory.com'\n", 410 | "base_url = website + '/theorytab/artists/'\n", 411 | "\n", 412 | "artist_count = 0\n", 413 | "song_count = 0\n", 414 | "\n", 415 | "for ch in alphabet_list:\n", 416 | " time.sleep(sleep_time) \n", 417 | " url = base_url+ch\n", 418 | " response_tmp = requests.get(url)\n", 419 | " soup = BeautifulSoup(response_tmp.text, 'html.parser')\n", 420 | " page_count = 0\n", 421 | " \n", 422 | " print('==[%c]================================================='%ch)\n", 423 | " \n", 424 | " ## get artists list by pages\n", 425 | " url_artist_list = []\n", 426 | " for page in range(1,9999):\n", 427 | " url = 'https://www.hooktheory.com/theorytab/artists/'+ch+'?page=' + str(page)\n", 428 | " \n", 429 | " time.sleep(sleep_time) \n", 430 | " response_tmp = requests.get(url)\n", 431 | " soup = BeautifulSoup(response_tmp.text, 'html.parser')\n", 432 | " item_list = soup.find_all(\"li\", { \"class\":\"grid-item\"})\n", 433 | " \n", 434 | " if item_list:\n", 435 | " print(url)\n", 436 | " page_count += 1\n", 437 | " else:\n", 438 | " break\n", 439 | " \n", 440 | " for item in item_list:\n", 441 | " url_artist_list.append(item.find_all(\"a\", { \"class\":\"a-tab-cover\"})[0]['href'])\n", 442 | "\n", 443 | " print('Total:', len(url_artist_list))\n", 444 | " \n", 445 | " print('----')\n", 446 | " \n", 447 | " if not page_count:\n", 448 | " page_count = 1 \n", 449 | " \n", 450 | " ## get song of artists\n", 451 | " artist_song_dict = dict()\n", 452 | " \n", 453 | " for url_artist in url_artist_list:\n", 454 | " artist_count+=1\n", 455 | " 
time.sleep(sleep_time)\n", 456 | " artist_name = url_artist.split('/')[-1]\n", 457 | " print(artist_name)\n", 458 | " song_name_list = get_song_list(url_artist)\n", 459 | " song_count += len(song_name_list)\n", 460 | " artist_song_dict[artist_name] = song_name_list\n", 461 | " \n", 462 | " archive_artist[ch] = artist_song_dict\n", 463 | " list_pages.append(page_count)\n", 464 | "\n", 465 | "print('=======================================================')\n", 466 | "print(list_pages)\n", 467 | "print('Artists:', artist_count)\n", 468 | "print('Songs:', song_count)\n", 469 | "\n", 470 | "\n", 471 | "with open('archive_artist.json', \"w\") as f:\n", 472 | " json.dump(archive_artist, f)" 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": {}, 478 | "source": [ 479 | "### Retrieve each songs" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 19, 485 | "metadata": { 486 | "collapsed": false, 487 | "scrolled": true 488 | }, 489 | "outputs": [ 490 | { 491 | "name": "stdout", 492 | "output_type": "stream", 493 | "text": [ 494 | "==[w]=================================================\n", 495 | "( 1/192) war low-rider\n", 496 | "( 2/192) war why-cant-we-be-friends\n", 497 | "( 3/192) white-town once-i-flew\n" 498 | ] 499 | }, 500 | { 501 | "name": "stderr", 502 | "output_type": "stream", 503 | "text": [ 504 | "ERROR: KPo-6RSGxQw: YouTube said: This video is not available.\n" 505 | ] 506 | }, 507 | { 508 | "name": "stdout", 509 | "output_type": "stream", 510 | "text": [ 511 | "Download Failed: KPo-6RSGxQw \n", 512 | "( 4/192) white-town your-woman\n", 513 | "( 5/192) wizet maplestory---cash-shop\n", 514 | "( 6/192) weezer buddy-holly\n", 515 | "( 7/192) weezer dreamin\n", 516 | "( 8/192) weezer el-scorcho\n", 517 | "( 9/192) weezer fall-together-\n", 518 | "( 10/192) weezer falling-for-you\n", 519 | "( 11/192) weezer i-want-you-to\n", 520 | "( 12/192) weezer island-in-the-sun\n", 521 | "( 13/192) weezer king-of-the-world\n", 522 
| "( 14/192) weezer my-name-is-jonas\n", 523 | "( 15/192) weezer no-one-else\n", 524 | "( 16/192) weezer say-it-aint-so\n", 525 | "( 17/192) weezer the-greatest-man-that-ever-lived\n", 526 | "( 18/192) warsongs piercing-light\n", 527 | "( 19/192) wiz-khalifa black-and-yellow\n" 528 | ] 529 | }, 530 | { 531 | "name": "stderr", 532 | "output_type": "stream", 533 | "text": [ 534 | "ERROR: Unable to download webpage: (caused by URLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:645)'),))\n" 535 | ] 536 | }, 537 | { 538 | "name": "stdout", 539 | "output_type": "stream", 540 | "text": [ 541 | "Download Failed: UePtoxDhJSw \n", 542 | "( 20/192) wiz-khalifa no-sleep\n", 543 | "( 21/192) wave-racer flash-drive\n" 544 | ] 545 | }, 546 | { 547 | "name": "stderr", 548 | "output_type": "stream", 549 | "text": [ 550 | "ERROR: _f9wKhbYEAA: YouTube said: This video is not available.\n" 551 | ] 552 | }, 553 | { 554 | "name": "stdout", 555 | "output_type": "stream", 556 | "text": [ 557 | "Download Failed: _f9wKhbYEAA \n", 558 | "( 22/192) wave-racer streamers\n", 559 | "( 23/192) wallpaper best-song-everrr\n", 560 | "( 24/192) wilco elt\n", 561 | "( 25/192) wilco heavy-metal-drummer\n", 562 | "( 26/192) wilco i-am-trying-to-break-your-heart\n", 563 | "( 27/192) wilco nothingsevergonnastandinmyway\n", 564 | "( 28/192) wilco poor-places\n", 565 | "( 29/192) wilco shes-a-jar\n", 566 | "( 30/192) wolfgun firmament\n", 567 | "( 31/192) wolfgun lights\n", 568 | "( 32/192) wilson-phillips hold-on\n", 569 | "( 33/192) wintergatan marble-machine\n", 570 | "( 34/192) wintergatan starmachine2000\n", 571 | "( 35/192) working-for-a-nuclear-free-city je-suis-le-vent\n", 572 | "( 36/192) wwe glorious-domination\n", 573 | "( 37/192) wwe real-american\n", 574 | "( 38/192) weird-al-yankovic do-i-creep-you-out\n", 575 | "( 39/192) weird-al-yankovic dont-download-this-song\n", 576 | "( 40/192) weird-al-yankovic everything-you-know-is-wrong\n", 577 | "( 41/192) weird-al-yankovic 
hardware-store\n" 578 | ] 579 | }, 580 | { 581 | "name": "stderr", 582 | "output_type": "stream", 583 | "text": [ 584 | "ERROR: YtKHBtqulOA: YouTube said: This video does not exist.\n" 585 | ] 586 | }, 587 | { 588 | "name": "stdout", 589 | "output_type": "stream", 590 | "text": [ 591 | "Download Failed: YtKHBtqulOA \n", 592 | "( 42/192) weird-al-yankovic midnight-star\n", 593 | "( 43/192) weird-al-yankovic ringtone\n", 594 | "( 44/192) weird-al-yankovic since-youve-been-gone\n", 595 | "( 45/192) weird-al-yankovic skipper-dan\n", 596 | "( 46/192) weird-al-yankovic stop-forwarding-that-crap-to-me\n", 597 | "( 47/192) weird-al-yankovic the-night-santa-went-crazy\n", 598 | "( 48/192) weird-al-yankovic virus-alert\n", 599 | "( 49/192) wg-snuffy-walden west-wing-suite\n", 600 | "( 50/192) ween freedom-of-76\n" 601 | ] 602 | }, 603 | { 604 | "name": "stderr", 605 | "output_type": "stream", 606 | "text": [ 607 | "ERROR: T6QjWZujAls: YouTube said: This video does not exist.\n" 608 | ] 609 | }, 610 | { 611 | "name": "stdout", 612 | "output_type": "stream", 613 | "text": [ 614 | "Download Failed: T6QjWZujAls \n", 615 | "( 51/192) ween happy-colored-marbles\n", 616 | "( 52/192) ween object\n", 617 | "( 53/192) ween ocean-man\n", 618 | "( 54/192) ween roses-are-free\n" 619 | ] 620 | }, 621 | { 622 | "name": "stderr", 623 | "output_type": "stream", 624 | "text": [ 625 | "ERROR: t4PzQJCF2X8: YouTube said: This video does not exist.\n" 626 | ] 627 | }, 628 | { 629 | "name": "stdout", 630 | "output_type": "stream", 631 | "text": [ 632 | "Download Failed: t4PzQJCF2X8 \n", 633 | "( 55/192) ween the-hiv-song\n", 634 | "( 56/192) ween transdermal-celebration\n" 635 | ] 636 | }, 637 | { 638 | "name": "stderr", 639 | "output_type": "stream", 640 | "text": [ 641 | "ERROR: kpheHGX3m1Q: YouTube said: This video does not exist.\n" 642 | ] 643 | }, 644 | { 645 | "name": "stdout", 646 | "output_type": "stream", 647 | "text": [ 648 | "Download Failed: kpheHGX3m1Q \n", 649 | "( 57/192) ween 
what-deaner-was-talking-about\n" 650 | ] 651 | }, 652 | { 653 | "name": "stderr", 654 | "output_type": "stream", 655 | "text": [ 656 | "ERROR: iDkt-K3vjMM: YouTube said: This video does not exist.\n" 657 | ] 658 | }, 659 | { 660 | "name": "stdout", 661 | "output_type": "stream", 662 | "text": [ 663 | "Download Failed: iDkt-K3vjMM \n", 664 | "( 58/192) weeabophone beeeeees\n", 665 | "( 59/192) wham freedom\n", 666 | "Download Failed: None \n", 667 | "( 60/192) wham last-christmas\n", 668 | "( 61/192) woodkid brooklyn\n", 669 | "( 62/192) woodkid i-love-you\n", 670 | "( 63/192) woodkid iron\n", 671 | "( 64/192) weebl amazing-horse\n", 672 | "( 65/192) weebl donkeys\n", 673 | "( 66/192) weebl fat-labrador\n", 674 | "( 67/192) weebl owls\n", 675 | "( 68/192) will-i-am-and-britney-spears scream-and-shout\n", 676 | "( 69/192) wakabayashi-mitsuru this-game\n" 677 | ] 678 | }, 679 | { 680 | "name": "stderr", 681 | "output_type": "stream", 682 | "text": [ 683 | "ERROR: sj_0rSdTDX8: YouTube said: This video does not exist.\n" 684 | ] 685 | }, 686 | { 687 | "name": "stdout", 688 | "output_type": "stream", 689 | "text": [ 690 | "Download Failed: sj_0rSdTDX8 \n", 691 | "( 70/192) wally-badarou the-dachstein-angels\n" 692 | ] 693 | }, 694 | { 695 | "name": "stderr", 696 | "output_type": "stream", 697 | "text": [ 698 | "ERROR: 76RbI-Ro7hE: YouTube said: \"The Dachstein Ange...\"\n", 699 | "This video is no longer available due to a copyright claim by Wally Badarou.\n" 700 | ] 701 | }, 702 | { 703 | "name": "stdout", 704 | "output_type": "stream", 705 | "text": [ 706 | "Download Failed: 76RbI-Ro7hE \n", 707 | "( 71/192) wang-chung everybody-have-fun-tonight\n", 708 | "( 72/192) wet-wet-wet love-is-all-around\n", 709 | "( 73/192) whitney-houston all-the-man-that-i-need\n", 710 | "( 74/192) whitney-houston greatest-love-of-all\n", 711 | "( 75/192) whitney-houston how-will-i-know\n", 712 | "( 76/192) whitney-houston i-have-nothing\n", 713 | "( 77/192) whitney-houston 
i-wanna-dance-with-somebody\n", 714 | "( 78/192) whitney-houston i-will-always-love-you\n", 715 | "( 79/192) whitney-houston im-your-baby-tonight\n", 716 | "( 80/192) whitney-houston its-not-right-but-its-okay---remix\n", 717 | "( 81/192) whitney-houston one-moment-in-time\n", 718 | "( 82/192) whitney-houston saving-all-my-love-for-you\n", 719 | "( 83/192) whitney-houston so-emotional\n", 720 | "( 84/192) willie-nelson blue-eyes-cryin\n", 721 | "( 85/192) willie-nelson on-the-road-again\n", 722 | "( 86/192) walter-buckley crazy-little-thing-called-love\n" 723 | ] 724 | }, 725 | { 726 | "name": "stderr", 727 | "output_type": "stream", 728 | "text": [ 729 | "ERROR: EE34cSvZCd8: YouTube said: Please sign in to view this video.\n" 730 | ] 731 | }, 732 | { 733 | "name": "stdout", 734 | "output_type": "stream", 735 | "text": [ 736 | "Download Failed: EE34cSvZCd8 \n", 737 | "( 87/192) wyd-krakow-2016 blogoslawieni-milosierni\n", 738 | "( 88/192) willym happy\n", 739 | "( 89/192) whiteflame senbonzakura\n", 740 | "( 90/192) wrld chase-it-ft-savoi\n", 741 | "( 91/192) wrld drift-away\n", 742 | "( 92/192) warren-zevon accidentally-like-a-martyr\n", 743 | "( 93/192) warren-zevon please-stay\n", 744 | "( 94/192) warren-zevon things-to-do-in-denver-when-youre-dead\n" 745 | ] 746 | }, 747 | { 748 | "name": "stderr", 749 | "output_type": "stream", 750 | "text": [ 751 | "ERROR: sQOyoPALBrE: YouTube said: \"Warren Zevon - Things to do...\"\n", 752 | "The YouTube account associated with this video has been terminated due to multiple third-party notifications of copyright infringement.\n" 753 | ] 754 | }, 755 | { 756 | "name": "stdout", 757 | "output_type": "stream", 758 | "text": [ 759 | "Download Failed: sQOyoPALBrE \n", 760 | "( 95/192) warren-zevon werewolves-of-london\n", 761 | "( 96/192) warpaint baby\n", 762 | "( 97/192) warpaint shadows\n", 763 | "Download Failed: FZtF3OdtAnc \n", 764 | "( 98/192) warrant i-saw-red\n", 765 | "( 99/192) we-the-kings check-yes-juliet\n", 766 | 
"(100/192) we-the-kings stay-young\n", 767 | "(101/192) wendell192 robots\n", 768 | "(102/192) wan-wan divas-device\n", 769 | "(103/192) wan-wan life-guiding-song---prospect-mirai\n", 770 | "(104/192) wan-wan sky-filling-departed-spirit---lightning-word\n", 771 | "(105/192) wan-wan surpass-the-gate-of-the-silver-key\n", 772 | "(106/192) wan-wan the-princess-that-was-erased-from-history\n", 773 | "(107/192) wan-wan world-yamataizer\n", 774 | "(108/192) waka-flocka-flame no-hands\n", 775 | "(109/192) with-me destinys-child\n", 776 | "Download Failed: null \n", 777 | "(110/192) wizard fleet\n", 778 | "(111/192) walk-off-the-earth lightning-bolt\n", 779 | "(112/192) we-lost-the-sea bogatyri\n", 780 | "(113/192) wolfgang-gartner-and-will-i-am forever\n", 781 | "(114/192) wjsn i-wish\n", 782 | "(115/192) wjsn momomo\n", 783 | "(116/192) walk-the-moon shut-up-and-dance\n", 784 | "(117/192) we-were-evergreen baby-blue\n", 785 | "(118/192) we-were-evergreen best-thing\n", 786 | "(119/192) wolfgang-gartner fire-power\n", 787 | "(120/192) wolfgang-gartner illmerica\n", 788 | "(121/192) wolfgang-gartner love-and-war\n", 789 | "(122/192) wolfgang-gartner space-junk\n", 790 | "(123/192) wolfgang-gartner the-way-it-was\n", 791 | "(124/192) wolfgang-gartner undertaker\n", 792 | "(125/192) womack-and-womack teardrops\n", 793 | "(126/192) wiz-khalifa-ft-charlie-puth see-you-again\n", 794 | "(127/192) walter-murphy american-dad---theme-song\n", 795 | "(128/192) walter-murphy family-guy---theme-song\n" 796 | ] 797 | }, 798 | { 799 | "name": "stderr", 800 | "output_type": "stream", 801 | "text": [ 802 | "ERROR: y1VNjb9iXXw: YouTube said: This video does not exist.\n" 803 | ] 804 | }, 805 | { 806 | "name": "stdout", 807 | "output_type": "stream", 808 | "text": [ 809 | "Download Failed: y1VNjb9iXXw \n", 810 | "(129/192) walter-murphy family-guy-theme-song\n" 811 | ] 812 | }, 813 | { 814 | "name": "stderr", 815 | "output_type": "stream", 816 | "text": [ 817 | "ERROR: y1VNjb9iXXw: YouTube 
said: This video does not exist.\n" 818 | ] 819 | }, 820 | { 821 | "name": "stdout", 822 | "output_type": "stream", 823 | "text": [ 824 | "Download Failed: y1VNjb9iXXw \n", 825 | "(130/192) walter-murphy the-freaking-fcc\n" 826 | ] 827 | }, 828 | { 829 | "name": "stderr", 830 | "output_type": "stream", 831 | "text": [ 832 | "ERROR: 2NDPT0Ph5rA: YouTube said: Please sign in to view this video.\n" 833 | ] 834 | }, 835 | { 836 | "name": "stdout", 837 | "output_type": "stream", 838 | "text": [ 839 | "Download Failed: 2NDPT0Ph5rA \n", 840 | "(131/192) wyclef-jean divine-sorrow\n", 841 | "(132/192) wire outdoor-miner\n", 842 | "(133/192) walter-jack-rollins-and-steve-nelson frosty-the-snowman\n", 843 | "(134/192) wstr nail-the-casket\n", 844 | "(135/192) wavetraxx beach-stringz---lennox-remix\n", 845 | "(136/192) wiktoria save-me\n", 846 | "(137/192) wavves afraid-of-heights\n", 847 | "(138/192) wavves dreams-of-grandeur\n", 848 | "(139/192) wavves nine-is-god\n", 849 | "(140/192) wavves pony\n", 850 | "(141/192) wavves super-soaker\n", 851 | "(142/192) wavves thats-on-me\n", 852 | "(143/192) wavves way-too-much\n", 853 | "(144/192) walden ciaco\n", 854 | "(145/192) weather-report birdland\n", 855 | "(146/192) woody-guthrie this-land-is-your-land\n", 856 | "(147/192) will-i-am thatpower-ft-justin-bieber\n" 857 | ] 858 | }, 859 | { 860 | "name": "stderr", 861 | "output_type": "stream", 862 | "text": [ 863 | "ERROR: A0hUdBLiASQ: YouTube said: This video does not exist.\n" 864 | ] 865 | }, 866 | { 867 | "name": "stdout", 868 | "output_type": "stream", 869 | "text": [ 870 | "Download Failed: A0hUdBLiASQ \n", 871 | "(148/192) will-i-am this-is-love\n", 872 | "(149/192) washed-out new-theory\n", 873 | "(150/192) wanting jar-of-love\n", 874 | "(151/192) wwewewe wwe\n", 875 | "(152/192) wii-sports tennis-results\n", 876 | "(153/192) within-temptation a-demons-fate\n", 877 | "(154/192) within-temptation iron\n", 878 | "(155/192) woodentoaster beyond-her-garden\n", 879 | 
"(156/192) woodentoaster nightmare-night\n", 880 | "(157/192) woodentoaster rainbow-factory\n", 881 | "(158/192) whitesnake here-i-go-again\n" 882 | ] 883 | }, 884 | { 885 | "name": "stderr", 886 | "output_type": "stream", 887 | "text": [ 888 | "ERROR: The uploader has not made this video available in your country.\n", 889 | "You might want to use a VPN or a proxy server (with --proxy) to workaround.\n" 890 | ] 891 | }, 892 | { 893 | "name": "stdout", 894 | "output_type": "stream", 895 | "text": [ 896 | "Download Failed: i3MXiTeH_Pg \n", 897 | "(159/192) whitesnake is-this-love\n" 898 | ] 899 | }, 900 | { 901 | "name": "stderr", 902 | "output_type": "stream", 903 | "text": [ 904 | "ERROR: ujnH4yNqL8E: YouTube said: This video contains content from WMG, who has blocked it in your country on copyright grounds.\n" 905 | ] 906 | }, 907 | { 908 | "name": "stdout", 909 | "output_type": "stream", 910 | "text": [ 911 | "Download Failed: ujnH4yNqL8E \n", 912 | "(160/192) wellenrausch citylights\n", 913 | "(161/192) wild-nothing shadow\n", 914 | "(162/192) will-young evergreen\n", 915 | "(163/192) wave-master deja-vu-canals\n", 916 | "(164/192) wave-master time-square\n", 917 | "(165/192) will-b-and-steve-allen alchera---firestorm-and-steve-allen-remix\n" 918 | ] 919 | }, 920 | { 921 | "name": "stderr", 922 | "output_type": "stream", 923 | "text": [ 924 | "ERROR: lGZVvP8QDZA: YouTube said: \"Will B & Steve Allen - Alch...\"\n", 925 | "The YouTube account associated with this video has been terminated due to multiple third-party notifications of copyright infringement.\n" 926 | ] 927 | }, 928 | { 929 | "name": "stdout", 930 | "output_type": "stream", 931 | "text": [ 932 | "Download Failed: lGZVvP8QDZA \n", 933 | "(166/192) wretch-32 alright-with-me\n", 934 | "(167/192) wolfgang-amadeus-mozart a-little-night-music\n", 935 | "(168/192) wolfgang-amadeus-mozart canzonetta-sullaria---che-soave-zeffiretto\n", 936 | "(169/192) wolfgang-amadeus-mozart confutatis-from-requiem\n" 937 | 
] 938 | }, 939 | { 940 | "name": "stderr", 941 | "output_type": "stream", 942 | "text": [ 943 | "ERROR: _Lav_lDJ2LM: YouTube said: This video does not exist.\n" 944 | ] 945 | }, 946 | { 947 | "name": "stdout", 948 | "output_type": "stream", 949 | "text": [ 950 | "Download Failed: _Lav_lDJ2LM \n", 951 | "(170/192) wolfgang-amadeus-mozart contrapuntal-sketch-no2-in-c-minor---kv-deest\n" 952 | ] 953 | }, 954 | { 955 | "name": "stderr", 956 | "output_type": "stream", 957 | "text": [ 958 | "ERROR: heHnBqa4GOg: YouTube said: This video is not available.\n" 959 | ] 960 | }, 961 | { 962 | "name": "stdout", 963 | "output_type": "stream", 964 | "text": [ 965 | "Download Failed: heHnBqa4GOg \n", 966 | "(171/192) wolfgang-amadeus-mozart der-holle-rache-kocht-in-meinem-herzen\n", 967 | "(172/192) wolfgang-amadeus-mozart k387-string-quartet-no14-in-g-major\n", 968 | "(173/192) wolfgang-amadeus-mozart kyrie-from-mass-in-c-minor-k-427\n" 969 | ] 970 | }, 971 | { 972 | "name": "stderr", 973 | "output_type": "stream", 974 | "text": [ 975 | "ERROR: hieCzEZwRw4: YouTube said: This video does not exist.\n" 976 | ] 977 | }, 978 | { 979 | "name": "stdout", 980 | "output_type": "stream", 981 | "text": [ 982 | "Download Failed: hieCzEZwRw4 \n", 983 | "(174/192) wolfgang-amadeus-mozart lacrimosa-from-requiem\n", 984 | "(175/192) wolfgang-amadeus-mozart piano-sonata-no-16-in-c-major\n", 985 | "(176/192) wolfgang-amadeus-mozart rondo-alla-turca\n", 986 | "(177/192) wolfgang-amadeus-mozart string-quartet-no-19-in-c-major-k-465\n" 987 | ] 988 | }, 989 | { 990 | "name": "stderr", 991 | "output_type": "stream", 992 | "text": [ 993 | "ERROR: ZmXtpmr9UQc: YouTube said: This video has been removed by the user.\n" 994 | ] 995 | }, 996 | { 997 | "name": "stdout", 998 | "output_type": "stream", 999 | "text": [ 1000 | "Download Failed: ZmXtpmr9UQc \n", 1001 | "(178/192) wolfgang-amadeus-mozart symphony-25-in-g-minor-movement-i\n", 1002 | "(179/192) wolfgang-amadeus-mozart 
symphony-40-in-g-minor-movement-i\n", 1003 | "(180/192) wolfgang-amadeus-mozart symphony-no-40-in-g-minor---i\n", 1004 | "==[x]=================================================\n", 1005 | "(181/192) xxanaxx give-u-the-world\n", 1006 | "(182/192) xxanaxx got-u-under---spisek-jednego-remix\n", 1007 | "(183/192) xxanaxx story\n", 1008 | "(184/192) xilent animation\n", 1009 | "(185/192) xilent choose-me\n", 1010 | "(186/192) xilent disconnect\n" 1011 | ] 1012 | }, 1013 | { 1014 | "name": "stderr", 1015 | "output_type": "stream", 1016 | "text": [ 1017 | "ERROR: OB046qaE6cM: YouTube said: This video is not available.\n" 1018 | ] 1019 | }, 1020 | { 1021 | "name": "stdout", 1022 | "output_type": "stream", 1023 | "text": [ 1024 | "Download Failed: OB046qaE6cM \n", 1025 | "(187/192) xilent synthony\n", 1026 | "(188/192) xilent the-place\n", 1027 | "(189/192) xxxtentacion orlando\n", 1028 | "(190/192) x-ambassadors unsteady\n", 1029 | "(191/192) xi freedom-dive\n", 1030 | "(192/192) xyconstant white-noise\n" 1031 | ] 1032 | } 1033 | ], 1034 | "source": [ 1035 | "root_dir = 'archive'\n", 1036 | "\n", 1037 | "with open('archive_artist.json', \"r\") as f:\n", 1038 | " archive_artist = json.load(f)\n", 1039 | "\n", 1040 | "now_count = 1\n", 1041 | " \n", 1042 | "for ch in alphabet_list:\n", 1043 | " path_ch = os.path.join(root_dir, ch)\n", 1044 | " print('==[%c]================================================='%ch)\n", 1045 | " \n", 1046 | " if not os.path.exists(path_ch):\n", 1047 | " os.makedirs(path_ch)\n", 1048 | " \n", 1049 | " for a_name in archive_artist[ch].keys(): \n", 1050 | " for s_name in archive_artist[ch][a_name]:\n", 1051 | "\n", 1052 | " print('(%3d/%3d) %s %s' % (now_count, song_count, a_name , s_name))\n", 1053 | " path_song = os.path.join(path_ch, a_name, s_name)\n", 1054 | " \n", 1055 | " if not os.path.exists(path_song):\n", 1056 | " os.makedirs(path_song)\n", 1057 | " \n", 1058 | " time.sleep(sleep_time) \n", 1059 | " song_retrieval(a_name, s_name, 
path_song)\n", 1060 | " \n", 1061 | " now_count+=1 \n", 1062 | " \n", 1063 | " " 1064 | ] 1065 | } 1066 | ], 1067 | "metadata": { 1068 | "kernelspec": { 1069 | "display_name": "Python [mir]", 1070 | "language": "python", 1071 | "name": "Python [mir]" 1072 | }, 1073 | "language_info": { 1074 | "codemirror_mode": { 1075 | "name": "ipython", 1076 | "version": 3 1077 | }, 1078 | "file_extension": ".py", 1079 | "mimetype": "text/x-python", 1080 | "name": "python", 1081 | "nbconvert_exporter": "python", 1082 | "pygments_lexer": "ipython3", 1083 | "version": "3.5.2" 1084 | } 1085 | }, 1086 | "nbformat": 4, 1087 | "nbformat_minor": 0 1088 | } 1089 | --------------------------------------------------------------------------------