├── doug-mckenzie-jazz
│   ├── codes
│   │   ├── log.txt
│   │   ├── DMjazzCrawler.py
│   │   └── DMjazzCrawler.ipynb
│   └── archive
│       └── A Sleepin' Bee.mid
├── vgmdb
│   ├── utils
│   │   ├── log.txt
│   │   ├── VGMCrawler.py
│   │   └── VGMCrawler.ipynb
│   └── archive
│       └── 3do
│           └── 3do
│               └── 37245_Super-Street-Fighter-2--Guile.mid
├── theorytab
│   ├── utils
│   │   ├── requirements.txt
│   │   ├── youtube_crawler.py
│   │   ├── theorytab_crawler.py
│   │   └── theorytab_crawler.ipynb
│   ├── archive
│   │   ├── archive_artist.json
│   │   └── a
│   │       └── aage-aleksandersen
│   │           └── fire-pils-og-en-pizza
│   │               ├── video.mp4
│   │               ├── song_info.json
│   │               ├── video_info.json
│   │               ├── verse.xml
│   │               ├── intro.xml
│   │               └── chorus.xml
│   └── README.md
├── docs
│   ├── hey_jude_chorus.PNG
│   └── 5-track_pianoroll.PNG
├── piano-e-competition
│   ├── utils
│   │   ├── log.txt
│   │   └── EPcompCrawler.py
│   └── archive
│       └── 2004
│           ├── midi
│           │   └── ADIG01.mid
│           └── zip
│               └── Adigezalzade_M_ESEQ.zip
├── 5-track-pianoroll
│   ├── sample
│   │   ├── 51901.png
│   │   └── 51901_test_round.mid
│   ├── compile.py
│   ├── readme.md
│   └── parser.py
├── hymnal
│   ├── archive
│   │   └── children
│   │       └── 1
│   │           ├── all.mid
│   │           ├── audio.mp3
│   │           ├── ls_text.pdf
│   │           ├── melody.mid
│   │           ├── ls_guitar.pdf
│   │           ├── ls_paino.pdf
│   │           ├── song_metadata.json
│   │           └── lyric.xml
│   └── utils
│       ├── log.txt
│       ├── hymnCrawler.py
│       └── hymnCrawler.ipynb
└── README.md
/doug-mckenzie-jazz/codes/log.txt:
--------------------------------------------------------------------------------
1 | Total: 297
2 |
--------------------------------------------------------------------------------
/vgmdb/utils/log.txt:
--------------------------------------------------------------------------------
1 | total: 28419 songs
2 | Elapsed time: 12:26:37
3 |
--------------------------------------------------------------------------------
/theorytab/utils/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4
2 | requests
3 | pafy
4 | ffmpy
5 | youtube-dl
6 | lxml
--------------------------------------------------------------------------------
/docs/hey_jude_chorus.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/docs/hey_jude_chorus.PNG
--------------------------------------------------------------------------------
/piano-e-competition/utils/log.txt:
--------------------------------------------------------------------------------
1 | Total midi files 1573
2 | Total zip files 964
3 |
4 | Elapsed time: 23:59:52
5 |
--------------------------------------------------------------------------------
/docs/5-track_pianoroll.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/docs/5-track_pianoroll.PNG
--------------------------------------------------------------------------------
/5-track-pianoroll/sample/51901.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/5-track-pianoroll/sample/51901.png
--------------------------------------------------------------------------------
/hymnal/archive/children/1/all.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/all.mid
--------------------------------------------------------------------------------
/hymnal/archive/children/1/audio.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/audio.mp3
--------------------------------------------------------------------------------
/hymnal/archive/children/1/ls_text.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/ls_text.pdf
--------------------------------------------------------------------------------
/hymnal/archive/children/1/melody.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/melody.mid
--------------------------------------------------------------------------------
/hymnal/archive/children/1/ls_guitar.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/ls_guitar.pdf
--------------------------------------------------------------------------------
/hymnal/archive/children/1/ls_paino.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/hymnal/archive/children/1/ls_paino.pdf
--------------------------------------------------------------------------------
/5-track-pianoroll/sample/51901_test_round.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/5-track-pianoroll/sample/51901_test_round.mid
--------------------------------------------------------------------------------
/doug-mckenzie-jazz/archive/A Sleepin' Bee.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/doug-mckenzie-jazz/archive/A Sleepin' Bee.mid
--------------------------------------------------------------------------------
/piano-e-competition/archive/2004/midi/ADIG01.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/piano-e-competition/archive/2004/midi/ADIG01.mid
--------------------------------------------------------------------------------
/theorytab/archive/archive_artist.json:
--------------------------------------------------------------------------------
1 | {
2 | "a": {
3 | "aage-aleksandersen": [
4 | "fire-pils-og-en-pizza",
5 | "norge-mitt-norge"
6 | ]
7 | }
8 | }
--------------------------------------------------------------------------------
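
A minimal sketch of walking this index to rebuild the on-disk paths used by theorytab_crawler.py; the top-level 'num_song' and 'num_artist' counter keys that the crawler appends are assumed and skipped here:

import json
import os

with open('archive_artist.json') as f:
    archive_artist = json.load(f)

for letter, artists in archive_artist.items():
    # skip the bookkeeping counters the crawler stores alongside the letters
    if letter in ('num_song', 'num_artist'):
        continue
    for artist, songs in artists.items():
        for song in songs:
            # e.g. archive/a/aage-aleksandersen/fire-pils-og-en-pizza
            print(os.path.join('archive', letter, artist, song))
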
/piano-e-competition/archive/2004/zip/Adigezalzade_M_ESEQ.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/piano-e-competition/archive/2004/zip/Adigezalzade_M_ESEQ.zip
--------------------------------------------------------------------------------
/vgmdb/archive/3do/3do/37245_Super-Street-Fighter-2--Guile.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/vgmdb/archive/3do/3do/37245_Super-Street-Fighter-2--Guile.mid
--------------------------------------------------------------------------------
/theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/video.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/symbolic-music-datasets/HEAD/theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/video.mp4
--------------------------------------------------------------------------------
/hymnal/archive/children/1/song_metadata.json:
--------------------------------------------------------------------------------
1 | {"Category": "Praise of the Lord", "Subcategory": "His Love", "Music": "Carey Bonner\u00a0(1859-1938)", "Key": "C Major", "Time": "2/4", "Hymn Code": "1135671653", "title": "Praise Him, praise Him, all ye little children"}
--------------------------------------------------------------------------------
/theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/song_info.json:
--------------------------------------------------------------------------------
1 | {"genres": ["Rock"], "song_url": "https://www.hooktheory.com/theorytab/view/aage-aleksandersen/fire-pils-og-en-pizza", "pk": ["205007", "205011", "205014"], "section": ["intro", "verse", "chorus"], "wikiid": "9479"}
--------------------------------------------------------------------------------
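
The 'section' and 'pk' lists above are parallel: each pk is the key that theorytab_crawler.py passes to the getXmlByPk endpoint to fetch that section's XML. A minimal sketch of re-fetching the section files from such a record:

import json
import requests

with open('song_info.json') as f:
    info = json.load(f)

# e.g. ('intro', '205007'), ('verse', '205011'), ('chorus', '205014')
for section, pk in zip(info['section'], info['pk']):
    xml = requests.get('https://www.hooktheory.com/songs/getXmlByPk?pk=' + pk).text
    with open(section + '.xml', 'w', encoding='utf-8') as f:
        f.write(xml)
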
/theorytab/README.md:
--------------------------------------------------------------------------------
1 | ## Theorytab
2 | The code here is out of date. Visitors can check out another repo of mine:
3 | * [Lead Sheet Dataset](https://github.com/wayne391/Lead-Sheet-Dataset)
4 |
5 | It is also crawled from [Theorytab](https://www.hooktheory.com/site). Furthermore, it provides a set of tools to generate various formats.
6 |
7 |
8 |
--------------------------------------------------------------------------------
/theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/video_info.json:
--------------------------------------------------------------------------------
1 | {"crawl_time": "2017-11-07 20:56:41", "length": 244, "author": "Skytebas", "duration": "00:04:04", "dislikes": 26, "YouTubeID": "-I_zKOfTKIM", "description": "Fire pils og en pizza", "viewcount": 430507, "likes": 570, "title": "\u00c5ge Aleksandersen -Fire pils og en pizza", "rating": 4.8255033493}
--------------------------------------------------------------------------------
/hymnal/archive/children/1/lyric.xml:
--------------------------------------------------------------------------------
1 | 1 | Praise Him, praise Him, all ye little children, God is love, God is love; Praise Him, praise Him, all ye little children, God is love, God is love.
2 | 2 | Love Him, love Him, all ye little children, God is love, God is love; Love Him, love Him, all ye little children, God is love, God is love.
3 | 3 | Thank Him, thank Him, all ye little children, God is love, God is love; Thank Him, thank Him, all ye little children, God is love, God is love.
--------------------------------------------------------------------------------
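
lyric.xml is the raw <table> that hymnCrawler.py cuts out of the hymn page (the pipes above are its rendered cell borders). A minimal sketch for recovering the verses, assuming the saved table keeps hymnal.net's two-column layout of verse number and verse text:

from bs4 import BeautifulSoup

with open('lyric.xml', encoding='utf-8') as f:
    soup = BeautifulSoup(f.read(), 'html.parser')

# assumed layout: one <tr> per verse, a number cell followed by a text cell
for row in soup.find_all('tr'):
    cells = [td.get_text(strip=True) for td in row.find_all('td')]
    if len(cells) >= 2 and cells[0].isdigit():
        print(cells[0], '|', cells[1])
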
/5-track-pianoroll/compile.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | print('[*] loading...')
4 | proc_list = np.load('segments.npy', allow_pickle=True)  # the saved array holds Multitrack objects
5 | print('[*] processing...')
6 | num_item = len(proc_list)
7 | print(num_item)
8 | compiled_list = []
9 | for lidx in range(num_item):
10 | multi_track = proc_list[lidx]
11 | pianorolls = []
12 |
13 | for track in multi_track.tracks:
14 | pianorolls.append(track.pianoroll[:, :, np.newaxis])
15 |
16 | pianoroll_compiled = np.reshape(np.concatenate(pianorolls, axis=2)[:, 24:108, :], (8, 48, 84, 5))
17 | pianoroll_compiled = pianoroll_compiled[np.newaxis, :] > 0
18 | compiled_list.append(pianoroll_compiled.astype(bool))
19 |
20 | final = np.concatenate(compiled_list, axis=0)
21 | print(final.shape)
22 | print('[*] saving...')
23 | np.save('x_lpd_5_phr.npy', final)
24 | print('Done!!')
--------------------------------------------------------------------------------
/theorytab/utils/youtube_crawler.py:
--------------------------------------------------------------------------------
1 | import pafy
2 | import ffmpy
3 | from time import gmtime, strftime
4 | import json
5 | import os
6 |
7 | def video_crawler(y_id, filepath=''):
8 |
9 | try:
10 | video = pafy.new(y_id)
11 |
12 | video_info = dict()
13 | video_info['YouTubeID'] = y_id
14 | video_info['title'] = video.title
15 | video_info['rating'] = video.rating
16 | video_info['viewcount'] = video.viewcount
17 | video_info['author'] = video.author
18 | video_info['length'] = video.length
19 | video_info['duration'] = video.duration
20 | video_info['likes'] = video.likes
21 | video_info['dislikes'] = video.dislikes
22 | video_info['crawl_time'] = strftime("%Y-%m-%d %H:%M:%S", gmtime())
23 | video_info['description'] = video.description
24 |
25 | with open(os.path.join(filepath,'video_info.json'), 'w') as f:
26 | json.dump(video_info, f)
27 |
28 | best = video.getbest(preftype="mp4")
29 | best.download(quiet=True, filepath=os.path.join(filepath,'video.mp4'))
30 |
31 | return True
32 | except Exception:
33 | print('Download Failed: %s ' % y_id)
34 | return False
35 |
36 |
37 |
38 | if __name__ == '__main__':
39 | y_id = 'n1BtExxkP0M'
40 | video_crawler(y_id, '')
--------------------------------------------------------------------------------
/5-track-pianoroll/readme.md:
--------------------------------------------------------------------------------
1 | # 5-track pianoroll dataset
2 |
3 | This repository contains pre-processing code and the processed datasets derived from the [LPD](https://github.com/salu133445/lakh-pianoroll-dataset) dataset.
4 |
5 | ## Source Codes for Pre-processing
6 |
7 | 1. Download 'lpd_cleansed' from [here](https://github.com/salu133445/lakh-pianoroll-dataset)
8 |
9 | 2. Run *parser.py*
10 | 3. Run *compile.py*
11 |
12 | You can change the settings to customize your own piano-roll dataset.
13 |
14 | ## Processed 5-track Piano-roll Datasets
15 |
16 | #### Latest Version (ver.2)
17 |
18 | * num of tracks: **5**
19 | * *Bass, Drum, Guitar, String and Piano*
20 | * time resolution: **48**
21 | * pitch range: **84**
22 | * num of bar: **8**
23 | * num of phrases: **34126**
24 | * the shape of the tensor is **34126 x 8 x 48 x 84 x 5**
25 | * 5.12 GB
26 | * Segments with higher quality are selected; one instrument per track.
27 | * [Download](https://drive.google.com/file/d/17FBw7c_vrK33_mEgsA919GTSlHoJ7M6T/view?usp=sharing)
28 |
29 | #### Old Version for MuseGAN (ver.1)
30 |
31 | * num of tracks: **5**
32 | * *Bass, Drum, Guitar, String and Piano*
33 | * time resolution: **96**
34 | * pitch range: **84**
35 | * num of bar: **4**
36 | * num of phrases: **50266**
37 | * the shape of the tensor is **50266 x 384 x 84 x 5**
38 | * 7.54 GB
39 | * Instruments in the same MIDI family are compressed into one track. See [here](https://github.com/salu133445/musegan/tree/master/v1/training)
40 | * [Download](https://drive.google.com/file/d/1yj-5CsAwSoj1LHk4QwEQ09VB5fS69Vnq/view?usp=sharing)
41 |
42 | Generally, version 2 has richer but clearer textures.
43 |
44 | --------------
45 | Sample image of 5-track Piano-roll Datasets (ver.2):
46 |
47 | 
48 |
49 | The generated samples of version 2 on MuseGAN is available [here](sample).
50 |
--------------------------------------------------------------------------------
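
As a sanity check, the advertised sizes match the tensor shapes at one byte per boolean entry: 34126 x 8 x 48 x 84 x 5 bytes is about 5.1 GiB (ver.2) and 50266 x 384 x 84 x 5 bytes is about 7.5 GiB (ver.1). A minimal sketch for loading and inspecting the ver.2 tensor, with the file name taken from compile.py and the track order (drum, bass, guitar, string, piano) from parser.py:

import numpy as np

x = np.load('x_lpd_5_phr.npy')  # boolean tensor written by compile.py
print(x.shape, x.dtype)         # expected: (34126, 8, 48, 84, 5) bool

# flatten the piano track (index 4) of the first phrase into a
# (time steps, pitches) piano-roll: 8 bars x 48 steps per bar
piano = x[0, ..., 4].reshape(8 * 48, 84)
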
/hymnal/utils/log.txt:
--------------------------------------------------------------------------------
1 | =================================================
2 | https://www.hymnal.net/en/song-index/h/A
3 | https://www.hymnal.net/en/song-index/h/B
4 | https://www.hymnal.net/en/song-index/h/C
5 | https://www.hymnal.net/en/song-index/h/D
6 | https://www.hymnal.net/en/song-index/h/E
7 | https://www.hymnal.net/en/song-index/h/F
8 | https://www.hymnal.net/en/song-index/h/G
9 | https://www.hymnal.net/en/song-index/h/H
10 | https://www.hymnal.net/en/song-index/h/I
11 | https://www.hymnal.net/en/song-index/h/J
12 | https://www.hymnal.net/en/song-index/h/K
13 | https://www.hymnal.net/en/song-index/h/L
14 | https://www.hymnal.net/en/song-index/h/M
15 | https://www.hymnal.net/en/song-index/h/N
16 | https://www.hymnal.net/en/song-index/h/O
17 | https://www.hymnal.net/en/song-index/h/P
18 | https://www.hymnal.net/en/song-index/h/R
19 | https://www.hymnal.net/en/song-index/h/S
20 | https://www.hymnal.net/en/song-index/h/T
21 | https://www.hymnal.net/en/song-index/h/U
22 | https://www.hymnal.net/en/song-index/h/V
23 | https://www.hymnal.net/en/song-index/h/W
24 | https://www.hymnal.net/en/song-index/h/Y
25 | https://www.hymnal.net/en/song-index/nt
26 | https://www.hymnal.net/en/song-index/ns/1
27 | https://www.hymnal.net/en/song-index/ns/2
28 | https://www.hymnal.net/en/song-index/ns/A
29 | https://www.hymnal.net/en/song-index/ns/B
30 | https://www.hymnal.net/en/song-index/ns/C
31 | https://www.hymnal.net/en/song-index/ns/D
32 | https://www.hymnal.net/en/song-index/ns/E
33 | https://www.hymnal.net/en/song-index/ns/F
34 | https://www.hymnal.net/en/song-index/ns/G
35 | https://www.hymnal.net/en/song-index/ns/H
36 | https://www.hymnal.net/en/song-index/ns/I
37 | https://www.hymnal.net/en/song-index/ns/J
38 | https://www.hymnal.net/en/song-index/ns/K
39 | https://www.hymnal.net/en/song-index/ns/L
40 | https://www.hymnal.net/en/song-index/ns/M
41 | https://www.hymnal.net/en/song-index/ns/N
42 | https://www.hymnal.net/en/song-index/ns/O
43 | https://www.hymnal.net/en/song-index/ns/P
44 | https://www.hymnal.net/en/song-index/ns/R
45 | https://www.hymnal.net/en/song-index/ns/S
46 | https://www.hymnal.net/en/song-index/ns/T
47 | https://www.hymnal.net/en/song-index/ns/U
48 | https://www.hymnal.net/en/song-index/ns/V
49 | https://www.hymnal.net/en/song-index/ns/W
50 | https://www.hymnal.net/en/song-index/ns/Y
51 | https://www.hymnal.net/en/song-index/ns/Z
52 | https://www.hymnal.net/en/song-index/c
53 | > classic
54 | (1/1950) https://www.hymnal.net/en/hymn/h/877
55 | (2/1950) https://www.hymnal.net/en/hymn/h/292
56 | (3/1950) https://www.hymnal.net/en/hymn/h/509
57 | (4/1950) https://www.hymnal.net/en/hymn/h/952
58 | (5/1950) https://www.hymnal.net/en/hymn/h/724
59 | (6/1950) https://www.hymnal.net/en/hymn/h/886
60 | (7/1950) https://www.hymnal.net/en/hymn/h/1198
61 |
--------------------------------------------------------------------------------
/doug-mckenzie-jazz/codes/DMjazzCrawler.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | import os
4 | import sys
5 | import time
6 | import json
7 | import re
8 |
9 | class DMjazzCrawler():
10 | BASE_URL = 'http://www.bushgrafts.com/jazz'
11 | ROOT = 'archive'
12 |
13 | def __init__(self, sleep_time=0.1, log=True):
14 | self.sleep_time = sleep_time
15 | self.log = log
16 |
17 | def _request_url(self, url, doctype='html'):
18 | # set header
19 | response = requests.get(url, headers = {"Cache-Control":"max-age=0"})
20 |
21 | # sleep
22 | time.sleep(self.sleep_time)
23 |
24 | # return
25 | if doctype =='html':
26 | soup = BeautifulSoup(response.text, 'html.parser')
27 | return soup
28 | elif doctype =='content':
29 | return response.content
30 | else:
31 | return response
32 |
33 | def _log_print(self, log, quiet=False):
34 | if not quiet:
35 | print(log)
36 |
37 | if self.log:
38 | with open("log.txt", "a") as f:
39 | print(log, file=f)
40 |
41 | def fetch_song(self):
42 | self.soup = self._request_url(self.BASE_URL+'/midi.htm')
43 | a_list = self.soup.find_all('a')
44 | midi_list = []
45 | name_list = []
46 |
47 | cnt = 0
48 | for idx, a in enumerate(a_list):
49 | str_ = a.get('href')
50 | if str_ and (str_ not in midi_list) and ('.mid' in str_):
51 | song_name = re.sub(r'\s+', ' ', a.text.replace('\r\n', '')).strip(' ')
52 | if song_name:
53 | midi_fn = str_.split('/')[1]
54 | midi_list.append(midi_fn )
55 | name_list.append(song_name)
56 | print('%3d | %-40s %s'%(idx, song_name, midi_fn))
57 | cnt += 1
58 |
59 | self._log_print('Total: %d'%cnt)
60 |
61 | return dict(zip(midi_list, name_list))
62 |
63 | def crawl_song(self, song_dict):
64 | for idx, k in enumerate(song_dict.keys()):
65 |
66 | url = self.BASE_URL + '/Midi%20site/' + k
67 | print('%3d %s' %(idx, url))
68 | content = self._request_url(url, doctype='content')
69 |
70 | with open(os.path.join(self.ROOT,k), "wb") as f:
71 | f.write(content)
72 |
73 | def run(self):
74 |
75 | song_dict = self.fetch_song()
76 |
77 | if not os.path.exists(self.ROOT):
78 | os.makedirs(self.ROOT)
79 | with open(os.path.join(self.ROOT, 'archive.json'), "w") as f:
80 | json.dump(song_dict, f)
81 |
82 | self.crawl_song(song_dict)
83 |
84 |
85 | if __name__ == '__main__':
86 | dmc = DMjazzCrawler()
87 | dmc.run()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # List of Symbolic Musical Datasets
2 |
3 | This repository aims to collect accessible symbolic musical datasets on the Net.
4 | Generally, each dataset is organized in the following way:
5 | * archive: samples from the dataset
6 | * utils: code for crawling or processing
7 |
8 | ## Contents
9 | * Piano-roll
10 | * Lead Sheet
11 | * MIDI
12 | * MISC
13 |
14 | ---
15 |
16 | ## Piano Roll
17 | ### 5-track piano-roll dataset
18 | 
19 |
20 | This dataset is derived from [LPD](https://github.com/salu133445/lakh-pianoroll-dataset) with a new pre-processing policy.
21 |
22 | ### lead sheet dataset
23 | 
24 |
25 | This dataset is derived from [Theorytab], but it also has the potential to incorporate other lead sheet datasets. For further details, please refer to this [repo](https://github.com/wayne391/Lead-Sheet-Analysis/tree/master/lead_sheet_dataset).
26 |
27 | ---
28 |
29 | ## Lead Sheets
30 | One melody track accompanied by one chord track.
31 |
32 | ### Crawled Datasets
33 | | Source | Genre | Format | Chord | Melody | Songs | Src |
34 | |-----------------------|:----------:|:------:|:-----:|:------:|:------:|:----:|
35 | | [Theorytab] | pop | XML | V | V | 10148 | [O](https://drive.google.com/file/d/13AEVD9xaZIaicEgd8tF1l6aOiRTymJxL/view?usp=sharing)
36 | | [Wikifonia] | pop | XML | V | V | 6675 | [O](https://drive.google.com/file/d/155FZ9Uq7QLySv9y2bAtk5LD37XZDo0DF/view?usp=sharing)
37 | | [Hymnal] | hymn | MIDI | Δ | V | 3358 | [O](https://drive.google.com/drive/folders/1fP9OmQa9amz-nwaaaITggCEWs3ewz1_8?usp=sharing)
38 |
39 |
40 | #### Links
41 |
42 | * WJazzD: http://jazzomat.hfm-weimar.de/dbformat/dboverview.html
43 | * MIDI format of Theorytab is now available: [Link](https://drive.google.com/file/d/1K1t8L9IRTHnQ1ozRIMRGEyxk_yhN6kLr/view?usp=sharing).
44 | --------------
45 |
46 | ## MIDI
47 | ### Crawled Datasets
48 | | Source | Genre | Multi-track | Format |Songs | src |
49 | |-----------------------|:----------:|:-----------:|:------:|:------:|:---:|
50 | | [VGMdb] | game | V | MIDI | 28419 | [O](https://drive.google.com/drive/folders/1IW83MmH-RJ81yog6sbOUOTHimobE4FuK?usp=sharing)
51 | | [Doug McKenzie Jazz] | jazz | V | MIDI | 297 | [O](https://drive.google.com/drive/folders/1wVVDpcov5VV6Govhn1-CT0BOifqoF-Od?usp=sharing)
52 | | [Piano-e-Competition] | classical | | MIDI | 1573 | [O](https://drive.google.com/drive/folders/17yAGt3AR6txSZv8DBcbAbT3luTMkrkIb?usp=sharing)
53 |
54 | ### Online Resources
55 | #### Jazz
56 | * [profesordepiano](http://www.profesordepiano.com/Real%20Book/Realbook.htm?fbclid=IwAR09XcuMD6PMEyUFq0gXAIVFsJVPw8uQSXq5s-o46JFv7OlYVQnwArFOmSk)
57 | * [minor9](http://bhs.minor9.com)
58 |
59 | #### Drum
60 | * [Groove MIDI Dataset (Magenta)](https://magenta.tensorflow.org/datasets/groove)
61 |
62 | #### MIDI Man (on Reddit)
63 | * [The drum/percussion MIDI archive](https://www.reddit.com/r/WeAreTheMusicMakers/comments/3anwu8/the_drum_percussion_midi_archive_800k/)
64 | * [The largest MIDI collection on the internet](https://www.reddit.com/r/WeAreTheMusicMakers/comments/3ajwe4/the_largest_midi_collection_on_the_internet/)
65 |
66 | #### full-scale
67 | * [midiworld](http://www.midiworld.com)
68 | * [Lakh MIDI dataset](http://colinraffel.com/projects/lmd/)
69 |
70 |
71 | ---
72 |
73 | ## MISC
74 | ### Unchecked
75 | * http://www.musicstudents.com/jam.html (backing track and chord charts)
76 | * https://www.cs.hmc.edu/~keller/jazz/
77 | * http://www.ralphpatt.com/Song.html
78 | * http://www.saxuet.qc.ca/TheSaxyPage/midi.htm
79 | * http://www.thejazzpage.de/index1.html
80 | * http://cjam.lassecollin.se/
81 | * http://www.jazzpla.net/jazznote3000.htm
82 |
83 |
84 | [Theorytab]: https://www.hooktheory.com/theorytab
85 | [Hymnal]: https://www.hymnal.net/en/home
86 | [Wikifonia]: http://www.wikifonia.org/
87 | [Piano-e-Competition]: http://www.piano-e-competition.com
88 | [VGMdb]: https://www.vgmusic.com
89 | [Doug McKenzie Jazz]: http://bushgrafts.com/wp/
90 |
--------------------------------------------------------------------------------
/vgmdb/utils/VGMCrawler.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | import os
4 | import sys
5 | import time
6 | import json
7 |
8 | class VGMCrawler():
9 | BASE_URL = 'https://www.vgmusic.com/music/console/'
10 | archive_dir = 'archive'
11 |
12 | def __init__(self, sleep_time=0.1, log=True):
13 | self.sleep_time = sleep_time
14 | self.log = log
15 | self.count = 0
16 |
17 | def _request_url(self, url, doctype='html'):
18 | # set header
19 | response = requests.get(url)
20 |
21 | # sleep
22 | time.sleep(self.sleep_time)
23 |
24 | # return
25 | if doctype =='html':
26 | soup = BeautifulSoup(response.text, 'html.parser')
27 | return soup
28 | elif doctype =='content':
29 | return response.content
30 | else:
31 | return response
32 |
33 | def _log_print(self, log, quiet=False):
34 | if not quiet:
35 | print(log)
36 |
37 | if self.log:
38 | with open("log.txt", "a") as f:
39 | print(log, file=f)
40 |
41 | def fetch_dirs(self, url):
42 | soup = self._request_url(url)
43 | tr_list = soup.find('table').find_all('tr')
44 | dir_list = []
45 |
46 | for i in range(3, len(tr_list)-1):
47 | dir_list.append(tr_list[i].find_all('td')[1].text)
48 | return dir_list
49 |
50 | def fetch_songs(self, url):
51 | soup = self._request_url(url)
52 | tr_list = soup.find('table').find_all('tr')
53 | song_list = []
54 |
55 | if len(tr_list) == 4:
56 | return None
57 | for i in range(2, len(tr_list)):
58 | now_tr = tr_list[i]
59 | if now_tr.get("class") == ['header']:
60 | album_name = now_tr.text.strip('\n')
61 | else:
62 | if not now_tr.td.get('colspan'):
63 |
64 | # get info
65 | info = now_tr.td.text.split('\n')
66 | song_midi = now_tr.a['href']
67 | song_name = info[0]
68 | song_size = info[1]
69 | song_author = info[3]
70 | song_list.append({
71 | 'filename':song_midi,
72 | 'song_name':song_name,
73 | 'song_size':song_size,
74 | 'song_author':song_author,
75 | 'album_name':album_name})
76 | print(' |%30s |%30s |%13s |%10s |%s '%(album_name, song_name, song_size, song_author, song_midi ))
77 |
78 |
79 | return song_list
80 |
81 | def crawl_songs(self, url, song_list, dir_path):
82 | if not os.path.exists(dir_path):
83 | os.makedirs(dir_path)
84 |
85 | for idx, s in enumerate(song_list):
86 | sys.stdout.write('%d/%d - total: %d\n' % (idx, len(song_list), self.count))
87 | sys.stdout.flush()
88 | midi_url = url + s['filename']
89 | content = self._request_url(midi_url, doctype='content')
90 | fn = os.path.join(dir_path, s['filename'])
91 | with open(fn, "wb") as f:
92 | f.write(content)
93 |
94 | self.count += 1
95 |
96 | def crawl_archive(self):
97 | dir_list = self.fetch_dirs(self.BASE_URL)
98 |
99 | if not os.path.exists(self.archive_dir):
100 | os.makedirs(self.archive_dir)
101 |
102 | info = dict()
103 | for d in dir_list:
104 | root_dir = d.strip('/')
105 | print('{{%s}}' % root_dir)
106 | subdir_url = self.BASE_URL + d
107 | subdir_list = self.fetch_dirs(subdir_url)
108 |
109 | tmp_dict = dict()
110 | for sd in subdir_list:
111 | root_subdir = sd.strip('/')
112 | print('[%s]'%root_subdir)
113 | page_url = subdir_url + sd
114 | sl = self.fetch_songs(page_url)
115 | if sl:
116 | self.crawl_songs(page_url, sl, os.path.join(self.archive_dir, root_dir, root_subdir))
117 | tmp_dict[sd] = sl
118 |
119 | info[d] = tmp_dict
120 |
121 | with open(os.path.join(self.archive_dir,'archive.json'), "w") as f:
122 | json.dump(info, f)
123 |
124 | def run(self):
125 | s = time.time()
126 | self.crawl_archive()
127 | e = time.time()
128 | self._log_print(time.strftime("\nElapsed time: %H:%M:%S", time.gmtime(e-s)))
129 | self._log_print('Total %d Songs'%self.count)
130 |
131 | if __name__ == '__main__':
132 | vc = VGMCrawler()
133 | vc.run()
134 |
--------------------------------------------------------------------------------
/piano-e-competition/utils/EPcompCrawler.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | import os
4 | import sys
5 | import time
6 | import json
7 | import re
8 | import random
9 |
10 |
11 | class EPcompCrawler():
12 | BASE_URL = 'http://www.piano-e-competition.com'
13 | ROOT = 'archive'
14 | YEARS = ['/midi_2002.asp', '/midi_2004.asp', '/midi_2006.asp',
15 | '/midi_2008.asp', '/midi_2009.asp', '/midi_2011.asp']
16 |
17 | def __init__(self, sleep_time=0.1, log=True):
18 | self.sleep_time = sleep_time
19 | self.log = log
20 | self.mid_cnt = 0
21 | self.zip_cnt = 0
22 |
23 | def _request_url(self, url, doctype='html'):
24 | response = requests.get(url)
25 |
26 | # sleep
27 | time.sleep(self.sleep_time)
28 |
29 | # return
30 | if doctype =='html':
31 | soup = BeautifulSoup(response.text, 'html.parser')
32 | return soup
33 | elif doctype =='content':
34 | return response.content
35 | else:
36 | return response
37 |
38 | def _log_print(self, log, quiet=False):
39 | if not quiet:
40 | print(log)
41 |
42 | if self.log:
43 | with open("log.txt", "a") as f:
44 | print(log, file=f)
45 |
46 | def fetch_year_songs(self, year_url):
47 | soup = self._request_url(year_url)
48 | a_list = soup.find_all('a')
49 | midi_list = []
50 | zip_list = []
51 | print(len(a_list))
52 |
53 | for idx in range(len(a_list)):
54 | a = a_list[idx]
55 |
56 | url = a.get('href')
57 | if url and (('.MID' in url) or ('.mid' in url)):
58 | now_performer = re.search(r'(\D+?)(\d+?).', url.split('/')[-1]).group(1)
59 | song_name = re.sub(r'\s+', ' ', a.text.replace('\r', '').replace('\n', '')).strip()
60 | try:
61 | composer = a.parent.parent.td.text.strip()
62 | except:
63 | try:
64 | composer = a.parent.parent.parent.td.text.strip()
65 | except:
66 | composer = a.parent.parent.parent.parent.td.text.strip()
67 | print('%-10s |%-40s |[%s | %s]' % (now_performer, url, song_name, composer))
68 | midi_list.append((now_performer, url, song_name))
69 |
70 | if url and (('.ZIP' in url) or ('.zip' in url)):
71 | zip_fn = url.lstrip('../')
72 | print('%-10s %s'%(now_performer, zip_fn))
73 | zip_list.append((now_performer, zip_fn))
74 | return midi_list, zip_list
75 |
76 | def crawl_year_songs(self, midi_list, zip_list, dir_path):
77 | path_midi = os.path.join(dir_path, 'midi')
78 | path_zip = os.path.join(dir_path, 'zip')
79 |
80 | if not os.path.exists(path_midi):
81 | os.makedirs(path_midi)
82 | if not os.path.exists(path_zip):
83 | os.makedirs(path_zip)
84 |
85 | print('=================midi=================')
86 | for idx in range(len(midi_list)):
87 | m_url = midi_list[idx][1]
88 |
89 | if '/ecompetition' not in m_url:
90 | m_url = 'http://www.piano-e-competition.com/ecompetition/' + m_url
91 | else:
92 | m_url = self.BASE_URL + m_url
93 |
94 |
95 | print(idx, m_url)
96 | content = self._request_url(m_url, doctype='content')
97 | fn = m_url.split('/')[-1]
98 |
99 | with open(os.path.join(path_midi, fn), "wb") as f:
100 | f.write(content)
101 |
102 | print('=================zip=================')
103 | for idx in range(len(zip_list)):
104 | z_url = zip_list[idx][1]
105 | z_url = self.BASE_URL + '/' + z_url
106 | print(idx, z_url)
107 | content = self._request_url(z_url, doctype='content')
108 | fn = z_url.split('/')[-1]
109 |
110 | with open(os.path.join(path_zip, fn), "wb") as f:
111 | f.write(content)
112 |
113 | def crawl_archive(self):
114 |
115 | if not os.path.exists(self.ROOT):
116 | os.makedirs(self.ROOT)
117 |
118 | archive_dict = dict()
119 |
120 | for y in self.YEARS:
121 | ml, zl = self.fetch_year_songs(self.BASE_URL+y)
122 | year = re.search(r'(\d{4}).', y).group(1)
123 | path_year = os.path.join(self.ROOT, year)
124 | if not os.path.exists( path_year ):
125 | os.makedirs( path_year )
126 |
127 | print('{%s}' % year)
128 |
129 | self.mid_cnt += len(ml)
130 | self.zip_cnt += len(zl)
131 |
132 | tmp = {'mid':ml, 'zip':zl}
133 | archive_dict[year] = tmp
134 |
135 | self.crawl_year_songs(ml, zl, path_year)
136 |
137 | with open(os.path.join(self.ROOT, 'archive.json'), "w") as f:
138 | json.dump(archive_dict, f)
139 |
140 | def run(self):
141 | s = time.time()
142 | self.crawl_archive()
143 | e = time.time()
144 |
145 | self._log_print('Total midi files %d'%self.mid_cnt)
146 | self._log_print('Total zip files %d'%self.zip_cnt)
147 | self._log_print(time.strftime("\nElapsed time: %H:%M:%S", time.gmtime(e-s)))
148 |
149 |
150 | if __name__ == '__main__':
151 | ec = EPcompCrawler()
152 | ec.run()
--------------------------------------------------------------------------------
/theorytab/utils/theorytab_crawler.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | import os
4 | import time
5 | import json
6 | import string
7 |
8 |
9 | website = 'https://www.hooktheory.com'
10 | base_url = website + '/theorytab/artists/'
11 | sleep_time = 0.11
12 | alphabet_list = string.ascii_lowercase
13 |
14 |
15 | def song_retrieval(artist, song, path_song):
16 |
17 | song_url = 'https://www.hooktheory.com/theorytab/view/' + artist + '/' + song
18 | response_song = requests.get(song_url)
19 |
20 | soup = BeautifulSoup(response_song.text, 'html.parser')
21 | li_list = soup.findAll("li", {"role": "presentation"})
22 |
23 | section_list = []
24 | pk_list = []
25 |
26 | # section
27 | for i in range(len(li_list)-1):
28 | sec = li_list[i].text.strip().lower().replace(" ", "-")
29 | section_list.append(sec)
30 | pk_list.append(soup.findAll("div", {"role": "tabpanel", "id": sec})[0].contents[0]['id'])
31 |
32 | # save xml
33 | for idx, pk in enumerate(pk_list):
34 | req_url = 'https://www.hooktheory.com/songs/getXmlByPk?pk=' + str(pk)
35 | response_info = requests.get(req_url)
36 | content = response_info.text
37 |
38 | with open(os.path.join(path_song, section_list[idx] + ".xml"), "w", encoding="utf-8") as f:
39 | f.write(content)
40 | time.sleep(0.08)
41 |
42 | # get genre
43 | wikiid = soup.findAll("multiselect", {"items": "genres"})[0]['wikiid']
44 | response_genre = requests.get('https://www.hooktheory.com/wiki/' + str(wikiid) + '/genres')
45 | genre_act_list = json.loads(response_genre.text)
46 | genres = []
47 | for g in genre_act_list:
48 | if g['active']:
49 | genres.append(g['name'])
50 |
51 | # saving
52 | info = {'section': section_list, 'pk': pk_list, 'song_url': song_url,
53 | 'genres': genres, 'wikiid': wikiid}
54 |
55 | with open(os.path.join(path_song, 'song_info.json'), "w") as f:
56 | json.dump(info, f)
57 |
58 |
59 | def get_song_list(url_artist, quiet=False):
60 | response_tmp = requests.get(website + url_artist)
61 | soup = BeautifulSoup(response_tmp.text, 'html.parser')
62 | item_list = soup.find_all("li", {"class": "grid-item"})
63 |
64 | song_name_list = []
65 | for item in item_list:
66 | song_name = item.find_all("a", {"class": "a-tab-cover"})[0]['href'].split('/')[-1]
67 | song_name_list.append(song_name)
68 | if not quiet:
69 | print(' > %s' % song_name)
70 | return song_name_list
71 |
72 |
73 | def traverse_website():
74 | '''
75 | Retrieve all urls of artists and songs from the website
76 | '''
77 |
78 | list_pages = []
79 | archive_artist = dict()
80 | artist_count = 0
81 | song_count = 0
82 |
83 | for ch in alphabet_list:
84 | time.sleep(sleep_time)
85 | url = base_url + ch
86 | response_tmp = requests.get(url)
87 | soup = BeautifulSoup(response_tmp.text, 'html.parser')
88 | page_count = 0
89 |
90 | print('==[%c]=================================================' % ch)
91 |
92 | # get artists list by pages
93 | url_artist_list = []
94 | for page in range(1, 9999):
95 | url = 'https://www.hooktheory.com/theorytab/artists/'+ch+'?page=' + str(page)
96 |
97 | time.sleep(sleep_time)
98 | response_tmp = requests.get(url)
99 | soup = BeautifulSoup(response_tmp.text, 'html.parser')
100 | item_list = soup.find_all("li", {"class": "overlay-trigger"})
101 |
102 | if item_list:
103 | print(url)
104 | page_count += 1
105 | else:
106 | break
107 |
108 | for item in item_list:
109 | url_artist_list.append(item.find_all("a", {"class": "a-no-decoration"})[0]['href'])
110 |
111 | print('Total:', len(url_artist_list))
112 |
113 | print('----')
114 |
115 | if not page_count:
116 | page_count = 1
117 |
118 | # get song of artists
119 | artist_song_dict = dict()
120 |
121 | for url_artist in url_artist_list:
122 | artist_count += 1
123 | time.sleep(sleep_time)
124 | artist_name = url_artist.split('/')[-1]
125 | print(artist_name)
126 | song_name_list = get_song_list(url_artist)
127 | song_count += len(song_name_list)
128 | artist_song_dict[artist_name] = song_name_list
129 |
130 | archive_artist[ch] = artist_song_dict
131 | list_pages.append(page_count)
132 |
133 | print('=======================================================')
134 | print(list_pages)
135 | print('Artists:', artist_count)
136 | print('Songs:', song_count)
137 |
138 | archive_artist['num_song'] = song_count
139 | archive_artist['num_artist'] = artist_count
140 |
141 | with open('archive_artist.json', "w") as f:
142 | json.dump(archive_artist, f)
143 |
144 |
145 | if __name__ == '__main__':
146 |
147 | traverse_website()
148 |
149 | # root for crawled dataset
150 | root_dir = 'archive'
151 | with open('archive_artist.json', "r") as f:
152 | archive_artist = json.load(f)
153 |
154 | count_ok = 0
155 | song_count = archive_artist['num_song']
156 |
157 | for ch in alphabet_list:
158 | path_ch = os.path.join(root_dir, ch)
159 | print('==[%c]=================================================' % ch)
160 |
161 | if not os.path.exists(path_ch):
162 | os.makedirs(path_ch)
163 |
164 | for a_name in archive_artist[ch].keys():
165 | for s_name in archive_artist[ch][a_name]:
166 |
167 | try:
168 | print('(%3d/%3d) %s %s' % (count_ok, song_count, a_name, s_name))
169 | path_song = os.path.join(path_ch, a_name, s_name)
170 |
171 | if not os.path.exists(path_song):
172 | os.makedirs(path_song)
173 |
174 | time.sleep(sleep_time)
175 | song_retrieval(a_name, s_name, path_song)
176 |
177 | count_ok += 1
178 |
179 | except Exception as e:
180 | print(e)
181 |
182 | print('total:', count_ok)
183 |
--------------------------------------------------------------------------------
/5-track-pianoroll/parser.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from pypianoroll import Multitrack, Track
4 | import json
5 | import pickle
6 |
7 | family_name=[
8 | 'drum',
9 | 'bass',
10 | 'guitar',
11 | 'string',
12 | 'piano',
13 | ]
14 |
15 | family_thres = [
16 | (2, 24), # drum
17 | (1, 96), # bass
18 | (2, 156), # guitar
19 | (2, 156), # string,
20 | (2, 156), # piano
21 | ]
22 |
23 |
24 | def findall_endswith(root):
25 | """Traverse `root` recursively and yield all files ending with `postfix`"""
26 | for dirpath, _, filenames in os.walk(root):
27 | for filename in filenames:
28 | if filename.endswith('.npz'):
29 | yield os.path.join(dirpath, filename)
30 |
31 | def check_which_family(track):
32 | is_piano = lambda program, is_drum: not is_drum and ((program >= 0 and program <= 7)
33 | or (program >= 16 and program <= 23))
34 | is_guitar = lambda program: program >= 24 and program <= 31
35 | is_bass = lambda program: program >= 32 and program <= 39
36 | is_string = lambda program: program >=40 and program <= 51
37 |
38 | # drum, bass, guitar, string, piano
39 | is_instr_act = lambda program, is_drum: np.array([is_drum, is_bass(program), is_guitar(program),
40 | is_string(program), is_piano(program, is_drum)])
41 |
42 | instr_act = is_instr_act(track.program, track.is_drum)
43 | return instr_act
44 |
45 | def check_instr_act(multitrack):
46 | instr_act_all = np.zeros(5)
47 | for track in multitrack.tracks:
48 | instr_act = check_which_family(track)
49 | instr_act_all += instr_act
50 | instr_act_cnt = sum(instr_act_all > 0)
51 | return instr_act_all, instr_act_cnt
52 |
53 | def segment_quality(pianoroll, thres_pitch, thres_beats):
54 | pitch_sum = sum(np.sum(pianoroll, axis=0) > 0)
55 | beat_sum = sum(np.sum(pianoroll, axis=1) > 0)
56 | score = pitch_sum + beat_sum
57 | return (pitch_sum >= thres_pitch) and (beat_sum >= thres_beats), (pitch_sum, beat_sum)
58 |
59 | def proc_instr_intersection_list(npz_list):
60 | cnt_ok = 0
61 | list_ok = []
62 | thres_instr_num = 5
63 | for nidx in range(len(npz_list)):
64 | if nidx % 500 == 0:
65 | print(nidx, '-', cnt_ok)
66 | npz_file = npz_list[nidx]
67 | multitrack = Multitrack(npz_file)
68 |
69 | if len(multitrack.tracks) < 5:
70 | continue
71 |
72 | instr_act_all, instr_act_cnt = check_instr_act(multitrack)
73 |
74 | if instr_act_cnt != 5:
75 | continue
76 |
77 | list_ok.append(npz_file)
78 | cnt_ok += 1
79 |
80 | print(cnt_ok)
81 | return list_ok
82 |
83 |
84 | if __name__ == '__main__':
85 | # root = 'lpd_cleansed'
86 | # npz_list = list(findall_endswith(root))
87 |
88 | # with open('npz_list.pickle', 'wb') as f:
89 | # pickle.dump(npz_list, f, protocol=pickle.HIGHEST_PROTOCOL)
90 | # with open('npz_list.pickle', 'rb') as f:
91 | # npz_list = pickle.load(f)
92 |
93 | # list_ok = proc_instr_intersection_list(npz_list)
94 | # with open('list_ok.pickle', 'wb') as f:
95 | # pickle.dump(list_ok, f, protocol=pickle.HIGHEST_PROTOCOL)
96 | with open('list_ok.pickle', 'rb') as f:
97 | list_ok = pickle.load(f)
98 |
99 |
100 |
101 | num_consecutive_bar = 8
102 | resol = 96
103 | down_sample = 2
104 | cnt_total_segments = 0
105 | cnt_augmented = 0
106 | ok_segment_list = []
107 | hop_size = num_consecutive_bar // 4
108 |
109 | num_list_ok = len(list_ok)
110 | for oid in range(len(list_ok)):
111 | print('==', oid, '/', num_list_ok,'===============')
112 | npz_ok = list_ok[oid]
113 | multitrack = Multitrack(npz_ok)
114 | downbeat = multitrack.downbeat
115 |
116 | num_bar = len(downbeat) // resol
117 | hop_iter = 0
118 |
119 | song_ok_segments = []
120 | for bidx in range(num_bar-num_consecutive_bar):
121 | if hop_iter > 0:
122 | hop_iter -= 1
123 | continue
124 |
125 |
126 | st = bidx * resol
127 | ed = st + num_consecutive_bar * resol
128 |
129 | best_instr = [None] * 5
130 | best_score = [-1] * 5
131 | second_act = [False] * 5
132 | second_instr = [None] * 5
133 | is_all_ok = [False] * 5
134 | for tidx, track in enumerate(multitrack.tracks):
135 | # track[st:ed].plot()
136 | tmp_map = check_which_family(track)
137 | in_family = np.where(tmp_map)[0]
138 |
139 | if not len(in_family):
140 | continue
141 | family = in_family[0]
142 |
143 | tmp_pianoroll = track[st:ed:down_sample].pianoroll
144 | is_ok, score = segment_quality(tmp_pianoroll, family_thres[family][0], family_thres[family][1])
145 |
146 | if is_ok and sum(score) > best_score[family]:
147 | track.name = family_name[family]
148 | best_instr[family] = track[st:ed:down_sample]
149 | best_score[family] = sum(score)
150 | is_all_ok[family] = True
151 |
152 | if sum(is_all_ok) == 5:
153 | # print(bidx)
154 | hop_iter = np.random.randint(0, 1) + hop_size  # note: np.random.randint(0, 1) always returns 0 (high bound is exclusive)
155 | song_ok_segments.append(Multitrack(tracks=best_instr,
156 | downbeat=list(range(0, 383, 48)), beat_resolution=12))
157 |
158 | cnt_ok_segment = len(song_ok_segments)
159 | if cnt_ok_segment > 6:
160 | seed = (6, cnt_ok_segment//2)
161 | if cnt_ok_segment > 11:
162 | seed = (11, cnt_ok_segment//3)
163 | if cnt_ok_segment > 15:
164 | seed = (15, cnt_ok_segment//4)
165 |
166 | rand_idx = np.random.permutation(cnt_ok_segment)[:max(seed)]
167 | song_ok_segments = [song_ok_segments[ridx] for ridx in rand_idx]
168 | ok_segment_list.extend(song_ok_segments)
169 | cnt_ok_segment = len(rand_idx)
170 | else:
171 | ok_segment_list.extend(song_ok_segments)
172 |
173 | cnt_total_segments += len(song_ok_segments)
174 | print('cur:%d | acc:%d'%(cnt_ok_segment, cnt_total_segments))
175 |
176 | print('---')
177 | print(cnt_total_segments)
178 | print(len(ok_segment_list))
179 | np.save('segments.npy', ok_segment_list)  # stored as an object array; load with allow_pickle=True
--------------------------------------------------------------------------------
/hymnal/utils/hymnCrawler.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | import os
4 | import time
5 | import json
6 | import string
7 | import random
8 | from lxml import etree
9 |
10 | class HymnCrawler():
11 | BASE_URL = 'https://www.hymnal.net'
12 |
13 | def __init__(self, sleep_time = 0.1, log=True):
14 | self.sleep_time =sleep_time
15 | self.meta_category = {'classic': self.BASE_URL + '/en/song-index/h',
16 | 'new_tunes': self.BASE_URL + '/en/song-index/nt',
17 | 'new_songs': self.BASE_URL + '/en/song-index/ns',
18 | 'children': self.BASE_URL + '/en/song-index/c'}
19 |
20 | self.log = log
21 | self.metadata = None
22 |
23 | def _request_url(self, url, doctype='html'):
24 | response = requests.get(url)
25 | time.sleep(self.sleep_time)
26 | if doctype =='html':
27 | soup = BeautifulSoup(response.text, 'html.parser')
28 | return soup
29 | elif doctype =='content':
30 | return response.content
31 | else:
32 | pass
33 |
34 | def _log_print(self, log):
35 | print(log)
36 | if self.log:
37 | with open("log.txt", "a") as f:
38 | print(log, file=f)
39 |
40 | def fetch_page_list(self, url):
41 | soup = self._request_url(url)
42 | tag_list = soup.find_all('div', {'class':'list-group'})[0].find_all('a', {'class':'list-group-item'})
43 | return [t['href'] for t in tag_list]
44 |
45 | def fetch_category_list(self, url):
46 | song_list = []
47 | soup = self._request_url(url)
48 | try:
49 | tag_list = soup.find_all('div', {'class':'letters'})[0].findAll('a')
50 | alphabet_list = [t.text for t in tag_list]
51 | for ch in alphabet_list:
52 | page_url = url+ '/' + ch
53 | self._log_print(page_url)
54 | song_list.extend(self.fetch_page_list(page_url))
55 | except:
56 | self._log_print(url)
57 | song_list.extend(self.fetch_page_list(url))
58 | return song_list
59 |
60 | def fetch_song(self, url, song_dir):
61 |
62 | soup = self._request_url(url)
63 |
64 | # (url, extension, filename)
65 | data_list = [('/f=mid', '.mid', 'all'), ('/f=mp3', '.mp3', 'audio'), ('/f=tune', '.mid', 'melody'),
66 | ('/f=ppdf', '.pdf', 'ls_paino'), ('/f=pdf', '.pdf','ls_guitar'), ('/f=gtpdf', '.pdf', 'ls_text')]
67 |
68 | # save download files
69 | for d in data_list:
70 | r = requests.get(url+ d[0])
71 |
72 | if song_dir:
73 | with open(os.path.join(song_dir,d[2] + d[1]), 'wb') as f:
74 | f.write(r.content)
75 |
76 | # metadata
77 | content_list = []
78 | tag_list = soup.find_all('div', {'class':'row common-panel'})[0].find_all('div', {'class':'col-xs-7 col-sm-8 no-padding'})
79 |
80 | for t in tag_list:
81 | content_list.append(t.text.strip())
82 | label_list = []
83 | tag_list = soup.find_all('div', {'class':'row common-panel'})[0].find_all('label', {'class':'col-xs-5 col-sm-4'})
84 | for t in tag_list:
85 | label_list.append(t.text.replace(':',''))
86 | metadata = dict(zip(label_list, content_list))
87 |
88 | # title
89 | title = soup.find('h1', {'class':"text-center"}).text.strip()
90 | metadata['title'] = title
91 |
92 | # lyric table
93 | lyric_xml = soup.find_all('div', {'class':'col-xs-12 lyrics'})[0].find('table')
94 |
95 | if song_dir:
96 | with open( os.path.join(song_dir, 'song_metadata.json'), "w") as f:
97 | json.dump(metadata , f)
98 |
99 | with open( os.path.join(song_dir, 'lyric.xml'), "w", encoding='utf-8') as f:
100 | f.write(str(lyric_xml))
101 |
102 | return lyric_xml, metadata
103 |
104 | def crawl_archive(self, archive_dir='archive'):
105 | metadata = dict()
106 | for k in self.meta_category.keys():
107 | category_url = self.meta_category[k]
108 | metadata[k] = self.fetch_category_list(category_url)
109 |
110 | # saving
111 | if archive_dir:
112 | if not os.path.exists(archive_dir):
113 | os.makedirs(archive_dir)
114 |
115 | with open(os.path.join(archive_dir, 'archive_metadata.json'), "w") as f:
116 | json.dump(metadata , f)
117 |
118 | return metadata
119 |
120 | def crawl_songs(self, metadata, archive_dir='archive'):
121 | count = 0
122 | count_success = 0
123 | for k in list(metadata):
124 | self._log_print('> %s'%k)
125 | category_dir = os.path.join(archive_dir, k)
126 | if not os.path.exists(category_dir):
127 | os.makedirs(category_dir)
128 |
129 | song_list = metadata[k]
130 |
131 | numOfSongs = len(song_list)
132 | for i in range(numOfSongs):
133 | song_url = self.BASE_URL + song_list[i]
134 | song_id = song_url.split('/')[-1]
135 | self._log_print(' (%d/%d) %s'%(i+1, numOfSongs, song_url))
136 | song_dir = os.path.join(category_dir, song_id)
137 |
138 | if not os.path.exists(song_dir):
139 | os.makedirs(song_dir)
140 |
141 | try:
142 | self.fetch_song(song_url, song_dir)
143 | metadata['err'] = False
144 | count_success += 1
145 | except:
146 | self._log_print('error!!')
147 | metadata['err'] = True
148 |
149 | count += 1
150 | self._log_print('total: %d songs'%count)
151 |
152 | return metadata
153 |
154 | def reload(self, archive_dir='archive'):
155 | with open(os.path.join(archive_dir, 'archive_metadata.json'), "r") as f:
156 | self.metadata =json.load(f)
157 |
158 | def run(self, archive_dir='archive', reload=False):
159 |
160 | self._log_print("=================================================")
161 |
162 | if not reload:
163 | self.metadata = self.crawl_archive(archive_dir=archive_dir)
164 | else:
165 | self.reload(archive_dir=archive_dir)
166 |
167 | self.metadata = self.crawl_songs(self.metadata, archive_dir=archive_dir)
168 |
169 | with open(os.path.join(archive_dir, 'archive_metadata.json'), "w") as f:
170 | json.dump(self.metadata, f)
171 |
172 | if __name__ == '__main__':
173 |
174 | hc = HymnCrawler()
175 | s = time.time()
176 | hc.run()
177 | e = time.time()
178 | print(time.strftime("\nElapsed time: %H:%M:%S", time.gmtime(e-s)))
179 |
180 |
--------------------------------------------------------------------------------
/vgmdb/utils/VGMCrawler.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import requests\n",
12 | "from bs4 import BeautifulSoup\n",
13 | "import os\n",
14 | "import sys\n",
15 | "import time\n",
16 | "import json"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 190,
22 | "metadata": {
23 | "collapsed": false
24 | },
25 | "outputs": [],
26 | "source": [
27 | "class VGMCrawler():\n",
28 | " BASE_URL = 'https://www.vgmusic.com/music/console/'\n",
29 | " archive_dir = 'archive'\n",
30 | " \n",
31 | " def __init__(self, sleep_time=0.1, log=True):\n",
32 | " self.sleep_time = sleep_time\n",
33 | " self.log = log\n",
34 | " self.count = 0\n",
35 | " \n",
36 | " def _request_url(self, url, doctype='html'):\n",
37 | " # set header\n",
38 | " response = requests.get(url)\n",
39 | "\n",
40 | " # sleep\n",
41 | " time.sleep(self.sleep_time)\n",
42 | "\n",
43 | " # return\n",
44 | " if doctype =='html':\n",
45 | " soup = BeautifulSoup(response.text, 'html.parser')\n",
46 | " return soup\n",
47 | " elif doctype =='content':\n",
48 | " return response.content\n",
49 | " else:\n",
50 | " return response\n",
51 | "\n",
52 | " def _log_print(self, log, quite=False):\n",
53 | " if not quite:\n",
54 | " print(log)\n",
55 | "\n",
56 | " if self.log:\n",
57 | " with open(\"log.txt\", \"a\") as f:\n",
58 | " print(log, file=f)\n",
59 | " \n",
60 | " def fetch_dirs(self, url):\n",
61 | " soup = self._request_url(url)\n",
62 | " tr_list = soup.find('table').find_all('tr')\n",
63 | " dir_list = []\n",
64 | "\n",
65 | " for i in range(3, len(tr_list)-1):\n",
66 | " dir_list.append(tr_list[i].find_all('td')[1].text)\n",
67 | " return dir_list\n",
68 | " \n",
69 | " def fetch_songs(self, url):\n",
70 | " soup = self._request_url(url)\n",
71 | " tr_list = soup.find('table').find_all('tr')\n",
72 | " song_list = []\n",
73 | " \n",
74 | " if len(tr_list) == 4:\n",
75 | " return None\n",
76 | " for i in range(2, len(tr_list)):\n",
77 | " now_tr = tr_list[i]\n",
78 | " if now_tr.get(\"class\") == ['header']:\n",
79 | " album_name = now_tr.text.strip('\\n')\n",
80 | " else:\n",
81 | " if not now_tr.td.get('colspan'):\n",
82 | "\n",
83 | " # get info\n",
84 | " info = now_tr.td.text.split('\\n')\n",
85 | " song_midi = now_tr.a['href']\n",
86 | " song_name = info[0]\n",
87 | " song_size = info[1]\n",
88 | " song_author = info[3]\n",
89 | " song_list.append({\n",
90 | " 'filename':song_midi, \n",
91 | " 'song_name':song_name,\n",
92 | " 'song_size':song_size,\n",
93 | " 'song_author':song_author,\n",
94 | " 'album_name':album_name})\n",
95 | " print(' |%30s |%30s |%13s |%10s |%s '%(album_name, song_name, song_size, song_author, song_midi ))\n",
96 | " \n",
97 | " \n",
98 | " return song_list\n",
99 | " \n",
100 | " def crawl_songs(self, url, song_list, dir_path):\n",
101 | " if not os.path.exists(dir_path):\n",
102 | " os.makedirs(dir_path)\n",
103 | " \n",
104 | " for idx, s in enumerate(song_list):\n",
105 | " sys.stdout.write('%d/%d - total: %d\\n' % (idx, len(song_list), self.count))\n",
106 | " sys.stdout.flush()\n",
107 | " midi_url = url + s['filename']\n",
108 | " content = self._request_url(midi_url, doctype='content')\n",
109 | " fn = os.path.join(dir_path, s['filename'])\n",
110 | " with open(fn, \"wb\") as f:\n",
111 | " f.write(content)\n",
112 | " \n",
113 | " self.count += 1\n",
114 | " \n",
115 | " def crawl_archive(self):\n",
116 | " dir_list = self.fetch_dirs(self.BASE_URL)\n",
117 | " \n",
118 | " if not os.path.exists(self.archive_dir,):\n",
119 | " os.makedirs(self.archive_dir,)\n",
120 | " \n",
121 | " info = dict()\n",
122 | " for d in dir_list:\n",
123 | " root_dir = d.strip('/')\n",
124 | " print('{{%s}}' % root_dir)\n",
125 | " subdir_url = self.BASE_URL + d\n",
126 | " subdir_list = self.fetch_dirs(subdir_url)\n",
127 | " \n",
128 | " tmp_dict = dict()\n",
129 | " for sd in subdir_list:\n",
130 | " root_subdir = sd.strip('/')\n",
131 | " print('[%s]'%root_subdir)\n",
132 | " page_url = subdir_url + sd\n",
133 | " sl = self.fetch_songs(page_url)\n",
134 | " if sl:\n",
135 | " self.crawl_songs(page_url, sl, os.path.join(self.archive_dir, root_dir, root_subdir))\n",
136 | " tmp_dict[sd] = sl\n",
137 | "\n",
138 | " info[d] = tmp_dict\n",
139 | " \n",
140 | " with open(os.path.join(self.archive_dir,'archive.json'), \"w\") as f:\n",
141 | " json.dump(info, f) \n",
142 | " \n",
143 | " def run(self): \n",
144 | " s = time.time()\n",
145 | " self.crawl_archive()\n",
146 | " e = time.time()\n",
147 | " self._log_print(time.strftime(\"\\nElapsed time: %H:%M:%S\", time.gmtime(s-e)))\n",
148 | " self._log_print('Total %d Songs'&self.count)\n",
149 | " \n"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {
156 | "collapsed": true
157 | },
158 | "outputs": [],
159 | "source": [
160 | "if __name__ == '__main__':\n",
161 | " vc = VGMCrawler()\n",
162 | " vc.run()"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 191,
168 | "metadata": {
169 | "collapsed": false
170 | },
171 | "outputs": [],
172 | "source": [
173 | "vc = VGMCrawler()\n",
174 | "vc.fetch_songs('https://www.vgmusic.com/music/console/magnavox/odyssey/')"
175 | ]
176 | }
177 | ],
178 | "metadata": {
179 | "kernelspec": {
180 | "display_name": "Python [mir]",
181 | "language": "python",
182 | "name": "Python [mir]"
183 | },
184 | "language_info": {
185 | "codemirror_mode": {
186 | "name": "ipython",
187 | "version": 3
188 | },
189 | "file_extension": ".py",
190 | "mimetype": "text/x-python",
191 | "name": "python",
192 | "nbconvert_exporter": "python",
193 | "pygments_lexer": "ipython3",
194 | "version": "3.5.2"
195 | }
196 | },
197 | "nbformat": 4,
198 | "nbformat_minor": 0
199 | }
200 |
--------------------------------------------------------------------------------
/hymnal/utils/hymnCrawler.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import requests\n",
12 | "from bs4 import BeautifulSoup\n",
13 | "import os\n",
14 | "import time\n",
15 | "import json\n",
16 | "import string\n",
17 | "import random\n",
18 | "from lxml import etree"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {
25 | "collapsed": false
26 | },
27 | "outputs": [],
28 | "source": [
29 | "class HymnCraler():\n",
30 | " BASE_URL = 'https://www.hymnal.net'\n",
31 | " \n",
32 | " def __init__(self, sleep_time = 0.1, log=True):\n",
33 | " self.sleep_time =sleep_time\n",
34 | " self.meta_category = {'classic': self.BASE_URL + '/en/song-index/h', \n",
35 | " 'new_tunes': self.BASE_URL + '/en/song-index/nt', \n",
36 | " 'new_songs': self.BASE_URL + '/en/song-index/ns', \n",
37 | " 'children': self.BASE_URL + '/en/song-index/c'}\n",
38 | " \n",
39 | " self.log = log\n",
40 | " self.metadata = None \n",
41 | "\n",
42 | " def _request_url(self, url, doctype='html'):\n",
43 | " response = requests.get(url)\n",
44 | " if doctype =='html':\n",
45 | " soup = BeautifulSoup(response.text, 'html.parser')\n",
46 | " return soup\n",
47 | " elif doctype =='content':\n",
48 | " return response.content\n",
49 | " else:\n",
50 | " pass\n",
51 | "\n",
52 | " def _log_print(self, log):\n",
53 | " print(log)\n",
54 | " if self.log:\n",
55 | " with open(\"log.txt\", \"a\") as f:\n",
56 | " print(log, file=f)\n",
57 | "\n",
58 | " def fetch_page_list(self, url):\n",
59 | " soup = self._request_url(url)\n",
60 | " tag_list = soup.find_all('div', {'class':'list-group'})[0].find_all('a', {'class':'list-group-item'})\n",
61 | " return [t['href'] for t in tag_list]\n",
62 | "\n",
63 | " def fetch_category_list(self, url):\n",
64 | " song_list = []\n",
65 | " soup = self._request_url(url)\n",
66 | " try:\n",
67 | " tag_list = soup.find_all('div', {'class':'letters'})[0].findAll('a')\n",
68 | " alphabet_list = [t.text for t in tag_list]\n",
69 | " for ch in alphabet_list:\n",
70 | " page_url = url+ '/' + ch\n",
71 | " self._log_print(page_url)\n",
72 | " song_list.extend(self.fetch_page_list(page_url))\n",
73 | " except:\n",
74 | " self._log_print(url)\n",
75 | " song_list.extend(self.fetch_page_list(url))\n",
76 | " return song_list\n",
77 | "\n",
78 | " def fetch_song(self, url, song_dir):\n",
79 | "\n",
80 | " soup = self._request_url(url)\n",
81 | " \n",
82 | " # (url, extension, filename)\n",
83 | " data_list = [('/f=mid', '.mid', 'all'), ('/f=mp3', '.mp3', 'audio'), ('/f=tune', '.mid', 'melody'),\n",
84 | " ('/f=ppdf', '.pdf', 'ls_paino'), ('/f=pdf', '.pdf','ls_guitar'), ('/f=gtpdf', '.pdf', 'ls_text')]\n",
85 | "\n",
86 | " # save download files\n",
87 | " for d in data_list:\n",
88 | " r = requests.get(url+ d[0])\n",
89 | "\n",
90 | " if song_dir:\n",
91 | " with open(os.path.join(song_dir,d[2] + d[1]), 'wb') as f:\n",
92 | " f.write(r.content)\n",
93 | "\n",
94 | " # metadata\n",
95 | " content_list = [] \n",
96 | " tag_list = soup.find_all('div', {'class':'row common-panel'})[0].find_all('div', {'class':'col-xs-7 col-sm-8 no-padding'})\n",
97 | "\n",
98 | " for t in tag_list:\n",
99 | " content_list.append(t.text.strip())\n",
100 | " label_list = []\n",
101 | " tag_list = soup.find_all('div', {'class':'row common-panel'})[0].find_all('label', {'class':'col-xs-5 col-sm-4'})\n",
102 | " for t in tag_list:\n",
103 | " label_list.append(t.text.replace(':',''))\n",
104 | " metadata = dict(zip(label_list, content_list))\n",
105 | " \n",
106 | " # title\n",
107 | " title = soup.find('h1', {'class':\"text-center\"}).text.strip()\n",
108 | " metadata['title'] = title\n",
109 | " \n",
110 | " # lyric table\n",
111 | " lyric_xml = soup.find_all('div', {'class':'col-xs-12 lyrics'})[0].find('table')\n",
112 | "\n",
113 | " if song_dir:\n",
114 | " with open( os.path.join(song_dir, 'song_metadata.json'), \"w\") as f:\n",
115 | " json.dump(metadata , f)\n",
116 | "\n",
117 | " with open( os.path.join(song_dir, 'lyric.xml'), \"w\", encoding='utf-8') as f:\n",
118 | " f.write(str(lyric_xml))\n",
119 | "\n",
120 | " return lyric_xml, metadata\n",
121 | "\n",
122 | " def craw_archive(self, archive_dir='archive'):\n",
123 | " metadata = dict()\n",
124 | " for k in self.meta_category.keys():\n",
125 | " category_url = self.meta_category[k]\n",
126 | " metadata[k] = self.fetch_category_list(category_url)\n",
127 | "\n",
128 | " # saving\n",
129 | " if archive_dir:\n",
130 | " if not os.path.exists(archive_dir):\n",
131 | " os.makedirs(archive_dir)\n",
132 | "\n",
133 | " with open(os.path.join(archive_dir, 'archive_metadata.json'), \"w\") as f:\n",
134 | " json.dump(metadata , f)\n",
135 | "\n",
136 | " return metadata\n",
137 | "\n",
138 | " def craw_songs(self, metadata, archive_dir='archive'):\n",
139 | " count = 0\n",
140 | " count_success = 0\n",
141 | " for k in list(metadata):\n",
142 | " self._log_print('> %s'%k)\n",
143 | " category_dir = os.path.join(archive_dir, k)\n",
144 | " if not os.path.exists(category_dir):\n",
145 | " os.makedirs(category_dir)\n",
146 | "\n",
147 | " song_list = metadata[k]\n",
148 | "\n",
149 | " numOfSongs = len(song_list)\n",
150 | " for i in range(numOfSongs):\n",
151 | " song_url = self.BASE_URL + song_list[i]\n",
152 | " song_id = song_url.split('/')[-1]\n",
153 | " self._log_print(' (%d/%d) %s'%(i+1, numOfSongs, song_url))\n",
154 | " song_dir = os.path.join(category_dir, song_id)\n",
155 | "\n",
156 | " if not os.path.exists(song_dir):\n",
157 | " os.makedirs(song_dir)\n",
158 | " \n",
159 | " try:\n",
160 | " self.fetch_song(song_url, song_dir)\n",
161 | " metadata['err'] = False\n",
162 | " count_success += 1\n",
163 | " except:\n",
164 | " self._log_print('error!!')\n",
165 | " metadata['err'] = True\n",
166 | " \n",
167 | " count += 1\n",
168 | " self._log_print('total: %d songs'%count)\n",
169 | " \n",
170 | " return metadata\n",
171 | " \n",
172 | " def reload(self, archive_dir='archive'):\n",
173 | " with open(os.path.join(archive_dir, 'archive_metadata.json'), \"r\") as f:\n",
174 | " self.metadata =json.load(f)\n",
175 | " \n",
176 | " def run(self, archive_dir='archive', reload=False): \n",
177 | " \n",
178 | " self._log_print(\"=================================================\")\n",
179 | " \n",
180 | " if not reload:\n",
181 | " self.metadata = self.craw_archive(archive_dir=archive_dir)\n",
182 | " else:\n",
183 | " self.reload(archive_dir=archive_dir)\n",
184 | "\n",
185 | " self.metadata = self.craw_songs(self.metadata, archive_dir=archive_dir)\n",
186 | " \n",
187 | " with open(os.path.join(archive_dir, 'archive_metadata.json'), \"w\") as f:\n",
188 | " json.dump(self.metadata, f)\n",
189 | " \n"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {
196 | "collapsed": false
197 | },
198 | "outputs": [
199 | {
200 | "name": "stdout",
201 | "output_type": "stream",
202 | "text": [
203 | "> new_songs\n",
204 | " (1/1070) https://www.hymnal.net/en/hymn/ns/528\n",
205 | " (2/1070) https://www.hymnal.net/en/hymn/ns/595\n",
206 | " (3/1070) https://www.hymnal.net/en/hymn/ns/584\n",
207 | " (4/1070) https://www.hymnal.net/en/hymn/ns/524\n",
208 | " (5/1070) https://www.hymnal.net/en/hymn/ns/501\n",
209 | " (6/1070) https://www.hymnal.net/en/hymn/lb/27\n",
210 | " (7/1070) https://www.hymnal.net/en/hymn/ns/550\n",
211 | " (8/1070) https://www.hymnal.net/en/hymn/ns/378\n",
212 | " (9/1070) https://www.hymnal.net/en/hymn/ns/309\n",
213 | "error!!\n",
214 | " (10/1070) https://www.hymnal.net/en/hymn/ns/398\n"
215 | ]
216 | }
217 | ],
218 | "source": [
219 | "if __name__ == '__main__':\n",
220 | " \n",
221 | " hc = HymnCraler()\n",
222 | " \n",
223 | " s = time.time()\n",
224 | " hc.run(reload=True)\n",
225 | " e = time.time()\n",
226 | " time.strftime(\"\\nElapsed time: %H:%M:%S\", time.gmtime(s-e))\n",
227 | " \n"
228 | ]
229 | }
230 | ],
231 | "metadata": {
232 | "anaconda-cloud": {},
233 | "kernelspec": {
234 | "display_name": "Python [mir]",
235 | "language": "python",
236 | "name": "Python [mir]"
237 | },
238 | "language_info": {
239 | "codemirror_mode": {
240 | "name": "ipython",
241 | "version": 3
242 | },
243 | "file_extension": ".py",
244 | "mimetype": "text/x-python",
245 | "name": "python",
246 | "nbconvert_exporter": "python",
247 | "pygments_lexer": "ipython3",
248 | "version": "3.5.2"
249 | }
250 | },
251 | "nbformat": 4,
252 | "nbformat_minor": 0
253 | }
254 |
--------------------------------------------------------------------------------
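`fetch_song` above builds its metadata dict by scraping two parallel columns, the `<label>` cells and their neighbouring value `<div>`s, and zipping them together. A minimal sketch of that label/value pattern; the markup below is illustrative only, not the live hymnal.net layout:

```python
from bs4 import BeautifulSoup

# illustrative markup; the real page uses Bootstrap grid classes instead
html = """
<div class="panel">
  <label>Category:</label><div class="value">Children</div>
  <label>Meter:</label><div class="value">8.8.8.8</div>
</div>
"""

soup = BeautifulSoup(html, 'html.parser')
labels = [l.text.replace(':', '') for l in soup.find_all('label')]
values = [d.text.strip() for d in soup.find_all('div', {'class': 'value'})]

# zip the parallel label/value lists into a single metadata dict
metadata = dict(zip(labels, values))
print(metadata)  # {'Category': 'Children', 'Meter': '8.8.8.8'}
```
--------------------------------------------------------------------------------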
/theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/verse.xml:
--------------------------------------------------------------------------------
1 |
2 | 1.2
3 |
4 |
5 | 4 pils 2
6 | 4
7 | 121
8 | C
9 | -I_zKOfTKIM
10 | 6
11 |
12 |
13 |
14 | Piano
15 | 0.8
16 | 0
17 | false
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | Piano 1/4s
26 | 0.8
27 | 0
28 | false
29 |
30 |
31 |
32 |
33 | Piano Bass Dotted
34 | 0.8
35 | 0
36 | false
37 |
38 |
39 |
40 |
41 | 30.48
42 | 19.92
43 | 1.99
44 | 17.93
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 | 0
54 | 1
55 | 1
56 | 1
57 | rest
58 | 0
59 | 1
60 |
61 |
62 | 1
63 | 1
64 | 2
65 | 0.5
66 | 1
67 | 0
68 | 0
69 |
70 |
71 | 1.5
72 | 1
73 | 2.5
74 | 0.5
75 | 1
76 | 0
77 | 0
78 |
79 |
80 | 2
81 | 1
82 | 3
83 | 1
84 | 1
85 | 0
86 | 0
87 |
88 |
89 | 3
90 | 1
91 | 4
92 | 0.5
93 | 2
94 | 0
95 | 0
96 |
97 |
98 | 3.5
99 | 1
100 | 4.5
101 | 0.5
102 | 3
103 | 0
104 | 0
105 |
106 |
107 | 4
108 | 2
109 | 1
110 | 0.5
111 | 4
112 | 0
113 | 0
114 |
115 |
116 | 4.5
117 | 2
118 | 1.5
119 | 1
120 | 2
121 | 0
122 | 0
123 |
124 |
125 | 5.5
126 | 2
127 | 2.5
128 | 1
129 | rest
130 | 0
131 | 1
132 |
133 |
134 | 6.5
135 | 2
136 | 3.5
137 | 1
138 | rest
139 | 0
140 | 1
141 |
142 |
143 | 7.5
144 | 2
145 | 4.5
146 | 0.5
147 | rest
148 | 0
149 | 1
150 |
151 |
152 | 8
153 | 3
154 | 1
155 | 0.5
156 | rest
157 | 0
158 | 1
159 |
160 |
161 | 8.5
162 | 3
163 | 1.5
164 | 0.5
165 | 2
166 | 0
167 | 0
168 |
169 |
170 | 9
171 | 3
172 | 2
173 | 0.5
174 | 2
175 | 0
176 | 0
177 |
178 |
179 | 9.5
180 | 3
181 | 2.5
182 | 0.5
183 | 2
184 | 0
185 | 0
186 |
187 |
188 | 10
189 | 3
190 | 3
191 | 0.5
192 | 2
193 | 0
194 | 0
195 |
196 |
197 | 10.5
198 | 3
199 | 3.5
200 | 0.5
201 | 2
202 | 0
203 | 0
204 |
205 |
206 | 11
207 | 3
208 | 4
209 | 0.5
210 | 3
211 | 0
212 | 0
213 |
214 |
215 | 11.5
216 | 3
217 | 4.5
218 | 0.5
219 | 2
220 | 0
221 | 0
222 |
223 |
224 | 12
225 | 4
226 | 1
227 | 1
228 | 1
229 | 0
230 | 0
231 |
232 |
233 | 13
234 | 4
235 | 2
236 | 1
237 | rest
238 | 0
239 | 1
240 |
241 |
242 | 14
243 | 4
244 | 3
245 | 1
246 | rest
247 | 0
248 | 1
249 |
250 |
251 | 15
252 | 4
253 | 4
254 | 1
255 | rest
256 | 0
257 | 1
258 |
259 |
260 | 16
261 | 5
262 | 1
263 | 1
264 | rest
265 | 0
266 | 1
267 |
268 |
269 | 17
270 | 5
271 | 2
272 | 0.5
273 | 1
274 | 0
275 | 0
276 |
277 |
278 | 17.5
279 | 5
280 | 2.5
281 | 0.5
282 | 1
283 | 0
284 | 0
285 |
286 |
287 | 18
288 | 5
289 | 3
290 | 0.5
291 | 1
292 | 0
293 | 0
294 |
295 |
296 | 18.5
297 | 5
298 | 3.5
299 | 0.5
300 | 1
301 | 0
302 | 0
303 |
304 |
305 | 19
306 | 5
307 | 4
308 | 0.5
309 | 2
310 | 0
311 | 0
312 |
313 |
314 | 19.5
315 | 5
316 | 4.5
317 | 0.5
318 | 3
319 | 0
320 | 0
321 |
322 |
323 | 20
324 | 6
325 | 1
326 | 0.5
327 | 4
328 | 0
329 | 0
330 |
331 |
332 | 20.5
333 | 6
334 | 1.5
335 | 0.5
336 | 2
337 | 0
338 | 0
339 |
340 |
341 | 21
342 | 6
343 | 2
344 | 1
345 | rest
346 | 0
347 | 1
348 |
349 |
350 | 22
351 | 6
352 | 3
353 | 1
354 | rest
355 | 0
356 | 1
357 |
358 |
359 | 23
360 | 6
361 | 4
362 | 0.5
363 | rest
364 | 0
365 | 1
366 |
367 |
368 | 23.5
369 | 6
370 | 4.5
371 | 0.5
372 | 3
373 | 0
374 | 0
375 |
376 |
377 | 24
378 | 7
379 | 1
380 | 0.5
381 | 3
382 | 0
383 | 0
384 |
385 |
386 | 24.5
387 | 7
388 | 1.5
389 | 0.5
390 | 3
391 | 0
392 | 0
393 |
394 |
395 | 25
396 | 7
397 | 2
398 | 1
399 | rest
400 | 0
401 | 1
402 |
403 |
404 | 26
405 | 7
406 | 3
407 | 0.5
408 | rest
409 | 0
410 | 1
411 |
412 |
413 | 26.5
414 | 7
415 | 3.5
416 | 0.5
417 | 5
418 | 0
419 | 0
420 |
421 |
422 | 27
423 | 7
424 | 4
425 | 0.5
426 | 5
427 | 0
428 | 0
429 |
430 |
431 | 27.5
432 | 7
433 | 4.5
434 | 0.5
435 | 3
436 | 0
437 | 0
438 |
439 |
440 | 28
441 | 8
442 | 1
443 | 1
444 | 4
445 | 0
446 | 0
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
465 | 1
466 |
467 |
468 |
469 |
470 |
471 |
472 | 4
473 | 1
474 | 1
475 | 0
476 | 0
477 |
478 |
479 | 7
480 |
481 |
482 |
483 |
484 |
485 |
486 | 4
487 | 2
488 | 1
489 | 4
490 | 0
491 |
492 |
493 | 7
494 |
495 |
496 |
497 |
498 |
499 |
500 | 4
501 | 3
502 | 1
503 | 8
504 | 0
505 |
506 |
507 | 1
508 |
509 |
510 |
511 |
512 |
513 |
514 | 4
515 | 4
516 | 1
517 | 12
518 | 0
519 |
520 |
521 | 1
522 |
523 |
524 |
525 |
526 |
527 |
528 | 4
529 | 5
530 | 1
531 | 16
532 | 0
533 |
534 |
535 | 7
536 |
537 |
538 |
539 |
540 |
541 |
542 | 4
543 | 6
544 | 1
545 | 20
546 | 0
547 |
548 |
549 | 3
550 |
551 |
552 |
553 |
554 |
555 |
556 | 4
557 | 7
558 | 1
559 | 24
560 | 0
561 |
562 |
563 | 7
564 |
565 |
566 |
567 |
568 |
569 |
570 | 2
571 | 8
572 | 1
573 | 28
574 | 0
575 |
576 |
577 | 5
578 |
579 |
580 |
581 |
582 |
583 | 0
584 | 2
585 | 8
586 | 3
587 | 30
588 | 0
589 |
590 |
591 | 8
592 |
593 |
594 |
--------------------------------------------------------------------------------
/theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/intro.xml:
--------------------------------------------------------------------------------
1 |
2 | 1.2
3 |
4 |
5 | 4 pils intro
6 | 4
7 | 124
8 | C
9 | -I_zKOfTKIM
10 | 6
11 |
12 |
13 |
14 | Piano
15 | 0.8
16 | 0
17 | false
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | Piano 1/4s
26 | 0.8
27 | 0
28 | false
29 |
30 |
31 |
32 |
33 | Piano Bass Dotted
34 | 0.8
35 | 0
36 | false
37 |
38 |
39 |
40 |
41 | 15.14
42 | 19.4
43 | 1.94
44 | 17.46
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 | 0
54 | 1
55 | 1
56 | 0.5
57 | 3
58 | 0
59 | 0
60 |
61 |
62 | 0.5
63 | 1
64 | 1.5
65 | 0.5
66 | rest
67 | 0
68 | 1
69 |
70 |
71 | 1
72 | 1
73 | 2
74 | 0.5
75 | 1
76 | 0
77 | 0
78 |
79 |
80 | 1.5
81 | 1
82 | 2.5
83 | 0.5
84 | 1
85 | 0
86 | 0
87 |
88 |
89 | 2
90 | 1
91 | 3
92 | 2
93 | rest
94 | 0
95 | 1
96 |
97 |
98 | 4
99 | 2
100 | 1
101 | 0.5
102 | 4
103 | 0
104 | 0
105 |
106 |
107 | 4.5
108 | 2
109 | 1.5
110 | 0.5
111 | rest
112 | 0
113 | 1
114 |
115 |
116 | 5
117 | 2
118 | 2
119 | 0.5
120 | 2
121 | 0
122 | 0
123 |
124 |
125 | 5.5
126 | 2
127 | 2.5
128 | 0.5
129 | 2
130 | 0
131 | 0
132 |
133 |
134 | 6
135 | 2
136 | 3
137 | 2
138 | rest
139 | 0
140 | 1
141 |
142 |
143 | 8
144 | 3
145 | 1
146 | 0.5
147 | 5
148 | 0
149 | 0
150 |
151 |
152 | 8.5
153 | 3
154 | 1.5
155 | 0.5
156 | rest
157 | 0
158 | 1
159 |
160 |
161 | 9
162 | 3
163 | 2
164 | 0.5
165 | 3
166 | 0
167 | 0
168 |
169 |
170 | 9.5
171 | 3
172 | 2.5
173 | 0.5
174 | 3
175 | 0
176 | 0
177 |
178 |
179 | 10
180 | 3
181 | 3
182 | 2
183 | rest
184 | 0
185 | 1
186 |
187 |
188 | 12
189 | 4
190 | 1
191 | 0.5
192 | 4
193 | 0
194 | 0
195 |
196 |
197 | 12.5
198 | 4
199 | 1.5
200 | 0.5
201 | rest
202 | 0
203 | 1
204 |
205 |
206 | 13
207 | 4
208 | 2
209 | 0.5
210 | 2
211 | 0
212 | 0
213 |
214 |
215 | 13.5
216 | 4
217 | 2.5
218 | 0.5
219 | 2
220 | 0
221 | 0
222 |
223 |
224 | 14
225 | 4
226 | 3
227 | 0.5
228 | rest
229 | 0
230 | 1
231 |
232 |
233 | 14.5
234 | 4
235 | 3.5
236 | 0.5
237 | rest
238 | 0
239 | 1
240 |
241 |
242 | 15
243 | 4
244 | 4
245 | 0.5
246 | rest
247 | 0
248 | 1
249 |
250 |
251 | 15.5
252 | 4
253 | 4.5
254 | 0.5
255 | rest
256 | 0
257 | 1
258 |
259 |
260 | 16
261 | 5
262 | 1
263 | 0.5
264 | 3
265 | 0
266 | 0
267 |
268 |
269 | 16.5
270 | 5
271 | 1.5
272 | 0.5
273 | rest
274 | 0
275 | 1
276 |
277 |
278 | 17
279 | 5
280 | 2
281 | 0.5
282 | 1
283 | 0
284 | 0
285 |
286 |
287 | 17.5
288 | 5
289 | 2.5
290 | 0.5
291 | 1
292 | 0
293 | 0
294 |
295 |
296 | 18
297 | 5
298 | 3
299 | 1
300 | rest
301 | 0
302 | 1
303 |
304 |
305 | 19
306 | 5
307 | 4
308 | 1
309 | rest
310 | 0
311 | 1
312 |
313 |
314 | 20
315 | 6
316 | 1
317 | 0.5
318 | 4
319 | 0
320 | 0
321 |
322 |
323 | 20.5
324 | 6
325 | 1.5
326 | 0.5
327 | rest
328 | 0
329 | 1
330 |
331 |
332 | 21
333 | 6
334 | 2
335 | 0.5
336 | 2
337 | 0
338 | 0
339 |
340 |
341 | 21.5
342 | 6
343 | 2.5
344 | 0.5
345 | 2
346 | 0
347 | 0
348 |
349 |
350 | 22
351 | 6
352 | 3
353 | 1
354 | rest
355 | 0
356 | 1
357 |
358 |
359 | 23
360 | 6
361 | 4
362 | 1
363 | rest
364 | 0
365 | 1
366 |
367 |
368 | 24
369 | 7
370 | 1
371 | 0.5
372 | 5
373 | 0
374 | 0
375 |
376 |
377 | 24.5
378 | 7
379 | 1.5
380 | 0.5
381 | rest
382 | 0
383 | 1
384 |
385 |
386 | 25
387 | 7
388 | 2
389 | 0.5
390 | 3
391 | 0
392 | 0
393 |
394 |
395 | 25.5
396 | 7
397 | 2.5
398 | 0.5
399 | 3
400 | 0
401 | 0
402 |
403 |
404 | 26
405 | 7
406 | 3
407 | 1
408 | rest
409 | 0
410 | 1
411 |
412 |
413 | 27
414 | 7
415 | 4
416 | 1
417 | rest
418 | 0
419 | 1
420 |
421 |
422 | 28
423 | 8
424 | 1
425 | 0.5
426 | 4
427 | 0
428 | 0
429 |
430 |
431 | 28.5
432 | 8
433 | 1.5
434 | 0.5
435 | rest
436 | 0
437 | 1
438 |
439 |
440 | 29
441 | 8
442 | 2
443 | 0.5
444 | 2
445 | 0
446 | 0
447 |
448 |
449 | 29.5
450 | 8
451 | 2.5
452 | 0.5
453 | 2
454 | 0
455 | 0
456 |
457 |
458 | 30
459 | 8
460 | 3
461 | 2
462 | rest
463 | 0
464 | 1
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 |
477 |
478 |
479 |
480 |
481 |
482 |
483 | 1
484 |
485 |
486 |
487 |
488 |
489 |
490 | 4
491 | 1
492 | 1
493 | 0
494 | 0
495 |
496 |
497 | 7
498 |
499 |
500 |
501 |
502 |
503 |
504 | 4
505 | 2
506 | 1
507 | 4
508 | 0
509 |
510 |
511 | 3
512 |
513 |
514 |
515 |
516 |
517 |
518 | 4
519 | 3
520 | 1
521 | 8
522 | 0
523 |
524 |
525 | 7
526 |
527 |
528 |
529 |
530 |
531 |
532 | 2
533 | 4
534 | 1
535 | 12
536 | 0
537 |
538 |
539 | 5
540 |
541 |
542 |
543 |
544 |
545 | 0
546 | 2
547 | 4
548 | 3
549 | 14
550 | 0
551 |
552 |
553 | 1
554 |
555 |
556 |
557 |
558 |
559 |
560 | 4
561 | 5
562 | 1
563 | 16
564 | 0
565 |
566 |
567 | 7
568 |
569 |
570 |
571 |
572 |
573 |
574 | 4
575 | 6
576 | 1
577 | 20
578 | 0
579 |
580 |
581 | 3
582 |
583 |
584 |
585 |
586 |
587 |
588 | 4
589 | 7
590 | 1
591 | 24
592 | 0
593 |
594 |
595 | 7
596 |
597 |
598 |
599 |
600 |
601 |
602 | 2
603 | 8
604 | 1
605 | 28
606 | 0
607 |
608 |
609 | 5
610 |
611 |
612 |
613 |
614 |
615 | 0
616 | 2
617 | 8
618 | 3
619 | 30
620 | 0
621 |
622 |
623 | 8
624 |
625 |
626 |
--------------------------------------------------------------------------------
/theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/chorus.xml:
--------------------------------------------------------------------------------
1 |
2 | 1.2
3 |
4 |
5 | 4 pils r
6 | 4
7 | 121
8 | C
9 | -I_zKOfTKIM
10 | 6
11 |
12 |
13 |
14 | Piano
15 | 0.8
16 | 0
17 | false
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | Piano 1/4s
26 | 0.8
27 | 0
28 | false
29 |
30 |
31 |
32 |
33 | Piano Bass Dotted
34 | 0.8
35 | 0
36 | false
37 |
38 |
39 |
40 |
41 | 46.82
42 | 19.83
43 | 1.98
44 | 17.84
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 | 0
54 | 1
55 | 1
56 | 1
57 | rest
58 | 0
59 | 1
60 |
61 |
62 | 1
63 | 1
64 | 2
65 | 0.5
66 | 1
67 | 0
68 | 0
69 |
70 |
71 | 1.5
72 | 1
73 | 2.5
74 | 0.5
75 | 1
76 | 0
77 | 0
78 |
79 |
80 | 2
81 | 1
82 | 3
83 | 1
84 | 1
85 | 0
86 | 0
87 |
88 |
89 | 3
90 | 1
91 | 4
92 | 0.5
93 | 2
94 | 0
95 | 0
96 |
97 |
98 | 3.5
99 | 1
100 | 4.5
101 | 0.5
102 | 3
103 | 0
104 | 0
105 |
106 |
107 | 4
108 | 2
109 | 1
110 | 0.5
111 | 4
112 | 0
113 | 0
114 |
115 |
116 | 4.5
117 | 2
118 | 1.5
119 | 0.5
120 | 2
121 | 0
122 | 0
123 |
124 |
125 | 5
126 | 2
127 | 2
128 | 0.5
129 | rest
130 | 0
131 | 1
132 |
133 |
134 | 5.5
135 | 2
136 | 2.5
137 | 0.5
138 | rest
139 | 0
140 | 1
141 |
142 |
143 | 6
144 | 2
145 | 3
146 | 0.5
147 | rest
148 | 0
149 | 1
150 |
151 |
152 | 6.5
153 | 2
154 | 3.5
155 | 0.5
156 | rest
157 | 0
158 | 1
159 |
160 |
161 | 7
162 | 2
163 | 4
164 | 0.5
165 | rest
166 | 0
167 | 1
168 |
169 |
170 | 7.5
171 | 2
172 | 4.5
173 | 0.5
174 | rest
175 | 0
176 | 1
177 |
178 |
179 | 8
180 | 3
181 | 1
182 | 0.5
183 | rest
184 | 0
185 | 1
186 |
187 |
188 | 8.5
189 | 3
190 | 1.5
191 | 0.5
192 | 2
193 | 0
194 | 0
195 |
196 |
197 | 9
198 | 3
199 | 2
200 | 0.5
201 | 2
202 | 0
203 | 0
204 |
205 |
206 | 9.5
207 | 3
208 | 2.5
209 | 0.5
210 | 2
211 | 0
212 | 0
213 |
214 |
215 | 10
216 | 3
217 | 3
218 | 0.5
219 | 2
220 | 0
221 | 0
222 |
223 |
224 | 10.5
225 | 3
226 | 3.5
227 | 0.5
228 | 2
229 | 0
230 | 0
231 |
232 |
233 | 11
234 | 3
235 | 4
236 | 0.5
237 | 3
238 | 0
239 | 0
240 |
241 |
242 | 11.5
243 | 3
244 | 4.5
245 | 0.5
246 | 2
247 | 0
248 | 0
249 |
250 |
251 | 12
252 | 4
253 | 1
254 | 0.5
255 | 1
256 | 0
257 | 0
258 |
259 |
260 | 12.5
261 | 4
262 | 1.5
263 | 0.5
264 | rest
265 | 0
266 | 1
267 |
268 |
269 | 13
270 | 4
271 | 2
272 | 0.5
273 | rest
274 | 0
275 | 1
276 |
277 |
278 | 13.5
279 | 4
280 | 2.5
281 | 0.5
282 | rest
283 | 0
284 | 1
285 |
286 |
287 | 14
288 | 4
289 | 3
290 | 0.5
291 | rest
292 | 0
293 | 1
294 |
295 |
296 | 14.5
297 | 4
298 | 3.5
299 | 0.5
300 | rest
301 | 0
302 | 1
303 |
304 |
305 | 15
306 | 4
307 | 4
308 | 0.5
309 | rest
310 | 0
311 | 1
312 |
313 |
314 | 15.5
315 | 4
316 | 4.5
317 | 0.5
318 | rest
319 | 0
320 | 1
321 |
322 |
323 | 16
324 | 5
325 | 1
326 | 0.5
327 | rest
328 | 0
329 | 1
330 |
331 |
332 | 16.5
333 | 5
334 | 1.5
335 | 0.5
336 | 5
337 | 0
338 | 0
339 |
340 |
341 | 17
342 | 5
343 | 2
344 | 0.5
345 | 5
346 | 0
347 | 0
348 |
349 |
350 | 17.5
351 | 5
352 | 2.5
353 | 0.5
354 | 5
355 | 0
356 | 0
357 |
358 |
359 | 18
360 | 5
361 | 3
362 | 0.5
363 | 5
364 | 0
365 | 0
366 |
367 |
368 | 18.5
369 | 5
370 | 3.5
371 | 0.5
372 | 5
373 | 0
374 | 0
375 |
376 |
377 | 19
378 | 5
379 | 4
380 | 0.5
381 | 5
382 | 0
383 | 0
384 |
385 |
386 | 19.5
387 | 5
388 | 4.5
389 | 0.5
390 | 5
391 | 0
392 | 0
393 |
394 |
395 | 20
396 | 6
397 | 1
398 | 0.5
399 | 4
400 | 0
401 | 0
402 |
403 |
404 | 20.5
405 | 6
406 | 1.5
407 | 0.5
408 | 2
409 | 0
410 | 0
411 |
412 |
413 | 21
414 | 6
415 | 2
416 | 0.5
417 | rest
418 | 0
419 | 1
420 |
421 |
422 | 21.5
423 | 6
424 | 2.5
425 | 0.5
426 | rest
427 | 0
428 | 1
429 |
430 |
431 | 22
432 | 6
433 | 3
434 | 0.5
435 | rest
436 | 0
437 | 1
438 |
439 |
440 | 22.5
441 | 6
442 | 3.5
443 | 0.5
444 | rest
445 | 0
446 | 1
447 |
448 |
449 | 23
450 | 6
451 | 4
452 | 0.5
453 | rest
454 | 0
455 | 1
456 |
457 |
458 | 23.5
459 | 6
460 | 4.5
461 | 0.5
462 | rest
463 | 0
464 | 1
465 |
466 |
467 | 24
468 | 7
469 | 1
470 | 0.5
471 | rest
472 | 0
473 | 1
474 |
475 |
476 | 24.5
477 | 7
478 | 1.5
479 | 0.5
480 | 3
481 | 0
482 | 0
483 |
484 |
485 | 25
486 | 7
487 | 2
488 | 0.5
489 | 3
490 | 0
491 | 0
492 |
493 |
494 | 25.5
495 | 7
496 | 2.5
497 | 0.5
498 | 4
499 | 0
500 | 0
501 |
502 |
503 | 26
504 | 7
505 | 3
506 | 1
507 | 5
508 | 0
509 | 0
510 |
511 |
512 | 27
513 | 7
514 | 4
515 | 1
516 | 7
517 | -1
518 | 0
519 |
520 |
521 | 28
522 | 8
523 | 1
524 | 1
525 | 4
526 | 0
527 | 0
528 |
529 |
530 |
531 |
532 |
533 |
534 |
535 |
536 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 | 1
547 |
548 |
549 |
550 |
551 |
552 |
553 | 4
554 | 1
555 | 1
556 | 0
557 | 0
558 |
559 |
560 | 7
561 |
562 |
563 |
564 |
565 |
566 |
567 | 4
568 | 2
569 | 1
570 | 4
571 | 0
572 |
573 |
574 | 7
575 |
576 |
577 |
578 |
579 |
580 |
581 | 4
582 | 3
583 | 1
584 | 8
585 | 0
586 |
587 |
588 | 1
589 |
590 |
591 |
592 |
593 |
594 |
595 | 2
596 | 4
597 | 1
598 | 12
599 | 0
600 |
601 |
602 | 3
603 |
604 |
605 |
606 |
607 |
608 |
609 | 1
610 | 4
611 | 3
612 | 14
613 | 0
614 |
615 |
616 | 2
617 |
618 |
619 |
620 |
621 |
622 | 0
623 | 1
624 | 4
625 | 4
626 | 15
627 | 0
628 |
629 |
630 | 1
631 |
632 |
633 |
634 |
635 |
636 |
637 | 4
638 | 5
639 | 1
640 | 16
641 | 0
642 |
643 |
644 | 7
645 |
646 |
647 |
648 |
649 |
650 |
651 | 4
652 | 6
653 | 1
654 | 20
655 | 0
656 |
657 |
658 | 3
659 |
660 |
661 |
662 |
663 |
664 |
665 | 4
666 | 7
667 | 1
668 | 24
669 | 0
670 |
671 |
672 | 7
673 |
674 |
675 |
676 |
677 |
678 |
679 | 2
680 | 8
681 | 1
682 | 28
683 | 0
684 |
685 |
686 | 5
687 |
688 |
689 |
690 |
691 |
692 | 0
693 | 2
694 | 8
695 | 3
696 | 30
697 | 0
698 |
699 |
700 | 8
701 |
702 |
703 |
--------------------------------------------------------------------------------
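The three section files above (`verse.xml`, `intro.xml`, `chorus.xml`) are the per-section Hooktheory exports saved by `theorytab_crawler.ipynb`, which parses them with lxml's recovering parser and reads the clip id from `meta/YouTubeID`. A minimal sketch of reading that id back out of a saved file, mirroring the lookup the crawler performs:

```python
from lxml import etree

# recover=True tolerates minor malformations in the exported XML
parser = etree.XMLParser(recover=True)
tree = etree.parse(
    'theorytab/archive/a/aage-aleksandersen/fire-pils-og-en-pizza/verse.xml',
    parser)

# same lookup the crawler runs right after fetching a section
youtube_id = tree.getroot().find('meta').find('YouTubeID').text
print(youtube_id)  # -I_zKOfTKIM for this song
```
--------------------------------------------------------------------------------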
/doug-mckenzie-jazz/codes/DMjazzCrawler.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 76,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import requests\n",
12 | "from bs4 import BeautifulSoup\n",
13 | "import os\n",
14 | "import sys\n",
15 | "import time\n",
16 | "import json\n",
17 | "import re"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 153,
23 | "metadata": {
24 | "collapsed": false
25 | },
26 | "outputs": [],
27 | "source": [
28 | "class DMjazzCrawler():\n",
29 | " BASE_URL = 'http://www.bushgrafts.com/jazz'\n",
30 | " ROOT = 'archive'\n",
31 | " \n",
32 | " def __init__(self, sleep_time=0.1, log=True):\n",
33 | " self.sleep_time = sleep_time\n",
34 | " self.log = log\n",
35 | "\n",
36 | " def _request_url(self, url, doctype='html'):\n",
37 | " # set header\n",
38 | " response = requests.get(url, headers = {\"Cache-Control\":\"max-age=0\"})\n",
39 | "\n",
40 | " # sleep\n",
41 | " time.sleep(self.sleep_time)\n",
42 | "\n",
43 | " # return\n",
44 | " if doctype =='html':\n",
45 | " soup = BeautifulSoup(response.text, 'html.parser')\n",
46 | " return soup\n",
47 | " elif doctype =='content':\n",
48 | " return response.content\n",
49 | " else:\n",
50 | " return response\n",
51 | "\n",
52 | " def _log_print(self, log, quite=False):\n",
53 | " if not quite:\n",
54 | " print(log)\n",
55 | "\n",
56 | " if self.log:\n",
57 | " with open(\"log.txt\", \"a\") as f:\n",
58 | " print(log, file=f)\n",
59 | " \n",
60 | " def fetch_song(self):\n",
61 | " self.soup = self._request_url(self.BASE_URL+'/midi.htm')\n",
62 | " a_list = dmc.soup.find_all('a')\n",
63 | " midi_list = []\n",
64 | " name_list = []\n",
65 | "\n",
66 | " cnt = 0\n",
67 | " for idx, a in enumerate(a_list):\n",
68 | " str_ = a.get('href')\n",
69 | " if str_ and (str_ not in midi_list) and ('.mid' in str_):\n",
70 | " song_name = re.sub( '\\s+', ' ' , a.text.replace( '\\r\\n' , '' )).strip(' ')\n",
71 | " if song_name:\n",
72 | " midi_fn = str_.split('/')[1]\n",
73 | " midi_list.append(midi_fn )\n",
74 | " name_list.append(song_name)\n",
75 | " print('%3d | %-40s %s'%(idx, song_name, midi_fn))\n",
76 | " cnt += 1\n",
77 | "\n",
78 | " self._log_print('Total: %d'%cnt)\n",
79 | " \n",
80 | " return dict(zip(midi_list, name_list))\n",
81 | " \n",
82 | " def crawl_song(self, song_dict):\n",
83 | " for idx, k in enumerate(song_dict.keys()):\n",
84 | " \n",
85 | " url = self.BASE_URL + '/Midi%20site/' + k\n",
86 | " print('%3d %s' %(idx, url))\n",
87 | " content = self._request_url(url, doctype='content')\n",
88 | " \n",
89 | " with open(os.path.join(self.ROOT,k), \"wb\") as f:\n",
90 | " f.write(content)\n",
91 | " \n",
92 | " def run(self):\n",
93 | " \n",
94 | " song_dict = self.fetch_song()\n",
95 | " \n",
96 | " if not os.path.exists(self.ROOT):\n",
97 | " os.makedirs(self.ROOT)\n",
98 | " with open(os.path.join(self.ROOT, 'archive.json'), \"w\") as f:\n",
99 | " json.dump(song_dict, f)\n",
100 | " \n",
101 | " self.crawl_song(song_dict)\n"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 152,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [
111 | {
112 | "name": "stdout",
113 | "output_type": "stream",
114 | "text": [
115 | " 33 | A Fine Romance - take 1 afine-1.mid\n",
116 | " 35 | A Fine Romance - take 2 afine-2.mid\n",
117 | " 36 | A Ghost Of A Chance Aghostofachance.mid\n",
118 | " 37 | A House Is Not A Home AHouseis.mid\n",
119 | " 38 | A Nightingale Sang... Anighting.mid\n",
120 | " 39 | A Remark You Made ARemarkYouMade.mid\n",
121 | " 40 | A Sleepin' Bee A Sleepin' Bee.mid\n",
122 | " 41 | After You've Gone AfterYou.mid\n",
123 | " 42 | Alfie alfiepno.mid\n",
124 | " 43 | Alice In Wonderland AliceInWonderland.mid\n",
125 | " 44 | All The Things You Are AllTheThings V2.mid\n",
126 | " 45 | All The Things You Are/2 All The Things You Are.mid\n",
127 | " 46 | All The Things Reharmonized AllTheThings Reharmonized.mid\n",
128 | " 47 | Alone Together (trio) Alone Together.mid\n",
129 | " 49 | Ask Me Now (Monk) Ask Me Now 2.mid\n",
130 | " 50 | Ave Maria Bach Prelude No 1 (Ave Maria).mid\n",
131 | " 51 | Autumn In New York Autumn In NY.mid\n",
132 | " 52 | Autumn Leaves AutumnLeaves.mid\n",
133 | " 55 | Baby It's Cold Outside Baby its Cold outside.mid\n",
134 | " 57 | Beautiful Love Beautiful Love (Doug McKenzie).mid\n",
135 | " 58 | Beethoven Pathetique Pathetique.mid\n",
136 | " 59 | Begin The Beguine Begin The Beguine.mid\n",
137 | " 60 | Bess You Is... Bess You Is.mid\n",
138 | " 61 | Blackbird Blackbird (Brad Mehldau).mid\n",
139 | " 62 | Blame It On My Youth Blameiton.mid\n",
140 | " 63 | Blue Bossa (tk1) BlueBossa1GM.mid\n",
141 | " 64 | Blue Bossa (tk2) BlueBossa3GM.mid\n",
142 | " 65 | Blue Room Blue room midi file.mid\n",
143 | " 66 | Brazilian Like Brazillike.mid\n",
144 | " 67 | Brazilian Suite BrazilianSuite.mid\n",
145 | " 68 | Broadway broadway.mid\n",
146 | " 69 | But Beautiful But Beautifulsolo.mid\n",
147 | " 70 | By Myself Bymyself.mid\n",
148 | " 71 | By Myself /2 By Myself3.mid\n",
149 | " 73 | By The Time I Get... By The Time.mid\n",
150 | " 76 | Cantabile Cantabile 2.mid\n",
151 | " 77 | Caravan (trio) Caravan2.mid\n",
152 | " 78 | Carnival (Black Orpheus) Carnival.mid\n",
153 | " 80 | Cast Your Fate... Cast Your Fate.mid\n",
154 | " 81 | Chelsea Bridge Chelsea Bridge.mid\n",
155 | " 82 | Chopin Waltz Chopin WaltzAb.mid\n",
156 | " 83 | Cinema Paradiso cinema.mid\n",
157 | " 84 | Close Your Eyes Close your eyes.mid\n",
158 | " 85 | Come Rain Or Come Shine (solo) Come Rain or Come Shine V1.mid\n",
159 | " 86 | Come Rain Or Come Shine (duet) Come Rain or Come Shine duo.mid\n",
160 | " 87 | Come Sunday comesun.mid\n",
161 | " 88 | Cry Me A River Cry me a river.mid\n",
162 | " 89 | Cubano Chant Cubano Chant 2.mid\n",
163 | " 92 | Dancing On The Ceiling DancingontheCeiling.mid\n",
164 | " 93 | Danny Boy Dannyboy.mid\n",
165 | " 94 | Day Dream Day Dream.mid\n",
166 | " 95 | Days Of Wine And Roses DaysofWine.mid\n",
167 | " 96 | Dearly Beloved dearlybeloved.mid\n",
168 | " 97 | Deep Purple DeepPurple.mid\n",
169 | " 98 | Desafinado Desafinado.mid\n",
170 | "100 | Desire desire.mid\n",
171 | "101 | Detour Ahead Detour ahead.mid\n",
172 | "102 | Do I Love You Because... Do I Love You Because you're Beautiful.mid\n",
173 | "103 | Dolphin Dance dolphindance3.mid\n",
174 | "104 | Don't Explain Don't Explain solo.mid\n",
175 | "105 | Dreamsville Dreamsville.mid\n",
176 | "108 | Easy Does It [M] Easy does it.mid\n",
177 | "109 | Easy Living [M] Easy Living 3.mid\n",
178 | "110 | Easy To Love EasytoLove2.mid\n",
179 | "111 | Effendi Effendi - McCoy Tyner.mid\n",
180 | "112 | Eleanor Rigby Eleanor Rigby.mid\n",
181 | "114 | Emily emily.mid\n",
182 | "116 | Estate estate.mid\n",
183 | "117 | Exactly Like You Exactly Like You.mid\n",
184 | "121 | Falling Grace Falling Grace .mid\n",
185 | "122 | Falling In Love With Love (trio) Falling in Love with Love trio.mid\n",
186 | "124 | For Sentimental Reasons ForSentimentalReasons.mid\n",
187 | "127 | Gaviota Gaviota trio 2.mid\n",
188 | "128 | Georgia On My Mind Georgia.mid\n",
189 | "129 | Girl Talk GirlTalk.mid\n",
190 | "130 | Give Me The Simple Life simplelife.mid\n",
191 | "131 | Goin' Home Goin'_Home_(Real_Time)_(Antonin_Dvorak).mid\n",
192 | "132 | Good Bait goodbait.mid\n",
193 | "133 | Good Bait GoodBaitPiano.mid\n",
194 | "134 | Good Bait goodbaitGM.mid\n",
195 | "135 | Gone With The Wind gonewind.mid\n",
196 | "136 | Gone With The Wind (II) [M] Gone with the wind.mid\n",
197 | "137 | Green Dolphin Street Green Dolph solo.mid\n",
198 | "138 | Gymnopedie/Medley Gymnopedie-It Never Entered My Mind.mid\n",
199 | "141 | Have You Met Miss Jones (duet) Have You Met - duet.mid\n",
200 | "142 | Have You Met Miss Jones (latin) Have You Met Miss Jones - latin.mid\n",
201 | "143 | How Come You Like Me... HowkumU.mid\n",
202 | "145 | How Deep Is The Ocean (old) How Deep is the Ocean (Doug McKenzie).mid\n",
203 | "146 | How Deep Is The Ocean (solo) howdsolo.mid\n",
204 | "147 | How Deep Is The Ocean (trio) hwdptrio.mid\n",
205 | "148 | Hymn To Freedom Hymn To Freedom.mid\n",
206 | "151 | I'm Old Fashioned (trio) [M] oldfashionedtrio.mid\n",
207 | "152 | I Can't Get Started Icantgetstarted.mid\n",
208 | "153 | I Could Write A Book couldwrite.mid\n",
209 | "154 | I Concentrate On You I Concentrate on You.mid\n",
210 | "155 | I Cover The Waterfront I Cover the Waterfront - solo.mid\n",
211 | "156 | I Fall In Love Too Easily I fall in Love Too Easily.mid\n",
212 | "157 | I Fall In Love Too Easily I Fall in Love v2.mid\n",
213 | "158 | I Hear A Rhapsody I Hear a Rhapsody.mid\n",
214 | "159 | I'll Close My Eyes (trio) I'll Close My Eyes 2.mid\n",
215 | "161 | I Love's You Porgy /1 [M] porgy1.mid\n",
216 | "162 | I Love's You Porgy /2 [M] porgy2.mid\n",
217 | "163 | I Never Knew INeverKnew.mid\n",
218 | "164 | I Remember Clifford (tk1) Clifford1.mid\n",
219 | "165 | I Remember Clifford (tk 2) Clifford2.mid\n",
220 | "166 | I Remember You (solo) [M] Irememberyousolo.mid\n",
221 | "167 | I Should Care McKenzie-Ishouldcare2.mid\n",
222 | "168 | I Should Care / take 2 ishouldcare2.mid\n",
223 | "169 | I Thought About You I Thought About You (Doug McKenzie).mid\n",
224 | "170 | I Want To Be Happy IWantToBeHappyXG.mid\n",
225 | "171 | If Ever I Should Leave You If Ever I should leave You.mid\n",
226 | "172 | If I Loved You if i loved you.mid\n",
227 | "173 | If I Should Leave You IfIshouldLeaveYou.mid\n",
228 | "174 | If I Were A Bell IfIwere.mid\n",
229 | "175 | I'm Confessin' I'm Confessin' - solo.mid\n",
230 | "176 | I'm Old Fashioned I'mOldFash.mid\n",
231 | "177 | I'm Old Fashioned (tk 2) ImOldFash.mid\n",
232 | "178 | In A Sentimental Mood Inasent.mid\n",
233 | "180 | Indiana Indiana.mid\n",
234 | "181 | In Your Own Sweet Way (tk1) SweetWay1.mid\n",
235 | "182 | In Your Own Sweet Way (tk2) SweetWay2.mid\n",
236 | "183 | In Your Own Sweet Way (tk3) SweetWay3.mid\n",
237 | "184 | Isfahan (Elf) Isfahan (Elf).mid\n",
238 | "185 | Isn't It Romantic [M] Isn_t_it_Romantic.mid\n",
239 | "186 | It Could Happen To You It Could Happen V2.mid\n",
240 | "187 | It Could Happen To You (trio) [M] It Could Happen - piano trio.mid\n",
241 | "188 | It Could Happen To You ItCouldHappenGM.mid\n",
242 | "189 | It Don't Mean A Thing... [M] Itdon'tmeanathing.mid\n",
243 | "190 | It's Easy To Remember Its Easy to Remember.mid\n",
244 | "191 | It Might As Well Be Spring It Might as well be Spring v2.mid\n",
245 | "193 | It's The Talk Of The Town Its theTalk Of The Town - solo.mid\n",
246 | "194 | I've Grown Accustomed... accustomed.mid\n",
247 | "197 | Just A Gigolo Just a Gigolo.mid\n",
248 | "198 | Just Friends justfrien solo.mid\n",
249 | "199 | Just Friends (trio) Just Friends Trio.mid\n",
250 | "200 | Just You, Just Me justyou_mark.mid\n",
251 | "203 | Ladies In Mercedes Ladies in Mercedes.mid\n",
252 | "204 | Lady Be Good ladybegood.mid\n",
253 | "205 | Laura laura.mid\n",
254 | "206 | Les Grelots (Petrucciani) grelots.mid\n",
255 | "208 | Like Being In Love It'sAlmostLikeBeingInLove.mid\n",
256 | "209 | Little Girl Blue Little Girl Blue.mid\n",
257 | "210 | Long Ago And Far Away [M] Long Ago and Far away - 3 versions.mid\n",
258 | "211 | Look For The Silver Lining [M] LookfortheSilverLining.mid\n",
259 | "212 | Louisiana louisiana.mid\n",
260 | "213 | Love Letters love letters.mid\n",
261 | "214 | Love Walked In Love walked In.mid\n",
262 | "215 | Love Walked In LoveWalked.mid\n",
263 | "216 | Lover Come Back... lovercome piano.mid\n",
264 | "217 | Lullaby Lullaby.mid\n",
265 | "219 | Lush Life LushLife trio.mid\n",
266 | "222 | Mad About The Boy Mad About the Boy.mid\n",
267 | "223 | Maiden Voyage maidenvoyage.mid\n",
268 | "224 | Maid(en)s Of Cadiz Maids of Cadiz 2.mid\n",
269 | "225 | Make Someone Happy Make Someone Happy.mid\n",
270 | "226 | Manteca manteca.mid\n",
271 | "227 | Mean To Me Mean to Me (Doug McKenzie).mid\n",
272 | "228 | Medley \"The King And I\" KingandI.mid\n",
273 | "229 | Memories Of Paris Memories of Paris.mid\n",
274 | "230 | Merry Christmas merryxmas.mid\n",
275 | "231 | Mike Pee Mike Pee 2.mid\n",
276 | "233 | Milestones milestones.mid\n",
277 | "234 | Miyako Miyako.mid\n",
278 | "235 | Mood Indigo Mood Indigo - solo.mid\n",
279 | "236 | Moon And Sand Moon and Sand.mid\n",
280 | "237 | Moon River Moon River 3.mid\n",
281 | "239 | Moonlight In Vermont moonlightinvermont.mid\n",
282 | "240 | My Foolish Heart MyFoolishHeart.mid\n",
283 | "241 | My Funny Valentine MyFunny3.mid\n",
284 | "242 | My Funny Valentine (solo) funny val solo.mid\n",
285 | "244 | My Heart Stood Still myheartstoodstill edited a bit.mid\n",
286 | "245 | My Man's Gone Now ManGone.mid\n",
287 | "247 | My One And Only Love Myoneand.mid\n",
288 | "248 | My Romance My Romance.mid\n",
289 | "249 | My Shining Hour My Shining Hour.mid\n",
290 | "250 | My Shining Hour (solo) MyShiningHoursolo.mid\n",
291 | "251 | My Ship My ship solo.mid\n",
292 | "254 | Nardis Nardis.mid\n",
293 | "256 | New Orleans NewOrleans.mid\n",
294 | "257 | Nocturne In Eb NocturneEbChopin.mid\n",
295 | "260 | Old Folks Old Folks.mid\n",
296 | "261 | Old Man River Old Man River.mid\n",
297 | "262 | Once Upon A Summertime Once upon a summertime.mid\n",
298 | "263 | On Green Dolphin Street On Green Dolphin 4.mid\n",
299 | "264 | Our Love Is Here To Stay (trio) loveishere to stay.mid\n",
300 | "265 | Out Of Nowhere (1) Out of Nowhere 1.mid\n",
301 | "266 | Out Of Nowhere (2) Out of Nowhere 2.mid\n",
302 | "267 | Out Of Nowhere (3) Out of Nowhere 3.mid\n",
303 | "268 | Over The Rainbow (trio) Over the rain trio.mid\n",
304 | "269 | Over The Rainbow (GM) OverTheRainbowGM.mid\n",
305 | "270 | Over The Rainbow (XG) OverTheRainbowXG.mid\n",
306 | "274 | Pent Up House pentupHouse.mid\n",
307 | "275 | Penthouse Serenade penthouse s.mid\n",
308 | "276 | People Will Say We're In Love People Will Say We're in Love2.mid\n",
309 | "277 | Periscope periscope.mid\n",
310 | "278 | Pick Yourself Pick yourself.mid\n",
311 | "279 | Poinciana poinciana.mid\n",
312 | "281 | Prelude To A Kiss Prelude to a kiss.mid\n",
313 | "282 | Pure Imagination Pure Imagination.mid\n",
314 | "285 | Rachid Rachid.mid\n",
315 | "286 | Rain Waltz Rainwalt.mid\n",
316 | "287 | Recardo Bossa Nova Recardo.mid\n",
317 | "288 | Recordame Recordame.mid\n",
318 | "289 | Remember Remember - solo jazz piano.mid\n",
319 | "290 | Renewal [M] Renewaltrio.mid\n",
320 | "293 | Sammy Walked In SammyWalked.mid\n",
321 | "294 | St Patrick's Day Special Danny Boy.mid\n",
322 | "295 | Secret Love McKenzie-secret love.mid\n",
323 | "296 | Send In The Clowns send in the clowns.mid\n",
324 | "297 | Serenata Serenata solo.mid\n",
325 | "298 | Shenandoah Shenandoah.mid\n",
326 | "299 | Shiny Stockings shinystockings.mid\n",
327 | "300 | Since We Met Since We Met Jazz Kit.mid\n",
328 | "301 | Sixteen Going On Seventeen sixteengoing.mid\n",
329 | "302 | Skylark Skylark 2.mid\n",
330 | "303 | Strollin' Strollin'.mid\n",
331 | "304 | So In Love (solo) SoinLovesolo.mid\n",
332 | "305 | So In Love (trio) SoInlove - trio version.mid\n",
333 | "306 | Some Day My Prince... Some day My Prince.mid\n",
334 | "308 | Some Other Time Some Other Time.mid\n",
335 | "309 | Something Wonderful something wonderful.mid\n",
336 | "310 | Sometime Ago Sometimeago.mid\n",
337 | "311 | Sonnymoon For 2 sonnymoo.mid\n",
338 | "313 | Soon soon.mid\n",
339 | "314 | Sophisticated Lady SophisticatedLady.mid\n",
340 | "315 | Soul Eyes SoulEyessolo.mid\n",
341 | "316 | Spain / 2 Spain_2_(Chick_Corea).mid\n",
342 | "317 | Spain / 3 spain-3.mid\n",
343 | "318 | Theme From Spartacus Spartacus- 2 pianos.mid\n",
344 | "319 | Speak Low Speak Low (Doug McKenzie).mid\n",
345 | "320 | Spring Is Here Spring Is Here - Bill Evans chords.mid\n",
346 | "321 | Spring Is Here /2 Spring is Here 2 (K Barron).mid\n",
347 | "323 | Spring Is Here /3 Spring Is Here2-Bill Evans chords.mid\n",
348 | "327 | Star Eyes star eyes.mid\n",
349 | "328 | Stars Fell On Alabama (duet) Alabama.mid\n",
350 | "329 | Stella By Starlight (solo) Stella solo.mid\n",
351 | "331 | Stormy Weather stormyweather.mid\n",
352 | "332 | Strange Meadow Lark StrangeMeadowlark.mid\n",
353 | "333 | Surrey/Beautiful Medley Surrey medley.mid\n",
354 | "334 | Sweet And Lovely SweetAndLovely.mid\n",
355 | "335 | Sweet And Lovely (2) Sweet1.mid\n",
356 | "336 | Sweet Lorraine Sweetlorraine.mid\n",
357 | "339 | Take The A Train taketheatrain.mid\n",
358 | "340 | Taking A Chance (on Love) TakinACh.mid\n",
359 | "341 | Taking A Chance On Love (solo) Taking a Chance On Love - Solo piano.mid\n",
360 | "342 | Tea For Two [M] Tea for two.mid\n",
361 | "343 | That Old Devil Called Love That Ole Devil Called Love.mid\n",
362 | "344 | The Duke TheDuke.mid\n",
363 | "345 | The End Of A Love Affair The End of a Love Affair.mid\n",
364 | "347 | The Folks Who Live... The folks who live on the hill.mid\n",
365 | "348 | The Man That Got Away TheManThatGotAway.mid\n",
366 | "349 | The Masquerade Is Over The Masquerade is Over.mid\n",
367 | "350 | The More I See You moreicu.mid\n",
368 | "352 | The Peacocks McKenzie-ThePeacocks.mid\n",
369 | "353 | The Peanut Vendor Peanut Vendor (El Manisero).mid\n",
370 | "354 | The Song Is You (trio) [M] TheSongisYoutrio.mid\n",
371 | "355 | The Summer Wind Summer wind 2.mid\n",
372 | "356 | The Way You Look Tonight (trio) The Way You look trio.mid\n",
373 | "358 | There Is No Greater Love thereisnogreaterlove.mid\n",
374 | "359 | These Foolish Things These Foolish Things.mid\n",
375 | "360 | Things Ain't What They Used To Be Thingsaintwhattheyusedtobe.mid\n",
376 | "361 | This Nearly Was Mine This nearly was mine.mid\n",
377 | "362 | This Time The Dreams On Me This Time the Dreams on Me.mid\n",
378 | "363 | There Will Never Be Another You There will never be another you.mid\n",
379 | "364 | They Say Its Wonderful They say its wonderful.mid\n",
380 | "365 | Time After Time Time after Time 2.mid\n",
381 | "367 | Time Remembered /1 timerem2.mid\n",
382 | "368 | Time Remembered /2 Time remembered.mid\n",
383 | "369 | Too Young To Go Steady TooYoungtogoSteady.mid\n",
384 | "370 | Try A Little Tenderness Tryalittle21.mid\n",
385 | "371 | Try To Remember Try To Remember.mid\n",
386 | "372 | Two For The Road Two For The Road 5.mid\n",
387 | "374 | Turn Out The Stars turnoutthestars.mid\n",
388 | "377 | Up With The Lark upwiththelark.mid\n",
389 | "380 | Very Early (solo) Very Early solo piano.mid\n",
390 | "383 | Waltz For Dave Waltse For Dave.mid\n",
391 | "384 | Warm Valley Warmvalleysolo.mid\n",
392 | "385 | We Will Meet Again We Will Meet Again.mid\n",
393 | "386 | What Is There To Say What is there to say.mid\n",
394 | "388 | What Is This Thing... What is this thing - solo.mid\n",
395 | "389 | When I Fall In Love When I Fall in Love.mid\n",
396 | "390 | Where Is Love Where is Love - solo.mid\n",
397 | "391 | While We're Young Whilewereyoung.mid\n",
398 | "392 | Witchcraft Witchcraft.mid\n",
399 | "394 | Who Can I Turn To? WhoCanI.mid\n",
400 | "395 | Why Did I Choose You whydidi.mid\n",
401 | "396 | Why Do I Love You WhyDoILoveYou.mid\n",
402 | "399 | Yesterday Yesterday.mid\n",
403 | "400 | Yesterdays Yesterdays1.mid\n",
404 | "401 | You Are So Beautiful You Are So Beautiful The Grand.mid\n",
405 | "402 | You Do Something To Me You do something to Me.mid\n",
406 | "404 | You Don't Know What ... youdontknow trio.mid\n",
407 | "405 | You Stepped Out Of... You Stepped Out of a Dream (Doug McKenzie).mid\n",
408 | "406 | You Took Advantage... youtookadvantageofme.mid\n",
409 | "407 | You've Changed youhavechanged.mid\n",
410 | "408 | Young And Foolish Young and Foolish.mid\n",
411 | "412 | Zingaro Zingaro - Jobim.mid\n",
412 | "Total: 297\n",
413 | " 0 http://www.bushgrafts.com/jazz//Midi%20site/afine-1.mid\n",
414 | " 1 http://www.bushgrafts.com/jazz//Midi%20site/afine-2.mid\n",
415 | " 2 http://www.bushgrafts.com/jazz//Midi%20site/Aghostofachance.mid\n",
416 | " 3 http://www.bushgrafts.com/jazz//Midi%20site/AHouseis.mid\n",
417 | " 4 http://www.bushgrafts.com/jazz//Midi%20site/Anighting.mid\n",
418 | " 5 http://www.bushgrafts.com/jazz//Midi%20site/ARemarkYouMade.mid\n",
419 | " 6 http://www.bushgrafts.com/jazz//Midi%20site/A Sleepin' Bee.mid\n"
420 | ]
421 | },
422 | {
423 | "ename": "KeyboardInterrupt",
424 | "evalue": "",
425 | "output_type": "error",
426 | "traceback": [
427 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
428 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
429 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdmc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mDMjazzCrawler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdmc\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
430 | "\u001b[0;32m\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0mjson\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msong_dict\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 74\u001b[0;31m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcrawl_song\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msong_dict\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
431 | "\u001b[0;32m\u001b[0m in \u001b[0;36mcrawl_song\u001b[0;34m(self, song_dict)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0murl\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mBASE_URL\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m'/Midi%20site/'\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'%3d %s'\u001b[0m \u001b[1;33m%\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0midx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 60\u001b[0;31m \u001b[0mcontent\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_request_url\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdoctype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'content'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 61\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mROOT\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"wb\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
432 | "\u001b[0;32m\u001b[0m in \u001b[0;36m_request_url\u001b[0;34m(self, url, doctype)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[1;31m# sleep\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msleep_time\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[1;31m# return\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
433 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
434 | ]
435 | }
436 | ],
437 | "source": [
438 | "dmc = DMjazzCrawler()\n",
439 | "dmc.run()"
440 | ]
441 | }
442 | ],
443 | "metadata": {
444 | "kernelspec": {
445 | "display_name": "Python 3",
446 | "language": "python",
447 | "name": "python3"
448 | },
449 | "language_info": {
450 | "codemirror_mode": {
451 | "name": "ipython",
452 | "version": 3
453 | },
454 | "file_extension": ".py",
455 | "mimetype": "text/x-python",
456 | "name": "python",
457 | "nbconvert_exporter": "python",
458 | "pygments_lexer": "ipython3",
459 | "version": "3.6.0"
460 | }
461 | },
462 | "nbformat": 4,
463 | "nbformat_minor": 2
464 | }
465 |
--------------------------------------------------------------------------------
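`DMjazzCrawler` hard-codes the `%20` in `'/Midi%20site/'` because the files live in a directory whose name contains a space, and many filenames themselves contain spaces or apostrophes. `urllib.parse.quote` produces the same kind of percent-encoded path without hand-writing escapes; a small sketch:

```python
from urllib.parse import quote

base = 'http://www.bushgrafts.com/jazz'
midi_fn = "A Sleepin' Bee.mid"  # a real filename from the list above

# quote() keeps '/' intact by default and percent-encodes spaces, quotes, etc.
url = base + '/' + quote('Midi site/' + midi_fn)
print(url)
# http://www.bushgrafts.com/jazz/Midi%20site/A%20Sleepin%27%20Bee.mid
```
--------------------------------------------------------------------------------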
/theorytab/utils/theorytab_crawler.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {
7 | "collapsed": false
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import requests\n",
12 | "from bs4 import BeautifulSoup\n",
13 | "import os\n",
14 | "import time\n",
15 | "import re\n",
16 | "import json\n",
17 | "import string\n",
18 | "from youtube_crawler import video_crawler\n",
19 | "from lxml import etree"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 9,
25 | "metadata": {
26 | "collapsed": true
27 | },
28 | "outputs": [],
29 | "source": [
30 | "def song_retrieval(artist, song, path_song):\n",
31 | "\n",
32 | " song_url = 'https://www.hooktheory.com/theorytab/view/' + artist + '/' + song \n",
33 | " response_song = requests.get(song_url)\n",
34 | "\n",
35 | " soup = BeautifulSoup(response_song.text, 'html.parser')\n",
36 | " li_list = soup.findAll(\"li\", { \"role\":\"presentation\"})\n",
37 | "\n",
38 | " section_list = []\n",
39 | " pk_list = []\n",
40 | " \n",
41 | " ## section \n",
42 | " for i in range(len(li_list)-1):\n",
43 | " sec = li_list[i].text.strip().lower().replace(\" \", \"-\")\n",
44 | " section_list.append(sec)\n",
45 | " pk_list.append(soup.findAll(\"div\", { \"role\":\"tabpanel\", \"id\":sec})[0].contents[0]['id'])\n",
46 | "\n",
47 | " ## save xml\n",
48 | " for idx, pk in enumerate(pk_list):\n",
49 | " req_url = 'https://www.hooktheory.com/songs/getXmlByPk?pk=' + str(pk) ##\n",
50 | " response_info = requests.get(req_url)\n",
51 | " content = response_info.text\n",
52 | " \n",
53 | " with open(os.path.join(path_song, section_list[idx] + \".xml\"), \"w\", encoding=\"utf-8\") as f:\n",
54 | " f.write(content)\n",
55 | " time.sleep(0.08)\n",
56 | " \n",
57 | " ## get genre\n",
58 | " wikiid = soup.findAll(\"multiselect\", { \"items\":\"genres\"})[0]['wikiid']\n",
59 | " response_genre = requests.get('https://www.hooktheory.com/wiki/'+ str(wikiid) +'/genres')\n",
60 | " genre_act_list = json.loads(response_genre.text)\n",
61 | " genres = []\n",
62 | " for g in genre_act_list:\n",
63 | " if g['active']:\n",
64 | " genres.append(g['name']) \n",
65 | " ## saving\n",
66 | " info = {'section': section_list, 'pk':pk_list, 'song_url':song_url,\n",
67 | " 'genres': genres, 'wikiid':wikiid}\n",
68 | "\n",
69 | " with open(os.path.join(path_song, 'song_info.json'), \"w\") as f:\n",
70 | " json.dump(info, f)\n",
71 | "\n",
72 | " ## youtube\n",
73 | " parser = etree.XMLParser(recover=True)\n",
74 | " root = etree.fromstring(content, parser=parser)\n",
75 | " y_id = root.find('meta').find('YouTubeID').text\n",
76 | "\n",
77 | " video_crawler(y_id, path_song)\n",
78 | " \n",
79 | " \n",
80 | "def get_song_list(url_artist, quite=False):\n",
81 | " response_tmp = requests.get(website + url_artist)\n",
82 | " soup = BeautifulSoup(response_tmp.text, 'html.parser')\n",
83 | " item_list = soup.find_all(\"li\", { \"class\":\"grid-item\"})\n",
84 | "\n",
85 | " song_name_list = []\n",
86 | " for item in item_list:\n",
87 | " song_name = item.find_all(\"a\", { \"class\":\"a-tab-cover\"})[0]['href'].split('/')[-1]\n",
88 | " song_name_list.append(song_name)\n",
89 | " if not quite:\n",
90 | " print(' > %s' % song_name)\n",
91 | " return song_name_list \n",
92 | " "
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "### Retrive urls of all artists and songs"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 16,
105 | "metadata": {
106 | "collapsed": false
107 | },
108 | "outputs": [
109 | {
110 | "name": "stdout",
111 | "output_type": "stream",
112 | "text": [
113 | "==[w]=================================================\n",
114 | "https://www.hooktheory.com/theorytab/artists/w?page=1\n",
115 | "Total: 77\n",
116 | "----\n",
117 | "waka-flocka-flame\n",
118 | " > no-hands\n",
119 | "wakabayashi-mitsuru\n",
120 | " > this-game\n",
121 | "walden\n",
122 | " > ciaco\n",
123 | "walk-off-the-earth\n",
124 | " > lightning-bolt\n",
125 | "walk-the-moon\n",
126 | " > shut-up-and-dance\n",
127 | "wallpaper\n",
128 | " > best-song-everrr\n",
129 | "wally-badarou\n",
130 | " > the-dachstein-angels\n",
131 | "walter-jack-rollins-and-steve-nelson\n",
132 | " > frosty-the-snowman\n",
133 | "walter-buckley\n",
134 | " > crazy-little-thing-called-love\n",
135 | "walter-murphy\n",
136 | " > american-dad---theme-song\n",
137 | " > family-guy---theme-song\n",
138 | " > family-guy-theme-song\n",
139 | " > the-freaking-fcc\n",
140 | "wan-wan\n",
141 | " > divas-device\n",
142 | " > life-guiding-song---prospect-mirai\n",
143 | " > sky-filling-departed-spirit---lightning-word\n",
144 | " > surpass-the-gate-of-the-silver-key\n",
145 | " > the-princess-that-was-erased-from-history\n",
146 | " > world-yamataizer\n",
147 | "wang-chung\n",
148 | " > everybody-have-fun-tonight\n",
149 | "wanting\n",
150 | " > jar-of-love\n",
151 | "war\n",
152 | " > low-rider\n",
153 | " > why-cant-we-be-friends\n",
154 | "warpaint\n",
155 | " > baby\n",
156 | " > shadows\n",
157 | "warrant\n",
158 | " > i-saw-red\n",
159 | "warren-zevon\n",
160 | " > accidentally-like-a-martyr\n",
161 | " > please-stay\n",
162 | " > things-to-do-in-denver-when-youre-dead\n",
163 | " > werewolves-of-london\n",
164 | "warsongs\n",
165 | " > piercing-light\n",
166 | "washed-out\n",
167 | " > new-theory\n",
168 | "wave-master\n",
169 | " > deja-vu-canals\n",
170 | " > time-square\n",
171 | "wave-racer\n",
172 | " > flash-drive\n",
173 | " > streamers\n",
174 | "wavetraxx\n",
175 | " > beach-stringz---lennox-remix\n",
176 | "wavves\n",
177 | " > afraid-of-heights\n",
178 | " > dreams-of-grandeur\n",
179 | " > nine-is-god\n",
180 | " > pony\n",
181 | " > super-soaker\n",
182 | " > thats-on-me\n",
183 | " > way-too-much\n",
184 | "we-lost-the-sea\n",
185 | " > bogatyri\n",
186 | "we-the-kings\n",
187 | " > check-yes-juliet\n",
188 | " > stay-young\n",
189 | "we-were-evergreen\n",
190 | " > baby-blue\n",
191 | " > best-thing\n",
192 | "weather-report\n",
193 | " > birdland\n",
194 | "weeabophone\n",
195 | " > beeeeees\n",
196 | "weebl\n",
197 | " > amazing-horse\n",
198 | " > donkeys\n",
199 | " > fat-labrador\n",
200 | " > owls\n",
201 | "ween\n",
202 | " > freedom-of-76\n",
203 | " > happy-colored-marbles\n",
204 | " > object\n",
205 | " > ocean-man\n",
206 | " > roses-are-free\n",
207 | " > the-hiv-song\n",
208 | " > transdermal-celebration\n",
209 | " > what-deaner-was-talking-about\n",
210 | "weezer\n",
211 | " > buddy-holly\n",
212 | " > dreamin\n",
213 | " > el-scorcho\n",
214 | " > fall-together-\n",
215 | " > falling-for-you\n",
216 | " > i-want-you-to\n",
217 | " > island-in-the-sun\n",
218 | " > king-of-the-world\n",
219 | " > my-name-is-jonas\n",
220 | " > no-one-else\n",
221 | " > say-it-aint-so\n",
222 | " > the-greatest-man-that-ever-lived\n",
223 | "weird-al-yankovic\n",
224 | " > do-i-creep-you-out\n",
225 | " > dont-download-this-song\n",
226 | " > everything-you-know-is-wrong\n",
227 | " > hardware-store\n",
228 | " > midnight-star\n",
229 | " > ringtone\n",
230 | " > since-youve-been-gone\n",
231 | " > skipper-dan\n",
232 | " > stop-forwarding-that-crap-to-me\n",
233 | " > the-night-santa-went-crazy\n",
234 | " > virus-alert\n",
235 | "wellenrausch\n",
236 | " > citylights\n",
237 | "wendell192\n",
238 | " > robots\n",
239 | "wet-wet-wet\n",
240 | " > love-is-all-around\n",
241 | "wg-snuffy-walden\n",
242 | " > west-wing-suite\n",
243 | "wham\n",
244 | " > freedom\n",
245 | " > last-christmas\n",
246 | "white-town\n",
247 | " > once-i-flew\n",
248 | " > your-woman\n",
249 | "whiteflame\n",
250 | " > senbonzakura\n",
251 | "whitesnake\n",
252 | " > here-i-go-again\n",
253 | " > is-this-love\n",
254 | "whitney-houston\n",
255 | " > all-the-man-that-i-need\n",
256 | " > greatest-love-of-all\n",
257 | " > how-will-i-know\n",
258 | " > i-have-nothing\n",
259 | " > i-wanna-dance-with-somebody\n",
260 | " > i-will-always-love-you\n",
261 | " > im-your-baby-tonight\n",
262 | " > its-not-right-but-its-okay---remix\n",
263 | " > one-moment-in-time\n",
264 | " > saving-all-my-love-for-you\n",
265 | " > so-emotional\n",
266 | "wii-sports\n",
267 | " > tennis-results\n",
268 | "wiktoria\n",
269 | " > save-me\n",
270 | "wilco\n",
271 | " > elt\n",
272 | " > heavy-metal-drummer\n",
273 | " > i-am-trying-to-break-your-heart\n",
274 | " > nothingsevergonnastandinmyway\n",
275 | " > poor-places\n",
276 | " > shes-a-jar\n",
277 | "wild-nothing\n",
278 | " > shadow\n",
279 | "will-b-and-steve-allen\n",
280 | " > alchera---firestorm-and-steve-allen-remix\n",
281 | "will-i-am\n",
282 | " > thatpower-ft-justin-bieber\n",
283 | " > this-is-love\n",
284 | "will-i-am-and-britney-spears\n",
285 | " > scream-and-shout\n",
286 | "will-young\n",
287 | " > evergreen\n",
288 | "willie-nelson\n",
289 | " > blue-eyes-cryin\n",
290 | " > on-the-road-again\n",
291 | "willym\n",
292 | " > happy\n",
293 | "wilson-phillips\n",
294 | " > hold-on\n",
295 | "wintergatan\n",
296 | " > marble-machine\n",
297 | " > starmachine2000\n",
298 | "wire\n",
299 | " > outdoor-miner\n",
300 | "with-me\n",
301 | " > destinys-child\n",
302 | "within-temptation\n",
303 | " > a-demons-fate\n",
304 | " > iron\n",
305 | "wiz-khalifa\n",
306 | " > black-and-yellow\n",
307 | " > no-sleep\n",
308 | "wiz-khalifa-ft-charlie-puth\n",
309 | " > see-you-again\n",
310 | "wizard\n",
311 | " > fleet\n",
312 | "wizet\n",
313 | " > maplestory---cash-shop\n",
314 | "wjsn\n",
315 | " > i-wish\n",
316 | " > momomo\n",
317 | "wolfgang-amadeus-mozart\n",
318 | " > a-little-night-music\n",
319 | " > canzonetta-sullaria---che-soave-zeffiretto\n",
320 | " > confutatis-from-requiem\n",
321 | " > contrapuntal-sketch-no2-in-c-minor---kv-deest\n",
322 | " > der-holle-rache-kocht-in-meinem-herzen\n",
323 | " > k387-string-quartet-no14-in-g-major\n",
324 | " > kyrie-from-mass-in-c-minor-k-427\n",
325 | " > lacrimosa-from-requiem\n",
326 | " > piano-sonata-no-16-in-c-major\n",
327 | " > rondo-alla-turca\n",
328 | " > string-quartet-no-19-in-c-major-k-465\n",
329 | " > symphony-25-in-g-minor-movement-i\n",
330 | " > symphony-40-in-g-minor-movement-i\n",
331 | " > symphony-no-40-in-g-minor---i\n",
332 | "wolfgang-gartner\n",
333 | " > fire-power\n",
334 | " > illmerica\n",
335 | " > love-and-war\n",
336 | " > space-junk\n",
337 | " > the-way-it-was\n",
338 | " > undertaker\n",
339 | "wolfgang-gartner-and-will-i-am\n",
340 | " > forever\n",
341 | "wolfgun\n",
342 | " > firmament\n",
343 | " > lights\n",
344 | "womack-and-womack\n",
345 | " > teardrops\n",
346 | "woodentoaster\n",
347 | " > beyond-her-garden\n",
348 | " > nightmare-night\n",
349 | " > rainbow-factory\n",
350 | "woodkid\n",
351 | " > brooklyn\n",
352 | " > i-love-you\n",
353 | " > iron\n",
354 | "woody-guthrie\n",
355 | " > this-land-is-your-land\n",
356 | "working-for-a-nuclear-free-city\n",
357 | " > je-suis-le-vent\n",
358 | "wretch-32\n",
359 | " > alright-with-me\n",
360 | "wrld\n",
361 | " > chase-it-ft-savoi\n",
362 | " > drift-away\n",
363 | "wstr\n",
364 | " > nail-the-casket\n",
365 | "wwe\n",
366 | " > glorious-domination\n",
367 | " > real-american\n",
368 | "wwewewe\n",
369 | " > wwe\n",
370 | "wyclef-jean\n",
371 | " > divine-sorrow\n",
372 | "wyd-krakow-2016\n",
373 | " > blogoslawieni-milosierni\n",
374 | "==[x]=================================================\n",
375 | "https://www.hooktheory.com/theorytab/artists/x?page=1\n",
376 | "Total: 6\n",
377 | "----\n",
378 | "x-ambassadors\n",
379 | " > unsteady\n",
380 | "xi\n",
381 | " > freedom-dive\n",
382 | "xilent\n",
383 | " > animation\n",
384 | " > choose-me\n",
385 | " > disconnect\n",
386 | " > synthony\n",
387 | " > the-place\n",
388 | "xxanaxx\n",
389 | " > give-u-the-world\n",
390 | " > got-u-under---spisek-jednego-remix\n",
391 | " > story\n",
392 | "xxxtentacion\n",
393 | " > orlando\n",
394 | "xyconstant\n",
395 | " > white-noise\n",
396 | "=======================================================\n",
397 | "[1, 1]\n",
398 | "Artists: 83\n",
399 | "Songs: 192\n"
400 | ]
401 | }
402 | ],
403 | "source": [
404 | "list_pages = []\n",
405 | "archive_artist = dict()\n",
406 | "sleep_time = 0.11\n",
407 | "alphabet_list = string.ascii_lowercase[-4:-2]\n",
408 | "\n",
409 | "website = 'https://www.hooktheory.com'\n",
410 | "base_url = website + '/theorytab/artists/'\n",
411 | "\n",
412 | "artist_count = 0\n",
413 | "song_count = 0\n",
414 | "\n",
415 | "for ch in alphabet_list:\n",
416 | " time.sleep(sleep_time) \n",
417 | " url = base_url+ch\n",
418 | " response_tmp = requests.get(url)\n",
419 | " soup = BeautifulSoup(response_tmp.text, 'html.parser')\n",
420 | " page_count = 0\n",
421 | " \n",
422 | " print('==[%c]================================================='%ch)\n",
423 | " \n",
424 | " ## get artists list by pages\n",
425 | " url_artist_list = []\n",
426 | " for page in range(1,9999):\n",
427 | " url = 'https://www.hooktheory.com/theorytab/artists/'+ch+'?page=' + str(page)\n",
428 | " \n",
429 | " time.sleep(sleep_time) \n",
430 | " response_tmp = requests.get(url)\n",
431 | " soup = BeautifulSoup(response_tmp.text, 'html.parser')\n",
432 | " item_list = soup.find_all(\"li\", { \"class\":\"grid-item\"})\n",
433 | " \n",
434 | " if item_list:\n",
435 | " print(url)\n",
436 | " page_count += 1\n",
437 | " else:\n",
438 | " break\n",
439 | " \n",
440 | " for item in item_list:\n",
441 | " url_artist_list.append(item.find_all(\"a\", { \"class\":\"a-tab-cover\"})[0]['href'])\n",
442 | "\n",
443 | " print('Total:', len(url_artist_list))\n",
444 | " \n",
445 | " print('----')\n",
446 | " \n",
447 | " if not page_count:\n",
448 | " page_count = 1 \n",
449 | " \n",
450 | " ## get song of artists\n",
451 | " artist_song_dict = dict()\n",
452 | " \n",
453 | " for url_artist in url_artist_list:\n",
454 | " artist_count+=1\n",
455 | " time.sleep(sleep_time)\n",
456 | " artist_name = url_artist.split('/')[-1]\n",
457 | " print(artist_name)\n",
458 | " song_name_list = get_song_list(url_artist)\n",
459 | " song_count += len(song_name_list)\n",
460 | " artist_song_dict[artist_name] = song_name_list\n",
461 | " \n",
462 | " archive_artist[ch] = artist_song_dict\n",
463 | " list_pages.append(page_count)\n",
464 | "\n",
465 | "print('=======================================================')\n",
466 | "print(list_pages)\n",
467 | "print('Artists:', artist_count)\n",
468 | "print('Songs:', song_count)\n",
469 | "\n",
470 | "\n",
471 | "with open('archive_artist.json', \"w\") as f:\n",
472 | " json.dump(archive_artist, f)"
473 | ]
474 | },
475 | {
476 | "cell_type": "markdown",
477 | "metadata": {},
478 | "source": [
479 | "### Retrieve each songs"
480 | ]
481 | },
482 | {
483 | "cell_type": "code",
484 | "execution_count": 19,
485 | "metadata": {
486 | "collapsed": false,
487 | "scrolled": true
488 | },
489 | "outputs": [
490 | {
491 | "name": "stdout",
492 | "output_type": "stream",
493 | "text": [
494 | "==[w]=================================================\n",
495 | "( 1/192) war low-rider\n",
496 | "( 2/192) war why-cant-we-be-friends\n",
497 | "( 3/192) white-town once-i-flew\n"
498 | ]
499 | },
500 | {
501 | "name": "stderr",
502 | "output_type": "stream",
503 | "text": [
504 | "ERROR: KPo-6RSGxQw: YouTube said: This video is not available.\n"
505 | ]
506 | },
507 | {
508 | "name": "stdout",
509 | "output_type": "stream",
510 | "text": [
511 | "Download Failed: KPo-6RSGxQw \n",
512 | "( 4/192) white-town your-woman\n",
513 | "( 5/192) wizet maplestory---cash-shop\n",
514 | "( 6/192) weezer buddy-holly\n",
515 | "( 7/192) weezer dreamin\n",
516 | "( 8/192) weezer el-scorcho\n",
517 | "( 9/192) weezer fall-together-\n",
518 | "( 10/192) weezer falling-for-you\n",
519 | "( 11/192) weezer i-want-you-to\n",
520 | "( 12/192) weezer island-in-the-sun\n",
521 | "( 13/192) weezer king-of-the-world\n",
522 | "( 14/192) weezer my-name-is-jonas\n",
523 | "( 15/192) weezer no-one-else\n",
524 | "( 16/192) weezer say-it-aint-so\n",
525 | "( 17/192) weezer the-greatest-man-that-ever-lived\n",
526 | "( 18/192) warsongs piercing-light\n",
527 | "( 19/192) wiz-khalifa black-and-yellow\n"
528 | ]
529 | },
530 | {
531 | "name": "stderr",
532 | "output_type": "stream",
533 | "text": [
534 | "ERROR: Unable to download webpage: (caused by URLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:645)'),))\n"
535 | ]
536 | },
537 | {
538 | "name": "stdout",
539 | "output_type": "stream",
540 | "text": [
541 | "Download Failed: UePtoxDhJSw \n",
542 | "( 20/192) wiz-khalifa no-sleep\n",
543 | "( 21/192) wave-racer flash-drive\n"
544 | ]
545 | },
546 | {
547 | "name": "stderr",
548 | "output_type": "stream",
549 | "text": [
550 | "ERROR: _f9wKhbYEAA: YouTube said: This video is not available.\n"
551 | ]
552 | },
553 | {
554 | "name": "stdout",
555 | "output_type": "stream",
556 | "text": [
557 | "Download Failed: _f9wKhbYEAA \n",
558 | "( 22/192) wave-racer streamers\n",
559 | "( 23/192) wallpaper best-song-everrr\n",
560 | "( 24/192) wilco elt\n",
561 | "( 25/192) wilco heavy-metal-drummer\n",
562 | "( 26/192) wilco i-am-trying-to-break-your-heart\n",
563 | "( 27/192) wilco nothingsevergonnastandinmyway\n",
564 | "( 28/192) wilco poor-places\n",
565 | "( 29/192) wilco shes-a-jar\n",
566 | "( 30/192) wolfgun firmament\n",
567 | "( 31/192) wolfgun lights\n",
568 | "( 32/192) wilson-phillips hold-on\n",
569 | "( 33/192) wintergatan marble-machine\n",
570 | "( 34/192) wintergatan starmachine2000\n",
571 | "( 35/192) working-for-a-nuclear-free-city je-suis-le-vent\n",
572 | "( 36/192) wwe glorious-domination\n",
573 | "( 37/192) wwe real-american\n",
574 | "( 38/192) weird-al-yankovic do-i-creep-you-out\n",
575 | "( 39/192) weird-al-yankovic dont-download-this-song\n",
576 | "( 40/192) weird-al-yankovic everything-you-know-is-wrong\n",
577 | "( 41/192) weird-al-yankovic hardware-store\n"
578 | ]
579 | },
580 | {
581 | "name": "stderr",
582 | "output_type": "stream",
583 | "text": [
584 | "ERROR: YtKHBtqulOA: YouTube said: This video does not exist.\n"
585 | ]
586 | },
587 | {
588 | "name": "stdout",
589 | "output_type": "stream",
590 | "text": [
591 | "Download Failed: YtKHBtqulOA \n",
592 | "( 42/192) weird-al-yankovic midnight-star\n",
593 | "( 43/192) weird-al-yankovic ringtone\n",
594 | "( 44/192) weird-al-yankovic since-youve-been-gone\n",
595 | "( 45/192) weird-al-yankovic skipper-dan\n",
596 | "( 46/192) weird-al-yankovic stop-forwarding-that-crap-to-me\n",
597 | "( 47/192) weird-al-yankovic the-night-santa-went-crazy\n",
598 | "( 48/192) weird-al-yankovic virus-alert\n",
599 | "( 49/192) wg-snuffy-walden west-wing-suite\n",
600 | "( 50/192) ween freedom-of-76\n"
601 | ]
602 | },
603 | {
604 | "name": "stderr",
605 | "output_type": "stream",
606 | "text": [
607 | "ERROR: T6QjWZujAls: YouTube said: This video does not exist.\n"
608 | ]
609 | },
610 | {
611 | "name": "stdout",
612 | "output_type": "stream",
613 | "text": [
614 | "Download Failed: T6QjWZujAls \n",
615 | "( 51/192) ween happy-colored-marbles\n",
616 | "( 52/192) ween object\n",
617 | "( 53/192) ween ocean-man\n",
618 | "( 54/192) ween roses-are-free\n"
619 | ]
620 | },
621 | {
622 | "name": "stderr",
623 | "output_type": "stream",
624 | "text": [
625 | "ERROR: t4PzQJCF2X8: YouTube said: This video does not exist.\n"
626 | ]
627 | },
628 | {
629 | "name": "stdout",
630 | "output_type": "stream",
631 | "text": [
632 | "Download Failed: t4PzQJCF2X8 \n",
633 | "( 55/192) ween the-hiv-song\n",
634 | "( 56/192) ween transdermal-celebration\n"
635 | ]
636 | },
637 | {
638 | "name": "stderr",
639 | "output_type": "stream",
640 | "text": [
641 | "ERROR: kpheHGX3m1Q: YouTube said: This video does not exist.\n"
642 | ]
643 | },
644 | {
645 | "name": "stdout",
646 | "output_type": "stream",
647 | "text": [
648 | "Download Failed: kpheHGX3m1Q \n",
649 | "( 57/192) ween what-deaner-was-talking-about\n"
650 | ]
651 | },
652 | {
653 | "name": "stderr",
654 | "output_type": "stream",
655 | "text": [
656 | "ERROR: iDkt-K3vjMM: YouTube said: This video does not exist.\n"
657 | ]
658 | },
659 | {
660 | "name": "stdout",
661 | "output_type": "stream",
662 | "text": [
663 | "Download Failed: iDkt-K3vjMM \n",
664 | "( 58/192) weeabophone beeeeees\n",
665 | "( 59/192) wham freedom\n",
666 | "Download Failed: None \n",
667 | "( 60/192) wham last-christmas\n",
668 | "( 61/192) woodkid brooklyn\n",
669 | "( 62/192) woodkid i-love-you\n",
670 | "( 63/192) woodkid iron\n",
671 | "( 64/192) weebl amazing-horse\n",
672 | "( 65/192) weebl donkeys\n",
673 | "( 66/192) weebl fat-labrador\n",
674 | "( 67/192) weebl owls\n",
675 | "( 68/192) will-i-am-and-britney-spears scream-and-shout\n",
676 | "( 69/192) wakabayashi-mitsuru this-game\n"
677 | ]
678 | },
679 | {
680 | "name": "stderr",
681 | "output_type": "stream",
682 | "text": [
683 | "ERROR: sj_0rSdTDX8: YouTube said: This video does not exist.\n"
684 | ]
685 | },
686 | {
687 | "name": "stdout",
688 | "output_type": "stream",
689 | "text": [
690 | "Download Failed: sj_0rSdTDX8 \n",
691 | "( 70/192) wally-badarou the-dachstein-angels\n"
692 | ]
693 | },
694 | {
695 | "name": "stderr",
696 | "output_type": "stream",
697 | "text": [
698 | "ERROR: 76RbI-Ro7hE: YouTube said: \"The Dachstein Ange...\"\n",
699 | "This video is no longer available due to a copyright claim by Wally Badarou.\n"
700 | ]
701 | },
702 | {
703 | "name": "stdout",
704 | "output_type": "stream",
705 | "text": [
706 | "Download Failed: 76RbI-Ro7hE \n",
707 | "( 71/192) wang-chung everybody-have-fun-tonight\n",
708 | "( 72/192) wet-wet-wet love-is-all-around\n",
709 | "( 73/192) whitney-houston all-the-man-that-i-need\n",
710 | "( 74/192) whitney-houston greatest-love-of-all\n",
711 | "( 75/192) whitney-houston how-will-i-know\n",
712 | "( 76/192) whitney-houston i-have-nothing\n",
713 | "( 77/192) whitney-houston i-wanna-dance-with-somebody\n",
714 | "( 78/192) whitney-houston i-will-always-love-you\n",
715 | "( 79/192) whitney-houston im-your-baby-tonight\n",
716 | "( 80/192) whitney-houston its-not-right-but-its-okay---remix\n",
717 | "( 81/192) whitney-houston one-moment-in-time\n",
718 | "( 82/192) whitney-houston saving-all-my-love-for-you\n",
719 | "( 83/192) whitney-houston so-emotional\n",
720 | "( 84/192) willie-nelson blue-eyes-cryin\n",
721 | "( 85/192) willie-nelson on-the-road-again\n",
722 | "( 86/192) walter-buckley crazy-little-thing-called-love\n"
723 | ]
724 | },
725 | {
726 | "name": "stderr",
727 | "output_type": "stream",
728 | "text": [
729 | "ERROR: EE34cSvZCd8: YouTube said: Please sign in to view this video.\n"
730 | ]
731 | },
732 | {
733 | "name": "stdout",
734 | "output_type": "stream",
735 | "text": [
736 | "Download Failed: EE34cSvZCd8 \n",
737 | "( 87/192) wyd-krakow-2016 blogoslawieni-milosierni\n",
738 | "( 88/192) willym happy\n",
739 | "( 89/192) whiteflame senbonzakura\n",
740 | "( 90/192) wrld chase-it-ft-savoi\n",
741 | "( 91/192) wrld drift-away\n",
742 | "( 92/192) warren-zevon accidentally-like-a-martyr\n",
743 | "( 93/192) warren-zevon please-stay\n",
744 | "( 94/192) warren-zevon things-to-do-in-denver-when-youre-dead\n"
745 | ]
746 | },
747 | {
748 | "name": "stderr",
749 | "output_type": "stream",
750 | "text": [
751 | "ERROR: sQOyoPALBrE: YouTube said: \"Warren Zevon - Things to do...\"\n",
752 | "The YouTube account associated with this video has been terminated due to multiple third-party notifications of copyright infringement.\n"
753 | ]
754 | },
755 | {
756 | "name": "stdout",
757 | "output_type": "stream",
758 | "text": [
759 | "Download Failed: sQOyoPALBrE \n",
760 | "( 95/192) warren-zevon werewolves-of-london\n",
761 | "( 96/192) warpaint baby\n",
762 | "( 97/192) warpaint shadows\n",
763 | "Download Failed: FZtF3OdtAnc \n",
764 | "( 98/192) warrant i-saw-red\n",
765 | "( 99/192) we-the-kings check-yes-juliet\n",
766 | "(100/192) we-the-kings stay-young\n",
767 | "(101/192) wendell192 robots\n",
768 | "(102/192) wan-wan divas-device\n",
769 | "(103/192) wan-wan life-guiding-song---prospect-mirai\n",
770 | "(104/192) wan-wan sky-filling-departed-spirit---lightning-word\n",
771 | "(105/192) wan-wan surpass-the-gate-of-the-silver-key\n",
772 | "(106/192) wan-wan the-princess-that-was-erased-from-history\n",
773 | "(107/192) wan-wan world-yamataizer\n",
774 | "(108/192) waka-flocka-flame no-hands\n",
775 | "(109/192) with-me destinys-child\n",
776 | "Download Failed: null \n",
777 | "(110/192) wizard fleet\n",
778 | "(111/192) walk-off-the-earth lightning-bolt\n",
779 | "(112/192) we-lost-the-sea bogatyri\n",
780 | "(113/192) wolfgang-gartner-and-will-i-am forever\n",
781 | "(114/192) wjsn i-wish\n",
782 | "(115/192) wjsn momomo\n",
783 | "(116/192) walk-the-moon shut-up-and-dance\n",
784 | "(117/192) we-were-evergreen baby-blue\n",
785 | "(118/192) we-were-evergreen best-thing\n",
786 | "(119/192) wolfgang-gartner fire-power\n",
787 | "(120/192) wolfgang-gartner illmerica\n",
788 | "(121/192) wolfgang-gartner love-and-war\n",
789 | "(122/192) wolfgang-gartner space-junk\n",
790 | "(123/192) wolfgang-gartner the-way-it-was\n",
791 | "(124/192) wolfgang-gartner undertaker\n",
792 | "(125/192) womack-and-womack teardrops\n",
793 | "(126/192) wiz-khalifa-ft-charlie-puth see-you-again\n",
794 | "(127/192) walter-murphy american-dad---theme-song\n",
795 | "(128/192) walter-murphy family-guy---theme-song\n"
796 | ]
797 | },
798 | {
799 | "name": "stderr",
800 | "output_type": "stream",
801 | "text": [
802 | "ERROR: y1VNjb9iXXw: YouTube said: This video does not exist.\n"
803 | ]
804 | },
805 | {
806 | "name": "stdout",
807 | "output_type": "stream",
808 | "text": [
809 | "Download Failed: y1VNjb9iXXw \n",
810 | "(129/192) walter-murphy family-guy-theme-song\n"
811 | ]
812 | },
813 | {
814 | "name": "stderr",
815 | "output_type": "stream",
816 | "text": [
817 | "ERROR: y1VNjb9iXXw: YouTube said: This video does not exist.\n"
818 | ]
819 | },
820 | {
821 | "name": "stdout",
822 | "output_type": "stream",
823 | "text": [
824 | "Download Failed: y1VNjb9iXXw \n",
825 | "(130/192) walter-murphy the-freaking-fcc\n"
826 | ]
827 | },
828 | {
829 | "name": "stderr",
830 | "output_type": "stream",
831 | "text": [
832 | "ERROR: 2NDPT0Ph5rA: YouTube said: Please sign in to view this video.\n"
833 | ]
834 | },
835 | {
836 | "name": "stdout",
837 | "output_type": "stream",
838 | "text": [
839 | "Download Failed: 2NDPT0Ph5rA \n",
840 | "(131/192) wyclef-jean divine-sorrow\n",
841 | "(132/192) wire outdoor-miner\n",
842 | "(133/192) walter-jack-rollins-and-steve-nelson frosty-the-snowman\n",
843 | "(134/192) wstr nail-the-casket\n",
844 | "(135/192) wavetraxx beach-stringz---lennox-remix\n",
845 | "(136/192) wiktoria save-me\n",
846 | "(137/192) wavves afraid-of-heights\n",
847 | "(138/192) wavves dreams-of-grandeur\n",
848 | "(139/192) wavves nine-is-god\n",
849 | "(140/192) wavves pony\n",
850 | "(141/192) wavves super-soaker\n",
851 | "(142/192) wavves thats-on-me\n",
852 | "(143/192) wavves way-too-much\n",
853 | "(144/192) walden ciaco\n",
854 | "(145/192) weather-report birdland\n",
855 | "(146/192) woody-guthrie this-land-is-your-land\n",
856 | "(147/192) will-i-am thatpower-ft-justin-bieber\n"
857 | ]
858 | },
859 | {
860 | "name": "stderr",
861 | "output_type": "stream",
862 | "text": [
863 | "ERROR: A0hUdBLiASQ: YouTube said: This video does not exist.\n"
864 | ]
865 | },
866 | {
867 | "name": "stdout",
868 | "output_type": "stream",
869 | "text": [
870 | "Download Failed: A0hUdBLiASQ \n",
871 | "(148/192) will-i-am this-is-love\n",
872 | "(149/192) washed-out new-theory\n",
873 | "(150/192) wanting jar-of-love\n",
874 | "(151/192) wwewewe wwe\n",
875 | "(152/192) wii-sports tennis-results\n",
876 | "(153/192) within-temptation a-demons-fate\n",
877 | "(154/192) within-temptation iron\n",
878 | "(155/192) woodentoaster beyond-her-garden\n",
879 | "(156/192) woodentoaster nightmare-night\n",
880 | "(157/192) woodentoaster rainbow-factory\n",
881 | "(158/192) whitesnake here-i-go-again\n"
882 | ]
883 | },
884 | {
885 | "name": "stderr",
886 | "output_type": "stream",
887 | "text": [
888 | "ERROR: The uploader has not made this video available in your country.\n",
889 | "You might want to use a VPN or a proxy server (with --proxy) to workaround.\n"
890 | ]
891 | },
892 | {
893 | "name": "stdout",
894 | "output_type": "stream",
895 | "text": [
896 | "Download Failed: i3MXiTeH_Pg \n",
897 | "(159/192) whitesnake is-this-love\n"
898 | ]
899 | },
900 | {
901 | "name": "stderr",
902 | "output_type": "stream",
903 | "text": [
904 | "ERROR: ujnH4yNqL8E: YouTube said: This video contains content from WMG, who has blocked it in your country on copyright grounds.\n"
905 | ]
906 | },
907 | {
908 | "name": "stdout",
909 | "output_type": "stream",
910 | "text": [
911 | "Download Failed: ujnH4yNqL8E \n",
912 | "(160/192) wellenrausch citylights\n",
913 | "(161/192) wild-nothing shadow\n",
914 | "(162/192) will-young evergreen\n",
915 | "(163/192) wave-master deja-vu-canals\n",
916 | "(164/192) wave-master time-square\n",
917 | "(165/192) will-b-and-steve-allen alchera---firestorm-and-steve-allen-remix\n"
918 | ]
919 | },
920 | {
921 | "name": "stderr",
922 | "output_type": "stream",
923 | "text": [
924 | "ERROR: lGZVvP8QDZA: YouTube said: \"Will B & Steve Allen - Alch...\"\n",
925 | "The YouTube account associated with this video has been terminated due to multiple third-party notifications of copyright infringement.\n"
926 | ]
927 | },
928 | {
929 | "name": "stdout",
930 | "output_type": "stream",
931 | "text": [
932 | "Download Failed: lGZVvP8QDZA \n",
933 | "(166/192) wretch-32 alright-with-me\n",
934 | "(167/192) wolfgang-amadeus-mozart a-little-night-music\n",
935 | "(168/192) wolfgang-amadeus-mozart canzonetta-sullaria---che-soave-zeffiretto\n",
936 | "(169/192) wolfgang-amadeus-mozart confutatis-from-requiem\n"
937 | ]
938 | },
939 | {
940 | "name": "stderr",
941 | "output_type": "stream",
942 | "text": [
943 | "ERROR: _Lav_lDJ2LM: YouTube said: This video does not exist.\n"
944 | ]
945 | },
946 | {
947 | "name": "stdout",
948 | "output_type": "stream",
949 | "text": [
950 | "Download Failed: _Lav_lDJ2LM \n",
951 | "(170/192) wolfgang-amadeus-mozart contrapuntal-sketch-no2-in-c-minor---kv-deest\n"
952 | ]
953 | },
954 | {
955 | "name": "stderr",
956 | "output_type": "stream",
957 | "text": [
958 | "ERROR: heHnBqa4GOg: YouTube said: This video is not available.\n"
959 | ]
960 | },
961 | {
962 | "name": "stdout",
963 | "output_type": "stream",
964 | "text": [
965 | "Download Failed: heHnBqa4GOg \n",
966 | "(171/192) wolfgang-amadeus-mozart der-holle-rache-kocht-in-meinem-herzen\n",
967 | "(172/192) wolfgang-amadeus-mozart k387-string-quartet-no14-in-g-major\n",
968 | "(173/192) wolfgang-amadeus-mozart kyrie-from-mass-in-c-minor-k-427\n"
969 | ]
970 | },
971 | {
972 | "name": "stderr",
973 | "output_type": "stream",
974 | "text": [
975 | "ERROR: hieCzEZwRw4: YouTube said: This video does not exist.\n"
976 | ]
977 | },
978 | {
979 | "name": "stdout",
980 | "output_type": "stream",
981 | "text": [
982 | "Download Failed: hieCzEZwRw4 \n",
983 | "(174/192) wolfgang-amadeus-mozart lacrimosa-from-requiem\n",
984 | "(175/192) wolfgang-amadeus-mozart piano-sonata-no-16-in-c-major\n",
985 | "(176/192) wolfgang-amadeus-mozart rondo-alla-turca\n",
986 | "(177/192) wolfgang-amadeus-mozart string-quartet-no-19-in-c-major-k-465\n"
987 | ]
988 | },
989 | {
990 | "name": "stderr",
991 | "output_type": "stream",
992 | "text": [
993 | "ERROR: ZmXtpmr9UQc: YouTube said: This video has been removed by the user.\n"
994 | ]
995 | },
996 | {
997 | "name": "stdout",
998 | "output_type": "stream",
999 | "text": [
1000 | "Download Failed: ZmXtpmr9UQc \n",
1001 | "(178/192) wolfgang-amadeus-mozart symphony-25-in-g-minor-movement-i\n",
1002 | "(179/192) wolfgang-amadeus-mozart symphony-40-in-g-minor-movement-i\n",
1003 | "(180/192) wolfgang-amadeus-mozart symphony-no-40-in-g-minor---i\n",
1004 | "==[x]=================================================\n",
1005 | "(181/192) xxanaxx give-u-the-world\n",
1006 | "(182/192) xxanaxx got-u-under---spisek-jednego-remix\n",
1007 | "(183/192) xxanaxx story\n",
1008 | "(184/192) xilent animation\n",
1009 | "(185/192) xilent choose-me\n",
1010 | "(186/192) xilent disconnect\n"
1011 | ]
1012 | },
1013 | {
1014 | "name": "stderr",
1015 | "output_type": "stream",
1016 | "text": [
1017 | "ERROR: OB046qaE6cM: YouTube said: This video is not available.\n"
1018 | ]
1019 | },
1020 | {
1021 | "name": "stdout",
1022 | "output_type": "stream",
1023 | "text": [
1024 | "Download Failed: OB046qaE6cM \n",
1025 | "(187/192) xilent synthony\n",
1026 | "(188/192) xilent the-place\n",
1027 | "(189/192) xxxtentacion orlando\n",
1028 | "(190/192) x-ambassadors unsteady\n",
1029 | "(191/192) xi freedom-dive\n",
1030 | "(192/192) xyconstant white-noise\n"
1031 | ]
1032 | }
1033 | ],
1034 | "source": [
1035 | "root_dir = 'archive'\n",
1036 | "\n",
1037 | "with open('archive_artist.json', \"r\") as f:\n",
1038 | " archive_artist = json.load(f)\n",
1039 | "\n",
1040 | "now_count = 1\n",
1041 | " \n",
1042 | "for ch in alphabet_list:\n",
1043 | " path_ch = os.path.join(root_dir, ch)\n",
1044 | " print('==[%c]================================================='%ch)\n",
1045 | " \n",
1046 | " if not os.path.exists(path_ch):\n",
1047 | " os.makedirs(path_ch)\n",
1048 | " \n",
1049 | " for a_name in archive_artist[ch].keys(): \n",
1050 | " for s_name in archive_artist[ch][a_name]:\n",
1051 | "\n",
1052 | " print('(%3d/%3d) %s %s' % (now_count, song_count, a_name , s_name))\n",
1053 | " path_song = os.path.join(path_ch, a_name, s_name)\n",
1054 | " \n",
1055 | " if not os.path.exists(path_song):\n",
1056 | " os.makedirs(path_song)\n",
1057 | " \n",
1058 | " time.sleep(sleep_time) \n",
1059 | " song_retrieval(a_name, s_name, path_song)\n",
1060 | " \n",
1061 | " now_count+=1 \n",
1062 | " \n",
1063 | " "
1064 | ]
1065 | }
1066 | ],
1067 | "metadata": {
1068 | "kernelspec": {
1069 | "display_name": "Python [mir]",
1070 | "language": "python",
1071 | "name": "Python [mir]"
1072 | },
1073 | "language_info": {
1074 | "codemirror_mode": {
1075 | "name": "ipython",
1076 | "version": 3
1077 | },
1078 | "file_extension": ".py",
1079 | "mimetype": "text/x-python",
1080 | "name": "python",
1081 | "nbconvert_exporter": "python",
1082 | "pygments_lexer": "ipython3",
1083 | "version": "3.5.2"
1084 | }
1085 | },
1086 | "nbformat": 4,
1087 | "nbformat_minor": 0
1088 | }
1089 |
--------------------------------------------------------------------------------