├── README ├── grab_test.py └── grab.py /README: -------------------------------------------------------------------------------- 1 | MICROGRAB 2 | ========= 3 | 4 | A python version of a more rudimentary bash script used to download music from 5 | blogs. 6 | -------------------------------------------------------------------------------- /grab_test.py: -------------------------------------------------------------------------------- 1 | from urlparse import urlparse 2 | import urllib2 3 | import lxml.html 4 | import os 5 | import sys 6 | import datetime 7 | import eyeD3 8 | 9 | def create_playlist(site_name, songs): 10 | print ('songs',songs) 11 | # create new playlist for downloaded songs 12 | today = datetime.date.today() 13 | folder_date = today + datetime.timedelta(days=-today.weekday(), weeks=1) 14 | p_f = open(site_name + '_' + str(folder_date) + '.m3u', 'wf') 15 | p_f.write("#EXTM3U\n") 16 | for song, path in songs: 17 | tag = eyeD3.Tag() 18 | tag.link(path) 19 | song_length = 0 20 | song_data = eyeD3.Tag() 21 | song_data.link(path) 22 | p_f.write('#EXTINFO:' + str(song_length)+ ',' + song_data.getArtist() + 23 | ' - ' + song_data.getTitle() + "\n") 24 | p_f.write(path + "\n") 25 | p_f.close() 26 | 27 | def get_songs(blog_path): 28 | print(blog_path) 29 | files = [] 30 | for dirname, dirnames, filenames in os.walk(blog_path): 31 | for subdirname in dirnames: 32 | files.append(get_songs(path)) 33 | for filename in filenames: 34 | files.append((filename,os.path.join(dirname,filename))) 35 | return files 36 | 37 | files = get_songs("/home/marshall/Music/musigh/2012-03-26") 38 | create_playlist("musigh", files) 39 | 40 | -------------------------------------------------------------------------------- /grab.py: -------------------------------------------------------------------------------- 1 | from urlparse import urlparse 2 | from subprocess import call 3 | import urllib2 4 | import lxml.html 5 | import os 6 | import sys 7 | import datetime 8 | import eyeD3 9 | 
class Usage(Exception):
    """Error carrying a command-line usage message in ``msg``."""

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg


def _next_week_start():
    """Return the date of the Monday that follows today.

    Download folders and playlist names are both keyed on this date, so it is
    computed in one place.
    """
    today = datetime.date.today()
    return today + datetime.timedelta(days=-today.weekday(), weeks=1)


def download_file(dest, url):
    """Download ``url`` into the directory ``dest`` and return the saved path.

    The file is named after the last ``/``-separated segment of ``url``.
    As a side effect, progress is written to stdout while the download runs.
    """
    name = url.split('/')[-1]
    path = os.path.join(dest, name)
    response = urllib2.urlopen(url)
    try:
        # Content-Length may be absent (or zero); in that case only the byte
        # count is reported, because a percentage cannot be computed.
        lengths = response.info().getheaders("Content-Length")
        size = int(lengths[0]) if lengths else 0
        print("Downloading: %s Bytes: %s" % (name, size if size else "unknown"))
        dl_size = 0
        block_sz = 8192
        with open(path, 'wb') as mp3_f:
            while True:
                chunk = response.read(block_sz)
                if not chunk:
                    break
                dl_size += len(chunk)
                mp3_f.write(chunk)
                if size:
                    status = "%10d [%3.2f%%]" % (dl_size, dl_size * 100. / size)
                else:
                    status = "%10d" % dl_size
                # Backspaces return the cursor to the start of the status so
                # the next update overwrites it instead of scrolling.
                sys.stdout.write(status + chr(8) * len(status))
                sys.stdout.flush()
    finally:
        response.close()
    return path


def get_previously_downloaded(blog_path):
    """Return ``(filename, full_path)`` tuples for every file under ``blog_path``.

    ``os.walk`` already descends into subdirectories, so one pass produces a
    flat list; recursing again per subdirectory would nest lists inside the
    result and report the same files more than once.  A missing directory
    yields an empty list.
    """
    files = []
    for dirname, _dirnames, filenames in os.walk(blog_path):
        for filename in filenames:
            files.append((filename, os.path.join(dirname, filename)))
    return files


def create_playlist(site_name, songs):
    """Write a simple extended M3U playlist and return its file name.

    ``site_name`` is the name of the site the songs came from and ``songs`` is
    an iterable of ``(name, path)`` pairs for the downloaded songs; only the
    path is used.  The playlist is written to ``<site_name>_<date>.m3u``
    relative to the current working directory.
    """
    playlist_name = site_name + '_' + str(_next_week_start()) + '.m3u'
    with open(playlist_name, 'w') as p_f:
        p_f.write("#EXTM3U\n")
        for _song, path in songs:
            song_data = eyeD3.Tag()
            song_data.link(path)
            # The track length is not computed, so 0 is written as duration.
            # NOTE(review): if eyeD3 returns non-ASCII unicode here, this
            # write may fail under Python 2 -- confirm with real files.
            p_f.write('#EXTINF:0,' + song_data.getArtist() + ' - '
                      + song_data.getTitle() + "\n")
            p_f.write(path + "\n")
    return playlist_name


def main():
    """Download the new mp3 links from a page and build a playlist of them.

    Expects ``sys.argv`` to be ``[prog, music_dir, url]``.  Returns 1 after
    printing a usage message when the argument count is wrong, 0 otherwise.
    """
    if len(sys.argv) != 3:
        print("Error. Usage is " + sys.argv[0] +
              " /full/path/to/music/directory http://download_from_here.com")
        return 1

    music_dir_path = sys.argv[1]
    # drop one trailing '/' so the joined paths below do not contain '//'
    if music_dir_path.endswith('/'):
        music_dir_path = music_dir_path[:-1]

    for page_url in sys.argv[2:]:
        page = urllib2.urlopen(page_url)
        try:
            htmlcode = page.read()
        finally:
            page.close()

        site_name = urlparse(page_url).netloc.rsplit('.')[0]
        dir_name = str(site_name) + '/' + str(_next_week_start())
        full_path = music_dir_path + '/' + dir_name

        # create the directory to put the downloaded songs into
        try:
            os.makedirs(full_path)
        except OSError:
            # Only "already exists" is expected; anything else (permissions,
            # a file in the way) must not be silently ignored.
            if not os.path.isdir(full_path):
                raise
            print("Directory already existed, moving along")

        # Names of everything already stored for this site, in any dated
        # folder.  A set of bare file names is what the membership test below
        # needs; the helper returns (name, path) tuples.
        known_names = set(
            name for name, _path in get_previously_downloaded(full_path + '/../'))
        downloaded_files = []
        tree = lxml.html.fromstring(htmlcode)

        # get all mp3 urls within the html source and download the new ones
        for link in tree.findall(".//a"):
            url = link.get("href")
            # anchors without an href yield None
            if not url or not url.endswith(".mp3"):
                continue
            file_name = url.split('/')[-1]
            if file_name in known_names:
                continue
            path = download_file(full_path, url)
            downloaded_files.append([file_name, path])
            # the same link may appear more than once on a page
            known_names.add(file_name)

        # create a playlist and hand it to iTunes; argument lists avoid
        # passing the URL-derived playlist name through a shell
        playlist_name = create_playlist(site_name, downloaded_files)
        call(["open", "/Applications/iTunes.app/", playlist_name])
        call(["osascript", "-e", 'tell application "iTunes" to pause'])

    return 0


if __name__ == "__main__":
    sys.exit(main())