├── README
├── grab_test.py
└── grab.py


/README:
--------------------------------------------------------------------------------
1 | MICROGRAB
2 | =========
3 | 
4 | A python version of a more rudimentary bash script used to download music from
5 | blogs.
6 | 


--------------------------------------------------------------------------------
/grab_test.py:
--------------------------------------------------------------------------------
 1 | from urlparse import urlparse
 2 | import urllib2
 3 | import lxml.html
 4 | import os
 5 | import sys
 6 | import datetime
 7 | import eyeD3
 8 | 
 9 | def create_playlist(site_name, songs):
10 |   print ('songs',songs)
11 |   # create new playlist for downloaded songs
12 |   today       = datetime.date.today()
13 |   folder_date = today + datetime.timedelta(days=-today.weekday(), weeks=1)
14 |   p_f = open(site_name + '_' + str(folder_date) + '.m3u', 'wf')
15 |   p_f.write("#EXTM3U\n")
16 |   for song, path in songs:
17 |     tag = eyeD3.Tag()
18 |     tag.link(path)
19 |     song_length = 0
20 |     song_data   = eyeD3.Tag()
21 |     song_data.link(path)
22 |     p_f.write('#EXTINFO:' + str(song_length)+ ',' + song_data.getArtist() +
23 |               ' - ' + song_data.getTitle() + "\n")
24 |     p_f.write(path + "\n")
25 |   p_f.close()
26 | 
def get_songs(blog_path):
  """Return a flat list of (file name, full path) pairs under blog_path.

  Args:
    blog_path: directory to scan; every file in it and in all of its
      subdirectories is reported.

  Returns:
    A list of (file name, path) tuples, one per file found.
  """
  print(blog_path)
  files = []
  # os.walk already visits every subdirectory, so no explicit recursion is
  # needed; recursing as well would nest lists inside the result.
  for dirname, _dirnames, filenames in os.walk(blog_path):
    for filename in filenames:
      files.append((filename, os.path.join(dirname, filename)))
  return files
36 | 
# Manual smoke test: collect the songs from one hard-coded download folder
# and write a playlist for them.
blog_path = "/home/marshall/Music/musigh/2012-03-26"
files = get_songs(blog_path)
create_playlist("musigh", files)
39 | 
40 | 


--------------------------------------------------------------------------------
/grab.py:
--------------------------------------------------------------------------------
  1 | from urlparse import urlparse
  2 | from subprocess import call
  3 | import urllib2
  4 | import lxml.html
  5 | import os
  6 | import sys
  7 | import datetime
  8 | import eyeD3
  9 | 
class Usage(Exception):
  """Exception that stores its message in the ``msg`` attribute.

  NOTE(review): presumably meant for command-line usage errors; nothing in
  the visible code raises or catches it -- confirm before relying on it.
  """
  def __init__(self, msg):
    # msg: the text describing the error.
    self.msg = msg
 13 | 
 14 | # takes as arguments the destination path of the file to download and the url to
 15 | # download. It does so, and as a side effect, displays the progress
 16 | def download_file(dest, url):
 17 |   name  = url.split('/')[-1]
 18 |   u     = urllib2.urlopen(url)
 19 |   path  = dest + '/' + name
 20 |   mp3_f = open(path, 'wb')
 21 |   meta  = u.info()
 22 |   size  = int(meta.getheaders("Content-Length")[0])
 23 |   print "Downloading: %s Bytes: %s" % (name, size)
 24 |   dl_size = 0
 25 |   block_sz = 8192
 26 |   while True:
 27 |     buffer = u.read(block_sz)
 28 |     if not buffer:
 29 |       break
 30 |     dl_size += len(buffer)
 31 |     mp3_f.write(buffer)
 32 |     status = r"%10d  [%3.2f%%]" % (dl_size, dl_size * 100. / size)
 33 |     status = status + chr(8)*(len(status)+1)
 34 |     print status,
 35 |   mp3_f.close()
 36 |   return path
 37 | 
 38 | # recursively gets other songs downloaded in the path
 39 | def get_previously_downloaded(blog_path):
 40 |   files = []
 41 |   for dirname, dirnames, filenames in os.walk(blog_path):
 42 |     for subdirname in dirnames:
 43 |       files.append(get_previously_downloaded(os.path.join(dirname, subdirname)))
 44 |     for filename in filenames:
 45 |       files.append((filename, os.path.join(dirname,filename)))
 46 |   return files
 47 | 
 48 | # creates a simple m3u playlist. takes as input the name of the site and the
 49 | # list of songs that have been downloaded
 50 | def create_playlist(site_name, songs):
 51 |   today       = datetime.date.today()
 52 |   folder_date = today + datetime.timedelta(days=-today.weekday(), weeks=1)
 53 |   playlist_name = site_name + '_' + str(folder_date) + '.m3u'
 54 |   p_f = open(playlist_name , 'wf')
 55 |   p_f.write("#EXTM3U\n")
 56 |   for song, path in songs:
 57 |     tag = eyeD3.Tag()
 58 |     tag.link(path)
 59 |     song_data   = eyeD3.Tag()
 60 |     song_data.link(path)
 61 |     p_f.write('#EXTINFO: 0 ,' + song_data.getArtist() + ' - '
 62 |               + song_data.getTitle() + "\n")
 63 |     p_f.write(path + "\n")
 64 |   p_f.close()
 65 |   return playlist_name
 66 | 
 67 | def main():
 68 |   if len(sys.argv) != 3:
 69 |     print("Error. Usage is " + sys.argv[0] +
 70 |           " /full/path/to/music/directory http://download_from_here.com")
 71 |     return 1
 72 |   else:
 73 |     music_dir_path = sys.argv[1]
 74 | 
 75 |     # check if there is a trailing '/' and remove it
 76 |     if music_dir_path[len(music_dir_path)-1] is '/':
 77 |       music_dir_path = music_dir_path[:-1]
 78 | 
 79 |     for i in range(2,len(sys.argv)):
 80 |       f        = urllib2.urlopen(sys.argv[i])
 81 |       htmlcode = f.read()
 82 | 
 83 |       site_name   = urlparse(sys.argv[i]).netloc.rsplit('.')[0]
 84 |       today       = datetime.date.today()
 85 |       folder_date = today + datetime.timedelta(days=-today.weekday(), weeks=1)
 86 |       dir_name    = str(site_name) + '/' + str(folder_date)
 87 |       full_path   = music_dir_path + '/' + dir_name
 88 | 
 89 |       # create the directory to put the downloaded songs into
 90 |       try:
 91 |         os.makedirs(full_path)
 92 |       except:
 93 |         print("Directory already existed, moving along")
 94 | 
 95 | 
 96 |       # download all files that aren't duplicates
 97 |       downloaded_files = []
 98 |       previous_files   = get_previously_downloaded(full_path+'/../')
 99 |       tree = lxml.html.fromstring(htmlcode)
100 | 
101 |       # get all mp3 urls within the html source and download them
102 |       for link in tree.findall(".//a"):
103 |         url       = link.get("href")
104 |         file_name = url.split('/')[-1]
105 |         if url.endswith(".mp3") and file_name not in previous_files:
106 |           path = download_file(full_path, url)
107 |           downloaded_files.append([file_name,path])
108 |       f.close()
109 | 
110 |       # create a playlist
111 |       playlist_name = create_playlist(site_name, downloaded_files)
112 |       os.system("open /Applications/iTunes.app/ "+playlist_name)
113 |       os.system("osascript -e 'tell application \"iTunes\" to pause'");
114 | 
115 |   return 0
116 | 
# Run main() only when the file is executed as a script, and use its return
# value as the process exit status.
if __name__ == "__main__":
  sys.exit(main())
119 | 


--------------------------------------------------------------------------------