├── .gitignore ├── fp-scrobble ├── ext ├── codegen.Darwin └── codegen.Linux-i686 ├── fp-scrobble.py ├── split.py ├── README.md ├── recorder.py ├── fp.py ├── echonest.py ├── scrobbyl.py ├── lastfm.py └── parsemp3.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.so 3 | -------------------------------------------------------------------------------- /fp-scrobble: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | LD_LIBRARY_PATH=. python scrobble.py "$@" 4 | -------------------------------------------------------------------------------- /ext/codegen.Darwin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alastair/scrobbyl/HEAD/ext/codegen.Darwin -------------------------------------------------------------------------------- /ext/codegen.Linux-i686: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alastair/scrobbyl/HEAD/ext/codegen.Linux-i686 -------------------------------------------------------------------------------- /fp-scrobble.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import fp 6 | import echonest 7 | 8 | supported_types = [".wav", ".mp3", ".ogg", ".flac"] 9 | 10 | def main(dir): 11 | matches = {} 12 | artists = {} 13 | count = 0 14 | for f in os.listdir(dir): 15 | print "file",f 16 | if os.path.splitext(f)[1] not in supported_types: 17 | print "skipping",f 18 | continue 19 | code = fp.fingerprint(os.path.join(dir, f)) 20 | match = echonest.fp_lookup(code) 21 | echonest.pp(match) 22 | 23 | if __name__ == "__main__": 24 | if len(sys.argv) < 2: 25 | print >>sys.stderr, "usage: %s " % sys.argv[0] 26 | sys.exit(1) 27 | else: 28 | main(sys.argv[1]) 29 | -------------------------------------------------------------------------------- /split.py: -------------------------------------------------------------------------------- 1 | #/usr/bin/python 2 | 3 | import parsemp3 4 | import sys 5 | import os 6 | import math 7 | import tempfile 8 | import subprocess 9 | 10 | def main(): 11 | file = os.path.abspath(sys.argv[1]) 12 | data = parsemp3.parsemp3(sys.argv[1]) 13 | print data["duration"]/1000 14 | 15 | tracklen = data["duration"]/1000 16 | splits = math.ceil(tracklen/20) 17 | 18 | for i in range(splits): 19 | start = i * 20 20 | end = start + 20 21 | print "start",start,"end",end 22 | (fd,outfile)=tempfile.mkstemp(suffix=".wav") 23 | os.close(fd) 24 | args = ["ffmpeg", "-y", "-i", file, "-ac", "1", "-ar", "22050", "-f", "wav", "-t", "20", "-ss", "%d" %start, outfile] 25 | subprocess.call(args, stderr=open("/dev/null", "w")) 26 | os.rename(outfile, "data/%d.wav" %start) 27 | 28 | 29 | if __name__=="__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Scrobbyl 2 | ========================== 3 | 4 | Don't you wish you could **scrobb**le your vin**yl**? Well, now you can. 5 | 6 | What it does 7 | ------------ 8 | 9 | 1. Listens to line-in for 20 seconds 10 | 2. Uses the echonest fingerprinter to work out what the song is 11 | 3. If this segment is different to the previous 20 seconds, scrobble it 12 | 4. rinse and repeat 13 | 14 | To run 15 | ---------- 16 | 17 | * make sure ffmpeg is in your path 18 | * run 'python lastfm.py auth' to link scrobbyl to your account 19 | * plug in your turntable through your line in 20 | * run 21 | 22 | To do 23 | ---------- 24 | Stay tuned for fingerprinting with a microphone instead of line-in 25 | 26 | FAQ 27 | ---------- 28 | **Wait, I can't work out how to run it** 29 | 30 | Scrobbyl is only in proof of concept stage now. We'll have an OS X and Linux frontend available soon. 31 | 32 | **How can I scrobble from a cafe/stereo/concert/store?** 33 | 34 | Sorry, you can only scrobble from a direct line-in at the moment. 35 | 36 | -------------------------------------------------------------------------------- /recorder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import pyaudio 4 | import sys 5 | import wave 6 | 7 | chunk = 1024 8 | FORMAT = pyaudio.paInt16 9 | CHANNELS = 1 10 | RATE = 44100 11 | RECORD_SECONDS = 5 12 | 13 | p = pyaudio.PyAudio() 14 | 15 | stream = p.open(format = FORMAT, 16 | channels = CHANNELS, 17 | rate = RATE, 18 | input = True, 19 | output = True, 20 | frames_per_buffer = chunk) 21 | 22 | print "* recording" 23 | all = [] 24 | for i in range(0, 44100 / chunk * RECORD_SECONDS): 25 | data = stream.read(chunk) 26 | all.append(data) 27 | # check for silence here by comparing the level with 0 (or some threshold) for 28 | # the contents of data. 29 | # then write data or not to a file 30 | 31 | print "* done" 32 | 33 | stream.stop_stream() 34 | stream.close() 35 | p.terminate() 36 | 37 | data = ''.join(all) 38 | wf = wave.open("recorded.wav", 'wb') 39 | wf.setnchannels(CHANNELS) 40 | wf.setsampwidth(p.get_sample_size(FORMAT)) 41 | wf.setframerate(RATE) 42 | wf.writeframes(data) 43 | wf.close() 44 | -------------------------------------------------------------------------------- /fp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os 4 | import subprocess 5 | import wave 6 | import tempfile 7 | import pycodegen 8 | import struct 9 | import json 10 | import sys 11 | 12 | supported_types = [".mp3", ".ogg", ".flac"] 13 | 14 | 15 | def decode(file): 16 | if os.path.splitext(file)[1] in supported_types: 17 | (fd,outfile)=tempfile.mkstemp(suffix=".wav") 18 | os.close(fd) 19 | args = ["ffmpeg", "-y", "-i", file, "-ac", "1", "-ar", "22050", "-f", "wav", "-t", "20", "-ss", "10", outfile] 20 | print "decoding",file,"..." 21 | subprocess.call(args, stderr=open("/dev/null", "w")) 22 | return outfile 23 | else: 24 | return None 25 | 26 | def fingerprint(file): 27 | outfile = file 28 | MAGIC = 32768.0 29 | try: 30 | if not file.endswith(".wav"): 31 | outfile = decode(file) 32 | if outfile is not None: 33 | wav = wave.open(outfile, "rb") 34 | 35 | frames = wav.readframes(wav.getnframes()) 36 | fs = [] 37 | for i in range(0, len(frames), 2): 38 | fs.append(struct.unpack(">sys.stderr, "usage: %s " % sys.argv[0] 52 | sys.exit(1) 53 | else: 54 | print json.dumps(fingerprint(sys.argv[1])) 55 | -------------------------------------------------------------------------------- /echonest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os 4 | import sys 5 | import urllib2 6 | import urlparse 7 | import urllib 8 | import json 9 | 10 | API_KEY = "F4LP3UJVBPYSPVKRZ" 11 | 12 | def _do_en_query(method, postdata=None, **kwargs): 13 | args = {} 14 | for k,v in kwargs.items(): 15 | args[k] = v.encode("utf8") 16 | args["api_key"] = API_KEY 17 | args["format"]="json" 18 | 19 | url=urlparse.urlunparse(('http', 20 | 'developer.echonest.com', 21 | '/api/v4/%s' % method, 22 | '', 23 | urllib.urlencode(args), 24 | '')) 25 | #print >> sys.stderr, "opening url",url 26 | f = urllib2.Request(url) 27 | try: 28 | f = urllib2.urlopen(f) 29 | except Exception, e: 30 | print >> sys.stderr, e.msg 31 | print >> sys.stderr, e.fp.read() 32 | raise 33 | return json.loads(f.read()) 34 | 35 | def artist_profile(artistid): 36 | return _do_en_query("artist/profile", bucket="id:musicbrainz", id=artistid) 37 | 38 | def fp_lookup(code): 39 | return _do_en_query("song/identify", code=code) 40 | 41 | def track_profile(id): 42 | return _do_en_query("track/profile", id=id) 43 | 44 | def pp(data): 45 | print json.dumps(data, indent=4) 46 | 47 | def main(): 48 | pp(artist_profile("ARH6W4X1187B99274F")) 49 | 50 | if __name__ =="__main__": 51 | if len(sys.argv) < 2: 52 | print "usage: %s [mbid|] " % sys.argv[0] 53 | sys.exit() 54 | else: 55 | main() 56 | -------------------------------------------------------------------------------- /scrobbyl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | import subprocess 6 | import json 7 | import echonest 8 | import time 9 | 10 | def fingerprint(file): 11 | platform = os.uname()[0] 12 | if platform == "Darwin": 13 | codegen = "./ext/codegen.Darwin" 14 | path = ".:"+os.getenv("PATH") 15 | elif platform == "Linux": 16 | codegen = "./ext/codegen.Linux-i686" 17 | path = os.getenv("PATH") 18 | proclist = [codegen, os.path.abspath(file), "0", "20"] 19 | p = subprocess.Popen(proclist, env={"PATH":path}, stdout=subprocess.PIPE) 20 | code = p.communicate()[0] 21 | return json.loads(code) 22 | 23 | def main(file): 24 | statusfile = os.path.expanduser("~/.scrobbyl") 25 | lines = [] 26 | if os.path.exists(statusfile): 27 | fp = open(statusfile, "r") 28 | lines = fp.readlines() 29 | fp.close() 30 | lasttime = 0 31 | lastartist = "" 32 | lasttrack = "" 33 | if len(lines) == 3: 34 | lasttime = int(lines[0]) 35 | lastartist = lines[1] 36 | lasttrack = lines[2] 37 | 38 | f = fingerprint(file) 39 | 40 | code = f[0]["code"] 41 | song = echonest.fp_lookup(code) 42 | echonest.pp(song) 43 | 44 | if "response" in song and "status" in song["response"] \ 45 | and song["response"]["status"]["message"] == "Success" \ 46 | and len(song["response"]["songs"]) > 0: 47 | 48 | track = song["response"]["songs"][0]["title"] 49 | artist = song["response"]["songs"][0]["artist_name"] 50 | now = time.time() 51 | 52 | print (now-lasttime) 53 | if now - lasttime < 100: 54 | # Only scrobble if we've just been playing 55 | if lasttrack != "" and lasttrack != track: 56 | print "Last track was",lasttrack,"now",track,", scrobbling" 57 | else: 58 | print "same song" 59 | else: 60 | print "too long since we last did it,", now-lasttime 61 | fp = open(statusfile, "w") 62 | fp.write("%d\n%s\n%s" % (now, artist, track)) 63 | fp.close() 64 | 65 | if __name__=="__main__": 66 | if len(sys.argv) < 2: 67 | print >>sys.stderr, "usage: %s " % sys.argv[0] 68 | sys.exit(0) 69 | main(sys.argv[1]) 70 | -------------------------------------------------------------------------------- /lastfm.py: -------------------------------------------------------------------------------- 1 | import urllib2 2 | import urllib 3 | import urlparse 4 | import xml.etree.ElementTree 5 | import re 6 | from htmlentitydefs import name2codepoint 7 | import hashlib 8 | import sys 9 | import time 10 | import os 11 | 12 | key="5071d44086caaeb4dce9fba2053ac768" 13 | secret="4c2b1b277e94f73d50429f891185db3f" 14 | 15 | if os.path.exists(os.path.expanduser("~/.lastfmsess")): 16 | sessfp = open(os.path.expanduser("~/.lastfmsess"), "r") 17 | session = sessfp.read().strip() 18 | sessfp.close() 19 | 20 | def htmlentitydecode(s): 21 | os= re.sub('&(%s);' % '|'.join(name2codepoint), 22 | lambda m: unichr(name2codepoint[m.group(1)]), s) 23 | return os 24 | 25 | def clean_trackid(trackid): 26 | m=re.match(".*([a-fA-Z0-9]{8}-[a-fA-Z0-9]{4}-[a-fA-Z0-9]{4}-[a-fA-Z0-9]{4}-[a-fA-Z0-9]{12}).*",trackid) 27 | assert m 28 | mbid=m.group(1) 29 | return mbid 30 | 31 | def _cleanname(x): 32 | if x is None: 33 | return '' 34 | return htmlentitydecode(x) 35 | 36 | def _etree_to_dict(etree): 37 | result={} 38 | for i in etree: 39 | if i.tag not in result: 40 | result[i.tag]=[] 41 | if len(i): 42 | result[i.tag].append(_etree_to_dict(i)) 43 | else: 44 | result[i.tag].append(_cleanname(i.text)) 45 | return result 46 | 47 | def _do_raw_lastfm_query(url): 48 | f = urllib2.Request(url) 49 | f.add_header('User-Agent','Scrobbyl') 50 | try: 51 | f = urllib2.urlopen(f) 52 | except urllib2.URLError, e: 53 | raise 54 | 55 | tree = xml.etree.ElementTree.ElementTree(file=f) 56 | result=_etree_to_dict(tree.getroot()) 57 | return result 58 | 59 | def _do_lastfm_post(url, data): 60 | f = urllib2.Request(url) 61 | f.add_header('User-Agent','Scrobbyl') 62 | try: 63 | f = urllib2.urlopen(f, data) 64 | except urllib2.URLError, e: 65 | raise 66 | 67 | 68 | def _do_lastfm_query(type, method,**kwargs): 69 | args = { 70 | "method" : method, 71 | "api_key" : key, 72 | } 73 | for k,v in kwargs.items(): 74 | args[k] = v.encode("utf8") 75 | s = "" 76 | for k in sorted(args.keys()): 77 | s+=k+args[k] 78 | s+=secret 79 | if "sk" in args.keys() or "token" in args.keys(): 80 | args["api_sig"] = hashlib.md5(s).hexdigest() 81 | 82 | if type == "GET": 83 | url=urlparse.urlunparse(('http', 84 | 'ws.audioscrobbler.com', 85 | '/2.0/', 86 | '', 87 | urllib.urlencode(args), 88 | '')) 89 | return _do_raw_lastfm_query(url) 90 | elif type == "POST": 91 | url=urlparse.urlunparse(('http', 92 | 'ws.audioscrobbler.com', 93 | '/2.0/', '', '', '')) 94 | _do_lastfm_post(url, urllib.urlencode(args)) 95 | 96 | def _get_auth_token(): 97 | token = _do_lastfm_query("GET", "auth.getToken") 98 | return token["token"][0] 99 | 100 | def _make_session(token): 101 | sess = _do_lastfm_query("GET", "auth.getSession", token=token) 102 | key = sess["session"][0]["key"][0] 103 | fp = open(os.path.expanduser("~/.lastfmsess"), "w") 104 | fp.write(key) 105 | fp.close() 106 | print "session successfully created. thank you." 107 | 108 | def scrobble(artist, track): 109 | # OK, not the real start time. but that'll do 110 | ts = "%d" % (time.time() - 100) 111 | _do_lastfm_query("POST", "track.scrobble", timestamp=ts, artist=artist, track=track, sk=session) 112 | 113 | if __name__=="__main__": 114 | if len(sys.argv) > 1: 115 | if sys.argv[1] == "auth": 116 | t = _get_auth_token() 117 | print "Please open this url then come back to this window: http://www.last.fm/api/auth/?api_key=%s&token=%s" % (key, t) 118 | i = 0 119 | while i < 5: 120 | try: 121 | _make_session(t) 122 | break 123 | except urllib2.HTTPError ,e: 124 | print e.code 125 | time.sleep(10) 126 | else: 127 | scrobble("The New Pornographers", "Failsafe") 128 | 129 | -------------------------------------------------------------------------------- /parsemp3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import StringIO 3 | 4 | versiontbl = [ 2.5, 0, 2, 1 ] 5 | layertbl = [ 0, 3, 2, 1 ] 6 | 7 | bitratetbl = [ 8 | [ ], 9 | [ [], # Version 1 10 | [ 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448 ], #l1 11 | [ 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384 ], #l2 12 | [ 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320 ], #l3 13 | ], 14 | [ [], # Version 2 15 | [ 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256 ], #l1 16 | [ 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 ], #l2 17 | [ 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 ], #l3 18 | ], 19 | ] 20 | 21 | sampleratetbl = [ 22 | [ ], 23 | [ 44100, 48000, 32000 ], 24 | [ 22050, 24000, 16000 ], 25 | ] 26 | 27 | v2_2_0_to_v2_4_0 = { 28 | "BUF":"RBUF", # Recommended Buffer Size 29 | "CNT":"PCNT", # Play Count 30 | "COM":"COMM", # Comments 31 | "CRA":"AENC", # Audio Encryption 32 | # "CRM":, # Encrypted Meta Frame 33 | # "ETC":, # Equalisation timing codes 34 | "EQU":"EQUA", # Equalisation 35 | "GEO":"GEOB", # General Encapsulation Object 36 | "IPL":"IPLS", # Involved People List 37 | "LNK":"LINK", # Linked Information 38 | "MCI":"MCDI", # Music CD Identifier 39 | "MUL":"MLLT", # MPEG Location Lookup Table 40 | "PIC":"APIC", # Attached Picture 41 | "POP":"POPM", # Popularimeter 42 | "REV":"RVRB", # Reverb 43 | "RVA":"RVAD", # Relative Volume Adjustment 44 | "SLT":"SYLT", # Syncronised Text/Lyrics 45 | "STC":"SYTC", # Synconrised Tempo Codes 46 | "TAL":"TALB", # Album/Movie/Show Title 47 | "TBP":"TBPM", # BPM 48 | "TCM":"TCOM", # Composer 49 | "TCO":"TCON", # Content Type 50 | "TCP":"TCP", # Compilation. DEPRECATED IN 2.4 51 | "TCR":"TCOP", # Copyright Message 52 | # "TDA":, # Textual Data 53 | "TDA":"TDAT", # Date 54 | "TPL":"TDLY", # Playlist Delay 55 | "TEN":"TENC", # Encoded by 56 | "TFT":"TFLT", # File type 57 | "TIM":"TIME", # Time 58 | "TKE":"TKEY", # Initial key 59 | "TLA":"TLAN", # Language(s) 60 | "TLE":"TLEN", # Length 61 | "TMT":"TMED", # Media type 62 | "TOA":"TOPE", # Original artist(s)/performer(s) 63 | "TOF":"TOFN", # Original filename 64 | "TOL":"TOLY", # Original Lyricist(s)/text writer(s) 65 | "TOR":"TORY", # Original release year 66 | "TOT":"TOAL", # Original album/Movie/Show title 67 | "TP1":"TPE1", # Lead artist(s)/Lead performer(s)/Soloist(s)/Performing group 68 | "TP2":"TPE2", # Band/Orchestra/Accompaniment 69 | "TP3":"TPE3", # Conductor/Performer refinement 70 | "TP4":"TPE4", # Interpreted, remixed, or otherwise modified by 71 | "TPA":"TPOS", # Part of a set 72 | "TPB":"TPUB", # Publisher 73 | "TRC":"TSRC", # ISRC (International Standard Recording Code) 74 | "TRD":"TRDA", # Recording dates 75 | "TRK":"TRCK", # Track number/Position in set 76 | "TSI":"TSIZ", # Size 77 | "TSS":"TSSE", # Software/hardware and settings used for encoding 78 | "TT1":"TIT1", # Content group description 79 | "TT2":"TIT2", # Title/Songname/Content description 80 | "TT3":"TIT3", # Subtitle/Description refinement 81 | "TXT":"TEXT", # Lyricist/text writer 82 | "TXX":"TXXX", # User defined text information frame 83 | "TYE":"TYER", # Year 84 | "UFI":"UFID", # Unique file identifier 85 | "ULT":"USLT", # Unsychronized lyric/text transcription 86 | "WAF":"WOAF", # Official audio file webpage 87 | "WAR":"WOAR", # Official artist/performer webpage 88 | "WAS":"WOAS", # Official audio source webpage 89 | "WCM":"WCOM", # Commercial information 90 | "WCP":"WCOP", # Copyright/Legal information 91 | "WPB":"WPUB", # Publishers official webpage 92 | "WXX":"WXXX", # User defined URL link frame 93 | "CM1":"CM1", # Apple proprietary rating/popularity 94 | } 95 | 96 | genres = [ 97 | u"Blues",u"Classic Rock",u"Country",u"Dance",u"Disco",u"Funk", 98 | u"Grunge",u"Hip-Hop",u"Jazz",u"Metal",u"New Age",u"Oldies",u"Other", 99 | u"Pop",u"R&B",u"Rap",u"Reggae",u"Rock",u"Techno",u"Industrial", 100 | u"Alternative",u"Ska",u"Death Metal",u"Pranks",u"Soundtrack", 101 | u"Euro-Techno",u"Ambient",u"Trip-Hop",u"Vocal",u"Jazz+Funk",u"Fusion", 102 | u"Trance",u"Classical",u"Instrumental",u"Acid",u"House",u"Game", 103 | u"Sound Clip",u"Gospel",u"Noise",u"Alt. Rock",u"Bass",u"Soul", 104 | u"Punk",u"Space",u"Meditative",u"Instrum. Pop",u"Instrum. Rock", 105 | u"Ethnic",u"Gothic",u"Darkwave",u"Techno-Industu",u"Electronic", 106 | u"Pop-Folk",u"Eurodance",u"Dream",u"Southern Rock",u"Comedy", 107 | u"Cult",u"Gangsta",u"Top 4u",u"Christian Rap",u"Pop/Funk",u"Jungle", 108 | u"Native American",u"Cabaret",u"New Wave",u"Psychadelic",u"Rave", 109 | u"Showtunes",u"Trailer",u"Lo-Fi",u"Tribal",u"Acid Punk",u"Acid Jazz", 110 | u"Polka",u"Retro",u"Musical",u"Rock & Roll",u"Hard Rock",u"Folk", 111 | u"Folk/Rock",u"National Folk",u"Swing",u"Fusion",u"Bebob",u"Latin", 112 | u"Revival",u"Celtic",u"Bluegrass",u"Avantgarde",u"Gothic Rock", 113 | u"Progress. Rock",u"Psychadel. Rock",u"Symphonic Rock",u"Slow Rock", 114 | u"Big Band",u"Chorus",u"Easy Listening",u"Acoustic",u"Humour", 115 | u"Speech",u"Chanson",u"Opera",u"Chamber Music",u"Sonata",u"Symphony", 116 | u"Booty Bass",u"Primus",u"Porn Groove",u"Satire",u"Slow Jam", 117 | u"Club",u"Tango",u"Samba",u"Folklore",u"Ballad",u"Power Ballad", 118 | u"Rhythmic Soul",u"Freestyle",u"Duet",u"Punk Rock",u"Drum Solo", 119 | u"A Capella",u"Euro-House",u"Dance Hall",u"Goa",u"Drum & Bass", 120 | u"Club-House",u"Hardcore",u"Terror",u"Indie",u"BritPop",u"Negerpunk", 121 | u"Polsk Punk",u"Beat",u"Christian Gangsta Rap",u"Heavy Metal", 122 | u"Black Metal",u"Crossover",u"Contemporary Christian",u"Christian Rock", 123 | u"Merengue",u"Salsa",u"Thrash Metal",u"Anime",u"Jpop",u"Synthpop" 124 | ] 125 | 126 | tagconvertfrom={ 127 | "v1" : { 128 | "TIT2" : "TIT2", 129 | "TYER" : "TYER", 130 | "TRCK" : "TRCK", 131 | "TPE1" : "TPE1", 132 | "TALB" : "TALB", 133 | "COMM" : "", # Doesn't match exactly with ID3v2 134 | "TCON" : "TCON", 135 | }, 136 | "v2" : { 137 | "TYER" : "TYER", 138 | "TIT2" : "TIT2", 139 | "TRCK" : "TRCK", 140 | "TPE1" : "TPE1", 141 | "TALB" : "TALB", 142 | "COMM" : "COMM", 143 | "TCON" : "TCON", 144 | }, 145 | "ape" : { 146 | "title" : "TIT2", 147 | "Track" : "TIT2", 148 | "Album" : "TALB", 149 | "album" : "TALB", 150 | "artist": "TPE1", 151 | "Year" : "TYER", 152 | "genre" : "TCON", 153 | }, 154 | "lyrics" : { 155 | "IND" : "", # Indication, specific to lyrics v2 156 | "EAL" : "TALB", 157 | "ETT" : "TIT2", 158 | "EAR" : "TPE1", 159 | "INF" : "", # Doesn't match exactly with COMM 160 | "CRC" : "", # The CRC is pretty annoying. 161 | }, 162 | } 163 | 164 | tagconvertto={} 165 | 166 | for tagtype in tagconvertfrom: 167 | tagconvertto[tagtype]={} 168 | for tagname in tagconvertfrom[tagtype]: 169 | tagconvertto[tagtype][tagconvertfrom[tagtype][tagname]]=tagname 170 | 171 | def strip_padding(x): 172 | while x.endswith("\x00"): 173 | x=x[:-1] 174 | return x 175 | 176 | def parse_unicode(x): 177 | if x.startswith("\xff\xfe"): 178 | st= x.decode("utf-16-le") 179 | else: 180 | st= x.decode("utf-16-be") 181 | # Strip leading whitespace 182 | while st.startswith(u"\ufeff"): 183 | st=st[1:] 184 | return st 185 | 186 | # How many bloody versions of id3 do we REALLY need?! 187 | 188 | def v1(tag): 189 | assert len(tag)==128-3 190 | try: 191 | data={ 192 | "TIT2" : strip_padding(tag[:30]).decode("ISO-8859-1"), 193 | "TPE1" : strip_padding(tag[30:60]).decode("ISO-8859-1"), 194 | "TALB" : strip_padding(tag[60:90]).decode("ISO-8859-1"), 195 | "TYER" : strip_padding(tag[90:94]).decode("ISO-8859-1"), 196 | "COMM" : (u"From ID3v1",strip_padding(tag[94:122]).decode("ISO-8859-1")), 197 | "TRCK" : unicode(ord(tag[123])), 198 | } 199 | if ord(tag[124])!=0xff: 200 | genreid=ord(tag[124]) 201 | if genreid in genres: 202 | genrename=genres[genreid] 203 | else: 204 | genrename=u"unknowngenre#%d" % genreid 205 | data["TCON"]=u"("+unicode(ord(tag[124]))+u")"+genrename 206 | return data 207 | except: 208 | print "genres:",`tag[124]` 209 | raise 210 | 211 | 212 | def v2_2_0(tag): 213 | data={} 214 | while tag!="": 215 | if tag[0]=="\x00": 216 | while tag.startswith("\x00"): 217 | tag=tag[1:] 218 | if tag!="": 219 | raise "Err, padding had data in it?" 220 | break 221 | 222 | tagid=tag[:3] 223 | tag=tag[3:] 224 | taglen=ord(tag[0])*128*128+ord(tag[1])*128+ord(tag[2]) 225 | tag=tag[3:] 226 | tagdata=tag[:taglen] 227 | tag=tag[taglen:] 228 | if tagid.startswith("T"): 229 | if tagdata[0]=="\x00": # latin-1 230 | tagdata=tagdata[1:] 231 | if "\x00" in tagdata: 232 | tagdata=tagdata[:tagdata.index("\x00")] 233 | tagdata=tagdata.decode("ISO-8859-1") 234 | elif tagdata[0]=="\x01": # utf16 235 | if "\x00" in tagdata: 236 | # This is meant to remove extra null's at the end of 237 | # the string, but it eats into the string. 238 | # tagdata=tagdata[:tagdata.index("\x00")] 239 | pass 240 | tagdata=parse_unicode(tagdata[1:]) 241 | else: 242 | raise "Unknown Encoding" 243 | elif tagid=="COM": 244 | encoding=tagdata[0] 245 | commenttype,commentdata=tagdata[1:].split("\x00",1) 246 | if encoding=="\x00": 247 | commenttype=commenttype.decode("ISO-8859-1") 248 | commentdata=commentdata.decode("ISO-8859-1") 249 | elif encoding=="\x01": 250 | commenttype=parse_unicode(commenttype) 251 | commentdata=parse_unicode(commentdata) 252 | else: 253 | raise "Unknown encoding" 254 | tagdata=(commenttype,commentdata) 255 | #print "tag:",`tagid`,"(",v2_2_0_to_v2_4_0[tagid],")" 256 | #print "taglen:",taglen 257 | #print "data:",`tagdata` 258 | if tagid not in v2_2_0_to_v2_4_0: 259 | print "Unknown ID3v2.2.0 tag:",`tagid` 260 | else: 261 | data[v2_2_0_to_v2_4_0[tagid]]=tagdata 262 | return data 263 | 264 | def v2_3_0(tag, version): 265 | data={} 266 | while tag: 267 | if tag[0]=="\x00": 268 | while tag.startswith("\x00"): 269 | tag=tag[1:] 270 | if tag!="": 271 | # mp3ext puts data in here. Ignore it. 272 | #raise "Err, padding had data in it?",`tag` 273 | pass 274 | break 275 | tagid=tag[:4] 276 | if version == 3: 277 | taglen=ord(tag[4])*256*256*256+ord(tag[5])*256*256+ord(tag[6])*256+ord(tag[7]) 278 | elif version == 4: 279 | taglen=ord(tag[4])*128*128*128+ord(tag[5])*128*128+ord(tag[6])*128+ord(tag[7]) 280 | tagflag=ord(tag[8])*256+ord(tag[9]) 281 | tagdata=tag[10:10+taglen] 282 | #print "got tagid %s, taglen is %d (version %s)" % (tagid, taglen, version) 283 | tag=tag[10+taglen:] 284 | if tagid.startswith("T"): 285 | if tagdata[0]=="\x00": # latin-1 286 | #if "\x00" in tagdata: 287 | # tagdata=tagdata[:tagdata.index("\x00")] 288 | tagdata=tagdata[1:].decode("ISO-8859-1") 289 | elif tagdata[0]=="\x01": # utf16 290 | #if "\x00\x00" in tagdata: 291 | # tagdata=tagdata[:tagdata.index("\x00\x00")] 292 | tagdata=parse_unicode(tagdata[1:]) 293 | elif tagdata[0]=="\x03": # utf8 294 | #if "\x00" in tagdata: 295 | # tagdata=tagdata[:tagdata.index("\x00")] 296 | tagdata=tagdata[1:].decode("utf8") 297 | else: 298 | raise "Unknown Encoding",`tagdata[0]` 299 | if tagid in data and type(data[tagid]) == type([]): 300 | data[tagid].append(tagdata) 301 | elif tagid in data and data[tagid] != tagdata: 302 | data[tagid] = [data[tagid], tagdata] 303 | else: 304 | data[tagid]=tagdata 305 | return data 306 | 307 | def lyricsv2(data): 308 | assert data.startswith("LYRICSBEGIN"),`data` 309 | data=data[11:] 310 | retdata={} 311 | while data: 312 | tagid=data[:3] 313 | tagsize=int(data[3:8]) 314 | tagdata=data[8:8+tagsize] 315 | data=data[8+tagsize:] 316 | retdata[tagid]=tagdata 317 | return retdata 318 | 319 | def apev2(ape): 320 | data={} 321 | assert ape[-32:].startswith("APETAGEX"),`ape` 322 | ape=ape[32:] 323 | while ape!="": 324 | apelen=ord(ape[3])*256*256*256+ord(ape[2])*256*256+ord(ape[ 1])*256+ord(ape[ 0]) 325 | apeflg=ord(ape[7])*256*256*256+ord(ape[6])*256*256+ord(ape[ 5])*256+ord(ape[ 4]) 326 | apekey,ape=ape[8:].split("\x00",1) 327 | apedata=ape[:apelen] 328 | ape=ape[apelen:] 329 | data[apekey]=apedata 330 | return data 331 | 332 | # Just read id3 tags, skipping the bitstream and any other tag types 333 | def readid3(fname): 334 | f=open(fname,"rb") 335 | 336 | f.seek(-128,2) 337 | id3v1=f.read(128) 338 | flength=f.tell() 339 | if id3v1.startswith("TAG"): 340 | v1data=v1(id3v1[3:]) 341 | flength-=128 342 | else: 343 | v1data={} 344 | 345 | f.seek(0) 346 | 347 | # Find a ID3v2 tag 348 | if f.read(3)=="ID3": 349 | id3version = ord(f.read(1))+ord(f.read(1))/256.0 350 | id3flags = ord(f.read(1)) 351 | id3len = ord(f.read(1))*128*128*128+ord(f.read(1))*128*128+ord(f.read(1))*128+ord(f.read(1)) 352 | #print "ID3 v 2",id3version,"found" 353 | #print "flags:",id3flags 354 | #print "len:",id3len 355 | tag=f.read(id3len) 356 | if int(id3version)==2: 357 | v2data=v2_2_0(tag) 358 | elif int(id3version) in [3,4]: 359 | v2data=v2_3_0(tag, id3version) 360 | else: 361 | print "Unknown tag version ID3v2.",int(id3version) 362 | v2data={} 363 | v2data["version"]=u"ID3v2.%s" % unicode(id3version) 364 | else: 365 | f.seek(0) 366 | v2data={} 367 | 368 | return { 369 | "v2" : v2data, 370 | "v1" : v1data 371 | } 372 | 373 | def parsemp3(fname): 374 | fin=open(fname,"rb") 375 | contents = fin.read() 376 | fin.close() 377 | f = StringIO.StringIO(contents) 378 | 379 | # Decode the ID3v1 380 | f.seek(-128,2) 381 | id3v1=f.read(128) 382 | flength=f.tell() 383 | if id3v1.startswith("TAG"): 384 | v1data=v1(id3v1[3:]) 385 | flength-=128 386 | else: 387 | v1data={} 388 | 389 | # Decode as many of the tags at the end that we can 390 | lyricsdata={} 391 | apedata={} 392 | while 1: 393 | # Decode a lyrics v2.0 tag 394 | f.seek(flength-9) 395 | lyrics=f.read(9) 396 | if lyrics=="LYRICS200": 397 | f.seek(flength-9-6) 398 | read=int(f.read(6)) 399 | f.seek(flength-(9+read+6)) 400 | data=f.read(read) 401 | flength-=(9+6+read) 402 | lyricsdata=lyricsv2(data) 403 | continue 404 | 405 | # Decode a APEv2 tag at the end 406 | f.seek(flength-32) 407 | ape=f.read(32) 408 | if ape.startswith("APETAGEX"): 409 | apever=ord(ape[11])*256*256*256+ord(ape[10])*256*256+ord(ape[ 9])*256+ord(ape[ 8]) 410 | apelen=ord(ape[15])*256*256*256+ord(ape[14])*256*256+ord(ape[13])*256+ord(ape[12]) 411 | apecnt=ord(ape[19])*256*256*256+ord(ape[18])*256*256+ord(ape[17])*256+ord(ape[16]) 412 | apeflg=ord(ape[23])*256*256*256+ord(ape[22])*256*256+ord(ape[21])*256+ord(ape[20]) 413 | flength-=32 414 | flength-=apelen 415 | 416 | #print "apever:",hex(apever) 417 | #print "apelen:",apelen 418 | #print "apecnt:",apecnt 419 | #print "apeflg:",hex(apeflg) 420 | f.seek(flength) 421 | apedata=f.read(apelen+32) 422 | 423 | apedata=apev2(apedata) 424 | 425 | continue 426 | 427 | break 428 | 429 | # Goto the start of the file 430 | f.seek(0) 431 | 432 | # Find a ID3v2 tag 433 | if f.read(3)=="ID3": 434 | id3version = ord(f.read(1))+ord(f.read(1))/256.0 435 | id3flags = ord(f.read(1)) 436 | id3len = ord(f.read(1))*128*128*128+ord(f.read(1))*128*128+ord(f.read(1))*128+ord(f.read(1)) 437 | #print "ID3 v 2",id3version,"found" 438 | #print "flags:",id3flags 439 | #print "len:",id3len 440 | tag=f.read(id3len) 441 | if int(id3version)==2: 442 | v2data=v2_2_0(tag) 443 | elif int(id3version) in [3,4]: 444 | v2data=v2_3_0(tag, id3version) 445 | else: 446 | print "Unknown tag version ID3v2.",int(id3version) 447 | v2data={} 448 | v2data["version"]=u"ID3v2.%s" % unicode(id3version) 449 | else: 450 | f.seek(0) 451 | v2data={} 452 | 453 | # Start decoding the mp3 stream 454 | bitstream="" 455 | frames=0 456 | duration=0 457 | bitrates={} 458 | layers={} 459 | unknowns=[] 460 | unknown="" 461 | errors=[] 462 | offset=f.tell() 463 | while 1: 464 | # skip until we find an mpeg frame header 465 | if f.tell()>=flength: 466 | break 467 | b=f.read(1) 468 | if b=="": 469 | print "Expected",flength-f.tell(),"more bytes!" 470 | break 471 | bitstream+=b 472 | b=ord(b) 473 | if b!=255: 474 | #print "not a header 1",`chr(b)` 475 | unknown+=chr(b) 476 | continue 477 | b=f.read(1) 478 | if b=="": 479 | bitstream=bitstream[:-1] # strip off the incomplete header 480 | print "Truncated header" 481 | break 482 | bitstream+=b 483 | b=ord(b) 484 | if b&0xe0 != 0xe0: 485 | unknown+=chr(255)+chr(b) 486 | continue 487 | if unknown!="": 488 | unknowns.append((offset,unknown)) 489 | unknown="" 490 | offset=f.tell() 491 | # Now we've found mpeg header 492 | version=versiontbl[(b>>3)&0x03] 493 | layer=layertbl[(b>>1)&0x03] 494 | crcprotection=(b&0x01) 495 | 496 | #print "version:",version,"layer:",layer 497 | 498 | b=f.read(1) 499 | bitstream+=b 500 | b=ord(b) 501 | try: 502 | bitrate=bitratetbl[version][layer][(b>>4)&0x0F]*1000 503 | except: 504 | errors.append(("error","Unknown bitrate, V%d/%d enc: %d" % 505 | (version,layer,(b>>4)&0x0f))) 506 | continue 507 | try: 508 | samplerate=sampleratetbl[version][(b>>2)&0x03] 509 | except: 510 | errors.append(("error","Unknown samplerate, v%d enc: %d" % 511 | (version,(b>>2)&0x03))) 512 | continue 513 | 514 | padding=(b>>1)&0x01 515 | private=(b&0x01) 516 | 517 | b=f.read(1) 518 | bitstream+=b 519 | b=ord(b) 520 | stereomode=(b>>6)&0x03 521 | modeextension = (b>>4)&0x03 522 | copyright = (b>>3)&0x01 523 | original = (b>>2)&0x01 524 | emphasis = b&0x03 525 | 526 | if bitrate in bitrates: 527 | bitrates[bitrate]+=1 528 | else: 529 | bitrates[bitrate]=1 530 | 531 | # Record the layers used 532 | if version not in layers: layers[version]={} 533 | if layer not in layers[version]: layers[version][layer]=0 534 | layers[version][layer]+=1 535 | 536 | #print "bitrate:",bitrate 537 | #print "samplerate:",samplerate 538 | #print "padding:",padding 539 | #print "private:",private 540 | #print "stereomode:",stereomode 541 | #print "mode extension:",modeextension 542 | #print "copyright:",copyright 543 | #print "original:",original 544 | #print "emphasis:",emphasis 545 | 546 | if layer==1: 547 | framelengthinbytes = (12 * bitrate / samplerate + padding) * 4 548 | else: 549 | framelengthinbytes = 144 * bitrate / samplerate + padding 550 | 551 | # Durations are in milliseconds 552 | if framelengthinbytes == 0 or bitrate == 0: 553 | frameduration = 0 554 | frames+=1 555 | else: 556 | frameduration=framelengthinbytes*8.0*1000/bitrate 557 | duration+=frameduration 558 | frames+=1 559 | 560 | #print "duration:",frameduration 561 | #print "skipping",framelengthinbytes 562 | skip=f.read(min(framelengthinbytes-4,flength-f.tell())) 563 | bitstream+=skip 564 | if len(skip) != framelengthinbytes-4: 565 | #errors.append("Truncated frame, missing %d bytes" % ( 566 | # (framelengthinbytes-4)-len(skip))) 567 | break 568 | if unknown!="": 569 | unknowns.append((frames,unknown)) 570 | unknown="" 571 | 572 | return { 573 | "filename" : fname, 574 | "duration" : duration, 575 | "frames": frames, 576 | "bitrates":bitrates, 577 | "v1" : v1data, 578 | "v2" : v2data, 579 | "lyrics" : lyricsdata, 580 | "unknown" : unknowns, 581 | "ape" : apedata, 582 | "errors" : errors, 583 | "layers" : layers, 584 | "bitstream" : bitstream, 585 | } 586 | 587 | def validate(song): 588 | errors=[] 589 | for i in ["v1","v2","ape","lyrics"]: 590 | for j in song[i]: 591 | if type(song[i][j])==type(u"") and song[i][j].strip()!=song[i][j]: 592 | errors.append(("warning","%s %s tag has bad whitespace (%s)" % (i,j,`song[i][j]`))) 593 | for j in ["v1","v2","ape","lyrics"]: 594 | # No point in comparing stuff against itself 595 | if i>=j: 596 | continue 597 | for itagname in song[i]: 598 | try: 599 | v2tagname=tagconvertfrom[i][itagname] 600 | except: 601 | print "Unknown tag %s: %s (%s)" % (`i`,`itagname`,song[i][itagname]) 602 | continue 603 | # No tag that means this? 604 | if v2tagname=="": 605 | continue 606 | try: 607 | jtagname=tagconvertto[j][v2tagname] 608 | except: 609 | print "Unknown tag %s: %s" % (`j`,`v2tagname`) 610 | 611 | if jtagname not in song[j]: 612 | continue 613 | 614 | itagvalue=song[i][itagname] 615 | jtagvalue=song[j][jtagname] 616 | # v1 is truncated, so ignore the rest of the tag 617 | if i=="v1" and v2tagname in ["TIT2","IDE1","TALB"]: 618 | jtagvalue=jtagvalue[:30] 619 | if j=="v1" and v2tagname in ["TIT2","IDE1","TALB"]: 620 | itagvalue=itagvalue[:30] 621 | 622 | # Track numbers should be treated as numbers 623 | itagvala = 0 624 | itagvalb = 0 625 | jtagvala = 0 626 | jtagvalb = 0 627 | if jtagvalue=="": 628 | jtagvalue="0" 629 | if v2tagname=="TRCK": 630 | # TODO: Deal with x/y 631 | if type(itagvalue) == type(u"") and itagvalue.find("/") != -1: 632 | [itagvala, itagvalb] = itagvalue.split("/") 633 | else: 634 | itagvala = itagvalb = int(itagvalue) 635 | if type(jtagvalue) == type(u"") and jtagvalue.find("/") != -1: 636 | [jtagvala, jtagvalb] = jtagvalue.split("/") 637 | else: 638 | jtagvala = jtagvalb = int(jtagvalue) 639 | 640 | # Do comparisons 641 | if itagvala == jtagvala and itagvalb == jtagvalb: 642 | continue 643 | # Is one tag truncated? 644 | if len(itagvalue)