├── .gitignore
├── fp-scrobble
├── ext
├── codegen.Darwin
└── codegen.Linux-i686
├── fp-scrobble.py
├── split.py
├── README.md
├── recorder.py
├── fp.py
├── echonest.py
├── scrobbyl.py
├── lastfm.py
└── parsemp3.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.so
3 |
--------------------------------------------------------------------------------
/fp-scrobble:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Run the directory fingerprinter with the current directory added to the
# shared-library search path.

# The Python entry point shipped next to this wrapper is fp-scrobble.py;
# "$@" forwards every argument unchanged, including ones containing spaces.
LD_LIBRARY_PATH=. python fp-scrobble.py "$@"
4 |
--------------------------------------------------------------------------------
/ext/codegen.Darwin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alastair/scrobbyl/HEAD/ext/codegen.Darwin
--------------------------------------------------------------------------------
/ext/codegen.Linux-i686:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alastair/scrobbyl/HEAD/ext/codegen.Linux-i686
--------------------------------------------------------------------------------
/fp-scrobble.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import sys
4 | import os
5 | import fp
6 | import echonest
7 |
8 | supported_types = [".wav", ".mp3", ".ogg", ".flac"]
9 |
def main(dir):
    """Fingerprint each supported audio file in ``dir`` and print its match.

    dir -- path of the directory to scan; only its direct entries are
           examined.

    Every entry name is printed.  Entries whose extension is not listed in
    ``supported_types`` are reported as skipped; the others are passed to
    ``fp.fingerprint`` and the resulting code is looked up with
    ``echonest.fp_lookup`` and pretty-printed.
    """
    for f in os.listdir(dir):
        print("file %s" % (f,))
        if os.path.splitext(f)[1] not in supported_types:
            print("skipping %s" % (f,))
            continue
        code = fp.fingerprint(os.path.join(dir, f))
        match = echonest.fp_lookup(code)
        echonest.pp(match)
22 |
# Script entry point: expects the directory to scan as the only argument.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Usage errors go to stderr and produce a non-zero exit status.
        sys.stderr.write("usage: %s <directory>\n" % sys.argv[0])
        sys.exit(1)
    else:
        main(sys.argv[1])
29 |
--------------------------------------------------------------------------------
/split.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import parsemp3
4 | import sys
5 | import os
6 | import math
7 | import tempfile
8 | import subprocess
9 |
def main():
    """Cut the MP3 named on the command line into 20-second mono WAV files.

    The track duration comes from ``parsemp3.parsemp3``.  For every 20-second
    window ffmpeg writes a 22050 Hz mono WAV, which ends up as
    ``data/<start-second>.wav`` relative to the current directory.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <file.mp3>\n" % sys.argv[0])
        sys.exit(1)

    file = os.path.abspath(sys.argv[1])
    data = parsemp3.parsemp3(sys.argv[1])
    tracklen = data["duration"] / 1000
    print(tracklen)

    # math.ceil returns a float; range() needs an integer.
    splits = int(math.ceil(tracklen / 20.0))

    if not os.path.isdir("data"):
        os.makedirs("data")

    # One shared sink for ffmpeg's stderr instead of a new handle per segment.
    with open(os.devnull, "w") as devnull:
        for i in range(splits):
            start = i * 20
            end = start + 20
            print("start %d end %d" % (start, end))
            # Create the temporary file inside data/ so the final rename never
            # has to cross a filesystem boundary.
            (fd, outfile) = tempfile.mkstemp(suffix=".wav", dir="data")
            os.close(fd)
            args = ["ffmpeg", "-y", "-i", file, "-ac", "1", "-ar", "22050",
                    "-f", "wav", "-t", "20", "-ss", "%d" % start, outfile]
            subprocess.call(args, stderr=devnull)
            os.rename(outfile, "data/%d.wav" % start)


if __name__ == "__main__":
    main()
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Scrobbyl
2 | ==========================
3 |
4 | Don't you wish you could **scrobb**le your vin**yl**? Well, now you can.
5 |
6 | What it does
7 | ------------
8 |
9 | 1. Listens to line-in for 20 seconds
10 | 2. Uses the echonest fingerprinter to work out what the song is
11 | 3. If this segment is different to the previous 20 seconds, scrobble it
12 | 4. rinse and repeat
13 |
14 | To run
15 | ----------
16 |
17 | * make sure ffmpeg is in your path
18 | * run 'python lastfm.py auth' to link scrobbyl to your account
19 | * plug in your turntable through your line in
20 | * run
21 |
22 | To do
23 | ----------
24 | Stay tuned for fingerprinting with a microphone instead of line-in
25 |
26 | FAQ
27 | ----------
28 | **Wait, I can't work out how to run it**
29 |
30 | Scrobbyl is only in proof of concept stage now. We'll have an OS X and Linux frontend available soon.
31 |
32 | **How can I scrobble from a cafe/stereo/concert/store?**
33 |
34 | Sorry, you can only scrobble from a direct line-in at the moment.
35 |
36 |
--------------------------------------------------------------------------------
/recorder.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import pyaudio
4 | import sys
5 | import wave
6 |
# Record RECORD_SECONDS of mono 16-bit audio from the default input device
# and save it as recorded.wav in the current directory.

chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5

p = pyaudio.PyAudio()
# Query the sample width while the PyAudio instance is still initialised.
sample_width = p.get_sample_size(FORMAT)

# NOTE(review): output=True is kept from the original; it looks unnecessary
# for a pure recorder -- confirm before removing.
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                output=True,
                frames_per_buffer=chunk)

print("* recording")
frames = []
# The number of buffers to read follows RATE, so changing the sample rate
# keeps the recording length at RECORD_SECONDS.
for i in range(0, RATE // chunk * RECORD_SECONDS):
    data = stream.read(chunk)
    frames.append(data)
    # check for silence here by comparing the level with 0 (or some threshold) for
    # the contents of data.
    # then write data or not to a file

print("* done")

stream.stop_stream()
stream.close()
p.terminate()

data = b''.join(frames)
wf = wave.open("recorded.wav", 'wb')
try:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(data)
finally:
    wf.close()
44 |
--------------------------------------------------------------------------------
/fp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import os
4 | import subprocess
5 | import wave
6 | import tempfile
7 | import pycodegen
8 | import struct
9 | import json
10 | import sys
11 |
# Extensions that decode() hands to ffmpeg (compared in lower case).
supported_types = [".mp3", ".ogg", ".flac"]


def decode(file):
    """Transcode part of ``file`` to a temporary WAV file with ffmpeg.

    ffmpeg is asked for 20 seconds of audio starting 10 seconds in,
    down-mixed to one channel at 22050 Hz.

    Returns the path of the temporary ``.wav`` file, or None when the
    extension of ``file`` is not in ``supported_types``.  The temporary file
    is not removed here, and ffmpeg's exit status is not checked.
    """
    if os.path.splitext(file)[1].lower() not in supported_types:
        return None

    (fd, outfile) = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    args = ["ffmpeg", "-y", "-i", file, "-ac", "1", "-ar", "22050",
            "-f", "wav", "-t", "20", "-ss", "10", outfile]
    print("decoding %s ..." % (file,))
    # Discard ffmpeg's chatter; the handle is closed as soon as ffmpeg exits.
    with open(os.devnull, "w") as devnull:
        subprocess.call(args, stderr=devnull)
    return outfile
25 |
26 | def fingerprint(file):
27 | outfile = file
28 | MAGIC = 32768.0
29 | try:
30 | if not file.endswith(".wav"):
31 | outfile = decode(file)
32 | if outfile is not None:
33 | wav = wave.open(outfile, "rb")
34 |
35 | frames = wav.readframes(wav.getnframes())
36 | fs = []
37 | for i in range(0, len(frames), 2):
38 | fs.append(struct.unpack(">sys.stderr, "usage: %s " % sys.argv[0]
52 | sys.exit(1)
53 | else:
54 | print json.dumps(fingerprint(sys.argv[1]))
55 |
--------------------------------------------------------------------------------
/echonest.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import os
4 | import sys
5 | import urllib2
6 | import urlparse
7 | import urllib
8 | import json
9 |
10 | API_KEY = "F4LP3UJVBPYSPVKRZ"
11 |
def _do_en_query(method, postdata=None, **kwargs):
    """Call an Echo Nest v4 API method with GET and return the decoded JSON.

    method   -- API path below ``/api/v4/``, e.g. ``"song/identify"``.
    postdata -- accepted for compatibility; not used by this function.
    kwargs   -- query parameters; each value is encoded to UTF-8.

    ``api_key`` and ``format=json`` are always added to the query.  On an
    HTTP error the status message and response body are written to stderr
    and the exception is re-raised; other errors propagate untouched.
    """
    args = {}
    for k, v in kwargs.items():
        args[k] = v.encode("utf8")
    args["api_key"] = API_KEY
    args["format"] = "json"

    url = urlparse.urlunparse(('http',
                               'developer.echonest.com',
                               '/api/v4/%s' % method,
                               '',
                               urllib.urlencode(args),
                               ''))
    try:
        f = urllib2.urlopen(urllib2.Request(url))
    except urllib2.HTTPError as e:
        # Only HTTPError carries .msg and a readable body; catching anything
        # broader would hide the real failure behind an AttributeError.
        sys.stderr.write("%s\n" % (e.msg,))
        sys.stderr.write("%s\n" % (e.fp.read(),))
        raise
    try:
        return json.loads(f.read())
    finally:
        f.close()
34 |
def artist_profile(artistid):
    """Query ``artist/profile`` for ``artistid`` with the musicbrainz id bucket."""
    params = {"bucket": "id:musicbrainz", "id": artistid}
    return _do_en_query("artist/profile", **params)
37 |
def fp_lookup(code):
    """Query ``song/identify`` with the fingerprint ``code`` and return the reply."""
    params = {"code": code}
    return _do_en_query("song/identify", **params)
40 |
def track_profile(id):
    """Query ``track/profile`` for the track ``id`` and return the reply."""
    params = {"id": id}
    return _do_en_query("track/profile", **params)
43 |
def pp(data):
    """Print ``data`` to stdout as JSON indented by four spaces."""
    rendered = json.dumps(data, indent=4)
    print(rendered)
46 |
def main():
    """Fetch and pretty-print the profile of one hard-coded artist id."""
    profile = artist_profile("ARH6W4X1187B99274F")
    pp(profile)
49 |
# Script entry point: any argument triggers main(); none prints the usage.
if __name__ == "__main__":
    if len(sys.argv) >= 2:
        main()
    else:
        print("usage: %s [mbid|] " % sys.argv[0])
        sys.exit()
56 |
--------------------------------------------------------------------------------
/scrobbyl.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 | import os
5 | import subprocess
6 | import json
7 | import echonest
8 | import time
9 |
def fingerprint(file):
    """Run the bundled codegen binary on ``file`` and return its parsed JSON.

    The binary is chosen from ``os.uname()``: ``./ext/codegen.Darwin`` on
    Darwin (with ``.`` prepended to PATH) and ``./ext/codegen.Linux-i686`` on
    Linux.  It is invoked with the absolute path of ``file`` followed by the
    arguments ``"0"`` and ``"20"``, and its stdout is decoded as JSON.

    Raises RuntimeError on any other platform, for which no binary is
    bundled.
    """
    platform = os.uname()[0]
    # An unset PATH must not break string concatenation or Popen's env.
    path = os.getenv("PATH") or ""
    if platform == "Darwin":
        codegen = "./ext/codegen.Darwin"
        path = ".:" + path
    elif platform == "Linux":
        codegen = "./ext/codegen.Linux-i686"
    else:
        raise RuntimeError(
            "no codegen binary bundled for platform %r" % (platform,))
    proclist = [codegen, os.path.abspath(file), "0", "20"]
    p = subprocess.Popen(proclist, env={"PATH": path}, stdout=subprocess.PIPE)
    code = p.communicate()[0]
    return json.loads(code)
22 |
def main(file):
    """Identify the song in ``file`` and compare it with the previous run.

    The previous run is remembered in ``~/.scrobbyl`` as three lines:
    timestamp, artist, track (stored as UTF-8).  The file is fingerprinted,
    looked up with ``echonest.fp_lookup`` and the reply is pretty-printed.
    When the lookup succeeds with at least one song, the first song is
    compared with the stored track and the status file is rewritten.

    NOTE: a track change is only reported on stdout; nothing in this function
    submits a scrobble.
    """
    statusfile = os.path.expanduser("~/.scrobbyl")
    lasttime = 0
    lasttrack = ""
    if os.path.exists(statusfile):
        with open(statusfile, "rb") as fp:
            lines = fp.read().decode("utf8").splitlines()
        if len(lines) == 3:
            try:
                lasttime = int(lines[0])
            except ValueError:
                # A damaged status file is treated like a missing one.
                lasttime = 0
            else:
                lasttrack = lines[2]

    f = fingerprint(file)

    code = f[0]["code"]
    song = echonest.fp_lookup(code)
    echonest.pp(song)

    if "response" in song and "status" in song["response"] \
            and song["response"]["status"]["message"] == "Success" \
            and len(song["response"]["songs"]) > 0:

        best = song["response"]["songs"][0]
        track = best["title"]
        artist = best["artist_name"]
        now = time.time()

        print(now - lasttime)
        if now - lasttime < 100:
            # Only scrobble if we've just been playing
            if lasttrack != "" and lasttrack != track:
                print(u"Last track was %s now %s , scrobbling"
                      % (lasttrack, track))
            else:
                print("same song")
        else:
            print("too long since we last did it, %s" % (now - lasttime,))

        # Titles from the lookup may contain non-ASCII characters, so the
        # record is encoded explicitly instead of relying on the default
        # codec.
        record = u"%d\n%s\n%s" % (now, artist, track)
        with open(statusfile, "wb") as fp:
            fp.write(record.encode("utf8"))
64 |
# Script entry point: expects the audio file to identify as the only argument.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <file>\n" % sys.argv[0])
        # A usage error is a failure, so exit with a non-zero status.
        sys.exit(1)
    main(sys.argv[1])
70 |
--------------------------------------------------------------------------------
/lastfm.py:
--------------------------------------------------------------------------------
1 | import urllib2
2 | import urllib
3 | import urlparse
4 | import xml.etree.ElementTree
5 | import re
6 | from htmlentitydefs import name2codepoint
7 | import hashlib
8 | import sys
9 | import time
10 | import os
11 |
# Last.fm API credentials used to build and sign requests.
key = "5071d44086caaeb4dce9fba2053ac768"
secret = "4c2b1b277e94f73d50429f891185db3f"

# Session key read from ~/.lastfmsess; stays None while that file does not
# exist, so the name is always defined.
session = None
if os.path.exists(os.path.expanduser("~/.lastfmsess")):
    with open(os.path.expanduser("~/.lastfmsess"), "r") as sessfp:
        session = sessfp.read().strip()
19 |
def htmlentitydecode(s):
    """Replace every named HTML entity in ``s`` with the character it names.

    Only ``&name;`` forms whose name is a key of ``name2codepoint`` are
    replaced; everything else is returned untouched.
    """
    pattern = '&(%s);' % '|'.join(name2codepoint)

    def expand(match):
        return unichr(name2codepoint[match.group(1)])

    return re.sub(pattern, expand, s)
24 |
def clean_trackid(trackid):
    """Extract the UUID-shaped id embedded in ``trackid``.

    trackid -- any string containing a hexadecimal id in 8-4-4-4-12 form,
               for example a URL ending in the id.

    Returns the id exactly as it appears in the input.
    Raises ValueError when no such id is present.
    """
    m = re.match(
        r".*([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}"
        r"-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}).*",
        trackid)
    if m is None:
        # Explicit check: an assert would vanish under ``python -O``.
        raise ValueError("no track id found in %r" % (trackid,))
    return m.group(1)
30 |
def _cleanname(x):
    """Return ``x`` with HTML entities decoded, or '' when ``x`` is None."""
    return '' if x is None else htmlentitydecode(x)
35 |
def _etree_to_dict(etree):
    """Convert the children of an element into a dict of lists keyed by tag.

    Children that themselves have children are converted recursively; leaf
    children contribute their text passed through ``_cleanname``.  Values for
    repeated tags are appended in document order.
    """
    result = {}
    for child in etree:
        bucket = result.setdefault(child.tag, [])
        if len(child):
            bucket.append(_etree_to_dict(child))
        else:
            bucket.append(_cleanname(child.text))
    return result
46 |
def _do_raw_lastfm_query(url):
    """GET ``url`` and return the XML reply converted by ``_etree_to_dict``.

    The request is sent with the ``Scrobbyl`` user agent.  Errors raised by
    ``urllib2.urlopen`` propagate to the caller unchanged.
    """
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Scrobbyl')
    response = urllib2.urlopen(request)
    try:
        tree = xml.etree.ElementTree.ElementTree(file=response)
    finally:
        # Release the connection even when the reply is not valid XML.
        response.close()
    return _etree_to_dict(tree.getroot())
58 |
def _do_lastfm_post(url, data):
    """POST the url-encoded ``data`` to ``url``; the reply body is ignored.

    The request is sent with the ``Scrobbyl`` user agent.  Errors raised by
    ``urllib2.urlopen`` propagate to the caller unchanged.  Returns None.
    """
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Scrobbyl')
    response = urllib2.urlopen(request, data)
    # Nothing is read from the reply; close it so the socket is released.
    response.close()
66 |
67 |
def _do_lastfm_query(type, method, **kwargs):
    """Send one Last.fm 2.0 API call.

    type   -- ``"GET"`` or ``"POST"``.
    method -- API method name, e.g. ``"auth.getToken"``.
    kwargs -- extra parameters; each value is encoded to UTF-8.

    When an ``sk`` or ``token`` parameter is present the call is signed:
    ``api_sig`` is the MD5 hex digest of every ``name + value`` pair in
    sorted name order followed by the shared secret.

    Returns the parsed reply for GET and None for POST.
    Raises ValueError for any other ``type``.
    """
    args = {
        "method": method,
        "api_key": key,
    }
    for k, v in kwargs.items():
        args[k] = v.encode("utf8")

    if "sk" in args or "token" in args:
        s = "".join(k + args[k] for k in sorted(args)) + secret
        args["api_sig"] = hashlib.md5(s).hexdigest()

    if type == "GET":
        url = urlparse.urlunparse(('http',
                                   'ws.audioscrobbler.com',
                                   '/2.0/',
                                   '',
                                   urllib.urlencode(args),
                                   ''))
        return _do_raw_lastfm_query(url)
    elif type == "POST":
        url = urlparse.urlunparse(('http',
                                   'ws.audioscrobbler.com',
                                   '/2.0/', '', '', ''))
        _do_lastfm_post(url, urllib.urlencode(args))
    else:
        # Previously an unknown type sent nothing and returned None.
        raise ValueError("unsupported request type %r" % (type,))
95 |
def _get_auth_token():
    """Call ``auth.getToken`` and return the first ``token`` value of the reply."""
    reply = _do_lastfm_query("GET", "auth.getToken")
    tokens = reply["token"]
    return tokens[0]
99 |
def _make_session(token):
    """Exchange ``token`` for a session key and store it in ``~/.lastfmsess``.

    Calls ``auth.getSession``, writes the returned key to the session file
    and prints a confirmation.  Errors from the API call propagate.
    """
    sess = _do_lastfm_query("GET", "auth.getSession", token=token)
    # Named so it cannot be confused with the module-level API ``key``.
    session_key = sess["session"][0]["key"][0]
    with open(os.path.expanduser("~/.lastfmsess"), "w") as fp:
        fp.write(session_key)
    print("session successfully created. thank you.")
107 |
def scrobble(artist, track):
    """Submit ``track`` by ``artist`` to Last.fm via ``track.scrobble``.

    The reported timestamp is the current time minus 100 seconds.

    Raises RuntimeError when no session key has been loaded, i.e. the
    account has not been linked yet.
    """
    # The module-level session only exists after a successful "auth" run.
    sk = globals().get("session")
    if not sk:
        raise RuntimeError(
            "no last.fm session; run 'python lastfm.py auth' first")
    # OK, not the real start time. but that'll do
    ts = "%d" % (time.time() - 100)
    _do_lastfm_query("POST", "track.scrobble", timestamp=ts, artist=artist,
                     track=track, sk=sk)
112 |
# Script entry point.  "auth" links the account; without arguments a fixed
# test track is scrobbled.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        if sys.argv[1] == "auth":
            t = _get_auth_token()
            print("Please open this url then come back to this window: http://www.last.fm/api/auth/?api_key=%s&token=%s" % (key, t))
            # Poll for the session a bounded number of times; the previous
            # counter was never advanced, so a failing call retried forever.
            for attempt in range(5):
                try:
                    _make_session(t)
                    break
                except urllib2.HTTPError as e:
                    print(e.code)
                    time.sleep(10)
            else:
                sys.stderr.write("giving up: token was not authorised\n")
                sys.exit(1)
    else:
        scrobble("The New Pornographers", "Failsafe")
128 |
129 |
--------------------------------------------------------------------------------
/parsemp3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import StringIO
3 |
4 | versiontbl = [ 2.5, 0, 2, 1 ]
5 | layertbl = [ 0, 3, 2, 1 ]
6 |
7 | bitratetbl = [
8 | [ ],
9 | [ [], # Version 1
10 | [ 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448 ], #l1
11 | [ 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384 ], #l2
12 | [ 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320 ], #l3
13 | ],
14 | [ [], # Version 2
15 | [ 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256 ], #l1
16 | [ 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 ], #l2
17 | [ 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 ], #l3
18 | ],
19 | ]
20 |
21 | sampleratetbl = [
22 | [ ],
23 | [ 44100, 48000, 32000 ],
24 | [ 22050, 24000, 16000 ],
25 | ]
26 |
27 | v2_2_0_to_v2_4_0 = {
28 | "BUF":"RBUF", # Recommended Buffer Size
29 | "CNT":"PCNT", # Play Count
30 | "COM":"COMM", # Comments
31 | "CRA":"AENC", # Audio Encryption
32 | # "CRM":, # Encrypted Meta Frame
33 | # "ETC":, # Equalisation timing codes
34 | "EQU":"EQUA", # Equalisation
35 | "GEO":"GEOB", # General Encapsulation Object
36 | "IPL":"IPLS", # Involved People List
37 | "LNK":"LINK", # Linked Information
38 | "MCI":"MCDI", # Music CD Identifier
39 | "MUL":"MLLT", # MPEG Location Lookup Table
40 | "PIC":"APIC", # Attached Picture
41 | "POP":"POPM", # Popularimeter
42 | "REV":"RVRB", # Reverb
43 | "RVA":"RVAD", # Relative Volume Adjustment
44 | "SLT":"SYLT", # Syncronised Text/Lyrics
45 | "STC":"SYTC", # Synconrised Tempo Codes
46 | "TAL":"TALB", # Album/Movie/Show Title
47 | "TBP":"TBPM", # BPM
48 | "TCM":"TCOM", # Composer
49 | "TCO":"TCON", # Content Type
50 | "TCP":"TCP", # Compilation. DEPRECATED IN 2.4
51 | "TCR":"TCOP", # Copyright Message
52 | # "TDA":, # Textual Data
53 | "TDA":"TDAT", # Date
54 | "TPL":"TDLY", # Playlist Delay
55 | "TEN":"TENC", # Encoded by
56 | "TFT":"TFLT", # File type
57 | "TIM":"TIME", # Time
58 | "TKE":"TKEY", # Initial key
59 | "TLA":"TLAN", # Language(s)
60 | "TLE":"TLEN", # Length
61 | "TMT":"TMED", # Media type
62 | "TOA":"TOPE", # Original artist(s)/performer(s)
63 | "TOF":"TOFN", # Original filename
64 | "TOL":"TOLY", # Original Lyricist(s)/text writer(s)
65 | "TOR":"TORY", # Original release year
66 | "TOT":"TOAL", # Original album/Movie/Show title
67 | "TP1":"TPE1", # Lead artist(s)/Lead performer(s)/Soloist(s)/Performing group
68 | "TP2":"TPE2", # Band/Orchestra/Accompaniment
69 | "TP3":"TPE3", # Conductor/Performer refinement
70 | "TP4":"TPE4", # Interpreted, remixed, or otherwise modified by
71 | "TPA":"TPOS", # Part of a set
72 | "TPB":"TPUB", # Publisher
73 | "TRC":"TSRC", # ISRC (International Standard Recording Code)
74 | "TRD":"TRDA", # Recording dates
75 | "TRK":"TRCK", # Track number/Position in set
76 | "TSI":"TSIZ", # Size
77 | "TSS":"TSSE", # Software/hardware and settings used for encoding
78 | "TT1":"TIT1", # Content group description
79 | "TT2":"TIT2", # Title/Songname/Content description
80 | "TT3":"TIT3", # Subtitle/Description refinement
81 | "TXT":"TEXT", # Lyricist/text writer
82 | "TXX":"TXXX", # User defined text information frame
83 | "TYE":"TYER", # Year
84 | "UFI":"UFID", # Unique file identifier
85 | "ULT":"USLT", # Unsychronized lyric/text transcription
86 | "WAF":"WOAF", # Official audio file webpage
87 | "WAR":"WOAR", # Official artist/performer webpage
88 | "WAS":"WOAS", # Official audio source webpage
89 | "WCM":"WCOM", # Commercial information
90 | "WCP":"WCOP", # Copyright/Legal information
91 | "WPB":"WPUB", # Publishers official webpage
92 | "WXX":"WXXX", # User defined URL link frame
93 | "CM1":"CM1", # Apple proprietary rating/popularity
94 | }
95 |
# ID3v1 genre names; the list index is the genre byte stored in the tag.
# Several spellings ("Bebob", "Psychadelic", the abbreviated "... Rock"
# names) are kept exactly as they were.
genres = [
    u"Blues",u"Classic Rock",u"Country",u"Dance",u"Disco",u"Funk",
    u"Grunge",u"Hip-Hop",u"Jazz",u"Metal",u"New Age",u"Oldies",u"Other",
    u"Pop",u"R&B",u"Rap",u"Reggae",u"Rock",u"Techno",u"Industrial",
    u"Alternative",u"Ska",u"Death Metal",u"Pranks",u"Soundtrack",
    u"Euro-Techno",u"Ambient",u"Trip-Hop",u"Vocal",u"Jazz+Funk",u"Fusion",
    u"Trance",u"Classical",u"Instrumental",u"Acid",u"House",u"Game",
    u"Sound Clip",u"Gospel",u"Noise",u"Alt. Rock",u"Bass",u"Soul",
    u"Punk",u"Space",u"Meditative",u"Instrum. Pop",u"Instrum. Rock",
    u"Ethnic",u"Gothic",u"Darkwave",u"Techno-Industrial",u"Electronic",
    u"Pop-Folk",u"Eurodance",u"Dream",u"Southern Rock",u"Comedy",
    u"Cult",u"Gangsta",u"Top 40",u"Christian Rap",u"Pop/Funk",u"Jungle",
    u"Native American",u"Cabaret",u"New Wave",u"Psychadelic",u"Rave",
    u"Showtunes",u"Trailer",u"Lo-Fi",u"Tribal",u"Acid Punk",u"Acid Jazz",
    u"Polka",u"Retro",u"Musical",u"Rock & Roll",u"Hard Rock",u"Folk",
    u"Folk/Rock",u"National Folk",u"Swing",u"Fast Fusion",u"Bebob",u"Latin",
    u"Revival",u"Celtic",u"Bluegrass",u"Avantgarde",u"Gothic Rock",
    u"Progress. Rock",u"Psychadel. Rock",u"Symphonic Rock",u"Slow Rock",
    u"Big Band",u"Chorus",u"Easy Listening",u"Acoustic",u"Humour",
    u"Speech",u"Chanson",u"Opera",u"Chamber Music",u"Sonata",u"Symphony",
    u"Booty Bass",u"Primus",u"Porn Groove",u"Satire",u"Slow Jam",
    u"Club",u"Tango",u"Samba",u"Folklore",u"Ballad",u"Power Ballad",
    u"Rhythmic Soul",u"Freestyle",u"Duet",u"Punk Rock",u"Drum Solo",
    u"A Capella",u"Euro-House",u"Dance Hall",u"Goa",u"Drum & Bass",
    u"Club-House",u"Hardcore",u"Terror",u"Indie",u"BritPop",u"Negerpunk",
    u"Polsk Punk",u"Beat",u"Christian Gangsta Rap",u"Heavy Metal",
    u"Black Metal",u"Crossover",u"Contemporary Christian",u"Christian Rock",
    u"Merengue",u"Salsa",u"Thrash Metal",u"Anime",u"Jpop",u"Synthpop"
]
125 |
126 | tagconvertfrom={
127 | "v1" : {
128 | "TIT2" : "TIT2",
129 | "TYER" : "TYER",
130 | "TRCK" : "TRCK",
131 | "TPE1" : "TPE1",
132 | "TALB" : "TALB",
133 | "COMM" : "", # Doesn't match exactly with ID3v2
134 | "TCON" : "TCON",
135 | },
136 | "v2" : {
137 | "TYER" : "TYER",
138 | "TIT2" : "TIT2",
139 | "TRCK" : "TRCK",
140 | "TPE1" : "TPE1",
141 | "TALB" : "TALB",
142 | "COMM" : "COMM",
143 | "TCON" : "TCON",
144 | },
145 | "ape" : {
146 | "title" : "TIT2",
147 | "Track" : "TIT2",
148 | "Album" : "TALB",
149 | "album" : "TALB",
150 | "artist": "TPE1",
151 | "Year" : "TYER",
152 | "genre" : "TCON",
153 | },
154 | "lyrics" : {
155 | "IND" : "", # Indication, specific to lyrics v2
156 | "EAL" : "TALB",
157 | "ETT" : "TIT2",
158 | "EAR" : "TPE1",
159 | "INF" : "", # Doesn't match exactly with COMM
160 | "CRC" : "", # The CRC is pretty annoying.
161 | },
162 | }
163 |
164 | tagconvertto={}
165 |
166 | for tagtype in tagconvertfrom:
167 | tagconvertto[tagtype]={}
168 | for tagname in tagconvertfrom[tagtype]:
169 | tagconvertto[tagtype][tagconvertfrom[tagtype][tagname]]=tagname
170 |
def strip_padding(x):
    """Return ``x`` with every trailing NUL character removed."""
    # One pass instead of re-slicing the string once per padding byte.
    return x.rstrip("\x00")
175 |
def parse_unicode(x):
    """Decode UTF-16 bytes ``x`` to text.

    Data starting with the bytes FF FE is decoded as little-endian, anything
    else as big-endian.  Leading U+FEFF characters (the decoded byte-order
    mark) are removed from the result.
    """
    codec = "utf-16-le" if x.startswith(b"\xff\xfe") else "utf-16-be"
    return x.decode(codec).lstrip(u"\ufeff")
185 |
186 | # How many bloody versions of id3 do we REALLY need?!
187 |
def v1(tag):
    """Decode the body of an ID3v1 tag.

    tag -- the 125 bytes that follow the ``TAG`` marker.

    Returns a dict with the text fields decoded as ISO-8859-1 and stripped of
    NUL padding: TIT2, TPE1, TALB, TYER, COMM (a ``(description, text)``
    tuple) and TRCK (the byte at offset 123 as a decimal string).  TCON is
    added as ``"(<id>)<name>"`` unless the genre byte is 0xff.

    Raises ValueError when ``tag`` is not exactly 125 bytes long.
    """
    if len(tag) != 128 - 3:
        raise ValueError(
            "ID3v1 tag body must be 125 bytes, got %d" % len(tag))

    def field(start, end):
        return tag[start:end].rstrip(b"\x00").decode("ISO-8859-1")

    raw = bytearray(tag)  # integer access to the two numeric bytes
    data = {
        "TIT2": field(0, 30),
        "TPE1": field(30, 60),
        "TALB": field(60, 90),
        "TYER": field(90, 94),
        "COMM": (u"From ID3v1", field(94, 122)),
        "TRCK": u"%d" % raw[123],
    }
    genreid = raw[124]
    if genreid != 0xff:
        # The genre byte is an index into ``genres``; a membership test
        # against the names never matched.
        if genreid < len(genres):
            genrename = genres[genreid]
        else:
            genrename = u"unknowngenre#%d" % genreid
        data["TCON"] = u"(%d)%s" % (genreid, genrename)
    return data
210 |
211 |
def v2_2_0(tag):
    """Parse the frames of an ID3v2.2 tag body.

    tag -- the tag bytes following the 10-byte ID3v2 header.

    Each frame is a 3-character id, a 3-byte big-endian size and the payload.
    Text frames (ids starting with ``T``) are decoded according to their
    encoding byte; ``COM`` frames become a ``(type, text)`` tuple split at
    the first NUL; other payloads are stored raw.  Results are keyed by the
    id that ``v2_2_0_to_v2_4_0`` maps the frame id to; unmapped frames are
    reported on stdout and dropped.

    Raises ValueError for non-NUL data after the padding starts or for an
    unknown text encoding byte.
    """
    data = {}
    while tag != "":
        if tag[0] == "\x00":
            # A NUL where a frame id should start means padding; nothing but
            # NULs may follow.
            if tag.lstrip("\x00") != "":
                raise ValueError("Err, padding had data in it?")
            break
        if len(tag) < 6:
            # Truncated frame header: nothing more can be decoded.
            break

        tagid = tag[:3]
        # ID3v2.2 frame sizes are plain 24-bit integers (8 bits per byte).
        taglen = ord(tag[3]) * 256 * 256 + ord(tag[4]) * 256 + ord(tag[5])
        tagdata = tag[6:6 + taglen]
        tag = tag[6 + taglen:]

        if tagid.startswith("T"):
            encoding = tagdata[:1]
            text = tagdata[1:]
            if encoding == "\x00": # latin-1
                if "\x00" in text:
                    text = text[:text.index("\x00")]
                tagdata = text.decode("ISO-8859-1")
            elif encoding == "\x01": # utf16
                # Trailing NULs are left alone: cutting at the first NUL byte
                # would eat into the UTF-16 data itself.
                tagdata = parse_unicode(text)
            elif encoding == "":
                # Zero-length text frame.
                tagdata = u""
            else:
                raise ValueError("Unknown Encoding")
        elif tagid == "COM":
            encoding = tagdata[:1]
            commenttype, commentdata = tagdata[1:].split("\x00", 1)
            if encoding == "\x00":
                commenttype = commenttype.decode("ISO-8859-1")
                commentdata = commentdata.decode("ISO-8859-1")
            elif encoding == "\x01":
                commenttype = parse_unicode(commenttype)
                commentdata = parse_unicode(commentdata)
            else:
                raise ValueError("Unknown encoding")
            tagdata = (commenttype, commentdata)

        if tagid not in v2_2_0_to_v2_4_0:
            print("Unknown ID3v2.2.0 tag: %r" % (tagid,))
        else:
            data[v2_2_0_to_v2_4_0[tagid]] = tagdata
    return data
263 |
def v2_3_0(tag, version):
    """Parse the frames of an ID3v2.3 or ID3v2.4 tag body.

    tag     -- the tag bytes following the 10-byte ID3v2 header.
    version -- 3 or 4; a float such as 3.0 is accepted and truncated.

    Each frame is a 4-character id, a 4-byte size (8 bits per byte for v2.3,
    7 bits per byte for v2.4), two flag bytes and the payload.  Text frames
    (ids starting with ``T``) are decoded according to their encoding byte;
    other payloads are stored raw.  A frame id seen more than once with
    differing values is collected into a list.

    Raises ValueError for an unsupported version or text encoding byte.
    """
    major = int(version)
    if major not in (3, 4):
        raise ValueError("unsupported ID3v2 version %r" % (version,))

    data = {}
    while tag:
        if tag[:1] == b"\x00":
            # Padding.  mp3ext puts data after it, so whatever follows is
            # ignored rather than treated as an error.
            break
        if len(tag) < 10:
            # Truncated frame header: nothing more can be decoded.
            break

        tagid = tag[:4]
        size = bytearray(tag[4:8])
        if major == 3:
            taglen = (size[0] * 256 * 256 * 256 + size[1] * 256 * 256
                      + size[2] * 256 + size[3])
        else:
            taglen = (size[0] * 128 * 128 * 128 + size[1] * 128 * 128
                      + size[2] * 128 + size[3])
        tagdata = tag[10:10 + taglen]
        tag = tag[10 + taglen:]

        if tagid.startswith(b"T"):
            encoding = tagdata[:1]
            text = tagdata[1:]
            if encoding == b"\x00": # latin-1
                tagdata = text.decode("ISO-8859-1")
            elif encoding == b"\x01": # utf16 with byte-order mark
                tagdata = parse_unicode(text)
            elif encoding == b"\x02": # utf16 big-endian, no byte-order mark
                tagdata = text.decode("utf-16-be")
            elif encoding == b"\x03": # utf8
                tagdata = text.decode("utf8")
            elif encoding == b"":
                # Zero-length text frame.
                tagdata = u""
            else:
                raise ValueError("Unknown Encoding %r" % (encoding,))

        if tagid in data and isinstance(data[tagid], list):
            data[tagid].append(tagdata)
        elif tagid in data and data[tagid] != tagdata:
            data[tagid] = [data[tagid], tagdata]
        else:
            data[tagid] = tagdata
    return data
306 |
def lyricsv2(data):
    """Split a Lyrics3 v2 block into a dict of its fields.

    ``data`` must start with ``LYRICSBEGIN``.  What follows is read as a
    sequence of fields, each made of a 3-character id, a 5-digit decimal
    size and that many characters of content.
    """
    assert data.startswith("LYRICSBEGIN"), repr(data)
    rest = data[11:]
    fields = {}
    while rest:
        size = int(rest[3:8])
        stop = 8 + size
        fields[rest[:3]] = rest[8:stop]
        rest = rest[stop:]
    return fields
318 |
def apev2(ape):
    """Parse the items of an APEv2 tag into a ``{key: value}`` dict.

    ape -- the tag bytes: a leading 32-byte block, the items, and the
           32-byte footer that starts with ``APETAGEX``.

    Each item is a 4-byte little-endian value length, 4 flag bytes, a
    NUL-terminated key and the value.  Keys and values are returned as raw
    bytes.

    Raises ValueError when the last 32 bytes are not an APE footer.
    """
    if not ape[-32:].startswith(b"APETAGEX"):
        raise ValueError("missing APETAGEX footer: %r" % (ape[-32:],))

    # Drop the leading block and the footer; leaving the footer in place made
    # it parse as a bogus extra item.
    ape = ape[32:-32]
    data = {}
    while len(ape) >= 8:
        size = bytearray(ape[:4])
        apelen = (size[3] * 256 * 256 * 256 + size[2] * 256 * 256
                  + size[1] * 256 + size[0])
        apekey, ape = ape[8:].split(b"\x00", 1)
        data[apekey] = ape[:apelen]
        ape = ape[apelen:]
    return data
331 |
332 | # Just read id3 tags, skipping the bitstream and any other tag types
def readid3(fname):
    """Read only the ID3v1 and ID3v2 tags of ``fname``.

    The audio stream and any other tag types are skipped.

    Returns ``{"v2": <dict>, "v1": <dict>}``; a dict is empty when the
    corresponding tag is absent.  When an ID3v2 tag is present its dict also
    carries a ``"version"`` entry of the form ``ID3v2.<major>.<revision>``.
    """
    with open(fname, "rb") as f:
        f.seek(0, 2)
        size = f.tell()

        v1data = {}
        # Seeking 128 bytes back from the end fails on shorter files, which
        # cannot hold an ID3v1 tag anyway.
        if size >= 128:
            f.seek(-128, 2)
            id3v1 = f.read(128)
            if id3v1.startswith(b"TAG"):
                v1data = v1(id3v1[3:])

        f.seek(0)

        # Find a ID3v2 tag
        v2data = {}
        header = f.read(10)
        if len(header) == 10 and header.startswith(b"ID3"):
            raw = bytearray(header)
            major = raw[3]
            revision = raw[4]
            # raw[5] holds the tag flags, which are not interpreted here.
            id3len = (raw[6] * 128 * 128 * 128 + raw[7] * 128 * 128
                      + raw[8] * 128 + raw[9])
            tag = f.read(id3len)
            if major == 2:
                v2data = v2_2_0(tag)
            elif major in (3, 4):
                # Pass the integer major version so the frame parser's
                # version test holds whatever the revision byte is.
                v2data = v2_3_0(tag, major)
            else:
                print("Unknown tag version ID3v2. %d" % major)
                v2data = {}
            v2data["version"] = u"ID3v2.%d.%d" % (major, revision)

    return {
        "v2": v2data,
        "v1": v1data,
    }
372 |
def parsemp3(fname):
    """Parse an MP3 file: its tags plus a frame-by-frame scan of the stream.

    The whole file is read into memory.  An ID3v1 tag, Lyrics3 v2.00 blocks
    and APEv2 tags are peeled off the end, an ID3v2 tag is read from the
    start, and the remaining bytes are scanned for MPEG audio frame headers.

    Returns a dict with these keys:
      filename  -- ``fname`` as given
      duration  -- summed frame durations in milliseconds
      frames    -- number of frame headers accepted
      bitrates  -- ``{bitrate in bit/s: frame count}``
      layers    -- ``{version: {layer: frame count}}``
      v1, v2, lyrics, ape -- parsed tag dicts (empty when absent)
      unknown   -- list of ``(offset, bytes)`` runs that were not frames
      errors    -- list of ``("error", message)`` tuples
      bitstream -- the bytes consumed by the scan
    """
    fin = open(fname, "rb")
    try:
        contents = fin.read()
    finally:
        fin.close()
    f = StringIO.StringIO(contents)

    # Decode the ID3v1
    f.seek(-128, 2)
    id3v1 = f.read(128)
    flength = f.tell()
    if len(id3v1) == 128 and id3v1.startswith("TAG"):
        v1data = v1(id3v1[3:])
        flength -= 128
    else:
        v1data = {}

    # Decode as many of the tags at the end that we can
    lyricsdata = {}
    apedata = {}
    while 1:
        # Decode a lyrics v2.0 tag
        f.seek(flength - 9)
        lyrics = f.read(9)
        if lyrics == "LYRICS200":
            f.seek(flength - 9 - 6)
            read = int(f.read(6))
            f.seek(flength - (9 + read + 6))
            data = f.read(read)
            flength -= (9 + 6 + read)
            lyricsdata = lyricsv2(data)
            continue

        # Decode a APEv2 tag at the end
        f.seek(flength - 32)
        ape = f.read(32)
        if ape.startswith("APETAGEX"):
            # Bytes 12-15 of the footer: little-endian tag size.
            apelen = (ord(ape[15]) * 256 * 256 * 256 + ord(ape[14]) * 256 * 256
                      + ord(ape[13]) * 256 + ord(ape[12]))
            flength -= 32
            flength -= apelen
            f.seek(flength)
            apedata = apev2(f.read(apelen + 32))
            continue

        break

    # Goto the start of the file
    f.seek(0)

    # Find a ID3v2 tag
    header = f.read(10)
    if len(header) == 10 and header.startswith("ID3"):
        major = ord(header[3])
        revision = ord(header[4])
        # header[5] holds the tag flags, which are not interpreted here.
        id3len = (ord(header[6]) * 128 * 128 * 128 + ord(header[7]) * 128 * 128
                  + ord(header[8]) * 128 + ord(header[9]))
        tag = f.read(id3len)
        if major == 2:
            v2data = v2_2_0(tag)
        elif major in (3, 4):
            # Pass the integer major version so the frame parser's version
            # test holds whatever the revision byte is.
            v2data = v2_3_0(tag, major)
        else:
            print("Unknown tag version ID3v2. %d" % major)
            v2data = {}
        v2data["version"] = u"ID3v2.%d.%d" % (major, revision)
    else:
        f.seek(0)
        v2data = {}

    # Start decoding the mp3 stream
    chunks = []      # pieces of the scanned stream, joined once at the end
    frames = 0
    duration = 0
    bitrates = {}
    layers = {}
    unknowns = []
    junk = []        # bytes seen since the last frame that are not a header
    errors = []
    offset = f.tell()
    while f.tell() < flength:
        # skip until we find an mpeg frame header
        b = f.read(1)
        if b == "":
            print("Expected %d more bytes!" % (flength - f.tell()))
            break
        chunks.append(b)
        if b != "\xff":
            junk.append(b)
            continue

        b = f.read(1)
        if b == "":
            chunks.pop() # strip off the incomplete header
            print("Truncated header")
            break
        chunks.append(b)
        second = ord(b)
        if second & 0xe0 != 0xe0:
            junk.append("\xff" + b)
            continue

        if junk:
            unknowns.append((offset, "".join(junk)))
            junk = []
        offset = f.tell()

        # Now we've found mpeg header
        version = versiontbl[(second >> 3) & 0x03]
        layer = layertbl[(second >> 1) & 0x03]

        b = f.read(1)
        if b == "":
            # The data ended inside the 4-byte header.
            print("Truncated header")
            break
        chunks.append(b)
        third = ord(b)
        try:
            bitrate = bitratetbl[version][layer][(third >> 4) & 0x0F] * 1000
        except (IndexError, TypeError):
            # IndexError: reserved version/layer/bitrate index.
            # TypeError: version 2.5 is a float and cannot index the table.
            errors.append(("error", "Unknown bitrate, V%d/%d enc: %d" %
                           (version, layer, (third >> 4) & 0x0f)))
            continue
        try:
            samplerate = sampleratetbl[version][(third >> 2) & 0x03]
        except (IndexError, TypeError):
            errors.append(("error", "Unknown samplerate, v%d enc: %d" %
                           (version, (third >> 2) & 0x03)))
            continue

        padding = (third >> 1) & 0x01

        # The fourth header byte (channel mode, copyright, emphasis...) is
        # consumed but none of its fields are used.
        b = f.read(1)
        if b == "":
            print("Truncated header")
            break
        chunks.append(b)

        bitrates[bitrate] = bitrates.get(bitrate, 0) + 1

        # Record the layers used
        per_version = layers.setdefault(version, {})
        per_version[layer] = per_version.get(layer, 0) + 1

        if layer == 1:
            framelengthinbytes = (12 * bitrate // samplerate + padding) * 4
        elif layer == 3 and version == 2:
            # MPEG-2 Layer III frames carry 576 samples, half of MPEG-1's.
            framelengthinbytes = 72 * bitrate // samplerate + padding
        else:
            framelengthinbytes = 144 * bitrate // samplerate + padding

        # Durations are in milliseconds
        if framelengthinbytes != 0 and bitrate != 0:
            duration += framelengthinbytes * 8.0 * 1000 / bitrate
        frames += 1

        # Skip the frame body.  Both lengths are clamped at zero: a negative
        # read size would swallow the rest of the file.
        wanted = max(framelengthinbytes - 4, 0)
        skip = f.read(min(wanted, max(flength - f.tell(), 0)))
        chunks.append(skip)
        if len(skip) != wanted:
            # Truncated frame.
            break

    if junk:
        unknowns.append((frames, "".join(junk)))

    return {
        "filename": fname,
        "duration": duration,
        "frames": frames,
        "bitrates": bitrates,
        "v1": v1data,
        "v2": v2data,
        "lyrics": lyricsdata,
        "unknown": unknowns,
        "ape": apedata,
        "errors": errors,
        "layers": layers,
        "bitstream": "".join(chunks),
    }
586 |
587 | def validate(song):
588 | errors=[]
589 | for i in ["v1","v2","ape","lyrics"]:
590 | for j in song[i]:
591 | if type(song[i][j])==type(u"") and song[i][j].strip()!=song[i][j]:
592 | errors.append(("warning","%s %s tag has bad whitespace (%s)" % (i,j,`song[i][j]`)))
593 | for j in ["v1","v2","ape","lyrics"]:
594 | # No point in comparing stuff against itself
595 | if i>=j:
596 | continue
597 | for itagname in song[i]:
598 | try:
599 | v2tagname=tagconvertfrom[i][itagname]
600 | except:
601 | print "Unknown tag %s: %s (%s)" % (`i`,`itagname`,song[i][itagname])
602 | continue
603 | # No tag that means this?
604 | if v2tagname=="":
605 | continue
606 | try:
607 | jtagname=tagconvertto[j][v2tagname]
608 | except:
609 | print "Unknown tag %s: %s" % (`j`,`v2tagname`)
610 |
611 | if jtagname not in song[j]:
612 | continue
613 |
614 | itagvalue=song[i][itagname]
615 | jtagvalue=song[j][jtagname]
616 | # v1 is truncated, so ignore the rest of the tag
617 | if i=="v1" and v2tagname in ["TIT2","IDE1","TALB"]:
618 | jtagvalue=jtagvalue[:30]
619 | if j=="v1" and v2tagname in ["TIT2","IDE1","TALB"]:
620 | itagvalue=itagvalue[:30]
621 |
622 | # Track numbers should be treated as numbers
623 | itagvala = 0
624 | itagvalb = 0
625 | jtagvala = 0
626 | jtagvalb = 0
627 | if jtagvalue=="":
628 | jtagvalue="0"
629 | if v2tagname=="TRCK":
630 | # TODO: Deal with x/y
631 | if type(itagvalue) == type(u"") and itagvalue.find("/") != -1:
632 | [itagvala, itagvalb] = itagvalue.split("/")
633 | else:
634 | itagvala = itagvalb = int(itagvalue)
635 | if type(jtagvalue) == type(u"") and jtagvalue.find("/") != -1:
636 | [jtagvala, jtagvalb] = jtagvalue.split("/")
637 | else:
638 | jtagvala = jtagvalb = int(jtagvalue)
639 |
640 | # Do comparisons
641 | if itagvala == jtagvala and itagvalb == jtagvalb:
642 | continue
643 | # Is one tag truncated?
644 | if len(itagvalue)