├── ExamplePlaylist.txt ├── .gitignore ├── test ├── atestframe.py ├── z-README.txt └── test-common.py ├── LICENSE ├── ExamplePlaylist.csv ├── preferences.py ├── README.md ├── ExportLists.py ├── common.py └── ImportList.py /ExamplePlaylist.txt: -------------------------------------------------------------------------------- 1 | ExamplePlaylist.csv -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.log 3 | *.pyc 4 | 5 | !ExamplePlaylist.csv 6 | -------------------------------------------------------------------------------- /test/atestframe.py: -------------------------------------------------------------------------------- 1 | # put the parent directory onto the path 2 | from os import sys, path 3 | sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) 4 | import unittest 5 | 6 | def run_test(): 7 | unittest.main(verbosity=2) 8 | -------------------------------------------------------------------------------- /test/z-README.txt: -------------------------------------------------------------------------------- 1 | run tests as regular python executables like so: 2 | python test* 3 | 4 | if you have the coverage.py script installed 5 | the tests can be run with coverage info like so: 6 | python -m coverage run --branch test* 7 | python -m coverage html -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 John Elkins 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /ExamplePlaylist.csv: -------------------------------------------------------------------------------- 1 | ,test comment (and blank track) 2 | 3 | ,test fuzzy artist title search 4 | stray cats stray cat strut 5 | 6 | ,test fuzzy title artist search 7 | just what i needed the cars 8 | 9 | ,test fuzzy search for song that should return a low match 10 | instant karma! we all shine on john lennon 11 | 12 | ,"test detailed library search (this song isn't in aa, but it's in my library)" 13 | classical gas,vanessa-mae 14 | 15 | ,test detailed all access search (the top fuzzy result is incorrect) 16 | am/fm,!!!,strange weather 17 | 18 | ,this should return a low result 19 | back in black,ac/dc,back in black 20 | 21 | ,test detailed search which should return song from library if you have it 22 | orion,metallica,master of puppets 23 | 24 | ,test album distinction and slight artist mismatch 25 | Moments in Love,The Art of Noise,And What Have You Done with My Body God? 26 | Moments in Love,The Art of Noise,Daft 27 | 28 | ,test slight title mismatch 29 | Making Love Out of Nothing at All,Air Supply,Ultimate Air Supply 30 | 31 | ,"test low score, mismatched title, and mistmatched artist, and comma in entry" 32 | Blame It on the Rain,Milli Vanilli,Greatest Hits 33 | 34 | ,"test low score, mismatched title, mismatched artist, mismatched song, and entry comma" 35 | 1o1,Chris Duarte Groop,Ronp 36 | 37 | ,test useless info in brackets and duplicate checks 38 | 1o1 (Live!) [In Concert] {World Tour},Chris Duarte Groop,Ronp 39 | 40 | ,test title only search 41 | Be Thou My Vision,Dallan Forgaill, 42 | 43 | ,test initial unmatched fuzzy with info in brackets 44 | stray cats (asdfDoNotMatchMe1234) stray cat strut 45 | 46 | , 47 | ,expected results 48 | ,13/15 tracks imported 49 | ,2 duplicate tracks 50 | , 51 | 52 | -------------------------------------------------------------------------------- /preferences.py: -------------------------------------------------------------------------------- 1 | 2 | # the username to use 3 | username = 'john.elkins@gmail.com' 4 | 5 | # the separator to use for detailed track information 6 | track_info_separator = u',' 7 | #track_info_separator = u'\\' 8 | #track_info_separator = u'|' 9 | 10 | # the order of the track details 11 | track_info_order = ['title','artist','album','songid'] 12 | #track_info_order = ['title','artist','album','genre','year','durationMillis','playCount','rating','songid'] 13 | 14 | # output debug information to the log 15 | debug = False 16 | 17 | # don't import or export the same song twice 18 | allow_duplicates = False 19 | 20 | # == ImportList.py preferences ============================================== 21 | 22 | # ignore mismatched albums. An album mismatch often doesn't mean the song is 23 | # wrong. This is set to true so that mismatched albums don't scew the results 24 | # and flag too many songs with low scores 25 | ignore_album_mismatch = True 26 | 27 | # search for tracks in the personal library, tracks found there will work 28 | # for you, but if you share your playlist others may not be able to play 29 | # some tracks. Set to false if you want to make sure that your playlist doesn't 30 | # contain any tracks that are not shareable. 31 | search_personal_library = True 32 | 33 | # when unable to locate a track using full details (title,artist,album); perform 34 | # a search using only the song title. this will hopefully find something to 35 | # at least put into the track spot. this is handy for playlists that list the 36 | # composer or songwriter for a song instead of a singer. 37 | search_title_only = True 38 | 39 | # log high matches in addition to the songs that couldn't be found or had 40 | # low matches. 41 | log_high_matches = False 42 | 43 | # export "Thumbs Up" playlist 44 | export_thumbs_up = True 45 | 46 | # export "ALL" playlist 47 | export_all = True 48 | -------------------------------------------------------------------------------- /test/test-common.py: -------------------------------------------------------------------------------- 1 | from atestframe import * 2 | from common import * 3 | 4 | class TestCommon(unittest.TestCase): 5 | 6 | def test_get_csv_fields(self): 7 | """ test that quoted and unquoted fields are being recognized """ 8 | fields = get_csv_fields(u'something,"good",to "eat","like a ""hot""",dog',u',') 9 | self.assertEqual(fields[0],u'something') 10 | self.assertEqual(fields[1],u'good') 11 | self.assertEqual(fields[2],u'to "eat"') 12 | self.assertEqual(fields[3],u'like a "hot"') 13 | self.assertEqual(fields[4],u'dog') 14 | fields = get_csv_fields(u',hello',u',') 15 | self.assertEqual(fields[0],u'') 16 | self.assertEqual(fields[1],u'hello') 17 | fields = get_csv_fields(u'test,"commas, in, the, field"',u',') 18 | self.assertEqual(len(fields),2) 19 | self.assertEqual(fields[0],u'test') 20 | self.assertEqual(fields[1],u'commas, in, the, field') 21 | 22 | def test_handle_quote_input(self): 23 | """ test that quotes are being removed as expected """ 24 | self.assertEqual(handle_quote_input(u''),u'') 25 | self.assertEqual(handle_quote_input(u'a'),u'a') 26 | self.assertEqual(handle_quote_input(u'""'),u'') 27 | self.assertEqual(handle_quote_input(u'""asdf""'),u'"asdf"') 28 | self.assertEqual(handle_quote_input(u'"asdf"'),u'asdf') 29 | 30 | def test_handle_quote_output(self): 31 | """ test that quotes are applied only when needed """ 32 | self.assertEqual(handle_quote_output("nothing to quote"),"nothing to quote") 33 | self.assertEqual(handle_quote_output('this "needs" quoting'),'"this ""needs"" quoting"') 34 | self.assertEqual(handle_quote_output('tsep, in field'),'"tsep, in field"') 35 | 36 | def test_quote_unquote(self): 37 | """ test for verifying the quoting and unquoting that occurs in track values """ 38 | test_values = (("", ""), 39 | ("bog", "bog"), 40 | ("\"bog", "\"\"\"bog\""), 41 | ("\"bog\"", "\"\"\"bog\"\"\""), 42 | ("b\"o\"g", "\"b\"\"o\"\"g\""), 43 | ("\"", "\"\"\"\"")) 44 | for (invalue, expected) in test_values: 45 | actual_out = handle_quote_output(invalue) 46 | self.assertEqual(actual_out, expected) 47 | 48 | actual_in = handle_quote_input(actual_out) 49 | self.assertEqual(actual_in, invalue) 50 | 51 | run_test() 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | gmusic-playlist 2 | =============== 3 | 4 | playlist scripts for gmusic 5 | 6 | ## Prerequisites 7 | 8 | - python 2.7 - https://www.python.org 9 | - gmusicapi - https://github.com/simon-weber/Unofficial-Google-Music-API 10 | 11 | Before using the scripts, open up the preferences.py file and change the username. 12 | 13 | When the scripts are run they will prompt for your password. If you use two factor authentication you will need to create and use an application password. 14 | 15 | ## ExportLists.py 16 | 17 | This script will export all playlists to a given directory as csv files. For the purpose of these scripts CSV stands for character seperated value. The default separator charator is ',' The separator character is configurable in the preferences file. Versions of the code previous to Aug 16 2015 used a '\' separator character as the default. Most spreadsheet apps can open csv files. 18 | 19 | The order in which the artist, album, and title information appears as well as the separating character between each piece of information is configured in the preference.py file. The default order and separator character will output song info as: "title","artist","album","songid" 20 | 21 | The csv files can be re-imported using the ImportList.py script. 22 | 23 | Command Line Usage: python ExportLists.py OutputDir 24 | 25 | OutputDir is a directory you would like the playlists to be output to. 26 | 27 | The export progress will be output to the console and to a log file. At the completion of the export a status of the overal makeup of the playlist will be output. 28 | 29 | ## ImportList.py 30 | 31 | This script will import a given csv file into google music as a playlist. The title of the playlist will be the name of the text file and each track will be matched to each line in the text file. 32 | 33 | Command Line Usage: python ImportList.py ExamplePlaylist.csv 34 | 35 | The progress of the playlist creation will be output to the console and to a log file. Tracks that could not be found are prefixed with !! and tracks that were found but may not be a good match are prefixed with -. One or more of the following will appear after a track with a low match: {A}{a}{T}{s} These markings indicate why the match was low, {A} means the artist didn't match, {T} means the title didn't match, {a} means the album didn't match, and {s} means it had a low result score. In addition to a log file, a csv file is created which contains all tracks found and their associated google music song id. 36 | 37 | The csv file output from the ImportList.py script can be used to fix any song that didn't import correctly. Open the csv file, look for the songs without any song id and see if there is something that you can change in the track info to get google to find the song. Save the file and then re-run it through the ImportList.py script. Since the csv file will contain the song id's for songs it already found it won't need to look those up again and will just focus on finding the songs that don't have id's yet. 38 | 39 | You can also look up the song you want via google music's web interface and get the song id by clicking share > get link. The song id is given in the link. 40 | 41 | ## Playlist files 42 | 43 | The format of each track in a playlist file can either be fuzzy or detailed info. Comments are also supported. 44 | 45 | A fuzzy track is a track that has no separating characters and simply lists a song title, song title and author, or song author and title. See the ExamplePlaylist.csv file for a few examples of fuzzy tracks. Fuzzy tracks will only be matched to all access tracks. If you have a song in a playlist that isn't in all access, but is in your personal library you will need to use a detailed track. 46 | 47 | A detailed track lists title,artist,and album information separated by the separator character and in the order defined in the preferences.py file. The songId is optional, and will be added by the scripts when outputting a csv file. See the ExamplePlaylist.csv file for a few examples of detailed track lists. The album can be left out if not required. 48 | 49 | A comment in a playlist file follows the form of Ccomment where C is the separator character and comment is the comment. See the ExamplePlaylist.csv file. 50 | 51 | ## see also 52 | 53 | [a javascript version](https://github.com/soulfx/gmusic-playlist.js) for doing import / export directly within google music. 54 | -------------------------------------------------------------------------------- /ExportLists.py: -------------------------------------------------------------------------------- 1 | # Author: John Elkins 2 | # License: MIT 3 | 4 | from common import * 5 | 6 | if len(sys.argv) < 2: 7 | log('ERROR output directory is required') 8 | time.sleep(3) 9 | exit() 10 | 11 | # setup the output directory, create it if needed 12 | output_dir = sys.argv[1] 13 | if not os.path.exists(output_dir): 14 | os.makedirs(output_dir) 15 | 16 | # log in and load personal library 17 | api = open_api() 18 | library = load_personal_library() 19 | 20 | def playlist_handler(playlist_name, playlist_description, playlist_tracks): 21 | # skip empty and no-name playlists 22 | if not playlist_name: return 23 | if len(playlist_tracks) == 0: return 24 | 25 | # setup output files 26 | playlist_name = playlist_name.replace('/', '') 27 | open_log(os.path.join(output_dir,playlist_name+u'.log')) 28 | outfile = codecs.open(os.path.join(output_dir,playlist_name+u'.csv'), 29 | encoding='utf-8',mode='w') 30 | 31 | # keep track of stats 32 | stats = create_stats() 33 | export_skipped = 0 34 | # keep track of songids incase we need to skip duplicates 35 | song_ids = [] 36 | 37 | log('') 38 | log('============================================================') 39 | log(u'Exporting '+ unicode(len(playlist_tracks)) +u' tracks from ' 40 | +playlist_name) 41 | log('============================================================') 42 | 43 | # add the playlist description as a "comment" 44 | if playlist_description: 45 | outfile.write(tsep) 46 | outfile.write(playlist_description) 47 | outfile.write(os.linesep) 48 | 49 | for tnum, pl_track in enumerate(playlist_tracks): 50 | track = pl_track.get('track') 51 | 52 | # we need to look up these track in the library 53 | if not track: 54 | library_track = [ 55 | item for item in library if item.get('id') 56 | in pl_track.get('trackId')] 57 | if len(library_track) == 0: 58 | log(u'!! '+str(tnum+1)+repr(pl_track)) 59 | export_skipped += 1 60 | continue 61 | track = library_track[0] 62 | 63 | result_details = create_result_details(track) 64 | 65 | if not allow_duplicates and result_details['songid'] in song_ids: 66 | log('{D} '+str(tnum+1)+'. '+create_details_string(result_details,True)) 67 | export_skipped += 1 68 | continue 69 | 70 | # update the stats 71 | update_stats(track,stats) 72 | 73 | # export the track 74 | song_ids.append(result_details['songid']) 75 | outfile.write(create_details_string(result_details)) 76 | outfile.write(os.linesep) 77 | 78 | # calculate the stats 79 | stats_results = calculate_stats_results(stats,len(playlist_tracks)) 80 | 81 | # output the stats to the log 82 | log('') 83 | log_stats(stats_results) 84 | log(u'export skipped: '+unicode(export_skipped)) 85 | 86 | # close the files 87 | close_log() 88 | outfile.close() 89 | 90 | # the personal library is used so we can lookup tracks that fail to return 91 | # info from the ...playlist_contents() call 92 | 93 | playlist_contents = api.get_all_user_playlist_contents() 94 | 95 | for playlist in playlist_contents: 96 | playlist_name = playlist.get('name') 97 | playlist_description = playlist.get('description') 98 | playlist_tracks = playlist.get('tracks') 99 | 100 | playlist_handler(playlist_name, playlist_description, playlist_tracks) 101 | 102 | if export_thumbs_up: 103 | # get thumbs up playlist 104 | thumbs_up_tracks = [] 105 | for track in library: 106 | if track.get('rating') is not None and int(track.get('rating')) > 1: 107 | thumbs_up_tracks.append(track) 108 | 109 | 110 | # modify format of each dictionary to match the data type 111 | # of the other playlists 112 | thumbs_up_tracks_formatted = [] 113 | for t in thumbs_up_tracks: 114 | thumbs_up_tracks_formatted.append({'track': t}) 115 | 116 | playlist_handler('Thumbs up', 'Thumbs up tracks', thumbs_up_tracks_formatted) 117 | 118 | if export_all: 119 | all_tracks_formatted = [] 120 | for t in library: 121 | all_tracks_formatted.append({'track': t}) 122 | 123 | playlist_handler('All', 'All tracks', all_tracks_formatted) 124 | 125 | close_api() 126 | 127 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | # Author: John Elkins 2 | # License: MIT 3 | 4 | __version__ = '0.160530' 5 | 6 | __required_gmusicapi_version__ = '10.0.0' 7 | 8 | from collections import Counter 9 | from gmusicapi import __version__ as gmusicapi_version 10 | from gmusicapi import Mobileclient 11 | from gmusicapi.exceptions import CallFailure 12 | from preferences import * 13 | import re 14 | import time 15 | import getpass 16 | import sys 17 | import os 18 | import codecs 19 | 20 | # the api to use for accessing google music 21 | api = None 22 | 23 | # the logfile for keeping track of things 24 | logfile = None 25 | 26 | # provide a shortcut for track_info_separator 27 | tsep = track_info_separator 28 | 29 | # flag indicating if account is all access capable 30 | allaccess = True 31 | 32 | # check for debug set via cmd line 33 | if '-dDEBUG' in sys.argv: 34 | debug = True 35 | 36 | # check versions 37 | def assert_prerequisites(): 38 | 39 | required = __required_gmusicapi_version__ 40 | actual = gmusicapi_version 41 | 42 | def version(ver): 43 | return int(re.sub(r'\D','',ver)) 44 | 45 | if ( version(actual) < version(required) ): 46 | log("ERROR gmusicapi version of at least "+required+" is required. ") 47 | exit() 48 | 49 | # loads the personal library 50 | def load_personal_library(): 51 | plog('Loading personal library... ') 52 | plib = api.get_all_songs() 53 | log('done. '+str(len(plib))+' personal tracks loaded.') 54 | return plib 55 | 56 | # opens the log for writing 57 | def open_log(filename): 58 | global logfile 59 | logfile = codecs.open(filename, encoding='utf-8', mode='w', buffering=1) 60 | return logfile 61 | 62 | # closes the log 63 | def close_log(): 64 | if logfile: 65 | logfile.close() 66 | 67 | # logs to both the console and log file if it exists 68 | def log(message, nl = True): 69 | if nl: 70 | message += os.linesep 71 | sys.stdout.write(message.encode(sys.stdout.encoding, errors='replace')) 72 | if logfile: 73 | logfile.write(message) 74 | 75 | # logs a message if debug is true 76 | def dlog(message): 77 | if debug: 78 | log(message) 79 | 80 | # logs a progress message (a message without a line return) 81 | def plog(message): 82 | log(message, nl = False) 83 | 84 | # search all access 85 | def aa_search(search_string,max_results): 86 | global allaccess 87 | results = [] 88 | if allaccess: 89 | try: 90 | results = api.search(search_string, 91 | max_results=max_results).get('song_hits') 92 | except CallFailure: 93 | allaccess = False 94 | log('WARNING no all access subscription detected. '+ 95 | ' all access search disabled.') 96 | return results 97 | 98 | # gets the track details available for google tracks 99 | def get_google_track_details(sample_song = 'one u2'): 100 | results = aa_search(sample_song,1) 101 | if len(results): 102 | return (results[0].get('track').keys()) 103 | return "['title','artist','album']" 104 | 105 | # creates result details from the given track 106 | def create_result_details(track): 107 | result_details = {} 108 | for key, value in track.iteritems(): 109 | result_details[key] = value 110 | result_details['songid'] = (track.get('storeId') 111 | if track.get('storeId') else track.get('id')) 112 | return result_details 113 | 114 | # creates details dictionary based off the given details list 115 | def create_details(details_list): 116 | details = {} 117 | details['artist'] = None 118 | details['album'] = None 119 | details['title'] = None 120 | details['songid'] = None 121 | if len(details_list) < 2: 122 | return details 123 | for pos, nfo in enumerate(details_list): 124 | if len(track_info_order) <= pos: 125 | continue 126 | details[track_info_order[pos]] = nfo.strip() 127 | return details 128 | 129 | # split a csv line into it's separate fields 130 | def get_csv_fields(csvString,sepChar=tsep): 131 | fields = [] 132 | fieldValue = u'' 133 | ignoreTsep = False 134 | for c in csvString: 135 | if c == sepChar and not ignoreTsep: 136 | fields.append(handle_quote_input(fieldValue)) 137 | fieldValue = u'' 138 | continue 139 | elif c == '"': 140 | ignoreTsep = (not ignoreTsep) 141 | fieldValue += c 142 | fields.append(handle_quote_input(fieldValue)) 143 | return fields 144 | 145 | # add quotes around a csv field and return the quoted field 146 | def handle_quote_output(aString): 147 | """ See: https://en.wikipedia.org/wiki/Comma-separated_values#Basic_rules_and_examples """ 148 | if aString.find('"') > -1 or aString.find(tsep) > -1: 149 | return '"%s"' % aString.replace('"', '""') 150 | else: 151 | return aString 152 | 153 | # remove the quotes from around a csv field, and return the unquoted field 154 | def handle_quote_input(aString): 155 | if len(aString) > 0 and aString[0] == '"' and aString[-1] == '"': 156 | return aString[1:-1].replace('""', '"') 157 | else: 158 | return aString 159 | 160 | # creates details string based off the given details dictionary 161 | def create_details_string(details_dict, skip_id = False): 162 | out_string = u'' 163 | for nfo in track_info_order: 164 | if skip_id and nfo == 'songid': 165 | continue 166 | if len(out_string) != 0: 167 | out_string += track_info_separator 168 | try: 169 | out_string += handle_quote_output(unicode(details_dict[nfo])) 170 | except KeyError: 171 | # some songs don't have info like year, genre, etc 172 | pass 173 | return out_string 174 | 175 | # logs into google music api 176 | def open_api(): 177 | global api 178 | log('Logging into google music...') 179 | # get the password each time so that it isn't stored in plain text 180 | password = getpass.getpass(username + '\'s password: ') 181 | 182 | api = Mobileclient() 183 | if not api.login(username, password, Mobileclient.FROM_MAC_ADDRESS): 184 | log('ERROR unable to login') 185 | time.sleep(3) 186 | exit() 187 | 188 | password = None 189 | log('Login Successful.') 190 | dlog(u'Available track details: '+str(get_google_track_details())) 191 | return api 192 | 193 | # logs out of the google music api 194 | def close_api(): 195 | if api: 196 | api.logout() 197 | 198 | # creates a stats dictionary 199 | def create_stats(): 200 | stats = {} 201 | stats['genres'] = [] 202 | stats['artists'] = [] 203 | stats['years'] = [] 204 | stats['total_playcount'] = 0 205 | return stats 206 | 207 | # updates the stats dictionary with info from the track 208 | def update_stats(track,stats): 209 | stats['artists'].append(track.get('artist')) 210 | if track.get('genre'): stats['genres'].append(track.get('genre')) 211 | if track.get('year'): stats['years'].append(track.get('year')) 212 | if track.get('playCount'): stats['total_playcount'] += track.get( 213 | 'playCount') 214 | 215 | # calculates stats 216 | def calculate_stats_results(stats,total_tracks): 217 | results = {} 218 | results['genres'] = Counter(stats['genres']) 219 | results['artists'] = Counter(stats['artists']) 220 | results['years'] = Counter(stats['years']) 221 | results['playback_ratio'] = stats['total_playcount']/float(total_tracks) 222 | return results 223 | 224 | # logs the stats results 225 | def log_stats(results): 226 | log(u'top 3 genres: '+repr(results['genres'].most_common(3))) 227 | log(u'top 3 artists: '+repr(results['artists'].most_common(3))) 228 | log(u'top 3 years: '+repr(results['years'].most_common(3))) 229 | log(u'playlist playback ratio: '+unicode(results['playback_ratio'])) 230 | 231 | # display version and check prerequisites 232 | log("gmusic-playlist: "+__version__) 233 | log("gmusicapi: "+gmusicapi_version) 234 | assert_prerequisites(); 235 | -------------------------------------------------------------------------------- /ImportList.py: -------------------------------------------------------------------------------- 1 | # Author: John Elkins 2 | # License: MIT 3 | 4 | import re 5 | import datetime 6 | import math 7 | import time 8 | from common import * 9 | 10 | # the file for outputing the information google has one each song 11 | csvfile = None 12 | 13 | # cleans up any open resources 14 | def cleanup(): 15 | if csvfile: 16 | csvfile.close() 17 | close_log() 18 | close_api() 19 | 20 | # compares two strings based only on their characters 21 | def s_in_s(string1,string2): 22 | if not string1 or not string2: 23 | return False 24 | s1 = re.compile('[\W_]+', re.UNICODE).sub(u'',string1.lower()) 25 | s2 = re.compile('[\W_]+', re.UNICODE).sub(u'',string2.lower()) 26 | 27 | return s1 in s2 or s2 in s1 28 | 29 | # sleeps a little bit after printing message before exiting 30 | def delayed_exit(message): 31 | log(message) 32 | time.sleep(5) 33 | cleanup() 34 | exit() 35 | 36 | # add the song 37 | def add_song(details,score): 38 | (result_score,score_reason) = score 39 | 40 | if ('+' in result_score and log_high_matches) or '-' in result_score: 41 | log(result_score+track+score_reason+u' #'+str(len(song_ids))) 42 | log (u' ' + create_details_string(details, True)) 43 | 44 | if not allow_duplicates and details['songid'] in song_ids: 45 | return 46 | 47 | song_ids.append(details['songid']) 48 | csvfile.write(create_details_string(details)) 49 | csvfile.write(os.linesep) 50 | 51 | # log an unmatched track 52 | def log_unmatched(track): 53 | global no_matches 54 | log(u'!! '+track) 55 | csvfile.write(track) 56 | csvfile.write(os.linesep) 57 | no_matches += 1 58 | 59 | # search for the song with the given details 60 | def search_for_track(details): 61 | search_results = [] 62 | dlog('search details: '+str(details)) 63 | 64 | # search the personal library for the track 65 | lib_album_match = False 66 | if details['artist'] and details['title'] and search_personal_library: 67 | lib_results = [item for item in library if 68 | s_in_s(details['artist'],item.get('artist')) 69 | and s_in_s(details['title'],item.get('title'))] 70 | dlog('lib search results: '+str(len(lib_results))) 71 | for result in lib_results: 72 | if s_in_s(result['album'],details['album']): 73 | lib_album_match = True 74 | item = {} 75 | item[u'track'] = result 76 | item[u'score'] = 200 77 | search_results.append(item) 78 | 79 | # search all access for the track 80 | if not lib_album_match: 81 | query = u'' 82 | if details['artist']: 83 | query = details['artist'] 84 | if details['title']: 85 | query += u' ' + details['title'] 86 | if not len(query): 87 | query = track 88 | dlog('aa search query:'+query) 89 | aa_results = aa_search(query,7) 90 | dlog('aa search results: '+str(len(aa_results))) 91 | search_results.extend(aa_results) 92 | 93 | if not len(search_results): 94 | return None 95 | 96 | top_result = search_results[0] 97 | # if we have detailed info, perform a detailed search 98 | if details['artist'] and details['title']: 99 | search_results = [item for item in search_results if 100 | s_in_s(details['title'],item['track']['title']) 101 | and s_in_s(details['artist'],item['track']['artist'])] 102 | if details['album']: 103 | search_results = [item for item in search_results if 104 | s_in_s(details['album'],item['track']['album'])] 105 | dlog('detail search results: '+str(len(search_results))) 106 | if len(search_results) != 0: 107 | top_result = search_results[0] 108 | 109 | return top_result 110 | 111 | # match score stats 112 | no_matches = 0 113 | low_scores = 0 114 | low_titles = 0 115 | low_artists = 0 116 | track_count = 0 117 | duplicates = 0 118 | 119 | # score the match against the query 120 | def score_track(details,result_details,top_score = 200): 121 | global low_scores 122 | global low_titles 123 | global low_artists 124 | global duplicates 125 | 126 | # check for low quality matches 127 | result_score = u' + ' 128 | score_reason = u' ' 129 | is_low_result = False 130 | if top_score < 120: 131 | score_reason += u'{s}' 132 | #low scores alone don't seem to me a good indication of an issue 133 | #is_low_result = True 134 | # wrong song 135 | if ((details['title'] 136 | and not s_in_s(details['title'],result_details['title'])) 137 | or (not details['title'] 138 | and not s_in_s(track,result_details['title']))): 139 | score_reason += u'{T}' 140 | low_titles += 1 141 | is_low_result = True 142 | # wrong album 143 | if (details['album'] and not ignore_album_mismatch 144 | and not s_in_s(details['album'],result_details['album'])): 145 | score_reason += u'{a}' 146 | is_low_result = True 147 | # wrong artist 148 | if (details['artist'] 149 | and not s_in_s(details['artist'],result_details['artist'])): 150 | score_reason += u'{A}' 151 | low_artists += 1 152 | is_low_result = True 153 | # duplicate song 154 | if not allow_duplicates and result_details['songid'] in song_ids: 155 | score_reason += u'{D}' 156 | duplicates += 1 157 | is_low_result = True 158 | 159 | if is_low_result: 160 | result_score = u' - ' 161 | low_scores += 1 162 | 163 | return (result_score,score_reason) 164 | 165 | # check to make sure a filename was given 166 | if len(sys.argv) < 2: 167 | delayed_exit(u'ERROR input filename is required') 168 | 169 | 170 | # setup the input and output filenames and derive the playlist name 171 | input_filename = sys.argv[1].decode('utf-8') 172 | output_filename = os.path.splitext(input_filename)[0] 173 | output_filename = re.compile('_\d{14}$').sub(u'',output_filename) 174 | playlist_name = os.path.basename(output_filename) 175 | 176 | output_filename += u'_' + unicode(datetime.datetime.now().strftime( 177 | '%Y%m%d%H%M%S')) 178 | log_filename = output_filename + u'.log' 179 | csv_filename = output_filename + u'.csv' 180 | 181 | #open the log and output csv files 182 | csvfile = codecs.open(csv_filename, encoding='utf-8', mode='w', buffering=1) 183 | open_log(log_filename) 184 | 185 | # read the playlist file into the tracks variable 186 | tracks = [] 187 | plog('Reading playlist... ') 188 | with codecs.open(input_filename, encoding='utf-8', mode='r', errors='ignore') as f: 189 | tracks = f.read().splitlines() 190 | log('done. '+str(len(tracks))+' lines loaded.') 191 | 192 | # log in and load personal library 193 | api = open_api() 194 | library = load_personal_library() 195 | 196 | # begin searching for the tracks 197 | log('===============================================================') 198 | log(u'Searching for songs from: '+playlist_name) 199 | log('===============================================================') 200 | 201 | 202 | # gather up the song_ids and submit as a batch 203 | song_ids = [] 204 | 205 | # collect some stats on the songs 206 | stats = create_stats() 207 | 208 | # time how long it takes 209 | start_time = time.time() 210 | 211 | # loop over the tracks that were read from the input file 212 | for track in tracks: 213 | 214 | # skip empty lines 215 | if not track: 216 | continue 217 | 218 | # parse the track info if the line is in detail format 219 | details_list = get_csv_fields(track) 220 | details = create_details(details_list) 221 | 222 | # skip comment lines 223 | if len(details_list) == 2 and not details_list[0]: 224 | log(details_list[1]) 225 | csvfile.write(tsep) 226 | csvfile.write(details_list[1]) 227 | csvfile.write(os.linesep) 228 | continue 229 | 230 | # skip empty details records 231 | if (len(details_list) >= 3 and not details['artist'] 232 | and not details['album'] and not details['title']): 233 | continue 234 | 235 | # at this point we should have a valid track 236 | track_count += 1 237 | 238 | # don't search if we already have a track id 239 | if details['songid']: 240 | add_song(details,score_track(details,details)) 241 | continue 242 | 243 | # search for the song 244 | search_result = search_for_track(details) 245 | 246 | # a details dictionary we can use for 'smart' searching 247 | smart_details = {} 248 | smart_details['title'] = details['title'] 249 | smart_details['artist'] = details['artist'] 250 | smart_details['album'] = details['album'] 251 | 252 | if not details['title']: 253 | smart_details['title'] = track 254 | 255 | # if we didn't find anything strip out any (),{},[],<> from title 256 | match_string = '\[.*?\]|{.*?}|\(.*?\)|<.*?>' 257 | if not search_result and re.search(match_string,smart_details['title']): 258 | dlog('No results found, attempting search again with modified title.') 259 | smart_details['title'] = re.sub(match_string,'',smart_details['title']) 260 | search_result = search_for_track(smart_details) 261 | 262 | # if there isn't a result, try searching for the title only 263 | if not search_result and search_title_only: 264 | dlog('Attempting to search for title only') 265 | smart_details['artist'] = None 266 | smart_details['album'] = None 267 | smart_details['title_only_search'] = True 268 | search_result = search_for_track(smart_details) 269 | 270 | # check for a result 271 | if not search_result: 272 | log_unmatched(track) 273 | continue 274 | 275 | # gather up info about result 276 | result = search_result.get('track') 277 | result_details = create_result_details(result) 278 | result_score = score_track(details,result_details, 279 | search_result.get('score')) 280 | 281 | # if the song title doesn't match after a title only search, skip it 282 | (score,reason) = result_score 283 | if '{T}' in reason and 'title_only_search' in smart_details: 284 | log_unmatched(track) 285 | continue 286 | 287 | update_stats(result,stats) 288 | 289 | # add the song to the id list 290 | add_song(result_details,result_score) 291 | 292 | total_time = time.time() - start_time 293 | 294 | log('===============================================================') 295 | log(u'Adding '+unicode(len(song_ids))+' found songs to: '+playlist_name) 296 | log('===============================================================') 297 | 298 | # add the songs to the playlist(s) 299 | max_playlist_size = 1000 300 | current_playlist = 1 301 | total_playlists_needed = int(math.ceil(len(song_ids)/float(max_playlist_size))) 302 | while current_playlist <= total_playlists_needed: 303 | # build the playlist name, add part number if needed 304 | current_playlist_name = playlist_name 305 | if total_playlists_needed > 1: 306 | current_playlist_name += u' Part ' + unicode(current_playlist) 307 | 308 | # create the playlist and add the songs 309 | playlist_id = api.create_playlist(current_playlist_name) 310 | current_playlist_index = ( current_playlist - 1 ) * max_playlist_size 311 | current_songs = song_ids[current_playlist_index : 312 | current_playlist_index + max_playlist_size] 313 | 314 | added_songs = api.add_songs_to_playlist(playlist_id,current_songs) 315 | 316 | log(u' + '+current_playlist_name+u' - '+unicode(len(added_songs))+ 317 | u'/'+unicode(len(current_songs))+' songs') 318 | 319 | # go to the next playlist section 320 | current_playlist += 1 321 | 322 | # log a final status 323 | no_match_ratio = float(no_matches) / track_count if track_count else 0 324 | low_score_ratio = float(low_scores) / track_count if track_count else 0 325 | low_artists_ratio = float(low_artists) / low_scores if low_scores else 0 326 | low_titles_ratio = float(low_titles) / low_scores if low_scores else 0 327 | found_ratio = 1 - no_match_ratio - low_score_ratio 328 | 329 | log('===============================================================') 330 | log(' ' + str(len(song_ids)) + '/' + str(track_count) + ' tracks imported') 331 | log(' ! ' + str(no_match_ratio*100) + '% of tracks could not be matched') 332 | log(' - ' + str(low_score_ratio*100) + '% of tracks had low match scores') 333 | log(' {T} ' + str(low_titles) 334 | + ' low matches were due to a song title mismatch') 335 | log(' {A} ' + str(low_artists) 336 | + ' low matches were due to song artist mismatch') 337 | if not allow_duplicates: 338 | log (' {D} ' + str(duplicates) 339 | + ' duplicates were found and skipped') 340 | log(' + ' + str(found_ratio*100) + '% of tracks had high match scores') 341 | log('') 342 | stats_results = calculate_stats_results(stats,len(song_ids)) 343 | log_stats(stats_results) 344 | 345 | log('\nsearch time: '+str(total_time)) 346 | 347 | cleanup() 348 | 349 | --------------------------------------------------------------------------------