├── .gitignore ├── README.md ├── img ├── example.png └── mirrorfulLogo.png ├── requirements.txt └── visualizeSongs.py /.gitignore: -------------------------------------------------------------------------------- 1 | **p3_env 2 | 3 | .env 4 | 5 | .DS_Store 6 | 7 | 8 | 9 | *.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🎵 Setlist Visualizer 2 | 3 | ![graph](./img/example.png) 4 | 5 | ### Introduction 6 | 7 | Python script that scrapes setlistfm for a given band and visualizes their setlists for a given tour. 8 | 9 | _Setlist Visualizer is one of the first things I ever "programmed." There's cool functionality here that could be improved upon. I come back to it every few years after my favorite artists go on tour._ 10 | 11 | ## Usage: 12 | 13 | ### 1. Install dependencies: 14 | 15 | ``` 16 | mkdir p3_env; python3 -m venv p3_env; source p3_env/bin/activate; pip3 install -r requirements.txt 17 | ``` 18 | 19 | ### 2. Change constants: 20 | 21 | At the top of the script, change the relevant constants. 22 | 23 | ### 3. Then, get the data in xlsx format 24 | 25 | `python3 visualizeSongs.py` 26 | 27 | ### 4. Finally, visualize the data once you have it 28 | 29 | `python3 visualizeSongs.py` 30 | 31 | ### Acknowledgements: 32 | 33 | Author of original setlist.fm scraping: the talented **Ryan Lee Watts** 34 | 35 | Github: https://github.com/ryanleewatts 36 | 37 | Script: https://github.com/ryanleewatts/coding-project/blob/master/scraper/SetlistScript.py 38 | 39 | ## Sponsored by 40 | 41 | Setlist Visualizer is proudly sponsored by [Magic Patterns](https://www.magicpatterns.com/), an AI design tool. 
42 | -------------------------------------------------------------------------------- /img/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexdanilowicz/Setlist-Visualizer/9afc46400c7a67626886cec63803db936178ea09/img/example.png -------------------------------------------------------------------------------- /img/mirrorfulLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexdanilowicz/Setlist-Visualizer/9afc46400c7a67626886cec63803db936178ea09/img/mirrorfulLogo.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.8.0 2 | bs4==0.0.1 3 | certifi==2023.7.22 4 | chardet==3.0.4 5 | contourpy==1.0.5 6 | cycler==0.10.0 7 | et-xmlfile==1.0.1 8 | fonttools==4.37.3 9 | idna==2.8 10 | jdcal==1.4.1 11 | kiwisolver==1.1.0 12 | lxml==4.4.1 13 | matplotlib==3.6.0 14 | numpy==1.23.3 15 | openpyxl==3.0.7 16 | packaging==21.3 17 | pandas==1.5.0 18 | Pillow==10.0.1 19 | pyparsing==2.4.2 20 | python-dateutil==2.8.2 21 | pytz==2022.2.1 22 | requests==2.31.0 23 | six==1.12.0 24 | soupsieve==1.9.3 25 | urllib3==1.26.18 26 | xlrd==1.2.0 27 | -------------------------------------------------------------------------------- /visualizeSongs.py: -------------------------------------------------------------------------------- 1 | # Author of original setlistfm scraping: ryanleewatts 2 | # Github: https://github.com/ryanleewatts 3 | # Original script: https://github.com/ryanleewatts/coding-project/blob/master/scraper/SetlistScript.py 4 | # 5 | # Example Usage: 6 | # python3 visualizeSongs.py 7 | # 8 | # Author: Alex Danilowicz 9 | # Wrote for fun as a summer personal project. 10 | # Started as just a way to see what Radiohead songs would be played... 
#
# Run the script once to scrape setlist.fm into FILE. Run it again, once that
# file exists, to render the chart: scrape() calls visualize_album() whenever
# FILE is already present.

from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
from matplotlib.pyplot import cm
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import random
from collections import defaultdict
from pathlib import Path

# THINGS YOU MUST CHANGE
ARTIST = "The National"
UNIQUE = "the-national-53d69b79.html"  # MAKE SURE TO INCLUDE .html part. This is unique for the band
URL_TO_STOP_AT = "/auditorium-theatre-chicago-il-53b9d7a9.html"  # Note: get rid of HTTPS part
URL_TO_START_AT = "html"  # this url will be the first one to be scraped. If you want first one, put in nothing or html
CUSTOM = True  # if true, be sure to define returnCustomAlbumDict
TOUR_DEFINITION = "/setlist/the-national/2023"

# OPTIONAL THINGS TO CHANGE
YEAR = "2023"
SORT_ALBUM = False  # toggle if you want to sort by album or not. if false, sorts by count
FILE = ARTIST + "-Data" + "-" + YEAR + ".xlsx"  # filename
TITLE = "The National's 2023 Tour, so far... "
SONGS_TO_IGNORE = ["I Can't Forget"]
MAX_PAGES = 10000  # max to scrape, not used if URL_TO_STOP set properly
FONT_SIZE_TICKS = 8
FONT_Y = 10  # for labels
color_album_dict = {}

# Songs whose album should not be looked up on setlist.fm (e.g. unreleased
# material); they are recorded under the album given here instead.
ALBUM_OVERRIDES = {
    "Deep End": "New Album",
    "Space Invader (Threaded Gold)": "New Album",
    "Coat on a Hook": "New Album",
    "Tour Manager": "New Album",
}
UNKNOWN_ALBUM = "Other"  # album recorded when setlist.fm lists no release for a song
FALLBACK_COLOR = '#999999'  # bar colour for albums missing from the custom colour map
REQUEST_TIMEOUT = 30  # seconds; requests.get() waits forever without a timeout

# bytes.strip() only removes these ASCII characters; the original code stripped
# song titles as UTF-8 bytes, so titles are stripped of exactly this set.
_ASCII_WHITESPACE = " \t\n\r\x0b\x0c"


def _collect_setlist_urls(listing_url):
    """Walk the paginated setlist listing and return the setlist URLs to scrape.

    Collection starts at the first link containing URL_TO_START_AT, keeps only
    links containing TOUR_DEFINITION, and stops after the first link containing
    URL_TO_STOP_AT. It also stops when a page has no setlist links or after
    MAX_PAGES pages.
    """
    links = []
    started = False
    for page in range(1, MAX_PAGES + 1):
        url = listing_url + str(page)
        print('🎉 Page ', url)
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        soup = bs(response.content, "lxml")
        anchors = soup.find_all('a', class_='summary url')
        if not anchors:
            # No setlist links on this page: stop instead of requesting the
            # remaining (up to MAX_PAGES) pages for nothing.
            print("No setlists found on page, stopping: ", url)
            break
        for anchor in anchors:
            href = anchor.get('href')
            if not href:
                continue
            # The first two characters of the href are dropped before the host
            # is prepended (presumably a leading ".." — TODO confirm).
            # NOTE(review): the host here is "setlists.fm" while the listing
            # pages use "setlist.fm" — confirm this resolves as intended.
            completeurl = 'http://www.setlists.fm' + href[2:]
            if URL_TO_START_AT in completeurl:
                started = True
            if not started:
                continue
            print("Considering url: ", completeurl)
            if completeurl not in links and TOUR_DEFINITION in completeurl:
                print("ℹ️ Getting url: " + completeurl)
                links.append(completeurl)
            if URL_TO_STOP_AT in completeurl:
                return links  # stop at this setlist
    return links


def _scrape_setlist(url):
    """Return ``(date, songs)`` for one setlist page.

    ``date`` is None when the page has no date tag. ``songs`` excludes every
    title listed in SONGS_TO_IGNORE.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    soup = bs(response.content, "lxml")

    date = None
    # When several tags match, the last one wins.
    for date_tag in soup.find_all('em', class_='link', text=True):
        date = date_tag.text[:-7].partition(",")[0].replace(" ", "")

    songs = []
    for part in soup.find_all('div', class_='songPart'):
        title = part.text.strip(_ASCII_WHITESPACE)
        if title not in SONGS_TO_IGNORE:
            songs.append(title)
    return date, songs


def _lookup_album(song, song_stats_url):
    """Return the release listed for ``song`` on its stats page, or None."""
    # Passing the title through ``params`` lets requests URL-encode it, so
    # titles such as "2 + 2 = 5" no longer need a hand-encoded special case.
    response = requests.get(song_stats_url, params={"song": song}, timeout=REQUEST_TIMEOUT)
    soup = bs(response.content, "lxml")
    spans = soup.find_all('span')
    # The album name is the span right after the "From the release" label.
    for label, value in zip(spans, spans[1:]):
        if label.text == "From the release":
            album = value.text.replace("(Album)", "").replace("(Single)", "")
            return album.strip("'").rstrip()
    return None


def scrape():
    """Scrape the tour's setlists into FILE, or visualize FILE if it exists.

    Writes one row per song played with the columns Date, Track and Album.
    Songs in ALBUM_OVERRIDES are recorded under their override album. Songs
    with no release on setlist.fm are recorded under UNKNOWN_ALBUM. Setlists
    without a date are skipped.

    Raises:
        Exception: if UNIQUE does not contain ".html".
    """
    if '.html' not in UNIQUE:
        raise Exception("You must add .html to the unique var otherwise you will stuck in an infinite loop")

    if Path("./" + FILE).is_file():  # only do scraping if file doesn't exist already
        visualize_album()
        return

    listing_url = "https://www.setlist.fm/setlists/" + UNIQUE + "?page="
    song_stats_url = "https://www.setlist.fm/stats/songs/" + UNIQUE  # notice difference: /stats/

    album_by_song = dict(ALBUM_OVERRIDES)  # key song, album is value
    rows = []
    for setlist_url in _collect_setlist_urls(listing_url):
        print('🎹 Looking at setlist for: ', setlist_url)
        date, songs = _scrape_setlist(setlist_url)
        if date is None:
            # Without this guard the previous setlist's date would be reused.
            print("skipping over this setlist, no date found: ", setlist_url)
            continue
        for song in songs:
            if song not in album_by_song:
                print('Getting the song 🎵', song)
                # Cache the result (including "not found") so each song is
                # looked up only once and never inherits another song's album.
                album_by_song[song] = _lookup_album(song, song_stats_url) or UNKNOWN_ALBUM
            rows.append([date, song, album_by_song[song]])

    df = pd.DataFrame(rows, columns=['Date', 'Track', 'Album'])
    df.to_excel(FILE, index=False)


def return_original_df():
    """Load the scraped data from sheet "Sheet1" of FILE."""
    return pd.read_excel(FILE, sheet_name="Sheet1")


def create_clean_df():
    """Summarise the scraped rows per track.

    Returns:
        ``(total, unique_df)``: ``total`` is the number of distinct dates.
        ``unique_df`` is indexed by track, with ``Count_Played`` (the number
        of distinct dates the track appears on) and ``Album`` (the album from
        the track's first row).
    """
    df = return_original_df()
    print(df['Track'].nunique(dropna=False))  # number of distinct tracks
    total = df['Date'].nunique(dropna=False)

    played = df.groupby('Track')['Date'].nunique().rename('Count_Played').to_frame()

    # One album per track: keep the album from the first row seen for it.
    albums = df[['Track', 'Album']].set_index('Track')
    albums = albums[~albums.index.duplicated(keep='first')]
    unique_df = pd.merge(played, albums, left_index=True, right_index=True)

    return (total, unique_df)


def visualize_album():
    """Plot how often each track was played, coloured by album.

    Saves the chart as a PNG next to the script, then shows it.
    """
    (total, unique_df) = create_clean_df()
    color_album_dict = return_color_album_dict(unique_df['Album'].unique().tolist())

    # order it by dictionary
    unique_df['Album'] = pd.Categorical(unique_df['Album'], list(color_album_dict))

    if SORT_ALBUM:  # sort by album, then count
        unique_df = unique_df.sort_values(['Album', 'Count_Played'], ascending=True)
        ORDERCOUNT = ""
    else:
        ORDERCOUNT = "-OrderedByCount"
        unique_df = unique_df.sort_values(['Count_Played', 'Album'], ascending=True)

    # convert to percentages
    unique_df['Frequency'] = unique_df['Count_Played'].div(total).multiply(100)

    # One colour per bar; .get() keeps a row with a missing album from raising.
    colors = [color_album_dict.get(album, FALLBACK_COLOR) for album in unique_df['Album']]
    # https://github.com/pandas-dev/pandas/issues/16822#issuecomment-1257284602
    ax = unique_df.plot.barh(y='Frequency', color=colors)

    format(ax, color_album_dict, total, unique_df)

    plt.savefig("./Visual-" + ARTIST + ORDERCOUNT + YEAR + ".png", format='png', dpi=1200)
    plt.show()


# NOTE: this name shadows the builtin format(); it is kept because it is the
# module's existing public name.
def format(ax, color_dict, total, df):
    """Apply legend, labels, tick formatting and bar annotations to the chart.

    Args:
        ax: the axes holding the horizontal bar chart.
        color_dict: mapping of album name to colour, used for the legend.
        total: number of concerts; shown in the x label and used to convert
            bar widths (percentages) back to play counts.
        df: the plotted frame; its row count is shown in the y label.
    """
    plt.rcParams["font.family"] = "Helvetica"
    hfont = {'fontname': 'Helvetica'}

    # Custom fake lines that will be used as legend entries, one per album.
    markers = [plt.Line2D([0, 0], [0, 0], color=color, marker='o', linestyle='')
               for color in color_dict.values()]
    plt.legend(markers, color_dict.keys(), numpoints=1, fontsize='7')

    # formatting labels
    ax.set_xlabel("Frequency" + " (" + str(total) + " concerts total)", **hfont)
    fmt = '%.0f%%'  # Format you want the ticks, e.g. '40%'
    ax.xaxis.set_major_formatter(mtick.FormatStrFormatter(fmt))

    plt.xticks(fontname='Helvetica')

    ax.set_ylabel("Track (" + str(len(df.index)) + ")", **hfont)
    plt.yticks(fontsize=FONT_Y, fontname='Helvetica')
    plt.title(TITLE, fontsize=16, **hfont)
    plt.figtext(0.5, 0.01, 'Created by github.com/alexdanilowicz/Setlist-Visualizer',
                wrap=True, horizontalalignment='center', fontsize=7, **hfont)

    # Annotate each bar with its play count (bar width is a percentage of total).
    for bar in ax.patches:
        plays = str(round((bar.get_width() * total / 100), 1)).replace(".0", "")
        ax.text(bar.get_width() + .3, bar.get_y() + .38, plays,
                fontsize=FONT_SIZE_TICKS, color='dimgrey', **hfont)

    ax.invert_yaxis()
    plt.tight_layout()


# helper function to sort by date
def sorting(date):
    """Return the digits of ``date`` joined into one int.

    Raises:
        ValueError: if ``date`` contains no digits.
    """
    return int(''.join(ch for ch in date if ch.isdigit()))


def return_color_album_dict(albums_list):
    """Return a mapping of album name to hex colour.

    With CUSTOM set, the mapping starts from returnCustomAlbumDict() and any
    album in ``albums_list`` that it does not cover gets FALLBACK_COLOR.
    Otherwise every album gets a random colour. Missing (NaN) albums are
    skipped.
    """
    # HERE YOU CAN SPECIFY ALBUM COLORS SO THEY FIT YOUR ARTIST's ALBUM ARTWORK
    color_album_dict = returnCustomAlbumDict({}) if CUSTOM else {}

    for album in albums_list:
        if album in color_album_dict or pd.isna(album):
            continue
        if CUSTOM:
            color_album_dict[album] = FALLBACK_COLOR
        else:  # otherwise, just get random ugly hex colors
            color_album_dict[album] = "#" + ''.join(
                random.choice('0123456789ABCDEF') for _ in range(6))

    return color_album_dict


def returnCustomAlbumDict(color_album_dict):
    """Add the hand-picked album colours to ``color_album_dict`` and return it."""
    color_album_dict.update({
        "Boxer": '#79B791',  # lime green
        "Trouble Will Find Me": '#96C9DC',  # sky blue
        "Alligator": '#F06C9B',  # dark pink
        "Cherry Tree": '#F5D491',  # yellow
        "High Violet": '#666A86',  # purplish
        "Sleep Well Beast": '#333333',  # grey
        "I Am Easy to Find": '#61A0AF',  # dark blue
        "First Two Pages of Frankenstein": '#D7C2D1',  # pink
        "New Album": '#FFA500',  # orange
        "Sad Songs for Dirty Lovers": '#880808',  # red
    })
    return color_album_dict


if __name__ == "__main__":
    scrape()