├── .env
├── .gitattributes
├── README.md
├── src
│   ├── __pycache__
│   │   └── funkyFunctions.cpython-310.pyc
│   ├── main.py
│   ├── funkyFunctions.py
│   ├── countryJUP.ipynb
│   ├── main2.ipynb
│   └── rapJUP.ipynb
├── LICENSE
├── csv
│   ├── country_ERA.csv
│   └── rap_ERA.csv
└── test.ipynb

/.env:
--------------------------------------------------------------------------------
1 | GENUIS_KEY = 7EkiVg_CBSoY7qz85XBEEZUPaY9TZlvUuW-X0N7nqfl8JeVqtb6YZuJuR2oFzRzA
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Read the Full Where is the Love? Article on my Website
2 | https://calebklinger.netlify.app/whereisthelove
3 |
--------------------------------------------------------------------------------
/src/__pycache__/funkyFunctions.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TrippingLettuce/Where-is-the-love/main/src/__pycache__/funkyFunctions.cpython-310.pyc
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Caleb Klinger
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
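The two scripts under src/ (main.py just below and funkyFunctions.py further down) drive the whole pipeline: load GENUIS_KEY from .env, pull each artist's most popular songs from the Genius API with lyricsgenius, then clean the lyrics and count word frequencies. The following is a minimal sketch of that flow — an illustration with a hypothetical helper name (love_counts), not code taken from the repository — assuming only the packages the scripts already import (lyricsgenius, python-dotenv) and the GENUIS_KEY entry shown above:

# Minimal sketch (illustration only, not the repository's own code): count
# love-related words in an artist's most popular songs, using the same
# lyricsgenius settings that src/main.py and src/funkyFunctions.py configure.
import os
from collections import Counter

from dotenv import load_dotenv
from lyricsgenius import Genius

load_dotenv()  # reads GENUIS_KEY from the .env file shown above
genius = Genius(os.getenv("GENUIS_KEY"), timeout=15)
genius.verbose = False
genius.remove_section_headers = True

LOVE_WORDS = {"love", "loves", "loved", "loving"}

def love_counts(artist_name: str, max_songs: int = 3) -> Counter:
    """Tally love-related words across an artist's most popular songs."""
    artist = genius.search_artist(artist_name, max_songs=max_songs, sort="popularity")
    counts = Counter()
    if artist is None:  # the search can come back empty
        return counts
    for song in artist.songs:
        for raw in song.lyrics.lower().split():
            token = raw.strip("\"',()?:-!.")  # roughly the punctuation process_string strips
            if token in LOVE_WORDS:
                counts[token] += 1
    return counts

print(love_counts("The Notorious B.I.G."))

The repository's own getData/cleanData functions follow the same pattern but write intermediate CSVs (rap_mid1.csv, rap_mid2.csv) between steps instead of returning a Counter.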
22 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from lyricsgenius import Genius 5 | from dotenv import load_dotenv 6 | import funkyFunctions 7 | 8 | load_dotenv() # Load environment variables from the .env file 9 | 10 | genius_key = os.getenv('GENUIS_KEY') 11 | genius = Genius(genius_key,timeout=15) 12 | genius.verbose = False 13 | genius.remove_section_headers = True 14 | genius.excluded_terms = ["(Remix)", "(Live)"] 15 | 16 | #Define Dataframes 17 | #csv_start = pd.read_csv('/home/lettuce/MyCode/Statistical-Analysis-of-Music-Genres/src/rap_ERA.csv') 18 | 19 | 20 | 21 | #Get data 22 | #funkyFunctions.getData(csv_start) 23 | #Clean Data 24 | csv_mid = pd.read_csv('/home/lettuce/MyCode/pandasproject/rap_mid1.csv') 25 | funkyFunctions.cleanData(csv_mid) 26 | #Organize Data 27 | #csv_mid = pd.read_csv('/home/lettuce/MyCode/pandasproject/rap_mid2.csv') 28 | #funkyFunctions.organizeDataTotal(csv_mid) 29 | #Orgaize Data Artist 30 | #csv_final = pd.read_csv('/home/lettuce/MyCode/pandasproject/rap_end.csv') 31 | #funkyFunctions.organizeDataArtist(csv_mid, csv_final) 32 | 33 | #Anaylize Data 34 | 35 | 36 | #Eras of rap 37 | 38 | 39 | # Take 50 top hip hop and 50 top rap combo 40 | 41 | #TOp 100 country 42 | 43 | #raper_df = pd.read_csv('/home/lettuce/MyCode/pandasproject/raper_name.csv') 44 | 45 | #print(raper_df.head().to_string()) -------------------------------------------------------------------------------- /csv/country_ERA.csv: -------------------------------------------------------------------------------- 1 | Country Name,Era 2 | Johnny Cash,1960s 3 | Hank Williams,1950s 4 | George Strait,1980s 5 | Merle Haggard,1970s 6 | Alan Jackson,1990s 7 | Waylon Jennings,1970s 8 | George Jones,1960s 9 | Willie Nelson,1970s 10 | Conway Twitty,1960s 11 | Randy Travis,1980s 12 | Dolly Parton,1970s 13 | Patsy Cline,1960s 14 | "Hank Williams, Jr.",1980s 15 | Brooks & Dunn,1990s 16 | Reba McEntire,1980s 17 | Loretta Lynn,1960s 18 | Alabama,1980s 19 | Garth Brooks,1990s 20 | Charlie Daniels,1970s 21 | Toby Keith,2000s 22 | Kenny Rogers,1970s 23 | Chris Stapleton,2010s 24 | John Denver,1970s 25 | Dwight Yoakam,1980s 26 | Travis Tritt,1990s 27 | Carrie Underwood,2000s 28 | Kenny Chesney,2000s 29 | Tim McGraw,1990s 30 | The Charlie Daniels Band,1970s 31 | Brad Paisley,2010s 32 | Shania Twain,1990s 33 | Emmylou Harris,1970s 34 | Roy Orbison,1960s 35 | Tammy Wynette,1960s 36 | Lynyrd Skynyrd,1970s 37 | Buck Owens,1960s 38 | Keith Whitley,1980s 39 | Don Williams,1970s 40 | Vince Gill,1990s 41 | Glen Campbell,1960s 42 | Eric Church,2020s 43 | Kris Kristofferson,1970s 44 | The Oak Ridge Boys,1980s 45 | Zac Brown Band,2010s 46 | Nitty Gritty Dirt Band,1970s 47 | John Anderson,1980s 48 | Alison Krauss,2000s 49 | John Michael Montgomery,1990s 50 | Luke Combs,2020s 51 | David Allan Coe,1980s 52 | Martina McBride,2000s 53 | Blake Shelton,2010s 54 | Joe Diffie,1990s 55 | Johnny Paycheck,1970s 56 | Jerry Reed,1970s 57 | Roger Miller,1960s 58 | Crystal Gayle,1970s 59 | The Judds,1980s 60 | Trace Adkins,2010s 61 | Tanya Tucker,1970s 62 | Ernest Tubb,1940s 63 | Ronnie Milsap,1970s 64 | Jimmie Rodgers,1930s 65 | Gene Autry,1940s 66 | Jim Reeves,1950s 67 | Tracy Lawrence,1990s 68 | June Carter Cash,1960s 69 | Miranda Lambert,2010s 70 | Wynonna Judd,1990s 71 | Ray Price,1950s 72 | Faith Hill,2000s 73 | The Eagles,1970s 74 | Dierks 
Bentley,2010s 75 | Johnny Horton,1950s 76 | Mel Tillis,1970s 77 | Montgomery Gentry,2000s 78 | The Statler Brothers,1960s 79 | LeAnn Rimes,2000s 80 | Pam Tillis,1990s 81 | Marty Stuart,1990s 82 | Diamond Rio,1990s 83 | Jerry Lee Lewis,1950s 84 | Sammy Kershaw,1990s 85 | Chris LeDoux,1980s 86 | Bob Wills,1930s 87 | Lefty Frizzell,1960s 88 | Jason Aldean,2010s 89 | Tom T. Hall,1960s 90 | Eddie Rabbitt,1970s 91 | Darius Rucker,2010s 92 | Trisha Yearwood,2000s 93 | Keith Urban,2010s 94 | Roy Clark,1960s 95 | Mark Chesnutt,1990s 96 | Ray Charles,1950s 97 | Kitty Wells,1980s 98 | Bobby Bare,1990s 99 | Patty Loveless,1980s 100 | Rascal Flatts,2000s 101 | Ricky Skaggs,1970s 102 | -------------------------------------------------------------------------------- /csv/rap_ERA.csv: -------------------------------------------------------------------------------- 1 | Rap Name,Era 2 | The Notorious B.I.G.,1990s 3 | Tupac,1990s 4 | Eminem,2000s 5 | Kendrick Lamar,2010s 6 | Nas,1990s 7 | Dr. Dre,1990s 8 | Ice Cube,1990s 9 | Snoop Dogg,1990s 10 | JAY-Z,2000s 11 | Kanye West,2010s 12 | J. Cole,2010s 13 | Lil Wayne,2000s 14 | André 3000,2000s 15 | 50 Cent,2000s 16 | Eazy-E,1980s 17 | Drake,2010s 18 | DMX,1990s 19 | A$AP Rocky,2010s 20 | MF Doom,2000s 21 | Childish Gambino,2010s 22 | Kid Cudi,2010s 23 | "Tyler, the Creator",2010s 24 | Busta Rhymes,2000s 25 | Travis Scott,2010s 26 | XXXTentacion,2010s 27 | Ghostface Killah,1990s 28 | Chance the Rapper,2010s 29 | Juice Wrld,2010s 30 | Logic,2010s 31 | Nate Dogg,2000s 32 | Wiz Khalifa,2010s 33 | Future,2010s 34 | Joey Bada$$,2010s 35 | Method Man,1990s 36 | Mac Miller,2010s 37 | Big Sean,2010s 38 | Lil Uzi Vert,2010s 39 | The Game,2000s 40 | 21 Savage,2010s 41 | Tech N9ne,2000s 42 | Meek Mill,2010s 43 | Joyner Lucas,2010s 44 | Schoolboy Q,2010s 45 | 2 Chainz,2010s 46 | Nelly,2000s 47 | Gucci Mane,2010s 48 | Rakim,1990s 49 | Ski Mask The Slump God,2010s 50 | Ludacris,2000s 51 | Chief Keef,2010s 52 | Ice-T,1980s 53 | T.I.,2000s 54 | Missy Elliott,1990s 55 | Warren G,1990s 56 | Redman,1990s 57 | LL Cool J,1980s 58 | DMC,1980s 59 | A$AP Ferg,2010s 60 | Big L,1990s 61 | Ol' Dirty Bastard,1990s 62 | RZA,1990s 63 | Rick Ross,2000s 64 | Twista,1990s 65 | Big Pun,1990s 66 | Raekwon,1990s 67 | J.I.D,2020s 68 | Xzibit,2000s 69 | Scarface,1990s 70 | Lil Dicky,2010s 71 | Will Smith,1990s 72 | Mos Def,2000s 73 | Hopsin,2010s 74 | Big Boi,2000s 75 | G-Eazy,2010s 76 | Slick Rick,1980s 77 | GZA,1990s 78 | Q-Tip,1990s 79 | Prodigy,2000s 80 | Post Malone,2020s 81 | Young Thug,2010s 82 | NF,2010s 83 | Lil Baby,2020s 84 | T-Pain,2000s 85 | Coolio,1990s 86 | Polo G,2020s 87 | MC Ren,1980s 88 | Lupe Fiasco,2000s 89 | Quavo,2020s 90 | Common,2000s 91 | E-40,1990s 92 | Fat Joe,1990s 93 | Roddy Ricch,2020s 94 | DaBaby,2020s 95 | YG,2010s 96 | Grandmaster Flash,1980s 97 | KRS-One,1980s 98 | Big Daddy Kane,1980s 99 | Pharrell Williams,2000s 100 | Juicy J,2000s 101 | Kurupt,2000s 102 | Vince Staples,2010s 103 | Nicki Minaj,2010s 104 | A Boogie wit da Hoodie,2020s 105 | Playboi Carti,2020s 106 | NLE Choppa,2020s 107 | N.W.A,1980s 108 | Public Enemy,1980s 109 | Kurtis Blow,1980s 110 | De La Soul,1980s 111 | Chuck D,1980s 112 | Doja Cat,2020s 113 | Jack Harlow,2020s 114 | lil nas x,2020s 115 | lil Durk,2020s 116 | Baby Keem,2020s 117 | JPEGMAFIA,2020s 118 | BROCKHAMPTON,2020s 119 | JID,2020s 120 | Denzel Curry,2020s 121 | Isaiah Rashad,2010s 122 | Death Grips,2010s 123 | DJ Khaled,2010s 124 | Grandmaster Flash and the Furious Five,1980s 125 | The Sugarhill Gang,1980s 126 | Biz Markie,1980s 127 | 
Roxanne Shante,1980s 128 | De La Soul,1980s 129 | Run-DMC,1980s 130 | Pop Smoke,2020s 131 | Megan Thee Stallion,2020s -------------------------------------------------------------------------------- /test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 3, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/html": [ 20 | "
\n", 21 | "\n", 34 | "\n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | "
Country NameEra
0Johnny Cash1960s
1Hank Williams1950s
2George Strait1980s
3Merle Haggard1970s
4Alan Jackson1990s
.........
95Kitty Wells1980s
96Bobby Bare1990s
97Patty Loveless1980s
98Rascal Flatts2000s
99Ricky Skaggs1970s
\n", 100 | "

100 rows × 2 columns

\n", 101 | "
" 102 | ], 103 | "text/plain": [ 104 | " Country Name Era\n", 105 | "0 Johnny Cash 1960s\n", 106 | "1 Hank Williams 1950s\n", 107 | "2 George Strait 1980s\n", 108 | "3 Merle Haggard 1970s\n", 109 | "4 Alan Jackson 1990s\n", 110 | ".. ... ...\n", 111 | "95 Kitty Wells 1980s\n", 112 | "96 Bobby Bare 1990s\n", 113 | "97 Patty Loveless 1980s\n", 114 | "98 Rascal Flatts 2000s\n", 115 | "99 Ricky Skaggs 1970s\n", 116 | "\n", 117 | "[100 rows x 2 columns]" 118 | ] 119 | }, 120 | "execution_count": 3, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "df = pd.read_csv('/home/lettuce/MyCode/Where is the love/Where-is-the-love/csv/country_ERA.csv')\n", 127 | "df" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 4, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "1970s 22\n", 139 | "1990s 17\n", 140 | "1960s 15\n", 141 | "1980s 14\n", 142 | "2000s 10\n", 143 | "2010s 10\n", 144 | "1950s 6\n", 145 | "2020s 2\n", 146 | "1940s 2\n", 147 | "1930s 2\n", 148 | "Name: Era, dtype: int64" 149 | ] 150 | }, 151 | "execution_count": 4, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "df['Era'].value_counts()\n", 158 | "7,9,6,8,00,10" 159 | ] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "Python 3", 165 | "language": "python", 166 | "name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.10.12" 179 | }, 180 | "orig_nbformat": 4 181 | }, 182 | "nbformat": 4, 183 | "nbformat_minor": 2 184 | } 185 | -------------------------------------------------------------------------------- /src/funkyFunctions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from lyricsgenius import Genius 5 | from dotenv import load_dotenv 6 | import re 7 | import requests 8 | 9 | 10 | load_dotenv() # Load environment variables from the .env file 11 | 12 | genius_key = os.getenv('GENUIS_KEY') 13 | genius = Genius(genius_key,timeout=30) 14 | genius.verbose = False 15 | genius.remove_section_headers = True 16 | genius.excluded_terms = ["(Remix)", "(Live)"] 17 | 18 | def getData(rap_df): 19 | 20 | song_number = 2 21 | #Fill Songs and Lyrics Column with Nan 22 | rap_df['Songs'] = np.nan 23 | rap_df['Lyrics'] = np.nan 24 | #Show head 25 | print(rap_df.head()) 26 | 27 | #Loop through rows 28 | #for x in range(loc(rap_df)): 29 | for x in range(3): 30 | #Get artist name 31 | artist_name = rap_df.loc[x][0] 32 | print(f"Getting Data for {artist_name}") 33 | #Pull song data from genuis 34 | # Retry mechanism 35 | retries = 3 36 | while retries > 0: 37 | try: 38 | # Pull song data from genius 39 | artist = genius.search_artist(artist_name, max_songs=song_number, sort="popularity") 40 | break 41 | except requests.exceptions.Timeout: 42 | retries -= 1 43 | print(f"Request timed out. Retrying... {retries} attempts remaining.") 44 | if retries == 0: 45 | print(f"Failed to get data for {artist_name}. 
Skipping...") 46 | continue 47 | #Temp song and lyric list 48 | song_list = [] 49 | song_lyrics_list = [] 50 | 51 | #iiterate through songs and year 52 | for song in artist.songs: 53 | print(song.title) 54 | song_list.append(song.title) 55 | 56 | 57 | # Add to row 58 | rap_df['Songs'][x] = song_list 59 | 60 | # Iterate over the lyrics and add them to the list 61 | for song in song_list: 62 | lyrics = genius.search_song(artist_name,song) 63 | song_lyrics_list.append(lyrics.lyrics) 64 | #Add to row 65 | rap_df['Lyrics'][x] = song_lyrics_list 66 | 67 | print("Data Has Been Recived") 68 | #Show head 69 | print(rap_df.head()) 70 | 71 | 72 | 73 | def cleanData(rap_df): 74 | print("Cleaning Data...") 75 | 76 | for x in range(2): 77 | name = rap_df["Rap Name"][x] 78 | print(f"Cleaning data for {name}") 79 | 80 | lyrics_list = rap_df["Lyrics"][x] 81 | cleaned_lyrics_list = [] 82 | 83 | #Maybe not go trhough loop 84 | 85 | cleaned_lyric = process_string(lyrics_list) 86 | cleaned_lyrics_list.append(cleaned_lyric) 87 | 88 | rap_df["Lyrics"][x] = cleaned_lyrics_list 89 | 90 | print("Cleaned Data") 91 | rap_df.to_csv("/home/lettuce/MyCode/Statistical-Analysis-of-Music-Genres/src/rap_mid2.csv",index=False) 92 | print(rap_df.head()) 93 | 94 | 95 | def process_string(input_string): 96 | # Remove all occurrences of a standalone backslash 97 | input_string = re.sub(r'\\(?![n])', '', input_string) 98 | 99 | # Replace all occurrences of \n with a single space 100 | input_string = re.sub(r'\\n', ' ', input_string) 101 | 102 | # Remove specified characters: ',', ''', '(', ')', '?', '"', ':', '-', '!' 103 | input_string = re.sub(r"[,'\(\)\?\":\-!]", '', input_string) 104 | 105 | return input_string 106 | 107 | 108 | #Create a dictornary Key is word Value is count 109 | # IF there is a new word apphend the dictonary 110 | # IF word exist then add 1 to value 111 | def organizeDataTotal(rap_df): 112 | #Overall for all rappers 113 | lyric_dict_all_rap = {} 114 | 115 | print("") 116 | print("Organizing TotalData...") 117 | print("") 118 | 119 | #Itterate through each row 120 | for x in range(4): 121 | #Take lyric column 122 | lyrics = rap_df['Lyrics'][x] 123 | words = lyrics.lower().split() 124 | for word in words: 125 | if word in lyric_dict_all_rap: 126 | lyric_dict_all_rap[word] += 1 127 | else: 128 | lyric_dict_all_rap[word] = 1 129 | 130 | 131 | # Append the key-value pair to the dictionary 132 | #my_dict[key] = value 133 | #Sorting the dictonary 134 | sorted_word_count = sorted(lyric_dict_all_rap.items(), key=lambda item: item[1], reverse=True) 135 | #Put in new data frame 136 | #Rows 137 | my_index = ['Total'] 138 | for x in range(len(rap_df)): 139 | my_index.append(rap_df['Rap Name'][x]) 140 | #Put in new data frame 141 | #Columns 142 | my_columns = [] #Go to top 500 words when we have more data 143 | for key, value in sorted_word_count[:500]: 144 | my_columns.append(key) 145 | 146 | #Fill in data with Nan 147 | nan_array = np.empty((len(my_index), len(my_columns))) 148 | nan_array[:] = np.NaN 149 | #Filling in Total Row 150 | total_list = [] 151 | for key, value in sorted_word_count[:500]: 152 | total_list.append(value) 153 | 154 | #Create DF 155 | rap_final = pd.DataFrame(nan_array,index=my_index,columns=my_columns) 156 | #Add total values 157 | rap_final.loc["Total"] = total_list 158 | 159 | # Create a new DataFrame with "Artist Name" as the first column 160 | artist_name_df = pd.DataFrame(['Total'] + rap_df['Rap Name'].tolist(), columns=["Artist Name"]) 161 | 162 | # Reset the index of rap_final DataFrame 
to be numeric 163 | rap_final.reset_index(drop=True, inplace=True) 164 | 165 | # Concatenate the artist_name_df DataFrame with rap_final DataFrame 166 | rap_final = pd.concat([artist_name_df, rap_final], axis=1) 167 | 168 | #Index needs to be True 169 | #rap_final.to_csv("/home/lettuce/MyCode/pandasproject/rap_end.csv",index=False) 170 | 171 | 172 | #Had to use ChatGpt for some of this as Idk 173 | def organizeDataArtist(rap_df, rap_final): 174 | print("Organizing Data...") 175 | 176 | #Grab Total Row 177 | total = rap_final.iloc[0].to_dict() 178 | 179 | #Get index list 180 | index_list = rap_final.columns.tolist() 181 | 182 | for x in range(4): 183 | artist_name = rap_df["Rap Name"][x] # Update the index to start from 0 184 | print(f"Organizing Lyrics for {artist_name}") 185 | 186 | # Temp dictionary reset (Uses most common [] words) 187 | dick = {key: 0 for key in index_list} 188 | 189 | # Add artist column 190 | dick["Artist Name"] = artist_name 191 | 192 | # Take lyric column 193 | lyrics = rap_df['Lyrics'][x] # Update the index to start from 0 194 | words = lyrics.lower().split() 195 | for word in words: 196 | if word in index_list: 197 | # Add one to dick 198 | dick[word] += 1 199 | 200 | # Update the row in rap_final DataFrame 201 | rap_final.loc[x] = dick 202 | 203 | #So bassically I have to create a new empty row and save the total to top cuz I am a dumb ass somehow 204 | nan_row = pd.DataFrame(columns=rap_final.columns, index=[0]) 205 | nan_row.loc[0] = np.nan 206 | rap_final = pd.concat([nan_row, rap_final], ignore_index=True) 207 | 208 | rap_final.loc[0] = total 209 | 210 | #Gonna also change nword to nword ok 211 | #Want to talk or not to talk about the use of the nword in rap 212 | #N-word N-words HardR 213 | 214 | #Add like column to front 215 | 216 | 217 | #rap_final.to_csv("/home/lettuce/MyCode/pandasproject/rap_end.csv",index=False) 218 | print("Organized Data") 219 | 220 | 221 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /src/countryJUP.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np " 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 3, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/html": [ 21 | "
\n", 22 | "\n", 35 | "\n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | "
Country NameCountry Like
0Johnny Cash4,997 votes
1Hank Williams3,721 votes
2George Strait4,732 votes
3Merle Haggard3,498 votes
4Alan Jackson3,606 votes
\n", 71 | "
" 72 | ], 73 | "text/plain": [ 74 | " Country Name Country Like\n", 75 | "0 Johnny Cash 4,997 votes\n", 76 | "1 Hank Williams 3,721 votes\n", 77 | "2 George Strait 4,732 votes\n", 78 | "3 Merle Haggard 3,498 votes\n", 79 | "4 Alan Jackson 3,606 votes" 80 | ] 81 | }, 82 | "execution_count": 3, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "# Import the data\n", 89 | "df = pd.read_csv('country_names.csv')\n", 90 | "df.head()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "1920 2\n", 100 | "1930 3\n", 101 | "1940 4\n", 102 | "1950 5\n", 103 | "1960 6\n", 104 | "1970 7\n", 105 | "1980 8\n", 106 | "1990 9\n", 107 | "2000 00\n", 108 | "2010 10\n", 109 | "2020 20" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 9, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "100\n" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "era = [6,5,8,7,9,7,6,7,6,8,7,6,8,9,8,6,8,9,7,00,7,10,7,8,9,00,00,9,7,10,9,7,6,6,7,6,8,7,9,6,20,7,8,10,7,8,00,9,20,8,00,10,9,7,7,6,7,8,10,7,4,7,3,4,5,9,6,10,9,5,00,7,10,5,7,00,6,00,9,9,9,5,9,8,3,6,10,6,7,10,00,10,6,9,5,8,9,8,5,10]\n", 127 | "print(len(era))" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 7, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "['1960s',\n", 139 | " '1950s',\n", 140 | " '1980s',\n", 141 | " '1970s',\n", 142 | " '1990s',\n", 143 | " '1970s',\n", 144 | " '1960s',\n", 145 | " '1970s',\n", 146 | " '1960s',\n", 147 | " '1980s',\n", 148 | " '1970s',\n", 149 | " '1960s',\n", 150 | " '1980s',\n", 151 | " '1990s',\n", 152 | " '1980s',\n", 153 | " '1960s',\n", 154 | " '1980s',\n", 155 | " '1990s',\n", 156 | " '1970s',\n", 157 | " '2000s',\n", 158 | " '1970s',\n", 159 | " '2010s',\n", 160 | " '1970s',\n", 161 | " '1980s',\n", 162 | " '1990s',\n", 163 | " '2000s',\n", 164 | " '2000s',\n", 165 | " '1990s',\n", 166 | " '1970s',\n", 167 | " '2010s',\n", 168 | " '1990s',\n", 169 | " '1970s',\n", 170 | " '1960s',\n", 171 | " '1960s',\n", 172 | " '1970s',\n", 173 | " '1960s',\n", 174 | " '1980s',\n", 175 | " '1970s',\n", 176 | " '1990s',\n", 177 | " '1960s',\n", 178 | " '2020s',\n", 179 | " '1970s',\n", 180 | " '1980s',\n", 181 | " '2010s',\n", 182 | " '1970s',\n", 183 | " '1980s',\n", 184 | " '2000s',\n", 185 | " '1990s',\n", 186 | " '2020s',\n", 187 | " '1980s',\n", 188 | " '2000s',\n", 189 | " '2010s',\n", 190 | " '1990s',\n", 191 | " '1970s',\n", 192 | " '1970s',\n", 193 | " '1960s',\n", 194 | " '1970s',\n", 195 | " '1980s',\n", 196 | " '2010s',\n", 197 | " '1970s',\n", 198 | " '1940s',\n", 199 | " '1970s',\n", 200 | " '1930s',\n", 201 | " '1940s',\n", 202 | " '1950s',\n", 203 | " '1990s',\n", 204 | " '1960s',\n", 205 | " '2010s',\n", 206 | " '1990s',\n", 207 | " '1950s',\n", 208 | " '2000s',\n", 209 | " '1970s',\n", 210 | " '2010s',\n", 211 | " '1950s',\n", 212 | " '1970s',\n", 213 | " '2000s',\n", 214 | " '1960s',\n", 215 | " '2000s',\n", 216 | " '1990s',\n", 217 | " '1990s',\n", 218 | " '1990s',\n", 219 | " '1950s',\n", 220 | " '1990s',\n", 221 | " '1980s',\n", 222 | " '1930s',\n", 223 | " '1960s',\n", 224 | " '2010s',\n", 225 | " '1960s',\n", 226 | " '1970s',\n", 227 | " '2010s',\n", 228 | " '2000s',\n", 229 | " '2010s',\n", 230 | " '1960s',\n", 231 | " '1990s',\n", 232 | " '1950s',\n", 233 | " '1980s',\n", 234 | " '1990s',\n", 235 | " 
'1980s',\n", 236 | " '1950s',\n", 237 | " '2010s']" 238 | ] 239 | }, 240 | "execution_count": 7, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "era_fix = []\n", 247 | "era = [6,5,8,7,9,7,6,7,6,8,7,6,8,9,8,6,8,9,7,00,7,10,7,8,9,00,00,9,7,10,9,7,6,6,7,6,8,7,9,6,20,7,8,10,7,8,00,9,20,8,00,10,9,7,7,6,7,8,10,7,4,7,3,4,5,9,6,10,9,5,00,7,10,5,7,00,6,00,9,9,9,5,9,8,3,6,10,6,7,10,00,10,6,9,5,8,9,8,5,10]\n", 248 | "for x in range(len(era)):\n", 249 | " if era[x] == 2:\n", 250 | " era_fix.append('1920s')\n", 251 | " if era[x] == 3:\n", 252 | " era_fix.append('1930s')\n", 253 | " elif era[x] == 4:\n", 254 | " era_fix.append('1940s')\n", 255 | " elif era[x] == 5:\n", 256 | " era_fix.append('1950s')\n", 257 | " elif era[x] == 6:\n", 258 | " era_fix.append('1960s')\n", 259 | " elif era[x] == 7:\n", 260 | " era_fix.append('1970s')\n", 261 | " elif era[x] == 8:\n", 262 | " era_fix.append('1980s')\n", 263 | " elif era[x] == 9:\n", 264 | " era_fix.append('1990s')\n", 265 | " elif era[x] == 00:\n", 266 | " era_fix.append('2000s')\n", 267 | " elif era[x] == 10:\n", 268 | " era_fix.append('2010s')\n", 269 | " elif era[x] == 20:\n", 270 | " era_fix.append('2020s')\n", 271 | " \n" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 29, 277 | "metadata": {}, 278 | "outputs": [ 279 | { 280 | "data": { 281 | "text/plain": [ 282 | "2" 283 | ] 284 | }, 285 | "execution_count": 29, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "era = [6,5,8,7,9,7,6,7,6,8,7,6,8,9,8,6,8,9,7,00,7,10,7,8,9,00,00,9,7,10,9,7,6,6,7,6,8,7,9,6,20,7,8,10,7,8,00,9,20,8,00,10,9,7,7,6,7,8,10,7,4,7,3,4,5,9,6,10,9,5,00,7,10,5,7,00,6,00,9,9,9,5,9,8,3,6,10,6,7,10,00,10,6,9,5,8,9,8,5,10]\n", 292 | "era.count(20)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 11, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "country = pd.read_csv('country_names.csv')\n", 302 | "country[\"Era\"] = era_fix\n" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 14, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/html": [ 313 | "
\n", 314 | "\n", 327 | "\n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | "
Country NameEra
0Johnny Cash1960s
1Hank Williams1950s
2George Strait1980s
3Merle Haggard1970s
4Alan Jackson1990s
\n", 363 | "
" 364 | ], 365 | "text/plain": [ 366 | " Country Name Era\n", 367 | "0 Johnny Cash 1960s\n", 368 | "1 Hank Williams 1950s\n", 369 | "2 George Strait 1980s\n", 370 | "3 Merle Haggard 1970s\n", 371 | "4 Alan Jackson 1990s" 372 | ] 373 | }, 374 | "execution_count": 14, 375 | "metadata": {}, 376 | "output_type": "execute_result" 377 | } 378 | ], 379 | "source": [ 380 | "country = country.drop('Country Like' , axis=1)\n", 381 | "country.to_csv('country_names.csv',index=False)\n", 382 | "country.head()\n" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 4, 388 | "metadata": {}, 389 | "outputs": [ 390 | { 391 | "data": { 392 | "text/html": [ 393 | "
\n", 394 | "\n", 407 | "\n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 
679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | "
Country NameCountry Like
63Gene Autry364 votes
64Jim Reeves364 votes
65Tracy Lawrence957 votes
66June Carter Cash891 votes
67Miranda Lambert959 votes
68Wynonna Judd1,188 votes
69Ray Price244 votes
70Faith Hill828 votes
71The Eagles1,053 votes
72Dierks Bentley1,194 votes
73Johnny Horton789 votes
74Mel Tillis848 votes
75Montgomery Gentry716 votes
76The Statler Brothers689 votes
77LeAnn Rimes1,065 votes
78Pam Tillis475 votes
79Marty Stuart256 votes
80Diamond Rio530 votes
81Jerry Lee Lewis675 votes
82Sammy Kershaw718 votes
83Chris LeDoux493 votes
84Bob Wills694 votes
85Lefty Frizzell629 votes
86Jason Aldean884 votes
87Tom T. Hall542 votes
88Eddie Rabbitt618 votes
89Darius Rucker915 votes
90Trisha Yearwood1,059 votes
91Keith Urban747 votes
92Roy Clark982 votes
93Mark Chesnutt709 votes
94Ray Charles650 votes
95Kitty Wells417 votes
96Bobby Bare558 votes
97Patty Loveless832 votes
98Rascal Flatts1,161 votes
99Ricky Skaggs998 votes
100Lorrie Morgan298 votes
101Lonestar669 votes
102The Bellamy Brothers674 votes
103Lester Flatt & Earl Scruggs475 votes
104Gary Allan673 votes
105Lady A546 votes
106Chet Atkins1,011 votes
107Hank Snow727 votes
108Ricky Van Shelton363 votes
109Charlie Rich861 votes
110Lee Ann Womack641 votes
111Roy Rogers467 votes
112Bill Monroe and His Bluegrass Boys734 votes
113Roy Acuff864 votes
114Steve Earle485 votes
115Eddy Arnold779 votes
116Kacey Musgraves395 votes
117The Marshall Tucker Band504 votes
118Porter Wagoner242 votes
119Billy Currington695 votes
120Barbara Mandrell482 votes
121Clay Walker371 votes
122Jon Pardi609 votes
\n", 718 | "
" 719 | ], 720 | "text/plain": [ 721 | " Country Name Country Like\n", 722 | "63 Gene Autry 364 votes\n", 723 | "64 Jim Reeves 364 votes\n", 724 | "65 Tracy Lawrence 957 votes\n", 725 | "66 June Carter Cash 891 votes\n", 726 | "67 Miranda Lambert 959 votes\n", 727 | "68 Wynonna Judd 1,188 votes\n", 728 | "69 Ray Price 244 votes\n", 729 | "70 Faith Hill 828 votes\n", 730 | "71 The Eagles 1,053 votes\n", 731 | "72 Dierks Bentley 1,194 votes\n", 732 | "73 Johnny Horton 789 votes\n", 733 | "74 Mel Tillis 848 votes\n", 734 | "75 Montgomery Gentry 716 votes\n", 735 | "76 The Statler Brothers 689 votes\n", 736 | "77 LeAnn Rimes 1,065 votes\n", 737 | "78 Pam Tillis 475 votes\n", 738 | "79 Marty Stuart 256 votes\n", 739 | "80 Diamond Rio 530 votes\n", 740 | "81 Jerry Lee Lewis 675 votes\n", 741 | "82 Sammy Kershaw 718 votes\n", 742 | "83 Chris LeDoux 493 votes\n", 743 | "84 Bob Wills 694 votes\n", 744 | "85 Lefty Frizzell 629 votes\n", 745 | "86 Jason Aldean 884 votes\n", 746 | "87 Tom T. Hall 542 votes\n", 747 | "88 Eddie Rabbitt 618 votes\n", 748 | "89 Darius Rucker 915 votes\n", 749 | "90 Trisha Yearwood 1,059 votes\n", 750 | "91 Keith Urban 747 votes\n", 751 | "92 Roy Clark 982 votes\n", 752 | "93 Mark Chesnutt 709 votes\n", 753 | "94 Ray Charles 650 votes\n", 754 | "95 Kitty Wells 417 votes\n", 755 | "96 Bobby Bare 558 votes\n", 756 | "97 Patty Loveless 832 votes\n", 757 | "98 Rascal Flatts 1,161 votes\n", 758 | "99 Ricky Skaggs 998 votes\n", 759 | "100 Lorrie Morgan 298 votes\n", 760 | "101 Lonestar 669 votes\n", 761 | "102 The Bellamy Brothers 674 votes\n", 762 | "103 Lester Flatt & Earl Scruggs 475 votes\n", 763 | "104 Gary Allan 673 votes\n", 764 | "105 Lady A 546 votes\n", 765 | "106 Chet Atkins 1,011 votes\n", 766 | "107 Hank Snow 727 votes\n", 767 | "108 Ricky Van Shelton 363 votes\n", 768 | "109 Charlie Rich 861 votes\n", 769 | "110 Lee Ann Womack 641 votes\n", 770 | "111 Roy Rogers 467 votes\n", 771 | "112 Bill Monroe and His Bluegrass Boys 734 votes\n", 772 | "113 Roy Acuff 864 votes\n", 773 | "114 Steve Earle 485 votes\n", 774 | "115 Eddy Arnold 779 votes\n", 775 | "116 Kacey Musgraves 395 votes\n", 776 | "117 The Marshall Tucker Band 504 votes\n", 777 | "118 Porter Wagoner 242 votes\n", 778 | "119 Billy Currington 695 votes\n", 779 | "120 Barbara Mandrell 482 votes\n", 780 | "121 Clay Walker 371 votes\n", 781 | "122 Jon Pardi 609 votes" 782 | ] 783 | }, 784 | "execution_count": 4, 785 | "metadata": {}, 786 | "output_type": "execute_result" 787 | } 788 | ], 789 | "source": [ 790 | "[6,5,8,7,9,7,6,7,6,8,7,6,8,9,8,6,8,9,7,00,7,10,7,8,9,00,00,9,7,10,9,7,6,6,7,6,8,7,9,6,20,7,8,10,7,8,00,9,20,8,00,10,9,7,7,6,7,8,10,7,4,7,3,4,5,9,6,10,9,5,00,7,10,5,7,00,6,00,9,9,9,5,9,8,3,6,10,6,7,10,00,10,6,9,5,8,9,8,5,10]\n", 791 | "df.tail(60)" 792 | ] 793 | } 794 | ], 795 | "metadata": { 796 | "kernelspec": { 797 | "display_name": "Python 3", 798 | "language": "python", 799 | "name": "python3" 800 | }, 801 | "language_info": { 802 | "codemirror_mode": { 803 | "name": "ipython", 804 | "version": 3 805 | }, 806 | "file_extension": ".py", 807 | "mimetype": "text/x-python", 808 | "name": "python", 809 | "nbconvert_exporter": "python", 810 | "pygments_lexer": "ipython3", 811 | "version": "3.10.6" 812 | }, 813 | "orig_nbformat": 4 814 | }, 815 | "nbformat": 4, 816 | "nbformat_minor": 2 817 | } 818 | -------------------------------------------------------------------------------- /src/main2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import pandas as pd \n", 11 | "import numpy as np \n", 12 | "from lyricsgenius import Genius\n", 13 | "from dotenv import load_dotenv\n", 14 | "import funkyFunctions\n", 15 | "import re\n", 16 | "import os\n", 17 | "import pandas as pd \n", 18 | "import numpy as np \n", 19 | "from lyricsgenius import Genius\n", 20 | "from dotenv import load_dotenv\n", 21 | "import re\n", 22 | "import requests\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "load_dotenv() # Load environment variables from the .env file\n", 32 | "\n", 33 | "genius_key = os.getenv('GENUIS_KEY')\n", 34 | "genius = Genius(genius_key,timeout=15)\n", 35 | "genius.verbose = False\n", 36 | "genius.remove_section_headers = True\n", 37 | "genius.excluded_terms = [\"(Remix)\", \"(Live)\"]" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def getLove(rap_df):\n", 47 | "\n", 48 | " love_words = ['love', 'loves', 'loved', 'loving']\n", 49 | " song_number = 3\n", 50 | " artist = 3\n", 51 | " #Fill Songs and Lyrics Column with Nan\n", 52 | " rap_df['Songs'] = np.nan\n", 53 | " rap_df['Lyrics'] = np.nan\n", 54 | " #Show head\n", 55 | " print(rap_df.head())\n", 56 | "\n", 57 | " #Loop through rows\n", 58 | " #for x in range(loc(rap_df)):\n", 59 | " for x in range(artist):\n", 60 | " #Get artist name\n", 61 | " artist_name = rap_df.loc[x][0]\n", 62 | " print(f\"Getting Data for {artist_name}\")\n", 63 | " #Pull song data from genuis\n", 64 | " # Retry mechanism\n", 65 | " retries = 3\n", 66 | " while retries > 0:\n", 67 | " try:\n", 68 | " # Pull song data from genius\n", 69 | " artist = genius.search_artist(artist_name, max_songs=song_number, sort=\"popularity\")\n", 70 | " break\n", 71 | " except requests.exceptions.Timeout:\n", 72 | " retries -= 1\n", 73 | " print(f\"Request timed out. Retrying... {retries} attempts remaining.\")\n", 74 | " if retries == 0:\n", 75 | " print(f\"Failed to get data for {artist_name}. 
Skipping...\")\n", 76 | " continue\n", 77 | " #Temp song and lyric list\n", 78 | " song_list = []\n", 79 | " song_lyrics_list = []\n", 80 | "\n", 81 | " #iiterate through songs and year \n", 82 | " for song in artist.songs:\n", 83 | " print(song.title)\n", 84 | " song_list.append(song.title)\n", 85 | " # Add to row\n", 86 | " rap_df['Songs'][x] = song_list\n", 87 | "\n", 88 | " # Iterate over the songs again for lyrics\n", 89 | " for song in song_list:\n", 90 | " lyrics = genius.search_song(artist_name, song)\n", 91 | " \n", 92 | " # Split lyrics into lines\n", 93 | " lines = lyrics.lyrics.split('\\n')\n", 94 | " \n", 95 | " # Iterate over the lines\n", 96 | " for line in lines:\n", 97 | " # Convert the line to lowercase for case-insensitive matching\n", 98 | " line_lower = line.lower()\n", 99 | " \n", 100 | " # If any love word is in the line, append it to song_lyrics_list\n", 101 | " if any(love_word in line_lower for love_word in love_words) and len(line.split()) <= 45:\n", 102 | " song_lyrics_list.append(line)\n", 103 | "\n", 104 | " # After the loops, update the DataFrame\n", 105 | " rap_df.loc[x, 'Songs'] = ', '.join(song_list)\n", 106 | " rap_df.loc[x, 'Lyrics'] = ', '.join(song_lyrics_list)\n", 107 | "\n", 108 | " print(\"Data Has Been Recived\")\n", 109 | " #Show head\n", 110 | " print(rap_df.head())\n", 111 | " " 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 44, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "def cleanData(rap_df):\n", 121 | " print(\"Cleaning Data...\")\n", 122 | "\n", 123 | " for x in range(3):\n", 124 | " name = rap_df[\"Rap Name\"][x]\n", 125 | " print(f\"Cleaning data for {name}\")\n", 126 | "\n", 127 | " lyrics_list = rap_df[\"Lyrics\"][x]\n", 128 | " cleaned_lyrics_list = []\n", 129 | "\n", 130 | " #Maybe not go trhough loop\n", 131 | "\n", 132 | " cleaned_lyric = process_string(lyrics_list)\n", 133 | " cleaned_lyrics_list.append(cleaned_lyric)\n", 134 | "\n", 135 | " rap_df[\"Lyrics\"][x] = cleaned_lyrics_list\n", 136 | " print( rap_df[\"Lyrics\"][x])\n", 137 | "\n", 138 | " print(\"Cleaned Data\")\n", 139 | " print(rap_df.head())\n", 140 | "\n", 141 | "\n", 142 | "def process_string(input_string):\n", 143 | " # Remove all occurrences of a standalone backslash\n", 144 | " input_string = re.sub(r'\\\\(?![n])', '', input_string)\n", 145 | " \n", 146 | " # Replace all occurrences of \\n with a single space\n", 147 | " input_string = re.sub(r'\\\\n', ' ', input_string)\n", 148 | " \n", 149 | " # Remove specified characters: ',', ''', '(', ')', '?', '\"', ':', '-', '!'\n", 150 | " input_string = re.sub(r\"[,'\\(\\)\\?\\\":\\-!]\", '', input_string)\n", 151 | "\n", 152 | " return input_string" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 66, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "def organizeDataTotal(rap_df):\n", 162 | " #Overall for all rappers\n", 163 | " lyric_dict_all_rap = {}\n", 164 | "\n", 165 | " print(\"\")\n", 166 | " print(\"Organizing TotalData...\")\n", 167 | " print(\"\")\n", 168 | "\n", 169 | " #Itterate through each row \n", 170 | " for x in range(3):\n", 171 | " #Take lyric column\n", 172 | " lyrics = rap_df['Lyrics'][x]\n", 173 | " words = lyrics.lower().split()\n", 174 | " for word in words: \n", 175 | " if word in lyric_dict_all_rap:\n", 176 | " lyric_dict_all_rap[word] += 1\n", 177 | " else:\n", 178 | " lyric_dict_all_rap[word] = 1\n", 179 | "\n", 180 | "\n", 181 | " # Append the key-value pair to the dictionary\n", 182 | " 
#my_dict[key] = value\n", 183 | " #Sorting the dictonary \n", 184 | " sorted_word_count = sorted(lyric_dict_all_rap.items(), key=lambda item: item[1], reverse=True)\n", 185 | " #Put in new data frame \n", 186 | " #Rows \n", 187 | " my_index = ['Total']\n", 188 | " for x in range(len(rap_df)):\n", 189 | " my_index.append(rap_df['Rap Name'][x])\n", 190 | " #Put in new data frame\n", 191 | " #Columns\n", 192 | " my_columns = [] #Go to top 1000 words when we have more data\n", 193 | " for key, value in sorted_word_count[:500]:\n", 194 | " my_columns.append(key)\n", 195 | "\n", 196 | " #Fill in data with Nan\n", 197 | " nan_array = np.empty((len(my_index), len(my_columns)))\n", 198 | " nan_array[:] = np.NaN\n", 199 | " #Filling in Total Row\n", 200 | " total_list = [] \n", 201 | " for key, value in sorted_word_count[:500]:\n", 202 | " total_list.append(value)\n", 203 | "\n", 204 | " #Create DF\n", 205 | " rap_final = pd.DataFrame(nan_array,index=my_index,columns=my_columns) \n", 206 | " #Add total values\n", 207 | " rap_final.loc[\"Total\"] = total_list\n", 208 | " \n", 209 | " # Create a new DataFrame with \"Artist Name\" as the first column\n", 210 | " artist_name_df = pd.DataFrame(['Total'] + rap_df['Rap Name'].tolist(), columns=[\"Artist Name\"])\n", 211 | " \n", 212 | " # Reset the index of rap_final DataFrame to be numeric\n", 213 | " rap_final.reset_index(drop=True, inplace=True)\n", 214 | "\n", 215 | " # Concatenate the artist_name_df DataFrame with rap_final DataFrame\n", 216 | " rap_final = pd.concat([artist_name_df, rap_final], axis=1)\n", 217 | "\n", 218 | " #Index needs to be True\n", 219 | " print(\"Done Saved in rap_NEWT.csv\")\n", 220 | " rap_final.to_csv('rap_LOVE_NEWT.csv', index=True)\n" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 67, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "def organizeDataArtist(rap_df, rap_final):\n", 230 | " print(\"Organizing Data...\")\n", 231 | "\n", 232 | " #Grab Total Row\n", 233 | " total = rap_final.iloc[0].to_dict()\n", 234 | "\n", 235 | " #Get index list\n", 236 | " index_list = rap_final.columns.tolist()\n", 237 | "\n", 238 | " for x in range(3):\n", 239 | " artist_name = rap_df[\"Rap Name\"][x] # Update the index to start from 0\n", 240 | " print(f\"Organizing Lyrics for {artist_name}\")\n", 241 | "\n", 242 | " # Temp dictionary reset (Uses most common [] words)\n", 243 | " dick = {key: 0 for key in index_list}\n", 244 | "\n", 245 | " # Add artist column\n", 246 | " dick[\"Artist Name\"] = artist_name\n", 247 | "\n", 248 | " # Take lyric column\n", 249 | " lyrics = rap_df['Lyrics'][x] # Update the index to start from 0\n", 250 | " words = lyrics.lower().split()\n", 251 | " for word in words:\n", 252 | " if word in index_list:\n", 253 | " # Add one to dick\n", 254 | " dick[word] += 1\n", 255 | "\n", 256 | " # Update the row in rap_final DataFrame\n", 257 | " rap_final.loc[x] = dick\n", 258 | "\n", 259 | " #So bassically I have to create a new empty row and save the total to top cuz I am a dumb ass somehow\n", 260 | " nan_row = pd.DataFrame(columns=rap_final.columns, index=[0])\n", 261 | " nan_row.loc[0] = np.nan\n", 262 | " rap_final = pd.concat([nan_row, rap_final], ignore_index=True)\n", 263 | " \n", 264 | " rap_final.loc[0] = total\n", 265 | "\n", 266 | " #Gonna also change nword to nword ok\n", 267 | " #Want to talk or not to talk about the use of the nword in rap \n", 268 | " #N-word N-words HardR\n", 269 | " \n", 270 | " #Add like column to front\n", 271 | " \n", 272 | "\n", 273 | 
" rap_final.to_csv(\"/home/lettuce/MyCode/Where is the love/Where-is-the-love/csv/final/rap_LOVE_2.csv\",index=True)\n", 274 | " print(\"Organized Data\")" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 56, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "text/html": [ 285 | "
\n", 286 | "\n", 299 | "\n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | "
Rap NameEra
0The Notorious B.I.G.1990s
1Tupac1990s
2Eminem2000s
3Kendrick Lamar2010s
4Nas1990s
\n", 335 | "
" 336 | ], 337 | "text/plain": [ 338 | " Rap Name Era\n", 339 | "0 The Notorious B.I.G. 1990s\n", 340 | "1 Tupac 1990s\n", 341 | "2 Eminem 2000s\n", 342 | "3 Kendrick Lamar 2010s\n", 343 | "4 Nas 1990s" 344 | ] 345 | }, 346 | "execution_count": 56, 347 | "metadata": {}, 348 | "output_type": "execute_result" 349 | } 350 | ], 351 | "source": [ 352 | "csv_start = pd.read_csv('/home/lettuce/MyCode/Where is the love/Where-is-the-love/csv/rap_ERA.csv')\n", 353 | "csv_start.head()" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 57, 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "name": "stdout", 363 | "output_type": "stream", 364 | "text": [ 365 | " Rap Name Era Songs Lyrics\n", 366 | "0 The Notorious B.I.G. 1990s NaN NaN\n", 367 | "1 Tupac 1990s NaN NaN\n", 368 | "2 Eminem 2000s NaN NaN\n", 369 | "3 Kendrick Lamar 2010s NaN NaN\n", 370 | "4 Nas 1990s NaN NaN\n", 371 | "Getting Data for The Notorious B.I.G.\n", 372 | "Juicy\n", 373 | "Big Poppa\n", 374 | "Suicidal Thoughts\n" 375 | ] 376 | }, 377 | { 378 | "name": "stderr", 379 | "output_type": "stream", 380 | "text": [ 381 | "/tmp/ipykernel_3468/3398968083.py:41: SettingWithCopyWarning: \n", 382 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 383 | "\n", 384 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 385 | " rap_df['Songs'][x] = song_list\n" 386 | ] 387 | }, 388 | { 389 | "name": "stdout", 390 | "output_type": "stream", 391 | "text": [ 392 | "Getting Data for Tupac\n", 393 | "Hit ’Em Up\n", 394 | "Changes\n", 395 | "Keep Ya Head Up\n", 396 | "Getting Data for Eminem\n", 397 | "Rap God\n", 398 | "Killshot\n", 399 | "Lose Yourself\n", 400 | "Data Has Been Recived\n", 401 | " Rap Name Era Songs \\\n", 402 | "0 The Notorious B.I.G. 1990s Juicy, Big Poppa, Suicidal Thoughts \n", 403 | "1 Tupac 1990s Hit ’Em Up, Changes, Keep Ya Head Up \n", 404 | "2 Eminem 2000s Rap God, Killshot, Lose Yourself \n", 405 | "3 Kendrick Lamar 2010s NaN \n", 406 | "4 Nas 1990s NaN \n", 407 | "\n", 408 | " Lyrics \n", 409 | "0 Funkmaster Flex, Lovebug Starski, Spread love,... \n", 410 | "1 With my AK, I'm still the thug that you love t... \n", 411 | "2 Everybody loves to root for a nuisance, That a... \n", 412 | "3 NaN \n", 413 | "4 NaN \n" 414 | ] 415 | }, 416 | { 417 | "data": { 418 | "text/html": [ 419 | "
\n", 420 | "\n", 433 | "\n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | "
Rap NameEraSongsLyrics
0The Notorious B.I.G.1990sJuicy, Big Poppa, Suicidal ThoughtsFunkmaster Flex, Lovebug Starski, Spread love,...
1Tupac1990sHit ’Em Up, Changes, Keep Ya Head UpWith my AK, I'm still the thug that you love t...
2Eminem2000sRap God, Killshot, Lose YourselfEverybody loves to root for a nuisance, That a...
3Kendrick Lamar2010sNaNNaN
4Nas1990sNaNNaN
\n", 481 | "
" 482 | ], 483 | "text/plain": [ 484 | " Rap Name Era Songs \\\n", 485 | "0 The Notorious B.I.G. 1990s Juicy, Big Poppa, Suicidal Thoughts \n", 486 | "1 Tupac 1990s Hit ’Em Up, Changes, Keep Ya Head Up \n", 487 | "2 Eminem 2000s Rap God, Killshot, Lose Yourself \n", 488 | "3 Kendrick Lamar 2010s NaN \n", 489 | "4 Nas 1990s NaN \n", 490 | "\n", 491 | " Lyrics \n", 492 | "0 Funkmaster Flex, Lovebug Starski, Spread love,... \n", 493 | "1 With my AK, I'm still the thug that you love t... \n", 494 | "2 Everybody loves to root for a nuisance, That a... \n", 495 | "3 NaN \n", 496 | "4 NaN " 497 | ] 498 | }, 499 | "execution_count": 57, 500 | "metadata": {}, 501 | "output_type": "execute_result" 502 | } 503 | ], 504 | "source": [ 505 | "getLove(csv_start)\n", 506 | "#Save to new csv\n", 507 | "csv_start.to_csv('/home/lettuce/MyCode/Where is the love/Where-is-the-love/rap_TRAN_LOVE.csv', index=False)\n", 508 | "csv_start = pd.read_csv('/home/lettuce/MyCode/Where is the love/Where-is-the-love/rap_TRAN_LOVE.csv')\n", 509 | "csv_start.head()" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 58, 515 | "metadata": {}, 516 | "outputs": [ 517 | { 518 | "name": "stdout", 519 | "output_type": "stream", 520 | "text": [ 521 | "Cleaning Data...\n", 522 | "Cleaning data for The Notorious B.I.G.\n", 523 | "['Funkmaster Flex Lovebug Starski Spread love its the Brooklyn way And she loves to show me off of course I love it when you call me Big Poppa I love it when you call me Big Poppa I love it when you call me Big Poppa I love it when you call me Big Poppa I love it when you call me Big Poppa I love it when you call me Big Poppa I love it when you call me Big Poppa I love it when you call me Big Poppa I love it when you call me Big Poppa She dont even love me like she did when I was younger Yo get a hold of yourself nigga']\n", 524 | "Cleaning data for Tupac\n", 525 | "['With my AK Im still the thug that you love to hate I got love for my brother Id love to go back to when we played as kids And if he cant learn to love you you should leave him Wants to know why his daddy dont love him no mo']\n", 526 | "Cleaning data for Eminem\n", 527 | "['Everybody loves to root for a nuisance That a death threat or a love letter You know I love you1.8KEmbed Mom I love you but this trailers got']\n", 528 | "Cleaned Data\n", 529 | " Rap Name Era Songs \\\n", 530 | "0 The Notorious B.I.G. 1990s Juicy, Big Poppa, Suicidal Thoughts \n", 531 | "1 Tupac 1990s Hit ’Em Up, Changes, Keep Ya Head Up \n", 532 | "2 Eminem 2000s Rap God, Killshot, Lose Yourself \n", 533 | "3 Kendrick Lamar 2010s NaN \n", 534 | "4 Nas 1990s NaN \n", 535 | "\n", 536 | " Lyrics \n", 537 | "0 [Funkmaster Flex Lovebug Starski Spread love i... \n", 538 | "1 [With my AK Im still the thug that you love to... \n", 539 | "2 [Everybody loves to root for a nuisance That a... \n", 540 | "3 NaN \n", 541 | "4 NaN \n" 542 | ] 543 | } 544 | ], 545 | "source": [ 546 | "cleanData(csv_start)" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 60, 552 | "metadata": {}, 553 | "outputs": [ 554 | { 555 | "data": { 556 | "text/html": [ 557 | "
\n", 558 | "\n", 571 | "\n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | "
Rap NameEraSongsLyrics
0The Notorious B.I.G.1990sJuicy, Big Poppa, Suicidal Thoughts['Funkmaster Flex Lovebug Starski Spread love ...
1Tupac1990sHit ’Em Up, Changes, Keep Ya Head Up['With my AK Im still the thug that you love t...
2Eminem2000sRap God, Killshot, Lose Yourself['Everybody loves to root for a nuisance That ...
3Kendrick Lamar2010sNaNNaN
4Nas1990sNaNNaN
\n", 619 | "
" 620 | ], 621 | "text/plain": [ 622 | " Rap Name Era Songs \\\n", 623 | "0 The Notorious B.I.G. 1990s Juicy, Big Poppa, Suicidal Thoughts \n", 624 | "1 Tupac 1990s Hit ’Em Up, Changes, Keep Ya Head Up \n", 625 | "2 Eminem 2000s Rap God, Killshot, Lose Yourself \n", 626 | "3 Kendrick Lamar 2010s NaN \n", 627 | "4 Nas 1990s NaN \n", 628 | "\n", 629 | " Lyrics \n", 630 | "0 ['Funkmaster Flex Lovebug Starski Spread love ... \n", 631 | "1 ['With my AK Im still the thug that you love t... \n", 632 | "2 ['Everybody loves to root for a nuisance That ... \n", 633 | "3 NaN \n", 634 | "4 NaN " 635 | ] 636 | }, 637 | "execution_count": 60, 638 | "metadata": {}, 639 | "output_type": "execute_result" 640 | } 641 | ], 642 | "source": [ 643 | "csv_start.to_csv('/home/lettuce/MyCode/Where is the love/Where-is-the-love/rap_TRAN_LOVE.csv', index=False)\n", 644 | "csv_start = pd.read_csv('/home/lettuce/MyCode/Where is the love/Where-is-the-love/rap_TRAN_LOVE.csv')\n", 645 | "csv_start.head()" 646 | ] 647 | }, 648 | { 649 | "cell_type": "code", 650 | "execution_count": 61, 651 | "metadata": {}, 652 | "outputs": [ 653 | { 654 | "name": "stdout", 655 | "output_type": "stream", 656 | "text": [ 657 | "\n", 658 | "Organizing TotalData...\n", 659 | "\n", 660 | "Done Saved in rap_NEWT.csv\n" 661 | ] 662 | } 663 | ], 664 | "source": [ 665 | "organizeDataTotal(csv_start)\n", 666 | "csv_start.to_csv('/home/lettuce/MyCode/Where is the love/Where-is-the-love/rap_TRAN_LOVE.csv', index=False)\n", 667 | "csv_start = pd.read_csv('/home/lettuce/MyCode/Where is the love/Where-is-the-love/rap_TRAN_LOVE.csv')\n" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "execution_count": 62, 673 | "metadata": {}, 674 | "outputs": [], 675 | "source": [ 676 | "rap_final = pd.read_csv('/home/lettuce/MyCode/Where is the love/Where-is-the-love/src/rap_LOVE_NEWT.csv')" 677 | ] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": 64, 682 | "metadata": {}, 683 | "outputs": [ 684 | { 685 | "name": "stdout", 686 | "output_type": "stream", 687 | "text": [ 688 | "Organizing Data...\n", 689 | "Organizing Lyrics for The Notorious B.I.G.\n", 690 | "Organizing Lyrics for Tupac\n", 691 | "Organized Data\n" 692 | ] 693 | } 694 | ], 695 | "source": [ 696 | "organizeDataArtist(csv_start, rap_final)" 697 | ] 698 | } 699 | ], 700 | "metadata": { 701 | "kernelspec": { 702 | "display_name": "Python 3", 703 | "language": "python", 704 | "name": "python3" 705 | }, 706 | "language_info": { 707 | "codemirror_mode": { 708 | "name": "ipython", 709 | "version": 3 710 | }, 711 | "file_extension": ".py", 712 | "mimetype": "text/x-python", 713 | "name": "python", 714 | "nbconvert_exporter": "python", 715 | "pygments_lexer": "ipython3", 716 | "version": "3.10.12" 717 | }, 718 | "orig_nbformat": 4 719 | }, 720 | "nbformat": 4, 721 | "nbformat_minor": 2 722 | } 723 | -------------------------------------------------------------------------------- /src/rapJUP.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import spacy\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 3, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "
\n", 23 | "\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | "
Artist Nametheyouaitoandinitup...placeworknobodyparkkneeltotinshouldarmpayhomage
0Total491.0372.0325.0306.0271.0241.0162.0152.0147.0...3.03.03.03.03.03.03.03.03.03.0
1The Notorious B.I.G.85.082.090.073.070.051.037.030.010.0...1.01.01.01.01.01.01.01.01.01.0
2Tupac100.075.038.038.045.037.018.047.022.0...1.00.00.00.00.01.01.00.00.00.0
3Eminem85.082.090.073.070.051.037.030.010.0...1.01.01.01.01.01.01.01.01.01.0
4Kendrick Lamar74.036.036.040.014.029.023.017.075.0...0.00.01.00.00.00.00.00.00.00.0
\n", 186 | "

5 rows × 501 columns

\n", 187 | "
" 188 | ], 189 | "text/plain": [ 190 | " Artist Name the you a i to and in \\\n", 191 | "0 Total 491.0 372.0 325.0 306.0 271.0 241.0 162.0 \n", 192 | "1 The Notorious B.I.G. 85.0 82.0 90.0 73.0 70.0 51.0 37.0 \n", 193 | "2 Tupac 100.0 75.0 38.0 38.0 45.0 37.0 18.0 \n", 194 | "3 Eminem 85.0 82.0 90.0 73.0 70.0 51.0 37.0 \n", 195 | "4 Kendrick Lamar 74.0 36.0 36.0 40.0 14.0 29.0 23.0 \n", 196 | "\n", 197 | " it up ... place work nobody park kneel totin should arm \\\n", 198 | "0 152.0 147.0 ... 3.0 3.0 3.0 3.0 3.0 3.0 3.0 3.0 \n", 199 | "1 30.0 10.0 ... 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 \n", 200 | "2 47.0 22.0 ... 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 \n", 201 | "3 30.0 10.0 ... 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 \n", 202 | "4 17.0 75.0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", 203 | "\n", 204 | " pay homage \n", 205 | "0 3.0 3.0 \n", 206 | "1 1.0 1.0 \n", 207 | "2 0.0 0.0 \n", 208 | "3 1.0 1.0 \n", 209 | "4 0.0 0.0 \n", 210 | "\n", 211 | "[5 rows x 501 columns]" 212 | ] 213 | }, 214 | "execution_count": 3, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "csv_rap_final = pd.read_csv('/home/lettuce/MyCode/pandasproject/Rap-vs-Country-StatisticalStudy/final_rap.csv')\n", 221 | "csv_rap_final.head()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 4, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/html": [ 232 | "
\n", 233 | "\n", 246 | "\n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | "
Artist Namegotgetdontwayknowseedocauseaint...freepourfameflocktimesnormalwantedgodzillabreathliveget
0Total93.087.082.073.069.069.064.061.059.0...5.05.05.05.05.05.05.05.05.05.0
1The Notorious B.I.G.13.010.020.04.025.015.08.015.08.0...0.00.00.00.00.00.00.00.00.00.0
2Tupac20.024.031.041.021.029.025.014.028.0...2.01.00.00.00.00.00.00.00.00.0
3Eminem23.039.021.09.013.06.016.026.08.0...0.00.05.01.03.05.05.05.05.00.0
4Kendrick Lamar37.014.010.019.010.019.015.06.015.0...3.04.00.04.02.00.00.00.00.05.0
\n", 396 | "

5 rows × 303 columns

\n", 397 | "
" 398 | ], 399 | "text/plain": [ 400 | " Artist Name got get dont way know see do cause \\\n", 401 | "0 Total 93.0 87.0 82.0 73.0 69.0 69.0 64.0 61.0 \n", 402 | "1 The Notorious B.I.G. 13.0 10.0 20.0 4.0 25.0 15.0 8.0 15.0 \n", 403 | "2 Tupac 20.0 24.0 31.0 41.0 21.0 29.0 25.0 14.0 \n", 404 | "3 Eminem 23.0 39.0 21.0 9.0 13.0 6.0 16.0 26.0 \n", 405 | "4 Kendrick Lamar 37.0 14.0 10.0 19.0 10.0 19.0 15.0 6.0 \n", 406 | "\n", 407 | " aint ... free pour fame flock times normal wanted godzilla \\\n", 408 | "0 59.0 ... 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 \n", 409 | "1 8.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 410 | "2 28.0 ... 2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 411 | "3 8.0 ... 0.0 0.0 5.0 1.0 3.0 5.0 5.0 5.0 \n", 412 | "4 15.0 ... 3.0 4.0 0.0 4.0 2.0 0.0 0.0 0.0 \n", 413 | "\n", 414 | " breath liveget \n", 415 | "0 5.0 5.0 \n", 416 | "1 0.0 0.0 \n", 417 | "2 0.0 0.0 \n", 418 | "3 5.0 0.0 \n", 419 | "4 0.0 5.0 \n", 420 | "\n", 421 | "[5 rows x 303 columns]" 422 | ] 423 | }, 424 | "execution_count": 4, 425 | "metadata": {}, 426 | "output_type": "execute_result" 427 | } 428 | ], 429 | "source": [ 430 | "df = pd.read_csv('/home/lettuce/MyCode/pandasproject/rap_end.csv')\n", 431 | "\n", 432 | "# Load the English model in Spacy\n", 433 | "nlp = spacy.load('en_core_web_sm')\n", 434 | "\n", 435 | "# Define a function to check if a word is a noun, verb, or adjective\n", 436 | "def is_noun_verb_adj(word):\n", 437 | " pos = nlp(word)[0].pos_\n", 438 | " return pos in ['NOUN', 'VERB', 'ADJ']\n", 439 | "\n", 440 | "# Iterate over columns and drop those not meeting the criteria\n", 441 | "for col in df.columns:\n", 442 | " if not is_noun_verb_adj(col):\n", 443 | " df = df.drop(columns=[col])\n", 444 | " \n", 445 | "df.head()" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 5, 451 | "metadata": {}, 452 | "outputs": [ 453 | { 454 | "data": { 455 | "text/html": [ 456 | "
\n", 457 | "\n", 470 | "\n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | "
kill
014.0
10.0
26.0
34.0
44.0
......
121NaN
122NaN
123NaN
124NaN
125NaN
\n", 524 | "

126 rows × 1 columns

\n", 525 | "
" 526 | ], 527 | "text/plain": [ 528 | " kill\n", 529 | "0 14.0\n", 530 | "1 0.0\n", 531 | "2 6.0\n", 532 | "3 4.0\n", 533 | "4 4.0\n", 534 | ".. ...\n", 535 | "121 NaN\n", 536 | "122 NaN\n", 537 | "123 NaN\n", 538 | "124 NaN\n", 539 | "125 NaN\n", 540 | "\n", 541 | "[126 rows x 1 columns]" 542 | ] 543 | }, 544 | "execution_count": 5, 545 | "metadata": {}, 546 | "output_type": "execute_result" 547 | } 548 | ], 549 | "source": [ 550 | "#df.set_index('Artist Name', inplace=True)\n", 551 | "df[['kill']]" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": 6, 557 | "metadata": {}, 558 | "outputs": [ 559 | { 560 | "data": { 561 | "text/html": [ 562 | "
\n", 563 | "\n", 576 | "\n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | "
Artist Namegetitsniggaseecauseifbackaintwas...fameflocktimesnormalamwantedgodzillabreathlamarliveget
0Total87.077.072.069.061.061.059.059.058.0...5.05.05.05.05.05.05.05.05.05.0
1The Notorious B.I.G.10.020.028.015.015.017.08.08.07.0...0.00.00.00.00.00.00.00.00.00.0
2Tupac24.024.018.029.014.015.019.028.019.0...0.00.00.00.00.00.00.00.00.00.0
3Eminem39.026.00.06.026.014.023.08.015.0...5.01.03.05.02.05.05.05.00.00.0
4Kendrick Lamar14.07.026.019.06.015.09.015.017.0...0.04.02.00.03.00.00.00.05.05.0
\n", 726 | "

5 rows × 462 columns

\n", 727 | "
" 728 | ], 729 | "text/plain": [ 730 | " Artist Name get its nigga see cause if back aint \\\n", 731 | "0 Total 87.0 77.0 72.0 69.0 61.0 61.0 59.0 59.0 \n", 732 | "1 The Notorious B.I.G. 10.0 20.0 28.0 15.0 15.0 17.0 8.0 8.0 \n", 733 | "2 Tupac 24.0 24.0 18.0 29.0 14.0 15.0 19.0 28.0 \n", 734 | "3 Eminem 39.0 26.0 0.0 6.0 26.0 14.0 23.0 8.0 \n", 735 | "4 Kendrick Lamar 14.0 7.0 26.0 19.0 6.0 15.0 9.0 15.0 \n", 736 | "\n", 737 | " was ... fame flock times normal am wanted godzilla breath \\\n", 738 | "0 58.0 ... 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 \n", 739 | "1 7.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 740 | "2 19.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 741 | "3 15.0 ... 5.0 1.0 3.0 5.0 2.0 5.0 5.0 5.0 \n", 742 | "4 17.0 ... 0.0 4.0 2.0 0.0 3.0 0.0 0.0 0.0 \n", 743 | "\n", 744 | " lamar liveget \n", 745 | "0 5.0 5.0 \n", 746 | "1 0.0 0.0 \n", 747 | "2 0.0 0.0 \n", 748 | "3 0.0 0.0 \n", 749 | "4 5.0 5.0 \n", 750 | "\n", 751 | "[5 rows x 462 columns]" 752 | ] 753 | }, 754 | "execution_count": 6, 755 | "metadata": {}, 756 | "output_type": "execute_result" 757 | } 758 | ], 759 | "source": [ 760 | "csv = pd.read_csv('/home/lettuce/MyCode/pandasproject/rap_end.csv')\n", 761 | "dropper = ['the','you','i','dont','a','know','way','when','to','me','it','my','and','in','up','on','your','of','im','that','with','like','is','we','but','this','all','just','be','got','for','what','do','yeah','no','now','thats','they','never']\n", 762 | "csv_drop = csv.drop(dropper, axis=1)\n", 763 | "csv_drop.head()" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": 99, 769 | "metadata": {}, 770 | "outputs": [ 771 | { 772 | "name": "stdout", 773 | "output_type": "stream", 774 | "text": [ 775 | "15\n", 776 | "26\n", 777 | "24\n", 778 | "36\n", 779 | "21\n" 780 | ] 781 | }, 782 | { 783 | "data": { 784 | "text/plain": [ 785 | "122" 786 | ] 787 | }, 788 | "execution_count": 99, 789 | "metadata": {}, 790 | "output_type": "execute_result" 791 | } 792 | ], 793 | "source": [ 794 | "era = [2,2,3,4,2,2,2,2,3,4,4,3,3,3,1,4,2,4,3,4,4,5,3,4,4,2,4,4,4,3,4,4,4,2,4,4,4,3,4,3,4,4,4,4,3,4,2,4,3,4,1,3,2,2,2,1,4,2,2,2,3,2,2,2,5,3,2,4,2,3,4,3,4,1,2,2,3,5,4,4,5,3,2,5,1,3,5,3,2,2,5,5,4,1,1,1,3,3,3,4,4,5,5,5,1,1,1,1,1,5,5,5,5,5,5,5,5,5,5,4,1]\n", 795 | "y = 0\n", 796 | "era.insert(56,1)\n", 797 | "\n", 798 | "for x in range(1,6):\n", 799 | " print(era.count(x))\n", 800 | " y += era.count(x)\n", 801 | "y" 802 | ] 803 | }, 804 | { 805 | "cell_type": "code", 806 | "execution_count": 89, 807 | "metadata": {}, 808 | "outputs": [ 809 | { 810 | "data": { 811 | "text/plain": [ 812 | "['1990s',\n", 813 | " '1990s',\n", 814 | " '2000s',\n", 815 | " '2010s',\n", 816 | " '1990s',\n", 817 | " '1990s',\n", 818 | " '1990s',\n", 819 | " '1990s',\n", 820 | " '2000s',\n", 821 | " '2010s',\n", 822 | " '2010s',\n", 823 | " '2000s',\n", 824 | " '2000s',\n", 825 | " '2000s',\n", 826 | " '1980s',\n", 827 | " '2010s',\n", 828 | " '1990s',\n", 829 | " '2010s',\n", 830 | " '2000s',\n", 831 | " '2010s',\n", 832 | " '2010s',\n", 833 | " '2020s',\n", 834 | " '2000s',\n", 835 | " '2010s',\n", 836 | " '2010s',\n", 837 | " '1990s',\n", 838 | " '2010s',\n", 839 | " '2010s',\n", 840 | " '2010s',\n", 841 | " '2000s',\n", 842 | " '2010s',\n", 843 | " '2010s',\n", 844 | " '2010s',\n", 845 | " '1990s',\n", 846 | " '2010s',\n", 847 | " '2010s',\n", 848 | " '2010s',\n", 849 | " '2000s',\n", 850 | " '2010s',\n", 851 | " '2000s',\n", 852 | " '2010s',\n", 853 | " '2010s',\n", 854 | " '2010s',\n", 855 | " '2010s',\n", 856 | " '2000s',\n", 857 | " '2010s',\n", 
858 | " '1990s',\n", 859 | " '2010s',\n", 860 | " '2000s',\n", 861 | " '2010s',\n", 862 | " '1980s',\n", 863 | " '2000s',\n", 864 | " '1990s',\n", 865 | " '1990s',\n", 866 | " '1990s',\n", 867 | " '1980s',\n", 868 | " '1980s',\n", 869 | " '2010s',\n", 870 | " '1990s',\n", 871 | " '1990s',\n", 872 | " '1990s',\n", 873 | " '2000s',\n", 874 | " '1990s',\n", 875 | " '1990s',\n", 876 | " '1990s',\n", 877 | " '2020s',\n", 878 | " '2000s',\n", 879 | " '1990s',\n", 880 | " '2010s',\n", 881 | " '1990s',\n", 882 | " '2000s',\n", 883 | " '2010s',\n", 884 | " '2000s',\n", 885 | " '2010s',\n", 886 | " '1980s',\n", 887 | " '1990s',\n", 888 | " '1990s',\n", 889 | " '2000s',\n", 890 | " '2020s',\n", 891 | " '2010s',\n", 892 | " '2010s',\n", 893 | " '2020s',\n", 894 | " '2000s',\n", 895 | " '1990s',\n", 896 | " '2020s',\n", 897 | " '1980s',\n", 898 | " '2000s',\n", 899 | " '2020s',\n", 900 | " '2000s',\n", 901 | " '1990s',\n", 902 | " '1990s',\n", 903 | " '2020s',\n", 904 | " '2020s',\n", 905 | " '2010s',\n", 906 | " '1980s',\n", 907 | " '1980s',\n", 908 | " '1980s',\n", 909 | " '2000s',\n", 910 | " '2000s',\n", 911 | " '2000s',\n", 912 | " '2010s',\n", 913 | " '2010s',\n", 914 | " '2020s',\n", 915 | " '2020s',\n", 916 | " '2020s',\n", 917 | " '1980s',\n", 918 | " '1980s',\n", 919 | " '1980s',\n", 920 | " '1980s',\n", 921 | " '1980s',\n", 922 | " '2020s',\n", 923 | " '2020s',\n", 924 | " '2020s',\n", 925 | " '2020s',\n", 926 | " '2020s',\n", 927 | " '2020s',\n", 928 | " '2020s',\n", 929 | " '2020s',\n", 930 | " '2020s',\n", 931 | " '2020s',\n", 932 | " '2010s']" 933 | ] 934 | }, 935 | "execution_count": 89, 936 | "metadata": {}, 937 | "output_type": "execute_result" 938 | } 939 | ], 940 | "source": [ 941 | "era_fix = []\n", 942 | "\n", 943 | "for x in range(0,121):\n", 944 | " if era[x] == 0:\n", 945 | " era_fix.append('0')\n", 946 | " if era[x] == 1:\n", 947 | " era_fix.append('1980s')\n", 948 | " elif era[x] == 2:\n", 949 | " era_fix.append('1990s')\n", 950 | " elif era[x] == 3:\n", 951 | " era_fix.append('2000s')\n", 952 | " elif era[x] == 4:\n", 953 | " era_fix.append('2010s')\n", 954 | " elif era[x] == 5:\n", 955 | " era_fix.append('2020s')\n", 956 | "era_fix" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": null, 962 | "metadata": {}, 963 | "outputs": [], 964 | "source": [] 965 | }, 966 | { 967 | "cell_type": "code", 968 | "execution_count": 90, 969 | "metadata": {}, 970 | "outputs": [], 971 | "source": [ 972 | "rap = pd.read_csv('/home/lettuce/MyCode/pandasproject/rap.csv')\n", 973 | "rap['Era'] = era_fix" 974 | ] 975 | }, 976 | { 977 | "cell_type": "code", 978 | "execution_count": 91, 979 | "metadata": {}, 980 | "outputs": [ 981 | { 982 | "data": { 983 | "text/html": [ 984 | "
\n", 985 | "\n", 998 | "\n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " 
\n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | "
Rap NameRap LikeEra
61Rick Ross51,444 votes2000s
62Twista34,152 votes1990s
63Big Pun28,002 votes1990s
64Raekwon24,683 votes1990s
65J.I.D23,725 votes2020s
66Xzibit44,070 votes2000s
67Scarface33,305 votes1990s
68Lil Dicky5,383 votes2010s
69Will Smith26,314 votes1990s
70Mos Def34,456 votes2000s
71Hopsin27,285 votes2010s
72Big Boi32,577 votes2000s
73G-Eazy61,228 votes2010s
74Slick Rick29,938 votes1980s
75GZA22,168 votes1990s
76Q-Tip23,304 votes1990s
77Prodigy35,486 votes2000s
78Post Malone26,074 votes2020s
79Young Thug24,176 votes2010s
80NF17,992 votes2010s
81Lil Baby13,537 votes2020s
82T-Pain15,293 votes2000s
83Coolio14,119 votes1990s
84Polo G9,695 votes2020s
85MC Ren9,646 votes1980s
86Lupe Fiasco17,836 votes2000s
87Quavo9,582 votes2020s
88Common17,586 votes2000s
89E-4031,916 votes1990s
90Fat Joe19,617 votes1990s
91Roddy Ricch47,404 votes2020s
92DaBaby20,355 votes2020s
93YG14,664 votes2010s
94Grandmaster Flash7,459 votes1980s
95KRS-One10,139 votes1980s
96Big Daddy Kane16,006 votes1980s
97Pharrell Williams15,556 votes2000s
98Juicy J50,620 votes2000s
99Kurupt28,500 votes2000s
100Vince Staples16,261 votes2010s
101Nicki Minaj19,098 votes2010s
102A Boogie wit da Hoodie10,280 votes2020s
103Playboi Carti4,784 votes2020s
104NLE Choppa5,818 votes2020s
105N.W.ANaN1980s
106Public EnemyNaN1980s
107Kurtis Blow\"\"1980s
108De La Soul\"\"1980s
109Chuck D\"\"1980s
110Doja Cat\"\"2020s
111Jack Harlow\"\"2020s
112lil nas x\"\"2020s
113lil Durk\"\"2020s
114Baby Keem\"\"2020s
115JPEGMAFIA\"\"2020s
116Death Grips\"\"2020s
117BROCKHAMPTON\"\"2020s
118JIDNaN2020s
119Denzel Curry\"\"2020s
120Isaiah Rashad\"\"2010s
\n", 1370 | "
" 1371 | ], 1372 | "text/plain": [ 1373 | " Rap Name Rap Like Era\n", 1374 | "61 Rick Ross 51,444 votes 2000s\n", 1375 | "62 Twista 34,152 votes 1990s\n", 1376 | "63 Big Pun 28,002 votes 1990s\n", 1377 | "64 Raekwon 24,683 votes 1990s\n", 1378 | "65 J.I.D 23,725 votes 2020s\n", 1379 | "66 Xzibit 44,070 votes 2000s\n", 1380 | "67 Scarface 33,305 votes 1990s\n", 1381 | "68 Lil Dicky 5,383 votes 2010s\n", 1382 | "69 Will Smith 26,314 votes 1990s\n", 1383 | "70 Mos Def 34,456 votes 2000s\n", 1384 | "71 Hopsin 27,285 votes 2010s\n", 1385 | "72 Big Boi 32,577 votes 2000s\n", 1386 | "73 G-Eazy 61,228 votes 2010s\n", 1387 | "74 Slick Rick 29,938 votes 1980s\n", 1388 | "75 GZA 22,168 votes 1990s\n", 1389 | "76 Q-Tip 23,304 votes 1990s\n", 1390 | "77 Prodigy 35,486 votes 2000s\n", 1391 | "78 Post Malone 26,074 votes 2020s\n", 1392 | "79 Young Thug 24,176 votes 2010s\n", 1393 | "80 NF 17,992 votes 2010s\n", 1394 | "81 Lil Baby 13,537 votes 2020s\n", 1395 | "82 T-Pain 15,293 votes 2000s\n", 1396 | "83 Coolio 14,119 votes 1990s\n", 1397 | "84 Polo G 9,695 votes 2020s\n", 1398 | "85 MC Ren 9,646 votes 1980s\n", 1399 | "86 Lupe Fiasco 17,836 votes 2000s\n", 1400 | "87 Quavo 9,582 votes 2020s\n", 1401 | "88 Common 17,586 votes 2000s\n", 1402 | "89 E-40 31,916 votes 1990s\n", 1403 | "90 Fat Joe 19,617 votes 1990s\n", 1404 | "91 Roddy Ricch 47,404 votes 2020s\n", 1405 | "92 DaBaby 20,355 votes 2020s\n", 1406 | "93 YG 14,664 votes 2010s\n", 1407 | "94 Grandmaster Flash 7,459 votes 1980s\n", 1408 | "95 KRS-One 10,139 votes 1980s\n", 1409 | "96 Big Daddy Kane 16,006 votes 1980s\n", 1410 | "97 Pharrell Williams 15,556 votes 2000s\n", 1411 | "98 Juicy J 50,620 votes 2000s\n", 1412 | "99 Kurupt 28,500 votes 2000s\n", 1413 | "100 Vince Staples 16,261 votes 2010s\n", 1414 | "101 Nicki Minaj 19,098 votes 2010s\n", 1415 | "102 A Boogie wit da Hoodie 10,280 votes 2020s\n", 1416 | "103 Playboi Carti 4,784 votes 2020s\n", 1417 | "104 NLE Choppa 5,818 votes 2020s\n", 1418 | "105 N.W.A NaN 1980s\n", 1419 | "106 Public Enemy NaN 1980s\n", 1420 | "107 Kurtis Blow \"\" 1980s\n", 1421 | "108 De La Soul \"\" 1980s\n", 1422 | "109 Chuck D \"\" 1980s\n", 1423 | "110 Doja Cat \"\" 2020s\n", 1424 | "111 Jack Harlow \"\" 2020s\n", 1425 | "112 lil nas x \"\" 2020s\n", 1426 | "113 lil Durk \"\" 2020s\n", 1427 | "114 Baby Keem \"\" 2020s\n", 1428 | "115 JPEGMAFIA \"\" 2020s\n", 1429 | "116 Death Grips \"\" 2020s\n", 1430 | "117 BROCKHAMPTON \"\" 2020s\n", 1431 | "118 JID NaN 2020s\n", 1432 | "119 Denzel Curry \"\" 2020s\n", 1433 | "120 Isaiah Rashad \"\" 2010s" 1434 | ] 1435 | }, 1436 | "execution_count": 91, 1437 | "metadata": {}, 1438 | "output_type": "execute_result" 1439 | } 1440 | ], 1441 | "source": [ 1442 | "rap.tail(60)" 1443 | ] 1444 | }, 1445 | { 1446 | "cell_type": "code", 1447 | "execution_count": 97, 1448 | "metadata": {}, 1449 | "outputs": [], 1450 | "source": [ 1451 | "#remove Rap Like column from df\n", 1452 | "#rap = rap.drop(['Rap Like'], axis=1)\n", 1453 | "#rap.head()\n", 1454 | "\n", 1455 | "#write to csv\n", 1456 | "rap.to_csv('/home/lettuce/MyCode/pandasproject/rap_hold.csv', index=False)" 1457 | ] 1458 | } 1459 | ], 1460 | "metadata": { 1461 | "kernelspec": { 1462 | "display_name": "Python 3", 1463 | "language": "python", 1464 | "name": "python3" 1465 | }, 1466 | "language_info": { 1467 | "codemirror_mode": { 1468 | "name": "ipython", 1469 | "version": 3 1470 | }, 1471 | "file_extension": ".py", 1472 | "mimetype": "text/x-python", 1473 | "name": "python", 1474 | "nbconvert_exporter": "python", 1475 | 
"pygments_lexer": "ipython3", 1476 | "version": "3.10.6" 1477 | }, 1478 | "orig_nbformat": 4 1479 | }, 1480 | "nbformat": 4, 1481 | "nbformat_minor": 2 1482 | } 1483 | --------------------------------------------------------------------------------