# python project
#
# Exploratory analysis of a Spotify tracks dataset: load the CSV, drop
# identifier/text columns, report missing values, and draw a series of
# plots relating song popularity to other features.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# ---------------------------------------------------------------------------
# Load and clean the data
# ---------------------------------------------------------------------------
file_path = "dataset_spotify.csv"
df = pd.read_csv(file_path)
df.head()

# Drop the saved index column and identifier/free-text columns that are not
# used by any of the plots below.
df = df.drop(columns=["Unnamed: 0", "track_id", "album_name"])
columns_to_drop = ["track_name", "artists"]
df.drop(columns=columns_to_drop, inplace=True)
df.head()

print("Missing values per column:\n", df.isna().sum())
# Notebook output pasted in the original (truncated there):
#   track_genre
#   dtype: int64

# ---------------------------------------------------------------------------
# Distribution of popularity
# ---------------------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.histplot(df['popularity'], bins=30, color='skyblue')
plt.title("Distribution of Song Popularity")
plt.xlabel("Popularity")
plt.ylabel("Frequency")
plt.show()

# ---------------------------------------------------------------------------
# Popularity vs Danceability
# ---------------------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.scatterplot(x='danceability', y='popularity', data=df, hue='explicit', alpha=0.7)
plt.title("Danceability vs Popularity")
plt.xlabel("Danceability")
plt.ylabel("Popularity")
plt.legend(title="Explicit")
plt.show()

# ---------------------------------------------------------------------------
# Boxplot of Popularity by Genre
# ---------------------------------------------------------------------------
plt.figure(figsize=(12, 6))
sns.boxplot(x='track_genre', y='popularity', data=df)
plt.title("Popularity by Genre")
plt.xlabel("Track Genre (Encoded)")
plt.ylabel("Popularity")
plt.xticks(rotation=90)
plt.show()

# ---------------------------------------------------------------------------
# Pie chart of explicit vs. non-explicit songs
# ---------------------------------------------------------------------------
explicit_counts = df['explicit'].value_counts()
# value_counts() orders rows by frequency, not by value, so derive each
# wedge's label and colour from the index instead of assuming that
# non-explicit tracks come first.
# NOTE(review): assumes 'explicit' is a boolean column -- confirm against the CSV.
labels = ['Explicit' if is_explicit else 'Non-Explicit' for is_explicit in explicit_counts.index]
colors = ['#ff9999' if is_explicit else '#66b3ff' for is_explicit in explicit_counts.index]
plt.figure(figsize=(6, 6))
plt.pie(explicit_counts, labels=labels, autopct='%1.1f%%', startangle=140, colors=colors)
plt.title("Distribution of Explicit vs Non-Explicit Songs")
plt.axis('equal')
plt.show()

# ---------------------------------------------------------------------------
# Acousticness vs Loudness
# ---------------------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.scatterplot(x='acousticness', y='loudness', data=df, hue='explicit', alpha=0.6)
plt.title("Acousticness vs Loudness")
plt.xlabel("Acousticness")
plt.ylabel("Loudness (dB)")
plt.legend(title="Explicit")
plt.show()

# ---------------------------------------------------------------------------
# Average popularity by genre (highest first)
# ---------------------------------------------------------------------------
genre_popularity = df.groupby('track_genre')['popularity'].mean().sort_values(ascending=False)
plt.figure(figsize=(14, 6))
sns.barplot(x=genre_popularity.index, y=genre_popularity.values, palette='viridis')
plt.title("Average Popularity by Genre")
plt.xlabel("Track Genre")
plt.ylabel("Average Popularity")
plt.xticks(rotation=90)
plt.show()

# ---------------------------------------------------------------------------
# Feature correlation heatmap
# ---------------------------------------------------------------------------
# Only numeric columns can be correlated; this selection leaves out
# non-numeric columns such as track_genre.
numeric_df = df.select_dtypes(include=[np.number])
plt.figure(figsize=(12, 8))
sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Feature Correlation Heatmap")
plt.show()