└── README.md


/README.md:
--------------------------------------------------------------------------------
 1 | # python project 
 2 | import pandas as pd
 3 | import numpy as np
 4 | import seaborn as sns
 5 | import matplotlib.pyplot as plt
 6 | from sklearn.model_selection import train_test_split
 7 | from sklearn.preprocessing import StandardScaler, LabelEncoder
 8 | from sklearn.ensemble import RandomForestRegressor
 9 | from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score 
# Load the Spotify tracks dataset and strip columns that are not used below.
file_path = "dataset_spotify.csv"
df = pd.read_csv(file_path)
df.head()  # notebook-style preview; has no visible effect when run as a script

# Drop the leftover CSV index column and identifier-like columns.
df = df.drop(columns=["Unnamed: 0", "track_id", "album_name"])

# Drop the remaining free-text columns in place.
columns_to_drop = ["track_name", "artists"]
df.drop(columns=columns_to_drop, inplace=True)
df.head()

# Report how many missing values each remaining column has.
print("Missing values per column:\n", df.isna().sum())
# The original listing had pasted notebook output here (the tail of the
# printed Series: "track_genre ... dtype: int64"); it is output, not code.
20 | 
# Distribution of popularity: 30-bin histogram of the `popularity` column.
plt.figure(figsize=(8, 6))
sns.histplot(df['popularity'], bins=30, color='skyblue')
plt.title("Distribution of Song Popularity")
plt.xlabel("Popularity")
plt.ylabel("Frequency")
plt.show()
28 | 
# Popularity vs Danceability, with points coloured by the `explicit` column.
_, dance_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='danceability',
    y='popularity',
    hue='explicit',
    alpha=0.7,
    ax=dance_ax,
)
dance_ax.set_title("Danceability vs Popularity")
dance_ax.set_xlabel("Danceability")
dance_ax.set_ylabel("Popularity")
dance_ax.legend(title="Explicit")
plt.show()
37 | 
# Boxplot of Popularity by Genre: one box per `track_genre` value.
# NOTE(review): the x-axis label says "(Encoded)", but no encoding step is
# visible in this part of the script; confirm the label is still accurate.
_, genre_ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=df, x='track_genre', y='popularity', ax=genre_ax)
genre_ax.set_title("Popularity by Genre")
genre_ax.set_xlabel("Track Genre (Encoded)")
genre_ax.set_ylabel("Popularity")
# Genre names are rotated vertically so they do not overlap.
plt.xticks(rotation=90)
plt.show()
46 | 
# Pie chart of explicit vs. non-explicit songs.
explicit_counts = df['explicit'].value_counts()

# value_counts() orders rows by frequency, not by value, so a hard-coded
# ['Non-Explicit', 'Explicit'] list would mislabel the slices if explicit
# tracks ever outnumbered non-explicit ones. Derive labels and colors from
# the index instead.
# Assumes `explicit` holds booleans (or 0/1) - TODO confirm against the CSV.
explicit_flags = [bool(flag) for flag in explicit_counts.index]
labels = ['Explicit' if flag else 'Non-Explicit' for flag in explicit_flags]
colors = ['#ff9999' if flag else '#66b3ff' for flag in explicit_flags]

plt.figure(figsize=(6, 6))
plt.pie(
    explicit_counts,
    labels=labels,
    autopct='%1.1f%%',
    startangle=140,
    colors=colors,
)
plt.title("Distribution of Explicit vs Non-Explicit Songs")
plt.axis('equal')  # equal aspect ratio keeps the pie circular
plt.show()
# Acousticness vs Loudness, with points coloured by the `explicit` column.
plt.figure(figsize=(8, 6))
sns.scatterplot(x='acousticness', y='loudness', data=df, hue='explicit', alpha=0.6)
plt.title("Acousticness vs Loudness")
plt.xlabel("Acousticness")
plt.ylabel("Loudness (dB)")
plt.legend(title="Explicit")
plt.show()
# Average popularity per genre, shown as a bar chart sorted from most to
# least popular.
plt.figure(figsize=(14, 6))

# Mean popularity for each genre, highest first.
genre_popularity = (
    df.groupby('track_genre')['popularity']
    .mean()
    .sort_values(ascending=False)
)

# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue`; confirm against the installed seaborn version.
sns.barplot(x=genre_popularity.index, y=genre_popularity.values, palette='viridis')
plt.title("Average Popularity by Genre")
plt.xlabel("Track Genre")
plt.ylabel("Average Popularity")
plt.xticks(rotation=90)  # vertical labels so genre names do not overlap
plt.show()
# The original listing repeated the sns.barplot(...) call after plt.show();
# it was a duplicate of the call above and would have drawn onto a new,
# untitled figure, so it is not repeated here.
# Correlation heatmap over the numeric feature columns.
plt.figure(figsize=(12, 8))

# Only numeric columns are passed to corr().
numeric_df = df.select_dtypes(include=[np.number])

# annot=True writes each coefficient in its cell; fmt=".2f" rounds it to two
# decimal places.
sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Feature Correlation Heatmap")
plt.show()


--------------------------------------------------------------------------------