"""Exploratory analysis of agricultural land use in India.

Loads the project CSV, prints a structural summary, draws seven exploratory
charts, and fits a one-variable linear regression of 'Net area cultivated'
on 'Net area sown' using an 80/20 train/test split.
"""
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings("ignore")

# Location of the input data; edit this when running on another machine.
DATA_PATH = r"C:\Users\Asus\OneDrive\Desktop\ProjectData.csv"

# Load dataset
df = pd.read_csv(DATA_PATH)
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit an extra "None" line).
df.info()
print(df.describe())

# Check for missing values (count per column)
print("\nMissing values in each column:\n", df.isna().sum())

# Check column names
print("Columns:", df.columns.tolist())

# Chart 1: Histogram - Net Area Sown
plt.figure(figsize=(8, 5))
sns.histplot(df['Net area sown'], bins=25, kde=True, color='skyblue')
plt.title('Distribution of Net Area Sown')
plt.xlabel('Net Area Sown')
plt.ylabel('Count')
plt.tight_layout()
plt.show()

# Chart 2: Bar Plot - Mean Net Area Cultivated by Category of Holdings
plt.figure(figsize=(7, 5))
mean_values = (
    df.groupby('Category of holdings')['Net area cultivated']
    .mean()
    .sort_values(ascending=False)
)
sns.barplot(x=mean_values.index, y=mean_values.values, palette='viridis')
plt.title('Average Net Area Cultivated by Category of Holdings')
plt.ylabel('Mean Net Area Cultivated')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Chart 3: Bar Plot - Mean Net Area Sown by State (ten largest means)
state_avg = (
    df.groupby('srcStateName')['Net area sown']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)
plt.figure(figsize=(10, 6))
sns.barplot(x=state_avg.values, y=state_avg.index, palette='viridis')
plt.title('Top 10 States by Mean Net Area Sown')
plt.xlabel('Mean Net Area Sown')
plt.ylabel('State')
plt.tight_layout()
plt.show()

# Chart 4: Pie Chart - Distribution of Category of Holdings
cat_counts = df['Category of holdings'].value_counts()
plt.figure(figsize=(6, 6))
plt.pie(cat_counts, labels=cat_counts.index, autopct='%1.1f%%', startangle=140)
plt.title('Distribution of Land Holdings Categories')
plt.tight_layout()
plt.show()

# Chart 5: Correlation Heatmap
# NOTE(review): 'Uncultivated area ' carries a trailing space; presumably it
# mirrors the CSV header exactly - verify against the "Columns:" output above.
num_cols = [
    'Net area sown',
    'Area under current fallows',
    'Net area cultivated',
    'Uncultivated area ',
]
corr = df[num_cols].corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5,
            linecolor='gray', square=True, cbar_kws={'shrink': 0.5})
plt.title('Correlation Heatmap of Area Features', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

# Chart 6: Box Plot - Social Group Type by Category of Holdings
# NOTE(review): a box plot summarises a numeric variable. If
# 'Social group type' holds category labels rather than numbers, replace the
# y column with a numeric one (e.g. 'Net area sown') - confirm with the data.
plt.figure(figsize=(8, 6))
sns.boxplot(x='Category of holdings', y='Social group type', data=df, palette='Set2')
plt.title('Social Group Type by Category of Holdings')
plt.xlabel('Category of holdings')
plt.ylabel('Social group type')
plt.tight_layout()
plt.show()

# Chart 7: Scatter Plot - Net Area Sown vs Net Area Cultivated
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='Net area sown', y='Net area cultivated',
                hue='Category of holdings', palette='viridis')
plt.title('Scatter Plot: Net Area Sown vs Net Area Cultivated')
plt.xlabel('Net area sown')
plt.ylabel('Net area cultivated')
plt.tight_layout()
plt.show()

# --- Linear Regression ----

# Keep only rows where both regression columns are present: a least-squares
# fit cannot handle NaN inputs, and nothing earlier in the script removes them.
reg_df = df[['Net area sown', 'Net area cultivated']].dropna()

# Full (cleaned) feature and target arrays, used for the final plot
x = reg_df['Net area sown'].values
y = reg_df['Net area cultivated'].values

# Manual train-test split using pandas (80% train, fixed seed for repeatability)
train = reg_df.sample(frac=0.8, random_state=42)
test = reg_df.drop(train.index)
x_train = train['Net area sown'].values
y_train = train['Net area cultivated'].values
x_test = test['Net area sown'].values
y_test = test['Net area cultivated'].values

# A degree-1 fit needs at least two points; fail with a clear message instead
# of an opaque linear-algebra error.
if len(x_train) < 2:
    raise ValueError(
        "Need at least two complete rows of 'Net area sown' / "
        "'Net area cultivated' to fit the regression."
    )

# Fit linear regression using NumPy's polyfit on the training set
slope, intercept = np.polyfit(x_train, y_train, 1)

# Evaluate on the held-out rows so the split actually serves a purpose
if len(x_test) > 0:
    residuals = y_test - (slope * x_test + intercept)
    ss_res = float(np.sum(residuals ** 2))
    ss_tot = float(np.sum((y_test - y_test.mean()) ** 2))
    rmse = float(np.sqrt(ss_res / len(x_test)))
    # R^2 is undefined when the test targets are all identical
    r_squared = 1 - ss_res / ss_tot if ss_tot > 0 else float('nan')
    print("Test RMSE:", rmse)
    print("Test R^2:", r_squared)

# Prediction example for Net area sown = 5000
pred_5000 = slope * 5000 + intercept
print("Predicted Net Area Cultivated for 5000 Net Area Sown:", pred_5000)

# Plot regression line
# A straight line is fully determined by its two end points, so draw it from
# min(x) to max(x) rather than through every (unsorted) sample.
x_line = np.array([x.min(), x.max()])
plt.figure(figsize=(8, 5))
plt.scatter(x, y, color="green", label="Data Points")
plt.plot(x_line, slope * x_line + intercept, color='red', linewidth=3,
         label="Regression Line")
plt.xlabel("Net area sown")
plt.ylabel("Net area cultivated")
plt.title("Linear Regression Fit")
plt.legend()
plt.tight_layout()
plt.show()