"""Exploratory analysis of a sub-division rainfall dataset (secondcode.py).

Repository layout, as given in the original listing:

    README.md                 (contains only the heading "# python")
    final report python.pdf
    Area_Weighted_Monthly_Seasonal_And_Annual_Rainfall_0.xls
    secondcode.py             (this script)

The script loads a CSV into a DataFrame, prints basic diagnostics (head,
info, missing values, summary statistics, shape, columns, unique values),
and then shows a series of matplotlib/seaborn figures one after another.

Columns the script reads: ``YEAR``, ``SD_Name``, ``ANNUAL``, the twelve
monthly columns ``JAN`` .. ``DEC`` and the seasonal columns ``JAN-FEB``,
``Mar-May``, ``Jun-Sep``, ``Oct-Dec``.

NOTE(review): the CSV is presumably an export of the .xls file shipped in
the repository -- confirm; the script itself only references the CSV path.
"""

# 1. Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the input data. Kept as a single named constant so the path
# can be changed in one place when the script is run on another machine.
DATA_PATH = "C:/Users/Surji/OneDrive/Desktop/ca2 project/new python .csv"

# 2. Load Dataset
df = pd.read_csv(DATA_PATH)
print("Dataset Head:\n", df.head())

# 3. Dataset Info
print("\nINFO:\n")
# DataFrame.info() writes its report itself and returns None; wrapping it in
# print() would emit a stray "None" line after the report.
df.info()

# 4. Check Missing Values
print("\nMissing Values:\n", df.isnull().sum())
# The zero-filled copy is only displayed; every later step keeps using the
# original ``df`` (NaNs included).
df_cleaned = df.fillna(0)
print(df_cleaned)

# 5. Summary Statistics
print("\nSummary Statistics:\n", df.describe())

# 6. Dataset Shape and Columns
print("\nDataset Shape:", df.shape)
print("Columns:", df.columns.tolist())

# 7. Unique Years/States
print("\nUnique Years:", df['YEAR'].unique())
print("Unique Subdivisions:", df['SD_Name'].unique())

# 8. Group-wise Annual Rainfall
# Sum of the ANNUAL column over all rows sharing the same YEAR.
annual_rainfall = df.groupby('YEAR')['ANNUAL'].sum()
print("\nTotal Annual Rainfall by Year:\n", annual_rainfall.head())

# 9. Correlation Matrix
# Restricted to float64/int64 columns so text columns are excluded.
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
corr = df[numerical_cols].corr()
print("\nCorrelation Matrix:\n", corr)

# 10. Heatmap
plt.figure(figsize=(10, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap of Rainfall Features")
plt.tight_layout()
plt.show()

# 11. Line Plot: Annual Rainfall over Years
plt.figure(figsize=(10, 6))
annual_rainfall.plot(marker='o', color='blue')
plt.title("Total Annual Rainfall Over Years")
plt.xlabel("Year")
plt.ylabel("Total Rainfall (mm)")
plt.grid(True)
plt.tight_layout()
plt.show()

# 12. Subdivision-wise Average Rainfall
avg_by_region = (
    df.groupby('SD_Name')['ANNUAL']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)
print("\nAverage Rainfall by Subdivision:\n", avg_by_region)

# Bar Plot
plt.figure(figsize=(12, 8))
sns.barplot(
    data=avg_by_region,
    x='ANNUAL',
    y='SD_Name',
    hue='SD_Name',
    palette='Set3',
    legend=False,
)
plt.title("Average Annual Rainfall by Subdivision")
plt.xlabel("Average Rainfall (mm)")
plt.ylabel("Subdivision")
plt.tight_layout()
plt.show()

# 13. Rainfall in a Selected State Over Time (e.g., PUNJAB)
# na=False: rows with a missing SD_Name would otherwise produce NaN in the
# mask, and boolean indexing with NaN raises ValueError.
is_punjab = df['SD_Name'].str.upper().str.contains("PUNJAB", na=False)
# Sorted by year so the connecting line is drawn in chronological order even
# if the CSV rows are not.
punjab_data = df[is_punjab].sort_values('YEAR')
plt.figure(figsize=(10, 5))
plt.plot(
    punjab_data['YEAR'],
    punjab_data['ANNUAL'],
    marker='o',
    linestyle='-',
    color='green',
)
plt.title("Annual Rainfall in Punjab Over Time")
plt.xlabel("Year")
plt.ylabel("Annual Rainfall (mm)")
plt.grid(True)
plt.tight_layout()
plt.show()

# 14. Monthly Rainfall Distribution (Boxplot)
monthly_cols = [
    'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
    'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC',
]
monthly_df = df[monthly_cols]

plt.figure(figsize=(12, 6))
sns.boxplot(data=monthly_df, palette='Set2')
plt.title("Monthly Rainfall Distribution Across India")
plt.xlabel("Month")
plt.ylabel("Rainfall (mm)")
plt.tight_layout()
plt.show()

# 15. Seasonal Rainfall Comparison
seasonal_cols = ['JAN-FEB', 'Mar-May', 'Jun-Sep', 'Oct-Dec']
seasonal_means = df[seasonal_cols].mean()
print("\nSeasonal Rainfall Mean (All India):\n", seasonal_means)

# Pie Chart
plt.figure(figsize=(7, 7))
colors = sns.color_palette("Set3")
plt.pie(
    seasonal_means,
    labels=seasonal_means.index,
    autopct='%1.1f%%',
    colors=colors,
)
plt.title("Seasonal Contribution to Total Rainfall")
plt.tight_layout()
plt.show()

# 16. Scatter Plot: JUN-SEP vs ANNUAL
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='Jun-Sep', y='ANNUAL', color='purple', alpha=0.6)
plt.title("JUN-SEP Rainfall vs Annual Rainfall")
plt.xlabel("JUN-SEP Rainfall (mm)")
plt.ylabel("Annual Rainfall (mm)")
plt.grid(True)
plt.tight_layout()
plt.show()

# 17. Regression Plot: JUN-SEP vs ANNUAL
plt.figure(figsize=(8, 6))
# regplot() has no ``alpha`` parameter (passing one raises TypeError); point
# transparency must be forwarded through scatter_kws, as in section 18.
sns.regplot(
    data=df,
    x='Jun-Sep',
    y='ANNUAL',
    color='darkblue',
    scatter_kws={"alpha": 0.4},
)
plt.title("Regression: JUN-SEP vs Annual Rainfall")
plt.xlabel("JUN-SEP Rainfall (mm)")
plt.ylabel("Annual Rainfall (mm)")
plt.tight_layout()
plt.show()

# 18. Regression Plot: MAR-MAY vs ANNUAL
plt.figure(figsize=(8, 6))
sns.regplot(
    data=df,
    x='Mar-May',
    y='ANNUAL',
    color='orange',
    scatter_kws={"alpha": 0.5},
)
plt.title("Regression: MAR-MAY vs Annual Rainfall")
plt.xlabel("MAR-MAY Rainfall (mm)")
plt.ylabel("Annual Rainfall (mm)")
plt.tight_layout()
plt.show()

# 19. Scatter: Year vs Annual Rainfall for All India
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='YEAR', y='ANNUAL', alpha=0.6)
plt.title("Annual Rainfall Trend Across India")
plt.xlabel("Year")
plt.ylabel("Annual Rainfall (mm)")
plt.grid(True)
plt.tight_layout()
plt.show()

# 20. Violin Plot: Monthly Rainfall Distribution
# Reshape monthly columns to long format: one row per (Month, Rainfall) pair.
monthly_df = df[monthly_cols].melt(var_name='Month', value_name='Rainfall')

# Violin Plot
plt.figure(figsize=(12, 6))
# hue mirrors x (with the legend suppressed) so that ``palette`` is applied
# per month without the "palette without hue" deprecation warning -- the same
# pattern the bar plot in section 12 uses.
sns.violinplot(
    x='Month',
    y='Rainfall',
    data=monthly_df,
    hue='Month',
    palette='Pastel2',
    legend=False,
)
plt.title("Violin Plot of Monthly Rainfall Distribution Across India")
plt.xlabel("Month")
plt.ylabel("Rainfall (mm)")
plt.tight_layout()
plt.show()