# Repository layout (python-project):
#   ├── README.md
#   └── Python_project
#       ├── Python.ca2 (1).pdf
#       └── pythonproject.txt
#
# /README.md:
#   # python-project
#
# /Python_project/Python.ca2 (1).pdf:
#   https://raw.githubusercontent.com/rahulvarma9676/python-project/HEAD/Python_project/Python.ca2 (1).pdf
#
# /Python_project/pythonproject.txt: the script below.
"""Exploratory analysis of the serious-injury outcome indicators CSV.

The script loads the CSV, prints a textual overview (info, head, missing
values, descriptive statistics, z-score outlier counts) and then shows a
series of matplotlib/seaborn figures.

Usage:
    python pythonproject.py [path/to/file.csv]

When no path is given, ``DEFAULT_CSV_PATH`` is used.
"""
import sys

import numpy as np
import pandas as pd
from scipy.stats import zscore

# Location used when no path is passed on the command line.
DEFAULT_CSV_PATH = (
    "C:\\Users\\rahul\\Downloads\\serious-injury-outcome-indicators-2000-2023.csv"
)

# Columns treated as numeric throughout the analysis.
NUMERIC_COLS = ['Data_value', 'Lower_CI', 'Upper_CI']


def print_overview(df, numeric_cols):
    """Print structure, first rows, missing-value counts and summary stats.

    Args:
        df: The loaded DataFrame.
        numeric_cols: Column names passed to ``describe()``.
    """
    print("Dataset Info:")
    # DataFrame.info() writes to stdout itself and returns None, so wrapping
    # it in print() would emit a stray "None" line.
    df.info()
    print("\nFirst 5 Rows:")
    print(df.head())

    print("\nMissing Values:\n", df.isnull().sum())

    print("\nStatistical Summary:\n", df[numeric_cols].describe())


def count_outliers(df, columns, threshold=3.0):
    """Count, per column, the rows whose absolute z-score exceeds *threshold*.

    Missing values are left out of the mean/standard deviation
    (``nan_policy="omit"``); with scipy's default policy a single NaN turns
    the whole column's z-scores into NaN and the count silently becomes 0.
    A missing value itself is never counted as an outlier.

    Args:
        df: DataFrame containing *columns*.
        columns: Names of the columns to score; they must be convertible
            to float.
        threshold: Absolute z-score above which a value is an outlier.

    Returns:
        A Series indexed by column name holding the outlier count of each
        column.
    """
    columns = list(columns)
    values = df[columns].to_numpy(dtype=float)
    scores = np.abs(zscore(values, axis=0, nan_policy="omit"))
    # zscore returns a bare ndarray; re-attach the labels so the result is
    # a per-column count instead of one grand total.
    flags = pd.DataFrame(scores > threshold, columns=columns, index=df.index)
    return flags.sum()


def top_causes_by_mean(df, n=10):
    """Return the *n* causes with the highest mean ``Data_value``.

    Args:
        df: DataFrame with ``Cause`` and ``Data_value`` columns.
        n: Number of causes to keep.

    Returns:
        A Series of mean values indexed by cause, sorted descending.
    """
    means = df.groupby('Cause')['Data_value'].mean()
    return means.sort_values(ascending=False).head(n)


def show_figures(df, numeric_cols):
    """Show every figure of the analysis, one window after another.

    Args:
        df: The loaded DataFrame; it is not modified.
        numeric_cols: Columns used for the heatmap, boxplot and pair plot.
    """
    # Imported here rather than at module level so the numeric helpers above
    # can be imported without the plotting stack installed.
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Correlation heatmap
    plt.figure(figsize=(8, 6))
    sns.heatmap(df[numeric_cols].corr(), annot=True, cmap='coolwarm')
    plt.title('Correlation Heatmap')
    plt.show()

    # Boxplot for outliers
    plt.figure(figsize=(8, 6))
    sns.boxplot(data=df[numeric_cols])
    plt.title('Boxplot - Outliers')
    plt.show()

    # Line chart of data over time. Unparseable periods become NaT.
    # NOTE(review): if 'Period' holds plain integer years, pd.to_datetime
    # reads them as nanosecond offsets from the epoch -- confirm the
    # column's format.
    period = pd.to_datetime(df['Period'], errors='coerce')
    ordered = df.assign(Period=period).sort_values(by='Period')
    plt.figure(figsize=(12, 6))
    plt.plot(ordered['Period'], ordered['Data_value'])
    plt.title('Line Chart - Data Value over Time')
    plt.xlabel('Period')
    plt.ylabel('Data Value')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # Bar plot - data by severity
    plt.figure(figsize=(8, 6))
    sns.barplot(x='Severity', y='Data_value', data=df)
    plt.title('Bar Plot - Data Value by Severity')
    plt.show()

    # Column chart - top 10 causes (drawn on the figure created just above)
    plt.figure(figsize=(10, 6))
    top_causes_by_mean(df, n=10).plot(kind='bar', color='skyblue')
    plt.title('Top 10 Causes by Average Data Value')
    plt.ylabel('Average Data Value')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # Scatter plot - CI bounds
    plt.figure(figsize=(8, 6))
    sns.scatterplot(data=df, x='Lower_CI', y='Upper_CI', hue='Severity')
    plt.title('Scatter Plot - Confidence Intervals by Severity')
    plt.show()

    # Pair plot of numeric columns (pairplot creates its own figure)
    sns.pairplot(df[numeric_cols])
    plt.suptitle('Pair Plot of Numeric Variables', y=1.02)
    plt.show()


def main(argv=None):
    """Run the full analysis once.

    Args:
        argv: Optional argument list (without the program name). Its first
            item, if any, is the CSV path. Defaults to ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    csv_path = args[0] if args else DEFAULT_CSV_PATH

    df = pd.read_csv(csv_path)

    print_overview(df, NUMERIC_COLS)
    print("\nOutliers (Z-Score > 3):\n", count_outliers(df, NUMERIC_COLS))
    show_figures(df, NUMERIC_COLS)


if __name__ == "__main__":
    main()