"""Exploratory analysis of greenhouse gas emissions (industry and household).

README: Greenhouse gas emission.

Repository layout this script was recovered from:

    Greenhouse_Gas_Emissions_Analysis_Presentation (1)-compressed.pdf
    README.md
    greenhouse-gas-emissions-industry-and-household-September-2024-quarter.csv
    import pyt.py      <- this script
    python code        <- second copy of the same script
    report pyt.docx

Binary assets (not reproduced here):
    https://raw.githubusercontent.com/yaseen7975/Yaseen-python-Project-/a8d55cccb8ba51f50c3e46b692d89008cfdae87d/Greenhouse_Gas_Emissions_Analysis_Presentation (1)-compressed.pdf
    https://raw.githubusercontent.com/yaseen7975/Yaseen-python-Project-/a8d55cccb8ba51f50c3e46b692d89008cfdae87d/report pyt.docx

Usage:
    python "import pyt.py" [path/to/emissions.csv]

Without an argument the CSV is looked up next to this script. The script
prints summary statistics, shows a sequence of matplotlib/seaborn figures and
finally prints the rows whose ``Data_value`` lies more than three standard
deviations from the mean.
"""

import sys
from pathlib import Path
from typing import Optional, Sequence

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import zscore

CSV_NAME = (
    "greenhouse-gas-emissions-industry-and-household-"
    "September-2024-quarter.csv"
)
REQUIRED_COLUMNS = ("Period", "Anzsic_descriptor", "Data_value")
HOUSEHOLD_SECTOR = "Households"
EMISSIONS_LABEL = "Emissions (Kilotonnes)"
Z_SCORE_THRESHOLD = 3


def load_emissions(csv_path: Path) -> pd.DataFrame:
    """Read the emissions CSV and check that the columns used below exist.

    Raises:
        FileNotFoundError: if ``csv_path`` does not exist (raised by pandas).
        KeyError: if any of ``REQUIRED_COLUMNS`` is missing from the file.
    """
    df = pd.read_csv(csv_path)
    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise KeyError(f"{csv_path} is missing required columns: {missing}")
    return df


def print_overview(df: pd.DataFrame) -> None:
    """Print structure, sample rows, summary statistics and null counts."""
    # DataFrame.info() writes its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None").
    df.info()
    print(df.head())
    print(df.describe())
    print(df.nunique())
    print(df.isnull().sum())


def latest_totals_by_sector(df: pd.DataFrame) -> pd.Series:
    """Return summed ``Data_value`` per sector for the most recent period.

    The result is sorted from the largest total to the smallest.
    """
    # NOTE(review): this sums every row of a sector; if the file mixes
    # several gases or measures per sector they are added together - confirm
    # that is intended.
    latest_rows = df[df["Period"] == df["Period"].max()]
    totals = latest_rows.groupby("Anzsic_descriptor")["Data_value"].sum()
    return totals.sort_values(ascending=False)


def household_totals_by_period(df: pd.DataFrame) -> pd.Series:
    """Return summed ``Data_value`` per period for the Households sector."""
    households = df[df["Anzsic_descriptor"] == HOUSEHOLD_SECTOR]
    return households.groupby("Period")["Data_value"].sum()


def flag_outliers(df: pd.DataFrame) -> pd.DataFrame:
    """Return rows whose ``Data_value`` z-score exceeds the threshold.

    The returned frame carries an extra ``Z_score`` column. Missing values
    are left out of the mean/standard deviation instead of turning every
    z-score into NaN, which would make the outlier table silently empty.
    """
    scored = df.assign(Z_score=zscore(df["Data_value"], nan_policy="omit"))
    return scored[scored["Z_score"].abs() > Z_SCORE_THRESHOLD]


def plot_top_sectors_bar(sector_totals: pd.Series) -> None:
    """Show a bar chart of the ten highest-emitting sectors."""
    top10 = sector_totals.head(10)
    positions = range(len(top10))
    plt.bar(positions, top10.values, color="green")
    # Right-anchor the rotated labels so long sector names end under their
    # own bar instead of being centred on it.
    plt.xticks(positions, top10.index, rotation=45, ha="right")
    plt.title("Top 10 Emitting Sectors")
    plt.xlabel("Sector")
    plt.ylabel(EMISSIONS_LABEL)
    plt.tight_layout()
    plt.show()


def plot_household_trend(household_totals: pd.Series) -> None:
    """Show household emissions per period as a line with markers."""
    plt.plot(household_totals.index, household_totals.values, marker="o")
    plt.title("Emission Trend - Households")
    plt.xlabel("Period")
    plt.ylabel(EMISSIONS_LABEL)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


def plot_top_sectors_pie(sector_totals: pd.Series) -> None:
    """Show a pie chart of the five highest-emitting sectors.

    Percentages are shares of the top-five total, not of all emissions.
    """
    top5 = sector_totals.head(5)
    plt.pie(top5, labels=top5.index, autopct="%1.1f%%", startangle=100)
    plt.title("Top 5 Emission Contributors")
    plt.tight_layout()
    plt.show()


def plot_household_scatter(household_totals: pd.Series) -> None:
    """Show household emissions per period as a scatter plot."""
    plt.scatter(household_totals.index, household_totals.values)
    plt.title("Emission Intensity - Households")
    plt.xlabel("Period")
    plt.ylabel(EMISSIONS_LABEL)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


def plot_pairplot(df: pd.DataFrame) -> None:
    """Show a seaborn pair plot of ``Period`` against ``Data_value``."""
    sns.pairplot(df[["Period", "Data_value"]])
    plt.tight_layout()
    # Reserve a strip at the top for the figure-level title. The earlier
    # y=2.02 placed the title at about twice the figure height, i.e. outside
    # the canvas, so it was never visible.
    plt.subplots_adjust(top=0.93)
    plt.suptitle("Pair Plot of Numerical Variables")
    plt.show()


def plot_correlation_heatmap(df: pd.DataFrame) -> None:
    """Show the correlation between ``Period`` and ``Data_value``."""
    corr_data = df[["Period", "Data_value"]].corr()
    sns.heatmap(corr_data, annot=True, cmap="coolwarm")
    plt.title("Correlation Heatmap")
    plt.tight_layout()
    plt.show()


def plot_outlier_boxplot(df: pd.DataFrame) -> None:
    """Show a box plot of ``Data_value`` to expose extreme observations."""
    sns.boxplot(data=df, x="Data_value")
    plt.title("Outliers in Emissions")
    plt.tight_layout()
    plt.show()


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Run the full analysis.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument, when present, is the path
            to the CSV file. Otherwise the CSV is expected next to this
            script.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    # Resolve relative to the script rather than a user-specific absolute
    # Windows path, so the analysis runs from a fresh checkout.
    csv_path = Path(args[0]) if args else Path(__file__).resolve().parent / CSV_NAME

    df = load_emissions(csv_path)
    print_overview(df)

    sns.set(style="whitegrid")
    plt.rcParams["figure.figsize"] = (12, 6)

    # Both aggregates feed two charts each; compute them once.
    sector_totals = latest_totals_by_sector(df)
    household_totals = household_totals_by_period(df)

    plot_top_sectors_bar(sector_totals)
    plot_household_trend(household_totals)
    plot_top_sectors_pie(sector_totals)
    plot_household_scatter(household_totals)
    plot_pairplot(df)
    plot_correlation_heatmap(df)
    plot_outlier_boxplot(df)

    outliers = flag_outliers(df)
    print(outliers[["Anzsic_descriptor", "Period", "Data_value", "Z_score"]])


if __name__ == "__main__":
    main()