"""Exploratory analysis of the 2025 IOLP buildings dataset.

Reads ``2025_IOLP_BUILDINGS_DATASET.csv`` from the current working directory
and walks through six objectives. Each objective shows one or more charts and
some of them print summary statistics to stdout.
"""

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sb

DATASET_PATH = '2025_IOLP_BUILDINGS_DATASET.csv'
REGION_COL = 'GSA Region'
SQFT_COL = 'Building Rentable Square Feet'


def load_buildings(path=DATASET_PATH):
    """Read the dataset once and coerce the square-footage column to numbers.

    Values that cannot be parsed as numbers become NaN so that every
    objective aggregates the same, numeric column.
    """
    data = pd.read_csv(path)
    data[SQFT_COL] = pd.to_numeric(data[SQFT_COL], errors='coerce')
    return data


def _region_labels(keys):
    """Return region keys as strings so matplotlib treats them as categories.

    If the keys are numeric, matplotlib places bars at their numeric
    positions, which would silently undo any sort applied to the series.
    """
    labels = []
    for key in keys:
        if isinstance(key, float) and key.is_integer():
            key = int(key)  # avoid labels such as "3.0"
        labels.append(str(key))
    return labels


# OBJECTIVE 1:
# Bar Chart of Total Square Footage with Mean and Z-Score
def plot_total_square_feet(data):
    """Show total square feet per region and print the mean and z-scores."""
    total = data.groupby(REGION_COL)[SQFT_COL].sum()
    total = total.sort_values(ascending=False)

    plt.figure(figsize=(12, 7))
    colors = ['red', 'blue', 'green', 'orange', 'purple', 'yellow', 'pink',
              'cyan', 'lime', 'teal', 'gold']
    plt.bar(_region_labels(total.index), total.values, color=colors,
            edgecolor='black', width=0.7)
    plt.title('TOTAL SQUARE FEET BY REGION', fontsize=16, color='darkblue')
    plt.xlabel('REGIONS', fontsize=12, color='purple')
    plt.ylabel('SQUARE FEET', fontsize=12, color='purple')
    plt.xticks(rotation=45, fontsize=10)
    mean = total.mean()
    plt.axhline(mean, color='red', linestyle='--', linewidth=2,
                label=f'Average: {mean:.2f}')
    plt.legend(fontsize=10)
    plt.grid(axis='y', linestyle='--', alpha=0.5, color='gray')
    plt.show()

    z = (total - mean) / total.std()
    print('Average square feet is:', mean)
    print(" ")
    print('Z-scores for each region:\n', z)
    print(" ")
    print('The most extreme region is', z.idxmax(),
          'with a z-score of', z.max())
    print(" ")


# OBJECTIVE 2:
# Pie Chart of Property Counts with Covariance
def plot_property_counts(data):
    """Show the share of properties per region and print count statistics."""
    count = data[REGION_COL].value_counts()

    plt.figure(figsize=(8, 8))
    plt.pie(count, labels=count.index, autopct='%1.1f%%',
            colors=['lightblue', 'lightpink', 'lightgreen', 'cyan', 'blue',
                    'yellow', 'orange', 'violet', 'lime', 'teal', 'gold'])
    plt.title('PROPERTIES IN EACH REGION', fontsize=16, color='darkgreen')
    plt.show()

    totals = data.groupby(REGION_COL)[SQFT_COL].sum()
    # np.cov pairs values by position. value_counts() is ordered by frequency
    # while the groupby result is ordered by region key, so the counts must
    # be put in the same region order before the two are compared.
    aligned_count = count.reindex(totals.index)
    cov = np.cov(totals, aligned_count)[0, 1]
    print('Covariance between square feet and counts is:', cov)
    print('Region with most properties is', count.idxmax(),
          'with', count.max())
    print('Region with least properties is', count.idxmin(),
          'with', count.min())


# OBJECTIVE 3:
# Scatter of Counts vs Square Feet (scatter), Square Feet Box Plot (Box)
def plot_counts_vs_square_feet(data):
    """Show counts vs. square feet, a per-region box plot, and covariance."""
    summary = data.groupby(REGION_COL).agg({
        SQFT_COL: 'sum',
        'Real Property Asset Name': 'count',
    })
    summary.columns = ['Square Feet', 'Count']

    plt.figure(figsize=(10, 6))
    plt.scatter(summary['Count'], summary['Square Feet'], color='blue', s=100)
    plt.title('SQUARE FEET AND COUNTS', fontsize=12, color='blue')
    plt.xlabel('COUNT OF PROPERTIES', fontsize=10, color="blue")
    plt.ylabel('SQUARE FEET', fontsize=10, color="blue")
    # The x-axis measures property counts, so the connecting line has to use
    # counts as x as well; sorting keeps the line running left to right.
    ordered = summary.sort_values('Count')
    plt.plot(ordered['Count'], ordered['Square Feet'], color='red',
             label='Square Feet Line')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Rows without a region or a square-footage value cannot be placed in a
    # box, so they are dropped; groupby yields the regions in sorted order.
    usable = data.dropna(subset=[REGION_COL, SQFT_COL])
    regions = []
    samples = []
    for region, values in usable.groupby(REGION_COL)[SQFT_COL]:
        regions.append(region)
        samples.append(values)

    plt.figure(figsize=(10, 6))
    plt.boxplot(samples)
    plt.title('SQUARE FEET BOX', fontsize=14, color='darkblue')
    plt.xlabel('REGIONS', fontsize=12, color="darkblue")
    plt.ylabel('SQUARE FEET', fontsize=12, color="darkblue")
    plt.xticks(range(1, len(regions) + 1), _region_labels(regions))
    plt.grid(True)
    plt.show()

    cov = np.cov(summary['Square Feet'], summary['Count'])[0, 1]
    print(" ")
    print('Covariance between square feet and counts is:', cov)


# OBJECTIVE 4:
# Average Square Feet per Property
def plot_average_square_feet(data):
    """Show average square feet per region as a bar, line and heatmap."""
    by_region = data.groupby(REGION_COL)[SQFT_COL]

    # Bar chart
    avg = by_region.mean().sort_values(ascending=False)
    plt.figure(figsize=(10, 6))
    plt.bar(_region_labels(avg.index), avg.values, color='green')
    plt.title("AVERAGE SQUARE FEET PER PROPERTY", fontsize=14,
              color="darkblue")
    plt.xlabel('REGIONS', fontsize=12, color="darkblue")
    plt.ylabel('AVERAGE SQUARE FEET', fontsize=12, color="darkblue")
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.show()

    # Stacked Line Chart
    stats = by_region.agg(['mean', 'sum'])
    plt.figure(figsize=(10, 6))
    plt.plot(stats.index, stats['mean'], color='blue', label='Average')
    plt.plot(stats.index, stats['sum'], color='red', label='Total')
    plt.title('Stacked Line - Mean and Total', fontsize=14, color='darkblue')
    plt.xlabel('REGIONS', fontsize=12, color="darkblue")
    plt.ylabel('SQUARE FEET', fontsize=12, color="darkblue")
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True)
    plt.show()

    # Heatmap: a single-column frame (one row per region) of total area.
    heatmap_data = data.groupby(REGION_COL)[[SQFT_COL]].sum()
    plt.figure(figsize=(10, 6))
    plt.title('Total Rentable Square Footage by GSA Region', fontsize=18,
              fontweight='bold')
    sb.heatmap(
        heatmap_data,
        annot=True,
        fmt=".0f",
        cmap="YlGnBu",
        linewidths=0.5,
        linecolor='white',
    )
    plt.xlabel("Rentable Square Feet", fontsize=12)
    plt.ylabel("GSA Region", fontsize=12)
    plt.tight_layout()
    plt.show()


# OBJECTIVE 5:
# Geospatial Distribution of Properties (OWNED OR LEASED)
def plot_geospatial_distribution(data):
    """Scatter properties by longitude/latitude, colored by ownership."""
    geo = data[['Latitude', 'Longitude', 'Owned or Leased', 'City',
                'State']].dropna()
    # NOTE: sb.set changes the global plotting style, so it also affects
    # every figure drawn after this function runs.
    sb.set(style="whitegrid", palette="pastel", font_scale=1.2)
    plt.figure(figsize=(14, 8))
    plt.title('Geospatial Distribution of Federal Properties\n'
              '(Colored by Ownership Type)', fontsize=18, fontweight='bold')
    sb.scatterplot(
        data=geo,
        x="Longitude",
        y="Latitude",
        hue="Owned or Leased",
        s=100,
        edgecolor="black",
        alpha=0.7,
    )
    plt.xlabel("LONGITUDE", fontsize=14, color="darkblue")
    plt.ylabel("LATITUDE", fontsize=14, color="darkblue")
    plt.legend(title="OWNERSHIP TYPE", loc='upper right')
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()


# OBJECTIVE 6:
# Construction Trend Over Time
def plot_construction_trend(data):
    """Plot how many rows fall into each construction decade."""
    # Computed on a local series so the shared frame is not modified.
    years = pd.to_numeric(data['Construction Date'], errors='coerce')
    decades = ((years // 10) * 10).rename('Decade')

    # Count constructions per decade (value_counts skips missing decades)
    construction_counts = decades.value_counts().sort_index()

    # Plotting
    plt.figure(figsize=(10, 6))
    palette = sb.color_palette("husl", len(construction_counts))
    sb.lineplot(
        x=construction_counts.index,
        y=construction_counts.values,
        marker='o',
        linewidth=2.5,
        markersize=8,
        color='blue',
    )
    # Overlay one individually colored marker per decade.
    for shade, (decade, n_built) in zip(palette, construction_counts.items()):
        plt.plot(decade, n_built, 'o', markersize=10, color=shade)
    plt.title("CONSTRUCTION TREND OVER TIME", fontsize=16, color="darkblue")
    plt.xlabel("DECADE", fontsize=12, color="darkblue")
    plt.ylabel("NUMBER OF OBSERVATION", fontsize=12, color="darkblue")
    plt.grid(True)
    plt.tight_layout()
    plt.show()


def main():
    """Load the dataset once and run the six objectives in order."""
    data = load_buildings()
    print(data.head())
    plot_total_square_feet(data)
    plot_property_counts(data)
    plot_counts_vs_square_feet(data)
    plot_average_square_feet(data)
    plot_geospatial_distribution(data)
    plot_construction_trend(data)


if __name__ == "__main__":
    main()

#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 194 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PYTHON PROJECT ON IOLP_BUILDING_DATASET 2 | # Objective 1:- 3 | Total Square Footage by Region:
4 | -> A bar chart visualized total rentable square feet per GSA region, with 5 | regions sorted by size.
6 | -> The mean square footage across regions was plotted as a reference line, and 7 | z-scores identified outliers.
8 | -> The most extreme region (highest z-score) indicated a significant 9 | concentration of large properties.
10 | 11 | # Objective 2:- 12 | Property Counts by Region:
13 | -> A pie chart showed the distribution of properties across regions, with 14 | percentages highlighting disparities.
15 | -> The region with the most properties was identified, alongside the one with 16 | the least.
17 | -> Covariance between square footage and property counts suggested a 18 | moderate positive relationship, indicating that regions with more properties 19 | tend to have larger total square footage.
20 | 21 | # Objective 3:- 22 | Square Footage vs. Property Counts:
23 | -> A scatter plot revealed the relationship between property counts and total 24 | square footage per region.
25 | -> A box plot displayed the distribution of square footage within each region, 26 | highlighting variability and outliers.
27 | -> Covariance confirmed a positive correlation between counts and square 28 | footage.
29 | 30 | # Objective 4:- 31 | Average Square Footage per Property:
32 | -> A bar chart compared average square feet per property across regions, 33 | sorted in descending order.
34 | -> A stacked line chart contrasted total and average square footage, showing 35 | regional trends.
36 | -> A heatmap visualized total square footage with annotated values, 37 | emphasizing high-density regions.
38 | 39 | # Objective 5:- 40 | Geospatial Distribution:
41 | -> A scatter plot mapped properties by latitude and longitude, colored by 42 | ownership type (Owned or Leased).
43 | -> The visualization highlighted geographic concentrations and ownership 44 | patterns, aiding spatial planning.
45 | 46 | # Objective 6:- 47 | Construction Trends Over Time:
48 | -> A line plot tracked the number of buildings constructed per decade, 49 | revealing historical trends.
50 | -> Peaks in certain decades suggested periods of significant federal 51 | infrastructure development.
52 | --------------------------------------------------------------------------------