├── 2025_IOLP_BUILDINGS_DATASET.csv
├── PROJECT.py
└── README.md
/PROJECT.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | import seaborn as sb
5 |
#OBJECTIVE 1:
#Bar Chart of Total Square Footage with Mean and Z-Score

data = pd.read_csv('2025_IOLP_BUILDINGS_DATASET.csv')
print(data.head())

# Total rentable square feet per GSA region, largest region first.
total = data.groupby('GSA Region')['Building Rentable Square Feet'].sum()
total = total.sort_values(ascending=False)
mean = total.mean()

plt.figure(figsize=(12, 7))
colors = ['red', 'blue', 'green', 'orange', 'purple', 'yellow', 'pink', 'cyan', 'lime', 'teal', 'gold']
# Plot against string labels so the bars keep the descending order computed
# above. If the region labels are numeric, matplotlib would otherwise place
# each bar at its numeric x position and the sort would have no visible effect.
plt.bar(total.index.astype(str), total.to_numpy(), color=colors, edgecolor='black', width=0.7)
plt.title('TOTAL SQUARE FEET BY REGION', fontsize=16, color='darkblue')
plt.xlabel('REGIONS', fontsize=12, color='purple')
plt.ylabel('SQUARE FEET', fontsize=12, color='purple')
plt.xticks(rotation=45, fontsize=10)
# Reference line at the mean of the regional totals.
plt.axhline(mean, color='red', linestyle='--', linewidth=2, label=f'Average: {mean:.2f}')
plt.legend(fontsize=10)
plt.grid(axis='y', linestyle='--', alpha=0.5, color='gray')
plt.show()

# Z-score of each regional total relative to the mean of all regional totals
# (Series.std() is the sample standard deviation).
z = (total - mean) / total.std()
print('Average square feet is:', mean)
print(" ")
print('Z-scores for each region:\n', z)
print(" ")
# "Most extreme" here means the highest (most positive) z-score.
print('The most extreme region is', z.idxmax(), 'with a z-score of', z.max())
print(" ")
32 |
#OBJECTIVE 2:
#Pie Chart of Property Counts with Covariance
data = pd.read_csv('2025_IOLP_BUILDINGS_DATASET.csv')

# Number of rows (properties) per GSA region, most frequent region first.
count = data['GSA Region'].value_counts()
plt.figure(figsize=(8, 8))
plt.pie(count, labels=count.index, autopct='%1.1f%%', colors=['lightblue', 'lightpink', 'lightgreen', 'cyan', 'blue', 'yellow', 'orange', 'violet', 'lime', 'teal', 'gold'])
plt.title('PROPERTIES IN EACH REGION', fontsize=16, color='darkgreen')
plt.show()

totals = data.groupby('GSA Region')['Building Rentable Square Feet'].sum()
# np.cov pairs its inputs by position, not by label. `totals` is ordered by
# region label while `count` is ordered by frequency, so `count` must be put
# in the same region order first; otherwise the covariance would pair one
# region's square footage with a different region's property count.
aligned_count = count.reindex(totals.index)
cov = np.cov(totals.to_numpy(), aligned_count.to_numpy())[0, 1]
print('Covariance between square feet and counts is:', cov)
print('Region with most properties is', count.idxmax(), 'with', count.max())
print('Region with least properties is', count.idxmin(), 'with', count.min())
46 |
47 |
#OBJECTIVE 3:
#Scatter of Counts vs Square Feet (scatter),Square Feet Box Plot (Box)

data = pd.read_csv('2025_IOLP_BUILDINGS_DATASET.csv')

# One row per region: total square feet and number of named properties.
summary = data.groupby('GSA Region').agg({
    'Building Rentable Square Feet': 'sum',
    'Real Property Asset Name': 'count'
})
summary.columns = ['Square Feet', 'Count']

plt.figure(figsize=(10, 6))
plt.scatter(summary['Count'], summary['Square Feet'], color='blue', s=100)
plt.title('SQUARE FEET AND COUNTS', fontsize=12, color='blue')
plt.xlabel('COUNT OF PROPERTIES', fontsize=10, color = "blue")
plt.ylabel('SQUARE FEET', fontsize=10, color = "blue")
# Draw the connecting line through the same (count, square feet) points as the
# scatter, ordered by count. Using the region index as x would put the line on
# a different scale from the axis, which measures property counts.
by_count = summary.sort_values('Count')
plt.plot(by_count['Count'], by_count['Square Feet'], color='red', label='Square Feet Line')
plt.legend()
plt.grid(True)
plt.show()

# Per-region distribution of individual property sizes. Regions come from a
# single groupby pass (sorted by label, missing labels excluded) and missing
# square-foot values are dropped, since boxplot does not filter them itself.
region_groups = data.groupby('GSA Region')['Building Rentable Square Feet']
unique_regions = [region for region, _ in region_groups]
plt.figure(figsize=(10, 6))
plt.boxplot([values.dropna().to_numpy() for _, values in region_groups])
plt.title('SQUARE FEET BOX', fontsize=14, color='darkblue')
plt.xlabel('REGIONS', fontsize=12, color = "darkblue")
plt.ylabel('SQUARE FEET', fontsize=12, color = "darkblue")
# Box positions are 1-based, so label position k with the k-th region.
plt.xticks(range(1, len(unique_regions) + 1), unique_regions)
plt.grid(True)
plt.show()

# Both columns come from the same frame, so they are already aligned by region.
cov = np.cov(summary['Square Feet'], summary['Count'])[0, 1]
print(" ")
print('Covariance between square feet and counts is:', cov)
78 |
79 |
80 |
#OBJECTIVE 4
#Average Square Feet per Property
data = pd.read_csv('2025_IOLP_BUILDINGS_DATASET.csv')
# Coerce once up front so the bar chart, line chart and heatmap all work from
# the same numeric column (unparseable values become NaN and are skipped by
# the aggregations below).
data['Building Rentable Square Feet'] = pd.to_numeric(data['Building Rentable Square Feet'], errors='coerce')
region_square_feet = data.groupby('GSA Region')['Building Rentable Square Feet']

avg = region_square_feet.mean()
avg = avg.sort_values(ascending=False)

# Bar chart
plt.figure(figsize=(10, 6))
# String labels keep the bars in the descending order computed above; numeric
# region labels would be positioned by value and undo the sort.
plt.bar(avg.index.astype(str), avg.to_numpy(), color='green')
plt.title("AVERAGE SQUARE FEET PER PROPERTY", fontsize=14, color="darkblue")
plt.xlabel('REGIONS', fontsize=12,color="darkblue")
plt.ylabel('AVERAGE SQUARE FEET', fontsize=12,color="darkblue")
plt.xticks(rotation=45)
plt.grid(True)
plt.show()

#Stacked Line Chart
# Two series (per-region mean and per-region total) drawn on one shared axis.
stats = region_square_feet.agg(['mean', 'sum'])
plt.figure(figsize=(10, 6))
plt.plot(stats.index, stats['mean'], color='blue', label='Average')
plt.plot(stats.index, stats['sum'], color='red', label='Total')
plt.title('Stacked Line - Mean and Total', fontsize=14, color='darkblue')
plt.xlabel('REGIONS', fontsize=12,color= "darkblue")
plt.ylabel('SQUARE FEET', fontsize=12,color = "darkblue")
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.show()

# Heatmap
# A single-column frame of regional totals indexed by region. The totals are
# already one row per region, so no further pivoting or re-reading of the CSV
# is needed.
heatmap_data = region_square_feet.sum().to_frame()
plt.figure(figsize=(10, 6))
plt.title('Total Rentable Square Footage by GSA Region', fontsize=18, fontweight='bold')
sb.heatmap(
    heatmap_data,
    annot=True,
    fmt=".0f",
    cmap="YlGnBu",
    linewidths=0.5,
    linecolor='white'
)
plt.xlabel("Rentable Square Feet", fontsize=12)
plt.ylabel("GSA Region", fontsize=12)
plt.tight_layout()
plt.show()
132 |
133 |
134 |
#OBJECTIVE 5:
#Geospatial Distribution of Properties(OWNED OR LEASED)

data = pd.read_csv("2025_IOLP_BUILDINGS_DATASET.csv")
# Only the coordinates and the ownership flag are plotted, so only those
# columns decide whether a row is usable. A missing City or State no longer
# removes a property that has valid coordinates.
plotted_columns = ['Latitude', 'Longitude', 'Owned or Leased']
geo = data[plotted_columns + ['City', 'State']].dropna(subset=plotted_columns)
print(data.head())
sb.set(style="whitegrid", palette="pastel", font_scale=1.2)
plt.figure(figsize=(14, 8))
plt.title('Geospatial Distribution of Federal Properties\n(Colored by Ownership Type)', fontsize=18, fontweight='bold')

# One marker per property, colored by the 'Owned or Leased' value.
sb.scatterplot(
    data=geo,
    x="Longitude",
    y="Latitude",
    hue="Owned or Leased",
    s=100,
    edgecolor="black",
    alpha=0.7
)

plt.xlabel("LONGITUDE", fontsize=14, color = "darkblue")
plt.ylabel("LATITUDE", fontsize=14,color = "darkblue")
plt.legend(title="OWNERSHIP TYPE", loc='upper right')
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()

plt.show()
162 |
#OBJECTIVE 6:
# Construction Trend Over Time

df = pd.read_csv("2025_IOLP_BUILDINGS_DATASET.csv")

# Parse the construction date as a number (unparseable entries become NaN)
# and floor it to the start of its decade.
construction_year = pd.to_numeric(df['Construction Date'], errors='coerce')
df['Construction Date'] = construction_year
df['Decade'] = construction_year.floordiv(10).mul(10)

# Number of rows per decade, in chronological order
construction_counts = df['Decade'].value_counts().sort_index()
decade_labels = construction_counts.index
building_counts = construction_counts.values

# Trend line first, then one distinctly colored marker per decade on top of it
plt.figure(figsize=(10, 6))
palette = sb.color_palette("husl", len(construction_counts))
trend_style = dict(marker='o', linewidth=2.5, markersize=8, color='blue')
sb.lineplot(x=decade_labels, y=building_counts, **trend_style)
for marker_color, (decade, n_built) in zip(palette, construction_counts.items()):
    plt.plot(decade, n_built, 'o', markersize=10, color=marker_color)

plt.title("CONSTRUCTION TREND OVER TIME", fontsize=16,color = "darkblue")
plt.xlabel("DECADE", fontsize=12,color="darkblue")
plt.ylabel("NUMBER OF OBSERVATION", fontsize=12,color = "darkblue")
plt.grid(True)
plt.tight_layout()
plt.show()
192 |
193 | #-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
194 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PYTHON PROJECT ON IOLP_BUILDING_DATASET
2 | # Objective 1:-
3 | Total Square Footage by Region:
4 | -> A bar chart visualized total rentable square feet per GSA region, with
5 | regions sorted by size.
-> The mean square footage across regions was plotted as a reference line, and
z-scores identified outlier regions.
-> The most extreme region (highest z-score) indicated a significant
concentration of rentable square footage.
10 |
11 | # Objective 2:-
12 | Property Counts by Region:
13 | -> A pie chart showed the distribution of properties across regions, with
14 | percentages highlighting disparities.
15 | -> The region with the most properties was identified, alongside the one with
16 | the least.
-> Covariance between square footage and property counts was positive,
indicating that regions with more properties tend to have larger total square
footage. (Covariance is unscaled, so it shows the direction of the
relationship rather than its strength.)
20 |
21 | # Objective 3:-
22 | Square Footage vs. Property Counts:
23 | -> A scatter plot revealed the relationship between property counts and total
24 | square footage per region.
25 | -> A box plot displayed the distribution of square footage within each region,
26 | highlighting variability and outliers.
27 | -> Covariance confirmed a positive correlation between counts and square
28 | footage.
29 |
30 | # Objective 4:-
31 | Average Square Footage per Property:
32 | -> A bar chart compared average square feet per property across regions,
33 | sorted in descending order.
-> A line chart (titled "Stacked Line") plotted total and average square
footage as two series on the same axes, showing regional trends.
36 | -> A heatmap visualized total square footage with annotated values,
37 | emphasizing high-density regions.
38 |
39 | # Objective 5:-
40 | Geospatial Distribution:
41 | -> A scatter plot mapped properties by latitude and longitude, colored by
42 | ownership type (Owned or Leased).
43 | -> The visualization highlighted geographic concentrations and ownership
44 | patterns, aiding spatial planning.
45 |
46 | # Objective 6:-
47 | Construction Trends Over Time:
48 | -> A line plot tracked the number of buildings constructed per decade,
49 | revealing historical trends.
50 | -> Peaks in certain decades suggested periods of significant federal
51 | infrastructure development.
52 |
--------------------------------------------------------------------------------