├── int 375 project.docx
├── Screenshot 2025-05-02 104742.png
├── Screenshot 2025-05-02 104752.png
├── Screenshot 2025-05-02 104809.png
├── Screenshot 2025-05-02 104818.png
├── Screenshot 2025-05-02 105150.png
├── Screenshot 2025-05-02 105201.png
└── Cse_375_Himanshu 123.py


/int 375 project.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/int 375 project.docx


--------------------------------------------------------------------------------
/Screenshot 2025-05-02 104742.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 104742.png


--------------------------------------------------------------------------------
/Screenshot 2025-05-02 104752.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 104752.png


--------------------------------------------------------------------------------
/Screenshot 2025-05-02 104809.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 104809.png


--------------------------------------------------------------------------------
/Screenshot 2025-05-02 104818.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 104818.png


--------------------------------------------------------------------------------
/Screenshot 2025-05-02 105150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 105150.png


--------------------------------------------------------------------------------
/Screenshot 2025-05-02 105201.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 105201.png


--------------------------------------------------------------------------------
/Cse_375_Himanshu 123.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | # import seaborn as sns
  5 | df = pd.read_csv("C:\\Users\\acer\\Downloads\\Traffic_Volumes_AADT (1).csv")
  6 | # 1.Load and inspect the dataset
  7 | 
  8 | # Display the first 5 rows
  9 | print("First 5 rows of the dataset:")
 10 | print(df.head())
 11 | # Dataset information (columns, non-null counts, data types)
 12 | print("\nDataset Info:")
 13 | print(df.info())
 14 | # Summary statistics for numerical columns
 15 | print("\nSummary Statistics:")
 16 | print(df.describe())
 17 | # Check for missing values
 18 | print("\nMissing Values:")
 19 | print(df.isnull().sum())
 20 | '''
 21 | #2. Handling missing data
 22 | '''
 23 | #Check for missing values
 24 | print("Missing Values (Count):")
 25 | print(df.isnull().sum())
 26 | 
 27 | print("\nMissing Values (%):")
 28 | print((df.isnull().sum() / len(df)) * 100)
 29 | 
 30 | #Drop rows with any missing values (if appropriate)
 31 | df_dropped_rows = df.dropna()
 32 | print("\nData shape after dropping rows with missing values:", df_dropped_rows.shape)
 33 | 
 34 | #Drop columns with any missing values (if appropriate)
 35 | df_dropped_columns = df.dropna(axis=1)
 36 | print("Data shape after dropping columns with missing values:", df_dropped_columns.shape)
 37 | '''
 38 | #3. Convert a columns to numpy array and perform a calculation
 39 | '''
 40 | # Step 1: Clean column names
 41 | df.columns = df.columns.str.strip()
 42 | 
 43 | # Step 2: Confirm column names (optional)
 44 | print("Available columns:")
 45 | print(df.columns.tolist())
 46 | 
 47 | # Step 3: Select 'BACK_AADT' for conversion
 48 | column_name = 'BACK_AADT'
 49 | 
 50 | # Check if column exists
 51 | if column_name in df.columns:
 52 |     # Convert to NumPy array
 53 |     column_array = df[column_name].to_numpy()
 54 | 
 55 |     # Handle potential missing values (optional)
 56 |     column_array = np.nan_to_num(column_array, nan=0)
 57 | 
 58 |     # Perform calculations
 59 |     mean_value = np.mean(column_array)
 60 |     squared_array = np.square(column_array)
 61 | 
 62 |     # Output
 63 |     print(f"\nFirst 5 values in '{column_name}':", column_array[:5])
 64 |     print(f"Mean of '{column_name}': {mean_value}")
 65 |     print(f"First 5 squared values:", squared_array[:5])
 66 | else:
 67 |     print(f"\nError: Column '{column_name}' not found.")
 68 | '''
 69 | #4. create a linePlot to show Traffic trends overtime
 70 | '''
 71 | # Step 1: Clean column names (remove leading/trailing spaces)
 72 | df.columns = df.columns.str.strip()
 73 | 
 74 | # Step 2: Handle missing data
 75 | # Show missing value count per column
 76 | print("\nMissing values before cleaning:")
 77 | print(df.isnull().sum())
 78 | 
 79 | # Drop rows where BOTH BACK_AADT and AHEAD_AADT are missing
 80 | df = df.dropna(subset=['BACK_AADT', 'AHEAD_AADT'], how='all')
 81 | 
 82 | # Fill remaining missing values with 0
 83 | df[['BACK_AADT', 'AHEAD_AADT']] = df[['BACK_AADT', 'AHEAD_AADT']].fillna(0)
 84 | 
 85 | print("\nMissing values after cleaning:")
 86 | print(df.isnull().sum())
 87 | 
 88 | # Step 3: Simulate time axis using row numbers
 89 | df['Time'] = range(1, len(df) + 1)
 90 | 
 91 | # Step 4: Plot traffic volume trends over simulated time
 92 | plt.figure(figsize=(12, 5))
 93 | plt.plot(df['Time'], df['BACK_AADT'], marker='o', linestyle='-', color='blue', label='Back AADT')
 94 | plt.plot(df['Time'], df['AHEAD_AADT'], marker='x', linestyle='-', color='green', label='Ahead AADT')
 95 | plt.xlabel("Time (Simulated)")
 96 | plt.ylabel("Traffic Volume (AADT)")
 97 | plt.title("Traffic Volume Trend Over Simulated Time")
 98 | plt.legend()
 99 | plt.grid(True)
100 | plt.tight_layout()
101 | plt.show()
102 | 
103 | #5. Create a barplot for monthly traffic trends
104 | # Clean column names
105 | df.columns = df.columns.str.strip()
106 | 
107 | # Handle missing data
108 | df = df.dropna(subset=['BACK_AADT', 'AHEAD_AADT'], how='all')
109 | df[['BACK_AADT', 'AHEAD_AADT']] = df[['BACK_AADT', 'AHEAD_AADT']].fillna(0)
110 | 
111 | # Create fake 'Date' column (daily starting from Jan 2023)
112 | df['Date'] = pd.date_range(start='2023-01-01', periods=len(df), freq='D')
113 | 
114 | # Extract month name
115 | df['Month'] = df['Date'].dt.strftime('%B')
116 | 
117 | # Create 'Total_Traffic' column
118 | df['Total_Traffic'] = df['BACK_AADT'] + df['AHEAD_AADT']
119 | 
120 | # Group by month and sum total traffic
121 | month_order = ['January', 'February', 'March', 'April', 'May', 'June',
122 |                'July', 'August', 'September', 'October', 'November', 'December']
123 | 
124 | monthly_total = df.groupby('Month')['Total_Traffic'].sum().reindex(month_order).dropna()
125 | 
126 | # Plot using matplotlib
127 | plt.figure(figsize=(12, 6))
128 | plt.bar(monthly_total.index, monthly_total.values, color='orange')
129 | plt.xlabel("Month")
130 | plt.ylabel("Total Traffic Volume")
131 | plt.title("Monthly Traffic Volume Trends")
132 | plt.xticks(rotation=45)
133 | plt.tight_layout()
134 | plt.show()
135 | 


--------------------------------------------------------------------------------