# Repository listing preserved from the original dump (PythonProjectINT375):
#
#   ├── int 375 project.docx
#   ├── Screenshot 2025-05-02 104742.png
#   ├── Screenshot 2025-05-02 104752.png
#   ├── Screenshot 2025-05-02 104809.png
#   ├── Screenshot 2025-05-02 104818.png
#   ├── Screenshot 2025-05-02 105150.png
#   ├── Screenshot 2025-05-02 105201.png
#   └── Cse_375_Himanshu 123.py
#
# Non-code assets (raw URLs):
#   /int 375 project.docx:
#     https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/int 375 project.docx
#   /Screenshot 2025-05-02 104742.png:
#     https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 104742.png
#   /Screenshot 2025-05-02 104752.png:
#     https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 104752.png
#   /Screenshot 2025-05-02 104809.png:
#     https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 104809.png
#   /Screenshot 2025-05-02 104818.png:
#     https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 104818.png
#   /Screenshot 2025-05-02 105150.png:
#     https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 105150.png
#   /Screenshot 2025-05-02 105201.png:
#     https://raw.githubusercontent.com/HimanshuChaudharii/PythonProjectINT375/HEAD/Screenshot 2025-05-02 105201.png
#
# /Cse_375_Himanshu 123.py:
"""Exploratory analysis of a traffic-volume (AADT) CSV file.

The script performs five steps, in this order:

1. Load the CSV and print its head, info, summary statistics and
   missing-value counts.
2. Report missing values (count and percentage) and the shapes that remain
   after dropping incomplete rows or incomplete columns.
3. Convert one column to a NumPy array and print its mean and squares.
4. Plot ``BACK_AADT`` and ``AHEAD_AADT`` against a simulated time axis.
5. Plot total traffic per month using a simulated daily date column.

Usage::

    python "Cse_375_Himanshu 123.py" [path/to/traffic.csv]

When no path is given, ``DEFAULT_CSV_PATH`` is used.
"""

import sys
from typing import Optional, Sequence, Tuple

import numpy as np
import pandas as pd

# Location used when no CSV path is passed on the command line.
DEFAULT_CSV_PATH = "C:\\Users\\acer\\Downloads\\Traffic_Volumes_AADT (1).csv"

BACK_COLUMN = "BACK_AADT"
AHEAD_COLUMN = "AHEAD_AADT"

# Calendar order, used to sort the month names produced by strftime('%B').
MONTH_ORDER = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]


def inspect_dataset(df: pd.DataFrame) -> None:
    """Print the head, info, summary statistics and null counts of *df*."""
    print("First 5 rows of the dataset:")
    print(df.head())
    print("\nDataset Info:")
    # info() writes its report itself and returns None, so wrapping it in
    # print() would emit a stray "None" line.
    df.info()
    print("\nSummary Statistics:")
    print(df.describe())
    print("\nMissing Values:")
    print(df.isnull().sum())


def report_missing_values(df: pd.DataFrame) -> None:
    """Print missing-value counts/percentages and the shapes after dropna."""
    missing_counts = df.isnull().sum()
    print("Missing Values (Count):")
    print(missing_counts)

    print("\nMissing Values (%):")
    print((missing_counts / len(df)) * 100)

    print("\nData shape after dropping rows with missing values:",
          df.dropna().shape)
    print("Data shape after dropping columns with missing values:",
          df.dropna(axis=1).shape)


def column_statistics(
    df: pd.DataFrame, column_name: str
) -> Optional[Tuple[np.ndarray, float, np.ndarray]]:
    """Return ``(values, mean, squares)`` for one column of *df*.

    NaN entries are replaced by 0 before the mean is taken, so missing
    values pull the reported mean towards zero.

    Returns ``None`` when *column_name* is not a column of *df*.
    """
    if column_name not in df.columns:
        return None
    values = np.nan_to_num(df[column_name].to_numpy(), nan=0)
    return values, np.mean(values), np.square(values)


def report_column_statistics(df: pd.DataFrame, column_name: str) -> None:
    """Print the first values, mean and first squares of one column."""
    stats = column_statistics(df, column_name)
    if stats is None:
        print(f"\nError: Column '{column_name}' not found.")
        return
    values, mean_value, squared = stats
    print(f"\nFirst 5 values in '{column_name}':", values[:5])
    print(f"Mean of '{column_name}': {mean_value}")
    print("First 5 squared values:", squared[:5])


def clean_traffic_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df*; the input frame is left untouched.

    Rows where BOTH traffic columns are missing are dropped; any remaining
    gaps in those two columns are filled with 0.
    """
    traffic_columns = [BACK_COLUMN, AHEAD_COLUMN]
    # Explicit copy: assigning into the result of dropna() is otherwise
    # ambiguous under pandas' copy/view rules.
    cleaned = df.dropna(subset=traffic_columns, how="all").copy()
    cleaned[traffic_columns] = cleaned[traffic_columns].fillna(0)
    return cleaned


def monthly_traffic_totals(
    df: pd.DataFrame, start: str = "2023-01-01"
) -> pd.Series:
    """Return total traffic per month name, in calendar order.

    The data has no real date column, so each row is assigned a simulated
    consecutive day beginning at *start*. Months with no rows are omitted.
    """
    dated = df.assign(
        Date=pd.date_range(start=start, periods=len(df), freq="D")
    )
    dated["Month"] = dated["Date"].dt.strftime("%B")
    dated["Total_Traffic"] = dated[BACK_COLUMN] + dated[AHEAD_COLUMN]
    totals = dated.groupby("Month")["Total_Traffic"].sum()
    return totals.reindex(MONTH_ORDER).dropna()


def plot_traffic_trend(df: pd.DataFrame) -> None:
    """Show a line plot of both AADT columns against the ``Time`` column."""
    # Imported here so the data helpers above stay importable on machines
    # without matplotlib or without a display.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(12, 5))
    plt.plot(df["Time"], df[BACK_COLUMN], marker="o", linestyle="-",
             color="blue", label="Back AADT")
    plt.plot(df["Time"], df[AHEAD_COLUMN], marker="x", linestyle="-",
             color="green", label="Ahead AADT")
    plt.xlabel("Time (Simulated)")
    plt.ylabel("Traffic Volume (AADT)")
    plt.title("Traffic Volume Trend Over Simulated Time")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


def plot_monthly_totals(monthly_total: pd.Series) -> None:
    """Show a bar plot of total traffic volume per month."""
    import matplotlib.pyplot as plt

    plt.figure(figsize=(12, 6))
    plt.bar(monthly_total.index, monthly_total.values, color="orange")
    plt.xlabel("Month")
    plt.ylabel("Total Traffic Volume")
    plt.title("Monthly Traffic Volume Trends")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Run the full analysis.

    *argv* holds the command-line arguments without the program name; its
    first element, when present, overrides ``DEFAULT_CSV_PATH``. ``None``
    means "use ``sys.argv[1:]``".
    """
    args = sys.argv[1:] if argv is None else list(argv)
    csv_path = args[0] if args else DEFAULT_CSV_PATH
    df = pd.read_csv(csv_path)

    # 1. Load and inspect the dataset.
    inspect_dataset(df)

    # 2. Handle missing data.
    report_missing_values(df)

    # 3. Convert a column to a NumPy array and perform a calculation.
    # Header names may carry stray whitespace; strip it once here so every
    # later step can rely on the clean names.
    df.columns = df.columns.str.strip()
    print("Available columns:")
    print(df.columns.tolist())
    report_column_statistics(df, BACK_COLUMN)

    # 4. Line plot of traffic volume over a simulated time axis.
    print("\nMissing values before cleaning:")
    print(df.isnull().sum())
    df = clean_traffic_columns(df)
    print("\nMissing values after cleaning:")
    print(df.isnull().sum())
    df["Time"] = range(1, len(df) + 1)  # row number stands in for time
    plot_traffic_trend(df)

    # 5. Bar plot of monthly traffic totals.
    plot_monthly_totals(monthly_traffic_totals(df))


if __name__ == "__main__":
    main()