├── README.md └── project.py /README.md: -------------------------------------------------------------------------------- 1 | ![image](https://github.com/user-attachments/assets/7c1f06ac-2fd3-43d4-8446-d626cfd5e411) 2 | 3 | # Austin-Animal-Center-Analysis 4 | 📌Austin Animal Center Outcomes 5 | 6 | ➡️ This Project analyzes data from the Austin Animal Center, the largest 'No-Kill' animal shelter in the United States. 7 | 8 | 📁 Dataset 9 | The data comes from the publicly available dataset, https://catalog.data.gov/dataset/austin-animal-center-outcomes. It includes detailed information about animals entering the shelter, their intake type, medical status, and final outcomes. 10 | 11 | 🎯 Project Objectives 12 | Analyze the success rate of Austin’s No-Kill Policy 13 | Study animal outcome types by species 14 | Investigate age-wise adoption patterns 15 | Explore the impact of neutered/spayed/intact status 16 | Examine seasonal and yearly outcome trends 17 | Assess the effect of color and breed on adoptions 18 | Understand the impact of intake conditions on final outcomes 19 | 20 | 🧼 Data Cleaning 21 | Before diving into the analysis, the dataset was thoroughly cleaned: 22 | Handled missing and inconsistent values 23 | Standardized categorical variables 24 | Parsed and converted date formats 25 | Removed duplicates for reliable insights 26 | 27 | 📊 Exploratory Data Analysis (EDA) 28 | To uncover patterns and trends, the following techniques and visualizations were used: 29 | Pie charts, stacked bar charts, and boxplots 30 | Time series analysis 31 | Distribution comparisons 32 | Correlation checks 33 | 34 | 🛠️ Libraries Used 35 | pandas 36 | numpy 37 | matplotlib 38 | seaborn 39 | datetime 40 | 41 | 🔍 Key Insights 42 | Neutered/spayed animals had significantly better outcomes 43 | Younger animals were adopted at higher rates 44 | Breed and color influenced adoption likelihood 45 | Seasonal trends showed adoption surges in specific months 46 | Intake conditions played a 
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, DateFormatter

# Import the dataset
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists.
df = pd.read_csv("C:\\Users\\Nishtha Sethi\\Downloads\\Austin_Animal_Center_Outcomes.csv")

# Getting file information
print("First few rows of the dataset:")
print(df.head())
print("\nDataset Info:")
# DataFrame.info() prints its own summary and returns None, so wrapping it in
# print() emitted an extra "None" line.
df.info()

# Cleaning up the data
# Treat empty strings as missing, then fold the 'Unknown' placeholder of each
# categorical column into a concrete category.
df.replace('', pd.NA, inplace=True)
df['Outcome Type'] = df['Outcome Type'].replace('Unknown', 'Other')
df['Sex upon Outcome'] = df['Sex upon Outcome'].replace('Unknown', 'Intact')
df['Animal Type'] = df['Animal Type'].replace('Unknown', 'Other')
df['Breed'] = df['Breed'].replace('Unknown', 'Mixed')
df['Color'] = df['Color'].replace('Unknown', 'Brown/Black')
# fillna() matches the imputation used for the other columns below.
df['Name'] = df['Name'].fillna('No Name')

# Impute remaining gaps in the categorical columns with each column's mode.
for col in ['Outcome Type', 'Sex upon Outcome', 'Animal Type', 'Breed', 'Color', 'Age upon Outcome']:
    if df[col].isnull().any():
        df[col] = df[col].fillna(df[col].mode()[0])
df.drop_duplicates(inplace=True)
# Timestamps that do not match the expected format become NaT and are dropped.
df['DateTime'] = pd.to_datetime(df['DateTime'], format="%m/%d/%Y %I:%M:%S %p", errors='coerce')
df = df.dropna(subset=['DateTime'])
# NOTE(review): this drops every row with a missing value in ANY remaining
# column, including columns that were not imputed above -- confirm that is
# intended.
df.dropna(inplace=True)
print("\nMissing values after final cleaning:")
print(df.isnull().sum())

# ========== Objective 1: Quantify "No-Kill" policy ==========
live_outcomes = ['Adoption', 'Transfer', 'Return to Owner']
df['Is_Live'] = df['Outcome Type'].isin(live_outcomes)
live_rate = df['Is_Live'].mean() * 100
non_live_rate = 100 - live_rate
print(f"\nLive Outcome Rate: {live_rate:.2f}%")
print(f"Non-Live Outcome Rate: {non_live_rate:.2f}%")
# value_counts() orders rows by frequency, not by value. Pin the order to
# (True, False) so each slice always lines up with its label and colour, and
# so a missing class yields a zero-sized slice instead of a length mismatch.
outcome_counts = df['Is_Live'].value_counts().reindex([True, False], fill_value=0)
labels = ['Live Outcomes', 'Non-Live Outcomes']
colors = ['green', 'red']
plt.figure(figsize=(6, 6))
plt.pie(outcome_counts, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
plt.title('Proportion of Live vs Non-Live Outcomes')
plt.axis('equal')
plt.show()

# ========== Objective 2: Analyze outcome types across animal types ==========
# One row per (animal type, outcome type) pair with its number of records.
grouped = df.groupby(['Animal Type', 'Outcome Type']).size().reset_index(name='Count')
plt.figure(figsize=(12, 6))
sns.barplot(data=grouped, x='Outcome Type', y='Count', hue='Animal Type')
plt.xticks(rotation=45)
plt.title('Outcome Types by Animal Type')
plt.xlabel('Outcome Type')
plt.ylabel('Count')
plt.legend(title='Animal Type')
plt.tight_layout()
plt.show()
def convert_age_to_weeks(age_str):
    """Convert an age description such as ``"2 years"`` into weeks.

    The first whitespace-separated token must be an integer count and the
    second a unit containing ``day``, ``week``, ``month`` or ``year``
    (case-insensitive, singular or plural). Any further tokens are ignored.

    The conversion is approximate: a month counts as 4 weeks and a year as
    52 weeks.

    Args:
        age_str: The age text to parse. Non-string values (for example NaN
            or None for a missing cell) are treated as unparseable.

    Returns:
        The age in weeks: a float for day-based ages (``count / 7``), an int
        otherwise. ``None`` when the value cannot be parsed or the unit is
        not recognised.
    """
    if not isinstance(age_str, str):
        return None
    try:
        number, unit = age_str.split()[:2]
        number = int(number)
    except ValueError:
        # Fewer than two tokens, or a count that is not an integer. Only
        # this expected parse failure is handled; anything else propagates
        # instead of being silently swallowed.
        return None

    unit = unit.lower()
    if 'day' in unit:
        return number / 7
    if 'week' in unit:
        return number
    if 'month' in unit:
        return number * 4
    if 'year' in unit:
        return number * 52
    return None
# ========== Objective 3: Age distribution of adopted animals ==========
df['AgeWeeks'] = df['Age upon Outcome'].apply(convert_age_to_weeks)
# Work on a copy so dropping unparseable ages does not touch df itself.
adopted_df = df[df['Outcome Type'] == 'Adoption'].copy()
adopted_df.dropna(subset=['AgeWeeks'], inplace=True)
plt.figure(figsize=(10, 6))
sns.histplot(adopted_df['AgeWeeks'], bins=30, kde=True, color='orange')
plt.title('Distribution of Age (in Weeks) Among Adopted Animals', fontsize=14)
plt.xlabel('Age in Weeks')
plt.ylabel('Number of Adopted Animals')
plt.grid(True, linestyle='--', alpha=0.5)
plt.show()

# ========== Objective 4: Effect of Neutered/Spayed on Outcome ==========
print("\nOutcome distribution by Neutering status:")
# Row-normalised crosstab: each row is one sex/neuter status and sums to 100%.
neuter_outcome = pd.crosstab(df['Sex upon Outcome'], df['Outcome Type'], normalize='index') * 100
print(neuter_outcome)
statuses = ['Neutered Male', 'Spayed Female', 'Intact Male', 'Intact Female']
df_filtered = df[df['Sex upon Outcome'].isin(statuses)]
adopt_counts = df_filtered[df_filtered['Outcome Type'] == 'Adoption']['Sex upon Outcome'].value_counts()
total_counts = df_filtered['Sex upon Outcome'].value_counts()
# reindex() fixes the display order; statuses with no adoptions show as 0.
adoption_rates = (adopt_counts / total_counts * 100).reindex(statuses).fillna(0)
print("\nAdoption Rate (%) by Sex upon Outcome:")
print(adoption_rates)
plt.figure(figsize=(8, 5))
sns.barplot(x=adoption_rates.index, y=adoption_rates.values,
            hue=adoption_rates.index, palette='pastel', legend=False)
plt.title('Adoption Rate by Neutered/Spayed/Intact Status')
plt.ylabel('Adoption Rate (%)')
plt.xlabel('Sex upon Outcome')
plt.ylim(0, 100)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

# ========== Objective 5: Adoption Trends Over Time ==========
df['Year'] = df['DateTime'].dt.year
df['Month'] = df['DateTime'].dt.month
df['YearMonth'] = df['DateTime'].dt.to_period('M')
adoptions = df[df['Outcome Type'] == 'Adoption']
monthly_adoptions = adoptions.groupby('YearMonth').size().reset_index(name='AdoptionCount')
# Periods are converted to timestamps so matplotlib's date locator/formatter
# can be applied to the x axis.
monthly_adoptions['YearMonth'] = monthly_adoptions['YearMonth'].dt.to_timestamp()
yearly_adoptions = adoptions.groupby('Year').size().reset_index(name='AdoptionCount')
plt.figure(figsize=(14, 6))
plt.subplot(1, 2, 1)
ax1 = plt.gca()
sns.lineplot(data=monthly_adoptions, x='YearMonth', y='AdoptionCount',
             color='blue', linewidth=2, marker='o', markersize=5, ax=ax1)
plt.title('Monthly Adoption Trends (2013-2025)', fontsize=14, pad=20)
plt.xlabel('Year', fontsize=12)
plt.ylabel('Number of Adoptions', fontsize=12)
ax1.xaxis.set_major_locator(YearLocator())
ax1.xaxis.set_major_formatter(DateFormatter('%Y'))
plt.xticks(rotation=45)
plt.grid(True, linestyle='--', alpha=0.7)
plt.subplot(1, 2, 2)
sns.lineplot(data=yearly_adoptions, x='Year', y='AdoptionCount', marker='o')
plt.title('Yearly Adoption Trends (2013-2025)')
plt.xlabel('Year')
plt.ylabel('Number of Adoptions')
plt.grid(True)
plt.tight_layout()
plt.show()

# ========== Objective 6: Color impact on outcomes ==========
adopted_df = df[df['Outcome Type'] == 'Adoption']
color_adoptions = adopted_df.groupby('Color').size().reset_index(name='Adoption Count')
top_colors = color_adoptions.sort_values(by='Adoption Count', ascending=False).head(15)
plt.figure(figsize=(12, 6))
sns.barplot(data=top_colors, x='Adoption Count', y='Color', hue='Color', palette='viridis', legend=False)
plt.title('Top 15 Coat Colors by Number of Adoptions')
plt.xlabel('Number of Adoptions')
plt.ylabel('Coat Color')
plt.tight_layout()
plt.show()

# ========== Objective 7: Chances of specific outcomes by species ==========
# fill_value=0 (as in Objective 8) keeps species/outcome pairs that never
# occur as 0% rather than NaN in the printed table and the stacked bars.
species_outcome_chances = df.groupby(['Animal Type', 'Outcome Type']).size().unstack(fill_value=0)
species_outcome_percent = species_outcome_chances.div(species_outcome_chances.sum(axis=1), axis=0) * 100
print("\nChances of Specific Outcomes by Species (in %):")
print(species_outcome_percent.round(2))
species_outcome_percent.plot(kind='bar', stacked=True, colormap='Set2')
plt.title('Chances of Specific Outcomes by Species')
plt.ylabel('Percentage')
plt.xticks(rotation=0)
plt.legend(title='Outcome Type', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# ========== Objective 8: Chances of specific outcomes by breed ==========
breed_outcome_counts = df.groupby(['Breed', 'Outcome Type']).size().unstack(fill_value=0)
breed_outcome_percent = breed_outcome_counts.div(breed_outcome_counts.sum(axis=1), axis=0) * 100
# Restrict the chart to the ten breeds with the most records.
top_breeds = df['Breed'].value_counts().head(10).index
top_breed_outcomes = breed_outcome_percent.loc[top_breeds]
print("\nChances of Specific Outcomes by Top Breeds (in %):")
print(top_breed_outcomes.round(2))
top_breed_outcomes.plot(kind='bar', stacked=True, colormap='tab20')
plt.title('Chances of Specific Outcomes by Top 10 Breeds')
plt.ylabel('Percentage')
plt.xticks(rotation=45, ha='right')
plt.legend(title='Outcome Type', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# ========== Correlation Heatmap using AgeWeeks ==========
plt.figure(figsize=(6, 4))
# Rows whose age could not be parsed are excluded before correlating.
sns.heatmap(df[['AgeWeeks', 'Is_Live']].dropna().corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap (Age in Weeks vs Live Outcome)')
plt.show()
def convert_age_to_days(age_str):
    """Convert an age description such as ``"2 years"`` into days.

    The first whitespace-separated token must be an integer count and the
    second a unit containing ``day``, ``week``, ``month`` or ``year``
    (case-insensitive, singular or plural). Any further tokens are ignored.

    The conversion is approximate: a month counts as 30 days and a year as
    365 days.

    Args:
        age_str: The age text to parse. Non-string values (for example NaN
            or None for a missing cell) are treated as unparseable.

    Returns:
        The age in days as an int, or ``None`` when the value cannot be
        parsed or the unit is not recognised.
    """
    if not isinstance(age_str, str):
        return None
    try:
        number, unit = age_str.split()[:2]
        number = int(number)
    except ValueError:
        # Fewer than two tokens, or a count that is not an integer. Only
        # this expected parse failure is handled; anything else propagates
        # instead of being silently swallowed.
        return None

    unit = unit.lower()
    if 'day' in unit:
        return number
    if 'week' in unit:
        return number * 7
    if 'month' in unit:
        return number * 30
    if 'year' in unit:
        return number * 365
    return None