├── .DS_Store
├── E-commerce Customer Churn Dashboard.PNG
├── EcommerceCustomerChurnAnalysis.sql
├── README.md
└── ecommerce_churn.csv


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Judithokon/Ecommerce-Customer-Churn-Analysis-Using-SQL/3c96d4c804ce8edfb7503e67d4c3cde642fcde35/.DS_Store


--------------------------------------------------------------------------------
/E-commerce Customer Churn Dashboard.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Judithokon/Ecommerce-Customer-Churn-Analysis-Using-SQL/3c96d4c804ce8edfb7503e67d4c3cde642fcde35/E-commerce Customer Churn Dashboard.PNG


--------------------------------------------------------------------------------
/EcommerceCustomerChurnAnalysis.sql:
--------------------------------------------------------------------------------
/********************
  Data Cleaning
*********************/


-- 1. Find the total number of customers
-- DISTINCT has to sit inside COUNT so that each CustomerID is counted once.
-- Written as SELECT DISTINCT COUNT(CustomerID), DISTINCT would only de-duplicate
-- the single-row result and repeated IDs would still be counted several times.
SELECT COUNT(DISTINCT CustomerID) AS TotalNumberOfCustomers
FROM ecommercechurn;
-- Answer = There are 5,630 customers in this dataset

-- 2. Check for duplicate rows
-- Lists every CustomerID that occurs more than once together with how often it occurs.
-- An empty result means no CustomerID is repeated.
SELECT
    CustomerID,
    COUNT(CustomerID) AS Count
FROM ecommercechurn
GROUP BY CustomerID
HAVING COUNT(CustomerID) > 1;
-- Answer = There are no duplicate rows

-- 3. 
-- Check for null values count for columns with null values
-- Each branch reports how many rows hold NULL in one column. Every branch has a
-- different ColumnName label, so the branches can never yield duplicate rows;
-- UNION ALL therefore returns the same rows as UNION without the extra
-- de-duplication step.
SELECT 'Tenure' AS ColumnName, COUNT(*) AS NullCount
FROM ecommercechurn
WHERE Tenure IS NULL
UNION ALL
SELECT 'WarehouseToHome' AS ColumnName, COUNT(*) AS NullCount
FROM ecommercechurn
WHERE warehousetohome IS NULL
UNION ALL
SELECT 'HourSpendonApp' AS ColumnName, COUNT(*) AS NullCount
FROM ecommercechurn
WHERE hourspendonapp IS NULL
UNION ALL
SELECT 'OrderAmountHikeFromLastYear' AS ColumnName, COUNT(*) AS NullCount
FROM ecommercechurn
WHERE orderamounthikefromlastyear IS NULL
UNION ALL
SELECT 'CouponUsed' AS ColumnName, COUNT(*) AS NullCount
FROM ecommercechurn
WHERE couponused IS NULL
UNION ALL
SELECT 'OrderCount' AS ColumnName, COUNT(*) AS NullCount
FROM ecommercechurn
WHERE ordercount IS NULL
UNION ALL
SELECT 'DaySinceLastOrder' AS ColumnName, COUNT(*) AS NullCount
FROM ecommercechurn
WHERE daysincelastorder IS NULL;

-- 3.1 Handle null values
-- We will fill null values with their mean.
-- Mean imputation: in each statement the one-row derived table "m" holds the
-- column average (AVG skips NULLs) and is cross-joined onto the rows whose value
-- is NULL, so only those rows are rewritten.
-- NOTE(review): if any of these columns is an integer type, AVG in SQL Server
-- returns an integer (fraction dropped) -- confirm the column types.

UPDATE e
SET e.Hourspendonapp = m.MeanValue
FROM ecommercechurn AS e
CROSS JOIN (
    SELECT AVG(Hourspendonapp) AS MeanValue
    FROM ecommercechurn
) AS m
WHERE e.Hourspendonapp IS NULL;

UPDATE e
SET e.tenure = m.MeanValue
FROM ecommercechurn AS e
CROSS JOIN (
    SELECT AVG(tenure) AS MeanValue
    FROM ecommercechurn
) AS m
WHERE e.tenure IS NULL;

UPDATE e
SET e.orderamounthikefromlastyear = m.MeanValue
FROM ecommercechurn AS e
CROSS JOIN (
    SELECT AVG(orderamounthikefromlastyear) AS MeanValue
    FROM ecommercechurn
) AS m
WHERE e.orderamounthikefromlastyear IS NULL;

UPDATE e
SET e.WarehouseToHome = m.MeanValue
FROM ecommercechurn AS e
CROSS JOIN (
    SELECT AVG(WarehouseToHome) AS MeanValue
    FROM ecommercechurn
) AS m
WHERE e.WarehouseToHome IS NULL;

UPDATE e
SET e.couponused = m.MeanValue
FROM ecommercechurn AS e
CROSS JOIN (
    SELECT AVG(couponused) AS MeanValue
    FROM ecommercechurn
) AS m
WHERE e.couponused IS NULL;

UPDATE e
SET e.ordercount = m.MeanValue
FROM ecommercechurn AS e
CROSS JOIN (
    SELECT AVG(ordercount) AS MeanValue
    FROM ecommercechurn
) AS m
WHERE e.ordercount IS NULL;

UPDATE e
SET e.daysincelastorder = m.MeanValue
FROM ecommercechurn AS e
CROSS JOIN (
    SELECT AVG(daysincelastorder) AS MeanValue
    FROM ecommercechurn
) AS m
WHERE e.daysincelastorder IS NULL;


-- 4. Create a new column based off the values of churn column.
-- The churn column holds 0 and 1, where 0 means stayed and 1 means churned.
-- The new column customerstatus shows 'Stayed' and 'Churned' instead of 0 and 1.
-- Any other churn value (including NULL) leaves customerstatus NULL.
-- NOTE(review): assuming SQL Server, the ALTER TABLE has to run in its own batch
-- (e.g. followed by GO in SSMS/sqlcmd) before the UPDATE can see the new column.
ALTER TABLE ecommercechurn
ADD CustomerStatus NVARCHAR(50);

UPDATE ecommercechurn
SET CustomerStatus = CASE Churn
                         WHEN 1 THEN 'Churned'
                         WHEN 0 THEN 'Stayed'
                     END;

-- 5. Create a new column based off the values of complain column.
-- The complain column holds 0 and 1, where 0 means No and 1 means Yes.
-- The new column complainrecieved shows 'Yes' and 'No' instead of 0 and 1.
-- Any other complain value (including NULL) leaves complainrecieved NULL.
ALTER TABLE ecommercechurn
ADD ComplainRecieved NVARCHAR(10);

UPDATE ecommercechurn
SET ComplainRecieved = CASE complain
                           WHEN 1 THEN 'Yes'
                           WHEN 0 THEN 'No'
                       END;


-- 6. 
-- Check values in each column for correctness and accuracy

-- 6.1 a) Check distinct values for preferredlogindevice column
SELECT DISTINCT preferredlogindevice
FROM ecommercechurn;
-- the result shows phone and mobile phone which indicates the same thing, so I will replace mobile phone with phone

-- 6.1 b) Replace mobile phone with phone
-- NOTE(review): both literals are lower-case; matching and grouping with any
-- differently-cased stored values presumably relies on a case-insensitive
-- collation -- confirm against the data.
UPDATE ecommercechurn
SET preferredlogindevice = 'phone'
WHERE preferredlogindevice = 'mobile phone';

-- 6.2 a) Check distinct values for preferedordercat column
SELECT DISTINCT preferedordercat
FROM ecommercechurn;
-- the result shows mobile phone and mobile, so I replace mobile with mobile phone

-- 6.2 b) Replace mobile with mobile phone
UPDATE ecommercechurn
SET preferedordercat = 'Mobile Phone'
WHERE preferedordercat = 'Mobile';

-- 6.3 a) Check distinct values for preferredpaymentmode column
SELECT DISTINCT PreferredPaymentMode
FROM ecommercechurn;
-- the result shows Cash on Delivery and COD which mean the same thing, so I replace COD with Cash on Delivery

-- 6.3 b) Replace COD with Cash on Delivery
UPDATE ecommercechurn
SET PreferredPaymentMode = 'Cash on Delivery'
WHERE PreferredPaymentMode = 'COD';

-- 6.4 a) check distinct value in warehousetohome column
SELECT DISTINCT warehousetohome
FROM ecommercechurn;
-- I can see two values 126 and 127 that are outliers, it could be a data entry error, so I will correct it to 26 & 27 respectively

-- warehousetohome is numeric (it is averaged and range-compared elsewhere in this
-- script), so numeric literals are used below instead of quoted strings to avoid
-- implicit string-to-number conversion.

-- 6.4 b) Replace value 127 with 27
UPDATE ecommercechurn
SET warehousetohome = 27
WHERE warehousetohome = 127;

-- 6.4 c) Replace value 126 with 26
UPDATE ecommercechurn
SET warehousetohome = 26
WHERE warehousetohome = 126;


/**************************************************
Data Exploration and Answering 
business questions
***************************************************/


-- 1. What is the overall customer churn rate?
-- One pass over the table instead of cross-joining two separate scans.
--   * COUNT(CASE ...) counts only the rows labelled 'Churned'; the literal matches
--     the label written by the CustomerStatus UPDATE exactly, so the result does
--     not depend on a case-insensitive collation.
--   * NULLIF turns an empty table into a NULL rate instead of a divide-by-zero error.
SELECT
    COUNT(*) AS TotalNumberofCustomers,
    COUNT(CASE WHEN CustomerStatus = 'Churned' THEN 1 END) AS TotalNumberofChurnedCustomers,
    CAST(
        COUNT(CASE WHEN CustomerStatus = 'Churned' THEN 1 END) * 1.0
        / NULLIF(COUNT(*), 0) * 100
        AS DECIMAL(10,2)
    ) AS ChurnRate
FROM ecommercechurn;
-- Answer = The Churn rate is 16.84%


-- 2. How does the churn rate vary based on the preferred login device?
-- SUM(churn) counts churned customers because churn is 1 for churned and 0 otherwise;
-- "* 1.0" forces decimal division before the result is scaled to a percentage.
SELECT
    preferredlogindevice,
    COUNT(*) AS TotalCustomers,
    SUM(churn) AS ChurnedCustomers,
    CAST(SUM(churn) * 1.0 / COUNT(*) * 100 AS DECIMAL(10,2)) AS ChurnRate
FROM ecommercechurn
GROUP BY preferredlogindevice;
-- Answer = The prefered login devices are computer and phone. Computer accounts for the highest churnrate
-- with 19.83% and then phone with 15.62%.


-- 3. What is the distribution of customers across different city tiers?
-- Customer count, churned count and churn rate per city tier, highest churn rate first.
SELECT
    citytier,
    COUNT(*) AS TotalCustomer,
    SUM(churn) AS ChurnedCustomers,
    CAST(SUM(churn) * 1.0 / COUNT(*) * 100 AS DECIMAL(10,2)) AS ChurnRate
FROM ecommercechurn
GROUP BY citytier
ORDER BY ChurnRate DESC;
-- Answer = citytier3 has the highest churn rate, followed by citytier2 and then citytier1 has the least churn rate.


-- 4. Is there any correlation between the warehouse-to-home distance and customer churn?
-- Firstly, we will create a new column that provides a distance range based on the values in warehousetohome column
-- NOTE(review): assuming SQL Server, run the ALTER TABLE in its own batch
-- (e.g. followed by GO) before the UPDATE that fills the new column.
ALTER TABLE ecommercechurn
ADD warehousetohomerange NVARCHAR(50);

-- WHEN branches are tested top to bottom, so each branch only needs its upper
-- bound: a value reaching the second branch is already known to be > 10, etc.
-- A NULL distance matches no branch and leaves the range NULL.
UPDATE ecommercechurn
SET warehousetohomerange = CASE
                               WHEN warehousetohome <= 10 THEN 'Very close distance'
                               WHEN warehousetohome <= 20 THEN 'Close distance'
                               WHEN warehousetohome <= 30 THEN 'Moderate distance'
                               WHEN warehousetohome > 30 THEN 'Far distance'
                           END;

-- Finding correlation between warehousetohome and churnrate
-- The inner query counts customers and churned customers per distance range;
-- the outer query turns those counts into a percentage.
SELECT
    g.warehousetohomerange,
    g.TotalCustomer,
    g.CustomerChurn,
    CAST(g.CustomerChurn * 1.0 / g.TotalCustomer * 100 AS DECIMAL(10,2)) AS Churnrate
FROM (
    SELECT
        warehousetohomerange,
        COUNT(*) AS TotalCustomer,
        SUM(Churn) AS CustomerChurn
    FROM ecommercechurn
    GROUP BY warehousetohomerange
) AS g
ORDER BY Churnrate DESC;
-- Answer = The churn rate increases as the warehousetohome distance increases


-- 5. Which is the most prefered payment mode among churned customers?
-- NOTE(review): the rows are ordered by churn RATE per payment mode, not by the
-- number of churned customers (CustomerChurn); the top row is the mode with the
-- highest churn rate -- confirm this is the intended reading of the question.
SELECT
    g.preferredpaymentmode,
    g.TotalCustomer,
    g.CustomerChurn,
    CAST(g.CustomerChurn * 1.0 / g.TotalCustomer * 100 AS DECIMAL(10,2)) AS Churnrate
FROM (
    SELECT
        preferredpaymentmode,
        COUNT(*) AS TotalCustomer,
        SUM(Churn) AS CustomerChurn
    FROM ecommercechurn
    GROUP BY preferredpaymentmode
) AS g
ORDER BY Churnrate DESC;
-- Answer = The most prefered payment mode among churned customers is Cash on Delivery


-- 6. What is the typical tenure for churned customers?
-- Firstly, we will create a new column that provides a tenure range based on the values in tenure column
-- NOTE(review): assuming SQL Server, run the ALTER TABLE in its own batch
-- (e.g. followed by GO) before the UPDATE that fills the new column.
ALTER TABLE ecommercechurn
ADD TenureRange NVARCHAR(50);

-- WHEN branches are tested top to bottom, so each branch only needs its upper
-- bound. A NULL tenure matches no branch and leaves TenureRange NULL.
UPDATE ecommercechurn
SET TenureRange = CASE
                      WHEN tenure <= 6 THEN '6 Months'
                      WHEN tenure <= 12 THEN '1 Year'
                      WHEN tenure <= 24 THEN '2 Years'
                      WHEN tenure > 24 THEN 'more than 2 years'
                  END;

-- Finding typical tenure for churned customers
-- Inner query: customers and churned customers per tenure range.
-- Outer query: churn rate as a percentage, highest first.
SELECT
    g.TenureRange,
    g.TotalCustomer,
    g.CustomerChurn,
    CAST(g.CustomerChurn * 1.0 / g.TotalCustomer * 100 AS DECIMAL(10,2)) AS Churnrate
FROM (
    SELECT
        TenureRange,
        COUNT(*) AS TotalCustomer,
        SUM(Churn) AS CustomerChurn
    FROM ecommercechurn
    GROUP BY TenureRange
) AS g
ORDER BY Churnrate DESC;
-- Answer = Most customers churned within a 6 months tenure period


-- 7. Is there any difference in churn rate between male and female customers?
SELECT
    g.gender,
    g.TotalCustomer,
    g.CustomerChurn,
    CAST(g.CustomerChurn * 1.0 / g.TotalCustomer * 100 AS DECIMAL(10,2)) AS Churnrate
FROM (
    SELECT
        gender,
        COUNT(*) AS TotalCustomer,
        SUM(Churn) AS CustomerChurn
    FROM ecommercechurn
    GROUP BY gender
) AS g
ORDER BY Churnrate DESC;
-- Answer = More men churned in comparison to women


-- 8. How does the average time spent on the app differ for churned and non-churned customers?
-- One row per customerstatus value with the mean of hourspendonapp for that group.
SELECT
    customerstatus,
    AVG(hourspendonapp) AS AverageHourSpentonApp
FROM ecommercechurn
GROUP BY customerstatus;
-- Answer = There is no difference between the average time spent on the app for churned and non-churned customers


-- 9. Does the number of registered devices impact the likelihood of churn?
-- Churn rate per number of registered devices.
-- In each query below the inner query counts customers and churned customers
-- per group (SUM(Churn) adds up the 0/1 churn flag) and the outer query converts
-- the counts to a percentage, listing the highest churn rate first.
SELECT
    g.NumberofDeviceRegistered,
    g.TotalCustomer,
    g.CustomerChurn,
    CAST(g.CustomerChurn * 1.0 / g.TotalCustomer * 100 AS DECIMAL(10,2)) AS Churnrate
FROM (
    SELECT
        NumberofDeviceRegistered,
        COUNT(*) AS TotalCustomer,
        SUM(Churn) AS CustomerChurn
    FROM ecommercechurn
    GROUP BY NumberofDeviceRegistered
) AS g
ORDER BY Churnrate DESC;
-- Answer = As the number of registered devices increases the churn rate increases.


-- 10. Which order category is most prefered among churned customers?
SELECT
    g.preferedordercat,
    g.TotalCustomer,
    g.CustomerChurn,
    CAST(g.CustomerChurn * 1.0 / g.TotalCustomer * 100 AS DECIMAL(10,2)) AS Churnrate
FROM (
    SELECT
        preferedordercat,
        COUNT(*) AS TotalCustomer,
        SUM(Churn) AS CustomerChurn
    FROM ecommercechurn
    GROUP BY preferedordercat
) AS g
ORDER BY Churnrate DESC;
-- Answer = Mobile phone category has the highest churn rate and grocery has the least churn rate


-- 11. Is there any relationship between customer satisfaction scores and churn?
SELECT
    g.satisfactionscore,
    g.TotalCustomer,
    g.CustomerChurn,
    CAST(g.CustomerChurn * 1.0 / g.TotalCustomer * 100 AS DECIMAL(10,2)) AS Churnrate
FROM (
    SELECT
        satisfactionscore,
        COUNT(*) AS TotalCustomer,
        SUM(Churn) AS CustomerChurn
    FROM ecommercechurn
    GROUP BY satisfactionscore
) AS g
ORDER BY Churnrate DESC;
-- Answer = Customer satisfaction score of 5 has the highest churn rate, satisfaction score of 1 has the least churn rate


-- 12. Does the marital status of customers influence churn behavior?
SELECT
    g.maritalstatus,
    g.TotalCustomer,
    g.CustomerChurn,
    CAST(g.CustomerChurn * 1.0 / g.TotalCustomer * 100 AS DECIMAL(10,2)) AS Churnrate
FROM (
    SELECT
        maritalstatus,
        COUNT(*) AS TotalCustomer,
        SUM(Churn) AS CustomerChurn
    FROM ecommercechurn
    GROUP BY maritalstatus
) AS g
ORDER BY Churnrate DESC;
-- Answer = Single customers have the highest churn rate while married customers have the least churn rate


-- 13. How many addresses do churned customers have on average?
-- Average number of addresses for CHURNED customers.
-- The filter must select the 'Churned' label; filtering on 'stayed' would report
-- the average for retained customers under a churned-customer column name.
-- The literal matches the stored label exactly so the result does not depend on
-- a case-insensitive collation.
SELECT AVG(numberofaddress) AS Averagenumofchurnedcustomeraddress
FROM ecommercechurn
WHERE customerstatus = 'Churned';
-- Answer = On average, churned customers have 4 addresses
-- NOTE(review): this figure was recorded when the query filtered on 'stayed';
-- re-run the query and confirm the value.


-- 14. Does customer complaints influence churned behavior?
-- Customers, churned customers and churn rate per complainrecieved value,
-- highest churn rate first.
SELECT
    complainrecieved,
    COUNT(*) AS TotalCustomer,
    SUM(Churn) AS CustomerChurn,
    CAST(SUM(Churn) * 1.0 / COUNT(*) * 100 AS DECIMAL(10,2)) AS Churnrate
FROM ecommercechurn
GROUP BY complainrecieved
ORDER BY Churnrate DESC;
-- Answer = Customers with complains had the highest churn rate


-- 15. How does the usage of coupons differ between churned and non-churned customers?
-- SumofCouponUsed is the group total and therefore grows with the number of
-- customers in the group; AvgCouponUsedPerCustomer is added so the two groups
-- can be compared per customer. "* 1.0" keeps the average from being truncated
-- if couponused is an integer column.
SELECT
    customerstatus,
    SUM(couponused) AS SumofCouponUsed,
    AVG(couponused * 1.0) AS AvgCouponUsedPerCustomer
FROM ecommercechurn
GROUP BY customerstatus;
-- Churned customers used less coupons in comparison to non churned customers
-- NOTE(review): the statement above is based on the totals; check it against
-- the per-customer average as well.


-- 16. What is the average number of days since the last order for churned customers?
-- The literal matches the stored 'Churned' label exactly (see query 13).
SELECT AVG(daysincelastorder) AS AverageNumofDaysSinceLastOrder
FROM ecommercechurn
WHERE customerstatus = 'Churned';
-- Answer = The average number of days since last order for churned customer is 3


-- 17. Is there any correlation between cashback amount and churn rate?
-- Firstly, we will create a new column that provides a cashback amount range based on the values in cashbackamount column
-- NOTE(review): assuming SQL Server, run the ALTER TABLE in its own batch
-- (e.g. followed by GO) before the UPDATE that fills the new column.
ALTER TABLE ecommercechurn
ADD cashbackamountrange NVARCHAR(50);

-- WHEN branches are tested top to bottom, so each branch only needs its upper
-- bound. A NULL cashbackamount matches no branch and leaves the range NULL.
UPDATE ecommercechurn
SET cashbackamountrange = CASE
                              WHEN cashbackamount <= 100 THEN 'Low Cashback Amount'
                              WHEN cashbackamount <= 200 THEN 'Moderate Cashback Amount'
                              WHEN cashbackamount <= 300 THEN 'High Cashback Amount'
                              WHEN cashbackamount > 300 THEN 'Very High Cashback Amount'
                          END;

-- Finding correlation between cashbackamountrange and churned rate
-- Inner query: customers and churned customers per cashback range.
-- Outer query: churn rate as a percentage, highest first.
SELECT
    g.cashbackamountrange,
    g.TotalCustomer,
    g.CustomerChurn,
    CAST(g.CustomerChurn * 1.0 / g.TotalCustomer * 100 AS DECIMAL(10,2)) AS Churnrate
FROM (
    SELECT
        cashbackamountrange,
        COUNT(*) AS TotalCustomer,
        SUM(Churn) AS CustomerChurn
    FROM ecommercechurn
    GROUP BY cashbackamountrange
) AS g
ORDER BY Churnrate DESC;
-- Answer = Customers with a Moderate Cashback Amount (Between 100 and 200) have the highest churn rate, followed by
-- High cashback amount, then very high cashback amount and finally low cashback amount





--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Ecommerce Customer Churn Analysis Using SQL

Welcome to the Customer Churn Analysis Project repository! This project aims to analyze customer churn patterns and provide insights and recommendations to reduce churn rates. The analysis is based on a dataset containing various customer attributes and behaviors.


### Full Project Article
This repository contains the SQL Query written to analyze this dataset.

For a detailed description of the project, insights, and recommendations, please check out the full article on Medium.
The article provides a comprehensive analysis of the customer churn patterns and offers valuable recommendations to reduce churn rates. You can find the article using the link below:

[Medium Article](https://medium.com/@okon.judith/e-commerce-customer-churn-analysis-using-sql-a34a95c4a367)

Feel free to reach out to me with any questions or suggestions regarding the project or the analysis.

Happy analyzing!

--------------------------------------------------------------------------------