├── 01_data ├── CAC_calc.csv └── LTV_calc.csv ├── 02_code ├── .ipynb_checkpoints │ └── marketing_data_viz-checkpoint.ipynb ├── analysis_queries.sql └── marketing_data_viz.ipynb ├── 03_images ├── expenditure_vs_purchases_per_month.png └── tableschema.png └── README.md /01_data/CAC_calc.csv: -------------------------------------------------------------------------------- 1 | channel,impressions,CPM ($),total_CPM,pre_campaign_click_through,click-through,click-through_rate (%),CPV ($),CPC ($),CPC_total,conversions,conversion_rate,CAC ($) 2 | facebook,4067,5,20.335,23,41,1.01,6.84,7,287,8,0.20,38.42 3 | instagram,284,3,0.852,5,9,3.17,3.99,4,36,2,0.70,18.43 4 | twitter,6807,2,13.614,233,408,5.99,2.85,3,1224,82,1.20,15.09 5 | linkedin,8630,2,17.26,197,345,4.00,2.85,3,1035,69,0.80,15.25 6 | pinterest,4226,4,16.904,7,13,0.31,4.56,4,52,3,0.07,22.97 7 | email,3232,0,0,92,162,5.01,0,0,0,32,0.99,0.00 8 | website,8939,0,0,51,89,1.00,0,0,0,18,0.20,0.00 -------------------------------------------------------------------------------- /01_data/LTV_calc.csv: -------------------------------------------------------------------------------- 1 | customer_id,total_purchases_per_month,purchases_per_month,customer_duration (months) 2 | 87411,211,15,8 3 | 37059,897,6,18 4 | 34747,783,3,15 5 | 34964,153,9,4 6 | 15147,813,14,10 7 | 96293,887,9,12 8 | 25927,846,6,13 9 | 55037,407,14,8 10 | 53461,436,0,7 11 | 24006,137,8,1 12 | 60650,362,14,11 13 | 95291,333,11,12 14 | 68175,622,11,11 15 | 61760,155,3,16 16 | 34029,809,13,9 17 | 75207,783,14,1 18 | 58086,229,6,4 19 | 67908,971,14,6 20 | 38427,82,8,14 21 | 53186,324,7,10 22 | 61725,781,15,17 23 | 65070,154,1,6 24 | 76640,71,5,11 25 | 30450,575,1,1 26 | 88078,699,15,15 27 | 67120,318,4,6 28 | 87932,499,7,5 29 | 34458,311,2,2 30 | 26823,748,9,3 31 | 30135,870,12,15 32 | 64692,411,15,12 33 | 81824,666,7,2 34 | 26967,264,4,15 35 | 60629,154,7,7 36 | 37499,189,13,17 37 | 65208,996,0,12 38 | 54996,796,12,6 39 | 36645,834,3,8 40 | 61914,762,15,4 41 | 
53044,133,15,14 42 | 99209,534,2,8 43 | 19459,963,13,15 44 | 73685,494,9,6 45 | 68788,666,12,13 46 | 86897,892,7,17 47 | 78144,661,9,15 48 | 12347,664,5,2 49 | 16338,551,8,5 50 | 92148,84,10,10 51 | 15858,162,9,12 52 | 79896,73,13,16 53 | 62761,98,10,11 54 | 78717,755,11,13 55 | 20101,176,9,8 56 | 23419,148,13,5 57 | 11626,94,9,9 58 | 22419,805,5,11 59 | 59146,272,3,11 60 | 63906,124,1,1 61 | 22534,96,1,2 62 | 13028,163,10,13 63 | 24651,524,5,18 64 | 49018,811,12,14 65 | 54595,216,0,9 66 | 24580,257,9,5 67 | 42678,225,12,2 68 | 74260,355,5,2 69 | 33870,221,1,12 70 | 20751,440,6,14 71 | 19777,821,3,6 72 | 39226,474,15,14 73 | 78104,227,3,17 74 | 84185,399,8,4 75 | 68584,207,0,18 76 | 50017,888,13,17 77 | 23189,607,11,5 78 | 80896,481,10,2 79 | 79605,743,14,10 80 | 23402,433,13,6 81 | 73593,647,0,3 82 | 59741,404,10,11 83 | 83619,88,9,17 84 | 48267,604,9,18 85 | 43411,212,1,7 86 | 98072,621,0,5 87 | 26927,498,10,5 88 | 37691,595,10,13 89 | 76929,158,13,4 90 | 82453,177,7,13 91 | 70569,943,15,9 92 | 49423,465,9,13 93 | 48704,60,12,3 94 | 91123,79,0,5 95 | 35709,91,8,18 96 | 74765,548,6,12 97 | 87597,385,13,3 98 | 89295,291,6,7 99 | 84102,157,8,4 100 | 51409,438,11,4 -------------------------------------------------------------------------------- /02_code/.ipynb_checkpoints/marketing_data_viz-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 46, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "C:\\Users\\riley\\Documents\\Coding\\marketing_analyst_practice\\marketing_data_analysis\\01_data\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "#standard viz library imports\n", 18 | "import numpy as np, pandas as pd\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "%matplotlib inline\n", 21 | "import seaborn as sns\n", 22 | "\n", 23 | "#Set the directory in the data folder of the 
repo\n", 24 | "%cd ../01_data\n", 25 | "\n", 26 | "ltv_df = pd.read_csv('LTV_calc.csv')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 47, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "Index(['customer_id', 'total_purchases_per_month', 'purchases_per_month',\n", 38 | " 'customer_duration (months)'],\n", 39 | " dtype='object')" 40 | ] 41 | }, 42 | "execution_count": 47, 43 | "metadata": {}, 44 | "output_type": "execute_result" 45 | } 46 | ], 47 | "source": [ 48 | "ltv_df.columns" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 48, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "#calculate the LTV\n", 58 | "avg_ltv = np.average(ltv_df['total_purchases_per_month']) * np.average(ltv_df['purchases_per_month']) * np.average(ltv_df['customer_duration (months)'])" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 49, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "34484.97685764903" 70 | ] 71 | }, 72 | "execution_count": 49, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "avg_ltv" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 50, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "image/png": "\n", 89 | "text/plain": [ 90 | "
" 91 | ] 92 | }, 93 | "metadata": { 94 | "needs_background": "light" 95 | }, 96 | "output_type": "display_data" 97 | } 98 | ], 99 | "source": [ 100 | "# See the association between purchases per month and total amount spend per month\n", 101 | "plt.scatter('total_purchases_per_month', 'purchases_per_month', data=ltv_df);\n", 102 | "plt.title('Total Amount Spent/Mo vs. Number of purchases/mo');\n", 103 | "plt.xlabel('Total Amount Spent');\n", 104 | "plt.ylabel('Number of purchases/mo');" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.7.1" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /02_code/analysis_queries.sql: -------------------------------------------------------------------------------- 1 | -- Make the database 2 | CREATE DATABASE marketing_analysis; 3 | 4 | -- Use the database 5 | USE marketing_analysis; 6 | 7 | -- I used MySQL Workbench's table data import wizard to import the .csv files 8 | -- into the database. Since the dataframes are small, this is fine to do. 9 | -- You're welcome to do it however you see fit. 
10 | 11 | -- Copy a subset of the cac_calc columns into a new table so that the derived metrics can be recalculated here in SQL 12 | CREATE TABLE data_to_calculate AS 13 | SELECT channel, 14 | impressions, 15 | `CPM ($)`, 16 | `pre_campaign_click_through`, 17 | `click-through`, 18 | `click-through_rate (%)`, 19 | `CPC ($)`, 20 | conversions FROM cac_calc; 21 | 22 | -- Calculates the total impression cost per channel (impressions * CPM / 1000) 23 | SELECT impressions*`CPM ($)` / 1000 AS cost_per_impression_total FROM data_to_calculate; 24 | 25 | -- Calculates the click-through rate as a percentage of impressions 26 | SELECT `click-through`/impressions * 100 AS `click_through_rate(%)` FROM data_to_calculate; 27 | 28 | -- Let's say we had impressions and click-throughs prior to a 29 | -- big marketing campaign and after a big marketing campaign. 30 | -- Impressions and click-throughs prior to the campaign could be, for example, just 31 | -- general facebook posts and such. How does click-through 32 | -- (AKA visits to site or CTA CTR) respond to the new marketing initiative? 33 | 34 | -- Calculates CPV (cost per visit) (USD) 35 | SELECT ( `CPC ($)` + `CPM ($)` ) / ( `click-through` / pre_campaign_click_through ) 36 | AS `CPV ($)` from data_to_calculate; 37 | 38 | -- Calculates the total cost of clicks per channel (CPC * click-throughs) 39 | SELECT `CPC ($)`*`click-through` 40 | AS CPC_total FROM data_to_calculate; 41 | 42 | -- List channels by conversions, highest first, to find the most fruitful ones to focus marketing campaign efforts on 43 | SELECT * FROM cac_calc 44 | ORDER BY conversions DESC; 45 | 46 | -- Based on this analysis, twitter and linkedin campaigns should be 47 | -- iterated on to improve metrics across the board and further boost conversions 48 | 49 | -- Determine the LTV of the average customer 50 | SELECT AVG(total_purchases_per_month) 51 | *AVG(purchases_per_month) 52 | *AVG(`customer_duration (months)`) 53 | AS LTV FROM LTV_calc; 54 | 55 | -- Subtracting out the average CAC (cost to acquire a customer) gives the net 56 | -- LTV of the average customer: 57 | SELECT AVG(total_purchases_per_month) 58 | 
*AVG(purchases_per_month) 59 | *AVG(`customer_duration (months)`) - 15.65 FROM LTV_calc; 60 | 61 | -- Numbers are highly variable between channels, so it would be good to run these 62 | -- queries on each channel to see what the ROI is for each and what the net 63 | -- worth is for a customer from each. -------------------------------------------------------------------------------- /02_code/marketing_data_viz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 46, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "C:\\Users\\riley\\Documents\\Coding\\marketing_analyst_practice\\marketing_data_analysis\\01_data\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "#standard viz library imports\n", 18 | "import numpy as np, pandas as pd\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "%matplotlib inline\n", 21 | "import seaborn as sns\n", 22 | "\n", 23 | "#Set the directory in the data folder of the repo\n", 24 | "%cd ../01_data\n", 25 | "\n", 26 | "ltv_df = pd.read_csv('LTV_calc.csv')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 47, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "Index(['customer_id', 'total_purchases_per_month', 'purchases_per_month',\n", 38 | " 'customer_duration (months)'],\n", 39 | " dtype='object')" 40 | ] 41 | }, 42 | "execution_count": 47, 43 | "metadata": {}, 44 | "output_type": "execute_result" 45 | } 46 | ], 47 | "source": [ 48 | "ltv_df.columns" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 48, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "#calculate the LTV\n", 58 | "avg_ltv = np.average(ltv_df['total_purchases_per_month']) * np.average(ltv_df['purchases_per_month']) * np.average(ltv_df['customer_duration (months)'])" 59 | ] 60 | }, 61 | { 62 | "cell_type": 
"code", 63 | "execution_count": 49, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "34484.97685764903" 70 | ] 71 | }, 72 | "execution_count": 49, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "avg_ltv" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 50, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "image/png": "\n", 89 | "text/plain": [ 90 | "
" 91 | ] 92 | }, 93 | "metadata": { 94 | "needs_background": "light" 95 | }, 96 | "output_type": "display_data" 97 | } 98 | ], 99 | "source": [ 100 | "# See the association between purchases per month and total amount spend per month\n", 101 | "plt.scatter('total_purchases_per_month', 'purchases_per_month', data=ltv_df);\n", 102 | "plt.title('Total Amount Spent/Mo vs. Number of purchases/mo');\n", 103 | "plt.xlabel('Total Amount Spent');\n", 104 | "plt.ylabel('Number of purchases/mo');" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.7.1" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /03_images/expenditure_vs_purchases_per_month.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rileypredum/marketing_analytics_sql/cbe08b92fd87e53cdd1c54b8e81044ed83373fcf/03_images/expenditure_vs_purchases_per_month.png -------------------------------------------------------------------------------- /03_images/tableschema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rileypredum/marketing_analytics_sql/cbe08b92fd87e53cdd1c54b8e81044ed83373fcf/03_images/tableschema.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SQL and Python Marketing KPIs/Metrics 2 | 3 | ## Introduction 4 | This is a SQL 
script/Jupyter Notebook duo that calculates key metrics for marketing departments, like the cost of acquiring a new customer (CAC), cost per click (CPC), cost per thousand impressions (CPM), customer lifetime value (LTV), and click-through rate (CTR). 5 | 6 | ## Setup 7 | 8 | 1. You'll need to fork or clone the repo, then go into MySQL or whatever other SQL client you use and import the .csv files from `/01_data/` into a database. I called my database marketing_analysis. You can call yours whatever you like, as long as you update the CREATE DATABASE and USE statements at the top of analysis_queries.sql to match. 9 | 10 | 2. Next, connect to the database. From there, you can simply follow along in the analysis_queries.sql file. 11 | 12 | ### When working on the Jupyter Notebook, make sure the %cd command at the top points to the `01_data` folder of your local repo. 13 | 14 | Enjoy! 15 | --------------------------------------------------------------------------------