├── Images ├── Early_hour.PNG ├── id_holder_2.PNG ├── id_holder_18.PNG ├── id_holder_25.PNG ├── QuickDBD-export.png ├── anomalous_transaction.PNG ├── credit_card_fraudster.jpg └── anomalous_transaction_2.PNG ├── Data ├── merchant_category.csv ├── card_holder.csv ├── credit_card.csv └── merchant.csv ├── SQL └── schema.sql ├── README.md └── Starter_file └── challenge.ipynb /Images/Early_hour.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/Early_hour.PNG -------------------------------------------------------------------------------- /Images/id_holder_2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/id_holder_2.PNG -------------------------------------------------------------------------------- /Data/merchant_category.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,restaurant 3 | 2,coffee shop 4 | 3,bar 5 | 4,pub 6 | 5,food truck 7 | -------------------------------------------------------------------------------- /Images/id_holder_18.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/id_holder_18.PNG -------------------------------------------------------------------------------- /Images/id_holder_25.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/id_holder_25.PNG -------------------------------------------------------------------------------- /Images/QuickDBD-export.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/QuickDBD-export.png 
-------------------------------------------------------------------------------- /Images/anomalous_transaction.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/anomalous_transaction.PNG -------------------------------------------------------------------------------- /Images/credit_card_fraudster.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/credit_card_fraudster.jpg -------------------------------------------------------------------------------- /Images/anomalous_transaction_2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/anomalous_transaction_2.PNG -------------------------------------------------------------------------------- /Data/card_holder.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Robert Johnson 3 | 2,Shane Shaffer 4 | 3,Elizabeth Sawyer 5 | 4,Danielle Green 6 | 5,Sara Cooper 7 | 6,Beth Hernandez 8 | 7,Sean Taylor 9 | 8,Michael Floyd 10 | 9,Laurie Gibbs 11 | 10,Matthew Gutierrez 12 | 11,Brandon Pineda 13 | 12,Megan Price 14 | 13,John Martin 15 | 14,Gary Jacobs 16 | 15,Kyle Tucker 17 | 16,Crystal Clark 18 | 17,Michael Carroll 19 | 18,Malik Carlson 20 | 19,Peter Mckay 21 | 20,Kevin Spencer 22 | 21,Dana Washington 23 | 22,Austin Johnson 24 | 23,Mark Lewis 25 | 24,Stephanie Dalton 26 | 25,Nancy Contreras 27 | -------------------------------------------------------------------------------- /Data/credit_card.csv: -------------------------------------------------------------------------------- 1 | card,id_card_holder 2 | 3.51711E+15,1 3 | 4.76105E+18,1 4 | 4.86676E+18,2 5 | 6.75911E+11,2 6 | 3.00783E+13,3 7 | 4.26369E+15,4 8 | 5.84227E+11,4 9 | 4.27647E+12,5 10 | 
4.26849E+15,5 11 | 3.58135E+15,6 12 | 4.15984E+18,6 13 | 3.51695E+15,7 14 | 4.53999E+15,7 15 | 4.83448E+15,8 16 | 3.00633E+13,8 17 | 3.0182E+13,9 18 | 4.96292E+18,10 19 | 4.16531E+18,10 20 | 2.13194E+14,10 21 | 1.80099E+14,11 22 | 4.64401E+18,11 23 | 4.02791E+15,11 24 | 5.0188E+11,12 25 | 5.29719E+15,12 26 | 3.76028E+14,12 27 | 4.71177E+15,13 28 | 5.13584E+15,13 29 | 3.56195E+15,13 30 | 5.17595E+15,14 31 | 4.72378E+18,15 32 | 6.50024E+15,15 33 | 5.03843E+11,16 34 | 5.5706E+15,16 35 | 5.50071E+15,16 36 | 6.01199E+15,17 37 | 4.498E+12,18 38 | 3.4412E+14,18 39 | 4.7432E+18,19 40 | 5.36178E+15,19 41 | 3.56107E+15,19 42 | 3.53565E+15,20 43 | 4.50641E+15,20 44 | 4.58696E+18,20 45 | 4.2791E+18,21 46 | 5.01809E+11,22 47 | 4.74104E+12,23 48 | 4.18816E+15,23 49 | 4.15072E+15,23 50 | 4.6819E+12,24 51 | 3.0143E+13,24 52 | 3.5822E+15,24 53 | 4.31965E+12,25 54 | 3.72415E+14,25 -------------------------------------------------------------------------------- /SQL/schema.sql: -------------------------------------------------------------------------------- 1 | -- Exported from QuickDBD: https://www.quickdatabasediagrams.com/ 2 | -- Link to schema: https://app.quickdatabasediagrams.com/#/d/s8Wnqm 3 | -- NOTE! If you have used non-SQL datatypes in your design, you will have to change these here. 
-- Schema for the credit-card fraud-detection database (PostgreSQL).
-- Tables are created first without foreign keys; all foreign keys and checks
-- are attached afterwards with ALTER TABLE, so creation order does not matter.

-- Card Holder: one row per customer.
CREATE TABLE "card_holder" (
    "id"   SERIAL      NOT NULL,
    "name" VARCHAR(50) NOT NULL,
    CONSTRAINT "pk_card_holder" PRIMARY KEY ("id")
);

-- Credit Card: a card number is an identifier, not a quantity, so it is
-- stored as text (up to 20 characters) and doubles as the primary key.
CREATE TABLE "credit_card" (
    "card"           VARCHAR(20) NOT NULL,
    "id_card_holder" INT         NOT NULL,
    CONSTRAINT "pk_credit_card" PRIMARY KEY ("card")
);

-- Merchant: each merchant belongs to exactly one merchant category.
CREATE TABLE "merchant" (
    "id"                   SERIAL       NOT NULL,
    "name"                 VARCHAR(255) NOT NULL,
    "id_merchant_category" INT          NOT NULL,
    CONSTRAINT "pk_merchant" PRIMARY KEY ("id")
);

-- Merchant Category: lookup table for the kind of business.
CREATE TABLE "merchant_category" (
    "id"   SERIAL      NOT NULL,
    "name" VARCHAR(50) NOT NULL,
    CONSTRAINT "pk_merchant_category" PRIMARY KEY ("id")
);

-- Transaction: one row per card payment at a merchant.
CREATE TABLE "transaction" (
    "id"          INT            NOT NULL,
    "date"        TIMESTAMP      NOT NULL,
    -- Exact decimal instead of FLOAT: binary floating point cannot represent
    -- most cent values exactly, so sums and equality/threshold comparisons
    -- (e.g. "amount < 2.00") can drift.
    -- NOTE(review): scale 2 assumes amounts carry at most two decimals - confirm
    -- against transaction.csv before loading.
    "amount"      NUMERIC(12, 2) NOT NULL,
    "card"        VARCHAR(20)    NOT NULL,
    "id_merchant" INT            NOT NULL,
    CONSTRAINT "pk_transaction" PRIMARY KEY ("id")
);

-- Relationships and checks.

ALTER TABLE "credit_card"
    ADD CONSTRAINT "fk_credit_card_id_card_holder"
    FOREIGN KEY ("id_card_holder") REFERENCES "card_holder" ("id");

-- Redundant with the VARCHAR(20) column type, but kept so that anything
-- referring to the constraint by name keeps working.
ALTER TABLE "credit_card"
    ADD CONSTRAINT "check_credit_card_length"
    CHECK (char_length("card") <= 20);

ALTER TABLE "merchant"
    ADD CONSTRAINT "fk_merchant_id_merchant_category"
    FOREIGN KEY ("id_merchant_category") REFERENCES "merchant_category" ("id");

ALTER TABLE "transaction"
    ADD CONSTRAINT "fk_transaction_card"
    FOREIGN KEY ("card") REFERENCES "credit_card" ("card");

ALTER TABLE "transaction"
    ADD CONSTRAINT "fk_transaction_id_merchant"
    FOREIGN KEY ("id_merchant") REFERENCES "merchant" ("id");
--------------------------------------------------------------------------------
/Data/merchant.csv: -------------------------------------------------------------------------------- 1 | id,name,id_merchant_category 2 | 1,"Murphy, Heath and Fields",1 3 | 2,Riggs-Adams,1 4 | 3,"Sanders, Parks and Mcfarland",2 5 | 4,Mccarty-Thomas,3 6 | 5,Miller-Blevins,4 7 | 6,Wilson and Sons,1 8 | 7,Gomez-Kelly,4 9 | 8,Russell-Thomas,1 10 | 9,"Curry, Scott and Richardson",3 11 | 10,Herrera Group,1 12 | 11,Stanton Group,4 13 | 12,"Bell, Gonzalez and Lowe",4 14 | 13,Giles and Sons,4 15 | 14,Osborne-Page,2 16 | 15,"Long, Harrell and Johnson",5 17 | 16,"Bryant, Thomas and Collins",4 18 | 17,Bauer-Cole,3 19 | 18,Romero-Jordan,5 20 | 19,Santos-Fitzgerald,4 21 | 20,Kim-Lopez,2 22 | 21,Robertson-Smith,4 23 | 22,"Dalton, Cameron and Jones",3 24 | 23,"Wilson, Roberts and Davenport",5 25 | 24,"Rodgers, Johnston and Macias",5 26 | 25,"Vaughn, Wilson and Hall",1 27 | 26,Smith-Stephens,2 28 | 27,Horn Ltd,2 29 | 28,Hess-Fischer,5 30 | 29,Browning-Cantu,4 31 | 30,Atkinson Ltd,3 32 | 31,"Fisher, Salazar and Thomas",5 33 | 32,"Norton, Burton and Smith",5 34 | 33,Vasquez-Parker,3 35 | 34,Combs-Jones,5 36 | 35,Jarvis-Turner,4 37 | 36,Hamilton-Mcfarland,1 38 | 37,"Nguyen, Bautista and Williams",4 39 | 38,Brown LLC,3 40 | 39,Young-Navarro,5 41 | 40,"Cox, Montgomery and Morgan",3 42 | 41,"Ford, Williams and Dunn",4 43 | 42,Kennedy-Chen,3 44 | 43,Wallace and Sons,2 45 | 44,Little-Floyd,4 46 | 45,Velazquez Ltd,4 47 | 46,"Miller, Chavez and Cobb",5 48 | 47,Martin Inc,1 49 | 48,Baker Inc,5 50 | 49,"Davis, Lowe and Baxter",5 51 | 50,Johnson-Watts,1 52 | 51,Fisher-Bolton,1 53 | 52,Jensen-Stanley,5 54 | 53,Wallace PLC,4 55 | 54,Berry-Lopez,1 56 | 55,"Johnson, Rivas and Anderson",1 57 | 56,Smith PLC,3 58 | 57,Thornton-Williams,4 59 | 58,"Young, Hull and Williams",4 60 | 59,Williams Group,3 61 | 60,Smith-Richards,4 62 | 61,"Richardson, Smith and Jordan",5 63 | 62,"Cooper, Carpenter and Jackson",5 64 | 63,Reed Group,5 65 | 64,"Cline, Myers and Strong",1 66 | 65,"Allen, Ramos and Carroll",4 67 | 
66,Robles Inc,3 68 | 67,"Maxwell, Tapia and Villanueva",2 69 | 68,Ramirez-Carr,2 70 | 69,Dominguez PLC,5 71 | 70,White-Hall,3 72 | 71,Greene LLC,1 73 | 72,Lopez-Kelly,1 74 | 73,Colon Ltd,3 75 | 74,Skinner-Williams,4 76 | 75,Martinez Group,1 77 | 76,Lowe PLC,1 78 | 77,"Brown, Ballard and Glass",1 79 | 78,Ruiz-Anderson,4 80 | 79,Lee LLC,1 81 | 80,"Kelly, Dyer and Schmitt",5 82 | 81,Fowler and Sons,5 83 | 82,Day-Murray,5 84 | 83,Solis Group,5 85 | 84,Marshall-Rojas,2 86 | 85,Patton-Rivera,3 87 | 86,"Walker, Campbell and Sullivan",5 88 | 87,Griffin-Woodard,3 89 | 88,Armstrong PLC,5 90 | 89,Kelley-Roberts,5 91 | 90,Brown-Cunningham,4 92 | 91,Turner Ltd,4 93 | 92,Garcia-White,4 94 | 93,Rodriguez-Parker,5 95 | 94,Yoder-Zavala,5 96 | 95,Baxter-Smith,1 97 | 96,Johnson-Fuller,4 98 | 97,Ruiz-Chavez,3 99 | 98,Rivera PLC,4 100 | 99,"Bond, Lewis and Rangel",1 101 | 100,Townsend-Anderson,1 102 | 101,Whitehead-Sexton,4 103 | 102,Walters-Ward,1 104 | 103,"Jones, Clark and Hoover",2 105 | 104,"Mcdaniel, Hines and Mcfarland",2 106 | 105,Garcia and Sons,4 107 | 106,Carter-Blackwell,4 108 | 107,Rowe-Abbott,4 109 | 108,Best Inc,1 110 | 109,Collins LLC,2 111 | 110,"Rodriguez, Dunlap and Nunez",2 112 | 111,Padilla-Clements,2 113 | 112,Greer Inc,3 114 | 113,Edwards-Aguirre,2 115 | 114,Greene-Wood,3 116 | 115,Williams Inc,4 117 | 116,Ferguson Ltd,2 118 | 117,Mitchell Group,2 119 | 118,Maldonado Group,2 120 | 119,Henderson and Sons,1 121 | 120,"Vega, Jones and Castro",5 122 | 121,"Fleming, Smith and Collins",3 123 | 122,Perry and Sons,3 124 | 123,"Boone, Davis and Townsend",4 125 | 124,Mccarty PLC,1 126 | 125,Russell and Sons,4 127 | 126,Bartlett and Sons,4 128 | 127,"Williams, Wright and Wagner",2 129 | 128,"Pitts, Salinas and Garcia",2 130 | 129,Sweeney-Paul,2 131 | 130,"Brown, Estrada and Powers",2 132 | 131,"Harrison, Newton and Hansen",1 133 | 132,Pugh-Williams,3 134 | 133,"Scott, Hess and Finley",3 135 | 134,"Jenkins, Peterson and Beck",1 136 | 135,"Jacobs, Torres and Walker",3 137 | 
136,Martinez-Robinson,3 138 | 137,Garcia PLC,5 139 | 138,Mccullough-Murphy,5 140 | 139,Kidd-Lopez,5 141 | 140,Wheeler-Moreno,5 142 | 141,Wood-Ramirez,3 143 | 142,Thomas-Garcia,5 144 | 143,"Guzman, Garcia and Church",3 145 | 144,"Walker, Deleon and Wolf",1 146 | 145,Hood-Phillips,3 147 | 146,"Pitts, Smith and Gonzalez",4 148 | 147,Marshall-Lopez,5 149 | 148,"Huerta, Keith and Walters",5 150 | 149,Clark and Sons,5 151 | 150,Johnson and Sons,2 152 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fraud_Detection_SQL 2 | 3 | ![Credit card fraudster](Images/credit_card_fraudster.jpg) 4 | 5 | *[Credit Card Fraudster by Richard Patterson](https://www.flickr.com/photos/136770128@N07/42252105582/) | [Creative Commons Licensed](https://creativecommons.org/licenses/by/2.0/)* 6 | 7 | ## Background 8 | 9 | Fraud is everywhere these days—whether you are a small taco shop or a large international business. While there are emerging technologies that employ machine learning and artificial intelligence to detect fraud, many instances of fraud detection still require strong data analytics to find abnormal charges. 10 | 11 | This project applies SQL skills to analyze historical credit card transactions and consumption patterns in order to identify possible fraudulent transactions. 12 | 13 | The project accomplishes three main tasks: 14 | 15 | 1. [Data Modeling](#Data-Modeling): 16 | Define a database model to store the credit card transactions data and create a new PostgreSQL database using your model. 17 | 18 | 2. [Data Engineering](#Data-Engineering): Create a database schema on PostgreSQL and populate your database from the CSV files provided. 19 | 20 | 3. [Data Analysis](#Data-Analysis): Analyze the data to identify possible fraudulent transactions. 
21 | 22 | --- 23 | 24 | ## Files 25 | 26 | ### Query Files 27 | 28 | * [schema.sql](SQL/schema.sql) 29 | * [seed.sql](SQL/seed.sql) 30 | 31 | ### CSV Files 32 | 33 | * [card_holder.csv](Data/card_holder.csv) 34 | * [credit_card.csv](Data/credit_card.csv) 35 | * [merchant_category.csv](Data/merchant_category.csv) 36 | * [merchant.csv](Data/merchant.csv) 37 | * [transaction.csv](Data/transaction.csv) 38 | 39 | 40 | ### Data Modeling 41 | 42 | Create an entity relationship diagram (ERD) by inspecting the provided CSV files. 43 | 44 | **Note:** For the `credit_card` table, the `card` column should be a VARCHAR(20) datatype rather than an INT. 45 | 46 | The ERD was developed with [Quick Database Diagrams](https://app.quickdatabasediagrams.com/#/). 47 | 48 | ![QuickDBD-export](Images/QuickDBD-export.png) 49 | 50 | ### Data Engineering 51 | 52 | Using your database model as a blueprint, create a database schema for each of your tables and relationships. Specify data types, primary keys, foreign keys, and any other constraints you defined. 53 | 54 | After creating the database schema, import the data from the corresponding CSV files. 55 | 56 | 57 | ### Data Analysis 58 | 59 | Now that your data is prepared within the database, it's finally time to identify fraudulent transactions using SQL and Pandas DataFrames. 60 | 61 | The top 100 highest transactions during the early hours, i.e. 7:00 to 9:00 AM: 62 | 63 | ![Early_hour](Images/Early_hour.PNG) 64 | 65 | * Some fraudsters hack a credit card by making several small payments (generally less than $2.00), which are typically ignored by cardholders. Count the transactions that are less than $2.00 per cardholder. Is there any evidence to suggest that a credit card has been hacked? Explain your rationale. 66 | 67 | * What are the top five merchants prone to being hacked using small transactions? 68 | 69 | * Once you have a query that can be reused, create a view for each of the previous queries. 
70 | 71 | A report on possible fraudulent transactions for some of the firm's top customers was created, using Pandas, Plotly Express, hvPlot, and SQLAlchemy to build the visualizations. 72 | 73 | * The transaction histories of two of the firm's most important customers, cardholder IDs 18 and 2, were examined for fraudulent transactions. 74 | 75 | ![id_holder_2](Images/id_holder_2.PNG) 76 | ![id_holder_18](Images/id_holder_18.PNG) 77 | 78 | * Observation : `The consumption patterns of the two cardholders are very different. Cardholder 2 makes many small transactions. Cardholder 18 has transactions ranging up to $1839. Cardholder 2 is more susceptible to fraudulent transactions` 79 | 80 | * The CEO of the firm's biggest customer suspects that someone has used her corporate credit card without authorization in the first quarter of 2018 to pay for several expensive restaurant bills. You are asked to find any anomalous transactions during that period. 81 | 82 | * Using Plotly Express, a series of six box plots was created, one for each month, in order to identify how many outliers there are per month for cardholder ID 25. 83 | 84 | ![id_holder_25](Images/id_holder_25.PNG) 85 | 86 | * Observations : `There seem to be fraudulent transactions in the Restaurant and Food Truck categories, where Food Truck transactions range from $1.46 to $1046` 87 | 88 | 89 | ## Challenge 90 | 91 | Another approach to identify fraudulent transactions is to look for outliers in the data. Standard deviation or quartiles are often used to detect outliers. 
92 | 93 | #### Identifying Outliers based on Standard Deviation 94 | 95 | ![anomalous_transaction](Images/anomalous_transaction.PNG) 96 | 97 | #### Identifying Outliers based on Interquartile Range 98 | 99 | ![anomalous_transaction_2](Images/anomalous_transaction_2.PNG) 100 | 101 | -------------------------------------------------------------------------------- /Starter_file/challenge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | " # Challenge\n", 8 | "\n", 9 | " ## Identifying Outliers using Standard Deviation" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 91, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# initial imports\n", 19 | "import pandas as pd\n", 20 | "import numpy as np\n", 21 | "import random\n", 22 | "from sqlalchemy import create_engine\n", 23 | "from numpy import mean\n", 24 | "from numpy import std\n", 25 | "from numpy import percentile\n", 26 | "import plotly.express as px\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 92, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# create a connection to the database\n", 36 | "engine = create_engine(\"postgresql://postgres:Istay@10314@localhost:5432/fraud_detection\")\n", 37 | "\n", 38 | "# loading data from the database\n", 39 | "\n", 40 | "def execute_query(query):\n", 41 | "\n", 42 | " transaction_df = pd.read_sql(sql=query, con=engine, index_col='date', parse_dates='date')\n", 43 | "\n", 44 | " return transaction_df\n", 45 | "\n" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 93, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/html": [ 56 | "
\n", 57 | "\n", 70 | "\n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | "
idnamecardamountcategory
date
2018-01-01 21:35:1013John Martin35619544879886056.22food truck
2018-01-01 21:43:1213John Martin51358376886714963.83bar
2018-01-01 22:41:2110Matthew Gutierrez2131939469803039.61food truck
2018-01-01 23:13:304Danielle Green426369406253301719.03pub
2018-01-01 23:15:1018Malik Carlson44980027583002.95restaurant
\n", 132 | "
" 133 | ], 134 | "text/plain": [ 135 | " id name card amount \\\n", 136 | "date \n", 137 | "2018-01-01 21:35:10 13 John Martin 3561954487988605 6.22 \n", 138 | "2018-01-01 21:43:12 13 John Martin 5135837688671496 3.83 \n", 139 | "2018-01-01 22:41:21 10 Matthew Gutierrez 213193946980303 9.61 \n", 140 | "2018-01-01 23:13:30 4 Danielle Green 4263694062533017 19.03 \n", 141 | "2018-01-01 23:15:10 18 Malik Carlson 4498002758300 2.95 \n", 142 | "\n", 143 | " category \n", 144 | "date \n", 145 | "2018-01-01 21:35:10 food truck \n", 146 | "2018-01-01 21:43:12 bar \n", 147 | "2018-01-01 22:41:21 food truck \n", 148 | "2018-01-01 23:13:30 pub \n", 149 | "2018-01-01 23:15:10 restaurant " 150 | ] 151 | }, 152 | "execution_count": 93, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "# loading data of daily transactions from jan to jun 2018 for card holder 25\n", 159 | "query = f'SELECT a.id, a.name, b.card, c.date, c.amount, e.name as \"category\" \\\n", 160 | " FROM public.card_holder a, public.credit_card b, public.transaction c, public.merchant d, public.merchant_category e \\\n", 161 | " WHERE a.id = b.id_card_holder AND b.card=c.card AND c.id_merchant=d.id AND d.id_merchant_category=e.id'\n", 162 | "\n", 163 | "transaction_df = execute_query(query)\n", 164 | "transaction_df.head()" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 94, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "Identified outliers: 77\n", 177 | "Non-outlier observations: 3423\n" 178 | ] 179 | }, 180 | { 181 | "data": { 182 | "text/html": [ 183 | "
\n", 184 | "\n", 197 | "\n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " 
\n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | 
" \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 
| " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | "
idnamecardamountcategoryoutlier
date
2018-01-02 23:27:4612Megan Price5018796574651031.0restaurantTrue
2018-01-04 03:05:187Sean Taylor35169523960802471685.0food truckTrue
2018-01-08 02:34:326Beth Hernandez35813459435439421029.0barTrue
2018-01-22 08:07:0316Crystal Clark55706006428658571131.0restaurantTrue
2018-01-23 06:29:3712Megan Price5018796574651678.0pubTrue
2018-01-24 13:17:191Robert Johnson47610496457115558111691.0coffee shopTrue
2018-01-30 18:31:0025Nancy Contreras43196535135071177.0restaurantTrue
2018-02-17 01:27:1916Crystal Clark55706006428658571430.0restaurantTrue
2018-02-19 16:00:437Sean Taylor35169523960802471072.0food truckTrue
2018-02-19 22:48:2518Malik Carlson3441196239208921839.0restaurantTrue
2018-02-27 15:27:326Beth Hernandez35813459435439421145.0barTrue
2018-03-01 21:29:053Elizabeth Sawyer300782990535121119.0pubTrue
2018-03-04 15:50:539Laurie Gibbs301819639133401534.0coffee shopTrue
2018-03-05 08:26:0816Crystal Clark55706006428658571617.0barTrue
2018-03-06 07:18:0925Nancy Contreras43196535135071334.0barTrue
2018-03-12 00:44:0112Megan Price5018796574651530.0coffee shopTrue
2018-03-20 10:19:2512Megan Price501879657465852.0pubTrue
2018-03-20 13:05:5424Stephanie Dalton301429666991871011.0barTrue
2018-03-26 07:41:599Laurie Gibbs301819639133401009.0coffee shopTrue
2018-04-03 03:23:3718Malik Carlson3441196239208921077.0restaurantTrue
2018-04-08 06:03:5025Nancy Contreras43196535135071063.0pubTrue
2018-04-18 23:23:297Sean Taylor35169523960802471086.0coffee shopTrue
2018-04-21 19:41:516Beth Hernandez35813459435439422108.0coffee shopTrue
2018-05-08 13:21:0124Stephanie Dalton301429666991871901.0restaurantTrue
2018-05-13 06:31:2025Nancy Contreras43196535135071046.0food truckTrue
2018-05-29 02:55:0816Crystal Clark55706006428658571203.0food truckTrue
2018-06-03 20:02:2818Malik Carlson3441196239208921814.0pubTrue
2018-06-04 03:46:1525Nancy Contreras43196535135071162.0pubTrue
2018-06-06 21:50:1725Nancy Contreras4319653513507749.0restaurantTrue
2018-06-10 04:54:279Laurie Gibbs301819639133401795.0pubTrue
.....................
2018-09-04 01:35:391Robert Johnson47610496457115558111790.0coffee shopTrue
2018-09-06 08:28:551Robert Johnson47610496457115558111017.0barTrue
2018-09-06 21:55:021Robert Johnson47610496457115558111056.0restaurantTrue
2018-09-10 22:49:4118Malik Carlson3441196239208921176.0restaurantTrue
2018-09-11 15:16:476Beth Hernandez35813459435439421856.0food truckTrue
2018-09-23 19:20:2312Megan Price5018796574651075.0pubTrue
2018-09-25 23:23:219Laurie Gibbs301819639133401095.0food truckTrue
2018-09-26 08:48:401Robert Johnson47610496457115558111060.0restaurantTrue
2018-10-07 14:40:343Elizabeth Sawyer30078299053512757.0barTrue
2018-10-07 18:29:209Laurie Gibbs301819639133401179.0pubTrue
2018-10-19 01:07:373Elizabeth Sawyer300782990535121053.0restaurantTrue
2018-11-13 17:07:2516Crystal Clark55706006428658571911.0restaurantTrue
2018-11-17 05:30:4318Malik Carlson3441196239208921769.0food truckTrue
2018-11-20 05:24:283Elizabeth Sawyer300782990535121054.0barTrue
2018-11-25 20:44:0712Megan Price5018796574651123.0barTrue
2018-11-27 15:36:0512Megan Price5018796574651802.0barTrue
2018-11-27 17:20:296Beth Hernandez35813459435439421279.0restaurantTrue
2018-11-27 17:27:341Robert Johnson47610496457115558111660.0pubTrue
2018-12-03 02:38:5216Crystal Clark55706006428658571014.0restaurantTrue
2018-12-07 07:22:031Robert Johnson47610496457115558111894.0barTrue
2018-12-13 12:09:5818Malik Carlson3441196239208921154.0restaurantTrue
2018-12-13 15:51:597Sean Taylor35169523960802472249.0food truckTrue
2018-12-14 08:51:4112Megan Price501879657465748.0pubTrue
2018-12-18 13:33:3725Nancy Contreras43196535135071074.0coffee shopTrue
2018-12-18 17:20:337Sean Taylor35169523960802471296.0barTrue
2018-12-19 16:10:039Laurie Gibbs301819639133401724.0pubTrue
2018-12-21 09:56:3224Stephanie Dalton301429666991871301.0pubTrue
2018-12-24 15:55:0616Crystal Clark55706006428658571634.0pubTrue
2018-12-25 19:10:4224Stephanie Dalton301429666991871035.0pubTrue
2018-12-30 23:23:091Robert Johnson47610496457115558111033.0pubTrue
\n", 770 | "

77 rows × 6 columns

\n", 771 | "
" 772 | ], 773 | "text/plain": [ 774 | " id name card amount \\\n", 775 | "date \n", 776 | "2018-01-02 23:27:46 12 Megan Price 501879657465 1031.0 \n", 777 | "2018-01-04 03:05:18 7 Sean Taylor 3516952396080247 1685.0 \n", 778 | "2018-01-08 02:34:32 6 Beth Hernandez 3581345943543942 1029.0 \n", 779 | "2018-01-22 08:07:03 16 Crystal Clark 5570600642865857 1131.0 \n", 780 | "2018-01-23 06:29:37 12 Megan Price 501879657465 1678.0 \n", 781 | "2018-01-24 13:17:19 1 Robert Johnson 4761049645711555811 1691.0 \n", 782 | "2018-01-30 18:31:00 25 Nancy Contreras 4319653513507 1177.0 \n", 783 | "2018-02-17 01:27:19 16 Crystal Clark 5570600642865857 1430.0 \n", 784 | "2018-02-19 16:00:43 7 Sean Taylor 3516952396080247 1072.0 \n", 785 | "2018-02-19 22:48:25 18 Malik Carlson 344119623920892 1839.0 \n", 786 | "2018-02-27 15:27:32 6 Beth Hernandez 3581345943543942 1145.0 \n", 787 | "2018-03-01 21:29:05 3 Elizabeth Sawyer 30078299053512 1119.0 \n", 788 | "2018-03-04 15:50:53 9 Laurie Gibbs 30181963913340 1534.0 \n", 789 | "2018-03-05 08:26:08 16 Crystal Clark 5570600642865857 1617.0 \n", 790 | "2018-03-06 07:18:09 25 Nancy Contreras 4319653513507 1334.0 \n", 791 | "2018-03-12 00:44:01 12 Megan Price 501879657465 1530.0 \n", 792 | "2018-03-20 10:19:25 12 Megan Price 501879657465 852.0 \n", 793 | "2018-03-20 13:05:54 24 Stephanie Dalton 30142966699187 1011.0 \n", 794 | "2018-03-26 07:41:59 9 Laurie Gibbs 30181963913340 1009.0 \n", 795 | "2018-04-03 03:23:37 18 Malik Carlson 344119623920892 1077.0 \n", 796 | "2018-04-08 06:03:50 25 Nancy Contreras 4319653513507 1063.0 \n", 797 | "2018-04-18 23:23:29 7 Sean Taylor 3516952396080247 1086.0 \n", 798 | "2018-04-21 19:41:51 6 Beth Hernandez 3581345943543942 2108.0 \n", 799 | "2018-05-08 13:21:01 24 Stephanie Dalton 30142966699187 1901.0 \n", 800 | "2018-05-13 06:31:20 25 Nancy Contreras 4319653513507 1046.0 \n", 801 | "2018-05-29 02:55:08 16 Crystal Clark 5570600642865857 1203.0 \n", 802 | "2018-06-03 20:02:28 18 Malik Carlson 344119623920892 
1814.0 \n", 803 | "2018-06-04 03:46:15 25 Nancy Contreras 4319653513507 1162.0 \n", 804 | "2018-06-06 21:50:17 25 Nancy Contreras 4319653513507 749.0 \n", 805 | "2018-06-10 04:54:27 9 Laurie Gibbs 30181963913340 1795.0 \n", 806 | "... .. ... ... ... \n", 807 | "2018-09-04 01:35:39 1 Robert Johnson 4761049645711555811 1790.0 \n", 808 | "2018-09-06 08:28:55 1 Robert Johnson 4761049645711555811 1017.0 \n", 809 | "2018-09-06 21:55:02 1 Robert Johnson 4761049645711555811 1056.0 \n", 810 | "2018-09-10 22:49:41 18 Malik Carlson 344119623920892 1176.0 \n", 811 | "2018-09-11 15:16:47 6 Beth Hernandez 3581345943543942 1856.0 \n", 812 | "2018-09-23 19:20:23 12 Megan Price 501879657465 1075.0 \n", 813 | "2018-09-25 23:23:21 9 Laurie Gibbs 30181963913340 1095.0 \n", 814 | "2018-09-26 08:48:40 1 Robert Johnson 4761049645711555811 1060.0 \n", 815 | "2018-10-07 14:40:34 3 Elizabeth Sawyer 30078299053512 757.0 \n", 816 | "2018-10-07 18:29:20 9 Laurie Gibbs 30181963913340 1179.0 \n", 817 | "2018-10-19 01:07:37 3 Elizabeth Sawyer 30078299053512 1053.0 \n", 818 | "2018-11-13 17:07:25 16 Crystal Clark 5570600642865857 1911.0 \n", 819 | "2018-11-17 05:30:43 18 Malik Carlson 344119623920892 1769.0 \n", 820 | "2018-11-20 05:24:28 3 Elizabeth Sawyer 30078299053512 1054.0 \n", 821 | "2018-11-25 20:44:07 12 Megan Price 501879657465 1123.0 \n", 822 | "2018-11-27 15:36:05 12 Megan Price 501879657465 1802.0 \n", 823 | "2018-11-27 17:20:29 6 Beth Hernandez 3581345943543942 1279.0 \n", 824 | "2018-11-27 17:27:34 1 Robert Johnson 4761049645711555811 1660.0 \n", 825 | "2018-12-03 02:38:52 16 Crystal Clark 5570600642865857 1014.0 \n", 826 | "2018-12-07 07:22:03 1 Robert Johnson 4761049645711555811 1894.0 \n", 827 | "2018-12-13 12:09:58 18 Malik Carlson 344119623920892 1154.0 \n", 828 | "2018-12-13 15:51:59 7 Sean Taylor 3516952396080247 2249.0 \n", 829 | "2018-12-14 08:51:41 12 Megan Price 501879657465 748.0 \n", 830 | "2018-12-18 13:33:37 25 Nancy Contreras 4319653513507 1074.0 \n", 831 | 
"2018-12-18 17:20:33 7 Sean Taylor 3516952396080247 1296.0 \n", 832 | "2018-12-19 16:10:03 9 Laurie Gibbs 30181963913340 1724.0 \n", 833 | "2018-12-21 09:56:32 24 Stephanie Dalton 30142966699187 1301.0 \n", 834 | "2018-12-24 15:55:06 16 Crystal Clark 5570600642865857 1634.0 \n", 835 | "2018-12-25 19:10:42 24 Stephanie Dalton 30142966699187 1035.0 \n", 836 | "2018-12-30 23:23:09 1 Robert Johnson 4761049645711555811 1033.0 \n", 837 | "\n", 838 | " category outlier \n", 839 | "date \n", 840 | "2018-01-02 23:27:46 restaurant True \n", 841 | "2018-01-04 03:05:18 food truck True \n", 842 | "2018-01-08 02:34:32 bar True \n", 843 | "2018-01-22 08:07:03 restaurant True \n", 844 | "2018-01-23 06:29:37 pub True \n", 845 | "2018-01-24 13:17:19 coffee shop True \n", 846 | "2018-01-30 18:31:00 restaurant True \n", 847 | "2018-02-17 01:27:19 restaurant True \n", 848 | "2018-02-19 16:00:43 food truck True \n", 849 | "2018-02-19 22:48:25 restaurant True \n", 850 | "2018-02-27 15:27:32 bar True \n", 851 | "2018-03-01 21:29:05 pub True \n", 852 | "2018-03-04 15:50:53 coffee shop True \n", 853 | "2018-03-05 08:26:08 bar True \n", 854 | "2018-03-06 07:18:09 bar True \n", 855 | "2018-03-12 00:44:01 coffee shop True \n", 856 | "2018-03-20 10:19:25 pub True \n", 857 | "2018-03-20 13:05:54 bar True \n", 858 | "2018-03-26 07:41:59 coffee shop True \n", 859 | "2018-04-03 03:23:37 restaurant True \n", 860 | "2018-04-08 06:03:50 pub True \n", 861 | "2018-04-18 23:23:29 coffee shop True \n", 862 | "2018-04-21 19:41:51 coffee shop True \n", 863 | "2018-05-08 13:21:01 restaurant True \n", 864 | "2018-05-13 06:31:20 food truck True \n", 865 | "2018-05-29 02:55:08 food truck True \n", 866 | "2018-06-03 20:02:28 pub True \n", 867 | "2018-06-04 03:46:15 pub True \n", 868 | "2018-06-06 21:50:17 restaurant True \n", 869 | "2018-06-10 04:54:27 pub True \n", 870 | "... ... ... 
\n", 871 | "2018-09-04 01:35:39 coffee shop True \n", 872 | "2018-09-06 08:28:55 bar True \n", 873 | "2018-09-06 21:55:02 restaurant True \n", 874 | "2018-09-10 22:49:41 restaurant True \n", 875 | "2018-09-11 15:16:47 food truck True \n", 876 | "2018-09-23 19:20:23 pub True \n", 877 | "2018-09-25 23:23:21 food truck True \n", 878 | "2018-09-26 08:48:40 restaurant True \n", 879 | "2018-10-07 14:40:34 bar True \n", 880 | "2018-10-07 18:29:20 pub True \n", 881 | "2018-10-19 01:07:37 restaurant True \n", 882 | "2018-11-13 17:07:25 restaurant True \n", 883 | "2018-11-17 05:30:43 food truck True \n", 884 | "2018-11-20 05:24:28 bar True \n", 885 | "2018-11-25 20:44:07 bar True \n", 886 | "2018-11-27 15:36:05 bar True \n", 887 | "2018-11-27 17:20:29 restaurant True \n", 888 | "2018-11-27 17:27:34 pub True \n", 889 | "2018-12-03 02:38:52 restaurant True \n", 890 | "2018-12-07 07:22:03 bar True \n", 891 | "2018-12-13 12:09:58 restaurant True \n", 892 | "2018-12-13 15:51:59 food truck True \n", 893 | "2018-12-14 08:51:41 pub True \n", 894 | "2018-12-18 13:33:37 coffee shop True \n", 895 | "2018-12-18 17:20:33 bar True \n", 896 | "2018-12-19 16:10:03 pub True \n", 897 | "2018-12-21 09:56:32 pub True \n", 898 | "2018-12-24 15:55:06 pub True \n", 899 | "2018-12-25 19:10:42 pub True \n", 900 | "2018-12-30 23:23:09 pub True \n", 901 | "\n", 902 | "[77 rows x 6 columns]" 903 | ] 904 | }, 905 | "execution_count": 94, 906 | "metadata": {}, 907 | "output_type": "execute_result" 908 | } 909 | ], 910 | "source": [ 911 | "# code a function to identify outliers based on standard deviation\n", 912 | "# calculate summary statistics\n", 913 | "data_mean, data_std = mean(transaction_df['amount']), std(transaction_df['amount'])\n", 914 | "\n", 915 | "# identify outliers\n", 916 | "cut_off = data_std * 3\n", 917 | "\n", 918 | "lower, upper = data_mean - cut_off, data_mean + cut_off\n", 919 | "\n", 920 | "# identify outliers\n", 921 | "outliers = [x for x in transaction_df['amount'] if x < lower 
or x > upper]\n", 922 | "\n", 923 | "print('Identified outliers: %d' % len(outliers))\n", 924 | "\n", 925 | "# remove outliers\n", 926 | "outliers_removed = [x for x in transaction_df['amount'] if x >= lower and x <= upper]\n", 927 | "\n", 928 | "print('Non-outlier observations: %d' % len(outliers_removed))\n", 929 | "\n", 930 | "transaction_df['outlier'] = (transaction_df['amount'] > upper) | (transaction_df['amount'] < lower)\n", 931 | "\n", 932 | "outlier = transaction_df[transaction_df['outlier']==True]\n", 933 | "outlier" 934 | ] 935 | }, 936 | { 937 | "cell_type": "code", 938 | "execution_count": 96, 939 | "metadata": {}, 940 | "outputs": [ 941 | { 942 | "data": { 943 | "application/vnd.plotly.v1+json": { 944 | "config": { 945 | "plotlyServerURL": "https://plot.ly" 946 | }, 947 | "data": [ 948 | { 949 | "hovertemplate": "category=bar
name=%{x}
amount=%{y}", 950 | "legendgroup": "bar", 951 | "marker": { 952 | "color": "#636efa", 953 | "symbol": "circle" 954 | }, 955 | "mode": "markers", 956 | "name": "bar", 957 | "showlegend": true, 958 | "type": "scatter", 959 | "x": [ 960 | "Robert Johnson", 961 | "Crystal Clark", 962 | "Nancy Contreras", 963 | "Robert Johnson" 964 | ], 965 | "xaxis": "x", 966 | "y": [ 967 | 1894, 968 | 1617, 969 | 1334, 970 | 1017 971 | ], 972 | "yaxis": "y" 973 | }, 974 | { 975 | "hovertemplate": "category=restaurant
name=%{x}
amount=%{y}", 976 | "legendgroup": "restaurant", 977 | "marker": { 978 | "color": "#EF553B", 979 | "symbol": "circle" 980 | }, 981 | "mode": "markers", 982 | "name": "restaurant", 983 | "showlegend": true, 984 | "type": "scatter", 985 | "x": [ 986 | "Crystal Clark", 987 | "Robert Johnson" 988 | ], 989 | "xaxis": "x", 990 | "y": [ 991 | 1131, 992 | 1060 993 | ], 994 | "yaxis": "y" 995 | }, 996 | { 997 | "hovertemplate": "category=coffee shop
name=%{x}
amount=%{y}", 998 | "legendgroup": "coffee shop", 999 | "marker": { 1000 | "color": "#00cc96", 1001 | "symbol": "circle" 1002 | }, 1003 | "mode": "markers", 1004 | "name": "coffee shop", 1005 | "showlegend": true, 1006 | "type": "scatter", 1007 | "x": [ 1008 | "Laurie Gibbs" 1009 | ], 1010 | "xaxis": "x", 1011 | "y": [ 1012 | 1009 1013 | ], 1014 | "yaxis": "y" 1015 | }, 1016 | { 1017 | "hovertemplate": "category=pub
name=%{x}
amount=%{y}", 1018 | "legendgroup": "pub", 1019 | "marker": { 1020 | "color": "#ab63fa", 1021 | "symbol": "circle" 1022 | }, 1023 | "mode": "markers", 1024 | "name": "pub", 1025 | "showlegend": true, 1026 | "type": "scatter", 1027 | "x": [ 1028 | "Megan Price" 1029 | ], 1030 | "xaxis": "x", 1031 | "y": [ 1032 | 748 1033 | ], 1034 | "yaxis": "y" 1035 | } 1036 | ], 1037 | "layout": { 1038 | "autosize": true, 1039 | "legend": { 1040 | "title": { 1041 | "text": "category" 1042 | }, 1043 | "tracegroupgap": 0 1044 | }, 1045 | "template": { 1046 | "data": { 1047 | "bar": [ 1048 | { 1049 | "error_x": { 1050 | "color": "#2a3f5f" 1051 | }, 1052 | "error_y": { 1053 | "color": "#2a3f5f" 1054 | }, 1055 | "marker": { 1056 | "line": { 1057 | "color": "#E5ECF6", 1058 | "width": 0.5 1059 | } 1060 | }, 1061 | "type": "bar" 1062 | } 1063 | ], 1064 | "barpolar": [ 1065 | { 1066 | "marker": { 1067 | "line": { 1068 | "color": "#E5ECF6", 1069 | "width": 0.5 1070 | } 1071 | }, 1072 | "type": "barpolar" 1073 | } 1074 | ], 1075 | "carpet": [ 1076 | { 1077 | "aaxis": { 1078 | "endlinecolor": "#2a3f5f", 1079 | "gridcolor": "white", 1080 | "linecolor": "white", 1081 | "minorgridcolor": "white", 1082 | "startlinecolor": "#2a3f5f" 1083 | }, 1084 | "baxis": { 1085 | "endlinecolor": "#2a3f5f", 1086 | "gridcolor": "white", 1087 | "linecolor": "white", 1088 | "minorgridcolor": "white", 1089 | "startlinecolor": "#2a3f5f" 1090 | }, 1091 | "type": "carpet" 1092 | } 1093 | ], 1094 | "choropleth": [ 1095 | { 1096 | "colorbar": { 1097 | "outlinewidth": 0, 1098 | "ticks": "" 1099 | }, 1100 | "type": "choropleth" 1101 | } 1102 | ], 1103 | "contour": [ 1104 | { 1105 | "colorbar": { 1106 | "outlinewidth": 0, 1107 | "ticks": "" 1108 | }, 1109 | "colorscale": [ 1110 | [ 1111 | 0, 1112 | "#0d0887" 1113 | ], 1114 | [ 1115 | 0.1111111111111111, 1116 | "#46039f" 1117 | ], 1118 | [ 1119 | 0.2222222222222222, 1120 | "#7201a8" 1121 | ], 1122 | [ 1123 | 0.3333333333333333, 1124 | "#9c179e" 1125 | ], 1126 | [ 1127 | 
0.4444444444444444, 1128 | "#bd3786" 1129 | ], 1130 | [ 1131 | 0.5555555555555556, 1132 | "#d8576b" 1133 | ], 1134 | [ 1135 | 0.6666666666666666, 1136 | "#ed7953" 1137 | ], 1138 | [ 1139 | 0.7777777777777778, 1140 | "#fb9f3a" 1141 | ], 1142 | [ 1143 | 0.8888888888888888, 1144 | "#fdca26" 1145 | ], 1146 | [ 1147 | 1, 1148 | "#f0f921" 1149 | ] 1150 | ], 1151 | "type": "contour" 1152 | } 1153 | ], 1154 | "contourcarpet": [ 1155 | { 1156 | "colorbar": { 1157 | "outlinewidth": 0, 1158 | "ticks": "" 1159 | }, 1160 | "type": "contourcarpet" 1161 | } 1162 | ], 1163 | "heatmap": [ 1164 | { 1165 | "colorbar": { 1166 | "outlinewidth": 0, 1167 | "ticks": "" 1168 | }, 1169 | "colorscale": [ 1170 | [ 1171 | 0, 1172 | "#0d0887" 1173 | ], 1174 | [ 1175 | 0.1111111111111111, 1176 | "#46039f" 1177 | ], 1178 | [ 1179 | 0.2222222222222222, 1180 | "#7201a8" 1181 | ], 1182 | [ 1183 | 0.3333333333333333, 1184 | "#9c179e" 1185 | ], 1186 | [ 1187 | 0.4444444444444444, 1188 | "#bd3786" 1189 | ], 1190 | [ 1191 | 0.5555555555555556, 1192 | "#d8576b" 1193 | ], 1194 | [ 1195 | 0.6666666666666666, 1196 | "#ed7953" 1197 | ], 1198 | [ 1199 | 0.7777777777777778, 1200 | "#fb9f3a" 1201 | ], 1202 | [ 1203 | 0.8888888888888888, 1204 | "#fdca26" 1205 | ], 1206 | [ 1207 | 1, 1208 | "#f0f921" 1209 | ] 1210 | ], 1211 | "type": "heatmap" 1212 | } 1213 | ], 1214 | "heatmapgl": [ 1215 | { 1216 | "colorbar": { 1217 | "outlinewidth": 0, 1218 | "ticks": "" 1219 | }, 1220 | "colorscale": [ 1221 | [ 1222 | 0, 1223 | "#0d0887" 1224 | ], 1225 | [ 1226 | 0.1111111111111111, 1227 | "#46039f" 1228 | ], 1229 | [ 1230 | 0.2222222222222222, 1231 | "#7201a8" 1232 | ], 1233 | [ 1234 | 0.3333333333333333, 1235 | "#9c179e" 1236 | ], 1237 | [ 1238 | 0.4444444444444444, 1239 | "#bd3786" 1240 | ], 1241 | [ 1242 | 0.5555555555555556, 1243 | "#d8576b" 1244 | ], 1245 | [ 1246 | 0.6666666666666666, 1247 | "#ed7953" 1248 | ], 1249 | [ 1250 | 0.7777777777777778, 1251 | "#fb9f3a" 1252 | ], 1253 | [ 1254 | 0.8888888888888888, 1255 | 
"#fdca26" 1256 | ], 1257 | [ 1258 | 1, 1259 | "#f0f921" 1260 | ] 1261 | ], 1262 | "type": "heatmapgl" 1263 | } 1264 | ], 1265 | "histogram": [ 1266 | { 1267 | "marker": { 1268 | "colorbar": { 1269 | "outlinewidth": 0, 1270 | "ticks": "" 1271 | } 1272 | }, 1273 | "type": "histogram" 1274 | } 1275 | ], 1276 | "histogram2d": [ 1277 | { 1278 | "colorbar": { 1279 | "outlinewidth": 0, 1280 | "ticks": "" 1281 | }, 1282 | "colorscale": [ 1283 | [ 1284 | 0, 1285 | "#0d0887" 1286 | ], 1287 | [ 1288 | 0.1111111111111111, 1289 | "#46039f" 1290 | ], 1291 | [ 1292 | 0.2222222222222222, 1293 | "#7201a8" 1294 | ], 1295 | [ 1296 | 0.3333333333333333, 1297 | "#9c179e" 1298 | ], 1299 | [ 1300 | 0.4444444444444444, 1301 | "#bd3786" 1302 | ], 1303 | [ 1304 | 0.5555555555555556, 1305 | "#d8576b" 1306 | ], 1307 | [ 1308 | 0.6666666666666666, 1309 | "#ed7953" 1310 | ], 1311 | [ 1312 | 0.7777777777777778, 1313 | "#fb9f3a" 1314 | ], 1315 | [ 1316 | 0.8888888888888888, 1317 | "#fdca26" 1318 | ], 1319 | [ 1320 | 1, 1321 | "#f0f921" 1322 | ] 1323 | ], 1324 | "type": "histogram2d" 1325 | } 1326 | ], 1327 | "histogram2dcontour": [ 1328 | { 1329 | "colorbar": { 1330 | "outlinewidth": 0, 1331 | "ticks": "" 1332 | }, 1333 | "colorscale": [ 1334 | [ 1335 | 0, 1336 | "#0d0887" 1337 | ], 1338 | [ 1339 | 0.1111111111111111, 1340 | "#46039f" 1341 | ], 1342 | [ 1343 | 0.2222222222222222, 1344 | "#7201a8" 1345 | ], 1346 | [ 1347 | 0.3333333333333333, 1348 | "#9c179e" 1349 | ], 1350 | [ 1351 | 0.4444444444444444, 1352 | "#bd3786" 1353 | ], 1354 | [ 1355 | 0.5555555555555556, 1356 | "#d8576b" 1357 | ], 1358 | [ 1359 | 0.6666666666666666, 1360 | "#ed7953" 1361 | ], 1362 | [ 1363 | 0.7777777777777778, 1364 | "#fb9f3a" 1365 | ], 1366 | [ 1367 | 0.8888888888888888, 1368 | "#fdca26" 1369 | ], 1370 | [ 1371 | 1, 1372 | "#f0f921" 1373 | ] 1374 | ], 1375 | "type": "histogram2dcontour" 1376 | } 1377 | ], 1378 | "mesh3d": [ 1379 | { 1380 | "colorbar": { 1381 | "outlinewidth": 0, 1382 | "ticks": "" 1383 | }, 1384 | 
"type": "mesh3d" 1385 | } 1386 | ], 1387 | "parcoords": [ 1388 | { 1389 | "line": { 1390 | "colorbar": { 1391 | "outlinewidth": 0, 1392 | "ticks": "" 1393 | } 1394 | }, 1395 | "type": "parcoords" 1396 | } 1397 | ], 1398 | "pie": [ 1399 | { 1400 | "automargin": true, 1401 | "type": "pie" 1402 | } 1403 | ], 1404 | "scatter": [ 1405 | { 1406 | "marker": { 1407 | "colorbar": { 1408 | "outlinewidth": 0, 1409 | "ticks": "" 1410 | } 1411 | }, 1412 | "type": "scatter" 1413 | } 1414 | ], 1415 | "scatter3d": [ 1416 | { 1417 | "line": { 1418 | "colorbar": { 1419 | "outlinewidth": 0, 1420 | "ticks": "" 1421 | } 1422 | }, 1423 | "marker": { 1424 | "colorbar": { 1425 | "outlinewidth": 0, 1426 | "ticks": "" 1427 | } 1428 | }, 1429 | "type": "scatter3d" 1430 | } 1431 | ], 1432 | "scattercarpet": [ 1433 | { 1434 | "marker": { 1435 | "colorbar": { 1436 | "outlinewidth": 0, 1437 | "ticks": "" 1438 | } 1439 | }, 1440 | "type": "scattercarpet" 1441 | } 1442 | ], 1443 | "scattergeo": [ 1444 | { 1445 | "marker": { 1446 | "colorbar": { 1447 | "outlinewidth": 0, 1448 | "ticks": "" 1449 | } 1450 | }, 1451 | "type": "scattergeo" 1452 | } 1453 | ], 1454 | "scattergl": [ 1455 | { 1456 | "marker": { 1457 | "colorbar": { 1458 | "outlinewidth": 0, 1459 | "ticks": "" 1460 | } 1461 | }, 1462 | "type": "scattergl" 1463 | } 1464 | ], 1465 | "scattermapbox": [ 1466 | { 1467 | "marker": { 1468 | "colorbar": { 1469 | "outlinewidth": 0, 1470 | "ticks": "" 1471 | } 1472 | }, 1473 | "type": "scattermapbox" 1474 | } 1475 | ], 1476 | "scatterpolar": [ 1477 | { 1478 | "marker": { 1479 | "colorbar": { 1480 | "outlinewidth": 0, 1481 | "ticks": "" 1482 | } 1483 | }, 1484 | "type": "scatterpolar" 1485 | } 1486 | ], 1487 | "scatterpolargl": [ 1488 | { 1489 | "marker": { 1490 | "colorbar": { 1491 | "outlinewidth": 0, 1492 | "ticks": "" 1493 | } 1494 | }, 1495 | "type": "scatterpolargl" 1496 | } 1497 | ], 1498 | "scatterternary": [ 1499 | { 1500 | "marker": { 1501 | "colorbar": { 1502 | "outlinewidth": 0, 1503 | 
"ticks": "" 1504 | } 1505 | }, 1506 | "type": "scatterternary" 1507 | } 1508 | ], 1509 | "surface": [ 1510 | { 1511 | "colorbar": { 1512 | "outlinewidth": 0, 1513 | "ticks": "" 1514 | }, 1515 | "colorscale": [ 1516 | [ 1517 | 0, 1518 | "#0d0887" 1519 | ], 1520 | [ 1521 | 0.1111111111111111, 1522 | "#46039f" 1523 | ], 1524 | [ 1525 | 0.2222222222222222, 1526 | "#7201a8" 1527 | ], 1528 | [ 1529 | 0.3333333333333333, 1530 | "#9c179e" 1531 | ], 1532 | [ 1533 | 0.4444444444444444, 1534 | "#bd3786" 1535 | ], 1536 | [ 1537 | 0.5555555555555556, 1538 | "#d8576b" 1539 | ], 1540 | [ 1541 | 0.6666666666666666, 1542 | "#ed7953" 1543 | ], 1544 | [ 1545 | 0.7777777777777778, 1546 | "#fb9f3a" 1547 | ], 1548 | [ 1549 | 0.8888888888888888, 1550 | "#fdca26" 1551 | ], 1552 | [ 1553 | 1, 1554 | "#f0f921" 1555 | ] 1556 | ], 1557 | "type": "surface" 1558 | } 1559 | ], 1560 | "table": [ 1561 | { 1562 | "cells": { 1563 | "fill": { 1564 | "color": "#EBF0F8" 1565 | }, 1566 | "line": { 1567 | "color": "white" 1568 | } 1569 | }, 1570 | "header": { 1571 | "fill": { 1572 | "color": "#C8D4E3" 1573 | }, 1574 | "line": { 1575 | "color": "white" 1576 | } 1577 | }, 1578 | "type": "table" 1579 | } 1580 | ] 1581 | }, 1582 | "layout": { 1583 | "annotationdefaults": { 1584 | "arrowcolor": "#2a3f5f", 1585 | "arrowhead": 0, 1586 | "arrowwidth": 1 1587 | }, 1588 | "coloraxis": { 1589 | "colorbar": { 1590 | "outlinewidth": 0, 1591 | "ticks": "" 1592 | } 1593 | }, 1594 | "colorscale": { 1595 | "diverging": [ 1596 | [ 1597 | 0, 1598 | "#8e0152" 1599 | ], 1600 | [ 1601 | 0.1, 1602 | "#c51b7d" 1603 | ], 1604 | [ 1605 | 0.2, 1606 | "#de77ae" 1607 | ], 1608 | [ 1609 | 0.3, 1610 | "#f1b6da" 1611 | ], 1612 | [ 1613 | 0.4, 1614 | "#fde0ef" 1615 | ], 1616 | [ 1617 | 0.5, 1618 | "#f7f7f7" 1619 | ], 1620 | [ 1621 | 0.6, 1622 | "#e6f5d0" 1623 | ], 1624 | [ 1625 | 0.7, 1626 | "#b8e186" 1627 | ], 1628 | [ 1629 | 0.8, 1630 | "#7fbc41" 1631 | ], 1632 | [ 1633 | 0.9, 1634 | "#4d9221" 1635 | ], 1636 | [ 1637 | 1, 1638 | 
"#276419" 1639 | ] 1640 | ], 1641 | "sequential": [ 1642 | [ 1643 | 0, 1644 | "#0d0887" 1645 | ], 1646 | [ 1647 | 0.1111111111111111, 1648 | "#46039f" 1649 | ], 1650 | [ 1651 | 0.2222222222222222, 1652 | "#7201a8" 1653 | ], 1654 | [ 1655 | 0.3333333333333333, 1656 | "#9c179e" 1657 | ], 1658 | [ 1659 | 0.4444444444444444, 1660 | "#bd3786" 1661 | ], 1662 | [ 1663 | 0.5555555555555556, 1664 | "#d8576b" 1665 | ], 1666 | [ 1667 | 0.6666666666666666, 1668 | "#ed7953" 1669 | ], 1670 | [ 1671 | 0.7777777777777778, 1672 | "#fb9f3a" 1673 | ], 1674 | [ 1675 | 0.8888888888888888, 1676 | "#fdca26" 1677 | ], 1678 | [ 1679 | 1, 1680 | "#f0f921" 1681 | ] 1682 | ], 1683 | "sequentialminus": [ 1684 | [ 1685 | 0, 1686 | "#0d0887" 1687 | ], 1688 | [ 1689 | 0.1111111111111111, 1690 | "#46039f" 1691 | ], 1692 | [ 1693 | 0.2222222222222222, 1694 | "#7201a8" 1695 | ], 1696 | [ 1697 | 0.3333333333333333, 1698 | "#9c179e" 1699 | ], 1700 | [ 1701 | 0.4444444444444444, 1702 | "#bd3786" 1703 | ], 1704 | [ 1705 | 0.5555555555555556, 1706 | "#d8576b" 1707 | ], 1708 | [ 1709 | 0.6666666666666666, 1710 | "#ed7953" 1711 | ], 1712 | [ 1713 | 0.7777777777777778, 1714 | "#fb9f3a" 1715 | ], 1716 | [ 1717 | 0.8888888888888888, 1718 | "#fdca26" 1719 | ], 1720 | [ 1721 | 1, 1722 | "#f0f921" 1723 | ] 1724 | ] 1725 | }, 1726 | "colorway": [ 1727 | "#636efa", 1728 | "#EF553B", 1729 | "#00cc96", 1730 | "#ab63fa", 1731 | "#FFA15A", 1732 | "#19d3f3", 1733 | "#FF6692", 1734 | "#B6E880", 1735 | "#FF97FF", 1736 | "#FECB52" 1737 | ], 1738 | "font": { 1739 | "color": "#2a3f5f" 1740 | }, 1741 | "geo": { 1742 | "bgcolor": "white", 1743 | "lakecolor": "white", 1744 | "landcolor": "#E5ECF6", 1745 | "showlakes": true, 1746 | "showland": true, 1747 | "subunitcolor": "white" 1748 | }, 1749 | "hoverlabel": { 1750 | "align": "left" 1751 | }, 1752 | "hovermode": "closest", 1753 | "mapbox": { 1754 | "style": "light" 1755 | }, 1756 | "paper_bgcolor": "white", 1757 | "plot_bgcolor": "#E5ECF6", 1758 | "polar": { 1759 | 
"angularaxis": { 1760 | "gridcolor": "white", 1761 | "linecolor": "white", 1762 | "ticks": "" 1763 | }, 1764 | "bgcolor": "#E5ECF6", 1765 | "radialaxis": { 1766 | "gridcolor": "white", 1767 | "linecolor": "white", 1768 | "ticks": "" 1769 | } 1770 | }, 1771 | "scene": { 1772 | "xaxis": { 1773 | "backgroundcolor": "#E5ECF6", 1774 | "gridcolor": "white", 1775 | "gridwidth": 2, 1776 | "linecolor": "white", 1777 | "showbackground": true, 1778 | "ticks": "", 1779 | "zerolinecolor": "white" 1780 | }, 1781 | "yaxis": { 1782 | "backgroundcolor": "#E5ECF6", 1783 | "gridcolor": "white", 1784 | "gridwidth": 2, 1785 | "linecolor": "white", 1786 | "showbackground": true, 1787 | "ticks": "", 1788 | "zerolinecolor": "white" 1789 | }, 1790 | "zaxis": { 1791 | "backgroundcolor": "#E5ECF6", 1792 | "gridcolor": "white", 1793 | "gridwidth": 2, 1794 | "linecolor": "white", 1795 | "showbackground": true, 1796 | "ticks": "", 1797 | "zerolinecolor": "white" 1798 | } 1799 | }, 1800 | "shapedefaults": { 1801 | "line": { 1802 | "color": "#2a3f5f" 1803 | } 1804 | }, 1805 | "ternary": { 1806 | "aaxis": { 1807 | "gridcolor": "white", 1808 | "linecolor": "white", 1809 | "ticks": "" 1810 | }, 1811 | "baxis": { 1812 | "gridcolor": "white", 1813 | "linecolor": "white", 1814 | "ticks": "" 1815 | }, 1816 | "bgcolor": "#E5ECF6", 1817 | "caxis": { 1818 | "gridcolor": "white", 1819 | "linecolor": "white", 1820 | "ticks": "" 1821 | } 1822 | }, 1823 | "title": { 1824 | "x": 0.05 1825 | }, 1826 | "xaxis": { 1827 | "automargin": true, 1828 | "gridcolor": "white", 1829 | "linecolor": "white", 1830 | "ticks": "", 1831 | "title": { 1832 | "standoff": 15 1833 | }, 1834 | "zerolinecolor": "white", 1835 | "zerolinewidth": 2 1836 | }, 1837 | "yaxis": { 1838 | "automargin": true, 1839 | "gridcolor": "white", 1840 | "linecolor": "white", 1841 | "ticks": "", 1842 | "title": { 1843 | "standoff": 15 1844 | }, 1845 | "zerolinecolor": "white", 1846 | "zerolinewidth": 2 1847 | } 1848 | } 1849 | }, 1850 | "title": { 1851 | 
"text": "Anomalous Transactions" 1852 | }, 1853 | "xaxis": { 1854 | "anchor": "y", 1855 | "autorange": true, 1856 | "domain": [ 1857 | 0, 1858 | 1 1859 | ], 1860 | "range": [ 1861 | -0.24975222993062435, 1862 | 4.249752229930625 1863 | ], 1864 | "title": { 1865 | "text": "name" 1866 | }, 1867 | "type": "category" 1868 | }, 1869 | "yaxis": { 1870 | "anchor": "x", 1871 | "autorange": true, 1872 | "domain": [ 1873 | 0, 1874 | 1 1875 | ], 1876 | "range": [ 1877 | 664.0573248407643, 1878 | 1977.9426751592357 1879 | ], 1880 | "title": { 1881 | "text": "amount" 1882 | }, 1883 | "type": "linear" 1884 | } 1885 | } 1886 | }, 1887 | "text/html": [ 1888 | "
\n", 1889 | " \n", 1890 | " \n", 1891 | "
\n", 1892 | " \n", 1930 | "
" 1931 | ] 1932 | }, 1933 | "metadata": {}, 1934 | "output_type": "display_data" 1935 | } 1936 | ], 1937 | "source": [ 1938 | "# find anomalous transactions for 3 random card holders\n", 1939 | "import datetime\n", 1940 | "start_time = datetime.time(7,0,0)\n", 1941 | "end_time = datetime.time(9,0,0)\n", 1942 | "\n", 1943 | "anomalous_transactions = outlier.between_time(start_time, end_time).sort_values('amount', ascending=False)\n", 1944 | "\n", 1945 | "px.scatter(anomalous_transactions, x='name', y='amount', color='category', title='Anomalous Transactions')" 1946 | ] 1947 | }, 1948 | { 1949 | "cell_type": "markdown", 1950 | "metadata": {}, 1951 | "source": [ 1952 | " ## Identifying Outliers Using Interquartile Range" 1953 | ] 1954 | }, 1955 | { 1956 | "cell_type": "code", 1957 | "execution_count": 86, 1958 | "metadata": {}, 1959 | "outputs": [ 1960 | { 1961 | "name": "stdout", 1962 | "output_type": "stream", 1963 | "text": [ 1964 | "Percentiles: 25th=3.735, 75th=14.648, IQR=10.913\n", 1965 | "Identified outliers: 110\n", 1966 | "Non-outlier observations: 3390\n" 1967 | ] 1968 | }, 1969 | { 1970 | "data": { 1971 | "text/html": [ 1972 | "
\n", 1973 | "\n", 1986 | "\n", 1987 | " \n", 1988 | " \n", 1989 | " \n", 1990 | " \n", 1991 | " \n", 1992 | " \n", 1993 | " \n", 1994 | " \n", 1995 | " \n", 1996 | " \n", 1997 | " \n", 1998 | " \n", 1999 | " \n", 2000 | " \n", 2001 | " \n", 2002 | " \n", 2003 | " \n", 2004 | " \n", 2005 | " \n", 2006 | " \n", 2007 | " \n", 2008 | " \n", 2009 | " \n", 2010 | " \n", 2011 | " \n", 2012 | " \n", 2013 | " \n", 2014 | " \n", 2015 | " \n", 2016 | " \n", 2017 | " \n", 2018 | " \n", 2019 | " \n", 2020 | " \n", 2021 | " \n", 2022 | " \n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | " \n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | " \n", 2033 | " \n", 2034 | " \n", 2035 | " \n", 2036 | " \n", 2037 | " \n", 2038 | " \n", 2039 | " \n", 2040 | " \n", 2041 | " \n", 2042 | " \n", 2043 | " \n", 2044 | " \n", 2045 | " \n", 2046 | " \n", 2047 | " \n", 2048 | " \n", 2049 | " \n", 2050 | " \n", 2051 | " \n", 2052 | " \n", 2053 | " \n", 2054 | " \n", 2055 | " \n", 2056 | " \n", 2057 | " \n", 2058 | " \n", 2059 | " \n", 2060 | " \n", 2061 | " \n", 2062 | " \n", 2063 | " \n", 2064 | " \n", 2065 | " \n", 2066 | " \n", 2067 | " \n", 2068 | " \n", 2069 | " \n", 2070 | " \n", 2071 | " \n", 2072 | " \n", 2073 | " \n", 2074 | " \n", 2075 | " \n", 2076 | " \n", 2077 | " \n", 2078 | " \n", 2079 | " \n", 2080 | " \n", 2081 | " \n", 2082 | " \n", 2083 | " \n", 2084 | " \n", 2085 | " \n", 2086 | " \n", 2087 | " \n", 2088 | " \n", 2089 | " \n", 2090 | " \n", 2091 | " \n", 2092 | " \n", 2093 | " \n", 2094 | " \n", 2095 | " \n", 2096 | " \n", 2097 | " \n", 2098 | " \n", 2099 | " \n", 2100 | " \n", 2101 | " \n", 2102 | " \n", 2103 | " \n", 2104 | " \n", 2105 | " \n", 2106 | " \n", 2107 | " \n", 2108 | " \n", 2109 | " \n", 2110 | " \n", 2111 | " \n", 2112 | " \n", 2113 | " \n", 2114 | " \n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " 
\n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | " \n", 2244 | " \n", 2245 | " \n", 2246 | " \n", 2247 | " \n", 2248 | " \n", 2249 | " \n", 2250 | " \n", 2251 | " \n", 2252 | " \n", 2253 | " \n", 2254 | " \n", 2255 | " \n", 2256 | " \n", 2257 | " \n", 2258 | " \n", 2259 | " \n", 2260 | " \n", 2261 | " \n", 2262 | " \n", 2263 | " \n", 2264 | " \n", 2265 | " \n", 2266 | " \n", 2267 | " \n", 2268 | " \n", 2269 | " \n", 2270 | 
" \n", 2271 | " \n", 2272 | " \n", 2273 | " \n", 2274 | " \n", 2275 | " \n", 2276 | " \n", 2277 | " \n", 2278 | " \n", 2279 | " \n", 2280 | " \n", 2281 | " \n", 2282 | " \n", 2283 | " \n", 2284 | " \n", 2285 | " \n", 2286 | " \n", 2287 | " \n", 2288 | " \n", 2289 | " \n", 2290 | " \n", 2291 | " \n", 2292 | " \n", 2293 | " \n", 2294 | " \n", 2295 | " \n", 2296 | " \n", 2297 | " \n", 2298 | " \n", 2299 | " \n", 2300 | " \n", 2301 | " \n", 2302 | " \n", 2303 | " \n", 2304 | " \n", 2305 | " \n", 2306 | " \n", 2307 | " \n", 2308 | " \n", 2309 | " \n", 2310 | " \n", 2311 | " \n", 2312 | " \n", 2313 | " \n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | " \n", 2322 | " \n", 2323 | " \n", 2324 | " \n", 2325 | " \n", 2326 | " \n", 2327 | " \n", 2328 | " \n", 2329 | " \n", 2330 | " \n", 2331 | " \n", 2332 | " \n", 2333 | " \n", 2334 | " \n", 2335 | " \n", 2336 | " \n", 2337 | " \n", 2338 | " \n", 2339 | " \n", 2340 | " \n", 2341 | " \n", 2342 | " \n", 2343 | " \n", 2344 | " \n", 2345 | " \n", 2346 | " \n", 2347 | " \n", 2348 | " \n", 2349 | " \n", 2350 | " \n", 2351 | " \n", 2352 | " \n", 2353 | " \n", 2354 | " \n", 2355 | " \n", 2356 | " \n", 2357 | " \n", 2358 | " \n", 2359 | " \n", 2360 | " \n", 2361 | " \n", 2362 | " \n", 2363 | " \n", 2364 | " \n", 2365 | " \n", 2366 | " \n", 2367 | " \n", 2368 | " \n", 2369 | " \n", 2370 | " \n", 2371 | " \n", 2372 | " \n", 2373 | " \n", 2374 | " \n", 2375 | " \n", 2376 | " \n", 2377 | " \n", 2378 | " \n", 2379 | " \n", 2380 | " \n", 2381 | " \n", 2382 | " \n", 2383 | " \n", 2384 | " \n", 2385 | " \n", 2386 | " \n", 2387 | " \n", 2388 | " \n", 2389 | " \n", 2390 | " \n", 2391 | " \n", 2392 | " \n", 2393 | " \n", 2394 | " \n", 2395 | " \n", 2396 | " \n", 2397 | " \n", 2398 | " \n", 2399 | " \n", 2400 | " \n", 2401 | " \n", 2402 | " \n", 2403 | " \n", 2404 | " \n", 2405 | " \n", 2406 | " \n", 2407 | " \n", 2408 | " \n", 2409 | " \n", 2410 | " \n", 2411 | " \n", 2412 | " \n", 2413 
| " \n", 2414 | " \n", 2415 | " \n", 2416 | " \n", 2417 | " \n", 2418 | " \n", 2419 | " \n", 2420 | " \n", 2421 | " \n", 2422 | " \n", 2423 | " \n", 2424 | " \n", 2425 | " \n", 2426 | " \n", 2427 | " \n", 2428 | " \n", 2429 | " \n", 2430 | " \n", 2431 | " \n", 2432 | " \n", 2433 | " \n", 2434 | " \n", 2435 | " \n", 2436 | " \n", 2437 | " \n", 2438 | " \n", 2439 | " \n", 2440 | " \n", 2441 | " \n", 2442 | " \n", 2443 | " \n", 2444 | " \n", 2445 | " \n", 2446 | " \n", 2447 | " \n", 2448 | " \n", 2449 | " \n", 2450 | " \n", 2451 | " \n", 2452 | " \n", 2453 | " \n", 2454 | " \n", 2455 | " \n", 2456 | " \n", 2457 | " \n", 2458 | " \n", 2459 | " \n", 2460 | " \n", 2461 | " \n", 2462 | " \n", 2463 | " \n", 2464 | " \n", 2465 | " \n", 2466 | " \n", 2467 | " \n", 2468 | " \n", 2469 | " \n", 2470 | " \n", 2471 | " \n", 2472 | " \n", 2473 | " \n", 2474 | " \n", 2475 | " \n", 2476 | " \n", 2477 | " \n", 2478 | " \n", 2479 | " \n", 2480 | " \n", 2481 | " \n", 2482 | " \n", 2483 | " \n", 2484 | " \n", 2485 | " \n", 2486 | " \n", 2487 | " \n", 2488 | " \n", 2489 | " \n", 2490 | " \n", 2491 | " \n", 2492 | " \n", 2493 | " \n", 2494 | " \n", 2495 | " \n", 2496 | " \n", 2497 | " \n", 2498 | " \n", 2499 | " \n", 2500 | " \n", 2501 | " \n", 2502 | " \n", 2503 | " \n", 2504 | " \n", 2505 | " \n", 2506 | " \n", 2507 | " \n", 2508 | " \n", 2509 | " \n", 2510 | " \n", 2511 | " \n", 2512 | " \n", 2513 | " \n", 2514 | " \n", 2515 | " \n", 2516 | " \n", 2517 | " \n", 2518 | " \n", 2519 | " \n", 2520 | " \n", 2521 | " \n", 2522 | " \n", 2523 | " \n", 2524 | " \n", 2525 | " \n", 2526 | " \n", 2527 | " \n", 2528 | " \n", 2529 | " \n", 2530 | " \n", 2531 | " \n", 2532 | " \n", 2533 | " \n", 2534 | " \n", 2535 | " \n", 2536 | " \n", 2537 | " \n", 2538 | " \n", 2539 | " \n", 2540 | " \n", 2541 | " \n", 2542 | " \n", 2543 | " \n", 2544 | " \n", 2545 | " \n", 2546 | " \n", 2547 | " \n", 2548 | " \n", 2549 | " \n", 2550 | " \n", 2551 | " \n", 2552 | " \n", 2553 | " \n", 2554 | " \n", 2555 | " \n", 
2556 | " \n", 2557 | " \n", 2558 | "
idnamecardamountcategoryoutlier
date
2018-01-02 23:27:4612Megan Price5018796574651031.0restaurantTrue
2018-01-04 03:05:187Sean Taylor35169523960802471685.0food truckTrue
2018-01-07 01:10:5418Malik Carlson344119623920892175.0pubTrue
2018-01-08 02:34:326Beth Hernandez35813459435439421029.0barTrue
2018-01-08 11:15:3618Malik Carlson344119623920892333.0restaurantTrue
2018-01-11 13:20:3116Crystal Clark5570600642865857229.0pubTrue
2018-01-22 08:07:0316Crystal Clark55706006428658571131.0restaurantTrue
2018-01-23 06:29:3712Megan Price5018796574651678.0pubTrue
2018-01-24 13:17:191Robert Johnson47610496457115558111691.0coffee shopTrue
2018-01-30 18:31:0025Nancy Contreras43196535135071177.0restaurantTrue
2018-02-09 11:38:377Sean Taylor3516952396080247445.0barTrue
2018-02-17 01:27:1916Crystal Clark55706006428658571430.0restaurantTrue
2018-02-19 16:00:437Sean Taylor35169523960802471072.0food truckTrue
2018-02-19 22:48:2518Malik Carlson3441196239208921839.0restaurantTrue
2018-02-27 15:27:326Beth Hernandez35813459435439421145.0barTrue
2018-03-01 21:29:053Elizabeth Sawyer300782990535121119.0pubTrue
2018-03-04 15:50:539Laurie Gibbs301819639133401534.0coffee shopTrue
2018-03-05 08:26:0816Crystal Clark55706006428658571617.0barTrue
2018-03-06 07:18:0925Nancy Contreras43196535135071334.0barTrue
2018-03-09 04:51:386Beth Hernandez3581345943543942389.0restaurantTrue
2018-03-12 00:44:0112Megan Price5018796574651530.0coffee shopTrue
2018-03-20 10:19:2512Megan Price501879657465852.0pubTrue
2018-03-20 13:05:5424Stephanie Dalton301429666991871011.0barTrue
2018-03-26 07:41:599Laurie Gibbs301819639133401009.0coffee shopTrue
2018-04-01 07:17:2125Nancy Contreras4319653513507100.0coffee shopTrue
2018-04-03 03:23:3718Malik Carlson3441196239208921077.0restaurantTrue
2018-04-08 06:03:5025Nancy Contreras43196535135071063.0pubTrue
2018-04-09 10:24:321Robert Johnson4761049645711555811283.0restaurantTrue
2018-04-09 18:28:2525Nancy Contreras4319653513507269.0restaurantTrue
2018-04-10 06:08:017Sean Taylor3516952396080247543.0food truckTrue
.....................
2018-10-11 23:29:333Elizabeth Sawyer30078299053512206.0restaurantTrue
2018-10-16 13:27:331Robert Johnson4761049645711555811484.0food truckTrue
2018-10-19 01:07:373Elizabeth Sawyer300782990535121053.0restaurantTrue
2018-10-19 12:32:3716Crystal Clark5570600642865857178.0food truckTrue
2018-10-23 22:47:1316Crystal Clark5570600642865857393.0food truckTrue
2018-10-28 02:12:5825Nancy Contreras4319653513507137.0pubTrue
2018-11-13 05:58:4724Stephanie Dalton30142966699187466.0barTrue
2018-11-13 17:07:2516Crystal Clark55706006428658571911.0restaurantTrue
2018-11-17 05:30:4318Malik Carlson3441196239208921769.0food truckTrue
2018-11-20 05:24:283Elizabeth Sawyer300782990535121054.0barTrue
2018-11-23 09:08:0512Megan Price501879657465233.0restaurantTrue
2018-11-25 20:44:0712Megan Price5018796574651123.0barTrue
2018-11-26 20:54:391Robert Johnson4761049645711555811267.0food truckTrue
2018-11-27 15:36:0512Megan Price5018796574651802.0barTrue
2018-11-27 17:20:296Beth Hernandez35813459435439421279.0restaurantTrue
2018-11-27 17:27:341Robert Johnson47610496457115558111660.0pubTrue
2018-12-03 02:38:5216Crystal Clark55706006428658571014.0restaurantTrue
2018-12-05 19:24:279Laurie Gibbs3018196391334057.0barTrue
2018-12-07 07:22:031Robert Johnson47610496457115558111894.0barTrue
2018-12-13 12:09:5818Malik Carlson3441196239208921154.0restaurantTrue
2018-12-13 15:51:597Sean Taylor35169523960802472249.0food truckTrue
2018-12-14 08:51:4112Megan Price501879657465748.0pubTrue
2018-12-18 13:33:3725Nancy Contreras43196535135071074.0coffee shopTrue
2018-12-18 17:20:337Sean Taylor35169523960802471296.0barTrue
2018-12-19 16:10:039Laurie Gibbs301819639133401724.0pubTrue
2018-12-21 09:56:3224Stephanie Dalton301429666991871301.0pubTrue
2018-12-24 15:55:0616Crystal Clark55706006428658571634.0pubTrue
2018-12-25 19:10:4224Stephanie Dalton301429666991871035.0pubTrue
2018-12-28 16:20:313Elizabeth Sawyer30078299053512313.0pubTrue
2018-12-30 23:23:091Robert Johnson47610496457115558111033.0pubTrue
\n", 2559 | "

110 rows × 6 columns

\n", 2560 | "
" 2561 | ], 2562 | "text/plain": [ 2563 | " id name card amount \\\n", 2564 | "date \n", 2565 | "2018-01-02 23:27:46 12 Megan Price 501879657465 1031.0 \n", 2566 | "2018-01-04 03:05:18 7 Sean Taylor 3516952396080247 1685.0 \n", 2567 | "2018-01-07 01:10:54 18 Malik Carlson 344119623920892 175.0 \n", 2568 | "2018-01-08 02:34:32 6 Beth Hernandez 3581345943543942 1029.0 \n", 2569 | "2018-01-08 11:15:36 18 Malik Carlson 344119623920892 333.0 \n", 2570 | "2018-01-11 13:20:31 16 Crystal Clark 5570600642865857 229.0 \n", 2571 | "2018-01-22 08:07:03 16 Crystal Clark 5570600642865857 1131.0 \n", 2572 | "2018-01-23 06:29:37 12 Megan Price 501879657465 1678.0 \n", 2573 | "2018-01-24 13:17:19 1 Robert Johnson 4761049645711555811 1691.0 \n", 2574 | "2018-01-30 18:31:00 25 Nancy Contreras 4319653513507 1177.0 \n", 2575 | "2018-02-09 11:38:37 7 Sean Taylor 3516952396080247 445.0 \n", 2576 | "2018-02-17 01:27:19 16 Crystal Clark 5570600642865857 1430.0 \n", 2577 | "2018-02-19 16:00:43 7 Sean Taylor 3516952396080247 1072.0 \n", 2578 | "2018-02-19 22:48:25 18 Malik Carlson 344119623920892 1839.0 \n", 2579 | "2018-02-27 15:27:32 6 Beth Hernandez 3581345943543942 1145.0 \n", 2580 | "2018-03-01 21:29:05 3 Elizabeth Sawyer 30078299053512 1119.0 \n", 2581 | "2018-03-04 15:50:53 9 Laurie Gibbs 30181963913340 1534.0 \n", 2582 | "2018-03-05 08:26:08 16 Crystal Clark 5570600642865857 1617.0 \n", 2583 | "2018-03-06 07:18:09 25 Nancy Contreras 4319653513507 1334.0 \n", 2584 | "2018-03-09 04:51:38 6 Beth Hernandez 3581345943543942 389.0 \n", 2585 | "2018-03-12 00:44:01 12 Megan Price 501879657465 1530.0 \n", 2586 | "2018-03-20 10:19:25 12 Megan Price 501879657465 852.0 \n", 2587 | "2018-03-20 13:05:54 24 Stephanie Dalton 30142966699187 1011.0 \n", 2588 | "2018-03-26 07:41:59 9 Laurie Gibbs 30181963913340 1009.0 \n", 2589 | "2018-04-01 07:17:21 25 Nancy Contreras 4319653513507 100.0 \n", 2590 | "2018-04-03 03:23:37 18 Malik Carlson 344119623920892 1077.0 \n", 2591 | "2018-04-08 06:03:50 25 Nancy 
Contreras 4319653513507 1063.0 \n", 2592 | "2018-04-09 10:24:32 1 Robert Johnson 4761049645711555811 283.0 \n", 2593 | "2018-04-09 18:28:25 25 Nancy Contreras 4319653513507 269.0 \n", 2594 | "2018-04-10 06:08:01 7 Sean Taylor 3516952396080247 543.0 \n", 2595 | "... .. ... ... ... \n", 2596 | "2018-10-11 23:29:33 3 Elizabeth Sawyer 30078299053512 206.0 \n", 2597 | "2018-10-16 13:27:33 1 Robert Johnson 4761049645711555811 484.0 \n", 2598 | "2018-10-19 01:07:37 3 Elizabeth Sawyer 30078299053512 1053.0 \n", 2599 | "2018-10-19 12:32:37 16 Crystal Clark 5570600642865857 178.0 \n", 2600 | "2018-10-23 22:47:13 16 Crystal Clark 5570600642865857 393.0 \n", 2601 | "2018-10-28 02:12:58 25 Nancy Contreras 4319653513507 137.0 \n", 2602 | "2018-11-13 05:58:47 24 Stephanie Dalton 30142966699187 466.0 \n", 2603 | "2018-11-13 17:07:25 16 Crystal Clark 5570600642865857 1911.0 \n", 2604 | "2018-11-17 05:30:43 18 Malik Carlson 344119623920892 1769.0 \n", 2605 | "2018-11-20 05:24:28 3 Elizabeth Sawyer 30078299053512 1054.0 \n", 2606 | "2018-11-23 09:08:05 12 Megan Price 501879657465 233.0 \n", 2607 | "2018-11-25 20:44:07 12 Megan Price 501879657465 1123.0 \n", 2608 | "2018-11-26 20:54:39 1 Robert Johnson 4761049645711555811 267.0 \n", 2609 | "2018-11-27 15:36:05 12 Megan Price 501879657465 1802.0 \n", 2610 | "2018-11-27 17:20:29 6 Beth Hernandez 3581345943543942 1279.0 \n", 2611 | "2018-11-27 17:27:34 1 Robert Johnson 4761049645711555811 1660.0 \n", 2612 | "2018-12-03 02:38:52 16 Crystal Clark 5570600642865857 1014.0 \n", 2613 | "2018-12-05 19:24:27 9 Laurie Gibbs 30181963913340 57.0 \n", 2614 | "2018-12-07 07:22:03 1 Robert Johnson 4761049645711555811 1894.0 \n", 2615 | "2018-12-13 12:09:58 18 Malik Carlson 344119623920892 1154.0 \n", 2616 | "2018-12-13 15:51:59 7 Sean Taylor 3516952396080247 2249.0 \n", 2617 | "2018-12-14 08:51:41 12 Megan Price 501879657465 748.0 \n", 2618 | "2018-12-18 13:33:37 25 Nancy Contreras 4319653513507 1074.0 \n", 2619 | "2018-12-18 17:20:33 7 Sean Taylor 
3516952396080247 1296.0 \n", 2620 | "2018-12-19 16:10:03 9 Laurie Gibbs 30181963913340 1724.0 \n", 2621 | "2018-12-21 09:56:32 24 Stephanie Dalton 30142966699187 1301.0 \n", 2622 | "2018-12-24 15:55:06 16 Crystal Clark 5570600642865857 1634.0 \n", 2623 | "2018-12-25 19:10:42 24 Stephanie Dalton 30142966699187 1035.0 \n", 2624 | "2018-12-28 16:20:31 3 Elizabeth Sawyer 30078299053512 313.0 \n", 2625 | "2018-12-30 23:23:09 1 Robert Johnson 4761049645711555811 1033.0 \n", 2626 | "\n", 2627 | " category outlier \n", 2628 | "date \n", 2629 | "2018-01-02 23:27:46 restaurant True \n", 2630 | "2018-01-04 03:05:18 food truck True \n", 2631 | "2018-01-07 01:10:54 pub True \n", 2632 | "2018-01-08 02:34:32 bar True \n", 2633 | "2018-01-08 11:15:36 restaurant True \n", 2634 | "2018-01-11 13:20:31 pub True \n", 2635 | "2018-01-22 08:07:03 restaurant True \n", 2636 | "2018-01-23 06:29:37 pub True \n", 2637 | "2018-01-24 13:17:19 coffee shop True \n", 2638 | "2018-01-30 18:31:00 restaurant True \n", 2639 | "2018-02-09 11:38:37 bar True \n", 2640 | "2018-02-17 01:27:19 restaurant True \n", 2641 | "2018-02-19 16:00:43 food truck True \n", 2642 | "2018-02-19 22:48:25 restaurant True \n", 2643 | "2018-02-27 15:27:32 bar True \n", 2644 | "2018-03-01 21:29:05 pub True \n", 2645 | "2018-03-04 15:50:53 coffee shop True \n", 2646 | "2018-03-05 08:26:08 bar True \n", 2647 | "2018-03-06 07:18:09 bar True \n", 2648 | "2018-03-09 04:51:38 restaurant True \n", 2649 | "2018-03-12 00:44:01 coffee shop True \n", 2650 | "2018-03-20 10:19:25 pub True \n", 2651 | "2018-03-20 13:05:54 bar True \n", 2652 | "2018-03-26 07:41:59 coffee shop True \n", 2653 | "2018-04-01 07:17:21 coffee shop True \n", 2654 | "2018-04-03 03:23:37 restaurant True \n", 2655 | "2018-04-08 06:03:50 pub True \n", 2656 | "2018-04-09 10:24:32 restaurant True \n", 2657 | "2018-04-09 18:28:25 restaurant True \n", 2658 | "2018-04-10 06:08:01 food truck True \n", 2659 | "... ... ... 
\n", 2660 | "2018-10-11 23:29:33 restaurant True \n", 2661 | "2018-10-16 13:27:33 food truck True \n", 2662 | "2018-10-19 01:07:37 restaurant True \n", 2663 | "2018-10-19 12:32:37 food truck True \n", 2664 | "2018-10-23 22:47:13 food truck True \n", 2665 | "2018-10-28 02:12:58 pub True \n", 2666 | "2018-11-13 05:58:47 bar True \n", 2667 | "2018-11-13 17:07:25 restaurant True \n", 2668 | "2018-11-17 05:30:43 food truck True \n", 2669 | "2018-11-20 05:24:28 bar True \n", 2670 | "2018-11-23 09:08:05 restaurant True \n", 2671 | "2018-11-25 20:44:07 bar True \n", 2672 | "2018-11-26 20:54:39 food truck True \n", 2673 | "2018-11-27 15:36:05 bar True \n", 2674 | "2018-11-27 17:20:29 restaurant True \n", 2675 | "2018-11-27 17:27:34 pub True \n", 2676 | "2018-12-03 02:38:52 restaurant True \n", 2677 | "2018-12-05 19:24:27 bar True \n", 2678 | "2018-12-07 07:22:03 bar True \n", 2679 | "2018-12-13 12:09:58 restaurant True \n", 2680 | "2018-12-13 15:51:59 food truck True \n", 2681 | "2018-12-14 08:51:41 pub True \n", 2682 | "2018-12-18 13:33:37 coffee shop True \n", 2683 | "2018-12-18 17:20:33 bar True \n", 2684 | "2018-12-19 16:10:03 pub True \n", 2685 | "2018-12-21 09:56:32 pub True \n", 2686 | "2018-12-24 15:55:06 pub True \n", 2687 | "2018-12-25 19:10:42 pub True \n", 2688 | "2018-12-28 16:20:31 pub True \n", 2689 | "2018-12-30 23:23:09 pub True \n", 2690 | "\n", 2691 | "[110 rows x 6 columns]" 2692 | ] 2693 | }, 2694 | "execution_count": 86, 2695 | "metadata": {}, 2696 | "output_type": "execute_result" 2697 | } 2698 | ], 2699 | "source": [ 2700 | "# code a function to identify outliers based on interquartile range\n", 2701 | "# calculate interquartile range\n", 2702 | "q25, q75 = percentile(transaction_df['amount'], 25), percentile(transaction_df['amount'], 75)\n", 2703 | "iqr = q75 - q25\n", 2704 | "print('Percentiles: 25th=%.3f, 75th=%.3f, IQR=%.3f' % (q25, q75, iqr))\n", 2705 | "# calculate the outlier cutoff\n", 2706 | "cut_off = iqr * 1.5\n", 2707 | "lower, upper = 
q25 - cut_off, q75 + cut_off\n", 2708 | "# identify outliers\n", 2709 | "outliers_2 = [x for x in transaction_df['amount'] if x < lower or x > upper]\n", 2710 | "print('Identified outliers: %d' % len(outliers_2))\n", 2711 | "# remove outliers\n", 2712 | "outliers_removed_2 = [x for x in transaction_df['amount'] if x >= lower and x <= upper]\n", 2713 | "print('Non-outlier observations: %d' % len(outliers_removed_2))\n", 2714 | "\n", 2715 | "transaction_df['outlier'] = (transaction_df['amount'] > upper) | (transaction_df['amount'] < lower)\n", 2716 | "\n", 2717 | "outlier_2 = transaction_df[transaction_df['outlier']==True]\n", 2718 | "outlier_2\n" 2719 | ] 2720 | }, 2721 | { 2722 | "cell_type": "code", 2723 | "execution_count": 89, 2724 | "metadata": {}, 2725 | "outputs": [ 2726 | { 2727 | "data": { 2728 | "text/html": [ 2729 | "
\n", 2730 | "\n", 2743 | "\n", 2744 | " \n", 2745 | " \n", 2746 | " \n", 2747 | " \n", 2748 | " \n", 2749 | " \n", 2750 | " \n", 2751 | " \n", 2752 | " \n", 2753 | " \n", 2754 | " \n", 2755 | " \n", 2756 | " \n", 2757 | " \n", 2758 | " \n", 2759 | " \n", 2760 | " \n", 2761 | " \n", 2762 | " \n", 2763 | " \n", 2764 | " \n", 2765 | " \n", 2766 | " \n", 2767 | " \n", 2768 | " \n", 2769 | " \n", 2770 | " \n", 2771 | " \n", 2772 | " \n", 2773 | " \n", 2774 | " \n", 2775 | " \n", 2776 | " \n", 2777 | " \n", 2778 | " \n", 2779 | " \n", 2780 | " \n", 2781 | " \n", 2782 | " \n", 2783 | " \n", 2784 | " \n", 2785 | " \n", 2786 | " \n", 2787 | " \n", 2788 | " \n", 2789 | " \n", 2790 | " \n", 2791 | " \n", 2792 | " \n", 2793 | " \n", 2794 | " \n", 2795 | " \n", 2796 | " \n", 2797 | " \n", 2798 | " \n", 2799 | " \n", 2800 | " \n", 2801 | " \n", 2802 | " \n", 2803 | " \n", 2804 | " \n", 2805 | " \n", 2806 | " \n", 2807 | " \n", 2808 | " \n", 2809 | " \n", 2810 | " \n", 2811 | " \n", 2812 | " \n", 2813 | " \n", 2814 | " \n", 2815 | " \n", 2816 | " \n", 2817 | " \n", 2818 | " \n", 2819 | " \n", 2820 | " \n", 2821 | " \n", 2822 | " \n", 2823 | " \n", 2824 | " \n", 2825 | " \n", 2826 | " \n", 2827 | " \n", 2828 | " \n", 2829 | " \n", 2830 | " \n", 2831 | " \n", 2832 | " \n", 2833 | " \n", 2834 | " \n", 2835 | " \n", 2836 | " \n", 2837 | " \n", 2838 | " \n", 2839 | " \n", 2840 | " \n", 2841 | " \n", 2842 | " \n", 2843 | " \n", 2844 | " \n", 2845 | " \n", 2846 | " \n", 2847 | "
idnamecardamountcategoryoutlier
date
2018-12-07 07:22:031Robert Johnson47610496457115558111894.0barTrue
2018-03-05 08:26:0816Crystal Clark55706006428658571617.0barTrue
2018-03-06 07:18:0925Nancy Contreras43196535135071334.0barTrue
2018-01-22 08:07:0316Crystal Clark55706006428658571131.0restaurantTrue
2018-09-26 08:48:401Robert Johnson47610496457115558111060.0restaurantTrue
2018-09-06 08:28:551Robert Johnson47610496457115558111017.0barTrue
2018-03-26 07:41:599Laurie Gibbs301819639133401009.0coffee shopTrue
2018-12-14 08:51:4112Megan Price501879657465748.0pubTrue
2018-04-01 07:17:2125Nancy Contreras4319653513507100.0coffee shopTrue
\n", 2848 | "
" 2849 | ], 2850 | "text/plain": [ 2851 | " id name card amount \\\n", 2852 | "date \n", 2853 | "2018-12-07 07:22:03 1 Robert Johnson 4761049645711555811 1894.0 \n", 2854 | "2018-03-05 08:26:08 16 Crystal Clark 5570600642865857 1617.0 \n", 2855 | "2018-03-06 07:18:09 25 Nancy Contreras 4319653513507 1334.0 \n", 2856 | "2018-01-22 08:07:03 16 Crystal Clark 5570600642865857 1131.0 \n", 2857 | "2018-09-26 08:48:40 1 Robert Johnson 4761049645711555811 1060.0 \n", 2858 | "2018-09-06 08:28:55 1 Robert Johnson 4761049645711555811 1017.0 \n", 2859 | "2018-03-26 07:41:59 9 Laurie Gibbs 30181963913340 1009.0 \n", 2860 | "2018-12-14 08:51:41 12 Megan Price 501879657465 748.0 \n", 2861 | "2018-04-01 07:17:21 25 Nancy Contreras 4319653513507 100.0 \n", 2862 | "\n", 2863 | " category outlier \n", 2864 | "date \n", 2865 | "2018-12-07 07:22:03 bar True \n", 2866 | "2018-03-05 08:26:08 bar True \n", 2867 | "2018-03-06 07:18:09 bar True \n", 2868 | "2018-01-22 08:07:03 restaurant True \n", 2869 | "2018-09-26 08:48:40 restaurant True \n", 2870 | "2018-09-06 08:28:55 bar True \n", 2871 | "2018-03-26 07:41:59 coffee shop True \n", 2872 | "2018-12-14 08:51:41 pub True \n", 2873 | "2018-04-01 07:17:21 coffee shop True " 2874 | ] 2875 | }, 2876 | "execution_count": 89, 2877 | "metadata": {}, 2878 | "output_type": "execute_result" 2879 | } 2880 | ], 2881 | "source": [ 2882 | "# find anomalous transactions for 3 random card holders\n", 2883 | "anomalous_transactions2 = outlier_2.between_time(start_time, end_time).sort_values('amount', ascending=False)\n", 2884 | "anomalous_transactions2\n", 2885 | "\n" 2886 | ] 2887 | }, 2888 | { 2889 | "cell_type": "code", 2890 | "execution_count": 95, 2891 | "metadata": {}, 2892 | "outputs": [ 2893 | { 2894 | "data": { 2895 | "application/vnd.plotly.v1+json": { 2896 | "config": { 2897 | "plotlyServerURL": "https://plot.ly" 2898 | }, 2899 | "data": [ 2900 | { 2901 | "hovertemplate": "category=bar
name=%{x}
amount=%{y}", 2902 | "legendgroup": "bar", 2903 | "marker": { 2904 | "color": "#636efa", 2905 | "symbol": "circle" 2906 | }, 2907 | "mode": "markers", 2908 | "name": "bar", 2909 | "showlegend": true, 2910 | "type": "scatter", 2911 | "x": [ 2912 | "Robert Johnson", 2913 | "Crystal Clark", 2914 | "Nancy Contreras", 2915 | "Robert Johnson" 2916 | ], 2917 | "xaxis": "x", 2918 | "y": [ 2919 | 1894, 2920 | 1617, 2921 | 1334, 2922 | 1017 2923 | ], 2924 | "yaxis": "y" 2925 | }, 2926 | { 2927 | "hovertemplate": "category=restaurant
name=%{x}
amount=%{y}", 2928 | "legendgroup": "restaurant", 2929 | "marker": { 2930 | "color": "#EF553B", 2931 | "symbol": "circle" 2932 | }, 2933 | "mode": "markers", 2934 | "name": "restaurant", 2935 | "showlegend": true, 2936 | "type": "scatter", 2937 | "x": [ 2938 | "Crystal Clark", 2939 | "Robert Johnson" 2940 | ], 2941 | "xaxis": "x", 2942 | "y": [ 2943 | 1131, 2944 | 1060 2945 | ], 2946 | "yaxis": "y" 2947 | }, 2948 | { 2949 | "hovertemplate": "category=coffee shop
name=%{x}
amount=%{y}", 2950 | "legendgroup": "coffee shop", 2951 | "marker": { 2952 | "color": "#00cc96", 2953 | "symbol": "circle" 2954 | }, 2955 | "mode": "markers", 2956 | "name": "coffee shop", 2957 | "showlegend": true, 2958 | "type": "scatter", 2959 | "x": [ 2960 | "Laurie Gibbs", 2961 | "Nancy Contreras" 2962 | ], 2963 | "xaxis": "x", 2964 | "y": [ 2965 | 1009, 2966 | 100 2967 | ], 2968 | "yaxis": "y" 2969 | }, 2970 | { 2971 | "hovertemplate": "category=pub
name=%{x}
amount=%{y}", 2972 | "legendgroup": "pub", 2973 | "marker": { 2974 | "color": "#ab63fa", 2975 | "symbol": "circle" 2976 | }, 2977 | "mode": "markers", 2978 | "name": "pub", 2979 | "showlegend": true, 2980 | "type": "scatter", 2981 | "x": [ 2982 | "Megan Price" 2983 | ], 2984 | "xaxis": "x", 2985 | "y": [ 2986 | 748 2987 | ], 2988 | "yaxis": "y" 2989 | } 2990 | ], 2991 | "layout": { 2992 | "autosize": true, 2993 | "legend": { 2994 | "title": { 2995 | "text": "category" 2996 | }, 2997 | "tracegroupgap": 0 2998 | }, 2999 | "template": { 3000 | "data": { 3001 | "bar": [ 3002 | { 3003 | "error_x": { 3004 | "color": "#2a3f5f" 3005 | }, 3006 | "error_y": { 3007 | "color": "#2a3f5f" 3008 | }, 3009 | "marker": { 3010 | "line": { 3011 | "color": "#E5ECF6", 3012 | "width": 0.5 3013 | } 3014 | }, 3015 | "type": "bar" 3016 | } 3017 | ], 3018 | "barpolar": [ 3019 | { 3020 | "marker": { 3021 | "line": { 3022 | "color": "#E5ECF6", 3023 | "width": 0.5 3024 | } 3025 | }, 3026 | "type": "barpolar" 3027 | } 3028 | ], 3029 | "carpet": [ 3030 | { 3031 | "aaxis": { 3032 | "endlinecolor": "#2a3f5f", 3033 | "gridcolor": "white", 3034 | "linecolor": "white", 3035 | "minorgridcolor": "white", 3036 | "startlinecolor": "#2a3f5f" 3037 | }, 3038 | "baxis": { 3039 | "endlinecolor": "#2a3f5f", 3040 | "gridcolor": "white", 3041 | "linecolor": "white", 3042 | "minorgridcolor": "white", 3043 | "startlinecolor": "#2a3f5f" 3044 | }, 3045 | "type": "carpet" 3046 | } 3047 | ], 3048 | "choropleth": [ 3049 | { 3050 | "colorbar": { 3051 | "outlinewidth": 0, 3052 | "ticks": "" 3053 | }, 3054 | "type": "choropleth" 3055 | } 3056 | ], 3057 | "contour": [ 3058 | { 3059 | "colorbar": { 3060 | "outlinewidth": 0, 3061 | "ticks": "" 3062 | }, 3063 | "colorscale": [ 3064 | [ 3065 | 0, 3066 | "#0d0887" 3067 | ], 3068 | [ 3069 | 0.1111111111111111, 3070 | "#46039f" 3071 | ], 3072 | [ 3073 | 0.2222222222222222, 3074 | "#7201a8" 3075 | ], 3076 | [ 3077 | 0.3333333333333333, 3078 | "#9c179e" 3079 | ], 3080 | [ 3081 | 
0.4444444444444444, 3082 | "#bd3786" 3083 | ], 3084 | [ 3085 | 0.5555555555555556, 3086 | "#d8576b" 3087 | ], 3088 | [ 3089 | 0.6666666666666666, 3090 | "#ed7953" 3091 | ], 3092 | [ 3093 | 0.7777777777777778, 3094 | "#fb9f3a" 3095 | ], 3096 | [ 3097 | 0.8888888888888888, 3098 | "#fdca26" 3099 | ], 3100 | [ 3101 | 1, 3102 | "#f0f921" 3103 | ] 3104 | ], 3105 | "type": "contour" 3106 | } 3107 | ], 3108 | "contourcarpet": [ 3109 | { 3110 | "colorbar": { 3111 | "outlinewidth": 0, 3112 | "ticks": "" 3113 | }, 3114 | "type": "contourcarpet" 3115 | } 3116 | ], 3117 | "heatmap": [ 3118 | { 3119 | "colorbar": { 3120 | "outlinewidth": 0, 3121 | "ticks": "" 3122 | }, 3123 | "colorscale": [ 3124 | [ 3125 | 0, 3126 | "#0d0887" 3127 | ], 3128 | [ 3129 | 0.1111111111111111, 3130 | "#46039f" 3131 | ], 3132 | [ 3133 | 0.2222222222222222, 3134 | "#7201a8" 3135 | ], 3136 | [ 3137 | 0.3333333333333333, 3138 | "#9c179e" 3139 | ], 3140 | [ 3141 | 0.4444444444444444, 3142 | "#bd3786" 3143 | ], 3144 | [ 3145 | 0.5555555555555556, 3146 | "#d8576b" 3147 | ], 3148 | [ 3149 | 0.6666666666666666, 3150 | "#ed7953" 3151 | ], 3152 | [ 3153 | 0.7777777777777778, 3154 | "#fb9f3a" 3155 | ], 3156 | [ 3157 | 0.8888888888888888, 3158 | "#fdca26" 3159 | ], 3160 | [ 3161 | 1, 3162 | "#f0f921" 3163 | ] 3164 | ], 3165 | "type": "heatmap" 3166 | } 3167 | ], 3168 | "heatmapgl": [ 3169 | { 3170 | "colorbar": { 3171 | "outlinewidth": 0, 3172 | "ticks": "" 3173 | }, 3174 | "colorscale": [ 3175 | [ 3176 | 0, 3177 | "#0d0887" 3178 | ], 3179 | [ 3180 | 0.1111111111111111, 3181 | "#46039f" 3182 | ], 3183 | [ 3184 | 0.2222222222222222, 3185 | "#7201a8" 3186 | ], 3187 | [ 3188 | 0.3333333333333333, 3189 | "#9c179e" 3190 | ], 3191 | [ 3192 | 0.4444444444444444, 3193 | "#bd3786" 3194 | ], 3195 | [ 3196 | 0.5555555555555556, 3197 | "#d8576b" 3198 | ], 3199 | [ 3200 | 0.6666666666666666, 3201 | "#ed7953" 3202 | ], 3203 | [ 3204 | 0.7777777777777778, 3205 | "#fb9f3a" 3206 | ], 3207 | [ 3208 | 0.8888888888888888, 3209 | 
"#fdca26" 3210 | ], 3211 | [ 3212 | 1, 3213 | "#f0f921" 3214 | ] 3215 | ], 3216 | "type": "heatmapgl" 3217 | } 3218 | ], 3219 | "histogram": [ 3220 | { 3221 | "marker": { 3222 | "colorbar": { 3223 | "outlinewidth": 0, 3224 | "ticks": "" 3225 | } 3226 | }, 3227 | "type": "histogram" 3228 | } 3229 | ], 3230 | "histogram2d": [ 3231 | { 3232 | "colorbar": { 3233 | "outlinewidth": 0, 3234 | "ticks": "" 3235 | }, 3236 | "colorscale": [ 3237 | [ 3238 | 0, 3239 | "#0d0887" 3240 | ], 3241 | [ 3242 | 0.1111111111111111, 3243 | "#46039f" 3244 | ], 3245 | [ 3246 | 0.2222222222222222, 3247 | "#7201a8" 3248 | ], 3249 | [ 3250 | 0.3333333333333333, 3251 | "#9c179e" 3252 | ], 3253 | [ 3254 | 0.4444444444444444, 3255 | "#bd3786" 3256 | ], 3257 | [ 3258 | 0.5555555555555556, 3259 | "#d8576b" 3260 | ], 3261 | [ 3262 | 0.6666666666666666, 3263 | "#ed7953" 3264 | ], 3265 | [ 3266 | 0.7777777777777778, 3267 | "#fb9f3a" 3268 | ], 3269 | [ 3270 | 0.8888888888888888, 3271 | "#fdca26" 3272 | ], 3273 | [ 3274 | 1, 3275 | "#f0f921" 3276 | ] 3277 | ], 3278 | "type": "histogram2d" 3279 | } 3280 | ], 3281 | "histogram2dcontour": [ 3282 | { 3283 | "colorbar": { 3284 | "outlinewidth": 0, 3285 | "ticks": "" 3286 | }, 3287 | "colorscale": [ 3288 | [ 3289 | 0, 3290 | "#0d0887" 3291 | ], 3292 | [ 3293 | 0.1111111111111111, 3294 | "#46039f" 3295 | ], 3296 | [ 3297 | 0.2222222222222222, 3298 | "#7201a8" 3299 | ], 3300 | [ 3301 | 0.3333333333333333, 3302 | "#9c179e" 3303 | ], 3304 | [ 3305 | 0.4444444444444444, 3306 | "#bd3786" 3307 | ], 3308 | [ 3309 | 0.5555555555555556, 3310 | "#d8576b" 3311 | ], 3312 | [ 3313 | 0.6666666666666666, 3314 | "#ed7953" 3315 | ], 3316 | [ 3317 | 0.7777777777777778, 3318 | "#fb9f3a" 3319 | ], 3320 | [ 3321 | 0.8888888888888888, 3322 | "#fdca26" 3323 | ], 3324 | [ 3325 | 1, 3326 | "#f0f921" 3327 | ] 3328 | ], 3329 | "type": "histogram2dcontour" 3330 | } 3331 | ], 3332 | "mesh3d": [ 3333 | { 3334 | "colorbar": { 3335 | "outlinewidth": 0, 3336 | "ticks": "" 3337 | }, 3338 | 
"type": "mesh3d" 3339 | } 3340 | ], 3341 | "parcoords": [ 3342 | { 3343 | "line": { 3344 | "colorbar": { 3345 | "outlinewidth": 0, 3346 | "ticks": "" 3347 | } 3348 | }, 3349 | "type": "parcoords" 3350 | } 3351 | ], 3352 | "pie": [ 3353 | { 3354 | "automargin": true, 3355 | "type": "pie" 3356 | } 3357 | ], 3358 | "scatter": [ 3359 | { 3360 | "marker": { 3361 | "colorbar": { 3362 | "outlinewidth": 0, 3363 | "ticks": "" 3364 | } 3365 | }, 3366 | "type": "scatter" 3367 | } 3368 | ], 3369 | "scatter3d": [ 3370 | { 3371 | "line": { 3372 | "colorbar": { 3373 | "outlinewidth": 0, 3374 | "ticks": "" 3375 | } 3376 | }, 3377 | "marker": { 3378 | "colorbar": { 3379 | "outlinewidth": 0, 3380 | "ticks": "" 3381 | } 3382 | }, 3383 | "type": "scatter3d" 3384 | } 3385 | ], 3386 | "scattercarpet": [ 3387 | { 3388 | "marker": { 3389 | "colorbar": { 3390 | "outlinewidth": 0, 3391 | "ticks": "" 3392 | } 3393 | }, 3394 | "type": "scattercarpet" 3395 | } 3396 | ], 3397 | "scattergeo": [ 3398 | { 3399 | "marker": { 3400 | "colorbar": { 3401 | "outlinewidth": 0, 3402 | "ticks": "" 3403 | } 3404 | }, 3405 | "type": "scattergeo" 3406 | } 3407 | ], 3408 | "scattergl": [ 3409 | { 3410 | "marker": { 3411 | "colorbar": { 3412 | "outlinewidth": 0, 3413 | "ticks": "" 3414 | } 3415 | }, 3416 | "type": "scattergl" 3417 | } 3418 | ], 3419 | "scattermapbox": [ 3420 | { 3421 | "marker": { 3422 | "colorbar": { 3423 | "outlinewidth": 0, 3424 | "ticks": "" 3425 | } 3426 | }, 3427 | "type": "scattermapbox" 3428 | } 3429 | ], 3430 | "scatterpolar": [ 3431 | { 3432 | "marker": { 3433 | "colorbar": { 3434 | "outlinewidth": 0, 3435 | "ticks": "" 3436 | } 3437 | }, 3438 | "type": "scatterpolar" 3439 | } 3440 | ], 3441 | "scatterpolargl": [ 3442 | { 3443 | "marker": { 3444 | "colorbar": { 3445 | "outlinewidth": 0, 3446 | "ticks": "" 3447 | } 3448 | }, 3449 | "type": "scatterpolargl" 3450 | } 3451 | ], 3452 | "scatterternary": [ 3453 | { 3454 | "marker": { 3455 | "colorbar": { 3456 | "outlinewidth": 0, 3457 | 
"ticks": "" 3458 | } 3459 | }, 3460 | "type": "scatterternary" 3461 | } 3462 | ], 3463 | "surface": [ 3464 | { 3465 | "colorbar": { 3466 | "outlinewidth": 0, 3467 | "ticks": "" 3468 | }, 3469 | "colorscale": [ 3470 | [ 3471 | 0, 3472 | "#0d0887" 3473 | ], 3474 | [ 3475 | 0.1111111111111111, 3476 | "#46039f" 3477 | ], 3478 | [ 3479 | 0.2222222222222222, 3480 | "#7201a8" 3481 | ], 3482 | [ 3483 | 0.3333333333333333, 3484 | "#9c179e" 3485 | ], 3486 | [ 3487 | 0.4444444444444444, 3488 | "#bd3786" 3489 | ], 3490 | [ 3491 | 0.5555555555555556, 3492 | "#d8576b" 3493 | ], 3494 | [ 3495 | 0.6666666666666666, 3496 | "#ed7953" 3497 | ], 3498 | [ 3499 | 0.7777777777777778, 3500 | "#fb9f3a" 3501 | ], 3502 | [ 3503 | 0.8888888888888888, 3504 | "#fdca26" 3505 | ], 3506 | [ 3507 | 1, 3508 | "#f0f921" 3509 | ] 3510 | ], 3511 | "type": "surface" 3512 | } 3513 | ], 3514 | "table": [ 3515 | { 3516 | "cells": { 3517 | "fill": { 3518 | "color": "#EBF0F8" 3519 | }, 3520 | "line": { 3521 | "color": "white" 3522 | } 3523 | }, 3524 | "header": { 3525 | "fill": { 3526 | "color": "#C8D4E3" 3527 | }, 3528 | "line": { 3529 | "color": "white" 3530 | } 3531 | }, 3532 | "type": "table" 3533 | } 3534 | ] 3535 | }, 3536 | "layout": { 3537 | "annotationdefaults": { 3538 | "arrowcolor": "#2a3f5f", 3539 | "arrowhead": 0, 3540 | "arrowwidth": 1 3541 | }, 3542 | "coloraxis": { 3543 | "colorbar": { 3544 | "outlinewidth": 0, 3545 | "ticks": "" 3546 | } 3547 | }, 3548 | "colorscale": { 3549 | "diverging": [ 3550 | [ 3551 | 0, 3552 | "#8e0152" 3553 | ], 3554 | [ 3555 | 0.1, 3556 | "#c51b7d" 3557 | ], 3558 | [ 3559 | 0.2, 3560 | "#de77ae" 3561 | ], 3562 | [ 3563 | 0.3, 3564 | "#f1b6da" 3565 | ], 3566 | [ 3567 | 0.4, 3568 | "#fde0ef" 3569 | ], 3570 | [ 3571 | 0.5, 3572 | "#f7f7f7" 3573 | ], 3574 | [ 3575 | 0.6, 3576 | "#e6f5d0" 3577 | ], 3578 | [ 3579 | 0.7, 3580 | "#b8e186" 3581 | ], 3582 | [ 3583 | 0.8, 3584 | "#7fbc41" 3585 | ], 3586 | [ 3587 | 0.9, 3588 | "#4d9221" 3589 | ], 3590 | [ 3591 | 1, 3592 | 
"#276419" 3593 | ] 3594 | ], 3595 | "sequential": [ 3596 | [ 3597 | 0, 3598 | "#0d0887" 3599 | ], 3600 | [ 3601 | 0.1111111111111111, 3602 | "#46039f" 3603 | ], 3604 | [ 3605 | 0.2222222222222222, 3606 | "#7201a8" 3607 | ], 3608 | [ 3609 | 0.3333333333333333, 3610 | "#9c179e" 3611 | ], 3612 | [ 3613 | 0.4444444444444444, 3614 | "#bd3786" 3615 | ], 3616 | [ 3617 | 0.5555555555555556, 3618 | "#d8576b" 3619 | ], 3620 | [ 3621 | 0.6666666666666666, 3622 | "#ed7953" 3623 | ], 3624 | [ 3625 | 0.7777777777777778, 3626 | "#fb9f3a" 3627 | ], 3628 | [ 3629 | 0.8888888888888888, 3630 | "#fdca26" 3631 | ], 3632 | [ 3633 | 1, 3634 | "#f0f921" 3635 | ] 3636 | ], 3637 | "sequentialminus": [ 3638 | [ 3639 | 0, 3640 | "#0d0887" 3641 | ], 3642 | [ 3643 | 0.1111111111111111, 3644 | "#46039f" 3645 | ], 3646 | [ 3647 | 0.2222222222222222, 3648 | "#7201a8" 3649 | ], 3650 | [ 3651 | 0.3333333333333333, 3652 | "#9c179e" 3653 | ], 3654 | [ 3655 | 0.4444444444444444, 3656 | "#bd3786" 3657 | ], 3658 | [ 3659 | 0.5555555555555556, 3660 | "#d8576b" 3661 | ], 3662 | [ 3663 | 0.6666666666666666, 3664 | "#ed7953" 3665 | ], 3666 | [ 3667 | 0.7777777777777778, 3668 | "#fb9f3a" 3669 | ], 3670 | [ 3671 | 0.8888888888888888, 3672 | "#fdca26" 3673 | ], 3674 | [ 3675 | 1, 3676 | "#f0f921" 3677 | ] 3678 | ] 3679 | }, 3680 | "colorway": [ 3681 | "#636efa", 3682 | "#EF553B", 3683 | "#00cc96", 3684 | "#ab63fa", 3685 | "#FFA15A", 3686 | "#19d3f3", 3687 | "#FF6692", 3688 | "#B6E880", 3689 | "#FF97FF", 3690 | "#FECB52" 3691 | ], 3692 | "font": { 3693 | "color": "#2a3f5f" 3694 | }, 3695 | "geo": { 3696 | "bgcolor": "white", 3697 | "lakecolor": "white", 3698 | "landcolor": "#E5ECF6", 3699 | "showlakes": true, 3700 | "showland": true, 3701 | "subunitcolor": "white" 3702 | }, 3703 | "hoverlabel": { 3704 | "align": "left" 3705 | }, 3706 | "hovermode": "closest", 3707 | "mapbox": { 3708 | "style": "light" 3709 | }, 3710 | "paper_bgcolor": "white", 3711 | "plot_bgcolor": "#E5ECF6", 3712 | "polar": { 3713 | 
"angularaxis": { 3714 | "gridcolor": "white", 3715 | "linecolor": "white", 3716 | "ticks": "" 3717 | }, 3718 | "bgcolor": "#E5ECF6", 3719 | "radialaxis": { 3720 | "gridcolor": "white", 3721 | "linecolor": "white", 3722 | "ticks": "" 3723 | } 3724 | }, 3725 | "scene": { 3726 | "xaxis": { 3727 | "backgroundcolor": "#E5ECF6", 3728 | "gridcolor": "white", 3729 | "gridwidth": 2, 3730 | "linecolor": "white", 3731 | "showbackground": true, 3732 | "ticks": "", 3733 | "zerolinecolor": "white" 3734 | }, 3735 | "yaxis": { 3736 | "backgroundcolor": "#E5ECF6", 3737 | "gridcolor": "white", 3738 | "gridwidth": 2, 3739 | "linecolor": "white", 3740 | "showbackground": true, 3741 | "ticks": "", 3742 | "zerolinecolor": "white" 3743 | }, 3744 | "zaxis": { 3745 | "backgroundcolor": "#E5ECF6", 3746 | "gridcolor": "white", 3747 | "gridwidth": 2, 3748 | "linecolor": "white", 3749 | "showbackground": true, 3750 | "ticks": "", 3751 | "zerolinecolor": "white" 3752 | } 3753 | }, 3754 | "shapedefaults": { 3755 | "line": { 3756 | "color": "#2a3f5f" 3757 | } 3758 | }, 3759 | "ternary": { 3760 | "aaxis": { 3761 | "gridcolor": "white", 3762 | "linecolor": "white", 3763 | "ticks": "" 3764 | }, 3765 | "baxis": { 3766 | "gridcolor": "white", 3767 | "linecolor": "white", 3768 | "ticks": "" 3769 | }, 3770 | "bgcolor": "#E5ECF6", 3771 | "caxis": { 3772 | "gridcolor": "white", 3773 | "linecolor": "white", 3774 | "ticks": "" 3775 | } 3776 | }, 3777 | "title": { 3778 | "x": 0.05 3779 | }, 3780 | "xaxis": { 3781 | "automargin": true, 3782 | "gridcolor": "white", 3783 | "linecolor": "white", 3784 | "ticks": "", 3785 | "title": { 3786 | "standoff": 15 3787 | }, 3788 | "zerolinecolor": "white", 3789 | "zerolinewidth": 2 3790 | }, 3791 | "yaxis": { 3792 | "automargin": true, 3793 | "gridcolor": "white", 3794 | "linecolor": "white", 3795 | "ticks": "", 3796 | "title": { 3797 | "standoff": 15 3798 | }, 3799 | "zerolinecolor": "white", 3800 | "zerolinewidth": 2 3801 | } 3802 | } 3803 | }, 3804 | "title": { 3805 | 
"text": "Early Hour Transactions" 3806 | }, 3807 | "xaxis": { 3808 | "anchor": "y", 3809 | "autorange": true, 3810 | "domain": [ 3811 | 0, 3812 | 1 3813 | ], 3814 | "range": [ 3815 | -0.24975222993062435, 3816 | 4.249752229930625 3817 | ], 3818 | "title": { 3819 | "text": "name" 3820 | }, 3821 | "type": "category" 3822 | }, 3823 | "yaxis": { 3824 | "anchor": "x", 3825 | "autorange": true, 3826 | "domain": [ 3827 | 0, 3828 | 1 3829 | ], 3830 | "range": [ 3831 | -31.407643312101925, 3832 | 2025.407643312102 3833 | ], 3834 | "title": { 3835 | "text": "amount" 3836 | }, 3837 | "type": "linear" 3838 | } 3839 | } 3840 | }, 3841 | "text/html": [ 3842 | "
\n", 3843 | " \n", 3844 | " \n", 3845 | "
\n", 3846 | " \n", 3884 | "
" 3885 | ] 3886 | }, 3887 | "metadata": {}, 3888 | "output_type": "display_data" 3889 | } 3890 | ], 3891 | "source": [ 3892 | "px.scatter(anomalous_transactions2, x='name', y='amount', color='category', title=\"Early Hour Transactions\")" 3893 | ] 3894 | }, 3895 | { 3896 | "cell_type": "markdown", 3897 | "metadata": {}, 3898 | "source": [ 3899 | "For Outlier calculation using standard deviation, results in 77 records whereas using Interquartile range results in 110 records. There seems to be fraudulent transactions in Bar category wherein amount spent between 7-9 AM in the Bar" 3900 | ] 3901 | } 3902 | ], 3903 | "metadata": { 3904 | "file_extension": ".py", 3905 | "kernelspec": { 3906 | "display_name": "Python 3", 3907 | "language": "python", 3908 | "name": "python3" 3909 | }, 3910 | "language_info": { 3911 | "codemirror_mode": { 3912 | "name": "ipython", 3913 | "version": 3 3914 | }, 3915 | "file_extension": ".py", 3916 | "mimetype": "text/x-python", 3917 | "name": "python", 3918 | "nbconvert_exporter": "python", 3919 | "pygments_lexer": "ipython3", 3920 | "version": "3.7.7" 3921 | }, 3922 | "mimetype": "text/x-python", 3923 | "name": "python", 3924 | "npconvert_exporter": "python", 3925 | "pygments_lexer": "ipython3", 3926 | "version": 3 3927 | }, 3928 | "nbformat": 4, 3929 | "nbformat_minor": 4 3930 | } 3931 | --------------------------------------------------------------------------------