├── Images
├── Early_hour.PNG
├── id_holder_2.PNG
├── id_holder_18.PNG
├── id_holder_25.PNG
├── QuickDBD-export.png
├── anomalous_transaction.PNG
├── credit_card_fraudster.jpg
└── anomalous_transaction_2.PNG
├── Data
├── merchant_category.csv
├── card_holder.csv
├── credit_card.csv
└── merchant.csv
├── SQL
└── schema.sql
├── README.md
└── Starter_file
└── challenge.ipynb
/Images/Early_hour.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/Early_hour.PNG
--------------------------------------------------------------------------------
/Images/id_holder_2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/id_holder_2.PNG
--------------------------------------------------------------------------------
/Data/merchant_category.csv:
--------------------------------------------------------------------------------
1 | id,name
2 | 1,restaurant
3 | 2,coffee shop
4 | 3,bar
5 | 4,pub
6 | 5,food truck
7 |
--------------------------------------------------------------------------------
/Images/id_holder_18.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/id_holder_18.PNG
--------------------------------------------------------------------------------
/Images/id_holder_25.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/id_holder_25.PNG
--------------------------------------------------------------------------------
/Images/QuickDBD-export.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/QuickDBD-export.png
--------------------------------------------------------------------------------
/Images/anomalous_transaction.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/anomalous_transaction.PNG
--------------------------------------------------------------------------------
/Images/credit_card_fraudster.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/credit_card_fraudster.jpg
--------------------------------------------------------------------------------
/Images/anomalous_transaction_2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitree7/Fraud_Detection_SQL/HEAD/Images/anomalous_transaction_2.PNG
--------------------------------------------------------------------------------
/Data/card_holder.csv:
--------------------------------------------------------------------------------
1 | id,name
2 | 1,Robert Johnson
3 | 2,Shane Shaffer
4 | 3,Elizabeth Sawyer
5 | 4,Danielle Green
6 | 5,Sara Cooper
7 | 6,Beth Hernandez
8 | 7,Sean Taylor
9 | 8,Michael Floyd
10 | 9,Laurie Gibbs
11 | 10,Matthew Gutierrez
12 | 11,Brandon Pineda
13 | 12,Megan Price
14 | 13,John Martin
15 | 14,Gary Jacobs
16 | 15,Kyle Tucker
17 | 16,Crystal Clark
18 | 17,Michael Carroll
19 | 18,Malik Carlson
20 | 19,Peter Mckay
21 | 20,Kevin Spencer
22 | 21,Dana Washington
23 | 22,Austin Johnson
24 | 23,Mark Lewis
25 | 24,Stephanie Dalton
26 | 25,Nancy Contreras
27 |
--------------------------------------------------------------------------------
/Data/credit_card.csv:
--------------------------------------------------------------------------------
1 | card,id_card_holder
2 | 3.51711E+15,1
3 | 4.76105E+18,1
4 | 4.86676E+18,2
5 | 6.75911E+11,2
6 | 3.00783E+13,3
7 | 4.26369E+15,4
8 | 5.84227E+11,4
9 | 4.27647E+12,5
10 | 4.26849E+15,5
11 | 3.58135E+15,6
12 | 4.15984E+18,6
13 | 3.51695E+15,7
14 | 4.53999E+15,7
15 | 4.83448E+15,8
16 | 3.00633E+13,8
17 | 3.0182E+13,9
18 | 4.96292E+18,10
19 | 4.16531E+18,10
20 | 2.13194E+14,10
21 | 1.80099E+14,11
22 | 4.64401E+18,11
23 | 4.02791E+15,11
24 | 5.0188E+11,12
25 | 5.29719E+15,12
26 | 3.76028E+14,12
27 | 4.71177E+15,13
28 | 5.13584E+15,13
29 | 3.56195E+15,13
30 | 5.17595E+15,14
31 | 4.72378E+18,15
32 | 6.50024E+15,15
33 | 5.03843E+11,16
34 | 5.5706E+15,16
35 | 5.50071E+15,16
36 | 6.01199E+15,17
37 | 4.498E+12,18
38 | 3.4412E+14,18
39 | 4.7432E+18,19
40 | 5.36178E+15,19
41 | 3.56107E+15,19
42 | 3.53565E+15,20
43 | 4.50641E+15,20
44 | 4.58696E+18,20
45 | 4.2791E+18,21
46 | 5.01809E+11,22
47 | 4.74104E+12,23
48 | 4.18816E+15,23
49 | 4.15072E+15,23
50 | 4.6819E+12,24
51 | 3.0143E+13,24
52 | 3.5822E+15,24
53 | 4.31965E+12,25
54 | 3.72415E+14,25
--------------------------------------------------------------------------------
/SQL/schema.sql:
--------------------------------------------------------------------------------
1 | -- Exported from QuickDBD: https://www.quickdatabasediagrams.com/
2 | -- Link to schema: https://app.quickdatabasediagrams.com/#/d/s8Wnqm
3 | -- NOTE! If you have used non-SQL datatypes in your design, you will have to change these here.
4 |
5 | -- Card Holder: one row per card holder; referenced by credit_card.id_card_holder.
6 |
7 | CREATE TABLE "card_holder" (
8 | "id" SERIAL NOT NULL, -- auto-generated integer key (PostgreSQL SERIAL)
9 | "name" VARCHAR(50) NOT NULL, -- holder name, at most 50 characters
10 | CONSTRAINT "pk_card_holder" PRIMARY KEY (
11 | "id"
12 | )
13 | );
14 |
15 | -- Credit Card: one row per card number, linked to the card holder that owns it.
16 |
17 | CREATE TABLE "credit_card" (
18 | "card" VARCHAR(20) NOT NULL, -- card number stored as text, at most 20 characters
19 | "id_card_holder" INT NOT NULL, -- owning holder; the foreign key to card_holder.id is added later in this script
20 | CONSTRAINT "pk_credit_card" PRIMARY KEY (
21 | "card"
22 | )
23 | );
24 |
25 | -- Merchant: one row per merchant, each assigned to exactly one merchant category.
26 |
27 | CREATE TABLE "merchant" (
28 | "id" SERIAL NOT NULL, -- auto-generated integer key (PostgreSQL SERIAL)
29 | "name" VARCHAR(255) NOT NULL, -- merchant name, at most 255 characters
30 | "id_merchant_category" INT NOT NULL, -- category; the foreign key to merchant_category.id is added later in this script
31 | CONSTRAINT "pk_merchant" PRIMARY KEY (
32 | "id"
33 | )
34 | );
35 |
36 | -- Merchant Category: lookup table of category names referenced by merchant.id_merchant_category.
37 |
38 | CREATE TABLE "merchant_category" (
39 | "id" SERIAL NOT NULL, -- auto-generated integer key (PostgreSQL SERIAL)
40 | "name" VARCHAR(50) NOT NULL, -- category name, at most 50 characters
41 | CONSTRAINT "pk_merchant_category" PRIMARY KEY (
42 | "id"
43 | )
44 | );
45 |
46 | -- Transaction: one row per card payment, linking a card to a merchant at a point in time.
47 |
48 | CREATE TABLE "transaction" (
49 | "id" INT NOT NULL, -- plain INT (not SERIAL): the id must be supplied on insert
50 | "date" TIMESTAMP NOT NULL, -- date and time of the transaction
51 | "amount" FLOAT NOT NULL, -- NOTE(review): FLOAT is inexact; NUMERIC may be safer for money -- confirm with consumers before changing
52 | "card" VARCHAR(20) NOT NULL, -- card used; the foreign key to credit_card.card is added later in this script
53 | "id_merchant" INT NOT NULL, -- merchant paid; the foreign key to merchant.id is added later in this script
54 | CONSTRAINT "pk_transaction" PRIMARY KEY (
55 | "id"
56 | )
57 | );
58 |
59 | -- Constraints are added once all tables exist. Every credit card must reference an existing card holder.
60 | ALTER TABLE "credit_card" ADD CONSTRAINT "fk_credit_card_id_card_holder" FOREIGN KEY("id_card_holder")
61 | REFERENCES "card_holder" ("id");
62 | -- NOTE(review): this CHECK looks redundant with the VARCHAR(20) type of "card" -- confirm before relying on it.
63 | ALTER TABLE "credit_card" ADD CONSTRAINT "check_credit_card_length" CHECK (char_length("card") <= 20);
64 | -- Kept for reference (disabled): statement that removes the length check above.
65 | --ALTER TABLE "credit_card" DROP CONSTRAINT check_credit_card_length
66 | -- Every merchant must reference an existing merchant category.
67 | ALTER TABLE "merchant" ADD CONSTRAINT "fk_merchant_id_merchant_category" FOREIGN KEY("id_merchant_category")
68 | REFERENCES "merchant_category" ("id");
69 | -- Every transaction must use a card that exists in credit_card.
70 | ALTER TABLE "transaction" ADD CONSTRAINT "fk_transaction_card" FOREIGN KEY("card")
71 | REFERENCES "credit_card" ("card");
72 | -- Every transaction must reference an existing merchant.
73 | ALTER TABLE "transaction" ADD CONSTRAINT "fk_transaction_id_merchant" FOREIGN KEY("id_merchant")
74 | REFERENCES "merchant" ("id");
--------------------------------------------------------------------------------
/Data/merchant.csv:
--------------------------------------------------------------------------------
1 | id,name,id_merchant_category
2 | 1,"Murphy, Heath and Fields",1
3 | 2,Riggs-Adams,1
4 | 3,"Sanders, Parks and Mcfarland",2
5 | 4,Mccarty-Thomas,3
6 | 5,Miller-Blevins,4
7 | 6,Wilson and Sons,1
8 | 7,Gomez-Kelly,4
9 | 8,Russell-Thomas,1
10 | 9,"Curry, Scott and Richardson",3
11 | 10,Herrera Group,1
12 | 11,Stanton Group,4
13 | 12,"Bell, Gonzalez and Lowe",4
14 | 13,Giles and Sons,4
15 | 14,Osborne-Page,2
16 | 15,"Long, Harrell and Johnson",5
17 | 16,"Bryant, Thomas and Collins",4
18 | 17,Bauer-Cole,3
19 | 18,Romero-Jordan,5
20 | 19,Santos-Fitzgerald,4
21 | 20,Kim-Lopez,2
22 | 21,Robertson-Smith,4
23 | 22,"Dalton, Cameron and Jones",3
24 | 23,"Wilson, Roberts and Davenport",5
25 | 24,"Rodgers, Johnston and Macias",5
26 | 25,"Vaughn, Wilson and Hall",1
27 | 26,Smith-Stephens,2
28 | 27,Horn Ltd,2
29 | 28,Hess-Fischer,5
30 | 29,Browning-Cantu,4
31 | 30,Atkinson Ltd,3
32 | 31,"Fisher, Salazar and Thomas",5
33 | 32,"Norton, Burton and Smith",5
34 | 33,Vasquez-Parker,3
35 | 34,Combs-Jones,5
36 | 35,Jarvis-Turner,4
37 | 36,Hamilton-Mcfarland,1
38 | 37,"Nguyen, Bautista and Williams",4
39 | 38,Brown LLC,3
40 | 39,Young-Navarro,5
41 | 40,"Cox, Montgomery and Morgan",3
42 | 41,"Ford, Williams and Dunn",4
43 | 42,Kennedy-Chen,3
44 | 43,Wallace and Sons,2
45 | 44,Little-Floyd,4
46 | 45,Velazquez Ltd,4
47 | 46,"Miller, Chavez and Cobb",5
48 | 47,Martin Inc,1
49 | 48,Baker Inc,5
50 | 49,"Davis, Lowe and Baxter",5
51 | 50,Johnson-Watts,1
52 | 51,Fisher-Bolton,1
53 | 52,Jensen-Stanley,5
54 | 53,Wallace PLC,4
55 | 54,Berry-Lopez,1
56 | 55,"Johnson, Rivas and Anderson",1
57 | 56,Smith PLC,3
58 | 57,Thornton-Williams,4
59 | 58,"Young, Hull and Williams",4
60 | 59,Williams Group,3
61 | 60,Smith-Richards,4
62 | 61,"Richardson, Smith and Jordan",5
63 | 62,"Cooper, Carpenter and Jackson",5
64 | 63,Reed Group,5
65 | 64,"Cline, Myers and Strong",1
66 | 65,"Allen, Ramos and Carroll",4
67 | 66,Robles Inc,3
68 | 67,"Maxwell, Tapia and Villanueva",2
69 | 68,Ramirez-Carr,2
70 | 69,Dominguez PLC,5
71 | 70,White-Hall,3
72 | 71,Greene LLC,1
73 | 72,Lopez-Kelly,1
74 | 73,Colon Ltd,3
75 | 74,Skinner-Williams,4
76 | 75,Martinez Group,1
77 | 76,Lowe PLC,1
78 | 77,"Brown, Ballard and Glass",1
79 | 78,Ruiz-Anderson,4
80 | 79,Lee LLC,1
81 | 80,"Kelly, Dyer and Schmitt",5
82 | 81,Fowler and Sons,5
83 | 82,Day-Murray,5
84 | 83,Solis Group,5
85 | 84,Marshall-Rojas,2
86 | 85,Patton-Rivera,3
87 | 86,"Walker, Campbell and Sullivan",5
88 | 87,Griffin-Woodard,3
89 | 88,Armstrong PLC,5
90 | 89,Kelley-Roberts,5
91 | 90,Brown-Cunningham,4
92 | 91,Turner Ltd,4
93 | 92,Garcia-White,4
94 | 93,Rodriguez-Parker,5
95 | 94,Yoder-Zavala,5
96 | 95,Baxter-Smith,1
97 | 96,Johnson-Fuller,4
98 | 97,Ruiz-Chavez,3
99 | 98,Rivera PLC,4
100 | 99,"Bond, Lewis and Rangel",1
101 | 100,Townsend-Anderson,1
102 | 101,Whitehead-Sexton,4
103 | 102,Walters-Ward,1
104 | 103,"Jones, Clark and Hoover",2
105 | 104,"Mcdaniel, Hines and Mcfarland",2
106 | 105,Garcia and Sons,4
107 | 106,Carter-Blackwell,4
108 | 107,Rowe-Abbott,4
109 | 108,Best Inc,1
110 | 109,Collins LLC,2
111 | 110,"Rodriguez, Dunlap and Nunez",2
112 | 111,Padilla-Clements,2
113 | 112,Greer Inc,3
114 | 113,Edwards-Aguirre,2
115 | 114,Greene-Wood,3
116 | 115,Williams Inc,4
117 | 116,Ferguson Ltd,2
118 | 117,Mitchell Group,2
119 | 118,Maldonado Group,2
120 | 119,Henderson and Sons,1
121 | 120,"Vega, Jones and Castro",5
122 | 121,"Fleming, Smith and Collins",3
123 | 122,Perry and Sons,3
124 | 123,"Boone, Davis and Townsend",4
125 | 124,Mccarty PLC,1
126 | 125,Russell and Sons,4
127 | 126,Bartlett and Sons,4
128 | 127,"Williams, Wright and Wagner",2
129 | 128,"Pitts, Salinas and Garcia",2
130 | 129,Sweeney-Paul,2
131 | 130,"Brown, Estrada and Powers",2
132 | 131,"Harrison, Newton and Hansen",1
133 | 132,Pugh-Williams,3
134 | 133,"Scott, Hess and Finley",3
135 | 134,"Jenkins, Peterson and Beck",1
136 | 135,"Jacobs, Torres and Walker",3
137 | 136,Martinez-Robinson,3
138 | 137,Garcia PLC,5
139 | 138,Mccullough-Murphy,5
140 | 139,Kidd-Lopez,5
141 | 140,Wheeler-Moreno,5
142 | 141,Wood-Ramirez,3
143 | 142,Thomas-Garcia,5
144 | 143,"Guzman, Garcia and Church",3
145 | 144,"Walker, Deleon and Wolf",1
146 | 145,Hood-Phillips,3
147 | 146,"Pitts, Smith and Gonzalez",4
148 | 147,Marshall-Lopez,5
149 | 148,"Huerta, Keith and Walters",5
150 | 149,Clark and Sons,5
151 | 150,Johnson and Sons,2
152 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Fraud_Detection_SQL
2 |
3 | ![Credit Card Fraudster](Images/credit_card_fraudster.jpg)
4 |
5 | *[Credit Card Fraudster by Richard Patterson](https://www.flickr.com/photos/136770128@N07/42252105582/) | [Creative Commons Licensed](https://creativecommons.org/licenses/by/2.0/)*
6 |
7 | ## Background
8 |
9 | Fraud is everywhere these days—whether you are a small taco shop or a large international business. While there are emerging technologies that employ machine learning and artificial intelligence to detect fraud, many instances of fraud detection still require strong data analytics to find abnormal charges.
10 |
11 | This project applies SQL skills to analyze historical credit card transactions and consumption patterns in order to identify possible fraudulent transactions.
12 |
13 | Accomplish three main tasks:
14 |
15 | 1. [Data Modeling](#Data-Modeling):
16 | Define a database model to store the credit card transactions data and create a new PostgreSQL database using your model.
17 |
18 | 2. [Data Engineering](#Data-Engineering): Create a database schema on PostgreSQL and populate your database from the CSV files provided.
19 |
20 | 3. [Data Analysis](#Data-Analysis): Analyze the data to identify possible fraudulent transactions.
21 |
22 | ---
23 |
24 | ## Files
25 |
26 | ### Query Files
27 |
28 | * [schema.sql](SQL/schema.sql)
29 | * [seed.sql](SQL/seed.sql)
30 |
31 | ### CSV Files
32 |
33 | * [card_holder.csv](Data/card_holder.csv)
34 | * [credit_card.csv](Data/credit_card.csv)
35 | * [merchant_category.csv](Data/merchant_category.csv)
36 | * [merchant.csv](Data/merchant.csv)
37 | * [transaction.csv](Data/transaction.csv)
38 |
39 |
40 | ### Data Modeling
41 |
42 | Create an entity relationship diagram (ERD) by inspecting the provided CSV files.
43 |
44 | **Note:** For the `credit_card` table, the `card` column should be a VARCHAR(20) datatype rather than an INT.
45 |
46 | The ERD was developed with [Quick Database Diagrams](https://app.quickdatabasediagrams.com/#/).
47 |
48 | ![Entity relationship diagram](Images/QuickDBD-export.png)
49 |
50 | ### Data Engineering
51 |
52 | Using your database model as a blueprint, create a database schema for each of your tables and relationships. Specify data types, primary keys, foreign keys, and any other constraints you defined.
53 |
54 | After creating the database schema, import the data from the corresponding CSV files.
55 |
56 |
57 | ### Data Analysis
58 |
59 | Now that your data is prepared within the database, it's finally time to identify fraudulent transactions using SQL and Pandas DataFrames.
60 |
61 | * Top 100 highest transactions made during the early hours, i.e., between 7:00 and 9:00 AM:
62 |
63 | ![Top 100 transactions during early hours](Images/Early_hour.PNG)
64 |
65 | * Some fraudsters hack a credit card by making several small payments (generally less than $2.00), which are typically ignored by cardholders. Count the transactions that are less than $2.00 per cardholder. Is there any evidence to suggest that a credit card has been hacked? Explain your rationale.
66 |
67 | * What are the top five merchants prone to being hacked using small transactions?
68 |
69 | * Once you have a query that can be reused, create a view for each of the previous queries.
70 |
71 | Created a report for fraudulent transactions of some top customers of the firm using Pandas, Plotly Express, hvPlot, and SQLAlchemy to create the visualizations.
72 |
73 | * Transaction history of two of the firm's most important customers, cardholder IDs 18 and 2, reviewed for possible fraudulent transactions.
74 |
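75 | ![Transactions of cardholder 2](Images/id_holder_2.PNG)
76 | ![Transactions of cardholder 18](Images/id_holder_18.PNG)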
77 |
78 | * Observation: `The consumption patterns of the two ID holders are very different. ID holder 2 makes many small transactions. ID holder 18 has transactions ranging up to $1839. ID holder 2 is more susceptible to fraudulent transactions.`
79 |
80 | * The CEO of the firm's biggest customer suspects that someone has used her corporate credit card without authorization in the first quarter of 2018 to pay for several expensive restaurant bills. You are asked to find any anomalous transactions during that period.
81 |
82 | * Using Plotly Express, created a series of six box plots, one for each month, in order to identify how many outliers there are per month for cardholder ID 25.
83 |
84 | ![Monthly box plots for cardholder 25](Images/id_holder_25.PNG)
85 |
86 | * Observations: `There seem to be fraudulent transactions in the Restaurant and Food Truck categories; Food Truck amounts range from $1.46 to $1046.`
87 |
88 |
89 | ## Challenge
90 |
91 | Another approach to identify fraudulent transactions is to look for outliers in the data. Standard deviation or quartiles are often used to detect outliers.
92 |
93 | #### Identifying Outliers based on Standard Deviation
94 |
95 | ![Outliers based on standard deviation](Images/anomalous_transaction.PNG)
96 |
97 | #### Identifying Outliers based on Interquartile Range
98 |
99 | ![Outliers based on interquartile range](Images/anomalous_transaction_2.PNG)
100 |
101 |
--------------------------------------------------------------------------------
/Starter_file/challenge.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | " # Challenge\n",
8 | "\n",
9 | " ## Identifying Outliers using Standard Deviation"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 91,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "# initial imports\n",
19 | "import pandas as pd\n",
20 | "import numpy as np\n",
21 | "import random\n",
22 | "from sqlalchemy import create_engine\n",
23 | "from numpy import mean\n",
24 | "from numpy import std\n",
25 | "from numpy import percentile\n",
26 | "import plotly.express as px\n"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 92,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "# create a connection to the database; the password is read from the PGPASSWORD environment variable instead of being committed to the notebook\n",
36 | "import os; from urllib.parse import quote_plus  # quote_plus percent-encodes characters such as '@' that would otherwise break URL parsing\n",
37 | "engine = create_engine(f\"postgresql://postgres:{quote_plus(os.environ['PGPASSWORD'])}@localhost:5432/fraud_detection\")\n",
38 | "# loading data from the database\n",
39 | "\n",
40 | "def execute_query(query):\n",
41 | "\n",
42 | " transaction_df = pd.read_sql(sql=query, con=engine, index_col='date', parse_dates='date')\n",
43 | "\n",
44 | " return transaction_df\n",
45 | "\n"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 93,
51 | "metadata": {},
52 | "outputs": [
53 | {
54 | "data": {
55 | "text/html": [
56 | "
\n",
57 | "\n",
70 | "
\n",
71 | " \n",
72 | " \n",
73 | " | \n",
74 | " id | \n",
75 | " name | \n",
76 | " card | \n",
77 | " amount | \n",
78 | " category | \n",
79 | "
\n",
80 | " \n",
81 | " | date | \n",
82 | " | \n",
83 | " | \n",
84 | " | \n",
85 | " | \n",
86 | " | \n",
87 | "
\n",
88 | " \n",
89 | " \n",
90 | " \n",
91 | " | 2018-01-01 21:35:10 | \n",
92 | " 13 | \n",
93 | " John Martin | \n",
94 | " 3561954487988605 | \n",
95 | " 6.22 | \n",
96 | " food truck | \n",
97 | "
\n",
98 | " \n",
99 | " | 2018-01-01 21:43:12 | \n",
100 | " 13 | \n",
101 | " John Martin | \n",
102 | " 5135837688671496 | \n",
103 | " 3.83 | \n",
104 | " bar | \n",
105 | "
\n",
106 | " \n",
107 | " | 2018-01-01 22:41:21 | \n",
108 | " 10 | \n",
109 | " Matthew Gutierrez | \n",
110 | " 213193946980303 | \n",
111 | " 9.61 | \n",
112 | " food truck | \n",
113 | "
\n",
114 | " \n",
115 | " | 2018-01-01 23:13:30 | \n",
116 | " 4 | \n",
117 | " Danielle Green | \n",
118 | " 4263694062533017 | \n",
119 | " 19.03 | \n",
120 | " pub | \n",
121 | "
\n",
122 | " \n",
123 | " | 2018-01-01 23:15:10 | \n",
124 | " 18 | \n",
125 | " Malik Carlson | \n",
126 | " 4498002758300 | \n",
127 | " 2.95 | \n",
128 | " restaurant | \n",
129 | "
\n",
130 | " \n",
131 | "
\n",
132 | "
"
133 | ],
134 | "text/plain": [
135 | " id name card amount \\\n",
136 | "date \n",
137 | "2018-01-01 21:35:10 13 John Martin 3561954487988605 6.22 \n",
138 | "2018-01-01 21:43:12 13 John Martin 5135837688671496 3.83 \n",
139 | "2018-01-01 22:41:21 10 Matthew Gutierrez 213193946980303 9.61 \n",
140 | "2018-01-01 23:13:30 4 Danielle Green 4263694062533017 19.03 \n",
141 | "2018-01-01 23:15:10 18 Malik Carlson 4498002758300 2.95 \n",
142 | "\n",
143 | " category \n",
144 | "date \n",
145 | "2018-01-01 21:35:10 food truck \n",
146 | "2018-01-01 21:43:12 bar \n",
147 | "2018-01-01 22:41:21 food truck \n",
148 | "2018-01-01 23:13:30 pub \n",
149 | "2018-01-01 23:15:10 restaurant "
150 | ]
151 | },
152 | "execution_count": 93,
153 | "metadata": {},
154 | "output_type": "execute_result"
155 | }
156 | ],
157 | "source": [
158 | "# loading data of daily transactions from jan to jun 2018 for card holder 25\n",
159 | "query = f'SELECT a.id, a.name, b.card, c.date, c.amount, e.name as \"category\" \\\n",
160 | " FROM public.card_holder a, public.credit_card b, public.transaction c, public.merchant d, public.merchant_category e \\\n",
161 | " WHERE a.id = b.id_card_holder AND b.card=c.card AND c.id_merchant=d.id AND d.id_merchant_category=e.id'\n",
162 | "\n",
163 | "transaction_df = execute_query(query)\n",
164 | "transaction_df.head()"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 94,
170 | "metadata": {},
171 | "outputs": [
172 | {
173 | "name": "stdout",
174 | "output_type": "stream",
175 | "text": [
176 | "Identified outliers: 77\n",
177 | "Non-outlier observations: 3423\n"
178 | ]
179 | },
180 | {
181 | "data": {
182 | "text/html": [
183 | "\n",
184 | "\n",
197 | "
\n",
198 | " \n",
199 | " \n",
200 | " | \n",
201 | " id | \n",
202 | " name | \n",
203 | " card | \n",
204 | " amount | \n",
205 | " category | \n",
206 | " outlier | \n",
207 | "
\n",
208 | " \n",
209 | " | date | \n",
210 | " | \n",
211 | " | \n",
212 | " | \n",
213 | " | \n",
214 | " | \n",
215 | " | \n",
216 | "
\n",
217 | " \n",
218 | " \n",
219 | " \n",
220 | " | 2018-01-02 23:27:46 | \n",
221 | " 12 | \n",
222 | " Megan Price | \n",
223 | " 501879657465 | \n",
224 | " 1031.0 | \n",
225 | " restaurant | \n",
226 | " True | \n",
227 | "
\n",
228 | " \n",
229 | " | 2018-01-04 03:05:18 | \n",
230 | " 7 | \n",
231 | " Sean Taylor | \n",
232 | " 3516952396080247 | \n",
233 | " 1685.0 | \n",
234 | " food truck | \n",
235 | " True | \n",
236 | "
\n",
237 | " \n",
238 | " | 2018-01-08 02:34:32 | \n",
239 | " 6 | \n",
240 | " Beth Hernandez | \n",
241 | " 3581345943543942 | \n",
242 | " 1029.0 | \n",
243 | " bar | \n",
244 | " True | \n",
245 | "
\n",
246 | " \n",
247 | " | 2018-01-22 08:07:03 | \n",
248 | " 16 | \n",
249 | " Crystal Clark | \n",
250 | " 5570600642865857 | \n",
251 | " 1131.0 | \n",
252 | " restaurant | \n",
253 | " True | \n",
254 | "
\n",
255 | " \n",
256 | " | 2018-01-23 06:29:37 | \n",
257 | " 12 | \n",
258 | " Megan Price | \n",
259 | " 501879657465 | \n",
260 | " 1678.0 | \n",
261 | " pub | \n",
262 | " True | \n",
263 | "
\n",
264 | " \n",
265 | " | 2018-01-24 13:17:19 | \n",
266 | " 1 | \n",
267 | " Robert Johnson | \n",
268 | " 4761049645711555811 | \n",
269 | " 1691.0 | \n",
270 | " coffee shop | \n",
271 | " True | \n",
272 | "
\n",
273 | " \n",
274 | " | 2018-01-30 18:31:00 | \n",
275 | " 25 | \n",
276 | " Nancy Contreras | \n",
277 | " 4319653513507 | \n",
278 | " 1177.0 | \n",
279 | " restaurant | \n",
280 | " True | \n",
281 | "
\n",
282 | " \n",
283 | " | 2018-02-17 01:27:19 | \n",
284 | " 16 | \n",
285 | " Crystal Clark | \n",
286 | " 5570600642865857 | \n",
287 | " 1430.0 | \n",
288 | " restaurant | \n",
289 | " True | \n",
290 | "
\n",
291 | " \n",
292 | " | 2018-02-19 16:00:43 | \n",
293 | " 7 | \n",
294 | " Sean Taylor | \n",
295 | " 3516952396080247 | \n",
296 | " 1072.0 | \n",
297 | " food truck | \n",
298 | " True | \n",
299 | "
\n",
300 | " \n",
301 | " | 2018-02-19 22:48:25 | \n",
302 | " 18 | \n",
303 | " Malik Carlson | \n",
304 | " 344119623920892 | \n",
305 | " 1839.0 | \n",
306 | " restaurant | \n",
307 | " True | \n",
308 | "
\n",
309 | " \n",
310 | " | 2018-02-27 15:27:32 | \n",
311 | " 6 | \n",
312 | " Beth Hernandez | \n",
313 | " 3581345943543942 | \n",
314 | " 1145.0 | \n",
315 | " bar | \n",
316 | " True | \n",
317 | "
\n",
318 | " \n",
319 | " | 2018-03-01 21:29:05 | \n",
320 | " 3 | \n",
321 | " Elizabeth Sawyer | \n",
322 | " 30078299053512 | \n",
323 | " 1119.0 | \n",
324 | " pub | \n",
325 | " True | \n",
326 | "
\n",
327 | " \n",
328 | " | 2018-03-04 15:50:53 | \n",
329 | " 9 | \n",
330 | " Laurie Gibbs | \n",
331 | " 30181963913340 | \n",
332 | " 1534.0 | \n",
333 | " coffee shop | \n",
334 | " True | \n",
335 | "
\n",
336 | " \n",
337 | " | 2018-03-05 08:26:08 | \n",
338 | " 16 | \n",
339 | " Crystal Clark | \n",
340 | " 5570600642865857 | \n",
341 | " 1617.0 | \n",
342 | " bar | \n",
343 | " True | \n",
344 | "
\n",
345 | " \n",
346 | " | 2018-03-06 07:18:09 | \n",
347 | " 25 | \n",
348 | " Nancy Contreras | \n",
349 | " 4319653513507 | \n",
350 | " 1334.0 | \n",
351 | " bar | \n",
352 | " True | \n",
353 | "
\n",
354 | " \n",
355 | " | 2018-03-12 00:44:01 | \n",
356 | " 12 | \n",
357 | " Megan Price | \n",
358 | " 501879657465 | \n",
359 | " 1530.0 | \n",
360 | " coffee shop | \n",
361 | " True | \n",
362 | "
\n",
363 | " \n",
364 | " | 2018-03-20 10:19:25 | \n",
365 | " 12 | \n",
366 | " Megan Price | \n",
367 | " 501879657465 | \n",
368 | " 852.0 | \n",
369 | " pub | \n",
370 | " True | \n",
371 | "
\n",
372 | " \n",
373 | " | 2018-03-20 13:05:54 | \n",
374 | " 24 | \n",
375 | " Stephanie Dalton | \n",
376 | " 30142966699187 | \n",
377 | " 1011.0 | \n",
378 | " bar | \n",
379 | " True | \n",
380 | "
\n",
381 | " \n",
382 | " | 2018-03-26 07:41:59 | \n",
383 | " 9 | \n",
384 | " Laurie Gibbs | \n",
385 | " 30181963913340 | \n",
386 | " 1009.0 | \n",
387 | " coffee shop | \n",
388 | " True | \n",
389 | "
\n",
390 | " \n",
391 | " | 2018-04-03 03:23:37 | \n",
392 | " 18 | \n",
393 | " Malik Carlson | \n",
394 | " 344119623920892 | \n",
395 | " 1077.0 | \n",
396 | " restaurant | \n",
397 | " True | \n",
398 | "
\n",
399 | " \n",
400 | " | 2018-04-08 06:03:50 | \n",
401 | " 25 | \n",
402 | " Nancy Contreras | \n",
403 | " 4319653513507 | \n",
404 | " 1063.0 | \n",
405 | " pub | \n",
406 | " True | \n",
407 | "
\n",
408 | " \n",
409 | " | 2018-04-18 23:23:29 | \n",
410 | " 7 | \n",
411 | " Sean Taylor | \n",
412 | " 3516952396080247 | \n",
413 | " 1086.0 | \n",
414 | " coffee shop | \n",
415 | " True | \n",
416 | "
\n",
417 | " \n",
418 | " | 2018-04-21 19:41:51 | \n",
419 | " 6 | \n",
420 | " Beth Hernandez | \n",
421 | " 3581345943543942 | \n",
422 | " 2108.0 | \n",
423 | " coffee shop | \n",
424 | " True | \n",
425 | "
\n",
426 | " \n",
427 | " | 2018-05-08 13:21:01 | \n",
428 | " 24 | \n",
429 | " Stephanie Dalton | \n",
430 | " 30142966699187 | \n",
431 | " 1901.0 | \n",
432 | " restaurant | \n",
433 | " True | \n",
434 | "
\n",
435 | " \n",
436 | " | 2018-05-13 06:31:20 | \n",
437 | " 25 | \n",
438 | " Nancy Contreras | \n",
439 | " 4319653513507 | \n",
440 | " 1046.0 | \n",
441 | " food truck | \n",
442 | " True | \n",
443 | "
\n",
444 | " \n",
445 | " | 2018-05-29 02:55:08 | \n",
446 | " 16 | \n",
447 | " Crystal Clark | \n",
448 | " 5570600642865857 | \n",
449 | " 1203.0 | \n",
450 | " food truck | \n",
451 | " True | \n",
452 | "
\n",
453 | " \n",
454 | " | 2018-06-03 20:02:28 | \n",
455 | " 18 | \n",
456 | " Malik Carlson | \n",
457 | " 344119623920892 | \n",
458 | " 1814.0 | \n",
459 | " pub | \n",
460 | " True | \n",
461 | "
\n",
462 | " \n",
463 | " | 2018-06-04 03:46:15 | \n",
464 | " 25 | \n",
465 | " Nancy Contreras | \n",
466 | " 4319653513507 | \n",
467 | " 1162.0 | \n",
468 | " pub | \n",
469 | " True | \n",
470 | "
\n",
471 | " \n",
472 | " | 2018-06-06 21:50:17 | \n",
473 | " 25 | \n",
474 | " Nancy Contreras | \n",
475 | " 4319653513507 | \n",
476 | " 749.0 | \n",
477 | " restaurant | \n",
478 | " True | \n",
479 | "
\n",
480 | " \n",
481 | " | 2018-06-10 04:54:27 | \n",
482 | " 9 | \n",
483 | " Laurie Gibbs | \n",
484 | " 30181963913340 | \n",
485 | " 1795.0 | \n",
486 | " pub | \n",
487 | " True | \n",
488 | "
\n",
489 | " \n",
490 | " | ... | \n",
491 | " ... | \n",
492 | " ... | \n",
493 | " ... | \n",
494 | " ... | \n",
495 | " ... | \n",
496 | " ... | \n",
497 | "
\n",
498 | " \n",
499 | " | 2018-09-04 01:35:39 | \n",
500 | " 1 | \n",
501 | " Robert Johnson | \n",
502 | " 4761049645711555811 | \n",
503 | " 1790.0 | \n",
504 | " coffee shop | \n",
505 | " True | \n",
506 | "
\n",
507 | " \n",
508 | " | 2018-09-06 08:28:55 | \n",
509 | " 1 | \n",
510 | " Robert Johnson | \n",
511 | " 4761049645711555811 | \n",
512 | " 1017.0 | \n",
513 | " bar | \n",
514 | " True | \n",
515 | "
\n",
516 | " \n",
517 | " | 2018-09-06 21:55:02 | \n",
518 | " 1 | \n",
519 | " Robert Johnson | \n",
520 | " 4761049645711555811 | \n",
521 | " 1056.0 | \n",
522 | " restaurant | \n",
523 | " True | \n",
524 | "
\n",
525 | " \n",
526 | " | 2018-09-10 22:49:41 | \n",
527 | " 18 | \n",
528 | " Malik Carlson | \n",
529 | " 344119623920892 | \n",
530 | " 1176.0 | \n",
531 | " restaurant | \n",
532 | " True | \n",
533 | "
\n",
534 | " \n",
535 | " | 2018-09-11 15:16:47 | \n",
536 | " 6 | \n",
537 | " Beth Hernandez | \n",
538 | " 3581345943543942 | \n",
539 | " 1856.0 | \n",
540 | " food truck | \n",
541 | " True | \n",
542 | "
\n",
543 | " \n",
544 | " | 2018-09-23 19:20:23 | \n",
545 | " 12 | \n",
546 | " Megan Price | \n",
547 | " 501879657465 | \n",
548 | " 1075.0 | \n",
549 | " pub | \n",
550 | " True | \n",
551 | "
\n",
552 | " \n",
553 | " | 2018-09-25 23:23:21 | \n",
554 | " 9 | \n",
555 | " Laurie Gibbs | \n",
556 | " 30181963913340 | \n",
557 | " 1095.0 | \n",
558 | " food truck | \n",
559 | " True | \n",
560 | "
\n",
561 | " \n",
562 | " | 2018-09-26 08:48:40 | \n",
563 | " 1 | \n",
564 | " Robert Johnson | \n",
565 | " 4761049645711555811 | \n",
566 | " 1060.0 | \n",
567 | " restaurant | \n",
568 | " True | \n",
569 | "
\n",
570 | " \n",
571 | " | 2018-10-07 14:40:34 | \n",
572 | " 3 | \n",
573 | " Elizabeth Sawyer | \n",
574 | " 30078299053512 | \n",
575 | " 757.0 | \n",
576 | " bar | \n",
577 | " True | \n",
578 | "
\n",
579 | " \n",
580 | " | 2018-10-07 18:29:20 | \n",
581 | " 9 | \n",
582 | " Laurie Gibbs | \n",
583 | " 30181963913340 | \n",
584 | " 1179.0 | \n",
585 | " pub | \n",
586 | " True | \n",
587 | "
\n",
588 | " \n",
589 | " | 2018-10-19 01:07:37 | \n",
590 | " 3 | \n",
591 | " Elizabeth Sawyer | \n",
592 | " 30078299053512 | \n",
593 | " 1053.0 | \n",
594 | " restaurant | \n",
595 | " True | \n",
596 | "
\n",
597 | " \n",
598 | " | 2018-11-13 17:07:25 | \n",
599 | " 16 | \n",
600 | " Crystal Clark | \n",
601 | " 5570600642865857 | \n",
602 | " 1911.0 | \n",
603 | " restaurant | \n",
604 | " True | \n",
605 | "
\n",
606 | " \n",
607 | " | 2018-11-17 05:30:43 | \n",
608 | " 18 | \n",
609 | " Malik Carlson | \n",
610 | " 344119623920892 | \n",
611 | " 1769.0 | \n",
612 | " food truck | \n",
613 | " True | \n",
614 | "
\n",
615 | " \n",
616 | " | 2018-11-20 05:24:28 | \n",
617 | " 3 | \n",
618 | " Elizabeth Sawyer | \n",
619 | " 30078299053512 | \n",
620 | " 1054.0 | \n",
621 | " bar | \n",
622 | " True | \n",
623 | "
\n",
624 | " \n",
625 | " | 2018-11-25 20:44:07 | \n",
626 | " 12 | \n",
627 | " Megan Price | \n",
628 | " 501879657465 | \n",
629 | " 1123.0 | \n",
630 | " bar | \n",
631 | " True | \n",
632 | "
\n",
633 | " \n",
634 | " | 2018-11-27 15:36:05 | \n",
635 | " 12 | \n",
636 | " Megan Price | \n",
637 | " 501879657465 | \n",
638 | " 1802.0 | \n",
639 | " bar | \n",
640 | " True | \n",
641 | "
\n",
642 | " \n",
643 | " | 2018-11-27 17:20:29 | \n",
644 | " 6 | \n",
645 | " Beth Hernandez | \n",
646 | " 3581345943543942 | \n",
647 | " 1279.0 | \n",
648 | " restaurant | \n",
649 | " True | \n",
650 | "
\n",
651 | " \n",
652 | " | 2018-11-27 17:27:34 | \n",
653 | " 1 | \n",
654 | " Robert Johnson | \n",
655 | " 4761049645711555811 | \n",
656 | " 1660.0 | \n",
657 | " pub | \n",
658 | " True | \n",
659 | "
\n",
660 | " \n",
661 | " | 2018-12-03 02:38:52 | \n",
662 | " 16 | \n",
663 | " Crystal Clark | \n",
664 | " 5570600642865857 | \n",
665 | " 1014.0 | \n",
666 | " restaurant | \n",
667 | " True | \n",
668 | "
\n",
669 | " \n",
670 | " | 2018-12-07 07:22:03 | \n",
671 | " 1 | \n",
672 | " Robert Johnson | \n",
673 | " 4761049645711555811 | \n",
674 | " 1894.0 | \n",
675 | " bar | \n",
676 | " True | \n",
677 | "
\n",
678 | " \n",
679 | " | 2018-12-13 12:09:58 | \n",
680 | " 18 | \n",
681 | " Malik Carlson | \n",
682 | " 344119623920892 | \n",
683 | " 1154.0 | \n",
684 | " restaurant | \n",
685 | " True | \n",
686 | "
\n",
687 | " \n",
688 | " | 2018-12-13 15:51:59 | \n",
689 | " 7 | \n",
690 | " Sean Taylor | \n",
691 | " 3516952396080247 | \n",
692 | " 2249.0 | \n",
693 | " food truck | \n",
694 | " True | \n",
695 | "
\n",
696 | " \n",
697 | " | 2018-12-14 08:51:41 | \n",
698 | " 12 | \n",
699 | " Megan Price | \n",
700 | " 501879657465 | \n",
701 | " 748.0 | \n",
702 | " pub | \n",
703 | " True | \n",
704 | "
\n",
705 | " \n",
706 | " | 2018-12-18 13:33:37 | \n",
707 | " 25 | \n",
708 | " Nancy Contreras | \n",
709 | " 4319653513507 | \n",
710 | " 1074.0 | \n",
711 | " coffee shop | \n",
712 | " True | \n",
713 | "
\n",
714 | " \n",
715 | " | 2018-12-18 17:20:33 | \n",
716 | " 7 | \n",
717 | " Sean Taylor | \n",
718 | " 3516952396080247 | \n",
719 | " 1296.0 | \n",
720 | " bar | \n",
721 | " True | \n",
722 | "
\n",
723 | " \n",
724 | " | 2018-12-19 16:10:03 | \n",
725 | " 9 | \n",
726 | " Laurie Gibbs | \n",
727 | " 30181963913340 | \n",
728 | " 1724.0 | \n",
729 | " pub | \n",
730 | " True | \n",
731 | "
\n",
732 | " \n",
733 | " | 2018-12-21 09:56:32 | \n",
734 | " 24 | \n",
735 | " Stephanie Dalton | \n",
736 | " 30142966699187 | \n",
737 | " 1301.0 | \n",
738 | " pub | \n",
739 | " True | \n",
740 | "
\n",
741 | " \n",
742 | " | 2018-12-24 15:55:06 | \n",
743 | " 16 | \n",
744 | " Crystal Clark | \n",
745 | " 5570600642865857 | \n",
746 | " 1634.0 | \n",
747 | " pub | \n",
748 | " True | \n",
749 | "
\n",
750 | " \n",
751 | " | 2018-12-25 19:10:42 | \n",
752 | " 24 | \n",
753 | " Stephanie Dalton | \n",
754 | " 30142966699187 | \n",
755 | " 1035.0 | \n",
756 | " pub | \n",
757 | " True | \n",
758 | "
\n",
759 | " \n",
760 | " | 2018-12-30 23:23:09 | \n",
761 | " 1 | \n",
762 | " Robert Johnson | \n",
763 | " 4761049645711555811 | \n",
764 | " 1033.0 | \n",
765 | " pub | \n",
766 | " True | \n",
767 | "
\n",
768 | " \n",
769 | "
\n",
770 | "
77 rows × 6 columns
\n",
771 | "
"
772 | ],
773 | "text/plain": [
774 | " id name card amount \\\n",
775 | "date \n",
776 | "2018-01-02 23:27:46 12 Megan Price 501879657465 1031.0 \n",
777 | "2018-01-04 03:05:18 7 Sean Taylor 3516952396080247 1685.0 \n",
778 | "2018-01-08 02:34:32 6 Beth Hernandez 3581345943543942 1029.0 \n",
779 | "2018-01-22 08:07:03 16 Crystal Clark 5570600642865857 1131.0 \n",
780 | "2018-01-23 06:29:37 12 Megan Price 501879657465 1678.0 \n",
781 | "2018-01-24 13:17:19 1 Robert Johnson 4761049645711555811 1691.0 \n",
782 | "2018-01-30 18:31:00 25 Nancy Contreras 4319653513507 1177.0 \n",
783 | "2018-02-17 01:27:19 16 Crystal Clark 5570600642865857 1430.0 \n",
784 | "2018-02-19 16:00:43 7 Sean Taylor 3516952396080247 1072.0 \n",
785 | "2018-02-19 22:48:25 18 Malik Carlson 344119623920892 1839.0 \n",
786 | "2018-02-27 15:27:32 6 Beth Hernandez 3581345943543942 1145.0 \n",
787 | "2018-03-01 21:29:05 3 Elizabeth Sawyer 30078299053512 1119.0 \n",
788 | "2018-03-04 15:50:53 9 Laurie Gibbs 30181963913340 1534.0 \n",
789 | "2018-03-05 08:26:08 16 Crystal Clark 5570600642865857 1617.0 \n",
790 | "2018-03-06 07:18:09 25 Nancy Contreras 4319653513507 1334.0 \n",
791 | "2018-03-12 00:44:01 12 Megan Price 501879657465 1530.0 \n",
792 | "2018-03-20 10:19:25 12 Megan Price 501879657465 852.0 \n",
793 | "2018-03-20 13:05:54 24 Stephanie Dalton 30142966699187 1011.0 \n",
794 | "2018-03-26 07:41:59 9 Laurie Gibbs 30181963913340 1009.0 \n",
795 | "2018-04-03 03:23:37 18 Malik Carlson 344119623920892 1077.0 \n",
796 | "2018-04-08 06:03:50 25 Nancy Contreras 4319653513507 1063.0 \n",
797 | "2018-04-18 23:23:29 7 Sean Taylor 3516952396080247 1086.0 \n",
798 | "2018-04-21 19:41:51 6 Beth Hernandez 3581345943543942 2108.0 \n",
799 | "2018-05-08 13:21:01 24 Stephanie Dalton 30142966699187 1901.0 \n",
800 | "2018-05-13 06:31:20 25 Nancy Contreras 4319653513507 1046.0 \n",
801 | "2018-05-29 02:55:08 16 Crystal Clark 5570600642865857 1203.0 \n",
802 | "2018-06-03 20:02:28 18 Malik Carlson 344119623920892 1814.0 \n",
803 | "2018-06-04 03:46:15 25 Nancy Contreras 4319653513507 1162.0 \n",
804 | "2018-06-06 21:50:17 25 Nancy Contreras 4319653513507 749.0 \n",
805 | "2018-06-10 04:54:27 9 Laurie Gibbs 30181963913340 1795.0 \n",
806 | "... .. ... ... ... \n",
807 | "2018-09-04 01:35:39 1 Robert Johnson 4761049645711555811 1790.0 \n",
808 | "2018-09-06 08:28:55 1 Robert Johnson 4761049645711555811 1017.0 \n",
809 | "2018-09-06 21:55:02 1 Robert Johnson 4761049645711555811 1056.0 \n",
810 | "2018-09-10 22:49:41 18 Malik Carlson 344119623920892 1176.0 \n",
811 | "2018-09-11 15:16:47 6 Beth Hernandez 3581345943543942 1856.0 \n",
812 | "2018-09-23 19:20:23 12 Megan Price 501879657465 1075.0 \n",
813 | "2018-09-25 23:23:21 9 Laurie Gibbs 30181963913340 1095.0 \n",
814 | "2018-09-26 08:48:40 1 Robert Johnson 4761049645711555811 1060.0 \n",
815 | "2018-10-07 14:40:34 3 Elizabeth Sawyer 30078299053512 757.0 \n",
816 | "2018-10-07 18:29:20 9 Laurie Gibbs 30181963913340 1179.0 \n",
817 | "2018-10-19 01:07:37 3 Elizabeth Sawyer 30078299053512 1053.0 \n",
818 | "2018-11-13 17:07:25 16 Crystal Clark 5570600642865857 1911.0 \n",
819 | "2018-11-17 05:30:43 18 Malik Carlson 344119623920892 1769.0 \n",
820 | "2018-11-20 05:24:28 3 Elizabeth Sawyer 30078299053512 1054.0 \n",
821 | "2018-11-25 20:44:07 12 Megan Price 501879657465 1123.0 \n",
822 | "2018-11-27 15:36:05 12 Megan Price 501879657465 1802.0 \n",
823 | "2018-11-27 17:20:29 6 Beth Hernandez 3581345943543942 1279.0 \n",
824 | "2018-11-27 17:27:34 1 Robert Johnson 4761049645711555811 1660.0 \n",
825 | "2018-12-03 02:38:52 16 Crystal Clark 5570600642865857 1014.0 \n",
826 | "2018-12-07 07:22:03 1 Robert Johnson 4761049645711555811 1894.0 \n",
827 | "2018-12-13 12:09:58 18 Malik Carlson 344119623920892 1154.0 \n",
828 | "2018-12-13 15:51:59 7 Sean Taylor 3516952396080247 2249.0 \n",
829 | "2018-12-14 08:51:41 12 Megan Price 501879657465 748.0 \n",
830 | "2018-12-18 13:33:37 25 Nancy Contreras 4319653513507 1074.0 \n",
831 | "2018-12-18 17:20:33 7 Sean Taylor 3516952396080247 1296.0 \n",
832 | "2018-12-19 16:10:03 9 Laurie Gibbs 30181963913340 1724.0 \n",
833 | "2018-12-21 09:56:32 24 Stephanie Dalton 30142966699187 1301.0 \n",
834 | "2018-12-24 15:55:06 16 Crystal Clark 5570600642865857 1634.0 \n",
835 | "2018-12-25 19:10:42 24 Stephanie Dalton 30142966699187 1035.0 \n",
836 | "2018-12-30 23:23:09 1 Robert Johnson 4761049645711555811 1033.0 \n",
837 | "\n",
838 | " category outlier \n",
839 | "date \n",
840 | "2018-01-02 23:27:46 restaurant True \n",
841 | "2018-01-04 03:05:18 food truck True \n",
842 | "2018-01-08 02:34:32 bar True \n",
843 | "2018-01-22 08:07:03 restaurant True \n",
844 | "2018-01-23 06:29:37 pub True \n",
845 | "2018-01-24 13:17:19 coffee shop True \n",
846 | "2018-01-30 18:31:00 restaurant True \n",
847 | "2018-02-17 01:27:19 restaurant True \n",
848 | "2018-02-19 16:00:43 food truck True \n",
849 | "2018-02-19 22:48:25 restaurant True \n",
850 | "2018-02-27 15:27:32 bar True \n",
851 | "2018-03-01 21:29:05 pub True \n",
852 | "2018-03-04 15:50:53 coffee shop True \n",
853 | "2018-03-05 08:26:08 bar True \n",
854 | "2018-03-06 07:18:09 bar True \n",
855 | "2018-03-12 00:44:01 coffee shop True \n",
856 | "2018-03-20 10:19:25 pub True \n",
857 | "2018-03-20 13:05:54 bar True \n",
858 | "2018-03-26 07:41:59 coffee shop True \n",
859 | "2018-04-03 03:23:37 restaurant True \n",
860 | "2018-04-08 06:03:50 pub True \n",
861 | "2018-04-18 23:23:29 coffee shop True \n",
862 | "2018-04-21 19:41:51 coffee shop True \n",
863 | "2018-05-08 13:21:01 restaurant True \n",
864 | "2018-05-13 06:31:20 food truck True \n",
865 | "2018-05-29 02:55:08 food truck True \n",
866 | "2018-06-03 20:02:28 pub True \n",
867 | "2018-06-04 03:46:15 pub True \n",
868 | "2018-06-06 21:50:17 restaurant True \n",
869 | "2018-06-10 04:54:27 pub True \n",
870 | "... ... ... \n",
871 | "2018-09-04 01:35:39 coffee shop True \n",
872 | "2018-09-06 08:28:55 bar True \n",
873 | "2018-09-06 21:55:02 restaurant True \n",
874 | "2018-09-10 22:49:41 restaurant True \n",
875 | "2018-09-11 15:16:47 food truck True \n",
876 | "2018-09-23 19:20:23 pub True \n",
877 | "2018-09-25 23:23:21 food truck True \n",
878 | "2018-09-26 08:48:40 restaurant True \n",
879 | "2018-10-07 14:40:34 bar True \n",
880 | "2018-10-07 18:29:20 pub True \n",
881 | "2018-10-19 01:07:37 restaurant True \n",
882 | "2018-11-13 17:07:25 restaurant True \n",
883 | "2018-11-17 05:30:43 food truck True \n",
884 | "2018-11-20 05:24:28 bar True \n",
885 | "2018-11-25 20:44:07 bar True \n",
886 | "2018-11-27 15:36:05 bar True \n",
887 | "2018-11-27 17:20:29 restaurant True \n",
888 | "2018-11-27 17:27:34 pub True \n",
889 | "2018-12-03 02:38:52 restaurant True \n",
890 | "2018-12-07 07:22:03 bar True \n",
891 | "2018-12-13 12:09:58 restaurant True \n",
892 | "2018-12-13 15:51:59 food truck True \n",
893 | "2018-12-14 08:51:41 pub True \n",
894 | "2018-12-18 13:33:37 coffee shop True \n",
895 | "2018-12-18 17:20:33 bar True \n",
896 | "2018-12-19 16:10:03 pub True \n",
897 | "2018-12-21 09:56:32 pub True \n",
898 | "2018-12-24 15:55:06 pub True \n",
899 | "2018-12-25 19:10:42 pub True \n",
900 | "2018-12-30 23:23:09 pub True \n",
901 | "\n",
902 | "[77 rows x 6 columns]"
903 | ]
904 | },
905 | "execution_count": 94,
906 | "metadata": {},
907 | "output_type": "execute_result"
908 | }
909 | ],
910 | "source": [
911 | "# identify outliers in transaction amounts using the standard deviation method\n",
912 | "# calculate summary statistics\n",
913 | "data_mean, data_std = mean(transaction_df['amount']), std(transaction_df['amount'])\n",
914 | "\n",
915 | "# define the outlier cut-off: 3 standard deviations either side of the mean\n",
916 | "cut_off = data_std * 3\n",
917 | "\n",
918 | "lower, upper = data_mean - cut_off, data_mean + cut_off\n",
919 | "\n",
920 | "# identify outliers\n",
921 | "outliers = [x for x in transaction_df['amount'] if x < lower or x > upper]\n",
922 | "\n",
923 | "print('Identified outliers: %d' % len(outliers))\n",
924 | "\n",
925 | "# collect the non-outlier amounts (those within [lower, upper])\n",
926 | "outliers_removed = [x for x in transaction_df['amount'] if x >= lower and x <= upper]\n",
927 | "\n",
928 | "print('Non-outlier observations: %d' % len(outliers_removed))\n",
929 | "\n",
930 | "transaction_df['outlier'] = (transaction_df['amount'] > upper) | (transaction_df['amount'] < lower)\n",
931 | "\n",
932 | "outlier = transaction_df[transaction_df['outlier']==True]\n",
933 | "outlier"
934 | ]
935 | },
936 | {
937 | "cell_type": "code",
938 | "execution_count": 96,
939 | "metadata": {},
940 | "outputs": [
941 | {
942 | "data": {
943 | "application/vnd.plotly.v1+json": {
944 | "config": {
945 | "plotlyServerURL": "https://plot.ly"
946 | },
947 | "data": [
948 | {
949 | "hovertemplate": "category=bar
name=%{x}
amount=%{y}",
950 | "legendgroup": "bar",
951 | "marker": {
952 | "color": "#636efa",
953 | "symbol": "circle"
954 | },
955 | "mode": "markers",
956 | "name": "bar",
957 | "showlegend": true,
958 | "type": "scatter",
959 | "x": [
960 | "Robert Johnson",
961 | "Crystal Clark",
962 | "Nancy Contreras",
963 | "Robert Johnson"
964 | ],
965 | "xaxis": "x",
966 | "y": [
967 | 1894,
968 | 1617,
969 | 1334,
970 | 1017
971 | ],
972 | "yaxis": "y"
973 | },
974 | {
975 | "hovertemplate": "category=restaurant
name=%{x}
amount=%{y}",
976 | "legendgroup": "restaurant",
977 | "marker": {
978 | "color": "#EF553B",
979 | "symbol": "circle"
980 | },
981 | "mode": "markers",
982 | "name": "restaurant",
983 | "showlegend": true,
984 | "type": "scatter",
985 | "x": [
986 | "Crystal Clark",
987 | "Robert Johnson"
988 | ],
989 | "xaxis": "x",
990 | "y": [
991 | 1131,
992 | 1060
993 | ],
994 | "yaxis": "y"
995 | },
996 | {
997 | "hovertemplate": "category=coffee shop
name=%{x}
amount=%{y}",
998 | "legendgroup": "coffee shop",
999 | "marker": {
1000 | "color": "#00cc96",
1001 | "symbol": "circle"
1002 | },
1003 | "mode": "markers",
1004 | "name": "coffee shop",
1005 | "showlegend": true,
1006 | "type": "scatter",
1007 | "x": [
1008 | "Laurie Gibbs"
1009 | ],
1010 | "xaxis": "x",
1011 | "y": [
1012 | 1009
1013 | ],
1014 | "yaxis": "y"
1015 | },
1016 | {
1017 | "hovertemplate": "category=pub
name=%{x}
amount=%{y}",
1018 | "legendgroup": "pub",
1019 | "marker": {
1020 | "color": "#ab63fa",
1021 | "symbol": "circle"
1022 | },
1023 | "mode": "markers",
1024 | "name": "pub",
1025 | "showlegend": true,
1026 | "type": "scatter",
1027 | "x": [
1028 | "Megan Price"
1029 | ],
1030 | "xaxis": "x",
1031 | "y": [
1032 | 748
1033 | ],
1034 | "yaxis": "y"
1035 | }
1036 | ],
1037 | "layout": {
1038 | "autosize": true,
1039 | "legend": {
1040 | "title": {
1041 | "text": "category"
1042 | },
1043 | "tracegroupgap": 0
1044 | },
1045 | "template": {
1046 | "data": {
1047 | "bar": [
1048 | {
1049 | "error_x": {
1050 | "color": "#2a3f5f"
1051 | },
1052 | "error_y": {
1053 | "color": "#2a3f5f"
1054 | },
1055 | "marker": {
1056 | "line": {
1057 | "color": "#E5ECF6",
1058 | "width": 0.5
1059 | }
1060 | },
1061 | "type": "bar"
1062 | }
1063 | ],
1064 | "barpolar": [
1065 | {
1066 | "marker": {
1067 | "line": {
1068 | "color": "#E5ECF6",
1069 | "width": 0.5
1070 | }
1071 | },
1072 | "type": "barpolar"
1073 | }
1074 | ],
1075 | "carpet": [
1076 | {
1077 | "aaxis": {
1078 | "endlinecolor": "#2a3f5f",
1079 | "gridcolor": "white",
1080 | "linecolor": "white",
1081 | "minorgridcolor": "white",
1082 | "startlinecolor": "#2a3f5f"
1083 | },
1084 | "baxis": {
1085 | "endlinecolor": "#2a3f5f",
1086 | "gridcolor": "white",
1087 | "linecolor": "white",
1088 | "minorgridcolor": "white",
1089 | "startlinecolor": "#2a3f5f"
1090 | },
1091 | "type": "carpet"
1092 | }
1093 | ],
1094 | "choropleth": [
1095 | {
1096 | "colorbar": {
1097 | "outlinewidth": 0,
1098 | "ticks": ""
1099 | },
1100 | "type": "choropleth"
1101 | }
1102 | ],
1103 | "contour": [
1104 | {
1105 | "colorbar": {
1106 | "outlinewidth": 0,
1107 | "ticks": ""
1108 | },
1109 | "colorscale": [
1110 | [
1111 | 0,
1112 | "#0d0887"
1113 | ],
1114 | [
1115 | 0.1111111111111111,
1116 | "#46039f"
1117 | ],
1118 | [
1119 | 0.2222222222222222,
1120 | "#7201a8"
1121 | ],
1122 | [
1123 | 0.3333333333333333,
1124 | "#9c179e"
1125 | ],
1126 | [
1127 | 0.4444444444444444,
1128 | "#bd3786"
1129 | ],
1130 | [
1131 | 0.5555555555555556,
1132 | "#d8576b"
1133 | ],
1134 | [
1135 | 0.6666666666666666,
1136 | "#ed7953"
1137 | ],
1138 | [
1139 | 0.7777777777777778,
1140 | "#fb9f3a"
1141 | ],
1142 | [
1143 | 0.8888888888888888,
1144 | "#fdca26"
1145 | ],
1146 | [
1147 | 1,
1148 | "#f0f921"
1149 | ]
1150 | ],
1151 | "type": "contour"
1152 | }
1153 | ],
1154 | "contourcarpet": [
1155 | {
1156 | "colorbar": {
1157 | "outlinewidth": 0,
1158 | "ticks": ""
1159 | },
1160 | "type": "contourcarpet"
1161 | }
1162 | ],
1163 | "heatmap": [
1164 | {
1165 | "colorbar": {
1166 | "outlinewidth": 0,
1167 | "ticks": ""
1168 | },
1169 | "colorscale": [
1170 | [
1171 | 0,
1172 | "#0d0887"
1173 | ],
1174 | [
1175 | 0.1111111111111111,
1176 | "#46039f"
1177 | ],
1178 | [
1179 | 0.2222222222222222,
1180 | "#7201a8"
1181 | ],
1182 | [
1183 | 0.3333333333333333,
1184 | "#9c179e"
1185 | ],
1186 | [
1187 | 0.4444444444444444,
1188 | "#bd3786"
1189 | ],
1190 | [
1191 | 0.5555555555555556,
1192 | "#d8576b"
1193 | ],
1194 | [
1195 | 0.6666666666666666,
1196 | "#ed7953"
1197 | ],
1198 | [
1199 | 0.7777777777777778,
1200 | "#fb9f3a"
1201 | ],
1202 | [
1203 | 0.8888888888888888,
1204 | "#fdca26"
1205 | ],
1206 | [
1207 | 1,
1208 | "#f0f921"
1209 | ]
1210 | ],
1211 | "type": "heatmap"
1212 | }
1213 | ],
1214 | "heatmapgl": [
1215 | {
1216 | "colorbar": {
1217 | "outlinewidth": 0,
1218 | "ticks": ""
1219 | },
1220 | "colorscale": [
1221 | [
1222 | 0,
1223 | "#0d0887"
1224 | ],
1225 | [
1226 | 0.1111111111111111,
1227 | "#46039f"
1228 | ],
1229 | [
1230 | 0.2222222222222222,
1231 | "#7201a8"
1232 | ],
1233 | [
1234 | 0.3333333333333333,
1235 | "#9c179e"
1236 | ],
1237 | [
1238 | 0.4444444444444444,
1239 | "#bd3786"
1240 | ],
1241 | [
1242 | 0.5555555555555556,
1243 | "#d8576b"
1244 | ],
1245 | [
1246 | 0.6666666666666666,
1247 | "#ed7953"
1248 | ],
1249 | [
1250 | 0.7777777777777778,
1251 | "#fb9f3a"
1252 | ],
1253 | [
1254 | 0.8888888888888888,
1255 | "#fdca26"
1256 | ],
1257 | [
1258 | 1,
1259 | "#f0f921"
1260 | ]
1261 | ],
1262 | "type": "heatmapgl"
1263 | }
1264 | ],
1265 | "histogram": [
1266 | {
1267 | "marker": {
1268 | "colorbar": {
1269 | "outlinewidth": 0,
1270 | "ticks": ""
1271 | }
1272 | },
1273 | "type": "histogram"
1274 | }
1275 | ],
1276 | "histogram2d": [
1277 | {
1278 | "colorbar": {
1279 | "outlinewidth": 0,
1280 | "ticks": ""
1281 | },
1282 | "colorscale": [
1283 | [
1284 | 0,
1285 | "#0d0887"
1286 | ],
1287 | [
1288 | 0.1111111111111111,
1289 | "#46039f"
1290 | ],
1291 | [
1292 | 0.2222222222222222,
1293 | "#7201a8"
1294 | ],
1295 | [
1296 | 0.3333333333333333,
1297 | "#9c179e"
1298 | ],
1299 | [
1300 | 0.4444444444444444,
1301 | "#bd3786"
1302 | ],
1303 | [
1304 | 0.5555555555555556,
1305 | "#d8576b"
1306 | ],
1307 | [
1308 | 0.6666666666666666,
1309 | "#ed7953"
1310 | ],
1311 | [
1312 | 0.7777777777777778,
1313 | "#fb9f3a"
1314 | ],
1315 | [
1316 | 0.8888888888888888,
1317 | "#fdca26"
1318 | ],
1319 | [
1320 | 1,
1321 | "#f0f921"
1322 | ]
1323 | ],
1324 | "type": "histogram2d"
1325 | }
1326 | ],
1327 | "histogram2dcontour": [
1328 | {
1329 | "colorbar": {
1330 | "outlinewidth": 0,
1331 | "ticks": ""
1332 | },
1333 | "colorscale": [
1334 | [
1335 | 0,
1336 | "#0d0887"
1337 | ],
1338 | [
1339 | 0.1111111111111111,
1340 | "#46039f"
1341 | ],
1342 | [
1343 | 0.2222222222222222,
1344 | "#7201a8"
1345 | ],
1346 | [
1347 | 0.3333333333333333,
1348 | "#9c179e"
1349 | ],
1350 | [
1351 | 0.4444444444444444,
1352 | "#bd3786"
1353 | ],
1354 | [
1355 | 0.5555555555555556,
1356 | "#d8576b"
1357 | ],
1358 | [
1359 | 0.6666666666666666,
1360 | "#ed7953"
1361 | ],
1362 | [
1363 | 0.7777777777777778,
1364 | "#fb9f3a"
1365 | ],
1366 | [
1367 | 0.8888888888888888,
1368 | "#fdca26"
1369 | ],
1370 | [
1371 | 1,
1372 | "#f0f921"
1373 | ]
1374 | ],
1375 | "type": "histogram2dcontour"
1376 | }
1377 | ],
1378 | "mesh3d": [
1379 | {
1380 | "colorbar": {
1381 | "outlinewidth": 0,
1382 | "ticks": ""
1383 | },
1384 | "type": "mesh3d"
1385 | }
1386 | ],
1387 | "parcoords": [
1388 | {
1389 | "line": {
1390 | "colorbar": {
1391 | "outlinewidth": 0,
1392 | "ticks": ""
1393 | }
1394 | },
1395 | "type": "parcoords"
1396 | }
1397 | ],
1398 | "pie": [
1399 | {
1400 | "automargin": true,
1401 | "type": "pie"
1402 | }
1403 | ],
1404 | "scatter": [
1405 | {
1406 | "marker": {
1407 | "colorbar": {
1408 | "outlinewidth": 0,
1409 | "ticks": ""
1410 | }
1411 | },
1412 | "type": "scatter"
1413 | }
1414 | ],
1415 | "scatter3d": [
1416 | {
1417 | "line": {
1418 | "colorbar": {
1419 | "outlinewidth": 0,
1420 | "ticks": ""
1421 | }
1422 | },
1423 | "marker": {
1424 | "colorbar": {
1425 | "outlinewidth": 0,
1426 | "ticks": ""
1427 | }
1428 | },
1429 | "type": "scatter3d"
1430 | }
1431 | ],
1432 | "scattercarpet": [
1433 | {
1434 | "marker": {
1435 | "colorbar": {
1436 | "outlinewidth": 0,
1437 | "ticks": ""
1438 | }
1439 | },
1440 | "type": "scattercarpet"
1441 | }
1442 | ],
1443 | "scattergeo": [
1444 | {
1445 | "marker": {
1446 | "colorbar": {
1447 | "outlinewidth": 0,
1448 | "ticks": ""
1449 | }
1450 | },
1451 | "type": "scattergeo"
1452 | }
1453 | ],
1454 | "scattergl": [
1455 | {
1456 | "marker": {
1457 | "colorbar": {
1458 | "outlinewidth": 0,
1459 | "ticks": ""
1460 | }
1461 | },
1462 | "type": "scattergl"
1463 | }
1464 | ],
1465 | "scattermapbox": [
1466 | {
1467 | "marker": {
1468 | "colorbar": {
1469 | "outlinewidth": 0,
1470 | "ticks": ""
1471 | }
1472 | },
1473 | "type": "scattermapbox"
1474 | }
1475 | ],
1476 | "scatterpolar": [
1477 | {
1478 | "marker": {
1479 | "colorbar": {
1480 | "outlinewidth": 0,
1481 | "ticks": ""
1482 | }
1483 | },
1484 | "type": "scatterpolar"
1485 | }
1486 | ],
1487 | "scatterpolargl": [
1488 | {
1489 | "marker": {
1490 | "colorbar": {
1491 | "outlinewidth": 0,
1492 | "ticks": ""
1493 | }
1494 | },
1495 | "type": "scatterpolargl"
1496 | }
1497 | ],
1498 | "scatterternary": [
1499 | {
1500 | "marker": {
1501 | "colorbar": {
1502 | "outlinewidth": 0,
1503 | "ticks": ""
1504 | }
1505 | },
1506 | "type": "scatterternary"
1507 | }
1508 | ],
1509 | "surface": [
1510 | {
1511 | "colorbar": {
1512 | "outlinewidth": 0,
1513 | "ticks": ""
1514 | },
1515 | "colorscale": [
1516 | [
1517 | 0,
1518 | "#0d0887"
1519 | ],
1520 | [
1521 | 0.1111111111111111,
1522 | "#46039f"
1523 | ],
1524 | [
1525 | 0.2222222222222222,
1526 | "#7201a8"
1527 | ],
1528 | [
1529 | 0.3333333333333333,
1530 | "#9c179e"
1531 | ],
1532 | [
1533 | 0.4444444444444444,
1534 | "#bd3786"
1535 | ],
1536 | [
1537 | 0.5555555555555556,
1538 | "#d8576b"
1539 | ],
1540 | [
1541 | 0.6666666666666666,
1542 | "#ed7953"
1543 | ],
1544 | [
1545 | 0.7777777777777778,
1546 | "#fb9f3a"
1547 | ],
1548 | [
1549 | 0.8888888888888888,
1550 | "#fdca26"
1551 | ],
1552 | [
1553 | 1,
1554 | "#f0f921"
1555 | ]
1556 | ],
1557 | "type": "surface"
1558 | }
1559 | ],
1560 | "table": [
1561 | {
1562 | "cells": {
1563 | "fill": {
1564 | "color": "#EBF0F8"
1565 | },
1566 | "line": {
1567 | "color": "white"
1568 | }
1569 | },
1570 | "header": {
1571 | "fill": {
1572 | "color": "#C8D4E3"
1573 | },
1574 | "line": {
1575 | "color": "white"
1576 | }
1577 | },
1578 | "type": "table"
1579 | }
1580 | ]
1581 | },
1582 | "layout": {
1583 | "annotationdefaults": {
1584 | "arrowcolor": "#2a3f5f",
1585 | "arrowhead": 0,
1586 | "arrowwidth": 1
1587 | },
1588 | "coloraxis": {
1589 | "colorbar": {
1590 | "outlinewidth": 0,
1591 | "ticks": ""
1592 | }
1593 | },
1594 | "colorscale": {
1595 | "diverging": [
1596 | [
1597 | 0,
1598 | "#8e0152"
1599 | ],
1600 | [
1601 | 0.1,
1602 | "#c51b7d"
1603 | ],
1604 | [
1605 | 0.2,
1606 | "#de77ae"
1607 | ],
1608 | [
1609 | 0.3,
1610 | "#f1b6da"
1611 | ],
1612 | [
1613 | 0.4,
1614 | "#fde0ef"
1615 | ],
1616 | [
1617 | 0.5,
1618 | "#f7f7f7"
1619 | ],
1620 | [
1621 | 0.6,
1622 | "#e6f5d0"
1623 | ],
1624 | [
1625 | 0.7,
1626 | "#b8e186"
1627 | ],
1628 | [
1629 | 0.8,
1630 | "#7fbc41"
1631 | ],
1632 | [
1633 | 0.9,
1634 | "#4d9221"
1635 | ],
1636 | [
1637 | 1,
1638 | "#276419"
1639 | ]
1640 | ],
1641 | "sequential": [
1642 | [
1643 | 0,
1644 | "#0d0887"
1645 | ],
1646 | [
1647 | 0.1111111111111111,
1648 | "#46039f"
1649 | ],
1650 | [
1651 | 0.2222222222222222,
1652 | "#7201a8"
1653 | ],
1654 | [
1655 | 0.3333333333333333,
1656 | "#9c179e"
1657 | ],
1658 | [
1659 | 0.4444444444444444,
1660 | "#bd3786"
1661 | ],
1662 | [
1663 | 0.5555555555555556,
1664 | "#d8576b"
1665 | ],
1666 | [
1667 | 0.6666666666666666,
1668 | "#ed7953"
1669 | ],
1670 | [
1671 | 0.7777777777777778,
1672 | "#fb9f3a"
1673 | ],
1674 | [
1675 | 0.8888888888888888,
1676 | "#fdca26"
1677 | ],
1678 | [
1679 | 1,
1680 | "#f0f921"
1681 | ]
1682 | ],
1683 | "sequentialminus": [
1684 | [
1685 | 0,
1686 | "#0d0887"
1687 | ],
1688 | [
1689 | 0.1111111111111111,
1690 | "#46039f"
1691 | ],
1692 | [
1693 | 0.2222222222222222,
1694 | "#7201a8"
1695 | ],
1696 | [
1697 | 0.3333333333333333,
1698 | "#9c179e"
1699 | ],
1700 | [
1701 | 0.4444444444444444,
1702 | "#bd3786"
1703 | ],
1704 | [
1705 | 0.5555555555555556,
1706 | "#d8576b"
1707 | ],
1708 | [
1709 | 0.6666666666666666,
1710 | "#ed7953"
1711 | ],
1712 | [
1713 | 0.7777777777777778,
1714 | "#fb9f3a"
1715 | ],
1716 | [
1717 | 0.8888888888888888,
1718 | "#fdca26"
1719 | ],
1720 | [
1721 | 1,
1722 | "#f0f921"
1723 | ]
1724 | ]
1725 | },
1726 | "colorway": [
1727 | "#636efa",
1728 | "#EF553B",
1729 | "#00cc96",
1730 | "#ab63fa",
1731 | "#FFA15A",
1732 | "#19d3f3",
1733 | "#FF6692",
1734 | "#B6E880",
1735 | "#FF97FF",
1736 | "#FECB52"
1737 | ],
1738 | "font": {
1739 | "color": "#2a3f5f"
1740 | },
1741 | "geo": {
1742 | "bgcolor": "white",
1743 | "lakecolor": "white",
1744 | "landcolor": "#E5ECF6",
1745 | "showlakes": true,
1746 | "showland": true,
1747 | "subunitcolor": "white"
1748 | },
1749 | "hoverlabel": {
1750 | "align": "left"
1751 | },
1752 | "hovermode": "closest",
1753 | "mapbox": {
1754 | "style": "light"
1755 | },
1756 | "paper_bgcolor": "white",
1757 | "plot_bgcolor": "#E5ECF6",
1758 | "polar": {
1759 | "angularaxis": {
1760 | "gridcolor": "white",
1761 | "linecolor": "white",
1762 | "ticks": ""
1763 | },
1764 | "bgcolor": "#E5ECF6",
1765 | "radialaxis": {
1766 | "gridcolor": "white",
1767 | "linecolor": "white",
1768 | "ticks": ""
1769 | }
1770 | },
1771 | "scene": {
1772 | "xaxis": {
1773 | "backgroundcolor": "#E5ECF6",
1774 | "gridcolor": "white",
1775 | "gridwidth": 2,
1776 | "linecolor": "white",
1777 | "showbackground": true,
1778 | "ticks": "",
1779 | "zerolinecolor": "white"
1780 | },
1781 | "yaxis": {
1782 | "backgroundcolor": "#E5ECF6",
1783 | "gridcolor": "white",
1784 | "gridwidth": 2,
1785 | "linecolor": "white",
1786 | "showbackground": true,
1787 | "ticks": "",
1788 | "zerolinecolor": "white"
1789 | },
1790 | "zaxis": {
1791 | "backgroundcolor": "#E5ECF6",
1792 | "gridcolor": "white",
1793 | "gridwidth": 2,
1794 | "linecolor": "white",
1795 | "showbackground": true,
1796 | "ticks": "",
1797 | "zerolinecolor": "white"
1798 | }
1799 | },
1800 | "shapedefaults": {
1801 | "line": {
1802 | "color": "#2a3f5f"
1803 | }
1804 | },
1805 | "ternary": {
1806 | "aaxis": {
1807 | "gridcolor": "white",
1808 | "linecolor": "white",
1809 | "ticks": ""
1810 | },
1811 | "baxis": {
1812 | "gridcolor": "white",
1813 | "linecolor": "white",
1814 | "ticks": ""
1815 | },
1816 | "bgcolor": "#E5ECF6",
1817 | "caxis": {
1818 | "gridcolor": "white",
1819 | "linecolor": "white",
1820 | "ticks": ""
1821 | }
1822 | },
1823 | "title": {
1824 | "x": 0.05
1825 | },
1826 | "xaxis": {
1827 | "automargin": true,
1828 | "gridcolor": "white",
1829 | "linecolor": "white",
1830 | "ticks": "",
1831 | "title": {
1832 | "standoff": 15
1833 | },
1834 | "zerolinecolor": "white",
1835 | "zerolinewidth": 2
1836 | },
1837 | "yaxis": {
1838 | "automargin": true,
1839 | "gridcolor": "white",
1840 | "linecolor": "white",
1841 | "ticks": "",
1842 | "title": {
1843 | "standoff": 15
1844 | },
1845 | "zerolinecolor": "white",
1846 | "zerolinewidth": 2
1847 | }
1848 | }
1849 | },
1850 | "title": {
1851 | "text": "Anomalous Transactions"
1852 | },
1853 | "xaxis": {
1854 | "anchor": "y",
1855 | "autorange": true,
1856 | "domain": [
1857 | 0,
1858 | 1
1859 | ],
1860 | "range": [
1861 | -0.24975222993062435,
1862 | 4.249752229930625
1863 | ],
1864 | "title": {
1865 | "text": "name"
1866 | },
1867 | "type": "category"
1868 | },
1869 | "yaxis": {
1870 | "anchor": "x",
1871 | "autorange": true,
1872 | "domain": [
1873 | 0,
1874 | 1
1875 | ],
1876 | "range": [
1877 | 664.0573248407643,
1878 | 1977.9426751592357
1879 | ],
1880 | "title": {
1881 | "text": "amount"
1882 | },
1883 | "type": "linear"
1884 | }
1885 | }
1886 | },
1887 | "text/html": [
1888 | "\n",
1889 | " \n",
1890 | " \n",
1891 | "
\n",
1892 | " \n",
1930 | "
"
1931 | ]
1932 | },
1933 | "metadata": {},
1934 | "output_type": "display_data"
1935 | }
1936 | ],
1937 | "source": [
1938 | "# find outlier transactions made in the early morning (07:00-09:00), largest amounts first\n",
1939 | "import datetime\n",
1940 | "start_time = datetime.time(7,0,0)\n",
1941 | "end_time = datetime.time(9,0,0)\n",
1942 | "\n",
1943 | "anomalous_transactions = outlier.between_time(start_time, end_time).sort_values('amount', ascending=False)\n",
1944 | "\n",
1945 | "px.scatter(anomalous_transactions, x='name', y='amount', color='category', title='Anomalous Transactions')"
1946 | ]
1947 | },
1948 | {
1949 | "cell_type": "markdown",
1950 | "metadata": {},
1951 | "source": [
1952 | " ## Identifying Outliers Using Interquartile Range"
1953 | ]
1954 | },
1955 | {
1956 | "cell_type": "code",
1957 | "execution_count": 86,
1958 | "metadata": {},
1959 | "outputs": [
1960 | {
1961 | "name": "stdout",
1962 | "output_type": "stream",
1963 | "text": [
1964 | "Percentiles: 25th=3.735, 75th=14.648, IQR=10.913\n",
1965 | "Identified outliers: 110\n",
1966 | "Non-outlier observations: 3390\n"
1967 | ]
1968 | },
1969 | {
1970 | "data": {
1971 | "text/html": [
1972 | "\n",
1973 | "\n",
1986 | "
\n",
1987 | " \n",
1988 | " \n",
1989 | " | \n",
1990 | " id | \n",
1991 | " name | \n",
1992 | " card | \n",
1993 | " amount | \n",
1994 | " category | \n",
1995 | " outlier | \n",
1996 | "
\n",
1997 | " \n",
1998 | " | date | \n",
1999 | " | \n",
2000 | " | \n",
2001 | " | \n",
2002 | " | \n",
2003 | " | \n",
2004 | " | \n",
2005 | "
\n",
2006 | " \n",
2007 | " \n",
2008 | " \n",
2009 | " | 2018-01-02 23:27:46 | \n",
2010 | " 12 | \n",
2011 | " Megan Price | \n",
2012 | " 501879657465 | \n",
2013 | " 1031.0 | \n",
2014 | " restaurant | \n",
2015 | " True | \n",
2016 | "
\n",
2017 | " \n",
2018 | " | 2018-01-04 03:05:18 | \n",
2019 | " 7 | \n",
2020 | " Sean Taylor | \n",
2021 | " 3516952396080247 | \n",
2022 | " 1685.0 | \n",
2023 | " food truck | \n",
2024 | " True | \n",
2025 | "
\n",
2026 | " \n",
2027 | " | 2018-01-07 01:10:54 | \n",
2028 | " 18 | \n",
2029 | " Malik Carlson | \n",
2030 | " 344119623920892 | \n",
2031 | " 175.0 | \n",
2032 | " pub | \n",
2033 | " True | \n",
2034 | "
\n",
2035 | " \n",
2036 | " | 2018-01-08 02:34:32 | \n",
2037 | " 6 | \n",
2038 | " Beth Hernandez | \n",
2039 | " 3581345943543942 | \n",
2040 | " 1029.0 | \n",
2041 | " bar | \n",
2042 | " True | \n",
2043 | "
\n",
2044 | " \n",
2045 | " | 2018-01-08 11:15:36 | \n",
2046 | " 18 | \n",
2047 | " Malik Carlson | \n",
2048 | " 344119623920892 | \n",
2049 | " 333.0 | \n",
2050 | " restaurant | \n",
2051 | " True | \n",
2052 | "
\n",
2053 | " \n",
2054 | " | 2018-01-11 13:20:31 | \n",
2055 | " 16 | \n",
2056 | " Crystal Clark | \n",
2057 | " 5570600642865857 | \n",
2058 | " 229.0 | \n",
2059 | " pub | \n",
2060 | " True | \n",
2061 | "
\n",
2062 | " \n",
2063 | " | 2018-01-22 08:07:03 | \n",
2064 | " 16 | \n",
2065 | " Crystal Clark | \n",
2066 | " 5570600642865857 | \n",
2067 | " 1131.0 | \n",
2068 | " restaurant | \n",
2069 | " True | \n",
2070 | "
\n",
2071 | " \n",
2072 | " | 2018-01-23 06:29:37 | \n",
2073 | " 12 | \n",
2074 | " Megan Price | \n",
2075 | " 501879657465 | \n",
2076 | " 1678.0 | \n",
2077 | " pub | \n",
2078 | " True | \n",
2079 | "
\n",
2080 | " \n",
2081 | " | 2018-01-24 13:17:19 | \n",
2082 | " 1 | \n",
2083 | " Robert Johnson | \n",
2084 | " 4761049645711555811 | \n",
2085 | " 1691.0 | \n",
2086 | " coffee shop | \n",
2087 | " True | \n",
2088 | "
\n",
2089 | " \n",
2090 | " | 2018-01-30 18:31:00 | \n",
2091 | " 25 | \n",
2092 | " Nancy Contreras | \n",
2093 | " 4319653513507 | \n",
2094 | " 1177.0 | \n",
2095 | " restaurant | \n",
2096 | " True | \n",
2097 | "
\n",
2098 | " \n",
2099 | " | 2018-02-09 11:38:37 | \n",
2100 | " 7 | \n",
2101 | " Sean Taylor | \n",
2102 | " 3516952396080247 | \n",
2103 | " 445.0 | \n",
2104 | " bar | \n",
2105 | " True | \n",
2106 | "
\n",
2107 | " \n",
2108 | " | 2018-02-17 01:27:19 | \n",
2109 | " 16 | \n",
2110 | " Crystal Clark | \n",
2111 | " 5570600642865857 | \n",
2112 | " 1430.0 | \n",
2113 | " restaurant | \n",
2114 | " True | \n",
2115 | "
\n",
2116 | " \n",
2117 | " | 2018-02-19 16:00:43 | \n",
2118 | " 7 | \n",
2119 | " Sean Taylor | \n",
2120 | " 3516952396080247 | \n",
2121 | " 1072.0 | \n",
2122 | " food truck | \n",
2123 | " True | \n",
2124 | "
\n",
2125 | " \n",
2126 | " | 2018-02-19 22:48:25 | \n",
2127 | " 18 | \n",
2128 | " Malik Carlson | \n",
2129 | " 344119623920892 | \n",
2130 | " 1839.0 | \n",
2131 | " restaurant | \n",
2132 | " True | \n",
2133 | "
\n",
2134 | " \n",
2135 | " | 2018-02-27 15:27:32 | \n",
2136 | " 6 | \n",
2137 | " Beth Hernandez | \n",
2138 | " 3581345943543942 | \n",
2139 | " 1145.0 | \n",
2140 | " bar | \n",
2141 | " True | \n",
2142 | "
\n",
2143 | " \n",
2144 | " | 2018-03-01 21:29:05 | \n",
2145 | " 3 | \n",
2146 | " Elizabeth Sawyer | \n",
2147 | " 30078299053512 | \n",
2148 | " 1119.0 | \n",
2149 | " pub | \n",
2150 | " True | \n",
2151 | "
\n",
2152 | " \n",
2153 | " | 2018-03-04 15:50:53 | \n",
2154 | " 9 | \n",
2155 | " Laurie Gibbs | \n",
2156 | " 30181963913340 | \n",
2157 | " 1534.0 | \n",
2158 | " coffee shop | \n",
2159 | " True | \n",
2160 | "
\n",
2161 | " \n",
2162 | " | 2018-03-05 08:26:08 | \n",
2163 | " 16 | \n",
2164 | " Crystal Clark | \n",
2165 | " 5570600642865857 | \n",
2166 | " 1617.0 | \n",
2167 | " bar | \n",
2168 | " True | \n",
2169 | "
\n",
2170 | " \n",
2171 | " | 2018-03-06 07:18:09 | \n",
2172 | " 25 | \n",
2173 | " Nancy Contreras | \n",
2174 | " 4319653513507 | \n",
2175 | " 1334.0 | \n",
2176 | " bar | \n",
2177 | " True | \n",
2178 | "
\n",
2179 | " \n",
2180 | " | 2018-03-09 04:51:38 | \n",
2181 | " 6 | \n",
2182 | " Beth Hernandez | \n",
2183 | " 3581345943543942 | \n",
2184 | " 389.0 | \n",
2185 | " restaurant | \n",
2186 | " True | \n",
2187 | "
\n",
2188 | " \n",
2189 | " | 2018-03-12 00:44:01 | \n",
2190 | " 12 | \n",
2191 | " Megan Price | \n",
2192 | " 501879657465 | \n",
2193 | " 1530.0 | \n",
2194 | " coffee shop | \n",
2195 | " True | \n",
2196 | "
\n",
2197 | " \n",
2198 | " | 2018-03-20 10:19:25 | \n",
2199 | " 12 | \n",
2200 | " Megan Price | \n",
2201 | " 501879657465 | \n",
2202 | " 852.0 | \n",
2203 | " pub | \n",
2204 | " True | \n",
2205 | "
\n",
2206 | " \n",
2207 | " | 2018-03-20 13:05:54 | \n",
2208 | " 24 | \n",
2209 | " Stephanie Dalton | \n",
2210 | " 30142966699187 | \n",
2211 | " 1011.0 | \n",
2212 | " bar | \n",
2213 | " True | \n",
2214 | "
\n",
2215 | " \n",
2216 | " | 2018-03-26 07:41:59 | \n",
2217 | " 9 | \n",
2218 | " Laurie Gibbs | \n",
2219 | " 30181963913340 | \n",
2220 | " 1009.0 | \n",
2221 | " coffee shop | \n",
2222 | " True | \n",
2223 | "
\n",
2224 | " \n",
2225 | " | 2018-04-01 07:17:21 | \n",
2226 | " 25 | \n",
2227 | " Nancy Contreras | \n",
2228 | " 4319653513507 | \n",
2229 | " 100.0 | \n",
2230 | " coffee shop | \n",
2231 | " True | \n",
2232 | "
\n",
2233 | " \n",
2234 | " | 2018-04-03 03:23:37 | \n",
2235 | " 18 | \n",
2236 | " Malik Carlson | \n",
2237 | " 344119623920892 | \n",
2238 | " 1077.0 | \n",
2239 | " restaurant | \n",
2240 | " True | \n",
2241 | "
\n",
2242 | " \n",
2243 | " | 2018-04-08 06:03:50 | \n",
2244 | " 25 | \n",
2245 | " Nancy Contreras | \n",
2246 | " 4319653513507 | \n",
2247 | " 1063.0 | \n",
2248 | " pub | \n",
2249 | " True | \n",
2250 | "
\n",
2251 | " \n",
2252 | " | 2018-04-09 10:24:32 | \n",
2253 | " 1 | \n",
2254 | " Robert Johnson | \n",
2255 | " 4761049645711555811 | \n",
2256 | " 283.0 | \n",
2257 | " restaurant | \n",
2258 | " True | \n",
2259 | "
\n",
2260 | " \n",
2261 | " | 2018-04-09 18:28:25 | \n",
2262 | " 25 | \n",
2263 | " Nancy Contreras | \n",
2264 | " 4319653513507 | \n",
2265 | " 269.0 | \n",
2266 | " restaurant | \n",
2267 | " True | \n",
2268 | "
\n",
2269 | " \n",
2270 | " | 2018-04-10 06:08:01 | \n",
2271 | " 7 | \n",
2272 | " Sean Taylor | \n",
2273 | " 3516952396080247 | \n",
2274 | " 543.0 | \n",
2275 | " food truck | \n",
2276 | " True | \n",
2277 | "
\n",
2278 | " \n",
2279 | " | ... | \n",
2280 | " ... | \n",
2281 | " ... | \n",
2282 | " ... | \n",
2283 | " ... | \n",
2284 | " ... | \n",
2285 | " ... | \n",
2286 | "
\n",
2287 | " \n",
2288 | " | 2018-10-11 23:29:33 | \n",
2289 | " 3 | \n",
2290 | " Elizabeth Sawyer | \n",
2291 | " 30078299053512 | \n",
2292 | " 206.0 | \n",
2293 | " restaurant | \n",
2294 | " True | \n",
2295 | "
\n",
2296 | " \n",
2297 | " | 2018-10-16 13:27:33 | \n",
2298 | " 1 | \n",
2299 | " Robert Johnson | \n",
2300 | " 4761049645711555811 | \n",
2301 | " 484.0 | \n",
2302 | " food truck | \n",
2303 | " True | \n",
2304 | "
\n",
2305 | " \n",
2306 | " | 2018-10-19 01:07:37 | \n",
2307 | " 3 | \n",
2308 | " Elizabeth Sawyer | \n",
2309 | " 30078299053512 | \n",
2310 | " 1053.0 | \n",
2311 | " restaurant | \n",
2312 | " True | \n",
2313 | "
\n",
2314 | " \n",
2315 | " | 2018-10-19 12:32:37 | \n",
2316 | " 16 | \n",
2317 | " Crystal Clark | \n",
2318 | " 5570600642865857 | \n",
2319 | " 178.0 | \n",
2320 | " food truck | \n",
2321 | " True | \n",
2322 | "
\n",
2323 | " \n",
2324 | " | 2018-10-23 22:47:13 | \n",
2325 | " 16 | \n",
2326 | " Crystal Clark | \n",
2327 | " 5570600642865857 | \n",
2328 | " 393.0 | \n",
2329 | " food truck | \n",
2330 | " True | \n",
2331 | "
\n",
2332 | " \n",
2333 | " | 2018-10-28 02:12:58 | \n",
2334 | " 25 | \n",
2335 | " Nancy Contreras | \n",
2336 | " 4319653513507 | \n",
2337 | " 137.0 | \n",
2338 | " pub | \n",
2339 | " True | \n",
2340 | "
\n",
2341 | " \n",
2342 | " | 2018-11-13 05:58:47 | \n",
2343 | " 24 | \n",
2344 | " Stephanie Dalton | \n",
2345 | " 30142966699187 | \n",
2346 | " 466.0 | \n",
2347 | " bar | \n",
2348 | " True | \n",
2349 | "
\n",
2350 | " \n",
2351 | " | 2018-11-13 17:07:25 | \n",
2352 | " 16 | \n",
2353 | " Crystal Clark | \n",
2354 | " 5570600642865857 | \n",
2355 | " 1911.0 | \n",
2356 | " restaurant | \n",
2357 | " True | \n",
2358 | "
\n",
2359 | " \n",
2360 | " | 2018-11-17 05:30:43 | \n",
2361 | " 18 | \n",
2362 | " Malik Carlson | \n",
2363 | " 344119623920892 | \n",
2364 | " 1769.0 | \n",
2365 | " food truck | \n",
2366 | " True | \n",
2367 | "
\n",
2368 | " \n",
2369 | " | 2018-11-20 05:24:28 | \n",
2370 | " 3 | \n",
2371 | " Elizabeth Sawyer | \n",
2372 | " 30078299053512 | \n",
2373 | " 1054.0 | \n",
2374 | " bar | \n",
2375 | " True | \n",
2376 | "
\n",
2377 | " \n",
2378 | " | 2018-11-23 09:08:05 | \n",
2379 | " 12 | \n",
2380 | " Megan Price | \n",
2381 | " 501879657465 | \n",
2382 | " 233.0 | \n",
2383 | " restaurant | \n",
2384 | " True | \n",
2385 | "
\n",
2386 | " \n",
2387 | " | 2018-11-25 20:44:07 | \n",
2388 | " 12 | \n",
2389 | " Megan Price | \n",
2390 | " 501879657465 | \n",
2391 | " 1123.0 | \n",
2392 | " bar | \n",
2393 | " True | \n",
2394 | "
\n",
2395 | " \n",
2396 | " | 2018-11-26 20:54:39 | \n",
2397 | " 1 | \n",
2398 | " Robert Johnson | \n",
2399 | " 4761049645711555811 | \n",
2400 | " 267.0 | \n",
2401 | " food truck | \n",
2402 | " True | \n",
2403 | "
\n",
2404 | " \n",
2405 | " | 2018-11-27 15:36:05 | \n",
2406 | " 12 | \n",
2407 | " Megan Price | \n",
2408 | " 501879657465 | \n",
2409 | " 1802.0 | \n",
2410 | " bar | \n",
2411 | " True | \n",
2412 | "
\n",
2413 | " \n",
2414 | " | 2018-11-27 17:20:29 | \n",
2415 | " 6 | \n",
2416 | " Beth Hernandez | \n",
2417 | " 3581345943543942 | \n",
2418 | " 1279.0 | \n",
2419 | " restaurant | \n",
2420 | " True | \n",
2421 | "
\n",
2422 | " \n",
2423 | " | 2018-11-27 17:27:34 | \n",
2424 | " 1 | \n",
2425 | " Robert Johnson | \n",
2426 | " 4761049645711555811 | \n",
2427 | " 1660.0 | \n",
2428 | " pub | \n",
2429 | " True | \n",
2430 | "
\n",
2431 | " \n",
2432 | " | 2018-12-03 02:38:52 | \n",
2433 | " 16 | \n",
2434 | " Crystal Clark | \n",
2435 | " 5570600642865857 | \n",
2436 | " 1014.0 | \n",
2437 | " restaurant | \n",
2438 | " True | \n",
2439 | "
\n",
2440 | " \n",
2441 | " | 2018-12-05 19:24:27 | \n",
2442 | " 9 | \n",
2443 | " Laurie Gibbs | \n",
2444 | " 30181963913340 | \n",
2445 | " 57.0 | \n",
2446 | " bar | \n",
2447 | " True | \n",
2448 | "
\n",
2449 | " \n",
2450 | " | 2018-12-07 07:22:03 | \n",
2451 | " 1 | \n",
2452 | " Robert Johnson | \n",
2453 | " 4761049645711555811 | \n",
2454 | " 1894.0 | \n",
2455 | " bar | \n",
2456 | " True | \n",
2457 | "
\n",
2458 | " \n",
2459 | " | 2018-12-13 12:09:58 | \n",
2460 | " 18 | \n",
2461 | " Malik Carlson | \n",
2462 | " 344119623920892 | \n",
2463 | " 1154.0 | \n",
2464 | " restaurant | \n",
2465 | " True | \n",
2466 | "
\n",
2467 | " \n",
2468 | " | 2018-12-13 15:51:59 | \n",
2469 | " 7 | \n",
2470 | " Sean Taylor | \n",
2471 | " 3516952396080247 | \n",
2472 | " 2249.0 | \n",
2473 | " food truck | \n",
2474 | " True | \n",
2475 | "
\n",
2476 | " \n",
2477 | " | 2018-12-14 08:51:41 | \n",
2478 | " 12 | \n",
2479 | " Megan Price | \n",
2480 | " 501879657465 | \n",
2481 | " 748.0 | \n",
2482 | " pub | \n",
2483 | " True | \n",
2484 | "
\n",
2485 | " \n",
2486 | " | 2018-12-18 13:33:37 | \n",
2487 | " 25 | \n",
2488 | " Nancy Contreras | \n",
2489 | " 4319653513507 | \n",
2490 | " 1074.0 | \n",
2491 | " coffee shop | \n",
2492 | " True | \n",
2493 | "
\n",
2494 | " \n",
2495 | " | 2018-12-18 17:20:33 | \n",
2496 | " 7 | \n",
2497 | " Sean Taylor | \n",
2498 | " 3516952396080247 | \n",
2499 | " 1296.0 | \n",
2500 | " bar | \n",
2501 | " True | \n",
2502 | "
\n",
2503 | " \n",
2504 | " | 2018-12-19 16:10:03 | \n",
2505 | " 9 | \n",
2506 | " Laurie Gibbs | \n",
2507 | " 30181963913340 | \n",
2508 | " 1724.0 | \n",
2509 | " pub | \n",
2510 | " True | \n",
2511 | "
\n",
2512 | " \n",
2513 | " | 2018-12-21 09:56:32 | \n",
2514 | " 24 | \n",
2515 | " Stephanie Dalton | \n",
2516 | " 30142966699187 | \n",
2517 | " 1301.0 | \n",
2518 | " pub | \n",
2519 | " True | \n",
2520 | "
\n",
2521 | " \n",
2522 | " | 2018-12-24 15:55:06 | \n",
2523 | " 16 | \n",
2524 | " Crystal Clark | \n",
2525 | " 5570600642865857 | \n",
2526 | " 1634.0 | \n",
2527 | " pub | \n",
2528 | " True | \n",
2529 | "
\n",
2530 | " \n",
2531 | " | 2018-12-25 19:10:42 | \n",
2532 | " 24 | \n",
2533 | " Stephanie Dalton | \n",
2534 | " 30142966699187 | \n",
2535 | " 1035.0 | \n",
2536 | " pub | \n",
2537 | " True | \n",
2538 | "
\n",
2539 | " \n",
2540 | " | 2018-12-28 16:20:31 | \n",
2541 | " 3 | \n",
2542 | " Elizabeth Sawyer | \n",
2543 | " 30078299053512 | \n",
2544 | " 313.0 | \n",
2545 | " pub | \n",
2546 | " True | \n",
2547 | "
\n",
2548 | " \n",
2549 | " | 2018-12-30 23:23:09 | \n",
2550 | " 1 | \n",
2551 | " Robert Johnson | \n",
2552 | " 4761049645711555811 | \n",
2553 | " 1033.0 | \n",
2554 | " pub | \n",
2555 | " True | \n",
2556 | "
\n",
2557 | " \n",
2558 | "
\n",
2559 | "
110 rows × 6 columns
\n",
2560 | "
"
2561 | ],
2562 | "text/plain": [
2563 | " id name card amount \\\n",
2564 | "date \n",
2565 | "2018-01-02 23:27:46 12 Megan Price 501879657465 1031.0 \n",
2566 | "2018-01-04 03:05:18 7 Sean Taylor 3516952396080247 1685.0 \n",
2567 | "2018-01-07 01:10:54 18 Malik Carlson 344119623920892 175.0 \n",
2568 | "2018-01-08 02:34:32 6 Beth Hernandez 3581345943543942 1029.0 \n",
2569 | "2018-01-08 11:15:36 18 Malik Carlson 344119623920892 333.0 \n",
2570 | "2018-01-11 13:20:31 16 Crystal Clark 5570600642865857 229.0 \n",
2571 | "2018-01-22 08:07:03 16 Crystal Clark 5570600642865857 1131.0 \n",
2572 | "2018-01-23 06:29:37 12 Megan Price 501879657465 1678.0 \n",
2573 | "2018-01-24 13:17:19 1 Robert Johnson 4761049645711555811 1691.0 \n",
2574 | "2018-01-30 18:31:00 25 Nancy Contreras 4319653513507 1177.0 \n",
2575 | "2018-02-09 11:38:37 7 Sean Taylor 3516952396080247 445.0 \n",
2576 | "2018-02-17 01:27:19 16 Crystal Clark 5570600642865857 1430.0 \n",
2577 | "2018-02-19 16:00:43 7 Sean Taylor 3516952396080247 1072.0 \n",
2578 | "2018-02-19 22:48:25 18 Malik Carlson 344119623920892 1839.0 \n",
2579 | "2018-02-27 15:27:32 6 Beth Hernandez 3581345943543942 1145.0 \n",
2580 | "2018-03-01 21:29:05 3 Elizabeth Sawyer 30078299053512 1119.0 \n",
2581 | "2018-03-04 15:50:53 9 Laurie Gibbs 30181963913340 1534.0 \n",
2582 | "2018-03-05 08:26:08 16 Crystal Clark 5570600642865857 1617.0 \n",
2583 | "2018-03-06 07:18:09 25 Nancy Contreras 4319653513507 1334.0 \n",
2584 | "2018-03-09 04:51:38 6 Beth Hernandez 3581345943543942 389.0 \n",
2585 | "2018-03-12 00:44:01 12 Megan Price 501879657465 1530.0 \n",
2586 | "2018-03-20 10:19:25 12 Megan Price 501879657465 852.0 \n",
2587 | "2018-03-20 13:05:54 24 Stephanie Dalton 30142966699187 1011.0 \n",
2588 | "2018-03-26 07:41:59 9 Laurie Gibbs 30181963913340 1009.0 \n",
2589 | "2018-04-01 07:17:21 25 Nancy Contreras 4319653513507 100.0 \n",
2590 | "2018-04-03 03:23:37 18 Malik Carlson 344119623920892 1077.0 \n",
2591 | "2018-04-08 06:03:50 25 Nancy Contreras 4319653513507 1063.0 \n",
2592 | "2018-04-09 10:24:32 1 Robert Johnson 4761049645711555811 283.0 \n",
2593 | "2018-04-09 18:28:25 25 Nancy Contreras 4319653513507 269.0 \n",
2594 | "2018-04-10 06:08:01 7 Sean Taylor 3516952396080247 543.0 \n",
2595 | "... .. ... ... ... \n",
2596 | "2018-10-11 23:29:33 3 Elizabeth Sawyer 30078299053512 206.0 \n",
2597 | "2018-10-16 13:27:33 1 Robert Johnson 4761049645711555811 484.0 \n",
2598 | "2018-10-19 01:07:37 3 Elizabeth Sawyer 30078299053512 1053.0 \n",
2599 | "2018-10-19 12:32:37 16 Crystal Clark 5570600642865857 178.0 \n",
2600 | "2018-10-23 22:47:13 16 Crystal Clark 5570600642865857 393.0 \n",
2601 | "2018-10-28 02:12:58 25 Nancy Contreras 4319653513507 137.0 \n",
2602 | "2018-11-13 05:58:47 24 Stephanie Dalton 30142966699187 466.0 \n",
2603 | "2018-11-13 17:07:25 16 Crystal Clark 5570600642865857 1911.0 \n",
2604 | "2018-11-17 05:30:43 18 Malik Carlson 344119623920892 1769.0 \n",
2605 | "2018-11-20 05:24:28 3 Elizabeth Sawyer 30078299053512 1054.0 \n",
2606 | "2018-11-23 09:08:05 12 Megan Price 501879657465 233.0 \n",
2607 | "2018-11-25 20:44:07 12 Megan Price 501879657465 1123.0 \n",
2608 | "2018-11-26 20:54:39 1 Robert Johnson 4761049645711555811 267.0 \n",
2609 | "2018-11-27 15:36:05 12 Megan Price 501879657465 1802.0 \n",
2610 | "2018-11-27 17:20:29 6 Beth Hernandez 3581345943543942 1279.0 \n",
2611 | "2018-11-27 17:27:34 1 Robert Johnson 4761049645711555811 1660.0 \n",
2612 | "2018-12-03 02:38:52 16 Crystal Clark 5570600642865857 1014.0 \n",
2613 | "2018-12-05 19:24:27 9 Laurie Gibbs 30181963913340 57.0 \n",
2614 | "2018-12-07 07:22:03 1 Robert Johnson 4761049645711555811 1894.0 \n",
2615 | "2018-12-13 12:09:58 18 Malik Carlson 344119623920892 1154.0 \n",
2616 | "2018-12-13 15:51:59 7 Sean Taylor 3516952396080247 2249.0 \n",
2617 | "2018-12-14 08:51:41 12 Megan Price 501879657465 748.0 \n",
2618 | "2018-12-18 13:33:37 25 Nancy Contreras 4319653513507 1074.0 \n",
2619 | "2018-12-18 17:20:33 7 Sean Taylor 3516952396080247 1296.0 \n",
2620 | "2018-12-19 16:10:03 9 Laurie Gibbs 30181963913340 1724.0 \n",
2621 | "2018-12-21 09:56:32 24 Stephanie Dalton 30142966699187 1301.0 \n",
2622 | "2018-12-24 15:55:06 16 Crystal Clark 5570600642865857 1634.0 \n",
2623 | "2018-12-25 19:10:42 24 Stephanie Dalton 30142966699187 1035.0 \n",
2624 | "2018-12-28 16:20:31 3 Elizabeth Sawyer 30078299053512 313.0 \n",
2625 | "2018-12-30 23:23:09 1 Robert Johnson 4761049645711555811 1033.0 \n",
2626 | "\n",
2627 | " category outlier \n",
2628 | "date \n",
2629 | "2018-01-02 23:27:46 restaurant True \n",
2630 | "2018-01-04 03:05:18 food truck True \n",
2631 | "2018-01-07 01:10:54 pub True \n",
2632 | "2018-01-08 02:34:32 bar True \n",
2633 | "2018-01-08 11:15:36 restaurant True \n",
2634 | "2018-01-11 13:20:31 pub True \n",
2635 | "2018-01-22 08:07:03 restaurant True \n",
2636 | "2018-01-23 06:29:37 pub True \n",
2637 | "2018-01-24 13:17:19 coffee shop True \n",
2638 | "2018-01-30 18:31:00 restaurant True \n",
2639 | "2018-02-09 11:38:37 bar True \n",
2640 | "2018-02-17 01:27:19 restaurant True \n",
2641 | "2018-02-19 16:00:43 food truck True \n",
2642 | "2018-02-19 22:48:25 restaurant True \n",
2643 | "2018-02-27 15:27:32 bar True \n",
2644 | "2018-03-01 21:29:05 pub True \n",
2645 | "2018-03-04 15:50:53 coffee shop True \n",
2646 | "2018-03-05 08:26:08 bar True \n",
2647 | "2018-03-06 07:18:09 bar True \n",
2648 | "2018-03-09 04:51:38 restaurant True \n",
2649 | "2018-03-12 00:44:01 coffee shop True \n",
2650 | "2018-03-20 10:19:25 pub True \n",
2651 | "2018-03-20 13:05:54 bar True \n",
2652 | "2018-03-26 07:41:59 coffee shop True \n",
2653 | "2018-04-01 07:17:21 coffee shop True \n",
2654 | "2018-04-03 03:23:37 restaurant True \n",
2655 | "2018-04-08 06:03:50 pub True \n",
2656 | "2018-04-09 10:24:32 restaurant True \n",
2657 | "2018-04-09 18:28:25 restaurant True \n",
2658 | "2018-04-10 06:08:01 food truck True \n",
2659 | "... ... ... \n",
2660 | "2018-10-11 23:29:33 restaurant True \n",
2661 | "2018-10-16 13:27:33 food truck True \n",
2662 | "2018-10-19 01:07:37 restaurant True \n",
2663 | "2018-10-19 12:32:37 food truck True \n",
2664 | "2018-10-23 22:47:13 food truck True \n",
2665 | "2018-10-28 02:12:58 pub True \n",
2666 | "2018-11-13 05:58:47 bar True \n",
2667 | "2018-11-13 17:07:25 restaurant True \n",
2668 | "2018-11-17 05:30:43 food truck True \n",
2669 | "2018-11-20 05:24:28 bar True \n",
2670 | "2018-11-23 09:08:05 restaurant True \n",
2671 | "2018-11-25 20:44:07 bar True \n",
2672 | "2018-11-26 20:54:39 food truck True \n",
2673 | "2018-11-27 15:36:05 bar True \n",
2674 | "2018-11-27 17:20:29 restaurant True \n",
2675 | "2018-11-27 17:27:34 pub True \n",
2676 | "2018-12-03 02:38:52 restaurant True \n",
2677 | "2018-12-05 19:24:27 bar True \n",
2678 | "2018-12-07 07:22:03 bar True \n",
2679 | "2018-12-13 12:09:58 restaurant True \n",
2680 | "2018-12-13 15:51:59 food truck True \n",
2681 | "2018-12-14 08:51:41 pub True \n",
2682 | "2018-12-18 13:33:37 coffee shop True \n",
2683 | "2018-12-18 17:20:33 bar True \n",
2684 | "2018-12-19 16:10:03 pub True \n",
2685 | "2018-12-21 09:56:32 pub True \n",
2686 | "2018-12-24 15:55:06 pub True \n",
2687 | "2018-12-25 19:10:42 pub True \n",
2688 | "2018-12-28 16:20:31 pub True \n",
2689 | "2018-12-30 23:23:09 pub True \n",
2690 | "\n",
2691 | "[110 rows x 6 columns]"
2692 | ]
2693 | },
2694 | "execution_count": 86,
2695 | "metadata": {},
2696 | "output_type": "execute_result"
2697 | }
2698 | ],
2699 | "source": [
2700 | "# identify outliers in the transaction amounts using the interquartile range (IQR) rule\n",
2701 | "# calculate interquartile range\n",
2702 | "q25, q75 = percentile(transaction_df['amount'], 25), percentile(transaction_df['amount'], 75)\n",
2703 | "iqr = q75 - q25\n",
2704 | "print('Percentiles: 25th=%.3f, 75th=%.3f, IQR=%.3f' % (q25, q75, iqr))\n",
2705 | "# calculate the outlier cutoff\n",
2706 | "cut_off = iqr * 1.5\n",
2707 | "lower, upper = q25 - cut_off, q75 + cut_off\n",
2708 | "# identify outliers\n",
2709 | "outliers_2 = [x for x in transaction_df['amount'] if x < lower or x > upper]\n",
2710 | "print('Identified outliers: %d' % len(outliers_2))\n",
2711 | "# collect the non-outlier amounts (values within [lower, upper], inclusive)\n",
2712 | "outliers_removed_2 = [x for x in transaction_df['amount'] if x >= lower and x <= upper]\n",
2713 | "print('Non-outlier observations: %d' % len(outliers_removed_2))\n",
2714 | "\n",
2715 | "transaction_df['outlier'] = (transaction_df['amount'] > upper) | (transaction_df['amount'] < lower)\n",
2716 | "\n",
2717 | "outlier_2 = transaction_df[transaction_df['outlier']==True]\n",
2718 | "outlier_2\n"
2719 | ]
2720 | },
2721 | {
2722 | "cell_type": "code",
2723 | "execution_count": 89,
2724 | "metadata": {},
2725 | "outputs": [
2726 | {
2727 | "data": {
2728 | "text/html": [
2729 | "\n",
2730 | "\n",
2743 | "
\n",
2744 | " \n",
2745 | " \n",
2746 | " | \n",
2747 | " id | \n",
2748 | " name | \n",
2749 | " card | \n",
2750 | " amount | \n",
2751 | " category | \n",
2752 | " outlier | \n",
2753 | "
\n",
2754 | " \n",
2755 | " | date | \n",
2756 | " | \n",
2757 | " | \n",
2758 | " | \n",
2759 | " | \n",
2760 | " | \n",
2761 | " | \n",
2762 | "
\n",
2763 | " \n",
2764 | " \n",
2765 | " \n",
2766 | " | 2018-12-07 07:22:03 | \n",
2767 | " 1 | \n",
2768 | " Robert Johnson | \n",
2769 | " 4761049645711555811 | \n",
2770 | " 1894.0 | \n",
2771 | " bar | \n",
2772 | " True | \n",
2773 | "
\n",
2774 | " \n",
2775 | " | 2018-03-05 08:26:08 | \n",
2776 | " 16 | \n",
2777 | " Crystal Clark | \n",
2778 | " 5570600642865857 | \n",
2779 | " 1617.0 | \n",
2780 | " bar | \n",
2781 | " True | \n",
2782 | "
\n",
2783 | " \n",
2784 | " | 2018-03-06 07:18:09 | \n",
2785 | " 25 | \n",
2786 | " Nancy Contreras | \n",
2787 | " 4319653513507 | \n",
2788 | " 1334.0 | \n",
2789 | " bar | \n",
2790 | " True | \n",
2791 | "
\n",
2792 | " \n",
2793 | " | 2018-01-22 08:07:03 | \n",
2794 | " 16 | \n",
2795 | " Crystal Clark | \n",
2796 | " 5570600642865857 | \n",
2797 | " 1131.0 | \n",
2798 | " restaurant | \n",
2799 | " True | \n",
2800 | "
\n",
2801 | " \n",
2802 | " | 2018-09-26 08:48:40 | \n",
2803 | " 1 | \n",
2804 | " Robert Johnson | \n",
2805 | " 4761049645711555811 | \n",
2806 | " 1060.0 | \n",
2807 | " restaurant | \n",
2808 | " True | \n",
2809 | "
\n",
2810 | " \n",
2811 | " | 2018-09-06 08:28:55 | \n",
2812 | " 1 | \n",
2813 | " Robert Johnson | \n",
2814 | " 4761049645711555811 | \n",
2815 | " 1017.0 | \n",
2816 | " bar | \n",
2817 | " True | \n",
2818 | "
\n",
2819 | " \n",
2820 | " | 2018-03-26 07:41:59 | \n",
2821 | " 9 | \n",
2822 | " Laurie Gibbs | \n",
2823 | " 30181963913340 | \n",
2824 | " 1009.0 | \n",
2825 | " coffee shop | \n",
2826 | " True | \n",
2827 | "
\n",
2828 | " \n",
2829 | " | 2018-12-14 08:51:41 | \n",
2830 | " 12 | \n",
2831 | " Megan Price | \n",
2832 | " 501879657465 | \n",
2833 | " 748.0 | \n",
2834 | " pub | \n",
2835 | " True | \n",
2836 | "
\n",
2837 | " \n",
2838 | " | 2018-04-01 07:17:21 | \n",
2839 | " 25 | \n",
2840 | " Nancy Contreras | \n",
2841 | " 4319653513507 | \n",
2842 | " 100.0 | \n",
2843 | " coffee shop | \n",
2844 | " True | \n",
2845 | "
\n",
2846 | " \n",
2847 | "
\n",
2848 | "
"
2849 | ],
2850 | "text/plain": [
2851 | " id name card amount \\\n",
2852 | "date \n",
2853 | "2018-12-07 07:22:03 1 Robert Johnson 4761049645711555811 1894.0 \n",
2854 | "2018-03-05 08:26:08 16 Crystal Clark 5570600642865857 1617.0 \n",
2855 | "2018-03-06 07:18:09 25 Nancy Contreras 4319653513507 1334.0 \n",
2856 | "2018-01-22 08:07:03 16 Crystal Clark 5570600642865857 1131.0 \n",
2857 | "2018-09-26 08:48:40 1 Robert Johnson 4761049645711555811 1060.0 \n",
2858 | "2018-09-06 08:28:55 1 Robert Johnson 4761049645711555811 1017.0 \n",
2859 | "2018-03-26 07:41:59 9 Laurie Gibbs 30181963913340 1009.0 \n",
2860 | "2018-12-14 08:51:41 12 Megan Price 501879657465 748.0 \n",
2861 | "2018-04-01 07:17:21 25 Nancy Contreras 4319653513507 100.0 \n",
2862 | "\n",
2863 | " category outlier \n",
2864 | "date \n",
2865 | "2018-12-07 07:22:03 bar True \n",
2866 | "2018-03-05 08:26:08 bar True \n",
2867 | "2018-03-06 07:18:09 bar True \n",
2868 | "2018-01-22 08:07:03 restaurant True \n",
2869 | "2018-09-26 08:48:40 restaurant True \n",
2870 | "2018-09-06 08:28:55 bar True \n",
2871 | "2018-03-26 07:41:59 coffee shop True \n",
2872 | "2018-12-14 08:51:41 pub True \n",
2873 | "2018-04-01 07:17:21 coffee shop True "
2874 | ]
2875 | },
2876 | "execution_count": 89,
2877 | "metadata": {},
2878 | "output_type": "execute_result"
2879 | }
2880 | ],
2881 | "source": [
2882 | "# find IQR-outlier transactions made between start_time and end_time, sorted by amount (largest first)\n",
2883 | "anomalous_transactions2 = outlier_2.between_time(start_time, end_time).sort_values('amount', ascending=False)\n",
2884 | "anomalous_transactions2\n",
2885 | "\n"
2886 | ]
2887 | },
2888 | {
2889 | "cell_type": "code",
2890 | "execution_count": 95,
2891 | "metadata": {},
2892 | "outputs": [
2893 | {
2894 | "data": {
2895 | "application/vnd.plotly.v1+json": {
2896 | "config": {
2897 | "plotlyServerURL": "https://plot.ly"
2898 | },
2899 | "data": [
2900 | {
2901 | "hovertemplate": "category=bar
name=%{x}
amount=%{y}",
2902 | "legendgroup": "bar",
2903 | "marker": {
2904 | "color": "#636efa",
2905 | "symbol": "circle"
2906 | },
2907 | "mode": "markers",
2908 | "name": "bar",
2909 | "showlegend": true,
2910 | "type": "scatter",
2911 | "x": [
2912 | "Robert Johnson",
2913 | "Crystal Clark",
2914 | "Nancy Contreras",
2915 | "Robert Johnson"
2916 | ],
2917 | "xaxis": "x",
2918 | "y": [
2919 | 1894,
2920 | 1617,
2921 | 1334,
2922 | 1017
2923 | ],
2924 | "yaxis": "y"
2925 | },
2926 | {
2927 | "hovertemplate": "category=restaurant
name=%{x}
amount=%{y}",
2928 | "legendgroup": "restaurant",
2929 | "marker": {
2930 | "color": "#EF553B",
2931 | "symbol": "circle"
2932 | },
2933 | "mode": "markers",
2934 | "name": "restaurant",
2935 | "showlegend": true,
2936 | "type": "scatter",
2937 | "x": [
2938 | "Crystal Clark",
2939 | "Robert Johnson"
2940 | ],
2941 | "xaxis": "x",
2942 | "y": [
2943 | 1131,
2944 | 1060
2945 | ],
2946 | "yaxis": "y"
2947 | },
2948 | {
2949 | "hovertemplate": "category=coffee shop
name=%{x}
amount=%{y}",
2950 | "legendgroup": "coffee shop",
2951 | "marker": {
2952 | "color": "#00cc96",
2953 | "symbol": "circle"
2954 | },
2955 | "mode": "markers",
2956 | "name": "coffee shop",
2957 | "showlegend": true,
2958 | "type": "scatter",
2959 | "x": [
2960 | "Laurie Gibbs",
2961 | "Nancy Contreras"
2962 | ],
2963 | "xaxis": "x",
2964 | "y": [
2965 | 1009,
2966 | 100
2967 | ],
2968 | "yaxis": "y"
2969 | },
2970 | {
2971 | "hovertemplate": "category=pub
name=%{x}
amount=%{y}",
2972 | "legendgroup": "pub",
2973 | "marker": {
2974 | "color": "#ab63fa",
2975 | "symbol": "circle"
2976 | },
2977 | "mode": "markers",
2978 | "name": "pub",
2979 | "showlegend": true,
2980 | "type": "scatter",
2981 | "x": [
2982 | "Megan Price"
2983 | ],
2984 | "xaxis": "x",
2985 | "y": [
2986 | 748
2987 | ],
2988 | "yaxis": "y"
2989 | }
2990 | ],
2991 | "layout": {
2992 | "autosize": true,
2993 | "legend": {
2994 | "title": {
2995 | "text": "category"
2996 | },
2997 | "tracegroupgap": 0
2998 | },
2999 | "template": {
3000 | "data": {
3001 | "bar": [
3002 | {
3003 | "error_x": {
3004 | "color": "#2a3f5f"
3005 | },
3006 | "error_y": {
3007 | "color": "#2a3f5f"
3008 | },
3009 | "marker": {
3010 | "line": {
3011 | "color": "#E5ECF6",
3012 | "width": 0.5
3013 | }
3014 | },
3015 | "type": "bar"
3016 | }
3017 | ],
3018 | "barpolar": [
3019 | {
3020 | "marker": {
3021 | "line": {
3022 | "color": "#E5ECF6",
3023 | "width": 0.5
3024 | }
3025 | },
3026 | "type": "barpolar"
3027 | }
3028 | ],
3029 | "carpet": [
3030 | {
3031 | "aaxis": {
3032 | "endlinecolor": "#2a3f5f",
3033 | "gridcolor": "white",
3034 | "linecolor": "white",
3035 | "minorgridcolor": "white",
3036 | "startlinecolor": "#2a3f5f"
3037 | },
3038 | "baxis": {
3039 | "endlinecolor": "#2a3f5f",
3040 | "gridcolor": "white",
3041 | "linecolor": "white",
3042 | "minorgridcolor": "white",
3043 | "startlinecolor": "#2a3f5f"
3044 | },
3045 | "type": "carpet"
3046 | }
3047 | ],
3048 | "choropleth": [
3049 | {
3050 | "colorbar": {
3051 | "outlinewidth": 0,
3052 | "ticks": ""
3053 | },
3054 | "type": "choropleth"
3055 | }
3056 | ],
3057 | "contour": [
3058 | {
3059 | "colorbar": {
3060 | "outlinewidth": 0,
3061 | "ticks": ""
3062 | },
3063 | "colorscale": [
3064 | [
3065 | 0,
3066 | "#0d0887"
3067 | ],
3068 | [
3069 | 0.1111111111111111,
3070 | "#46039f"
3071 | ],
3072 | [
3073 | 0.2222222222222222,
3074 | "#7201a8"
3075 | ],
3076 | [
3077 | 0.3333333333333333,
3078 | "#9c179e"
3079 | ],
3080 | [
3081 | 0.4444444444444444,
3082 | "#bd3786"
3083 | ],
3084 | [
3085 | 0.5555555555555556,
3086 | "#d8576b"
3087 | ],
3088 | [
3089 | 0.6666666666666666,
3090 | "#ed7953"
3091 | ],
3092 | [
3093 | 0.7777777777777778,
3094 | "#fb9f3a"
3095 | ],
3096 | [
3097 | 0.8888888888888888,
3098 | "#fdca26"
3099 | ],
3100 | [
3101 | 1,
3102 | "#f0f921"
3103 | ]
3104 | ],
3105 | "type": "contour"
3106 | }
3107 | ],
3108 | "contourcarpet": [
3109 | {
3110 | "colorbar": {
3111 | "outlinewidth": 0,
3112 | "ticks": ""
3113 | },
3114 | "type": "contourcarpet"
3115 | }
3116 | ],
3117 | "heatmap": [
3118 | {
3119 | "colorbar": {
3120 | "outlinewidth": 0,
3121 | "ticks": ""
3122 | },
3123 | "colorscale": [
3124 | [
3125 | 0,
3126 | "#0d0887"
3127 | ],
3128 | [
3129 | 0.1111111111111111,
3130 | "#46039f"
3131 | ],
3132 | [
3133 | 0.2222222222222222,
3134 | "#7201a8"
3135 | ],
3136 | [
3137 | 0.3333333333333333,
3138 | "#9c179e"
3139 | ],
3140 | [
3141 | 0.4444444444444444,
3142 | "#bd3786"
3143 | ],
3144 | [
3145 | 0.5555555555555556,
3146 | "#d8576b"
3147 | ],
3148 | [
3149 | 0.6666666666666666,
3150 | "#ed7953"
3151 | ],
3152 | [
3153 | 0.7777777777777778,
3154 | "#fb9f3a"
3155 | ],
3156 | [
3157 | 0.8888888888888888,
3158 | "#fdca26"
3159 | ],
3160 | [
3161 | 1,
3162 | "#f0f921"
3163 | ]
3164 | ],
3165 | "type": "heatmap"
3166 | }
3167 | ],
3168 | "heatmapgl": [
3169 | {
3170 | "colorbar": {
3171 | "outlinewidth": 0,
3172 | "ticks": ""
3173 | },
3174 | "colorscale": [
3175 | [
3176 | 0,
3177 | "#0d0887"
3178 | ],
3179 | [
3180 | 0.1111111111111111,
3181 | "#46039f"
3182 | ],
3183 | [
3184 | 0.2222222222222222,
3185 | "#7201a8"
3186 | ],
3187 | [
3188 | 0.3333333333333333,
3189 | "#9c179e"
3190 | ],
3191 | [
3192 | 0.4444444444444444,
3193 | "#bd3786"
3194 | ],
3195 | [
3196 | 0.5555555555555556,
3197 | "#d8576b"
3198 | ],
3199 | [
3200 | 0.6666666666666666,
3201 | "#ed7953"
3202 | ],
3203 | [
3204 | 0.7777777777777778,
3205 | "#fb9f3a"
3206 | ],
3207 | [
3208 | 0.8888888888888888,
3209 | "#fdca26"
3210 | ],
3211 | [
3212 | 1,
3213 | "#f0f921"
3214 | ]
3215 | ],
3216 | "type": "heatmapgl"
3217 | }
3218 | ],
3219 | "histogram": [
3220 | {
3221 | "marker": {
3222 | "colorbar": {
3223 | "outlinewidth": 0,
3224 | "ticks": ""
3225 | }
3226 | },
3227 | "type": "histogram"
3228 | }
3229 | ],
3230 | "histogram2d": [
3231 | {
3232 | "colorbar": {
3233 | "outlinewidth": 0,
3234 | "ticks": ""
3235 | },
3236 | "colorscale": [
3237 | [
3238 | 0,
3239 | "#0d0887"
3240 | ],
3241 | [
3242 | 0.1111111111111111,
3243 | "#46039f"
3244 | ],
3245 | [
3246 | 0.2222222222222222,
3247 | "#7201a8"
3248 | ],
3249 | [
3250 | 0.3333333333333333,
3251 | "#9c179e"
3252 | ],
3253 | [
3254 | 0.4444444444444444,
3255 | "#bd3786"
3256 | ],
3257 | [
3258 | 0.5555555555555556,
3259 | "#d8576b"
3260 | ],
3261 | [
3262 | 0.6666666666666666,
3263 | "#ed7953"
3264 | ],
3265 | [
3266 | 0.7777777777777778,
3267 | "#fb9f3a"
3268 | ],
3269 | [
3270 | 0.8888888888888888,
3271 | "#fdca26"
3272 | ],
3273 | [
3274 | 1,
3275 | "#f0f921"
3276 | ]
3277 | ],
3278 | "type": "histogram2d"
3279 | }
3280 | ],
3281 | "histogram2dcontour": [
3282 | {
3283 | "colorbar": {
3284 | "outlinewidth": 0,
3285 | "ticks": ""
3286 | },
3287 | "colorscale": [
3288 | [
3289 | 0,
3290 | "#0d0887"
3291 | ],
3292 | [
3293 | 0.1111111111111111,
3294 | "#46039f"
3295 | ],
3296 | [
3297 | 0.2222222222222222,
3298 | "#7201a8"
3299 | ],
3300 | [
3301 | 0.3333333333333333,
3302 | "#9c179e"
3303 | ],
3304 | [
3305 | 0.4444444444444444,
3306 | "#bd3786"
3307 | ],
3308 | [
3309 | 0.5555555555555556,
3310 | "#d8576b"
3311 | ],
3312 | [
3313 | 0.6666666666666666,
3314 | "#ed7953"
3315 | ],
3316 | [
3317 | 0.7777777777777778,
3318 | "#fb9f3a"
3319 | ],
3320 | [
3321 | 0.8888888888888888,
3322 | "#fdca26"
3323 | ],
3324 | [
3325 | 1,
3326 | "#f0f921"
3327 | ]
3328 | ],
3329 | "type": "histogram2dcontour"
3330 | }
3331 | ],
3332 | "mesh3d": [
3333 | {
3334 | "colorbar": {
3335 | "outlinewidth": 0,
3336 | "ticks": ""
3337 | },
3338 | "type": "mesh3d"
3339 | }
3340 | ],
3341 | "parcoords": [
3342 | {
3343 | "line": {
3344 | "colorbar": {
3345 | "outlinewidth": 0,
3346 | "ticks": ""
3347 | }
3348 | },
3349 | "type": "parcoords"
3350 | }
3351 | ],
3352 | "pie": [
3353 | {
3354 | "automargin": true,
3355 | "type": "pie"
3356 | }
3357 | ],
3358 | "scatter": [
3359 | {
3360 | "marker": {
3361 | "colorbar": {
3362 | "outlinewidth": 0,
3363 | "ticks": ""
3364 | }
3365 | },
3366 | "type": "scatter"
3367 | }
3368 | ],
3369 | "scatter3d": [
3370 | {
3371 | "line": {
3372 | "colorbar": {
3373 | "outlinewidth": 0,
3374 | "ticks": ""
3375 | }
3376 | },
3377 | "marker": {
3378 | "colorbar": {
3379 | "outlinewidth": 0,
3380 | "ticks": ""
3381 | }
3382 | },
3383 | "type": "scatter3d"
3384 | }
3385 | ],
3386 | "scattercarpet": [
3387 | {
3388 | "marker": {
3389 | "colorbar": {
3390 | "outlinewidth": 0,
3391 | "ticks": ""
3392 | }
3393 | },
3394 | "type": "scattercarpet"
3395 | }
3396 | ],
3397 | "scattergeo": [
3398 | {
3399 | "marker": {
3400 | "colorbar": {
3401 | "outlinewidth": 0,
3402 | "ticks": ""
3403 | }
3404 | },
3405 | "type": "scattergeo"
3406 | }
3407 | ],
3408 | "scattergl": [
3409 | {
3410 | "marker": {
3411 | "colorbar": {
3412 | "outlinewidth": 0,
3413 | "ticks": ""
3414 | }
3415 | },
3416 | "type": "scattergl"
3417 | }
3418 | ],
3419 | "scattermapbox": [
3420 | {
3421 | "marker": {
3422 | "colorbar": {
3423 | "outlinewidth": 0,
3424 | "ticks": ""
3425 | }
3426 | },
3427 | "type": "scattermapbox"
3428 | }
3429 | ],
3430 | "scatterpolar": [
3431 | {
3432 | "marker": {
3433 | "colorbar": {
3434 | "outlinewidth": 0,
3435 | "ticks": ""
3436 | }
3437 | },
3438 | "type": "scatterpolar"
3439 | }
3440 | ],
3441 | "scatterpolargl": [
3442 | {
3443 | "marker": {
3444 | "colorbar": {
3445 | "outlinewidth": 0,
3446 | "ticks": ""
3447 | }
3448 | },
3449 | "type": "scatterpolargl"
3450 | }
3451 | ],
3452 | "scatterternary": [
3453 | {
3454 | "marker": {
3455 | "colorbar": {
3456 | "outlinewidth": 0,
3457 | "ticks": ""
3458 | }
3459 | },
3460 | "type": "scatterternary"
3461 | }
3462 | ],
3463 | "surface": [
3464 | {
3465 | "colorbar": {
3466 | "outlinewidth": 0,
3467 | "ticks": ""
3468 | },
3469 | "colorscale": [
3470 | [
3471 | 0,
3472 | "#0d0887"
3473 | ],
3474 | [
3475 | 0.1111111111111111,
3476 | "#46039f"
3477 | ],
3478 | [
3479 | 0.2222222222222222,
3480 | "#7201a8"
3481 | ],
3482 | [
3483 | 0.3333333333333333,
3484 | "#9c179e"
3485 | ],
3486 | [
3487 | 0.4444444444444444,
3488 | "#bd3786"
3489 | ],
3490 | [
3491 | 0.5555555555555556,
3492 | "#d8576b"
3493 | ],
3494 | [
3495 | 0.6666666666666666,
3496 | "#ed7953"
3497 | ],
3498 | [
3499 | 0.7777777777777778,
3500 | "#fb9f3a"
3501 | ],
3502 | [
3503 | 0.8888888888888888,
3504 | "#fdca26"
3505 | ],
3506 | [
3507 | 1,
3508 | "#f0f921"
3509 | ]
3510 | ],
3511 | "type": "surface"
3512 | }
3513 | ],
3514 | "table": [
3515 | {
3516 | "cells": {
3517 | "fill": {
3518 | "color": "#EBF0F8"
3519 | },
3520 | "line": {
3521 | "color": "white"
3522 | }
3523 | },
3524 | "header": {
3525 | "fill": {
3526 | "color": "#C8D4E3"
3527 | },
3528 | "line": {
3529 | "color": "white"
3530 | }
3531 | },
3532 | "type": "table"
3533 | }
3534 | ]
3535 | },
3536 | "layout": {
3537 | "annotationdefaults": {
3538 | "arrowcolor": "#2a3f5f",
3539 | "arrowhead": 0,
3540 | "arrowwidth": 1
3541 | },
3542 | "coloraxis": {
3543 | "colorbar": {
3544 | "outlinewidth": 0,
3545 | "ticks": ""
3546 | }
3547 | },
3548 | "colorscale": {
3549 | "diverging": [
3550 | [
3551 | 0,
3552 | "#8e0152"
3553 | ],
3554 | [
3555 | 0.1,
3556 | "#c51b7d"
3557 | ],
3558 | [
3559 | 0.2,
3560 | "#de77ae"
3561 | ],
3562 | [
3563 | 0.3,
3564 | "#f1b6da"
3565 | ],
3566 | [
3567 | 0.4,
3568 | "#fde0ef"
3569 | ],
3570 | [
3571 | 0.5,
3572 | "#f7f7f7"
3573 | ],
3574 | [
3575 | 0.6,
3576 | "#e6f5d0"
3577 | ],
3578 | [
3579 | 0.7,
3580 | "#b8e186"
3581 | ],
3582 | [
3583 | 0.8,
3584 | "#7fbc41"
3585 | ],
3586 | [
3587 | 0.9,
3588 | "#4d9221"
3589 | ],
3590 | [
3591 | 1,
3592 | "#276419"
3593 | ]
3594 | ],
3595 | "sequential": [
3596 | [
3597 | 0,
3598 | "#0d0887"
3599 | ],
3600 | [
3601 | 0.1111111111111111,
3602 | "#46039f"
3603 | ],
3604 | [
3605 | 0.2222222222222222,
3606 | "#7201a8"
3607 | ],
3608 | [
3609 | 0.3333333333333333,
3610 | "#9c179e"
3611 | ],
3612 | [
3613 | 0.4444444444444444,
3614 | "#bd3786"
3615 | ],
3616 | [
3617 | 0.5555555555555556,
3618 | "#d8576b"
3619 | ],
3620 | [
3621 | 0.6666666666666666,
3622 | "#ed7953"
3623 | ],
3624 | [
3625 | 0.7777777777777778,
3626 | "#fb9f3a"
3627 | ],
3628 | [
3629 | 0.8888888888888888,
3630 | "#fdca26"
3631 | ],
3632 | [
3633 | 1,
3634 | "#f0f921"
3635 | ]
3636 | ],
3637 | "sequentialminus": [
3638 | [
3639 | 0,
3640 | "#0d0887"
3641 | ],
3642 | [
3643 | 0.1111111111111111,
3644 | "#46039f"
3645 | ],
3646 | [
3647 | 0.2222222222222222,
3648 | "#7201a8"
3649 | ],
3650 | [
3651 | 0.3333333333333333,
3652 | "#9c179e"
3653 | ],
3654 | [
3655 | 0.4444444444444444,
3656 | "#bd3786"
3657 | ],
3658 | [
3659 | 0.5555555555555556,
3660 | "#d8576b"
3661 | ],
3662 | [
3663 | 0.6666666666666666,
3664 | "#ed7953"
3665 | ],
3666 | [
3667 | 0.7777777777777778,
3668 | "#fb9f3a"
3669 | ],
3670 | [
3671 | 0.8888888888888888,
3672 | "#fdca26"
3673 | ],
3674 | [
3675 | 1,
3676 | "#f0f921"
3677 | ]
3678 | ]
3679 | },
3680 | "colorway": [
3681 | "#636efa",
3682 | "#EF553B",
3683 | "#00cc96",
3684 | "#ab63fa",
3685 | "#FFA15A",
3686 | "#19d3f3",
3687 | "#FF6692",
3688 | "#B6E880",
3689 | "#FF97FF",
3690 | "#FECB52"
3691 | ],
3692 | "font": {
3693 | "color": "#2a3f5f"
3694 | },
3695 | "geo": {
3696 | "bgcolor": "white",
3697 | "lakecolor": "white",
3698 | "landcolor": "#E5ECF6",
3699 | "showlakes": true,
3700 | "showland": true,
3701 | "subunitcolor": "white"
3702 | },
3703 | "hoverlabel": {
3704 | "align": "left"
3705 | },
3706 | "hovermode": "closest",
3707 | "mapbox": {
3708 | "style": "light"
3709 | },
3710 | "paper_bgcolor": "white",
3711 | "plot_bgcolor": "#E5ECF6",
3712 | "polar": {
3713 | "angularaxis": {
3714 | "gridcolor": "white",
3715 | "linecolor": "white",
3716 | "ticks": ""
3717 | },
3718 | "bgcolor": "#E5ECF6",
3719 | "radialaxis": {
3720 | "gridcolor": "white",
3721 | "linecolor": "white",
3722 | "ticks": ""
3723 | }
3724 | },
3725 | "scene": {
3726 | "xaxis": {
3727 | "backgroundcolor": "#E5ECF6",
3728 | "gridcolor": "white",
3729 | "gridwidth": 2,
3730 | "linecolor": "white",
3731 | "showbackground": true,
3732 | "ticks": "",
3733 | "zerolinecolor": "white"
3734 | },
3735 | "yaxis": {
3736 | "backgroundcolor": "#E5ECF6",
3737 | "gridcolor": "white",
3738 | "gridwidth": 2,
3739 | "linecolor": "white",
3740 | "showbackground": true,
3741 | "ticks": "",
3742 | "zerolinecolor": "white"
3743 | },
3744 | "zaxis": {
3745 | "backgroundcolor": "#E5ECF6",
3746 | "gridcolor": "white",
3747 | "gridwidth": 2,
3748 | "linecolor": "white",
3749 | "showbackground": true,
3750 | "ticks": "",
3751 | "zerolinecolor": "white"
3752 | }
3753 | },
3754 | "shapedefaults": {
3755 | "line": {
3756 | "color": "#2a3f5f"
3757 | }
3758 | },
3759 | "ternary": {
3760 | "aaxis": {
3761 | "gridcolor": "white",
3762 | "linecolor": "white",
3763 | "ticks": ""
3764 | },
3765 | "baxis": {
3766 | "gridcolor": "white",
3767 | "linecolor": "white",
3768 | "ticks": ""
3769 | },
3770 | "bgcolor": "#E5ECF6",
3771 | "caxis": {
3772 | "gridcolor": "white",
3773 | "linecolor": "white",
3774 | "ticks": ""
3775 | }
3776 | },
3777 | "title": {
3778 | "x": 0.05
3779 | },
3780 | "xaxis": {
3781 | "automargin": true,
3782 | "gridcolor": "white",
3783 | "linecolor": "white",
3784 | "ticks": "",
3785 | "title": {
3786 | "standoff": 15
3787 | },
3788 | "zerolinecolor": "white",
3789 | "zerolinewidth": 2
3790 | },
3791 | "yaxis": {
3792 | "automargin": true,
3793 | "gridcolor": "white",
3794 | "linecolor": "white",
3795 | "ticks": "",
3796 | "title": {
3797 | "standoff": 15
3798 | },
3799 | "zerolinecolor": "white",
3800 | "zerolinewidth": 2
3801 | }
3802 | }
3803 | },
3804 | "title": {
3805 | "text": "Early Hour Transactions"
3806 | },
3807 | "xaxis": {
3808 | "anchor": "y",
3809 | "autorange": true,
3810 | "domain": [
3811 | 0,
3812 | 1
3813 | ],
3814 | "range": [
3815 | -0.24975222993062435,
3816 | 4.249752229930625
3817 | ],
3818 | "title": {
3819 | "text": "name"
3820 | },
3821 | "type": "category"
3822 | },
3823 | "yaxis": {
3824 | "anchor": "x",
3825 | "autorange": true,
3826 | "domain": [
3827 | 0,
3828 | 1
3829 | ],
3830 | "range": [
3831 | -31.407643312101925,
3832 | 2025.407643312102
3833 | ],
3834 | "title": {
3835 | "text": "amount"
3836 | },
3837 | "type": "linear"
3838 | }
3839 | }
3840 | },
3841 | "text/html": [
3842 | "\n",
3843 | " \n",
3844 | " \n",
3845 | "
\n",
3846 | " \n",
3884 | "
"
3885 | ]
3886 | },
3887 | "metadata": {},
3888 | "output_type": "display_data"
3889 | }
3890 | ],
3891 | "source": [
3892 | "px.scatter(anomalous_transactions2, x='name', y='amount', color='category', title=\"Early Hour Transactions\")"
3893 | ]
3894 | },
3895 | {
3896 | "cell_type": "markdown",
3897 | "metadata": {},
3898 | "source": [
3899 | "Outlier detection using the standard deviation yields 77 records, whereas using the interquartile range yields 110 records. There seem to be fraudulent transactions in the Bar category, with amounts spent at bars between 7 and 9 AM."
3900 | ]
3901 | }
3902 | ],
3903 | "metadata": {
3904 | "file_extension": ".py",
3905 | "kernelspec": {
3906 | "display_name": "Python 3",
3907 | "language": "python",
3908 | "name": "python3"
3909 | },
3910 | "language_info": {
3911 | "codemirror_mode": {
3912 | "name": "ipython",
3913 | "version": 3
3914 | },
3915 | "file_extension": ".py",
3916 | "mimetype": "text/x-python",
3917 | "name": "python",
3918 | "nbconvert_exporter": "python",
3919 | "pygments_lexer": "ipython3",
3920 | "version": "3.7.7"
3921 | },
3922 | "mimetype": "text/x-python",
3923 | "name": "python",
3924 | "npconvert_exporter": "python",
3925 | "pygments_lexer": "ipython3",
3926 | "version": 3
3927 | },
3928 | "nbformat": 4,
3929 | "nbformat_minor": 4
3930 | }
3931 |
--------------------------------------------------------------------------------