├── Misc
└── create_data.py
├── README.md
└── SalesAnalysis
├── Output
└── all_data.csv
├── Sales_Data
├── Sales_April_2019.csv
├── Sales_August_2019.csv
├── Sales_December_2019.csv
├── Sales_February_2019.csv
├── Sales_January_2019.csv
├── Sales_July_2019.csv
├── Sales_June_2019.csv
├── Sales_March_2019.csv
├── Sales_May_2019.csv
├── Sales_November_2019.csv
├── Sales_October_2019.csv
└── Sales_September_2019.csv
├── Sales_Data_Analysis.ipynb
├── Sales_Data_Analysis.png
└── all_data.csv
/Misc/create_data.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import calendar
3 | import random
4 | import numpy
5 | import pandas as pd
6 | import uuid
7 |
# Product catalogue: name -> [unit price, relative selection weight].
# write_row() reads element 0 as the unit price; the generation script
# passes element 1 to random.choices() as the sampling weight, so larger
# numbers mean the product is picked more often.
products = {
  'iPhone': [700, 10],
  'Google Phone': [600, 8],
  'Vareebadd Phone': [400, 3],
  '20in Monitor': [109.99,6],
  '34in Ultrawide Monitor': [379.99, 9],
  '27in 4K Gaming Monitor': [389.99,9],
  '27in FHD Monitor': [149.99, 11],
  'Flatscreen TV': [300, 7],
  'Macbook Pro Laptop': [1700, 7],
  'ThinkPad Laptop': [999.99, 6],
  'AA Batteries (4-pack)': [3.84, 30],
  'AAA Batteries (4-pack)': [2.99, 30],
  'USB-C Charging Cable': [11.95, 30],
  'Lightning Charging Cable': [14.95, 30],
  'Wired Headphones': [11.99, 26],
  'Bose SoundSport Headphones': [99.99, 19],
  'Apple Airpods Headphones': [150, 22],
  'LG Washing Machine': [600.00, 1],
  'LG Dryer': [600.00, 1]
}

# Column headers of the generated CSV files, in the same order as the
# values returned by write_row().
columns = ['Order ID', 'Product', 'Quantity Ordered', 'Price Each', 'Order Date', 'Purchase Address']
31 |
def generate_random_time(month):
    """Return a random order timestamp inside ``month`` of 2019.

    A random day of the month is chosen, the time is anchored at either
    12:00 or 20:00 (equal probability) and then shifted by a normally
    distributed offset with a standard deviation of 180 minutes.

    The offset is redrawn whenever the shifted timestamp would leave the
    requested month (for example 20:00 on December 31st plus four hours
    would otherwise be reported as ``01/01/20``), so every value returned
    really belongs to the month whose file it is written to.

    Args:
        month: Month number, 1-12.

    Returns:
        The timestamp formatted as ``"%m/%d/%y %H:%M"``.
    """
    day = generate_random_day(month)
    anchor_hour = 12 if random.random() < 0.5 else 20
    anchor = datetime.datetime(2019, month, day, anchor_hour, 0)

    while True:
        offset_minutes = numpy.random.normal(loc=0.0, scale=180)
        moment = anchor + datetime.timedelta(minutes=offset_minutes)
        # Reject the rare draws that spill into a neighbouring month/year.
        if moment.year == 2019 and moment.month == month:
            return moment.strftime("%m/%d/%y %H:%M")


def generate_random_day(month):
    """Return a uniformly random valid day number for ``month`` of 2019."""
    # monthrange() returns (weekday of the 1st, number of days in the month).
    _first_weekday, days_in_month = calendar.monthrange(2019, month)
    return random.randint(1, days_in_month)
45 |
def generate_random_address():
    """Build a random purchase address string.

    The result has the form ``"<1-999> <street> St, <city>, <state> <zip>"``.
    The street name is picked uniformly; the city/state/zip triple is
    picked according to the per-location weight in the table below.
    """
    street_pool = [
        'Main', '2nd', '1st', '4th', '5th', 'Park', '6th', '7th', 'Maple',
        'Pine', 'Washington', '8th', 'Cedar', 'Elm', 'Walnut', '9th', '10th',
        'Lake', 'Sunset', 'Lincoln', 'Jackson', 'Church', 'River', '11th',
        'Willow', 'Jefferson', 'Center', '12th', 'North', 'Lakeview', 'Ridge',
        'Hickory', 'Adams', 'Cherry', 'Highland', 'Johnson', 'South',
        'Dogwood', 'West', 'Chestnut', '13th', 'Spruce', '14th', 'Wilson',
        'Meadow', 'Forest', 'Hill', 'Madison',
    ]
    # (city, state, zip code, selection weight); "Portland" appears twice,
    # once for Oregon and once for Maine.
    locations = [
        ('San Francisco', 'CA', '94016', 9),
        ('Boston', 'MA', '02215', 4),
        ('New York City', 'NY', '10001', 5),
        ('Austin', 'TX', '73301', 2),
        ('Dallas', 'TX', '75001', 3),
        ('Atlanta', 'GA', '30301', 3),
        ('Portland', 'OR', '97035', 2),
        ('Portland', 'ME', '04101', 0.5),
        ('Los Angeles', 'CA', '90001', 6),
        ('Seattle', 'WA', '98101', 3),
    ]

    # Draw order (street, location, house number) is kept fixed so a seeded
    # generator yields the same sequence of addresses.
    street = random.choice(street_pool)
    city, state, zip_code, _weight = random.choices(
        locations, weights=[entry[3] for entry in locations]
    )[0]
    house_number = random.randint(1, 999)

    return f"{house_number} {street} St, {city}, {state} {zip_code}"
57 |
def create_data_csv():
    """Unimplemented placeholder; does nothing and returns ``None``."""
    return None
60 |
def write_row(order_number, product, order_date, address):
    """Assemble one CSV row for ``product`` within an order.

    The quantity is drawn from a geometric distribution whose success
    probability is ``1 - 1/price``, so the higher the unit price, the more
    likely the quantity is exactly 1.

    Returns:
        ``[order_number, product, quantity, unit price, order_date, address]``.
    """
    unit_price = products[product][0]
    success_probability = 1.0 - (1.0 / unit_price)
    drawn = numpy.random.geometric(p=success_probability, size=1)
    return [order_number, product, drawn[0], unit_price, order_date, address]
66 |
# (product, probability) pairs for accessories that may be added to an order
# whose main item is an iPhone / an Android phone.  Each entry is rolled
# independently, in the listed order.
_IPHONE_ADDONS = (
    ("Lightning Charging Cable", 0.15),
    ("Apple Airpods Headphones", 0.05),
    ("Wired Headphones", 0.07),
)
_ANDROID_ADDONS = (
    ("USB-C Charging Cable", 0.18),
    ("Bose SoundSport Headphones", 0.04),
    ("Wired Headphones", 0.07),
)


def _draw_order_count(month):
    """Draw the number of orders to simulate for ``month`` (1-12).

    November and December use a higher mean than the other months.  The
    draw is normally distributed and may, very rarely, be zero or negative,
    in which case no orders are generated for that month.
    """
    if month <= 10:
        return int(numpy.random.normal(loc=12000, scale=4000))
    if month == 11:
        return int(numpy.random.normal(loc=20000, scale=3000))
    return int(numpy.random.normal(loc=26000, scale=3000))  # month == 12


def _build_order_rows(order_number, month, product_list, weights):
    """Return every CSV row produced by a single simulated order."""
    address = generate_random_address()
    order_date = generate_random_time(month)

    product_choice = random.choices(product_list, weights)[0]
    rows = [write_row(order_number, product_choice, order_date, address)]

    # Add some items to orders with random chance
    if product_choice == 'iPhone':
        addons = _IPHONE_ADDONS
    elif product_choice in ("Google Phone", "Vareebadd Phone"):
        addons = _ANDROID_ADDONS
    else:
        addons = ()
    for addon, chance in addons:
        if random.random() < chance:
            rows.append(write_row(order_number, addon, order_date, address))

    # Occasionally a second, independently chosen product joins the order.
    if random.random() <= 0.02:
        extra_product = random.choices(product_list, weights)[0]
        rows.append(write_row(order_number, extra_product, order_date, address))

    # Deliberately dirty data: a repeated header row and a fully blank row
    # are sprinkled in (presumably so the analysis has something to clean).
    if random.random() <= 0.002:
        rows.append(list(columns))
    if random.random() <= 0.003:
        rows.append([""] * len(columns))

    return rows


def main():
    """Generate ``Sales_<Month>_2019.csv`` for all twelve months.

    Files are written to the current working directory.  Order IDs start at
    141234 and keep increasing across months.
    """
    order_number = 141234
    # The product names and their sampling weights never change, so they are
    # computed once instead of once per month.
    product_list = list(products)
    weights = [products[product][1] for product in product_list]

    for month in range(1, 13):
        orders_amount = _draw_order_count(month)
        print(orders_amount)

        # Rows are collected in a plain list and turned into a DataFrame
        # once; growing a DataFrame row by row with ``df.loc[i] = ...``
        # copies the data on every insert and is quadratic overall.
        rows = []
        for _ in range(orders_amount):  # empty range if the draw is <= 0
            rows.extend(_build_order_rows(order_number, month, product_list, weights))
            order_number += 1

        # dtype=object keeps every value exactly as produced (e.g. 700 is
        # written as "700", not "700.0"), matching row-wise insertion.
        df = pd.DataFrame(rows, columns=columns, dtype=object)

        month_name = calendar.month_name[month]
        df.to_csv(f"Sales_{month_name}_2019.csv", index=False)
        print(f"{month_name} Complete")


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pandas-Data-Science-Tasks
2 | ## Setup
3 |
4 | To access all of the files, I recommend cloning this repo locally.
5 |
6 | The other option is to click the green "clone or download" button and then click "Download ZIP". You then should extract all of the files to the location you want to edit your code.
7 |
8 | Installing Jupyter Notebook: https://jupyter.readthedocs.io/en/latest/install.html
9 | Installing Pandas library: https://pandas.pydata.org/pandas-docs/stable/install.html
10 |
11 | ## Background Information:
12 | We use Python Pandas & Python Matplotlib to analyze and answer business questions about 12 months' worth of sales data. The data contains hundreds of thousands of electronics store purchases broken down by month, product type, cost, purchase address, etc.
13 |
14 | We start by cleaning our data. Tasks during this section include:
15 | - Drop NaN values from DataFrame
16 | - Removing rows based on a condition
17 | - Change the type of columns (to_numeric, to_datetime, astype)
18 |
19 | Once we have cleaned up our data a bit, we move to the data exploration section. In this section we explore 5 high-level business questions related to our data:
20 | - What was the best month for sales? How much was earned that month?
21 | - What city sold the most product?
22 | - What time should we display advertisements to maximize the likelihood of customers buying products?
23 | - What products are most often sold together?
24 | - What product sold the most? Why do you think it sold the most?
25 |
26 | To answer these questions we walk through many different pandas & matplotlib methods. They include:
27 | - Concatenating multiple csvs together to create a new DataFrame (pd.concat)
28 | - Adding columns
29 | - Parsing cells as strings to make new columns (.str)
30 | - Using the .apply() method
31 | - Using groupby to perform aggregate analysis
32 | - Plotting bar charts and line graphs to visualize our results
33 | - Labeling our graphs
34 |
35 | ## Process:
36 | 
37 |
--------------------------------------------------------------------------------
/SalesAnalysis/Sales_Data_Analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import os"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "#### Merging 12 months of sales data into a single file"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 | "
\n",
29 | "\n",
42 | "
\n",
43 | " \n",
44 | " \n",
45 | " | \n",
46 | " Order ID | \n",
47 | " Product | \n",
48 | " Quantity Ordered | \n",
49 | " Price Each | \n",
50 | " Order Date | \n",
51 | " Purchase Address | \n",
52 | "
\n",
53 | " \n",
54 | " \n",
55 | " \n",
56 | " 0 | \n",
57 | " 176558 | \n",
58 | " USB-C Charging Cable | \n",
59 | " 2 | \n",
60 | " 11.95 | \n",
61 | " 04/19/19 08:46 | \n",
62 | " 917 1st St, Dallas, TX 75001 | \n",
63 | "
\n",
64 | " \n",
65 | " 1 | \n",
66 | " NaN | \n",
67 | " NaN | \n",
68 | " NaN | \n",
69 | " NaN | \n",
70 | " NaN | \n",
71 | " NaN | \n",
72 | "
\n",
73 | " \n",
74 | " 2 | \n",
75 | " 176559 | \n",
76 | " Bose SoundSport Headphones | \n",
77 | " 1 | \n",
78 | " 99.99 | \n",
79 | " 04/07/19 22:30 | \n",
80 | " 682 Chestnut St, Boston, MA 02215 | \n",
81 | "
\n",
82 | " \n",
83 | " 3 | \n",
84 | " 176560 | \n",
85 | " Google Phone | \n",
86 | " 1 | \n",
87 | " 600 | \n",
88 | " 04/12/19 14:38 | \n",
89 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
90 | "
\n",
91 | " \n",
92 | " 4 | \n",
93 | " 176560 | \n",
94 | " Wired Headphones | \n",
95 | " 1 | \n",
96 | " 11.99 | \n",
97 | " 04/12/19 14:38 | \n",
98 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
99 | "
\n",
100 | " \n",
101 | "
\n",
102 | "
"
103 | ],
104 | "text/plain": [
105 | " Order ID Product Quantity Ordered Price Each \\\n",
106 | "0 176558 USB-C Charging Cable 2 11.95 \n",
107 | "1 NaN NaN NaN NaN \n",
108 | "2 176559 Bose SoundSport Headphones 1 99.99 \n",
109 | "3 176560 Google Phone 1 600 \n",
110 | "4 176560 Wired Headphones 1 11.99 \n",
111 | "\n",
112 | " Order Date Purchase Address \n",
113 | "0 04/19/19 08:46 917 1st St, Dallas, TX 75001 \n",
114 | "1 NaN NaN \n",
115 | "2 04/07/19 22:30 682 Chestnut St, Boston, MA 02215 \n",
116 | "3 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 \n",
117 | "4 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 "
118 | ]
119 | },
120 | "execution_count": 2,
121 | "metadata": {},
122 | "output_type": "execute_result"
123 | }
124 | ],
125 | "source": [
126 | "files = [file for file in os.listdir('./Sales_Data') ]\n",
127 | "\n",
128 | "\n",
129 | "all_months_data = pd.DataFrame()\n",
130 | "\n",
131 | "for file in files:\n",
132 | " df = pd.read_csv(\"./Sales_Data/\"+file)\n",
133 | " all_months_data = pd.concat([all_months_data, df])\n",
134 | " \n",
135 | "all_months_data.head()"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 3,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "#Saving single file output \n",
145 | "all_months_data.to_csv(\"all_data.csv\", index=False)"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "#### Read in updated dataframe"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 4,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "data": {
162 | "text/html": [
163 | "\n",
164 | "\n",
177 | "
\n",
178 | " \n",
179 | " \n",
180 | " | \n",
181 | " Order ID | \n",
182 | " Product | \n",
183 | " Quantity Ordered | \n",
184 | " Price Each | \n",
185 | " Order Date | \n",
186 | " Purchase Address | \n",
187 | "
\n",
188 | " \n",
189 | " \n",
190 | " \n",
191 | " 0 | \n",
192 | " 176558 | \n",
193 | " USB-C Charging Cable | \n",
194 | " 2 | \n",
195 | " 11.95 | \n",
196 | " 04/19/19 08:46 | \n",
197 | " 917 1st St, Dallas, TX 75001 | \n",
198 | "
\n",
199 | " \n",
200 | " 1 | \n",
201 | " NaN | \n",
202 | " NaN | \n",
203 | " NaN | \n",
204 | " NaN | \n",
205 | " NaN | \n",
206 | " NaN | \n",
207 | "
\n",
208 | " \n",
209 | " 2 | \n",
210 | " 176559 | \n",
211 | " Bose SoundSport Headphones | \n",
212 | " 1 | \n",
213 | " 99.99 | \n",
214 | " 04/07/19 22:30 | \n",
215 | " 682 Chestnut St, Boston, MA 02215 | \n",
216 | "
\n",
217 | " \n",
218 | " 3 | \n",
219 | " 176560 | \n",
220 | " Google Phone | \n",
221 | " 1 | \n",
222 | " 600 | \n",
223 | " 04/12/19 14:38 | \n",
224 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
225 | "
\n",
226 | " \n",
227 | " 4 | \n",
228 | " 176560 | \n",
229 | " Wired Headphones | \n",
230 | " 1 | \n",
231 | " 11.99 | \n",
232 | " 04/12/19 14:38 | \n",
233 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
234 | "
\n",
235 | " \n",
236 | "
\n",
237 | "
"
238 | ],
239 | "text/plain": [
240 | " Order ID Product Quantity Ordered Price Each \\\n",
241 | "0 176558 USB-C Charging Cable 2 11.95 \n",
242 | "1 NaN NaN NaN NaN \n",
243 | "2 176559 Bose SoundSport Headphones 1 99.99 \n",
244 | "3 176560 Google Phone 1 600 \n",
245 | "4 176560 Wired Headphones 1 11.99 \n",
246 | "\n",
247 | " Order Date Purchase Address \n",
248 | "0 04/19/19 08:46 917 1st St, Dallas, TX 75001 \n",
249 | "1 NaN NaN \n",
250 | "2 04/07/19 22:30 682 Chestnut St, Boston, MA 02215 \n",
251 | "3 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 \n",
252 | "4 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 "
253 | ]
254 | },
255 | "execution_count": 4,
256 | "metadata": {},
257 | "output_type": "execute_result"
258 | }
259 | ],
260 | "source": [
261 | "all_data = pd.read_csv(\"all_data.csv\")\n",
262 | "all_data.head()"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "metadata": {},
268 | "source": [
269 | "#### Clean up the data "
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 5,
275 | "metadata": {},
276 | "outputs": [],
277 | "source": [
278 | "# Check rows of NAN\n",
279 | "nan_df = all_data[all_data.isna().any(axis=1)]\n",
280 | "nan_df.head()\n",
281 | "# Drop rows of NAN \n",
282 | "all_data = all_data.dropna(how='all')"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 6,
288 | "metadata": {},
289 | "outputs": [],
290 | "source": [
291 | "# Find 'Or' and delete and update all_data df\n",
292 | "all_data = all_data[all_data['Order Date'].str[0:2] != 'Or']"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": 7,
298 | "metadata": {},
299 | "outputs": [
300 | {
301 | "data": {
302 | "text/html": [
303 | "\n",
304 | "\n",
317 | "
\n",
318 | " \n",
319 | " \n",
320 | " | \n",
321 | " Order ID | \n",
322 | " Product | \n",
323 | " Quantity Ordered | \n",
324 | " Price Each | \n",
325 | " Order Date | \n",
326 | " Purchase Address | \n",
327 | "
\n",
328 | " \n",
329 | " \n",
330 | " \n",
331 | " 0 | \n",
332 | " 176558 | \n",
333 | " USB-C Charging Cable | \n",
334 | " 2 | \n",
335 | " 11.95 | \n",
336 | " 04/19/19 08:46 | \n",
337 | " 917 1st St, Dallas, TX 75001 | \n",
338 | "
\n",
339 | " \n",
340 | " 2 | \n",
341 | " 176559 | \n",
342 | " Bose SoundSport Headphones | \n",
343 | " 1 | \n",
344 | " 99.99 | \n",
345 | " 04/07/19 22:30 | \n",
346 | " 682 Chestnut St, Boston, MA 02215 | \n",
347 | "
\n",
348 | " \n",
349 | " 3 | \n",
350 | " 176560 | \n",
351 | " Google Phone | \n",
352 | " 1 | \n",
353 | " 600.00 | \n",
354 | " 04/12/19 14:38 | \n",
355 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
356 | "
\n",
357 | " \n",
358 | " 4 | \n",
359 | " 176560 | \n",
360 | " Wired Headphones | \n",
361 | " 1 | \n",
362 | " 11.99 | \n",
363 | " 04/12/19 14:38 | \n",
364 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
365 | "
\n",
366 | " \n",
367 | " 5 | \n",
368 | " 176561 | \n",
369 | " Wired Headphones | \n",
370 | " 1 | \n",
371 | " 11.99 | \n",
372 | " 04/30/19 09:27 | \n",
373 | " 333 8th St, Los Angeles, CA 90001 | \n",
374 | "
\n",
375 | " \n",
376 | "
\n",
377 | "
"
378 | ],
379 | "text/plain": [
380 | " Order ID Product Quantity Ordered Price Each \\\n",
381 | "0 176558 USB-C Charging Cable 2 11.95 \n",
382 | "2 176559 Bose SoundSport Headphones 1 99.99 \n",
383 | "3 176560 Google Phone 1 600.00 \n",
384 | "4 176560 Wired Headphones 1 11.99 \n",
385 | "5 176561 Wired Headphones 1 11.99 \n",
386 | "\n",
387 | " Order Date Purchase Address \n",
388 | "0 04/19/19 08:46 917 1st St, Dallas, TX 75001 \n",
389 | "2 04/07/19 22:30 682 Chestnut St, Boston, MA 02215 \n",
390 | "3 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 \n",
391 | "4 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 \n",
392 | "5 04/30/19 09:27 333 8th St, Los Angeles, CA 90001 "
393 | ]
394 | },
395 | "execution_count": 7,
396 | "metadata": {},
397 | "output_type": "execute_result"
398 | }
399 | ],
400 | "source": [
401 | "#Convert columns to the correct type\n",
402 | "\n",
403 | "#to int\n",
404 | "all_data['Quantity Ordered'] = pd.to_numeric(all_data['Quantity Ordered']) \n",
405 | "#to float\n",
406 | "all_data['Price Each'] = pd.to_numeric(all_data['Price Each']) \n",
407 | "\n",
408 | "all_data.head()"
409 | ]
410 | },
411 | {
412 | "cell_type": "markdown",
413 | "metadata": {},
414 | "source": [
415 | "#### Augment data with additional columns"
416 | ]
417 | },
418 | {
419 | "cell_type": "markdown",
420 | "metadata": {},
421 | "source": [
422 | "#### 2: Add Month Column"
423 | ]
424 | },
425 | {
426 | "cell_type": "code",
427 | "execution_count": 8,
428 | "metadata": {},
429 | "outputs": [
430 | {
431 | "data": {
432 | "text/html": [
433 | "\n",
434 | "\n",
447 | "
\n",
448 | " \n",
449 | " \n",
450 | " | \n",
451 | " Order ID | \n",
452 | " Product | \n",
453 | " Quantity Ordered | \n",
454 | " Price Each | \n",
455 | " Order Date | \n",
456 | " Purchase Address | \n",
457 | " Month | \n",
458 | "
\n",
459 | " \n",
460 | " \n",
461 | " \n",
462 | " 0 | \n",
463 | " 176558 | \n",
464 | " USB-C Charging Cable | \n",
465 | " 2 | \n",
466 | " 11.95 | \n",
467 | " 04/19/19 08:46 | \n",
468 | " 917 1st St, Dallas, TX 75001 | \n",
469 | " 4 | \n",
470 | "
\n",
471 | " \n",
472 | " 2 | \n",
473 | " 176559 | \n",
474 | " Bose SoundSport Headphones | \n",
475 | " 1 | \n",
476 | " 99.99 | \n",
477 | " 04/07/19 22:30 | \n",
478 | " 682 Chestnut St, Boston, MA 02215 | \n",
479 | " 4 | \n",
480 | "
\n",
481 | " \n",
482 | " 3 | \n",
483 | " 176560 | \n",
484 | " Google Phone | \n",
485 | " 1 | \n",
486 | " 600.00 | \n",
487 | " 04/12/19 14:38 | \n",
488 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
489 | " 4 | \n",
490 | "
\n",
491 | " \n",
492 | " 4 | \n",
493 | " 176560 | \n",
494 | " Wired Headphones | \n",
495 | " 1 | \n",
496 | " 11.99 | \n",
497 | " 04/12/19 14:38 | \n",
498 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
499 | " 4 | \n",
500 | "
\n",
501 | " \n",
502 | " 5 | \n",
503 | " 176561 | \n",
504 | " Wired Headphones | \n",
505 | " 1 | \n",
506 | " 11.99 | \n",
507 | " 04/30/19 09:27 | \n",
508 | " 333 8th St, Los Angeles, CA 90001 | \n",
509 | " 4 | \n",
510 | "
\n",
511 | " \n",
512 | "
\n",
513 | "
"
514 | ],
515 | "text/plain": [
516 | " Order ID Product Quantity Ordered Price Each \\\n",
517 | "0 176558 USB-C Charging Cable 2 11.95 \n",
518 | "2 176559 Bose SoundSport Headphones 1 99.99 \n",
519 | "3 176560 Google Phone 1 600.00 \n",
520 | "4 176560 Wired Headphones 1 11.99 \n",
521 | "5 176561 Wired Headphones 1 11.99 \n",
522 | "\n",
523 | " Order Date Purchase Address Month \n",
524 | "0 04/19/19 08:46 917 1st St, Dallas, TX 75001 4 \n",
525 | "2 04/07/19 22:30 682 Chestnut St, Boston, MA 02215 4 \n",
526 | "3 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 4 \n",
527 | "4 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 4 \n",
528 | "5 04/30/19 09:27 333 8th St, Los Angeles, CA 90001 4 "
529 | ]
530 | },
531 | "execution_count": 8,
532 | "metadata": {},
533 | "output_type": "execute_result"
534 | }
535 | ],
536 | "source": [
537 | "# Transforming \"order date\" column \n",
538 | "all_data['Month'] = all_data['Order Date'].str[0:2]\n",
539 | "all_data['Month'] = all_data['Month'].astype('int32')\n",
540 | "all_data.head()"
541 | ]
542 | },
543 | {
544 | "cell_type": "markdown",
545 | "metadata": {},
546 | "source": [
547 | "#### 3: Add a sales column "
548 | ]
549 | },
550 | {
551 | "cell_type": "code",
552 | "execution_count": 9,
553 | "metadata": {},
554 | "outputs": [
555 | {
556 | "data": {
557 | "text/html": [
558 | "\n",
559 | "\n",
572 | "
\n",
573 | " \n",
574 | " \n",
575 | " | \n",
576 | " Order ID | \n",
577 | " Product | \n",
578 | " Quantity Ordered | \n",
579 | " Price Each | \n",
580 | " Order Date | \n",
581 | " Purchase Address | \n",
582 | " Month | \n",
583 | " Sales | \n",
584 | "
\n",
585 | " \n",
586 | " \n",
587 | " \n",
588 | " 0 | \n",
589 | " 176558 | \n",
590 | " USB-C Charging Cable | \n",
591 | " 2 | \n",
592 | " 11.95 | \n",
593 | " 04/19/19 08:46 | \n",
594 | " 917 1st St, Dallas, TX 75001 | \n",
595 | " 4 | \n",
596 | " 23.90 | \n",
597 | "
\n",
598 | " \n",
599 | " 2 | \n",
600 | " 176559 | \n",
601 | " Bose SoundSport Headphones | \n",
602 | " 1 | \n",
603 | " 99.99 | \n",
604 | " 04/07/19 22:30 | \n",
605 | " 682 Chestnut St, Boston, MA 02215 | \n",
606 | " 4 | \n",
607 | " 99.99 | \n",
608 | "
\n",
609 | " \n",
610 | " 3 | \n",
611 | " 176560 | \n",
612 | " Google Phone | \n",
613 | " 1 | \n",
614 | " 600.00 | \n",
615 | " 04/12/19 14:38 | \n",
616 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
617 | " 4 | \n",
618 | " 600.00 | \n",
619 | "
\n",
620 | " \n",
621 | " 4 | \n",
622 | " 176560 | \n",
623 | " Wired Headphones | \n",
624 | " 1 | \n",
625 | " 11.99 | \n",
626 | " 04/12/19 14:38 | \n",
627 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
628 | " 4 | \n",
629 | " 11.99 | \n",
630 | "
\n",
631 | " \n",
632 | " 5 | \n",
633 | " 176561 | \n",
634 | " Wired Headphones | \n",
635 | " 1 | \n",
636 | " 11.99 | \n",
637 | " 04/30/19 09:27 | \n",
638 | " 333 8th St, Los Angeles, CA 90001 | \n",
639 | " 4 | \n",
640 | " 11.99 | \n",
641 | "
\n",
642 | " \n",
643 | "
\n",
644 | "
"
645 | ],
646 | "text/plain": [
647 | " Order ID Product Quantity Ordered Price Each \\\n",
648 | "0 176558 USB-C Charging Cable 2 11.95 \n",
649 | "2 176559 Bose SoundSport Headphones 1 99.99 \n",
650 | "3 176560 Google Phone 1 600.00 \n",
651 | "4 176560 Wired Headphones 1 11.99 \n",
652 | "5 176561 Wired Headphones 1 11.99 \n",
653 | "\n",
654 | " Order Date Purchase Address Month Sales \n",
655 | "0 04/19/19 08:46 917 1st St, Dallas, TX 75001 4 23.90 \n",
656 | "2 04/07/19 22:30 682 Chestnut St, Boston, MA 02215 4 99.99 \n",
657 | "3 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 4 600.00 \n",
658 | "4 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 4 11.99 \n",
659 | "5 04/30/19 09:27 333 8th St, Los Angeles, CA 90001 4 11.99 "
660 | ]
661 | },
662 | "execution_count": 9,
663 | "metadata": {},
664 | "output_type": "execute_result"
665 | }
666 | ],
667 | "source": [
668 | "all_data['Sales'] = all_data['Quantity Ordered'] * all_data['Price Each']\n",
669 | "all_data.head()"
670 | ]
671 | },
672 | {
673 | "cell_type": "markdown",
674 | "metadata": {},
675 | "source": [
676 | "#### 4: Add a city column"
677 | ]
678 | },
679 | {
680 | "cell_type": "code",
681 | "execution_count": 10,
682 | "metadata": {},
683 | "outputs": [
684 | {
685 | "data": {
686 | "text/html": [
687 | "\n",
688 | "\n",
701 | "
\n",
702 | " \n",
703 | " \n",
704 | " | \n",
705 | " Order ID | \n",
706 | " Product | \n",
707 | " Quantity Ordered | \n",
708 | " Price Each | \n",
709 | " Order Date | \n",
710 | " Purchase Address | \n",
711 | " Month | \n",
712 | " Sales | \n",
713 | " City | \n",
714 | "
\n",
715 | " \n",
716 | " \n",
717 | " \n",
718 | " 0 | \n",
719 | " 176558 | \n",
720 | " USB-C Charging Cable | \n",
721 | " 2 | \n",
722 | " 11.95 | \n",
723 | " 04/19/19 08:46 | \n",
724 | " 917 1st St, Dallas, TX 75001 | \n",
725 | " 4 | \n",
726 | " 23.90 | \n",
727 | " Dallas TX | \n",
728 | "
\n",
729 | " \n",
730 | " 2 | \n",
731 | " 176559 | \n",
732 | " Bose SoundSport Headphones | \n",
733 | " 1 | \n",
734 | " 99.99 | \n",
735 | " 04/07/19 22:30 | \n",
736 | " 682 Chestnut St, Boston, MA 02215 | \n",
737 | " 4 | \n",
738 | " 99.99 | \n",
739 | " Boston MA | \n",
740 | "
\n",
741 | " \n",
742 | " 3 | \n",
743 | " 176560 | \n",
744 | " Google Phone | \n",
745 | " 1 | \n",
746 | " 600.00 | \n",
747 | " 04/12/19 14:38 | \n",
748 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
749 | " 4 | \n",
750 | " 600.00 | \n",
751 | " Los Angeles CA | \n",
752 | "
\n",
753 | " \n",
754 | " 4 | \n",
755 | " 176560 | \n",
756 | " Wired Headphones | \n",
757 | " 1 | \n",
758 | " 11.99 | \n",
759 | " 04/12/19 14:38 | \n",
760 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
761 | " 4 | \n",
762 | " 11.99 | \n",
763 | " Los Angeles CA | \n",
764 | "
\n",
765 | " \n",
766 | " 5 | \n",
767 | " 176561 | \n",
768 | " Wired Headphones | \n",
769 | " 1 | \n",
770 | " 11.99 | \n",
771 | " 04/30/19 09:27 | \n",
772 | " 333 8th St, Los Angeles, CA 90001 | \n",
773 | " 4 | \n",
774 | " 11.99 | \n",
775 | " Los Angeles CA | \n",
776 | "
\n",
777 | " \n",
778 | "
\n",
779 | "
"
780 | ],
781 | "text/plain": [
782 | " Order ID Product Quantity Ordered Price Each \\\n",
783 | "0 176558 USB-C Charging Cable 2 11.95 \n",
784 | "2 176559 Bose SoundSport Headphones 1 99.99 \n",
785 | "3 176560 Google Phone 1 600.00 \n",
786 | "4 176560 Wired Headphones 1 11.99 \n",
787 | "5 176561 Wired Headphones 1 11.99 \n",
788 | "\n",
789 | " Order Date Purchase Address Month Sales \\\n",
790 | "0 04/19/19 08:46 917 1st St, Dallas, TX 75001 4 23.90 \n",
791 | "2 04/07/19 22:30 682 Chestnut St, Boston, MA 02215 4 99.99 \n",
792 | "3 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 4 600.00 \n",
793 | "4 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001 4 11.99 \n",
794 | "5 04/30/19 09:27 333 8th St, Los Angeles, CA 90001 4 11.99 \n",
795 | "\n",
796 | " City \n",
797 | "0 Dallas TX \n",
798 | "2 Boston MA \n",
799 | "3 Los Angeles CA \n",
800 | "4 Los Angeles CA \n",
801 | "5 Los Angeles CA "
802 | ]
803 | },
804 | "execution_count": 10,
805 | "metadata": {},
806 | "output_type": "execute_result"
807 | }
808 | ],
809 | "source": [
810 | "# # Method 1: Let's use .apply() method \n",
811 | "\n",
812 | "# all_data['Column'] = all_data['Purchase Address'].apply(lambda x: x.split(',')[1])\n",
813 | "# all_data.head()\n",
814 | "\n",
815 | "# Method 2: Helper functions that do the same as the line above \n",
816 | "\n",
817 | "def get_city(address):\n",
818 | " return address.split(',')[1]\n",
819 | " \n",
820 | "def get_state(address):\n",
821 | " return address.split(',')[2].split(' ')[1]\n",
822 | " \n",
823 | "all_data['City'] = all_data['Purchase Address'].apply(lambda x: get_city(x) + ' ' + get_state(x))\n",
824 | " #apply(lambda x: f\"{get_city(x)} ({get_state(x)})\")\n",
825 | "all_data.head()\n"
826 | ]
827 | },
828 | {
829 | "cell_type": "markdown",
830 | "metadata": {},
831 | "source": [
832 | "#### What was the best month for sales? how much was earned that month?"
833 | ]
834 | },
835 | {
836 | "cell_type": "code",
837 | "execution_count": 11,
838 | "metadata": {},
839 | "outputs": [],
840 | "source": [
841 | "results = all_data.groupby('Month').sum()"
842 | ]
843 | },
844 | {
845 | "cell_type": "code",
846 | "execution_count": 12,
847 | "metadata": {},
848 | "outputs": [
849 | {
850 | "data": {
851 | "text/plain": [
852 | "Month\n",
853 | "1 1.822257e+06\n",
854 | "2 2.202022e+06\n",
855 | "3 2.807100e+06\n",
856 | "4 3.390670e+06\n",
857 | "5 3.152607e+06\n",
858 | "6 2.577802e+06\n",
859 | "7 2.647776e+06\n",
860 | "8 2.244468e+06\n",
861 | "9 2.097560e+06\n",
862 | "10 3.736727e+06\n",
863 | "11 3.199603e+06\n",
864 | "12 4.613443e+06\n",
865 | "Name: Sales, dtype: float64"
866 | ]
867 | },
868 | "execution_count": 12,
869 | "metadata": {},
870 | "output_type": "execute_result"
871 | }
872 | ],
873 | "source": [
874 | "#Sales only \n",
875 | "all_data.groupby('Month').sum()['Sales']"
876 | ]
877 | },
878 | {
879 | "cell_type": "code",
880 | "execution_count": 13,
881 | "metadata": {},
882 | "outputs": [
883 | {
884 | "data": {
885 | "text/plain": [
886 | ""
887 | ]
888 | },
889 | "metadata": {},
890 | "output_type": "display_data"
891 | }
892 | ],
893 | "source": [
894 | "import matplotlib.pyplot as plt\n",
895 | "\n",
896 | "months = range(1,13)\n",
897 | "\n",
898 | "plt.bar(months, results['Sales'])\n",
899 | "plt.xticks(months)\n",
900 | "plt.xlabel('Month number')\n",
901 | "plt.ylabel('Sales in USD ($)')\n",
902 | "plt.show()\n"
903 | ]
904 | },
905 | {
906 | "cell_type": "markdown",
907 | "metadata": {},
908 | "source": [
909 | "#### What city had the highest number of sales "
910 | ]
911 | },
912 | {
913 | "cell_type": "code",
914 | "execution_count": 14,
915 | "metadata": {},
916 | "outputs": [
917 | {
918 | "data": {
919 | "text/html": [
920 | "\n",
921 | "\n",
934 | "
\n",
935 | " \n",
936 | " \n",
937 | " | \n",
938 | " Quantity Ordered | \n",
939 | " Price Each | \n",
940 | " Month | \n",
941 | " Sales | \n",
942 | "
\n",
943 | " \n",
944 | " City | \n",
945 | " | \n",
946 | " | \n",
947 | " | \n",
948 | " | \n",
949 | "
\n",
950 | " \n",
951 | " \n",
952 | " \n",
953 | " Atlanta GA | \n",
954 | " 16602 | \n",
955 | " 2.779908e+06 | \n",
956 | " 104794 | \n",
957 | " 2.795499e+06 | \n",
958 | "
\n",
959 | " \n",
960 | " Austin TX | \n",
961 | " 11153 | \n",
962 | " 1.809874e+06 | \n",
963 | " 69829 | \n",
964 | " 1.819582e+06 | \n",
965 | "
\n",
966 | " \n",
967 | " Boston MA | \n",
968 | " 22528 | \n",
969 | " 3.637410e+06 | \n",
970 | " 141112 | \n",
971 | " 3.661642e+06 | \n",
972 | "
\n",
973 | " \n",
974 | " Dallas TX | \n",
975 | " 16730 | \n",
976 | " 2.752628e+06 | \n",
977 | " 104620 | \n",
978 | " 2.767975e+06 | \n",
979 | "
\n",
980 | " \n",
981 | " Los Angeles CA | \n",
982 | " 33289 | \n",
983 | " 5.421435e+06 | \n",
984 | " 208325 | \n",
985 | " 5.452571e+06 | \n",
986 | "
\n",
987 | " \n",
988 | " New York City NY | \n",
989 | " 27932 | \n",
990 | " 4.635371e+06 | \n",
991 | " 175741 | \n",
992 | " 4.664317e+06 | \n",
993 | "
\n",
994 | " \n",
995 | " Portland ME | \n",
996 | " 2750 | \n",
997 | " 4.471893e+05 | \n",
998 | " 17144 | \n",
999 | " 4.497583e+05 | \n",
1000 | "
\n",
1001 | " \n",
1002 | " Portland OR | \n",
1003 | " 11303 | \n",
1004 | " 1.860558e+06 | \n",
1005 | " 70621 | \n",
1006 | " 1.870732e+06 | \n",
1007 | "
\n",
1008 | " \n",
1009 | " San Francisco CA | \n",
1010 | " 50239 | \n",
1011 | " 8.211462e+06 | \n",
1012 | " 315520 | \n",
1013 | " 8.262204e+06 | \n",
1014 | "
\n",
1015 | " \n",
1016 | " Seattle WA | \n",
1017 | " 16553 | \n",
1018 | " 2.733296e+06 | \n",
1019 | " 104941 | \n",
1020 | " 2.747755e+06 | \n",
1021 | "
\n",
1022 | " \n",
1023 | "
\n",
1024 | "
"
1025 | ],
1026 | "text/plain": [
1027 | " Quantity Ordered Price Each Month Sales\n",
1028 | "City \n",
1029 | " Atlanta GA 16602 2.779908e+06 104794 2.795499e+06\n",
1030 | " Austin TX 11153 1.809874e+06 69829 1.819582e+06\n",
1031 | " Boston MA 22528 3.637410e+06 141112 3.661642e+06\n",
1032 | " Dallas TX 16730 2.752628e+06 104620 2.767975e+06\n",
1033 | " Los Angeles CA 33289 5.421435e+06 208325 5.452571e+06\n",
1034 | " New York City NY 27932 4.635371e+06 175741 4.664317e+06\n",
1035 | " Portland ME 2750 4.471893e+05 17144 4.497583e+05\n",
1036 | " Portland OR 11303 1.860558e+06 70621 1.870732e+06\n",
1037 | " San Francisco CA 50239 8.211462e+06 315520 8.262204e+06\n",
1038 | " Seattle WA 16553 2.733296e+06 104941 2.747755e+06"
1039 | ]
1040 | },
1041 | "execution_count": 14,
1042 | "metadata": {},
1043 | "output_type": "execute_result"
1044 | }
1045 | ],
1046 | "source": [
1047 | "results = all_data.groupby('City').sum()\n",
1048 | "results"
1049 | ]
1050 | },
1051 | {
1052 | "cell_type": "code",
1053 | "execution_count": 15,
1054 | "metadata": {},
1055 | "outputs": [
1056 | {
1057 | "data": {
1058 | "image/png": "\n",
1059 | "text/plain": [
1060 | ""
1061 | ]
1062 | },
1063 | "metadata": {
1064 | "needs_background": "light"
1065 | },
1066 | "output_type": "display_data"
1067 | }
1068 | ],
1069 | "source": [
1070 | "import matplotlib.pyplot as plt\n",
1071 | "\n",
1072 | "cities = [city for city, df in all_data.groupby('City')]\n",
1073 | "\n",
1074 | "plt.bar(cities, results['Sales'])\n",
1075 | "plt.xticks(cities, rotation ='vertical', size=8)\n",
1076 | "plt.xlabel('City Name')\n",
1077 | "plt.ylabel('Sales in USD ($)')\n",
1078 | "plt.show()\n"
1079 | ]
1080 | },
1081 | {
1082 | "cell_type": "markdown",
1083 | "metadata": {},
1084 | "source": [
1085 | "#### What time should we display advertisements to maximize the likelihood of customers buying products?"
1086 | ]
1087 | },
1088 | {
1089 | "cell_type": "code",
1090 | "execution_count": 16,
1091 | "metadata": {},
1092 | "outputs": [],
1093 | "source": [
1094 | "all_data['Order Date'] = pd.to_datetime(all_data['Order Date'])"
1095 | ]
1096 | },
1097 | {
1098 | "cell_type": "code",
1099 | "execution_count": 17,
1100 | "metadata": {},
1101 | "outputs": [
1102 | {
1103 | "data": {
1104 | "text/html": [
1105 | "\n",
1106 | "\n",
1119 | "
\n",
1120 | " \n",
1121 | " \n",
1122 | " | \n",
1123 | " Order ID | \n",
1124 | " Product | \n",
1125 | " Quantity Ordered | \n",
1126 | " Price Each | \n",
1127 | " Order Date | \n",
1128 | " Purchase Address | \n",
1129 | " Month | \n",
1130 | " Sales | \n",
1131 | " City | \n",
1132 | "
\n",
1133 | " \n",
1134 | " \n",
1135 | " \n",
1136 | " 0 | \n",
1137 | " 176558 | \n",
1138 | " USB-C Charging Cable | \n",
1139 | " 2 | \n",
1140 | " 11.95 | \n",
1141 | " 2019-04-19 08:46:00 | \n",
1142 | " 917 1st St, Dallas, TX 75001 | \n",
1143 | " 4 | \n",
1144 | " 23.90 | \n",
1145 | " Dallas TX | \n",
1146 | "
\n",
1147 | " \n",
1148 | " 2 | \n",
1149 | " 176559 | \n",
1150 | " Bose SoundSport Headphones | \n",
1151 | " 1 | \n",
1152 | " 99.99 | \n",
1153 | " 2019-04-07 22:30:00 | \n",
1154 | " 682 Chestnut St, Boston, MA 02215 | \n",
1155 | " 4 | \n",
1156 | " 99.99 | \n",
1157 | " Boston MA | \n",
1158 | "
\n",
1159 | " \n",
1160 | " 3 | \n",
1161 | " 176560 | \n",
1162 | " Google Phone | \n",
1163 | " 1 | \n",
1164 | " 600.00 | \n",
1165 | " 2019-04-12 14:38:00 | \n",
1166 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
1167 | " 4 | \n",
1168 | " 600.00 | \n",
1169 | " Los Angeles CA | \n",
1170 | "
\n",
1171 | " \n",
1172 | " 4 | \n",
1173 | " 176560 | \n",
1174 | " Wired Headphones | \n",
1175 | " 1 | \n",
1176 | " 11.99 | \n",
1177 | " 2019-04-12 14:38:00 | \n",
1178 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
1179 | " 4 | \n",
1180 | " 11.99 | \n",
1181 | " Los Angeles CA | \n",
1182 | "
\n",
1183 | " \n",
1184 | " 5 | \n",
1185 | " 176561 | \n",
1186 | " Wired Headphones | \n",
1187 | " 1 | \n",
1188 | " 11.99 | \n",
1189 | " 2019-04-30 09:27:00 | \n",
1190 | " 333 8th St, Los Angeles, CA 90001 | \n",
1191 | " 4 | \n",
1192 | " 11.99 | \n",
1193 | " Los Angeles CA | \n",
1194 | "
\n",
1195 | " \n",
1196 | "
\n",
1197 | "
"
1198 | ],
1199 | "text/plain": [
1200 | " Order ID Product Quantity Ordered Price Each \\\n",
1201 | "0 176558 USB-C Charging Cable 2 11.95 \n",
1202 | "2 176559 Bose SoundSport Headphones 1 99.99 \n",
1203 | "3 176560 Google Phone 1 600.00 \n",
1204 | "4 176560 Wired Headphones 1 11.99 \n",
1205 | "5 176561 Wired Headphones 1 11.99 \n",
1206 | "\n",
1207 | " Order Date Purchase Address Month Sales \\\n",
1208 | "0 2019-04-19 08:46:00 917 1st St, Dallas, TX 75001 4 23.90 \n",
1209 | "2 2019-04-07 22:30:00 682 Chestnut St, Boston, MA 02215 4 99.99 \n",
1210 | "3 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4 600.00 \n",
1211 | "4 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4 11.99 \n",
1212 | "5 2019-04-30 09:27:00 333 8th St, Los Angeles, CA 90001 4 11.99 \n",
1213 | "\n",
1214 | " City \n",
1215 | "0 Dallas TX \n",
1216 | "2 Boston MA \n",
1217 | "3 Los Angeles CA \n",
1218 | "4 Los Angeles CA \n",
1219 | "5 Los Angeles CA "
1220 | ]
1221 | },
1222 | "execution_count": 17,
1223 | "metadata": {},
1224 | "output_type": "execute_result"
1225 | }
1226 | ],
1227 | "source": [
1228 | "all_data.head()"
1229 | ]
1230 | },
1231 | {
1232 | "cell_type": "code",
1233 | "execution_count": 18,
1234 | "metadata": {},
1235 | "outputs": [
1236 | {
1237 | "data": {
1238 | "text/html": [
1239 | "\n",
1240 | "\n",
1253 | "
\n",
1254 | " \n",
1255 | " \n",
1256 | " | \n",
1257 | " Order ID | \n",
1258 | " Product | \n",
1259 | " Quantity Ordered | \n",
1260 | " Price Each | \n",
1261 | " Order Date | \n",
1262 | " Purchase Address | \n",
1263 | " Month | \n",
1264 | " Sales | \n",
1265 | " City | \n",
1266 | " Hour | \n",
1267 | "
\n",
1268 | " \n",
1269 | " \n",
1270 | " \n",
1271 | " 0 | \n",
1272 | " 176558 | \n",
1273 | " USB-C Charging Cable | \n",
1274 | " 2 | \n",
1275 | " 11.95 | \n",
1276 | " 2019-04-19 08:46:00 | \n",
1277 | " 917 1st St, Dallas, TX 75001 | \n",
1278 | " 4 | \n",
1279 | " 23.90 | \n",
1280 | " Dallas TX | \n",
1281 | " 8 | \n",
1282 | "
\n",
1283 | " \n",
1284 | " 2 | \n",
1285 | " 176559 | \n",
1286 | " Bose SoundSport Headphones | \n",
1287 | " 1 | \n",
1288 | " 99.99 | \n",
1289 | " 2019-04-07 22:30:00 | \n",
1290 | " 682 Chestnut St, Boston, MA 02215 | \n",
1291 | " 4 | \n",
1292 | " 99.99 | \n",
1293 | " Boston MA | \n",
1294 | " 22 | \n",
1295 | "
\n",
1296 | " \n",
1297 | " 3 | \n",
1298 | " 176560 | \n",
1299 | " Google Phone | \n",
1300 | " 1 | \n",
1301 | " 600.00 | \n",
1302 | " 2019-04-12 14:38:00 | \n",
1303 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
1304 | " 4 | \n",
1305 | " 600.00 | \n",
1306 | " Los Angeles CA | \n",
1307 | " 14 | \n",
1308 | "
\n",
1309 | " \n",
1310 | " 4 | \n",
1311 | " 176560 | \n",
1312 | " Wired Headphones | \n",
1313 | " 1 | \n",
1314 | " 11.99 | \n",
1315 | " 2019-04-12 14:38:00 | \n",
1316 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
1317 | " 4 | \n",
1318 | " 11.99 | \n",
1319 | " Los Angeles CA | \n",
1320 | " 14 | \n",
1321 | "
\n",
1322 | " \n",
1323 | " 5 | \n",
1324 | " 176561 | \n",
1325 | " Wired Headphones | \n",
1326 | " 1 | \n",
1327 | " 11.99 | \n",
1328 | " 2019-04-30 09:27:00 | \n",
1329 | " 333 8th St, Los Angeles, CA 90001 | \n",
1330 | " 4 | \n",
1331 | " 11.99 | \n",
1332 | " Los Angeles CA | \n",
1333 | " 9 | \n",
1334 | "
\n",
1335 | " \n",
1336 | "
\n",
1337 | "
"
1338 | ],
1339 | "text/plain": [
1340 | " Order ID Product Quantity Ordered Price Each \\\n",
1341 | "0 176558 USB-C Charging Cable 2 11.95 \n",
1342 | "2 176559 Bose SoundSport Headphones 1 99.99 \n",
1343 | "3 176560 Google Phone 1 600.00 \n",
1344 | "4 176560 Wired Headphones 1 11.99 \n",
1345 | "5 176561 Wired Headphones 1 11.99 \n",
1346 | "\n",
1347 | " Order Date Purchase Address Month Sales \\\n",
1348 | "0 2019-04-19 08:46:00 917 1st St, Dallas, TX 75001 4 23.90 \n",
1349 | "2 2019-04-07 22:30:00 682 Chestnut St, Boston, MA 02215 4 99.99 \n",
1350 | "3 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4 600.00 \n",
1351 | "4 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4 11.99 \n",
1352 | "5 2019-04-30 09:27:00 333 8th St, Los Angeles, CA 90001 4 11.99 \n",
1353 | "\n",
1354 | " City Hour \n",
1355 | "0 Dallas TX 8 \n",
1356 | "2 Boston MA 22 \n",
1357 | "3 Los Angeles CA 14 \n",
1358 | "4 Los Angeles CA 14 \n",
1359 | "5 Los Angeles CA 9 "
1360 | ]
1361 | },
1362 | "execution_count": 18,
1363 | "metadata": {},
1364 | "output_type": "execute_result"
1365 | }
1366 | ],
1367 | "source": [
1368 | "# Add an Hour column extracted from Order Date\n",
1369 | "all_data['Hour'] = all_data['Order Date'].dt.hour\n",
1370 | "all_data.head()"
1371 | ]
1372 | },
1373 | {
1374 | "cell_type": "code",
1375 | "execution_count": 19,
1376 | "metadata": {},
1377 | "outputs": [
1378 | {
1379 | "data": {
1380 | "text/html": [
1381 | "\n",
1382 | "\n",
1395 | "
\n",
1396 | " \n",
1397 | " \n",
1398 | " | \n",
1399 | " Order ID | \n",
1400 | " Product | \n",
1401 | " Quantity Ordered | \n",
1402 | " Price Each | \n",
1403 | " Order Date | \n",
1404 | " Purchase Address | \n",
1405 | " Month | \n",
1406 | " Sales | \n",
1407 | " City | \n",
1408 | " Hour | \n",
1409 | " Minute | \n",
1410 | "
\n",
1411 | " \n",
1412 | " \n",
1413 | " \n",
1414 | " 0 | \n",
1415 | " 176558 | \n",
1416 | " USB-C Charging Cable | \n",
1417 | " 2 | \n",
1418 | " 11.95 | \n",
1419 | " 2019-04-19 08:46:00 | \n",
1420 | " 917 1st St, Dallas, TX 75001 | \n",
1421 | " 4 | \n",
1422 | " 23.90 | \n",
1423 | " Dallas TX | \n",
1424 | " 8 | \n",
1425 | " 46 | \n",
1426 | "
\n",
1427 | " \n",
1428 | " 2 | \n",
1429 | " 176559 | \n",
1430 | " Bose SoundSport Headphones | \n",
1431 | " 1 | \n",
1432 | " 99.99 | \n",
1433 | " 2019-04-07 22:30:00 | \n",
1434 | " 682 Chestnut St, Boston, MA 02215 | \n",
1435 | " 4 | \n",
1436 | " 99.99 | \n",
1437 | " Boston MA | \n",
1438 | " 22 | \n",
1439 | " 30 | \n",
1440 | "
\n",
1441 | " \n",
1442 | " 3 | \n",
1443 | " 176560 | \n",
1444 | " Google Phone | \n",
1445 | " 1 | \n",
1446 | " 600.00 | \n",
1447 | " 2019-04-12 14:38:00 | \n",
1448 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
1449 | " 4 | \n",
1450 | " 600.00 | \n",
1451 | " Los Angeles CA | \n",
1452 | " 14 | \n",
1453 | " 38 | \n",
1454 | "
\n",
1455 | " \n",
1456 | " 4 | \n",
1457 | " 176560 | \n",
1458 | " Wired Headphones | \n",
1459 | " 1 | \n",
1460 | " 11.99 | \n",
1461 | " 2019-04-12 14:38:00 | \n",
1462 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
1463 | " 4 | \n",
1464 | " 11.99 | \n",
1465 | " Los Angeles CA | \n",
1466 | " 14 | \n",
1467 | " 38 | \n",
1468 | "
\n",
1469 | " \n",
1470 | " 5 | \n",
1471 | " 176561 | \n",
1472 | " Wired Headphones | \n",
1473 | " 1 | \n",
1474 | " 11.99 | \n",
1475 | " 2019-04-30 09:27:00 | \n",
1476 | " 333 8th St, Los Angeles, CA 90001 | \n",
1477 | " 4 | \n",
1478 | " 11.99 | \n",
1479 | " Los Angeles CA | \n",
1480 | " 9 | \n",
1481 | " 27 | \n",
1482 | "
\n",
1483 | " \n",
1484 | "
\n",
1485 | "
"
1486 | ],
1487 | "text/plain": [
1488 | " Order ID Product Quantity Ordered Price Each \\\n",
1489 | "0 176558 USB-C Charging Cable 2 11.95 \n",
1490 | "2 176559 Bose SoundSport Headphones 1 99.99 \n",
1491 | "3 176560 Google Phone 1 600.00 \n",
1492 | "4 176560 Wired Headphones 1 11.99 \n",
1493 | "5 176561 Wired Headphones 1 11.99 \n",
1494 | "\n",
1495 | " Order Date Purchase Address Month Sales \\\n",
1496 | "0 2019-04-19 08:46:00 917 1st St, Dallas, TX 75001 4 23.90 \n",
1497 | "2 2019-04-07 22:30:00 682 Chestnut St, Boston, MA 02215 4 99.99 \n",
1498 | "3 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4 600.00 \n",
1499 | "4 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4 11.99 \n",
1500 | "5 2019-04-30 09:27:00 333 8th St, Los Angeles, CA 90001 4 11.99 \n",
1501 | "\n",
1502 | " City Hour Minute \n",
1503 | "0 Dallas TX 8 46 \n",
1504 | "2 Boston MA 22 30 \n",
1505 | "3 Los Angeles CA 14 38 \n",
1506 | "4 Los Angeles CA 14 38 \n",
1507 | "5 Los Angeles CA 9 27 "
1508 | ]
1509 | },
1510 | "execution_count": 19,
1511 | "metadata": {},
1512 | "output_type": "execute_result"
1513 | }
1514 | ],
1515 | "source": [
1516 | "# Add a Minute column extracted from Order Date\n",
1517 | "all_data['Minute'] = all_data['Order Date'].dt.minute\n",
1518 | "all_data.head()"
1519 | ]
1520 | },
1521 | {
1522 | "cell_type": "code",
1523 | "execution_count": 20,
1524 | "metadata": {},
1525 | "outputs": [
1526 | {
1527 | "data": {
1528 | "text/plain": [
1529 | "[,\n",
1530 | " ,\n",
1531 | " ,\n",
1532 | " ,\n",
1533 | " ,\n",
1534 | " ,\n",
1535 | " ,\n",
1536 | " ,\n",
1537 | " ,\n",
1538 | " ]"
1539 | ]
1540 | },
1541 | "execution_count": 20,
1542 | "metadata": {},
1543 | "output_type": "execute_result"
1544 | },
1545 | {
1546 | "data": {
1547 | "image/png": "\n",
1548 | "text/plain": [
1549 | ""
1550 | ]
1551 | },
1552 | "metadata": {
1553 | "needs_background": "light"
1554 | },
1555 | "output_type": "display_data"
1556 | }
1557 | ],
1558 | "source": [
1559 | "import matplotlib.pyplot as plt\n",
1560 | "\n",
1561 | "hours = [hour for hour, df in all_data.groupby('Hour')]\n",
1562 | "\n",
1563 | "plt.xticks(hours)\n",
1564 | "plt.xlabel('Hour')\n",
1565 | "plt.ylabel('Number of Orders')\n",
1566 | "plt.grid()\n",
1567 | "plt.plot(hours, all_data.groupby(['Hour']).count())"
1568 | ]
1569 | },
1570 | {
1571 | "cell_type": "markdown",
1572 | "metadata": {},
1573 | "source": [
1574 | "#### What products are most often sold together?"
1575 | ]
1576 | },
1577 | {
1578 | "cell_type": "code",
1579 | "execution_count": 28,
1580 | "metadata": {},
1581 | "outputs": [
1582 | {
1583 | "name": "stderr",
1584 | "output_type": "stream",
1585 | "text": [
1586 | "C:\\Users\\petra\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
1587 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
1588 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
1589 | "\n",
1590 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
1591 | " \n"
1592 | ]
1593 | }
1594 | ],
1595 | "source": [
1596 | "# Keep only rows whose Order ID appears more than once (orders spanning several rows)\n",
1597 | "# https://stackoverflow.com/questions/43348194/pandas-select-rows-if-id-appear-several-time\n",
1598 | "df = all_data[all_data['Order ID'].duplicated(keep=False)]\n",
1599 | "\n",
1600 | "# Referenced: https://stackoverflow.com/questions/27298178/concatenate-strings-from-several-rows-using-pandas-groupby\n",
1601 | "df['Grouped'] = df.groupby('Order ID')['Product'].transform(lambda x: ','.join(x))\n",
1602 | "df2 = df[['Order ID', 'Grouped']].drop_duplicates()"
1603 | ]
1604 | },
1605 | {
1606 | "cell_type": "code",
1607 | "execution_count": 29,
1608 | "metadata": {
1609 | "collapsed": true,
1610 | "jupyter": {
1611 | "outputs_hidden": true
1612 | }
1613 | },
1614 | "outputs": [
1615 | {
1616 | "data": {
1617 | "text/html": [
1618 | "\n",
1619 | "\n",
1632 | "
\n",
1633 | " \n",
1634 | " \n",
1635 | " | \n",
1636 | " Order ID | \n",
1637 | " Product | \n",
1638 | " Quantity Ordered | \n",
1639 | " Price Each | \n",
1640 | " Order Date | \n",
1641 | " Purchase Address | \n",
1642 | " Month | \n",
1643 | " Sales | \n",
1644 | " City | \n",
1645 | " Hour | \n",
1646 | " Minute | \n",
1647 | " Grouped | \n",
1648 | "
\n",
1649 | " \n",
1650 | " \n",
1651 | " \n",
1652 | " 3 | \n",
1653 | " 176560 | \n",
1654 | " Google Phone | \n",
1655 | " 1 | \n",
1656 | " 600.00 | \n",
1657 | " 2019-04-12 14:38:00 | \n",
1658 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
1659 | " 4 | \n",
1660 | " 600.00 | \n",
1661 | " Los Angeles CA | \n",
1662 | " 14 | \n",
1663 | " 38 | \n",
1664 | " Google Phone,Wired Headphones | \n",
1665 | "
\n",
1666 | " \n",
1667 | " 4 | \n",
1668 | " 176560 | \n",
1669 | " Wired Headphones | \n",
1670 | " 1 | \n",
1671 | " 11.99 | \n",
1672 | " 2019-04-12 14:38:00 | \n",
1673 | " 669 Spruce St, Los Angeles, CA 90001 | \n",
1674 | " 4 | \n",
1675 | " 11.99 | \n",
1676 | " Los Angeles CA | \n",
1677 | " 14 | \n",
1678 | " 38 | \n",
1679 | " Google Phone,Wired Headphones | \n",
1680 | "
\n",
1681 | " \n",
1682 | " 18 | \n",
1683 | " 176574 | \n",
1684 | " Google Phone | \n",
1685 | " 1 | \n",
1686 | " 600.00 | \n",
1687 | " 2019-04-03 19:42:00 | \n",
1688 | " 20 Hill St, Los Angeles, CA 90001 | \n",
1689 | " 4 | \n",
1690 | " 600.00 | \n",
1691 | " Los Angeles CA | \n",
1692 | " 19 | \n",
1693 | " 42 | \n",
1694 | " Google Phone,USB-C Charging Cable | \n",
1695 | "
\n",
1696 | " \n",
1697 | " 19 | \n",
1698 | " 176574 | \n",
1699 | " USB-C Charging Cable | \n",
1700 | " 1 | \n",
1701 | " 11.95 | \n",
1702 | " 2019-04-03 19:42:00 | \n",
1703 | " 20 Hill St, Los Angeles, CA 90001 | \n",
1704 | " 4 | \n",
1705 | " 11.95 | \n",
1706 | " Los Angeles CA | \n",
1707 | " 19 | \n",
1708 | " 42 | \n",
1709 | " Google Phone,USB-C Charging Cable | \n",
1710 | "
\n",
1711 | " \n",
1712 | " 30 | \n",
1713 | " 176585 | \n",
1714 | " Bose SoundSport Headphones | \n",
1715 | " 1 | \n",
1716 | " 99.99 | \n",
1717 | " 2019-04-07 11:31:00 | \n",
1718 | " 823 Highland St, Boston, MA 02215 | \n",
1719 | " 4 | \n",
1720 | " 99.99 | \n",
1721 | " Boston MA | \n",
1722 | " 11 | \n",
1723 | " 31 | \n",
1724 | " Bose SoundSport Headphones,Bose SoundSport Hea... | \n",
1725 | "
\n",
1726 | " \n",
1727 | " 31 | \n",
1728 | " 176585 | \n",
1729 | " Bose SoundSport Headphones | \n",
1730 | " 1 | \n",
1731 | " 99.99 | \n",
1732 | " 2019-04-07 11:31:00 | \n",
1733 | " 823 Highland St, Boston, MA 02215 | \n",
1734 | " 4 | \n",
1735 | " 99.99 | \n",
1736 | " Boston MA | \n",
1737 | " 11 | \n",
1738 | " 31 | \n",
1739 | " Bose SoundSport Headphones,Bose SoundSport Hea... | \n",
1740 | "
\n",
1741 | " \n",
1742 | " 32 | \n",
1743 | " 176586 | \n",
1744 | " AAA Batteries (4-pack) | \n",
1745 | " 2 | \n",
1746 | " 2.99 | \n",
1747 | " 2019-04-10 17:00:00 | \n",
1748 | " 365 Center St, San Francisco, CA 94016 | \n",
1749 | " 4 | \n",
1750 | " 5.98 | \n",
1751 | " San Francisco CA | \n",
1752 | " 17 | \n",
1753 | " 0 | \n",
1754 | " AAA Batteries (4-pack),Google Phone | \n",
1755 | "
\n",
1756 | " \n",
1757 | " 33 | \n",
1758 | " 176586 | \n",
1759 | " Google Phone | \n",
1760 | " 1 | \n",
1761 | " 600.00 | \n",
1762 | " 2019-04-10 17:00:00 | \n",
1763 | " 365 Center St, San Francisco, CA 94016 | \n",
1764 | " 4 | \n",
1765 | " 600.00 | \n",
1766 | " San Francisco CA | \n",
1767 | " 17 | \n",
1768 | " 0 | \n",
1769 | " AAA Batteries (4-pack),Google Phone | \n",
1770 | "
\n",
1771 | " \n",
1772 | " 119 | \n",
1773 | " 176672 | \n",
1774 | " Lightning Charging Cable | \n",
1775 | " 1 | \n",
1776 | " 14.95 | \n",
1777 | " 2019-04-12 11:07:00 | \n",
1778 | " 778 Maple St, New York City, NY 10001 | \n",
1779 | " 4 | \n",
1780 | " 14.95 | \n",
1781 | " New York City NY | \n",
1782 | " 11 | \n",
1783 | " 7 | \n",
1784 | " Lightning Charging Cable,USB-C Charging Cable | \n",
1785 | "
\n",
1786 | " \n",
1787 | " 120 | \n",
1788 | " 176672 | \n",
1789 | " USB-C Charging Cable | \n",
1790 | " 1 | \n",
1791 | " 11.95 | \n",
1792 | " 2019-04-12 11:07:00 | \n",
1793 | " 778 Maple St, New York City, NY 10001 | \n",
1794 | " 4 | \n",
1795 | " 11.95 | \n",
1796 | " New York City NY | \n",
1797 | " 11 | \n",
1798 | " 7 | \n",
1799 | " Lightning Charging Cable,USB-C Charging Cable | \n",
1800 | "
\n",
1801 | " \n",
1802 | "
\n",
1803 | "
"
1804 | ],
1805 | "text/plain": [
1806 | " Order ID Product Quantity Ordered Price Each \\\n",
1807 | "3 176560 Google Phone 1 600.00 \n",
1808 | "4 176560 Wired Headphones 1 11.99 \n",
1809 | "18 176574 Google Phone 1 600.00 \n",
1810 | "19 176574 USB-C Charging Cable 1 11.95 \n",
1811 | "30 176585 Bose SoundSport Headphones 1 99.99 \n",
1812 | "31 176585 Bose SoundSport Headphones 1 99.99 \n",
1813 | "32 176586 AAA Batteries (4-pack) 2 2.99 \n",
1814 | "33 176586 Google Phone 1 600.00 \n",
1815 | "119 176672 Lightning Charging Cable 1 14.95 \n",
1816 | "120 176672 USB-C Charging Cable 1 11.95 \n",
1817 | "\n",
1818 | " Order Date Purchase Address Month \\\n",
1819 | "3 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4 \n",
1820 | "4 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4 \n",
1821 | "18 2019-04-03 19:42:00 20 Hill St, Los Angeles, CA 90001 4 \n",
1822 | "19 2019-04-03 19:42:00 20 Hill St, Los Angeles, CA 90001 4 \n",
1823 | "30 2019-04-07 11:31:00 823 Highland St, Boston, MA 02215 4 \n",
1824 | "31 2019-04-07 11:31:00 823 Highland St, Boston, MA 02215 4 \n",
1825 | "32 2019-04-10 17:00:00 365 Center St, San Francisco, CA 94016 4 \n",
1826 | "33 2019-04-10 17:00:00 365 Center St, San Francisco, CA 94016 4 \n",
1827 | "119 2019-04-12 11:07:00 778 Maple St, New York City, NY 10001 4 \n",
1828 | "120 2019-04-12 11:07:00 778 Maple St, New York City, NY 10001 4 \n",
1829 | "\n",
1830 | " Sales City Hour Minute \\\n",
1831 | "3 600.00 Los Angeles CA 14 38 \n",
1832 | "4 11.99 Los Angeles CA 14 38 \n",
1833 | "18 600.00 Los Angeles CA 19 42 \n",
1834 | "19 11.95 Los Angeles CA 19 42 \n",
1835 | "30 99.99 Boston MA 11 31 \n",
1836 | "31 99.99 Boston MA 11 31 \n",
1837 | "32 5.98 San Francisco CA 17 0 \n",
1838 | "33 600.00 San Francisco CA 17 0 \n",
1839 | "119 14.95 New York City NY 11 7 \n",
1840 | "120 11.95 New York City NY 11 7 \n",
1841 | "\n",
1842 | " Grouped \n",
1843 | "3 Google Phone,Wired Headphones \n",
1844 | "4 Google Phone,Wired Headphones \n",
1845 | "18 Google Phone,USB-C Charging Cable \n",
1846 | "19 Google Phone,USB-C Charging Cable \n",
1847 | "30 Bose SoundSport Headphones,Bose SoundSport Hea... \n",
1848 | "31 Bose SoundSport Headphones,Bose SoundSport Hea... \n",
1849 | "32 AAA Batteries (4-pack),Google Phone \n",
1850 | "33 AAA Batteries (4-pack),Google Phone \n",
1851 | "119 Lightning Charging Cable,USB-C Charging Cable \n",
1852 | "120 Lightning Charging Cable,USB-C Charging Cable "
1853 | ]
1854 | },
1855 | "execution_count": 29,
1856 | "metadata": {},
1857 | "output_type": "execute_result"
1858 | }
1859 | ],
1860 | "source": [
1861 | "df.head(10)"
1862 | ]
1863 | },
1864 | {
1865 | "cell_type": "code",
1866 | "execution_count": 38,
1867 | "metadata": {},
1868 | "outputs": [
1869 | {
1870 | "name": "stdout",
1871 | "output_type": "stream",
1872 | "text": [
1873 | "('iPhone', 'Lightning Charging Cable') 2140\n",
1874 | "('Google Phone', 'USB-C Charging Cable') 2116\n",
1875 | "('iPhone', 'Wired Headphones') 987\n",
1876 | "('Google Phone', 'Wired Headphones') 949\n",
1877 | "('iPhone', 'Apple Airpods Headphones') 799\n",
1878 | "('Vareebadd Phone', 'USB-C Charging Cable') 773\n",
1879 | "('Google Phone', 'Bose SoundSport Headphones') 503\n",
1880 | "('USB-C Charging Cable', 'Wired Headphones') 452\n",
1881 | "('Vareebadd Phone', 'Wired Headphones') 327\n",
1882 | "('Lightning Charging Cable', 'Wired Headphones') 253\n"
1883 | ]
1884 | }
1885 | ],
1886 | "source": [
1887 | "from itertools import combinations\n",
1888 | "from collections import Counter\n",
1889 | "\n",
1890 | "count = Counter()\n",
1891 | "\n",
1892 | "for row in df['Grouped']:\n",
1893 | " row_list = row.split(',')\n",
1894 | " count.update(Counter(combinations(row_list, 2)))\n",
1895 | "\n",
1896 | "for key, value in count.most_common(10):\n",
1897 | " print(key, value)"
1898 | ]
1899 | },
1900 | {
1901 | "cell_type": "markdown",
1902 | "metadata": {},
1903 | "source": [
1904 | "#### 5. What product sold the most? Why do you think it sold the most?"
1905 | ]
1906 | },
1907 | {
1908 | "cell_type": "code",
1909 | "execution_count": 50,
1910 | "metadata": {},
1911 | "outputs": [],
1912 | "source": [
1913 | "product_group = all_data.groupby('Product')\n",
1914 | "quantity_ordered = product_group.sum()['Quantity Ordered']"
1915 | ]
1916 | },
1917 | {
1918 | "cell_type": "code",
1919 | "execution_count": 51,
1920 | "metadata": {},
1921 | "outputs": [
1922 | {
1923 | "data": {
1924 | "text/plain": [
1925 | "Product\n",
1926 | "20in Monitor 4129\n",
1927 | "27in 4K Gaming Monitor 6244\n",
1928 | "27in FHD Monitor 7550\n",
1929 | "34in Ultrawide Monitor 6199\n",
1930 | "AA Batteries (4-pack) 27635\n",
1931 | "AAA Batteries (4-pack) 31017\n",
1932 | "Apple Airpods Headphones 15661\n",
1933 | "Bose SoundSport Headphones 13457\n",
1934 | "Flatscreen TV 4819\n",
1935 | "Google Phone 5532\n",
1936 | "LG Dryer 646\n",
1937 | "LG Washing Machine 666\n",
1938 | "Lightning Charging Cable 23217\n",
1939 | "Macbook Pro Laptop 4728\n",
1940 | "ThinkPad Laptop 4130\n",
1941 | "USB-C Charging Cable 23975\n",
1942 | "Vareebadd Phone 2068\n",
1943 | "Wired Headphones 20557\n",
1944 | "iPhone 6849\n",
1945 | "Name: Quantity Ordered, dtype: int64"
1946 | ]
1947 | },
1948 | "execution_count": 51,
1949 | "metadata": {},
1950 | "output_type": "execute_result"
1951 | }
1952 | ],
1953 | "source": [
1954 | "quantity_ordered"
1955 | ]
1956 | },
1957 | {
1958 | "cell_type": "code",
1959 | "execution_count": 53,
1960 | "metadata": {},
1961 | "outputs": [
1962 | {
1963 | "data": {
1964 | "image/png": "\n",
1965 | "text/plain": [
1966 | ""
1967 | ]
1968 | },
1969 | "metadata": {
1970 | "needs_background": "light"
1971 | },
1972 | "output_type": "display_data"
1973 | }
1974 | ],
1975 | "source": [
1976 | "products = [product for product, df in product_group]\n",
1977 | "\n",
1978 | "plt.bar(products, quantity_ordered)\n",
1979 | "plt.xticks(products, rotation ='vertical', size=8)\n",
1980 | "plt.xlabel('Product')\n",
1981 | "plt.ylabel('Quantity Ordered')\n",
1982 | "plt.show()"
1983 | ]
1984 | },
1985 | {
1986 | "cell_type": "code",
1987 | "execution_count": 58,
1988 | "metadata": {},
1989 | "outputs": [
1990 | {
1991 | "name": "stderr",
1992 | "output_type": "stream",
1993 | "text": [
1994 | "C:\\Users\\petra\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:16: UserWarning: Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.\n",
1995 | " app.launch_new_instance()\n"
1996 | ]
1997 | },
1998 | {
1999 | "data": {
2000 | "image/png": "\n",
2001 | "text/plain": [
2002 | ""
2003 | ]
2004 | },
2005 | "metadata": {
2006 | "needs_background": "light"
2007 | },
2008 | "output_type": "display_data"
2009 | }
2010 | ],
2011 | "source": [
2012 | "# Referenced: https://stackoverflow.com/questions/14762181/adding-a-y-axis-label-to-secondary-y-axis-in-matplotlib\n",
2013 | "\n",
2014 | "prices = all_data.groupby('Product').mean()['Price Each']\n",
2015 | "\n",
2016 | "fig, ax1 = plt.subplots()\n",
2017 | "\n",
2018 | "ax2 = ax1.twinx()\n",
2019 | "ax1.bar(products, quantity_ordered, color='g')\n",
2020 | "ax2.plot(products, prices, color='b')\n",
2021 | "\n",
2022 | "ax1.set_xlabel('Product Name')\n",
2023 | "ax1.set_ylabel('Quantity Ordered', color='g')\n",
2024 | "ax2.set_ylabel('Price ($)', color='b')\n",
2025 | "ax1.set_xticklabels(products, rotation='vertical', size=8)\n",
2026 | "\n",
2027 | "fig.show()"
2028 | ]
2029 | },
2030 | {
2031 | "cell_type": "code",
2032 | "execution_count": null,
2033 | "metadata": {},
2034 | "outputs": [],
2035 | "source": []
2036 | }
2037 | ],
2038 | "metadata": {
2039 | "kernelspec": {
2040 | "display_name": "Python 3",
2041 | "language": "python",
2042 | "name": "python3"
2043 | },
2044 | "language_info": {
2045 | "codemirror_mode": {
2046 | "name": "ipython",
2047 | "version": 3
2048 | },
2049 | "file_extension": ".py",
2050 | "mimetype": "text/x-python",
2051 | "name": "python",
2052 | "nbconvert_exporter": "python",
2053 | "pygments_lexer": "ipython3",
2054 | "version": "3.7.4"
2055 | }
2056 | },
2057 | "nbformat": 4,
2058 | "nbformat_minor": 4
2059 | }
2060 |
--------------------------------------------------------------------------------
/SalesAnalysis/Sales_Data_Analysis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PetraLee2019/Python-Sales-Data-Analysis/75746fb651d101834ecc4301bd6cc5907e57a7bc/SalesAnalysis/Sales_Data_Analysis.png
--------------------------------------------------------------------------------