├── Advance RDDs.ipynb
├── Distributed Variables.ipynb
├── End-To-End Example.ipynb
├── Iphone Data Analysis.ipynb
├── Joins.ipynb
├── Lower Level APIs.ipynb
├── Project 2 - ECommerce Data Analysis Azure Data Engineering
├── Broze_Layer.ipynb
├── Gold Layer.ipynb
├── Silver_Layer.ipynb
└── data
│ ├── 6M-0K-99K.users.dataset.public.csv
│ ├── Buyers-repartition-by-country.csv
│ ├── Comparison-of-Sellers-by-Gender-and-Country.csv
│ ├── Countries-with-Top-Sellers-(Fashion-C2C).csv
│ ├── chunk-data
│ ├── chunk1.csv
│ ├── chunk10.csv
│ ├── chunk2.csv
│ ├── chunk3.csv
│ ├── chunk4.csv
│ ├── chunk5.csv
│ ├── chunk6.csv
│ ├── chunk7.csv
│ ├── chunk8.csv
│ └── chunk9.csv
│ ├── chunk-user-data.ipynb
│ └── users.6M0xxK.2024.public.csv
├── README.md
├── Spark Data Source.ipynb
├── Spark Deployment.ipynb
├── Spark SQL.ipynb
├── Spotify Data Pipeline using Spark
├── (python) spotify_transformation_load_function.py
├── (spark) spotify_transformation.py
├── spotify_api_data_extract.py
└── spotipy_layer.zip
├── Structured API Overview.ipynb
├── Working with Different Types of Data.ipynb
├── data
├── apple_data
│ ├── apple_products.csv
│ └── output.csv
│ │ ├── Product Name=APPLE iPhone 11 (Black, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 (Black, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 (Purple, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 (Red, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 (Red, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 (White, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 (White, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 Pro (Midnight Green, 512 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 Pro (Midnight Green, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 Pro (Space Grey, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 Pro (Space Grey, 512 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 Pro Max (Gold, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 Pro Max (Gold, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 11 Pro Max (Space Grey, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 (Black, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 (Black, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 (Blue, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 (Green, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 (Red, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 (White, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 (White, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Mini (Black, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Mini (Black, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Mini (Blue, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Mini (Red, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Mini (White, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Mini (White, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro (Graphite, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro (Graphite, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro (Pacific Blue, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro (Pacific Blue, 512 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro (Silver, 512 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro Max (Gold, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro Max (Gold, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro Max (Graphite, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro Max (Graphite, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro Max (Silver, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 12 Pro Max (Silver, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 8 (Gold, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 8 (Silver, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 8 (Space Grey, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 8 Plus (Gold, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 8 Plus (Silver, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 8 Plus (Silver, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 8 Plus (Space Grey, 256 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone 8 Plus (Space Grey, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone SE (Black, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone SE (Black, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone SE (Red, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone SE (White, 128 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone SE (White, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=APPLE iPhone XS Max (Silver, 64 GB)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=Apple iPhone SE (White, 256 GB) (Includes EarPods, Power Adapter)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=Apple iPhone XR ((PRODUCT)RED, 128 GB) (Includes EarPods, Power Adapter)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=Apple iPhone XR (Black, 128 GB) (Includes EarPods, Power Adapter)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=Apple iPhone XR (Black, 64 GB) (Includes EarPods, Power Adapter)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=Apple iPhone XR (Coral, 128 GB) (Includes EarPods, Power Adapter)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ ├── Product Name=Apple iPhone XR (White, 128 GB) (Includes EarPods, Power Adapter)
│ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
│ │ └── _SUCCESS
├── flight-data-hive
│ ├── _SUCCESS
│ ├── _committed_4721890993021653500
│ ├── _started_4721890993021653500
│ └── part-00000-tid-4721890993021653500-d8ef7f6b-e6e5-4451-af50-08281422f186-0-c000
├── flight-data
│ ├── csv
│ │ ├── 2010-summary.csv
│ │ ├── 2011-summary.csv
│ │ ├── 2012-summary.csv
│ │ ├── 2013-summary.csv
│ │ ├── 2014-summary.csv
│ │ └── 2015-summary.csv
│ ├── jdbc
│ │ └── my-sqlite.db
│ ├── json
│ │ ├── 2010-summary.json
│ │ ├── 2011-summary.json
│ │ ├── 2012-summary.json
│ │ ├── 2013-summary.json
│ │ ├── 2014-summary.json
│ │ └── 2015-summary.json
│ ├── orc
│ │ └── 2010-summary.orc
│ │ │ ├── _SUCCESS
│ │ │ └── part-r-00000-2c4f7d96-e703-4de3-af1b-1441d172c80f.snappy.orc
│ └── parquet
│ │ └── 2010-summary.parquet
│ │ ├── _SUCCESS
│ │ └── part-r-00000-1a9822ba-b8fb-4d8e-844a-ea30d0801b9e.gz.parquet
└── retail-data
│ ├── all
│ └── online-retail-dataset.numbers
│ └── by-day
│ ├── 2010-12-01.csv
│ ├── 2010-12-02.csv
│ ├── 2010-12-03.csv
│ ├── 2010-12-05.csv
│ ├── 2010-12-06.csv
│ ├── 2010-12-07.csv
│ ├── 2010-12-08.csv
│ ├── 2010-12-09.csv
│ ├── 2010-12-10.csv
│ ├── 2010-12-12.csv
│ ├── 2010-12-13.csv
│ └── 2010-12-14.csv
└── spark-docker
├── data
└── data.csv
├── docker-compose.yml
└── files
└── Untitled.ipynb
/Distributed Variables.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "eb1f7038-2e32-408f-ac14-600162264a0d",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "myCollection = \"My Name is Darshil and I love Spark\"\\\n",
11 | ".split(\" \")\n",
12 | "\n",
13 | "words = spark.sparkContext.parallelize(myCollection, 2)"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "id": "3377fcfc-36f2-4230-aadf-7aa04097692e",
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "supplementalData = {\"Name\":1000, \"Darshil\":200,\n",
24 | " \"love\":-300, \"Spark\":100}"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 3,
30 | "id": "f30ab6fb-0b2d-4d99-bb51-1d035c1871af",
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "suppBroadcast = spark.sparkContext.broadcast(supplementalData)"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 4,
40 | "id": "b03358a6-55ce-4472-8c8f-4f5d68f9913e",
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "data": {
45 | "text/plain": [
46 | "{'Name': 1000, 'Darshil': 200, 'love': -300, 'Spark': 100}"
47 | ]
48 | },
49 | "execution_count": 4,
50 | "metadata": {},
51 | "output_type": "execute_result"
52 | }
53 | ],
54 | "source": [
55 | "suppBroadcast.value"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 5,
61 | "id": "3abef626-9de9-4fc2-9484-3f8e8ac33f48",
62 | "metadata": {},
63 | "outputs": [
64 | {
65 | "data": {
66 | "text/plain": [
67 | "[('love', -300),\n",
68 | " ('My', 0),\n",
69 | " ('is', 0),\n",
70 | " ('and', 0),\n",
71 | " ('I', 0),\n",
72 | " ('Spark', 100),\n",
73 | " ('Darshil', 200),\n",
74 | " ('Name', 1000)]"
75 | ]
76 | },
77 | "execution_count": 5,
78 | "metadata": {},
79 | "output_type": "execute_result"
80 | }
81 | ],
82 | "source": [
83 | "words.map(lambda word: (word, suppBroadcast.value.get(word, 0)))\\\n",
84 | " .sortBy(lambda wordPair: wordPair[1])\\\n",
85 | " .collect()"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 6,
91 | "id": "7ddcebd4-1c19-43d7-ba90-cd68761214c5",
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "flights = spark.read.parquet(\"data/flight-data/parquet/2010-summary.parquet\")"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 7,
101 | "id": "54a006a6-d05e-40c4-917a-ff13bf4bd21f",
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "data": {
106 | "text/plain": [
107 | "[Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='Romania', count=1),\n",
108 | " Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='Ireland', count=264),\n",
109 | " Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='India', count=69),\n",
110 | " Row(DEST_COUNTRY_NAME='Egypt', ORIGIN_COUNTRY_NAME='United States', count=24),\n",
111 | " Row(DEST_COUNTRY_NAME='Equatorial Guinea', ORIGIN_COUNTRY_NAME='United States', count=1)]"
112 | ]
113 | },
114 | "execution_count": 7,
115 | "metadata": {},
116 | "output_type": "execute_result"
117 | }
118 | ],
119 | "source": [
120 | "flights.take(5)"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 22,
126 | "id": "a5ed45fc-5ab4-4001-a52b-d06a21e791de",
127 | "metadata": {},
128 | "outputs": [],
129 | "source": [
130 | "accChina = spark.sparkContext.accumulator(0)"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 23,
136 | "id": "97e52519-ae3e-4e60-a58d-666211376b70",
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "def accChinaFunc(flight_row):\n",
141 | "    destination = flight_row[\"DEST_COUNTRY_NAME\"]\n",
142 | "    origin = flight_row[\"ORIGIN_COUNTRY_NAME\"]\n",
143 | " \n",
144 | "    if destination == \"China\":\n",
145 | "        accChina.add(flight_row[\"count\"])\n",
146 | " \n",
147 | "    if origin == \"China\":\n",
148 | "        accChina.add(flight_row[\"count\"])"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": 24,
154 | "id": "2e466874-4444-474c-b021-f176e96b4119",
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "flights.foreach(lambda flight_row: accChinaFunc(flight_row))"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 25,
164 | "id": "39894f17-4cff-4cc5-9b86-88f1fc4fc96b",
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "data": {
169 | "text/plain": [
170 | "953"
171 | ]
172 | },
173 | "execution_count": 25,
174 | "metadata": {},
175 | "output_type": "execute_result"
176 | }
177 | ],
178 | "source": [
179 | "accChina.value"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "id": "c600f0a8-f9bb-4289-8698-19536f3e64bb",
186 | "metadata": {},
187 | "outputs": [],
188 | "source": []
189 | }
190 | ],
191 | "metadata": {
192 | "kernelspec": {
193 | "display_name": "Python 3 (ipykernel)",
194 | "language": "python",
195 | "name": "python3"
196 | },
197 | "language_info": {
198 | "codemirror_mode": {
199 | "name": "ipython",
200 | "version": 3
201 | },
202 | "file_extension": ".py",
203 | "mimetype": "text/x-python",
204 | "name": "python",
205 | "nbconvert_exporter": "python",
206 | "pygments_lexer": "ipython3",
207 | "version": "3.12.1"
208 | }
209 | },
210 | "nbformat": 4,
211 | "nbformat_minor": 5
212 | }
213 |
--------------------------------------------------------------------------------
/End-To-End Example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "3c1c59a1-7047-41f5-835a-0176d1e8b2e6",
7 | "metadata": {},
8 | "outputs": [
9 | {
10 | "data": {
11 | "text/html": [
12 | "\n",
13 | "
\n",
14 | "
SparkSession - hive
\n",
15 | " \n",
16 | "
\n",
17 | "
SparkContext
\n",
18 | "\n",
19 | "
Spark UI
\n",
20 | "\n",
21 | "
\n",
22 | " - Version
\n",
23 | " v3.5.0
\n",
24 | " - Master
\n",
25 | " local[*]
\n",
26 | " - AppName
\n",
27 | " PySparkShell
\n",
28 | "
\n",
29 | "
\n",
30 | " \n",
31 | "
\n",
32 | " "
33 | ],
34 | "text/plain": [
35 | ""
36 | ]
37 | },
38 | "execution_count": 1,
39 | "metadata": {},
40 | "output_type": "execute_result"
41 | }
42 | ],
43 | "source": [
44 | "spark"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 23,
50 | "id": "f6d9acb1-158a-4d2c-9bc1-9e457c58112d",
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "flightData2015 = spark.read.option(\"inferSchema\",\"true\").option(\"header\",\"true\").csv(\"data/flight-data/csv/2015-summary.csv\")"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 24,
60 | "id": "32881bb3-07d4-4a3a-be42-79fb0c873e0f",
61 | "metadata": {},
62 | "outputs": [
63 | {
64 | "data": {
65 | "text/plain": [
66 | "[Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='Romania', count=15),\n",
67 | " Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='Croatia', count=1),\n",
68 | " Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='Ireland', count=344)]"
69 | ]
70 | },
71 | "execution_count": 24,
72 | "metadata": {},
73 | "output_type": "execute_result"
74 | }
75 | ],
76 | "source": [
77 | "flightData2015.take(3)"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 4,
83 | "id": "2400c85f-c5df-400c-b171-3a8bc4c03849",
84 | "metadata": {},
85 | "outputs": [
86 | {
87 | "data": {
88 | "text/plain": [
89 | "DataFrame[DEST_COUNTRY_NAME: string, ORIGIN_COUNTRY_NAME: string, count: int]"
90 | ]
91 | },
92 | "execution_count": 4,
93 | "metadata": {},
94 | "output_type": "execute_result"
95 | }
96 | ],
97 | "source": [
98 | "flightData2015.sort(\"count\")"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 5,
104 | "id": "b026a257-91f9-49fa-88e0-fa17c232a9f1",
105 | "metadata": {},
106 | "outputs": [
107 | {
108 | "name": "stdout",
109 | "output_type": "stream",
110 | "text": [
111 | "== Physical Plan ==\n",
112 | "AdaptiveSparkPlan isFinalPlan=false\n",
113 | "+- Sort [count#19 ASC NULLS FIRST], true, 0\n",
114 | " +- Exchange rangepartitioning(count#19 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [plan_id=33]\n",
115 | " +- FileScan csv [DEST_COUNTRY_NAME#17,ORIGIN_COUNTRY_NAME#18,count#19] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/Users/darshil/Documents/DataWithDarshil/Apache Spark with DataBr..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<DEST_COUNTRY_NAME:string,ORIGIN_COUNTRY_NAME:string,count:int>\n",
116 | "\n",
117 | "\n"
118 | ]
119 | }
120 | ],
121 | "source": [
122 | "flightData2015.sort(\"count\").explain()"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 9,
128 | "id": "7037d05f-98bd-48c5-a8cd-01a3513afc2a",
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "name": "stdout",
133 | "output_type": "stream",
134 | "text": [
135 | "+--------------------+-------------------+-----+\n",
136 | "| DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|\n",
137 | "+--------------------+-------------------+-----+\n",
138 | "| United States| Estonia| 1|\n",
139 | "| Kosovo| United States| 1|\n",
140 | "| Zambia| United States| 1|\n",
141 | "| United States| Papua New Guinea| 1|\n",
142 | "| Malta| United States| 1|\n",
143 | "| United States| Gibraltar| 1|\n",
144 | "| Suriname| United States| 1|\n",
145 | "| United States| Croatia| 1|\n",
146 | "| Djibouti| United States| 1|\n",
147 | "| Burkina Faso| United States| 1|\n",
148 | "|Saint Vincent and...| United States| 1|\n",
149 | "| United States| Cyprus| 1|\n",
150 | "| United States| Singapore| 1|\n",
151 | "| Moldova| United States| 1|\n",
152 | "| Cyprus| United States| 1|\n",
153 | "| United States| Lithuania| 1|\n",
154 | "| United States| Bulgaria| 1|\n",
155 | "| United States| Georgia| 1|\n",
156 | "| United States| Bahrain| 1|\n",
157 | "| Cote d'Ivoire| United States| 1|\n",
158 | "+--------------------+-------------------+-----+\n",
159 | "only showing top 20 rows\n",
160 | "\n"
161 | ]
162 | }
163 | ],
164 | "source": [
165 | "flightData2015.sort(\"count\").show()"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": 10,
171 | "id": "f59f2d98-dab9-48a1-8c27-a3ea0101d2ef",
172 | "metadata": {},
173 | "outputs": [],
174 | "source": [
175 | "flightData2015.createOrReplaceTempView(\"flight_data_2015\")"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 11,
181 | "id": "05b932ca-7e30-4e2d-804d-1b79343dcfdd",
182 | "metadata": {},
183 | "outputs": [],
184 | "source": [
185 | "sqlWay = spark.sql(\"\"\"\n",
186 | " SELECT DEST_COUNTRY_NAME, count(1)\n",
187 | " FROM flight_data_2015\n",
188 | " GROUP BY DEST_COUNTRY_NAME\n",
189 | "\"\"\")"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 12,
195 | "id": "435e58c9-a45b-478e-95f7-e055a7a63db2",
196 | "metadata": {},
197 | "outputs": [
198 | {
199 | "name": "stdout",
200 | "output_type": "stream",
201 | "text": [
202 | "== Physical Plan ==\n",
203 | "AdaptiveSparkPlan isFinalPlan=false\n",
204 | "+- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[count(1)])\n",
205 | " +- Exchange hashpartitioning(DEST_COUNTRY_NAME#17, 200), ENSURE_REQUIREMENTS, [plan_id=73]\n",
206 | " +- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[partial_count(1)])\n",
207 | " +- FileScan csv [DEST_COUNTRY_NAME#17] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/Users/darshil/Documents/DataWithDarshil/Apache Spark with DataBr..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<DEST_COUNTRY_NAME:string>\n",
208 | "\n",
209 | "\n"
210 | ]
211 | }
212 | ],
213 | "source": [
214 | "sqlWay.explain()"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": 13,
220 | "id": "ad06731d-581f-4319-97b6-2498bc9525bf",
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "dataFrameWay = flightData2015\\\n",
225 | " .groupBy(\"DEST_COUNTRY_NAME\")\\\n",
226 | " .count()"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": 14,
232 | "id": "1e87c25e-63b6-478f-b138-907c687978b3",
233 | "metadata": {},
234 | "outputs": [
235 | {
236 | "name": "stdout",
237 | "output_type": "stream",
238 | "text": [
239 | "== Physical Plan ==\n",
240 | "AdaptiveSparkPlan isFinalPlan=false\n",
241 | "+- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[count(1)])\n",
242 | " +- Exchange hashpartitioning(DEST_COUNTRY_NAME#17, 200), ENSURE_REQUIREMENTS, [plan_id=86]\n",
243 | " +- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[partial_count(1)])\n",
244 | " +- FileScan csv [DEST_COUNTRY_NAME#17] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/Users/darshil/Documents/DataWithDarshil/Apache Spark with DataBr..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<DEST_COUNTRY_NAME:string>\n",
245 | "\n",
246 | "\n"
247 | ]
248 | }
249 | ],
250 | "source": [
251 | "dataFrameWay.explain()"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 15,
257 | "id": "5bc7f565-2a3d-4188-b5e4-39e9c47c2ff0",
258 | "metadata": {},
259 | "outputs": [
260 | {
261 | "data": {
262 | "text/plain": [
263 | "[Row(max(count)=370002)]"
264 | ]
265 | },
266 | "execution_count": 15,
267 | "metadata": {},
268 | "output_type": "execute_result"
269 | }
270 | ],
271 | "source": [
272 | "spark.sql(\"SELECT max(count) from flight_data_2015\").take(1)"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": 16,
278 | "id": "bc19c214-7926-4b49-92e0-f97dea7dd0a5",
279 | "metadata": {},
280 | "outputs": [
281 | {
282 | "data": {
283 | "text/plain": [
284 | "[Row(max(count)=370002)]"
285 | ]
286 | },
287 | "execution_count": 16,
288 | "metadata": {},
289 | "output_type": "execute_result"
290 | }
291 | ],
292 | "source": [
293 | "from pyspark.sql.functions import max\n",
294 | "flightData2015.select(max(\"count\")).take(1)"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": 17,
300 | "id": "e94f82af-ec84-4b4f-a8a3-3bbaa1da42d8",
301 | "metadata": {},
302 | "outputs": [
303 | {
304 | "name": "stdout",
305 | "output_type": "stream",
306 | "text": [
307 | "+-----------------+-----------------+\n",
308 | "|DEST_COUNTRY_NAME|destination_total|\n",
309 | "+-----------------+-----------------+\n",
310 | "| United States| 411352|\n",
311 | "| Canada| 8399|\n",
312 | "| Mexico| 7140|\n",
313 | "| United Kingdom| 2025|\n",
314 | "| Japan| 1548|\n",
315 | "+-----------------+-----------------+\n",
316 | "\n"
317 | ]
318 | }
319 | ],
320 | "source": [
321 | "maxSql = spark.sql(\"\"\"\n",
322 | "SELECT DEST_COUNTRY_NAME, sum(count) as destination_total\n",
323 | "FROM flight_data_2015\n",
324 | "GROUP BY DEST_COUNTRY_NAME\n",
325 | "ORDER BY sum(count) DESC\n",
326 | "LIMIT 5\n",
327 | "\"\"\")\n",
328 | "\n",
329 | "maxSql.show()"
330 | ]
331 | },
332 | {
333 | "cell_type": "code",
334 | "execution_count": 18,
335 | "id": "4b5966c6-3df6-4798-91ce-ef728130c30e",
336 | "metadata": {},
337 | "outputs": [
338 | {
339 | "name": "stdout",
340 | "output_type": "stream",
341 | "text": [
342 | "+-----------------+-----------------+\n",
343 | "|DEST_COUNTRY_NAME|destination_total|\n",
344 | "+-----------------+-----------------+\n",
345 | "| United States| 411352|\n",
346 | "| Canada| 8399|\n",
347 | "| Mexico| 7140|\n",
348 | "| United Kingdom| 2025|\n",
349 | "| Japan| 1548|\n",
350 | "+-----------------+-----------------+\n",
351 | "\n"
352 | ]
353 | }
354 | ],
355 | "source": [
356 | "from pyspark.sql.functions import desc\n",
357 | "\n",
358 | "flightData2015\\\n",
359 | " .groupBy(\"DEST_COUNTRY_NAME\")\\\n",
360 | " .sum(\"count\")\\\n",
361 | " .withColumnRenamed(\"sum(count)\", \"destination_total\")\\\n",
362 | " .sort(desc(\"destination_total\"))\\\n",
363 | " .limit(5)\\\n",
364 | " .show()"
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "execution_count": 19,
370 | "id": "bd525c3f-bd0f-4574-adab-374b13a341ba",
371 | "metadata": {},
372 | "outputs": [
373 | {
374 | "name": "stdout",
375 | "output_type": "stream",
376 | "text": [
377 | "== Physical Plan ==\n",
378 | "AdaptiveSparkPlan isFinalPlan=false\n",
379 | "+- TakeOrderedAndProject(limit=5, orderBy=[destination_total#135L DESC NULLS LAST], output=[DEST_COUNTRY_NAME#17,destination_total#135L])\n",
380 | " +- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[sum(count#19)])\n",
381 | " +- Exchange hashpartitioning(DEST_COUNTRY_NAME#17, 200), ENSURE_REQUIREMENTS, [plan_id=256]\n",
382 | " +- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[partial_sum(count#19)])\n",
383 | " +- FileScan csv [DEST_COUNTRY_NAME#17,count#19] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/Users/darshil/Documents/DataWithDarshil/Apache Spark with DataBr..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<DEST_COUNTRY_NAME:string,count:int>\n",
384 | "\n",
385 | "\n"
386 | ]
387 | }
388 | ],
389 | "source": [
390 | "flightData2015\\\n",
391 | " .groupBy(\"DEST_COUNTRY_NAME\")\\\n",
392 | " .sum(\"count\")\\\n",
393 | " .withColumnRenamed(\"sum(count)\", \"destination_total\")\\\n",
394 | " .sort(desc(\"destination_total\"))\\\n",
395 | " .limit(5)\\\n",
396 | " .explain()"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": null,
402 | "id": "454fa12e-5967-4ec6-a04d-66c7ab546d93",
403 | "metadata": {},
404 | "outputs": [],
405 | "source": []
406 | }
407 | ],
408 | "metadata": {
409 | "kernelspec": {
410 | "display_name": "Python 3 (ipykernel)",
411 | "language": "python",
412 | "name": "python3"
413 | },
414 | "language_info": {
415 | "codemirror_mode": {
416 | "name": "ipython",
417 | "version": 3
418 | },
419 | "file_extension": ".py",
420 | "mimetype": "text/x-python",
421 | "name": "python",
422 | "nbconvert_exporter": "python",
423 | "pygments_lexer": "ipython3",
424 | "version": "3.12.0"
425 | }
426 | },
427 | "nbformat": 4,
428 | "nbformat_minor": 5
429 | }
430 |
--------------------------------------------------------------------------------
/Joins.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 5,
6 | "id": "8a691f86-74bc-44d5-8e4d-83b97d6116bb",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "person = spark.createDataFrame([\n",
11 | " (0, \"Bill Chambers\", 0, [100]),\n",
12 | " (1, \"Matei Zaharia\", 1, [500, 250, 100]),\n",
13 | " (2, \"Michael Armbrust\", 1, [250, 100])])\\\n",
14 | ".toDF(\"id\", \"name\", \"graduate_program\", \"spark_status\")\n",
15 | "\n",
16 | "\n",
17 | "graduateProgram = spark.createDataFrame([\n",
18 | " (0, \"Masters\", \"School of Information\", \"UC Berkeley\"),\n",
19 | " (2, \"Masters\", \"EECS\", \"UC Berkeley\"),\n",
20 | " (1, \"Ph.D.\", \"EECS\", \"UC Berkeley\")])\\\n",
21 | ".toDF(\"id\", \"degree\", \"department\", \"school\")\n",
22 | "\n",
23 | "sparkStatus = spark.createDataFrame([\n",
24 | " (500, \"Vice President\"),\n",
25 | " (250, \"PMC Member\"),\n",
26 | " (100, \"Contributor\")])\\\n",
27 | ".toDF(\"id\", \"status\")"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 6,
33 | "id": "ea0a2875-c151-4c08-8232-2e64ce881bae",
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stdout",
38 | "output_type": "stream",
39 | "text": [
40 | "+---+----------------+----------------+---------------+\n",
41 | "| id| name|graduate_program| spark_status|\n",
42 | "+---+----------------+----------------+---------------+\n",
43 | "| 0| Bill Chambers| 0| [100]|\n",
44 | "| 1| Matei Zaharia| 1|[500, 250, 100]|\n",
45 | "| 2|Michael Armbrust| 1| [250, 100]|\n",
46 | "+---+----------------+----------------+---------------+\n",
47 | "\n",
48 | "+---+-------+--------------------+-----------+\n",
49 | "| id| degree| department| school|\n",
50 | "+---+-------+--------------------+-----------+\n",
51 | "| 0|Masters|School of Informa...|UC Berkeley|\n",
52 | "| 2|Masters| EECS|UC Berkeley|\n",
53 | "| 1| Ph.D.| EECS|UC Berkeley|\n",
54 | "+---+-------+--------------------+-----------+\n",
55 | "\n",
56 | "+---+--------------+\n",
57 | "| id| status|\n",
58 | "+---+--------------+\n",
59 | "|500|Vice President|\n",
60 | "|250| PMC Member|\n",
61 | "|100| Contributor|\n",
62 | "+---+--------------+\n",
63 | "\n"
64 | ]
65 | }
66 | ],
67 | "source": [
68 | "person.show()\n",
69 | "graduateProgram.show()\n",
70 | "sparkStatus.show()"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 7,
76 | "id": "86338047-c384-46a7-8b24-75098978bb96",
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "joinExpression = person[\"graduate_program\"] == graduateProgram['id']"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 8,
86 | "id": "7b61803d-a8d7-4eac-a4ad-65987164645b",
87 | "metadata": {},
88 | "outputs": [
89 | {
90 | "name": "stdout",
91 | "output_type": "stream",
92 | "text": [
93 | "+---+----------------+----------------+---------------+---+-------+--------------------+-----------+\n",
94 | "| id| name|graduate_program| spark_status| id| degree| department| school|\n",
95 | "+---+----------------+----------------+---------------+---+-------+--------------------+-----------+\n",
96 | "| 0| Bill Chambers| 0| [100]| 0|Masters|School of Informa...|UC Berkeley|\n",
97 | "| 1| Matei Zaharia| 1|[500, 250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n",
98 | "| 2|Michael Armbrust| 1| [250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n",
99 | "+---+----------------+----------------+---------------+---+-------+--------------------+-----------+\n",
100 | "\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "person.join(graduateProgram, joinExpression).show()"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 9,
111 | "id": "99a6a32f-b527-4669-b3f5-3eca1e87ddde",
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "name": "stdout",
116 | "output_type": "stream",
117 | "text": [
118 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n",
119 | "| id| name|graduate_program| spark_status| id| degree| department| school|\n",
120 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n",
121 | "| 0| Bill Chambers| 0| [100]| 0|Masters|School of Informa...|UC Berkeley|\n",
122 | "| 1| Matei Zaharia| 1|[500, 250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n",
123 | "| 2|Michael Armbrust| 1| [250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n",
124 | "|NULL| NULL| NULL| NULL| 2|Masters| EECS|UC Berkeley|\n",
125 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n",
126 | "\n"
127 | ]
128 | }
129 | ],
130 | "source": [
131 | "joinType = \"outer\"\n",
132 | "person.join(graduateProgram, joinExpression, joinType).show()"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 10,
138 | "id": "57c0fbf2-40c1-4d4d-a280-96c2cbb7bb9c",
139 | "metadata": {},
140 | "outputs": [
141 | {
142 | "name": "stdout",
143 | "output_type": "stream",
144 | "text": [
145 | "+---+-------+--------------------+-----------+----+----------------+----------------+---------------+\n",
146 | "| id| degree| department| school| id| name|graduate_program| spark_status|\n",
147 | "+---+-------+--------------------+-----------+----+----------------+----------------+---------------+\n",
148 | "| 0|Masters|School of Informa...|UC Berkeley| 0| Bill Chambers| 0| [100]|\n",
149 | "| 2|Masters| EECS|UC Berkeley|NULL| NULL| NULL| NULL|\n",
150 | "| 1| Ph.D.| EECS|UC Berkeley| 2|Michael Armbrust| 1| [250, 100]|\n",
151 | "| 1| Ph.D.| EECS|UC Berkeley| 1| Matei Zaharia| 1|[500, 250, 100]|\n",
152 | "+---+-------+--------------------+-----------+----+----------------+----------------+---------------+\n",
153 | "\n"
154 | ]
155 | }
156 | ],
157 | "source": [
158 | "joinType = \"left_outer\"\n",
159 | "graduateProgram.join(person, joinExpression, joinType).show()"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 11,
165 | "id": "2a61d18b-a8bb-4a9f-80c9-35759fb1d517",
166 | "metadata": {},
167 | "outputs": [
168 | {
169 | "name": "stdout",
170 | "output_type": "stream",
171 | "text": [
172 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n",
173 | "| id| name|graduate_program| spark_status| id| degree| department| school|\n",
174 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n",
175 | "| 0| Bill Chambers| 0| [100]| 0|Masters|School of Informa...|UC Berkeley|\n",
176 | "|NULL| NULL| NULL| NULL| 2|Masters| EECS|UC Berkeley|\n",
177 | "| 2|Michael Armbrust| 1| [250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n",
178 | "| 1| Matei Zaharia| 1|[500, 250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n",
179 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n",
180 | "\n"
181 | ]
182 | }
183 | ],
184 | "source": [
185 | "joinType = \"right_outer\"\n",
186 | "person.join(graduateProgram, joinExpression, joinType).show()"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 12,
192 | "id": "2b628353-9d65-4eb9-830b-6169231e4b34",
193 | "metadata": {},
194 | "outputs": [
195 | {
196 | "name": "stdout",
197 | "output_type": "stream",
198 | "text": [
199 | "+---+-------+--------------------+-----------+\n",
200 | "| id| degree| department| school|\n",
201 | "+---+-------+--------------------+-----------+\n",
202 | "| 0|Masters|School of Informa...|UC Berkeley|\n",
203 | "| 1| Ph.D.| EECS|UC Berkeley|\n",
204 | "+---+-------+--------------------+-----------+\n",
205 | "\n"
206 | ]
207 | }
208 | ],
209 | "source": [
210 | "joinType = \"left_semi\"\n",
211 | "graduateProgram.join(person, joinExpression, joinType).show()"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 13,
217 | "id": "1b4b1692-fd2a-4b04-8048-199130867881",
218 | "metadata": {},
219 | "outputs": [
220 | {
221 | "name": "stdout",
222 | "output_type": "stream",
223 | "text": [
224 | "+---+-------+----------+-----------+\n",
225 | "| id| degree|department| school|\n",
226 | "+---+-------+----------+-----------+\n",
227 | "| 2|Masters| EECS|UC Berkeley|\n",
228 | "+---+-------+----------+-----------+\n",
229 | "\n"
230 | ]
231 | }
232 | ],
233 | "source": [
234 | "joinType = \"left_anti\"\n",
235 | "graduateProgram.join(person, joinExpression, joinType).show()"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 14,
241 | "id": "fb5b559e-a6f8-4b66-bfaa-8200fb963a44",
242 | "metadata": {},
243 | "outputs": [
244 | {
245 | "name": "stdout",
246 | "output_type": "stream",
247 | "text": [
248 | "+---+-------+--------------------+-----------+---+----------------+----------------+---------------+\n",
249 | "| id| degree| department| school| id| name|graduate_program| spark_status|\n",
250 | "+---+-------+--------------------+-----------+---+----------------+----------------+---------------+\n",
251 | "| 0|Masters|School of Informa...|UC Berkeley| 0| Bill Chambers| 0| [100]|\n",
252 | "| 1| Ph.D.| EECS|UC Berkeley| 1| Matei Zaharia| 1|[500, 250, 100]|\n",
253 | "| 1| Ph.D.| EECS|UC Berkeley| 2|Michael Armbrust| 1| [250, 100]|\n",
254 | "+---+-------+--------------------+-----------+---+----------------+----------------+---------------+\n",
255 | "\n"
256 | ]
257 | }
258 | ],
259 | "source": [
260 | "joinType = \"cross\"\n",
261 | "graduateProgram.join(person, joinExpression, joinType).show()"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 18,
267 | "id": "2f1faefb-d731-4638-a92a-85c942dce402",
268 | "metadata": {},
269 | "outputs": [
270 | {
271 | "data": {
272 | "text/html": [
273 | "\n",
274 | "            <div>\n",
275 | "                <p><b>SparkSession - hive</b></p>\n",
276 | "                \n",
277 | "        <div>\n",
278 | "            <p><b>SparkContext</b></p>\n",
279 | "\n",
280 | "            <p><a>Spark UI</a></p>\n",
281 | "\n",
282 | "            <dl>\n",
283 | "              <dt>Version</dt>\n",
284 | "                <dd><code>v3.5.0</code></dd>\n",
285 | "              <dt>Master</dt>\n",
286 | "                <dd><code>local[*]</code></dd>\n",
287 | "              <dt>AppName</dt>\n",
288 | "                <dd><code>PySparkShell</code></dd>\n",
289 | "            </dl>\n",
290 | "        </div>\n",
291 | "        \n",
292 | "            </div>\n",
293 | " "
294 | ],
295 | "text/plain": [
296 | ""
297 | ]
298 | },
299 | "execution_count": 18,
300 | "metadata": {},
301 | "output_type": "execute_result"
302 | }
303 | ],
304 | "source": [
305 | "spark"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": null,
311 | "id": "97f654ee-6539-4b39-a777-f05add409e3d",
312 | "metadata": {},
313 | "outputs": [],
314 | "source": []
315 | }
316 | ],
317 | "metadata": {
318 | "kernelspec": {
319 | "display_name": "Python 3 (ipykernel)",
320 | "language": "python",
321 | "name": "python3"
322 | },
323 | "language_info": {
324 | "codemirror_mode": {
325 | "name": "ipython",
326 | "version": 3
327 | },
328 | "file_extension": ".py",
329 | "mimetype": "text/x-python",
330 | "name": "python",
331 | "nbconvert_exporter": "python",
332 | "pygments_lexer": "ipython3",
333 | "version": "3.12.1"
334 | }
335 | },
336 | "nbformat": 4,
337 | "nbformat_minor": 5
338 | }
339 |
--------------------------------------------------------------------------------
/Project 2 - ECommerce Data Analysis Azure Data Engineering/Gold Layer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 0,
6 | "metadata": {
7 | "application/vnd.databricks.v1+cell": {
8 | "cellMetadata": {
9 | "byteLimit": 2048000,
10 | "rowLimit": 10000
11 | },
12 | "inputWidgets": {},
13 | "nuid": "b1ba9d5d-0019-47e1-947f-e55becb33dd1",
14 | "showTitle": false,
15 | "title": ""
16 | }
17 | },
18 | "outputs": [],
19 | "source": [
20 | "from pyspark.sql import SparkSession\n",
21 | "from pyspark.sql.functions import sum, avg, col,monotonically_increasing_id \n",
22 | "\n",
23 | "spark = SparkSession.builder.appName(\"GoldLayerCreation\").getOrCreate()\n",
24 | "\n",
25 | "# Read the necessary Silver tables\n",
26 | "silver_sellers = spark.read.format(\"delta\").load(\"/mnt/delta/tables/silver/sellers\")\n",
27 | "silver_buyers = spark.read.format(\"delta\").load(\"/mnt/delta/tables/silver/buyers\")\n",
28 | "silver_users = spark.read.format(\"delta\").load(\"/mnt/delta/tables/silver/users\")\n",
29 | "silver_countries = spark.read.format(\"delta\").load(\"/mnt/delta/tables/silver/countries\")\n"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 0,
35 | "metadata": {
36 | "application/vnd.databricks.v1+cell": {
37 | "cellMetadata": {
38 | "byteLimit": 2048000,
39 | "rowLimit": 10000
40 | },
41 | "inputWidgets": {},
42 | "nuid": "9d4fea28-f7ae-4860-8c97-5f67389b54d0",
43 | "showTitle": false,
44 | "title": ""
45 | }
46 | },
47 | "outputs": [],
48 | "source": [
49 | "# Perform the join operations\n",
50 | "comprehensive_user_table = silver_users \\\n",
51 | " .join(silver_countries, [\"country\"], \"outer\") \\\n",
52 | " .join(silver_buyers, [\"country\"], \"outer\") \\\n",
53 | " .join(silver_sellers, [\"country\"], \"outer\")\n",
54 | "\n",
55 | "# Select and alias columns from each dataframe to ensure uniqueness\n",
56 | "comprehensive_user_table = comprehensive_user_table.select(\n",
57 | " silver_users[\"country\"].alias(\"Country\"),\n",
58 | " # From silver_users\n",
59 | " silver_users[\"productsSold\"].alias(\"Users_productsSold\"),\n",
60 | " silver_users[\"productsWished\"].alias(\"Users_productsWished\"),\n",
61 | " silver_users[\"account_age_years\"].alias(\"Users_account_age_years\"),\n",
62 | " silver_users[\"account_age_group\"].alias(\"Users_account_age_group\"),\n",
63 | " silver_users[\"hasanyapp\"].alias(\"Users_hasanyapp\"),\n",
64 | " silver_users[\"socialnbfollowers\"].alias(\"Users_socialnbfollowers\"),\n",
65 | " silver_users[\"flag_long_title\"].alias(\"Users_flag_long_title\"),\n",
66 | " # Continue with other silver_users columns as needed...\n",
67 | " \n",
68 | " # From silver_countries\n",
69 | " silver_countries[\"sellers\"].alias(\"Countries_Sellers\"),\n",
70 | " silver_countries[\"topsellers\"].alias(\"Countries_TopSellers\"),\n",
71 | " silver_countries[\"femalesellers\"].alias(\"Countries_FemaleSellers\"),\n",
72 | " silver_countries[\"malesellers\"].alias(\"Countries_MaleSellers\"),\n",
73 | " silver_countries[\"topfemalesellers\"].alias(\"Countries_TopFemaleSellers\"),\n",
74 | " silver_countries[\"topmalesellers\"].alias(\"Countries_TopMaleSellers\"),\n",
75 | " # Continue with other silver_countries columns as needed...\n",
76 | " \n",
77 | " # From silver_buyers\n",
78 | " silver_buyers[\"buyers\"].alias(\"Buyers_Total\"),\n",
79 | " silver_buyers[\"topbuyers\"].alias(\"Buyers_Top\"),\n",
80 | " silver_buyers[\"femalebuyers\"].alias(\"Buyers_Female\"),\n",
81 | " silver_buyers[\"malebuyers\"].alias(\"Buyers_Male\"),\n",
82 | " silver_buyers[\"topfemalebuyers\"].alias(\"Buyers_TopFemale\"),\n",
83 | " silver_buyers[\"topmalebuyers\"].alias(\"Buyers_TopMale\"),\n",
84 | " # Continue with other silver_buyers columns as needed...\n",
85 | " \n",
86 | " # From silver_sellers\n",
87 | " silver_sellers[\"nbsellers\"].alias(\"Sellers_Total\"),\n",
88 | " silver_sellers[\"sex\"].alias(\"Sellers_Sex\"),\n",
89 | " silver_sellers[\"meanproductssold\"].alias(\"Sellers_MeanProductsSold\"),\n",
90 | " silver_sellers[\"meanproductslisted\"].alias(\"Sellers_MeanProductsListed\"),\n",
91 | " # Continue with other silver_sellers columns as needed...\n",
92 | ")\n"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 0,
98 | "metadata": {
99 | "application/vnd.databricks.v1+cell": {
100 | "cellMetadata": {
101 | "byteLimit": 2048000,
102 | "rowLimit": 10000
103 | },
104 | "inputWidgets": {},
105 | "nuid": "37ff2030-47ef-463e-abaf-f13bcad70e2d",
106 | "showTitle": false,
107 | "title": ""
108 | }
109 | },
110 | "outputs": [
111 | {
113 | "name": "stdout",
114 | "output_type": "stream",
115 | "text": [
116 | "+---------+------------------+--------------------+-----------------------+-----------------------+---------------+-----------------------+---------------------+-----------------+--------------------+-----------------------+---------------------+--------------------------+------------------------+------------+----------+-------------+-----------+----------------+--------------+-------------+-----------+------------------------+--------------------------+\n| Country|Users_productsSold|Users_productsWished|Users_account_age_years|Users_account_age_group|Users_hasanyapp|Users_socialnbfollowers|Users_flag_long_title|Countries_Sellers|Countries_TopSellers|Countries_FemaleSellers|Countries_MaleSellers|Countries_TopFemaleSellers|Countries_TopMaleSellers|Buyers_Total|Buyers_Top|Buyers_Female|Buyers_Male|Buyers_TopFemale|Buyers_TopMale|Sellers_Total|Sellers_Sex|Sellers_MeanProductsSold|Sellers_MeanProductsListed|\n+---------+------------------+--------------------+-----------------------+-----------------------+---------------+-----------------------+---------------------+-----------------+--------------------+-----------------------+---------------------+--------------------------+------------------------+------------+----------+-------------+-----------+----------------+--------------+-------------+-----------+------------------------+--------------------------+\n|Singapour| 0| 2| 8.78| Experienced| false| 4| false| NULL| NULL| NULL| NULL| NULL| NULL| 16| 1| 12| 4| 0| 1| 5| FEMALE| 2.20| 1.60|\n|Singapour| 0| 0| 8.78| Experienced| false| 3| false| NULL| NULL| NULL| NULL| NULL| NULL| 16| 1| 12| 4| 0| 1| 5| FEMALE| 2.20| 1.60|\n|Singapour| 0| 0| 8.78| Experienced| false| 3| false| NULL| NULL| NULL| NULL| NULL| NULL| 16| 1| 12| 4| 0| 1| 5| FEMALE| 2.20| 1.60|\n|Singapour| 0| 0| 8.78| Experienced| false| 3| false| NULL| NULL| NULL| NULL| NULL| NULL| 16| 1| 12| 4| 0| 1| 5| FEMALE| 2.20| 1.60|\n|Singapour| 0| 0| 8.78| Experienced| false| 3| false| NULL| NULL| NULL| NULL| 
NULL| NULL| 16| 1| 12| 4| 0| 1| 5| FEMALE| 2.20| 1.60|\n+---------+------------------+--------------------+-----------------------+-----------------------+---------------+-----------------------+---------------------+-----------------+--------------------+-----------------------+---------------------+--------------------------+------------------------+------------+----------+-------------+-----------+----------------+--------------+-------------+-----------+------------------------+--------------------------+\nonly showing top 5 rows\n\n"
117 | ]
118 | }
119 | ],
120 | "source": [
121 | "comprehensive_user_table.show(5)"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 0,
127 | "metadata": {
128 | "application/vnd.databricks.v1+cell": {
129 | "cellMetadata": {
130 | "byteLimit": 2048000,
131 | "rowLimit": 10000
132 | },
133 | "inputWidgets": {},
134 | "nuid": "3cc23c56-bd18-484e-b57c-940d7a6fd61d",
135 | "showTitle": false,
136 | "title": ""
137 | }
138 | },
139 | "outputs": [],
140 | "source": [
141 | "comprehensive_user_table.write.format(\"delta\").mode(\"overwrite\").save(\"/mnt/delta/tables/gold/ecom_one_big_table\")\n"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 0,
147 | "metadata": {
148 | "application/vnd.databricks.v1+cell": {
149 | "cellMetadata": {
150 | "byteLimit": 2048000,
151 | "rowLimit": 10000
152 | },
153 | "inputWidgets": {},
154 | "nuid": "f5d68f5d-85e5-4d8c-aa9f-37167002fb05",
155 | "showTitle": false,
156 | "title": ""
157 | }
158 | },
159 | "outputs": [
160 | {
162 | "data": {
163 | "text/plain": [
164 | "\u001B[0;36m File \u001B[0;32m, line 1\u001B[0;36m\u001B[0m\n",
165 | "\u001B[0;31m CREATE OR REPALCE TABLE\u001B[0m\n",
166 | "\u001B[0m ^\u001B[0m\n",
167 | "\u001B[0;31mSyntaxError\u001B[0m\u001B[0;31m:\u001B[0m invalid syntax\n"
168 | ]
169 | },
170 | "metadata": {
171 | "application/vnd.databricks.v1+output": {
172 | "arguments": {},
173 | "data": "\u001B[0;36m File \u001B[0;32m, line 1\u001B[0;36m\u001B[0m\n\u001B[0;31m CREATE OR REPALCE TABLE\u001B[0m\n\u001B[0m ^\u001B[0m\n\u001B[0;31mSyntaxError\u001B[0m\u001B[0;31m:\u001B[0m invalid syntax\n",
174 | "errorSummary": "SyntaxError: invalid syntax (command-1873744497818564-122763488, line 1)",
175 | "errorTraceType": "ansi",
176 | "metadata": {},
177 | "type": "ipynbError"
178 | }
179 | },
180 | "output_type": "display_data"
181 | }
182 | ],
183 | "source": []
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 0,
188 | "metadata": {
189 | "application/vnd.databricks.v1+cell": {
190 | "cellMetadata": {},
191 | "inputWidgets": {},
192 | "nuid": "54698b78-8574-484d-843a-755d5593b652",
193 | "showTitle": false,
194 | "title": ""
195 | }
196 | },
197 | "outputs": [],
198 | "source": [
199 | "DE"
200 | ]
201 | }
202 | ],
203 | "metadata": {
204 | "application/vnd.databricks.v1+notebook": {
205 | "dashboards": [],
206 | "language": "python",
207 | "notebookMetadata": {
208 | "pythonIndentUnit": 4
209 | },
210 | "notebookName": "Gold Layer",
211 | "widgets": {}
212 | }
213 | },
214 | "nbformat": 4,
215 | "nbformat_minor": 0
216 | }
217 |
--------------------------------------------------------------------------------
/Project 2 - ECommerce Data Analysis Azure Data Engineering/data/Buyers-repartition-by-country.csv:
--------------------------------------------------------------------------------
1 | country,buyers,topbuyers,topbuyerratio,femalebuyers,malebuyers,topfemalebuyers,topmalebuyers,femalebuyersratio,topfemalebuyersratio,boughtperwishlistratio,boughtperlikeratio,topboughtperwishlistratio,topboughtperlikeratio,totalproductsbought,totalproductswished,totalproductsliked,toptotalproductsbought,toptotalproductswished,toptotalproductsliked,meanproductsbought,meanproductswished,meanproductsliked,topmeanproductsbought,topmeanproductswished,topmeanproductsliked,meanofflinedays,topmeanofflinedays,meanfollowers,meanfollowing,topmeanfollowers,topmeanfollowing
2 | France,1251,53,4.2,851,400,33,20,68.0,62.3,26.08,14.15,36.37,23.21,3573,13700,25249,1487,4089,6406,2.9,11.0,20.2,28.1,77.2,120.9,321.8,75.0,4.8,8.8,10.1,11.0
3 | Royaume-Uni,792,38,4.8,560,232,30,8,70.7,78.9,14.97,9.3,13.93,11.59,2174,14524,23378,853,6124,7362,2.7,18.3,29.5,22.4,161.2,193.7,298.3,68.2,5.4,8.8,11.1,12.2
4 | Etats-Unis,912,31,3.4,700,212,21,10,76.8,67.7,21.84,7.92,23.35,12.44,2370,10852,29917,864,3701,6946,2.6,11.9,32.8,27.9,119.4,224.1,359.3,73.0,4.9,9.7,11.7,27.9
5 | Allemagne,578,29,5.0,409,169,21,8,70.8,72.4,13.91,6.88,10.13,4.85,1635,11758,23761,516,5096,10648,2.8,20.3,41.1,17.8,175.7,367.2,296.6,39.3,5.1,8.6,11.2,11.3
6 | Italie,400,21,5.3,283,117,14,7,70.8,66.7,12.56,8.6,11.86,23.91,1221,9720,14200,519,4376,2171,3.1,24.3,35.5,24.7,208.4,103.4,247.9,40.9,6.4,11.6,14.3,15.4
7 | Espagne,255,21,8.2,189,66,18,3,74.1,85.7,12.98,6.2,11.94,6.21,1028,7918,16579,558,4673,8984,4.0,31.1,65.0,26.6,222.5,427.8,271.1,39.4,7.0,9.5,17.6,11.6
8 | Pays-Bas,144,15,10.4,118,26,10,5,81.9,66.7,19.69,8.73,27.21,11.81,537,2727,6154,295,1084,2498,3.7,18.9,42.7,19.7,72.3,166.5,191.6,14.3,6.3,9.0,17.7,10.5
9 | Suède,151,11,7.3,113,38,7,4,74.8,63.6,11.35,5.52,12.57,5.02,566,4987,10256,234,1862,4658,3.7,33.0,67.9,21.3,169.3,423.5,189.1,86.0,7.4,9.1,20.2,9.5
10 | Finlande,64,10,15.6,53,11,9,1,82.8,90.0,35.33,9.29,42.23,24.26,331,937,3562,212,502,874,5.2,14.6,55.7,21.2,50.2,87.4,184.9,121.1,6.7,9.2,10.3,12.5
11 | Danemark,157,9,5.7,127,30,9,0,80.9,100.0,12.89,1.66,6.85,4.03,438,3399,26413,141,2059,3499,2.8,21.6,168.2,15.7,228.8,388.8,156.0,19.3,5.6,8.6,11.7,9.7
12 | Australie,126,9,7.1,92,34,8,1,73.0,88.9,27.75,5.17,39.66,4.83,398,1434,7692,184,464,3812,3.2,11.4,61.0,20.4,51.6,423.6,273.7,102.4,6.3,8.9,11.6,10.6
13 | Belgique,90,7,7.8,73,17,7,0,81.1,100.0,23.67,37.47,23.82,439.06,718,3033,1916,562,2359,128,8.0,33.7,21.3,80.3,337.0,18.3,220.9,16.0,5.5,8.5,8.7,8.4
14 | Autriche,49,6,12.2,40,9,5,1,81.6,83.3,14.14,8.37,16.84,9.53,242,1712,2893,144,855,1511,4.9,34.9,59.0,24.0,142.5,251.8,295.7,19.7,6.0,8.8,9.7,10.0
15 | Bulgarie,9,4,44.4,7,2,3,1,77.8,75.0,3.29,14.26,3.0,13.84,95,2891,666,84,2803,607,10.6,321.2,74.0,21.0,700.8,151.8,82.2,37.0,20.8,32.7,39.5,62.8
16 | Canada,65,3,4.6,45,20,3,0,69.2,100.0,8.51,2.89,40.94,13.1,155,1822,5367,52,127,397,2.4,28.0,82.6,17.3,42.3,132.3,325.6,51.0,6.1,8.6,16.3,12.0
17 | Roumanie,28,3,10.7,22,6,2,1,78.6,66.7,14.22,6.84,15.19,7.44,162,1139,2369,94,619,1263,5.8,40.7,84.6,31.3,206.3,421.0,122.1,16.3,6.0,8.4,12.3,8.0
18 | Hong Kong,28,3,10.7,22,6,2,1,78.6,66.7,6.75,1.61,9.21,1.06,112,1660,6960,64,695,6063,4.0,59.3,248.6,21.3,231.7,2021.0,157.5,68.3,9.4,12.4,14.3,11.0
19 | Luxembourg,13,3,23.1,10,3,3,0,76.9,100.0,32.73,35.14,30.94,46.04,162,495,461,151,488,328,12.5,38.1,35.5,50.3,162.7,109.3,229.4,11.3,9.6,8.3,26.0,8.7
20 | Portugal,18,2,11.1,12,6,1,1,66.7,50.0,1.83,3.42,18.07,4.48,72,3931,2108,30,166,670,4.0,218.4,117.1,15.0,83.0,335.0,159.9,11.0,9.2,15.8,21.0,11.5
21 | Chine,13,2,15.4,9,4,2,0,69.2,100.0,292.11,18.78,1300.0,20.68,111,38,591,91,7,440,8.5,2.9,45.5,45.5,3.5,220.0,315.9,189.0,4.7,8.5,6.0,10.0
22 | Hongrie,7,2,28.6,7,0,2,0,100.0,100.0,23.08,8.43,79.41,7.52,42,182,498,27,34,359,6.0,26.0,71.1,13.5,17.0,179.5,83.7,13.0,3.9,8.0,5.0,8.0
23 | Slovénie,2,2,100.0,2,0,2,0,100.0,100.0,6.87,46.99,6.87,46.99,39,568,83,39,568,83,19.5,284.0,41.5,19.5,284.0,41.5,27.0,27.0,22.5,80.0,22.5,80.0
24 | Suisse,58,1,1.7,45,13,1,0,77.6,100.0,8.46,12.63,1.37,10.38,137,1620,1085,11,803,106,2.4,27.9,18.7,11.0,803.0,106.0,249.2,11.0,5.3,9.0,9.0,27.0
25 | Irlande,31,1,3.2,23,8,1,0,74.2,100.0,7.29,5.97,14.63,5.77,79,1084,1324,18,123,312,2.5,35.0,42.7,18.0,123.0,312.0,253.2,14.0,5.9,8.8,8.0,8.0
26 | Pologne,29,1,3.4,22,7,1,0,75.9,100.0,33.26,21.1,362.5,59.18,150,451,711,87,24,147,5.2,15.6,24.5,87.0,24.0,147.0,224.2,11.0,5.4,8.8,13.0,9.0
27 | Grèce,20,1,5.0,18,2,1,0,90.0,100.0,16.21,5.81,13.08,6.33,53,327,913,14,107,221,2.7,16.4,45.7,14.0,107.0,221.0,135.3,23.0,8.9,9.3,9.0,17.0
28 | Japon,17,1,5.9,8,9,0,1,47.1,0.0,93.06,7.41,,460.0,67,72,904,23,0,5,3.9,4.2,53.2,23.0,0.0,5.0,278.1,26.0,6.5,9.0,4.0,8.0
29 | Singapour,16,1,6.3,12,4,0,1,75.0,0.0,7.84,3.17,1800.0,1800.0,75,957,2369,36,2,2,4.7,59.8,148.1,36.0,2.0,2.0,191.9,56.0,8.3,12.4,10.0,8.0
30 | Croatie,9,1,11.1,7,2,1,0,77.8,100.0,37.86,24.84,35.71,52.63,39,103,157,20,56,38,4.3,11.4,17.4,20.0,56.0,38.0,174.0,14.0,5.9,8.4,5.0,9.0
31 | Émirats arabes unis,2,1,50.0,2,0,1,0,100.0,100.0,9.09,1.64,14.81,1.59,13,143,795,12,81,755,6.5,71.5,397.5,12.0,81.0,755.0,103.0,92.0,6.5,9.5,8.0,11.0
32 | Macau,2,1,50.0,2,0,1,0,100.0,100.0,14.18,3.54,22.78,3.77,19,134,536,18,79,477,9.5,67.0,268.0,18.0,79.0,477.0,132.0,11.0,9.5,11.5,9.0,8.0
33 | Ukraine,1,1,100.0,1,0,1,0,100.0,100.0,2.55,2.27,2.55,2.27,18,707,792,18,707,792,18.0,707.0,792.0,18.0,707.0,792.0,11.0,11.0,65.0,157.0,65.0,157.0
34 | Cambodge,1,1,100.0,0,1,0,1,0.0,0.0,162.5,243.75,162.5,243.75,39,24,16,39,24,16,39.0,24.0,16.0,39.0,24.0,16.0,22.0,22.0,9.0,9.0,9.0,9.0
35 | Norvège,8,0,0.0,8,0,0,0,100.0,,8.03,9.24,,,11,137,119,0,0,0,1.4,17.1,14.9,,,,242.6,,5.3,8.4,,
36 | Estonie,7,0,0.0,5,2,0,0,71.4,,45.0,25.71,,,9,20,35,0,0,0,1.3,2.9,5.0,,,,351.4,,6.4,8.3,,
37 | Corée du Sud,6,0,0.0,5,1,0,0,83.3,,34.38,5.37,,,11,32,205,0,0,0,1.8,5.3,34.2,,,,238.8,,4.3,8.8,,
38 | République tchèque,6,0,0.0,4,2,0,0,66.7,,19.51,88.89,,,8,41,9,0,0,0,1.3,6.8,1.5,,,,291.8,,4.0,8.2,,
39 | Chypre,5,0,0.0,5,0,0,0,100.0,,5.26,2.47,,,9,171,364,0,0,0,1.8,34.2,72.8,,,,243.0,,5.4,10.6,,
40 | Russie,5,0,0.0,4,1,0,0,80.0,,266.67,25.81,,,8,3,31,0,0,0,1.6,0.6,6.2,,,,285.2,,4.4,8.2,,
41 | Lituanie,4,0,0.0,4,0,0,0,100.0,,5.56,1.11,,,4,72,359,0,0,0,1.0,18.0,89.8,,,,185.8,,9.5,9.5,,
42 | Turquie,3,0,0.0,1,2,0,0,33.3,,80.0,28.57,,,4,5,14,0,0,0,1.3,1.7,4.7,,,,423.0,,3.7,7.7,,
43 | Brésil,3,0,0.0,2,1,0,0,66.7,,14.81,36.36,,,4,27,11,0,0,0,1.3,9.0,3.7,,,,118.7,,5.7,8.3,,
44 | Malaisie,3,0,0.0,3,0,0,0,100.0,,38.46,83.33,,,5,13,6,0,0,0,1.7,4.3,2.0,,,,167.7,,3.3,8.0,,
45 | Israel,3,0,0.0,3,0,0,0,100.0,,18.6,5.56,,,8,43,144,0,0,0,2.7,14.3,48.0,,,,170.7,,3.0,8.0,,
46 | Taiwan,3,0,0.0,2,1,0,0,66.7,,29.03,45.0,,,9,31,20,0,0,0,3.0,10.3,6.7,,,,286.3,,3.7,8.3,,
47 | Islande,2,0,0.0,1,1,0,0,50.0,,9.09,9.52,,,2,22,21,0,0,0,1.0,11.0,10.5,,,,21.0,,5.0,8.0,,
48 | Guadeloupe,2,0,0.0,1,1,0,0,50.0,,33.33,300.0,,,3,9,1,0,0,0,1.5,4.5,0.5,,,,275.0,,4.5,9.0,,
49 | Arabie Saoudite,2,0,0.0,2,0,0,0,100.0,,200.0,8.0,,,2,1,25,0,0,0,1.0,0.5,12.5,,,,32.0,,4.5,8.0,,
50 | Egypte,2,0,0.0,1,1,0,0,50.0,,33.33,4.17,,,3,9,72,0,0,0,1.5,4.5,36.0,,,,33.0,,6.0,9.5,,
51 | Indonésie,2,0,0.0,1,1,0,0,50.0,,,9.09,,,2,0,22,0,0,0,1.0,0.0,11.0,,,,331.5,,5.0,9.5,,
52 | Monaco,2,0,0.0,1,1,0,0,50.0,,80.0,30.77,,,4,5,13,0,0,0,2.0,2.5,6.5,,,,332.5,,4.0,8.0,,
53 | Puerto Rico,2,0,0.0,1,1,0,0,50.0,,3.57,2.61,,,3,84,115,0,0,0,1.5,42.0,57.5,,,,359.0,,4.0,8.0,,
54 | Slovaquie,2,0,0.0,2,0,0,0,100.0,,,,,,2,0,0,0,0,0,1.0,0.0,0.0,,,,459.0,,6.5,9.0,,
55 | Colombie,1,0,0.0,1,0,0,0,100.0,,20.0,1.04,,,1,5,96,0,0,0,1.0,5.0,96.0,,,,12.0,,4.0,10.0,,
56 | Serbie,1,0,0.0,1,0,0,0,100.0,,100.0,5.26,,,1,1,19,0,0,0,1.0,1.0,19.0,,,,13.0,,3.0,8.0,,
57 | Bahreïn,1,0,0.0,1,0,0,0,100.0,,400.0,200.0,,,4,1,2,0,0,0,4.0,1.0,2.0,,,,21.0,,4.0,8.0,,
58 | Nouvelle Zélande,1,0,0.0,1,0,0,0,100.0,,300.0,18.37,,,9,3,49,0,0,0,9.0,3.0,49.0,,,,33.0,,6.0,8.0,,
59 | Malte,1,0,0.0,1,0,0,0,100.0,,,50.0,,,1,0,2,0,0,0,1.0,0.0,2.0,,,,366.0,,3.0,8.0,,
60 | Géorgie,1,0,0.0,1,0,0,0,100.0,,,150.0,,,3,0,2,0,0,0,3.0,0.0,2.0,,,,37.0,,3.0,8.0,,
61 | Pakistan,1,0,0.0,0,1,0,0,0.0,,100.0,100.0,,,1,1,1,0,0,0,1.0,1.0,1.0,,,,431.0,,4.0,9.0,,
62 | Mexique,1,0,0.0,1,0,0,0,100.0,,133.33,4.08,,,4,3,98,0,0,0,4.0,3.0,98.0,,,,61.0,,9.0,13.0,,
63 | Qatar,1,0,0.0,1,0,0,0,100.0,,,,,,1,0,0,0,0,0,1.0,0.0,0.0,,,,692.0,,3.0,8.0,,
64 |
--------------------------------------------------------------------------------
/Project 2 - ECommerce Data Analysis Azure Data Engineering/data/Comparison-of-Sellers-by-Gender-and-Country.csv:
--------------------------------------------------------------------------------
1 | country,sex,nbsellers,meanproductssold,meanproductslisted,meansellerpassrate,totalproductssold,totalproductslisted,meanproductsbought,meanproductswished,meanproductsliked,totalbought,totalwished,totalproductsliked,meanfollowers,meanfollows,percentofappusers,percentofiosusers,meanseniority
2 | Allemagne,Female,116,4.03,2.72,27.33,468,315,3.05,34.66,35.28,354,4021,4092,9.5,8.9,54.0,49.0,3060.3362068965516
3 | Allemagne,Male,34,2.0,1.0,19.15,68,34,1.62,3.38,31.79,55,115,1081,7.8,8.4,79.0,64.0,3089.0588235294117
4 | Arménie,Female,1,0.0,25.0,0.0,0,25,0.0,0.0,1.0,0,0,1,4.0,8.0,,,3201.0
5 | Australie,Female,18,0.94,1.33,10.44,17,24,6.11,17.72,209.28,110,319,3767,7.5,9.3,55.0,55.0,3103.6666666666665
6 | Australie,Male,3,6.0,4.0,33.33,18,12,8.0,24.0,38.33,24,72,115,12.7,8.3,66.0,66.0,3085.6666666666665
7 | Autriche,Female,18,3.28,2.5,38.67,59,45,3.39,31.94,33.0,61,575,594,6.9,8.4,61.0,50.0,3048.222222222222
8 | Autriche,Male,5,1.6,0.2,35.0,8,1,0.8,5.0,0.8,4,25,4,6.2,8.0,60.0,60.0,3133.0
9 | Bahamas,Female,1,1.0,0.0,0.0,1,0,0.0,0.0,0.0,0,0,0,4.0,8.0,,,2857.0
10 | Belgique,Female,37,2.41,1.89,29.59,89,70,12.03,57.24,20.11,445,2118,744,8.3,8.5,62.0,48.0,3041.0810810810813
11 | Belgique,Male,8,3.13,1.75,26.13,25,14,0.63,1.13,1.25,5,9,10,8.4,9.3,62.0,37.0,3069.625
12 | Bulgarie,Female,7,22.29,11.71,71.71,156,82,4.86,124.43,120.29,34,871,842,29.1,14.4,71.0,57.0,3150.1428571428573
13 | Bulgarie,Male,3,4.67,0.33,54.0,14,1,14.0,633.67,11.0,42,1901,33,19.7,63.7,100.0,100.0,3082.6666666666665
14 | Canada,Female,3,2.33,0.33,25.0,7,1,0.0,1.33,6.33,0,4,19,5.7,8.7,33.0,33.0,3085.6666666666665
15 | Chine,Female,3,0.67,1.0,33.33,2,3,24.67,1.67,133.33,74,5,400,11.3,9.7,66.0,66.0,3085.3333333333335
16 | Chypre,Female,5,11.2,16.4,55.4,56,82,0.0,4.2,99.0,0,21,495,19.2,10.0,60.0,40.0,3130.6
17 | Chypre,Male,1,0.0,1.0,0.0,0,1,0.0,0.0,49.0,0,0,49,8.0,8.0,100.0,100.0,3199.0
18 | Croatie,Female,10,10.7,6.9,32.6,107,69,0.8,4.7,21.4,8,47,214,15.1,8.8,60.0,40.0,3061.7
19 | Danemark,Female,22,1.64,1.0,6.5,36,22,3.5,90.32,175.41,77,1987,3859,8.9,9.5,59.0,59.0,3043.909090909091
20 | Danemark,Male,6,2.33,1.17,40.17,14,7,1.33,3.17,18.83,8,19,113,6.0,8.0,66.0,66.0,3141.8333333333335
21 | Espagne,Female,133,5.91,3.86,29.52,786,514,2.98,37.66,104.8,397,5009,13938,13.7,14.1,66.0,49.0,3075.533834586466
22 | Espagne,Male,30,6.8,5.1,36.01,204,153,0.8,4.5,15.23,24,135,457,11.0,8.1,80.0,63.0,3108.0666666666666
23 | Estonie,Female,3,1.0,3.33,0.0,3,10,0.33,0.33,1.0,1,1,3,11.3,8.3,66.0,33.0,3084.6666666666665
24 | Estonie,Male,1,1.0,0.0,0.0,1,0,0.0,0.0,0.0,0,0,0,6.0,8.0,100.0,100.0,3198.0
25 | Etats-Unis,Female,182,4.49,4.5,25.4,817,819,3.25,18.03,46.79,592,3282,8516,11.6,12.5,52.0,50.0,3033.2252747252746
26 | Etats-Unis,Male,41,3.66,3.49,30.31,150,143,0.88,3.34,8.12,36,137,333,13.0,9.0,60.0,58.0,3065.7804878048782
27 | Finlande,Female,8,2.5,1.25,32.13,20,10,12.63,31.13,23.75,101,249,190,7.6,9.8,75.0,75.0,3157.0
28 | France,Female,733,3.23,2.39,25.29,2365,1749,1.03,6.25,22.01,757,4583,16133,8.0,22.4,56.0,46.0,3061.4529331514323
29 | France,Male,233,2.79,2.12,25.89,649,493,2.26,4.14,19.57,526,965,4559,6.9,24.7,59.0,49.0,3066.8197424892705
30 | Grèce,Female,14,2.14,5.57,36.36,30,78,1.29,5.79,42.07,18,81,589,13.4,9.5,64.0,50.0,3053.214285714286
31 | Grèce,Male,2,0.5,1.5,0.0,1,3,0.0,0.5,3.0,0,1,6,7.5,8.0,50.0,50.0,3026.0
32 | Hong Kong,Female,14,2.5,4.43,13.29,35,62,3.5,61.21,461.79,49,857,6465,10.9,14.1,57.0,50.0,3026.5
33 | Hong Kong,Male,3,5.0,1.33,43.0,15,4,7.0,90.33,35.33,21,271,106,9.0,8.3,100.0,100.0,3087.3333333333335
34 | Hongrie,Female,1,0.0,3.0,0.0,0,3,8.0,80.0,97.0,8,80,97,3.0,8.0,,,2856.0
35 | Irlande,Female,8,2.0,1.13,28.25,16,9,3.0,18.88,50.38,24,151,403,7.1,8.3,37.0,12.0,3157.5
36 | Islande,Female,1,1.0,2.0,0.0,1,2,1.0,15.0,15.0,1,15,15,6.0,8.0,,,3198.0
37 | Islande,Male,1,1.0,1.0,0.0,1,1,1.0,7.0,6.0,1,7,6,4.0,8.0,,,2858.0
38 | Italie,Female,337,5.53,5.33,37.65,1865,1796,0.83,19.85,185.0,281,6689,62346,13.7,55.3,72.0,59.0,3061.949554896142
39 | Italie,Male,118,8.09,5.13,42.22,955,605,1.22,6.22,23.53,144,734,2777,12.7,11.1,72.0,61.0,3089.8050847457625
40 | Japon,Female,3,0.33,4.33,0.0,1,13,3.33,16.33,178.33,10,49,535,20.0,12.3,66.0,66.0,3087.6666666666665
41 | Kazakhstan,Female,1,1.0,0.0,0.0,1,0,0.0,0.0,1.0,0,0,1,5.0,9.0,100.0,100.0,2859.0
42 | Lettonie,Female,4,20.75,9.0,45.0,83,36,0.0,4.75,27.75,0,19,111,21.0,52.3,75.0,75.0,3200.5
43 | Lituanie,Female,3,1.33,2.67,0.0,4,8,1.0,24.0,115.0,3,72,345,10.3,10.0,66.0,66.0,3197.6666666666665
44 | Luxembourg,Female,8,5.25,0.75,36.25,42,6,17.13,19.63,58.13,137,157,465,14.4,8.4,75.0,50.0,3113.5
45 | Luxembourg,Male,3,0.33,2.33,0.0,1,7,0.0,0.0,0.33,0,0,1,3.3,8.0,100.0,100.0,2856.3333333333335
46 | Malaisie,Female,1,0.0,2.0,0.0,0,2,0.0,7.0,83.0,0,7,83,17.0,18.0,100.0,,3201.0
47 | Martinique,Female,1,0.0,1.0,0.0,0,1,0.0,0.0,0.0,0,0,0,4.0,9.0,100.0,100.0,3204.0
48 | Monaco,Female,6,31.67,4.5,60.83,190,27,0.0,0.17,1.67,0,1,10,34.7,8.0,33.0,33.0,2970.5
49 | Pays-Bas,Female,53,2.64,2.02,31.28,140,107,2.94,30.53,62.11,156,1618,3292,9.8,9.0,69.0,62.0,3109.0377358490564
50 | Pays-Bas,Male,15,4.93,3.33,37.47,74,50,4.8,26.4,72.8,72,396,1092,11.1,10.5,66.0,60.0,3084.6
51 | Pologne,Female,4,2.0,2.25,25.0,8,9,2.5,18.0,31.5,10,72,126,8.0,12.3,50.0,50.0,2942.0
52 | Pologne,Male,2,7.5,0.5,74.5,15,1,0.5,0.0,0.5,1,0,1,11.0,8.0,50.0,50.0,2857.0
53 | Portugal,Female,14,2.14,2.86,37.21,30,40,1.79,32.93,50.07,25,461,701,9.2,9.4,64.0,57.0,3127.285714285714
54 | Portugal,Male,2,1.5,1.0,0.0,3,2,0.0,0.0,3.0,0,0,6,3.5,8.0,50.0,50.0,3196.5
55 | Roumanie,Female,13,3.38,4.15,18.62,44,54,1.08,5.0,31.77,14,65,413,7.6,8.2,69.0,53.0,3121.0
56 | Roumanie,Male,4,6.0,9.75,33.75,24,39,6.0,0.25,96.0,24,1,384,16.5,19.3,75.0,75.0,3113.25
57 | Royaume-Uni,Female,285,5.06,3.24,32.56,1442,922,1.47,8.62,22.69,420,2457,6466,9.1,8.8,49.0,48.0,3050.519298245614
58 | Royaume-Uni,Male,64,5.69,1.61,25.31,364,103,1.59,12.77,8.16,102,817,522,9.6,8.4,71.0,62.0,3070.765625
59 | Russie,Female,3,3.33,1.33,25.67,10,4,0.0,1.33,13.33,0,4,40,8.0,8.7,33.0,33.0,2972.0
60 | République tchèque,Female,1,0.0,1.0,0.0,0,1,0.0,219.0,615.0,0,219,615,7.0,10.0,,,2859.0
61 | Serbie,Female,2,2.5,1.5,50.0,5,3,0.0,0.0,28.0,0,0,56,15.0,8.5,100.0,100.0,3199.5
62 | Singapour,Female,5,2.2,1.6,70.0,11,8,3.0,185.4,266.6,15,927,1333,9.8,18.6,40.0,40.0,3128.8
63 | Slovaquie,Male,2,14.0,7.0,37.0,28,14,0.0,0.5,5.5,0,1,11,10.5,8.5,100.0,100.0,3199.5
64 | Slovénie,Female,1,6.0,0.0,50.0,6,0,23.0,49.0,39.0,23,49,39,34.0,147.0,100.0,100.0,3199.0
65 | Suisse,Female,11,3.55,19.45,32.3,39,214,1.27,15.09,8.45,14,166,93,8.9,8.3,54.0,54.0,3075.3636363636365
66 | Suisse,Male,5,1.2,0.0,0.0,6,0,0.0,0.2,1.0,0,1,5,9.2,8.2,20.0,20.0,2992.8
67 | Suède,Female,30,10.03,7.3,49.0,301,219,5.0,62.7,321.03,150,1881,9631,18.6,24.7,66.0,56.0,3028.5666666666666
68 | Suède,Male,6,5.83,3.0,23.6,35,18,2.33,18.0,17.17,14,108,103,16.8,10.3,83.0,83.0,3086.6666666666665
69 | Taiwan,Female,1,57.0,56.0,98.0,57,56,0.0,0.0,4.0,0,0,4,83.0,8.0,100.0,100.0,2855.0
70 | Ukraine,Female,1,3.0,2.0,0.0,3,2,18.0,707.0,792.0,18,707,792,65.0,157.0,100.0,100.0,3204.0
71 | Îles Canaries,Female,1,1.0,1.0,0.0,1,1,0.0,0.0,0.0,0,0,0,3.0,8.0,,,2857.0
72 | ,Female,2127,4.4,3.54,29.46,9354,7523,2.06,18.76,69.99,4387,39896,148873,10.4,22.1,59.0,50.0,3060.948754113775
73 | ,Male,592,4.52,2.88,29.88,2673,1706,1.86,9.65,19.89,1103,5715,11774,9.4,15.8,66.0,57.0,3076.1655405405404
74 | ,,2719,4.42,3.39,29.55,12027,9229,2.02,16.77,59.08,5490,45611,160647,10.2,20.7,60.0,52.0,3064.261860978301
75 |
--------------------------------------------------------------------------------
/Project 2 - ECommerce Data Analysis Azure Data Engineering/data/Countries-with-Top-Sellers-(Fashion-C2C).csv:
--------------------------------------------------------------------------------
1 | country,sellers,topsellers,topsellerratio,femalesellersratio,topfemalesellersratio,femalesellers,malesellers,topfemalesellers,topmalesellers,countrysoldratio,bestsoldratio,toptotalproductssold,totalproductssold,toptotalproductslisted,totalproductslisted,topmeanproductssold,topmeanproductslisted,meanproductssold,meanproductslisted,meanofflinedays,topmeanofflinedays,meanfollowers,meanfollowing,topmeanfollowers,topmeanfollowing
2 | Taiwan,1,1,100.0,100.0,100.0,1,0,1,0,1.02,1.02,57,57,56,56,57.0,56.0,57.0,56.0,11.0,11.0,83.0,8.0,83.0,8.0
3 | Slovaquie,2,1,50.0,0.0,0.0,0,2,0,1,2.0,1.93,27,28,14,14,27.0,14.0,14.0,7.0,17.0,15.0,10.5,8.5,15.0,8.0
4 | Lettonie,4,2,50.0,100.0,100.0,4,0,2,0,2.31,2.25,81,83,36,36,40.5,18.0,20.75,9.0,120.3,11.5,21.0,52.3,38.0,98.5
5 | Bulgarie,9,4,44.4,66.7,100.0,6,3,4,0,2.07,2.1,145,170,69,82,36.25,17.25,18.888888888888888888888888,9.111111111111111111111111,98.3,19.0,28.6,31.6,46.3,19.0
6 | Chypre,4,1,25.0,100.0,100.0,4,0,1,0,0.69,0.62,41,56,66,81,41.0,66.0,14.0,20.25,17.3,11.0,21.3,10.3,39.0,17.0
7 | Monaco,5,1,20.0,100.0,100.0,5,0,1,0,7.31,8.95,170,190,19,26,170.0,19.0,38.0,5.2,51.6,12.0,39.6,8.0,167.0,8.0
8 | Roumanie,13,2,15.4,76.9,50.0,10,3,1,1,0.88,1.26,49,68,39,77,24.5,19.5,5.230769230769230769230769,5.923076923076923076923076,121.6,11.0,10.9,11.5,30.0,32.0
9 | Luxembourg,7,1,14.3,85.7,100.0,6,1,1,0,5.38,,30,43,0,8,30.0,0.0,6.142857142857142857142857,1.142857142857142857142857,73.6,11.0,15.9,8.4,52.0,3.0
10 | Espagne,119,13,10.9,81.5,76.9,97,22,10,3,1.67,2.02,607,990,301,594,46.692307692307692307692307,23.153846153846153846153846,8.319327731092436974789915,4.991596638655462184873949,202.4,30.5,16.1,14.6,53.2,14.5
11 | Italie,347,35,10.1,71.5,65.7,248,99,23,12,1.27,1.29,1389,2820,1077,2218,39.685714285714285714285714,30.771428571428571428571428,8.126801152737752161383285,6.391930835734870317002881,141.8,26.5,16.1,54.6,63.4,429.3
12 | Croatie,10,1,10.0,100.0,100.0,10,0,1,0,1.55,1.45,64,107,44,69,64.0,44.0,10.7,6.9,141.7,11.0,15.1,8.8,50.0,8.0
13 | Hong Kong,10,1,10.0,70.0,100.0,7,3,1,0,0.91,0.66,23,50,35,55,23.0,35.0,5.0,5.5,49.6,11.0,14.7,15.5,36.0,41.0
14 | Suède,32,3,9.4,81.3,66.7,26,6,2,1,1.45,1.29,202,336,156,231,67.333333333333333333333333,52.0,10.5,7.21875,118.2,12.7,19.8,24.0,102.7,172.0
15 | Royaume-Uni,289,21,7.3,80.3,85.7,232,57,18,3,1.95,2.16,972,1806,449,927,46.285714285714285714285714,21.380952380952380952380952,6.249134948096885813148788,3.207612456747404844290657,188.1,32.4,10.1,8.8,37.3,9.1
16 | Autriche,18,1,5.6,77.8,100.0,14,4,1,0,1.81,1.0,20,67,20,37,20.0,20.0,3.722222222222222222222222,2.055555555555555555555555,138.7,11.0,7.0,8.6,17.0,10.0
17 | Allemagne,105,5,4.8,75.2,80.0,79,26,4,1,2.03,2.85,282,536,99,264,56.4,19.8,5.104761904761904761904761,2.514285714285714285714285,168.2,79.4,10.7,8.8,53.2,10.6
18 | Etats-Unis,150,6,4.0,78.7,83.3,118,32,5,1,1.19,1.3,558,967,428,816,93.0,71.333333333333333333333333,6.446666666666666666666666,5.44,266.1,76.8,14.6,10.2,105.0,17.2
19 | Pays-Bas,54,2,3.7,77.8,50.0,42,12,1,1,1.54,1.74,47,214,27,139,23.5,13.5,3.962962962962962962962962,2.574074074074074074074074,177.4,22.0,11.4,9.6,21.5,10.0
20 | France,713,25,3.5,73.4,80.0,523,190,20,5,1.59,1.4,1109,3014,792,1892,44.36,31.68,4.227208976157082748948106,2.65357643758765778401122,227.1,36.6,8.9,28.3,41.3,156.2
21 |
--------------------------------------------------------------------------------
/Project 2 - ECommerce Data Analysis Azure Data Engineering/data/chunk-user-data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "6898144a-ffb8-4bba-9e93-f06207396e80",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "pip install pandas"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "id": "78019320-f2ad-49ff-8f8e-1a78db6cfecd",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "import pandas as pd"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 6,
26 | "id": "48629112-b6d7-4e38-b6d3-e55f9a279d35",
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "df = pd.read_csv(\"users.6M0xxK.2024.public.csv\")"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 7,
36 | "id": "fa41cf7b-e062-47b8-a882-51bed8b4434b",
37 | "metadata": {},
38 | "outputs": [
39 | {
40 | "data": {
41 | "text/html": [
42 | "\n",
43 | "\n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " | \n",
60 | " identifierHash | \n",
61 | " type | \n",
62 | " countryCode | \n",
63 | " country | \n",
64 | " language | \n",
65 | " socialNbFollowers | \n",
66 | " socialNbFollows | \n",
67 | " socialProductsLiked | \n",
68 | " productsListed | \n",
69 | " productsSold | \n",
70 | " ... | \n",
71 | " productsWished | \n",
72 | " productsBought | \n",
73 | " gender | \n",
74 | " civilityGenderId | \n",
75 | " civilityTitle | \n",
76 | " hasProfilePicture | \n",
77 | " seniority | \n",
78 | " seniorityAsMonths | \n",
79 | " seniorityAsYears | \n",
80 | " websiteLongevity | \n",
81 | "
\n",
82 | " \n",
83 | " \n",
84 | " \n",
85 | " 0 | \n",
86 | " 1920991140 | \n",
87 | " user | \n",
88 | " us | \n",
89 | " Etats-Unis | \n",
90 | " en | \n",
91 | " 3 | \n",
92 | " 8 | \n",
93 | " 0 | \n",
94 | " 0 | \n",
95 | " 0 | \n",
96 | " ... | \n",
97 | " 0 | \n",
98 | " 0 | \n",
99 | " M | \n",
100 | " 1 | \n",
101 | " mr | \n",
102 | " True | \n",
103 | " 3206 | \n",
104 | " 106.87 | \n",
105 | " 8.91 | \n",
106 | " 3803 | \n",
107 | "
\n",
108 | " \n",
109 | " 1 | \n",
110 | " 1920860068 | \n",
111 | " user | \n",
112 | " se | \n",
113 | " Suède | \n",
114 | " en | \n",
115 | " 3 | \n",
116 | " 8 | \n",
117 | " 1 | \n",
118 | " 0 | \n",
119 | " 0 | \n",
120 | " ... | \n",
121 | " 0 | \n",
122 | " 0 | \n",
123 | " M | \n",
124 | " 1 | \n",
125 | " mr | \n",
126 | " True | \n",
127 | " 3206 | \n",
128 | " 106.87 | \n",
129 | " 8.91 | \n",
130 | " 3803 | \n",
131 | "
\n",
132 | " \n",
133 | " 2 | \n",
134 | " 1920401316 | \n",
135 | " user | \n",
136 | " it | \n",
137 | " Italie | \n",
138 | " fr | \n",
139 | " 3 | \n",
140 | " 8 | \n",
141 | " 0 | \n",
142 | " 0 | \n",
143 | " 0 | \n",
144 | " ... | \n",
145 | " 0 | \n",
146 | " 0 | \n",
147 | " M | \n",
148 | " 1 | \n",
149 | " mr | \n",
150 | " True | \n",
151 | " 3206 | \n",
152 | " 106.87 | \n",
153 | " 8.91 | \n",
154 | " 3803 | \n",
155 | "
\n",
156 | " \n",
157 | " 3 | \n",
158 | " 1919811492 | \n",
159 | " user | \n",
160 | " dk | \n",
161 | " Danemark | \n",
162 | " en | \n",
163 | " 75 | \n",
164 | " 10 | \n",
165 | " 1968 | \n",
166 | " 0 | \n",
167 | " 0 | \n",
168 | " ... | \n",
169 | " 43 | \n",
170 | " 15 | \n",
171 | " M | \n",
172 | " 1 | \n",
173 | " mr | \n",
174 | " True | \n",
175 | " 3206 | \n",
176 | " 106.87 | \n",
177 | " 8.91 | \n",
178 | " 3803 | \n",
179 | "
\n",
180 | " \n",
181 | " 4 | \n",
182 | " 1919025060 | \n",
183 | " user | \n",
184 | " de | \n",
185 | " Allemagne | \n",
186 | " de | \n",
187 | " 3 | \n",
188 | " 8 | \n",
189 | " 0 | \n",
190 | " 0 | \n",
191 | " 0 | \n",
192 | " ... | \n",
193 | " 0 | \n",
194 | " 0 | \n",
195 | " M | \n",
196 | " 1 | \n",
197 | " mr | \n",
198 | " True | \n",
199 | " 3206 | \n",
200 | " 106.87 | \n",
201 | " 8.91 | \n",
202 | " 3803 | \n",
203 | "
\n",
204 | " \n",
205 | "
\n",
206 | "
5 rows × 21 columns
\n",
207 | "
"
208 | ],
209 | "text/plain": [
210 | " identifierHash type countryCode country language socialNbFollowers \\\n",
211 | "0 1920991140 user us Etats-Unis en 3 \n",
212 | "1 1920860068 user se Suède en 3 \n",
213 | "2 1920401316 user it Italie fr 3 \n",
214 | "3 1919811492 user dk Danemark en 75 \n",
215 | "4 1919025060 user de Allemagne de 3 \n",
216 | "\n",
217 | " socialNbFollows socialProductsLiked productsListed productsSold ... \\\n",
218 | "0 8 0 0 0 ... \n",
219 | "1 8 1 0 0 ... \n",
220 | "2 8 0 0 0 ... \n",
221 | "3 10 1968 0 0 ... \n",
222 | "4 8 0 0 0 ... \n",
223 | "\n",
224 | " productsWished productsBought gender civilityGenderId civilityTitle \\\n",
225 | "0 0 0 M 1 mr \n",
226 | "1 0 0 M 1 mr \n",
227 | "2 0 0 M 1 mr \n",
228 | "3 43 15 M 1 mr \n",
229 | "4 0 0 M 1 mr \n",
230 | "\n",
231 | " hasProfilePicture seniority seniorityAsMonths seniorityAsYears \\\n",
232 | "0 True 3206 106.87 8.91 \n",
233 | "1 True 3206 106.87 8.91 \n",
234 | "2 True 3206 106.87 8.91 \n",
235 | "3 True 3206 106.87 8.91 \n",
236 | "4 True 3206 106.87 8.91 \n",
237 | "\n",
238 | " websiteLongevity \n",
239 | "0 3803 \n",
240 | "1 3803 \n",
241 | "2 3803 \n",
242 | "3 3803 \n",
243 | "4 3803 \n",
244 | "\n",
245 | "[5 rows x 21 columns]"
246 | ]
247 | },
248 | "execution_count": 7,
249 | "metadata": {},
250 | "output_type": "execute_result"
251 | }
252 | ],
253 | "source": [
254 | "df.head()"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": 8,
260 | "id": "8e4bf583-5639-4a69-ab92-ef91e11276b5",
261 | "metadata": {},
262 | "outputs": [],
263 | "source": [
264 | "\n",
265 | "# Calculate the size of each chunk\n",
266 | "chunk_size = len(df) // 5 + (1 if len(df) % 5 else 0)\n",
267 | "\n",
268 | "# Split the dataframe into chunks and save them\n",
269 | "for i in range(5):\n",
270 | " start = i * chunk_size\n",
271 | " end = start + chunk_size\n",
272 | " chunk_df = df.iloc[start:end]\n",
273 | " chunk_df.to_csv(f'chunk-data/chunk{i+5+1}.csv', index=False)\n"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": null,
279 | "id": "abb71ec8-46ef-4372-b673-d551c58eb1f2",
280 | "metadata": {},
281 | "outputs": [],
282 | "source": []
283 | }
284 | ],
285 | "metadata": {
286 | "kernelspec": {
287 | "display_name": "Python 3 (ipykernel)",
288 | "language": "python",
289 | "name": "python3"
290 | },
291 | "language_info": {
292 | "codemirror_mode": {
293 | "name": "ipython",
294 | "version": 3
295 | },
296 | "file_extension": ".py",
297 | "mimetype": "text/x-python",
298 | "name": "python",
299 | "nbconvert_exporter": "python",
300 | "pygments_lexer": "ipython3",
301 | "version": "3.12.1"
302 | }
303 | },
304 | "nbformat": 4,
305 | "nbformat_minor": 5
306 | }
307 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # apache-spark-with-data-bricks-for-data-engineering
2 | Jupyter notebooks and example projects for learning Apache Spark with Databricks for data engineering.
3 |
--------------------------------------------------------------------------------
/Spark Data Source.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "55388fa7-e326-48bb-a72e-bbc1f69a6739",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "csvFile = spark.read.format(\"csv\")\\\n",
11 | " .option(\"header\", \"true\")\\\n",
12 | " .option(\"mode\", \"FAILFAST\")\\\n",
13 | " .option(\"inferSchema\", \"true\")\\\n",
14 | " .load(\"data/flight-data/csv/2010-summary.csv\")"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "id": "3d5ef8bb-a67a-4e12-9338-01203022e6ba",
21 | "metadata": {},
22 | "outputs": [
23 | {
24 | "name": "stdout",
25 | "output_type": "stream",
26 | "text": [
27 | "+--------------------+-------------------+-----+\n",
28 | "| DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|\n",
29 | "+--------------------+-------------------+-----+\n",
30 | "| United States| Romania| 1|\n",
31 | "| United States| Ireland| 264|\n",
32 | "| United States| India| 69|\n",
33 | "| Egypt| United States| 24|\n",
34 | "| Equatorial Guinea| United States| 1|\n",
35 | "| United States| Singapore| 25|\n",
36 | "| United States| Grenada| 54|\n",
37 | "| Costa Rica| United States| 477|\n",
38 | "| Senegal| United States| 29|\n",
39 | "| United States| Marshall Islands| 44|\n",
40 | "| Guyana| United States| 17|\n",
41 | "| United States| Sint Maarten| 53|\n",
42 | "| Malta| United States| 1|\n",
43 | "| Bolivia| United States| 46|\n",
44 | "| Anguilla| United States| 21|\n",
45 | "|Turks and Caicos ...| United States| 136|\n",
46 | "| United States| Afghanistan| 2|\n",
47 | "|Saint Vincent and...| United States| 1|\n",
48 | "| Italy| United States| 390|\n",
49 | "| United States| Russia| 156|\n",
50 | "+--------------------+-------------------+-----+\n",
51 | "only showing top 20 rows\n",
52 | "\n"
53 | ]
54 | }
55 | ],
56 | "source": [
57 | "csvFile.show()"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 3,
63 | "id": "29048a70-2c3c-49fd-b540-8d66823cabba",
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "csvFile.write.format(\"csv\").mode(\"overwrite\").option(\"sep\", \",\").save(\"data/flight-data/write/my-csv-data.csv\")"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": 4,
73 | "id": "13e3b920-f40f-48cc-8c04-8e3a35b4e30b",
74 | "metadata": {},
75 | "outputs": [
76 | {
77 | "name": "stdout",
78 | "output_type": "stream",
79 | "text": [
80 | "+-----------------+-------------------+-----+\n",
81 | "|DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|\n",
82 | "+-----------------+-------------------+-----+\n",
83 | "| United States| Romania| 1|\n",
84 | "| United States| Ireland| 264|\n",
85 | "| United States| India| 69|\n",
86 | "| Egypt| United States| 24|\n",
87 | "|Equatorial Guinea| United States| 1|\n",
88 | "+-----------------+-------------------+-----+\n",
89 | "only showing top 5 rows\n",
90 | "\n"
91 | ]
92 | }
93 | ],
94 | "source": [
95 | "spark.read.format(\"json\").option(\"mode\", \"FAILFAST\")\\\n",
96 | " .option(\"inferSchema\", \"true\")\\\n",
97 | " .load(\"data/flight-data/json/2010-summary.json\").show(5)"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": 5,
103 | "id": "45820a61-fe4f-4c69-b879-50bf64fb766c",
104 | "metadata": {},
105 | "outputs": [],
106 | "source": [
107 | "csvFile.write.format(\"json\").mode(\"overwrite\").save(\"data/flight-data/write/my-json-file.json\")"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 7,
113 | "id": "6a7c0d9e-d6b5-4aec-af15-76ff9cbe2155",
114 | "metadata": {},
115 | "outputs": [
116 | {
117 | "name": "stdout",
118 | "output_type": "stream",
119 | "text": [
120 | "+-----------------+-------------------+-----+\n",
121 | "|DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|\n",
122 | "+-----------------+-------------------+-----+\n",
123 | "| United States| Romania| 1|\n",
124 | "| United States| Ireland| 264|\n",
125 | "| United States| India| 69|\n",
126 | "| Egypt| United States| 24|\n",
127 | "|Equatorial Guinea| United States| 1|\n",
128 | "+-----------------+-------------------+-----+\n",
129 | "only showing top 5 rows\n",
130 | "\n"
131 | ]
132 | }
133 | ],
134 | "source": [
135 | "spark.read.format(\"parquet\")\\\n",
136 | "\t .load(\"data/flight-data/parquet/2010-summary.parquet\").show(5)"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": 9,
142 | "id": "c669ff37-e161-40b3-93f2-c001361d783c",
143 | "metadata": {},
144 | "outputs": [
145 | {
146 | "name": "stdout",
147 | "output_type": "stream",
148 | "text": [
149 | "+-----------------+-------------------+-----+\n",
150 | "|DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|\n",
151 | "+-----------------+-------------------+-----+\n",
152 | "| United States| Romania| 1|\n",
153 | "| United States| Ireland| 264|\n",
154 | "| United States| India| 69|\n",
155 | "| Egypt| United States| 24|\n",
156 | "|Equatorial Guinea| United States| 1|\n",
157 | "+-----------------+-------------------+-----+\n",
158 | "only showing top 5 rows\n",
159 | "\n"
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "spark.read.format(\"orc\").load(\"data/flight-data/orc/2010-summary.orc\").show(5)"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 10,
170 | "id": "cad35276-7c10-4e7e-ae01-954995b6b753",
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "csvFile.repartition(5).write.format(\"csv\").save(\"data/flight-data/write/multiple.csv\")"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 11,
180 | "id": "37a63bc8-6dcf-43a6-aa62-261c91d724e5",
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "csvFile.limit(10).write.mode(\"overwrite\").partitionBy(\"DEST_COUNTRY_NAME\").save(\"data/flight-data/write/partitioned-files.parquet\")"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": null,
190 | "id": "b0f65509-2794-49f2-9192-f039a662cdb0",
191 | "metadata": {},
192 | "outputs": [],
193 | "source": []
194 | }
195 | ],
196 | "metadata": {
197 | "kernelspec": {
198 | "display_name": "Python 3 (ipykernel)",
199 | "language": "python",
200 | "name": "python3"
201 | },
202 | "language_info": {
203 | "codemirror_mode": {
204 | "name": "ipython",
205 | "version": 3
206 | },
207 | "file_extension": ".py",
208 | "mimetype": "text/x-python",
209 | "name": "python",
210 | "nbconvert_exporter": "python",
211 | "pygments_lexer": "ipython3",
212 | "version": "3.12.1"
213 | }
214 | },
215 | "nbformat": 4,
216 | "nbformat_minor": 5
217 | }
218 |
--------------------------------------------------------------------------------
/Spark Deployment.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "e84d341d-d5dd-4ad1-a539-8c75455c71f9",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "from pyspark.sql import SparkSession"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 5,
16 | "id": "5436eedf-99e1-4e56-9737-ee06463928e4",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "spark_create = SparkSession.builder.master(\"local\").appName(\"Word Count\")\\\n",
21 | ".config(\"spark.some.config.option\", \"some-value\")\\\n",
22 | ".getOrCreate()"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 4,
28 | "id": "ed99f4cf-93cd-4449-b623-6a695841985c",
29 | "metadata": {},
30 | "outputs": [
31 | {
32 | "data": {
33 | "text/html": [
34 | "\n",
35 | " \n",
36 | "
SparkSession - hive
\n",
37 | " \n",
38 | "
\n",
39 | "
SparkContext
\n",
40 | "\n",
41 | "
Spark UI
\n",
42 | "\n",
43 | "
\n",
44 | " - Version
\n",
45 | " v3.5.0
\n",
46 | " - Master
\n",
47 | " local[*]
\n",
48 | " - AppName
\n",
49 | " PySparkShell
\n",
50 | "
\n",
51 | "
\n",
52 | " \n",
53 | "
\n",
54 | " "
55 | ],
56 | "text/plain": [
57 | ""
58 | ]
59 | },
60 | "execution_count": 4,
61 | "metadata": {},
62 | "output_type": "execute_result"
63 | }
64 | ],
65 | "source": [
66 | "spark"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 6,
72 | "id": "fa05005c-85a6-469c-bf0d-2df945110cc1",
73 | "metadata": {},
74 | "outputs": [
75 | {
76 | "data": {
77 | "text/html": [
78 | "\n",
79 | " \n",
80 | "
SparkSession - hive
\n",
81 | " \n",
82 | "
\n",
83 | "
SparkContext
\n",
84 | "\n",
85 | "
Spark UI
\n",
86 | "\n",
87 | "
\n",
88 | " - Version
\n",
89 | " v3.5.0
\n",
90 | " - Master
\n",
91 | " local[*]
\n",
92 | " - AppName
\n",
93 | " PySparkShell
\n",
94 | "
\n",
95 | "
\n",
96 | " \n",
97 | "
\n",
98 | " "
99 | ],
100 | "text/plain": [
101 | ""
102 | ]
103 | },
104 | "execution_count": 6,
105 | "metadata": {},
106 | "output_type": "execute_result"
107 | }
108 | ],
109 | "source": [
110 | "spark_create"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 8,
116 | "id": "4c24fc3a-16f2-4da5-9716-f4a0b5524cae",
117 | "metadata": {},
118 | "outputs": [
119 | {
120 | "data": {
121 | "text/html": [
122 | "\n",
123 | " \n",
124 | "
SparkContext
\n",
125 | "\n",
126 | "
Spark UI
\n",
127 | "\n",
128 | "
\n",
129 | " - Version
\n",
130 | " v3.5.0
\n",
131 | " - Master
\n",
132 | " local[*]
\n",
133 | " - AppName
\n",
134 | " PySparkShell
\n",
135 | "
\n",
136 | "
\n",
137 | " "
138 | ],
139 | "text/plain": [
140 | ""
141 | ]
142 | },
143 | "execution_count": 8,
144 | "metadata": {},
145 | "output_type": "execute_result"
146 | }
147 | ],
148 | "source": [
149 | "sc"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "id": "37a8b2ec-813c-4ab3-b1e6-6520a97bbd8a",
156 | "metadata": {},
157 | "outputs": [],
158 | "source": []
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 19,
163 | "id": "63d0eedb-2b1e-4dac-b3d6-d0d8997db668",
164 | "metadata": {},
165 | "outputs": [
166 | {
167 | "name": "stderr",
168 | "output_type": "stream",
169 | "text": [
170 | " \r"
171 | ]
172 | },
173 | {
174 | "data": {
175 | "text/plain": [
176 | "[Row(sum(id)=2500000000000)]"
177 | ]
178 | },
179 | "execution_count": 19,
180 | "metadata": {},
181 | "output_type": "execute_result"
182 | }
183 | ],
184 | "source": [
185 | "# in Python\n",
186 | "\n",
187 | "df1 = spark.range(2, 10000000, 2)\n",
188 | "df2 = spark.range(2, 10000000, 4)\n",
189 | "step1 = df1.repartition(5)\n",
190 | "step12 = df2.repartition(6)\n",
191 | "\n",
192 | "step2 = step1.selectExpr(\"id * 5 as id\")\n",
193 | "step3 = step2.join(step12, [\"id\"])\n",
194 | "step4 = step3.selectExpr(\"sum(id)\")\n",
195 | "\n",
196 | "step4.collect() # 2500000000000"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 13,
202 | "id": "7d3bd22e-e8ad-4e1c-af6e-f7e062ea4c87",
203 | "metadata": {},
204 | "outputs": [
205 | {
206 | "name": "stdout",
207 | "output_type": "stream",
208 | "text": [
209 | "== Physical Plan ==\n",
210 | "AdaptiveSparkPlan isFinalPlan=true\n",
211 | "+- == Final Plan ==\n",
212 | " *(7) HashAggregate(keys=[], functions=[sum(id#29L)])\n",
213 | " +- ShuffleQueryStage 4\n",
214 | " +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=555]\n",
215 | " +- *(6) HashAggregate(keys=[], functions=[partial_sum(id#29L)])\n",
216 | " +- *(6) Project [id#29L]\n",
217 | " +- *(6) SortMergeJoin [id#29L], [id#23L], Inner\n",
218 | " :- *(4) Sort [id#29L ASC NULLS FIRST], false, 0\n",
219 | " : +- AQEShuffleRead coalesced\n",
220 | " : +- ShuffleQueryStage 2\n",
221 | " : +- Exchange hashpartitioning(id#29L, 200), ENSURE_REQUIREMENTS, [plan_id=426]\n",
222 | " : +- *(3) Project [(id#21L * 5) AS id#29L]\n",
223 | " : +- ShuffleQueryStage 0\n",
224 | " : +- Exchange RoundRobinPartitioning(5), REPARTITION_BY_NUM, [plan_id=359]\n",
225 | " : +- *(1) Range (2, 10000000, step=2, splits=8)\n",
226 | " +- *(5) Sort [id#23L ASC NULLS FIRST], false, 0\n",
227 | " +- AQEShuffleRead coalesced\n",
228 | " +- ShuffleQueryStage 3\n",
229 | " +- Exchange hashpartitioning(id#23L, 200), ENSURE_REQUIREMENTS, [plan_id=371]\n",
230 | " +- ShuffleQueryStage 1\n",
231 | " +- Exchange RoundRobinPartitioning(6), REPARTITION_BY_NUM, [plan_id=367]\n",
232 | " +- *(2) Range (2, 10000000, step=4, splits=8)\n",
233 | "+- == Initial Plan ==\n",
234 | " HashAggregate(keys=[], functions=[sum(id#29L)])\n",
235 | " +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=335]\n",
236 | " +- HashAggregate(keys=[], functions=[partial_sum(id#29L)])\n",
237 | " +- Project [id#29L]\n",
238 | " +- SortMergeJoin [id#29L], [id#23L], Inner\n",
239 | " :- Sort [id#29L ASC NULLS FIRST], false, 0\n",
240 | " : +- Exchange hashpartitioning(id#29L, 200), ENSURE_REQUIREMENTS, [plan_id=327]\n",
241 | " : +- Project [(id#21L * 5) AS id#29L]\n",
242 | " : +- Exchange RoundRobinPartitioning(5), REPARTITION_BY_NUM, [plan_id=317]\n",
243 | " : +- Range (2, 10000000, step=2, splits=8)\n",
244 | " +- Sort [id#23L ASC NULLS FIRST], false, 0\n",
245 | " +- Exchange hashpartitioning(id#23L, 200), ENSURE_REQUIREMENTS, [plan_id=328]\n",
246 | " +- Exchange RoundRobinPartitioning(6), REPARTITION_BY_NUM, [plan_id=320]\n",
247 | " +- Range (2, 10000000, step=4, splits=8)\n",
248 | "\n",
249 | "\n"
250 | ]
251 | }
252 | ],
253 | "source": [
254 | "step4.explain()"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": 11,
260 | "id": "c9029437-b662-445b-8ef8-2b108c9bde2c",
261 | "metadata": {},
262 | "outputs": [
263 | {
264 | "data": {
265 | "text/html": [
266 | "\n",
267 | " \n",
268 | "
SparkSession - hive
\n",
269 | " \n",
270 | "
\n",
271 | "
SparkContext
\n",
272 | "\n",
273 | "
Spark UI
\n",
274 | "\n",
275 | "
\n",
276 | " - Version
\n",
277 | " v3.5.0
\n",
278 | " - Master
\n",
279 | " local[*]
\n",
280 | " - AppName
\n",
281 | " PySparkShell
\n",
282 | "
\n",
283 | "
\n",
284 | " \n",
285 | "
\n",
286 | " "
287 | ],
288 | "text/plain": [
289 | ""
290 | ]
291 | },
292 | "execution_count": 11,
293 | "metadata": {},
294 | "output_type": "execute_result"
295 | }
296 | ],
297 | "source": [
298 | "spark"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": null,
304 | "id": "3c58f0c0-6e80-4883-9d45-24978f98c056",
305 | "metadata": {},
306 | "outputs": [],
307 | "source": []
308 | }
309 | ],
310 | "metadata": {
311 | "kernelspec": {
312 | "display_name": "Python 3 (ipykernel)",
313 | "language": "python",
314 | "name": "python3"
315 | },
316 | "language_info": {
317 | "codemirror_mode": {
318 | "name": "ipython",
319 | "version": 3
320 | },
321 | "file_extension": ".py",
322 | "mimetype": "text/x-python",
323 | "name": "python",
324 | "nbconvert_exporter": "python",
325 | "pygments_lexer": "ipython3",
326 | "version": "3.12.1"
327 | }
328 | },
329 | "nbformat": 4,
330 | "nbformat_minor": 5
331 | }
332 |
--------------------------------------------------------------------------------
/Spotify Data Pipeline using Spark/(python) spotify_transformation_load_function.py:
--------------------------------------------------------------------------------
1 | import json
2 | import boto3
3 | from datetime import datetime
4 | from io import StringIO
5 | import pandas as pd
6 |
def album(data):
    """Extract one album record per playlist item.

    Args:
        data: Parsed Spotify playlist-items payload; ``data['items']`` is a
            list of rows, each expected to carry a ``'track'`` object with a
            nested ``'album'``.

    Returns:
        A list of dicts with keys ``album_id``, ``name``, ``release_date``,
        ``total_tracks`` and ``url``, in item order. Duplicates are NOT
        removed here.

    Rows whose ``'track'`` is missing or ``None`` are skipped instead of
    raising, matching how ``artist`` treats rows without a track.
    """
    album_list = []
    for row in data['items']:
        track = row.get('track')
        if not track:
            # Nothing to extract from an item that has no track object.
            continue
        album_info = track['album']
        album_list.append({
            'album_id': album_info['id'],
            'name': album_info['name'],
            'release_date': album_info['release_date'],
            'total_tracks': album_info['total_tracks'],
            'url': album_info['external_urls']['spotify'],
        })
    return album_list
19 |
def artist(data):
    """Extract one artist record per (track, artist) pair.

    Args:
        data: Parsed Spotify playlist-items payload; ``data['items']`` is a
            list of rows whose ``'track'`` object holds an ``'artists'`` list.

    Returns:
        A list of dicts with keys ``artist_id``, ``artist_name`` and
        ``external_url`` (populated from the artist's ``href`` field), in
        encounter order. Duplicates are NOT removed here.

    Rows whose ``'track'`` is missing or ``None`` are skipped.
    """
    artist_list = []
    for row in data['items']:
        # Look the track up directly instead of scanning every key of the row.
        track = row.get('track')
        if not track:
            continue
        for performer in track['artists']:
            artist_list.append({
                'artist_id': performer['id'],
                'artist_name': performer['name'],
                'external_url': performer['href'],
            })
    return artist_list
29 |
def songs(data):
    """Extract one song record per playlist item.

    Args:
        data: Parsed Spotify playlist-items payload; each row in
            ``data['items']`` is expected to carry ``'added_at'`` and a
            ``'track'`` object.

    Returns:
        A list of dicts with keys ``song_id``, ``song_name``, ``duration_ms``,
        ``url``, ``popularity``, ``song_added``, ``album_id`` and
        ``artist_id``, in item order. ``artist_id`` is the id of the FIRST
        artist listed on the track's album.

    Rows whose ``'track'`` is missing or ``None`` are skipped instead of
    raising, matching how ``artist`` treats rows without a track.
    """
    song_list = []
    for row in data['items']:
        track = row.get('track')
        if not track:
            continue
        album_info = track['album']
        song_list.append({
            'song_id': track['id'],
            'song_name': track['name'],
            'duration_ms': track['duration_ms'],
            'url': track['external_urls']['spotify'],
            'popularity': track['popularity'],
            'song_added': row['added_at'],
            'album_id': album_info['id'],
            'artist_id': album_info['artists'][0]['id'],
        })
    return song_list
48 |
def _upload_csv(s3, bucket, key_prefix, frame):
    """Write ``frame`` as CSV (no index) to ``bucket`` under a timestamped key.

    The object key is ``key_prefix`` + ``str(datetime.now())`` + ``.csv``.
    """
    object_key = key_prefix + str(datetime.now()) + ".csv"
    buffer = StringIO()
    frame.to_csv(buffer, index=False)
    s3.put_object(Bucket=bucket, Key=object_key, Body=buffer.getvalue())


def lambda_handler(event, context):
    """Transform raw Spotify JSON dumps in S3 into album/artist/song CSVs.

    Steps:
      1. Read every ``.json`` object under ``raw_data/to_processed/``.
      2. For each file, build album, artist and song tables and upload each
         as a timestamped CSV under ``transformed_data/``.
      3. Copy every consumed raw file to ``raw_data/processed/`` and delete
         the original.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).

    Returns:
        None.
    """
    s3 = boto3.client('s3')
    Bucket = "spotify-etl-project-darshil"
    Key = "raw_data/to_processed/"

    spotify_data = []
    spotify_keys = []
    # 'Contents' is absent from the response when nothing matches the prefix,
    # so fall back to an empty listing rather than raising KeyError.
    listing = s3.list_objects(Bucket=Bucket, Prefix=Key)
    for file in listing.get('Contents', []):
        file_key = file['Key']
        if file_key.endswith(".json"):
            response = s3.get_object(Bucket=Bucket, Key=file_key)
            spotify_data.append(json.loads(response['Body'].read()))
            spotify_keys.append(file_key)

    for data in spotify_data:
        album_list = album(data)
        artist_list = artist(data)
        song_list = songs(data)

        if not song_list:
            # No usable tracks in this file: an empty frame has no columns,
            # so drop_duplicates(subset=...) below would fail. Upload nothing.
            continue

        album_df = pd.DataFrame.from_dict(album_list)
        album_df = album_df.drop_duplicates(subset=['album_id'])

        artist_df = pd.DataFrame.from_dict(artist_list)
        artist_df = artist_df.drop_duplicates(subset=['artist_id'])

        song_df = pd.DataFrame.from_dict(song_list)

        album_df['release_date'] = pd.to_datetime(album_df['release_date'])
        song_df['song_added'] = pd.to_datetime(song_df['song_added'])

        _upload_csv(s3, Bucket, "transformed_data/songs_data/songs_transformed_", song_df)
        _upload_csv(s3, Bucket, "transformed_data/album_data/album_transformed_", album_df)
        _upload_csv(s3, Bucket, "transformed_data/artist_data/artist_transformed_", artist_df)

    # Archive the raw inputs so the next invocation does not reprocess them.
    s3_resource = boto3.resource('s3')
    for key in spotify_keys:
        copy_source = {
            'Bucket': Bucket,
            'Key': key
        }
        s3_resource.meta.client.copy(copy_source, Bucket, 'raw_data/processed/' + key.split("/")[-1])
        s3_resource.Object(Bucket, key).delete()
--------------------------------------------------------------------------------
/Spotify Data Pipeline using Spark/(spark) spotify_transformation.py:
--------------------------------------------------------------------------------
1 |
2 | import sys
3 | from awsglue.transforms import *
4 | from awsglue.utils import getResolvedOptions
5 | from pyspark.context import SparkContext
6 | from awsglue.context import GlueContext
7 | from awsglue.job import Job
8 | from pyspark.sql.functions import explode, col, to_date
9 | from datetime import datetime
10 | from awsglue.dynamicframe import DynamicFrame
11 |
# --- Runtime bootstrap -------------------------------------------------------
# Obtain the SparkContext, wrap it in a GlueContext, and keep handles to the
# Spark session and the Glue Job object for the rest of the script.
sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)

# --- Source ------------------------------------------------------------------
# Load the JSON data found under the raw "to_processed" prefix as a Glue
# DynamicFrame, then convert it to a Spark DataFrame for the transformations.
s3_path = "s3://spotify-daily-data-project/raw_data/to_processed/"
source_dyf = glueContext.create_dynamic_frame_from_options(
    connection_type="s3",
    connection_options={"paths": [s3_path]},
    format="json",
)
spotify_df = source_dyf.toDF()
def process_albums(df):
    """Build the album table from the raw playlist DataFrame.

    Explodes the ``items`` array so each playlist entry becomes a row, projects
    the album fields of each entry's track, and keeps one row per ``album_id``.

    Args:
        df: Spark DataFrame with an ``items`` array column.

    Returns:
        Spark DataFrame with columns ``album_id``, ``album_name``,
        ``release_date``, ``total_tracks`` and ``url``.
    """
    # One row per playlist entry; the exploded struct replaces the array column.
    per_item = df.withColumn("items", explode("items"))

    album_columns = [
        col("items.track.album.id").alias("album_id"),
        col("items.track.album.name").alias("album_name"),
        col("items.track.album.release_date").alias("release_date"),
        col("items.track.album.total_tracks").alias("total_tracks"),
        col("items.track.album.external_urls.spotify").alias("url"),
    ]
    albums = per_item.select(*album_columns)

    # The same album can appear on several tracks; keep a single row for each.
    return albums.dropDuplicates(["album_id"])
34 |
35 |
def process_artists(df):
    """Build the artist table from the raw playlist DataFrame.

    Args:
        df: Spark DataFrame with an ``items`` array column whose elements
            contain ``track.artists`` arrays.

    Returns:
        Spark DataFrame with columns ``artist_id``, ``artist_name`` and
        ``external_url``, one row per distinct ``artist_id``.
    """
    # Two-level explode: playlist entries first, then every artist of each track.
    per_artist = (
        df.select(explode(col("items")).alias("item"))
        .select(explode(col("item.track.artists")).alias("artist"))
    )

    artists = per_artist.select(
        col("artist.id").alias("artist_id"),
        col("artist.name").alias("artist_name"),
        col("artist.external_urls.spotify").alias("external_url"),
    )

    # An artist credited on several tracks should appear only once.
    return artists.dropDuplicates(["artist_id"])
51 |
52 |
def process_songs(df):
    """Build the song table from the raw playlist DataFrame.

    Args:
        df: Spark DataFrame with an ``items`` array column.

    Returns:
        Spark DataFrame with columns ``song_id``, ``song_name``,
        ``duration_ms``, ``url``, ``popularity``, ``song_added``, ``album_id``
        and ``artist_id``, one row per distinct ``song_id``. ``song_added`` is
        converted with ``to_date``; ``artist_id`` is the id of the first entry
        in the track's ``artists`` array.
    """
    # One row per playlist entry.
    entries = df.select(explode(col("items")).alias("item"))

    # Only the first listed artist is kept as the song's artist reference.
    first_artist_id = col("item.track.artists").getItem(0).getField("id")

    song_columns = [
        col("item.track.id").alias("song_id"),
        col("item.track.name").alias("song_name"),
        col("item.track.duration_ms").alias("duration_ms"),
        col("item.track.external_urls.spotify").alias("url"),
        col("item.track.popularity").alias("popularity"),
        col("item.added_at").alias("song_added"),
        col("item.track.album.id").alias("album_id"),
        first_artist_id.alias("artist_id"),
    ]
    unique_songs = entries.select(*song_columns).dropDuplicates(["song_id"])

    # Turn the 'song_added' string into a date-typed column.
    return unique_songs.withColumn("song_added", to_date(col("song_added")))
73 |
# Derive the album, artist and song tables from the raw playlist DataFrame.
album_df, artist_df, song_df = (
    process_albums(spotify_df),
    process_artists(spotify_df),
    process_songs(spotify_df),
)
78 |
79 |
def write_to_s3(df, path_suffix, format_type="csv"):
    """Write a Spark DataFrame under the project's ``transformed_data`` prefix.

    Args:
        df: Spark DataFrame to persist.
        path_suffix: Path appended after ``transformed_data/``; a trailing
            slash is added to it.
        format_type: Output format handed to the Glue writer (default "csv").
    """
    # The Glue writer takes a DynamicFrame, so wrap the Spark DataFrame first.
    frame_to_write = DynamicFrame.fromDF(df, glueContext, "dynamic_frame")
    target_path = f"s3://spotify-daily-data-project/transformed_data/{path_suffix}/"

    glueContext.write_dynamic_frame.from_options(
        frame=frame_to_write,
        connection_type="s3",
        connection_options={"path": target_path},
        format=format_type,
    )
90 |
# Write data to S3.
# The date stamp is computed once so that all three outputs of a run share the
# same suffix, even when the job happens to run across midnight.
run_date = datetime.now().strftime("%Y-%m-%d")
write_to_s3(album_df, "album/album_transformed_{}".format(run_date), "csv")
write_to_s3(artist_df, "artist/artist_transformed_{}".format(run_date), "csv")
write_to_s3(song_df, "songs/songs_transformed_{}".format(run_date), "csv")

job.commit()
--------------------------------------------------------------------------------
/Spotify Data Pipeline using Spark/spotify_api_data_extract.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import spotipy
4 | from spotipy.oauth2 import SpotifyClientCredentials
5 | import boto3
6 | from datetime import datetime
7 |
def lambda_handler(event, context):
    """Fetch the configured playlist's tracks from Spotify and store the raw JSON in S3.

    Credentials are read from the ``client_id`` and ``client_secret``
    environment variables. The response of ``sp.playlist_tracks`` is serialized
    with ``json.dumps`` and uploaded under ``raw_data/to_processed/`` with a
    timestamped file name.

    Args:
        event: Lambda invocation event (not used).
        context: Lambda runtime context (not used).
    """
    client_id = os.environ.get('client_id')
    client_secret = os.environ.get('client_secret')

    client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    # The previous sp.user_playlists('spotify') call was removed: its result was
    # never used, so it only added an extra API request that could fail.

    playlist_link = "https://open.spotify.com/playlist/37i9dQZEVXbNG2KDcFcKOF?si=1333723a6eff4b7f"
    # Last path segment of the link with the query string stripped off.
    playlist_URI = playlist_link.split("/")[-1].split("?")[0]

    spotify_data = sp.playlist_tracks(playlist_URI)

    s3_client = boto3.client('s3')

    filename = "spotify_raw_" + str(datetime.now()) + ".json"

    s3_client.put_object(
        Bucket="spotify-etl-project-darshil",
        Key="raw_data/to_processed/" + filename,
        Body=json.dumps(spotify_data)
    )
31 |
--------------------------------------------------------------------------------
/Spotify Data Pipeline using Spark/spotipy_layer.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/Spotify Data Pipeline using Spark/spotipy_layer.zip
--------------------------------------------------------------------------------
/data/apple_data/apple_products.csv:
--------------------------------------------------------------------------------
1 | Product Name,Product URL,Brand,Sale Price,Mrp,Discount Percentage,Number Of Ratings,Number Of Reviews,Upc,Star Rating,Ram
2 | "APPLE iPhone 8 Plus (Gold, 64 GB)",https://www.flipkart.com/apple-iphone-8-plus-gold-64-gb/p/itmexrgvuzgzttzh?pid=MOBEXRGV7EHHTGUH,Apple,49900,49900,0,3431,356,MOBEXRGV7EHHTGUH,4.6,2 GB
3 | "APPLE iPhone 8 Plus (Space Grey, 256 GB)",https://www.flipkart.com/apple-iphone-8-plus-space-grey-256-gb/p/itmexrgvzkbyqgtf?pid=MOBEXRGVAC6TJT4F,Apple,84900,84900,0,3431,356,MOBEXRGVAC6TJT4F,4.6,2 GB
4 | "APPLE iPhone 8 Plus (Silver, 256 GB)",https://www.flipkart.com/apple-iphone-8-plus-silver-256-gb/p/itmexrgvxatuyrqw?pid=MOBEXRGVGETABXWZ,Apple,84900,84900,0,3431,356,MOBEXRGVGETABXWZ,4.6,2 GB
5 | "APPLE iPhone 8 (Silver, 256 GB)",https://www.flipkart.com/apple-iphone-8-silver-256-gb/p/itmexrgvae48gzhp?pid=MOBEXRGVMZWUHCBA,Apple,77000,77000,0,11202,794,MOBEXRGVMZWUHCBA,4.5,2 GB
6 | "APPLE iPhone 8 (Gold, 256 GB)",https://www.flipkart.com/apple-iphone-8-gold-256-gb/p/itmexrgv8bvfbzgw?pid=MOBEXRGVPK7PFEJZ,Apple,77000,77000,0,11202,794,MOBEXRGVPK7PFEJZ,4.5,2 GB
7 | "APPLE iPhone 8 Plus (Silver, 64 GB)",https://www.flipkart.com/apple-iphone-8-plus-silver-64-gb/p/itmexrgv7yfjbryy?pid=MOBEXRGVQGYYP8FV,Apple,49900,49900,0,3431,356,MOBEXRGVQGYYP8FV,4.6,2 GB
8 | "APPLE iPhone 8 Plus (Space Grey, 64 GB)",https://www.flipkart.com/apple-iphone-8-plus-space-grey-64-gb/p/itmexrgvehtzhh9v?pid=MOBEXRGVQKBREZP8,Apple,49900,49900,0,3431,356,MOBEXRGVQKBREZP8,4.6,2 GB
9 | "APPLE iPhone 8 (Space Grey, 256 GB)",https://www.flipkart.com/apple-iphone-8-space-grey-256-gb/p/itmexrgvypzqw6df?pid=MOBEXRGVZFZGZEWV,Apple,77000,77000,0,11202,794,MOBEXRGVZFZGZEWV,4.5,2 GB
10 | "APPLE iPhone XS Max (Silver, 64 GB)",https://www.flipkart.com/apple-iphone-xs-max-silver-64-gb/p/itmf944ehzsh7emx?pid=MOBF944E2XAHW8V5,Apple,89900,89900,0,1454,149,MOBF944E2XAHW8V5,4.6,4 GB
11 | "Apple iPhone XR ((PRODUCT)RED, 128 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-xr-product-red-128-gb-includes-earpods-power-adapter/p/itmf9z7zbkvjzegf?pid=MOBF9Z7ZHQC23PWQ,Apple,41999,52900,20,79512,6796,MOBF9Z7ZHQC23PWQ,4.6,4 GB
12 | "Apple iPhone XR (Black, 64 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-xr-black-64-gb-includes-earpods-power-adapter/p/itmf9z7zxu4uqyz2?pid=MOBF9Z7ZPHGV4GNH,Apple,39999,47900,16,79512,6796,MOBF9Z7ZPHGV4GNH,4.6,4 GB
13 | "Apple iPhone XR (Coral, 128 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-xr-coral-128-gb-includes-earpods-power-adapter/p/itmf9z7zgxx5wgez?pid=MOBF9Z7ZS6GF5UAP,Apple,41999,52900,20,79582,6804,MOBF9Z7ZS6GF5UAP,4.6,4 GB
14 | "Apple iPhone XR (Black, 128 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-xr-black-128-gb-includes-earpods-power-adapter/p/itmf9z7zhdgzwmzm?pid=MOBF9Z7ZYWNFGZUC,Apple,41999,52900,20,79512,6796,MOBF9Z7ZYWNFGZUC,4.6,3 GB
15 | "Apple iPhone XR (White, 128 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-xr-white-128-gb-includes-earpods-power-adapter/p/itmf9z7zrz4tbhwm?pid=MOBF9Z7ZZY3HCDZZ,Apple,41999,52900,20,79512,6796,MOBF9Z7ZZY3HCDZZ,4.6,4 GB
16 | "APPLE iPhone 11 Pro Max (Gold, 256 GB)",https://www.flipkart.com/apple-iphone-11-pro-max-gold-256-gb/p/itm94c6536eefc46?pid=MOBFKCTS7HCHSPFH,Apple,131900,131900,0,1078,101,MOBFKCTS7HCHSPFH,4.7,4 GB
17 | "APPLE iPhone 11 Pro Max (Gold, 64 GB)",https://www.flipkart.com/apple-iphone-11-pro-max-gold-64-gb/p/itm6ef18218efdf6?pid=MOBFKCTSAPAYNSGG,Apple,117100,117100,0,1078,101,MOBFKCTSAPAYNSGG,4.7,4 GB
18 | "APPLE iPhone 11 Pro Max (Midnight Green, 256 GB)",https://www.flipkart.com/apple-iphone-11-pro-max-midnight-green-256-gb/p/itm68e0db18f9ecc?pid=MOBFKCTSCAAKGQV7,Apple,131900,131900,0,1078,101,MOBFKCTSCAAKGQV7,4.7,4 GB
19 | "APPLE iPhone 11 Pro Max (Space Grey, 64 GB)",https://www.flipkart.com/apple-iphone-11-pro-max-space-grey-64-gb/p/itme0101031155f8?pid=MOBFKCTSKDMKCGQS,Apple,117100,117100,0,1078,101,MOBFKCTSKDMKCGQS,4.7,4 GB
20 | "APPLE iPhone 11 Pro (Midnight Green, 64 GB)",https://www.flipkart.com/apple-iphone-11-pro-midnight-green-64-gb/p/itm471de0d2e8474?pid=MOBFKCTSN3TG3RFJ,Apple,74999,106600,29,7088,523,MOBFKCTSN3TG3RFJ,4.6,4 GB
21 | "APPLE iPhone 11 Pro (Space Grey, 512 GB)",https://www.flipkart.com/apple-iphone-11-pro-space-grey-512-gb/p/itmcd1f0ddbf1c21?pid=MOBFKCTSRTHRQTFT,Apple,117900,140300,15,7088,523,MOBFKCTSRTHRQTFT,4.6,4 GB
22 | "APPLE iPhone 11 Pro Max (Midnight Green, 64 GB)",https://www.flipkart.com/apple-iphone-11-pro-max-midnight-green-64-gb/p/itmab1763b5ca244?pid=MOBFKCTSRYPAQNYT,Apple,117100,117100,0,1078,101,MOBFKCTSRYPAQNYT,4.7,4 GB
23 | "APPLE iPhone 11 Pro (Midnight Green, 512 GB)",https://www.flipkart.com/apple-iphone-11-pro-midnight-green-512-gb/p/itm0bdd954387ca9?pid=MOBFKCTSSJCWYGCC,Apple,117900,140300,15,7088,523,MOBFKCTSSJCWYGCC,4.6,4 GB
24 | "APPLE iPhone 11 Pro (Space Grey, 256 GB)",https://www.flipkart.com/apple-iphone-11-pro-space-grey-256-gb/p/itmbecef22a95790?pid=MOBFKCTSWGYSAS9X,Apple,99900,121300,17,7081,522,MOBFKCTSWGYSAS9X,4.6,4 GB
25 | "Apple iPhone SE (White, 256 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-se-white-256-gb-includes-earpods-power-adapter/p/itm4d73793c30c4c?pid=MOBFRFXHPZCHAPEH,Apple,44999,54900,18,95909,8161,MOBFRFXHPZCHAPEH,4.5,2 GB
26 | "APPLE iPhone 12 Pro (Silver, 512 GB)",https://www.flipkart.com/apple-iphone-12-pro-silver-512-gb/p/itm0ccf9fc219a71?pid=MOBFWBYZ5UY6ZBVA,Apple,140900,149900,6,542,42,MOBFWBYZ5UY6ZBVA,4.5,4 GB
27 | "APPLE iPhone 12 Pro Max (Pacific Blue, 256 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-pacific-blue-256-gb/p/itm3a0860c94250e?pid=MOBFWBYZ8STJXCVT,Apple,130900,139900,6,580,45,MOBFWBYZ8STJXCVT,4.6,6 GB
28 | "APPLE iPhone 12 Mini (White, 128 GB)",https://www.flipkart.com/apple-iphone-12-mini-white-128-gb/p/itm9098fa76548ba?pid=MOBFWBYZAGXJRDGB,Apple,64900,74900,13,740,64,MOBFWBYZAGXJRDGB,4.5,4 GB
29 | "APPLE iPhone 12 Pro (Graphite, 256 GB)",https://www.flipkart.com/apple-iphone-12-pro-graphite-256-gb/p/itm4fa4da575698c?pid=MOBFWBYZBA36UB7G,Apple,120900,129900,6,545,42,MOBFWBYZBA36UB7G,4.5,6 GB
30 | "APPLE iPhone 12 Mini (White, 64 GB)",https://www.flipkart.com/apple-iphone-12-mini-white-64-gb/p/itmf3b16d1640898?pid=MOBFWBYZBH4CEC4C,Apple,59900,69900,14,740,64,MOBFWBYZBH4CEC4C,4.5,4 GB
31 | "APPLE iPhone 12 (White, 128 GB)",https://www.flipkart.com/apple-iphone-12-white-128-gb/p/itm95393f4c6cc59?pid=MOBFWBYZBTZFGJF9,Apple,75900,84900,10,2101,180,MOBFWBYZBTZFGJF9,4.6,6 GB
32 | "APPLE iPhone 12 Pro (Graphite, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-graphite-128-gb/p/itm03e5f2595d843?pid=MOBFWBYZBZ7Y56WD,Apple,110900,119900,7,545,42,MOBFWBYZBZ7Y56WD,4.5,6 GB
33 | "APPLE iPhone 12 Pro Max (Graphite, 256 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-graphite-256-gb/p/itm8dbdf0b986725?pid=MOBFWBYZEF6XQ5ZW,Apple,130900,139900,6,580,45,MOBFWBYZEF6XQ5ZW,4.6,6 GB
34 | "APPLE iPhone 12 Pro Max (Graphite, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-graphite-128-gb/p/itm973c298184f51?pid=MOBFWBYZFDGQSDWS,Apple,120900,129900,6,580,45,MOBFWBYZFDGQSDWS,4.6,6 GB
35 | "APPLE iPhone 12 Mini (Black, 128 GB)",https://www.flipkart.com/apple-iphone-12-mini-black-128-gb/p/itm0eb512c195bdf?pid=MOBFWBYZH2AMPNPD,Apple,64900,74900,13,730,63,MOBFWBYZH2AMPNPD,4.5,4 GB
36 | "APPLE iPhone 12 Mini (Blue, 128 GB)",https://www.flipkart.com/apple-iphone-12-mini-blue-128-gb/p/itm9b6cdec9700ee?pid=MOBFWBYZHU58PHCZ,Apple,64900,74900,13,730,63,MOBFWBYZHU58PHCZ,4.5,4 GB
37 | "APPLE iPhone 12 (Black, 128 GB)",https://www.flipkart.com/apple-iphone-12-black-128-gb/p/itmf1f0a58f1ecd7?pid=MOBFWBYZK3HACR72,Apple,75900,84900,10,2101,180,MOBFWBYZK3HACR72,4.6,6 GB
38 | "APPLE iPhone 12 (Blue, 128 GB)",https://www.flipkart.com/apple-iphone-12-blue-128-gb/p/itm02853ae92e90a?pid=MOBFWBYZKPTZF9VG,Apple,75900,84900,10,2101,180,MOBFWBYZKPTZF9VG,4.6,6 GB
39 | "APPLE iPhone 12 Pro Max (Silver, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-silver-128-gb/p/itm5a51ba742a17e?pid=MOBFWBYZNSNVGGZC,Apple,120900,129900,6,580,45,MOBFWBYZNSNVGGZC,4.6,6 GB
40 | "APPLE iPhone 12 Mini (Red, 64 GB)",https://www.flipkart.com/apple-iphone-12-mini-red-64-gb/p/itm255dd64643767?pid=MOBFWBYZNVWGWN2U,Apple,59900,69900,14,740,64,MOBFWBYZNVWGWN2U,4.5,6 GB
41 | "APPLE iPhone 12 Pro Max (Gold, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-gold-128-gb/p/itme3ae592065711?pid=MOBFWBYZQVKT77YH,Apple,120900,129900,6,580,45,MOBFWBYZQVKT77YH,4.6,6 GB
42 | "APPLE iPhone 12 (Green, 128 GB)",https://www.flipkart.com/apple-iphone-12-green-128-gb/p/itm4e0a120f7d9c4?pid=MOBFWBYZQXUEHF48,Apple,75900,84900,10,2092,178,MOBFWBYZQXUEHF48,4.6,6 GB
43 | "APPLE iPhone 12 Pro (Pacific Blue, 512 GB)",https://www.flipkart.com/apple-iphone-12-pro-pacific-blue-512-gb/p/itm8a39d6779b04e?pid=MOBFWBYZTHSXKMGW,Apple,140900,149900,6,545,42,MOBFWBYZTHSXKMGW,4.5,4 GB
44 | "APPLE iPhone 12 (White, 64 GB)",https://www.flipkart.com/apple-iphone-12-white-64-gb/p/itm8b88bdc03cd79?pid=MOBFWBYZTK33MBG9,Apple,70900,79900,11,2101,180,MOBFWBYZTK33MBG9,4.6,6 GB
45 | "APPLE iPhone 12 (Black, 64 GB)",https://www.flipkart.com/apple-iphone-12-black-64-gb/p/itma2559422bf7c7?pid=MOBFWBYZU5FWK2VP,Apple,70900,79900,11,2092,178,MOBFWBYZU5FWK2VP,4.6,6 GB
46 | "APPLE iPhone 12 (Red, 128 GB)",https://www.flipkart.com/apple-iphone-12-red-128-gb/p/itma7bd86885ed98?pid=MOBFWBYZUHPFWQRD,Apple,75900,84900,10,2101,180,MOBFWBYZUHPFWQRD,4.6,6 GB
47 | "APPLE iPhone 12 Mini (Black, 64 GB)",https://www.flipkart.com/apple-iphone-12-mini-black-64-gb/p/itm38b727191eb08?pid=MOBFWBYZXSEGBS6F,Apple,59900,69900,14,740,64,MOBFWBYZXSEGBS6F,4.5,4 GB
48 | "APPLE iPhone 12 Pro (Pacific Blue, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-pacific-blue-128-gb/p/itm97c833296c221?pid=MOBFWBYZXYSCEEEH,Apple,110900,119900,7,545,42,MOBFWBYZXYSCEEEH,4.5,6 GB
49 | "APPLE iPhone 12 Pro Max (Pacific Blue, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-pacific-blue-128-gb/p/itmd89812b558a03?pid=MOBFWBYZZABKHZQA,Apple,120900,129900,6,580,45,MOBFWBYZZABKHZQA,4.6,6 GB
50 | "APPLE iPhone 12 Pro Max (Silver, 256 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-silver-256-gb/p/itm30faf74442adb?pid=MOBFWBYZZH4AM8FE,Apple,130900,139900,6,580,45,MOBFWBYZZH4AM8FE,4.6,6 GB
51 | "APPLE iPhone 12 Pro Max (Gold, 256 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-gold-256-gb/p/itm1e0354f5bbc8c?pid=MOBFWBYZZPW8JHQF,Apple,130900,139900,6,580,45,MOBFWBYZZPW8JHQF,4.6,6 GB
52 | "APPLE iPhone 11 (White, 128 GB)",https://www.flipkart.com/apple-iphone-11-white-128-gb/p/itme32df47ea6742?pid=MOBFWQ6B7KKRXDDS,Apple,54999,59900,8,43707,3357,MOBFWQ6B7KKRXDDS,4.6,4 GB
53 | "APPLE iPhone 11 (Red, 128 GB)",https://www.flipkart.com/apple-iphone-11-red-128-gb/p/itm8d14bd0e33a1c?pid=MOBFWQ6BEZTNK59G,Apple,54999,59900,8,43707,3357,MOBFWQ6BEZTNK59G,4.6,4 GB
54 | "APPLE iPhone SE (White, 64 GB)",https://www.flipkart.com/apple-iphone-se-white-64-gb/p/itma00a19e11c81b?pid=MOBFWQ6BGWDVGF3E,Apple,29999,39900,24,95807,8154,MOBFWQ6BGWDVGF3E,4.5,2 GB
55 | "APPLE iPhone SE (Black, 128 GB)",https://www.flipkart.com/apple-iphone-se-black-128-gb/p/itma9285ccc6af28?pid=MOBFWQ6BHUEVZPXD,Apple,34999,44900,22,95909,8161,MOBFWQ6BHUEVZPXD,4.5,2 GB
56 | "APPLE iPhone SE (White, 128 GB)",https://www.flipkart.com/apple-iphone-se-white-128-gb/p/itmc2a0f593a4ad8?pid=MOBFWQ6BJEHMUUZY,Apple,34999,44900,22,95807,8154,MOBFWQ6BJEHMUUZY,4.5,2 GB
57 | "APPLE iPhone SE (Red, 128 GB)",https://www.flipkart.com/apple-iphone-se-red-128-gb/p/itma4202509da171?pid=MOBFWQ6BJTVFKPEJ,Apple,34999,44900,22,95909,8161,MOBFWQ6BJTVFKPEJ,4.5,2 GB
58 | "APPLE iPhone 11 (Black, 128 GB)",https://www.flipkart.com/apple-iphone-11-black-128-gb/p/itm8244e8d955aba?pid=MOBFWQ6BKRYBP5X8,Apple,54999,59900,8,43470,3331,MOBFWQ6BKRYBP5X8,4.6,4 GB
59 | "APPLE iPhone SE (Black, 64 GB)",https://www.flipkart.com/apple-iphone-se-black-64-gb/p/itm4d3d5718a5c95?pid=MOBFWQ6BR3MK7AUG,Apple,29999,39900,24,95909,8161,MOBFWQ6BR3MK7AUG,4.5,4 GB
60 | "APPLE iPhone 11 (Purple, 64 GB)",https://www.flipkart.com/apple-iphone-11-purple-64-gb/p/itm2b8d03427ddac?pid=MOBFWQ6BTFFJKGKE,Apple,46999,54900,14,43470,3331,MOBFWQ6BTFFJKGKE,4.6,4 GB
61 | "APPLE iPhone 11 (White, 64 GB)",https://www.flipkart.com/apple-iphone-11-white-64-gb/p/itmfc6a7091eb20b?pid=MOBFWQ6BVWVEH3XE,Apple,46999,54900,14,43470,3331,MOBFWQ6BVWVEH3XE,4.6,4 GB
62 | "APPLE iPhone 11 (Black, 64 GB)",https://www.flipkart.com/apple-iphone-11-black-64-gb/p/itm4e5041ba101fd?pid=MOBFWQ6BXGJCEYNY,Apple,46999,54900,14,43470,3331,MOBFWQ6BXGJCEYNY,4.6,4 GB
63 | "APPLE iPhone 11 (Red, 64 GB)",https://www.flipkart.com/apple-iphone-11-red-64-gb/p/itmc3935326f2feb?pid=MOBFWQ6BYYV3FCU7,Apple,46999,54900,14,43470,3331,MOBFWQ6BYYV3FCU7,4.6,4 GB
64 |
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Purple, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Purple, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Red, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Red, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Midnight Green, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Midnight Green, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Midnight Green, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Midnight Green, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Space Grey, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Space Grey, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Gold, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Gold, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Space Grey, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Space Grey, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Green, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Green, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Red, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Red, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Graphite, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Graphite, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Graphite, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Graphite, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Pacific Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Pacific Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Pacific Blue, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Pacific Blue, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Silver, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Silver, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Gold, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Gold, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Graphite, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Graphite, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Graphite, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Graphite, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Silver, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Silver, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Gold, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Gold, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Silver, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Silver, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Space Grey, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Space Grey, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone SE (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone SE (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone SE (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone SE (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone SE (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone SE (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone SE (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone SE (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone SE (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone SE (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=APPLE iPhone XS Max (Silver, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone XS Max (Silver, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=Apple iPhone SE (White, 256 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone SE (White, 256 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=Apple iPhone XR ((PRODUCT)RED, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone XR ((PRODUCT)RED, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=Apple iPhone XR (Black, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone XR (Black, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=Apple iPhone XR (Black, 64 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone XR (Black, 64 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=Apple iPhone XR (Coral, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone XR (Coral, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/Product Name=Apple iPhone XR (White, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone XR (White, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet
--------------------------------------------------------------------------------
/data/apple_data/output.csv/_SUCCESS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/_SUCCESS
--------------------------------------------------------------------------------
/data/flight-data-hive/_SUCCESS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data-hive/_SUCCESS
--------------------------------------------------------------------------------
/data/flight-data-hive/_committed_4721890993021653500:
--------------------------------------------------------------------------------
1 | {"added":["part-00000-tid-4721890993021653500-d8ef7f6b-e6e5-4451-af50-08281422f186-0-c000"],"removed":[]}
--------------------------------------------------------------------------------
/data/flight-data-hive/_started_4721890993021653500:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data-hive/_started_4721890993021653500
--------------------------------------------------------------------------------
/data/flight-data-hive/part-00000-tid-4721890993021653500-d8ef7f6b-e6e5-4451-af50-08281422f186-0-c000:
--------------------------------------------------------------------------------
1 | United States,Romania,15
2 | United States,Croatia,1
3 | United States,Ireland,344
4 | Egypt,United States,15
5 | United States,India,62
6 | United States,Singapore,1
7 | United States,Grenada,62
8 | Costa Rica,United States,588
9 | Senegal,United States,40
10 | Moldova,United States,1
11 | United States,Sint Maarten,325
12 | United States,Marshall Islands,39
13 | Guyana,United States,64
14 | Malta,United States,1
15 | Anguilla,United States,41
16 | Bolivia,United States,30
17 | United States,Paraguay,6
18 | Algeria,United States,4
19 | Turks and Caicos Islands,United States,230
20 | United States,Gibraltar,1
21 | Saint Vincent and the Grenadines,United States,1
22 | Italy,United States,382
23 | United States,Federated States of Micronesia,69
24 | United States,Russia,161
25 | Pakistan,United States,12
26 | United States,Netherlands,660
27 | Iceland,United States,181
28 | Marshall Islands,United States,42
29 | Luxembourg,United States,155
30 | Honduras,United States,362
31 | The Bahamas,United States,955
32 | United States,Senegal,42
33 | El Salvador,United States,561
34 | Samoa,United States,25
35 | United States,Angola,13
36 | Switzerland,United States,294
37 | United States,Anguilla,38
38 | Sint Maarten,United States,325
39 | Hong Kong,United States,332
40 | Trinidad and Tobago,United States,211
41 | Latvia,United States,19
42 | United States,Ecuador,300
43 | Suriname,United States,1
44 | Mexico,United States,7140
45 | United States,Cyprus,1
46 | Ecuador,United States,268
47 | United States,Portugal,134
48 | United States,Costa Rica,608
49 | United States,Guatemala,318
50 | United States,Suriname,34
51 | Colombia,United States,873
52 | United States,Cape Verde,14
53 | United States,Jamaica,712
54 | Norway,United States,121
55 | United States,Malaysia,3
56 | United States,Morocco,19
57 | Thailand,United States,3
58 | United States,Samoa,25
59 | Venezuela,United States,290
60 | United States,Palau,31
61 | United States,Venezuela,246
62 | Panama,United States,510
63 | Antigua and Barbuda,United States,126
64 | United States,Chile,185
65 | Morocco,United States,15
66 | United States,Finland,28
67 | Azerbaijan,United States,21
68 | United States,Greece,23
69 | United States,The Bahamas,986
70 | New Zealand,United States,111
71 | Liberia,United States,2
72 | United States,Hong Kong,414
73 | Hungary,United States,2
74 | United States,China,920
75 | United States,Vietnam,2
76 | Burkina Faso,United States,1
77 | Sweden,United States,118
78 | United States,Kuwait,28
79 | United States,Dominican Republic,1420
80 | United States,Egypt,12
81 | Israel,United States,134
82 | United States,United States,370002
83 | Ethiopia,United States,13
84 | United States,Luxembourg,134
85 | United States,Poland,33
86 | Martinique,United States,44
87 | United States,Saint Barthelemy,41
88 | Saint Barthelemy,United States,39
89 | Barbados,United States,154
90 | United States,Turkey,129
91 | Djibouti,United States,1
92 | United States,Azerbaijan,21
93 | United States,Estonia,1
94 | Germany,United States,1468
95 | United States,South Korea,827
96 | United States,El Salvador,508
97 | Ireland,United States,335
98 | United States,Hungary,3
99 | Zambia,United States,1
100 | Malaysia,United States,2
101 | United States,Ethiopia,12
102 | United States,Panama,465
103 | United States,Aruba,342
104 | United States,Thailand,4
105 | United States,Turks and Caicos Islands,236
106 | Croatia,United States,2
107 | United States,Pakistan,12
108 | Cyprus,United States,1
109 | United States,Honduras,407
110 | Fiji,United States,24
111 | Qatar,United States,108
112 | Saint Kitts and Nevis,United States,139
113 | Kuwait,United States,32
114 | Taiwan,United States,266
115 | Haiti,United States,226
116 | Canada,United States,8399
117 | Federated States of Micronesia,United States,69
118 | United States,Liberia,2
119 | Jamaica,United States,666
120 | United States,Malta,2
121 | Dominican Republic,United States,1353
122 | Japan,United States,1548
123 | United States,Lithuania,1
124 | Finland,United States,26
125 | United States,Guadeloupe,59
126 | United States,Ukraine,13
127 | United States,France,952
128 | United States,Norway,115
129 | Aruba,United States,346
130 | French Guiana,United States,5
131 | United States,Kiribati,35
132 | India,United States,61
133 | British Virgin Islands,United States,107
134 | Brazil,United States,853
135 | United States,Germany,1336
136 | United States,New Zealand,74
137 | French Polynesia,United States,43
138 | United Arab Emirates,United States,320
139 | Singapore,United States,3
140 | United States,Mexico,7187
141 | United States,Sweden,119
142 | Netherlands,United States,776
143 | United States,Martinique,43
144 | United States,United Arab Emirates,313
145 | United States,Bulgaria,1
146 | Denmark,United States,153
147 | China,United States,772
148 | United States,Nicaragua,201
149 | United States,Philippines,126
150 | United States,Georgia,1
151 | United States,Belgium,228
152 | Cayman Islands,United States,314
153 | Argentina,United States,180
154 | Peru,United States,279
155 | South Africa,United States,36
156 | United States,Iceland,202
157 | United States,Argentina,141
158 | Spain,United States,420
159 | Bermuda,United States,183
160 | United States,Nigeria,50
161 | United States,Austria,63
162 | United States,Bonaire, Sint Eustatius, and Saba,59
163 | Kiribati,United States,26
164 | Saudi Arabia,United States,83
165 | Czech Republic,United States,13
166 | United States,Israel,127
167 | Belgium,United States,259
168 | United States,Saint Lucia,136
169 | United States,Bahrain,1
170 | United States,British Virgin Islands,80
171 | Curacao,United States,90
172 | Georgia,United States,2
173 | United States,Denmark,152
174 | United States,Guyana,63
175 | Philippines,United States,134
176 | Grenada,United States,53
177 | Cape Verde,United States,20
178 | Cote d'Ivoire,United States,1
179 | Ukraine,United States,14
180 | United States,Papua New Guinea,1
181 | Russia,United States,176
182 | United States,Saudi Arabia,70
183 | Guatemala,United States,397
184 | Saint Lucia,United States,123
185 | Paraguay,United States,60
186 | United States,Curacao,83
187 | Kosovo,United States,1
188 | United States,Taiwan,235
189 | Tunisia,United States,3
190 | United States,South Africa,40
191 | Niger,United States,2
192 | Turkey,United States,138
193 | United Kingdom,United States,2025
194 | Romania,United States,14
195 | United States,Greenland,4
196 | Papua New Guinea,United States,3
197 | United States,Spain,442
198 | Iraq,United States,1
199 | United States,Italy,438
200 | Cuba,United States,466
201 | United States,Switzerland,305
202 | Dominica,United States,20
203 | United States,Japan,1496
204 | Portugal,United States,127
205 | United States,Brazil,619
206 | Bahrain,United States,19
207 | United States,Peru,337
208 | Indonesia,United States,1
209 | United States,Belize,193
210 | United States,United Kingdom,1970
211 | Belize,United States,188
212 | United States,Ghana,20
213 | United States,Indonesia,2
214 | United States,Fiji,25
215 | United States,Canada,8483
216 | United States,Antigua and Barbuda,117
217 | United States,French Polynesia,40
218 | Nicaragua,United States,179
219 | United States,Latvia,15
220 | United States,Dominica,27
221 | United States,Czech Republic,12
222 | United States,Australia,258
223 | United States,Cook Islands,13
224 | Austria,United States,62
225 | Jordan,United States,44
226 | Palau,United States,30
227 | South Korea,United States,1048
228 | Angola,United States,15
229 | Ghana,United States,18
230 | New Caledonia,United States,1
231 | Guadeloupe,United States,56
232 | France,United States,935
233 | Poland,United States,32
234 | Nigeria,United States,59
235 | United States,Uruguay,13
236 | Greenland,United States,2
237 | United States,Bermuda,193
238 | Chile,United States,174
239 | United States,Cuba,478
240 | United States,Montenegro,1
241 | United States,Colombia,867
242 | United States,Barbados,130
243 | United States,Qatar,109
244 | Australia,United States,329
245 | United States,Cayman Islands,310
246 | United States,Jordan,44
247 | United States,Namibia,1
248 | United States,Trinidad and Tobago,217
249 | United States,Bolivia,13
250 | Cook Islands,United States,13
251 | Bulgaria,United States,3
252 | United States,Saint Kitts and Nevis,145
253 | Uruguay,United States,43
254 | United States,Haiti,225
255 | "Bonaire, Sint Eustatius, and Saba",United States,58
256 | Greece,United States,30
257 |
--------------------------------------------------------------------------------
/data/flight-data/csv/2010-summary.csv:
--------------------------------------------------------------------------------
1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count
2 | United States,Romania,1
3 | United States,Ireland,264
4 | United States,India,69
5 | Egypt,United States,24
6 | Equatorial Guinea,United States,1
7 | United States,Singapore,25
8 | United States,Grenada,54
9 | Costa Rica,United States,477
10 | Senegal,United States,29
11 | United States,Marshall Islands,44
12 | Guyana,United States,17
13 | United States,Sint Maarten,53
14 | Malta,United States,1
15 | Bolivia,United States,46
16 | Anguilla,United States,21
17 | Turks and Caicos Islands,United States,136
18 | United States,Afghanistan,2
19 | Saint Vincent and the Grenadines,United States,1
20 | Italy,United States,390
21 | United States,Russia,156
22 | United States,Federated States of Micronesia,48
23 | Pakistan,United States,9
24 | United States,Netherlands,570
25 | Iceland,United States,118
26 | Marshall Islands,United States,77
27 | Luxembourg,United States,91
28 | Honduras,United States,391
29 | The Bahamas,United States,903
30 | El Salvador,United States,519
31 | United States,Senegal,46
32 | Samoa,United States,28
33 | United States,Angola,18
34 | Kazakhstan,United States,3
35 | Switzerland,United States,315
36 | United States,Anguilla,20
37 | Sint Maarten,United States,61
38 | Hong Kong,United States,252
39 | Trinidad and Tobago,United States,187
40 | Latvia,United States,12
41 | United States,Ecuador,345
42 | Slovakia,United States,1
43 | Suriname,United States,12
44 | Mexico,United States,6200
45 | United States,Cyprus,1
46 | Ecuador,United States,272
47 | United States,Bosnia and Herzegovina,1
48 | United States,Portugal,104
49 | United States,Costa Rica,501
50 | United States,Guatemala,333
51 | United States,Suriname,13
52 | Colombia,United States,785
53 | United States,Cape Verde,18
54 | United States,Jamaica,757
55 | United States,Malaysia,3
56 | Norway,United States,29
57 | United States,Morocco,16
58 | Thailand,United States,16
59 | Venezuela,United States,377
60 | United States,Palau,30
61 | United States,Samoa,28
62 | United States,Algeria,1
63 | United States,Venezuela,341
64 | Panama,United States,355
65 | United States,Finland,20
66 | United States,Chile,176
67 | Morocco,United States,16
68 | United States,Greece,61
69 | Antigua and Barbuda,United States,123
70 | Azerbaijan,United States,1
71 | United States,The Bahamas,959
72 | New Zealand,United States,86
73 | Liberia,United States,1
74 | United States,Hong Kong,293
75 | Hungary,United States,14
76 | United States,China,505
77 | United States,Vietnam,1
78 | Sweden,United States,65
79 | United States,Kuwait,25
80 | United States,Dominican Republic,1150
81 | United States,Egypt,25
82 | Israel,United States,117
83 | United States,United States,348113
84 | United States,Luxembourg,90
85 | Ethiopia,United States,12
86 | United States,Poland,61
87 | Martinique,United States,23
88 | Saint Barthelemy,United States,28
89 | United States,Saint Barthelemy,29
90 | United States,Turkey,87
91 | Barbados,United States,130
92 | United States,Estonia,1
93 | United States,Azerbaijan,1
94 | Germany,United States,1392
95 | Kyrgyzstan,United States,2
96 | United States,South Korea,621
97 | United States,El Salvador,464
98 | Ireland,United States,231
99 | United States,Hungary,15
100 | United States,Serbia,1
101 | Malaysia,United States,1
102 | United States,Panama,363
103 | United States,Ethiopia,12
104 | United States,Aruba,349
105 | United States,Thailand,13
106 | United States,Netherlands Antilles,289
107 | United States,Turks and Caicos Islands,147
108 | Cyprus,United States,2
109 | United States,Pakistan,15
110 | United States,Honduras,393
111 | Qatar,United States,41
112 | Fiji,United States,53
113 | Saint Kitts and Nevis,United States,113
114 | Taiwan,United States,275
115 | Haiti,United States,238
116 | Kuwait,United States,26
117 | Canada,United States,8271
118 | Federated States of Micronesia,United States,46
119 | United States,Liberia,1
120 | Jamaica,United States,733
121 | United States,Malta,2
122 | Dominican Republic,United States,1109
123 | Japan,United States,1383
124 | United States,Saint Vincent and the Grenadines,16
125 | United States,Guadeloupe,33
126 | Finland,United States,24
127 | United States,Ukraine,17
128 | United States,France,776
129 | United States,Norway,36
130 | Aruba,United States,359
131 | French Guiana,United States,4
132 | United States,Kiribati,18
133 | India,United States,66
134 | British Virgin Islands,United States,49
135 | United States,Germany,1406
136 | Brazil,United States,995
137 | United States,New Zealand,71
138 | French Polynesia,United States,38
139 | United Arab Emirates,United States,165
140 | Singapore,United States,25
141 | United States,Mexico,6220
142 | Netherlands,United States,586
143 | United States,Sweden,73
144 | United States,Gabon,1
145 | United States,Martinique,20
146 | United States,United Arab Emirates,156
147 | United States,Bulgaria,2
148 | China,United States,448
149 | Denmark,United States,98
150 | United States,Philippines,116
151 | United States,Nicaragua,181
152 | United States,Belgium,355
153 | Peru,United States,212
154 | Argentina,United States,184
155 | Cayman Islands,United States,247
156 | South Africa,United States,25
157 | United States,Iceland,129
158 | United States,Argentina,159
159 | Spain,United States,422
160 | Netherlands Antilles,United States,286
161 | Bermuda,United States,183
162 | United States,Austria,34
163 | United States,Nigeria,18
164 | United States,"Bonaire, Sint Eustatius, and Saba",16
165 | Kiribati,United States,17
166 | Saudi Arabia,United States,42
167 | Czech Republic,United States,21
168 | Belgium,United States,408
169 | United States,Israel,122
170 | United States,Bahrain,1
171 | United States,Saint Lucia,121
172 | Afghanistan,United States,11
173 | United States,British Virgin Islands,47
174 | Curacao,United States,20
175 | Georgia,United States,2
176 | United States,Guyana,20
177 | United States,Denmark,107
178 | Philippines,United States,132
179 | Grenada,United States,65
180 | Cape Verde,United States,18
181 | Ukraine,United States,19
182 | United States,Papua New Guinea,1
183 | United States,Saudi Arabia,54
184 | Russia,United States,152
185 | Guatemala,United States,386
186 | Saint Lucia,United States,116
187 | Paraguay,United States,90
188 | United States,Curacao,14
189 | United States,Taiwan,252
190 | United States,South Africa,15
191 | Turkey,United States,75
192 | United Kingdom,United States,1629
193 | United States,Greenland,2
194 | United States,Spain,442
195 | Cuba,United States,243
196 | United States,Italy,433
197 | United States,Switzerland,334
198 | Dominica,United States,28
199 | United States,Japan,1307
200 | Portugal,United States,102
201 | United States,Brazil,578
202 | Bahrain,United States,30
203 | United States,Peru,284
204 | Vietnam,United States,1
205 | United States,United Kingdom,1503
206 | United States,Belize,118
207 | Belize,United States,121
208 | United States,Ghana,28
209 | United States,Indonesia,1
210 | United States,Fiji,51
211 | United States,Canada,8305
212 | United States,French Polynesia,38
213 | United States,Antigua and Barbuda,121
214 | United States,Burkina Faso,1
215 | Nicaragua,United States,178
216 | United States,Latvia,13
217 | United States,Dominica,32
218 | United States,Czech Republic,22
219 | United States,Australia,216
220 | Austria,United States,36
221 | United States,Cook Islands,12
222 | Jordan,United States,50
223 | Palau,United States,31
224 | Uganda,United States,2
225 | United States,Cameroon,1
226 | South Korea,United States,683
227 | Angola,United States,14
228 | Ghana,United States,27
229 | Guadeloupe,United States,40
230 | France,United States,774
231 | United States,Kyrgyzstan,12
232 | Poland,United States,62
233 | Nigeria,United States,16
234 | United States,Uruguay,24
235 | Greenland,United States,2
236 | Chile,United States,166
237 | United States,Bermuda,180
238 | United States,Cuba,235
239 | United States,Colombia,832
240 | United States,Qatar,40
241 | United States,Barbados,119
242 | Australia,United States,290
243 | United States,Jordan,51
244 | United States,Cayman Islands,251
245 | United States,Trinidad and Tobago,200
246 | United States,Bolivia,39
247 | United States,Slovakia,1
248 | Uruguay,United States,54
249 | Cook Islands,United States,13
250 | United States,Saint Kitts and Nevis,127
251 | Bulgaria,United States,1
252 | United States,French Guiana,1
253 | United States,Haiti,226
254 | United States,Uganda,1
255 | "Bonaire, Sint Eustatius, and Saba",United States,16
256 | Greece,United States,50
257 |
--------------------------------------------------------------------------------
/data/flight-data/csv/2011-summary.csv:
--------------------------------------------------------------------------------
1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count
2 | United States,Saint Martin,2
3 | United States,Guinea,2
4 | United States,Croatia,1
5 | United States,Romania,3
6 | United States,Ireland,268
7 | Egypt,United States,13
8 | United States,India,76
9 | United States,Singapore,24
10 | United States,Grenada,59
11 | Costa Rica,United States,494
12 | Senegal,United States,29
13 | Guyana,United States,26
14 | United States,Marshall Islands,49
15 | United States,Sint Maarten,223
16 | Malta,United States,1
17 | Bolivia,United States,61
18 | Anguilla,United States,21
19 | United States,Paraguay,3
20 | United States,Gibraltar,1
21 | Turks and Caicos Islands,United States,163
22 | United States,Afghanistan,3
23 | Saint Vincent and the Grenadines,United States,6
24 | Italy,United States,428
25 | United States,Russia,153
26 | United States,Federated States of Micronesia,54
27 | Pakistan,United States,12
28 | Iceland,United States,113
29 | United States,Netherlands,622
30 | Marshall Islands,United States,81
31 | Luxembourg,United States,120
32 | Honduras,United States,386
33 | The Bahamas,United States,811
34 | El Salvador,United States,495
35 | United States,Senegal,33
36 | Samoa,United States,25
37 | United States,Angola,12
38 | Switzerland,United States,314
39 | United States,Anguilla,20
40 | Sint Maarten,United States,240
41 | Hong Kong,United States,282
42 | Trinidad and Tobago,United States,205
43 | Latvia,United States,12
44 | Yemen,United States,1
45 | United States,Ecuador,351
46 | Suriname,United States,11
47 | Mexico,United States,5953
48 | United States,Cyprus,1
49 | Ecuador,United States,280
50 | United States,Portugal,109
51 | United States,Guatemala,340
52 | United States,Costa Rica,526
53 | United States,Suriname,20
54 | Colombia,United States,707
55 | United States,Cape Verde,19
56 | United States,Jamaica,695
57 | Norway,United States,25
58 | United States,Malaysia,1
59 | The Gambia,United States,1
60 | United States,Morocco,17
61 | Thailand,United States,12
62 | United States,Palau,41
63 | Venezuela,United States,373
64 | United States,Samoa,25
65 | United States,Algeria,1
66 | Panama,United States,397
67 | United States,Venezuela,352
68 | Guinea,United States,5
69 | Antigua and Barbuda,United States,146
70 | United States,Finland,32
71 | United States,Chile,182
72 | United States,Greece,45
73 | Morocco,United States,19
74 | United States,The Bahamas,888
75 | United States,Brunei,1
76 | New Zealand,United States,96
77 | United States,Hong Kong,317
78 | Hungary,United States,13
79 | United States,China,605
80 | Sweden,United States,59
81 | United States,Kuwait,27
82 | United States,Dominican Republic,1100
83 | United States,Egypt,15
84 | Israel,United States,121
85 | United States,United States,352742
86 | United States,Luxembourg,170
87 | Ethiopia,United States,12
88 | United States,Poland,42
89 | Martinique,United States,20
90 | Saint Barthelemy,United States,38
91 | United States,Saint Barthelemy,39
92 | Barbados,United States,129
93 | United States,Turkey,99
94 | United States,Azerbaijan,3
95 | Germany,United States,1423
96 | United States,South Korea,661
97 | United States,El Salvador,443
98 | Ireland,United States,250
99 | United States,Hungary,17
100 | Malaysia,United States,2
101 | United States,Panama,380
102 | United States,Ethiopia,12
103 | United States,Thailand,14
104 | United States,Aruba,332
105 | United States,Turks and Caicos Islands,177
106 | Croatia,United States,2
107 | United States,Pakistan,24
108 | United States,Honduras,388
109 | Qatar,United States,48
110 | Fiji,United States,49
111 | Saint Kitts and Nevis,United States,105
112 | Taiwan,United States,248
113 | Haiti,United States,188
114 | Kuwait,United States,27
115 | Canada,United States,8514
116 | Belarus,United States,1
117 | Federated States of Micronesia,United States,48
118 | Jamaica,United States,675
119 | United States,Malta,3
120 | Dominican Republic,United States,1020
121 | Japan,United States,1532
122 | United States,Lithuania,1
123 | United States,Saint Vincent and the Grenadines,17
124 | Finland,United States,33
125 | United States,Ukraine,15
126 | United States,Guadeloupe,31
127 | United States,France,914
128 | Aruba,United States,347
129 | French Guiana,United States,14
130 | United States,Norway,33
131 | United States,Kiribati,28
132 | India,United States,73
133 | British Virgin Islands,United States,73
134 | United States,Germany,1480
135 | Brazil,United States,969
136 | United States,New Zealand,77
137 | French Polynesia,United States,36
138 | United Arab Emirates,United States,157
139 | Singapore,United States,27
140 | United States,Mexico,6000
141 | Netherlands,United States,635
142 | United States,Sweden,68
143 | United States,Martinique,20
144 | United States,United Arab Emirates,142
145 | China,United States,504
146 | United States,Philippines,109
147 | United States,Nicaragua,173
148 | Denmark,United States,95
149 | United States,Georgia,1
150 | United States,Belgium,369
151 | Libya,United States,1
152 | Argentina,United States,183
153 | Peru,United States,246
154 | Cayman Islands,United States,251
155 | South Africa,United States,24
156 | United States,Argentina,152
157 | United States,Iceland,138
158 | Spain,United States,445
159 | Bermuda,United States,191
160 | United States,Austria,34
161 | United States,Nigeria,26
162 | United States,"Bonaire, Sint Eustatius, and Saba",49
163 | Kiribati,United States,28
164 | Czech Republic,United States,23
165 | Saudi Arabia,United States,83
166 | Macau,United States,2
167 | Belgium,United States,376
168 | United States,Israel,126
169 | United States,Saint Lucia,102
170 | United States,Bahrain,2
171 | United States,British Virgin Islands,65
172 | Afghanistan,United States,8
173 | Curacao,United States,106
174 | Georgia,United States,1
175 | United States,Guyana,29
176 | Philippines,United States,127
177 | United States,Denmark,103
178 | Grenada,United States,67
179 | Cape Verde,United States,13
180 | Ukraine,United States,15
181 | United States,Papua New Guinea,1
182 | Russia,United States,199
183 | United States,Saudi Arabia,77
184 | Guatemala,United States,407
185 | Saint Lucia,United States,102
186 | Paraguay,United States,85
187 | United States,Curacao,97
188 | United States,Taiwan,231
189 | United States,South Africa,21
190 | Togo,United States,1
191 | Turkey,United States,104
192 | United Kingdom,United States,1726
193 | Romania,United States,4
194 | United States,Greenland,2
195 | United States,Spain,472
196 | Cuba,United States,320
197 | United States,Italy,447
198 | United States,Switzerland,319
199 | Dominica,United States,25
200 | United States,Japan,1412
201 | Portugal,United States,95
202 | United States,Brazil,575
203 | Bahrain,United States,40
204 | United States,Peru,288
205 | Vietnam,United States,1
206 | United States,United Kingdom,1649
207 | United States,Belize,112
208 | Belize,United States,113
209 | United States,Ghana,39
210 | Mauritania,United States,1
211 | Saint Martin,United States,1
212 | United States,Indonesia,1
213 | United States,Fiji,48
214 | United States,Mauritania,1
215 | United States,Canada,8650
216 | United States,Antigua and Barbuda,152
217 | United States,French Polynesia,36
218 | Nicaragua,United States,180
219 | Namibia,United States,1
220 | United States,Latvia,12
221 | United States,Dominica,29
222 | United States,Czech Republic,25
223 | United States,Australia,231
224 | United States,Cook Islands,12
225 | Austria,United States,34
226 | Palau,United States,41
227 | Jordan,United States,42
228 | South Korea,United States,713
229 | Ghana,United States,39
230 | Angola,United States,13
231 | Guadeloupe,United States,33
232 | France,United States,876
233 | Nigeria,United States,31
234 | Poland,United States,39
235 | United States,Uruguay,27
236 | Greenland,United States,1
237 | Chile,United States,178
238 | United States,Bermuda,196
239 | Lebanon,United States,1
240 | United States,Cuba,326
241 | United States,Colombia,777
242 | United States,Barbados,125
243 | United States,Qatar,48
244 | Australia,United States,280
245 | United States,Cayman Islands,262
246 | United States,Jordan,45
247 | United States,Bolivia,51
248 | United States,Trinidad and Tobago,213
249 | Uruguay,United States,50
250 | Cook Islands,United States,12
251 | United States,Saint Kitts and Nevis,120
252 | Bulgaria,United States,4
253 | United States,French Guiana,11
254 | United States,Haiti,197
255 | "Bonaire, Sint Eustatius, and Saba",United States,50
256 | Greece,United States,38
257 |
--------------------------------------------------------------------------------
/data/flight-data/csv/2012-summary.csv:
--------------------------------------------------------------------------------
1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count
2 | United States,Croatia,1
3 | United States,Ireland,252
4 | Egypt,United States,13
5 | United States,India,62
6 | United States,Singapore,25
7 | United States,Grenada,46
8 | Costa Rica,United States,522
9 | Senegal,United States,31
10 | Guyana,United States,65
11 | United States,Marshall Islands,30
12 | United States,Sint Maarten,245
13 | Bolivia,United States,35
14 | Anguilla,United States,19
15 | United States,Paraguay,5
16 | United States,Afghanistan,5
17 | Turks and Caicos Islands,United States,183
18 | Saint Vincent and the Grenadines,United States,6
19 | Italy,United States,381
20 | Pakistan,United States,12
21 | United States,Russia,148
22 | United States,Federated States of Micronesia,63
23 | United States,Netherlands,607
24 | Iceland,United States,137
25 | Marshall Islands,United States,60
26 | United States,Togo,1
27 | Luxembourg,United States,111
28 | Honduras,United States,413
29 | The Bahamas,United States,975
30 | El Salvador,United States,539
31 | United States,Senegal,33
32 | United States,Solomon Islands,1
33 | United States,Angola,12
34 | Samoa,United States,25
35 | Kazakhstan,United States,1
36 | Switzerland,United States,285
37 | United States,Anguilla,19
38 | Sint Maarten,United States,243
39 | Hong Kong,United States,296
40 | Trinidad and Tobago,United States,216
41 | Latvia,United States,13
42 | United States,Ecuador,321
43 | Suriname,United States,14
44 | Mexico,United States,5983
45 | Ecuador,United States,239
46 | United States,Portugal,111
47 | United States,Costa Rica,555
48 | United States,Guatemala,345
49 | United States,Suriname,26
50 | Colombia,United States,759
51 | United States,The Gambia,1
52 | United States,Cape Verde,18
53 | United States,Jamaica,610
54 | Norway,United States,31
55 | The Gambia,United States,2
56 | United States,Morocco,16
57 | Thailand,United States,6
58 | Venezuela,United States,389
59 | United States,Samoa,25
60 | United States,Palau,35
61 | United States,Venezuela,360
62 | Panama,United States,394
63 | United States,Chile,201
64 | United States,Finland,21
65 | Antigua and Barbuda,United States,145
66 | Morocco,United States,15
67 | United States,Greece,21
68 | United States,The Bahamas,1054
69 | New Zealand,United States,92
70 | United States,Hong Kong,331
71 | Hungary,United States,2
72 | United States,Tunisia,1
73 | United States,China,610
74 | Sweden,United States,50
75 | United States,Kuwait,28
76 | United States,Dominican Republic,1123
77 | United States,Egypt,12
78 | Israel,United States,117
79 | United States,United States,347452
80 | United States,Luxembourg,123
81 | Ethiopia,United States,12
82 | United States,Poland,40
83 | Martinique,United States,17
84 | United States,Saint Barthelemy,49
85 | Saint Barthelemy,United States,49
86 | Barbados,United States,121
87 | United States,Azerbaijan,1
88 | United States,Turkey,89
89 | Germany,United States,1404
90 | Kyrgyzstan,United States,1
91 | United States,South Korea,703
92 | United States,El Salvador,465
93 | Ireland,United States,255
94 | United States,Panama,393
95 | United States,Ethiopia,12
96 | United States,Thailand,6
97 | United States,Aruba,332
98 | United States,Turks and Caicos Islands,183
99 | Croatia,United States,1
100 | United States,Pakistan,16
101 | United States,Honduras,372
102 | Qatar,United States,56
103 | Fiji,United States,28
104 | Saint Kitts and Nevis,United States,109
105 | Taiwan,United States,229
106 | Haiti,United States,224
107 | Kuwait,United States,26
108 | Canada,United States,8034
109 | Belarus,United States,1
110 | Federated States of Micronesia,United States,54
111 | Jamaica,United States,617
112 | Dominican Republic,United States,1078
113 | Japan,United States,1538
114 | United States,Saint Vincent and the Grenadines,11
115 | Finland,United States,22
116 | United States,Ukraine,13
117 | United States,Guadeloupe,34
118 | United States,France,840
119 | United States,Norway,33
120 | Aruba,United States,349
121 | French Guiana,United States,8
122 | United States,Kiribati,26
123 | India,United States,61
124 | British Virgin Islands,United States,83
125 | Brazil,United States,979
126 | United States,Germany,1413
127 | United States,New Zealand,65
128 | United States,Cambodia,1
129 | French Polynesia,United States,39
130 | United Arab Emirates,United States,188
131 | Singapore,United States,26
132 | United States,Mexico,5974
133 | Netherlands,United States,640
134 | United States,Sweden,54
135 | United States,Martinique,17
136 | United States,United Arab Emirates,176
137 | China,United States,509
138 | United States,Philippines,112
139 | Denmark,United States,83
140 | United States,Nicaragua,183
141 | Oman,United States,1
142 | United States,Belgium,275
143 | Argentina,United States,208
144 | Peru,United States,249
145 | Cayman Islands,United States,258
146 | South Africa,United States,24
147 | United States,Argentina,170
148 | United States,Iceland,150
149 | Cameroon,United States,1
150 | Spain,United States,390
151 | Bermuda,United States,190
152 | United States,Austria,31
153 | United States,Nigeria,44
154 | United States,"Bonaire, Sint Eustatius, and Saba",44
155 | Kiribati,United States,26
156 | Saudi Arabia,United States,72
157 | Czech Republic,United States,15
158 | Macau,United States,1
159 | United States,Bahrain,2
160 | Belgium,United States,352
161 | United States,Israel,117
162 | United States,Saint Lucia,84
163 | Sierra Leone,United States,1
164 | Afghanistan,United States,5
165 | United States,British Virgin Islands,78
166 | Curacao,United States,107
167 | Georgia,United States,2
168 | United States,Guyana,79
169 | Philippines,United States,136
170 | United States,Denmark,82
171 | Grenada,United States,51
172 | Cape Verde,United States,19
173 | Ukraine,United States,14
174 | United States,Papua New Guinea,2
175 | Russia,United States,183
176 | United States,Saudi Arabia,70
177 | Guatemala,United States,413
178 | Saint Lucia,United States,81
179 | Paraguay,United States,85
180 | United States,Curacao,96
181 | United States,Taiwan,214
182 | Tunisia,United States,2
183 | United States,South Africa,29
184 | Turkey,United States,94
185 | United Kingdom,United States,1852
186 | Solomon Islands,United States,2
187 | United States,Greenland,1
188 | United States,Spain,393
189 | United States,Italy,377
190 | Cuba,United States,340
191 | United States,Switzerland,282
192 | Dominica,United States,26
193 | United States,Japan,1451
194 | Portugal,United States,112
195 | Bahrain,United States,65
196 | United States,Brazil,567
197 | United States,Peru,292
198 | Vietnam,United States,2
199 | United States,United Kingdom,1713
200 | United States,Belize,108
201 | Belize,United States,113
202 | United States,Ghana,31
203 | Mauritania,United States,1
204 | United States,Fiji,26
205 | United States,Canada,8097
206 | United States,French Polynesia,38
207 | United States,Antigua and Barbuda,146
208 | Nicaragua,United States,174
209 | United States,Latvia,14
210 | United States,Dominica,25
211 | United States,Czech Republic,13
212 | United States,Australia,211
213 | United States,Cook Islands,12
214 | Austria,United States,34
215 | Jordan,United States,41
216 | Palau,United States,35
217 | United States,Equatorial Guinea,4
218 | South Korea,United States,819
219 | Angola,United States,12
220 | Ghana,United States,30
221 | New Caledonia,United States,1
222 | Guadeloupe,United States,36
223 | France,United States,818
224 | Poland,United States,38
225 | Nigeria,United States,57
226 | United States,Uruguay,17
227 | Greenland,United States,1
228 | Chile,United States,178
229 | United States,Bermuda,194
230 | United States,Cuba,341
231 | United States,Colombia,806
232 | United States,Barbados,109
233 | United States,Qatar,56
234 | Australia,United States,277
235 | United States,Cayman Islands,247
236 | United States,Jordan,43
237 | United States,Trinidad and Tobago,213
238 | United States,Bolivia,21
239 | Uruguay,United States,53
240 | Cook Islands,United States,12
241 | United States,Saint Kitts and Nevis,124
242 | Bulgaria,United States,6
243 | United States,French Guiana,1
244 | United States,Haiti,234
245 | "Bonaire, Sint Eustatius, and Saba",United States,46
246 | Greece,United States,17
247 |
--------------------------------------------------------------------------------
/data/flight-data/csv/2013-summary.csv:
--------------------------------------------------------------------------------
1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count
2 | United States,Romania,12
3 | United States,Croatia,1
4 | United States,Ireland,266
5 | Egypt,United States,13
6 | United States,India,60
7 | Equatorial Guinea,United States,1
8 | United States,Niger,1
9 | United States,Singapore,22
10 | United States,Grenada,40
11 | Costa Rica,United States,509
12 | Senegal,United States,28
13 | Guyana,United States,34
14 | United States,Sint Maarten,260
15 | United States,Marshall Islands,33
16 | Bolivia,United States,33
17 | Anguilla,United States,22
18 | United States,Paraguay,15
19 | Algeria,United States,2
20 | Turks and Caicos Islands,United States,181
21 | Saint Vincent and the Grenadines,United States,4
22 | Lithuania,United States,1
23 | Pakistan,United States,14
24 | Italy,United States,327
25 | United States,Russia,167
26 | United States,Federated States of Micronesia,62
27 | United States,Netherlands,618
28 | Iceland,United States,146
29 | Marshall Islands,United States,50
30 | Luxembourg,United States,160
31 | Honduras,United States,423
32 | Rwanda,United States,1
33 | The Bahamas,United States,865
34 | El Salvador,United States,551
35 | United States,Senegal,32
36 | Samoa,United States,25
37 | United States,Angola,12
38 | Kazakhstan,United States,2
39 | Switzerland,United States,288
40 | United States,Anguilla,18
41 | United States,Mali,1
42 | Sint Maarten,United States,271
43 | Hong Kong,United States,282
44 | Trinidad and Tobago,United States,183
45 | Latvia,United States,29
46 | United States,Ecuador,307
47 | Suriname,United States,15
48 | Mexico,United States,6372
49 | Ecuador,United States,252
50 | United States,Portugal,126
51 | United States,Costa Rica,549
52 | United States,Guatemala,331
53 | United States,Suriname,21
54 | Colombia,United States,798
55 | United States,Cape Verde,15
56 | Norway,United States,48
57 | United States,Jamaica,599
58 | United States,Malaysia,1
59 | United States,Morocco,16
60 | Thailand,United States,2
61 | Burundi,United States,1
62 | Venezuela,United States,350
63 | United States,Samoa,25
64 | United States,Palau,35
65 | United States,Venezuela,295
66 | Panama,United States,418
67 | United States,Finland,15
68 | Morocco,United States,18
69 | Antigua and Barbuda,United States,123
70 | United States,Chile,171
71 | Azerbaijan,United States,3
72 | United States,Greece,14
73 | United States,The Bahamas,897
74 | New Zealand,United States,114
75 | United States,Hong Kong,344
76 | Hungary,United States,1
77 | United States,China,614
78 | United States,Kuwait,24
79 | Sweden,United States,70
80 | United States,Egypt,12
81 | United States,Dominican Republic,1119
82 | Israel,United States,119
83 | United States,United States,343132
84 | United States,Luxembourg,134
85 | Ethiopia,United States,12
86 | United States,Poland,37
87 | Martinique,United States,26
88 | United States,Zimbabwe,2
89 | United States,Saint Barthelemy,58
90 | Saint Barthelemy,United States,67
91 | United States,Turkey,100
92 | Barbados,United States,97
93 | Germany,United States,1423
94 | Kyrgyzstan,United States,1
95 | United States,South Korea,687
96 | United States,El Salvador,466
97 | Ireland,United States,255
98 | Malaysia,United States,2
99 | United States,Aruba,344
100 | United States,Panama,419
101 | United States,Ethiopia,12
102 | United States,Turks and Caicos Islands,178
103 | United States,Pakistan,12
104 | Cyprus,United States,2
105 | United States,Honduras,392
106 | Fiji,United States,34
107 | Qatar,United States,71
108 | Saint Kitts and Nevis,United States,99
109 | Kuwait,United States,24
110 | Haiti,United States,199
111 | Taiwan,United States,243
112 | Canada,United States,7860
113 | Federated States of Micronesia,United States,53
114 | Jamaica,United States,581
115 | Dominican Republic,United States,1068
116 | Japan,United States,1613
117 | United States,Saint Vincent and the Grenadines,10
118 | Finland,United States,17
119 | United States,Ukraine,4
120 | United States,Guadeloupe,42
121 | United States,France,848
122 | United States,Norway,46
123 | Aruba,United States,352
124 | French Guiana,United States,7
125 | United States,Kiribati,29
126 | Zimbabwe,United States,2
127 | India,United States,61
128 | British Virgin Islands,United States,101
129 | United States,Germany,1402
130 | United States,New Zealand,77
131 | Brazil,United States,912
132 | "Saint Helena, Ascension, and Tristan da Cunha",United States,1
133 | United States,Cambodia,3
134 | French Polynesia,United States,43
135 | United Arab Emirates,United States,202
136 | Singapore,United States,21
137 | Tanzania,United States,2
138 | United States,Mexico,6354
139 | Netherlands,United States,640
140 | United States,Sweden,73
141 | United States,Martinique,24
142 | United States,United Arab Emirates,187
143 | Denmark,United States,104
144 | China,United States,543
145 | United States,Nicaragua,185
146 | United States,Philippines,112
147 | United States,Belgium,193
148 | Nepal,United States,1
149 | Libya,United States,1
150 | Argentina,United States,176
151 | Peru,United States,255
152 | Cayman Islands,United States,274
153 | South Africa,United States,24
154 | United States,Iceland,151
155 | United States,Argentina,140
156 | Spain,United States,370
157 | Bermuda,United States,163
158 | United States,Nigeria,49
159 | United States,Austria,39
160 | United States,"Bonaire, Sint Eustatius, and Saba",54
161 | Kiribati,United States,29
162 | Saudi Arabia,United States,86
163 | Czech Republic,United States,10
164 | Macau,United States,2
165 | Belgium,United States,265
166 | United States,Israel,107
167 | United States,Saint Lucia,92
168 | Sierra Leone,United States,1
169 | United States,Bahrain,1
170 | United States,British Virgin Islands,94
171 | United States,"Saint Helena, Ascension, and Tristan da Cunha",2
172 | Curacao,United States,97
173 | United States,Denmark,106
174 | United States,Guyana,52
175 | Philippines,United States,136
176 | Grenada,United States,42
177 | Cape Verde,United States,21
178 | Ukraine,United States,4
179 | Russia,United States,194
180 | United States,Saudi Arabia,84
181 | Guatemala,United States,376
182 | Saint Lucia,United States,91
183 | Paraguay,United States,75
184 | United States,Curacao,89
185 | United States,Taiwan,235
186 | United States,South Africa,26
187 | Niger,United States,1
188 | Turkey,United States,99
189 | United Kingdom,United States,1802
190 | Romania,United States,14
191 | United States,Greenland,3
192 | Papua New Guinea,United States,2
193 | United States,Spain,377
194 | United States,Italy,333
195 | Cuba,United States,337
196 | United States,Switzerland,285
197 | Dominica,United States,19
198 | Portugal,United States,116
199 | United States,Japan,1476
200 | United States,Brazil,510
201 | Bahrain,United States,50
202 | United States,Peru,300
203 | Indonesia,United States,1
204 | United States,United Kingdom,1711
205 | United States,Belize,127
206 | Belize,United States,137
207 | Kenya,United States,1
208 | United States,Ghana,16
209 | United States,Fiji,30
210 | United States,Canada,7983
211 | United States,French Polynesia,40
212 | United States,Antigua and Barbuda,123
213 | Brunei,United States,1
214 | Nicaragua,United States,178
215 | United States,Latvia,12
216 | United States,Dominica,25
217 | United States,Czech Republic,9
218 | United States,Cook Islands,12
219 | Austria,United States,35
220 | United States,Australia,222
221 | Jordan,United States,74
222 | Palau,United States,35
223 | South Korea,United States,842
224 | Angola,United States,12
225 | Ghana,United States,16
226 | New Caledonia,United States,1
227 | Guadeloupe,United States,41
228 | France,United States,837
229 | Poland,United States,40
230 | Nigeria,United States,58
231 | United States,Uruguay,20
232 | Greenland,United States,1
233 | Chile,United States,147
234 | United States,Bermuda,166
235 | Lebanon,United States,1
236 | United States,Cuba,336
237 | United States,Colombia,811
238 | United States,Qatar,72
239 | United States,Barbados,78
240 | Australia,United States,295
241 | United States,Cayman Islands,274
242 | United States,Jordan,77
243 | United States,Trinidad and Tobago,184
244 | United States,Bolivia,13
245 | Uruguay,United States,57
246 | Cook Islands,United States,12
247 | United States,Saint Kitts and Nevis,115
248 | United States,French Guiana,3
249 | United States,Haiti,186
250 | "Bonaire, Sint Eustatius, and Saba",United States,53
251 | Greece,United States,12
252 |
--------------------------------------------------------------------------------
/data/flight-data/csv/2014-summary.csv:
--------------------------------------------------------------------------------
1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count
2 | United States,Saint Martin,1
3 | United States,Romania,12
4 | United States,Croatia,2
5 | United States,Ireland,291
6 | United States,India,62
7 | Egypt,United States,11
8 | United States,Grenada,47
9 | Costa Rica,United States,529
10 | Senegal,United States,35
11 | United States,Sint Maarten,290
12 | Guyana,United States,52
13 | United States,Marshall Islands,35
14 | Malta,United States,2
15 | Malawi,United States,1
16 | Bolivia,United States,33
17 | Anguilla,United States,34
18 | Algeria,United States,9
19 | United States,Paraguay,14
20 | Gibraltar,United States,1
21 | Turks and Caicos Islands,United States,193
22 | Saint Vincent and the Grenadines,United States,1
23 | United States,Russia,151
24 | Italy,United States,366
25 | Pakistan,United States,12
26 | United States,Federated States of Micronesia,71
27 | Iceland,United States,150
28 | United States,Netherlands,702
29 | Marshall Islands,United States,46
30 | Luxembourg,United States,161
31 | Honduras,United States,411
32 | The Bahamas,United States,950
33 | El Salvador,United States,572
34 | United States,Senegal,28
35 | Samoa,United States,24
36 | United States,Angola,12
37 | Kazakhstan,United States,1
38 | Switzerland,United States,302
39 | United States,Anguilla,35
40 | Sint Maarten,United States,290
41 | Hong Kong,United States,338
42 | United States,Congo (Brazaville),1
43 | Latvia,United States,40
44 | Trinidad and Tobago,United States,188
45 | United States,Ecuador,326
46 | Slovakia,United States,1
47 | Suriname,United States,10
48 | Mexico,United States,6427
49 | Ecuador,United States,256
50 | United States,Portugal,122
51 | United States,Guatemala,327
52 | United States,Costa Rica,560
53 | United States,Suriname,27
54 | Colombia,United States,872
55 | United States,Cape Verde,16
56 | United States,Jamaica,714
57 | Norway,United States,86
58 | United States,Morocco,15
59 | Venezuela,United States,335
60 | United States,Samoa,25
61 | United States,Palau,38
62 | United States,Venezuela,258
63 | Panama,United States,456
64 | United States,Chile,168
65 | United States,Finland,19
66 | Antigua and Barbuda,United States,115
67 | Morocco,United States,18
68 | United States,Greece,19
69 | Azerbaijan,United States,7
70 | United States,The Bahamas,991
71 | New Zealand,United States,114
72 | Liberia,United States,3
73 | United States,Hong Kong,381
74 | Hungary,United States,2
75 | United States,Tunisia,1
76 | United States,China,767
77 | Burkina Faso,United States,2
78 | Sweden,United States,94
79 | United States,Kuwait,24
80 | United States,Dominican Republic,1282
81 | United States,Egypt,11
82 | Israel,United States,108
83 | United States,United States,358354
84 | United States,Luxembourg,115
85 | Ethiopia,United States,11
86 | United States,Poland,33
87 | Martinique,United States,31
88 | United States,Saint Barthelemy,53
89 | Saint Barthelemy,United States,53
90 | United States,Turkey,92
91 | Barbados,United States,102
92 | United States,Kazakhstan,1
93 | United States,Azerbaijan,5
94 | Germany,United States,1391
95 | United States,South Korea,754
96 | United States,El Salvador,486
97 | Ireland,United States,267
98 | United States,Hungary,1
99 | Malaysia,United States,2
100 | United States,Panama,460
101 | United States,Aruba,348
102 | United States,Ethiopia,11
103 | United States,Turks and Caicos Islands,204
104 | United States,Pakistan,12
105 | United States,Honduras,412
106 | Fiji,United States,25
107 | Qatar,United States,95
108 | Saint Kitts and Nevis,United States,118
109 | Haiti,United States,201
110 | Taiwan,United States,265
111 | Kuwait,United States,24
112 | Canada,United States,7974
113 | Federated States of Micronesia,United States,60
114 | Jamaica,United States,673
115 | United States,Malta,2
116 | Dominican Republic,United States,1230
117 | Japan,United States,1591
118 | United States,Saint Vincent and the Grenadines,3
119 | Finland,United States,18
120 | United States,Ukraine,1
121 | United States,Guadeloupe,47
122 | United States,France,960
123 | Aruba,United States,351
124 | United States,Norway,87
125 | French Guiana,United States,11
126 | United States,Kiribati,27
127 | India,United States,61
128 | British Virgin Islands,United States,108
129 | Brazil,United States,927
130 | United States,Germany,1343
131 | United States,New Zealand,77
132 | United States,Cambodia,1
133 | French Polynesia,United States,39
134 | United Arab Emirates,United States,247
135 | United States,Mexico,6490
136 | Netherlands,United States,773
137 | United States,Sweden,101
138 | United States,Martinique,32
139 | United States,United Arab Emirates,226
140 | United States,Bulgaria,1
141 | Denmark,United States,114
142 | China,United States,653
143 | United States,Nicaragua,170
144 | United States,Philippines,116
145 | United States,Belgium,230
146 | Peru,United States,277
147 | Argentina,United States,195
148 | Cayman Islands,United States,283
149 | South Africa,United States,32
150 | United States,Argentina,153
151 | United States,Iceland,177
152 | Chad,United States,1
153 | Spain,United States,388
154 | Bermuda,United States,185
155 | United States,Nigeria,43
156 | United States,Austria,46
157 | United States,"Bonaire, Sint Eustatius, and Saba",63
158 | Kiribati,United States,26
159 | Saudi Arabia,United States,79
160 | Czech Republic,United States,11
161 | Macau,United States,3
162 | Belgium,United States,259
163 | United States,Israel,112
164 | United States,Saint Lucia,109
165 | United States,British Virgin Islands,101
166 | Curacao,United States,74
167 | United States,Denmark,116
168 | United States,Guyana,55
169 | Philippines,United States,134
170 | Grenada,United States,43
171 | Cape Verde,United States,24
172 | Ukraine,United States,11
173 | Russia,United States,213
174 | United States,Saudi Arabia,74
175 | Guatemala,United States,383
176 | Saint Lucia,United States,107
177 | Paraguay,United States,90
178 | United States,Curacao,77
179 | United States,Taiwan,240
180 | Tunisia,United States,2
181 | United States,South Africa,32
182 | Turkey,United States,91
183 | United Kingdom,United States,1912
184 | Romania,United States,11
185 | United States,Greenland,1
186 | United States,Spain,424
187 | Cuba,United States,417
188 | United States,Italy,385
189 | United States,Switzerland,300
190 | Dominica,United States,25
191 | United States,Japan,1501
192 | Portugal,United States,124
193 | United States,Brazil,578
194 | Bahrain,United States,41
195 | United States,Peru,315
196 | Indonesia,United States,1
197 | United States,United Kingdom,1812
198 | United States,Belize,143
199 | Belize,United States,142
200 | United States,Ghana,15
201 | Mauritania,United States,1
202 | United States,Indonesia,1
203 | United States,Fiji,27
204 | United States,Canada,8177
205 | United States,Antigua and Barbuda,112
206 | United States,French Polynesia,40
207 | Nicaragua,United States,168
208 | United States,Latvia,13
209 | United States,Dominica,36
210 | United States,Czech Republic,11
211 | United States,Cook Islands,12
212 | United States,Australia,235
213 | Austria,United States,47
214 | Jordan,United States,65
215 | Palau,United States,38
216 | South Korea,United States,968
217 | Angola,United States,13
218 | Ghana,United States,13
219 | Guadeloupe,United States,43
220 | France,United States,966
221 | Poland,United States,35
222 | Nigeria,United States,49
223 | United States,Uruguay,18
224 | Greenland,United States,2
225 | Chile,United States,156
226 | United States,Bermuda,190
227 | United States,Cuba,419
228 | United States,Qatar,96
229 | United States,Colombia,888
230 | United States,Barbados,89
231 | Australia,United States,293
232 | United States,Cayman Islands,278
233 | United States,Jordan,64
234 | United States,Trinidad and Tobago,175
235 | United States,Bolivia,14
236 | Uruguay,United States,60
237 | Cook Islands,United States,12
238 | United States,French Guiana,4
239 | United States,Saint Kitts and Nevis,123
240 | United States,Haiti,193
241 | "Bonaire, Sint Eustatius, and Saba",United States,62
242 | Greece,United States,20
243 |
--------------------------------------------------------------------------------
/data/flight-data/csv/2015-summary.csv:
--------------------------------------------------------------------------------
1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count
2 | United States,Romania,15
3 | United States,Croatia,1
4 | United States,Ireland,344
5 | Egypt,United States,15
6 | United States,India,62
7 | United States,Singapore,1
8 | United States,Grenada,62
9 | Costa Rica,United States,588
10 | Senegal,United States,40
11 | Moldova,United States,1
12 | United States,Sint Maarten,325
13 | United States,Marshall Islands,39
14 | Guyana,United States,64
15 | Malta,United States,1
16 | Anguilla,United States,41
17 | Bolivia,United States,30
18 | United States,Paraguay,6
19 | Algeria,United States,4
20 | Turks and Caicos Islands,United States,230
21 | United States,Gibraltar,1
22 | Saint Vincent and the Grenadines,United States,1
23 | Italy,United States,382
24 | United States,Federated States of Micronesia,69
25 | United States,Russia,161
26 | Pakistan,United States,12
27 | United States,Netherlands,660
28 | Iceland,United States,181
29 | Marshall Islands,United States,42
30 | Luxembourg,United States,155
31 | Honduras,United States,362
32 | The Bahamas,United States,955
33 | United States,Senegal,42
34 | El Salvador,United States,561
35 | Samoa,United States,25
36 | United States,Angola,13
37 | Switzerland,United States,294
38 | United States,Anguilla,38
39 | Sint Maarten,United States,325
40 | Hong Kong,United States,332
41 | Trinidad and Tobago,United States,211
42 | Latvia,United States,19
43 | United States,Ecuador,300
44 | Suriname,United States,1
45 | Mexico,United States,7140
46 | United States,Cyprus,1
47 | Ecuador,United States,268
48 | United States,Portugal,134
49 | United States,Costa Rica,608
50 | United States,Guatemala,318
51 | United States,Suriname,34
52 | Colombia,United States,873
53 | United States,Cape Verde,14
54 | United States,Jamaica,712
55 | Norway,United States,121
56 | United States,Malaysia,3
57 | United States,Morocco,19
58 | Thailand,United States,3
59 | United States,Samoa,25
60 | Venezuela,United States,290
61 | United States,Palau,31
62 | United States,Venezuela,246
63 | Panama,United States,510
64 | Antigua and Barbuda,United States,126
65 | United States,Chile,185
66 | Morocco,United States,15
67 | United States,Finland,28
68 | Azerbaijan,United States,21
69 | United States,Greece,23
70 | United States,The Bahamas,986
71 | New Zealand,United States,111
72 | Liberia,United States,2
73 | United States,Hong Kong,414
74 | Hungary,United States,2
75 | United States,China,920
76 | United States,Vietnam,2
77 | Burkina Faso,United States,1
78 | Sweden,United States,118
79 | United States,Kuwait,28
80 | United States,Dominican Republic,1420
81 | United States,Egypt,12
82 | Israel,United States,134
83 | United States,United States,370002
84 | Ethiopia,United States,13
85 | United States,Luxembourg,134
86 | United States,Poland,33
87 | Martinique,United States,44
88 | United States,Saint Barthelemy,41
89 | Saint Barthelemy,United States,39
90 | Barbados,United States,154
91 | United States,Turkey,129
92 | Djibouti,United States,1
93 | United States,Azerbaijan,21
94 | United States,Estonia,1
95 | Germany,United States,1468
96 | United States,South Korea,827
97 | United States,El Salvador,508
98 | Ireland,United States,335
99 | United States,Hungary,3
100 | Zambia,United States,1
101 | Malaysia,United States,2
102 | United States,Ethiopia,12
103 | United States,Panama,465
104 | United States,Aruba,342
105 | United States,Thailand,4
106 | United States,Turks and Caicos Islands,236
107 | Croatia,United States,2
108 | United States,Pakistan,12
109 | Cyprus,United States,1
110 | United States,Honduras,407
111 | Fiji,United States,24
112 | Qatar,United States,108
113 | Saint Kitts and Nevis,United States,139
114 | Kuwait,United States,32
115 | Taiwan,United States,266
116 | Haiti,United States,226
117 | Canada,United States,8399
118 | Federated States of Micronesia,United States,69
119 | United States,Liberia,2
120 | Jamaica,United States,666
121 | United States,Malta,2
122 | Dominican Republic,United States,1353
123 | Japan,United States,1548
124 | United States,Lithuania,1
125 | Finland,United States,26
126 | United States,Guadeloupe,59
127 | United States,Ukraine,13
128 | United States,France,952
129 | United States,Norway,115
130 | Aruba,United States,346
131 | French Guiana,United States,5
132 | United States,Kiribati,35
133 | India,United States,61
134 | British Virgin Islands,United States,107
135 | Brazil,United States,853
136 | United States,Germany,1336
137 | United States,New Zealand,74
138 | French Polynesia,United States,43
139 | United Arab Emirates,United States,320
140 | Singapore,United States,3
141 | United States,Mexico,7187
142 | United States,Sweden,119
143 | Netherlands,United States,776
144 | United States,Martinique,43
145 | United States,United Arab Emirates,313
146 | United States,Bulgaria,1
147 | Denmark,United States,153
148 | China,United States,772
149 | United States,Nicaragua,201
150 | United States,Philippines,126
151 | United States,Georgia,1
152 | United States,Belgium,228
153 | Cayman Islands,United States,314
154 | Argentina,United States,180
155 | Peru,United States,279
156 | South Africa,United States,36
157 | United States,Iceland,202
158 | United States,Argentina,141
159 | Spain,United States,420
160 | Bermuda,United States,183
161 | United States,Nigeria,50
162 | United States,Austria,63
163 | United States,"Bonaire, Sint Eustatius, and Saba",59
164 | Kiribati,United States,26
165 | Saudi Arabia,United States,83
166 | Czech Republic,United States,13
167 | United States,Israel,127
168 | Belgium,United States,259
169 | United States,Saint Lucia,136
170 | United States,Bahrain,1
171 | United States,British Virgin Islands,80
172 | Curacao,United States,90
173 | Georgia,United States,2
174 | United States,Denmark,152
175 | United States,Guyana,63
176 | Philippines,United States,134
177 | Grenada,United States,53
178 | Cape Verde,United States,20
179 | Cote d'Ivoire,United States,1
180 | Ukraine,United States,14
181 | United States,Papua New Guinea,1
182 | Russia,United States,176
183 | United States,Saudi Arabia,70
184 | Guatemala,United States,397
185 | Saint Lucia,United States,123
186 | Paraguay,United States,60
187 | United States,Curacao,83
188 | Kosovo,United States,1
189 | United States,Taiwan,235
190 | Tunisia,United States,3
191 | United States,South Africa,40
192 | Niger,United States,2
193 | Turkey,United States,138
194 | United Kingdom,United States,2025
195 | Romania,United States,14
196 | United States,Greenland,4
197 | Papua New Guinea,United States,3
198 | United States,Spain,442
199 | Iraq,United States,1
200 | United States,Italy,438
201 | Cuba,United States,466
202 | United States,Switzerland,305
203 | Dominica,United States,20
204 | United States,Japan,1496
205 | Portugal,United States,127
206 | United States,Brazil,619
207 | Bahrain,United States,19
208 | United States,Peru,337
209 | Indonesia,United States,1
210 | United States,Belize,193
211 | United States,United Kingdom,1970
212 | Belize,United States,188
213 | United States,Ghana,20
214 | United States,Indonesia,2
215 | United States,Fiji,25
216 | United States,Canada,8483
217 | United States,Antigua and Barbuda,117
218 | United States,French Polynesia,40
219 | Nicaragua,United States,179
220 | United States,Latvia,15
221 | United States,Dominica,27
222 | United States,Czech Republic,12
223 | United States,Australia,258
224 | United States,Cook Islands,13
225 | Austria,United States,62
226 | Jordan,United States,44
227 | Palau,United States,30
228 | South Korea,United States,1048
229 | Angola,United States,15
230 | Ghana,United States,18
231 | New Caledonia,United States,1
232 | Guadeloupe,United States,56
233 | France,United States,935
234 | Poland,United States,32
235 | Nigeria,United States,59
236 | United States,Uruguay,13
237 | Greenland,United States,2
238 | United States,Bermuda,193
239 | Chile,United States,174
240 | United States,Cuba,478
241 | United States,Montenegro,1
242 | United States,Colombia,867
243 | United States,Barbados,130
244 | United States,Qatar,109
245 | Australia,United States,329
246 | United States,Cayman Islands,310
247 | United States,Jordan,44
248 | United States,Namibia,1
249 | United States,Trinidad and Tobago,217
250 | United States,Bolivia,13
251 | Cook Islands,United States,13
252 | Bulgaria,United States,3
253 | United States,Saint Kitts and Nevis,145
254 | Uruguay,United States,43
255 | United States,Haiti,225
256 | "Bonaire, Sint Eustatius, and Saba",United States,58
257 | Greece,United States,30
258 |
--------------------------------------------------------------------------------
/data/flight-data/jdbc/my-sqlite.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data/jdbc/my-sqlite.db
--------------------------------------------------------------------------------
/data/flight-data/orc/2010-summary.orc/_SUCCESS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data/orc/2010-summary.orc/_SUCCESS
--------------------------------------------------------------------------------
/data/flight-data/orc/2010-summary.orc/part-r-00000-2c4f7d96-e703-4de3-af1b-1441d172c80f.snappy.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data/orc/2010-summary.orc/part-r-00000-2c4f7d96-e703-4de3-af1b-1441d172c80f.snappy.orc
--------------------------------------------------------------------------------
/data/flight-data/parquet/2010-summary.parquet/_SUCCESS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data/parquet/2010-summary.parquet/_SUCCESS
--------------------------------------------------------------------------------
/data/flight-data/parquet/2010-summary.parquet/part-r-00000-1a9822ba-b8fb-4d8e-844a-ea30d0801b9e.gz.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data/parquet/2010-summary.parquet/part-r-00000-1a9822ba-b8fb-4d8e-844a-ea30d0801b9e.gz.parquet
--------------------------------------------------------------------------------
/data/retail-data/all/online-retail-dataset.numbers:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/retail-data/all/online-retail-dataset.numbers
--------------------------------------------------------------------------------
/spark-docker/data/data.csv:
--------------------------------------------------------------------------------
1 | "invoiceseq","invoiceUrl","date_of_purchase","txn_id","user_email","base_amount","total_amount","igstTaxableAmount","user_state","refund_processed","cgstTaxableAmount","sgstTaxableAmount","user_gstin","legal_name"
2 | "AZZIRR2200004531","https://d3r0n59fu7ub44.cloudfront.net/invoicer/TYPEB/2024-07-02/66843f83c208fc0bdd87bbbf.pdf","02-07-2024 23:27","66843f83c208fc0bdd87bbbf","sreenivassree8@gmail.com","2499.15",2949,"449.85","Tamil Nadu",false,,,,
3 | "AZZIRR2200004530","https://d3r0n59fu7ub44.cloudfront.net/invoicer/TYPEB/2024-07-02/66843d0ef623480bcfcc98a2.pdf","02-07-2024 23:16","66843d0ef623480bcfcc98a2","jayanth.gundagoni98@gmail.com","1440.68",1700,"259.32","Telangana",false,,,,
4 | "AZZIRR2200004529","https://d3r0n59fu7ub44.cloudfront.net/invoicer/TYPEB/2024-07-02/668424ed380ba40bd38be6e1.pdf","02-07-2024 21:33","668424ed380ba40bd38be6e1","raunak77mm@gmail.com","2499.15",2949,"449.85","Massachusetts",false,,,,
5 | "AZZIRR2200004528","https://d3r0n59fu7ub44.cloudfront.net/invoicer/TYPEB/2024-07-02/6683e7632ef4bd0bc26b2da5.pdf","02-07-2024 17:11","6683e7632ef4bd0bc26b2da5","vrushabjain@gmail.com","1440.68",1700,,"Maharashtra",false,"129.66","129.66",,
6 |
--------------------------------------------------------------------------------
/spark-docker/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3' # Compose stack: a Jupyter/PySpark notebook container plus a Spark master and a Spark worker service
2 | services:
3 | spark: # notebook container (Jupyter Lab with PySpark)
4 | image: jupyter/pyspark-notebook # NOTE(review): no tag given, so the image version is not pinned
5 | ports:
6 | - "8888:8888" # Jupyter Notebook
7 | - "4040:4040" # Spark UI
8 | volumes:
9 | - ./files/data:/home/jovyan/work/files/data # NOTE(review): this path sits inside the ./files mount on the next line, so it looks redundant - confirm before removing
10 | - ./files:/home/jovyan/work/files # host ./files (notebooks and data) exposed under the Jupyter work directory
11 | environment:
12 | - JUPYTER_ENABLE_LAB=yes # presumably selects the JupyterLab UI; the command below already starts "jupyter lab" explicitly
13 | command: start.sh jupyter lab --NotebookApp.token='' # NOTE(review): empty token = no login token while port 8888 is published; keep to local/dev use only
14 |
15 | spark-master: # standalone Spark master
16 | image: bitnami/spark:latest # NOTE(review): "latest" is unpinned and may not match the Spark version inside the notebook image - verify
17 | environment:
18 | - SPARK_MODE=master # image-specific switch: run this container as the master
19 | ports:
20 | - "8080:8080" # presumably the master web UI - confirm against the image docs
21 | - "7077:7077" # master endpoint; the worker connects to spark://spark-master:7077
22 |
23 | spark-worker: # standalone Spark worker that registers with spark-master
24 | image: bitnami/spark:latest # same unpinned image as spark-master
25 | environment:
26 | - SPARK_MODE=worker # image-specific switch: run this container as a worker
27 | - SPARK_MASTER_URL=spark://spark-master:7077 # "spark-master" is the service name defined above
28 | depends_on:
29 | - spark-master # start-order dependency on the master service
--------------------------------------------------------------------------------
/spark-docker/files/Untitled.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "649dc83f-4c28-4875-a23f-a2e931af8669",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "from pyspark.sql import SparkSession"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "id": "bd6754db-bd76-4444-8e45-aad04bd8cd60",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "from pyspark.sql import SparkSession\n",
21 | "\n",
22 | "# Create SparkSession \n",
23 | "spark = SparkSession.builder \\\n",
24 | " .master(\"local[1]\") \\\n",
25 | " .appName(\"SparkByExamples.com\") \\\n",
26 | " .getOrCreate() \n"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 3,
32 | "id": "ae3ed98b-f46c-431f-9103-34e069afa093",
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "data": {
37 | "text/html": [
38 | "\n",
39 | " \n",
40 | "
SparkSession - in-memory
\n",
41 | " \n",
42 | "
\n",
43 | "
SparkContext
\n",
44 | "\n",
45 | "
Spark UI
\n",
46 | "\n",
47 | "
\n",
48 | " - Version
\n",
49 | " v3.5.0
\n",
50 | " - Master
\n",
51 | " local[1]
\n",
52 | " - AppName
\n",
53 | " SparkByExamples.com
\n",
54 | "
\n",
55 | "
\n",
56 | " \n",
57 | "
\n",
58 | " "
59 | ],
60 | "text/plain": [
61 | ""
62 | ]
63 | },
64 | "execution_count": 3,
65 | "metadata": {},
66 | "output_type": "execute_result"
67 | }
68 | ],
69 | "source": [
70 | "spark"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 5,
76 | "id": "76e22f21-4441-437b-aa20-b20216e779f1",
77 | "metadata": {},
78 | "outputs": [
79 | {
80 | "name": "stdout",
81 | "output_type": "stream",
82 | "text": [
83 | "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n",
84 | "| _c0| _c1| _c2| _c3| _c4| _c5| _c6| _c7| _c8| _c9| _c10|\n",
85 | "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n",
86 | "|Trip ID|Duration| Start Date| Start Station|Start Terminal| End Date| End Station|End Terminal|Bike #|Subscriber Type|Zip Code|\n",
87 | "| 913460| 765|8/31/2015 23:26|Harry Bridges Pla...| 50|8/31/2015 23:39|San Francisco Cal...| 70| 288| Subscriber| 2139|\n",
88 | "| 913459| 1036|8/31/2015 23:11|San Antonio Shopp...| 31|8/31/2015 23:28|Mountain View Cit...| 27| 35| Subscriber| 95032|\n",
89 | "| 913455| 307|8/31/2015 23:13| Post at Kearny| 47|8/31/2015 23:18| 2nd at South Park| 64| 468| Subscriber| 94107|\n",
90 | "| 913454| 409|8/31/2015 23:10| San Jose City Hall| 10|8/31/2015 23:17| San Salvador at 1st| 8| 68| Subscriber| 95113|\n",
91 | "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n",
92 | "only showing top 5 rows\n",
93 | "\n"
94 | ]
95 | }
96 | ],
97 | "source": [
98 | "spark.read.csv(\"data/201508_trip_data.csv\").show(5)"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "id": "31dea8e7-cdde-47b7-96db-076b55d8682c",
105 | "metadata": {},
106 | "outputs": [],
107 | "source": []
108 | }
109 | ],
110 | "metadata": {
111 | "kernelspec": {
112 | "display_name": "Python 3 (ipykernel)",
113 | "language": "python",
114 | "name": "python3"
115 | },
116 | "language_info": {
117 | "codemirror_mode": {
118 | "name": "ipython",
119 | "version": 3
120 | },
121 | "file_extension": ".py",
122 | "mimetype": "text/x-python",
123 | "name": "python",
124 | "nbconvert_exporter": "python",
125 | "pygments_lexer": "ipython3",
126 | "version": "3.11.6"
127 | }
128 | },
129 | "nbformat": 4,
130 | "nbformat_minor": 5
131 | }
132 |
--------------------------------------------------------------------------------