├── Advance RDDs.ipynb ├── Distributed Variables.ipynb ├── End-To-End Example.ipynb ├── Iphone Data Analysis.ipynb ├── Joins.ipynb ├── Lower Level APIs.ipynb ├── Project 2 - ECommerce Data Analysis Azure Data Engineering ├── Broze_Layer.ipynb ├── Gold Layer.ipynb ├── Silver_Layer.ipynb └── data │ ├── 6M-0K-99K.users.dataset.public.csv │ ├── Buyers-repartition-by-country.csv │ ├── Comparison-of-Sellers-by-Gender-and-Country.csv │ ├── Countries-with-Top-Sellers-(Fashion-C2C).csv │ ├── chunk-data │ ├── chunk1.csv │ ├── chunk10.csv │ ├── chunk2.csv │ ├── chunk3.csv │ ├── chunk4.csv │ ├── chunk5.csv │ ├── chunk6.csv │ ├── chunk7.csv │ ├── chunk8.csv │ └── chunk9.csv │ ├── chunk-user-data.ipynb │ └── users.6M0xxK.2024.public.csv ├── README.md ├── Spark Data Source.ipynb ├── Spark Deployment.ipynb ├── Spark SQL.ipynb ├── Spotify Data Pipeline using Spark ├── (python) spotify_transformation_load_function.py ├── (spark) spotify_transformation.py ├── spotify_api_data_extract.py └── spotipy_layer.zip ├── Structured API Overview.ipynb ├── Working with Different Types of Data.ipynb ├── data ├── apple_data │ ├── apple_products.csv │ └── output.csv │ │ ├── Product Name=APPLE iPhone 11 (Black, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 (Black, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 (Purple, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 (Red, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 (Red, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 (White, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 (White, 64 GB) │ │ └── 
part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 Pro (Midnight Green, 512 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 Pro (Midnight Green, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 Pro (Space Grey, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 Pro (Space Grey, 512 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 Pro Max (Gold, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 Pro Max (Gold, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 11 Pro Max (Space Grey, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 (Black, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 (Black, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 (Blue, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 (Green, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 (Red, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE 
iPhone 12 (White, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 (White, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Mini (Black, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Mini (Black, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Mini (Blue, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Mini (Red, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Mini (White, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Mini (White, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro (Graphite, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro (Graphite, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro (Pacific Blue, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro (Pacific Blue, 512 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro (Silver, 512 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro Max (Gold, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro Max (Gold, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── 
Product Name=APPLE iPhone 12 Pro Max (Graphite, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro Max (Graphite, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro Max (Silver, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 12 Pro Max (Silver, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 8 (Gold, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 8 (Silver, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 8 (Space Grey, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 8 Plus (Gold, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 8 Plus (Silver, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 8 Plus (Silver, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 8 Plus (Space Grey, 256 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone 8 Plus (Space Grey, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone SE (Black, 128 GB) │ │ └── 
part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone SE (Black, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone SE (Red, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone SE (White, 128 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone SE (White, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=APPLE iPhone XS Max (Silver, 64 GB) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=Apple iPhone SE (White, 256 GB) (Includes EarPods, Power Adapter) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=Apple iPhone XR ((PRODUCT)RED, 128 GB) (Includes EarPods, Power Adapter) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=Apple iPhone XR (Black, 128 GB) (Includes EarPods, Power Adapter) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=Apple iPhone XR (Black, 64 GB) (Includes EarPods, Power Adapter) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=Apple iPhone XR (Coral, 128 GB) (Includes EarPods, Power Adapter) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ ├── Product Name=Apple iPhone XR (White, 128 GB) (Includes EarPods, Power Adapter) │ │ └── part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet │ │ └── _SUCCESS ├── flight-data-hive │ ├── _SUCCESS │ ├── _committed_4721890993021653500 │ ├── _started_4721890993021653500 │ └── part-00000-tid-4721890993021653500-d8ef7f6b-e6e5-4451-af50-08281422f186-0-c000 ├── flight-data │ ├── csv │ │ ├── 2010-summary.csv │ │ ├── 2011-summary.csv │ │ ├── 
2012-summary.csv │ │ ├── 2013-summary.csv │ │ ├── 2014-summary.csv │ │ └── 2015-summary.csv │ ├── jdbc │ │ └── my-sqlite.db │ ├── json │ │ ├── 2010-summary.json │ │ ├── 2011-summary.json │ │ ├── 2012-summary.json │ │ ├── 2013-summary.json │ │ ├── 2014-summary.json │ │ └── 2015-summary.json │ ├── orc │ │ └── 2010-summary.orc │ │ │ ├── _SUCCESS │ │ │ └── part-r-00000-2c4f7d96-e703-4de3-af1b-1441d172c80f.snappy.orc │ └── parquet │ │ └── 2010-summary.parquet │ │ ├── _SUCCESS │ │ └── part-r-00000-1a9822ba-b8fb-4d8e-844a-ea30d0801b9e.gz.parquet └── retail-data │ ├── all │ └── online-retail-dataset.numbers │ └── by-day │ ├── 2010-12-01.csv │ ├── 2010-12-02.csv │ ├── 2010-12-03.csv │ ├── 2010-12-05.csv │ ├── 2010-12-06.csv │ ├── 2010-12-07.csv │ ├── 2010-12-08.csv │ ├── 2010-12-09.csv │ ├── 2010-12-10.csv │ ├── 2010-12-12.csv │ ├── 2010-12-13.csv │ └── 2010-12-14.csv └── spark-docker ├── data └── data.csv ├── docker-compose.yml └── files └── Untitled.ipynb /Distributed Variables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "eb1f7038-2e32-408f-ac14-600162264a0d", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "myCollection = \"My Name is Darshil and I love Spark\"\\\n", 11 | ".split(\" \")\n", 12 | "\n", 13 | "words = spark.sparkContext.parallelize(myCollection, 2)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "id": "3377fcfc-36f2-4230-aadf-7aa04097692e", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "supplementalData = {\"Name\":1000, \"Darshil\":200,\n", 24 | " \"love\":-300, \"Spark\":100}" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "id": "f30ab6fb-0b2d-4d99-bb51-1d035c1871af", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "suppBroadcast = spark.sparkContext.broadcast(supplementalData)" 35 | ] 36 | }, 37 | { 38 | "cell_type": 
"code", 39 | "execution_count": 4, 40 | "id": "b03358a6-55ce-4472-8c8f-4f5d68f9913e", 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "{'Name': 1000, 'Darshil': 200, 'love': -300, 'Spark': 100}" 47 | ] 48 | }, 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "output_type": "execute_result" 52 | } 53 | ], 54 | "source": [ 55 | "suppBroadcast.value" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 5, 61 | "id": "3abef626-9de9-4fc2-9484-3f8e8ac33f48", 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "[('love', -300),\n", 68 | " ('My', 0),\n", 69 | " ('is', 0),\n", 70 | " ('and', 0),\n", 71 | " ('I', 0),\n", 72 | " ('Spark', 100),\n", 73 | " ('Darshil', 200),\n", 74 | " ('Name', 1000)]" 75 | ] 76 | }, 77 | "execution_count": 5, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "words.map(lambda word: (word, suppBroadcast.value.get(word, 0)))\\\n", 84 | " .sortBy(lambda wordPair: wordPair[1])\\\n", 85 | " .collect()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 6, 91 | "id": "7ddcebd4-1c19-43d7-ba90-cd68761214c5", 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "flights = spark.read.parquet(\"data/flight-data/parquet/2010-summary.parquet\")" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 7, 101 | "id": "54a006a6-d05e-40c4-917a-ff13bf4bd21f", 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "[Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='Romania', count=1),\n", 108 | " Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='Ireland', count=264),\n", 109 | " Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='India', count=69),\n", 110 | " Row(DEST_COUNTRY_NAME='Egypt', ORIGIN_COUNTRY_NAME='United States', count=24),\n", 111 | " Row(DEST_COUNTRY_NAME='Equatorial Guinea', 
ORIGIN_COUNTRY_NAME='United States', count=1)]" 112 | ] 113 | }, 114 | "execution_count": 7, 115 | "metadata": {}, 116 | "output_type": "execute_result" 117 | } 118 | ], 119 | "source": [ 120 | "flights.take(5)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 22, 126 | "id": "a5ed45fc-5ab4-4001-a52b-d06a21e791de", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "accChina = spark.sparkContext.accumulator(0)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 23, 136 | "id": "97e52519-ae3e-4e60-a58d-666211376b70", 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "def accChinaFunc(flight_row):\n", 141 | " destination = flight_row[\"DEST_COUNTRY_NAME\"]\n", 142 | " origin = flight_row[\"ORIGIN_COUNTRY_NAME\"]\n", 143 | " \n", 144 | " if destination == \"China\":\n", 145 | " accChina.add(flight_row[\"count\"])\n", 146 | " \n", 147 | " if origin == \"China\":\n", 148 | " accChina.add(flight_row[\"count\"])" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 24, 154 | "id": "2e466874-4444-474c-b021-f176e96b4119", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "flights.foreach(lambda flight_row: accChinaFunc(flight_row))" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 25, 164 | "id": "39894f17-4cff-4cc5-9b86-88f1fc4fc96b", 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "953" 171 | ] 172 | }, 173 | "execution_count": 25, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "accChina.value" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "id": "c600f0a8-f9bb-4289-8698-19536f3e64bb", 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [] 189 | } 190 | ], 191 | "metadata": { 192 | "kernelspec": { 193 | "display_name": "Python 3 (ipykernel)", 194 | "language": "python", 195 
| "name": "python3" 196 | }, 197 | "language_info": { 198 | "codemirror_mode": { 199 | "name": "ipython", 200 | "version": 3 201 | }, 202 | "file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.12.1" 208 | } 209 | }, 210 | "nbformat": 4, 211 | "nbformat_minor": 5 212 | } 213 | -------------------------------------------------------------------------------- /End-To-End Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "3c1c59a1-7047-41f5-835a-0176d1e8b2e6", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "data": { 11 | "text/html": [ 12 | "\n", 13 | "
\n", 14 | "

SparkSession - hive

\n", 15 | " \n", 16 | "
\n", 17 | "

SparkContext

\n", 18 | "\n", 19 | "

Spark UI

\n", 20 | "\n", 21 | "
\n", 22 | "
Version
\n", 23 | "
v3.5.0
\n", 24 | "
Master
\n", 25 | "
local[*]
\n", 26 | "
AppName
\n", 27 | "
PySparkShell
\n", 28 | "
\n", 29 | "
\n", 30 | " \n", 31 | "
\n", 32 | " " 33 | ], 34 | "text/plain": [ 35 | "" 36 | ] 37 | }, 38 | "execution_count": 1, 39 | "metadata": {}, 40 | "output_type": "execute_result" 41 | } 42 | ], 43 | "source": [ 44 | "spark" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 23, 50 | "id": "f6d9acb1-158a-4d2c-9bc1-9e457c58112d", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "flightData2015 = spark.read.option(\"inferSchema\",\"true\").option(\"header\",\"true\").csv(\"data/flight-data/csv/2015-summary.csv\")" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 24, 60 | "id": "32881bb3-07d4-4a3a-be42-79fb0c873e0f", 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "[Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='Romania', count=15),\n", 67 | " Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='Croatia', count=1),\n", 68 | " Row(DEST_COUNTRY_NAME='United States', ORIGIN_COUNTRY_NAME='Ireland', count=344)]" 69 | ] 70 | }, 71 | "execution_count": 24, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "flightData2015.take(3)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 4, 83 | "id": "2400c85f-c5df-400c-b171-3a8bc4c03849", 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "DataFrame[DEST_COUNTRY_NAME: string, ORIGIN_COUNTRY_NAME: string, count: int]" 90 | ] 91 | }, 92 | "execution_count": 4, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "flightData2015.sort(\"count\")" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 5, 104 | "id": "b026a257-91f9-49fa-88e0-fa17c232a9f1", 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "== Physical Plan ==\n", 112 | "AdaptiveSparkPlan isFinalPlan=false\n", 113 | "+- Sort [count#19 ASC NULLS 
FIRST], true, 0\n", 114 | " +- Exchange rangepartitioning(count#19 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [plan_id=33]\n", 115 | " +- FileScan csv [DEST_COUNTRY_NAME#17,ORIGIN_COUNTRY_NAME#18,count#19] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/Users/darshil/Documents/DataWithDarshil/Apache Spark with DataBr..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct\n", 116 | "\n", 117 | "\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "flightData2015.sort(\"count\").explain()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 9, 128 | "id": "7037d05f-98bd-48c5-a8cd-01a3513afc2a", 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "+--------------------+-------------------+-----+\n", 136 | "| DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|\n", 137 | "+--------------------+-------------------+-----+\n", 138 | "| United States| Estonia| 1|\n", 139 | "| Kosovo| United States| 1|\n", 140 | "| Zambia| United States| 1|\n", 141 | "| United States| Papua New Guinea| 1|\n", 142 | "| Malta| United States| 1|\n", 143 | "| United States| Gibraltar| 1|\n", 144 | "| Suriname| United States| 1|\n", 145 | "| United States| Croatia| 1|\n", 146 | "| Djibouti| United States| 1|\n", 147 | "| Burkina Faso| United States| 1|\n", 148 | "|Saint Vincent and...| United States| 1|\n", 149 | "| United States| Cyprus| 1|\n", 150 | "| United States| Singapore| 1|\n", 151 | "| Moldova| United States| 1|\n", 152 | "| Cyprus| United States| 1|\n", 153 | "| United States| Lithuania| 1|\n", 154 | "| United States| Bulgaria| 1|\n", 155 | "| United States| Georgia| 1|\n", 156 | "| United States| Bahrain| 1|\n", 157 | "| Cote d'Ivoire| United States| 1|\n", 158 | "+--------------------+-------------------+-----+\n", 159 | "only showing top 20 rows\n", 160 | "\n" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | 
"flightData2015.sort(\"count\").show()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 10, 171 | "id": "f59f2d98-dab9-48a1-8c27-a3ea0101d2ef", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "flightData2015.createOrReplaceTempView(\"flight_data_2015\")" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 11, 181 | "id": "05b932ca-7e30-4e2d-804d-1b79343dcfdd", 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "sqlWay = spark.sql(\"\"\"\n", 186 | " SELECT DEST_COUNTRY_NAME, count(1)\n", 187 | " FROM flight_data_2015\n", 188 | " GROUP BY DEST_COUNTRY_NAME\n", 189 | "\"\"\")" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 12, 195 | "id": "435e58c9-a45b-478e-95f7-e055a7a63db2", 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "== Physical Plan ==\n", 203 | "AdaptiveSparkPlan isFinalPlan=false\n", 204 | "+- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[count(1)])\n", 205 | " +- Exchange hashpartitioning(DEST_COUNTRY_NAME#17, 200), ENSURE_REQUIREMENTS, [plan_id=73]\n", 206 | " +- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[partial_count(1)])\n", 207 | " +- FileScan csv [DEST_COUNTRY_NAME#17] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/Users/darshil/Documents/DataWithDarshil/Apache Spark with DataBr..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct\n", 208 | "\n", 209 | "\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "sqlWay.explain()" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 13, 220 | "id": "ad06731d-581f-4319-97b6-2498bc9525bf", 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "dataFrameWay = flightData2015\\\n", 225 | " .groupBy(\"DEST_COUNTRY_NAME\")\\\n", 226 | " .count()" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | 
"execution_count": 14, 232 | "id": "1e87c25e-63b6-478f-b138-907c687978b3", 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "name": "stdout", 237 | "output_type": "stream", 238 | "text": [ 239 | "== Physical Plan ==\n", 240 | "AdaptiveSparkPlan isFinalPlan=false\n", 241 | "+- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[count(1)])\n", 242 | " +- Exchange hashpartitioning(DEST_COUNTRY_NAME#17, 200), ENSURE_REQUIREMENTS, [plan_id=86]\n", 243 | " +- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[partial_count(1)])\n", 244 | " +- FileScan csv [DEST_COUNTRY_NAME#17] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/Users/darshil/Documents/DataWithDarshil/Apache Spark with DataBr..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct\n", 245 | "\n", 246 | "\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "dataFrameWay.explain()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 15, 257 | "id": "5bc7f565-2a3d-4188-b5e4-39e9c47c2ff0", 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "data": { 262 | "text/plain": [ 263 | "[Row(max(count)=370002)]" 264 | ] 265 | }, 266 | "execution_count": 15, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "spark.sql(\"SELECT max(count) from flight_data_2015\").take(1)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 16, 278 | "id": "bc19c214-7926-4b49-92e0-f97dea7dd0a5", 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "text/plain": [ 284 | "[Row(max(count)=370002)]" 285 | ] 286 | }, 287 | "execution_count": 16, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "from pyspark.sql.functions import max\n", 294 | "flightData2015.select(max(\"count\")).take(1)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 17, 300 | "id": 
"e94f82af-ec84-4b4f-a8a3-3bbaa1da42d8", 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "+-----------------+-----------------+\n", 308 | "|DEST_COUNTRY_NAME|destination_total|\n", 309 | "+-----------------+-----------------+\n", 310 | "| United States| 411352|\n", 311 | "| Canada| 8399|\n", 312 | "| Mexico| 7140|\n", 313 | "| United Kingdom| 2025|\n", 314 | "| Japan| 1548|\n", 315 | "+-----------------+-----------------+\n", 316 | "\n" 317 | ] 318 | } 319 | ], 320 | "source": [ 321 | "maxSql = spark.sql(\"\"\"\n", 322 | "SELECT DEST_COUNTRY_NAME, sum(count) as destination_total\n", 323 | "FROM flight_data_2015\n", 324 | "GROUP BY DEST_COUNTRY_NAME\n", 325 | "ORDER BY sum(count) DESC\n", 326 | "LIMIT 5\n", 327 | "\"\"\")\n", 328 | "\n", 329 | "maxSql.show()" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 18, 335 | "id": "4b5966c6-3df6-4798-91ce-ef728130c30e", 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "name": "stdout", 340 | "output_type": "stream", 341 | "text": [ 342 | "+-----------------+-----------------+\n", 343 | "|DEST_COUNTRY_NAME|destination_total|\n", 344 | "+-----------------+-----------------+\n", 345 | "| United States| 411352|\n", 346 | "| Canada| 8399|\n", 347 | "| Mexico| 7140|\n", 348 | "| United Kingdom| 2025|\n", 349 | "| Japan| 1548|\n", 350 | "+-----------------+-----------------+\n", 351 | "\n" 352 | ] 353 | } 354 | ], 355 | "source": [ 356 | "from pyspark.sql.functions import desc\n", 357 | "\n", 358 | "flightData2015\\\n", 359 | " .groupBy(\"DEST_COUNTRY_NAME\")\\\n", 360 | " .sum(\"count\")\\\n", 361 | " .withColumnRenamed(\"sum(count)\", \"destination_total\")\\\n", 362 | " .sort(desc(\"destination_total\"))\\\n", 363 | " .limit(5)\\\n", 364 | " .show()" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 19, 370 | "id": "bd525c3f-bd0f-4574-adab-374b13a341ba", 371 | "metadata": {}, 372 | "outputs": 
[ 373 | { 374 | "name": "stdout", 375 | "output_type": "stream", 376 | "text": [ 377 | "== Physical Plan ==\n", 378 | "AdaptiveSparkPlan isFinalPlan=false\n", 379 | "+- TakeOrderedAndProject(limit=5, orderBy=[destination_total#135L DESC NULLS LAST], output=[DEST_COUNTRY_NAME#17,destination_total#135L])\n", 380 | " +- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[sum(count#19)])\n", 381 | " +- Exchange hashpartitioning(DEST_COUNTRY_NAME#17, 200), ENSURE_REQUIREMENTS, [plan_id=256]\n", 382 | " +- HashAggregate(keys=[DEST_COUNTRY_NAME#17], functions=[partial_sum(count#19)])\n", 383 | " +- FileScan csv [DEST_COUNTRY_NAME#17,count#19] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/Users/darshil/Documents/DataWithDarshil/Apache Spark with DataBr..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct\n", 384 | "\n", 385 | "\n" 386 | ] 387 | } 388 | ], 389 | "source": [ 390 | "flightData2015\\\n", 391 | " .groupBy(\"DEST_COUNTRY_NAME\")\\\n", 392 | " .sum(\"count\")\\\n", 393 | " .withColumnRenamed(\"sum(count)\", \"destination_total\")\\\n", 394 | " .sort(desc(\"destination_total\"))\\\n", 395 | " .limit(5)\\\n", 396 | " .explain()" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "id": "454fa12e-5967-4ec6-a04d-66c7ab546d93", 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [] 406 | } 407 | ], 408 | "metadata": { 409 | "kernelspec": { 410 | "display_name": "Python 3 (ipykernel)", 411 | "language": "python", 412 | "name": "python3" 413 | }, 414 | "language_info": { 415 | "codemirror_mode": { 416 | "name": "ipython", 417 | "version": 3 418 | }, 419 | "file_extension": ".py", 420 | "mimetype": "text/x-python", 421 | "name": "python", 422 | "nbconvert_exporter": "python", 423 | "pygments_lexer": "ipython3", 424 | "version": "3.12.0" 425 | } 426 | }, 427 | "nbformat": 4, 428 | "nbformat_minor": 5 429 | } 430 | 
-------------------------------------------------------------------------------- /Joins.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "id": "8a691f86-74bc-44d5-8e4d-83b97d6116bb", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "person = spark.createDataFrame([\n", 11 | " (0, \"Bill Chambers\", 0, [100]),\n", 12 | " (1, \"Matei Zaharia\", 1, [500, 250, 100]),\n", 13 | " (2, \"Michael Armbrust\", 1, [250, 100])])\\\n", 14 | ".toDF(\"id\", \"name\", \"graduate_program\", \"spark_status\")\n", 15 | "\n", 16 | "\n", 17 | "graduateProgram = spark.createDataFrame([\n", 18 | " (0, \"Masters\", \"School of Information\", \"UC Berkeley\"),\n", 19 | " (2, \"Masters\", \"EECS\", \"UC Berkeley\"),\n", 20 | " (1, \"Ph.D.\", \"EECS\", \"UC Berkeley\")])\\\n", 21 | ".toDF(\"id\", \"degree\", \"department\", \"school\")\n", 22 | "\n", 23 | "sparkStatus = spark.createDataFrame([\n", 24 | " (500, \"Vice President\"),\n", 25 | " (250, \"PMC Member\"),\n", 26 | " (100, \"Contributor\")])\\\n", 27 | ".toDF(\"id\", \"status\")" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 6, 33 | "id": "ea0a2875-c151-4c08-8232-2e64ce881bae", 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "+---+----------------+----------------+---------------+\n", 41 | "| id| name|graduate_program| spark_status|\n", 42 | "+---+----------------+----------------+---------------+\n", 43 | "| 0| Bill Chambers| 0| [100]|\n", 44 | "| 1| Matei Zaharia| 1|[500, 250, 100]|\n", 45 | "| 2|Michael Armbrust| 1| [250, 100]|\n", 46 | "+---+----------------+----------------+---------------+\n", 47 | "\n", 48 | "+---+-------+--------------------+-----------+\n", 49 | "| id| degree| department| school|\n", 50 | "+---+-------+--------------------+-----------+\n", 51 | "| 0|Masters|School of Informa...|UC 
Berkeley|\n", 52 | "| 2|Masters| EECS|UC Berkeley|\n", 53 | "| 1| Ph.D.| EECS|UC Berkeley|\n", 54 | "+---+-------+--------------------+-----------+\n", 55 | "\n", 56 | "+---+--------------+\n", 57 | "| id| status|\n", 58 | "+---+--------------+\n", 59 | "|500|Vice President|\n", 60 | "|250| PMC Member|\n", 61 | "|100| Contributor|\n", 62 | "+---+--------------+\n", 63 | "\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "person.show()\n", 69 | "graduateProgram.show()\n", 70 | "sparkStatus.show()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 7, 76 | "id": "86338047-c384-46a7-8b24-75098978bb96", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "joinExpression = person[\"graduate_program\"] == graduateProgram['id']" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 8, 86 | "id": "7b61803d-a8d7-4eac-a4ad-65987164645b", 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "+---+----------------+----------------+---------------+---+-------+--------------------+-----------+\n", 94 | "| id| name|graduate_program| spark_status| id| degree| department| school|\n", 95 | "+---+----------------+----------------+---------------+---+-------+--------------------+-----------+\n", 96 | "| 0| Bill Chambers| 0| [100]| 0|Masters|School of Informa...|UC Berkeley|\n", 97 | "| 1| Matei Zaharia| 1|[500, 250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n", 98 | "| 2|Michael Armbrust| 1| [250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n", 99 | "+---+----------------+----------------+---------------+---+-------+--------------------+-----------+\n", 100 | "\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "person.join(graduateProgram, joinExpression).show()" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 9, 111 | "id": "99a6a32f-b527-4669-b3f5-3eca1e87ddde", 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": 
"stream", 117 | "text": [ 118 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n", 119 | "| id| name|graduate_program| spark_status| id| degree| department| school|\n", 120 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n", 121 | "| 0| Bill Chambers| 0| [100]| 0|Masters|School of Informa...|UC Berkeley|\n", 122 | "| 1| Matei Zaharia| 1|[500, 250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n", 123 | "| 2|Michael Armbrust| 1| [250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n", 124 | "|NULL| NULL| NULL| NULL| 2|Masters| EECS|UC Berkeley|\n", 125 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n", 126 | "\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "joinType = \"outer\"\n", 132 | "person.join(graduateProgram, joinExpression, joinType).show()" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 10, 138 | "id": "57c0fbf2-40c1-4d4d-a280-96c2cbb7bb9c", 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "+---+-------+--------------------+-----------+----+----------------+----------------+---------------+\n", 146 | "| id| degree| department| school| id| name|graduate_program| spark_status|\n", 147 | "+---+-------+--------------------+-----------+----+----------------+----------------+---------------+\n", 148 | "| 0|Masters|School of Informa...|UC Berkeley| 0| Bill Chambers| 0| [100]|\n", 149 | "| 2|Masters| EECS|UC Berkeley|NULL| NULL| NULL| NULL|\n", 150 | "| 1| Ph.D.| EECS|UC Berkeley| 2|Michael Armbrust| 1| [250, 100]|\n", 151 | "| 1| Ph.D.| EECS|UC Berkeley| 1| Matei Zaharia| 1|[500, 250, 100]|\n", 152 | "+---+-------+--------------------+-----------+----+----------------+----------------+---------------+\n", 153 | "\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "joinType = \"left_outer\"\n", 159 | 
"graduateProgram.join(person, joinExpression, joinType).show()" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 11, 165 | "id": "2a61d18b-a8bb-4a9f-80c9-35759fb1d517", 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n", 173 | "| id| name|graduate_program| spark_status| id| degree| department| school|\n", 174 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n", 175 | "| 0| Bill Chambers| 0| [100]| 0|Masters|School of Informa...|UC Berkeley|\n", 176 | "|NULL| NULL| NULL| NULL| 2|Masters| EECS|UC Berkeley|\n", 177 | "| 2|Michael Armbrust| 1| [250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n", 178 | "| 1| Matei Zaharia| 1|[500, 250, 100]| 1| Ph.D.| EECS|UC Berkeley|\n", 179 | "+----+----------------+----------------+---------------+---+-------+--------------------+-----------+\n", 180 | "\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "joinType = \"right_outer\"\n", 186 | "person.join(graduateProgram, joinExpression, joinType).show()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 12, 192 | "id": "2b628353-9d65-4eb9-830b-6169231e4b34", 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "+---+-------+--------------------+-----------+\n", 200 | "| id| degree| department| school|\n", 201 | "+---+-------+--------------------+-----------+\n", 202 | "| 0|Masters|School of Informa...|UC Berkeley|\n", 203 | "| 1| Ph.D.| EECS|UC Berkeley|\n", 204 | "+---+-------+--------------------+-----------+\n", 205 | "\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "joinType = \"left_semi\"\n", 211 | "graduateProgram.join(person, joinExpression, joinType).show()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | 
"execution_count": 13, 217 | "id": "1b4b1692-fd2a-4b04-8048-199130867881", 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "+---+-------+----------+-----------+\n", 225 | "| id| degree|department| school|\n", 226 | "+---+-------+----------+-----------+\n", 227 | "| 2|Masters| EECS|UC Berkeley|\n", 228 | "+---+-------+----------+-----------+\n", 229 | "\n" 230 | ] 231 | } 232 | ], 233 | "source": [ 234 | "joinType = \"left_anti\"\n", 235 | "graduateProgram.join(person, joinExpression, joinType).show()" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 14, 241 | "id": "fb5b559e-a6f8-4b66-bfaa-8200fb963a44", 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stdout", 246 | "output_type": "stream", 247 | "text": [ 248 | "+---+-------+--------------------+-----------+---+----------------+----------------+---------------+\n", 249 | "| id| degree| department| school| id| name|graduate_program| spark_status|\n", 250 | "+---+-------+--------------------+-----------+---+----------------+----------------+---------------+\n", 251 | "| 0|Masters|School of Informa...|UC Berkeley| 0| Bill Chambers| 0| [100]|\n", 252 | "| 1| Ph.D.| EECS|UC Berkeley| 1| Matei Zaharia| 1|[500, 250, 100]|\n", 253 | "| 1| Ph.D.| EECS|UC Berkeley| 2|Michael Armbrust| 1| [250, 100]|\n", 254 | "+---+-------+--------------------+-----------+---+----------------+----------------+---------------+\n", 255 | "\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "joinType = \"cross\"\n", 261 | "graduateProgram.join(person, joinExpression, joinType).show()" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 18, 267 | "id": "2f1faefb-d731-4638-a92a-85c942dce402", 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/html": [ 273 | "\n", 274 | "
\n", 275 | "

SparkSession - hive

\n", 276 | " \n", 277 | "
\n", 278 | "

SparkContext

\n", 279 | "\n", 280 | "

Spark UI

\n", 281 | "\n", 282 | "
\n", 283 | "
Version
\n", 284 | "
v3.5.0
\n", 285 | "
Master
\n", 286 | "
local[*]
\n", 287 | "
AppName
\n", 288 | "
PySparkShell
\n", 289 | "
\n", 290 | "
\n", 291 | " \n", 292 | "
\n", 293 | " " 294 | ], 295 | "text/plain": [ 296 | "" 297 | ] 298 | }, 299 | "execution_count": 18, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "spark" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "id": "97f654ee-6539-4b39-a777-f05add409e3d", 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [] 315 | } 316 | ], 317 | "metadata": { 318 | "kernelspec": { 319 | "display_name": "Python 3 (ipykernel)", 320 | "language": "python", 321 | "name": "python3" 322 | }, 323 | "language_info": { 324 | "codemirror_mode": { 325 | "name": "ipython", 326 | "version": 3 327 | }, 328 | "file_extension": ".py", 329 | "mimetype": "text/x-python", 330 | "name": "python", 331 | "nbconvert_exporter": "python", 332 | "pygments_lexer": "ipython3", 333 | "version": "3.12.1" 334 | } 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 5 338 | } 339 | -------------------------------------------------------------------------------- /Project 2 - ECommerce Data Analysis Azure Data Engineering/Gold Layer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 0, 6 | "metadata": { 7 | "application/vnd.databricks.v1+cell": { 8 | "cellMetadata": { 9 | "byteLimit": 2048000, 10 | "rowLimit": 10000 11 | }, 12 | "inputWidgets": {}, 13 | "nuid": "b1ba9d5d-0019-47e1-947f-e55becb33dd1", 14 | "showTitle": false, 15 | "title": "" 16 | } 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "from pyspark.sql import SparkSession\n", 21 | "from pyspark.sql.functions import sum, avg, col,monotonically_increasing_id \n", 22 | "\n", 23 | "spark = SparkSession.builder.appName(\"GoldLayerCreation\").getOrCreate()\n", 24 | "\n", 25 | "# Read the necessary Silver tables\n", 26 | "silver_sellers = spark.read.format(\"delta\").load(\"/mnt/delta/tables/silver/sellers\")\n", 27 | "silver_buyers = 
spark.read.format(\"delta\").load(\"/mnt/delta/tables/silver/buyers\")\n", 28 | "silver_users = spark.read.format(\"delta\").load(\"/mnt/delta/tables/silver/users\")\n", 29 | "silver_countries = spark.read.format(\"delta\").load(\"/mnt/delta/tables/silver/countries\")\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 0, 35 | "metadata": { 36 | "application/vnd.databricks.v1+cell": { 37 | "cellMetadata": { 38 | "byteLimit": 2048000, 39 | "rowLimit": 10000 40 | }, 41 | "inputWidgets": {}, 42 | "nuid": "9d4fea28-f7ae-4860-8c97-5f67389b54d0", 43 | "showTitle": false, 44 | "title": "" 45 | } 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "# Perform the join operations\n", 50 | "comprehensive_user_table = silver_users \\\n", 51 | " .join(silver_countries, [\"country\"], \"outer\") \\\n", 52 | " .join(silver_buyers, [\"country\"], \"outer\") \\\n", 53 | " .join(silver_sellers, [\"country\"], \"outer\")\n", 54 | "\n", 55 | "# Select and alias columns from each dataframe to ensure uniqueness\n", 56 | "comprehensive_user_table = comprehensive_user_table.select(\n", 57 | " col(\"country\").alias(\"Country\"), # coalesced join key: silver_users[\"country\"] would be NULL for rows that exist only on the right side of an outer join\n", 58 | " # From silver_users\n", 59 | " silver_users[\"productsSold\"].alias(\"Users_productsSold\"),\n", 60 | " silver_users[\"productsWished\"].alias(\"Users_productsWished\"),\n", 61 | " silver_users[\"account_age_years\"].alias(\"Users_account_age_years\"),\n", 62 | " silver_users[\"account_age_group\"].alias(\"Users_account_age_group\"),\n", 63 | " silver_users[\"hasanyapp\"].alias(\"Users_hasanyapp\"),\n", 64 | " silver_users[\"socialnbfollowers\"].alias(\"Users_socialnbfollowers\"),\n", 65 | " silver_users[\"flag_long_title\"].alias(\"Users_flag_long_title\"),\n", 66 | " # Continue with other silver_users columns as needed...\n", 67 | " \n", 68 | " # From silver_countries\n", 69 | " silver_countries[\"sellers\"].alias(\"Countries_Sellers\"),\n", 70 | " silver_countries[\"topsellers\"].alias(\"Countries_TopSellers\"),\n", 71 | 
" silver_countries[\"femalesellers\"].alias(\"Countries_FemaleSellers\"),\n", 72 | " silver_countries[\"malesellers\"].alias(\"Countries_MaleSellers\"),\n", 73 | " silver_countries[\"topfemalesellers\"].alias(\"Countries_TopFemaleSellers\"),\n", 74 | " silver_countries[\"topmalesellers\"].alias(\"Countries_TopMaleSellers\"),\n", 75 | " # Continue with other silver_countries columns as needed...\n", 76 | " \n", 77 | " # From silver_buyers\n", 78 | " silver_buyers[\"buyers\"].alias(\"Buyers_Total\"),\n", 79 | " silver_buyers[\"topbuyers\"].alias(\"Buyers_Top\"),\n", 80 | " silver_buyers[\"femalebuyers\"].alias(\"Buyers_Female\"),\n", 81 | " silver_buyers[\"malebuyers\"].alias(\"Buyers_Male\"),\n", 82 | " silver_buyers[\"topfemalebuyers\"].alias(\"Buyers_TopFemale\"),\n", 83 | " silver_buyers[\"topmalebuyers\"].alias(\"Buyers_TopMale\"),\n", 84 | " # Continue with other silver_buyers columns as needed...\n", 85 | " \n", 86 | " # From silver_sellers\n", 87 | " silver_sellers[\"nbsellers\"].alias(\"Sellers_Total\"),\n", 88 | " silver_sellers[\"sex\"].alias(\"Sellers_Sex\"),\n", 89 | " silver_sellers[\"meanproductssold\"].alias(\"Sellers_MeanProductsSold\"),\n", 90 | " silver_sellers[\"meanproductslisted\"].alias(\"Sellers_MeanProductsListed\"),\n", 91 | " # Continue with other silver_sellers columns as needed...\n", 92 | ")\n" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 0, 98 | "metadata": { 99 | "application/vnd.databricks.v1+cell": { 100 | "cellMetadata": { 101 | "byteLimit": 2048000, 102 | "rowLimit": 10000 103 | }, 104 | "inputWidgets": {}, 105 | "nuid": "37ff2030-47ef-463e-abaf-f13bcad70e2d", 106 | "showTitle": false, 107 | "title": "" 108 | } 109 | }, 110 | "outputs": [ 111 | { 112 | "output_type": "stream", 113 | "name": "stdout", 114 | "output_type": "stream", 115 | "text": [ 116 | 
"+---------+------------------+--------------------+-----------------------+-----------------------+---------------+-----------------------+---------------------+-----------------+--------------------+-----------------------+---------------------+--------------------------+------------------------+------------+----------+-------------+-----------+----------------+--------------+-------------+-----------+------------------------+--------------------------+\n| Country|Users_productsSold|Users_productsWished|Users_account_age_years|Users_account_age_group|Users_hasanyapp|Users_socialnbfollowers|Users_flag_long_title|Countries_Sellers|Countries_TopSellers|Countries_FemaleSellers|Countries_MaleSellers|Countries_TopFemaleSellers|Countries_TopMaleSellers|Buyers_Total|Buyers_Top|Buyers_Female|Buyers_Male|Buyers_TopFemale|Buyers_TopMale|Sellers_Total|Sellers_Sex|Sellers_MeanProductsSold|Sellers_MeanProductsListed|\n+---------+------------------+--------------------+-----------------------+-----------------------+---------------+-----------------------+---------------------+-----------------+--------------------+-----------------------+---------------------+--------------------------+------------------------+------------+----------+-------------+-----------+----------------+--------------+-------------+-----------+------------------------+--------------------------+\n|Singapour| 0| 2| 8.78| Experienced| false| 4| false| NULL| NULL| NULL| NULL| NULL| NULL| 16| 1| 12| 4| 0| 1| 5| FEMALE| 2.20| 1.60|\n|Singapour| 0| 0| 8.78| Experienced| false| 3| false| NULL| NULL| NULL| NULL| NULL| NULL| 16| 1| 12| 4| 0| 1| 5| FEMALE| 2.20| 1.60|\n|Singapour| 0| 0| 8.78| Experienced| false| 3| false| NULL| NULL| NULL| NULL| NULL| NULL| 16| 1| 12| 4| 0| 1| 5| FEMALE| 2.20| 1.60|\n|Singapour| 0| 0| 8.78| Experienced| false| 3| false| NULL| NULL| NULL| NULL| NULL| NULL| 16| 1| 12| 4| 0| 1| 5| FEMALE| 2.20| 1.60|\n|Singapour| 0| 0| 8.78| Experienced| false| 3| false| NULL| NULL| NULL| NULL| NULL| 
NULL| 16| 1| 12| 4| 0| 1| 5| FEMALE| 2.20| 1.60|\n+---------+------------------+--------------------+-----------------------+-----------------------+---------------+-----------------------+---------------------+-----------------+--------------------+-----------------------+---------------------+--------------------------+------------------------+------------+----------+-------------+-----------+----------------+--------------+-------------+-----------+------------------------+--------------------------+\nonly showing top 5 rows\n\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "comprehensive_user_table.show(5)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 0, 127 | "metadata": { 128 | "application/vnd.databricks.v1+cell": { 129 | "cellMetadata": { 130 | "byteLimit": 2048000, 131 | "rowLimit": 10000 132 | }, 133 | "inputWidgets": {}, 134 | "nuid": "3cc23c56-bd18-484e-b57c-940d7a6fd61d", 135 | "showTitle": false, 136 | "title": "" 137 | } 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "comprehensive_user_table.write.format(\"delta\").mode(\"overwrite\").save(\"/mnt/delta/tables/gold/ecom_one_big_table\")\n" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 0, 147 | "metadata": { 148 | "application/vnd.databricks.v1+cell": { 149 | "cellMetadata": { 150 | "byteLimit": 2048000, 151 | "rowLimit": 10000 152 | }, 153 | "inputWidgets": {}, 154 | "nuid": "f5d68f5d-85e5-4d8c-aa9f-37167002fb05", 155 | "showTitle": false, 156 | "title": "" 157 | } 158 | }, 159 | "outputs": [ 160 | { 161 | "output_type": "display_data", 162 | "data": { 163 | "text/plain": [ 164 | "\u001B[0;36m File \u001B[0;32m, line 1\u001B[0;36m\u001B[0m\n", 165 | "\u001B[0;31m CREATE OR REPALCE TABLE\u001B[0m\n", 166 | "\u001B[0m ^\u001B[0m\n", 167 | "\u001B[0;31mSyntaxError\u001B[0m\u001B[0;31m:\u001B[0m invalid syntax\n" 168 | ] 169 | }, 170 | "metadata": { 171 | "application/vnd.databricks.v1+output": { 172 | "arguments": {}, 173 | 
"data": "\u001B[0;36m File \u001B[0;32m, line 1\u001B[0;36m\u001B[0m\n\u001B[0;31m CREATE OR REPALCE TABLE\u001B[0m\n\u001B[0m ^\u001B[0m\n\u001B[0;31mSyntaxError\u001B[0m\u001B[0;31m:\u001B[0m invalid syntax\n", 174 | "errorSummary": "SyntaxError: invalid syntax (command-1873744497818564-122763488, line 1)", 175 | "errorTraceType": "ansi", 176 | "metadata": {}, 177 | "type": "ipynbError" 178 | } 179 | }, 180 | "output_type": "display_data" 181 | } 182 | ], 183 | "source": [] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 0, 188 | "metadata": { 189 | "application/vnd.databricks.v1+cell": { 190 | "cellMetadata": {}, 191 | "inputWidgets": {}, 192 | "nuid": "54698b78-8574-484d-843a-755d5593b652", 193 | "showTitle": false, 194 | "title": "" 195 | } 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "DE" 200 | ] 201 | } 202 | ], 203 | "metadata": { 204 | "application/vnd.databricks.v1+notebook": { 205 | "dashboards": [], 206 | "language": "python", 207 | "notebookMetadata": { 208 | "pythonIndentUnit": 4 209 | }, 210 | "notebookName": "Gold Layer", 211 | "widgets": {} 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 0 216 | } 217 | -------------------------------------------------------------------------------- /Project 2 - ECommerce Data Analysis Azure Data Engineering/data/Buyers-repartition-by-country.csv: -------------------------------------------------------------------------------- 1 | 
country,buyers,topbuyers,topbuyerratio,femalebuyers,malebuyers,topfemalebuyers,topmalebuyers,femalebuyersratio,topfemalebuyersratio,boughtperwishlistratio,boughtperlikeratio,topboughtperwishlistratio,topboughtperlikeratio,totalproductsbought,totalproductswished,totalproductsliked,toptotalproductsbought,toptotalproductswished,toptotalproductsliked,meanproductsbought,meanproductswished,meanproductsliked,topmeanproductsbought,topmeanproductswished,topmeanproductsliked,meanofflinedays,topmeanofflinedays,meanfollowers,meanfollowing,topmeanfollowers,topmeanfollowing 2 | France,1251,53,4.2,851,400,33,20,68.0,62.3,26.08,14.15,36.37,23.21,3573,13700,25249,1487,4089,6406,2.9,11.0,20.2,28.1,77.2,120.9,321.8,75.0,4.8,8.8,10.1,11.0 3 | Royaume-Uni,792,38,4.8,560,232,30,8,70.7,78.9,14.97,9.3,13.93,11.59,2174,14524,23378,853,6124,7362,2.7,18.3,29.5,22.4,161.2,193.7,298.3,68.2,5.4,8.8,11.1,12.2 4 | Etats-Unis,912,31,3.4,700,212,21,10,76.8,67.7,21.84,7.92,23.35,12.44,2370,10852,29917,864,3701,6946,2.6,11.9,32.8,27.9,119.4,224.1,359.3,73.0,4.9,9.7,11.7,27.9 5 | Allemagne,578,29,5.0,409,169,21,8,70.8,72.4,13.91,6.88,10.13,4.85,1635,11758,23761,516,5096,10648,2.8,20.3,41.1,17.8,175.7,367.2,296.6,39.3,5.1,8.6,11.2,11.3 6 | Italie,400,21,5.3,283,117,14,7,70.8,66.7,12.56,8.6,11.86,23.91,1221,9720,14200,519,4376,2171,3.1,24.3,35.5,24.7,208.4,103.4,247.9,40.9,6.4,11.6,14.3,15.4 7 | Espagne,255,21,8.2,189,66,18,3,74.1,85.7,12.98,6.2,11.94,6.21,1028,7918,16579,558,4673,8984,4.0,31.1,65.0,26.6,222.5,427.8,271.1,39.4,7.0,9.5,17.6,11.6 8 | Pays-Bas,144,15,10.4,118,26,10,5,81.9,66.7,19.69,8.73,27.21,11.81,537,2727,6154,295,1084,2498,3.7,18.9,42.7,19.7,72.3,166.5,191.6,14.3,6.3,9.0,17.7,10.5 9 | Suède,151,11,7.3,113,38,7,4,74.8,63.6,11.35,5.52,12.57,5.02,566,4987,10256,234,1862,4658,3.7,33.0,67.9,21.3,169.3,423.5,189.1,86.0,7.4,9.1,20.2,9.5 10 | Finlande,64,10,15.6,53,11,9,1,82.8,90.0,35.33,9.29,42.23,24.26,331,937,3562,212,502,874,5.2,14.6,55.7,21.2,50.2,87.4,184.9,121.1,6.7,9.2,10.3,12.5 11 | 
Danemark,157,9,5.7,127,30,9,0,80.9,100.0,12.89,1.66,6.85,4.03,438,3399,26413,141,2059,3499,2.8,21.6,168.2,15.7,228.8,388.8,156.0,19.3,5.6,8.6,11.7,9.7 12 | Australie,126,9,7.1,92,34,8,1,73.0,88.9,27.75,5.17,39.66,4.83,398,1434,7692,184,464,3812,3.2,11.4,61.0,20.4,51.6,423.6,273.7,102.4,6.3,8.9,11.6,10.6 13 | Belgique,90,7,7.8,73,17,7,0,81.1,100.0,23.67,37.47,23.82,439.06,718,3033,1916,562,2359,128,8.0,33.7,21.3,80.3,337.0,18.3,220.9,16.0,5.5,8.5,8.7,8.4 14 | Autriche,49,6,12.2,40,9,5,1,81.6,83.3,14.14,8.37,16.84,9.53,242,1712,2893,144,855,1511,4.9,34.9,59.0,24.0,142.5,251.8,295.7,19.7,6.0,8.8,9.7,10.0 15 | Bulgarie,9,4,44.4,7,2,3,1,77.8,75.0,3.29,14.26,3.0,13.84,95,2891,666,84,2803,607,10.6,321.2,74.0,21.0,700.8,151.8,82.2,37.0,20.8,32.7,39.5,62.8 16 | Canada,65,3,4.6,45,20,3,0,69.2,100.0,8.51,2.89,40.94,13.1,155,1822,5367,52,127,397,2.4,28.0,82.6,17.3,42.3,132.3,325.6,51.0,6.1,8.6,16.3,12.0 17 | Roumanie,28,3,10.7,22,6,2,1,78.6,66.7,14.22,6.84,15.19,7.44,162,1139,2369,94,619,1263,5.8,40.7,84.6,31.3,206.3,421.0,122.1,16.3,6.0,8.4,12.3,8.0 18 | Hong Kong,28,3,10.7,22,6,2,1,78.6,66.7,6.75,1.61,9.21,1.06,112,1660,6960,64,695,6063,4.0,59.3,248.6,21.3,231.7,2021.0,157.5,68.3,9.4,12.4,14.3,11.0 19 | Luxembourg,13,3,23.1,10,3,3,0,76.9,100.0,32.73,35.14,30.94,46.04,162,495,461,151,488,328,12.5,38.1,35.5,50.3,162.7,109.3,229.4,11.3,9.6,8.3,26.0,8.7 20 | Portugal,18,2,11.1,12,6,1,1,66.7,50.0,1.83,3.42,18.07,4.48,72,3931,2108,30,166,670,4.0,218.4,117.1,15.0,83.0,335.0,159.9,11.0,9.2,15.8,21.0,11.5 21 | Chine,13,2,15.4,9,4,2,0,69.2,100.0,292.11,18.78,1300.0,20.68,111,38,591,91,7,440,8.5,2.9,45.5,45.5,3.5,220.0,315.9,189.0,4.7,8.5,6.0,10.0 22 | Hongrie,7,2,28.6,7,0,2,0,100.0,100.0,23.08,8.43,79.41,7.52,42,182,498,27,34,359,6.0,26.0,71.1,13.5,17.0,179.5,83.7,13.0,3.9,8.0,5.0,8.0 23 | Slovénie,2,2,100.0,2,0,2,0,100.0,100.0,6.87,46.99,6.87,46.99,39,568,83,39,568,83,19.5,284.0,41.5,19.5,284.0,41.5,27.0,27.0,22.5,80.0,22.5,80.0 24 | 
Suisse,58,1,1.7,45,13,1,0,77.6,100.0,8.46,12.63,1.37,10.38,137,1620,1085,11,803,106,2.4,27.9,18.7,11.0,803.0,106.0,249.2,11.0,5.3,9.0,9.0,27.0 25 | Irlande,31,1,3.2,23,8,1,0,74.2,100.0,7.29,5.97,14.63,5.77,79,1084,1324,18,123,312,2.5,35.0,42.7,18.0,123.0,312.0,253.2,14.0,5.9,8.8,8.0,8.0 26 | Pologne,29,1,3.4,22,7,1,0,75.9,100.0,33.26,21.1,362.5,59.18,150,451,711,87,24,147,5.2,15.6,24.5,87.0,24.0,147.0,224.2,11.0,5.4,8.8,13.0,9.0 27 | Grèce,20,1,5.0,18,2,1,0,90.0,100.0,16.21,5.81,13.08,6.33,53,327,913,14,107,221,2.7,16.4,45.7,14.0,107.0,221.0,135.3,23.0,8.9,9.3,9.0,17.0 28 | Japon,17,1,5.9,8,9,0,1,47.1,0.0,93.06,7.41,,460.0,67,72,904,23,0,5,3.9,4.2,53.2,23.0,0.0,5.0,278.1,26.0,6.5,9.0,4.0,8.0 29 | Singapour,16,1,6.3,12,4,0,1,75.0,0.0,7.84,3.17,1800.0,1800.0,75,957,2369,36,2,2,4.7,59.8,148.1,36.0,2.0,2.0,191.9,56.0,8.3,12.4,10.0,8.0 30 | Croatie,9,1,11.1,7,2,1,0,77.8,100.0,37.86,24.84,35.71,52.63,39,103,157,20,56,38,4.3,11.4,17.4,20.0,56.0,38.0,174.0,14.0,5.9,8.4,5.0,9.0 31 | Émirats arabes unis,2,1,50.0,2,0,1,0,100.0,100.0,9.09,1.64,14.81,1.59,13,143,795,12,81,755,6.5,71.5,397.5,12.0,81.0,755.0,103.0,92.0,6.5,9.5,8.0,11.0 32 | Macau,2,1,50.0,2,0,1,0,100.0,100.0,14.18,3.54,22.78,3.77,19,134,536,18,79,477,9.5,67.0,268.0,18.0,79.0,477.0,132.0,11.0,9.5,11.5,9.0,8.0 33 | Ukraine,1,1,100.0,1,0,1,0,100.0,100.0,2.55,2.27,2.55,2.27,18,707,792,18,707,792,18.0,707.0,792.0,18.0,707.0,792.0,11.0,11.0,65.0,157.0,65.0,157.0 34 | Cambodge,1,1,100.0,0,1,0,1,0.0,0.0,162.5,243.75,162.5,243.75,39,24,16,39,24,16,39.0,24.0,16.0,39.0,24.0,16.0,22.0,22.0,9.0,9.0,9.0,9.0 35 | Norvège,8,0,0.0,8,0,0,0,100.0,,8.03,9.24,,,11,137,119,0,0,0,1.4,17.1,14.9,,,,242.6,,5.3,8.4,, 36 | Estonie,7,0,0.0,5,2,0,0,71.4,,45.0,25.71,,,9,20,35,0,0,0,1.3,2.9,5.0,,,,351.4,,6.4,8.3,, 37 | Corée du Sud,6,0,0.0,5,1,0,0,83.3,,34.38,5.37,,,11,32,205,0,0,0,1.8,5.3,34.2,,,,238.8,,4.3,8.8,, 38 | République tchèque,6,0,0.0,4,2,0,0,66.7,,19.51,88.89,,,8,41,9,0,0,0,1.3,6.8,1.5,,,,291.8,,4.0,8.2,, 39 | 
Chypre,5,0,0.0,5,0,0,0,100.0,,5.26,2.47,,,9,171,364,0,0,0,1.8,34.2,72.8,,,,243.0,,5.4,10.6,, 40 | Russie,5,0,0.0,4,1,0,0,80.0,,266.67,25.81,,,8,3,31,0,0,0,1.6,0.6,6.2,,,,285.2,,4.4,8.2,, 41 | Lituanie,4,0,0.0,4,0,0,0,100.0,,5.56,1.11,,,4,72,359,0,0,0,1.0,18.0,89.8,,,,185.8,,9.5,9.5,, 42 | Turquie,3,0,0.0,1,2,0,0,33.3,,80.0,28.57,,,4,5,14,0,0,0,1.3,1.7,4.7,,,,423.0,,3.7,7.7,, 43 | Brésil,3,0,0.0,2,1,0,0,66.7,,14.81,36.36,,,4,27,11,0,0,0,1.3,9.0,3.7,,,,118.7,,5.7,8.3,, 44 | Malaisie,3,0,0.0,3,0,0,0,100.0,,38.46,83.33,,,5,13,6,0,0,0,1.7,4.3,2.0,,,,167.7,,3.3,8.0,, 45 | Israel,3,0,0.0,3,0,0,0,100.0,,18.6,5.56,,,8,43,144,0,0,0,2.7,14.3,48.0,,,,170.7,,3.0,8.0,, 46 | Taiwan,3,0,0.0,2,1,0,0,66.7,,29.03,45.0,,,9,31,20,0,0,0,3.0,10.3,6.7,,,,286.3,,3.7,8.3,, 47 | Islande,2,0,0.0,1,1,0,0,50.0,,9.09,9.52,,,2,22,21,0,0,0,1.0,11.0,10.5,,,,21.0,,5.0,8.0,, 48 | Guadeloupe,2,0,0.0,1,1,0,0,50.0,,33.33,300.0,,,3,9,1,0,0,0,1.5,4.5,0.5,,,,275.0,,4.5,9.0,, 49 | Arabie Saoudite,2,0,0.0,2,0,0,0,100.0,,200.0,8.0,,,2,1,25,0,0,0,1.0,0.5,12.5,,,,32.0,,4.5,8.0,, 50 | Egypte,2,0,0.0,1,1,0,0,50.0,,33.33,4.17,,,3,9,72,0,0,0,1.5,4.5,36.0,,,,33.0,,6.0,9.5,, 51 | Indonésie,2,0,0.0,1,1,0,0,50.0,,,9.09,,,2,0,22,0,0,0,1.0,0.0,11.0,,,,331.5,,5.0,9.5,, 52 | Monaco,2,0,0.0,1,1,0,0,50.0,,80.0,30.77,,,4,5,13,0,0,0,2.0,2.5,6.5,,,,332.5,,4.0,8.0,, 53 | Puerto Rico,2,0,0.0,1,1,0,0,50.0,,3.57,2.61,,,3,84,115,0,0,0,1.5,42.0,57.5,,,,359.0,,4.0,8.0,, 54 | Slovaquie,2,0,0.0,2,0,0,0,100.0,,,,,,2,0,0,0,0,0,1.0,0.0,0.0,,,,459.0,,6.5,9.0,, 55 | Colombie,1,0,0.0,1,0,0,0,100.0,,20.0,1.04,,,1,5,96,0,0,0,1.0,5.0,96.0,,,,12.0,,4.0,10.0,, 56 | Serbie,1,0,0.0,1,0,0,0,100.0,,100.0,5.26,,,1,1,19,0,0,0,1.0,1.0,19.0,,,,13.0,,3.0,8.0,, 57 | Bahreïn,1,0,0.0,1,0,0,0,100.0,,400.0,200.0,,,4,1,2,0,0,0,4.0,1.0,2.0,,,,21.0,,4.0,8.0,, 58 | Nouvelle Zélande,1,0,0.0,1,0,0,0,100.0,,300.0,18.37,,,9,3,49,0,0,0,9.0,3.0,49.0,,,,33.0,,6.0,8.0,, 59 | Malte,1,0,0.0,1,0,0,0,100.0,,,50.0,,,1,0,2,0,0,0,1.0,0.0,2.0,,,,366.0,,3.0,8.0,, 60 | 
Géorgie,1,0,0.0,1,0,0,0,100.0,,,150.0,,,3,0,2,0,0,0,3.0,0.0,2.0,,,,37.0,,3.0,8.0,, 61 | Pakistan,1,0,0.0,0,1,0,0,0.0,,100.0,100.0,,,1,1,1,0,0,0,1.0,1.0,1.0,,,,431.0,,4.0,9.0,, 62 | Mexique,1,0,0.0,1,0,0,0,100.0,,133.33,4.08,,,4,3,98,0,0,0,4.0,3.0,98.0,,,,61.0,,9.0,13.0,, 63 | Qatar,1,0,0.0,1,0,0,0,100.0,,,,,,1,0,0,0,0,0,1.0,0.0,0.0,,,,692.0,,3.0,8.0,, 64 | -------------------------------------------------------------------------------- /Project 2 - ECommerce Data Analysis Azure Data Engineering/data/Comparison-of-Sellers-by-Gender-and-Country.csv: -------------------------------------------------------------------------------- 1 | country,sex,nbsellers,meanproductssold,meanproductslisted,meansellerpassrate,totalproductssold,totalproductslisted,meanproductsbought,meanproductswished,meanproductsliked,totalbought,totalwished,totalproductsliked,meanfollowers,meanfollows,percentofappusers,percentofiosusers,meanseniority 2 | Allemagne,Female,116,4.03,2.72,27.33,468,315,3.05,34.66,35.28,354,4021,4092,9.5,8.9,54.0,49.0,3060.3362068965516 3 | Allemagne,Male,34,2.0,1.0,19.15,68,34,1.62,3.38,31.79,55,115,1081,7.8,8.4,79.0,64.0,3089.0588235294117 4 | Arménie,Female,1,0.0,25.0,0.0,0,25,0.0,0.0,1.0,0,0,1,4.0,8.0,,,3201.0 5 | Australie,Female,18,0.94,1.33,10.44,17,24,6.11,17.72,209.28,110,319,3767,7.5,9.3,55.0,55.0,3103.6666666666665 6 | Australie,Male,3,6.0,4.0,33.33,18,12,8.0,24.0,38.33,24,72,115,12.7,8.3,66.0,66.0,3085.6666666666665 7 | Autriche,Female,18,3.28,2.5,38.67,59,45,3.39,31.94,33.0,61,575,594,6.9,8.4,61.0,50.0,3048.222222222222 8 | Autriche,Male,5,1.6,0.2,35.0,8,1,0.8,5.0,0.8,4,25,4,6.2,8.0,60.0,60.0,3133.0 9 | Bahamas,Female,1,1.0,0.0,0.0,1,0,0.0,0.0,0.0,0,0,0,4.0,8.0,,,2857.0 10 | Belgique,Female,37,2.41,1.89,29.59,89,70,12.03,57.24,20.11,445,2118,744,8.3,8.5,62.0,48.0,3041.0810810810813 11 | Belgique,Male,8,3.13,1.75,26.13,25,14,0.63,1.13,1.25,5,9,10,8.4,9.3,62.0,37.0,3069.625 12 | 
Bulgarie,Female,7,22.29,11.71,71.71,156,82,4.86,124.43,120.29,34,871,842,29.1,14.4,71.0,57.0,3150.1428571428573 13 | Bulgarie,Male,3,4.67,0.33,54.0,14,1,14.0,633.67,11.0,42,1901,33,19.7,63.7,100.0,100.0,3082.6666666666665 14 | Canada,Female,3,2.33,0.33,25.0,7,1,0.0,1.33,6.33,0,4,19,5.7,8.7,33.0,33.0,3085.6666666666665 15 | Chine,Female,3,0.67,1.0,33.33,2,3,24.67,1.67,133.33,74,5,400,11.3,9.7,66.0,66.0,3085.3333333333335 16 | Chypre,Female,5,11.2,16.4,55.4,56,82,0.0,4.2,99.0,0,21,495,19.2,10.0,60.0,40.0,3130.6 17 | Chypre,Male,1,0.0,1.0,0.0,0,1,0.0,0.0,49.0,0,0,49,8.0,8.0,100.0,100.0,3199.0 18 | Croatie,Female,10,10.7,6.9,32.6,107,69,0.8,4.7,21.4,8,47,214,15.1,8.8,60.0,40.0,3061.7 19 | Danemark,Female,22,1.64,1.0,6.5,36,22,3.5,90.32,175.41,77,1987,3859,8.9,9.5,59.0,59.0,3043.909090909091 20 | Danemark,Male,6,2.33,1.17,40.17,14,7,1.33,3.17,18.83,8,19,113,6.0,8.0,66.0,66.0,3141.8333333333335 21 | Espagne,Female,133,5.91,3.86,29.52,786,514,2.98,37.66,104.8,397,5009,13938,13.7,14.1,66.0,49.0,3075.533834586466 22 | Espagne,Male,30,6.8,5.1,36.01,204,153,0.8,4.5,15.23,24,135,457,11.0,8.1,80.0,63.0,3108.0666666666666 23 | Estonie,Female,3,1.0,3.33,0.0,3,10,0.33,0.33,1.0,1,1,3,11.3,8.3,66.0,33.0,3084.6666666666665 24 | Estonie,Male,1,1.0,0.0,0.0,1,0,0.0,0.0,0.0,0,0,0,6.0,8.0,100.0,100.0,3198.0 25 | Etats-Unis,Female,182,4.49,4.5,25.4,817,819,3.25,18.03,46.79,592,3282,8516,11.6,12.5,52.0,50.0,3033.2252747252746 26 | Etats-Unis,Male,41,3.66,3.49,30.31,150,143,0.88,3.34,8.12,36,137,333,13.0,9.0,60.0,58.0,3065.7804878048782 27 | Finlande,Female,8,2.5,1.25,32.13,20,10,12.63,31.13,23.75,101,249,190,7.6,9.8,75.0,75.0,3157.0 28 | France,Female,733,3.23,2.39,25.29,2365,1749,1.03,6.25,22.01,757,4583,16133,8.0,22.4,56.0,46.0,3061.4529331514323 29 | France,Male,233,2.79,2.12,25.89,649,493,2.26,4.14,19.57,526,965,4559,6.9,24.7,59.0,49.0,3066.8197424892705 30 | Grèce,Female,14,2.14,5.57,36.36,30,78,1.29,5.79,42.07,18,81,589,13.4,9.5,64.0,50.0,3053.214285714286 31 | 
Grèce,Male,2,0.5,1.5,0.0,1,3,0.0,0.5,3.0,0,1,6,7.5,8.0,50.0,50.0,3026.0 32 | Hong Kong,Female,14,2.5,4.43,13.29,35,62,3.5,61.21,461.79,49,857,6465,10.9,14.1,57.0,50.0,3026.5 33 | Hong Kong,Male,3,5.0,1.33,43.0,15,4,7.0,90.33,35.33,21,271,106,9.0,8.3,100.0,100.0,3087.3333333333335 34 | Hongrie,Female,1,0.0,3.0,0.0,0,3,8.0,80.0,97.0,8,80,97,3.0,8.0,,,2856.0 35 | Irlande,Female,8,2.0,1.13,28.25,16,9,3.0,18.88,50.38,24,151,403,7.1,8.3,37.0,12.0,3157.5 36 | Islande,Female,1,1.0,2.0,0.0,1,2,1.0,15.0,15.0,1,15,15,6.0,8.0,,,3198.0 37 | Islande,Male,1,1.0,1.0,0.0,1,1,1.0,7.0,6.0,1,7,6,4.0,8.0,,,2858.0 38 | Italie,Female,337,5.53,5.33,37.65,1865,1796,0.83,19.85,185.0,281,6689,62346,13.7,55.3,72.0,59.0,3061.949554896142 39 | Italie,Male,118,8.09,5.13,42.22,955,605,1.22,6.22,23.53,144,734,2777,12.7,11.1,72.0,61.0,3089.8050847457625 40 | Japon,Female,3,0.33,4.33,0.0,1,13,3.33,16.33,178.33,10,49,535,20.0,12.3,66.0,66.0,3087.6666666666665 41 | Kazakhstan,Female,1,1.0,0.0,0.0,1,0,0.0,0.0,1.0,0,0,1,5.0,9.0,100.0,100.0,2859.0 42 | Lettonie,Female,4,20.75,9.0,45.0,83,36,0.0,4.75,27.75,0,19,111,21.0,52.3,75.0,75.0,3200.5 43 | Lituanie,Female,3,1.33,2.67,0.0,4,8,1.0,24.0,115.0,3,72,345,10.3,10.0,66.0,66.0,3197.6666666666665 44 | Luxembourg,Female,8,5.25,0.75,36.25,42,6,17.13,19.63,58.13,137,157,465,14.4,8.4,75.0,50.0,3113.5 45 | Luxembourg,Male,3,0.33,2.33,0.0,1,7,0.0,0.0,0.33,0,0,1,3.3,8.0,100.0,100.0,2856.3333333333335 46 | Malaisie,Female,1,0.0,2.0,0.0,0,2,0.0,7.0,83.0,0,7,83,17.0,18.0,100.0,,3201.0 47 | Martinique,Female,1,0.0,1.0,0.0,0,1,0.0,0.0,0.0,0,0,0,4.0,9.0,100.0,100.0,3204.0 48 | Monaco,Female,6,31.67,4.5,60.83,190,27,0.0,0.17,1.67,0,1,10,34.7,8.0,33.0,33.0,2970.5 49 | Pays-Bas,Female,53,2.64,2.02,31.28,140,107,2.94,30.53,62.11,156,1618,3292,9.8,9.0,69.0,62.0,3109.0377358490564 50 | Pays-Bas,Male,15,4.93,3.33,37.47,74,50,4.8,26.4,72.8,72,396,1092,11.1,10.5,66.0,60.0,3084.6 51 | Pologne,Female,4,2.0,2.25,25.0,8,9,2.5,18.0,31.5,10,72,126,8.0,12.3,50.0,50.0,2942.0 52 | 
Pologne,Male,2,7.5,0.5,74.5,15,1,0.5,0.0,0.5,1,0,1,11.0,8.0,50.0,50.0,2857.0 53 | Portugal,Female,14,2.14,2.86,37.21,30,40,1.79,32.93,50.07,25,461,701,9.2,9.4,64.0,57.0,3127.285714285714 54 | Portugal,Male,2,1.5,1.0,0.0,3,2,0.0,0.0,3.0,0,0,6,3.5,8.0,50.0,50.0,3196.5 55 | Roumanie,Female,13,3.38,4.15,18.62,44,54,1.08,5.0,31.77,14,65,413,7.6,8.2,69.0,53.0,3121.0 56 | Roumanie,Male,4,6.0,9.75,33.75,24,39,6.0,0.25,96.0,24,1,384,16.5,19.3,75.0,75.0,3113.25 57 | Royaume-Uni,Female,285,5.06,3.24,32.56,1442,922,1.47,8.62,22.69,420,2457,6466,9.1,8.8,49.0,48.0,3050.519298245614 58 | Royaume-Uni,Male,64,5.69,1.61,25.31,364,103,1.59,12.77,8.16,102,817,522,9.6,8.4,71.0,62.0,3070.765625 59 | Russie,Female,3,3.33,1.33,25.67,10,4,0.0,1.33,13.33,0,4,40,8.0,8.7,33.0,33.0,2972.0 60 | République tchèque,Female,1,0.0,1.0,0.0,0,1,0.0,219.0,615.0,0,219,615,7.0,10.0,,,2859.0 61 | Serbie,Female,2,2.5,1.5,50.0,5,3,0.0,0.0,28.0,0,0,56,15.0,8.5,100.0,100.0,3199.5 62 | Singapour,Female,5,2.2,1.6,70.0,11,8,3.0,185.4,266.6,15,927,1333,9.8,18.6,40.0,40.0,3128.8 63 | Slovaquie,Male,2,14.0,7.0,37.0,28,14,0.0,0.5,5.5,0,1,11,10.5,8.5,100.0,100.0,3199.5 64 | Slovénie,Female,1,6.0,0.0,50.0,6,0,23.0,49.0,39.0,23,49,39,34.0,147.0,100.0,100.0,3199.0 65 | Suisse,Female,11,3.55,19.45,32.3,39,214,1.27,15.09,8.45,14,166,93,8.9,8.3,54.0,54.0,3075.3636363636365 66 | Suisse,Male,5,1.2,0.0,0.0,6,0,0.0,0.2,1.0,0,1,5,9.2,8.2,20.0,20.0,2992.8 67 | Suède,Female,30,10.03,7.3,49.0,301,219,5.0,62.7,321.03,150,1881,9631,18.6,24.7,66.0,56.0,3028.5666666666666 68 | Suède,Male,6,5.83,3.0,23.6,35,18,2.33,18.0,17.17,14,108,103,16.8,10.3,83.0,83.0,3086.6666666666665 69 | Taiwan,Female,1,57.0,56.0,98.0,57,56,0.0,0.0,4.0,0,0,4,83.0,8.0,100.0,100.0,2855.0 70 | Ukraine,Female,1,3.0,2.0,0.0,3,2,18.0,707.0,792.0,18,707,792,65.0,157.0,100.0,100.0,3204.0 71 | Îles Canaries,Female,1,1.0,1.0,0.0,1,1,0.0,0.0,0.0,0,0,0,3.0,8.0,,,2857.0 72 | 
,Female,2127,4.4,3.54,29.46,9354,7523,2.06,18.76,69.99,4387,39896,148873,10.4,22.1,59.0,50.0,3060.948754113775 73 | ,Male,592,4.52,2.88,29.88,2673,1706,1.86,9.65,19.89,1103,5715,11774,9.4,15.8,66.0,57.0,3076.1655405405404 74 | ,,2719,4.42,3.39,29.55,12027,9229,2.02,16.77,59.08,5490,45611,160647,10.2,20.7,60.0,52.0,3064.261860978301 75 | -------------------------------------------------------------------------------- /Project 2 - ECommerce Data Analysis Azure Data Engineering/data/Countries-with-Top-Sellers-(Fashion-C2C).csv: -------------------------------------------------------------------------------- 1 | country,sellers,topsellers,topsellerratio,femalesellersratio,topfemalesellersratio,femalesellers,malesellers,topfemalesellers,topmalesellers,countrysoldratio,bestsoldratio,toptotalproductssold,totalproductssold,toptotalproductslisted,totalproductslisted,topmeanproductssold,topmeanproductslisted,meanproductssold,meanproductslisted,meanofflinedays,topmeanofflinedays,meanfollowers,meanfollowing,topmeanfollowers,topmeanfollowing 2 | Taiwan,1,1,100.0,100.0,100.0,1,0,1,0,1.02,1.02,57,57,56,56,57.0,56.0,57.0,56.0,11.0,11.0,83.0,8.0,83.0,8.0 3 | Slovaquie,2,1,50.0,0.0,0.0,0,2,0,1,2.0,1.93,27,28,14,14,27.0,14.0,14.0,7.0,17.0,15.0,10.5,8.5,15.0,8.0 4 | Lettonie,4,2,50.0,100.0,100.0,4,0,2,0,2.31,2.25,81,83,36,36,40.5,18.0,20.75,9.0,120.3,11.5,21.0,52.3,38.0,98.5 5 | Bulgarie,9,4,44.4,66.7,100.0,6,3,4,0,2.07,2.1,145,170,69,82,36.25,17.25,18.888888888888888888888888,9.111111111111111111111111,98.3,19.0,28.6,31.6,46.3,19.0 6 | Chypre,4,1,25.0,100.0,100.0,4,0,1,0,0.69,0.62,41,56,66,81,41.0,66.0,14.0,20.25,17.3,11.0,21.3,10.3,39.0,17.0 7 | Monaco,5,1,20.0,100.0,100.0,5,0,1,0,7.31,8.95,170,190,19,26,170.0,19.0,38.0,5.2,51.6,12.0,39.6,8.0,167.0,8.0 8 | Roumanie,13,2,15.4,76.9,50.0,10,3,1,1,0.88,1.26,49,68,39,77,24.5,19.5,5.230769230769230769230769,5.923076923076923076923076,121.6,11.0,10.9,11.5,30.0,32.0 9 | 
Luxembourg,7,1,14.3,85.7,100.0,6,1,1,0,5.38,,30,43,0,8,30.0,0.0,6.142857142857142857142857,1.142857142857142857142857,73.6,11.0,15.9,8.4,52.0,3.0 10 | Espagne,119,13,10.9,81.5,76.9,97,22,10,3,1.67,2.02,607,990,301,594,46.692307692307692307692307,23.153846153846153846153846,8.319327731092436974789915,4.991596638655462184873949,202.4,30.5,16.1,14.6,53.2,14.5 11 | Italie,347,35,10.1,71.5,65.7,248,99,23,12,1.27,1.29,1389,2820,1077,2218,39.685714285714285714285714,30.771428571428571428571428,8.126801152737752161383285,6.391930835734870317002881,141.8,26.5,16.1,54.6,63.4,429.3 12 | Croatie,10,1,10.0,100.0,100.0,10,0,1,0,1.55,1.45,64,107,44,69,64.0,44.0,10.7,6.9,141.7,11.0,15.1,8.8,50.0,8.0 13 | Hong Kong,10,1,10.0,70.0,100.0,7,3,1,0,0.91,0.66,23,50,35,55,23.0,35.0,5.0,5.5,49.6,11.0,14.7,15.5,36.0,41.0 14 | Suède,32,3,9.4,81.3,66.7,26,6,2,1,1.45,1.29,202,336,156,231,67.333333333333333333333333,52.0,10.5,7.21875,118.2,12.7,19.8,24.0,102.7,172.0 15 | Royaume-Uni,289,21,7.3,80.3,85.7,232,57,18,3,1.95,2.16,972,1806,449,927,46.285714285714285714285714,21.380952380952380952380952,6.249134948096885813148788,3.207612456747404844290657,188.1,32.4,10.1,8.8,37.3,9.1 16 | Autriche,18,1,5.6,77.8,100.0,14,4,1,0,1.81,1.0,20,67,20,37,20.0,20.0,3.722222222222222222222222,2.055555555555555555555555,138.7,11.0,7.0,8.6,17.0,10.0 17 | Allemagne,105,5,4.8,75.2,80.0,79,26,4,1,2.03,2.85,282,536,99,264,56.4,19.8,5.104761904761904761904761,2.514285714285714285714285,168.2,79.4,10.7,8.8,53.2,10.6 18 | Etats-Unis,150,6,4.0,78.7,83.3,118,32,5,1,1.19,1.3,558,967,428,816,93.0,71.333333333333333333333333,6.446666666666666666666666,5.44,266.1,76.8,14.6,10.2,105.0,17.2 19 | Pays-Bas,54,2,3.7,77.8,50.0,42,12,1,1,1.54,1.74,47,214,27,139,23.5,13.5,3.962962962962962962962962,2.574074074074074074074074,177.4,22.0,11.4,9.6,21.5,10.0 20 | France,713,25,3.5,73.4,80.0,523,190,20,5,1.59,1.4,1109,3014,792,1892,44.36,31.68,4.227208976157082748948106,2.65357643758765778401122,227.1,36.6,8.9,28.3,41.3,156.2 21 | 
-------------------------------------------------------------------------------- /Project 2 - ECommerce Data Analysis Azure Data Engineering/data/chunk-user-data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "6898144a-ffb8-4bba-9e93-f06207396e80", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "pip install pandas" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "id": "78019320-f2ad-49ff-8f8e-1a78db6cfecd", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 6, 26 | "id": "48629112-b6d7-4e38-b6d3-e55f9a279d35", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "df = pd.read_csv(\"users.6M0xxK.2024.public.csv\")" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 7, 36 | "id": "fa41cf7b-e062-47b8-a882-51bed8b4434b", 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/html": [ 42 | "
\n", 43 | "\n", 56 | "\n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | "
identifierHashtypecountryCodecountrylanguagesocialNbFollowerssocialNbFollowssocialProductsLikedproductsListedproductsSold...productsWishedproductsBoughtgendercivilityGenderIdcivilityTitlehasProfilePictureseniorityseniorityAsMonthsseniorityAsYearswebsiteLongevity
01920991140userusEtats-Unisen38000...00M1mrTrue3206106.878.913803
11920860068userseSuèdeen38100...00M1mrTrue3206106.878.913803
21920401316useritItaliefr38000...00M1mrTrue3206106.878.913803
31919811492userdkDanemarken7510196800...4315M1mrTrue3206106.878.913803
41919025060userdeAllemagnede38000...00M1mrTrue3206106.878.913803
\n", 206 | "

5 rows × 21 columns

\n", 207 | "
" 208 | ], 209 | "text/plain": [ 210 | " identifierHash type countryCode country language socialNbFollowers \\\n", 211 | "0 1920991140 user us Etats-Unis en 3 \n", 212 | "1 1920860068 user se Suède en 3 \n", 213 | "2 1920401316 user it Italie fr 3 \n", 214 | "3 1919811492 user dk Danemark en 75 \n", 215 | "4 1919025060 user de Allemagne de 3 \n", 216 | "\n", 217 | " socialNbFollows socialProductsLiked productsListed productsSold ... \\\n", 218 | "0 8 0 0 0 ... \n", 219 | "1 8 1 0 0 ... \n", 220 | "2 8 0 0 0 ... \n", 221 | "3 10 1968 0 0 ... \n", 222 | "4 8 0 0 0 ... \n", 223 | "\n", 224 | " productsWished productsBought gender civilityGenderId civilityTitle \\\n", 225 | "0 0 0 M 1 mr \n", 226 | "1 0 0 M 1 mr \n", 227 | "2 0 0 M 1 mr \n", 228 | "3 43 15 M 1 mr \n", 229 | "4 0 0 M 1 mr \n", 230 | "\n", 231 | " hasProfilePicture seniority seniorityAsMonths seniorityAsYears \\\n", 232 | "0 True 3206 106.87 8.91 \n", 233 | "1 True 3206 106.87 8.91 \n", 234 | "2 True 3206 106.87 8.91 \n", 235 | "3 True 3206 106.87 8.91 \n", 236 | "4 True 3206 106.87 8.91 \n", 237 | "\n", 238 | " websiteLongevity \n", 239 | "0 3803 \n", 240 | "1 3803 \n", 241 | "2 3803 \n", 242 | "3 3803 \n", 243 | "4 3803 \n", 244 | "\n", 245 | "[5 rows x 21 columns]" 246 | ] 247 | }, 248 | "execution_count": 7, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "df.head()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 8, 260 | "id": "8e4bf583-5639-4a69-ab92-ef91e11276b5", 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "\n", 265 | "# Calculate the size of each chunk\n", 266 | "chunk_size = len(df) // 5 + (1 if len(df) % 5 else 0)\n", 267 | "\n", 268 | "# Split the dataframe into chunks and save them\n", 269 | "for i in range(5):\n", 270 | " start = i * chunk_size\n", 271 | " end = start + chunk_size\n", 272 | " chunk_df = df.iloc[start:end]\n", 273 | " chunk_df.to_csv(f'chunk-data/chunk{i+5+1}.csv', 
index=False)\n" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "id": "abb71ec8-46ef-4372-b673-d551c58eb1f2", 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [] 283 | } 284 | ], 285 | "metadata": { 286 | "kernelspec": { 287 | "display_name": "Python 3 (ipykernel)", 288 | "language": "python", 289 | "name": "python3" 290 | }, 291 | "language_info": { 292 | "codemirror_mode": { 293 | "name": "ipython", 294 | "version": 3 295 | }, 296 | "file_extension": ".py", 297 | "mimetype": "text/x-python", 298 | "name": "python", 299 | "nbconvert_exporter": "python", 300 | "pygments_lexer": "ipython3", 301 | "version": "3.12.1" 302 | } 303 | }, 304 | "nbformat": 4, 305 | "nbformat_minor": 5 306 | } 307 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # apache-spark-with-data-bricks-for-data-engineering 2 | apache-spark-with-data-bricks-for-data-engineering 3 | -------------------------------------------------------------------------------- /Spark Data Source.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "55388fa7-e326-48bb-a72e-bbc1f69a6739", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "csvFile = spark.read.format(\"csv\")\\\n", 11 | " .option(\"header\", \"true\")\\\n", 12 | " .option(\"mode\", \"FAILFAST\")\\\n", 13 | " .option(\"inferSchema\", \"true\")\\\n", 14 | " .load(\"data/flight-data/csv/2010-summary.csv\")" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "id": "3d5ef8bb-a67a-4e12-9338-01203022e6ba", 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "+--------------------+-------------------+-----+\n", 28 | "| 
DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|\n", 29 | "+--------------------+-------------------+-----+\n", 30 | "| United States| Romania| 1|\n", 31 | "| United States| Ireland| 264|\n", 32 | "| United States| India| 69|\n", 33 | "| Egypt| United States| 24|\n", 34 | "| Equatorial Guinea| United States| 1|\n", 35 | "| United States| Singapore| 25|\n", 36 | "| United States| Grenada| 54|\n", 37 | "| Costa Rica| United States| 477|\n", 38 | "| Senegal| United States| 29|\n", 39 | "| United States| Marshall Islands| 44|\n", 40 | "| Guyana| United States| 17|\n", 41 | "| United States| Sint Maarten| 53|\n", 42 | "| Malta| United States| 1|\n", 43 | "| Bolivia| United States| 46|\n", 44 | "| Anguilla| United States| 21|\n", 45 | "|Turks and Caicos ...| United States| 136|\n", 46 | "| United States| Afghanistan| 2|\n", 47 | "|Saint Vincent and...| United States| 1|\n", 48 | "| Italy| United States| 390|\n", 49 | "| United States| Russia| 156|\n", 50 | "+--------------------+-------------------+-----+\n", 51 | "only showing top 20 rows\n", 52 | "\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "csvFile.show()" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "id": "29048a70-2c3c-49fd-b540-8d66823cabba", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "csvFile.write.format(\"csv\").mode(\"overwrite\").option(\"sep\", \",\").save(\"data/flight-data/write/my-csv-data.csv\")" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "id": "13e3b920-f40f-48cc-8c04-8e3a35b4e30b", 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "+-----------------+-------------------+-----+\n", 81 | "|DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|\n", 82 | "+-----------------+-------------------+-----+\n", 83 | "| United States| Romania| 1|\n", 84 | "| United States| Ireland| 264|\n", 85 | "| United States| India| 69|\n", 86 | "| Egypt| United States| 24|\n", 
87 | "|Equatorial Guinea| United States| 1|\n", 88 | "+-----------------+-------------------+-----+\n", 89 | "only showing top 5 rows\n", 90 | "\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "spark.read.format(\"json\").option(\"mode\", \"FAILFAST\")\\\n", 96 | " .option(\"inferSchema\", \"true\")\\\n", 97 | " .load(\"data/flight-data/json/2010-summary.json\").show(5)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "id": "45820a61-fe4f-4c69-b879-50bf64fb766c", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "csvFile.write.format(\"json\").mode(\"overwrite\").save(\"data/flight-data/write/my-json-file.json\")" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 7, 113 | "id": "6a7c0d9e-d6b5-4aec-af15-76ff9cbe2155", 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "+-----------------+-------------------+-----+\n", 121 | "|DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|\n", 122 | "+-----------------+-------------------+-----+\n", 123 | "| United States| Romania| 1|\n", 124 | "| United States| Ireland| 264|\n", 125 | "| United States| India| 69|\n", 126 | "| Egypt| United States| 24|\n", 127 | "|Equatorial Guinea| United States| 1|\n", 128 | "+-----------------+-------------------+-----+\n", 129 | "only showing top 5 rows\n", 130 | "\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "spark.read.format(\"parquet\")\\\n", 136 | "\t .load(\"data/flight-data/parquet/2010-summary.parquet\").show(5)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 9, 142 | "id": "c669ff37-e161-40b3-93f2-c001361d783c", 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "+-----------------+-------------------+-----+\n", 150 | "|DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|\n", 151 | "+-----------------+-------------------+-----+\n", 152 
| "| United States| Romania| 1|\n", 153 | "| United States| Ireland| 264|\n", 154 | "| United States| India| 69|\n", 155 | "| Egypt| United States| 24|\n", 156 | "|Equatorial Guinea| United States| 1|\n", 157 | "+-----------------+-------------------+-----+\n", 158 | "only showing top 5 rows\n", 159 | "\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "spark.read.format(\"orc\").load(\"data/flight-data/orc/2010-summary.orc\").show(5)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 10, 170 | "id": "cad35276-7c10-4e7e-ae01-954995b6b753", 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "csvFile.repartition(5).write.format(\"csv\").save(\"data/flight-data/write/multiple.csv\")" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 11, 180 | "id": "37a63bc8-6dcf-43a6-aa62-261c91d724e5", 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "csvFile.limit(10).write.mode(\"overwrite\").partitionBy(\"DEST_COUNTRY_NAME\").save(\"data/flight-data/write/partitioned-files.parquet\")" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "id": "b0f65509-2794-49f2-9192-f039a662cdb0", 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [] 194 | } 195 | ], 196 | "metadata": { 197 | "kernelspec": { 198 | "display_name": "Python 3 (ipykernel)", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.12.1" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 5 217 | } 218 | -------------------------------------------------------------------------------- /Spark Deployment.ipynb: -------------------------------------------------------------------------------- 1 
| { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "e84d341d-d5dd-4ad1-a539-8c75455c71f9", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from pyspark.sql import SparkSession" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 5, 16 | "id": "5436eedf-99e1-4e56-9737-ee06463928e4", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "spark_create = SparkSession.builder.master(\"local\").appName(\"Word Count\")\\\n", 21 | ".config(\"spark.some.config.option\", \"some-value\")\\\n", 22 | ".getOrCreate()" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 4, 28 | "id": "ed99f4cf-93cd-4449-b623-6a695841985c", 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/html": [ 34 | "\n", 35 | "
\n", 36 | "

SparkSession - hive

\n", 37 | " \n", 38 | "
\n", 39 | "

SparkContext

\n", 40 | "\n", 41 | "

Spark UI

\n", 42 | "\n", 43 | "
\n", 44 | "
Version
\n", 45 | "
v3.5.0
\n", 46 | "
Master
\n", 47 | "
local[*]
\n", 48 | "
AppName
\n", 49 | "
PySparkShell
\n", 50 | "
\n", 51 | "
\n", 52 | " \n", 53 | "
\n", 54 | " " 55 | ], 56 | "text/plain": [ 57 | "" 58 | ] 59 | }, 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "spark" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 6, 72 | "id": "fa05005c-85a6-469c-bf0d-2df945110cc1", 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/html": [ 78 | "\n", 79 | "
\n", 80 | "

SparkSession - hive

\n", 81 | " \n", 82 | "
\n", 83 | "

SparkContext

\n", 84 | "\n", 85 | "

Spark UI

\n", 86 | "\n", 87 | "
\n", 88 | "
Version
\n", 89 | "
v3.5.0
\n", 90 | "
Master
\n", 91 | "
local[*]
\n", 92 | "
AppName
\n", 93 | "
PySparkShell
\n", 94 | "
\n", 95 | "
\n", 96 | " \n", 97 | "
\n", 98 | " " 99 | ], 100 | "text/plain": [ 101 | "" 102 | ] 103 | }, 104 | "execution_count": 6, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "spark_create" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 8, 116 | "id": "4c24fc3a-16f2-4da5-9716-f4a0b5524cae", 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/html": [ 122 | "\n", 123 | "
\n", 124 | "

SparkContext

\n", 125 | "\n", 126 | "

Spark UI

\n", 127 | "\n", 128 | "
\n", 129 | "
Version
\n", 130 | "
v3.5.0
\n", 131 | "
Master
\n", 132 | "
local[*]
\n", 133 | "
AppName
\n", 134 | "
PySparkShell
\n", 135 | "
\n", 136 | "
\n", 137 | " " 138 | ], 139 | "text/plain": [ 140 | "" 141 | ] 142 | }, 143 | "execution_count": 8, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "sc" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "id": "37a8b2ec-813c-4ab3-b1e6-6520a97bbd8a", 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 19, 163 | "id": "63d0eedb-2b1e-4dac-b3d6-d0d8997db668", 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "name": "stderr", 168 | "output_type": "stream", 169 | "text": [ 170 | " \r" 171 | ] 172 | }, 173 | { 174 | "data": { 175 | "text/plain": [ 176 | "[Row(sum(id)=2500000000000)]" 177 | ] 178 | }, 179 | "execution_count": 19, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "# in Python\n", 186 | "\n", 187 | "df1 = spark.range(2, 10000000, 2)\n", 188 | "df2 = spark.range(2, 10000000, 4)\n", 189 | "step1 = df1.repartition(5)\n", 190 | "step12 = df2.repartition(6)\n", 191 | "\n", 192 | "step2 = step1.selectExpr(\"id * 5 as id\")\n", 193 | "step3 = step2.join(step12, [\"id\"])\n", 194 | "step4 = step3.selectExpr(\"sum(id)\")\n", 195 | "\n", 196 | "step4.collect() # 2500000000000" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 13, 202 | "id": "7d3bd22e-e8ad-4e1c-af6e-f7e062ea4c87", 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "== Physical Plan ==\n", 210 | "AdaptiveSparkPlan isFinalPlan=true\n", 211 | "+- == Final Plan ==\n", 212 | " *(7) HashAggregate(keys=[], functions=[sum(id#29L)])\n", 213 | " +- ShuffleQueryStage 4\n", 214 | " +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=555]\n", 215 | " +- *(6) HashAggregate(keys=[], functions=[partial_sum(id#29L)])\n", 216 | " +- *(6) Project [id#29L]\n", 217 | " +- *(6) SortMergeJoin 
[id#29L], [id#23L], Inner\n", 218 | " :- *(4) Sort [id#29L ASC NULLS FIRST], false, 0\n", 219 | " : +- AQEShuffleRead coalesced\n", 220 | " : +- ShuffleQueryStage 2\n", 221 | " : +- Exchange hashpartitioning(id#29L, 200), ENSURE_REQUIREMENTS, [plan_id=426]\n", 222 | " : +- *(3) Project [(id#21L * 5) AS id#29L]\n", 223 | " : +- ShuffleQueryStage 0\n", 224 | " : +- Exchange RoundRobinPartitioning(5), REPARTITION_BY_NUM, [plan_id=359]\n", 225 | " : +- *(1) Range (2, 10000000, step=2, splits=8)\n", 226 | " +- *(5) Sort [id#23L ASC NULLS FIRST], false, 0\n", 227 | " +- AQEShuffleRead coalesced\n", 228 | " +- ShuffleQueryStage 3\n", 229 | " +- Exchange hashpartitioning(id#23L, 200), ENSURE_REQUIREMENTS, [plan_id=371]\n", 230 | " +- ShuffleQueryStage 1\n", 231 | " +- Exchange RoundRobinPartitioning(6), REPARTITION_BY_NUM, [plan_id=367]\n", 232 | " +- *(2) Range (2, 10000000, step=4, splits=8)\n", 233 | "+- == Initial Plan ==\n", 234 | " HashAggregate(keys=[], functions=[sum(id#29L)])\n", 235 | " +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=335]\n", 236 | " +- HashAggregate(keys=[], functions=[partial_sum(id#29L)])\n", 237 | " +- Project [id#29L]\n", 238 | " +- SortMergeJoin [id#29L], [id#23L], Inner\n", 239 | " :- Sort [id#29L ASC NULLS FIRST], false, 0\n", 240 | " : +- Exchange hashpartitioning(id#29L, 200), ENSURE_REQUIREMENTS, [plan_id=327]\n", 241 | " : +- Project [(id#21L * 5) AS id#29L]\n", 242 | " : +- Exchange RoundRobinPartitioning(5), REPARTITION_BY_NUM, [plan_id=317]\n", 243 | " : +- Range (2, 10000000, step=2, splits=8)\n", 244 | " +- Sort [id#23L ASC NULLS FIRST], false, 0\n", 245 | " +- Exchange hashpartitioning(id#23L, 200), ENSURE_REQUIREMENTS, [plan_id=328]\n", 246 | " +- Exchange RoundRobinPartitioning(6), REPARTITION_BY_NUM, [plan_id=320]\n", 247 | " +- Range (2, 10000000, step=4, splits=8)\n", 248 | "\n", 249 | "\n" 250 | ] 251 | } 252 | ], 253 | "source": [ 254 | "step4.explain()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | 
"execution_count": 11, 260 | "id": "c9029437-b662-445b-8ef8-2b108c9bde2c", 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/html": [ 266 | "\n", 267 | "
\n", 268 | "

SparkSession - hive

\n", 269 | " \n", 270 | "
\n", 271 | "

SparkContext

\n", 272 | "\n", 273 | "

Spark UI

\n", 274 | "\n", 275 | "
\n", 276 | "
Version
\n", 277 | "
v3.5.0
\n", 278 | "
Master
\n", 279 | "
local[*]
\n", 280 | "
AppName
\n", 281 | "
PySparkShell
\n", 282 | "
\n", 283 | "
\n", 284 | " \n", 285 | "
\n", 286 | " " 287 | ], 288 | "text/plain": [ 289 | "" 290 | ] 291 | }, 292 | "execution_count": 11, 293 | "metadata": {}, 294 | "output_type": "execute_result" 295 | } 296 | ], 297 | "source": [ 298 | "spark" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "id": "3c58f0c0-6e80-4883-9d45-24978f98c056", 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [] 308 | } 309 | ], 310 | "metadata": { 311 | "kernelspec": { 312 | "display_name": "Python 3 (ipykernel)", 313 | "language": "python", 314 | "name": "python3" 315 | }, 316 | "language_info": { 317 | "codemirror_mode": { 318 | "name": "ipython", 319 | "version": 3 320 | }, 321 | "file_extension": ".py", 322 | "mimetype": "text/x-python", 323 | "name": "python", 324 | "nbconvert_exporter": "python", 325 | "pygments_lexer": "ipython3", 326 | "version": "3.12.1" 327 | } 328 | }, 329 | "nbformat": 4, 330 | "nbformat_minor": 5 331 | } 332 | -------------------------------------------------------------------------------- /Spotify Data Pipeline using Spark/(python) spotify_transformation_load_function.py: -------------------------------------------------------------------------------- 1 | import json 2 | import boto3 3 | from datetime import datetime 4 | from io import StringIO 5 | import pandas as pd 6 | 7 | def album(data): 8 | album_list = [] 9 | for row in data['items']: 10 | album_id = row['track']['album']['id'] 11 | album_name = row['track']['album']['name'] 12 | album_release_date = row['track']['album']['release_date'] 13 | album_total_tracks = row['track']['album']['total_tracks'] 14 | album_url = row['track']['album']['external_urls']['spotify'] 15 | album_element = {'album_id':album_id,'name':album_name,'release_date':album_release_date, 16 | 'total_tracks':album_total_tracks,'url':album_url} 17 | album_list.append(album_element) 18 | return album_list 19 | 20 | def artist(data): 21 | artist_list = [] 22 | for row in data['items']: 23 | for key, value in 
row.items(): 24 | if key == "track": 25 | for artist in value['artists']: 26 | artist_dict = {'artist_id':artist['id'], 'artist_name':artist['name'], 'external_url': artist['href']} 27 | artist_list.append(artist_dict) 28 | return artist_list 29 | 30 | def songs(data): 31 | song_list = [] 32 | for row in data['items']: 33 | song_id = row['track']['id'] 34 | song_name = row['track']['name'] 35 | song_duration = row['track']['duration_ms'] 36 | song_url = row['track']['external_urls']['spotify'] 37 | song_popularity = row['track']['popularity'] 38 | song_added = row['added_at'] 39 | album_id = row['track']['album']['id'] 40 | artist_id = row['track']['album']['artists'][0]['id'] 41 | song_element = {'song_id':song_id,'song_name':song_name,'duration_ms':song_duration,'url':song_url, 42 | 'popularity':song_popularity,'song_added':song_added,'album_id':album_id, 43 | 'artist_id':artist_id 44 | } 45 | song_list.append(song_element) 46 | 47 | return song_list 48 | 49 | def lambda_handler(event, context): 50 | s3 = boto3.client('s3') 51 | Bucket = "spotify-etl-project-darshil" 52 | Key = "raw_data/to_processed/" 53 | 54 | spotify_data = [] 55 | spotify_keys = [] 56 | for file in s3.list_objects(Bucket=Bucket, Prefix=Key)['Contents']: 57 | file_key = file['Key'] 58 | if file_key.split('.')[-1] == "json": 59 | response = s3.get_object(Bucket = Bucket, Key = file_key) 60 | content = response['Body'] 61 | jsonObject = json.loads(content.read()) 62 | spotify_data.append(jsonObject) 63 | spotify_keys.append(file_key) 64 | 65 | for data in spotify_data: 66 | album_list = album(data) 67 | artist_list = artist(data) 68 | song_list = songs(data) 69 | 70 | album_df = pd.DataFrame.from_dict(album_list) 71 | album_df = album_df.drop_duplicates(subset=['album_id']) 72 | 73 | artist_df = pd.DataFrame.from_dict(artist_list) 74 | artist_df = artist_df.drop_duplicates(subset=['artist_id']) 75 | 76 | #Song Dataframe 77 | song_df = pd.DataFrame.from_dict(song_list) 78 | 79 | 
album_df['release_date'] = pd.to_datetime(album_df['release_date']) 80 | song_df['song_added'] = pd.to_datetime(song_df['song_added']) 81 | 82 | songs_key = "transformed_data/songs_data/songs_transformed_" + str(datetime.now()) + ".csv" 83 | song_buffer=StringIO() 84 | song_df.to_csv(song_buffer, index=False) 85 | song_content = song_buffer.getvalue() 86 | s3.put_object(Bucket=Bucket, Key=songs_key, Body=song_content) 87 | 88 | album_key = "transformed_data/album_data/album_transformed_" + str(datetime.now()) + ".csv" 89 | album_buffer=StringIO() 90 | album_df.to_csv(album_buffer, index=False) 91 | album_content = album_buffer.getvalue() 92 | s3.put_object(Bucket=Bucket, Key=album_key, Body=album_content) 93 | 94 | artist_key = "transformed_data/artist_data/artist_transformed_" + str(datetime.now()) + ".csv" 95 | artist_buffer=StringIO() 96 | artist_df.to_csv(artist_buffer, index=False) 97 | artist_content = artist_buffer.getvalue() 98 | s3.put_object(Bucket=Bucket, Key=artist_key, Body=artist_content) 99 | 100 | s3_resource = boto3.resource('s3') 101 | for key in spotify_keys: 102 | copy_source = { 103 | 'Bucket': Bucket, 104 | 'Key': key 105 | } 106 | s3_resource.meta.client.copy(copy_source, Bucket, 'raw_data/processed/' + key.split("/")[-1]) 107 | s3_resource.Object(Bucket, key).delete() -------------------------------------------------------------------------------- /Spotify Data Pipeline using Spark/(spark) spotify_transformation.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | from awsglue.transforms import * 4 | from awsglue.utils import getResolvedOptions 5 | from pyspark.context import SparkContext 6 | from awsglue.context import GlueContext 7 | from awsglue.job import Job 8 | from pyspark.sql.functions import explode, col, to_date 9 | from datetime import datetime 10 | from awsglue.dynamicframe import DynamicFrame 11 | 12 | sc = SparkContext.getOrCreate() 13 | glueContext = GlueContext(sc) 14 | 
spark = glueContext.spark_session 15 | job = Job(glueContext) 16 | 17 | s3_path = "s3://spotify-daily-data-project/raw_data/to_processed/" 18 | source_dyf = glueContext.create_dynamic_frame_from_options( 19 | connection_type="s3", 20 | connection_options={"paths":[s3_path]}, 21 | format="json" 22 | ) 23 | 24 | spotify_df = source_dyf.toDF() 25 | def process_albums(df): 26 | df = df.withColumn("items", explode("items")).select( 27 | col("items.track.album.id").alias("album_id"), 28 | col("items.track.album.name").alias("album_name"), 29 | col("items.track.album.release_date").alias("release_date"), 30 | col("items.track.album.total_tracks").alias("total_tracks"), 31 | col("items.track.album.external_urls.spotify").alias("url") 32 | ).drop_duplicates(["album_id"]) 33 | return df 34 | 35 | 36 | def process_artists(df): 37 | # First, explode the items to get individual tracks 38 | df_items_exploded = df.select(explode(col("items")).alias("item")) 39 | 40 | # Then, explode the artists array within each item to create a row for each artist 41 | df_artists_exploded = df_items_exploded.select(explode(col("item.track.artists")).alias("artist")) 42 | 43 | # Now, select the artist attributes, ensuring each artist is in its own row 44 | df_artists = df_artists_exploded.select( 45 | col("artist.id").alias("artist_id"), 46 | col("artist.name").alias("artist_name"), 47 | col("artist.external_urls.spotify").alias("external_url") 48 | ).drop_duplicates(["artist_id"]) 49 | 50 | return df_artists 51 | 52 | 53 | def process_songs(df): 54 | # Explode the items array to create a row for each song 55 | df_exploded = df.select(explode(col("items")).alias("item")) 56 | 57 | # Extract song information from the exploded DataFrame 58 | df_songs = df_exploded.select( 59 | col("item.track.id").alias("song_id"), 60 | col("item.track.name").alias("song_name"), 61 | col("item.track.duration_ms").alias("duration_ms"), 62 | col("item.track.external_urls.spotify").alias("url"), 63 | 
col("item.track.popularity").alias("popularity"), 64 | col("item.added_at").alias("song_added"), 65 | col("item.track.album.id").alias("album_id"), 66 | col("item.track.artists")[0]["id"].alias("artist_id") 67 | ).drop_duplicates(["song_id"]) 68 | 69 | # Convert string dates in 'song_added' to actual date types 70 | df_songs = df_songs.withColumn("song_added", to_date(col("song_added"))) 71 | 72 | return df_songs 73 | 74 | #process data 75 | album_df = process_albums(spotify_df) 76 | artist_df = process_artists(spotify_df) 77 | song_df = process_songs(spotify_df) 78 | 79 | 80 | def write_to_s3(df, path_suffix, format_type="csv"): 81 | # Convert back to DynamicFrame 82 | dynamic_frame = DynamicFrame.fromDF(df, glueContext, "dynamic_frame") 83 | 84 | glueContext.write_dynamic_frame.from_options( 85 | frame = dynamic_frame, 86 | connection_type = "s3", 87 | connection_options = {"path": f"s3://spotify-daily-data-project/transformed_data/{path_suffix}/"}, 88 | format = format_type 89 | ) 90 | 91 | #write data to s3 92 | write_to_s3(album_df, "album/album_transformed_{}".format(datetime.now().strftime("%Y-%m-%d")), "csv") 93 | write_to_s3(artist_df, "artist/artist_transformed_{}".format(datetime.now().strftime("%Y-%m-%d")), "csv") 94 | write_to_s3(song_df, "songs/songs_transformed_{}".format(datetime.now().strftime("%Y-%m-%d")), "csv") 95 | 96 | job.commit() -------------------------------------------------------------------------------- /Spotify Data Pipeline using Spark/spotify_api_data_extract.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import spotipy 4 | from spotipy.oauth2 import SpotifyClientCredentials 5 | import boto3 6 | from datetime import datetime 7 | 8 | def lambda_handler(event, context): 9 | 10 | cilent_id = os.environ.get('client_id') 11 | client_secret = os.environ.get('client_secret') 12 | 13 | client_credentials_manager = SpotifyClientCredentials(client_id=cilent_id, 
client_secret=client_secret) 14 | sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager) 15 | playlists = sp.user_playlists('spotify') 16 | 17 | playlist_link = "https://open.spotify.com/playlist/37i9dQZEVXbNG2KDcFcKOF?si=1333723a6eff4b7f" 18 | playlist_URI = playlist_link.split("/")[-1].split("?")[0] 19 | 20 | spotify_data = sp.playlist_tracks(playlist_URI) 21 | 22 | cilent = boto3.client('s3') 23 | 24 | filename = "spotify_raw_" + str(datetime.now()) + ".json" 25 | 26 | cilent.put_object( 27 | Bucket="spotify-etl-project-darshil", 28 | Key="raw_data/to_processed/" + filename, 29 | Body=json.dumps(spotify_data) 30 | ) 31 | -------------------------------------------------------------------------------- /Spotify Data Pipeline using Spark/spotipy_layer.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/Spotify Data Pipeline using Spark/spotipy_layer.zip -------------------------------------------------------------------------------- /data/apple_data/apple_products.csv: -------------------------------------------------------------------------------- 1 | Product Name,Product URL,Brand,Sale Price,Mrp,Discount Percentage,Number Of Ratings,Number Of Reviews,Upc,Star Rating,Ram 2 | "APPLE iPhone 8 Plus (Gold, 64 GB)",https://www.flipkart.com/apple-iphone-8-plus-gold-64-gb/p/itmexrgvuzgzttzh?pid=MOBEXRGV7EHHTGUH,Apple,49900,49900,0,3431,356,MOBEXRGV7EHHTGUH,4.6,2 GB 3 | "APPLE iPhone 8 Plus (Space Grey, 256 GB)",https://www.flipkart.com/apple-iphone-8-plus-space-grey-256-gb/p/itmexrgvzkbyqgtf?pid=MOBEXRGVAC6TJT4F,Apple,84900,84900,0,3431,356,MOBEXRGVAC6TJT4F,4.6,2 GB 4 | "APPLE iPhone 8 Plus (Silver, 256 GB)",https://www.flipkart.com/apple-iphone-8-plus-silver-256-gb/p/itmexrgvxatuyrqw?pid=MOBEXRGVGETABXWZ,Apple,84900,84900,0,3431,356,MOBEXRGVGETABXWZ,4.6,2 GB 5 | "APPLE iPhone 
8 (Silver, 256 GB)",https://www.flipkart.com/apple-iphone-8-silver-256-gb/p/itmexrgvae48gzhp?pid=MOBEXRGVMZWUHCBA,Apple,77000,77000,0,11202,794,MOBEXRGVMZWUHCBA,4.5,2 GB 6 | "APPLE iPhone 8 (Gold, 256 GB)",https://www.flipkart.com/apple-iphone-8-gold-256-gb/p/itmexrgv8bvfbzgw?pid=MOBEXRGVPK7PFEJZ,Apple,77000,77000,0,11202,794,MOBEXRGVPK7PFEJZ,4.5,2 GB 7 | "APPLE iPhone 8 Plus (Silver, 64 GB)",https://www.flipkart.com/apple-iphone-8-plus-silver-64-gb/p/itmexrgv7yfjbryy?pid=MOBEXRGVQGYYP8FV,Apple,49900,49900,0,3431,356,MOBEXRGVQGYYP8FV,4.6,2 GB 8 | "APPLE iPhone 8 Plus (Space Grey, 64 GB)",https://www.flipkart.com/apple-iphone-8-plus-space-grey-64-gb/p/itmexrgvehtzhh9v?pid=MOBEXRGVQKBREZP8,Apple,49900,49900,0,3431,356,MOBEXRGVQKBREZP8,4.6,2 GB 9 | "APPLE iPhone 8 (Space Grey, 256 GB)",https://www.flipkart.com/apple-iphone-8-space-grey-256-gb/p/itmexrgvypzqw6df?pid=MOBEXRGVZFZGZEWV,Apple,77000,77000,0,11202,794,MOBEXRGVZFZGZEWV,4.5,2 GB 10 | "APPLE iPhone XS Max (Silver, 64 GB)",https://www.flipkart.com/apple-iphone-xs-max-silver-64-gb/p/itmf944ehzsh7emx?pid=MOBF944E2XAHW8V5,Apple,89900,89900,0,1454,149,MOBF944E2XAHW8V5,4.6,4 GB 11 | "Apple iPhone XR ((PRODUCT)RED, 128 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-xr-product-red-128-gb-includes-earpods-power-adapter/p/itmf9z7zbkvjzegf?pid=MOBF9Z7ZHQC23PWQ,Apple,41999,52900,20,79512,6796,MOBF9Z7ZHQC23PWQ,4.6,4 GB 12 | "Apple iPhone XR (Black, 64 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-xr-black-64-gb-includes-earpods-power-adapter/p/itmf9z7zxu4uqyz2?pid=MOBF9Z7ZPHGV4GNH,Apple,39999,47900,16,79512,6796,MOBF9Z7ZPHGV4GNH,4.6,4 GB 13 | "Apple iPhone XR (Coral, 128 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-xr-coral-128-gb-includes-earpods-power-adapter/p/itmf9z7zgxx5wgez?pid=MOBF9Z7ZS6GF5UAP,Apple,41999,52900,20,79582,6804,MOBF9Z7ZS6GF5UAP,4.6,4 GB 14 | "Apple iPhone XR (Black, 128 GB) (Includes EarPods, Power 
Adapter)",https://www.flipkart.com/apple-iphone-xr-black-128-gb-includes-earpods-power-adapter/p/itmf9z7zhdgzwmzm?pid=MOBF9Z7ZYWNFGZUC,Apple,41999,52900,20,79512,6796,MOBF9Z7ZYWNFGZUC,4.6,3 GB 15 | "Apple iPhone XR (White, 128 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-xr-white-128-gb-includes-earpods-power-adapter/p/itmf9z7zrz4tbhwm?pid=MOBF9Z7ZZY3HCDZZ,Apple,41999,52900,20,79512,6796,MOBF9Z7ZZY3HCDZZ,4.6,4 GB 16 | "APPLE iPhone 11 Pro Max (Gold, 256 GB)",https://www.flipkart.com/apple-iphone-11-pro-max-gold-256-gb/p/itm94c6536eefc46?pid=MOBFKCTS7HCHSPFH,Apple,131900,131900,0,1078,101,MOBFKCTS7HCHSPFH,4.7,4 GB 17 | "APPLE iPhone 11 Pro Max (Gold, 64 GB)",https://www.flipkart.com/apple-iphone-11-pro-max-gold-64-gb/p/itm6ef18218efdf6?pid=MOBFKCTSAPAYNSGG,Apple,117100,117100,0,1078,101,MOBFKCTSAPAYNSGG,4.7,4 GB 18 | "APPLE iPhone 11 Pro Max (Midnight Green, 256 GB)",https://www.flipkart.com/apple-iphone-11-pro-max-midnight-green-256-gb/p/itm68e0db18f9ecc?pid=MOBFKCTSCAAKGQV7,Apple,131900,131900,0,1078,101,MOBFKCTSCAAKGQV7,4.7,4 GB 19 | "APPLE iPhone 11 Pro Max (Space Grey, 64 GB)",https://www.flipkart.com/apple-iphone-11-pro-max-space-grey-64-gb/p/itme0101031155f8?pid=MOBFKCTSKDMKCGQS,Apple,117100,117100,0,1078,101,MOBFKCTSKDMKCGQS,4.7,4 GB 20 | "APPLE iPhone 11 Pro (Midnight Green, 64 GB)",https://www.flipkart.com/apple-iphone-11-pro-midnight-green-64-gb/p/itm471de0d2e8474?pid=MOBFKCTSN3TG3RFJ,Apple,74999,106600,29,7088,523,MOBFKCTSN3TG3RFJ,4.6,4 GB 21 | "APPLE iPhone 11 Pro (Space Grey, 512 GB)",https://www.flipkart.com/apple-iphone-11-pro-space-grey-512-gb/p/itmcd1f0ddbf1c21?pid=MOBFKCTSRTHRQTFT,Apple,117900,140300,15,7088,523,MOBFKCTSRTHRQTFT,4.6,4 GB 22 | "APPLE iPhone 11 Pro Max (Midnight Green, 64 GB)",https://www.flipkart.com/apple-iphone-11-pro-max-midnight-green-64-gb/p/itmab1763b5ca244?pid=MOBFKCTSRYPAQNYT,Apple,117100,117100,0,1078,101,MOBFKCTSRYPAQNYT,4.7,4 GB 23 | "APPLE iPhone 11 Pro (Midnight Green, 512 
GB)",https://www.flipkart.com/apple-iphone-11-pro-midnight-green-512-gb/p/itm0bdd954387ca9?pid=MOBFKCTSSJCWYGCC,Apple,117900,140300,15,7088,523,MOBFKCTSSJCWYGCC,4.6,4 GB 24 | "APPLE iPhone 11 Pro (Space Grey, 256 GB)",https://www.flipkart.com/apple-iphone-11-pro-space-grey-256-gb/p/itmbecef22a95790?pid=MOBFKCTSWGYSAS9X,Apple,99900,121300,17,7081,522,MOBFKCTSWGYSAS9X,4.6,4 GB 25 | "Apple iPhone SE (White, 256 GB) (Includes EarPods, Power Adapter)",https://www.flipkart.com/apple-iphone-se-white-256-gb-includes-earpods-power-adapter/p/itm4d73793c30c4c?pid=MOBFRFXHPZCHAPEH,Apple,44999,54900,18,95909,8161,MOBFRFXHPZCHAPEH,4.5,2 GB 26 | "APPLE iPhone 12 Pro (Silver, 512 GB)",https://www.flipkart.com/apple-iphone-12-pro-silver-512-gb/p/itm0ccf9fc219a71?pid=MOBFWBYZ5UY6ZBVA,Apple,140900,149900,6,542,42,MOBFWBYZ5UY6ZBVA,4.5,4 GB 27 | "APPLE iPhone 12 Pro Max (Pacific Blue, 256 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-pacific-blue-256-gb/p/itm3a0860c94250e?pid=MOBFWBYZ8STJXCVT,Apple,130900,139900,6,580,45,MOBFWBYZ8STJXCVT,4.6,6 GB 28 | "APPLE iPhone 12 Mini (White, 128 GB)",https://www.flipkart.com/apple-iphone-12-mini-white-128-gb/p/itm9098fa76548ba?pid=MOBFWBYZAGXJRDGB,Apple,64900,74900,13,740,64,MOBFWBYZAGXJRDGB,4.5,4 GB 29 | "APPLE iPhone 12 Pro (Graphite, 256 GB)",https://www.flipkart.com/apple-iphone-12-pro-graphite-256-gb/p/itm4fa4da575698c?pid=MOBFWBYZBA36UB7G,Apple,120900,129900,6,545,42,MOBFWBYZBA36UB7G,4.5,6 GB 30 | "APPLE iPhone 12 Mini (White, 64 GB)",https://www.flipkart.com/apple-iphone-12-mini-white-64-gb/p/itmf3b16d1640898?pid=MOBFWBYZBH4CEC4C,Apple,59900,69900,14,740,64,MOBFWBYZBH4CEC4C,4.5,4 GB 31 | "APPLE iPhone 12 (White, 128 GB)",https://www.flipkart.com/apple-iphone-12-white-128-gb/p/itm95393f4c6cc59?pid=MOBFWBYZBTZFGJF9,Apple,75900,84900,10,2101,180,MOBFWBYZBTZFGJF9,4.6,6 GB 32 | "APPLE iPhone 12 Pro (Graphite, 128 
GB)",https://www.flipkart.com/apple-iphone-12-pro-graphite-128-gb/p/itm03e5f2595d843?pid=MOBFWBYZBZ7Y56WD,Apple,110900,119900,7,545,42,MOBFWBYZBZ7Y56WD,4.5,6 GB 33 | "APPLE iPhone 12 Pro Max (Graphite, 256 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-graphite-256-gb/p/itm8dbdf0b986725?pid=MOBFWBYZEF6XQ5ZW,Apple,130900,139900,6,580,45,MOBFWBYZEF6XQ5ZW,4.6,6 GB 34 | "APPLE iPhone 12 Pro Max (Graphite, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-graphite-128-gb/p/itm973c298184f51?pid=MOBFWBYZFDGQSDWS,Apple,120900,129900,6,580,45,MOBFWBYZFDGQSDWS,4.6,6 GB 35 | "APPLE iPhone 12 Mini (Black, 128 GB)",https://www.flipkart.com/apple-iphone-12-mini-black-128-gb/p/itm0eb512c195bdf?pid=MOBFWBYZH2AMPNPD,Apple,64900,74900,13,730,63,MOBFWBYZH2AMPNPD,4.5,4 GB 36 | "APPLE iPhone 12 Mini (Blue, 128 GB)",https://www.flipkart.com/apple-iphone-12-mini-blue-128-gb/p/itm9b6cdec9700ee?pid=MOBFWBYZHU58PHCZ,Apple,64900,74900,13,730,63,MOBFWBYZHU58PHCZ,4.5,4 GB 37 | "APPLE iPhone 12 (Black, 128 GB)",https://www.flipkart.com/apple-iphone-12-black-128-gb/p/itmf1f0a58f1ecd7?pid=MOBFWBYZK3HACR72,Apple,75900,84900,10,2101,180,MOBFWBYZK3HACR72,4.6,6 GB 38 | "APPLE iPhone 12 (Blue, 128 GB)",https://www.flipkart.com/apple-iphone-12-blue-128-gb/p/itm02853ae92e90a?pid=MOBFWBYZKPTZF9VG,Apple,75900,84900,10,2101,180,MOBFWBYZKPTZF9VG,4.6,6 GB 39 | "APPLE iPhone 12 Pro Max (Silver, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-silver-128-gb/p/itm5a51ba742a17e?pid=MOBFWBYZNSNVGGZC,Apple,120900,129900,6,580,45,MOBFWBYZNSNVGGZC,4.6,6 GB 40 | "APPLE iPhone 12 Mini (Red, 64 GB)",https://www.flipkart.com/apple-iphone-12-mini-red-64-gb/p/itm255dd64643767?pid=MOBFWBYZNVWGWN2U,Apple,59900,69900,14,740,64,MOBFWBYZNVWGWN2U,4.5,6 GB 41 | "APPLE iPhone 12 Pro Max (Gold, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-gold-128-gb/p/itme3ae592065711?pid=MOBFWBYZQVKT77YH,Apple,120900,129900,6,580,45,MOBFWBYZQVKT77YH,4.6,6 GB 42 | "APPLE iPhone 12 (Green, 128 
GB)",https://www.flipkart.com/apple-iphone-12-green-128-gb/p/itm4e0a120f7d9c4?pid=MOBFWBYZQXUEHF48,Apple,75900,84900,10,2092,178,MOBFWBYZQXUEHF48,4.6,6 GB 43 | "APPLE iPhone 12 Pro (Pacific Blue, 512 GB)",https://www.flipkart.com/apple-iphone-12-pro-pacific-blue-512-gb/p/itm8a39d6779b04e?pid=MOBFWBYZTHSXKMGW,Apple,140900,149900,6,545,42,MOBFWBYZTHSXKMGW,4.5,4 GB 44 | "APPLE iPhone 12 (White, 64 GB)",https://www.flipkart.com/apple-iphone-12-white-64-gb/p/itm8b88bdc03cd79?pid=MOBFWBYZTK33MBG9,Apple,70900,79900,11,2101,180,MOBFWBYZTK33MBG9,4.6,6 GB 45 | "APPLE iPhone 12 (Black, 64 GB)",https://www.flipkart.com/apple-iphone-12-black-64-gb/p/itma2559422bf7c7?pid=MOBFWBYZU5FWK2VP,Apple,70900,79900,11,2092,178,MOBFWBYZU5FWK2VP,4.6,6 GB 46 | "APPLE iPhone 12 (Red, 128 GB)",https://www.flipkart.com/apple-iphone-12-red-128-gb/p/itma7bd86885ed98?pid=MOBFWBYZUHPFWQRD,Apple,75900,84900,10,2101,180,MOBFWBYZUHPFWQRD,4.6,6 GB 47 | "APPLE iPhone 12 Mini (Black, 64 GB)",https://www.flipkart.com/apple-iphone-12-mini-black-64-gb/p/itm38b727191eb08?pid=MOBFWBYZXSEGBS6F,Apple,59900,69900,14,740,64,MOBFWBYZXSEGBS6F,4.5,4 GB 48 | "APPLE iPhone 12 Pro (Pacific Blue, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-pacific-blue-128-gb/p/itm97c833296c221?pid=MOBFWBYZXYSCEEEH,Apple,110900,119900,7,545,42,MOBFWBYZXYSCEEEH,4.5,6 GB 49 | "APPLE iPhone 12 Pro Max (Pacific Blue, 128 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-pacific-blue-128-gb/p/itmd89812b558a03?pid=MOBFWBYZZABKHZQA,Apple,120900,129900,6,580,45,MOBFWBYZZABKHZQA,4.6,6 GB 50 | "APPLE iPhone 12 Pro Max (Silver, 256 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-silver-256-gb/p/itm30faf74442adb?pid=MOBFWBYZZH4AM8FE,Apple,130900,139900,6,580,45,MOBFWBYZZH4AM8FE,4.6,6 GB 51 | "APPLE iPhone 12 Pro Max (Gold, 256 GB)",https://www.flipkart.com/apple-iphone-12-pro-max-gold-256-gb/p/itm1e0354f5bbc8c?pid=MOBFWBYZZPW8JHQF,Apple,130900,139900,6,580,45,MOBFWBYZZPW8JHQF,4.6,6 GB 52 | "APPLE iPhone 11 (White, 128 
GB)",https://www.flipkart.com/apple-iphone-11-white-128-gb/p/itme32df47ea6742?pid=MOBFWQ6B7KKRXDDS,Apple,54999,59900,8,43707,3357,MOBFWQ6B7KKRXDDS,4.6,4 GB 53 | "APPLE iPhone 11 (Red, 128 GB)",https://www.flipkart.com/apple-iphone-11-red-128-gb/p/itm8d14bd0e33a1c?pid=MOBFWQ6BEZTNK59G,Apple,54999,59900,8,43707,3357,MOBFWQ6BEZTNK59G,4.6,4 GB 54 | "APPLE iPhone SE (White, 64 GB)",https://www.flipkart.com/apple-iphone-se-white-64-gb/p/itma00a19e11c81b?pid=MOBFWQ6BGWDVGF3E,Apple,29999,39900,24,95807,8154,MOBFWQ6BGWDVGF3E,4.5,2 GB 55 | "APPLE iPhone SE (Black, 128 GB)",https://www.flipkart.com/apple-iphone-se-black-128-gb/p/itma9285ccc6af28?pid=MOBFWQ6BHUEVZPXD,Apple,34999,44900,22,95909,8161,MOBFWQ6BHUEVZPXD,4.5,2 GB 56 | "APPLE iPhone SE (White, 128 GB)",https://www.flipkart.com/apple-iphone-se-white-128-gb/p/itmc2a0f593a4ad8?pid=MOBFWQ6BJEHMUUZY,Apple,34999,44900,22,95807,8154,MOBFWQ6BJEHMUUZY,4.5,2 GB 57 | "APPLE iPhone SE (Red, 128 GB)",https://www.flipkart.com/apple-iphone-se-red-128-gb/p/itma4202509da171?pid=MOBFWQ6BJTVFKPEJ,Apple,34999,44900,22,95909,8161,MOBFWQ6BJTVFKPEJ,4.5,2 GB 58 | "APPLE iPhone 11 (Black, 128 GB)",https://www.flipkart.com/apple-iphone-11-black-128-gb/p/itm8244e8d955aba?pid=MOBFWQ6BKRYBP5X8,Apple,54999,59900,8,43470,3331,MOBFWQ6BKRYBP5X8,4.6,4 GB 59 | "APPLE iPhone SE (Black, 64 GB)",https://www.flipkart.com/apple-iphone-se-black-64-gb/p/itm4d3d5718a5c95?pid=MOBFWQ6BR3MK7AUG,Apple,29999,39900,24,95909,8161,MOBFWQ6BR3MK7AUG,4.5,4 GB 60 | "APPLE iPhone 11 (Purple, 64 GB)",https://www.flipkart.com/apple-iphone-11-purple-64-gb/p/itm2b8d03427ddac?pid=MOBFWQ6BTFFJKGKE,Apple,46999,54900,14,43470,3331,MOBFWQ6BTFFJKGKE,4.6,4 GB 61 | "APPLE iPhone 11 (White, 64 GB)",https://www.flipkart.com/apple-iphone-11-white-64-gb/p/itmfc6a7091eb20b?pid=MOBFWQ6BVWVEH3XE,Apple,46999,54900,14,43470,3331,MOBFWQ6BVWVEH3XE,4.6,4 GB 62 | "APPLE iPhone 11 (Black, 64 
GB)",https://www.flipkart.com/apple-iphone-11-black-64-gb/p/itm4e5041ba101fd?pid=MOBFWQ6BXGJCEYNY,Apple,46999,54900,14,43470,3331,MOBFWQ6BXGJCEYNY,4.6,4 GB 63 | "APPLE iPhone 11 (Red, 64 GB)",https://www.flipkart.com/apple-iphone-11-red-64-gb/p/itmc3935326f2feb?pid=MOBFWQ6BYYV3FCU7,Apple,46999,54900,14,43470,3331,MOBFWQ6BYYV3FCU7,4.6,4 GB 64 | -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Purple, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product 
Name=APPLE iPhone 11 (Purple, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Red, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (Red, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 (White, 64 
GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Midnight Green, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Midnight Green, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Midnight Green, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Midnight Green, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Space Grey, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro (Space Grey, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Gold, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro 
Max (Gold, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Midnight Green, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Space Grey, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 11 Pro Max (Space Grey, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet 
-------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Green, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Green, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Blue, 128 
GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Red, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (Red, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Mini (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Graphite, 
128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Graphite, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Graphite, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Graphite, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Pacific Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Pacific Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Pacific Blue, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Pacific Blue, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Silver, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro (Silver, 512 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Gold, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Gold, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max 
(Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Graphite, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Graphite, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Graphite, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Graphite, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- 
/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Pacific Blue, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Silver, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Silver, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 12 Pro Max (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Gold, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Gold, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Gold, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Silver, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Silver, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Silver, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Space Grey, 256 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Space Grey, 256 
GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Space Grey, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone 8 Plus (Space Grey, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone SE (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone SE (Black, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone SE (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone SE (Black, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone SE (Red, 128 
GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone SE (Red, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone SE (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone SE (White, 128 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone SE (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone SE (White, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=APPLE iPhone XS Max (Silver, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=APPLE iPhone XS Max (Silver, 64 GB)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=Apple iPhone SE (White, 256 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone SE (White, 256 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=Apple iPhone XR ((PRODUCT)RED, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone XR ((PRODUCT)RED, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=Apple iPhone XR (Black, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone XR (Black, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=Apple iPhone XR (Black, 64 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone XR (Black, 64 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=Apple iPhone XR (Coral, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone XR (Coral, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/Product Name=Apple iPhone XR (White, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/Product Name=Apple iPhone XR (White, 128 GB) (Includes EarPods, Power Adapter)/part-00000-c782ec41-9cf2-4046-b5c7-8ac57ce9d25a.c000.snappy.parquet -------------------------------------------------------------------------------- /data/apple_data/output.csv/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/apple_data/output.csv/_SUCCESS -------------------------------------------------------------------------------- /data/flight-data-hive/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data-hive/_SUCCESS -------------------------------------------------------------------------------- /data/flight-data-hive/_committed_4721890993021653500: -------------------------------------------------------------------------------- 1 | {"added":["part-00000-tid-4721890993021653500-d8ef7f6b-e6e5-4451-af50-08281422f186-0-c000"],"removed":[]} -------------------------------------------------------------------------------- /data/flight-data-hive/_started_4721890993021653500: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data-hive/_started_4721890993021653500 -------------------------------------------------------------------------------- /data/flight-data-hive/part-00000-tid-4721890993021653500-d8ef7f6b-e6e5-4451-af50-08281422f186-0-c000: 
-------------------------------------------------------------------------------- 1 | United States,Romania,15 2 | United States,Croatia,1 3 | United States,Ireland,344 4 | Egypt,United States,15 5 | United States,India,62 6 | United States,Singapore,1 7 | United States,Grenada,62 8 | Costa Rica,United States,588 9 | Senegal,United States,40 10 | Moldova,United States,1 11 | United States,Sint Maarten,325 12 | United States,Marshall Islands,39 13 | Guyana,United States,64 14 | Malta,United States,1 15 | Anguilla,United States,41 16 | Bolivia,United States,30 17 | United States,Paraguay,6 18 | Algeria,United States,4 19 | Turks and Caicos Islands,United States,230 20 | United States,Gibraltar,1 21 | Saint Vincent and the Grenadines,United States,1 22 | Italy,United States,382 23 | United States,Federated States of Micronesia,69 24 | United States,Russia,161 25 | Pakistan,United States,12 26 | United States,Netherlands,660 27 | Iceland,United States,181 28 | Marshall Islands,United States,42 29 | Luxembourg,United States,155 30 | Honduras,United States,362 31 | The Bahamas,United States,955 32 | United States,Senegal,42 33 | El Salvador,United States,561 34 | Samoa,United States,25 35 | United States,Angola,13 36 | Switzerland,United States,294 37 | United States,Anguilla,38 38 | Sint Maarten,United States,325 39 | Hong Kong,United States,332 40 | Trinidad and Tobago,United States,211 41 | Latvia,United States,19 42 | United States,Ecuador,300 43 | Suriname,United States,1 44 | Mexico,United States,7140 45 | United States,Cyprus,1 46 | Ecuador,United States,268 47 | United States,Portugal,134 48 | United States,Costa Rica,608 49 | United States,Guatemala,318 50 | United States,Suriname,34 51 | Colombia,United States,873 52 | United States,Cape Verde,14 53 | United States,Jamaica,712 54 | Norway,United States,121 55 | United States,Malaysia,3 56 | United States,Morocco,19 57 | Thailand,United States,3 58 | United States,Samoa,25 59 | Venezuela,United States,290 60 | 
United States,Palau,31 61 | United States,Venezuela,246 62 | Panama,United States,510 63 | Antigua and Barbuda,United States,126 64 | United States,Chile,185 65 | Morocco,United States,15 66 | United States,Finland,28 67 | Azerbaijan,United States,21 68 | United States,Greece,23 69 | United States,The Bahamas,986 70 | New Zealand,United States,111 71 | Liberia,United States,2 72 | United States,Hong Kong,414 73 | Hungary,United States,2 74 | United States,China,920 75 | United States,Vietnam,2 76 | Burkina Faso,United States,1 77 | Sweden,United States,118 78 | United States,Kuwait,28 79 | United States,Dominican Republic,1420 80 | United States,Egypt,12 81 | Israel,United States,134 82 | United States,United States,370002 83 | Ethiopia,United States,13 84 | United States,Luxembourg,134 85 | United States,Poland,33 86 | Martinique,United States,44 87 | United States,Saint Barthelemy,41 88 | Saint Barthelemy,United States,39 89 | Barbados,United States,154 90 | United States,Turkey,129 91 | Djibouti,United States,1 92 | United States,Azerbaijan,21 93 | United States,Estonia,1 94 | Germany,United States,1468 95 | United States,South Korea,827 96 | United States,El Salvador,508 97 | Ireland,United States,335 98 | United States,Hungary,3 99 | Zambia,United States,1 100 | Malaysia,United States,2 101 | United States,Ethiopia,12 102 | United States,Panama,465 103 | United States,Aruba,342 104 | United States,Thailand,4 105 | United States,Turks and Caicos Islands,236 106 | Croatia,United States,2 107 | United States,Pakistan,12 108 | Cyprus,United States,1 109 | United States,Honduras,407 110 | Fiji,United States,24 111 | Qatar,United States,108 112 | Saint Kitts and Nevis,United States,139 113 | Kuwait,United States,32 114 | Taiwan,United States,266 115 | Haiti,United States,226 116 | Canada,United States,8399 117 | Federated States of Micronesia,United States,69 118 | United States,Liberia,2 119 | Jamaica,United States,666 120 | United States,Malta,2 121 | Dominican 
Republic,United States,1353 122 | Japan,United States,1548 123 | United States,Lithuania,1 124 | Finland,United States,26 125 | United States,Guadeloupe,59 126 | United States,Ukraine,13 127 | United States,France,952 128 | United States,Norway,115 129 | Aruba,United States,346 130 | French Guiana,United States,5 131 | United States,Kiribati,35 132 | India,United States,61 133 | British Virgin Islands,United States,107 134 | Brazil,United States,853 135 | United States,Germany,1336 136 | United States,New Zealand,74 137 | French Polynesia,United States,43 138 | United Arab Emirates,United States,320 139 | Singapore,United States,3 140 | United States,Mexico,7187 141 | United States,Sweden,119 142 | Netherlands,United States,776 143 | United States,Martinique,43 144 | United States,United Arab Emirates,313 145 | United States,Bulgaria,1 146 | Denmark,United States,153 147 | China,United States,772 148 | United States,Nicaragua,201 149 | United States,Philippines,126 150 | United States,Georgia,1 151 | United States,Belgium,228 152 | Cayman Islands,United States,314 153 | Argentina,United States,180 154 | Peru,United States,279 155 | South Africa,United States,36 156 | United States,Iceland,202 157 | United States,Argentina,141 158 | Spain,United States,420 159 | Bermuda,United States,183 160 | United States,Nigeria,50 161 | United States,Austria,63 162 | United States,Bonaire, Sint Eustatius, and Saba,59 163 | Kiribati,United States,26 164 | Saudi Arabia,United States,83 165 | Czech Republic,United States,13 166 | United States,Israel,127 167 | Belgium,United States,259 168 | United States,Saint Lucia,136 169 | United States,Bahrain,1 170 | United States,British Virgin Islands,80 171 | Curacao,United States,90 172 | Georgia,United States,2 173 | United States,Denmark,152 174 | United States,Guyana,63 175 | Philippines,United States,134 176 | Grenada,United States,53 177 | Cape Verde,United States,20 178 | Cote d'Ivoire,United States,1 179 | Ukraine,United States,14 
180 | United States,Papua New Guinea,1 181 | Russia,United States,176 182 | United States,Saudi Arabia,70 183 | Guatemala,United States,397 184 | Saint Lucia,United States,123 185 | Paraguay,United States,60 186 | United States,Curacao,83 187 | Kosovo,United States,1 188 | United States,Taiwan,235 189 | Tunisia,United States,3 190 | United States,South Africa,40 191 | Niger,United States,2 192 | Turkey,United States,138 193 | United Kingdom,United States,2025 194 | Romania,United States,14 195 | United States,Greenland,4 196 | Papua New Guinea,United States,3 197 | United States,Spain,442 198 | Iraq,United States,1 199 | United States,Italy,438 200 | Cuba,United States,466 201 | United States,Switzerland,305 202 | Dominica,United States,20 203 | United States,Japan,1496 204 | Portugal,United States,127 205 | United States,Brazil,619 206 | Bahrain,United States,19 207 | United States,Peru,337 208 | Indonesia,United States,1 209 | United States,Belize,193 210 | United States,United Kingdom,1970 211 | Belize,United States,188 212 | United States,Ghana,20 213 | United States,Indonesia,2 214 | United States,Fiji,25 215 | United States,Canada,8483 216 | United States,Antigua and Barbuda,117 217 | United States,French Polynesia,40 218 | Nicaragua,United States,179 219 | United States,Latvia,15 220 | United States,Dominica,27 221 | United States,Czech Republic,12 222 | United States,Australia,258 223 | United States,Cook Islands,13 224 | Austria,United States,62 225 | Jordan,United States,44 226 | Palau,United States,30 227 | South Korea,United States,1048 228 | Angola,United States,15 229 | Ghana,United States,18 230 | New Caledonia,United States,1 231 | Guadeloupe,United States,56 232 | France,United States,935 233 | Poland,United States,32 234 | Nigeria,United States,59 235 | United States,Uruguay,13 236 | Greenland,United States,2 237 | United States,Bermuda,193 238 | Chile,United States,174 239 | United States,Cuba,478 240 | United States,Montenegro,1 241 | United 
States,Colombia,867 242 | United States,Barbados,130 243 | United States,Qatar,109 244 | Australia,United States,329 245 | United States,Cayman Islands,310 246 | United States,Jordan,44 247 | United States,Namibia,1 248 | United States,Trinidad and Tobago,217 249 | United States,Bolivia,13 250 | Cook Islands,United States,13 251 | Bulgaria,United States,3 252 | United States,Saint Kitts and Nevis,145 253 | Uruguay,United States,43 254 | United States,Haiti,225 255 | Bonaire, Sint Eustatius, and Saba,United States,58 256 | Greece,United States,30 257 | -------------------------------------------------------------------------------- /data/flight-data/csv/2010-summary.csv: -------------------------------------------------------------------------------- 1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count 2 | United States,Romania,1 3 | United States,Ireland,264 4 | United States,India,69 5 | Egypt,United States,24 6 | Equatorial Guinea,United States,1 7 | United States,Singapore,25 8 | United States,Grenada,54 9 | Costa Rica,United States,477 10 | Senegal,United States,29 11 | United States,Marshall Islands,44 12 | Guyana,United States,17 13 | United States,Sint Maarten,53 14 | Malta,United States,1 15 | Bolivia,United States,46 16 | Anguilla,United States,21 17 | Turks and Caicos Islands,United States,136 18 | United States,Afghanistan,2 19 | Saint Vincent and the Grenadines,United States,1 20 | Italy,United States,390 21 | United States,Russia,156 22 | United States,Federated States of Micronesia,48 23 | Pakistan,United States,9 24 | United States,Netherlands,570 25 | Iceland,United States,118 26 | Marshall Islands,United States,77 27 | Luxembourg,United States,91 28 | Honduras,United States,391 29 | The Bahamas,United States,903 30 | El Salvador,United States,519 31 | United States,Senegal,46 32 | Samoa,United States,28 33 | United States,Angola,18 34 | Kazakhstan,United States,3 35 | Switzerland,United States,315 36 | United States,Anguilla,20 37 | Sint Maarten,United 
States,61 38 | Hong Kong,United States,252 39 | Trinidad and Tobago,United States,187 40 | Latvia,United States,12 41 | United States,Ecuador,345 42 | Slovakia,United States,1 43 | Suriname,United States,12 44 | Mexico,United States,6200 45 | United States,Cyprus,1 46 | Ecuador,United States,272 47 | United States,Bosnia and Herzegovina,1 48 | United States,Portugal,104 49 | United States,Costa Rica,501 50 | United States,Guatemala,333 51 | United States,Suriname,13 52 | Colombia,United States,785 53 | United States,Cape Verde,18 54 | United States,Jamaica,757 55 | United States,Malaysia,3 56 | Norway,United States,29 57 | United States,Morocco,16 58 | Thailand,United States,16 59 | Venezuela,United States,377 60 | United States,Palau,30 61 | United States,Samoa,28 62 | United States,Algeria,1 63 | United States,Venezuela,341 64 | Panama,United States,355 65 | United States,Finland,20 66 | United States,Chile,176 67 | Morocco,United States,16 68 | United States,Greece,61 69 | Antigua and Barbuda,United States,123 70 | Azerbaijan,United States,1 71 | United States,The Bahamas,959 72 | New Zealand,United States,86 73 | Liberia,United States,1 74 | United States,Hong Kong,293 75 | Hungary,United States,14 76 | United States,China,505 77 | United States,Vietnam,1 78 | Sweden,United States,65 79 | United States,Kuwait,25 80 | United States,Dominican Republic,1150 81 | United States,Egypt,25 82 | Israel,United States,117 83 | United States,United States,348113 84 | United States,Luxembourg,90 85 | Ethiopia,United States,12 86 | United States,Poland,61 87 | Martinique,United States,23 88 | Saint Barthelemy,United States,28 89 | United States,Saint Barthelemy,29 90 | United States,Turkey,87 91 | Barbados,United States,130 92 | United States,Estonia,1 93 | United States,Azerbaijan,1 94 | Germany,United States,1392 95 | Kyrgyzstan,United States,2 96 | United States,South Korea,621 97 | United States,El Salvador,464 98 | Ireland,United States,231 99 | United States,Hungary,15 
100 | United States,Serbia,1 101 | Malaysia,United States,1 102 | United States,Panama,363 103 | United States,Ethiopia,12 104 | United States,Aruba,349 105 | United States,Thailand,13 106 | United States,Netherlands Antilles,289 107 | United States,Turks and Caicos Islands,147 108 | Cyprus,United States,2 109 | United States,Pakistan,15 110 | United States,Honduras,393 111 | Qatar,United States,41 112 | Fiji,United States,53 113 | Saint Kitts and Nevis,United States,113 114 | Taiwan,United States,275 115 | Haiti,United States,238 116 | Kuwait,United States,26 117 | Canada,United States,8271 118 | Federated States of Micronesia,United States,46 119 | United States,Liberia,1 120 | Jamaica,United States,733 121 | United States,Malta,2 122 | Dominican Republic,United States,1109 123 | Japan,United States,1383 124 | United States,Saint Vincent and the Grenadines,16 125 | United States,Guadeloupe,33 126 | Finland,United States,24 127 | United States,Ukraine,17 128 | United States,France,776 129 | United States,Norway,36 130 | Aruba,United States,359 131 | French Guiana,United States,4 132 | United States,Kiribati,18 133 | India,United States,66 134 | British Virgin Islands,United States,49 135 | United States,Germany,1406 136 | Brazil,United States,995 137 | United States,New Zealand,71 138 | French Polynesia,United States,38 139 | United Arab Emirates,United States,165 140 | Singapore,United States,25 141 | United States,Mexico,6220 142 | Netherlands,United States,586 143 | United States,Sweden,73 144 | United States,Gabon,1 145 | United States,Martinique,20 146 | United States,United Arab Emirates,156 147 | United States,Bulgaria,2 148 | China,United States,448 149 | Denmark,United States,98 150 | United States,Philippines,116 151 | United States,Nicaragua,181 152 | United States,Belgium,355 153 | Peru,United States,212 154 | Argentina,United States,184 155 | Cayman Islands,United States,247 156 | South Africa,United States,25 157 | United States,Iceland,129 158 | 
United States,Argentina,159 159 | Spain,United States,422 160 | Netherlands Antilles,United States,286 161 | Bermuda,United States,183 162 | United States,Austria,34 163 | United States,Nigeria,18 164 | United States,"Bonaire, Sint Eustatius, and Saba",16 165 | Kiribati,United States,17 166 | Saudi Arabia,United States,42 167 | Czech Republic,United States,21 168 | Belgium,United States,408 169 | United States,Israel,122 170 | United States,Bahrain,1 171 | United States,Saint Lucia,121 172 | Afghanistan,United States,11 173 | United States,British Virgin Islands,47 174 | Curacao,United States,20 175 | Georgia,United States,2 176 | United States,Guyana,20 177 | United States,Denmark,107 178 | Philippines,United States,132 179 | Grenada,United States,65 180 | Cape Verde,United States,18 181 | Ukraine,United States,19 182 | United States,Papua New Guinea,1 183 | United States,Saudi Arabia,54 184 | Russia,United States,152 185 | Guatemala,United States,386 186 | Saint Lucia,United States,116 187 | Paraguay,United States,90 188 | United States,Curacao,14 189 | United States,Taiwan,252 190 | United States,South Africa,15 191 | Turkey,United States,75 192 | United Kingdom,United States,1629 193 | United States,Greenland,2 194 | United States,Spain,442 195 | Cuba,United States,243 196 | United States,Italy,433 197 | United States,Switzerland,334 198 | Dominica,United States,28 199 | United States,Japan,1307 200 | Portugal,United States,102 201 | United States,Brazil,578 202 | Bahrain,United States,30 203 | United States,Peru,284 204 | Vietnam,United States,1 205 | United States,United Kingdom,1503 206 | United States,Belize,118 207 | Belize,United States,121 208 | United States,Ghana,28 209 | United States,Indonesia,1 210 | United States,Fiji,51 211 | United States,Canada,8305 212 | United States,French Polynesia,38 213 | United States,Antigua and Barbuda,121 214 | United States,Burkina Faso,1 215 | Nicaragua,United States,178 216 | United States,Latvia,13 217 | United 
States,Dominica,32 218 | United States,Czech Republic,22 219 | United States,Australia,216 220 | Austria,United States,36 221 | United States,Cook Islands,12 222 | Jordan,United States,50 223 | Palau,United States,31 224 | Uganda,United States,2 225 | United States,Cameroon,1 226 | South Korea,United States,683 227 | Angola,United States,14 228 | Ghana,United States,27 229 | Guadeloupe,United States,40 230 | France,United States,774 231 | United States,Kyrgyzstan,12 232 | Poland,United States,62 233 | Nigeria,United States,16 234 | United States,Uruguay,24 235 | Greenland,United States,2 236 | Chile,United States,166 237 | United States,Bermuda,180 238 | United States,Cuba,235 239 | United States,Colombia,832 240 | United States,Qatar,40 241 | United States,Barbados,119 242 | Australia,United States,290 243 | United States,Jordan,51 244 | United States,Cayman Islands,251 245 | United States,Trinidad and Tobago,200 246 | United States,Bolivia,39 247 | United States,Slovakia,1 248 | Uruguay,United States,54 249 | Cook Islands,United States,13 250 | United States,Saint Kitts and Nevis,127 251 | Bulgaria,United States,1 252 | United States,French Guiana,1 253 | United States,Haiti,226 254 | United States,Uganda,1 255 | "Bonaire, Sint Eustatius, and Saba",United States,16 256 | Greece,United States,50 257 | -------------------------------------------------------------------------------- /data/flight-data/csv/2011-summary.csv: -------------------------------------------------------------------------------- 1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count 2 | United States,Saint Martin,2 3 | United States,Guinea,2 4 | United States,Croatia,1 5 | United States,Romania,3 6 | United States,Ireland,268 7 | Egypt,United States,13 8 | United States,India,76 9 | United States,Singapore,24 10 | United States,Grenada,59 11 | Costa Rica,United States,494 12 | Senegal,United States,29 13 | Guyana,United States,26 14 | United States,Marshall Islands,49 15 | United States,Sint 
Maarten,223 16 | Malta,United States,1 17 | Bolivia,United States,61 18 | Anguilla,United States,21 19 | United States,Paraguay,3 20 | United States,Gibraltar,1 21 | Turks and Caicos Islands,United States,163 22 | United States,Afghanistan,3 23 | Saint Vincent and the Grenadines,United States,6 24 | Italy,United States,428 25 | United States,Russia,153 26 | United States,Federated States of Micronesia,54 27 | Pakistan,United States,12 28 | Iceland,United States,113 29 | United States,Netherlands,622 30 | Marshall Islands,United States,81 31 | Luxembourg,United States,120 32 | Honduras,United States,386 33 | The Bahamas,United States,811 34 | El Salvador,United States,495 35 | United States,Senegal,33 36 | Samoa,United States,25 37 | United States,Angola,12 38 | Switzerland,United States,314 39 | United States,Anguilla,20 40 | Sint Maarten,United States,240 41 | Hong Kong,United States,282 42 | Trinidad and Tobago,United States,205 43 | Latvia,United States,12 44 | Yemen,United States,1 45 | United States,Ecuador,351 46 | Suriname,United States,11 47 | Mexico,United States,5953 48 | United States,Cyprus,1 49 | Ecuador,United States,280 50 | United States,Portugal,109 51 | United States,Guatemala,340 52 | United States,Costa Rica,526 53 | United States,Suriname,20 54 | Colombia,United States,707 55 | United States,Cape Verde,19 56 | United States,Jamaica,695 57 | Norway,United States,25 58 | United States,Malaysia,1 59 | The Gambia,United States,1 60 | United States,Morocco,17 61 | Thailand,United States,12 62 | United States,Palau,41 63 | Venezuela,United States,373 64 | United States,Samoa,25 65 | United States,Algeria,1 66 | Panama,United States,397 67 | United States,Venezuela,352 68 | Guinea,United States,5 69 | Antigua and Barbuda,United States,146 70 | United States,Finland,32 71 | United States,Chile,182 72 | United States,Greece,45 73 | Morocco,United States,19 74 | United States,The Bahamas,888 75 | United States,Brunei,1 76 | New Zealand,United States,96 
77 | United States,Hong Kong,317 78 | Hungary,United States,13 79 | United States,China,605 80 | Sweden,United States,59 81 | United States,Kuwait,27 82 | United States,Dominican Republic,1100 83 | United States,Egypt,15 84 | Israel,United States,121 85 | United States,United States,352742 86 | United States,Luxembourg,170 87 | Ethiopia,United States,12 88 | United States,Poland,42 89 | Martinique,United States,20 90 | Saint Barthelemy,United States,38 91 | United States,Saint Barthelemy,39 92 | Barbados,United States,129 93 | United States,Turkey,99 94 | United States,Azerbaijan,3 95 | Germany,United States,1423 96 | United States,South Korea,661 97 | United States,El Salvador,443 98 | Ireland,United States,250 99 | United States,Hungary,17 100 | Malaysia,United States,2 101 | United States,Panama,380 102 | United States,Ethiopia,12 103 | United States,Thailand,14 104 | United States,Aruba,332 105 | United States,Turks and Caicos Islands,177 106 | Croatia,United States,2 107 | United States,Pakistan,24 108 | United States,Honduras,388 109 | Qatar,United States,48 110 | Fiji,United States,49 111 | Saint Kitts and Nevis,United States,105 112 | Taiwan,United States,248 113 | Haiti,United States,188 114 | Kuwait,United States,27 115 | Canada,United States,8514 116 | Belarus,United States,1 117 | Federated States of Micronesia,United States,48 118 | Jamaica,United States,675 119 | United States,Malta,3 120 | Dominican Republic,United States,1020 121 | Japan,United States,1532 122 | United States,Lithuania,1 123 | United States,Saint Vincent and the Grenadines,17 124 | Finland,United States,33 125 | United States,Ukraine,15 126 | United States,Guadeloupe,31 127 | United States,France,914 128 | Aruba,United States,347 129 | French Guiana,United States,14 130 | United States,Norway,33 131 | United States,Kiribati,28 132 | India,United States,73 133 | British Virgin Islands,United States,73 134 | United States,Germany,1480 135 | Brazil,United States,969 136 | United 
States,New Zealand,77 137 | French Polynesia,United States,36 138 | United Arab Emirates,United States,157 139 | Singapore,United States,27 140 | United States,Mexico,6000 141 | Netherlands,United States,635 142 | United States,Sweden,68 143 | United States,Martinique,20 144 | United States,United Arab Emirates,142 145 | China,United States,504 146 | United States,Philippines,109 147 | United States,Nicaragua,173 148 | Denmark,United States,95 149 | United States,Georgia,1 150 | United States,Belgium,369 151 | Libya,United States,1 152 | Argentina,United States,183 153 | Peru,United States,246 154 | Cayman Islands,United States,251 155 | South Africa,United States,24 156 | United States,Argentina,152 157 | United States,Iceland,138 158 | Spain,United States,445 159 | Bermuda,United States,191 160 | United States,Austria,34 161 | United States,Nigeria,26 162 | United States,"Bonaire, Sint Eustatius, and Saba",49 163 | Kiribati,United States,28 164 | Czech Republic,United States,23 165 | Saudi Arabia,United States,83 166 | Macau,United States,2 167 | Belgium,United States,376 168 | United States,Israel,126 169 | United States,Saint Lucia,102 170 | United States,Bahrain,2 171 | United States,British Virgin Islands,65 172 | Afghanistan,United States,8 173 | Curacao,United States,106 174 | Georgia,United States,1 175 | United States,Guyana,29 176 | Philippines,United States,127 177 | United States,Denmark,103 178 | Grenada,United States,67 179 | Cape Verde,United States,13 180 | Ukraine,United States,15 181 | United States,Papua New Guinea,1 182 | Russia,United States,199 183 | United States,Saudi Arabia,77 184 | Guatemala,United States,407 185 | Saint Lucia,United States,102 186 | Paraguay,United States,85 187 | United States,Curacao,97 188 | United States,Taiwan,231 189 | United States,South Africa,21 190 | Togo,United States,1 191 | Turkey,United States,104 192 | United Kingdom,United States,1726 193 | Romania,United States,4 194 | United States,Greenland,2 195 | 
United States,Spain,472 196 | Cuba,United States,320 197 | United States,Italy,447 198 | United States,Switzerland,319 199 | Dominica,United States,25 200 | United States,Japan,1412 201 | Portugal,United States,95 202 | United States,Brazil,575 203 | Bahrain,United States,40 204 | United States,Peru,288 205 | Vietnam,United States,1 206 | United States,United Kingdom,1649 207 | United States,Belize,112 208 | Belize,United States,113 209 | United States,Ghana,39 210 | Mauritania,United States,1 211 | Saint Martin,United States,1 212 | United States,Indonesia,1 213 | United States,Fiji,48 214 | United States,Mauritania,1 215 | United States,Canada,8650 216 | United States,Antigua and Barbuda,152 217 | United States,French Polynesia,36 218 | Nicaragua,United States,180 219 | Namibia,United States,1 220 | United States,Latvia,12 221 | United States,Dominica,29 222 | United States,Czech Republic,25 223 | United States,Australia,231 224 | United States,Cook Islands,12 225 | Austria,United States,34 226 | Palau,United States,41 227 | Jordan,United States,42 228 | South Korea,United States,713 229 | Ghana,United States,39 230 | Angola,United States,13 231 | Guadeloupe,United States,33 232 | France,United States,876 233 | Nigeria,United States,31 234 | Poland,United States,39 235 | United States,Uruguay,27 236 | Greenland,United States,1 237 | Chile,United States,178 238 | United States,Bermuda,196 239 | Lebanon,United States,1 240 | United States,Cuba,326 241 | United States,Colombia,777 242 | United States,Barbados,125 243 | United States,Qatar,48 244 | Australia,United States,280 245 | United States,Cayman Islands,262 246 | United States,Jordan,45 247 | United States,Bolivia,51 248 | United States,Trinidad and Tobago,213 249 | Uruguay,United States,50 250 | Cook Islands,United States,12 251 | United States,Saint Kitts and Nevis,120 252 | Bulgaria,United States,4 253 | United States,French Guiana,11 254 | United States,Haiti,197 255 | "Bonaire, Sint Eustatius, and 
Saba",United States,50 256 | Greece,United States,38 257 | -------------------------------------------------------------------------------- /data/flight-data/csv/2012-summary.csv: -------------------------------------------------------------------------------- 1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count 2 | United States,Croatia,1 3 | United States,Ireland,252 4 | Egypt,United States,13 5 | United States,India,62 6 | United States,Singapore,25 7 | United States,Grenada,46 8 | Costa Rica,United States,522 9 | Senegal,United States,31 10 | Guyana,United States,65 11 | United States,Marshall Islands,30 12 | United States,Sint Maarten,245 13 | Bolivia,United States,35 14 | Anguilla,United States,19 15 | United States,Paraguay,5 16 | United States,Afghanistan,5 17 | Turks and Caicos Islands,United States,183 18 | Saint Vincent and the Grenadines,United States,6 19 | Italy,United States,381 20 | Pakistan,United States,12 21 | United States,Russia,148 22 | United States,Federated States of Micronesia,63 23 | United States,Netherlands,607 24 | Iceland,United States,137 25 | Marshall Islands,United States,60 26 | United States,Togo,1 27 | Luxembourg,United States,111 28 | Honduras,United States,413 29 | The Bahamas,United States,975 30 | El Salvador,United States,539 31 | United States,Senegal,33 32 | United States,Solomon Islands,1 33 | United States,Angola,12 34 | Samoa,United States,25 35 | Kazakhstan,United States,1 36 | Switzerland,United States,285 37 | United States,Anguilla,19 38 | Sint Maarten,United States,243 39 | Hong Kong,United States,296 40 | Trinidad and Tobago,United States,216 41 | Latvia,United States,13 42 | United States,Ecuador,321 43 | Suriname,United States,14 44 | Mexico,United States,5983 45 | Ecuador,United States,239 46 | United States,Portugal,111 47 | United States,Costa Rica,555 48 | United States,Guatemala,345 49 | United States,Suriname,26 50 | Colombia,United States,759 51 | United States,The Gambia,1 52 | United States,Cape Verde,18 53 
| United States,Jamaica,610 54 | Norway,United States,31 55 | The Gambia,United States,2 56 | United States,Morocco,16 57 | Thailand,United States,6 58 | Venezuela,United States,389 59 | United States,Samoa,25 60 | United States,Palau,35 61 | United States,Venezuela,360 62 | Panama,United States,394 63 | United States,Chile,201 64 | United States,Finland,21 65 | Antigua and Barbuda,United States,145 66 | Morocco,United States,15 67 | United States,Greece,21 68 | United States,The Bahamas,1054 69 | New Zealand,United States,92 70 | United States,Hong Kong,331 71 | Hungary,United States,2 72 | United States,Tunisia,1 73 | United States,China,610 74 | Sweden,United States,50 75 | United States,Kuwait,28 76 | United States,Dominican Republic,1123 77 | United States,Egypt,12 78 | Israel,United States,117 79 | United States,United States,347452 80 | United States,Luxembourg,123 81 | Ethiopia,United States,12 82 | United States,Poland,40 83 | Martinique,United States,17 84 | United States,Saint Barthelemy,49 85 | Saint Barthelemy,United States,49 86 | Barbados,United States,121 87 | United States,Azerbaijan,1 88 | United States,Turkey,89 89 | Germany,United States,1404 90 | Kyrgyzstan,United States,1 91 | United States,South Korea,703 92 | United States,El Salvador,465 93 | Ireland,United States,255 94 | United States,Panama,393 95 | United States,Ethiopia,12 96 | United States,Thailand,6 97 | United States,Aruba,332 98 | United States,Turks and Caicos Islands,183 99 | Croatia,United States,1 100 | United States,Pakistan,16 101 | United States,Honduras,372 102 | Qatar,United States,56 103 | Fiji,United States,28 104 | Saint Kitts and Nevis,United States,109 105 | Taiwan,United States,229 106 | Haiti,United States,224 107 | Kuwait,United States,26 108 | Canada,United States,8034 109 | Belarus,United States,1 110 | Federated States of Micronesia,United States,54 111 | Jamaica,United States,617 112 | Dominican Republic,United States,1078 113 | Japan,United States,1538 114 | 
United States,Saint Vincent and the Grenadines,11 115 | Finland,United States,22 116 | United States,Ukraine,13 117 | United States,Guadeloupe,34 118 | United States,France,840 119 | United States,Norway,33 120 | Aruba,United States,349 121 | French Guiana,United States,8 122 | United States,Kiribati,26 123 | India,United States,61 124 | British Virgin Islands,United States,83 125 | Brazil,United States,979 126 | United States,Germany,1413 127 | United States,New Zealand,65 128 | United States,Cambodia,1 129 | French Polynesia,United States,39 130 | United Arab Emirates,United States,188 131 | Singapore,United States,26 132 | United States,Mexico,5974 133 | Netherlands,United States,640 134 | United States,Sweden,54 135 | United States,Martinique,17 136 | United States,United Arab Emirates,176 137 | China,United States,509 138 | United States,Philippines,112 139 | Denmark,United States,83 140 | United States,Nicaragua,183 141 | Oman,United States,1 142 | United States,Belgium,275 143 | Argentina,United States,208 144 | Peru,United States,249 145 | Cayman Islands,United States,258 146 | South Africa,United States,24 147 | United States,Argentina,170 148 | United States,Iceland,150 149 | Cameroon,United States,1 150 | Spain,United States,390 151 | Bermuda,United States,190 152 | United States,Austria,31 153 | United States,Nigeria,44 154 | United States,"Bonaire, Sint Eustatius, and Saba",44 155 | Kiribati,United States,26 156 | Saudi Arabia,United States,72 157 | Czech Republic,United States,15 158 | Macau,United States,1 159 | United States,Bahrain,2 160 | Belgium,United States,352 161 | United States,Israel,117 162 | United States,Saint Lucia,84 163 | Sierra Leone,United States,1 164 | Afghanistan,United States,5 165 | United States,British Virgin Islands,78 166 | Curacao,United States,107 167 | Georgia,United States,2 168 | United States,Guyana,79 169 | Philippines,United States,136 170 | United States,Denmark,82 171 | Grenada,United States,51 172 | Cape 
Verde,United States,19 173 | Ukraine,United States,14 174 | United States,Papua New Guinea,2 175 | Russia,United States,183 176 | United States,Saudi Arabia,70 177 | Guatemala,United States,413 178 | Saint Lucia,United States,81 179 | Paraguay,United States,85 180 | United States,Curacao,96 181 | United States,Taiwan,214 182 | Tunisia,United States,2 183 | United States,South Africa,29 184 | Turkey,United States,94 185 | United Kingdom,United States,1852 186 | Solomon Islands,United States,2 187 | United States,Greenland,1 188 | United States,Spain,393 189 | United States,Italy,377 190 | Cuba,United States,340 191 | United States,Switzerland,282 192 | Dominica,United States,26 193 | United States,Japan,1451 194 | Portugal,United States,112 195 | Bahrain,United States,65 196 | United States,Brazil,567 197 | United States,Peru,292 198 | Vietnam,United States,2 199 | United States,United Kingdom,1713 200 | United States,Belize,108 201 | Belize,United States,113 202 | United States,Ghana,31 203 | Mauritania,United States,1 204 | United States,Fiji,26 205 | United States,Canada,8097 206 | United States,French Polynesia,38 207 | United States,Antigua and Barbuda,146 208 | Nicaragua,United States,174 209 | United States,Latvia,14 210 | United States,Dominica,25 211 | United States,Czech Republic,13 212 | United States,Australia,211 213 | United States,Cook Islands,12 214 | Austria,United States,34 215 | Jordan,United States,41 216 | Palau,United States,35 217 | United States,Equatorial Guinea,4 218 | South Korea,United States,819 219 | Angola,United States,12 220 | Ghana,United States,30 221 | New Caledonia,United States,1 222 | Guadeloupe,United States,36 223 | France,United States,818 224 | Poland,United States,38 225 | Nigeria,United States,57 226 | United States,Uruguay,17 227 | Greenland,United States,1 228 | Chile,United States,178 229 | United States,Bermuda,194 230 | United States,Cuba,341 231 | United States,Colombia,806 232 | United States,Barbados,109 233 | 
United States,Qatar,56 234 | Australia,United States,277 235 | United States,Cayman Islands,247 236 | United States,Jordan,43 237 | United States,Trinidad and Tobago,213 238 | United States,Bolivia,21 239 | Uruguay,United States,53 240 | Cook Islands,United States,12 241 | United States,Saint Kitts and Nevis,124 242 | Bulgaria,United States,6 243 | United States,French Guiana,1 244 | United States,Haiti,234 245 | "Bonaire, Sint Eustatius, and Saba",United States,46 246 | Greece,United States,17 247 | -------------------------------------------------------------------------------- /data/flight-data/csv/2013-summary.csv: -------------------------------------------------------------------------------- 1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count 2 | United States,Romania,12 3 | United States,Croatia,1 4 | United States,Ireland,266 5 | Egypt,United States,13 6 | United States,India,60 7 | Equatorial Guinea,United States,1 8 | United States,Niger,1 9 | United States,Singapore,22 10 | United States,Grenada,40 11 | Costa Rica,United States,509 12 | Senegal,United States,28 13 | Guyana,United States,34 14 | United States,Sint Maarten,260 15 | United States,Marshall Islands,33 16 | Bolivia,United States,33 17 | Anguilla,United States,22 18 | United States,Paraguay,15 19 | Algeria,United States,2 20 | Turks and Caicos Islands,United States,181 21 | Saint Vincent and the Grenadines,United States,4 22 | Lithuania,United States,1 23 | Pakistan,United States,14 24 | Italy,United States,327 25 | United States,Russia,167 26 | United States,Federated States of Micronesia,62 27 | United States,Netherlands,618 28 | Iceland,United States,146 29 | Marshall Islands,United States,50 30 | Luxembourg,United States,160 31 | Honduras,United States,423 32 | Rwanda,United States,1 33 | The Bahamas,United States,865 34 | El Salvador,United States,551 35 | United States,Senegal,32 36 | Samoa,United States,25 37 | United States,Angola,12 38 | Kazakhstan,United States,2 39 | Switzerland,United 
States,288 40 | United States,Anguilla,18 41 | United States,Mali,1 42 | Sint Maarten,United States,271 43 | Hong Kong,United States,282 44 | Trinidad and Tobago,United States,183 45 | Latvia,United States,29 46 | United States,Ecuador,307 47 | Suriname,United States,15 48 | Mexico,United States,6372 49 | Ecuador,United States,252 50 | United States,Portugal,126 51 | United States,Costa Rica,549 52 | United States,Guatemala,331 53 | United States,Suriname,21 54 | Colombia,United States,798 55 | United States,Cape Verde,15 56 | Norway,United States,48 57 | United States,Jamaica,599 58 | United States,Malaysia,1 59 | United States,Morocco,16 60 | Thailand,United States,2 61 | Burundi,United States,1 62 | Venezuela,United States,350 63 | United States,Samoa,25 64 | United States,Palau,35 65 | United States,Venezuela,295 66 | Panama,United States,418 67 | United States,Finland,15 68 | Morocco,United States,18 69 | Antigua and Barbuda,United States,123 70 | United States,Chile,171 71 | Azerbaijan,United States,3 72 | United States,Greece,14 73 | United States,The Bahamas,897 74 | New Zealand,United States,114 75 | United States,Hong Kong,344 76 | Hungary,United States,1 77 | United States,China,614 78 | United States,Kuwait,24 79 | Sweden,United States,70 80 | United States,Egypt,12 81 | United States,Dominican Republic,1119 82 | Israel,United States,119 83 | United States,United States,343132 84 | United States,Luxembourg,134 85 | Ethiopia,United States,12 86 | United States,Poland,37 87 | Martinique,United States,26 88 | United States,Zimbabwe,2 89 | United States,Saint Barthelemy,58 90 | Saint Barthelemy,United States,67 91 | United States,Turkey,100 92 | Barbados,United States,97 93 | Germany,United States,1423 94 | Kyrgyzstan,United States,1 95 | United States,South Korea,687 96 | United States,El Salvador,466 97 | Ireland,United States,255 98 | Malaysia,United States,2 99 | United States,Aruba,344 100 | United States,Panama,419 101 | United States,Ethiopia,12 102 
| United States,Turks and Caicos Islands,178 103 | United States,Pakistan,12 104 | Cyprus,United States,2 105 | United States,Honduras,392 106 | Fiji,United States,34 107 | Qatar,United States,71 108 | Saint Kitts and Nevis,United States,99 109 | Kuwait,United States,24 110 | Haiti,United States,199 111 | Taiwan,United States,243 112 | Canada,United States,7860 113 | Federated States of Micronesia,United States,53 114 | Jamaica,United States,581 115 | Dominican Republic,United States,1068 116 | Japan,United States,1613 117 | United States,Saint Vincent and the Grenadines,10 118 | Finland,United States,17 119 | United States,Ukraine,4 120 | United States,Guadeloupe,42 121 | United States,France,848 122 | United States,Norway,46 123 | Aruba,United States,352 124 | French Guiana,United States,7 125 | United States,Kiribati,29 126 | Zimbabwe,United States,2 127 | India,United States,61 128 | British Virgin Islands,United States,101 129 | United States,Germany,1402 130 | United States,New Zealand,77 131 | Brazil,United States,912 132 | "Saint Helena, Ascension, and Tristan da Cunha",United States,1 133 | United States,Cambodia,3 134 | French Polynesia,United States,43 135 | United Arab Emirates,United States,202 136 | Singapore,United States,21 137 | Tanzania,United States,2 138 | United States,Mexico,6354 139 | Netherlands,United States,640 140 | United States,Sweden,73 141 | United States,Martinique,24 142 | United States,United Arab Emirates,187 143 | Denmark,United States,104 144 | China,United States,543 145 | United States,Nicaragua,185 146 | United States,Philippines,112 147 | United States,Belgium,193 148 | Nepal,United States,1 149 | Libya,United States,1 150 | Argentina,United States,176 151 | Peru,United States,255 152 | Cayman Islands,United States,274 153 | South Africa,United States,24 154 | United States,Iceland,151 155 | United States,Argentina,140 156 | Spain,United States,370 157 | Bermuda,United States,163 158 | United States,Nigeria,49 159 | United 
States,Austria,39 160 | United States,"Bonaire, Sint Eustatius, and Saba",54 161 | Kiribati,United States,29 162 | Saudi Arabia,United States,86 163 | Czech Republic,United States,10 164 | Macau,United States,2 165 | Belgium,United States,265 166 | United States,Israel,107 167 | United States,Saint Lucia,92 168 | Sierra Leone,United States,1 169 | United States,Bahrain,1 170 | United States,British Virgin Islands,94 171 | United States,"Saint Helena, Ascension, and Tristan da Cunha",2 172 | Curacao,United States,97 173 | United States,Denmark,106 174 | United States,Guyana,52 175 | Philippines,United States,136 176 | Grenada,United States,42 177 | Cape Verde,United States,21 178 | Ukraine,United States,4 179 | Russia,United States,194 180 | United States,Saudi Arabia,84 181 | Guatemala,United States,376 182 | Saint Lucia,United States,91 183 | Paraguay,United States,75 184 | United States,Curacao,89 185 | United States,Taiwan,235 186 | United States,South Africa,26 187 | Niger,United States,1 188 | Turkey,United States,99 189 | United Kingdom,United States,1802 190 | Romania,United States,14 191 | United States,Greenland,3 192 | Papua New Guinea,United States,2 193 | United States,Spain,377 194 | United States,Italy,333 195 | Cuba,United States,337 196 | United States,Switzerland,285 197 | Dominica,United States,19 198 | Portugal,United States,116 199 | United States,Japan,1476 200 | United States,Brazil,510 201 | Bahrain,United States,50 202 | United States,Peru,300 203 | Indonesia,United States,1 204 | United States,United Kingdom,1711 205 | United States,Belize,127 206 | Belize,United States,137 207 | Kenya,United States,1 208 | United States,Ghana,16 209 | United States,Fiji,30 210 | United States,Canada,7983 211 | United States,French Polynesia,40 212 | United States,Antigua and Barbuda,123 213 | Brunei,United States,1 214 | Nicaragua,United States,178 215 | United States,Latvia,12 216 | United States,Dominica,25 217 | United States,Czech Republic,9 218 | 
United States,Cook Islands,12 219 | Austria,United States,35 220 | United States,Australia,222 221 | Jordan,United States,74 222 | Palau,United States,35 223 | South Korea,United States,842 224 | Angola,United States,12 225 | Ghana,United States,16 226 | New Caledonia,United States,1 227 | Guadeloupe,United States,41 228 | France,United States,837 229 | Poland,United States,40 230 | Nigeria,United States,58 231 | United States,Uruguay,20 232 | Greenland,United States,1 233 | Chile,United States,147 234 | United States,Bermuda,166 235 | Lebanon,United States,1 236 | United States,Cuba,336 237 | United States,Colombia,811 238 | United States,Qatar,72 239 | United States,Barbados,78 240 | Australia,United States,295 241 | United States,Cayman Islands,274 242 | United States,Jordan,77 243 | United States,Trinidad and Tobago,184 244 | United States,Bolivia,13 245 | Uruguay,United States,57 246 | Cook Islands,United States,12 247 | United States,Saint Kitts and Nevis,115 248 | United States,French Guiana,3 249 | United States,Haiti,186 250 | "Bonaire, Sint Eustatius, and Saba",United States,53 251 | Greece,United States,12 252 | -------------------------------------------------------------------------------- /data/flight-data/csv/2014-summary.csv: -------------------------------------------------------------------------------- 1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count 2 | United States,Saint Martin,1 3 | United States,Romania,12 4 | United States,Croatia,2 5 | United States,Ireland,291 6 | United States,India,62 7 | Egypt,United States,11 8 | United States,Grenada,47 9 | Costa Rica,United States,529 10 | Senegal,United States,35 11 | United States,Sint Maarten,290 12 | Guyana,United States,52 13 | United States,Marshall Islands,35 14 | Malta,United States,2 15 | Malawi,United States,1 16 | Bolivia,United States,33 17 | Anguilla,United States,34 18 | Algeria,United States,9 19 | United States,Paraguay,14 20 | Gibraltar,United States,1 21 | Turks and Caicos 
Islands,United States,193 22 | Saint Vincent and the Grenadines,United States,1 23 | United States,Russia,151 24 | Italy,United States,366 25 | Pakistan,United States,12 26 | United States,Federated States of Micronesia,71 27 | Iceland,United States,150 28 | United States,Netherlands,702 29 | Marshall Islands,United States,46 30 | Luxembourg,United States,161 31 | Honduras,United States,411 32 | The Bahamas,United States,950 33 | El Salvador,United States,572 34 | United States,Senegal,28 35 | Samoa,United States,24 36 | United States,Angola,12 37 | Kazakhstan,United States,1 38 | Switzerland,United States,302 39 | United States,Anguilla,35 40 | Sint Maarten,United States,290 41 | Hong Kong,United States,338 42 | United States,Congo (Brazaville),1 43 | Latvia,United States,40 44 | Trinidad and Tobago,United States,188 45 | United States,Ecuador,326 46 | Slovakia,United States,1 47 | Suriname,United States,10 48 | Mexico,United States,6427 49 | Ecuador,United States,256 50 | United States,Portugal,122 51 | United States,Guatemala,327 52 | United States,Costa Rica,560 53 | United States,Suriname,27 54 | Colombia,United States,872 55 | United States,Cape Verde,16 56 | United States,Jamaica,714 57 | Norway,United States,86 58 | United States,Morocco,15 59 | Venezuela,United States,335 60 | United States,Samoa,25 61 | United States,Palau,38 62 | United States,Venezuela,258 63 | Panama,United States,456 64 | United States,Chile,168 65 | United States,Finland,19 66 | Antigua and Barbuda,United States,115 67 | Morocco,United States,18 68 | United States,Greece,19 69 | Azerbaijan,United States,7 70 | United States,The Bahamas,991 71 | New Zealand,United States,114 72 | Liberia,United States,3 73 | United States,Hong Kong,381 74 | Hungary,United States,2 75 | United States,Tunisia,1 76 | United States,China,767 77 | Burkina Faso,United States,2 78 | Sweden,United States,94 79 | United States,Kuwait,24 80 | United States,Dominican Republic,1282 81 | United States,Egypt,11 82 
| Israel,United States,108 83 | United States,United States,358354 84 | United States,Luxembourg,115 85 | Ethiopia,United States,11 86 | United States,Poland,33 87 | Martinique,United States,31 88 | United States,Saint Barthelemy,53 89 | Saint Barthelemy,United States,53 90 | United States,Turkey,92 91 | Barbados,United States,102 92 | United States,Kazakhstan,1 93 | United States,Azerbaijan,5 94 | Germany,United States,1391 95 | United States,South Korea,754 96 | United States,El Salvador,486 97 | Ireland,United States,267 98 | United States,Hungary,1 99 | Malaysia,United States,2 100 | United States,Panama,460 101 | United States,Aruba,348 102 | United States,Ethiopia,11 103 | United States,Turks and Caicos Islands,204 104 | United States,Pakistan,12 105 | United States,Honduras,412 106 | Fiji,United States,25 107 | Qatar,United States,95 108 | Saint Kitts and Nevis,United States,118 109 | Haiti,United States,201 110 | Taiwan,United States,265 111 | Kuwait,United States,24 112 | Canada,United States,7974 113 | Federated States of Micronesia,United States,60 114 | Jamaica,United States,673 115 | United States,Malta,2 116 | Dominican Republic,United States,1230 117 | Japan,United States,1591 118 | United States,Saint Vincent and the Grenadines,3 119 | Finland,United States,18 120 | United States,Ukraine,1 121 | United States,Guadeloupe,47 122 | United States,France,960 123 | Aruba,United States,351 124 | United States,Norway,87 125 | French Guiana,United States,11 126 | United States,Kiribati,27 127 | India,United States,61 128 | British Virgin Islands,United States,108 129 | Brazil,United States,927 130 | United States,Germany,1343 131 | United States,New Zealand,77 132 | United States,Cambodia,1 133 | French Polynesia,United States,39 134 | United Arab Emirates,United States,247 135 | United States,Mexico,6490 136 | Netherlands,United States,773 137 | United States,Sweden,101 138 | United States,Martinique,32 139 | United States,United Arab Emirates,226 140 | 
United States,Bulgaria,1 141 | Denmark,United States,114 142 | China,United States,653 143 | United States,Nicaragua,170 144 | United States,Philippines,116 145 | United States,Belgium,230 146 | Peru,United States,277 147 | Argentina,United States,195 148 | Cayman Islands,United States,283 149 | South Africa,United States,32 150 | United States,Argentina,153 151 | United States,Iceland,177 152 | Chad,United States,1 153 | Spain,United States,388 154 | Bermuda,United States,185 155 | United States,Nigeria,43 156 | United States,Austria,46 157 | United States,"Bonaire, Sint Eustatius, and Saba",63 158 | Kiribati,United States,26 159 | Saudi Arabia,United States,79 160 | Czech Republic,United States,11 161 | Macau,United States,3 162 | Belgium,United States,259 163 | United States,Israel,112 164 | United States,Saint Lucia,109 165 | United States,British Virgin Islands,101 166 | Curacao,United States,74 167 | United States,Denmark,116 168 | United States,Guyana,55 169 | Philippines,United States,134 170 | Grenada,United States,43 171 | Cape Verde,United States,24 172 | Ukraine,United States,11 173 | Russia,United States,213 174 | United States,Saudi Arabia,74 175 | Guatemala,United States,383 176 | Saint Lucia,United States,107 177 | Paraguay,United States,90 178 | United States,Curacao,77 179 | United States,Taiwan,240 180 | Tunisia,United States,2 181 | United States,South Africa,32 182 | Turkey,United States,91 183 | United Kingdom,United States,1912 184 | Romania,United States,11 185 | United States,Greenland,1 186 | United States,Spain,424 187 | Cuba,United States,417 188 | United States,Italy,385 189 | United States,Switzerland,300 190 | Dominica,United States,25 191 | United States,Japan,1501 192 | Portugal,United States,124 193 | United States,Brazil,578 194 | Bahrain,United States,41 195 | United States,Peru,315 196 | Indonesia,United States,1 197 | United States,United Kingdom,1812 198 | United States,Belize,143 199 | Belize,United States,142 200 | United 
States,Ghana,15 201 | Mauritania,United States,1 202 | United States,Indonesia,1 203 | United States,Fiji,27 204 | United States,Canada,8177 205 | United States,Antigua and Barbuda,112 206 | United States,French Polynesia,40 207 | Nicaragua,United States,168 208 | United States,Latvia,13 209 | United States,Dominica,36 210 | United States,Czech Republic,11 211 | United States,Cook Islands,12 212 | United States,Australia,235 213 | Austria,United States,47 214 | Jordan,United States,65 215 | Palau,United States,38 216 | South Korea,United States,968 217 | Angola,United States,13 218 | Ghana,United States,13 219 | Guadeloupe,United States,43 220 | France,United States,966 221 | Poland,United States,35 222 | Nigeria,United States,49 223 | United States,Uruguay,18 224 | Greenland,United States,2 225 | Chile,United States,156 226 | United States,Bermuda,190 227 | United States,Cuba,419 228 | United States,Qatar,96 229 | United States,Colombia,888 230 | United States,Barbados,89 231 | Australia,United States,293 232 | United States,Cayman Islands,278 233 | United States,Jordan,64 234 | United States,Trinidad and Tobago,175 235 | United States,Bolivia,14 236 | Uruguay,United States,60 237 | Cook Islands,United States,12 238 | United States,French Guiana,4 239 | United States,Saint Kitts and Nevis,123 240 | United States,Haiti,193 241 | "Bonaire, Sint Eustatius, and Saba",United States,62 242 | Greece,United States,20 243 | -------------------------------------------------------------------------------- /data/flight-data/csv/2015-summary.csv: -------------------------------------------------------------------------------- 1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count 2 | United States,Romania,15 3 | United States,Croatia,1 4 | United States,Ireland,344 5 | Egypt,United States,15 6 | United States,India,62 7 | United States,Singapore,1 8 | United States,Grenada,62 9 | Costa Rica,United States,588 10 | Senegal,United States,40 11 | Moldova,United States,1 12 | United 
States,Sint Maarten,325 13 | United States,Marshall Islands,39 14 | Guyana,United States,64 15 | Malta,United States,1 16 | Anguilla,United States,41 17 | Bolivia,United States,30 18 | United States,Paraguay,6 19 | Algeria,United States,4 20 | Turks and Caicos Islands,United States,230 21 | United States,Gibraltar,1 22 | Saint Vincent and the Grenadines,United States,1 23 | Italy,United States,382 24 | United States,Federated States of Micronesia,69 25 | United States,Russia,161 26 | Pakistan,United States,12 27 | United States,Netherlands,660 28 | Iceland,United States,181 29 | Marshall Islands,United States,42 30 | Luxembourg,United States,155 31 | Honduras,United States,362 32 | The Bahamas,United States,955 33 | United States,Senegal,42 34 | El Salvador,United States,561 35 | Samoa,United States,25 36 | United States,Angola,13 37 | Switzerland,United States,294 38 | United States,Anguilla,38 39 | Sint Maarten,United States,325 40 | Hong Kong,United States,332 41 | Trinidad and Tobago,United States,211 42 | Latvia,United States,19 43 | United States,Ecuador,300 44 | Suriname,United States,1 45 | Mexico,United States,7140 46 | United States,Cyprus,1 47 | Ecuador,United States,268 48 | United States,Portugal,134 49 | United States,Costa Rica,608 50 | United States,Guatemala,318 51 | United States,Suriname,34 52 | Colombia,United States,873 53 | United States,Cape Verde,14 54 | United States,Jamaica,712 55 | Norway,United States,121 56 | United States,Malaysia,3 57 | United States,Morocco,19 58 | Thailand,United States,3 59 | United States,Samoa,25 60 | Venezuela,United States,290 61 | United States,Palau,31 62 | United States,Venezuela,246 63 | Panama,United States,510 64 | Antigua and Barbuda,United States,126 65 | United States,Chile,185 66 | Morocco,United States,15 67 | United States,Finland,28 68 | Azerbaijan,United States,21 69 | United States,Greece,23 70 | United States,The Bahamas,986 71 | New Zealand,United States,111 72 | Liberia,United States,2 73 | 
United States,Hong Kong,414 74 | Hungary,United States,2 75 | United States,China,920 76 | United States,Vietnam,2 77 | Burkina Faso,United States,1 78 | Sweden,United States,118 79 | United States,Kuwait,28 80 | United States,Dominican Republic,1420 81 | United States,Egypt,12 82 | Israel,United States,134 83 | United States,United States,370002 84 | Ethiopia,United States,13 85 | United States,Luxembourg,134 86 | United States,Poland,33 87 | Martinique,United States,44 88 | United States,Saint Barthelemy,41 89 | Saint Barthelemy,United States,39 90 | Barbados,United States,154 91 | United States,Turkey,129 92 | Djibouti,United States,1 93 | United States,Azerbaijan,21 94 | United States,Estonia,1 95 | Germany,United States,1468 96 | United States,South Korea,827 97 | United States,El Salvador,508 98 | Ireland,United States,335 99 | United States,Hungary,3 100 | Zambia,United States,1 101 | Malaysia,United States,2 102 | United States,Ethiopia,12 103 | United States,Panama,465 104 | United States,Aruba,342 105 | United States,Thailand,4 106 | United States,Turks and Caicos Islands,236 107 | Croatia,United States,2 108 | United States,Pakistan,12 109 | Cyprus,United States,1 110 | United States,Honduras,407 111 | Fiji,United States,24 112 | Qatar,United States,108 113 | Saint Kitts and Nevis,United States,139 114 | Kuwait,United States,32 115 | Taiwan,United States,266 116 | Haiti,United States,226 117 | Canada,United States,8399 118 | Federated States of Micronesia,United States,69 119 | United States,Liberia,2 120 | Jamaica,United States,666 121 | United States,Malta,2 122 | Dominican Republic,United States,1353 123 | Japan,United States,1548 124 | United States,Lithuania,1 125 | Finland,United States,26 126 | United States,Guadeloupe,59 127 | United States,Ukraine,13 128 | United States,France,952 129 | United States,Norway,115 130 | Aruba,United States,346 131 | French Guiana,United States,5 132 | United States,Kiribati,35 133 | India,United States,61 134 | 
British Virgin Islands,United States,107 135 | Brazil,United States,853 136 | United States,Germany,1336 137 | United States,New Zealand,74 138 | French Polynesia,United States,43 139 | United Arab Emirates,United States,320 140 | Singapore,United States,3 141 | United States,Mexico,7187 142 | United States,Sweden,119 143 | Netherlands,United States,776 144 | United States,Martinique,43 145 | United States,United Arab Emirates,313 146 | United States,Bulgaria,1 147 | Denmark,United States,153 148 | China,United States,772 149 | United States,Nicaragua,201 150 | United States,Philippines,126 151 | United States,Georgia,1 152 | United States,Belgium,228 153 | Cayman Islands,United States,314 154 | Argentina,United States,180 155 | Peru,United States,279 156 | South Africa,United States,36 157 | United States,Iceland,202 158 | United States,Argentina,141 159 | Spain,United States,420 160 | Bermuda,United States,183 161 | United States,Nigeria,50 162 | United States,Austria,63 163 | United States,"Bonaire, Sint Eustatius, and Saba",59 164 | Kiribati,United States,26 165 | Saudi Arabia,United States,83 166 | Czech Republic,United States,13 167 | United States,Israel,127 168 | Belgium,United States,259 169 | United States,Saint Lucia,136 170 | United States,Bahrain,1 171 | United States,British Virgin Islands,80 172 | Curacao,United States,90 173 | Georgia,United States,2 174 | United States,Denmark,152 175 | United States,Guyana,63 176 | Philippines,United States,134 177 | Grenada,United States,53 178 | Cape Verde,United States,20 179 | Cote d'Ivoire,United States,1 180 | Ukraine,United States,14 181 | United States,Papua New Guinea,1 182 | Russia,United States,176 183 | United States,Saudi Arabia,70 184 | Guatemala,United States,397 185 | Saint Lucia,United States,123 186 | Paraguay,United States,60 187 | United States,Curacao,83 188 | Kosovo,United States,1 189 | United States,Taiwan,235 190 | Tunisia,United States,3 191 | United States,South Africa,40 192 | 
Niger,United States,2 193 | Turkey,United States,138 194 | United Kingdom,United States,2025 195 | Romania,United States,14 196 | United States,Greenland,4 197 | Papua New Guinea,United States,3 198 | United States,Spain,442 199 | Iraq,United States,1 200 | United States,Italy,438 201 | Cuba,United States,466 202 | United States,Switzerland,305 203 | Dominica,United States,20 204 | United States,Japan,1496 205 | Portugal,United States,127 206 | United States,Brazil,619 207 | Bahrain,United States,19 208 | United States,Peru,337 209 | Indonesia,United States,1 210 | United States,Belize,193 211 | United States,United Kingdom,1970 212 | Belize,United States,188 213 | United States,Ghana,20 214 | United States,Indonesia,2 215 | United States,Fiji,25 216 | United States,Canada,8483 217 | United States,Antigua and Barbuda,117 218 | United States,French Polynesia,40 219 | Nicaragua,United States,179 220 | United States,Latvia,15 221 | United States,Dominica,27 222 | United States,Czech Republic,12 223 | United States,Australia,258 224 | United States,Cook Islands,13 225 | Austria,United States,62 226 | Jordan,United States,44 227 | Palau,United States,30 228 | South Korea,United States,1048 229 | Angola,United States,15 230 | Ghana,United States,18 231 | New Caledonia,United States,1 232 | Guadeloupe,United States,56 233 | France,United States,935 234 | Poland,United States,32 235 | Nigeria,United States,59 236 | United States,Uruguay,13 237 | Greenland,United States,2 238 | United States,Bermuda,193 239 | Chile,United States,174 240 | United States,Cuba,478 241 | United States,Montenegro,1 242 | United States,Colombia,867 243 | United States,Barbados,130 244 | United States,Qatar,109 245 | Australia,United States,329 246 | United States,Cayman Islands,310 247 | United States,Jordan,44 248 | United States,Namibia,1 249 | United States,Trinidad and Tobago,217 250 | United States,Bolivia,13 251 | Cook Islands,United States,13 252 | Bulgaria,United States,3 253 | United 
States,Saint Kitts and Nevis,145 254 | Uruguay,United States,43 255 | United States,Haiti,225 256 | "Bonaire, Sint Eustatius, and Saba",United States,58 257 | Greece,United States,30 258 | -------------------------------------------------------------------------------- /data/flight-data/jdbc/my-sqlite.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data/jdbc/my-sqlite.db -------------------------------------------------------------------------------- /data/flight-data/orc/2010-summary.orc/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data/orc/2010-summary.orc/_SUCCESS -------------------------------------------------------------------------------- /data/flight-data/orc/2010-summary.orc/part-r-00000-2c4f7d96-e703-4de3-af1b-1441d172c80f.snappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data/orc/2010-summary.orc/part-r-00000-2c4f7d96-e703-4de3-af1b-1441d172c80f.snappy.orc -------------------------------------------------------------------------------- /data/flight-data/parquet/2010-summary.parquet/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data/parquet/2010-summary.parquet/_SUCCESS -------------------------------------------------------------------------------- 
/data/flight-data/parquet/2010-summary.parquet/part-r-00000-1a9822ba-b8fb-4d8e-844a-ea30d0801b9e.gz.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/flight-data/parquet/2010-summary.parquet/part-r-00000-1a9822ba-b8fb-4d8e-844a-ea30d0801b9e.gz.parquet -------------------------------------------------------------------------------- /data/retail-data/all/online-retail-dataset.numbers: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darshilparmar/apache-spark-with-data-bricks-for-data-engineering/bd4563d319650c8284fccf3e1690218379798b29/data/retail-data/all/online-retail-dataset.numbers -------------------------------------------------------------------------------- /spark-docker/data/data.csv: -------------------------------------------------------------------------------- 1 | "invoiceseq","invoiceUrl","date_of_purchase","txn_id","user_email","base_amount","total_amount","igstTaxableAmount","user_state","refund_processed","cgstTaxableAmount","sgstTaxableAmount","user_gstin","legal_name" 2 | "AZZIRR2200004531","https://d3r0n59fu7ub44.cloudfront.net/invoicer/TYPEB/2024-07-02/66843f83c208fc0bdd87bbbf.pdf","02-07-2024 23:27","66843f83c208fc0bdd87bbbf","sreenivassree8@gmail.com","2499.15",2949,"449.85","Tamil Nadu",false,,,, 3 | "AZZIRR2200004530","https://d3r0n59fu7ub44.cloudfront.net/invoicer/TYPEB/2024-07-02/66843d0ef623480bcfcc98a2.pdf","02-07-2024 23:16","66843d0ef623480bcfcc98a2","jayanth.gundagoni98@gmail.com","1440.68",1700,"259.32","Telangana",false,,,, 4 | "AZZIRR2200004529","https://d3r0n59fu7ub44.cloudfront.net/invoicer/TYPEB/2024-07-02/668424ed380ba40bd38be6e1.pdf","02-07-2024 21:33","668424ed380ba40bd38be6e1","raunak77mm@gmail.com","2499.15",2949,"449.85","Massachusetts",false,,,, 5 | 
"AZZIRR2200004528","https://d3r0n59fu7ub44.cloudfront.net/invoicer/TYPEB/2024-07-02/6683e7632ef4bd0bc26b2da5.pdf","02-07-2024 17:11","6683e7632ef4bd0bc26b2da5","vrushabjain@gmail.com","1440.68",1700,,"Maharashtra",false,"129.66","129.66",, 6 | -------------------------------------------------------------------------------- /spark-docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | spark: 4 | image: jupyter/pyspark-notebook 5 | ports: 6 | - "8888:8888" # Jupyter Notebook 7 | - "4040:4040" # Spark UI 8 | volumes: 9 | - ./files/data:/home/jovyan/work/files/data 10 | - ./files:/home/jovyan/work/files 11 | environment: 12 | - JUPYTER_ENABLE_LAB=yes 13 | command: start.sh jupyter lab --NotebookApp.token='' 14 | 15 | spark-master: 16 | image: bitnami/spark:latest 17 | environment: 18 | - SPARK_MODE=master 19 | ports: 20 | - "8080:8080" 21 | - "7077:7077" 22 | 23 | spark-worker: 24 | image: bitnami/spark:latest 25 | environment: 26 | - SPARK_MODE=worker 27 | - SPARK_MASTER_URL=spark://spark-master:7077 28 | depends_on: 29 | - spark-master -------------------------------------------------------------------------------- /spark-docker/files/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "649dc83f-4c28-4875-a23f-a2e931af8669", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from pyspark.sql import SparkSession" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "bd6754db-bd76-4444-8e45-aad04bd8cd60", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from pyspark.sql import SparkSession\n", 21 | "\n", 22 | "# Create SparkSession \n", 23 | "spark = SparkSession.builder \\\n", 24 | " .master(\"local[1]\") \\\n", 25 | " .appName(\"SparkByExamples.com\") \\\n", 26 | " .getOrCreate() \n" 27 | ] 28 
| }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "id": "ae3ed98b-f46c-431f-9103-34e069afa093", 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/html": [ 38 | "\n", 39 | "
\n", 40 | "

SparkSession - in-memory

\n", 41 | " \n", 42 | "
\n", 43 | "

SparkContext

\n", 44 | "\n", 45 | "

Spark UI

\n", 46 | "\n", 47 | "
\n", 48 | "
Version
\n", 49 | "
v3.5.0
\n", 50 | "
Master
\n", 51 | "
local[1]
\n", 52 | "
AppName
\n", 53 | "
SparkByExamples.com
\n", 54 | "
\n", 55 | "
\n", 56 | " \n", 57 | "
\n", 58 | " " 59 | ], 60 | "text/plain": [ 61 | "" 62 | ] 63 | }, 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "spark" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "id": "76e22f21-4441-437b-aa20-b20216e779f1", 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n", 84 | "| _c0| _c1| _c2| _c3| _c4| _c5| _c6| _c7| _c8| _c9| _c10|\n", 85 | "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n", 86 | "|Trip ID|Duration| Start Date| Start Station|Start Terminal| End Date| End Station|End Terminal|Bike #|Subscriber Type|Zip Code|\n", 87 | "| 913460| 765|8/31/2015 23:26|Harry Bridges Pla...| 50|8/31/2015 23:39|San Francisco Cal...| 70| 288| Subscriber| 2139|\n", 88 | "| 913459| 1036|8/31/2015 23:11|San Antonio Shopp...| 31|8/31/2015 23:28|Mountain View Cit...| 27| 35| Subscriber| 95032|\n", 89 | "| 913455| 307|8/31/2015 23:13| Post at Kearny| 47|8/31/2015 23:18| 2nd at South Park| 64| 468| Subscriber| 94107|\n", 90 | "| 913454| 409|8/31/2015 23:10| San Jose City Hall| 10|8/31/2015 23:17| San Salvador at 1st| 8| 68| Subscriber| 95113|\n", 91 | "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n", 92 | "only showing top 5 rows\n", 93 | "\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "spark.read.csv(\"data/201508_trip_data.csv\").show(5)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "31dea8e7-cdde-47b7-96db-076b55d8682c", 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [] 108 | } 109 | 
], 110 | "metadata": { 111 | "kernelspec": { 112 | "display_name": "Python 3 (ipykernel)", 113 | "language": "python", 114 | "name": "python3" 115 | }, 116 | "language_info": { 117 | "codemirror_mode": { 118 | "name": "ipython", 119 | "version": 3 120 | }, 121 | "file_extension": ".py", 122 | "mimetype": "text/x-python", 123 | "name": "python", 124 | "nbconvert_exporter": "python", 125 | "pygments_lexer": "ipython3", 126 | "version": "3.11.6" 127 | } 128 | }, 129 | "nbformat": 4, 130 | "nbformat_minor": 5 131 | } 132 | --------------------------------------------------------------------------------