├── .DS_Store ├── .gitignore ├── 1_Python_for_Data_Analysts.ipynb ├── 2_Exploratory_Data_Analysis.ipynb ├── 3_Reshaping_Visualization.ipynb ├── 4_SQL_Query.ipynb ├── 5_Intro_to_machine_learning_1.ipynb ├── 6_Intro_to_Machine_Learning_2.ipynb ├── assets ├── 1.png ├── 2.png ├── 3.png ├── 4.png ├── 5.png ├── 6.png ├── 7.png ├── RM_t.png ├── Thumbs.db ├── biasvariance.png ├── centroids.png ├── cheatsheet.png ├── chinookschema.png ├── chinookschema2.png ├── clientserver.png ├── curseofdim.png ├── cv.jpg ├── dendrogram.PNG ├── dendrogram_cluster.PNG ├── illustration1.png ├── imbalance.jpg ├── iris.png ├── logo.png ├── ml_types_algorithm_oracle.png ├── normplot.png ├── os_meme.png ├── pdf.png ├── pdf.svg ├── requirements.txt ├── sensitivity.png └── sqljoins.png ├── data_cache └── stock ├── data_input ├── .DS_Store ├── airports.sqlite ├── analytics.csv ├── books_c.csv ├── broadband.csv ├── car_data.csv ├── chinook.db ├── companies.csv ├── concrete.csv ├── copiers.csv ├── country_sales.pkl ├── crime.csv ├── flight_sm.csv ├── flights.db ├── household.csv ├── loan.csv ├── loan2017Q4.csv ├── loan2018q1.csv ├── monthly_cust.csv ├── normal_plot.png ├── online_bl.csv ├── rice.csv ├── salary.csv ├── sample.csv ├── techcrunch.csv ├── telcochurn.csv ├── wholesale.csv └── wisc_bc_data.csv ├── environments.md └── lecturenotes ├── 1_july_cohort.md ├── 2_july_cohort.md ├── 4_july_cohort.md ├── analyticsapp ├── app.py ├── classroom.py ├── demo.py ├── techcrunch.csv └── templates │ └── analytics.html ├── book_analytics.ipynb ├── classroom.sql ├── googleanalytics.ipynb ├── report_final.html └── techcrunch.ipynb /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 
.DS_Store 2 | .ipynb_checkpoints 3 | -------------------------------------------------------------------------------- /assets/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/1.png -------------------------------------------------------------------------------- /assets/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/2.png -------------------------------------------------------------------------------- /assets/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/3.png -------------------------------------------------------------------------------- /assets/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/4.png -------------------------------------------------------------------------------- /assets/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/5.png -------------------------------------------------------------------------------- /assets/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/6.png -------------------------------------------------------------------------------- /assets/7.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/7.png -------------------------------------------------------------------------------- /assets/RM_t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/RM_t.png -------------------------------------------------------------------------------- /assets/Thumbs.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/Thumbs.db -------------------------------------------------------------------------------- /assets/biasvariance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/biasvariance.png -------------------------------------------------------------------------------- /assets/centroids.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/centroids.png -------------------------------------------------------------------------------- /assets/cheatsheet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/cheatsheet.png -------------------------------------------------------------------------------- /assets/chinookschema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/chinookschema.png 
-------------------------------------------------------------------------------- /assets/chinookschema2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/chinookschema2.png -------------------------------------------------------------------------------- /assets/clientserver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/clientserver.png -------------------------------------------------------------------------------- /assets/curseofdim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/curseofdim.png -------------------------------------------------------------------------------- /assets/cv.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/cv.jpg -------------------------------------------------------------------------------- /assets/dendrogram.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/dendrogram.PNG -------------------------------------------------------------------------------- /assets/dendrogram_cluster.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/dendrogram_cluster.PNG -------------------------------------------------------------------------------- /assets/illustration1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/illustration1.png -------------------------------------------------------------------------------- /assets/imbalance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/imbalance.jpg -------------------------------------------------------------------------------- /assets/iris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/iris.png -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/logo.png -------------------------------------------------------------------------------- /assets/ml_types_algorithm_oracle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/ml_types_algorithm_oracle.png -------------------------------------------------------------------------------- /assets/normplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/normplot.png -------------------------------------------------------------------------------- /assets/os_meme.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/os_meme.png -------------------------------------------------------------------------------- /assets/pdf.png: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | image/svg+xml0.0 198 | 0.1 202 | 0.2 206 | 0.3 210 | 0.4 214 | −2σ 311 | −1σ 315 | 319 | −3σ 323 | 327 | µ 331 | 335 | 34.1% 339 | 34.1% 343 | 13.6% 347 | 2.1% 351 | 13.6% 355 | 0.1% 367 | 0.1% 379 | 2.1% 391 | -------------------------------------------------------------------------------- /assets/pdf.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | image/svg+xml0.0 198 | 0.1 202 | 0.2 206 | 0.3 210 | 0.4 214 | −2σ 311 | −1σ 315 | 319 | −3σ 323 | 327 | µ 331 | 335 | 34.1% 339 | 34.1% 343 | 13.6% 347 | 2.1% 351 | 13.6% 355 | 0.1% 367 | 0.1% 379 | 2.1% 391 | -------------------------------------------------------------------------------- /assets/requirements.txt: -------------------------------------------------------------------------------- 1 | backcall==0.1.0 2 | certifi==2019.11.28 3 | chardet==3.0.4 4 | cycler==0.10.0 5 | decorator==4.4.0 6 | idna==2.9 7 | ipython==7.7.0 8 | ipython-genutils==0.2.0 9 | jedi==0.14.1 10 | kiwisolver==1.1.0 11 | lxml==4.5.0 12 | matplotlib==3.2.1 13 | nltk==3.4.5 14 | numpy==1.18.2 15 | pandas==1.0.3 16 | pandas-datareader==0.8.1 17 | parso==0.5.1 18 | pexpect==4.7.0 19 | pickleshare==0.7.5 20 | prompt-toolkit==2.0.9 21 | ptyprocess==0.6.0 22 | Pygments==2.7.4 23 | pyparsing==2.4.6 24 | python-dateutil==2.8.1 25 | pytz==2019.3 26 | requests==2.23.0 27 | six==1.14.0 28 | tornado==6.0.3 29 | traitlets==4.3.2 30 | urllib3==1.25.8 31 | wcwidth==0.1.7 32 | -------------------------------------------------------------------------------- /assets/sensitivity.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/sensitivity.png -------------------------------------------------------------------------------- /assets/sqljoins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/assets/sqljoins.png -------------------------------------------------------------------------------- /data_cache/stock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/data_cache/stock -------------------------------------------------------------------------------- /data_input/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/data_input/.DS_Store -------------------------------------------------------------------------------- /data_input/airports.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/data_input/airports.sqlite -------------------------------------------------------------------------------- /data_input/analytics.csv: -------------------------------------------------------------------------------- 1 | # ---------------------------------------- 2 | # eCommerce (setup: SamuelC) 3 | # Language 4 | # 20190101-20190430 5 | # ---------------------------------------- 6 | 7 | Language,Users,New Users,Sessions,Bounce Rate,Pages / Session,Avg. 
Session Duration,Goal Conversion Rate,Goal Completions,Goal Value 8 | en-us,"23,497","22,696","35,113",0.54%,6.35,00:02:32,27.21%,"9,555",$0.00 9 | id-id,"7,797","7,613","10,617",0.65%,4.89,00:01:38,16.26%,"1,726",$0.00 10 | en-gb,"3,198","3,086","4,625",0.56%,5.52,00:01:50,21.36%,988,$0.00 11 | id,"2,171","2,094","2,724",0.29%,4.75,00:01:43,17.18%,468,$0.00 12 | en,224,214,295,3.05%,5.03,00:01:44,26.78%,79,$0.00 13 | id-us,204,202,235,1.28%,3.64,00:00:44,4.68%,11,$0.00 14 | th-th,186,186,224,0.45%,3.50,00:00:40,4.91%,11,$0.00 15 | en-sg,132,125,231,0.00%,6.92,00:02:10,39.83%,92,$0.00 16 | en-id,126,119,171,0.00%,6.39,00:04:10,26.32%,45,$0.00 17 | en-au,93,91,127,0.00%,5.56,00:01:22,19.69%,25,$0.00 18 | ,"38,351","37,140","55,195",0.63%,5.86,00:02:13,23.96%,"13,222",$0.00 19 | 20 | Day Index,Users 21 | 1/1/19,122 22 | 1/2/19,174 23 | 1/3/19,240 24 | 1/4/19,231 25 | 1/5/19,172 26 | 1/6/19,134 27 | 1/7/19,304 28 | 1/8/19,456 29 | 1/9/19,381 30 | 1/10/19,448 31 | 1/11/19,460 32 | 1/12/19,320 33 | 1/13/19,261 34 | 1/14/19,352 35 | 1/15/19,331 36 | 1/16/19,243 37 | 1/17/19,246 38 | 1/18/19,238 39 | 1/19/19,163 40 | 1/20/19,173 41 | 1/21/19,227 42 | 1/22/19,347 43 | 1/23/19,359 44 | 1/24/19,258 45 | 1/25/19,218 46 | 1/26/19,163 47 | 1/27/19,154 48 | 1/28/19,227 49 | 1/29/19,288 50 | 1/30/19,327 51 | 1/31/19,252 52 | 2/1/19,317 53 | 2/2/19,413 54 | 2/3/19,257 55 | 2/4/19,297 56 | 2/5/19,252 57 | 2/6/19,434 58 | 2/7/19,353 59 | 2/8/19,335 60 | 2/9/19,260 61 | 2/10/19,181 62 | 2/11/19,345 63 | 2/12/19,581 64 | 2/13/19,487 65 | 2/14/19,408 66 | 2/15/19,347 67 | 2/16/19,480 68 | 2/17/19,294 69 | 2/18/19,385 70 | 2/19/19,591 71 | 2/20/19,"1,107" 72 | 2/21/19,"2,487" 73 | 2/22/19,"1,101" 74 | 2/23/19,813 75 | 2/24/19,481 76 | 2/25/19,521 77 | 2/26/19,619 78 | 2/27/19,484 79 | 2/28/19,461 80 | 3/1/19,503 81 | 3/2/19,307 82 | 3/3/19,249 83 | 3/4/19,662 84 | 3/5/19,670 85 | 3/6/19,625 86 | 3/7/19,571 87 | 3/8/19,477 88 | 3/9/19,222 89 | 3/10/19,311 90 | 3/11/19,444 91 | 
3/12/19,442 92 | 3/13/19,602 93 | 3/14/19,452 94 | 3/15/19,384 95 | 3/16/19,342 96 | 3/17/19,214 97 | 3/18/19,539 98 | 3/19/19,731 99 | 3/20/19,584 100 | 3/21/19,738 101 | 3/22/19,496 102 | 3/23/19,418 103 | 3/24/19,491 104 | 3/25/19,396 105 | 3/26/19,936 106 | 3/27/19,463 107 | 3/28/19,433 108 | 3/29/19,343 109 | 3/30/19,257 110 | 3/31/19,204 111 | 4/1/19,316 112 | 4/2/19,348 113 | 4/3/19,257 114 | 4/4/19,320 115 | 4/5/19,299 116 | 4/6/19,191 117 | 4/7/19,173 118 | 4/8/19,313 119 | 4/9/19,404 120 | 4/10/19,360 121 | 4/11/19,354 122 | 4/12/19,316 123 | 4/13/19,296 124 | 4/14/19,188 125 | 4/15/19,303 126 | 4/16/19,453 127 | 4/17/19,272 128 | 4/18/19,463 129 | 4/19/19,232 130 | 4/20/19,225 131 | 4/21/19,185 132 | 4/22/19,394 133 | 4/23/19,518 134 | 4/24/19,405 135 | 4/25/19,448 136 | 4/26/19,369 137 | 4/27/19,359 138 | 4/28/19,307 139 | 4/29/19,428 140 | 4/30/19,468 141 | ,"47,830" 142 | -------------------------------------------------------------------------------- /data_input/car_data.csv: -------------------------------------------------------------------------------- 1 | Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner 2 | ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0 3 | sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0 4 | ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0 5 | wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0 6 | swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0 7 | vitara brezza,2018,9.25,9.83,2071,Diesel,Dealer,Manual,0 8 | ciaz,2015,6.75,8.12,18796,Petrol,Dealer,Manual,0 9 | s cross,2015,6.5,8.61,33429,Diesel,Dealer,Manual,0 10 | ciaz,2016,8.75,8.89,20273,Diesel,Dealer,Manual,0 11 | ciaz,2015,7.45,8.92,42367,Diesel,Dealer,Manual,0 12 | alto 800,2017,2.85,3.6,2135,Petrol,Dealer,Manual,0 13 | ciaz,2015,6.85,10.38,51000,Diesel,Dealer,Manual,0 14 | ciaz,2015,7.5,9.94,15000,Petrol,Dealer,Automatic,0 15 | ertiga,2015,6.1,7.71,26000,Petrol,Dealer,Manual,0 16 | 
dzire,2009,2.25,7.21,77427,Petrol,Dealer,Manual,0 17 | ertiga,2016,7.75,10.79,43000,Diesel,Dealer,Manual,0 18 | ertiga,2015,7.25,10.79,41678,Diesel,Dealer,Manual,0 19 | ertiga,2016,7.75,10.79,43000,Diesel,Dealer,Manual,0 20 | wagon r,2015,3.25,5.09,35500,CNG,Dealer,Manual,0 21 | sx4,2010,2.65,7.98,41442,Petrol,Dealer,Manual,0 22 | alto k10,2016,2.85,3.95,25000,Petrol,Dealer,Manual,0 23 | ignis,2017,4.9,5.71,2400,Petrol,Dealer,Manual,0 24 | sx4,2011,4.4,8.01,50000,Petrol,Dealer,Automatic,0 25 | alto k10,2014,2.5,3.46,45280,Petrol,Dealer,Manual,0 26 | wagon r,2013,2.9,4.41,56879,Petrol,Dealer,Manual,0 27 | swift,2011,3,4.99,20000,Petrol,Dealer,Manual,0 28 | swift,2013,4.15,5.87,55138,Petrol,Dealer,Manual,0 29 | swift,2017,6,6.49,16200,Petrol,Individual,Manual,0 30 | alto k10,2010,1.95,3.95,44542,Petrol,Dealer,Manual,0 31 | ciaz,2015,7.45,10.38,45000,Diesel,Dealer,Manual,0 32 | ritz,2012,3.1,5.98,51439,Diesel,Dealer,Manual,0 33 | ritz,2011,2.35,4.89,54200,Petrol,Dealer,Manual,0 34 | swift,2014,4.95,7.49,39000,Diesel,Dealer,Manual,0 35 | ertiga,2014,6,9.95,45000,Diesel,Dealer,Manual,0 36 | dzire,2014,5.5,8.06,45000,Diesel,Dealer,Manual,0 37 | sx4,2011,2.95,7.74,49998,CNG,Dealer,Manual,0 38 | dzire,2015,4.65,7.2,48767,Petrol,Dealer,Manual,0 39 | 800,2003,0.35,2.28,127000,Petrol,Individual,Manual,0 40 | alto k10,2016,3,3.76,10079,Petrol,Dealer,Manual,0 41 | sx4,2003,2.25,7.98,62000,Petrol,Dealer,Manual,0 42 | baleno,2016,5.85,7.87,24524,Petrol,Dealer,Automatic,0 43 | alto k10,2014,2.55,3.98,46706,Petrol,Dealer,Manual,0 44 | sx4,2008,1.95,7.15,58000,Petrol,Dealer,Manual,0 45 | dzire,2014,5.5,8.06,45780,Diesel,Dealer,Manual,0 46 | omni,2012,1.25,2.69,50000,Petrol,Dealer,Manual,0 47 | ciaz,2014,7.5,12.04,15000,Petrol,Dealer,Automatic,0 48 | ritz,2013,2.65,4.89,64532,Petrol,Dealer,Manual,0 49 | wagon r,2006,1.05,4.15,65000,Petrol,Dealer,Manual,0 50 | ertiga,2015,5.8,7.71,25870,Petrol,Dealer,Manual,0 51 | ciaz,2017,7.75,9.29,37000,Petrol,Dealer,Automatic,0 52 | 
fortuner,2012,14.9,30.61,104707,Diesel,Dealer,Automatic,0 53 | fortuner,2015,23,30.61,40000,Diesel,Dealer,Automatic,0 54 | innova,2017,18,19.77,15000,Diesel,Dealer,Automatic,0 55 | fortuner,2013,16,30.61,135000,Diesel,Individual,Automatic,0 56 | innova,2005,2.75,10.21,90000,Petrol,Individual,Manual,0 57 | corolla altis,2009,3.6,15.04,70000,Petrol,Dealer,Automatic,0 58 | etios cross,2015,4.5,7.27,40534,Petrol,Dealer,Manual,0 59 | corolla altis,2010,4.75,18.54,50000,Petrol,Dealer,Manual,0 60 | etios g,2014,4.1,6.8,39485,Petrol,Dealer,Manual,1 61 | fortuner,2014,19.99,35.96,41000,Diesel,Dealer,Automatic,0 62 | corolla altis,2013,6.95,18.61,40001,Petrol,Dealer,Manual,0 63 | etios cross,2015,4.5,7.7,40588,Petrol,Dealer,Manual,0 64 | fortuner,2014,18.75,35.96,78000,Diesel,Dealer,Automatic,0 65 | fortuner,2015,23.5,35.96,47000,Diesel,Dealer,Automatic,0 66 | fortuner,2017,33,36.23,6000,Diesel,Dealer,Automatic,0 67 | etios liva,2014,4.75,6.95,45000,Diesel,Dealer,Manual,0 68 | innova,2017,19.75,23.15,11000,Petrol,Dealer,Automatic,0 69 | fortuner,2010,9.25,20.45,59000,Diesel,Dealer,Manual,0 70 | corolla altis,2011,4.35,13.74,88000,Petrol,Dealer,Manual,0 71 | corolla altis,2016,14.25,20.91,12000,Petrol,Dealer,Manual,0 72 | etios liva,2014,3.95,6.76,71000,Diesel,Dealer,Manual,0 73 | corolla altis,2011,4.5,12.48,45000,Diesel,Dealer,Manual,0 74 | corolla altis,2013,7.45,18.61,56001,Petrol,Dealer,Manual,0 75 | etios liva,2011,2.65,5.71,43000,Petrol,Dealer,Manual,0 76 | etios cross,2014,4.9,8.93,83000,Diesel,Dealer,Manual,0 77 | etios g,2015,3.95,6.8,36000,Petrol,Dealer,Manual,0 78 | corolla altis,2013,5.5,14.68,72000,Petrol,Dealer,Manual,0 79 | corolla,2004,1.5,12.35,135154,Petrol,Dealer,Automatic,0 80 | corolla altis,2010,5.25,22.83,80000,Petrol,Dealer,Automatic,0 81 | fortuner,2012,14.5,30.61,89000,Diesel,Dealer,Automatic,0 82 | corolla altis,2016,14.73,14.89,23000,Diesel,Dealer,Manual,0 83 | etios gd,2015,4.75,7.85,40000,Diesel,Dealer,Manual,0 84 | 
innova,2017,23,25.39,15000,Diesel,Dealer,Automatic,0 85 | innova,2015,12.5,13.46,38000,Diesel,Dealer,Manual,0 86 | innova,2005,3.49,13.46,197176,Diesel,Dealer,Manual,0 87 | camry,2006,2.5,23.73,142000,Petrol,Individual,Automatic,3 88 | land cruiser,2010,35,92.6,78000,Diesel,Dealer,Manual,0 89 | corolla altis,2012,5.9,13.74,56000,Petrol,Dealer,Manual,0 90 | etios liva,2013,3.45,6.05,47000,Petrol,Dealer,Manual,0 91 | etios g,2014,4.75,6.76,40000,Petrol,Dealer,Manual,0 92 | corolla altis,2009,3.8,18.61,62000,Petrol,Dealer,Manual,0 93 | innova,2014,11.25,16.09,58242,Diesel,Dealer,Manual,0 94 | innova,2005,3.51,13.7,75000,Petrol,Dealer,Manual,0 95 | fortuner,2015,23,30.61,40000,Diesel,Dealer,Automatic,0 96 | corolla altis,2008,4,22.78,89000,Petrol,Dealer,Automatic,0 97 | corolla altis,2012,5.85,18.61,72000,Petrol,Dealer,Manual,0 98 | innova,2016,20.75,25.39,29000,Diesel,Dealer,Automatic,0 99 | corolla altis,2017,17,18.64,8700,Petrol,Dealer,Manual,0 100 | corolla altis,2013,7.05,18.61,45000,Petrol,Dealer,Manual,0 101 | fortuner,2010,9.65,20.45,50024,Diesel,Dealer,Manual,0 102 | Royal Enfield Thunder 500,2016,1.75,1.9,3000,Petrol,Individual,Manual,0 103 | UM Renegade Mojave,2017,1.7,1.82,1400,Petrol,Individual,Manual,0 104 | KTM RC200,2017,1.65,1.78,4000,Petrol,Individual,Manual,0 105 | Bajaj Dominar 400,2017,1.45,1.6,1200,Petrol,Individual,Manual,0 106 | Royal Enfield Classic 350,2017,1.35,1.47,4100,Petrol,Individual,Manual,0 107 | KTM RC390,2015,1.35,2.37,21700,Petrol,Individual,Manual,0 108 | Hyosung GT250R,2014,1.35,3.45,16500,Petrol,Individual,Manual,1 109 | Royal Enfield Thunder 350,2013,1.25,1.5,15000,Petrol,Individual,Manual,0 110 | Royal Enfield Thunder 350,2016,1.2,1.5,18000,Petrol,Individual,Manual,0 111 | Royal Enfield Classic 350,2017,1.2,1.47,11000,Petrol,Individual,Manual,0 112 | KTM RC200,2016,1.2,1.78,6000,Petrol,Individual,Manual,0 113 | Royal Enfield Thunder 350,2016,1.15,1.5,8700,Petrol,Individual,Manual,0 114 | KTM 390 Duke 
,2014,1.15,2.4,7000,Petrol,Individual,Manual,0 115 | Mahindra Mojo XT300,2016,1.15,1.4,35000,Petrol,Individual,Manual,0 116 | Royal Enfield Classic 350,2015,1.15,1.47,17000,Petrol,Individual,Manual,0 117 | Royal Enfield Classic 350,2015,1.11,1.47,17500,Petrol,Individual,Manual,0 118 | Royal Enfield Classic 350,2013,1.1,1.47,33000,Petrol,Individual,Manual,0 119 | Royal Enfield Thunder 500,2015,1.1,1.9,14000,Petrol,Individual,Manual,0 120 | Royal Enfield Classic 350,2015,1.1,1.47,26000,Petrol,Individual,Manual,0 121 | Royal Enfield Thunder 500,2013,1.05,1.9,5400,Petrol,Individual,Manual,0 122 | Bajaj Pulsar RS200,2016,1.05,1.26,5700,Petrol,Individual,Manual,0 123 | Royal Enfield Thunder 350,2011,1.05,1.5,6900,Petrol,Individual,Manual,0 124 | Royal Enfield Bullet 350,2016,1.05,1.17,6000,Petrol,Individual,Manual,0 125 | Royal Enfield Classic 350,2013,1,1.47,46500,Petrol,Individual,Manual,0 126 | Royal Enfield Classic 500,2012,0.95,1.75,11500,Petrol,Individual,Manual,0 127 | Royal Enfield Classic 500,2009,0.9,1.75,40000,Petrol,Individual,Manual,0 128 | Bajaj Avenger 220,2017,0.9,0.95,1300,Petrol,Individual,Manual,0 129 | Bajaj Avenger 150,2016,0.75,0.8,7000,Petrol,Individual,Manual,0 130 | Honda CB Hornet 160R,2017,0.8,0.87,3000,Petrol,Individual,Manual,0 131 | Yamaha FZ S V 2.0,2017,0.78,0.84,5000,Petrol,Individual,Manual,0 132 | Honda CB Hornet 160R,2017,0.75,0.87,11000,Petrol,Individual,Manual,0 133 | Yamaha FZ 16,2015,0.75,0.82,18000,Petrol,Individual,Manual,0 134 | Bajaj Avenger 220,2017,0.75,0.95,3500,Petrol,Individual,Manual,0 135 | Bajaj Avenger 220,2016,0.72,0.95,500,Petrol,Individual,Manual,0 136 | TVS Apache RTR 160,2017,0.65,0.81,11800,Petrol,Individual,Manual,0 137 | Bajaj Pulsar 150,2015,0.65,0.74,5000,Petrol,Individual,Manual,0 138 | Honda CBR 150,2014,0.65,1.2,23500,Petrol,Individual,Manual,0 139 | Hero Extreme,2013,0.65,0.787,16000,Petrol,Individual,Manual,0 140 | Honda CB Hornet 160R,2016,0.6,0.87,15000,Petrol,Individual,Manual,0 141 | Bajaj Avenger 
220 dtsi,2015,0.6,0.95,16600,Petrol,Individual,Manual,0 142 | Honda CBR 150,2013,0.6,1.2,32000,Petrol,Individual,Manual,0 143 | Bajaj Avenger 150 street,2016,0.6,0.8,20000,Petrol,Individual,Manual,0 144 | Yamaha FZ v 2.0,2015,0.6,0.84,29000,Petrol,Individual,Manual,0 145 | Yamaha FZ v 2.0,2016,0.6,0.84,25000,Petrol,Individual,Manual,0 146 | Bajaj Pulsar NS 200,2014,0.6,0.99,25000,Petrol,Individual,Manual,0 147 | TVS Apache RTR 160,2012,0.6,0.81,19000,Petrol,Individual,Manual,0 148 | Hero Extreme,2014,0.55,0.787,15000,Petrol,Individual,Manual,0 149 | Yamaha FZ S V 2.0,2015,0.55,0.84,58000,Petrol,Individual,Manual,0 150 | Bajaj Pulsar 220 F,2010,0.52,0.94,45000,Petrol,Individual,Manual,0 151 | Bajaj Pulsar 220 F,2016,0.51,0.94,24000,Petrol,Individual,Manual,0 152 | TVS Apache RTR 180,2011,0.5,0.826,6000,Petrol,Individual,Manual,0 153 | Hero Passion X pro,2016,0.5,0.55,31000,Petrol,Individual,Manual,0 154 | Bajaj Pulsar NS 200,2012,0.5,0.99,13000,Petrol,Individual,Manual,0 155 | Bajaj Pulsar NS 200,2013,0.5,0.99,45000,Petrol,Individual,Manual,0 156 | Yamaha Fazer ,2014,0.5,0.88,8000,Petrol,Individual,Manual,0 157 | Honda Activa 4G,2017,0.48,0.51,4300,Petrol,Individual,Automatic,0 158 | TVS Sport ,2017,0.48,0.52,15000,Petrol,Individual,Manual,0 159 | Yamaha FZ S V 2.0,2015,0.48,0.84,23000,Petrol,Individual,Manual,0 160 | Honda Dream Yuga ,2017,0.48,0.54,8600,Petrol,Individual,Manual,0 161 | Honda Activa 4G,2017,0.45,0.51,4000,Petrol,Individual,Automatic,0 162 | Bajaj Avenger Street 220,2011,0.45,0.95,24000,Petrol,Individual,Manual,0 163 | TVS Apache RTR 180,2014,0.45,0.826,23000,Petrol,Individual,Manual,0 164 | Bajaj Pulsar NS 200,2012,0.45,0.99,14500,Petrol,Individual,Manual,0 165 | Bajaj Avenger 220 dtsi,2010,0.45,0.95,27000,Petrol,Individual,Manual,0 166 | Hero Splender iSmart,2016,0.45,0.54,14000,Petrol,Individual,Manual,0 167 | Activa 3g,2016,0.45,0.54,500,Petrol,Individual,Automatic,0 168 | Hero Passion Pro,2016,0.45,0.55,1000,Petrol,Individual,Manual,0 169 | TVS 
Apache RTR 160,2014,0.42,0.81,42000,Petrol,Individual,Manual,0 170 | Honda CB Trigger,2013,0.42,0.73,12000,Petrol,Individual,Manual,0 171 | Hero Splender iSmart,2015,0.4,0.54,14000,Petrol,Individual,Manual,0 172 | Yamaha FZ S ,2012,0.4,0.83,5500,Petrol,Individual,Manual,0 173 | Hero Passion Pro,2015,0.4,0.55,6700,Petrol,Individual,Manual,0 174 | Bajaj Pulsar 135 LS,2014,0.4,0.64,13700,Petrol,Individual,Manual,0 175 | Activa 4g,2017,0.4,0.51,1300,Petrol,Individual,Automatic,0 176 | Honda CB Unicorn,2015,0.38,0.72,38600,Petrol,Individual,Manual,0 177 | Hero Honda CBZ extreme,2011,0.38,0.787,75000,Petrol,Individual,Manual,0 178 | Honda Karizma,2011,0.35,1.05,30000,Petrol,Individual,Manual,0 179 | Honda Activa 125,2016,0.35,0.57,24000,Petrol,Individual,Automatic,0 180 | TVS Jupyter,2014,0.35,0.52,19000,Petrol,Individual,Automatic,0 181 | Honda Karizma,2010,0.31,1.05,213000,Petrol,Individual,Manual,0 182 | Hero Honda Passion Pro,2012,0.3,0.51,60000,Petrol,Individual,Manual,0 183 | Hero Splender Plus,2016,0.3,0.48,50000,Petrol,Individual,Manual,0 184 | Honda CB Shine,2013,0.3,0.58,30000,Petrol,Individual,Manual,0 185 | Bajaj Discover 100,2013,0.27,0.47,21000,Petrol,Individual,Manual,0 186 | Bajaj Pulsar 150,2008,0.25,0.75,26000,Petrol,Individual,Manual,1 187 | Suzuki Access 125,2008,0.25,0.58,1900,Petrol,Individual,Automatic,0 188 | TVS Wego,2010,0.25,0.52,22000,Petrol,Individual,Automatic,0 189 | Honda CB twister,2013,0.25,0.51,32000,Petrol,Individual,Manual,0 190 | Hero Glamour,2013,0.25,0.57,18000,Petrol,Individual,Manual,0 191 | Hero Super Splendor,2005,0.2,0.57,55000,Petrol,Individual,Manual,0 192 | Bajaj Pulsar 150,2008,0.2,0.75,60000,Petrol,Individual,Manual,0 193 | Bajaj Discover 125,2012,0.2,0.57,25000,Petrol,Individual,Manual,1 194 | Hero Hunk,2007,0.2,0.75,49000,Petrol,Individual,Manual,1 195 | Hero Ignitor Disc,2013,0.2,0.65,24000,Petrol,Individual,Manual,1 196 | Hero CBZ Xtreme,2008,0.2,0.787,50000,Petrol,Individual,Manual,0 197 | Bajaj ct 
100,2015,0.18,0.32,35000,Petrol,Individual,Manual,0 198 | Activa 3g,2008,0.17,0.52,500000,Petrol,Individual,Automatic,0 199 | Honda CB twister,2010,0.16,0.51,33000,Petrol,Individual,Manual,0 200 | Bajaj Discover 125,2011,0.15,0.57,35000,Petrol,Individual,Manual,1 201 | Honda CB Shine,2007,0.12,0.58,53000,Petrol,Individual,Manual,0 202 | Bajaj Pulsar 150,2006,0.1,0.75,92233,Petrol,Individual,Manual,0 203 | i20,2010,3.25,6.79,58000,Diesel,Dealer,Manual,1 204 | grand i10,2015,4.4,5.7,28200,Petrol,Dealer,Manual,0 205 | i10,2011,2.95,4.6,53460,Petrol,Dealer,Manual,0 206 | eon,2015,2.75,4.43,28282,Petrol,Dealer,Manual,0 207 | grand i10,2016,5.25,5.7,3493,Petrol,Dealer,Manual,1 208 | xcent,2017,5.75,7.13,12479,Petrol,Dealer,Manual,0 209 | grand i10,2015,5.15,5.7,34797,Petrol,Dealer,Automatic,0 210 | i20,2017,7.9,8.1,3435,Petrol,Dealer,Manual,0 211 | grand i10,2015,4.85,5.7,21125,Diesel,Dealer,Manual,0 212 | i10,2012,3.1,4.6,35775,Petrol,Dealer,Manual,0 213 | elantra,2015,11.75,14.79,43535,Diesel,Dealer,Manual,0 214 | creta,2016,11.25,13.6,22671,Petrol,Dealer,Manual,0 215 | i20,2011,2.9,6.79,31604,Petrol,Dealer,Manual,0 216 | grand i10,2017,5.25,5.7,20114,Petrol,Dealer,Manual,0 217 | verna,2012,4.5,9.4,36100,Petrol,Dealer,Manual,0 218 | eon,2016,2.9,4.43,12500,Petrol,Dealer,Manual,0 219 | eon,2016,3.15,4.43,15000,Petrol,Dealer,Manual,0 220 | verna,2014,6.45,9.4,45078,Petrol,Dealer,Manual,0 221 | verna,2012,4.5,9.4,36000,Petrol,Dealer,Manual,0 222 | eon,2017,3.5,4.43,38488,Petrol,Dealer,Manual,0 223 | i20,2013,4.5,6.79,32000,Petrol,Dealer,Automatic,0 224 | i20,2014,6,7.6,77632,Diesel,Dealer,Manual,0 225 | verna,2015,8.25,9.4,61381,Diesel,Dealer,Manual,0 226 | verna,2013,5.11,9.4,36198,Petrol,Dealer,Automatic,0 227 | i10,2011,2.7,4.6,22517,Petrol,Dealer,Manual,0 228 | grand i10,2015,5.25,5.7,24678,Petrol,Dealer,Manual,0 229 | i10,2011,2.55,4.43,57000,Petrol,Dealer,Manual,0 230 | verna,2012,4.95,9.4,60000,Diesel,Dealer,Manual,0 231 | 
i20,2012,3.1,6.79,52132,Diesel,Dealer,Manual,0 232 | verna,2013,6.15,9.4,45000,Diesel,Dealer,Manual,0 233 | verna,2017,9.25,9.4,15001,Petrol,Dealer,Manual,0 234 | elantra,2015,11.45,14.79,12900,Petrol,Dealer,Automatic,0 235 | grand i10,2013,3.9,5.7,53000,Diesel,Dealer,Manual,0 236 | grand i10,2015,5.5,5.7,4492,Petrol,Dealer,Manual,0 237 | verna,2017,9.1,9.4,15141,Petrol,Dealer,Manual,0 238 | eon,2016,3.1,4.43,11849,Petrol,Dealer,Manual,0 239 | creta,2015,11.25,13.6,68000,Diesel,Dealer,Manual,0 240 | verna,2013,4.8,9.4,60241,Petrol,Dealer,Manual,0 241 | eon,2012,2,4.43,23709,Petrol,Dealer,Manual,0 242 | verna,2012,5.35,9.4,32322,Diesel,Dealer,Manual,0 243 | xcent,2015,4.75,7.13,35866,Petrol,Dealer,Manual,1 244 | xcent,2014,4.4,7.13,34000,Petrol,Dealer,Manual,0 245 | i20,2016,6.25,7.6,7000,Petrol,Dealer,Manual,0 246 | verna,2013,5.95,9.4,49000,Diesel,Dealer,Manual,0 247 | verna,2012,5.2,9.4,71000,Diesel,Dealer,Manual,0 248 | i20,2012,3.75,6.79,35000,Petrol,Dealer,Manual,0 249 | verna,2015,5.95,9.4,36000,Petrol,Dealer,Manual,0 250 | i10,2013,4,4.6,30000,Petrol,Dealer,Manual,0 251 | i20,2016,5.25,7.6,17000,Petrol,Dealer,Manual,0 252 | creta,2016,12.9,13.6,35934,Diesel,Dealer,Manual,0 253 | city,2013,5,9.9,56701,Petrol,Dealer,Manual,0 254 | brio,2015,5.4,6.82,31427,Petrol,Dealer,Automatic,0 255 | city,2014,7.2,9.9,48000,Diesel,Dealer,Manual,0 256 | city,2013,5.25,9.9,54242,Petrol,Dealer,Manual,0 257 | brio,2012,3,5.35,53675,Petrol,Dealer,Manual,0 258 | city,2016,10.25,13.6,49562,Petrol,Dealer,Manual,0 259 | city,2015,8.5,13.6,40324,Petrol,Dealer,Manual,0 260 | city,2015,8.4,13.6,25000,Petrol,Dealer,Manual,0 261 | amaze,2014,3.9,7,36054,Petrol,Dealer,Manual,0 262 | city,2016,9.15,13.6,29223,Petrol,Dealer,Manual,0 263 | brio,2016,5.5,5.97,5600,Petrol,Dealer,Manual,0 264 | amaze,2015,4,5.8,40023,Petrol,Dealer,Manual,0 265 | jazz,2016,6.6,7.7,16002,Petrol,Dealer,Manual,0 266 | amaze,2015,4,7,40026,Petrol,Dealer,Manual,0 267 | jazz,2017,6.5,8.7,21200,Petrol,Dealer,Manual,0 
268 | amaze,2014,3.65,7,35000,Petrol,Dealer,Manual,0 269 | city,2016,8.35,9.4,19434,Diesel,Dealer,Manual,0 270 | brio,2017,4.8,5.8,19000,Petrol,Dealer,Manual,0 271 | city,2015,6.7,10,18828,Petrol,Dealer,Manual,0 272 | city,2011,4.1,10,69341,Petrol,Dealer,Manual,0 273 | city,2009,3,10,69562,Petrol,Dealer,Manual,0 274 | city,2015,7.5,10,27600,Petrol,Dealer,Manual,0 275 | jazz,2010,2.25,7.5,61203,Petrol,Dealer,Manual,0 276 | brio,2014,5.3,6.8,16500,Petrol,Dealer,Manual,0 277 | city,2016,10.9,13.6,30753,Petrol,Dealer,Automatic,0 278 | city,2015,8.65,13.6,24800,Petrol,Dealer,Manual,0 279 | city,2015,9.7,13.6,21780,Petrol,Dealer,Manual,0 280 | jazz,2016,6,8.4,4000,Petrol,Dealer,Manual,0 281 | city,2014,6.25,13.6,40126,Petrol,Dealer,Manual,0 282 | brio,2015,5.25,5.9,14465,Petrol,Dealer,Manual,0 283 | city,2006,2.1,7.6,50456,Petrol,Dealer,Manual,0 284 | city,2014,8.25,14,63000,Diesel,Dealer,Manual,0 285 | city,2016,8.99,11.8,9010,Petrol,Dealer,Manual,0 286 | brio,2013,3.5,5.9,9800,Petrol,Dealer,Manual,0 287 | jazz,2016,7.4,8.5,15059,Petrol,Dealer,Automatic,0 288 | jazz,2016,5.65,7.9,28569,Petrol,Dealer,Manual,0 289 | amaze,2015,5.75,7.5,44000,Petrol,Dealer,Automatic,0 290 | city,2015,8.4,13.6,34000,Petrol,Dealer,Manual,0 291 | city,2016,10.11,13.6,10980,Petrol,Dealer,Manual,0 292 | amaze,2014,4.5,6.4,19000,Petrol,Dealer,Manual,0 293 | brio,2015,5.4,6.1,31427,Petrol,Dealer,Manual,0 294 | jazz,2016,6.4,8.4,12000,Petrol,Dealer,Manual,0 295 | city,2010,3.25,9.9,38000,Petrol,Dealer,Manual,0 296 | amaze,2014,3.75,6.8,33019,Petrol,Dealer,Manual,0 297 | city,2015,8.55,13.09,60076,Diesel,Dealer,Manual,0 298 | city,2016,9.5,11.6,33988,Diesel,Dealer,Manual,0 299 | brio,2015,4,5.9,60000,Petrol,Dealer,Manual,0 300 | city,2009,3.35,11,87934,Petrol,Dealer,Manual,0 301 | city,2017,11.5,12.5,9000,Diesel,Dealer,Manual,0 302 | brio,2016,5.3,5.9,5464,Petrol,Dealer,Manual,0 303 | -------------------------------------------------------------------------------- /data_input/chinook.db: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/data_input/chinook.db -------------------------------------------------------------------------------- /data_input/companies.csv: -------------------------------------------------------------------------------- 1 | ID,Customer Name,Consulting Sales,Software Sales,Forecasted Growth,Returns,Month,Day,Year,Location,Account 2 | 30940,New Media Group,IDR7125000,IDR5500000,30.00%,"IDR1,500,000",1,10,2017,Jakarta,Enterprise 3 | 82391,Li and Partners,IDR420000,IDR820000,10.00%,"IDR400,000",6,15,2016,Jakarta,Startup 4 | 18374,PT. Kreasi Metrik Solusi,0,IDR550403,25.00%,0,3,29,2012,Surabaya,Enterprise 5 | 57531,PT. Algoritma Data Indonesia,IDR850000,IDR395500,4.00%,0,7,17,2017,Jakarta,Startup 6 | 19002,Palembang Konsultansi,IDR2115000,0,-15.00%,0,2,24,2018,Bandung,Startup 7 | 31142,PT. Surya Citra Manajemen,IDR960000,IDR503000,19.00%,0,1,19,2019,Jakarta,Enterprise -------------------------------------------------------------------------------- /data_input/copiers.csv: -------------------------------------------------------------------------------- 1 | Order.ID,Order.Date,Ship.Date,Ship.Mode,Customer.ID,Segment,Product.ID,Category,Sub.Category,Product.Name,Sales,Quantity,Discount,Profit 2 | CA-2015-137946,9/1/15,9/4/15,Second Class,DB-13615,Consumer,TEC-CO-10001449,Technology,Copiers,Hewlett Packard LaserJet 3310 Copier,959.984,2,0.2,335.9944 3 | US-2014-135972,9/21/14,9/23/14,Second Class,JG-15115,Consumer,TEC-CO-10002313,Technology,Copiers,Canon PC1080F Personal Copier,1799.97,3,0,701.9883 4 | CA-2017-117457,12/8/17,12/12/17,Standard Class,KH-16510,Consumer,TEC-CO-10004115,Technology,Copiers,Sharp AL-1530CS Digital Copier,1199.976,3,0.2,434.9913 5 | CA-2017-127432,1/22/17,1/27/17,Standard Class,AD-10180,Home Office,TEC-CO-10003236,Technology,Copiers,Canon Image Class D660 
Copier,2999.95,5,0,1379.977 6 | CA-2014-131450,8/8/14,8/15/14,Standard Class,LR-16915,Consumer,TEC-CO-10004115,Technology,Copiers,Sharp AL-1530CS Digital Copier,1199.976,3,0.2,434.9913 7 | CA-2016-110499,4/7/16,4/9/16,First Class,YC-21895,Corporate,TEC-CO-10002095,Technology,Copiers,Hewlett Packard 610 Color Digital Copier / Printer,1199.976,3,0.2,374.9925 8 | CA-2015-112452,4/4/15,4/4/15,Same Day,NC-18340,Consumer,TEC-CO-10004202,Technology,Copiers,Brother DCP1000 Digital 3 in 1 Multifunction Machine,599.98,2,0,209.993 9 | CA-2016-153682,5/30/16,6/1/16,First Class,BG-11695,Corporate,TEC-CO-10001046,Technology,Copiers,Canon Imageclass D680 Copier / Fax,839.988,2,0.4,69.999 10 | CA-2014-134278,7/6/14,7/8/14,First Class,EP-13915,Consumer,TEC-CO-10001046,Technology,Copiers,Canon Imageclass D680 Copier / Fax,559.992,1,0.2,174.9975 11 | CA-2015-111829,3/19/15,3/20/15,First Class,FH-14365,Corporate,TEC-CO-10001766,Technology,Copiers,Canon PC940 Copier,3149.93,7,0,1480.4671 12 | CA-2016-147417,7/25/16,7/27/16,First Class,CB-12415,Consumer,TEC-CO-10001449,Technology,Copiers,Hewlett Packard LaserJet 3310 Copier,1439.976,4,0.4,191.9968 13 | CA-2015-142944,3/6/15,3/11/15,Standard Class,JL-15850,Consumer,TEC-CO-10003763,Technology,Copiers,Canon PC1060 Personal Laser Copier,1119.984,2,0.2,377.9946 14 | US-2017-117534,3/25/17,3/26/17,First Class,CV-12295,Consumer,TEC-CO-10000971,Technology,Copiers,Hewlett Packard 310 Color Digital Copier,479.984,2,0.2,59.998 15 | US-2016-164630,1/4/16,1/9/16,Standard Class,EB-13975,Corporate,TEC-CO-10000971,Technology,Copiers,Hewlett Packard 310 Color Digital Copier,959.968,4,0.2,119.996 16 | CA-2015-166464,9/12/15,9/17/15,Standard Class,PG-18895,Consumer,TEC-CO-10000971,Technology,Copiers,Hewlett Packard 310 Color Digital Copier,479.984,2,0.2,59.998 17 | US-2015-100377,8/28/15,9/1/15,Standard Class,TS-21370,Corporate,TEC-CO-10001046,Technology,Copiers,Canon Imageclass D680 Copier / Fax,2799.96,5,0.2,874.9875 18 | 
CA-2017-133865,5/8/17,5/12/17,Standard Class,PS-19045,Home Office,TEC-CO-10001046,Technology,Copiers,Canon Imageclass D680 Copier / Fax,3359.952,6,0.2,1049.985 19 | US-2015-131359,10/30/15,11/2/15,Second Class,FA-14230,Corporate,TEC-CO-10001571,Technology,Copiers,Sharp 1540cs Digital Laser Copier,439.992,1,0.2,164.997 20 | CA-2015-104941,6/13/15,6/19/15,Standard Class,DH-13075,Corporate,TEC-CO-10004202,Technology,Copiers,Brother DCP1000 Digital 3 in 1 Multifunction Machine,899.97,3,0,314.9895 21 | CA-2017-161956,8/27/17,8/29/17,Second Class,DR-12880,Corporate,TEC-CO-10001571,Technology,Copiers,Sharp 1540cs Digital Laser Copier,879.984,2,0.2,329.994 22 | CA-2015-140984,9/14/15,9/18/15,Standard Class,CC-12685,Consumer,TEC-CO-10001571,Technology,Copiers,Sharp 1540cs Digital Laser Copier,879.984,2,0.2,329.994 23 | CA-2016-135265,7/7/16,7/9/16,Second Class,CC-12370,Consumer,TEC-CO-10003763,Technology,Copiers,Canon PC1060 Personal Laser Copier,2799.96,5,0.2,944.9865 24 | CA-2015-156104,12/6/15,12/8/15,Second Class,NP-18685,Home Office,TEC-CO-10002095,Technology,Copiers,Hewlett Packard 610 Color Digital Copier / Printer,999.98,2,0,449.991 25 | CA-2015-143105,12/10/15,12/10/15,Same Day,MA-17560,Home Office,TEC-CO-10002095,Technology,Copiers,Hewlett Packard 610 Color Digital Copier / Printer,799.984,2,0.2,249.995 26 | CA-2015-105690,11/21/15,11/26/15,Second Class,CA-11965,Corporate,TEC-CO-10001571,Technology,Copiers,Sharp 1540cs Digital Laser Copier,439.992,1,0.2,164.997 27 | CA-2014-150798,12/1/14,12/3/14,Second Class,JK-15730,Consumer,TEC-CO-10001571,Technology,Copiers,Sharp 1540cs Digital Laser Copier,659.988,2,0.4,109.998 28 | CA-2017-126662,7/17/17,7/21/17,Standard Class,AB-10255,Home Office,TEC-CO-10004202,Technology,Copiers,Brother DCP1000 Digital 3 in 1 Multifunction Machine,479.984,2,0.2,89.997 29 | CA-2017-145219,12/24/17,12/25/17,First Class,RM-19675,Home Office,TEC-CO-10001449,Technology,Copiers,Hewlett Packard LaserJet 3310 Copier,2879.952,6,0.2,1007.9832 30 | 
CA-2016-102162,9/11/16,9/16/16,Standard Class,JF-15565,Consumer,TEC-CO-10001943,Technology,Copiers,Canon PC-428 Personal Copier,1599.92,8,0,751.9624 31 | CA-2016-121370,11/14/16,11/19/16,Second Class,EB-14110,Consumer,TEC-CO-10004115,Technology,Copiers,Sharp AL-1530CS Digital Copier,1199.976,4,0.4,179.9964 32 | CA-2014-124478,8/8/14,8/12/14,Standard Class,MA-17560,Home Office,TEC-CO-10001571,Technology,Copiers,Sharp 1540cs Digital Laser Copier,549.99,1,0,274.995 33 | CA-2017-133263,3/31/17,4/2/17,Second Class,JE-15610,Corporate,TEC-CO-10001449,Technology,Copiers,Hewlett Packard LaserJet 3310 Copier,2999.95,5,0,1439.976 34 | CA-2017-157966,3/13/17,3/13/17,Same Day,SU-20665,Home Office,TEC-CO-10001449,Technology,Copiers,Hewlett Packard LaserJet 3310 Copier,959.984,2,0.2,335.9944 35 | US-2015-160857,5/8/15,5/15/15,Standard Class,NW-18400,Consumer,TEC-CO-10004115,Technology,Copiers,Sharp AL-1530CS Digital Copier,2799.944,7,0.2,1014.9797 36 | CA-2014-163748,10/14/14,10/18/14,Standard Class,HG-15025,Consumer,TEC-CO-10002095,Technology,Copiers,Hewlett Packard 610 Color Digital Copier / Printer,1999.96,5,0.2,624.9875 37 | CA-2015-158939,11/26/15,12/1/15,Standard Class,EA-14035,Corporate,TEC-CO-10002313,Technology,Copiers,Canon PC1080F Personal Copier,599.99,1,0,233.9961 38 | CA-2015-121783,11/10/15,11/14/15,Standard Class,PO-19180,Home Office,TEC-CO-10001571,Technology,Copiers,Sharp 1540cs Digital Laser Copier,549.99,1,0,274.995 39 | CA-2015-111780,12/25/15,12/30/15,Second Class,RA-19285,Consumer,TEC-CO-10004202,Technology,Copiers,Brother DCP1000 Digital 3 in 1 Multifunction Machine,1199.96,5,0.2,224.9925 40 | CA-2015-146675,4/16/15,4/20/15,Standard Class,SB-20185,Consumer,TEC-CO-10001766,Technology,Copiers,Canon PC940 Copier,1439.968,4,0.2,485.9892 41 | CA-2016-157791,12/23/16,12/28/16,Second Class,CA-11965,Corporate,TEC-CO-10002095,Technology,Copiers,Hewlett Packard 610 Color Digital Copier / Printer,1999.96,4,0,899.982 42 | US-2017-124779,9/8/17,9/11/17,First 
Class,BF-11020,Corporate,TEC-CO-10001943,Technology,Copiers,Canon PC-428 Personal Copier,319.984,2,0.2,107.9946 43 | US-2017-135013,7/24/17,7/24/17,Same Day,HR-14830,Corporate,TEC-CO-10001449,Technology,Copiers,Hewlett Packard LaserJet 3310 Copier,2399.96,5,0.2,839.986 44 | CA-2014-124618,5/2/14,5/4/14,Second Class,CS-11860,Consumer,TEC-CO-10004202,Technology,Copiers,Brother DCP1000 Digital 3 in 1 Multifunction Machine,479.984,2,0.2,89.997 45 | US-2017-165358,7/18/17,7/23/17,Standard Class,SV-20365,Consumer,TEC-CO-10001943,Technology,Copiers,Canon PC-428 Personal Copier,599.97,5,0.4,69.9965 46 | US-2017-141677,3/26/17,3/30/17,Standard Class,HK-14890,Corporate,TEC-CO-10002313,Technology,Copiers,Canon PC1080F Personal Copier,2399.96,5,0.2,569.9905 47 | US-2015-136987,4/11/15,4/14/15,Second Class,AR-10540,Consumer,TEC-CO-10001943,Technology,Copiers,Canon PC-428 Personal Copier,639.968,4,0.2,215.9892 48 | CA-2016-139997,7/1/16,7/3/16,First Class,EM-14140,Home Office,TEC-CO-10000971,Technology,Copiers,Hewlett Packard 310 Color Digital Copier,1499.95,5,0,449.985 49 | US-2017-167920,12/9/17,12/12/17,Second Class,JL-15835,Consumer,TEC-CO-10001046,Technology,Copiers,Canon Imageclass D680 Copier / Fax,1399.98,2,0,629.991 50 | CA-2017-151799,12/14/17,12/18/17,Standard Class,BF-11170,Home Office,TEC-CO-10002313,Technology,Copiers,Canon PC1080F Personal Copier,1199.98,2,0,467.9922 51 | CA-2014-138128,12/9/14,12/15/14,Standard Class,FP-14320,Consumer,TEC-CO-10001766,Technology,Copiers,Canon PC940 Copier,1079.976,4,0.4,125.9972 52 | CA-2016-166429,9/2/16,9/8/16,Standard Class,TG-21310,Consumer,TEC-CO-10002095,Technology,Copiers,Hewlett Packard 610 Color Digital Copier / Printer,999.98,2,0,449.991 53 | CA-2017-152310,8/12/17,8/19/17,Standard Class,DK-12895,Consumer,TEC-CO-10000971,Technology,Copiers,Hewlett Packard 310 Color Digital Copier,299.99,1,0,89.997 54 | CA-2015-109190,10/23/15,10/28/15,Standard Class,CC-12685,Consumer,TEC-CO-10001943,Technology,Copiers,Canon PC-428 
Personal Copier,479.976,3,0.2,161.9919 55 | CA-2017-147354,3/9/17,3/13/17,Standard Class,KB-16315,Consumer,TEC-CO-10000971,Technology,Copiers,Hewlett Packard 310 Color Digital Copier,479.984,2,0.2,59.998 56 | CA-2014-140473,5/30/14,6/3/14,Standard Class,MC-17425,Corporate,TEC-CO-10004202,Technology,Copiers,Brother DCP1000 Digital 3 in 1 Multifunction Machine,719.976,3,0.2,134.9955 57 | CA-2016-162390,12/6/16,12/12/16,Standard Class,DP-13105,Corporate,TEC-CO-10004202,Technology,Copiers,Brother DCP1000 Digital 3 in 1 Multifunction Machine,479.984,2,0.2,89.997 58 | CA-2014-116666,5/8/14,5/10/14,First Class,KT-16480,Consumer,TEC-CO-10001449,Technology,Copiers,Hewlett Packard LaserJet 3310 Copier,1799.97,5,0.4,239.996 59 | CA-2017-100622,11/3/17,11/7/17,Standard Class,DK-13090,Consumer,TEC-CO-10003236,Technology,Copiers,Canon Image Class D660 Copier,959.984,2,0.2,311.9948 60 | US-2015-128587,12/24/15,12/30/15,Standard Class,HM-14860,Corporate,TEC-CO-10003763,Technology,Copiers,Canon PC1060 Personal Laser Copier,4899.93,7,0,2302.9671 61 | CA-2017-160633,11/16/17,11/21/17,Standard Class,BS-11380,Corporate,TEC-CO-10002095,Technology,Copiers,Hewlett Packard 610 Color Digital Copier / Printer,899.982,3,0.4,74.9985 62 | US-2016-125402,9/25/16,10/1/16,Standard Class,DL-12865,Consumer,TEC-CO-10001943,Technology,Copiers,Canon PC-428 Personal Copier,479.976,3,0.2,161.9919 63 | CA-2016-129630,9/4/16,9/4/16,Same Day,IM-15055,Consumer,TEC-CO-10003763,Technology,Copiers,Canon PC1060 Personal Laser Copier,2799.96,5,0.2,944.9865 64 | -------------------------------------------------------------------------------- /data_input/country_sales.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/data_input/country_sales.pkl -------------------------------------------------------------------------------- /data_input/crime.csv: 
-------------------------------------------------------------------------------- 1 | "","M","So","Ed","Po1","Po2","LF","M.F","Pop","NW","U1","U2","GDP","Ineq","Prob","Time","y" 2 | "1",151,1,91,58,56,510,950,33,301,108,41,394,261,0.084602,26.2011,791 3 | "2",143,0,113,103,95,583,1012,13,102,96,36,557,194,0.029599,25.2999,1635 4 | "3",142,1,89,45,44,533,969,18,219,94,33,318,250,0.083401,24.3006,578 5 | "4",136,0,121,149,141,577,994,157,80,102,39,673,167,0.015801,29.9012,1969 6 | "5",141,0,121,109,101,591,985,18,30,91,20,578,174,0.041399,21.2998,1234 7 | "6",121,0,110,118,115,547,964,25,44,84,29,689,126,0.034201,20.9995,682 8 | "7",127,1,111,82,79,519,982,4,139,97,38,620,168,0.0421,20.6993,963 9 | "8",131,1,109,115,109,542,969,50,179,79,35,472,206,0.040099,24.5988,1555 10 | "9",157,1,90,65,62,553,955,39,286,81,28,421,239,0.071697,29.4001,856 11 | "10",140,0,118,71,68,632,1029,7,15,100,24,526,174,0.044498,19.5994,705 12 | "11",124,0,105,121,116,580,966,101,106,77,35,657,170,0.016201,41.6,1674 13 | "12",134,0,108,75,71,595,972,47,59,83,31,580,172,0.031201,34.2984,849 14 | "13",128,0,113,67,60,624,972,28,10,77,25,507,206,0.045302,36.2993,511 15 | "14",135,0,117,62,61,595,986,22,46,77,27,529,190,0.0532,21.501,664 16 | "15",152,1,87,57,53,530,986,30,72,92,43,405,264,0.0691,22.7008,798 17 | "16",142,1,88,81,77,497,956,33,321,116,47,427,247,0.052099,26.0991,946 18 | "17",143,0,110,66,63,537,977,10,6,114,35,487,166,0.076299,19.1002,539 19 | "18",135,1,104,123,115,537,978,31,170,89,34,631,165,0.119804,18.1996,929 20 | "19",130,0,116,128,128,536,934,51,24,78,34,627,135,0.019099,24.9008,750 21 | "20",125,0,108,113,105,567,985,78,94,130,58,626,166,0.034801,26.401,1225 22 | "21",126,0,108,74,67,602,984,34,12,102,33,557,195,0.0228,37.5998,742 23 | "22",157,1,89,47,44,512,962,22,423,97,34,288,276,0.089502,37.0994,439 24 | "23",132,0,96,87,83,564,953,43,92,83,32,513,227,0.0307,25.1989,1216 25 | "24",131,0,116,78,73,574,1038,7,36,142,42,540,176,0.041598,17.6,968 26 | 
"25",130,0,116,63,57,641,984,14,26,70,21,486,196,0.069197,21.9003,523 27 | "26",131,0,121,160,143,631,1071,3,77,102,41,674,152,0.041698,22.1005,1993 28 | "27",135,0,109,69,71,540,965,6,4,80,22,564,139,0.036099,28.4999,342 29 | "28",152,0,112,82,76,571,1018,10,79,103,28,537,215,0.038201,25.8006,1216 30 | "29",119,0,107,166,157,521,938,168,89,92,36,637,154,0.0234,36.7009,1043 31 | "30",166,1,89,58,54,521,973,46,254,72,26,396,237,0.075298,28.3011,696 32 | "31",140,0,93,55,54,535,1045,6,20,135,40,453,200,0.041999,21.7998,373 33 | "32",125,0,109,90,81,586,964,97,82,105,43,617,163,0.042698,30.9014,754 34 | "33",147,1,104,63,64,560,972,23,95,76,24,462,233,0.049499,25.5005,1072 35 | "34",126,0,118,97,97,542,990,18,21,102,35,589,166,0.040799,21.6997,923 36 | "35",123,0,102,97,87,526,948,113,76,124,50,572,158,0.0207,37.4011,653 37 | "36",150,0,100,109,98,531,964,9,24,87,38,559,153,0.0069,44.0004,1272 38 | "37",177,1,87,58,56,638,974,24,349,76,28,382,254,0.045198,31.6995,831 39 | "38",133,0,104,51,47,599,1024,7,40,99,27,425,225,0.053998,16.6999,566 40 | "39",149,1,88,61,54,515,953,36,165,86,35,395,251,0.047099,27.3004,826 41 | "40",145,1,104,82,74,560,981,96,126,88,31,488,228,0.038801,29.3004,1151 42 | "41",148,0,122,72,66,601,998,9,19,84,20,590,144,0.0251,30.0001,880 43 | "42",141,0,109,56,54,523,968,4,2,107,37,489,170,0.088904,12.1996,542 44 | "43",162,1,99,75,70,522,996,40,208,73,27,496,224,0.054902,31.9989,823 45 | "44",136,0,121,95,96,574,1012,29,36,111,37,622,162,0.0281,30.0001,1030 46 | "45",139,1,88,46,41,480,968,19,49,135,53,457,249,0.056202,32.5996,455 47 | "46",126,0,104,106,97,599,989,40,24,78,25,593,171,0.046598,16.6999,508 48 | "47",130,0,121,90,91,623,1049,3,22,113,40,588,160,0.052802,16.0997,849 49 | -------------------------------------------------------------------------------- /data_input/flights.db: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/data_input/flights.db -------------------------------------------------------------------------------- /data_input/loan2018q1.csv: -------------------------------------------------------------------------------- 1 | initial_list_status,purpose,int_rate,installment,annual_inc,dti,verification_status,grade,revol_bal,inq_last_12m,delinq_2yrs,home_ownership,log_inc,verified,grdCtoA,not_paid 2 | w,credit_card,14.08,342.17,60000.0,20.28,Source Verified,C,18518,1,0,RENT,11.002099841204199,1,0,1 3 | w,debt_consolidation,19.03,597.02,86000.0,30.16,Verified,D,40998,9,0,MORTGAGE,11.3621025752356,1,0,0 4 | w,debt_consolidation,23.88,717.46,27000.0,36.67,Source Verified,E,7166,1,0,RENT,10.2035921449865,1,0,1 5 | w,debt_consolidation,7.21,557.52,80000.0,35.63,Not Verified,A,6859,2,0,MORTGAGE,11.289781913656,0,1,0 6 | w,home_improvement,6.72,92.25,70000.0,7.27,Not Verified,A,2424,1,0,MORTGAGE,11.156250521031499,0,1,0 7 | w,debt_consolidation,10.42,858.18,180000.0,5.83,Source Verified,B,9458,6,0,MORTGAGE,12.100712129872301,1,1,0 8 | w,debt_consolidation,19.03,311.49,81400.0,8.06,Source Verified,D,6800,4,0,MORTGAGE,11.3071305519906,1,0,0 9 | w,debt_consolidation,21.45,409.61,80000.0,22.81,Source Verified,D,14103,0,0,MORTGAGE,11.289781913656,1,0,1 10 | w,credit_card,9.44,320.05,74000.0,14.24,Not Verified,B,43231,4,1,MORTGAGE,11.2118203721863,0,1,1 11 | w,home_improvement,11.99,344.72,75000.0,4.53,Verified,B,2124,8,0,MORTGAGE,11.2252433925184,1,1,1 12 | w,credit_card,10.42,350.62,49500.0,19.03,Not Verified,B,12509,2,1,RENT,10.8097279485568,0,1,1 13 | w,home_improvement,7.35,465.57,63000.0,16.86,Not Verified,A,26853,1,0,MORTGAGE,11.0508900053737,0,1,0 14 | f,debt_consolidation,26.3,809.02,34800.0,26.41,Verified,E,4053,3,0,OWN,10.4573726657626,1,0,0 15 | w,debt_consolidation,16.02,351.67,120000.0,30.75,Not Verified,C,11691,11,1,MORTGAGE,11.6952470217642,0,0,1 16 | 
w,debt_consolidation,16.02,351.67,50000.0,13.87,Verified,C,0,1,0,MORTGAGE,10.819778284410301,1,0,1 17 | w,credit_card,13.59,461.13,98000.0,14.44,Verified,C,25100,3,0,MORTGAGE,11.4927227576527,1,0,1 18 | f,major_purchase,9.44,64.01,68000.0,17.49,Not Verified,B,14579,1,0,OWN,11.127262984158198,0,1,1 19 | w,home_improvement,7.35,465.57,47000.0,21.91,Not Verified,A,5053,0,0,MORTGAGE,10.757902880692198,0,1,1 20 | w,credit_card,12.62,117.29,42350.0,22.52,Not Verified,C,22418,4,0,OWN,10.6537237000802,0,0,1 21 | w,debt_consolidation,6.08,304.59,40000.0,7.05,Not Verified,A,12114,0,0,MORTGAGE,10.5966347330961,0,1,1 22 | f,debt_consolidation,17.09,178.49,48500.0,24.57,Source Verified,D,12876,2,1,RENT,10.789319076925599,1,0,0 23 | f,credit_card,7.21,322.13,26000.0,10.91,Verified,A,8486,0,0,OWN,10.165851817003599,1,1,1 24 | f,debt_consolidation,19.03,110.02,35000.0,24.74,Verified,D,3710,2,0,RENT,10.4631033404715,1,0,0 25 | f,credit_card,7.21,464.6,80000.0,30.68,Not Verified,A,17613,3,0,OWN,11.289781913656,0,1,0 26 | w,credit_card,6.72,107.63,57000.0,24.74,Not Verified,A,14125,3,0,RENT,10.950806546816699,0,1,0 27 | f,credit_card,19.03,88.02,38000.0,31.93,Not Verified,D,1844,4,0,RENT,10.5453414387085,0,0,0 28 | f,debt_consolidation,22.91,1353.2,127000.0,19.47,Verified,E,18124,5,2,MORTGAGE,11.7519423654407,1,0,1 29 | w,debt_consolidation,21.45,955.75,60000.0,21.46,Verified,D,6983,2,0,RENT,11.002099841204199,1,0,1 30 | w,home_improvement,10.42,162.33,25000.0,44.07,Verified,B,19898,0,0,OWN,10.1266311038503,1,1,0 31 | w,debt_consolidation,24.85,643.8,60000.0,38.16,Source Verified,E,18341,8,0,MORTGAGE,11.002099841204199,1,0,1 32 | f,debt_consolidation,23.88,317.28,51000.0,24.71,Not Verified,E,4742,4,0,RENT,10.8395809117065,0,0,1 33 | w,debt_consolidation,11.99,322.48,102000.0,22.4,Not Verified,B,0,4,0,MORTGAGE,11.5327280922664,0,1,0 34 | w,debt_consolidation,7.21,278.76,50000.0,11.0,Not Verified,A,7801,3,0,RENT,10.819778284410301,0,1,0 35 | 
f,debt_consolidation,15.05,145.7,125000.0,13.86,Not Verified,C,8660,9,0,MORTGAGE,11.736069016284402,0,0,0 36 | w,home_improvement,15.05,400.12,250000.0,10.42,Source Verified,C,9302,4,1,OWN,12.4292161968444,1,0,1 37 | w,debt_consolidation,13.59,508.4,66000.0,27.75,Verified,C,10949,0,0,MORTGAGE,11.0974100210086,1,0,0 38 | w,debt_consolidation,23.88,195.85,53000.0,33.49,Not Verified,E,3212,0,0,RENT,10.878047192534302,0,0,1 39 | w,debt_consolidation,9.93,483.52,88500.0,35.06,Source Verified,B,25599,1,0,MORTGAGE,11.390757830996002,1,1,1 40 | w,debt_consolidation,9.44,251.68,68000.0,12.28,Source Verified,B,21180,0,0,RENT,11.127262984158198,1,1,0 41 | w,debt_consolidation,18.06,408.73,85000.0,17.66,Verified,D,11719,1,0,RENT,11.3504065354725,1,0,0 42 | f,home_improvement,15.05,346.9,50000.0,28.16,Not Verified,C,12959,1,0,MORTGAGE,10.819778284410301,0,0,1 43 | f,debt_consolidation,10.42,486.98,68000.0,21.73,Not Verified,B,6096,1,0,RENT,11.127262984158198,0,1,0 44 | w,debt_consolidation,18.06,1266.39,75000.0,25.66,Verified,D,10466,6,0,MORTGAGE,11.2252433925184,1,0,0 45 | w,debt_consolidation,7.97,313.23,25000.0,85.93,Verified,A,12520,6,0,MORTGAGE,10.1266311038503,1,1,1 46 | w,debt_consolidation,26.3,599.36,123000.0,20.38,Verified,E,51164,3,0,MORTGAGE,11.7199396343546,1,0,1 47 | w,debt_consolidation,10.42,472.0,82000.0,22.22,Verified,B,6602,1,0,RENT,11.3144745262464,1,1,0 48 | f,home_improvement,9.44,672.11,65000.0,7.55,Source Verified,B,1329,3,2,MORTGAGE,11.082142548877801,1,1,0 49 | w,home_improvement,19.03,183.36,82000.0,32.04,Source Verified,D,25271,2,2,MORTGAGE,11.3144745262464,1,0,1 50 | w,debt_consolidation,9.44,672.11,65000.0,22.3,Not Verified,B,25183,1,2,MORTGAGE,11.082142548877801,0,1,1 51 | w,debt_consolidation,25.82,775.69,90000.0,22.92,Verified,E,23385,3,2,MORTGAGE,11.4075649493124,1,0,1 52 | w,debt_consolidation,15.05,97.14,15000.0,8.8,Source Verified,C,2337,1,2,RENT,9.61580548008435,1,0,0 53 | w,debt_consolidation,13.59,924.23,64000.0,18.64,Source 
Verified,C,6660,2,0,RENT,11.0666383623418,1,0,1 54 | w,credit_card,12.62,201.07,15000.0,26.96,Not Verified,C,8268,0,0,RENT,9.61580548008435,0,0,1 55 | f,debt_consolidation,29.69,914.25,120000.0,10.14,Verified,F,11689,7,0,RENT,11.6952470217642,1,0,1 56 | f,home_improvement,12.62,180.97,82000.0,12.79,Source Verified,C,8726,7,0,MORTGAGE,11.3144745262464,1,0,1 57 | w,debt_consolidation,9.93,128.94,42000.0,1.03,Not Verified,B,1752,2,1,RENT,10.6454248972655,0,1,1 58 | w,debt_consolidation,18.06,1157.85,198000.0,2.29,Verified,D,20647,3,0,RENT,12.1960223096767,1,0,1 59 | w,credit_card,7.35,155.19,56000.0,4.41,Not Verified,A,2734,1,0,MORTGAGE,10.9331069697173,0,1,0 60 | f,debt_consolidation,18.06,542.74,75000.0,8.64,Source Verified,D,42,6,2,OWN,11.2252433925184,1,0,1 61 | f,debt_consolidation,12.62,46.92,31665.71,0.45,Verified,C,12,8,0,RENT,10.362989670931901,1,0,0 62 | w,debt_consolidation,13.59,806.97,45000.0,22.13,Source Verified,C,7288,1,1,MORTGAGE,10.714417768752499,1,0,1 63 | f,debt_consolidation,10.91,199.45,18000.0,12.51,Source Verified,B,5883,1,0,RENT,9.7981270368783,1,1,0 64 | w,home_improvement,28.72,315.72,43350.0,25.89,Verified,F,3002,4,0,MORTGAGE,10.677061982208699,1,0,1 65 | w,debt_consolidation,14.08,684.33,100000.0,18.58,Source Verified,C,25669,0,1,MORTGAGE,11.5129254649702,1,0,1 66 | w,credit_card,7.97,469.84,60000.0,19.2,Not Verified,A,25011,2,0,MORTGAGE,11.002099841204199,0,1,0 67 | w,debt_consolidation,15.05,346.9,95000.0,13.68,Verified,C,12568,6,0,OWN,11.4616321705827,1,0,0 68 | f,debt_consolidation,17.09,160.64,60000.0,10.26,Source Verified,D,16483,0,0,RENT,11.002099841204199,1,0,1 69 | w,debt_consolidation,11.99,498.15,52000.0,17.1,Not Verified,B,7605,8,0,MORTGAGE,10.858998997563601,0,1,0 70 | w,debt_consolidation,15.05,476.33,48000.0,38.4,Not Verified,C,8101,0,0,MORTGAGE,10.778956289889999,0,0,1 71 | w,credit_card,19.03,73.35,30000.0,23.48,Source Verified,D,2106,2,0,RENT,10.308952660644302,1,0,0 72 | 
w,credit_card,13.59,203.88,60000.0,24.36,Verified,C,7597,5,0,RENT,11.002099841204199,1,0,1 73 | f,debt_consolidation,10.91,326.97,36000.0,18.8,Source Verified,B,3080,0,1,RENT,10.4912742174382,1,1,1 74 | w,debt_consolidation,16.02,486.58,55000.0,7.53,Verified,C,2680,2,0,MORTGAGE,10.9150884642146,1,0,1 75 | w,credit_card,10.42,155.84,48000.0,16.6,Not Verified,B,35682,0,0,MORTGAGE,10.778956289889999,0,1,1 76 | w,debt_consolidation,6.08,1218.33,105000.0,1.9,Not Verified,A,7025,2,0,MORTGAGE,11.5617156291397,0,1,1 77 | w,debt_consolidation,9.93,464.18,75000.0,14.8,Source Verified,B,25068,0,1,OWN,11.2252433925184,1,1,1 78 | w,home_improvement,11.99,149.45,88282.0,8.37,Source Verified,B,5604,4,0,MORTGAGE,11.3882915153021,1,1,0 79 | f,debt_consolidation,12.62,804.27,95000.0,24.82,Not Verified,C,2477,6,0,RENT,11.4616321705827,0,0,0 80 | w,home_improvement,6.08,304.59,42000.0,30.84,Source Verified,A,9448,0,0,MORTGAGE,10.6454248972655,1,1,0 81 | w,debt_consolidation,9.93,354.58,75000.0,1.52,Verified,B,2903,0,4,MORTGAGE,11.2252433925184,1,1,1 82 | w,debt_consolidation,16.02,218.92,39580.0,37.86,Verified,C,3374,4,0,MORTGAGE,10.586079219156598,1,0,0 83 | w,debt_consolidation,22.91,38.67,55000.0,35.71,Source Verified,E,11673,2,3,OWN,10.9150884642146,1,0,0 84 | w,debt_consolidation,18.06,330.54,98000.0,7.07,Source Verified,D,2802,0,0,RENT,11.4927227576527,1,0,1 85 | w,debt_consolidation,10.91,326.97,50000.0,18.72,Not Verified,B,6511,3,0,RENT,10.819778284410301,0,1,0 86 | w,debt_consolidation,11.99,996.29,146000.0,4.35,Not Verified,B,9340,2,0,RENT,11.891361900690502,0,1,0 87 | f,credit_card,9.44,320.05,85000.0,22.15,Not Verified,B,10413,3,0,RENT,11.3504065354725,0,1,0 88 | w,major_purchase,19.03,363.4,50000.0,33.03,Not Verified,D,2075,1,0,RENT,10.819778284410301,0,0,1 89 | w,debt_consolidation,16.02,351.67,95000.0,31.91,Not Verified,C,18258,0,0,MORTGAGE,11.4616321705827,0,0,0 90 | f,debt_consolidation,9.93,154.73,70000.0,21.43,Verified,B,225,0,0,RENT,11.156250521031499,1,1,0 91 | 
w,credit_card,14.08,839.16,150000.0,27.03,Verified,C,35581,0,0,MORTGAGE,11.918390573078401,1,0,1 92 | w,credit_card,11.99,332.1,104000.0,15.01,Source Verified,B,13034,3,0,MORTGAGE,11.5521461781235,1,1,1 93 | w,credit_card,11.99,166.05,27000.0,37.42,Not Verified,B,11290,1,0,OWN,10.2035921449865,0,1,1 94 | w,debt_consolidation,15.05,520.35,65000.0,11.91,Not Verified,C,14102,0,1,RENT,11.082142548877801,0,0,0 95 | f,debt_consolidation,11.99,166.05,31020.0,20.7,Verified,B,7230,0,0,RENT,10.342387436730501,1,1,0 96 | w,credit_card,13.59,254.85,40000.0,25.8,Verified,C,13300,1,0,MORTGAGE,10.5966347330961,1,0,0 97 | w,credit_card,11.99,830.24,190000.0,14.06,Not Verified,B,43830,1,0,OWN,12.154779351142599,0,1,1 98 | w,credit_card,11.99,249.09,99000.0,26.68,Source Verified,B,15471,1,0,MORTGAGE,11.502875129116699,1,1,0 99 | w,home_improvement,14.08,205.3,180000.0,17.1,Source Verified,C,85294,2,0,MORTGAGE,12.100712129872301,1,0,0 100 | w,debt_consolidation,7.97,313.23,75000.0,7.74,Not Verified,A,12649,1,2,MORTGAGE,11.2252433925184,0,1,0 101 | w,debt_consolidation,25.82,761.52,70891.0,25.34,Source Verified,E,11340,5,0,MORTGAGE,11.168898765111498,1,0,1 102 | w,credit_card,10.42,162.33,68500.0,12.58,Not Verified,B,6066,2,3,RENT,11.1345890242503,0,1,0 103 | w,credit_card,10.91,457.75,65000.0,14.99,Source Verified,B,11011,0,0,RENT,11.082142548877801,1,1,1 104 | f,credit_card,20.0,178.39,38000.0,13.74,Not Verified,D,2603,8,0,RENT,10.5453414387085,0,0,0 105 | w,debt_consolidation,21.45,546.15,62000.0,33.47,Not Verified,D,38746,0,0,RENT,11.0348896640272,0,0,1 106 | w,debt_consolidation,20.0,264.94,31729.0,15.47,Source Verified,D,4221,1,0,OWN,10.3649863681651,1,0,1 107 | w,credit_card,14.08,188.2,41919.0,48.91,Verified,C,3910,3,0,RENT,10.643494463748599,1,0,0 108 | w,debt_consolidation,24.85,877.91,30000.0,37.52,Source Verified,E,13242,2,0,RENT,10.308952660644302,1,0,0 109 | w,home_improvement,14.08,559.44,36000.0,8.8,Source Verified,C,25,0,0,RENT,10.4912742174382,1,0,1 110 | 
w,debt_consolidation,14.08,261.07,57000.0,26.32,Not Verified,C,11152,2,0,MORTGAGE,10.950806546816699,0,0,0 111 | w,debt_consolidation,6.08,365.5,95000.0,30.04,Not Verified,A,13562,2,0,MORTGAGE,11.4616321705827,0,1,0 112 | w,credit_card,10.42,162.33,66000.0,21.4,Not Verified,B,3376,0,0,RENT,11.0974100210086,0,1,0 113 | w,home_improvement,11.99,1328.39,188000.0,7.83,Source Verified,B,2102,6,0,MORTGAGE,12.144197241812101,1,1,0 114 | f,credit_card,13.59,489.3,40000.0,27.42,Not Verified,C,13251,3,0,RENT,10.5966347330961,0,0,1 115 | w,debt_consolidation,7.97,1096.29,135000.0,3.92,Source Verified,A,8159,2,0,RENT,11.8130300574206,1,1,0 116 | f,debt_consolidation,21.45,382.3,70000.0,10.94,Not Verified,D,13348,0,0,RENT,11.156250521031499,0,0,0 117 | w,home_improvement,10.42,321.82,49800.0,2.46,Source Verified,B,7316,2,0,MORTGAGE,10.8157702630127,1,1,0 118 | w,debt_consolidation,13.59,217.47,37000.0,15.08,Not Verified,C,3338,3,0,RENT,10.518673191626402,0,0,0 119 | f,debt_consolidation,13.59,407.75,59075.0,11.31,Not Verified,C,4549,11,2,RENT,10.986563102055099,0,0,0 120 | w,credit_card,9.44,307.25,45000.0,19.24,Source Verified,B,10122,0,0,RENT,10.714417768752499,1,1,1 121 | f,debt_consolidation,19.03,660.09,50000.0,11.45,Verified,D,4317,0,0,RENT,10.819778284410301,1,0,1 122 | w,credit_card,10.42,311.67,26000.0,32.66,Verified,B,7723,2,0,RENT,10.165851817003599,1,1,0 123 | w,debt_consolidation,17.09,448.22,90000.0,17.09,Source Verified,D,12781,1,0,RENT,11.4075649493124,1,0,1 124 | w,debt_consolidation,14.08,492.72,73000.0,16.54,Source Verified,C,9139,2,0,RENT,11.198214720130501,1,0,1 125 | f,credit_card,26.3,302.37,22000.0,8.4,Verified,E,4190,1,0,OWN,9.99879773234045,1,0,0 126 | w,debt_consolidation,10.91,719.32,145000.0,15.96,Not Verified,B,42784,0,0,RENT,11.8844890214027,0,1,1 127 | w,debt_consolidation,13.59,101.94,44000.0,12.6,Not Verified,C,8869,1,0,RENT,10.6919449129004,0,0,0 128 | w,home_improvement,28.72,659.85,55000.0,5.13,Verified,F,7335,4,0,RENT,10.9150884642146,1,0,1 
129 | w,credit_card,6.72,147.6,140000.0,3.0,Not Verified,A,2887,0,0,RENT,11.849397701591402,0,1,0 130 | f,debt_consolidation,9.44,1280.2,104000.0,0.61,Not Verified,B,183,0,0,RENT,11.5521461781235,0,1,0 131 | w,debt_consolidation,25.82,895.02,150000.0,8.7,Source Verified,E,25556,0,0,RENT,11.918390573078401,1,0,0 132 | f,credit_card,9.44,384.06,60000.0,21.14,Source Verified,B,11851,1,0,RENT,11.002099841204199,1,1,1 133 | w,debt_consolidation,10.42,486.98,70000.0,15.49,Verified,B,11713,2,0,MORTGAGE,11.156250521031499,1,1,1 134 | w,small_business,20.0,111.5,30000.0,7.96,Verified,D,6922,0,0,RENT,10.308952660644302,1,0,1 135 | f,debt_consolidation,14.08,325.06,75000.0,28.54,Not Verified,C,7529,6,0,RENT,11.2252433925184,0,0,0 136 | f,debt_consolidation,11.99,830.24,35000.0,17.94,Not Verified,B,21042,1,0,RENT,10.4631033404715,0,1,0 137 | w,debt_consolidation,7.97,626.46,47000.0,9.01,Verified,A,1278,0,0,OWN,10.757902880692198,1,1,1 138 | f,major_purchase,16.02,175.84,138000.0,2.16,Source Verified,C,10,3,0,RENT,11.835008964139302,1,0,1 139 | w,debt_consolidation,23.88,803.56,70200.0,25.49,Not Verified,E,7836,8,0,RENT,11.159103590013899,0,0,1 140 | w,debt_consolidation,25.82,298.34,65000.0,26.45,Source Verified,E,2993,3,0,MORTGAGE,11.082142548877801,1,0,0 141 | w,home_improvement,7.35,183.9,63400.0,1.04,Not Verified,A,1824,0,0,MORTGAGE,11.0572191404253,0,1,0 142 | w,home_improvement,12.62,225.6,40000.0,17.16,Not Verified,C,2682,1,0,MORTGAGE,10.5966347330961,0,0,1 143 | w,debt_consolidation,9.44,335.57,58000.0,10.51,Verified,B,1145,3,0,MORTGAGE,10.9681982895286,1,1,1 144 | w,debt_consolidation,20.0,927.29,95000.0,16.42,Not Verified,D,15719,1,0,MORTGAGE,11.4616321705827,0,0,0 145 | f,debt_consolidation,9.44,320.05,17976.0,7.81,Not Verified,B,1438,1,1,OWN,9.79679281386517,0,1,1 146 | f,debt_consolidation,16.02,147.71,22000.0,15.38,Verified,C,256,2,0,MORTGAGE,9.99879773234045,1,0,0 147 | f,debt_consolidation,9.93,1289.38,126000.0,18.33,Source 
Verified,B,53195,1,0,RENT,11.7440371859336,1,1,0 148 | f,debt_consolidation,30.79,525.45,17004.0,24.91,Verified,G,14672,1,2,MORTGAGE,9.74120388947868,1,0,0 149 | w,debt_consolidation,21.45,955.75,80000.0,16.2,Source Verified,D,31315,1,0,RENT,11.289781913656,1,0,0 150 | w,debt_consolidation,7.21,216.82,130000.0,5.46,Not Verified,A,15181,2,1,OWN,11.775289729437699,0,1,0 151 | w,major_purchase,11.99,418.45,30000.0,4.28,Verified,B,3506,0,0,MORTGAGE,10.308952660644302,1,1,1 152 | w,debt_consolidation,21.45,303.26,48000.0,20.6,Source Verified,D,4633,2,1,RENT,10.778956289889999,1,0,1 153 | f,credit_card,16.02,486.58,85000.0,10.56,Not Verified,C,20873,0,0,MORTGAGE,11.3504065354725,0,0,1 154 | w,debt_consolidation,21.45,327.69,50000.0,17.47,Not Verified,D,4350,1,0,RENT,10.819778284410301,0,0,0 155 | f,debt_consolidation,14.08,273.74,48300.0,18.42,Not Verified,C,13995,2,0,RENT,10.785186839640698,0,0,0 156 | w,debt_consolidation,25.82,358.01,38400.0,22.41,Source Verified,E,1974,2,0,RENT,10.555812738575801,1,0,1 157 | w,credit_card,9.93,128.94,80000.0,5.74,Not Verified,B,2897,7,2,MORTGAGE,11.289781913656,0,1,0 158 | f,debt_consolidation,18.06,217.1,35064.0,17.76,Not Verified,D,6255,1,0,MORTGAGE,10.4649302420986,0,0,1 159 | w,debt_consolidation,7.97,87.71,93000.0,18.79,Not Verified,A,669,8,0,MORTGAGE,11.4403547721354,0,1,0 160 | w,home_improvement,15.05,520.35,55000.0,4.3,Verified,C,3071,3,0,OWN,10.9150884642146,1,0,1 161 | f,credit_card,9.44,640.1,82000.0,13.07,Source Verified,B,22580,0,0,RENT,11.3144745262464,1,1,0 162 | w,debt_consolidation,17.09,498.02,65000.0,14.29,Verified,D,9399,4,1,RENT,11.082142548877801,1,0,0 163 | w,debt_consolidation,14.08,290.85,55000.0,21.78,Verified,C,10938,5,0,RENT,10.9150884642146,1,0,0 164 | w,home_improvement,9.44,838.91,97400.0,12.64,Source Verified,B,2372,1,0,MORTGAGE,11.486581489630598,1,1,1 165 | w,debt_consolidation,15.05,428.7,85000.0,18.35,Verified,C,13054,2,0,MORTGAGE,11.3504065354725,1,0,0 166 | 
w,debt_consolidation,16.02,496.31,62000.0,23.21,Not Verified,C,6009,4,0,OWN,11.0348896640272,0,0,0 167 | w,home_improvement,14.08,855.42,74000.0,26.18,Verified,C,5501,16,0,MORTGAGE,11.2118203721863,1,0,1 168 | w,debt_consolidation,14.08,171.09,54000.0,22.2,Verified,C,18036,1,0,OWN,10.8967393255464,1,0,1 169 | w,debt_consolidation,9.93,45.13,20000.0,2.88,Source Verified,B,2398,0,0,RENT,9.90348755253613,1,1,0 170 | w,debt_consolidation,19.03,539.26,56500.0,18.61,Not Verified,D,22745,0,0,MORTGAGE,10.9419959171345,0,0,0 171 | w,credit_card,9.44,460.88,19000.0,0.0,Verified,B,0,0,0,OWN,9.852194258148579,1,1,0 172 | w,credit_card,10.42,211.03,52000.0,5.96,Source Verified,B,5927,1,0,RENT,10.858998997563601,1,1,1 173 | w,debt_consolidation,9.44,957.75,210000.0,28.37,Verified,B,28026,3,0,MORTGAGE,12.254862809699599,1,1,0 174 | f,credit_card,11.99,179.34,115000.0,13.76,Source Verified,B,3539,8,1,MORTGAGE,11.6526874073454,1,1,0 175 | w,credit_card,19.03,674.89,80000.0,15.18,Source Verified,D,2382,2,0,RENT,11.289781913656,1,0,1 176 | w,debt_consolidation,7.97,250.59,60000.0,36.52,Source Verified,A,10322,4,0,MORTGAGE,11.002099841204199,1,1,1 177 | w,debt_consolidation,12.62,541.42,58000.0,9.27,Source Verified,C,3,0,0,RENT,10.9681982895286,1,0,1 178 | f,debt_consolidation,15.05,867.25,98000.0,15.75,Not Verified,C,20463,3,0,MORTGAGE,11.4927227576527,0,0,0 179 | f,debt_consolidation,20.0,185.82,41600.0,26.51,Source Verified,D,14386,1,0,MORTGAGE,10.6358554462494,1,0,1 180 | w,credit_card,16.02,973.15,85000.0,10.81,Verified,C,32918,2,0,MORTGAGE,11.3504065354725,1,0,0 181 | w,credit_card,7.97,150.35,31500.0,6.11,Not Verified,A,6563,1,0,RENT,10.3577428248137,0,1,0 182 | w,debt_consolidation,15.05,346.9,32000.0,25.81,Not Verified,C,8476,0,0,MORTGAGE,10.3734911817819,0,0,1 183 | f,debt_consolidation,9.93,161.18,70000.0,14.56,Not Verified,B,21658,3,0,RENT,11.156250521031499,0,1,0 184 | w,credit_card,9.93,161.18,146707.0,6.9,Source Verified,B,923,5,0,MORTGAGE,11.8961926794208,1,1,0 185 | 
w,debt_consolidation,15.05,832.56,95000.0,6.8,Source Verified,C,18699,0,1,MORTGAGE,11.4616321705827,1,0,0 186 | w,credit_card,9.44,240.04,44000.0,17.54,Not Verified,B,4235,1,0,MORTGAGE,10.6919449129004,0,1,0 187 | f,credit_card,12.62,1005.34,85000.0,12.77,Source Verified,C,36942,2,0,MORTGAGE,11.3504065354725,1,0,0 188 | w,home_improvement,13.59,332.01,300000.0,13.25,Source Verified,C,19491,7,0,MORTGAGE,12.6115377536383,1,0,0 189 | w,major_purchase,15.05,476.33,109992.0,10.0,Not Verified,C,2284,3,0,MORTGAGE,11.608162914857099,0,0,0 190 | w,debt_consolidation,20.0,445.97,38000.0,10.93,Source Verified,D,13400,2,0,RENT,10.5453414387085,1,0,0 191 | w,debt_consolidation,10.42,259.72,40000.0,24.51,Source Verified,B,4620,0,0,MORTGAGE,10.5966347330961,1,1,1 192 | w,debt_consolidation,7.35,682.83,76000.0,23.69,Verified,A,44655,1,0,MORTGAGE,11.238488619268502,1,1,1 193 | w,debt_consolidation,9.93,644.69,73000.0,20.91,Source Verified,B,20170,0,0,RENT,11.198214720130501,1,1,0 194 | f,credit_card,9.44,320.05,132000.0,8.61,Not Verified,B,10366,2,0,RENT,11.7905572015685,0,1,1 195 | w,debt_consolidation,20.0,423.91,50000.0,18.29,Verified,D,13434,12,0,MORTGAGE,10.819778284410301,1,0,1 196 | w,debt_consolidation,23.88,636.39,65000.0,32.92,Not Verified,E,12043,6,0,MORTGAGE,11.082142548877801,0,0,0 197 | w,debt_consolidation,7.35,93.12,69800.0,16.9,Not Verified,A,3913,2,0,RENT,11.153389288750502,0,1,0 198 | w,debt_consolidation,7.97,375.88,600000.0,8.72,Verified,A,228292,4,1,MORTGAGE,13.304684934198301,1,1,0 199 | w,credit_card,6.72,461.24,65000.0,17.89,Not Verified,A,12534,1,0,MORTGAGE,11.082142548877801,0,1,0 200 | w,credit_card,5.32,1084.14,750000.0,6.06,Verified,A,10084,14,0,MORTGAGE,13.527828485512499,1,1,0 201 | w,debt_consolidation,10.91,326.97,48000.0,12.05,Not Verified,B,4244,1,0,MORTGAGE,10.778956289889999,0,1,1 202 | w,debt_consolidation,15.05,693.8,85000.0,13.3,Verified,C,5462,3,0,MORTGAGE,11.3504065354725,1,0,1 203 | w,debt_consolidation,15.05,357.25,41000.0,40.07,Not 
Verified,C,8590,4,1,OWN,10.6213273456864,0,0,0 204 | w,debt_consolidation,16.02,105.51,53000.0,28.22,Verified,C,8478,3,0,RENT,10.878047192534302,1,0,1 205 | w,credit_card,12.62,541.42,90000.0,10.67,Not Verified,C,21203,1,0,MORTGAGE,11.4075649493124,0,0,1 206 | w,debt_consolidation,16.02,364.94,69000.0,34.42,Not Verified,C,16467,1,0,MORTGAGE,11.1418617835794,0,0,0 207 | w,credit_card,11.99,222.4,54000.0,3.98,Source Verified,B,7895,2,1,MORTGAGE,10.8967393255464,1,1,0 208 | w,debt_consolidation,17.09,662.19,42200.0,12.63,Verified,D,6682,1,0,MORTGAGE,10.6501755000241,1,0,0 209 | w,debt_consolidation,12.62,73.73,18000.0,21.53,Verified,C,6766,2,0,RENT,9.7981270368783,1,0,1 210 | w,credit_card,9.44,960.15,180000.0,33.1,Source Verified,B,11216,2,0,MORTGAGE,12.100712129872301,1,1,1 211 | w,debt_consolidation,10.91,261.57,75000.0,25.02,Not Verified,B,6808,3,0,RENT,11.2252433925184,0,1,0 212 | w,debt_consolidation,7.97,494.12,52000.0,38.59,Source Verified,A,7640,0,0,RENT,10.858998997563601,1,1,1 213 | w,debt_consolidation,11.99,531.36,190000.0,11.1,Source Verified,B,22606,1,7,MORTGAGE,12.154779351142599,1,1,1 214 | f,credit_card,26.3,111.24,30000.0,36.52,Verified,E,19869,1,0,OWN,10.308952660644302,1,0,1 215 | w,debt_consolidation,5.32,752.87,118000.0,17.52,Source Verified,A,10044,0,0,MORTGAGE,11.6784399034478,1,1,0 216 | f,credit_card,14.08,342.17,43000.0,12.57,Source Verified,C,10771,0,0,RENT,10.668955394675699,1,0,0 217 | w,credit_card,7.97,469.84,65000.0,10.76,Source Verified,A,12054,2,0,MORTGAGE,11.082142548877801,1,1,0 218 | f,debt_consolidation,17.09,178.49,43000.0,11.08,Not Verified,D,6094,0,0,MORTGAGE,10.668955394675699,0,0,0 219 | w,debt_consolidation,16.02,527.51,39000.0,33.29,Verified,C,16427,2,0,RENT,10.5713169251118,1,0,0 220 | w,debt_consolidation,24.85,386.28,114000.0,18.6,Source Verified,E,55881,2,0,MORTGAGE,11.6439537273766,1,0,0 221 | w,credit_card,19.03,293.37,66000.0,15.82,Source Verified,D,31377,0,1,RENT,11.0974100210086,1,0,1 222 | 
w,debt_consolidation,10.91,343.31,100000.0,9.02,Not Verified,B,19569,0,0,RENT,11.5129254649702,0,1,0 223 | w,debt_consolidation,13.59,339.79,95000.0,10.46,Not Verified,C,5940,0,3,RENT,11.4616321705827,0,0,1 224 | w,debt_consolidation,7.21,154.87,69600.0,8.29,Not Verified,A,4495,5,0,MORTGAGE,11.150519846322501,0,1,0 225 | f,debt_consolidation,14.08,342.17,40000.0,10.92,Source Verified,C,4541,1,0,MORTGAGE,10.5966347330961,1,0,1 226 | w,debt_consolidation,15.05,777.06,130000.0,29.65,Verified,C,30210,10,0,MORTGAGE,11.775289729437699,1,0,0 227 | f,debt_consolidation,10.91,1307.85,87000.0,12.26,Not Verified,B,45079,2,1,OWN,11.3736633976367,0,1,0 228 | w,debt_consolidation,25.82,835.36,57000.0,22.06,Verified,E,3263,1,0,RENT,10.950806546816699,1,0,0 229 | w,debt_consolidation,18.06,635.66,79000.0,33.16,Not Verified,D,22290,0,0,RENT,11.277203131449198,0,0,0 230 | w,debt_consolidation,11.99,99.63,18000.0,17.0,Source Verified,B,5473,1,0,MORTGAGE,9.7981270368783,1,1,0 231 | f,credit_card,23.88,861.74,53320.0,12.99,Verified,E,4563,6,0,RENT,10.8840667742926,1,0,1 232 | w,debt_consolidation,14.08,109.5,42500.0,19.12,Verified,C,16916,0,0,MORTGAGE,10.657259354912501,1,0,0 233 | w,credit_card,14.08,372.96,45000.0,34.01,Not Verified,C,13807,4,0,MORTGAGE,10.714417768752499,0,0,0 234 | w,debt_consolidation,20.0,503.39,45000.0,27.36,Verified,D,12392,1,0,RENT,10.714417768752499,1,0,0 235 | w,debt_consolidation,17.09,1070.93,35000.0,0.0,Verified,D,0,3,0,RENT,10.4631033404715,1,0,1 236 | w,debt_consolidation,25.82,895.02,160000.0,18.51,Verified,E,53496,5,0,MORTGAGE,11.982929094216,1,0,0 237 | w,debt_consolidation,10.91,130.79,49000.0,5.12,Not Verified,B,2016,5,0,MORTGAGE,10.799575577092801,0,1,1 238 | f,home_improvement,10.42,389.58,98000.0,7.38,Not Verified,B,11021,6,2,MORTGAGE,11.4927227576527,0,1,1 239 | w,debt_consolidation,20.0,529.88,71000.0,26.4,Verified,D,32168,2,0,MORTGAGE,11.1704351560235,1,0,1 240 | 
w,credit_card,9.93,530.32,23500.0,0.0,Verified,B,0,0,0,MORTGAGE,10.0647557001323,1,1,0 241 | f,debt_consolidation,7.35,295.64,53000.0,19.88,Not Verified,A,21341,1,0,MORTGAGE,10.878047192534302,0,1,0 242 | w,debt_consolidation,19.03,249.37,42000.0,13.71,Not Verified,D,670,5,0,RENT,10.6454248972655,0,0,1 243 | w,debt_consolidation,20.0,222.99,120000.0,4.74,Not Verified,D,7130,0,0,OWN,11.6952470217642,0,0,0 244 | w,home_improvement,17.09,853.18,64800.0,37.56,Not Verified,D,211088,2,0,MORTGAGE,11.0790608823404,0,0,1 245 | f,debt_consolidation,10.42,571.38,186000.0,9.54,Not Verified,B,3647,3,2,MORTGAGE,12.133501952695301,0,1,1 246 | w,home_improvement,11.99,182.66,56000.0,22.27,Verified,B,9475,0,0,MORTGAGE,10.9331069697173,1,1,1 247 | w,credit_card,6.08,304.59,50000.0,20.02,Source Verified,A,4512,1,0,MORTGAGE,10.819778284410301,1,1,0 248 | w,debt_consolidation,17.09,214.19,36000.0,8.43,Not Verified,D,6203,4,0,OWN,10.4912742174382,0,0,0 249 | w,debt_consolidation,13.59,806.97,115000.0,11.77,Source Verified,C,3959,3,0,RENT,11.6526874073454,1,0,0 250 | w,credit_card,12.62,201.07,22000.0,23.79,Not Verified,C,10291,3,0,MORTGAGE,9.99879773234045,0,0,1 251 | f,debt_consolidation,9.44,224.04,40000.0,6.0,Source Verified,B,5714,4,0,MORTGAGE,10.5966347330961,1,1,0 252 | w,credit_card,21.45,327.69,85000.0,3.77,Not Verified,D,8540,1,0,RENT,11.3504065354725,0,0,0 253 | f,major_purchase,19.03,440.06,125582.0,12.3,Verified,D,8328,8,0,RENT,11.7407142106442,1,0,1 254 | w,credit_card,16.02,510.91,70000.0,23.8,Source Verified,C,30629,2,0,RENT,11.156250521031499,1,0,0 255 | w,credit_card,17.09,285.59,27000.0,17.07,Source Verified,D,10411,4,0,MORTGAGE,10.2035921449865,1,0,0 256 | w,debt_consolidation,18.06,635.66,28000.0,6.99,Source Verified,D,3320,4,1,MORTGAGE,10.239959789157302,1,0,1 257 | f,debt_consolidation,13.59,1019.37,69200.0,13.37,Source Verified,C,21107,5,0,MORTGAGE,11.144756141605802,1,0,1 258 | w,home_improvement,9.44,734.04,176000.0,1.56,Not 
Verified,B,6788,2,0,MORTGAGE,12.0782392740203,0,1,0 259 | w,credit_card,11.99,332.1,50000.0,12.48,Source Verified,B,5950,0,0,MORTGAGE,10.819778284410301,1,1,1 260 | w,credit_card,7.97,626.46,145000.0,13.47,Source Verified,A,11401,2,0,RENT,11.8844890214027,1,1,0 261 | w,debt_consolidation,20.0,148.66,68000.0,25.38,Not Verified,D,20711,4,0,MORTGAGE,11.127262984158198,0,0,1 262 | w,debt_consolidation,16.02,330.88,65000.0,21.68,Source Verified,C,14078,9,0,RENT,11.082142548877801,1,0,0 263 | w,credit_card,11.99,289.12,110000.0,5.24,Not Verified,B,7021,3,1,MORTGAGE,11.608235644774599,0,1,0 264 | w,debt_consolidation,15.05,238.17,100000.0,5.04,Source Verified,C,2934,0,1,MORTGAGE,11.5129254649702,1,0,0 265 | w,credit_card,13.59,368.9,45000.0,22.56,Source Verified,C,11046,0,0,MORTGAGE,10.714417768752499,1,0,0 266 | w,debt_consolidation,22.91,678.85,120000.0,13.45,Verified,E,6887,3,0,OWN,11.6952470217642,1,0,0 267 | w,debt_consolidation,18.06,347.36,48000.0,36.81,Verified,D,5212,6,0,OWN,10.778956289889999,1,0,1 268 | w,debt_consolidation,30.84,1150.5,280000.0,11.63,Verified,G,95357,3,0,RENT,12.5425448821514,1,0,1 269 | f,debt_consolidation,13.59,271.84,65000.0,5.61,Not Verified,C,10987,1,0,RENT,11.082142548877801,0,0,0 270 | w,home_improvement,22.91,579.95,170000.0,11.03,Not Verified,E,4251,8,1,MORTGAGE,12.0435537160324,0,0,0 271 | w,debt_consolidation,18.06,542.74,65000.0,11.34,Source Verified,D,1928,1,0,MORTGAGE,11.082142548877801,1,0,1 272 | w,debt_consolidation,28.72,1010.3,25000.0,65.58,Verified,F,23453,0,0,OWN,10.1266311038503,1,0,1 273 | w,credit_card,16.02,351.67,82000.0,18.19,Verified,C,35507,2,0,MORTGAGE,11.3144745262464,1,0,0 274 | w,debt_consolidation,10.42,195.61,105000.0,11.02,Source Verified,B,3497,1,0,MORTGAGE,11.5617156291397,1,1,0 275 | w,credit_card,7.97,798.73,45000.0,34.89,Source Verified,A,42338,0,0,MORTGAGE,10.714417768752499,1,1,1 276 | f,credit_card,13.59,570.85,14500.0,72.1,Verified,C,26585,0,0,MORTGAGE,9.58190392840867,1,0,1 277 | 
w,credit_card,15.05,138.76,68000.0,12.85,Source Verified,C,17047,1,0,RENT,11.127262984158198,1,0,1 278 | w,debt_consolidation,19.03,415.32,120000.0,7.57,Source Verified,D,4947,2,0,MORTGAGE,11.6952470217642,1,0,1 279 | w,debt_consolidation,17.09,622.53,95760.11,19.29,Source Verified,D,40947,0,0,RENT,11.4696014889363,1,0,0 280 | w,debt_consolidation,10.42,973.95,100000.0,17.53,Not Verified,B,13729,3,1,RENT,11.5129254649702,0,1,1 281 | w,credit_card,12.62,676.78,62000.0,18.23,Source Verified,C,10373,7,0,RENT,11.0348896640272,1,0,1 282 | f,debt_consolidation,12.62,335.12,75000.0,20.05,Source Verified,C,12974,0,1,RENT,11.2252433925184,1,0,1 283 | f,debt_consolidation,9.93,547.99,72000.0,27.9,Not Verified,B,14130,7,0,RENT,11.1844213979982,0,1,1 284 | w,debt_consolidation,7.21,145.58,44584.0,13.59,Not Verified,A,3764,1,0,MORTGAGE,10.7051303292494,0,1,0 285 | w,debt_consolidation,7.21,247.79,100000.0,20.45,Not Verified,A,19556,1,0,RENT,11.5129254649702,0,1,1 286 | w,debt_consolidation,9.93,212.13,175000.0,12.78,Not Verified,B,27936,4,0,MORTGAGE,12.072541252905701,0,1,0 287 | w,debt_consolidation,13.59,688.08,31600.0,90.09,Verified,C,39108,1,0,RENT,10.360912399575,1,0,1 288 | f,credit_card,20.0,743.28,48000.0,30.3,Verified,D,5592,5,0,RENT,10.778956289889999,1,0,1 289 | w,debt_consolidation,7.35,620.75,90000.0,21.27,Not Verified,A,15762,0,0,MORTGAGE,11.4075649493124,0,1,1 290 | f,debt_consolidation,9.44,480.08,50000.0,19.35,Not Verified,B,5457,1,1,RENT,10.819778284410301,0,1,1 291 | f,debt_consolidation,13.59,305.81,87500.0,15.33,Not Verified,C,14489,2,0,RENT,11.3793940723457,0,0,1 292 | w,debt_consolidation,14.08,155.69,60000.0,32.16,Verified,C,3104,2,0,MORTGAGE,11.002099841204199,1,0,0 293 | w,major_purchase,9.93,424.26,194000.0,0.88,Verified,B,934,2,0,OWN,12.175613438045499,1,1,1 294 | f,debt_consolidation,30.75,1177.68,190000.0,16.52,Verified,F,13316,3,0,RENT,12.154779351142599,1,0,0 295 | w,debt_consolidation,9.93,464.18,42000.0,14.91,Not 
Verified,B,9309,1,0,OWN,10.6454248972655,0,1,0 296 | w,credit_card,9.93,773.63,120000.0,20.08,Source Verified,B,27672,1,0,RENT,11.6952470217642,1,1,1 297 | w,debt_consolidation,11.99,332.1,35000.0,25.03,Source Verified,B,7366,0,0,RENT,10.4631033404715,1,1,1 298 | w,home_improvement,12.62,884.7,186000.0,19.01,Not Verified,C,54349,5,0,OWN,12.133501952695301,0,0,1 299 | w,debt_consolidation,9.44,256.04,38000.0,24.68,Verified,B,7087,3,1,MORTGAGE,10.5453414387085,1,1,1 300 | w,home_improvement,7.97,156.62,55000.0,11.85,Not Verified,A,7357,1,0,MORTGAGE,10.9150884642146,0,1,0 301 | w,debt_consolidation,6.72,550.75,65000.0,28.38,Verified,A,58078,2,1,MORTGAGE,11.082142548877801,1,1,1 302 | w,debt_consolidation,16.02,291.95,93500.0,17.44,Verified,C,7811,1,0,RENT,11.445716715276802,1,0,1 303 | w,debt_consolidation,21.45,273.08,36500.0,42.95,Verified,D,11052,5,0,MORTGAGE,10.5050675395706,1,0,0 304 | w,credit_card,6.08,487.34,66400.0,20.78,Verified,A,2678,3,0,RENT,11.1034523354645,1,1,0 305 | w,debt_consolidation,7.35,148.98,60000.0,28.14,Source Verified,A,12433,4,0,MORTGAGE,11.002099841204199,1,1,1 306 | w,home_improvement,7.97,78.31,80000.0,19.88,Not Verified,A,14026,1,0,MORTGAGE,11.289781913656,0,1,0 307 | w,debt_consolidation,14.08,85.55,37000.0,3.96,Source Verified,C,1002,2,0,RENT,10.518673191626402,1,0,0 308 | w,home_improvement,12.62,902.37,57000.0,4.48,Verified,C,6471,3,1,MORTGAGE,10.950806546816699,1,0,0 309 | w,small_business,12.62,335.12,80000.0,7.41,Not Verified,C,9643,4,0,RENT,11.289781913656,0,0,1 310 | f,debt_consolidation,20.0,148.66,84750.0,17.29,Source Verified,D,1344,4,1,OWN,11.347461025242698,1,0,0 311 | w,credit_card,12.62,338.39,60000.0,26.46,Not Verified,C,11919,1,1,RENT,11.002099841204199,0,0,1 312 | w,debt_consolidation,12.62,204.42,72000.0,14.62,Not Verified,C,5519,3,0,MORTGAGE,11.1844213979982,0,0,0 313 | f,debt_consolidation,7.35,372.45,100000.0,16.57,Not Verified,A,15188,2,0,MORTGAGE,11.5129254649702,0,1,0 314 | 
-------------------------------------------------------------------------------- /data_input/monthly_cust.csv: -------------------------------------------------------------------------------- 1 | period,avg_tenure,total_revenue,avg_ceexp,total_complaint,total_cust,total_cust_new,total_cust_churned 2 | 2018-01-01,46,8817393,8.71976755836089,818,19962,280,5024 3 | 2018-02-01,47,8829352,8.79233931615362,801,19946,469,4821 4 | 2018-03-01,47,9237268,8.75996426266938,813,20147,740,4785 5 | 2018-04-01,47,9184947,8.80812137438643,779,20169,744,4552 6 | 2018-05-01,47,9367982,8.80464406946426,748,20327,837,4424 7 | 2018-06-01,46,9372376,8.85738123282293,710,20376,847,4182 8 | 2018-07-01,46,9508697,8.89939588814187,688,20526,804,3979 9 | 2018-08-01,46,9765945,8.96497000193536,681,20668,926,3758 10 | 2018-09-01,46,9826839,9.03611499012857,647,20767,887,3563 11 | 2018-10-01,45,10030610,9.17166587565259,641,21070,838,3425 12 | 2018-11-01,45,10095840,9.25661636816622,623,21273,935,3284 13 | 2018-12-01,44,10120232,9.35417252037089,602,21354,871,3107 14 | 2019-01-01,44,10130985,9.48909642773757,604,21415,860,2875 15 | 2019-02-01,44,10029891,9.61209384601061,574,21482,704,2708 16 | 2019-03-01,44,10486906,9.76135995182732,573,21589,953,2517 17 | 2019-04-01,44,11114508,9.93274393630809,547,21604,866,2269 18 | 2019-05-01,44,10901146,10.1259378596087,550,21725,927,2042 19 | 2019-06-01,44,11391631,10.3561203890622,540,21796,945,1776 20 | 2019-07-01,44,12320887,10.6035592060233,546,21915,883,1570 21 | 2019-08-01,44,12564159,10.8587913340935,499,21925,919,1296 22 | 2019-09-01,44,13129472,11.120774263904,460,22008,947,1058 23 | 2019-10-01,44,12976005,11.3880441196496,432,22031,836,803 24 | 2019-11-01,44,13037597,11.694359998185,411,22039,908,517 25 | 2019-12-01,44,14078593,11.991211379904,416,22074,830,245 26 | -------------------------------------------------------------------------------- /data_input/normal_plot.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/onlyphantom/dataanalysis/5af1f2e8d3952b569e519f9e9c49e53b7f045c4a/data_input/normal_plot.png -------------------------------------------------------------------------------- /data_input/sample.csv: -------------------------------------------------------------------------------- 1 | female,read,write,math,hon,femalexmath 2 | 0,57,52,41,0,0 3 | 1,68,59,53,0,53 4 | 0,44,33,54,0,0 5 | 0,63,44,47,0,0 6 | 0,47,52,57,0,0 7 | 0,44,52,51,0,0 8 | 0,50,59,42,0,0 9 | 0,34,46,45,0,0 10 | 0,63,57,54,0,0 11 | 0,57,55,52,0,0 12 | 0,60,46,51,0,0 13 | 0,57,65,51,1,0 14 | 0,73,60,71,0,0 15 | 0,54,63,57,1,0 16 | 0,45,57,50,0,0 17 | 0,42,49,43,0,0 18 | 0,47,52,51,0,0 19 | 0,57,57,60,0,0 20 | 0,68,65,62,1,0 21 | 0,55,39,57,0,0 22 | 0,63,49,35,0,0 23 | 0,63,63,75,1,0 24 | 0,50,40,45,0,0 25 | 0,60,52,57,0,0 26 | 0,37,44,45,0,0 27 | 0,34,37,46,0,0 28 | 0,65,65,66,1,0 29 | 0,47,57,57,0,0 30 | 0,44,38,49,0,0 31 | 0,52,44,49,0,0 32 | 0,42,31,57,0,0 33 | 0,76,52,64,0,0 34 | 0,65,67,63,1,0 35 | 0,42,41,57,0,0 36 | 0,52,59,50,0,0 37 | 0,60,65,58,1,0 38 | 0,68,54,75,0,0 39 | 0,65,62,68,1,0 40 | 0,47,31,44,0,0 41 | 0,39,31,40,0,0 42 | 0,47,47,41,0,0 43 | 0,55,59,62,0,0 44 | 0,52,54,57,0,0 45 | 0,42,41,43,0,0 46 | 0,65,65,48,1,0 47 | 0,55,59,63,0,0 48 | 0,50,40,39,0,0 49 | 0,65,59,70,0,0 50 | 0,47,59,63,0,0 51 | 0,57,54,59,0,0 52 | 0,53,61,61,1,0 53 | 0,39,33,38,0,0 54 | 0,44,44,61,0,0 55 | 0,63,59,49,0,0 56 | 0,73,62,73,1,0 57 | 0,39,39,44,0,0 58 | 0,37,37,42,0,0 59 | 0,42,39,39,0,0 60 | 0,63,57,55,0,0 61 | 0,48,49,52,0,0 62 | 0,50,46,45,0,0 63 | 0,47,62,61,1,0 64 | 0,44,44,39,0,0 65 | 0,34,33,41,0,0 66 | 0,50,42,50,0,0 67 | 0,44,41,40,0,0 68 | 0,60,54,60,0,0 69 | 0,47,39,47,0,0 70 | 0,63,43,59,0,0 71 | 0,50,33,49,0,0 72 | 0,44,44,46,0,0 73 | 0,60,54,58,0,0 74 | 0,73,67,71,1,0 75 | 0,68,59,58,0,0 76 | 0,55,45,46,0,0 77 | 0,47,40,43,0,0 78 | 0,55,61,54,1,0 79 | 0,68,59,56,0,0 80 | 
0,31,36,46,0,0 81 | 0,47,41,54,0,0 82 | 0,63,59,57,0,0 83 | 0,36,49,54,0,0 84 | 0,68,59,71,0,0 85 | 0,63,65,48,1,0 86 | 0,55,41,40,0,0 87 | 0,55,62,64,1,0 88 | 0,52,41,51,0,0 89 | 0,34,49,39,0,0 90 | 0,50,31,40,0,0 91 | 0,55,49,61,0,0 92 | 0,52,62,66,1,0 93 | 0,63,49,49,0,0 94 | 1,68,62,65,1,65 95 | 1,39,44,52,0,52 96 | 1,44,44,46,0,46 97 | 1,50,62,61,1,61 98 | 1,71,65,72,1,72 99 | 1,63,65,71,1,71 100 | 1,34,44,40,0,40 101 | 1,63,63,69,1,69 102 | 1,68,60,64,0,64 103 | 1,47,59,56,0,56 104 | 1,47,46,49,0,49 105 | 1,63,52,54,0,54 106 | 1,52,59,53,0,53 107 | 1,55,54,66,0,66 108 | 1,60,62,67,1,67 109 | 1,35,35,40,0,40 110 | 1,47,54,46,0,46 111 | 1,71,65,69,1,69 112 | 1,57,52,40,0,40 113 | 1,44,50,41,0,41 114 | 1,65,59,57,0,57 115 | 1,68,65,58,1,58 116 | 1,73,61,57,1,57 117 | 1,36,44,37,0,37 118 | 1,43,54,55,0,55 119 | 1,73,67,62,1,62 120 | 1,52,57,64,0,64 121 | 1,41,47,40,0,40 122 | 1,60,54,50,0,50 123 | 1,50,52,46,0,46 124 | 1,50,52,53,0,53 125 | 1,47,46,52,0,52 126 | 1,47,62,45,1,45 127 | 1,55,57,56,0,56 128 | 1,50,41,45,0,45 129 | 1,39,53,54,0,54 130 | 1,50,49,56,0,56 131 | 1,34,35,41,0,41 132 | 1,57,59,54,0,54 133 | 1,57,65,72,1,72 134 | 1,68,62,56,1,56 135 | 1,42,54,47,0,47 136 | 1,61,59,49,0,49 137 | 1,76,63,60,1,60 138 | 1,47,59,54,0,54 139 | 1,46,52,55,0,55 140 | 1,39,41,33,0,33 141 | 1,52,49,49,0,49 142 | 1,28,46,43,0,43 143 | 1,42,54,50,0,50 144 | 1,47,42,52,0,52 145 | 1,47,57,48,0,48 146 | 1,52,59,58,0,58 147 | 1,47,52,43,0,43 148 | 1,50,62,41,1,41 149 | 1,44,52,43,0,43 150 | 1,47,41,46,0,46 151 | 1,45,55,44,0,44 152 | 1,47,37,43,0,43 153 | 1,65,54,61,0,61 154 | 1,43,57,40,0,40 155 | 1,47,54,49,0,49 156 | 1,57,62,56,1,56 157 | 1,68,59,61,0,61 158 | 1,52,55,50,0,50 159 | 1,42,57,51,0,51 160 | 1,42,39,42,0,42 161 | 1,66,67,67,1,67 162 | 1,47,62,53,1,53 163 | 1,57,50,50,0,50 164 | 1,47,61,51,1,51 165 | 1,57,62,72,1,72 166 | 1,52,59,48,0,48 167 | 1,44,44,40,0,40 168 | 1,50,59,53,0,53 169 | 1,39,54,39,0,39 170 | 1,57,62,63,1,63 171 | 1,57,60,51,0,51 172 | 
1,42,57,45,0,45 173 | 1,47,46,39,0,39 174 | 1,42,36,42,0,42 175 | 1,60,59,62,0,62 176 | 1,44,49,44,0,44 177 | 1,63,60,65,0,65 178 | 1,65,67,63,1,63 179 | 1,39,54,54,0,54 180 | 1,50,52,45,0,45 181 | 1,52,65,60,1,60 182 | 1,60,62,49,1,49 183 | 1,44,49,48,0,48 184 | 1,52,67,57,1,57 185 | 1,55,65,55,1,55 186 | 1,50,67,66,1,66 187 | 1,65,65,64,1,64 188 | 1,52,54,55,0,55 189 | 1,47,44,42,0,42 190 | 1,63,62,56,1,56 191 | 1,50,46,53,0,53 192 | 1,42,54,41,0,41 193 | 1,36,57,42,0,42 194 | 1,50,52,53,0,53 195 | 1,41,59,42,0,42 196 | 1,47,65,60,1,60 197 | 1,55,59,52,0,52 198 | 1,42,46,38,0,38 199 | 1,57,41,57,0,57 200 | 1,55,62,58,1,58 201 | 1,63,65,65,1,65 202 | -------------------------------------------------------------------------------- /data_input/wholesale.csv: -------------------------------------------------------------------------------- 1 | Channel,Region,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicassen 2 | 2,3,12669,9656,7561,214,2674,1338 3 | 2,3,7057,9810,9568,1762,3293,1776 4 | 2,3,6353,8808,7684,2405,3516,7844 5 | 1,3,13265,1196,4221,6404,507,1788 6 | 2,3,22615,5410,7198,3915,1777,5185 7 | 2,3,9413,8259,5126,666,1795,1451 8 | 2,3,12126,3199,6975,480,3140,545 9 | 2,3,7579,4956,9426,1669,3321,2566 10 | 1,3,5963,3648,6192,425,1716,750 11 | 2,3,6006,11093,18881,1159,7425,2098 12 | 2,3,3366,5403,12974,4400,5977,1744 13 | 2,3,13146,1124,4523,1420,549,497 14 | 2,3,31714,12319,11757,287,3881,2931 15 | 2,3,21217,6208,14982,3095,6707,602 16 | 2,3,24653,9465,12091,294,5058,2168 17 | 1,3,10253,1114,3821,397,964,412 18 | 2,3,1020,8816,12121,134,4508,1080 19 | 1,3,5876,6157,2933,839,370,4478 20 | 2,3,18601,6327,10099,2205,2767,3181 21 | 1,3,7780,2495,9464,669,2518,501 22 | 2,3,17546,4519,4602,1066,2259,2124 23 | 1,3,5567,871,2010,3383,375,569 24 | 1,3,31276,1917,4469,9408,2381,4334 25 | 2,3,26373,36423,22019,5154,4337,16523 26 | 2,3,22647,9776,13792,2915,4482,5778 27 | 2,3,16165,4230,7595,201,4003,57 28 | 1,3,9898,961,2861,3151,242,833 29 | 
1,3,14276,803,3045,485,100,518 30 | 2,3,4113,20484,25957,1158,8604,5206 31 | 1,3,43088,2100,2609,1200,1107,823 32 | 1,3,18815,3610,11107,1148,2134,2963 33 | 1,3,2612,4339,3133,2088,820,985 34 | 1,3,21632,1318,2886,266,918,405 35 | 1,3,29729,4786,7326,6130,361,1083 36 | 1,3,1502,1979,2262,425,483,395 37 | 2,3,688,5491,11091,833,4239,436 38 | 1,3,29955,4362,5428,1729,862,4626 39 | 2,3,15168,10556,12477,1920,6506,714 40 | 2,3,4591,15729,16709,33,6956,433 41 | 1,3,56159,555,902,10002,212,2916 42 | 1,3,24025,4332,4757,9510,1145,5864 43 | 1,3,19176,3065,5956,2033,2575,2802 44 | 2,3,10850,7555,14961,188,6899,46 45 | 2,3,630,11095,23998,787,9529,72 46 | 2,3,9670,7027,10471,541,4618,65 47 | 2,3,5181,22044,21531,1740,7353,4985 48 | 2,3,3103,14069,21955,1668,6792,1452 49 | 2,3,44466,54259,55571,7782,24171,6465 50 | 2,3,11519,6152,10868,584,5121,1476 51 | 2,3,4967,21412,28921,1798,13583,1163 52 | 1,3,6269,1095,1980,3860,609,2162 53 | 1,3,3347,4051,6996,239,1538,301 54 | 2,3,40721,3916,5876,532,2587,1278 55 | 2,3,491,10473,11532,744,5611,224 56 | 1,3,27329,1449,1947,2436,204,1333 57 | 1,3,5264,3683,5005,1057,2024,1130 58 | 2,3,4098,29892,26866,2616,17740,1340 59 | 2,3,5417,9933,10487,38,7572,1282 60 | 1,3,13779,1970,1648,596,227,436 61 | 1,3,6137,5360,8040,129,3084,1603 62 | 2,3,8590,3045,7854,96,4095,225 63 | 2,3,35942,38369,59598,3254,26701,2017 64 | 2,3,7823,6245,6544,4154,4074,964 65 | 2,3,9396,11601,15775,2896,7677,1295 66 | 1,3,4760,1227,3250,3724,1247,1145 67 | 2,3,85,20959,45828,36,24231,1423 68 | 1,3,9,1534,7417,175,3468,27 69 | 2,3,19913,6759,13462,1256,5141,834 70 | 1,3,2446,7260,3993,5870,788,3095 71 | 1,3,8352,2820,1293,779,656,144 72 | 1,3,16705,2037,3202,10643,116,1365 73 | 1,3,18291,1266,21042,5373,4173,14472 74 | 1,3,4420,5139,2661,8872,1321,181 75 | 2,3,19899,5332,8713,8132,764,648 76 | 2,3,8190,6343,9794,1285,1901,1780 77 | 1,3,20398,1137,3,4407,3,975 78 | 1,3,717,3587,6532,7530,529,894 79 | 2,3,12205,12697,28540,869,12034,1009 80 | 
1,3,10766,1175,2067,2096,301,167 81 | 1,3,1640,3259,3655,868,1202,1653 82 | 1,3,7005,829,3009,430,610,529 83 | 2,3,219,9540,14403,283,7818,156 84 | 2,3,10362,9232,11009,737,3537,2342 85 | 1,3,20874,1563,1783,2320,550,772 86 | 2,3,11867,3327,4814,1178,3837,120 87 | 2,3,16117,46197,92780,1026,40827,2944 88 | 2,3,22925,73498,32114,987,20070,903 89 | 1,3,43265,5025,8117,6312,1579,14351 90 | 1,3,7864,542,4042,9735,165,46 91 | 1,3,24904,3836,5330,3443,454,3178 92 | 1,3,11405,596,1638,3347,69,360 93 | 1,3,12754,2762,2530,8693,627,1117 94 | 2,3,9198,27472,32034,3232,18906,5130 95 | 1,3,11314,3090,2062,35009,71,2698 96 | 2,3,5626,12220,11323,206,5038,244 97 | 1,3,3,2920,6252,440,223,709 98 | 2,3,23,2616,8118,145,3874,217 99 | 1,3,403,254,610,774,54,63 100 | 1,3,503,112,778,895,56,132 101 | 1,3,9658,2182,1909,5639,215,323 102 | 2,3,11594,7779,12144,3252,8035,3029 103 | 2,3,1420,10810,16267,1593,6766,1838 104 | 2,3,2932,6459,7677,2561,4573,1386 105 | 1,3,56082,3504,8906,18028,1480,2498 106 | 1,3,14100,2132,3445,1336,1491,548 107 | 1,3,15587,1014,3970,910,139,1378 108 | 2,3,1454,6337,10704,133,6830,1831 109 | 2,3,8797,10646,14886,2471,8969,1438 110 | 2,3,1531,8397,6981,247,2505,1236 111 | 2,3,1406,16729,28986,673,836,3 112 | 1,3,11818,1648,1694,2276,169,1647 113 | 2,3,12579,11114,17569,805,6457,1519 114 | 1,3,19046,2770,2469,8853,483,2708 115 | 1,3,14438,2295,1733,3220,585,1561 116 | 1,3,18044,1080,2000,2555,118,1266 117 | 1,3,11134,793,2988,2715,276,610 118 | 1,3,11173,2521,3355,1517,310,222 119 | 1,3,6990,3880,5380,1647,319,1160 120 | 1,3,20049,1891,2362,5343,411,933 121 | 1,3,8258,2344,2147,3896,266,635 122 | 1,3,17160,1200,3412,2417,174,1136 123 | 1,3,4020,3234,1498,2395,264,255 124 | 1,3,12212,201,245,1991,25,860 125 | 2,3,11170,10769,8814,2194,1976,143 126 | 1,3,36050,1642,2961,4787,500,1621 127 | 1,3,76237,3473,7102,16538,778,918 128 | 1,3,19219,1840,1658,8195,349,483 129 | 2,3,21465,7243,10685,880,2386,2749 130 | 1,3,140,8847,3823,142,1062,3 131 | 
1,3,42312,926,1510,1718,410,1819 132 | 1,3,7149,2428,699,6316,395,911 133 | 1,3,2101,589,314,346,70,310 134 | 1,3,14903,2032,2479,576,955,328 135 | 1,3,9434,1042,1235,436,256,396 136 | 1,3,7388,1882,2174,720,47,537 137 | 1,3,6300,1289,2591,1170,199,326 138 | 1,3,4625,8579,7030,4575,2447,1542 139 | 1,3,3087,8080,8282,661,721,36 140 | 1,3,13537,4257,5034,155,249,3271 141 | 1,3,5387,4979,3343,825,637,929 142 | 1,3,17623,4280,7305,2279,960,2616 143 | 1,3,30379,13252,5189,321,51,1450 144 | 1,3,37036,7152,8253,2995,20,3 145 | 1,3,10405,1596,1096,8425,399,318 146 | 1,3,18827,3677,1988,118,516,201 147 | 2,3,22039,8384,34792,42,12591,4430 148 | 1,3,7769,1936,2177,926,73,520 149 | 1,3,9203,3373,2707,1286,1082,526 150 | 1,3,5924,584,542,4052,283,434 151 | 1,3,31812,1433,1651,800,113,1440 152 | 1,3,16225,1825,1765,853,170,1067 153 | 1,3,1289,3328,2022,531,255,1774 154 | 1,3,18840,1371,3135,3001,352,184 155 | 1,3,3463,9250,2368,779,302,1627 156 | 1,3,622,55,137,75,7,8 157 | 2,3,1989,10690,19460,233,11577,2153 158 | 2,3,3830,5291,14855,317,6694,3182 159 | 1,3,17773,1366,2474,3378,811,418 160 | 2,3,2861,6570,9618,930,4004,1682 161 | 2,3,355,7704,14682,398,8077,303 162 | 2,3,1725,3651,12822,824,4424,2157 163 | 1,3,12434,540,283,1092,3,2233 164 | 1,3,15177,2024,3810,2665,232,610 165 | 2,3,5531,15726,26870,2367,13726,446 166 | 2,3,5224,7603,8584,2540,3674,238 167 | 2,3,15615,12653,19858,4425,7108,2379 168 | 2,3,4822,6721,9170,993,4973,3637 169 | 1,3,2926,3195,3268,405,1680,693 170 | 1,3,5809,735,803,1393,79,429 171 | 1,3,5414,717,2155,2399,69,750 172 | 2,3,260,8675,13430,1116,7015,323 173 | 2,3,200,25862,19816,651,8773,6250 174 | 1,3,955,5479,6536,333,2840,707 175 | 2,3,514,7677,19805,937,9836,716 176 | 1,3,286,1208,5241,2515,153,1442 177 | 2,3,2343,7845,11874,52,4196,1697 178 | 1,3,45640,6958,6536,7368,1532,230 179 | 1,3,12759,7330,4533,1752,20,2631 180 | 1,3,11002,7075,4945,1152,120,395 181 | 1,3,3157,4888,2500,4477,273,2165 182 | 1,3,12356,6036,8887,402,1382,2794 183 | 
1,3,112151,29627,18148,16745,4948,8550 184 | 1,3,694,8533,10518,443,6907,156 185 | 1,3,36847,43950,20170,36534,239,47943 186 | 1,3,327,918,4710,74,334,11 187 | 1,3,8170,6448,1139,2181,58,247 188 | 1,3,3009,521,854,3470,949,727 189 | 1,3,2438,8002,9819,6269,3459,3 190 | 2,3,8040,7639,11687,2758,6839,404 191 | 2,3,834,11577,11522,275,4027,1856 192 | 1,3,16936,6250,1981,7332,118,64 193 | 1,3,13624,295,1381,890,43,84 194 | 1,3,5509,1461,2251,547,187,409 195 | 2,3,180,3485,20292,959,5618,666 196 | 1,3,7107,1012,2974,806,355,1142 197 | 1,3,17023,5139,5230,7888,330,1755 198 | 1,1,30624,7209,4897,18711,763,2876 199 | 2,1,2427,7097,10391,1127,4314,1468 200 | 1,1,11686,2154,6824,3527,592,697 201 | 1,1,9670,2280,2112,520,402,347 202 | 2,1,3067,13240,23127,3941,9959,731 203 | 2,1,4484,14399,24708,3549,14235,1681 204 | 1,1,25203,11487,9490,5065,284,6854 205 | 1,1,583,685,2216,469,954,18 206 | 1,1,1956,891,5226,1383,5,1328 207 | 2,1,1107,11711,23596,955,9265,710 208 | 1,1,6373,780,950,878,288,285 209 | 2,1,2541,4737,6089,2946,5316,120 210 | 1,1,1537,3748,5838,1859,3381,806 211 | 2,1,5550,12729,16767,864,12420,797 212 | 1,1,18567,1895,1393,1801,244,2100 213 | 2,1,12119,28326,39694,4736,19410,2870 214 | 1,1,7291,1012,2062,1291,240,1775 215 | 1,1,3317,6602,6861,1329,3961,1215 216 | 2,1,2362,6551,11364,913,5957,791 217 | 1,1,2806,10765,15538,1374,5828,2388 218 | 2,1,2532,16599,36486,179,13308,674 219 | 1,1,18044,1475,2046,2532,130,1158 220 | 2,1,18,7504,15205,1285,4797,6372 221 | 1,1,4155,367,1390,2306,86,130 222 | 1,1,14755,899,1382,1765,56,749 223 | 1,1,5396,7503,10646,91,4167,239 224 | 1,1,5041,1115,2856,7496,256,375 225 | 2,1,2790,2527,5265,5612,788,1360 226 | 1,1,7274,659,1499,784,70,659 227 | 1,1,12680,3243,4157,660,761,786 228 | 2,1,20782,5921,9212,1759,2568,1553 229 | 1,1,4042,2204,1563,2286,263,689 230 | 1,1,1869,577,572,950,4762,203 231 | 1,1,8656,2746,2501,6845,694,980 232 | 2,1,11072,5989,5615,8321,955,2137 233 | 1,1,2344,10678,3828,1439,1566,490 234 | 
1,1,25962,1780,3838,638,284,834 235 | 1,1,964,4984,3316,937,409,7 236 | 1,1,15603,2703,3833,4260,325,2563 237 | 1,1,1838,6380,2824,1218,1216,295 238 | 1,1,8635,820,3047,2312,415,225 239 | 1,1,18692,3838,593,4634,28,1215 240 | 1,1,7363,475,585,1112,72,216 241 | 1,1,47493,2567,3779,5243,828,2253 242 | 1,1,22096,3575,7041,11422,343,2564 243 | 1,1,24929,1801,2475,2216,412,1047 244 | 1,1,18226,659,2914,3752,586,578 245 | 1,1,11210,3576,5119,561,1682,2398 246 | 1,1,6202,7775,10817,1183,3143,1970 247 | 2,1,3062,6154,13916,230,8933,2784 248 | 1,1,8885,2428,1777,1777,430,610 249 | 1,1,13569,346,489,2077,44,659 250 | 1,1,15671,5279,2406,559,562,572 251 | 1,1,8040,3795,2070,6340,918,291 252 | 1,1,3191,1993,1799,1730,234,710 253 | 2,1,6134,23133,33586,6746,18594,5121 254 | 1,1,6623,1860,4740,7683,205,1693 255 | 1,1,29526,7961,16966,432,363,1391 256 | 1,1,10379,17972,4748,4686,1547,3265 257 | 1,1,31614,489,1495,3242,111,615 258 | 1,1,11092,5008,5249,453,392,373 259 | 1,1,8475,1931,1883,5004,3593,987 260 | 1,1,56083,4563,2124,6422,730,3321 261 | 1,1,53205,4959,7336,3012,967,818 262 | 1,1,9193,4885,2157,327,780,548 263 | 1,1,7858,1110,1094,6818,49,287 264 | 1,1,23257,1372,1677,982,429,655 265 | 1,1,2153,1115,6684,4324,2894,411 266 | 2,1,1073,9679,15445,61,5980,1265 267 | 1,1,5909,23527,13699,10155,830,3636 268 | 2,1,572,9763,22182,2221,4882,2563 269 | 1,1,20893,1222,2576,3975,737,3628 270 | 2,1,11908,8053,19847,1069,6374,698 271 | 1,1,15218,258,1138,2516,333,204 272 | 1,1,4720,1032,975,5500,197,56 273 | 1,1,2083,5007,1563,1120,147,1550 274 | 1,1,514,8323,6869,529,93,1040 275 | 1,3,36817,3045,1493,4802,210,1824 276 | 1,3,894,1703,1841,744,759,1153 277 | 1,3,680,1610,223,862,96,379 278 | 1,3,27901,3749,6964,4479,603,2503 279 | 1,3,9061,829,683,16919,621,139 280 | 1,3,11693,2317,2543,5845,274,1409 281 | 2,3,17360,6200,9694,1293,3620,1721 282 | 1,3,3366,2884,2431,977,167,1104 283 | 2,3,12238,7108,6235,1093,2328,2079 284 | 1,3,49063,3965,4252,5970,1041,1404 285 | 
1,3,25767,3613,2013,10303,314,1384 286 | 1,3,68951,4411,12609,8692,751,2406 287 | 1,3,40254,640,3600,1042,436,18 288 | 1,3,7149,2247,1242,1619,1226,128 289 | 1,3,15354,2102,2828,8366,386,1027 290 | 1,3,16260,594,1296,848,445,258 291 | 1,3,42786,286,471,1388,32,22 292 | 1,3,2708,2160,2642,502,965,1522 293 | 1,3,6022,3354,3261,2507,212,686 294 | 1,3,2838,3086,4329,3838,825,1060 295 | 2,2,3996,11103,12469,902,5952,741 296 | 1,2,21273,2013,6550,909,811,1854 297 | 2,2,7588,1897,5234,417,2208,254 298 | 1,2,19087,1304,3643,3045,710,898 299 | 2,2,8090,3199,6986,1455,3712,531 300 | 2,2,6758,4560,9965,934,4538,1037 301 | 1,2,444,879,2060,264,290,259 302 | 2,2,16448,6243,6360,824,2662,2005 303 | 2,2,5283,13316,20399,1809,8752,172 304 | 2,2,2886,5302,9785,364,6236,555 305 | 2,2,2599,3688,13829,492,10069,59 306 | 2,2,161,7460,24773,617,11783,2410 307 | 2,2,243,12939,8852,799,3909,211 308 | 2,2,6468,12867,21570,1840,7558,1543 309 | 1,2,17327,2374,2842,1149,351,925 310 | 1,2,6987,1020,3007,416,257,656 311 | 2,2,918,20655,13567,1465,6846,806 312 | 1,2,7034,1492,2405,12569,299,1117 313 | 1,2,29635,2335,8280,3046,371,117 314 | 2,2,2137,3737,19172,1274,17120,142 315 | 1,2,9784,925,2405,4447,183,297 316 | 1,2,10617,1795,7647,1483,857,1233 317 | 2,2,1479,14982,11924,662,3891,3508 318 | 1,2,7127,1375,2201,2679,83,1059 319 | 1,2,1182,3088,6114,978,821,1637 320 | 1,2,11800,2713,3558,2121,706,51 321 | 2,2,9759,25071,17645,1128,12408,1625 322 | 1,2,1774,3696,2280,514,275,834 323 | 1,2,9155,1897,5167,2714,228,1113 324 | 1,2,15881,713,3315,3703,1470,229 325 | 1,2,13360,944,11593,915,1679,573 326 | 1,2,25977,3587,2464,2369,140,1092 327 | 1,2,32717,16784,13626,60869,1272,5609 328 | 1,2,4414,1610,1431,3498,387,834 329 | 1,2,542,899,1664,414,88,522 330 | 1,2,16933,2209,3389,7849,210,1534 331 | 1,2,5113,1486,4583,5127,492,739 332 | 1,2,9790,1786,5109,3570,182,1043 333 | 2,2,11223,14881,26839,1234,9606,1102 334 | 1,2,22321,3216,1447,2208,178,2602 335 | 2,2,8565,4980,67298,131,38102,1215 336 | 
2,2,16823,928,2743,11559,332,3486 337 | 2,2,27082,6817,10790,1365,4111,2139 338 | 1,2,13970,1511,1330,650,146,778 339 | 1,2,9351,1347,2611,8170,442,868 340 | 1,2,3,333,7021,15601,15,550 341 | 1,2,2617,1188,5332,9584,573,1942 342 | 2,3,381,4025,9670,388,7271,1371 343 | 2,3,2320,5763,11238,767,5162,2158 344 | 1,3,255,5758,5923,349,4595,1328 345 | 2,3,1689,6964,26316,1456,15469,37 346 | 1,3,3043,1172,1763,2234,217,379 347 | 1,3,1198,2602,8335,402,3843,303 348 | 2,3,2771,6939,15541,2693,6600,1115 349 | 2,3,27380,7184,12311,2809,4621,1022 350 | 1,3,3428,2380,2028,1341,1184,665 351 | 2,3,5981,14641,20521,2005,12218,445 352 | 1,3,3521,1099,1997,1796,173,995 353 | 2,3,1210,10044,22294,1741,12638,3137 354 | 1,3,608,1106,1533,830,90,195 355 | 2,3,117,6264,21203,228,8682,1111 356 | 1,3,14039,7393,2548,6386,1333,2341 357 | 1,3,190,727,2012,245,184,127 358 | 1,3,22686,134,218,3157,9,548 359 | 2,3,37,1275,22272,137,6747,110 360 | 1,3,759,18664,1660,6114,536,4100 361 | 1,3,796,5878,2109,340,232,776 362 | 1,3,19746,2872,2006,2601,468,503 363 | 1,3,4734,607,864,1206,159,405 364 | 1,3,2121,1601,2453,560,179,712 365 | 1,3,4627,997,4438,191,1335,314 366 | 1,3,2615,873,1524,1103,514,468 367 | 2,3,4692,6128,8025,1619,4515,3105 368 | 1,3,9561,2217,1664,1173,222,447 369 | 1,3,3477,894,534,1457,252,342 370 | 1,3,22335,1196,2406,2046,101,558 371 | 1,3,6211,337,683,1089,41,296 372 | 2,3,39679,3944,4955,1364,523,2235 373 | 1,3,20105,1887,1939,8164,716,790 374 | 1,3,3884,3801,1641,876,397,4829 375 | 2,3,15076,6257,7398,1504,1916,3113 376 | 1,3,6338,2256,1668,1492,311,686 377 | 1,3,5841,1450,1162,597,476,70 378 | 2,3,3136,8630,13586,5641,4666,1426 379 | 1,3,38793,3154,2648,1034,96,1242 380 | 1,3,3225,3294,1902,282,68,1114 381 | 2,3,4048,5164,10391,130,813,179 382 | 1,3,28257,944,2146,3881,600,270 383 | 1,3,17770,4591,1617,9927,246,532 384 | 1,3,34454,7435,8469,2540,1711,2893 385 | 1,3,1821,1364,3450,4006,397,361 386 | 1,3,10683,21858,15400,3635,282,5120 387 | 1,3,11635,922,1614,2583,192,1068 
388 | 1,3,1206,3620,2857,1945,353,967 389 | 1,3,20918,1916,1573,1960,231,961 390 | 1,3,9785,848,1172,1677,200,406 391 | 1,3,9385,1530,1422,3019,227,684 392 | 1,3,3352,1181,1328,5502,311,1000 393 | 1,3,2647,2761,2313,907,95,1827 394 | 1,3,518,4180,3600,659,122,654 395 | 1,3,23632,6730,3842,8620,385,819 396 | 1,3,12377,865,3204,1398,149,452 397 | 1,3,9602,1316,1263,2921,841,290 398 | 2,3,4515,11991,9345,2644,3378,2213 399 | 1,3,11535,1666,1428,6838,64,743 400 | 1,3,11442,1032,582,5390,74,247 401 | 1,3,9612,577,935,1601,469,375 402 | 1,3,4446,906,1238,3576,153,1014 403 | 1,3,27167,2801,2128,13223,92,1902 404 | 1,3,26539,4753,5091,220,10,340 405 | 1,3,25606,11006,4604,127,632,288 406 | 1,3,18073,4613,3444,4324,914,715 407 | 1,3,6884,1046,1167,2069,593,378 408 | 1,3,25066,5010,5026,9806,1092,960 409 | 2,3,7362,12844,18683,2854,7883,553 410 | 2,3,8257,3880,6407,1646,2730,344 411 | 1,3,8708,3634,6100,2349,2123,5137 412 | 1,3,6633,2096,4563,1389,1860,1892 413 | 1,3,2126,3289,3281,1535,235,4365 414 | 1,3,97,3605,12400,98,2970,62 415 | 1,3,4983,4859,6633,17866,912,2435 416 | 1,3,5969,1990,3417,5679,1135,290 417 | 2,3,7842,6046,8552,1691,3540,1874 418 | 2,3,4389,10940,10908,848,6728,993 419 | 1,3,5065,5499,11055,364,3485,1063 420 | 2,3,660,8494,18622,133,6740,776 421 | 1,3,8861,3783,2223,633,1580,1521 422 | 1,3,4456,5266,13227,25,6818,1393 423 | 2,3,17063,4847,9053,1031,3415,1784 424 | 1,3,26400,1377,4172,830,948,1218 425 | 2,3,17565,3686,4657,1059,1803,668 426 | 2,3,16980,2884,12232,874,3213,249 427 | 1,3,11243,2408,2593,15348,108,1886 428 | 1,3,13134,9347,14316,3141,5079,1894 429 | 1,3,31012,16687,5429,15082,439,1163 430 | 1,3,3047,5970,4910,2198,850,317 431 | 1,3,8607,1750,3580,47,84,2501 432 | 1,3,3097,4230,16483,575,241,2080 433 | 1,3,8533,5506,5160,13486,1377,1498 434 | 1,3,21117,1162,4754,269,1328,395 435 | 1,3,1982,3218,1493,1541,356,1449 436 | 1,3,16731,3922,7994,688,2371,838 437 | 1,3,29703,12051,16027,13135,182,2204 438 | 1,3,39228,1431,764,4510,93,2346 439 | 
2,3,14531,15488,30243,437,14841,1867 440 | 1,3,10290,1981,2232,1038,168,2125 441 | 1,3,2787,1698,2510,65,477,52 442 | -------------------------------------------------------------------------------- /environments.md: -------------------------------------------------------------------------------- 1 | # Python virtual environment for data scientists 2 | ## Virtual environments 3 | A virtual environment is a tool that helps to keep dependencies required by different projects separate by creating isolated python virtual environments for them. This is one of the most important tools for production-level code. 4 | 5 | From my own experience, folks from the scientific computing community often dive into the notebook and install packages as we go without necessarily taking time to set up environments. This is dangerous and leads to numerous frustrations down the road: 6 | - Having to downgrade / upgrade packages multiple times as we "switch" projects 7 | - Without isolation of dependencies, this also hurts reproducibility, making it difficult to collaborate on a single project 8 | - Difficult to troubleshoot project errors 9 | - Temptation to stick to legacy packages or Python version as updating one of them may break several projects 10 | 11 | If you're doing data science work alone, with no concern for collaboration, and only using your computer to do exactly one project, you'd probably be fine. If your use case of data science falls outside that oddly specific scenario, I strongly recommend you follow this chapter closely. 12 | 13 | ## Prerequisites 14 | Anaconda / Miniconda: [Installation — Anaconda 2.0 documentation](http://docs.anaconda.com/anaconda/install/) 15 | 16 | > Both _Anaconda_ and _Miniconda_ use Conda as the package manager. The primary difference is the ~720+ packages that are bundled into _Anaconda_ (~3GB of disk space). 17 | > If you want a lightweight version that includes only _conda_, its dependencies, and Python, use _Miniconda_. 
18 | > 19 | > Tips: To verify that you have conda installed, open a terminal and run `conda --version` 20 | 21 | ## Learning Approaches 22 | In the workshop, we will be using the terminal (eg. command line, or bash) to create, manage, and activate our environments. We will also use the terminal to start up a Jupyter notebook / Lab instead of using a Graphical User Interface (GUI) such as the Anaconda Navigator. For this reason it is completely safe to pick either _Anaconda_ or _Miniconda_. 23 | 24 | #### Why CLI over GUI? 25 | Learning how to work in the conda shell and terminal may add some upfront overhead, but it is well worth it in the long run. Being comfortable with the command line interface (CLI) allows you to make the most out of what the development tool has to offer, unrestricted by the implementation choices of a Graphical user interface. 26 | 27 | When you're ready to move your code into production (eg. a virtual machine or an Ubuntu server from Azure), you may have to use SSH (secure shell) or use a Cloud Shell service (such as Azure) to provision, set up, and manage your development services as well as configure environment settings. By learning how to do this work using the command line interface (CLI) now, you will feel right at home later on when there's no GUI to fall back on. 28 | 29 | ## Creating your virtual environment 30 | To see all conda environments currently installed on your machine, use: 31 | ```bash 32 | conda env list 33 | 34 | # conda environments: 35 | # 36 | analyst /anaconda3/envs/analyst 37 | deeplearning /anaconda3/envs/deeplearning 38 | microblog /anaconda3/envs/microblog 39 | networking /anaconda3/envs/networking 40 | pedagogy /anaconda3/envs/pedagogy 41 | tokopedia /anaconda3/envs/tokopedia 42 | root * /anaconda3 43 | ``` 44 | The environment with an asterisk (`*`) next to it is the currently active environment. 45 | 46 | The following command creates a new environment named `tokopedia` with Python 3 installed. 
47 | ```bash 48 | mkdir IntroPython 49 | conda create -n tokopedia python=3 50 | source activate tokopedia 51 | ``` 52 | 53 | At the creation stage (`conda create -n`), conda may try to install some packages and prompt for your confirmation: 54 | ``` 55 | Proceed ([y]/n)? 56 | ``` 57 | Type `y` to proceed. When the environment is created, use `source activate tokopedia` to activate the environment. 58 | 59 | Run `conda env list` again and you should confirm that you're now using the `tokopedia` environment. 60 | 61 | ## Packages in your virtual environment 62 | When you first created your `tokopedia` virtual environment, a number of packages were installed. What are all the packages currently in your environment? What version of each package, respectively, are you using? Conda can list them for us: 63 | 64 | ```bash 65 | conda list 66 | 67 | # packages in environment at /anaconda3/envs/tokopedia: 68 | # 69 | ca-certificates 2019.5.15 0 70 | certifi 2019.6.16 py37_0 71 | libcxx 4.0.1 hcfea43d_1 72 | libcxxabi 4.0.1 hcfea43d_1 73 | libedit 3.1.20181209 hb402a30_0 74 | libffi 3.2.1 h475c297_4 75 | ncurses 6.1 h0a44026_1 76 | ... 77 | ``` 78 | 79 | For the Data Analytics Specialization, we will be using `numpy` and `pandas` primarily for most of the classes. If you took the Miniconda option, these are packages that you'll need to install individually. 80 | 81 | If `numpy` is not already present, install it using `conda install numpy`: 82 | 83 | ```bash 84 | conda install numpy 85 | 86 | Fetching package metadata ........... 87 | Solving package specifications: . 88 | 89 | Package plan for installation in environment /anaconda3/envs/tokopedia: 90 | 91 | The following NEW packages will be INSTALLED: 92 | 93 | blas: 1.0-mkl 94 | intel-openmp: 2019.4-233 95 | ... 96 | ``` 97 | 98 | When prompted by `conda`, type `y` to proceed with the installation of `numpy` and its dependencies. 99 | 100 | Now repeat the last step to install `pandas`. 
101 | 102 | > You can press the "up-arrow" key in your terminal and this will cycle through your previous commands. This can be a very convenient shortcut instead of typing the full `conda install pandas` command. 103 | 104 | ## Packages not in your virtual environment 105 | 106 | When you're done installing `numpy` and `pandas`, deactivate your environment and then list all packages again: 107 | 108 | ```bash 109 | source deactivate 110 | conda list 111 | ``` 112 | You will notice that conda now returns all packages in your root environment, instead of the ones in your `tokopedia` environment. This may mean a different version of Python, a different version of `pandas` and `numpy`. 113 | 114 | ## Notebook Kernels 115 | At this point, you may start up Jupyter Lab and realize that your `tokopedia` environment is not being used. This is because the corresponding package (`notebook`) is installed in your root environment, and has no access to the Python modules in your other environments. 116 | 117 | We can fix this with the use of two additional packages. 118 | 119 | First, in the root environment where you typically launch JupyterLab (or Jupyter Notebook) from, install `nb_conda_kernels`. This is the **environment** that contains the `notebook` package. 120 | 121 | ```bash 122 | conda install nb_conda_kernels 123 | ``` 124 | 125 | Then, activate your `tokopedia` environment and install the `ipykernel` into this environment. 126 | 127 | Alternatively, you can use the `-n` flag and pass in the name of the environment into which the package should be installed. 128 | 129 | The `ipykernel` package allows us to create a new kernel with a specified name (by convention, I use the same name as my environment): 130 | 131 | ```bash 132 | conda install -n tokopedia ipykernel 133 | ipython kernel install --name=tokopedia 134 | 135 | # Success! 
136 | Installed kernelspec tokopedia in /usr/local/share/jupyter/kernels/tokopedia 137 | ``` 138 | 139 | When this is done, launch JupyterLab (`jupyter lab`) and you'll see the kernel as an option in your JupyterLab interface. You're all ready to go! 140 | 141 | ![](/assets/kernels.png) 142 | 143 | ## Final Words 144 | 145 | Congratulations! You've taken the "hard choice" to set up environments for your project. This form of isolation helps you run several projects on your machine, each having its own version of packages and dependencies. Your project collaborators are going to love you for that, and your future self will appreciate you take the extra effort to "do things right" at the beginning. 146 | -------------------------------------------------------------------------------- /lecturenotes/1_july_cohort.md: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | - A copy of Anaconda / Miniconda installed on your PC 3 | - Python 3 installed on your PC 4 | 5 | # Administrative Details 6 | - Google Classroom 7 | - Graded Modules 8 | 9 | # Python for Data Analytics 10 | ## Day 1 - Day 4 11 | - List: `salary=[15, 7, 7, 10, 8]` 12 | - Zero-based indexing 13 | - `salary[3]` 14 | - `salary[0:2]` 15 | - `salary[-2]` 16 | - `salary[0:3:2]` 17 | - General syntax: `[start:end:step]` 18 | 19 | - We make python more suitable for data analysis by importing packages 20 | - `import pandas as pd` 21 | 22 | - `pd.read_csv("/financial_report/sales2019.csv")` 23 | - By default, it wouldn't know which column to use as index 24 | - `sales = pd.read_csv("/financial/sales2019.csv", index_col=0)` 25 | - `sales.head()` and `sales.tail()` show the first or last 5 rows respectively 26 | - `sales.shape` print the number of rows and columns 27 | - When I convert something to a DataFrame, the DataFrame gain the following attributes: 28 | - .shape 29 | - data types: .dtypes 30 | - When I convert something to a DataFrame, the DataFrame gain the 
following methods: 31 | - .head() 32 | - .tail() 33 | - methods can take additional parameters 34 | 35 | - pd.read_csv("where_data_locates.csv", index_col) 36 | 37 | - Naming your variables 38 | - Python is case-sensitive 39 | - Don't start with a number; Don't use dashes 40 | - Convention: use underscores: `weekly_stock_returns_q4` 41 | - Don't use a python keyword 42 | - `False = 10` is invalid (SyntaxError) 43 | 44 | - pandas has two main data types: 45 | - Series (pd.Series) 46 | - example: `pd.Series([100, 50, 0, 25.5])` 47 | - DataFrame(pd.DataFrame) 48 | - Method 1 example: `pd.DataFrame(my_dict)` 49 | - Method 2 example: `pd.read_csv()` 50 | 51 | - For a pandas object, you can call methods and access attributes such as: 52 | - .dtypes 53 | - Categorical, Int, Float, DateTime, Object 54 | - .shape 55 | - .describe() 56 | - .describe(include=['datetime']) 57 | - .head(), .tail() 58 | - .axes (an attribute, not a method) returns 2 values; the first one corresponds to the rows, the second one to the columns 59 | - .axes[1] -> .columns 60 | - len(stock.columns) 61 | - .value_counts() 62 | - Ex: appl.trade_market.value_counts() 63 | 64 | - Indexing 65 | - `appl.trade_market` points to a column called trade_market, and the returned value is a `pd.Series` 66 | - `appl['trade_market']` is a safer choice, because it allows for otherwise reserved keywords and spaces 67 | - ```py 68 | cond1 = appl['volume'] == 1000000 69 | appl.loc[cond1, 'date'] 70 | ``` 71 | - .loc subsets by label: we refer to the names on that index 72 | - .iloc subsets by integer position (numbers) 73 | - `.iloc[x,y:z]` 74 | 75 | - Function 76 | ```py 77 | def extract_listings(url, num_of_listing=5): 78 | url.findAll('ul') 79 | ... 
80 | 81 | tokopedia = extract_listings('tokopedia.com.....') 82 | blibli = extract_listings('blibli.com.....') 83 | tokopedia.to_csv() 84 | ``` 85 | - An anonymous function is also called a `lambda` 86 | 87 | - `.apply(lambda x: some_operations_on_x )` 88 | 89 | - `.str` exposes all the string methods and you can combine them with `.startswith()` to narrow down your search 90 | 91 | - `.replace()`; Can do a simple string replace 92 | - ex: `.replace("Corporate", "Company")` 93 | - ex: `.replace("^\d", "-", regex=True)` 94 | 95 | 96 | # Agenda 97 | - 630pm: Class begins 98 | - 740pm: Break 99 | - 810pm: End of Break 100 | - 930pm: End of Class 101 | 102 | # Resources and Extra Materials 103 | - https://github.com/onlyphantom/dataanalysis 104 | 105 | # Discussion points 106 | - Arithmetics 107 | - Equality Check 108 | - DataFrames vs Series 109 | - Syntax highlighting is not a feature of Python 110 | 111 | 112 | 113 | - Spaces / New lines 114 | 115 | - Running Python scripts in IDE vs Notebook vs Console 116 | - Function construction 117 | - Positional vs named arguments 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /lecturenotes/2_july_cohort.md: -------------------------------------------------------------------------------- 1 | # Exploratory Data Analysis 2 | 3 | ## First 30 mins 4 | - Set up your environment 5 | - Import libraries 6 | - Acquire your data 7 | - `pd.read_csv(comment='#', skiprows=10)` 8 | - Peek at your data 9 | - head, tail 10 | - shape 11 | - dtypes 12 | - describe 13 | - astype 14 | - value_counts 15 | - sort_values 16 | - fillna(0) 17 | - Replaces all `NaN` with 0. 
If you pass in a string, it replaces them with that string 18 | 19 | ## Next 1 hour 20 | - Exploratory Data Analysis 21 | - Frequency table / Contingency table 22 | - Can be one-dimensional 23 | - Output is consistent with values from `.value_counts()` 24 | - Can be two-dimensional 25 | - Minimal Syntax: `pd.crosstab(index='', columns='')` 26 | - For one-dimensional: 27 | - `pd.crosstab(index=bank.branches, columns='count')` 28 | - For two-dimensional: 29 | - `pd.crosstab(index=bank.branches, columns=bank.division)` 30 | - Full Syntax: 31 | - ```py 32 | pd.crosstab(index='', 33 | columns='', 34 | values='', 35 | aggfunc='', 36 | margins='', 37 | margins_name='', 38 | normalize='') 39 | ``` 40 | - If you use `values`, you need to pass in `aggfunc` so it knows which function to use to aggregate 41 | - `margins` adds a row / column at the end that totals across the respective dimension 42 | - `margins_name` allows us to override the default, which is `All` 43 | - `normalize` takes one of **three** values: 44 | - `index`, `columns`, `True` 45 | - You can pass in multiple values for `index` and `columns` to create a multi-index. 
You need to use a list 46 | - Default aggregating function of a crosstab is `len` 47 | 48 | - Pivot Table 49 | - Default function of a pivot table is `mean` 50 | - Remember that mean is sensitive to outliers; `median` on the other hand is more robust 51 | - Requires the `data` parameter, which is the name you give to your dataframe 52 | - Every other parameter is the same 53 | - If a `values` parameter is not provided explicitly 54 | - Implicitly: It takes all numeric columns **not used as index or columns** and implicitly treats them as a list of `values` 55 | - To avoid that implicit behavior: 56 | - Explicitly provide the `values` 57 | - Use `.astype` to do an extra step of preprocessing so columns that are not truly numeric are not stored as numeric types 58 | - Use column indexing before passing that into the `data` parameter 59 | 60 | - Datetimes 61 | - `pd.to_datetime(sales['ReportDate'])` 62 | - Once it's a datetime, you gain access to `dt`'s attributes and methods 63 | - `dt.month`, `dt.week`, `dt.day`... 64 | - You can use methods like `dt.to_period('M')` 65 | - Practical: Create new features / columns 66 | 67 | - Tips 68 | - Very often, you need to combine EDA with conditional indexing (boolean indexing) 69 | - If you normalize, you can optionally apply a `* 100` multiplier to the resulting DataFrame so you can interpret in percentages 70 | - Can also combine with `round(,2)` 71 | - If you use `sum` as the aggregating function, your values may become very large. 
Use the opposite: ` / 1000000` and interpret the resulting DataFrame in the unit of millions 72 | - Very often, you need to combine your EDA with string matching 73 | - `.str.startswith('PT.')` 74 | - `.str.endswith('Zimmerman')` 75 | - `.str.contains('New York City')` 76 | 77 | -------------------------------------------------------------------------------- /lecturenotes/4_july_cohort.md: -------------------------------------------------------------------------------- 1 | # Day 1 2 | Reference: github.com/google/python-fire 3 | Template: github.com/onlyphantom/stockmonitor 4 | 5 | `pd.DataFrame.plot()` 6 | - Two parameters 7 | - `subplots` default False 8 | - `kind` is the kind of plot you want 9 | - default is `kind="line"` 10 | - Options are `hist` for histogram, `bar` for bar plot 11 | 12 | # One-dimensional 13 | - apple['price'] 14 | - Numeric? Categorical? Time Series? 15 | - Numeric: Box or Histogram for distribution; Line is temporal / series / sequence of values 16 | - Categorical: Bar 17 | - Time Series: Line type 18 | 19 | ## Two-dimensional 20 | - X axis and Y axis 21 | - If X (age) and Y (premium) are both numeric: Scatterplot 22 | - If X (gender) is categorical and Y is numeric: Boxplot 23 | - If X and Y are both categorical: Bar 24 | 25 | ## Altair Visualization 26 | ```py 27 | import altair as alt 28 | alt.Chart(dat).encode( 29 | x='recruitment_date', 30 | y='employee_age', 31 | color='division' 32 | ).mark_bar() 33 | ``` 34 | - mark_bar(), mark_line(), mark_point(), mark_area() 35 | - Use the example gallery 36 | 37 | Later on add .brush, .interactive() 38 | 39 | ## SQL Databases 40 | - SQL = structured query language 41 | - CRUD operations 42 | - Create 43 | - Read 44 | - SELECT *... 45 | - Update 46 | - UPDATE salesperson SET ... WHERE ... 
47 | - Delete 48 | - DELETE FROM 49 | - Relational 50 | - A table can have Foreign Key(s) 51 | - One to One 52 | - Owner A <-> Pet A 53 | - Many to One / One to Many 54 | - Employee -> Manager 55 | - Many to Many 56 | - Clients <-> Tags (industry, location, size) 57 | - Joins 58 | - Left Join return all rows from the left table regardless of whether a match is found 59 | - Inner Join return all rows from the left table IF there is a match on the right table 60 | - Inverse of left join is right join 61 | - Outer Join return all rows from both left and right table, creating Nulls as necessary 62 | 63 | ## SQL Queries 64 | - Structured Query Language 65 | ```sql 66 | SELECT c.name, c.bank_acc, c.savings_bal, cs.name, b.name 67 | FROM customers as c 68 | LEFT JOIN customersupport as cs ON cs.id = c.customerrep.id 69 | LEFT JOIN branch as b ON b.id = c.branchid 70 | WHERE b.name IN ('kemang', 'pluit', 'benhil') 71 | ORDER BY c.savings_bal DESC 72 | LIMIT 20 73 | ``` 74 | 75 | ```py 76 | import sqlite3 77 | # create connection 78 | pd.read_sql_query("SELECT name FROM customers", conn, index_col='name') 79 | ``` 80 | Hint: SQL engines are ALWAYS going to be faster than a programming language, so do your conditional selects / filters in SQL instead 81 | 82 | ### Create Databases 83 | https://github.com/onlyphantom/dataanalysis/blob/master/lecturenotes/classroom.sql 84 | In a terminal: `sqlite3 name_of_database` or `sqlite3 name_of_database < injection.sql` 85 | -------------------------------------------------------------------------------- /lecturenotes/analyticsapp/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template 2 | 3 | app = Flask(__name__) 4 | 5 | 6 | @app.route("/") 7 | def hello(): 8 | return "Welcome to Algoritma Insights app service!" 
9 | 10 | 11 | @app.route("/dashboard") 12 | def analytics(): 13 | import pandas as pd 14 | 15 | dat = pd.read_csv( 16 | "https://raw.githubusercontent.com/onlyphantom/dataanalysis/master/data_input/techcrunch.csv" 17 | ) 18 | 19 | cond1 = dat["company"] == "Tesla Motors" 20 | result = ( 21 | dat.loc[cond1, ["company", "round", "raisedCurrency", "raisedAmt"]] 22 | .sort_values(["round", "raisedAmt"]) 23 | .to_html() 24 | ) 25 | 26 | return render_template("analytics.html", result=result) 27 | 28 | 29 | @app.route("/api") 30 | def api(): 31 | import pandas as pd 32 | 33 | dat = pd.read_csv( 34 | "https://raw.githubusercontent.com/onlyphantom/dataanalysis/master/data_input/techcrunch.csv" 35 | ) 36 | 37 | cond1 = dat["company"] == "Tesla Motors" 38 | result = ( 39 | dat.loc[cond1, ["company", "round", "raisedCurrency", "raisedAmt"]] 40 | .sort_values(["round", "raisedAmt"]) 41 | .to_json() 42 | ) 43 | 44 | return result 45 | 46 | 47 | if __name__ == "__main__": 48 | app.run() 49 | -------------------------------------------------------------------------------- /lecturenotes/analyticsapp/classroom.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | -------------------------------------------------------------------------------- /lecturenotes/analyticsapp/demo.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | dat = pd.read_csv( 4 | "https://raw.githubusercontent.com/onlyphantom/dataanalysis/master/data_input/techcrunch.csv" 5 | ) 6 | 7 | cond1 = dat["company"] == "Tesla Motors" 8 | result = dat.loc[ 9 | cond1, ["company", "round", "raisedCurrency", "raisedAmt"] 10 | ].sort_values(["round", "raisedAmt"]) 11 | 12 | print(result) 13 | -------------------------------------------------------------------------------- /lecturenotes/analyticsapp/templates/analytics.html: 
-------------------------------------------------------------------------------- 1 |

Dashboard

2 | 3 | {{ result | safe }} -------------------------------------------------------------------------------- /lecturenotes/book_analytics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Background" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The following document describes our analysis on a dataset of more than 13,700 books as an attempt to discover any useful insights for our company" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 4, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/plain": [ 25 | "(13714, 10)" 26 | ] 27 | }, 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "output_type": "execute_result" 31 | } 32 | ], 33 | "source": [ 34 | "import pandas as pd\n", 35 | "books_ori = pd.read_csv(\"data_input/books_c.csv\")\n", 36 | "book = books_ori.copy()\n", 37 | "books.shape" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "Understanding our data is crucial to this analysis. 
We start off by looking at the data types in each column of our dataset:" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 5, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "bookID int64\n", 56 | "title object\n", 57 | "authors object\n", 58 | "average_rating float64\n", 59 | "isbn object\n", 60 | "isbn13 int64\n", 61 | "language_code object\n", 62 | "# num_pages int64\n", 63 | "ratings_count int64\n", 64 | "text_reviews_count int64\n", 65 | "dtype: object" 66 | ] 67 | }, 68 | "execution_count": 5, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": [ 74 | "books.dtypes" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "Perform a type conversion so both `isbn` and `isbn13` share the same type. This adds consistency to our exploratory data analysis process later on:" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 6, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "bookID int64\n", 93 | "title object\n", 94 | "authors object\n", 95 | "average_rating float64\n", 96 | "isbn object\n", 97 | "isbn13 object\n", 98 | "language_code object\n", 99 | "# num_pages int64\n", 100 | "ratings_count int64\n", 101 | "text_reviews_count int64\n", 102 | "dtype: object" 103 | ] 104 | }, 105 | "execution_count": 6, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "books['isbn13'] = books['isbn13'].astype('object')\n", 112 | "books.dtypes" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "It's useful to start off our analysis by looking at the top 3 most prolific authors according to our company's data:" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 36, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | 
"['Agatha Christie', 'Stephen King', 'Orson Scott Card']" 131 | ] 132 | }, 133 | "execution_count": 36, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "mylist = books['authors'].value_counts().head(3).index.to_list()\n", 140 | "mylist" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "Our scout Andy recommended that we sign J.K. Rowling, a promising author from Great Britain. We want a more data-driven argument for whether we should be splashing top cash on signing her under our publishing label:" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 11, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "4.52" 159 | ] 160 | }, 161 | "execution_count": 11, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "cond1 = books['authors'] == 'J.K. Rowling'\n", 168 | "mean_ratings = books.loc[cond1, 'average_rating'].mean()\n", 169 | "round(mean_ratings, 2)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "To help the company acquire shelf-worthy titles, I've compiled a list of commercially successful books with great ratings. 
These are the top 10 books:" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 12, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "Index(['bookID', 'title', 'authors', 'average_rating', 'isbn', 'isbn13',\n", 188 | " 'language_code', '# num_pages', 'ratings_count', 'text_reviews_count'],\n", 189 | " dtype='object')" 190 | ] 191 | }, 192 | "execution_count": 12, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "books.columns" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 22, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "data": { 208 | "text/html": [ 209 | "
\n", 210 | "\n", 223 | "\n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | "
titleauthorsaverage_ratingratings_count
0Harry Potter and the Half-Blood Prince (Harry ...J.K. Rowling4.561944099
4Harry Potter and the Prisoner of Azkaban (Harr...J.K. Rowling4.552149872
1Harry Potter and the Order of the Phoenix (Har...J.K. Rowling4.491996446
2Harry Potter and the Sorcerer's Stone (Harry P...J.K. Rowling4.475629932
4455A Game of Thrones (A Song of Ice and Fire #1)George R.R. Martin4.451598396
5300Harry Potter and the Chamber of Secrets (Harry...J.K. Rowling4.412115562
6363The Book ThiefMarkus Zusak4.371410666
25The Fellowship of the Ring (The Lord of the Ri...J.R.R. Tolkien4.352009749
9319Where the Sidewalk EndsShel Silverstein4.301094416
2000The Hobbit or There and Back AgainJ.R.R. Tolkien4.262364968
\n", 306 | "
" 307 | ], 308 | "text/plain": [ 309 | " title authors \\\n", 310 | "0 Harry Potter and the Half-Blood Prince (Harry ... J.K. Rowling \n", 311 | "4 Harry Potter and the Prisoner of Azkaban (Harr... J.K. Rowling \n", 312 | "1 Harry Potter and the Order of the Phoenix (Har... J.K. Rowling \n", 313 | "2 Harry Potter and the Sorcerer's Stone (Harry P... J.K. Rowling \n", 314 | "4455 A Game of Thrones (A Song of Ice and Fire #1) George R.R. Martin \n", 315 | "5300 Harry Potter and the Chamber of Secrets (Harry... J.K. Rowling \n", 316 | "6363 The Book Thief Markus Zusak \n", 317 | "25 The Fellowship of the Ring (The Lord of the Ri... J.R.R. Tolkien \n", 318 | "9319 Where the Sidewalk Ends Shel Silverstein \n", 319 | "2000 The Hobbit or There and Back Again J.R.R. Tolkien \n", 320 | "\n", 321 | " average_rating ratings_count \n", 322 | "0 4.56 1944099 \n", 323 | "4 4.55 2149872 \n", 324 | "1 4.49 1996446 \n", 325 | "2 4.47 5629932 \n", 326 | "4455 4.45 1598396 \n", 327 | "5300 4.41 2115562 \n", 328 | "6363 4.37 1410666 \n", 329 | "25 4.35 2009749 \n", 330 | "9319 4.30 1094416 \n", 331 | "2000 4.26 2364968 " 332 | ] 333 | }, 334 | "execution_count": 22, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "cond1 = books['ratings_count'] > 1000000\n", 341 | "greatbooks = books.loc[cond1, ].sort_values('average_rating', ascending=False).head(10)\n", 342 | "result = greatbooks.loc[:,['title', 'authors', 'average_rating', 'ratings_count']]\n", 343 | "result" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 38, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/plain": [ 354 | "eng 10594\n", 355 | "en-US 1699\n", 356 | "spa 419\n", 357 | "en-GB 341\n", 358 | "ger 238\n", 359 | "fre 209\n", 360 | "jpn 64\n", 361 | "por 27\n", 362 | "mul 21\n", 363 | "ita 19\n", 364 | "zho 16\n", 365 | "grc 12\n", 366 | "en-CA 9\n", 367 | "nl 7\n", 368 | "rus 7\n", 369 | "swe 6\n", 370 | "glg 
4\n", 371 | "tur 3\n", 372 | "enm 3\n", 373 | "cat 3\n", 374 | "lat 3\n", 375 | "ara 2\n", 376 | "heb 1\n", 377 | "nor 1\n", 378 | "wel 1\n", 379 | "msa 1\n", 380 | "dan 1\n", 381 | "gla 1\n", 382 | "srp 1\n", 383 | "ale 1\n", 384 | "Name: language_code, dtype: int64" 385 | ] 386 | }, 387 | "execution_count": 38, 388 | "metadata": {}, 389 | "output_type": "execute_result" 390 | } 391 | ], 392 | "source": [ 393 | "books.language_code.value_counts()" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 26, 399 | "metadata": {}, 400 | "outputs": [ 401 | { 402 | "data": { 403 | "text/plain": [ 404 | "'\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n
titleauthorsaverage_ratingratings_count
0Harry Potter and the Half-Blood Prince (Harry ...J.K. Rowling4.561944099
4Harry Potter and the Prisoner of Azkaban (Harr...J.K. Rowling4.552149872
1Harry Potter and the Order of the Phoenix (Har...J.K. Rowling4.491996446
2Harry Potter and the Sorcerer\\'s Stone (Harry P...J.K. Rowling4.475629932
4455A Game of Thrones (A Song of Ice and Fire #1)George R.R. Martin4.451598396
5300Harry Potter and the Chamber of Secrets (Harry...J.K. Rowling4.412115562
6363The Book ThiefMarkus Zusak4.371410666
25The Fellowship of the Ring (The Lord of the Ri...J.R.R. Tolkien4.352009749
9319Where the Sidewalk EndsShel Silverstein4.301094416
2000The Hobbit or There and Back AgainJ.R.R. Tolkien4.262364968
'" 405 | ] 406 | }, 407 | "execution_count": 26, 408 | "metadata": {}, 409 | "output_type": "execute_result" 410 | } 411 | ], 412 | "source": [ 413 | "result.to_html()" 414 | ] 415 | } 416 | ], 417 | "metadata": { 418 | "kernelspec": { 419 | "display_name": "dataanalysis", 420 | "language": "python", 421 | "name": "dataanalysis" 422 | }, 423 | "language_info": { 424 | "codemirror_mode": { 425 | "name": "ipython", 426 | "version": 3 427 | }, 428 | "file_extension": ".py", 429 | "mimetype": "text/x-python", 430 | "name": "python", 431 | "nbconvert_exporter": "python", 432 | "pygments_lexer": "ipython3", 433 | "version": "3.7.2" 434 | } 435 | }, 436 | "nbformat": 4, 437 | "nbformat_minor": 2 438 | } 439 | -------------------------------------------------------------------------------- /lecturenotes/classroom.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE [Cohort] ( 2 | [CohortId] INTEGER NOT NULL PRIMARY KEY, 3 | [CohortName] NVARCHAR(50) NOT NULL 4 | ); 5 | CREATE TABLE [Students] ( 6 | [StudentId] INTEGER PRIMARY KEY NOT NULL, 7 | [StudentName] NVARCHAR(50) NOT NULL, 8 | [CohortId] INTEGER NULL, -- matches [Cohort].[CohortId] by name; no FOREIGN KEY is declared 9 | [Company] NVARCHAR(50) NULL -- free-text company name; was DATE, which would coerce numeric-looking names 10 | ); 11 | CREATE TABLE [Specialization] ( 12 | [SpecializationId] INTEGER NOT NULL PRIMARY KEY, 13 | [Specializationname] NVARCHAR(50) NOT NULL 14 | ); 15 | CREATE TABLE [Score] ( -- no PRIMARY KEY declared: (StudentId, SpecializationId) pairs may repeat 16 | [StudentId] INTEGER NOT NULL, 17 | [SpecializationId] INTEGER NOT NULL, 18 | [Score] INTEGER NULL 19 | ); -------------------------------------------------------------------------------- /lecturenotes/googleanalytics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "0.24.2\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import pandas as pd\n", 18 | "print(pd.__version__)" 19 | ] 20 | 
}, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 46, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | "
LanguageUsersNew UsersSessionsBounce RatePages / SessionAvg. Session DurationGoal Conversion RateGoal CompletionsGoal Value
0en-us23,49722,69635,1130.54%6.3500:02:3227.21%9,555$0.00
1id-id7,7977,61310,6170.65%4.8900:01:3816.26%1,726$0.00
2en-gb3,1983,0864,6250.56%5.5200:01:5021.36%988$0.00
3id2,1712,0942,7240.29%4.7500:01:4317.18%468$0.00
4en2242142953.05%5.0300:01:4426.78%79$0.00
\n", 127 | "
" 128 | ], 129 | "text/plain": [ 130 | " Language Users New Users Sessions Bounce Rate Pages / Session \\\n", 131 | "0 en-us 23,497 22,696 35,113 0.54% 6.35 \n", 132 | "1 id-id 7,797 7,613 10,617 0.65% 4.89 \n", 133 | "2 en-gb 3,198 3,086 4,625 0.56% 5.52 \n", 134 | "3 id 2,171 2,094 2,724 0.29% 4.75 \n", 135 | "4 en 224 214 295 3.05% 5.03 \n", 136 | "\n", 137 | " Avg. Session Duration Goal Conversion Rate Goal Completions Goal Value \n", 138 | "0 00:02:32 27.21% 9,555 $0.00 \n", 139 | "1 00:01:38 16.26% 1,726 $0.00 \n", 140 | "2 00:01:50 21.36% 988 $0.00 \n", 141 | "3 00:01:43 17.18% 468 $0.00 \n", 142 | "4 00:01:44 26.78% 79 $0.00 " 143 | ] 144 | }, 145 | "execution_count": 46, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "ana = pd.read_csv(\"data_input/analytics.csv\", comment=\"#\").head(11)\n", 152 | "ana.head()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 47, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "ana.Users = ana.Users.str.replace(',', '')\n", 162 | "ana.Users = ana.Users.astype('int')\n", 163 | "\n", 164 | "ana['New Users'] = ana['New Users'].str.replace(',', '')\n", 165 | "ana['New Users'] = ana['New Users'].astype('int')" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 48, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "data": { 175 | "text/html": [ 176 | "
\n", 177 | "\n", 190 | "\n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | "
LanguageUsersNew UsersSessionsBounce RatePages / SessionAvg. Session DurationGoal Conversion RateGoal CompletionsGoal ValueHiVol
6th-th1861862240.45%3.5000:00:404.91%11$0.00True
7en-sg1321252310.00%6.9200:02:1039.83%92$0.00False
8en-id1261191710.00%6.3900:04:1026.32%45$0.00False
9en-au93911270.00%5.5600:01:2219.69%25$0.00True
10NaN383513714055,1950.63%5.8600:02:1323.96%13,222$0.00True
\n", 280 | "
" 281 | ], 282 | "text/plain": [ 283 | " Language Users New Users Sessions Bounce Rate Pages / Session \\\n", 284 | "6 th-th 186 186 224 0.45% 3.50 \n", 285 | "7 en-sg 132 125 231 0.00% 6.92 \n", 286 | "8 en-id 126 119 171 0.00% 6.39 \n", 287 | "9 en-au 93 91 127 0.00% 5.56 \n", 288 | "10 NaN 38351 37140 55,195 0.63% 5.86 \n", 289 | "\n", 290 | " Avg. Session Duration Goal Conversion Rate Goal Completions Goal Value \\\n", 291 | "6 00:00:40 4.91% 11 $0.00 \n", 292 | "7 00:02:10 39.83% 92 $0.00 \n", 293 | "8 00:04:10 26.32% 45 $0.00 \n", 294 | "9 00:01:22 19.69% 25 $0.00 \n", 295 | "10 00:02:13 23.96% 13,222 $0.00 \n", 296 | "\n", 297 | " HiVol \n", 298 | "6 True \n", 299 | "7 False \n", 300 | "8 False \n", 301 | "9 True \n", 302 | "10 True " 303 | ] 304 | }, 305 | "execution_count": 48, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "ana['HiVol'] = ana['New Users']/ana['Users'] > 0.95\n", 312 | "ana.tail()" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 49, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "ana = ana.fillna(\"notprovided\")\n", 322 | "cond1 = ana.Language.str.startswith('en')\n", 323 | "cond2 = ana.Language.str.startswith('id')\n", 324 | "ana.loc[cond1, 'Language'] = 'en'\n", 325 | "ana.loc[cond2, 'Language'] = 'id'" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 50, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "data": { 335 | "text/html": [ 336 | "
\n", 337 | "\n", 350 | "\n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | "
LanguageUsersNew UsersSessionsBounce RatePages / SessionAvg. Session DurationGoal Conversion RateGoal CompletionsGoal ValueHiVol
0en234972269635,1130.54%6.3500:02:3227.21%9,555$0.00True
1id7797761310,6170.65%4.8900:01:3816.26%1,726$0.00True
2en319830864,6250.56%5.5200:01:5021.36%988$0.00True
\n", 412 | "
" 413 | ], 414 | "text/plain": [ 415 | " Language Users New Users Sessions Bounce Rate Pages / Session \\\n", 416 | "0 en 23497 22696 35,113 0.54% 6.35 \n", 417 | "1 id 7797 7613 10,617 0.65% 4.89 \n", 418 | "2 en 3198 3086 4,625 0.56% 5.52 \n", 419 | "\n", 420 | " Avg. Session Duration Goal Conversion Rate Goal Completions Goal Value \\\n", 421 | "0 00:02:32 27.21% 9,555 $0.00 \n", 422 | "1 00:01:38 16.26% 1,726 $0.00 \n", 423 | "2 00:01:50 21.36% 988 $0.00 \n", 424 | "\n", 425 | " HiVol \n", 426 | "0 True \n", 427 | "1 True \n", 428 | "2 True " 429 | ] 430 | }, 431 | "execution_count": 50, 432 | "metadata": {}, 433 | "output_type": "execute_result" 434 | } 435 | ], 436 | "source": [ 437 | "ana.head(3)" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 51, 443 | "metadata": {}, 444 | "outputs": [ 445 | { 446 | "data": { 447 | "text/html": [ 448 | "
\n", 449 | "\n", 462 | "\n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | "
HiVolFalseTrueAll
Language
en13.3122.4635.77
id0.0013.2813.28
notprovided0.005.865.86
th-th0.003.503.50
All13.3145.1058.41
\n", 510 | "
" 511 | ], 512 | "text/plain": [ 513 | "HiVol False True All\n", 514 | "Language \n", 515 | "en 13.31 22.46 35.77\n", 516 | "id 0.00 13.28 13.28\n", 517 | "notprovided 0.00 5.86 5.86\n", 518 | "th-th 0.00 3.50 3.50\n", 519 | "All 13.31 45.10 58.41" 520 | ] 521 | }, 522 | "execution_count": 51, 523 | "metadata": {}, 524 | "output_type": "execute_result" 525 | } 526 | ], 527 | "source": [ 528 | "pd.crosstab(index=ana.Language, \n", 529 | " columns=ana.HiVol,\n", 530 | " values=ana['Pages / Session'],\n", 531 | " aggfunc='sum',\n", 532 | " margins=True\n", 533 | " ).fillna(0).round(2)" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": 52, 539 | "metadata": {}, 540 | "outputs": [ 541 | { 542 | "data": { 543 | "text/html": [ 544 | "
\n", 545 | "\n", 558 | "\n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | "
HiVolFalseTrue
Language
en6.6555.615000
idNaN4.426667
notprovidedNaN5.860000
th-thNaN3.500000
\n", 594 | "
" 595 | ], 596 | "text/plain": [ 597 | "HiVol False True \n", 598 | "Language \n", 599 | "en 6.655 5.615000\n", 600 | "id NaN 4.426667\n", 601 | "notprovided NaN 5.860000\n", 602 | "th-th NaN 3.500000" 603 | ] 604 | }, 605 | "execution_count": 52, 606 | "metadata": {}, 607 | "output_type": "execute_result" 608 | } 609 | ], 610 | "source": [ 611 | "pd.pivot_table(data=ana, \n", 612 | " index='Language', \n", 613 | " columns='HiVol', \n", 614 | " values='Pages / Session')" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": 55, 620 | "metadata": {}, 621 | "outputs": [ 622 | { 623 | "data": { 624 | "text/html": [ 625 | "
\n", 626 | "\n", 639 | "\n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | "
Day IndexUsers
01/1/19122
11/2/19174
21/3/19240
31/4/19231
41/5/19172
\n", 675 | "
" 676 | ], 677 | "text/plain": [ 678 | " Day Index Users\n", 679 | "0 1/1/19 122\n", 680 | "1 1/2/19 174\n", 681 | "2 1/3/19 240\n", 682 | "3 1/4/19 231\n", 683 | "4 1/5/19 172" 684 | ] 685 | }, 686 | "execution_count": 55, 687 | "metadata": {}, 688 | "output_type": "execute_result" 689 | } 690 | ], 691 | "source": [ 692 | "lytics = pd.read_csv(\"data_input/analytics.csv\", skiprows=18)\n", 693 | "lytics.head()" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": 60, 699 | "metadata": {}, 700 | "outputs": [ 701 | { 702 | "data": { 703 | "text/html": [ 704 | "
\n", 705 | "\n", 718 | "\n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | "
Day IndexUsers
02019-01-01122
12019-01-02174
\n", 739 | "
" 740 | ], 741 | "text/plain": [ 742 | " Day Index Users\n", 743 | "0 2019-01-01 122\n", 744 | "1 2019-01-02 174" 745 | ] 746 | }, 747 | "execution_count": 60, 748 | "metadata": {}, 749 | "output_type": "execute_result" 750 | } 751 | ], 752 | "source": [ 753 | "lytics['Day Index'] = pd.to_datetime(lytics['Day Index'])\n", 754 | "lytics.head(2)" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": 64, 760 | "metadata": {}, 761 | "outputs": [ 762 | { 763 | "data": { 764 | "text/html": [ 765 | "
\n", 766 | "\n", 779 | "\n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | "
Day IndexUsersWeekdayMonth
02019-01-01122Tuesday1.0
12019-01-02174Wednesday1.0
\n", 806 | "
" 807 | ], 808 | "text/plain": [ 809 | " Day Index Users Weekday Month\n", 810 | "0 2019-01-01 122 Tuesday 1.0\n", 811 | "1 2019-01-02 174 Wednesday 1.0" 812 | ] 813 | }, 814 | "execution_count": 64, 815 | "metadata": {}, 816 | "output_type": "execute_result" 817 | } 818 | ], 819 | "source": [ 820 | "lytics['Weekday'] = lytics['Day Index'].dt.day_name()\n", 821 | "lytics['Month'] = lytics['Day Index'].dt.month\n", 822 | "lytics.head(2)" 823 | ] 824 | }, 825 | { 826 | "cell_type": "code", 827 | "execution_count": 76, 828 | "metadata": {}, 829 | "outputs": [ 830 | { 831 | "data": { 832 | "text/plain": [ 833 | "0 January\n", 834 | "1 January\n", 835 | "2 January\n", 836 | "3 January\n", 837 | "4 January\n", 838 | "5 January\n", 839 | "6 January\n", 840 | "7 January\n", 841 | "8 January\n", 842 | "9 January\n", 843 | "10 January\n", 844 | "11 January\n", 845 | "12 January\n", 846 | "13 January\n", 847 | "14 January\n", 848 | "15 January\n", 849 | "16 January\n", 850 | "17 January\n", 851 | "18 January\n", 852 | "19 January\n", 853 | "20 January\n", 854 | "21 January\n", 855 | "22 January\n", 856 | "23 January\n", 857 | "24 January\n", 858 | "25 January\n", 859 | "26 January\n", 860 | "27 January\n", 861 | "28 January\n", 862 | "29 January\n", 863 | " ... 
\n", 864 | "91 April\n", 865 | "92 April\n", 866 | "93 April\n", 867 | "94 April\n", 868 | "95 April\n", 869 | "96 April\n", 870 | "97 April\n", 871 | "98 April\n", 872 | "99 April\n", 873 | "100 April\n", 874 | "101 April\n", 875 | "102 April\n", 876 | "103 April\n", 877 | "104 April\n", 878 | "105 April\n", 879 | "106 April\n", 880 | "107 April\n", 881 | "108 April\n", 882 | "109 April\n", 883 | "110 April\n", 884 | "111 April\n", 885 | "112 April\n", 886 | "113 April\n", 887 | "114 April\n", 888 | "115 April\n", 889 | "116 April\n", 890 | "117 April\n", 891 | "118 April\n", 892 | "119 April\n", 893 | "120 NaN\n", 894 | "Name: Day Index, Length: 121, dtype: object" 895 | ] 896 | }, 897 | "execution_count": 76, 898 | "metadata": {}, 899 | "output_type": "execute_result" 900 | } 901 | ], 902 | "source": [ 903 | "lytics['Day Index'].dt.month_name().head()" 904 | ] 905 | }, 906 | { 907 | "cell_type": "code", 908 | "execution_count": 71, 909 | "metadata": {}, 910 | "outputs": [], 911 | "source": [ 912 | "lytics.Users = lytics.Users.str.replace(\",\",\"\")\n", 913 | "lytics.Users = lytics.Users.astype('int')" 914 | ] 915 | }, 916 | { 917 | "cell_type": "code", 918 | "execution_count": 72, 919 | "metadata": {}, 920 | "outputs": [ 921 | { 922 | "data": { 923 | "text/html": [ 924 | "
\n", 925 | "\n", 942 | "\n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | "
Users
Month1.02.03.04.0
Weekday
Friday286.75525.00440.60304.00
Monday277.50387.00510.25350.80
Saturday204.50491.50309.20267.75
Sunday180.50303.25293.80213.25
Thursday288.80927.25548.50396.25
Tuesday308.80510.75694.75438.20
Wednesday296.80628.00568.50323.50
\n", 1015 | "
" 1016 | ], 1017 | "text/plain": [ 1018 | " Users \n", 1019 | "Month 1.0 2.0 3.0 4.0\n", 1020 | "Weekday \n", 1021 | "Friday 286.75 525.00 440.60 304.00\n", 1022 | "Monday 277.50 387.00 510.25 350.80\n", 1023 | "Saturday 204.50 491.50 309.20 267.75\n", 1024 | "Sunday 180.50 303.25 293.80 213.25\n", 1025 | "Thursday 288.80 927.25 548.50 396.25\n", 1026 | "Tuesday 308.80 510.75 694.75 438.20\n", 1027 | "Wednesday 296.80 628.00 568.50 323.50" 1028 | ] 1029 | }, 1030 | "execution_count": 72, 1031 | "metadata": {}, 1032 | "output_type": "execute_result" 1033 | } 1034 | ], 1035 | "source": [ 1036 | "pd.pivot_table(data=lytics, index='Weekday', columns='Month')" 1037 | ] 1038 | }, 1039 | { 1040 | "cell_type": "code", 1041 | "execution_count": null, 1042 | "metadata": {}, 1043 | "outputs": [], 1044 | "source": [ 1045 | "\n" 1046 | ] 1047 | } 1048 | ], 1049 | "metadata": { 1050 | "kernelspec": { 1051 | "display_name": "dataanalysis", 1052 | "language": "python", 1053 | "name": "dataanalysis" 1054 | }, 1055 | "language_info": { 1056 | "codemirror_mode": { 1057 | "name": "ipython", 1058 | "version": 3 1059 | }, 1060 | "file_extension": ".py", 1061 | "mimetype": "text/x-python", 1062 | "name": "python", 1063 | "nbconvert_exporter": "python", 1064 | "pygments_lexer": "ipython3", 1065 | "version": "3.7.2" 1066 | } 1067 | }, 1068 | "nbformat": 4, 1069 | "nbformat_minor": 2 1070 | } 1071 | -------------------------------------------------------------------------------- /lecturenotes/report_final.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | My Analysis 9 | 10 | 11 | 12 | 13 |
14 |

Best-selling Books with Great Ratings

15 |
16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 |
titleauthorsaverage_ratingratings_count
0Harry Potter and the Half-Blood Prince (Harry ...J.K. Rowling4.561944099
4Harry Potter and the Prisoner of Azkaban (Harr...J.K. Rowling4.552149872
1Harry Potter and the Order of the Phoenix (Har...J.K. Rowling4.491996446
2Harry Potter and the Sorcerer's Stone (Harry P...J.K. Rowling4.475629932
4455A Game of Thrones (A Song of Ice and Fire #1)George R.R. Martin4.451598396
5300Harry Potter and the Chamber of Secrets (Harry...J.K. Rowling4.412115562
6363The Book ThiefMarkus Zusak4.371410666
25The Fellowship of the Ring (The Lord of the Ri...J.R.R. Tolkien4.352009749
9319Where the Sidewalk EndsShel Silverstein4.301094416
2000The Hobbit or There and Back AgainJ.R.R. Tolkien4.262364968
99 |
100 | 101 |
102 | 103 | 104 | 105 | 106 | --------------------------------------------------------------------------------