├── LICENSE ├── Module1 └── Getting_Started_with_Data_Analysis_Code │ ├── 2 │ ├── arrays.py │ ├── linearalgebra.py │ ├── loadsave.py │ └── randomnumbers.py │ ├── 3 │ ├── datastructures.py │ └── person.csv │ ├── 4 │ ├── Makefile │ ├── README.md │ ├── annotate.py │ ├── axis.py │ ├── bar.py │ ├── better.py │ ├── contour.py │ ├── df.py │ ├── histogram.py │ ├── intro.py │ ├── legends.py │ ├── line.py │ ├── linestyle.py │ ├── matlab.py │ ├── morelegends.py │ ├── nox.py │ ├── pandasplot.py │ ├── scatter.py │ ├── series.py │ ├── subplot.py │ └── subs.py │ ├── 5 │ ├── Makefile │ ├── random_walk.png │ └── timeseries.py │ ├── 6 │ ├── ex_06-01.txt │ ├── ex_06-02.out │ ├── ex_06-02.txt │ ├── ex_06-03.txt │ ├── ex_06_03.out │ └── ex_06_04.out │ ├── 7 │ ├── Makefile │ ├── README.md │ ├── analysis.py │ ├── cities.tsv │ ├── clean_rbrted.py │ ├── makedata.py │ ├── rbrted.csv │ ├── rbrted_cleaned.csv │ ├── small.csv │ └── sunshine.tsv │ ├── 8 │ ├── .DS_Store │ ├── Makefile │ └── ml.py │ ├── Hints_And_Answers.docx │ ├── Hints_And_Answers.pdf │ ├── README.md │ ├── chapter2-data.txt │ └── requirements.txt ├── Module2 └── Python_Data_Analysis_code │ ├── Chapter 1 │ └── src1 │ │ ├── configure_matplotlib.ipynb │ │ ├── configure_numpy.py │ │ ├── configure_pd.py │ │ ├── install_ch1.py │ │ ├── ipython_history.py │ │ ├── log_demo.py │ │ └── report_weather.py │ ├── Chapter 10 │ ├── bagging.npy │ ├── boosting.npy │ ├── ch10util.py │ ├── conf_matrix.ipynb │ ├── dautil.json │ ├── default.npy │ ├── dummy_clf.ipynb │ ├── dummy_reg.ipynb │ ├── entropy.npy │ ├── etr.npy │ ├── evaluating_clusters.ipynb │ ├── kappa.ipynb │ ├── mae_rss.ipynb │ ├── mape_mpe.ipynb │ ├── matthews_correlation.ipynb │ ├── mse.ipynb │ ├── precision_recall.ipynb │ ├── rain_X_test.npy │ ├── rain_X_train.npy │ ├── rain_y_test.npy │ ├── rain_y_train.npy │ ├── random.npy │ ├── ransac.npy │ ├── rfc.npy │ ├── roc_auc.ipynb │ ├── stacking.npy │ ├── temp_X_test.npy │ ├── temp_X_train.npy │ ├── temp_y_test.npy │ ├── temp_y_train.npy │ 
├── visualizing_goodness.ipynb │ └── votes.npy │ ├── Chapter 11 │ ├── applying_sift.ipynb │ ├── applying_surf.ipynb │ ├── clustering_hierarchy.ipynb │ ├── clustering_spectral.ipynb │ ├── covers.jpg │ ├── dautil.json │ ├── denoising_images.ipynb │ ├── detecting_faces.ipynb │ ├── extracting_patches.ipynb │ ├── extracting_texture.ipynb │ ├── img_metadata.py │ ├── quantizing_colors.ipynb │ └── searching_stars.ipynb │ ├── Chapter 12 │ ├── accessing_asyncio.ipynb │ ├── caching_lru.ipynb │ ├── caching_requests.ipynb │ ├── calculating_moments.ipynb │ ├── ch12util.py │ ├── compiling_numba.ipynb │ ├── dautil.json │ ├── distributing_execnet.ipynb │ ├── launching_futures.ipynb │ ├── mem_test.py │ ├── opencl_demo.ipynb │ ├── profiling_memory.ipynb │ ├── running_threads.ipynb │ ├── speeding_numexpr.ipynb │ └── stream_demo.ipynb │ ├── Chapter 2 │ ├── .DS_Store │ ├── Interactive.ipynb │ ├── anscombe.ipynb │ ├── choosing_colormaps.ipynb │ ├── choosing_palettes.ipynb │ ├── heat_map.ipynb │ ├── highlighting_influence.ipynb │ ├── hive_plot.ipynb │ ├── joblib │ │ ├── .DS_Store │ │ └── dautil │ │ │ ├── .DS_Store │ │ │ └── data │ │ │ ├── .DS_Store │ │ │ ├── download │ │ │ ├── .DS_Store │ │ │ ├── 490f7733de2a3f66c8b49c154e0df09d │ │ │ │ ├── metadata.json │ │ │ │ ├── output.pkl │ │ │ │ ├── output.pkl_01.npy │ │ │ │ ├── output.pkl_02.npy │ │ │ │ ├── output.pkl_03.npy │ │ │ │ ├── output.pkl_04.npy │ │ │ │ ├── output.pkl_05.npy │ │ │ │ └── output.pkl_06.npy │ │ │ ├── eb23268c45517fc462e55f43647b54ba │ │ │ │ ├── metadata.json │ │ │ │ ├── output.pkl │ │ │ │ ├── output.pkl_01.npy │ │ │ │ ├── output.pkl_02.npy │ │ │ │ ├── output.pkl_03.npy │ │ │ │ ├── output.pkl_04.npy │ │ │ │ ├── output.pkl_05.npy │ │ │ │ └── output.pkl_06.npy │ │ │ └── func_code.py │ │ │ └── get_countries │ │ │ ├── .DS_Store │ │ │ ├── ec5eb565b58ae8352bf85a3c8b37dcf4 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ └── func_code.py │ ├── mpld3_demo.ipynb │ ├── plot_map.ipynb │ ├── scatter_matrix.ipynb │ ├── using_ggplot.ipynb 
│ └── violins.ipynb │ ├── Chapter 3 │ ├── anova.ipynb │ ├── bayes_confidence.ipynb │ ├── correlating_pearson.ipynb │ ├── correlating_pointbiserial.ipynb │ ├── correlating_spearman.ipynb │ ├── dautil.json │ ├── determining_bias.ipynb │ ├── extreme_values.ipynb │ ├── fitting_expon.ipynb │ ├── fitting_gamma.ipynb │ ├── fitting_poisson.ipynb │ ├── joblib │ │ └── dautil │ │ │ └── data │ │ │ └── download │ │ │ ├── 1e0ecc166b05634fc4a48fe36993890d │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ ├── output.pkl_01.npy │ │ │ ├── output.pkl_02.npy │ │ │ ├── output.pkl_03.npy │ │ │ ├── output.pkl_04.npy │ │ │ ├── output.pkl_05.npy │ │ │ └── output.pkl_06.npy │ │ │ └── func_code.py │ ├── kernel_density_estimation.ipynb │ └── sampling_weights.ipynb │ ├── Chapter 4 │ ├── central_tendency.ipynb │ ├── dautil.json │ ├── joblib │ │ ├── .DS_Store │ │ └── dautil │ │ │ ├── .DS_Store │ │ │ └── data │ │ │ ├── .DS_Store │ │ │ ├── download │ │ │ ├── 4c6627242bf3b15c57d61133efcf3d7f │ │ │ │ ├── metadata.json │ │ │ │ ├── output.pkl │ │ │ │ ├── output.pkl_01.npy │ │ │ │ ├── output.pkl_02.npy │ │ │ │ ├── output.pkl_03.npy │ │ │ │ ├── output.pkl_04.npy │ │ │ │ ├── output.pkl_05.npy │ │ │ │ └── output.pkl_06.npy │ │ │ └── func_code.py │ │ │ └── get_countries │ │ │ ├── .DS_Store │ │ │ ├── acee962592427bb9e5792478f1e8a5c0 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ └── func_code.py │ ├── mpmath_fit.ipynb │ ├── mpmath_linalg.ipynb │ ├── normalizing_boxcox.ipynb │ ├── outliers.ipynb │ ├── rebinning_data.ipynb │ ├── rlm_demo.ipynb │ ├── transforming_down.ipynb │ ├── transforming_ratios.ipynb │ ├── transforming_up.ipynb │ ├── weighted_ls.ipynb │ └── winsorising_data.ipynb │ ├── Chapter 5 │ ├── 460_cc_phantomjs.html │ ├── 468_live_phantomjs.html │ ├── app.py │ ├── clustering_spark.py │ ├── download_html.py │ ├── impl_association.py │ ├── processing_html.py │ ├── saved_urls.csv │ ├── star_schema.py │ ├── streaming_clustering.py │ ├── templates │ │ └── admin.html │ ├── 
test_simulating_browsing.py │ └── test_widget.ipynb │ ├── Chapter 6 │ ├── analyzing_audio.ipynb │ ├── analyzing_dct.ipynb │ ├── analyzing_peaks.ipynb │ ├── block_boot.ipynb │ ├── ch6util.py │ ├── dautil.json │ ├── discrete_wavelet.ipynb │ ├── estimating_welch.ipynb │ ├── eval_smooth.ipynb │ ├── exp_smoothing.ipynb │ ├── joblib │ │ ├── .DS_Store │ │ └── ch6util │ │ │ ├── .DS_Store │ │ │ └── read_wav │ │ │ ├── .DS_Store │ │ │ ├── 08d116bec02752b026577cdc25ee6ebe │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ └── output.pkl_01.npy │ │ │ └── func_code.py │ ├── lomb_scargle.ipynb │ ├── moving_boot.ipynb │ ├── periodograms.ipynb │ └── phase_synchrony.ipynb │ ├── Chapter 7 │ ├── autoregressive_test.ipynb │ ├── calmar_sortino.ipynb │ ├── capm.ipynb │ ├── ch7util.py │ ├── correlating_market.ipynb │ ├── database_tables.py │ ├── dautil.json │ ├── non_parametric.ipynb │ ├── populate_database.ipynb │ ├── portfolio_optimization.ipynb │ ├── random_walk.ipynb │ ├── rets_stats.ipynb │ ├── sharpe_liquidity.ipynb │ └── simple_log_rets.ipynb │ ├── Chapter 8 │ ├── .DS_Store │ ├── .ropeproject │ │ ├── config.py │ │ ├── globalnames │ │ ├── history │ │ └── objectdb │ ├── 46_bbc_world.txt │ ├── assortativity.ipynb │ ├── avg_clustering.ipynb │ ├── between_centrality.ipynb │ ├── ch8util.py │ ├── clique_number.py │ ├── close_centrality.ipynb │ ├── corpus.py │ ├── cos_similarity.ipynb │ ├── dautil.json │ ├── dautil_log.conf │ ├── ghostdriver.log │ ├── joblib │ │ ├── .DS_Store │ │ └── ch8util │ │ │ ├── .DS_Store │ │ │ └── filter │ │ │ ├── 091aa2b4f103f69ebb3769a159072aef │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 0a11e4ca889a28ffd0e115a993c73a85 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 0b2300f23f9db4dd9b64f057f6a59671 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 0b3748ca6a09f1034e8375c137acad90 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 15ed994e3e2f8a175ecd8a6411b4e915 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 
1b16b4736f53e409cd51d2b024dd2837 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 252c07dd9428eaaee00f37250b86b075 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 27266a1224539dda8511a94c27881f84 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 2daf7bd2df433ab1f1529c55bb0cba96 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 311c193bc050a456bbeca1b9ec1868c0 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 546f872ffd4d4aa5568807b844e76997 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 5627bb9c842cef425591dffa6af7bf8c │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 6130115cc23345d1ff896293f1dbf82e │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 6423924a61dcb0099518b0bb1f7ae5c5 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 66575d521c2999bb7249207b34e869e7 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 6fd448966c3953af632b386ac6537858 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 70cc93c760b8c2003ba5e5bbdf14436b │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 8779155bccf999a5dca00f0228ba4e21 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 8b5edc7dbcd55b96139bd76738e905f8 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 8b9c45d590dd16a4380302a0d9ecb80c │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 8c66f2c5483c5fbc1a63dc043674c36c │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 981a2017e77f6397b9d49f09b460440f │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 9d217e5d143e99f8f96449634805edd2 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── 9d59f4a3078ecc6850b9f98ebcd9d726 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── ae88995f75aaa74d35fa79d942e8cbad │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── b0ba8193711d4e36d2d642b42413b06a │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── b363f97c554e812e3c61a1bb7bab3e92 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── b415598458273f80f0d2951dfe1c6c1f │ │ │ ├── metadata.json │ │ │ 
└── output.pkl │ │ │ ├── b8551fbd2207b856e1c26a62a980c511 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── bb029ea61ec1818e4551bc882ab41b45 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── bb747672f83a7465f3d4b3186998566f │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── bdaaefd6b7284b0f25970e8c9ac59936 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── beda652e99146dd4de54264f70fe4f81 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── c156c0f3db2a50802319bc66903e60dc │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── d6567a720fdaabffd38137c09dd7dcb6 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── d7509e13f1b96b21b83e7376567ba82b │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── dc8c1076d71a9035579c835320061e9c │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── e5a822d3724b82d348f48d5f91e3abbc │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── e99684cea5697ada54ba64d1337308a2 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── eb96ea4e13a34deb43ce953fae303809 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── f5a882d269a9e9a31881d6620437267e │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── f75cf54d64d1978576b7fc371eba21f3 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── feb56b72be37bd8aae0ed3c2fae0342d │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ ├── fee34b871486a31cfc40acdc234ddec4 │ │ │ ├── metadata.json │ │ │ └── output.pkl │ │ │ └── func_code.py │ ├── named_entity.py │ ├── net_density.ipynb │ ├── stemming_lemma.py │ ├── terms_database.py │ ├── tfidf.pkl │ ├── tfidf_df.pkl │ ├── tokenizing.py │ └── topic_extraction.py │ ├── Chapter 9 │ ├── applying_lda.ipynb │ ├── applying_pca.ipynb │ ├── bagging.ipynb │ ├── boosting.ipynb │ ├── ch9util.py │ ├── clustering_hierarchy.ipynb │ ├── dautil.json │ ├── feature_elimination.py │ ├── fit_ransac.ipynb │ ├── joblib │ │ ├── .DS_Store │ │ ├── __main__--usr-data-__ipython-input__ │ │ │ └── get_scores │ │ │ │ └── func_code.py │ │ └── ch9util │ │ │ ├── 
.DS_Store │ │ │ ├── learn │ │ │ ├── .DS_Store │ │ │ ├── 2364399946f87e46b30ce16478bf3ce1 │ │ │ │ ├── metadata.json │ │ │ │ ├── output.pkl │ │ │ │ ├── output.pkl_01.npy │ │ │ │ ├── output.pkl_02.npy │ │ │ │ └── output.pkl_03.npy │ │ │ ├── 908907c90d168a085e45ec93017d629b │ │ │ │ ├── metadata.json │ │ │ │ ├── output.pkl │ │ │ │ ├── output.pkl_01.npy │ │ │ │ ├── output.pkl_02.npy │ │ │ │ └── output.pkl_03.npy │ │ │ ├── a7795a8401df1f89aabdcc6094811fdc │ │ │ │ ├── metadata.json │ │ │ │ ├── output.pkl │ │ │ │ ├── output.pkl_01.npy │ │ │ │ ├── output.pkl_02.npy │ │ │ │ └── output.pkl_03.npy │ │ │ ├── db5ea238322c82871114a427a27c0695 │ │ │ │ ├── metadata.json │ │ │ │ ├── output.pkl │ │ │ │ ├── output.pkl_01.npy │ │ │ │ ├── output.pkl_02.npy │ │ │ │ └── output.pkl_03.npy │ │ │ ├── f3fbfae88102d9f80ac37e0a9b577931 │ │ │ │ ├── metadata.json │ │ │ │ ├── output.pkl │ │ │ │ ├── output.pkl_01.npy │ │ │ │ ├── output.pkl_02.npy │ │ │ │ └── output.pkl_03.npy │ │ │ └── func_code.py │ │ │ └── validate │ │ │ ├── .DS_Store │ │ │ ├── 09e28e76166ff8695c81b0f36ed3ae0f │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ ├── output.pkl_01.npy │ │ │ └── output.pkl_02.npy │ │ │ ├── 0d5fa964789929ae2cd7af18c8155313 │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ ├── output.pkl_01.npy │ │ │ └── output.pkl_02.npy │ │ │ ├── 11583eb1dd321a43fbe1f8f4b54ec1e4 │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ ├── output.pkl_01.npy │ │ │ └── output.pkl_02.npy │ │ │ ├── 31d50c1236c9d1e74d3edf0c6de54969 │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ ├── output.pkl_01.npy │ │ │ └── output.pkl_02.npy │ │ │ ├── 3be3ec0fce5e7391b8dbece404bfeec4 │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ ├── output.pkl_01.npy │ │ │ └── output.pkl_02.npy │ │ │ ├── 6bf9b01d043ee447829a5dc2d9493f29 │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ ├── output.pkl_01.npy │ │ │ └── output.pkl_02.npy │ │ │ ├── 8b592e0d5102bc9bc1ccb9910d83bb87 │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ ├── output.pkl_01.npy │ 
│ │ └── output.pkl_02.npy │ │ │ ├── b6d29b67208417fdc0fca870891a84b6 │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ ├── output.pkl_01.npy │ │ │ └── output.pkl_02.npy │ │ │ ├── e7e857409615695248a36145824f59c7 │ │ │ ├── metadata.json │ │ │ ├── output.pkl │ │ │ ├── output.pkl_01.npy │ │ │ └── output.pkl_02.npy │ │ │ └── func_code.py │ ├── nested_cv.ipynb │ ├── rain_pot.py │ ├── random_forest.ipynb │ ├── reusing_models.py │ ├── stacking_multiple.ipynb │ └── theano_tour.ipynb │ ├── Readme.TXT │ └── Software and hardware list.docx ├── Module3 └── Mastering_Python_Data_Analysis_Code │ ├── Appendix │ ├── B03551_Appendix_MP.ipynb │ ├── helpfunctions.py │ ├── mystyle.mplstyle │ ├── test.png │ └── test_markdown_file.md │ ├── Chapter 1 │ ├── B03551_01_code.ipynb │ ├── data │ │ ├── LICENSE.txt │ │ ├── README.md │ │ ├── movies.dat │ │ └── ratings.dat │ └── mystyle.mplstyle │ ├── Chapter 2 │ ├── B03551_02_code.ipynb │ ├── data │ │ ├── GSS2012merged.csv │ │ ├── GSS2012merged_R5.dta │ │ ├── GSS_Codebook_index.pdf │ │ ├── Release Notes for the GSS 2012 Merged R5.pdf │ │ └── hubble.csv │ └── mystyle.mplstyle │ ├── Chapter 3 │ ├── .ipynb_checkpoints │ │ ├── B03551_03_code-checkpoint.ipynb │ │ └── Chapter 3 Examples-checkpoint.ipynb │ ├── B03551_03_code.ipynb │ ├── data │ │ └── housefly-wing-lengths.txt │ ├── mystyle.mplstyle │ └── old │ │ ├── .ipynb_checkpoints │ │ └── Chapter 3 Examples-checkpoint.ipynb │ │ ├── Chapter 3 Examples.ipynb │ │ └── housefly-wing-lengths.txt │ ├── Chapter 4 │ ├── B03551_04_code.ipynb │ ├── data │ │ ├── country_centroids │ │ │ ├── country_centroids_README.xml │ │ │ ├── country_centroids_all.csv │ │ │ └── country_centroids_primary.csv │ │ ├── cow.csv │ │ ├── data_ch4.h5 │ │ ├── who_suicide_rates.csv │ │ └── who_suicide_rates.csv_backup │ ├── data_ch4.h5 │ ├── mystyle.mplstyle │ └── who_suicide_rates.csv │ ├── Chapter 5 │ ├── chapter_5_code.ipynb │ ├── data │ │ ├── cholera_deaths.txt │ │ ├── cholera_pumps.txt │ │ ├── data_ch4.h5 │ │ ├── 
data_ch5_clustering.h5 │ │ ├── data_ch5_clustering.pick │ │ ├── uzcJ2000.tab.gz │ │ └── uzcJ2000.tab │ │ │ └── uzcJ2000.tab │ └── mystyle.mplstyle │ ├── Chapter 6 │ ├── B03551_06_code.ipynb │ ├── data │ │ ├── AviationData.txt │ │ ├── co2_annmean_gl.txt │ │ ├── co2_gr_gl.txt │ │ ├── lawdome.smoothed.yr75.dat │ │ └── siple2.013.dat │ └── mystyle.mplstyle │ ├── Chapter 7 │ ├── B03551_07_code.ipynb │ ├── data │ │ ├── co2_gr_gl.txt │ │ ├── seeds.desc │ │ └── seeds_dataset.txt │ └── mystyle.mplstyle │ ├── Chapter 8 │ ├── B03551_08_code.ipynb │ ├── data │ │ ├── mean-daily-temperature-fisher-river.csv │ │ └── monthly-car-sales-in-quebec-1960.csv │ └── mystyle.mplstyle │ └── Software Hardware List.pdf └── Readme.md /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/2/arrays.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | """ 5 | Numpy arrays 6 | """ 7 | 8 | import numpy as np 9 | 10 | p = np.array([48.858598, 2.294495]) 11 | print(p.ndim) 12 | print(p.shape) 13 | print(p.dtype) 14 | 15 | # Data type 16 | a = np.array([1, 2, 3, 4]) 17 | print(a.dtype) 18 | 19 | float_b = a.astype(np.float64) 20 | print(float_b.dtype) 21 | 22 | # Array creation 23 | a = np.arange(7) 24 | print(a) 25 | print(a[1], a[4], a[-1]) 26 | 27 | a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) 28 | print(a[0, 2]) 29 | 30 | a[0, 2] = 10 31 | print(a) 32 | 33 | b = a[2] 34 | print(b) 35 | 36 | c = a[:2] 37 | print(c) 38 | 39 | b[-1] = 11 40 | print(a) 41 | 42 | # Fancy indexing 43 | a = np.array([3, 5, 1, 10]) 44 | b = (a % 5 == 0) 45 | print(b) 46 | 47 | c = np.array([[0, 1], [2, 3], [4, 5], [6, 7]]) 48 | print(c[b]) 49 | 50 | a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]]) 51 | print(a[[2, 1]]) 52 | print(a[[-2, -1]]) 53 | print(a[[2, 3], [0, 1]]) 54 | 55 | # Numerical operations on arrays 56 | a = np.ones(4) 57 | print(a * 2) 58 | print(a + 3) 59 | 60 | a = np.ones([2, 4]) 61 | print(a * a) 62 | print(a + a) 63 | 64 | a = np.array([1, 2, 3, 4]) 65 | b = np.array([1, 1, 5, 3]) 66 | 67 | print(a == b) 68 | print(np.array_equal(a, b)) 69 | 70 | c = np.array([1, 0]) 71 | d = np.array([1, 1]) 72 | print(np.logical_and(c, d)) 73 | 74 | # Array functions 75 | a = np.array([[0, 5, 10], [20, 25, 30]]) 76 | print(a.reshape(3, 2)) 77 | print(a.T) 78 | 79 | a = np.array([[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]) 80 | print(a.swapaxes(1, 2)) 81 | 82 | a = np.array([[1, 2, 3],[4,5,6]]) 83 | print(np.dot(a.T, a)) 84 | 85 | a = np.array ([[6, 34, 1, 6], [0, 5, 2, -1]]) 86 | print(np.sort(a)) 87 | 
print(np.sort(a, axis=0)) 88 | 89 | b = np.argsort(a) 90 | print(b) 91 | print(a[0][b[0]]) 92 | print(np.argmax(a)) -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/2/linearalgebra.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import numpy as np 5 | 6 | A = np.array([[1, 4, 6], 7 | [5, 2, 2], 8 | [-1, 6, 8]]) 9 | 10 | w, v = np.linalg.eig(A) 11 | print(w) 12 | print(v) 13 | 14 | A = np.array([[1, 4, 6], [5, 2, 2], [-1, 6, 8]]) 15 | b = np.array([[1], [2], [3]]) 16 | x = np.linalg.solve(A, b) 17 | print(x) 18 | 19 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/2/loadsave.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import numpy as np 5 | 6 | # Saving 7 | a = np.array([[0, 1, 2], [3, 4, 5]]) 8 | np.save('test1.npy', a) 9 | 10 | a = np.arange(4) 11 | b = np.arange(7) 12 | 13 | np.savez('test2.npz', arr0=a, arr1=b) 14 | 15 | # Loading 16 | dic = np.load('test2.npz') 17 | print(dic['arr0']) 18 | 19 | x = np.arange(4) 20 | np.savetxt('test3.out', x, delimiter=',') 21 | 22 | print(np.load('test1.npy')) 23 | print(np.loadtxt('test3.out', delimiter=',')) 24 | 25 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/2/randomnumbers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import numpy as np 5 | 6 | np.random.seed(20) 7 | print(np.random.rand(5)) 8 | print(np.random.rand(5)) 9 | 10 | np.random.seed(20) 11 | print(np.random.rand(5)) 12 | 13 | print(np.random.randint(10, 20, 5)) 14 | 15 | a = np.arange(10) 16 | np.random.shuffle(a) 17 | print(a) 18 | 19 | 
-------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/3/person.csv: -------------------------------------------------------------------------------- 1 | name,age,career,province,sex 2 | Peter,16,pupil,TN,M 3 | Mary,21,student,SG,F 4 | Nam,22,student,HN,M 5 | Mai,31,nurse,SG,F 6 | John,28,lawer,SG,M 7 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/Makefile: -------------------------------------------------------------------------------- 1 | .SUFFIXES: .py .png 2 | 3 | TARGETS = \ 4 | annotate.png \ 5 | axis.png \ 6 | bar.png \ 7 | better.png \ 8 | contour.png \ 9 | df.png \ 10 | histogram.png \ 11 | intro.png \ 12 | legends.png \ 13 | line.png \ 14 | linestyle.png \ 15 | matlab.png \ 16 | morelegends.png \ 17 | nox.png \ 18 | pandasplot.png \ 19 | scatter.png \ 20 | series.png \ 21 | subplot.png \ 22 | subs.png \ 23 | 24 | all: $(TARGETS) 25 | 26 | .py.png: 27 | python $< 28 | 29 | clean: 30 | rm -f *.png 31 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/README.md: -------------------------------------------------------------------------------- 1 | Chapter 4 2 | ========= 3 | 4 | Run: 5 | 6 | $ make 7 | 8 | to generate all images and 9 | 10 | $ make clean 11 | 12 | to delete the generated PNG files. 
13 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/annotate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(-2.4, 0.4, 20) 6 | y = x * x + 2 * x + 1 7 | plt.plot(x, y, 'c', linewidth=2.0) 8 | plt.text(-1.5, 1.8, 'y=x^2 + 2*x + 1', 9 | fontsize=14, style='italic') 10 | plt.annotate('minima point', xy=(-1, 0), 11 | xytext=(-1, 0.3), horizontalalignment='center', 12 | verticalalignment='top', 13 | arrowprops=dict(arrowstyle='->', 14 | connectionstyle='arc3')) 15 | 16 | plt.savefig('annotate.png') -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/axis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(0, 3, 6) 6 | y = np.power(x, 2) 7 | 8 | plt.plot(y) 9 | plt.xlabel('x') 10 | plt.ylabel('y') 11 | plt.title('Plot y value without given x values') 12 | plt.axis([0, 7, 0, 10]) 13 | plt.savefig('axis.png') 14 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/bar.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | X = np.arange(5) 6 | Y = 3.14 + 2.71 * np.random.rand(5) 7 | 8 | plt.subplots(2) 9 | 10 | plt.subplot(211) 11 | plt.bar(X, Y, align='center', alpha=0.4, color='y') 12 | plt.xlabel('x') 13 | plt.ylabel('y') 14 | plt.title('bar plot in vertical') 15 | 16 | plt.subplot(212) 17 | plt.barh(X, Y, align='center', alpha=0.4, color='c') 18 | plt.xlabel('x') 19 | plt.ylabel('y') 20 | plt.title('bar plot in horizontal') 
21 | 22 | plt.savefig('bar.png') 23 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/better.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(0, 3, 6) 6 | y = np.power(x, 2) 7 | 8 | plt.axis([0, 6, 0, 10]) 9 | plt.plot(x, y) 10 | plt.xlabel('x') 11 | plt.ylabel('y') 12 | plt.title('Data Visualization using Pyplot from Matplotlib') 13 | 14 | plt.savefig('better.png') 15 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/contour.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(-1, 1, 255) 6 | y = np.linspace(-2, 2, 300) 7 | 8 | z = np.sin(y[:, np.newaxis] * np.cos(x)) 9 | 10 | plt.contour(x, y, z, 255, linewidths=2) 11 | plt.savefig('contour.png') 12 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/df.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | 6 | data = {'Median_Age': [24.2, 26.4, 28.5, 30.3], 7 | 'Density': [244, 256, 268, 279]} 8 | 9 | index_label = ['2000', '2005', '2010', '2014']; 10 | 11 | df1 = pd.DataFrame(data, index=index_label) 12 | df1.plot(kind='bar', subplots=True, sharex=True) 13 | 14 | plt.tight_layout() 15 | plt.savefig('df.png') 16 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/histogram.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | mu = 100 6 | sigma = 25 7 | fig, (ax0, ax1) = plt.subplots(ncols = 2) 8 | x = mu + sigma * np.random.randn(1000) 9 | ax0.hist(x, 20, density=True, histtype='stepfilled', facecolor='g', alpha=0.75) 10 | ax0.set_title('Stepfilled histogram') 11 | ax1.hist(x, bins=[100,150, 165, 170, 195], density=True, histtype='bar', rwidth=0.8) 12 | ax1.set_title('uniquel bins histogram') 13 | plt.tight_layout() #automatically adjust subplot parameters to give specified padding 14 | plt.savefig('histogram.png') 15 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/intro.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(0, 3, 6) 6 | y = np.power(x, 2) 7 | 8 | plt.plot(x, y) 9 | plt.xlabel('x') 10 | plt.ylabel('y') 11 | plt.title('Data Visualization using Pyplot from Matplotlib') 12 | plt.savefig('intro.png') 13 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/legends.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(0, 1, 20); 6 | y1 = np.sin(x) 7 | y2 = np.cos(x) 8 | y3 = np.tan(x) 9 | plt.plot(x, y1, 'c', label='y=sin(x)') 10 | plt.plot(x, y2, 'y', label='y=cos(x)') 11 | plt.plot(x, y3, 'r', label='y=tan(x)') 12 | plt.legend(loc='upper left') 13 | plt.savefig('legends.png') 14 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/line.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as 
plt 3 | import numpy as np 4 | 5 | x = np.linspace(0, 3, 6) 6 | y = np.power(x, 2) 7 | 8 | plt.plot(x*2, 'g^', x*3, 'rs', x**x, 'y-') 9 | 10 | plt.xlabel('x') 11 | plt.ylabel('y') 12 | plt.title('Plot y value without given x values') 13 | plt.axis([0, 6, 0, 30]) 14 | plt.savefig('line.png') 15 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/linestyle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(0, 3, 6) 6 | y = np.power(x, 2) 7 | 8 | line, = plt.plot(y, color='red', linewidth=2.0) 9 | line.set_linestyle('--') 10 | plt.setp(line, marker='o') 11 | 12 | plt.savefig('linestyle.png') 13 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/matlab.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | 3 | from numpy import * 4 | from matplotlib.pyplot import * 5 | 6 | x = linspace(0, 3, 6) 7 | y = power(x, 2) 8 | 9 | figure() 10 | plot(x, y, 'r') 11 | xlabel('x') 12 | ylabel('y') 13 | title('Data visualization with MATLAB-like API') 14 | 15 | savefig('matlab.png') 16 | 17 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/morelegends.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(0, 1, 20); 6 | y1 = np.sin(x) 7 | y2 = np.cos(x) 8 | y3 = np.tan(x) 9 | 10 | p1 = plt.plot(x, y1, 'c', label='y=sin(x)') 11 | p2 = plt.plot(x, y2, 'y', label='y=cos(x)') 12 | p3 = plt.plot(x, y3, 'r', label='y=tan(x)') 13 | 14 | lsin = plt.legend(handles=p1, loc='lower right') 15 | lcos = 
plt.legend(handles=p2, loc='upper left') 16 | ltan = plt.legend(handles=p3, loc='upper right') 17 | 18 | fig = plt.gcf() 19 | 20 | fig.gca().add_artist(lsin) 21 | fig.gca().add_artist(lcos) 22 | 23 | plt.tight_layout() 24 | 25 | plt.savefig('morelegends.png') 26 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/nox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(0, 3, 6) 6 | y = np.power(x, 2) 7 | 8 | plt.plot(y) 9 | plt.xlabel('x') 10 | plt.ylabel('y') 11 | plt.title('Plot y value without given x values') 12 | plt.savefig('nox.png') 13 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/pandasplot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import pandas as pd 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | s = pd.Series(np.random.normal(10, 8, 20)) 9 | s.plot(style='ko-', alpha=0.4, label='Series plotting') 10 | plt.legend() 11 | 12 | plt.savefig('pandasplot.png') 13 | 14 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/scatter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | X = np.random.normal(0, 1, 1000) 6 | Y = np.random.normal(0, 1, 1000) 7 | 8 | plt.scatter(X, Y, c=['b', 'g', 'k', 'r', 'c']) 9 | # s = np.random.randint(10, 100) 10 | plt.savefig('scatter.png') 11 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/series.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | 6 | s = pd.Series(np.random.normal(10, 8, 20)) 7 | s.plot(style='ko-', alpha=0.4) 8 | plt.legend(['Series plotting']) 9 | 10 | plt.savefig('series.png') 11 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/subplot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(0, 3, 6) 6 | y = np.power(x, 2) 7 | 8 | plt.figure('a') 9 | 10 | plt.subplot(221) 11 | plt.plot(y + y, 'r--') 12 | 13 | plt.subplot(222) 14 | plt.plot(y * 3, 'ko') 15 | 16 | plt.subplot(223) 17 | plt.plot(y * y, 'b^') 18 | 19 | plt.subplot(224) 20 | 21 | # alter things after the fact 22 | plt.figure('a') 23 | plt.subplot(222) 24 | plt.title('Visualization of y * 3') 25 | 26 | plt.savefig('subplot.png') 27 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/4/subs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | x = np.linspace(0, 3, 6) 6 | y = np.power(x, 2) 7 | 8 | # another figure 9 | plt.figure('b') 10 | ax1 = plt.axes([0.05, 0.1, 0.4, 0.32]) 11 | ax2 = plt.axes([0.52, 0.1, 0.4, 0.32]) 12 | ax3 = plt.axes([0.05, 0.53, 0.87, 0.44]) 13 | 14 | plt.savefig('subs.png') 15 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/5/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python timeseries.py 3 | 4 | clean: 5 | rm -f time_series_*.png 6 | 
-------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/5/random_walk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module1/Getting_Started_with_Data_Analysis_Code/5/random_walk.png -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/6/ex_06-01.txt: -------------------------------------------------------------------------------- 1 | Name,age,major_id,sex,hometown 2 | Nam,7,1,male,hcm 3 | Mai,11,1,female,hcm 4 | Lan,25,3,female,hn 5 | Hung,42,3,male,tn 6 | Nghia,26,3,male,dn 7 | Vinh,39,3,male,vl 8 | Hong,28,4,female,dn -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/6/ex_06-02.out: -------------------------------------------------------------------------------- 1 | 0;1;2;3;4 2 | Nam;7;1;male;hcm 3 | Mai;11;1;female;hcm 4 | Lan;25;3;female;hn 5 | Hung;42;3;male;tn 6 | Nghia;26;3;male;dn 7 | Vinh;39;3;male;vl 8 | Hong;28;4;female;dn 9 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/6/ex_06-02.txt: -------------------------------------------------------------------------------- 1 | Nam 7 1 male hcm 2 | Mai 11 1 female hcm 3 | Lan 25 3 female hn 4 | Hung 42 3 male tn 5 | Nghia 26 3 male dn 6 | Vinh 39 3 male vl 7 | Hong 28 4 female dn -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/6/ex_06-03.txt: -------------------------------------------------------------------------------- 1 | Nam 7 1 male hcm 2 | Mai 11 1 female hcm 3 | Lan 25 3 female hn 4 | Hung 42 3 male tn single 5 | Nghia 26 3 male dn single 6 | Vinh 39 3 
male vl 7 | Hong 28 4 female dn -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/6/ex_06_03.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module1/Getting_Started_with_Data_Analysis_Code/6/ex_06_03.out -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/6/ex_06_04.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module1/Getting_Started_with_Data_Analysis_Code/6/ex_06_04.out -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/7/Makefile: -------------------------------------------------------------------------------- 1 | SHELL = /bin/bash 2 | 3 | all: rbrted_cleaned.csv sunshine.tsv 4 | 5 | rbrted_cleaned.csv: rbrted.csv 6 | python clean_rbrted.py 7 | 8 | clean: 9 | rm -f rbrted_cleaned.csv sunshine.tsv 10 | 11 | sunshine.tsv: 12 | python makedata.py > sunshine.tsv 13 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/7/README.md: -------------------------------------------------------------------------------- 1 | README 2 | ====== 3 | 4 | Chapter 7 code. 5 | 6 | The `analysis.py` file contains code from the chapter. 7 | 8 | The files: 9 | 10 | * clean_rbrted.py 11 | * makedata.py 12 | 13 | are helper scripts to clean some messy CSV ("Filtering") or to 14 | generate data ("Data Aggregation").
#!/usr/bin/env python

"""
Support code for "Data aggregation" subsection in Chapter 7.

Create some artificial data.

For every city of every country in ``citymap`` and for each of ten
consecutive days starting 2015-06-01, one tab-separated row is printed to
standard output: country, city, ISO date and a random integer in [0, 10].
"""

import datetime
import pandas as pd
import random

# Country -> list of cities used to label the generated rows.
# NOTE(review): 'Bordeax' looks like a misspelling of 'Bordeaux'; it is left
# unchanged here because previously generated data may contain it -- confirm.
citymap = {
    'Germany': ['Hamburg', 'Munich', 'Berlin', 'Leipzig', 'Frankfurt'],
    'UK': ['London', 'Manchester', 'Glasgow', 'Birmingham', 'Edinburgh'],
    'France': ['Paris', 'Marseille', 'Lyon', 'Nice', 'Bordeax'],
}

# dict.items() works on both Python 2 and Python 3, whereas the previous
# dict.iteritems() call raises AttributeError on Python 3.
for country, cities in citymap.items():
    for city in cities:
        for date in pd.date_range('2015-06-01', periods=10):
            row = [country, city, str(date.date()), str(random.randint(0, 10))]
            print("\t".join(row))
-------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/Hints_And_Answers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module1/Getting_Started_with_Data_Analysis_Code/Hints_And_Answers.pdf -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/README.md: -------------------------------------------------------------------------------- 1 | Code bundle for 9781785285110: 2 | 3 | Getting Started with Data Analysis 4 | ================================== 5 | 6 | The code bundle contains most code from the chapter and additional helper 7 | files. 8 | 9 | To install the dependent libraries, run 10 | 11 | $ pip install -r requirements.txt 12 | 13 | using the `requirements.txt` that can be found alongside this README. 
14 | 15 | Contents: 16 | 17 | Code_Bundle_Getting_Started_with_Data_Analysis 18 | ├── 2 19 | │   ├── arrays.py 20 | │   ├── linearalgebra.py 21 | │   ├── loadsave.py 22 | │   └── randomnumbers.py 23 | ├── 3 24 | │   ├── datastructures.py 25 | │   └── person.csv 26 | ├── 4 27 | │   ├── Makefile 28 | │   ├── README.md 29 | │   ├── annotate.py 30 | │   ├── axis.py 31 | │   ├── bar.py 32 | │   ├── better.py 33 | │   ├── contour.py 34 | │   ├── df.py 35 | │   ├── histogram.py 36 | │   ├── intro.py 37 | │   ├── legends.py 38 | │   ├── line.py 39 | │   ├── linestyle.py 40 | │   ├── matlab.py 41 | │   ├── morelegends.py 42 | │   ├── nox.py 43 | │   ├── pandasplot.py 44 | │   ├── scatter.py 45 | │   ├── series.py 46 | │   ├── subplot.py 47 | │   └── subs.py 48 | ├── 5 49 | │   ├── Makefile 50 | │   ├── random_walk.png 51 | │   └── timeseries.py 52 | ├── 6 53 | │   ├── ex_06-01.txt 54 | │   ├── ex_06-02.out 55 | │   ├── ex_06-02.txt 56 | │   ├── ex_06-03.txt 57 | │   ├── ex_06_03.out 58 | │   └── ex_06_04.out 59 | ├── 7 60 | │   ├── Makefile 61 | │   ├── README.md 62 | │   ├── analysis.py 63 | │   ├── cities.tsv 64 | │   ├── clean_rbrted.py 65 | │   ├── makedata.py 66 | │   ├── rbrted.csv 67 | │   ├── rbrted_cleaned.csv 68 | │   ├── small.csv 69 | │   └── sunshine.tsv 70 | ├── 8 71 | │   ├── Makefile 72 | │   └── ml.py 73 | ├── README.md 74 | └── requirements.txt 75 | 76 | 7 directories, 50 files 77 | -------------------------------------------------------------------------------- /Module1/Getting_Started_with_Data_Analysis_Code/requirements.txt: -------------------------------------------------------------------------------- 1 | appnope==0.1.0 2 | decorator==4.0.2 3 | funcsigs==0.4 4 | gnureadline==6.3.3 5 | ipython==4.0.0 6 | ipython-genutils==0.1.0 7 | matplotlib==1.4.3 8 | mock==1.3.0 9 | nose==1.3.7 10 | numexpr==2.4.4 11 | numpy==1.9.2 12 | pandas==0.16.2 13 | path.py==8.1.1 14 | pbr==1.8.0 15 | pexpect==3.3 16 | pickleshare==0.5 17 | pymongo==3.0.3 18 | 
pyparsing==2.0.3 19 | python-dateutil==2.4.2 20 | pytz==2015.4 21 | redis==2.10.3 22 | scikit-learn==0.16.1 23 | scipy==0.16.0 24 | seaborn==0.6.0 25 | simplegeneric==0.8.1 26 | six==1.9.0 27 | tables==3.2.2 28 | traitlets==4.0.0 29 | wheel==0.24.0 30 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 1/src1/configure_numpy.py: -------------------------------------------------------------------------------- 1 | from sklearn.utils import check_random_state 2 | import numpy as np 3 | from dautil import options 4 | from dautil import log_api 5 | 6 | random_state = check_random_state(42) 7 | a = random_state.randn(5) 8 | 9 | random_state = check_random_state(42) 10 | b = random_state.randn(5) 11 | 12 | np.testing.assert_array_equal(a, b) 13 | 14 | printer = log_api.Printer() 15 | printer.print("Default options", np.get_printoptions()) 16 | 17 | pi_array = np.pi * np.ones(30) 18 | options.set_np_options() 19 | print(pi_array) 20 | 21 | # Reset 22 | options.reset_np_options() 23 | print(pi_array) 24 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 1/src1/configure_pd.py: -------------------------------------------------------------------------------- 1 | from dautil import options 2 | import pandas as pd 3 | import numpy as np 4 | from dautil import log_api 5 | 6 | printer = log_api.Printer() 7 | print(pd.describe_option('precision')) 8 | print(pd.describe_option('max_rows')) 9 | 10 | printer.print('Initial precision', pd.get_option('precision')) 11 | printer.print('Initial max_rows', pd.get_option('max_rows')) 12 | 13 | # Random pi's, should use random state if possible 14 | np.random.seed(42) 15 | df = pd.DataFrame(np.pi * np.random.rand(6, 2)) 16 | printer.print('Initial df', df) 17 | 18 | options.set_pd_options() 19 | printer.print('df with different options', df) 20 | 21 | options.reset_pd_options() 22 |
def print_history(file):
    """Pretty-print how often each raw input occurs in a history database.

    Opens the SQLite database at *file*, counts the rows of the ``history``
    table per distinct ``source_raw`` value and pretty-prints the resulting
    list of ``(count, source_raw)`` tuples, ordered by ascending count.

    Args:
        file: Path of the SQLite database to read.

    Raises:
        sqlite3.Error: If the database cannot be queried (for example when
            the ``history`` table is missing).
    """
    con = sqlite3.connect(file)
    try:
        c = con.cursor()
        try:
            c.execute("SELECT count(source_raw) as csr,\
                  source_raw FROM history\
                  GROUP BY source_raw\
                  ORDER BY csr")
            result = c.fetchall()
            pprint.pprint(result)
        finally:
            # Release the cursor even when the query fails.
            c.close()
    finally:
        # Using the connection as a context manager only commits or rolls
        # back the transaction; it never closes the connection, so close it
        # explicitly to avoid leaking the database handle.
        con.close()
import report 3 | import pandas as pd 4 | import numpy as np 5 | from tabulate import tabulate 6 | 7 | 8 | df = data.Weather.load() 9 | headers = [data.Weather.get_header(header) 10 | for header in df.columns.values.tolist()] 11 | df = df.describe() 12 | 13 | 14 | writer = report.RSTWriter() 15 | writer.h1('Weather Statistics') 16 | writer.add(tabulate(df, headers=headers, 17 | tablefmt='grid', floatfmt='.2f')) 18 | writer.divider() 19 | 20 | builder = report.DFBuilder(df.columns) 21 | builder.row(df.iloc[7].values - df.iloc[3].values) 22 | builder.row(df.iloc[6].values - df.iloc[4].values) 23 | df = builder.build(['ptp', 'iqr']) 24 | 25 | writer.h1('Peak-to-peak and Interquartile Range') 26 | headers = [data.Weather.get_header(header) 27 | for header in df.columns.values.tolist()] 28 | writer.add(tabulate(df, headers=headers, 29 | tablefmt='grid', floatfmt='.2f')) 30 | writer.write('slides.rst') 31 | generator = report.Generator('slides.rst', 'weather_report.html') 32 | generator.generate() 33 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/bagging.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/bagging.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/boosting.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/boosting.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/default.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/default.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/entropy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/entropy.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/etr.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/etr.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/rain_X_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/rain_X_test.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/rain_X_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/rain_X_train.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
10/rain_y_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/rain_y_test.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/rain_y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/rain_y_train.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/random.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/random.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/ransac.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/ransac.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/rfc.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/rfc.npy -------------------------------------------------------------------------------- 
/Module2/Python_Data_Analysis_code/Chapter 10/stacking.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/stacking.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/temp_X_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/temp_X_test.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/temp_X_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/temp_X_train.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/temp_y_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/temp_y_test.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/temp_y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/temp_y_train.npy 
-------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 10/votes.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 10/votes.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 11/applying_sift.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import cv2\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import dautil as dl\n", 14 | "from scipy.misc import face" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "%matplotlib inline\n", 26 | "img = face()\n", 27 | "plt.title('Original')\n", 28 | "dl.plotting.img_show(plt.gca(), img)\n", 29 | "gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", 30 | "\n", 31 | "plt.figure()\n", 32 | "plt.title('Gray')\n", 33 | "dl.plotting.img_show(plt.gca(), gray, cmap=plt.cm.gray)\n", 34 | "sift = cv2.xfeatures2d.SIFT_create()\n", 35 | "(kps, descs) = sift.detectAndCompute(gray, None)\n", 36 | "img2 = cv2.drawKeypoints(gray, kps, None, (0, 0, 255))\n", 37 | "\n", 38 | "plt.figure()\n", 39 | "plt.title('With Keypoints')\n", 40 | "dl.plotting.img_show(plt.gca(), img2)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [] 51 | } 52 | ], 53 | "metadata": { 54 | "kernelspec": { 55 | "display_name": "Python 3", 56 | "language": "python", 57 | "name": "python3" 58 | }, 
59 | "language_info": { 60 | "codemirror_mode": { 61 | "name": "ipython", 62 | "version": 3 63 | }, 64 | "file_extension": ".py", 65 | "mimetype": "text/x-python", 66 | "name": "python", 67 | "nbconvert_exporter": "python", 68 | "pygments_lexer": "ipython3", 69 | "version": "3.4.3" 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 0 74 | } 75 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 11/covers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 11/covers.jpg -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 11/denoising_images.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import cv2\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from sklearn.datasets import load_sample_image\n", 14 | "import numpy as np\n", 15 | "import dautil as dl" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "%matplotlib inline\n", 27 | "img = load_sample_image('china.jpg')\n", 28 | "dl.plotting.img_show(plt.gca(), img)\n", 29 | "plt.title('Original')\n", 30 | "Z = img.reshape((-1, 3))\n", 31 | "\n", 32 | "np.random.seed(59)\n", 33 | "noise = np.random.random(Z.shape) < 0.99\n", 34 | "\n", 35 | "noisy = (Z * noise).reshape((img.shape))\n", 36 | "\n", 37 | "plt.figure()\n", 38 | "plt.title('Noisy')\n", 39 | "dl.plotting.img_show(plt.gca(), noisy)\n", 40 | "\n", 41 | "cleaned = 
"""Print the Exif metadata tags stored in covers.jpg."""
import exifread
import pprint

# The context manager guarantees the file is closed even if parsing or
# printing raises; the previous explicit close() was skipped on errors.
with open('covers.jpg', 'rb') as f:
    # Return Exif tags
    tags = exifread.process_file(f)
    print(tags.keys())
    pprint.pprint(tags)
"metadata": { 30 | "collapsed": false 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "%time requests.get('http://google.com')\n", 35 | "%time requests.get('http://google.com')\n", 36 | "requests_cache.clear()\n", 37 | "%time requests.get('http://google.com')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [] 57 | } 58 | ], 59 | "metadata": { 60 | "kernelspec": { 61 | "display_name": "Python 3", 62 | "language": "python", 63 | "name": "python3" 64 | }, 65 | "language_info": { 66 | "codemirror_mode": { 67 | "name": "ipython", 68 | "version": 3 69 | }, 70 | "file_extension": ".py", 71 | "mimetype": "text/x-python", 72 | "name": "python", 73 | "nbconvert_exporter": "python", 74 | "pygments_lexer": "ipython3", 75 | "version": "3.4.3" 76 | } 77 | }, 78 | "nbformat": 4, 79 | "nbformat_minor": 0 80 | } 81 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 12/ch12util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from functools import partial 3 | import dautil as dl 4 | 5 | 6 | def plot_times(ax, serial, parallel): 7 | cp = dl.plotting.CyclePlotter(ax) 8 | x = 2 ** np.arange(9) 9 | cp.plot(x, serial, label='Serial') 10 | cp.plot(x, parallel, label='Parallel') 11 | ax.set_xlabel('# Bootstraps') 12 | ax.set_ylabel('Time (s)') 13 | ax.set_title('Execution Time of Bootstrapping') 14 | ax.legend(loc='best') 15 | 16 | 17 | def plot_distro(ax, boot_vals, observed): 18 | dl.plotting.hist_norm_pdf(ax, boot_vals) 19 | ax.axvline(observed, color='k', lw=3, 20 | label='Observed') 21 | 22 | 23 | def time_many(code, n=1): 24 | times = [] 25 | 26 | for i in range(9): 27 | 
times.append(dl.perf.time_once(partial(code, n=2 ** i), n)) 28 | 29 | return times 30 | 31 | 32 | def bootstrap(arr): 33 | n = len(arr) 34 | indices = np.random.choice(n, size=n) 35 | 36 | return arr[indices] 37 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 12/mem_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def test_me(): 5 | a = np.random.random((999, 99)) 6 | b = np.random.random((99, 99)) 7 | a.ravel() 8 | b.tolist() 9 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 12/profiling_memory.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import dautil as dl\n", 12 | "from mem_test import test_me" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "%load_ext memory_profiler" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": false 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "%memit dl.data.Weather.load()" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "%mprun -f test_me test_me()" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": true 62 | }, 63 | "outputs": [], 64 | "source": [] 65 | } 66 | ], 67 | 
"metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.4.3" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 0 88 | } 89 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 12/speeding_numexpr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numexpr as ne\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "a = np.random.rand(1e6)\n", 24 | "b = np.random.rand(1e6)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "%timeit 2 * a ** 3 + 3 * b ** 9\n", 36 | "%timeit ne.evaluate(\"2 * a ** 3 +3 * b ** 9 \")" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "outputs": [], 55 | "source": [] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": true 62 | }, 63 | "outputs": [], 64 | "source": [] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 
3", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.4.3" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 0 88 | } 89 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/choosing_colormaps.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import matplotlib.pyplot as plt\n", 12 | "import matplotlib as mpl\n", 13 | "from dautil import plotting" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "%matplotlib inline\n", 25 | "fig, axes = plt.subplots(4, 4)\n", 26 | "cmaps = ['autumn', 'spring', 'summer', 'winter',\n", 27 | " 'Reds', 'Blues', 'Greens', 'Purples',\n", 28 | " 'Oranges', 'pink', 'Greys', 'gray',\n", 29 | " 'binary', 'bone', 'hot', 'cool']\n", 30 | "\n", 31 | "for ax, cm in zip(axes.ravel(), cmaps):\n", 32 | " cmap = plt.cm.get_cmap(cm)\n", 33 | " cb = mpl.colorbar.ColorbarBase(ax, cmap=cmap, \n", 34 | " orientation='horizontal')\n", 35 | " cb.set_label(cm)\n", 36 | " 
ax.xaxis.set_ticklabels([])\n", 37 | "\n", 38 | "plt.tight_layout()" 39 | ] 40 | } 41 | ], 42 | "metadata": { 43 | "kernelspec": { 44 | "display_name": "Python 3", 45 | "language": "python", 46 | "name": "python3" 47 | }, 48 | "language_info": { 49 | "codemirror_mode": { 50 | "name": "ipython", 51 | "version": 3 52 | }, 53 | "file_extension": ".py", 54 | "mimetype": "text/x-python", 55 | "name": "python", 56 | "nbconvert_exporter": "python", 57 | "pygments_lexer": "ipython3", 58 | "version": "3.4.3" 59 | } 60 | }, 61 | "nbformat": 4, 62 | "nbformat_minor": 0 63 | } 64 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
2/joblib/dautil/data/download/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"**": "{'end': 2014, 'indicator': 'sp.pop.grow', 'start': 1984}", "self": "", "*": "[]"}, "duration": 1.5583209991455078} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_03.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_03.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_04.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_04.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_05.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 
2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_05.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_06.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/490f7733de2a3f66c8b49c154e0df09d/output.pkl_06.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/metadata.json: -------------------------------------------------------------------------------- 1 | {"duration": 2.0000369548797607, "input_args": {"**": "{'end': 2014, 'start': 2014, 'indicator': ['sp.pop.grow', 'ny.gdp.pcap.cd', 'se.prm.cmpt.zs'], 'country': 0 AW\n1 AF\n2 A9\n3 AO\n4 AL\n5 AD\n6 L5\n7 1A\n8 AE\n9 AR\n10 AM\n11 AS\n12 AG\n13 AU\n14 AT\n15 AZ\n16 BI\n17 BE\n18 BJ\n19 BF\n20 BD\n21 BG\n22 BH\n23 BS\n24 BA\n25 BY\n26 BZ\n27 BM\n28 BO\n29 BR\n ..\n234 TG\n235 TH\n236 TJ\n237 TM\n238 TL\n239 TO\n240 TT\n241 TN\n242 TR\n243 TV\n244 TW\n245 TZ\n246 UG\n247 UA\n248 XT\n249 UY\n250 US\n251 UZ\n252 VC\n253 VE\n254 VI\n255 VN\n256 VU\n257 1W\n258 WS\n259 A5\n260 YE\n261 ZA\n262 ZM\n263 ZW\nName: iso2c, dtype: object}", "self": "", "*": "[]"}} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_03.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_03.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_04.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_04.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_05.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_05.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_06.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/eb23268c45517fc462e55f43647b54ba/output.pkl_06.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/download/func_code.py: -------------------------------------------------------------------------------- 1 | # first line: 157 2 | def download(self, *args, **kwargs): 3 | return wb.download(*args, **kwargs) 4 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
2/joblib/dautil/data/get_countries/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/get_countries/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/get_countries/ec5eb565b58ae8352bf85a3c8b37dcf4/metadata.json: -------------------------------------------------------------------------------- 1 | {"duration": 1.28059983253479, "input_args": {"*": "[]", "**": "{}", "self": ""}} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/get_countries/ec5eb565b58ae8352bf85a3c8b37dcf4/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/get_countries/ec5eb565b58ae8352bf85a3c8b37dcf4/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/joblib/dautil/data/get_countries/func_code.py: -------------------------------------------------------------------------------- 1 | # first line: 158 2 | def get_countries(self, *args, **kwargs): 3 | return wb.get_countries(*args, **kwargs) 4 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/mpld3_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 
11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import mpld3\n", 14 | "mpld3.enable_notebook()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "from mpld3 import plugins\n", 26 | "import seaborn as sns" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "from dautil import data\n", 38 | "from dautil import ts\n", 39 | "\n", 40 | "df = data.Weather.load()\n", 41 | "df = df[['TEMP', 'WIND_SPEED']]\n", 42 | "df = ts.groupby_yday(df).mean()\n", 43 | "\n", 44 | "fig, ax = plt.subplots()\n", 45 | "ax.set_title('Averages Grouped by Day of Year')\n", 46 | "points = ax.scatter(df['TEMP'], df['WIND_SPEED'],\n", 47 | " s=30, alpha=0.3)\n", 48 | "ax.set_xlabel(data.Weather.get_header('TEMP'))\n", 49 | "ax.set_ylabel(data.Weather.get_header('WIND_SPEED'))\n", 50 | "labels = [\"Day of year {0}\".format(i) for i in range(366)]\n", 51 | "tooltip = plugins.PointLabelTooltip(points, labels)\n", 52 | "\n", 53 | "plugins.connect(fig, tooltip)" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "kernelspec": { 59 | "display_name": "Python 3", 60 | "language": "python", 61 | "name": "python3" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": { 65 | "name": "ipython", 66 | "version": 3 67 | }, 68 | "file_extension": ".py", 69 | "mimetype": "text/x-python", 70 | "name": "python", 71 | "nbconvert_exporter": "python", 72 | "pygments_lexer": "ipython3", 73 | "version": "3.4.3" 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 0 78 | } 79 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/scatter_matrix.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 
5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "from dautil import data\n", 13 | "from dautil import ts\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "import matplotlib as mpl" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "df = data.Weather.load()\n", 28 | "df = ts.groupby_yday(df).mean()\n", 29 | "df.columns = [data.Weather.get_header(c) for c in df.columns]" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "%matplotlib inline\n", 41 | "\n", 42 | "# Seaborn plotting, issues due to NaNs\n", 43 | "sns.pairplot(df.fillna(0))" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "sns.set({'figure.figsize': '16, 12'})\n", 55 | "mpl.rcParams['axes.linewidth'] = 9\n", 56 | "mpl.rcParams['lines.linewidth'] = 2\n", 57 | "plots = pd.scatter_matrix(df, marker='o', diagonal='kde')\n", 58 | "plt.show()" 59 | ] 60 | } 61 | ], 62 | "metadata": { 63 | "kernelspec": { 64 | "display_name": "Python 3", 65 | "language": "python", 66 | "name": "python3" 67 | }, 68 | "language_info": { 69 | "codemirror_mode": { 70 | "name": "ipython", 71 | "version": 3 72 | }, 73 | "file_extension": ".py", 74 | "mimetype": "text/x-python", 75 | "name": "python", 76 | "nbconvert_exporter": "python", 77 | "pygments_lexer": "ipython3", 78 | "version": "3.4.3" 79 | } 80 | }, 81 | "nbformat": 4, 82 | "nbformat_minor": 0 83 | } 84 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/using_ggplot.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pyggplot\n", 12 | "from dautil import data\n", 13 | "\n", 14 | "\n", 15 | "dawb = data.Worldbank()\n", 16 | "pop_grow = dawb.get_name('pop_grow')\n", 17 | "df = dawb.download(indicator=pop_grow, start=1984, end=2014)\n", 18 | "df = dawb.rename_columns(df, use_longnames=True)\n", 19 | "p = pyggplot.Plot(df)\n", 20 | "p.add_bar('country', dawb.get_longname(pop_grow), color='year')\n", 21 | "p.coord_flip()\n", 22 | "p.render_notebook()" 23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 3", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.4.3" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 0 47 | } 48 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 2/violins.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import seaborn as sns\n", 12 | "from dautil import data\n", 13 | "import matplotlib.pyplot as plt" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "df = data.Weather.load()\n", 25 | "zscores = (df - df.mean())/df.std()" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | 
"execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "%matplotlib inline\n", 37 | "plt.figure()\n", 38 | "plt.title('Weather Violin Plot')\n", 39 | "sns.violinplot(zscores.resample('M'))\n", 40 | "plt.ylabel('Z-scores')" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "plt.figure()\n", 52 | "plt.title('Rainy Weather vs Wind Speed')\n", 53 | "categorical = df\n", 54 | "categorical['RAIN'] = categorical['RAIN'] > 0\n", 55 | "ax = sns.violinplot(x=\"RAIN\", y=\"WIND_SPEED\", \n", 56 | " data=categorical)" 57 | ] 58 | } 59 | ], 60 | "metadata": { 61 | "kernelspec": { 62 | "display_name": "Python 3", 63 | "language": "python", 64 | "name": "python3" 65 | }, 66 | "language_info": { 67 | "codemirror_mode": { 68 | "name": "ipython", 69 | "version": 3 70 | }, 71 | "file_extension": ".py", 72 | "mimetype": "text/x-python", 73 | "name": "python", 74 | "nbconvert_exporter": "python", 75 | "pygments_lexer": "ipython3", 76 | "version": "3.4.3" 77 | } 78 | }, 79 | "nbformat": 4, 80 | "nbformat_minor": 0 81 | } 82 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/metadata.json: -------------------------------------------------------------------------------- 1 | {"duration": 1.796659231185913, "input_args": {"self": "", "*": "[]", "**": "{'end': 2014, 'indicator': 'en.atm.co2e.kt', 'start': 1900, 'country': 'NL'}"}} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_03.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_03.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_04.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_04.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_05.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_05.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_06.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/1e0ecc166b05634fc4a48fe36993890d/output.pkl_06.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 3/joblib/dautil/data/download/func_code.py: -------------------------------------------------------------------------------- 1 | # first line: 201 2 | def download(self, *args, **kwargs): 3 | return wb.download(*args, **kwargs) 4 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/metadata.json: -------------------------------------------------------------------------------- 1 | {"duration": 4.958159923553467, "input_args": {"*": "[]", "self": "", "**": "{'end': 2010, 'indicator': ['sh.dyn.mort', 'ny.gdp.pcap.cd'], 'start': 2010, 'country': 0 AW\n1 AF\n2 A9\n3 AO\n4 AL\n5 AD\n6 L5\n7 1A\n8 AE\n9 AR\n10 AM\n11 AS\n12 AG\n13 AU\n14 AT\n15 AZ\n16 BI\n17 BE\n18 BJ\n19 BF\n20 BD\n21 BG\n22 BH\n23 BS\n24 BA\n25 BY\n26 BZ\n27 BM\n28 BO\n29 BR\n ..\n234 TG\n235 TH\n236 TJ\n237 TM\n238 TL\n239 TO\n240 TT\n241 TN\n242 TR\n243 TV\n244 TW\n245 TZ\n246 UG\n247 UA\n248 XT\n249 UY\n250 US\n251 UZ\n252 VC\n253 VE\n254 VI\n255 VN\n256 VU\n257 1W\n258 WS\n259 A5\n260 YE\n261 
ZA\n262 ZM\n263 ZW\nName: iso2c, dtype: object}"}} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_03.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_03.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_04.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_04.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_05.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_05.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_06.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/download/4c6627242bf3b15c57d61133efcf3d7f/output.pkl_06.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
4/joblib/dautil/data/download/func_code.py: -------------------------------------------------------------------------------- 1 | # first line: 272 2 | def download(self, *args, **kwargs): 3 | """ Caches the pandas.io.wb.download() results. 4 | 5 | :returns: The result of the query from cache or the WWW. 6 | """ 7 | return wb.download(*args, **kwargs) 8 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/get_countries/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/get_countries/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/get_countries/acee962592427bb9e5792478f1e8a5c0/metadata.json: -------------------------------------------------------------------------------- 1 | {"duration": 1.3614959716796875, "input_args": {"*": "[]", "self": "", "**": "{}"}} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/get_countries/acee962592427bb9e5792478f1e8a5c0/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/get_countries/acee962592427bb9e5792478f1e8a5c0/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 4/joblib/dautil/data/get_countries/func_code.py: -------------------------------------------------------------------------------- 1 | # first line: 265 
def write_text(fname):
    """Render a saved HTML file in the browser and store its paragraph text.

    The page at ``fname`` is loaded through the module-level ``DRIVER``.
    The rendered page source is written next to the original file as
    ``*_phantomjs.html``. The text of every ``<p>`` element found is then
    written to a ``.txt`` file, unless that text file already exists.

    Errors while loading or rendering are logged and not re-raised; in that
    case the text file is still created (if missing) from whatever elements
    were collected, which may be none.

    :param fname: Path of the downloaded HTML file.
    """
    elems = []

    try:
        DRIVER.get(dl.web.path2url(fname))

        elems = WebDriverWait(DRIVER, NAP_SECONDS).until(
            EC.presence_of_all_elements_located((By.XPATH, '//p'))
        )

        # A placeholder is required: extra positional arguments without one
        # make the logging module report a formatting error instead.
        LOGGER.info('Elems %s', elems)

        with open(fname.replace('.html', '_phantomjs.html'), 'w') as pjs_file:
            LOGGER.warning('Writing to %s', pjs_file.name)
            pjs_file.write(DRIVER.page_source)

    except Exception:
        LOGGER.error("Error processing HTML", exc_info=True)

    # Swap only the trailing extension; a plain replace('html', 'txt') would
    # also rewrite directory names that happen to contain 'html'.
    if fname.endswith('.html'):
        new_name = fname[:-len('.html')] + '.txt'
    else:
        new_name = fname.replace('html', 'txt')

    if not os.path.exists(new_name):
        with open(new_name, 'w') as txt_file:
            LOGGER.warning('Writing to %s', txt_file.name)

            lines = [e.text for e in elems]
            LOGGER.info('lines %s', lines)
            txt_file.write(' \n'.join(lines))
bulgarian) 23 | PRINT.print('Bulgarian ignored', unicodedata.normalize('NFKD', bulgarian).encode('ascii', 'ignore')) 24 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 5/streaming_clustering.py: -------------------------------------------------------------------------------- 1 | import dautil as dl 2 | from pyspark.mllib.clustering import StreamingKMeansModel 3 | from pyspark import SparkContext 4 | 5 | csv_file = dl.data.get_direct_marketing_csv() 6 | csv_rows = dl.data.read_csv(csv_file) 7 | 8 | stkm = StreamingKMeansModel(28 * [[0., 0., 0.]], 28 * [1.]) 9 | sc = SparkContext() 10 | 11 | for row in csv_rows: 12 | spend = dl.data.centify(row['spend']) 13 | 14 | if spend > 0: 15 | history = dl.data.centify(row['history']) 16 | data = sc.parallelize([[int(row['recency']), 17 | history, spend]]) 18 | stkm = stkm.update(data, 0., 'points') 19 | 20 | print(stkm.centers) 21 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 5/templates/admin.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Admin page 4 | 5 | 6 |
7 |

Please login

8 |
9 |
10 | 12 | 14 | 15 |
16 | {% if error %} 17 |

Error: {{ error }} 18 | {% endif %} 19 |

class SeleniumTest(unittest.TestCase):
    """Browser test that runs a notebook and opens its widget panels."""

    def setUp(self):
        """Create the logger and start a Firefox session for the test."""
        self.logger = dl.log_api.conf_logger(__name__)
        self.browser = webdriver.Firefox()

    def tearDown(self):
        """Close the browser; this is the only place the session is quit."""
        self.browser.quit()

    def wait_and_click(self, toggle, text):
        """Wait for a link with the given toggle type and text, then click it.

        :param toggle: Value of the link's ``data-toggle`` attribute.
        :param text: Text the link must contain.
        """
        xpath = "//a[@data-toggle='{0}' and contains(text(), '{1}')]"
        xpath = xpath.format(toggle, text)
        elem = dl.web.wait_browser(self.browser, xpath)
        elem.click()

    def test_widget(self):
        """Run all notebook cells, open the figure settings, take a screenshot."""
        self.browser.implicitly_wait(NAP_SECS)
        self.browser.get('http://localhost:8888/notebooks/test_widget.ipynb')

        try:
            # Cell menu
            xpath = '//*[@id="menus"]/div/div/ul/li[5]/a'
            link = dl.web.wait_browser(self.browser, xpath)
            link.click()
            time.sleep(1)

            # Run all
            xpath = '//*[@id="run_all_cells"]/a'
            link = dl.web.wait_browser(self.browser, xpath)
            link.click()
            time.sleep(1)

            self.wait_and_click('tab', 'Figure')
            self.wait_and_click('collapse', 'figure.figsize')
        except Exception:
            self.logger.warning('Error while waiting to click', exc_info=True)
            # Do not quit here: tearDown closes the browser. Quitting and
            # then taking a screenshot would hide this error behind a
            # secondary failure on the closed session.
            raise

        time.sleep(NAP_SECS)
        self.browser.save_screenshot('widgets_screenshot.png')
def fit(df):
    """Return the slope of a straight-line fit of the values against the year.

    :param df: Object whose ``index.year`` and ``values`` are handed to
        ``numpy.polyfit`` as x and y.
    :returns: The leading (slope) coefficient of the degree-1 fit.
    """
    years = df.index.year
    coefficients = np.polyfit(years, df.values, 1)
    gradient, _intercept = coefficients

    return gradient
import numpy as np
import pandas as pd


STOCKS = ['AAPL', 'INTC', 'MSFT', 'KO', 'DIS', 'MCD', 'NKE', 'IBM']


def log_rets(close):
    """Return the log returns of a series of closing prices.

    :param close: Closing prices.
    :returns: Differences between consecutive log prices, one element
        shorter than the input.
    """
    log_prices = np.log(close)

    return np.diff(log_prices)


def log_to_simple(rets):
    """Convert log returns to simple returns.

    :param rets: Log returns.
    :returns: ``exp(rets) - 1``.
    """
    growth = np.exp(rets)

    return growth - 1


def merge_sp500(stock, sp500):
    """Join stock data with S&P 500 data on their indices.

    Overlapping column names get the suffixes ``_stock`` and ``_sp500``.
    Rows containing missing values are dropped from the result.

    :param stock: Data for the stock.
    :param sp500: Data for the S&P 500.
    :returns: The merged data without rows that have missing values.
    """
    merged = pd.merge(left=stock, right=sp500,
                      left_index=True, right_index=True,
                      suffixes=('_stock', '_sp500'))

    return merged.dropna()
-------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 7/database_tables.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column 2 | from sqlalchemy.ext.declarative import declarative_base 3 | from sqlalchemy import Date 4 | from sqlalchemy import ForeignKey 5 | from sqlalchemy import Integer 6 | from sqlalchemy import String 7 | 8 | Base = declarative_base() 9 | 10 | 11 | class StockPrice(Base): 12 | __tablename__ = 'stock_price' 13 | id = Column(Integer, primary_key=True) 14 | date_id = Column(Integer, ForeignKey('date_dim.id'), 15 | primary_key=True) 16 | asset_id = Column(Integer, ForeignKey('asset_dim.id'), 17 | primary_key=True) 18 | source_id = Column(Integer, ForeignKey('source_dim.id'), 19 | primary_key=True) 20 | open_price = Column(Integer) 21 | high_price = Column(Integer) 22 | low_price = Column(Integer) 23 | close_price = Column(Integer) 24 | adjusted_close = Column(Integer) 25 | volume = Column(Integer) 26 | 27 | 28 | class DateDim(Base): 29 | __tablename__ = 'date_dim' 30 | id = Column(Integer, primary_key=True) 31 | date = Column(Date, nullable=False, unique=True) 32 | day_of_month = Column(Integer, nullable=False) 33 | day_of_week = Column(Integer, nullable=False) 34 | month = Column(Integer, nullable=False) 35 | quarter = Column(Integer, nullable=False) 36 | year = Column(Integer, nullable=False) 37 | 38 | 39 | class AssetDim(Base): 40 | __tablename__ = 'asset_dim' 41 | id = Column(Integer, primary_key=True) 42 | symbol = Column(String, nullable=False, unique=True) 43 | name = Column(String, nullable=False) 44 | # Could make this a reference to separate table 45 | category = Column(String, nullable=False) 46 | country = Column(String, nullable=False) 47 | # Could make this a reference to separate table 48 | sector = Column(String, nullable=False) 49 | 50 | 51 | class SourceDim(Base): 52 | __tablename__ = 'source_dim' 
53 | id = Column(Integer, primary_key=True) 54 | name = Column(String, nullable=False) 55 | url = Column(String) 56 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 7/simple_log_rets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import dautil as dl\n", 12 | "import ch7util\n", 13 | "import matplotlib.pyplot as plt" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "context = dl.nb.Context('simple_log_rets')\n", 25 | "lr = dl.nb.LatexRenderer(chapter=7, context=context)\n", 26 | "lr.render(r'r=\\frac{V_f - V_i}{V_i}')\n", 27 | "lr.render(r'R = \\ln\\left(\\frac{V_f}{V_i}\\right)')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "ohlc = dl.data.OHLC()\n", 39 | "sp500 = ohlc.get('^GSPC')['Adj Close']\n", 40 | "rets = sp500[1:]/sp500[:-1] - 1" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "%matplotlib inline\n", 52 | "dl.options.mimic_seaborn()\n", 53 | "_, ax = plt.subplots()\n", 54 | "cp = dl.plotting.CyclePlotter(ax)\n", 55 | "cp.plot(sp500.index, rets, label='Simple')\n", 56 | "cp.plot(sp500.index[1:], ch7util.log_rets(sp500), label='Log')\n", 57 | "ax.set_title('Simple and Log Returns')\n", 58 | "ax.set_xlabel('Date')\n", 59 | "ax.set_ylabel('Return')\n", 60 | "ax.legend(loc='best')" 61 | ] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python 3", 67 | "language": "python", 68 | "name": 
"python3" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 3 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | "pygments_lexer": "ipython3", 80 | "version": "3.4.3" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 0 85 | } 86 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/.ropeproject/globalnames: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/.ropeproject/globalnames -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/.ropeproject/history: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/.ropeproject/history -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/.ropeproject/objectdb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/.ropeproject/objectdb -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/assortativity.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import networkx as nx\n", 12 | "import dautil as dl" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "context = dl.nb.Context('assortavity')\n", 24 | "lr = dl.nb.LatexRenderer(chapter=8, start=7, context=context)\n", 25 | "lr.render(r'r = \\frac{\\sum_{jk}{jk (e_{jk} - q_j q_k)}}{\\sigma_{q}^{2}}')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "fb_file = dl.data.SPANFB().load()\n", 37 | "G = nx.read_edgelist(fb_file,\n", 38 | " create_using=nx.Graph(),\n", 39 | " nodetype=int)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "print('Degree Assortativity Coefficient',\n", 51 | " nx.degree_assortativity_coefficient(G))" 52 | ] 53 | } 54 | ], 55 | "metadata": { 56 | "kernelspec": { 57 | "display_name": "Python 3", 58 | "language": "python", 59 | "name": "python3" 60 | }, 61 | "language_info": { 62 | "codemirror_mode": { 63 | "name": "ipython", 64 | "version": 3 65 | }, 66 | "file_extension": ".py", 67 | "mimetype": "text/x-python", 68 | "name": "python", 69 | "nbconvert_exporter": "python", 70 | "pygments_lexer": "ipython3", 71 
| "version": "3.4.3" 72 | } 73 | }, 74 | "nbformat": 4, 75 | "nbformat_minor": 0 76 | } 77 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/avg_clustering.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import networkx as nx\n", 12 | "import dautil as dl" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "context = dl.nb.Context('avg_clustering')\n", 24 | "lr = dl.nb.LatexRenderer(chapter=8, start=5, context=context)\n", 25 | "lr.render(r'C_i = \\frac{\\lambda_G(v)}{\\tau_G(v)}')\n", 26 | "lr.render(r'\\bar{C} = \\frac{1}{n}\\sum_{i=1}^{n} C_i')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "fb_file = dl.data.SPANFB().load()\n", 38 | "G = nx.read_edgelist(fb_file,\n", 39 | " create_using=nx.Graph(),\n", 40 | " nodetype=int)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "print('Average Clustering',\n", 52 | " nx.average_clustering(G))" 53 | ] 54 | } 55 | ], 56 | "metadata": { 57 | "kernelspec": { 58 | "display_name": "Python 3", 59 | "language": "python", 60 | "name": "python3" 61 | }, 62 | "language_info": { 63 | "codemirror_mode": { 64 | "name": "ipython", 65 | "version": 3 66 | }, 67 | "file_extension": ".py", 68 | "mimetype": "text/x-python", 69 | "name": "python", 70 | "nbconvert_exporter": "python", 71 | "pygments_lexer": "ipython3", 72 | "version": "3.4.3" 73 | } 74 | }, 75 | "nbformat": 4, 76 | 
"nbformat_minor": 0 77 | } 78 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/between_centrality.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import networkx as nx\n", 12 | "import dautil as dl\n", 13 | "import pandas as pd" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "context = dl.nb.Context('between_centrality')\n", 25 | "lr = dl.nb.LatexRenderer(chapter=8, start=4, context=context)\n", 26 | "lr.render(r'c_B(v) =\\sum_{s,t \\in V} \\frac{\\sigma(s, t|v)}{\\sigma(s, t)}')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "fb_file = dl.data.SPANFB().load()\n", 38 | "G = nx.read_edgelist(fb_file,\n", 39 | " create_using=nx.Graph(),\n", 40 | " nodetype=int)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "key_values = nx.betweenness_centrality(G, k=256)\n", 52 | "df = pd.DataFrame.from_dict(key_values, orient='index')\n", 53 | "\n", 54 | "dl.options.set_pd_options()\n", 55 | "print('Betweenness Centrality', df)" 56 | ] 57 | } 58 | ], 59 | "metadata": { 60 | "kernelspec": { 61 | "display_name": "Python 3", 62 | "language": "python", 63 | "name": "python3" 64 | }, 65 | "language_info": { 66 | "codemirror_mode": { 67 | "name": "ipython", 68 | "version": 3 69 | }, 70 | "file_extension": ".py", 71 | "mimetype": "text/x-python", 72 | "name": "python", 73 | "nbconvert_exporter": "python", 74 | "pygments_lexer": 
def only_letters(word):
    """Tell whether every character of ``word`` is alphabetic.

    An empty string yields ``True`` because it contains no character that
    fails the check.

    :param word: The string to examine.
    :returns: ``True`` if no character is non-alphabetic, else ``False``.
    """
    return all(c.isalpha() for c in word)
{ 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import networkx as nx\n", 12 | "import dautil as dl" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "context = dl.nb.Context('close_centrality')\n", 24 | "lr = dl.nb.LatexRenderer(chapter=8, start=3, context=context)\n", 25 | "lr.render(r'C(u) = \\frac{n - 1}{\\sum_{v=1}^{n-1} d(v, u)}')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "fb_file = dl.data.SPANFB().load()\n", 37 | "G = nx.read_edgelist(fb_file,\n", 38 | " create_using=nx.Graph(),\n", 39 | " nodetype=int)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "print('Closeness Centrality Node 1',\n", 51 | " nx.closeness_centrality(G, 1))\n", 52 | "print('Closeness Centrality Node 4037',\n", 53 | " nx.closeness_centrality(G, 4037))" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "kernelspec": { 59 | "display_name": "Python 3", 60 | "language": "python", 61 | "name": "python3" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": { 65 | "name": "ipython", 66 | "version": 3 67 | }, 68 | "file_extension": ".py", 69 | "mimetype": "text/x-python", 70 | "name": "python", 71 | "nbconvert_exporter": "python", 72 | "pygments_lexer": "ipython3", 73 | "version": "3.4.3" 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 0 78 | } 79 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/corpus.py: -------------------------------------------------------------------------------- 1 | import feedparser as fp 2 | import urllib 3 | from 
def store_txt(url, fname, title):
    """Save the paragraph text of the page at *url* to the file *fname*.

    The page is loaded with the module-level ``DRIVER`` and the function
    waits up to ``NAP_SECONDS`` seconds for ``<p>`` elements to be
    present.  The file starts with *title* and a blank line, followed by
    the text of each paragraph element.

    Any exception is logged through ``LOGGER`` (with traceback) and
    swallowed, so one failing article does not abort the caller's loop.
    """
    try:
        DRIVER.get(url)

        elems = WebDriverWait(DRIVER, NAP_SECONDS).until(
            EC.presence_of_all_elements_located((By.XPATH, '//p'))
        )

        # Read the text *before* creating the file.  If an element lookup
        # fails half-way, no truncated file is left behind that
        # fetch_news() would later mistake for a finished download.
        lines = [e.text for e in elems]

        # Write UTF-8 explicitly instead of the platform default encoding;
        # the companion tokenizing.py script opens the corpus files with
        # encoding="utf-8".
        with open(fname, 'w', encoding='utf-8') as txt_file:
            txt_file.write(title + '\n\n')
            txt_file.write(' \n'.join(lines))
    except Exception:
        LOGGER.error("Error processing HTML", exc_info=True)


def fetch_news(dir):
    """Store the entries of the BBC 'world' and 'technology' feeds in *dir*.

    Every feed entry is written to ``<index>_bbc_<category>.txt`` inside
    *dir* via ``store_txt``.  An entry is skipped when
    ``dl.conf.file_exists`` reports that its file is already there.

    The parameter name ``dir`` shadows the builtin; it is kept unchanged
    so existing keyword callers continue to work.
    """
    base = 'http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/{}/rss.xml'

    for category in ['world', 'technology']:
        rss = fp.parse(base.format(category))

        for i, entry in enumerate(rss.entries):
            fname = '{0}_bbc_{1}.txt'.format(i, category)
            fname = os.path.join(dir, fname)

            if not dl.conf.file_exists(fname):
                store_txt(entry.link, fname, entry.title)
-------------------------------------------------------------------------------- 1 | { 2 | "assortavity.latex": [ 3 | 7 4 | ], 5 | "avg_clustering.latex": [ 6 | 5, 7 | 6 8 | ], 9 | "between_centrality.latex": [ 10 | 4 11 | ], 12 | "close_centrality.latex": [ 13 | 3 14 | ], 15 | "cos_similarity": { 16 | "figure.figsize": [ 17 | 15.9, 18 | 11.9 19 | ] 20 | }, 21 | "cos_similarity.latex": [ 22 | 8 23 | ], 24 | "net_density.latex": [ 25 | 1, 26 | 2 27 | ] 28 | } -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/dautil_log.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root 3 | 4 | [handlers] 5 | keys=consoleHandler 6 | 7 | [formatters] 8 | keys=simpleFormatter 9 | 10 | [logger_root] 11 | level=DEBUG 12 | handlers=consoleHandler 13 | 14 | [handler_consoleHandler] 15 | class=StreamHandler 16 | level=DEBUG 17 | formatter=simpleFormatter 18 | args=(sys.stdout,) 19 | 20 | [formatter_simpleFormatter] 21 | format=%(asctime)s - %(name)s - %(levelname)s - %(message)s 22 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/ghostdriver.log: -------------------------------------------------------------------------------- 1 | [INFO - 2016-02-29T16:15:22.887Z] GhostDriver - Main - running on port 52320 2 | [INFO - 2016-02-29T16:15:23.605Z] Session [a2eaed00-deff-11e5-abb5-95ee2ca6ab8c] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.0.0 Safari/538.1","webSecurityEnabled":true} 3 | [INFO - 2016-02-29T16:15:23.605Z] Session [a2eaed00-deff-11e5-abb5-95ee2ca6ab8c] - page.customHeaders: - {} 4 | [INFO - 2016-02-29T16:15:23.605Z] 
Session [a2eaed00-deff-11e5-abb5-95ee2ca6ab8c] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.0.0","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"mac-10.9 (Mavericks)-64bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"}} 5 | [INFO - 2016-02-29T16:15:23.605Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: a2eaed00-deff-11e5-abb5-95ee2ca6ab8c 6 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/091aa2b4f103f69ebb3769a159072aef/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 
8/joblib/ch8util/filter/091aa2b4f103f69ebb3769a159072aef/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/0a11e4ca889a28ffd0e115a993c73a85/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/0a11e4ca889a28ffd0e115a993c73a85/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/0b2300f23f9db4dd9b64f057f6a59671/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/0b2300f23f9db4dd9b64f057f6a59671/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/0b3748ca6a09f1034e8375c137acad90/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/0b3748ca6a09f1034e8375c137acad90/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/15ed994e3e2f8a175ecd8a6411b4e915/output.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/15ed994e3e2f8a175ecd8a6411b4e915/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/1b16b4736f53e409cd51d2b024dd2837/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/1b16b4736f53e409cd51d2b024dd2837/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/252c07dd9428eaaee00f37250b86b075/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/252c07dd9428eaaee00f37250b86b075/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/27266a1224539dda8511a94c27881f84/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/27266a1224539dda8511a94c27881f84/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/2daf7bd2df433ab1f1529c55bb0cba96/output.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/2daf7bd2df433ab1f1529c55bb0cba96/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/311c193bc050a456bbeca1b9ec1868c0/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/311c193bc050a456bbeca1b9ec1868c0/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/546f872ffd4d4aa5568807b844e76997/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/546f872ffd4d4aa5568807b844e76997/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/5627bb9c842cef425591dffa6af7bf8c/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/5627bb9c842cef425591dffa6af7bf8c/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
8/joblib/ch8util/filter/6130115cc23345d1ff896293f1dbf82e/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/6130115cc23345d1ff896293f1dbf82e/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/6423924a61dcb0099518b0bb1f7ae5c5/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/6423924a61dcb0099518b0bb1f7ae5c5/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/66575d521c2999bb7249207b34e869e7/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/66575d521c2999bb7249207b34e869e7/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/6fd448966c3953af632b386ac6537858/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/6fd448966c3953af632b386ac6537858/output.pkl -------------------------------------------------------------------------------- 
/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/70cc93c760b8c2003ba5e5bbdf14436b/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/70cc93c760b8c2003ba5e5bbdf14436b/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/8779155bccf999a5dca00f0228ba4e21/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/8779155bccf999a5dca00f0228ba4e21/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/8b5edc7dbcd55b96139bd76738e905f8/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/8b5edc7dbcd55b96139bd76738e905f8/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/8b9c45d590dd16a4380302a0d9ecb80c/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/8b9c45d590dd16a4380302a0d9ecb80c/output.pkl 
-------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/8c66f2c5483c5fbc1a63dc043674c36c/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/8c66f2c5483c5fbc1a63dc043674c36c/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/981a2017e77f6397b9d49f09b460440f/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/981a2017e77f6397b9d49f09b460440f/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/9d217e5d143e99f8f96449634805edd2/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/9d217e5d143e99f8f96449634805edd2/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/9d59f4a3078ecc6850b9f98ebcd9d726/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 
8/joblib/ch8util/filter/9d59f4a3078ecc6850b9f98ebcd9d726/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/ae88995f75aaa74d35fa79d942e8cbad/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/ae88995f75aaa74d35fa79d942e8cbad/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/b0ba8193711d4e36d2d642b42413b06a/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/b0ba8193711d4e36d2d642b42413b06a/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/b363f97c554e812e3c61a1bb7bab3e92/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/b363f97c554e812e3c61a1bb7bab3e92/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/b415598458273f80f0d2951dfe1c6c1f/output.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/b415598458273f80f0d2951dfe1c6c1f/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/b8551fbd2207b856e1c26a62a980c511/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/b8551fbd2207b856e1c26a62a980c511/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/bb029ea61ec1818e4551bc882ab41b45/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/bb029ea61ec1818e4551bc882ab41b45/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/bb747672f83a7465f3d4b3186998566f/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/bb747672f83a7465f3d4b3186998566f/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/bdaaefd6b7284b0f25970e8c9ac59936/output.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/bdaaefd6b7284b0f25970e8c9ac59936/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/beda652e99146dd4de54264f70fe4f81/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/beda652e99146dd4de54264f70fe4f81/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/c156c0f3db2a50802319bc66903e60dc/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/c156c0f3db2a50802319bc66903e60dc/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/d6567a720fdaabffd38137c09dd7dcb6/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/d6567a720fdaabffd38137c09dd7dcb6/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
8/joblib/ch8util/filter/d7509e13f1b96b21b83e7376567ba82b/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/d7509e13f1b96b21b83e7376567ba82b/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/dc8c1076d71a9035579c835320061e9c/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/dc8c1076d71a9035579c835320061e9c/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/e5a822d3724b82d348f48d5f91e3abbc/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/e5a822d3724b82d348f48d5f91e3abbc/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/e99684cea5697ada54ba64d1337308a2/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/e99684cea5697ada54ba64d1337308a2/output.pkl -------------------------------------------------------------------------------- 
/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/eb96ea4e13a34deb43ce953fae303809/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/eb96ea4e13a34deb43ce953fae303809/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/f5a882d269a9e9a31881d6620437267e/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/f5a882d269a9e9a31881d6620437267e/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/f75cf54d64d1978576b7fc371eba21f3/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/f75cf54d64d1978576b7fc371eba21f3/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/feb56b72be37bd8aae0ed3c2fae0342d/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 8/joblib/ch8util/filter/feb56b72be37bd8aae0ed3c2fae0342d/output.pkl 
def download_ner():
    """Download and unpack the Stanford NER 2015-04-20 archive.

    The zip file is stored in the ``ner`` sub-directory of
    ``dl.data.get_data_dir()`` and is only downloaded when
    ``dl.conf.file_exists`` does not find it.  The archive is extracted
    into that same directory on every call.

    Returns:
        The ``ner`` directory joined with the archive name minus its
        ``.zip`` suffix -- presumably the archive's top-level folder
        (the rest of the script looks for ``classifiers/`` and
        ``stanford-ner.jar`` inside it).
    """
    url = 'http://nlp.stanford.edu/software/stanford-ner-2015-04-20.zip'
    ner_dir = os.path.join(dl.data.get_data_dir(), 'ner')

    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has
    # no check-then-create race and also works when the parent data
    # directory has not been created yet.
    os.makedirs(ner_dir, exist_ok=True)

    fname = 'stanford-ner-2015-04-20.zip'
    out = os.path.join(ner_dir, fname)

    if not dl.conf.file_exists(out):
        dl.data.download(url, out)

    with ZipFile(out) as nerzip:
        nerzip.extractall(path=ner_dir)

    return os.path.join(ner_dir, os.path.splitext(fname)[0])
"""Compare stemming with lemmatization and rank Brown news terms by TF-IDF.

Side effects: prints the stem and lemma of 'analyses', writes the TF-IDF
matrix to ``tfidf.pkl`` and the per-term totals to ``tfidf_df.pkl`` in the
working directory, then prints the totals sorted in ascending order.
"""
import pickle

import numpy as np
import pandas as pd
from nltk.corpus import brown
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer

import ch8util
import dautil as dl


stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

print('stem(analyses)', stemmer.stem('analyses'))
print('lemmatize(analyses)', lemmatizer.lemmatize('analyses'))
print()

# A set gives constant-time membership tests inside ch8util.filter.
sw = set(stopwords.words())

fids = brown.fileids(categories='news')

# One space-joined document per news file, built from the filtered words.
texts = [" ".join(ch8util.filter(fid, lemmatizer, sw)) for fid in fids]

vectorizer = TfidfVectorizer()
matrix = vectorizer.fit_transform(texts)

with open('tfidf.pkl', 'wb') as pkl:
    pickle.dump(matrix, pkl)

# Column sums: one total TF-IDF weight per vocabulary term.
sums = np.array(matrix.sum(axis=0)).ravel()

# Newer scikit-learn only offers get_feature_names_out(); older releases
# only offer get_feature_names().  Use whichever this installation has.
if hasattr(vectorizer, 'get_feature_names_out'):
    feature_names = vectorizer.get_feature_names_out()
else:
    feature_names = vectorizer.get_feature_names()

ranks = list(zip(feature_names, sums))

df = pd.DataFrame(ranks, columns=["term", "tfidf"])
df.to_pickle('tfidf_df.pkl')
# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
df = df.sort_values('tfidf')
dl.options.set_pd_options()
print(df)
"""Print the strongest terms of every NMF topic.

The terms and the matrix come from ``ch8util.load_terms()`` and
``ch8util.load_tfidf()``; a 44-component NMF model is fitted on the matrix
and one line of terms is printed per component.
"""
from sklearn.decomposition import NMF
import ch8util

terms = ch8util.load_terms()
tfidf = ch8util.load_tfidf()

nmf = NMF(n_components=44, random_state=51)
nmf.fit(tfidf)

for topic_idx, topic in enumerate(nmf.components_):
    # argsort() is ascending, so the reversed slice [:-9:-1] yields the
    # indices of the eight largest weights, strongest first.
    strongest = topic.argsort()[:-9:-1]
    top_terms = " ".join(terms[i] for i in strongest)
    print('{}: '.format(topic_idx), top_terms)
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import dautil as dl\n", 12 | "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", 13 | "import matplotlib.pyplot as plt" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "df = dl.data.Weather.load().dropna()\n", 25 | "X = df.values\n", 26 | "y = df['WIND_DIR'].values" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "lda = LinearDiscriminantAnalysis(n_components=2)\n", 38 | "X_r = lda.fit(X, y).transform(X).T" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "%matplotlib inline\n", 50 | "dl.options.mimic_seaborn()\n", 51 | "plt.scatter(X_r[0], X_r[1])\n", 52 | "plt.xlabel('x')\n", 53 | "plt.ylabel('y')\n", 54 | "plt.title('Dimension Reduction with LDA')" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": true 62 | }, 63 | "outputs": [], 64 | "source": [] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.4.3" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 0 88 | } 89 | 
-------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/applying_pca.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import dautil as dl\n", 12 | "from sklearn.decomposition import PCA\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "from sklearn.preprocessing import scale" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "context = dl.nb.Context('applying_pca')\n", 26 | "lr = dl.nb.LatexRenderer(chapter=9, context=context)\n", 27 | "lr.render(r'\\mathbf{T}_L = \\mathbf{X} \\mathbf{W}_L')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "df = dl.data.Weather.load().dropna()\n", 39 | "df = dl.ts.groupby_yday(df).mean()\n", 40 | "X = df.values" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "pca = PCA(n_components=2)\n", 52 | "X_r = pca.fit_transform(scale(X)).T" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "%matplotlib inline\n", 64 | "dl.options.mimic_seaborn()\n", 65 | "plt.scatter(X_r[0], X_r[1])\n", 66 | "plt.xlabel('x')\n", 67 | "plt.ylabel('y')\n", 68 | "plt.title('Dimension Reduction with PCA')" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80
| "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.4.3" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 0 93 | } 94 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/dautil.json: -------------------------------------------------------------------------------- 1 | { 2 | "applying_pca.latex": [ 3 | 1 4 | ], 5 | "bagging": { 6 | "figure.figsize": [ 7 | 10.0, 8 | 7.3 9 | ] 10 | }, 11 | "boosting": { 12 | "figure.figsize": [ 13 | 10.2, 14 | 7.4 15 | ] 16 | }, 17 | "clustering_hierarchy": { 18 | "figure.figsize": [ 19 | 15.9, 20 | 11.9 21 | ] 22 | }, 23 | "clustering_hierarchy.latex": [ 24 | 2, 25 | 3 26 | ], 27 | "fit_ransac": { 28 | "figure.figsize": [ 29 | 10.7, 30 | 8.0 31 | ] 32 | }, 33 | "nested_cv": { 34 | "figure.figsize": [ 35 | 10.8, 36 | 7.9 37 | ] 38 | }, 39 | "nested_cv.labels": [ 40 | [ 41 | { 42 | "legend": "loc=best", 43 | "title": "R Squared Scores of the Full Grid", 44 | "xlabel": "R squared" 45 | }, 46 | { 47 | "legend": "loc=best", 48 | "title": "Best Scores in Iterations", 49 | "xlabel": "R squared" 50 | } 51 | ], 52 | [ 53 | { 54 | "legend": "loc=best", 55 | "title": "Mean Scores Grouped by Fold", 56 | "xlabel": "R squared" 57 | }, 58 | { 59 | "legend": "loc=best", 60 | "title": "Standard Deviations per Iteration", 61 | "xlabel": "R squared" 62 | } 63 | ] 64 | ], 65 | "random_forest": { 66 | "figure.figsize": [ 67 | 10.1, 68 | 7.4 69 | ] 70 | }, 71 | "stacking_multiple": { 72 | "figure.figsize": [ 73 | 10.7, 74 | 7.8 75 | ] 76 | }, 77 | "theano_tour.latex": [ 78 | 4, 79 | 5, 80 | 6 81 | ] 82 | } -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/feature_elimination.py: 
"""Rank weather features with recursive feature elimination (RFE).

Two selections are run on the same feature matrix: a linear SVC for the
boolean RAIN column and a linear SVR for the TEMP column.  In both cases the
features of row i are paired with the target value of row i + 1.  The
support mask and the rankings of each selector are printed.
"""
import warnings

import numpy as np
from sklearn.feature_selection import RFE
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.svm import SVR

import dautil as dl

warnings.filterwarnings("ignore", category=DeprecationWarning)
clf = SVC(random_state=42, kernel='linear')
selector = RFE(clf)

df = dl.data.Weather.load().dropna()
# RAIN becomes a boolean that is True for rows whose RAIN value equals 0.
df['RAIN'] = df['RAIN'] == 0
df['DOY'] = [float(d.dayofyear) for d in df.index]
scaler = MinMaxScaler()

# Scale every column except the boolean RAIN flag.  The scaler works per
# column, so one call on the 2-D selection gives the same values as scaling
# each column on its own, and it avoids handing the scaler a 1-D Series,
# which newer scikit-learn releases reject.
features = [c for c in df.columns if c != 'RAIN']
df[features] = scaler.fit_transform(df[features])

dl.options.set_pd_options()
print(df.head(1))
# Drop the last row: it has no following row to supply a target value.
X = df[:-1].values
np.set_printoptions(formatter={'all': '{:.3f}'.format})
print(X[0])
# Calling set_printoptions() again drops the temporary formatter.
np.set_printoptions()

y = df['RAIN'][1:].values
selector = selector.fit(X, y)
print('Rain support', df.columns[selector.support_])
print('Rain rankings', selector.ranking_)

reg = SVR(kernel='linear')
selector = RFE(reg)
y = df['TEMP'][1:].values
selector = selector.fit(X, y)
print('Temperature support', df.columns[selector.support_])
print('Temperature ranking', selector.ranking_)
# first line: 1
@memory.cache
def get_scores():
    """Collect grid-search scores over 500 shuffled train/test splits.

    The features are the WIND_SPEED, TEMP and PRESSURE columns of the
    weather data with incomplete rows dropped; the features of row i are
    paired with the TEMP value of row i + 1.  On every outer split a 5-fold
    grid search over ``min_samples_split`` and ``min_samples_leaf`` is
    fitted on the training part with an ``ExtraTreesRegressor``.

    Returns:
        dict with four lists:
        'r2'   -- every per-fold validation score of every grid point,
        'best' -- the best grid-search score of each outer split,
        'mean' -- the mean validation score of every grid point,
        'std'  -- the standard deviation of the per-fold scores per split.
    """
    # NOTE(review): this file lives under joblib/.../func_code.py, which
    # looks like joblib's stored copy of the function source -- confirm
    # before hand-editing, the canonical definition is presumably elsewhere.
    frame = dl.data.Weather.load()[['WIND_SPEED', 'TEMP', 'PRESSURE']].dropna()
    X = frame.values[:-1]
    y = frame['TEMP'][1:]

    param_grid = {'min_samples_split': [1, 3],
                  'min_samples_leaf': [3, 4]}
    regressor = ExtraTreesRegressor(bootstrap=True, random_state=37)
    gscv = GridSearchCV(regressor, param_grid=param_grid, n_jobs=-1, cv=5)
    cv_outer = ShuffleSplit(len(X), n_iter=500,
                            test_size=0.3, random_state=55)

    scores = {'r2': [], 'best': [], 'mean': [], 'std': []}

    for train_indices, test_indices in cv_outer:
        gscv.fit(X[train_indices], y[train_indices])
        # The predictions on the held-out part are computed but not used.
        gscv.predict(X[test_indices])

        grid_scores = dl.collect.flatten(
            [g.cv_validation_scores for g in gscv.grid_scores_])
        scores['r2'].extend(grid_scores)
        scores['mean'].extend(dl.collect.flatten(
            [g.mean_validation_score for g in gscv.grid_scores_]))
        scores['std'].append(np.std(grid_scores))
        scores['best'].append(gscv.best_score_)

    return scores
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/.DS_Store -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/2364399946f87e46b30ce16478bf3ce1/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"est": "VotingClassifier(estimators=[('default', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,\n max_features=None, max_leaf_nodes=None, min_samples_leaf=3,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=53, splitter='best')), ('ent...lit=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=5, splitter='random'))],\n voting='soft', weights=(2, 1, 1))"}, "duration": 0.531548261642456} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/2364399946f87e46b30ce16478bf3ce1/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/2364399946f87e46b30ce16478bf3ce1/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/2364399946f87e46b30ce16478bf3ce1/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/2364399946f87e46b30ce16478bf3ce1/output.pkl_01.npy 
-------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/2364399946f87e46b30ce16478bf3ce1/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/2364399946f87e46b30ce16478bf3ce1/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/2364399946f87e46b30ce16478bf3ce1/output.pkl_03.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/2364399946f87e46b30ce16478bf3ce1/output.pkl_03.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/908907c90d168a085e45ec93017d629b/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"est": "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,\n max_features=None, max_leaf_nodes=None, min_samples_leaf=3,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=53, splitter='best')"}, "duration": 0.33733057975769043} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/908907c90d168a085e45ec93017d629b/output.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/908907c90d168a085e45ec93017d629b/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/908907c90d168a085e45ec93017d629b/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/908907c90d168a085e45ec93017d629b/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/908907c90d168a085e45ec93017d629b/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/908907c90d168a085e45ec93017d629b/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/908907c90d168a085e45ec93017d629b/output.pkl_03.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/908907c90d168a085e45ec93017d629b/output.pkl_03.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/a7795a8401df1f89aabdcc6094811fdc/metadata.json: 
-------------------------------------------------------------------------------- 1 | {"input_args": {"est": "BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,\n max_features=None, max_leaf_nodes=None, min_samples_leaf=3,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=None, splitter='best'),\n bootstrap=True, bootstrap_features=True, max_features=1.0,\n max_samples=1.0, n_estimators=320, n_jobs=1, oob_score=False,\n random_state=43, verbose=0, warm_start=False)"}, "duration": 8.549299716949463} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/a7795a8401df1f89aabdcc6094811fdc/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/a7795a8401df1f89aabdcc6094811fdc/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/a7795a8401df1f89aabdcc6094811fdc/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/a7795a8401df1f89aabdcc6094811fdc/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/a7795a8401df1f89aabdcc6094811fdc/output.pkl_02.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/a7795a8401df1f89aabdcc6094811fdc/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/a7795a8401df1f89aabdcc6094811fdc/output.pkl_03.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/a7795a8401df1f89aabdcc6094811fdc/output.pkl_03.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/db5ea238322c82871114a427a27c0695/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"est": "AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n max_leaf_nodes=None, min_samples_leaf=2, min_samples_split=2,\n min_weight_fraction_leaf=0.0, presort=False, random_state=28,\n splitter='best'),\n learning_rate=1.0, loss='exponential', n_estimators=50,\n random_state=17)"}, "duration": 4.6582231521606445} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/db5ea238322c82871114a427a27c0695/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/db5ea238322c82871114a427a27c0695/output.pkl 
-------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/db5ea238322c82871114a427a27c0695/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/db5ea238322c82871114a427a27c0695/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/db5ea238322c82871114a427a27c0695/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/db5ea238322c82871114a427a27c0695/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/db5ea238322c82871114a427a27c0695/output.pkl_03.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/db5ea238322c82871114a427a27c0695/output.pkl_03.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/f3fbfae88102d9f80ac37e0a9b577931/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"est": "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',\n max_depth=4, max_features='auto', max_leaf_nodes=None,\n 
min_samples_leaf=3, min_samples_split=2,\n min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,\n oob_score=False, random_state=None, verbose=0,\n warm_start=False)"}, "duration": 2.617568254470825} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/f3fbfae88102d9f80ac37e0a9b577931/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/f3fbfae88102d9f80ac37e0a9b577931/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/f3fbfae88102d9f80ac37e0a9b577931/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/f3fbfae88102d9f80ac37e0a9b577931/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/f3fbfae88102d9f80ac37e0a9b577931/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/f3fbfae88102d9f80ac37e0a9b577931/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/learn/f3fbfae88102d9f80ac37e0a9b577931/output.pkl_03.npy: 
# first line: 45
@memory.cache(ignore=['X', 'y'])
def learn(est, X, y):
    """Return the ``learning_curve`` result for a deep copy of *est*.

    The estimator is copied first so the caller's instance is not the object
    handed to ``learning_curve``.  ``X`` and ``y`` are named in the cache
    decorator's ``ignore`` list.

    Args:
        est: estimator to evaluate.
        X: feature data passed through to ``learning_curve``.
        y: target data passed through to ``learning_curve``.

    Returns:
        Whatever ``learning_curve`` returns when run with ``n_jobs=-1``.
    """
    return learning_curve(deepcopy(est), X, y, n_jobs=-1)
9/joblib/ch9util/validate/09e28e76166ff8695c81b0f36ed3ae0f/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/09e28e76166ff8695c81b0f36ed3ae0f/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/09e28e76166ff8695c81b0f36ed3ae0f/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/09e28e76166ff8695c81b0f36ed3ae0f/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/09e28e76166ff8695c81b0f36ed3ae0f/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/09e28e76166ff8695c81b0f36ed3ae0f/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/0d5fa964789929ae2cd7af18c8155313/metadata.json: -------------------------------------------------------------------------------- 1 | {"duration": 0.5483946800231934, "input_args": {"est": "RANSACRegressor(base_estimator=None, is_data_valid=None, is_model_valid=None,\n max_trials=50, min_samples=None, random_state=27,\n residual_metric=None, residual_threshold=None, stop_n_inliers=inf,\n stop_probability=0.99, 
stop_score=inf)", "prange": "array([ 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192])", "pname": "'stop_n_inliers'"}} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/0d5fa964789929ae2cd7af18c8155313/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/0d5fa964789929ae2cd7af18c8155313/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/0d5fa964789929ae2cd7af18c8155313/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/0d5fa964789929ae2cd7af18c8155313/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/0d5fa964789929ae2cd7af18c8155313/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/0d5fa964789929ae2cd7af18c8155313/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/11583eb1dd321a43fbe1f8f4b54ec1e4/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"est": 
"AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n max_leaf_nodes=None, min_samples_leaf=2, min_samples_split=2,\n min_weight_fraction_leaf=0.0, presort=False, random_state=28,\n splitter='best'),\n learning_rate=1.0, loss='exponential', n_estimators=50,\n random_state=17)", "prange": "array([ 0.1 , 0.2125, 0.325 , 0.4375, 0.55 , 0.6625, 0.775 ,\n 0.8875, 1. ])", "pname": "'learning_rate'"}, "duration": 59.319374084472656} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/11583eb1dd321a43fbe1f8f4b54ec1e4/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/11583eb1dd321a43fbe1f8f4b54ec1e4/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/11583eb1dd321a43fbe1f8f4b54ec1e4/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/11583eb1dd321a43fbe1f8f4b54ec1e4/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/11583eb1dd321a43fbe1f8f4b54ec1e4/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 
9/joblib/ch9util/validate/11583eb1dd321a43fbe1f8f4b54ec1e4/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/31d50c1236c9d1e74d3edf0c6de54969/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"est": "AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n max_leaf_nodes=None, min_samples_leaf=2, min_samples_split=2,\n min_weight_fraction_leaf=0.0, presort=False, random_state=28,\n splitter='best'),\n learning_rate=1.0, loss='exponential', n_estimators=50,\n random_state=17)", "prange": "array([ 8, 16, 32, 64, 128, 256])", "pname": "'n_estimators'"}, "duration": 34.6153838634491} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/31d50c1236c9d1e74d3edf0c6de54969/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/31d50c1236c9d1e74d3edf0c6de54969/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/31d50c1236c9d1e74d3edf0c6de54969/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/31d50c1236c9d1e74d3edf0c6de54969/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
9/joblib/ch9util/validate/31d50c1236c9d1e74d3edf0c6de54969/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/31d50c1236c9d1e74d3edf0c6de54969/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/3be3ec0fce5e7391b8dbece404bfeec4/metadata.json: -------------------------------------------------------------------------------- 1 | {"duration": 0.9424378871917725, "input_args": {"est": "RANSACRegressor(base_estimator=None, is_data_valid=None, is_model_valid=None,\n max_trials=50, min_samples=None, random_state=27,\n residual_metric=None, residual_threshold=None, stop_n_inliers=inf,\n stop_probability=0.99, stop_score=inf)", "prange": "array([ 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98])", "pname": "'stop_probability'"}} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/3be3ec0fce5e7391b8dbece404bfeec4/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/3be3ec0fce5e7391b8dbece404bfeec4/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/3be3ec0fce5e7391b8dbece404bfeec4/output.pkl_01.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/3be3ec0fce5e7391b8dbece404bfeec4/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/3be3ec0fce5e7391b8dbece404bfeec4/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/3be3ec0fce5e7391b8dbece404bfeec4/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/6bf9b01d043ee447829a5dc2d9493f29/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"prange": "array([ 1, 2, 4, 8, 16, 32, 64, 128, 256])", "est": "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',\n max_depth=4, max_features='auto', max_leaf_nodes=None,\n min_samples_leaf=3, min_samples_split=2,\n min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,\n oob_score=False, random_state=None, verbose=0,\n warm_start=False)", "pname": "'n_estimators'"}, "duration": 12.348050117492676} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/6bf9b01d043ee447829a5dc2d9493f29/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 
9/joblib/ch9util/validate/6bf9b01d043ee447829a5dc2d9493f29/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/6bf9b01d043ee447829a5dc2d9493f29/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/6bf9b01d043ee447829a5dc2d9493f29/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/6bf9b01d043ee447829a5dc2d9493f29/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/6bf9b01d043ee447829a5dc2d9493f29/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/8b592e0d5102bc9bc1ccb9910d83bb87/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"pname": "'n_estimators'", "prange": "array([ 16, 32, 64, 128, 256, 512, 1024])", "est": "BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,\n max_features=None, max_leaf_nodes=None, min_samples_leaf=3,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=None, splitter='best'),\n bootstrap=True, bootstrap_features=True, max_features=1.0,\n max_samples=1.0, n_estimators=320, n_jobs=1, oob_score=False,\n random_state=43, verbose=0, warm_start=False)"}, "duration": 62.68083429336548} 
-------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/8b592e0d5102bc9bc1ccb9910d83bb87/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/8b592e0d5102bc9bc1ccb9910d83bb87/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/8b592e0d5102bc9bc1ccb9910d83bb87/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/8b592e0d5102bc9bc1ccb9910d83bb87/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/8b592e0d5102bc9bc1ccb9910d83bb87/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/8b592e0d5102bc9bc1ccb9910d83bb87/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/b6d29b67208417fdc0fca870891a84b6/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"pname": "'max_samples'", "prange": "array([ 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192])", "est": 
"BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,\n max_features=None, max_leaf_nodes=None, min_samples_leaf=3,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=None, splitter='best'),\n bootstrap=True, bootstrap_features=True, max_features=1.0,\n max_samples=1.0, n_estimators=320, n_jobs=1, oob_score=False,\n random_state=43, verbose=0, warm_start=False)"}, "duration": 43.1432249546051} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/b6d29b67208417fdc0fca870891a84b6/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/b6d29b67208417fdc0fca870891a84b6/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/b6d29b67208417fdc0fca870891a84b6/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/b6d29b67208417fdc0fca870891a84b6/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/b6d29b67208417fdc0fca870891a84b6/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 
9/joblib/ch9util/validate/b6d29b67208417fdc0fca870891a84b6/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/e7e857409615695248a36145824f59c7/metadata.json: -------------------------------------------------------------------------------- 1 | {"input_args": {"prange": "array([2, 3, 4, 5, 6, 7, 8])", "est": "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',\n max_depth=4, max_features='auto', max_leaf_nodes=None,\n min_samples_leaf=3, min_samples_split=2,\n min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,\n oob_score=False, random_state=None, verbose=0,\n warm_start=False)", "pname": "'max_depth'"}, "duration": 19.593727827072144} -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/e7e857409615695248a36145824f59c7/output.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/e7e857409615695248a36145824f59c7/output.pkl -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/e7e857409615695248a36145824f59c7/output.pkl_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/e7e857409615695248a36145824f59c7/output.pkl_01.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 
9/joblib/ch9util/validate/e7e857409615695248a36145824f59c7/output.pkl_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/e7e857409615695248a36145824f59c7/output.pkl_02.npy -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/joblib/ch9util/validate/func_code.py: -------------------------------------------------------------------------------- 1 | # first line: 17 2 | @memory.cache(ignore=['X', 'y']) 3 | def validate(est, X, y, pname, prange): 4 | est_cp = deepcopy(est) 5 | 6 | return validation_curve(est_cp, X, y, param_name=pname, 7 | param_range=prange, n_jobs=1) 8 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/rain_pot.py: -------------------------------------------------------------------------------- 1 | import ch9util 2 | from tpot import TPOT 3 | 4 | X_train, X_test, y_train, y_test = ch9util.rain_split() 5 | tpot = TPOT(generations=7, population_size=110, verbosity=2) 6 | tpot.fit(X_train, y_train) 7 | print(tpot.score(X_train, y_train, X_test, y_test)) 8 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Chapter 9/reusing_models.py: -------------------------------------------------------------------------------- 1 | from sklearn.grid_search import GridSearchCV 2 | from sklearn.ensemble import ExtraTreesRegressor 3 | import ch9util 4 | from tempfile import mkdtemp 5 | import os 6 | import joblib 7 | 8 | X_train, X_test, y_train, y_test = ch9util.temp_split() 9 | params = {'min_samples_split': [1, 3], 10 | 'bootstrap': [True, False], 11 | 'min_samples_leaf': [3, 4]} 12 | 13 | gscv = 
GridSearchCV(ExtraTreesRegressor(random_state=41), 14 | param_grid=params, cv=5) 15 | 16 | gscv.fit(X_train, y_train) 17 | preds = gscv.predict(X_test) 18 | ch9util.npy_save('etr.npy', preds) 19 | dir = mkdtemp() 20 | pkl = os.path.join(dir, 'params.pkl') 21 | joblib.dump(gscv.best_params_, pkl) 22 | params = joblib.load(pkl) 23 | print('Best params', gscv.best_params_) 24 | print('From pkl', params) 25 | est = ExtraTreesRegressor(random_state=41) 26 | est.set_params(**params) 27 | est.fit(X_train, y_train) 28 | preds2 = est.predict(X_test) 29 | print('Max diff', (preds - preds2).max()) 30 | -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Readme.TXT: -------------------------------------------------------------------------------- 1 | The Code folder contains the code samples used in this book. 2 | Software_Hardware_list file contains the list of required software and hardware for this book. 3 | 4 | Happy coding! :) -------------------------------------------------------------------------------- /Module2/Python_Data_Analysis_code/Software and hardware list.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module2/Python_Data_Analysis_code/Software and hardware list.docx -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Appendix/helpfunctions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def despine(axs): 4 | # to be able to handle subplot grids 5 | # it assumes the input is a list of 6 | # axes instances, if it is not a list, 7 | # it puts it in one 8 | if type(axs) != type([]): 9 | axs = [axs] 10 | for ax in axs: 11 | ax.yaxis.set_ticks_position('left') 12 | ax.xaxis.set_ticks_position('bottom') 13 | 
ax.spines['bottom'].set_position(('outward', 10)) 14 | ax.spines['left'].set_position(('outward', 10)) 15 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Appendix/mystyle.mplstyle: -------------------------------------------------------------------------------- 1 | # LINES 2 | lines.linewidth: 3 3 | lines.solid_capstyle: butt 4 | 5 | # LEGEND 6 | legend.fancybox: True 7 | legend.framealpha: 0.5 8 | legend.scatterpoints: 1 9 | legend.numpoints: 2 10 | legend.shadow: False 11 | legend.frameon: False 12 | legend.fontsize: 10 13 | 14 | # GRID 15 | #grid.linestyle: dashed 16 | grid.linewidth: 1.0 17 | grid.color: afafaf 18 | 19 | 20 | # AXES 21 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', '8b8b8b']) 22 | #axes.prop_cycle: cycler('color', ['348abd', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', 'e41a1c']) 23 | #axes.color_cycle: 348ABD, E24A33, 685EA5, 777777, FBC15E, 8EBA42, FFB5B8 24 | # 348ABD : blue 25 | # E24A33 : red 26 | # 988ED5 : purple 27 | # 777777 : gray 28 | # FBC15E : yellow 29 | # 8EBA42 : green 30 | # FFB5B8 : pink 31 | #axes.prop_cycle: cycler('color', ['377eb8', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', '999999', 'e41a1c', 'dede00'] ) 32 | 33 | axes.facecolor: w 34 | axes.labelsize: medium 35 | axes.labelcolor: 333333 36 | axes.axisbelow: False 37 | axes.grid: False 38 | axes.edgecolor: 555555 39 | axes.linewidth: 2.0 40 | axes.titlesize: small 41 | #axes.spines.bottom: True 42 | #axes.spines.left: True 43 | #axes.spines.right: True 44 | #axes.spines.top: True 45 | 46 | # PATCHES 47 | patch.edgecolor: w 48 | patch.linewidth: 1 49 | 50 | 51 | # TICK SETTINGS 52 | ## X-axis 53 | xtick.major.size: 7 54 | xtick.minor.size: 4 55 | xtick.major.width: 1.5 56 | xtick.minor.width: 1.5 57 | #xtick.minor.visible: True 58 | xtick.labelsize: medium 59 | xtick.color: 333333 60 | ## Y-axis 61 | ytick.major.size: 7 62 | ytick.minor.size: 4 63 | 
ytick.major.width: 1.5 64 | ytick.minor.width: 1.5 65 | #ytick.minor.visible: True 66 | ytick.labelsize: medium 67 | ytick.color: 333333 68 | 69 | # FONT SETTINGS 70 | font.size:12.0 71 | 72 | # SAVE SETTINGS 73 | savefig.edgecolor: w 74 | savefig.facecolor: w 75 | savefig.dpi: 100 76 | 77 | # FIGURE SETTINGS 78 | figure.dpi: 100 79 | figure.figsize: 5.0, 3.5 80 | figure.facecolor: w 81 | figure.edgecolor: w 82 | figure.subplot.left: 0.08 83 | figure.subplot.right: 0.95 84 | figure.subplot.bottom: 0.07 85 | figure.titlesize: large 86 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Appendix/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Appendix/test.png -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Appendix/test_markdown_file.md: -------------------------------------------------------------------------------- 1 | # A Markdown text file for you to edit 2 | --- 3 | 4 | Some of the markdown syntax is rendered, such as headings. 5 | Others not, like 6 | * bullet lists. 7 | * They are not rendered.. 8 | 9 | ## Images 10 | Or images: 11 | ![test image](./test.png) 12 | 13 | ## Syntax highlighting 14 | However, much of the syntax is highlighted though. 
15 | 16 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 1/data/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Simon Dooms 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 1/mystyle.mplstyle: -------------------------------------------------------------------------------- 1 | # LINES 2 | lines.linewidth: 3 3 | lines.solid_capstyle: butt 4 | 5 | # LEGEND 6 | legend.fancybox: True 7 | legend.framealpha: 0.5 8 | legend.scatterpoints: 1 9 | legend.numpoints: 2 10 | legend.shadow: False 11 | legend.frameon: False 12 | legend.fontsize: 10 13 | 14 | # GRID 15 | #grid.linestyle: dashed 16 | grid.linewidth: 1.0 17 | grid.color: afafaf 18 | 19 | 20 | # AXES 21 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', '8b8b8b']) 22 | #axes.prop_cycle: cycler('color', ['348abd', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', 'e41a1c']) 23 | #axes.color_cycle: 348ABD, E24A33, 685EA5, 777777, FBC15E, 8EBA42, FFB5B8 24 | # 348ABD : blue 25 | # E24A33 : red 26 | # 988ED5 : purple 27 | # 777777 : gray 28 | # FBC15E : yellow 29 | # 8EBA42 : green 30 | # FFB5B8 : pink 31 | #axes.prop_cycle: cycler('color', ['377eb8', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', '999999', 'e41a1c', 'dede00'] ) 32 | 33 | axes.facecolor: w 34 | axes.labelsize: medium 35 | axes.labelcolor: 333333 36 | axes.axisbelow: False 37 | axes.grid: False 38 | axes.edgecolor: 555555 39 | axes.linewidth: 2.0 40 | axes.titlesize: small 41 | #axes.spines.bottom: True 42 | #axes.spines.left: True 43 | #axes.spines.right: True 44 | #axes.spines.top: True 45 | 46 | # PATCHES 47 | patch.edgecolor: w 48 | patch.linewidth: 1 49 | 50 | 51 | # TICK SETTINGS 52 | ## X-axis 53 | xtick.major.size: 7 54 | xtick.minor.size: 4 55 | xtick.major.width: 1.5 56 | xtick.minor.width: 1.5 57 | #xtick.minor.visible: True 58 | xtick.labelsize: medium 59 | xtick.color: 333333 60 | ## Y-axis 61 | ytick.major.size: 7 62 | ytick.minor.size: 4 63 | ytick.major.width: 1.5 64 | ytick.minor.width: 1.5 65 | #ytick.minor.visible: True 66 | ytick.labelsize: 
medium 67 | ytick.color: 333333 68 | 69 | # FONT SETTINGS 70 | font.size:12.0 71 | 72 | # SAVE SETTINGS 73 | savefig.edgecolor: w 74 | savefig.facecolor: w 75 | savefig.dpi: 100 76 | 77 | # FIGURE SETTINGS 78 | figure.dpi: 100 79 | figure.figsize: 5.0, 3.5 80 | figure.facecolor: w 81 | figure.edgecolor: w 82 | figure.subplot.left: 0.08 83 | figure.subplot.right: 0.95 84 | figure.subplot.bottom: 0.07 85 | figure.titlesize: large 86 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 2/data/GSS2012merged_R5.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Chapter 2/data/GSS2012merged_R5.dta -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 2/data/GSS_Codebook_index.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Chapter 2/data/GSS_Codebook_index.pdf -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 2/data/Release Notes for the GSS 2012 Merged R5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Chapter 2/data/Release Notes for the GSS 2012 Merged R5.pdf -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 2/data/hubble.csv: 
-------------------------------------------------------------------------------- 1 | #from http://www.harding.edu/jmackey/ps113/exercises/hubble_law(long).pdf 2 | # id r v 3 | SMC, 0.032, +170 4 | 5194, 0.5, +270 5 | 1055, 1.1, +450 6 | LMC, 0.034, +290 7 | 4449, 0.63, +200 8 | 7331, 1.1, +500 9 | 6822, 0.214, -130 10 | 4214, 0.8, +300 11 | 4258, 1.4, +500 12 | 598, 0.263,-70 13 | 3031, 0.9, -30 14 | 4151, 1.7, +960 15 | 221, 0.275,-185 16 | 3627, 0.9, +650 17 | 4382, 2.0, +500 18 | 224, 0.275,-220 19 | 4826, 0.9, +150 20 | 4472, 2.0, +850 21 | 5357, 0.45, +200 22 | 5236, 0.9, +500 23 | 4486, 2.0, +800 24 | 4736, 0.5, +290 25 | 1068, 1.0, +920 26 | 4649, 2.0, +1090 27 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 2/mystyle.mplstyle: -------------------------------------------------------------------------------- 1 | # LINES 2 | lines.linewidth: 3 3 | lines.solid_capstyle: butt 4 | 5 | # LEGEND 6 | legend.fancybox: True 7 | legend.framealpha: 0.5 8 | legend.scatterpoints: 1 9 | legend.numpoints: 2 10 | legend.shadow: False 11 | legend.frameon: False 12 | legend.fontsize: 10 13 | 14 | # GRID 15 | #grid.linestyle: dashed 16 | grid.linewidth: 1.0 17 | grid.color: afafaf 18 | 19 | 20 | # AXES 21 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', '8b8b8b']) 22 | #axes.prop_cycle: cycler('color', ['348abd', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', 'e41a1c']) 23 | #axes.color_cycle: 348ABD, E24A33, 685EA5, 777777, FBC15E, 8EBA42, FFB5B8 24 | # 348ABD : blue 25 | # E24A33 : red 26 | # 988ED5 : purple 27 | # 777777 : gray 28 | # FBC15E : yellow 29 | # 8EBA42 : green 30 | # FFB5B8 : pink 31 | #axes.prop_cycle: cycler('color', ['377eb8', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', '999999', 'e41a1c', 'dede00'] ) 32 | 33 | axes.facecolor: w 34 | axes.labelsize: medium 35 | axes.labelcolor: 333333 36 | axes.axisbelow: False 37 | axes.grid: False 38 | 
axes.edgecolor: 555555 39 | axes.linewidth: 2.0 40 | axes.titlesize: small 41 | #axes.spines.bottom: True 42 | #axes.spines.left: True 43 | #axes.spines.right: True 44 | #axes.spines.top: True 45 | 46 | # PATCHES 47 | patch.edgecolor: w 48 | patch.linewidth: 1 49 | 50 | 51 | # TICK SETTINGS 52 | ## X-axis 53 | xtick.major.size: 7 54 | xtick.minor.size: 4 55 | xtick.major.width: 1.5 56 | xtick.minor.width: 1.5 57 | #xtick.minor.visible: True 58 | xtick.labelsize: medium 59 | xtick.color: 333333 60 | ## Y-axis 61 | ytick.major.size: 7 62 | ytick.minor.size: 4 63 | ytick.major.width: 1.5 64 | ytick.minor.width: 1.5 65 | #ytick.minor.visible: True 66 | ytick.labelsize: medium 67 | ytick.color: 333333 68 | 69 | # FONT SETTINGS 70 | font.size:12.0 71 | 72 | # SAVE SETTINGS 73 | savefig.edgecolor: w 74 | savefig.facecolor: w 75 | savefig.dpi: 100 76 | 77 | # FIGURE SETTINGS 78 | figure.dpi: 100 79 | figure.figsize: 5.0, 3.5 80 | figure.facecolor: w 81 | figure.edgecolor: w 82 | figure.subplot.left: 0.08 83 | figure.subplot.right: 0.95 84 | figure.subplot.bottom: 0.07 85 | figure.titlesize: large 86 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 3/data/housefly-wing-lengths.txt: -------------------------------------------------------------------------------- 1 | 36 2 | 37 3 | 38 4 | 38 5 | 39 6 | 39 7 | 40 8 | 40 9 | 40 10 | 40 11 | 41 12 | 41 13 | 41 14 | 41 15 | 41 16 | 41 17 | 42 18 | 42 19 | 42 20 | 42 21 | 42 22 | 42 23 | 42 24 | 43 25 | 43 26 | 43 27 | 43 28 | 43 29 | 43 30 | 43 31 | 43 32 | 44 33 | 44 34 | 44 35 | 44 36 | 44 37 | 44 38 | 44 39 | 44 40 | 44 41 | 45 42 | 45 43 | 45 44 | 45 45 | 45 46 | 45 47 | 45 48 | 45 49 | 45 50 | 45 51 | 46 52 | 46 53 | 46 54 | 46 55 | 46 56 | 46 57 | 46 58 | 46 59 | 46 60 | 46 61 | 47 62 | 47 63 | 47 64 | 47 65 | 47 66 | 47 67 | 47 68 | 47 69 | 47 70 | 48 71 | 48 72 | 48 73 | 48 74 | 48 75 | 48 76 | 48 77 | 48 78 | 49 79 | 49 80 | 49 81 | 49 
82 | 49 83 | 49 84 | 49 85 | 50 86 | 50 87 | 50 88 | 50 89 | 50 90 | 50 91 | 51 92 | 51 93 | 51 94 | 51 95 | 52 96 | 52 97 | 53 98 | 53 99 | 54 100 | 55 -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 3/mystyle.mplstyle: -------------------------------------------------------------------------------- 1 | # LINES 2 | lines.linewidth: 3 3 | lines.solid_capstyle: butt 4 | 5 | # LEGEND 6 | legend.fancybox: True 7 | legend.framealpha: 0.5 8 | legend.scatterpoints: 1 9 | legend.numpoints: 2 10 | legend.shadow: False 11 | legend.frameon: False 12 | legend.fontsize: 10 13 | 14 | # GRID 15 | #grid.linestyle: dashed 16 | grid.linewidth: 1.0 17 | grid.color: afafaf 18 | 19 | 20 | # AXES 21 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', '8b8b8b']) 22 | #axes.prop_cycle: cycler('color', ['348abd', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', 'e41a1c']) 23 | #axes.color_cycle: 348ABD, E24A33, 685EA5, 777777, FBC15E, 8EBA42, FFB5B8 24 | # 348ABD : blue 25 | # E24A33 : red 26 | # 988ED5 : purple 27 | # 777777 : gray 28 | # FBC15E : yellow 29 | # 8EBA42 : green 30 | # FFB5B8 : pink 31 | #axes.prop_cycle: cycler('color', ['377eb8', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', '999999', 'e41a1c', 'dede00'] ) 32 | 33 | axes.facecolor: w 34 | axes.labelsize: medium 35 | axes.labelcolor: 333333 36 | axes.axisbelow: False 37 | axes.grid: False 38 | axes.edgecolor: 555555 39 | axes.linewidth: 2.0 40 | axes.titlesize: small 41 | #axes.spines.bottom: True 42 | #axes.spines.left: True 43 | #axes.spines.right: True 44 | #axes.spines.top: True 45 | 46 | # PATCHES 47 | patch.edgecolor: w 48 | patch.linewidth: 1 49 | 50 | 51 | # TICK SETTINGS 52 | ## X-axis 53 | xtick.major.size: 7 54 | xtick.minor.size: 4 55 | xtick.major.width: 1.5 56 | xtick.minor.width: 1.5 57 | #xtick.minor.visible: True 58 | xtick.labelsize: medium 59 | xtick.color: 333333 60 | ## Y-axis 61 | 
ytick.major.size: 7 62 | ytick.minor.size: 4 63 | ytick.major.width: 1.5 64 | ytick.minor.width: 1.5 65 | #ytick.minor.visible: True 66 | ytick.labelsize: medium 67 | ytick.color: 333333 68 | 69 | # FONT SETTINGS 70 | font.size:12.0 71 | 72 | # SAVE SETTINGS 73 | savefig.edgecolor: w 74 | savefig.facecolor: w 75 | savefig.dpi: 100 76 | 77 | # FIGURE SETTINGS 78 | figure.dpi: 100 79 | figure.figsize: 5.0, 3.5 80 | figure.facecolor: w 81 | figure.edgecolor: w 82 | figure.subplot.left: 0.08 83 | figure.subplot.right: 0.95 84 | figure.subplot.bottom: 0.07 85 | figure.titlesize: large 86 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 3/old/housefly-wing-lengths.txt: -------------------------------------------------------------------------------- 1 | 36 2 | 37 3 | 38 4 | 38 5 | 39 6 | 39 7 | 40 8 | 40 9 | 40 10 | 40 11 | 41 12 | 41 13 | 41 14 | 41 15 | 41 16 | 41 17 | 42 18 | 42 19 | 42 20 | 42 21 | 42 22 | 42 23 | 42 24 | 43 25 | 43 26 | 43 27 | 43 28 | 43 29 | 43 30 | 43 31 | 43 32 | 44 33 | 44 34 | 44 35 | 44 36 | 44 37 | 44 38 | 44 39 | 44 40 | 44 41 | 45 42 | 45 43 | 45 44 | 45 45 | 45 46 | 45 47 | 45 48 | 45 49 | 45 50 | 45 51 | 46 52 | 46 53 | 46 54 | 46 55 | 46 56 | 46 57 | 46 58 | 46 59 | 46 60 | 46 61 | 47 62 | 47 63 | 47 64 | 47 65 | 47 66 | 47 67 | 47 68 | 47 69 | 47 70 | 48 71 | 48 72 | 48 73 | 48 74 | 48 75 | 48 76 | 48 77 | 48 78 | 49 79 | 49 80 | 49 81 | 49 82 | 49 83 | 49 84 | 49 85 | 50 86 | 50 87 | 50 88 | 50 89 | 50 90 | 50 91 | 51 92 | 51 93 | 51 94 | 51 95 | 52 96 | 52 97 | 53 98 | 53 99 | 54 100 | 55 -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 4/data/data_ch4.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Chapter 4/data/data_ch4.h5 -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 4/data_ch4.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Chapter 4/data_ch4.h5 -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 4/mystyle.mplstyle: -------------------------------------------------------------------------------- 1 | # LINES 2 | lines.linewidth: 3 3 | lines.solid_capstyle: butt 4 | 5 | # LEGEND 6 | legend.fancybox: True 7 | legend.framealpha: 0.5 8 | legend.scatterpoints: 1 9 | legend.numpoints: 2 10 | legend.shadow: False 11 | legend.frameon: False 12 | legend.fontsize: 10 13 | 14 | # GRID 15 | #grid.linestyle: dashed 16 | grid.linewidth: 1.0 17 | grid.color: afafaf 18 | 19 | 20 | # AXES 21 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', '8b8b8b']) 22 | #axes.prop_cycle: cycler('color', ['348abd', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', 'e41a1c']) 23 | #axes.color_cycle: 348ABD, E24A33, 685EA5, 777777, FBC15E, 8EBA42, FFB5B8 24 | # 348ABD : blue 25 | # E24A33 : red 26 | # 988ED5 : purple 27 | # 777777 : gray 28 | # FBC15E : yellow 29 | # 8EBA42 : green 30 | # FFB5B8 : pink 31 | #axes.prop_cycle: cycler('color', ['377eb8', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', '999999', 'e41a1c', 'dede00'] ) 32 | 33 | axes.facecolor: w 34 | axes.labelsize: medium 35 | axes.labelcolor: 333333 36 | axes.axisbelow: False 37 | axes.grid: False 38 | axes.edgecolor: 555555 39 | axes.linewidth: 2.0 40 | 
axes.titlesize: small 41 | #axes.spines.bottom: True 42 | #axes.spines.left: True 43 | #axes.spines.right: True 44 | #axes.spines.top: True 45 | 46 | # PATCHES 47 | patch.edgecolor: w 48 | patch.linewidth: 1 49 | 50 | 51 | # TICK SETTINGS 52 | ## X-axis 53 | xtick.major.size: 7 54 | xtick.minor.size: 4 55 | xtick.major.width: 1.5 56 | xtick.minor.width: 1.5 57 | #xtick.minor.visible: True 58 | xtick.labelsize: medium 59 | xtick.color: 333333 60 | ## Y-axis 61 | ytick.major.size: 7 62 | ytick.minor.size: 4 63 | ytick.major.width: 1.5 64 | ytick.minor.width: 1.5 65 | #ytick.minor.visible: True 66 | ytick.labelsize: medium 67 | ytick.color: 333333 68 | 69 | # FONT SETTINGS 70 | font.size:12.0 71 | 72 | # SAVE SETTINGS 73 | savefig.edgecolor: w 74 | savefig.facecolor: w 75 | savefig.dpi: 100 76 | 77 | # FIGURE SETTINGS 78 | figure.dpi: 100 79 | figure.figsize: 5.0, 3.5 80 | figure.facecolor: w 81 | figure.edgecolor: w 82 | figure.subplot.left: 0.08 83 | figure.subplot.right: 0.95 84 | figure.subplot.bottom: 0.07 85 | figure.titlesize: large 86 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 5/data/cholera_pumps.txt: -------------------------------------------------------------------------------- 1 | "X","Y" 2 | 8.6512012,17.8915997 3 | 10.9847803,18.5178509 4 | 13.3781900,17.3945408 5 | 14.8798304,17.8099194 6 | 8.6947680,14.9054699 7 | 8.8644161,12.7535400 8 | 12.5713596,11.7271700 9 | 10.6609697,7.4286470 10 | 13.5214596,7.9582500 11 | 16.4348907,9.2521296 12 | 18.9143906,9.7378187 13 | 16.0051098,5.0468378 14 | 8.9994402,5.1010232 15 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 5/data/data_ch4.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Chapter 5/data/data_ch4.h5 -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 5/data/data_ch5_clustering.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Chapter 5/data/data_ch5_clustering.h5 -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 5/data/data_ch5_clustering.pick: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Chapter 5/data/data_ch5_clustering.pick -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 5/data/uzcJ2000.tab.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Chapter 5/data/uzcJ2000.tab.gz -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 5/mystyle.mplstyle: -------------------------------------------------------------------------------- 1 | # LINES 2 | lines.linewidth: 3 3 | lines.solid_capstyle: butt 4 | 5 | # LEGEND 6 | legend.fancybox: True 7 | legend.framealpha: 0.5 8 | legend.scatterpoints: 1 9 | legend.numpoints: 2 10 | legend.shadow: False 11 | 
legend.frameon: False 12 | legend.fontsize: 10 13 | 14 | # GRID 15 | #grid.linestyle: dashed 16 | grid.linewidth: 1.0 17 | grid.color: afafaf 18 | 19 | 20 | # AXES 21 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', '8b8b8b']) 22 | #axes.prop_cycle: cycler('color', ['348abd', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', 'e41a1c']) 23 | #axes.color_cycle: 348ABD, E24A33, 685EA5, 777777, FBC15E, 8EBA42, FFB5B8 24 | # 348ABD : blue 25 | # E24A33 : red 26 | # 988ED5 : purple 27 | # 777777 : gray 28 | # FBC15E : yellow 29 | # 8EBA42 : green 30 | # FFB5B8 : pink 31 | #axes.prop_cycle: cycler('color', ['377eb8', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', '999999', 'e41a1c', 'dede00'] ) 32 | 33 | axes.facecolor: w 34 | axes.labelsize: medium 35 | axes.labelcolor: 333333 36 | axes.axisbelow: False 37 | axes.grid: False 38 | axes.edgecolor: 555555 39 | axes.linewidth: 2.0 40 | axes.titlesize: small 41 | #axes.spines.bottom: True 42 | #axes.spines.left: True 43 | #axes.spines.right: True 44 | #axes.spines.top: True 45 | 46 | # PATCHES 47 | patch.edgecolor: w 48 | patch.linewidth: 1 49 | 50 | 51 | # TICK SETTINGS 52 | ## X-axis 53 | xtick.major.size: 7 54 | xtick.minor.size: 4 55 | xtick.major.width: 1.5 56 | xtick.minor.width: 1.5 57 | #xtick.minor.visible: True 58 | xtick.labelsize: medium 59 | xtick.color: 333333 60 | ## Y-axis 61 | ytick.major.size: 7 62 | ytick.minor.size: 4 63 | ytick.major.width: 1.5 64 | ytick.minor.width: 1.5 65 | #ytick.minor.visible: True 66 | ytick.labelsize: medium 67 | ytick.color: 333333 68 | 69 | # FONT SETTINGS 70 | font.size:12.0 71 | 72 | # SAVE SETTINGS 73 | savefig.edgecolor: w 74 | savefig.facecolor: w 75 | savefig.dpi: 100 76 | 77 | # FIGURE SETTINGS 78 | figure.dpi: 100 79 | figure.figsize: 5.0, 3.5 80 | figure.facecolor: w 81 | figure.edgecolor: w 82 | figure.subplot.left: 0.08 83 | figure.subplot.right: 0.95 84 | figure.subplot.bottom: 0.07 85 | figure.titlesize: large 86 | 
-------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 6/mystyle.mplstyle: -------------------------------------------------------------------------------- 1 | # LINES 2 | lines.linewidth: 3 3 | lines.solid_capstyle: butt 4 | 5 | # LEGEND 6 | legend.fancybox: True 7 | legend.framealpha: 0.5 8 | legend.scatterpoints: 1 9 | legend.numpoints: 2 10 | legend.shadow: False 11 | legend.frameon: False 12 | legend.fontsize: 10 13 | 14 | # GRID 15 | #grid.linestyle: dashed 16 | grid.linewidth: 1.0 17 | grid.color: afafaf 18 | 19 | 20 | # AXES 21 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', '8b8b8b']) 22 | #axes.prop_cycle: cycler('color', ['348abd', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', 'e41a1c']) 23 | #axes.color_cycle: 348ABD, E24A33, 685EA5, 777777, FBC15E, 8EBA42, FFB5B8 24 | # 348ABD : blue 25 | # E24A33 : red 26 | # 988ED5 : purple 27 | # 777777 : gray 28 | # FBC15E : yellow 29 | # 8EBA42 : green 30 | # FFB5B8 : pink 31 | #axes.prop_cycle: cycler('color', ['377eb8', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', '999999', 'e41a1c', 'dede00'] ) 32 | 33 | axes.facecolor: w 34 | axes.labelsize: medium 35 | axes.labelcolor: 333333 36 | axes.axisbelow: False 37 | axes.grid: False 38 | axes.edgecolor: 555555 39 | axes.linewidth: 2.0 40 | axes.titlesize: medium 41 | #axes.spines.bottom: True 42 | #axes.spines.left: True 43 | #axes.spines.right: True 44 | #axes.spines.top: True 45 | 46 | # PATCHES 47 | patch.edgecolor: w 48 | patch.linewidth: 1 49 | 50 | 51 | # TICK SETTINGS 52 | ## X-axis 53 | xtick.major.size: 7 54 | xtick.minor.size: 4 55 | xtick.major.width: 1.5 56 | xtick.minor.width: 1.5 57 | #xtick.minor.visible: True 58 | xtick.labelsize: medium 59 | xtick.color: 333333 60 | ## Y-axis 61 | ytick.major.size: 7 62 | ytick.minor.size: 4 63 | ytick.major.width: 1.5 64 | ytick.minor.width: 1.5 65 | #ytick.minor.visible: True 66 | ytick.labelsize: 
medium 67 | ytick.color: 333333 68 | 69 | # FONT SETTINGS 70 | font.size:12.0 71 | 72 | # SAVE SETTINGS 73 | savefig.edgecolor: w 74 | savefig.facecolor: w 75 | savefig.dpi: 100 76 | 77 | # FIGURE SETTINGS 78 | figure.dpi: 100 79 | figure.figsize: 5.0, 3.5 80 | figure.facecolor: w 81 | figure.edgecolor: w 82 | figure.subplot.left: 0.08 83 | figure.subplot.right: 0.95 84 | figure.subplot.bottom: 0.07 85 | figure.titlesize: large 86 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 7/data/seeds.desc: -------------------------------------------------------------------------------- 1 | 1. area A, 2 | 2. perimeter P, 3 | 3. compactness C = 4*pi*A/P^2, 4 | 4. length of kernel, 5 | 5. width of kernel, 6 | 6. asymmetry coefficient 7 | 7. length of kernel groove. 8 | 8. group 9 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 7/mystyle.mplstyle: -------------------------------------------------------------------------------- 1 | # LINES 2 | lines.linewidth: 3 3 | lines.solid_capstyle: butt 4 | 5 | # LEGEND 6 | legend.fancybox: True 7 | legend.framealpha: 0.5 8 | legend.scatterpoints: 1 9 | legend.numpoints: 2 10 | legend.shadow: False 11 | legend.frameon: False 12 | legend.fontsize: 10 13 | 14 | # GRID 15 | #grid.linestyle: dashed 16 | grid.linewidth: 1.0 17 | grid.color: afafaf 18 | 19 | 20 | # AXES 21 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', '8b8b8b']) 22 | #axes.prop_cycle: cycler('color', ['348abd', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', 'e41a1c']) 23 | #axes.color_cycle: 348ABD, E24A33, 685EA5, 777777, FBC15E, 8EBA42, FFB5B8 24 | # 348ABD : blue 25 | # E24A33 : red 26 | # 988ED5 : purple 27 | # 777777 : gray 28 | # FBC15E : yellow 29 | # 8EBA42 : green 30 | # FFB5B8 : pink 31 | #axes.prop_cycle: cycler('color', ['377eb8', 'ff7f00', '4daf4a', 'f781bf', 
'a65628', '984ea3', '999999', 'e41a1c', 'dede00'] ) 32 | 33 | axes.facecolor: w 34 | axes.labelsize: medium 35 | axes.labelcolor: 333333 36 | axes.axisbelow: False 37 | axes.grid: False 38 | axes.edgecolor: 555555 39 | axes.linewidth: 2.0 40 | axes.titlesize: small 41 | #axes.spines.bottom: True 42 | #axes.spines.left: True 43 | #axes.spines.right: True 44 | #axes.spines.top: True 45 | 46 | # PATCHES 47 | patch.edgecolor: w 48 | patch.linewidth: 1 49 | 50 | 51 | # TICK SETTINGS 52 | ## X-axis 53 | xtick.major.size: 7 54 | xtick.minor.size: 4 55 | xtick.major.width: 1.5 56 | xtick.minor.width: 1.5 57 | #xtick.minor.visible: True 58 | xtick.labelsize: medium 59 | xtick.color: 333333 60 | ## Y-axis 61 | ytick.major.size: 7 62 | ytick.minor.size: 4 63 | ytick.major.width: 1.5 64 | ytick.minor.width: 1.5 65 | #ytick.minor.visible: True 66 | ytick.labelsize: medium 67 | ytick.color: 333333 68 | 69 | # FONT SETTINGS 70 | font.size:12.0 71 | 72 | # SAVE SETTINGS 73 | savefig.edgecolor: w 74 | savefig.facecolor: w 75 | savefig.dpi: 100 76 | 77 | # FIGURE SETTINGS 78 | figure.dpi: 100 79 | figure.figsize: 5.0, 3.5 80 | figure.facecolor: w 81 | figure.edgecolor: w 82 | figure.subplot.left: 0.08 83 | figure.subplot.right: 0.95 84 | figure.subplot.bottom: 0.07 85 | figure.titlesize: large 86 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Chapter 8/mystyle.mplstyle: -------------------------------------------------------------------------------- 1 | # LINES 2 | lines.linewidth: 3 3 | lines.solid_capstyle: butt 4 | 5 | # LEGEND 6 | legend.fancybox: True 7 | legend.framealpha: 0.5 8 | legend.scatterpoints: 1 9 | legend.numpoints: 2 10 | legend.shadow: False 11 | legend.frameon: False 12 | legend.fontsize: 10 13 | 14 | # GRID 15 | #grid.linestyle: dashed 16 | grid.linewidth: 1.0 17 | grid.color: afafaf 18 | 19 | 20 | # AXES 21 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', 
'8b8b8b']) 22 | #axes.prop_cycle: cycler('color', ['348abd', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', 'e41a1c']) 23 | #axes.color_cycle: 348ABD, E24A33, 685EA5, 777777, FBC15E, 8EBA42, FFB5B8 24 | # 348ABD : blue 25 | # E24A33 : red 26 | # 988ED5 : purple 27 | # 777777 : gray 28 | # FBC15E : yellow 29 | # 8EBA42 : green 30 | # FFB5B8 : pink 31 | #axes.prop_cycle: cycler('color', ['377eb8', 'ff7f00', '4daf4a', 'f781bf', 'a65628', '984ea3', '999999', 'e41a1c', 'dede00'] ) 32 | 33 | axes.facecolor: w 34 | axes.labelsize: medium 35 | axes.labelcolor: 333333 36 | axes.axisbelow: False 37 | axes.grid: False 38 | axes.edgecolor: 555555 39 | axes.linewidth: 2.0 40 | axes.titlesize: medium 41 | #axes.spines.bottom: True 42 | #axes.spines.left: True 43 | #axes.spines.right: True 44 | #axes.spines.top: True 45 | 46 | # PATCHES 47 | patch.edgecolor: w 48 | patch.linewidth: 1 49 | 50 | 51 | # TICK SETTINGS 52 | ## X-axis 53 | xtick.major.size: 7 54 | xtick.minor.size: 4 55 | xtick.major.width: 1.5 56 | xtick.minor.width: 1.5 57 | #xtick.minor.visible: True 58 | xtick.labelsize: medium 59 | xtick.color: 333333 60 | ## Y-axis 61 | ytick.major.size: 7 62 | ytick.minor.size: 4 63 | ytick.major.width: 1.5 64 | ytick.minor.width: 1.5 65 | #ytick.minor.visible: True 66 | ytick.labelsize: medium 67 | ytick.color: 333333 68 | 69 | # FONT SETTINGS 70 | font.size:12.0 71 | 72 | # SAVE SETTINGS 73 | savefig.edgecolor: w 74 | savefig.facecolor: w 75 | savefig.dpi: 100 76 | 77 | # FIGURE SETTINGS 78 | figure.dpi: 100 79 | figure.figsize: 5.0, 3.5 80 | figure.facecolor: w 81 | figure.edgecolor: w 82 | figure.subplot.left: 0.08 83 | figure.subplot.right: 0.95 84 | figure.subplot.bottom: 0.07 85 | figure.titlesize: large 86 | -------------------------------------------------------------------------------- /Module3/Mastering_Python_Data_Analysis_Code/Software Hardware List.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-End-to-end-Data-Analysis/c3bf8bdca453ce2d5c6070e306d30af3a86db109/Module3/Mastering_Python_Data_Analysis_Code/Software Hardware List.pdf -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ### Download a free PDF 5 | 6 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link below to claim your free PDF.
7 |

https://packt.link/free-ebook/9781788394697

--------------------------------------------------------------------------------