├── Chapter01
    ├── c1_02_julia_good.jl
    └── c1_01_pv_function.py
├── Errata_03_52.png
├── Chapter10
    ├── c10_25_ltfat.m
    ├── c10_07_abalone_data_set.txt
    ├── c10_16_summarize_by_date.txt
    ├── c10_27_Granger_test01.R
    ├── c10_05_simdata.R
    ├── c10_21_ffMonthly.py
    ├── c10_03_help_AppliedPredictiveModeling.R
    ├── c10_04_data_AppliedPredictiveModeling.R
    ├── c10_22_businessCycle.R
    ├── c10_28_Granger_test02.R
    ├── c10_13_timeSeries.R
    ├── c10_08_get_UCIdatasets.R
    ├── c10_06_fisher_Z_score.R
    ├── c10_31_timeUsed.jl
    ├── c10_09_usGDP.R
    ├── c10_30_QuantEcon_simulated.jl
    ├── c10_14_movingAverage.R
    ├── c10_11_seasonality_usGDPquarterly.R
    ├── c10_10_usGDP_graph.R
    ├── c10_19_catwalk_not_complete.py
    ├── c10_18_annual_ret_sp500.txt
    ├── c10_30_ltfat_example.m
    ├── c10_20_grangerTest_IBM_sp500.R
    ├── c10_17_sp500_annual_return_nextYear.R
    ├── c10_12_datarobot_not_working.R
    ├── c10_26_pca.m
    ├── c10_24_ddd.m
    └── c10_02_using_Liblinear02.R
├── Chapter07
    ├── c7_05_tangent_line.R
    ├── c7_18_efficientFrontier.R
    ├── c7_24_lqramsey_with_beta.txt
    ├── c7_12_load_optim.m
    ├── c7_09_help_optimize.py
    ├── c7_16_optimization_JuPM_Not_working.jl
    ├── c7_01_quatradic_function.R
    ├── c7_17_optim_example.jl
    ├── c7_03_convex_function.R
    ├── c7_14_inline_fmins.m
    ├── c7_08_optimize_help.py
    ├── c7_15_fminsearch.m
    ├── c7_13_fminsearch.m
    ├── c7_02.R
    ├── c7_06_.R
    ├── c7_10_3D_graph.R
    ├── c7_19_optimization.m
    ├── c7_22_optim.jl
    ├── c7_04_convex_function2.R
    ├── c7_07_optimization_01.py
    ├── c7_20_JuMP01.jl
    ├── c7_11_ff5industries.R
    └── c7_21_JuMp02.jl
├── Chapter12
    ├── c12_18_taskview.txt
    ├── c12_17_taskView.R
    ├── c12_04_parallel04.R
    ├── c12_01_lapply.R
    ├── c12_05_snow_01.R
    ├── c12_10_plyr_arrange.R
    ├── c12_13_parallel.R
    ├── c12_07_snow_parallel_Rmpi_UNIX.R
    ├── c12_02_parallel_01.R
    ├── c12_12_pi_01.py
    ├── c12_06_plyr_example.R
    └── c12_03_makeCluster.R
├── Software and Hardware list.pdf
├── Chapter09
    ├── c9_28_install_Conda.jl
    ├── c9_37_list_taskView.txt
    ├── c9_35_print_algorithms.R
    ├── c9_31_intall_optiminterp.m
    ├── c9_07_Iris.R
    ├── c9_36_taskView_machineLearning.R
    ├── c9_22_reinforcementLearning_state_same_as_nextState.R
    ├── c9_23_example.m
    ├── c9_09_Bayes_titanic.R
    ├── c9_06_print_iris.py
    ├── c9_01_titanic.R
    ├── c9_20_unique_value_iris.py
    ├── c9_21_reinforcementLearning.R
    ├── c9_34_Kmean_randomNumbers.jl
    ├── c9_12_load_iris.py
    ├── c9_19_logicReg.R
    ├── c9_08_naiveBayes.R
    ├── c9_24_reinforcementLearning_example.R
    ├── c9_32_iris.jl
    ├── c9_38_iris_prediction.py
    ├── c9_45_same_as_c9_14_good.py
    ├── c9_44_same_as_c9_14_good.py
    ├── c9_10_RTextTools.R
    ├── c9_11_RTextTool_2.R
    ├── c9_17_others_1.R
    ├── c9_05_NYTime_01.R
    ├── c9_18_others_2.R
    ├── c9_42_ff3factorDaily.py
    ├── c9_16_generate_titanicRData.R
    ├── c9_33_bird_Kmeans.m
    ├── c9_14_iris_predicted_vs_trueOne.py
    ├── c9_04_simplist_One_tree_tinatic.R
    ├── c9_26_test.m
    ├── c9_27_bird.m
    ├── c9_15_FamaFrench3factorModel.py
    ├── c9_03_simplefied_tree_tinatic.R
    ├── c9_30_great_test.m
    ├── c9_29_processing_email.m
    ├── c9_13_short_version.py
    └── c9_25_octave_good_graph.m
├── Chapter06
    ├── c6_16_conda_commands.txt
    ├── c6_13_Pkg_add.jl
    ├── c6_23_sys_path.py
    ├── c6_09_update_package.R
    ├── c6_18_source.R
    ├── c6_24_environmentVars.R
    ├── c6_25_environmentVars.py
    ├── c6_26_get_environmentVars.m
    ├── c6_03_read_csv.R
    ├── c6_07_path_rattle_package.R
    ├── c6_08_install_package.m
    ├── c6_02_rattle.R
    ├── c6_27_manual_XLConnect.R
    ├── c6_01_QR_code_for_CNN.R
    ├── c6_21_py_compile.py
    ├── c6_05_taskViewFinance.R
    ├── c6_06_taskView_update.R
    ├── c6_15_load_unload_package.m
    ├── c6_12_import_matplotlib.py
    ├── c6_14_remove_update_packages.jl
    ├── c6_22_import_myPackage.py
    ├── c6_11_taskView_machineLearning.R
    ├── c6_17_financialCalculator.R
    ├── c6_10_table6_1.R
    └── c6_19.jl
├── Chapter08
    ├── c8_42_number_of_packages_task_view.txt
    ├── c8_43_webs.txt
    ├── c8_06_launch_rattle.R
    ├── c8_27_package_milk.py
    ├── c8_15_sklearn.py
    ├── c8_04_dendogram_animals.R
    ├── c8_07_dir_scipy_cluster.py
    ├── c8_14_install_taskViewCluster.R
    ├── c8_17_example_cluster.py
    ├── c8_16_functions_sklearn_cluster.py
    ├── c8_01_dist.R
    ├── c8_19_5points.R
    ├── c8_22_load_iris_data.py
    ├── c8_12_considerDirection.R
    ├── c8_11_randomForest_plot.R
    ├── c8_20_5pointsCluster.R
    ├── c8_03_cluster_animals.R
    ├── c8_24_01.jl
    ├── c8_28_iris_kMean_sklearn.py
    ├── c8_08_python_hierarchical.py
    ├── c8_09_randomUniformForest_not_working.R
    ├── c8_25_clustering.jl
    ├── c8_13_mixMod_bar.R
    ├── c8_23_randomNumbersFrom2normal.R
    ├── c8_31_generate_dendrogram_using20obsWine.R
    ├── c8_10_wine_quality.R
    ├── c8_05_kmeans01.R
    ├── c8_02_cluster.R
    ├── c8_29_PCA.py
    └── c8_41_plot_pca_iris.py
├── Chapter04
    ├── c4_02_sineFunction.R
    ├── c4_01_line.R
    ├── c4_20_save_pdf.R
    ├── c4_08_straghtLine.R
    ├── c4_05_simpleDraw.py
    ├── c4_18.jl
    ├── c4_28_chi2distribution.R
    ├── c4_09_python_fv.py
    ├── c4_32_coin_grey.R
    ├── c4_23_bisection_method.R
    ├── c4_03_pie.R
    ├── c4_29_annimation_flip_coin.R
    ├── c4_04_Pyplot_julia.jl
    ├── c4_21_plot_Julia.jl
    ├── c4_13_add_trendLine.R
    ├── c4_07_shaded_area_standard_normal_dist.R
    ├── c4_30_annimation3flip_coin.R
    ├── c4_16_plot_julia.jl
    ├── c4_31_pie_grey.R
    ├── c4_19_scatter_plot_PyPlot.jl
    ├── c4_27_3stock_connection.R
    ├── c4_33_plot_grey.jl
    ├── c4_06_add_labels.py
    ├── c4_22_brownian_motion_animation.R
    ├── c4_11_histogram.py
    ├── c4_15_add_Greek_letters.R
    ├── c4_26_qgraph_network.R
    ├── c4_10_getHistram_IBMreturn.py
    ├── c4_17_QuantEcon_julia.jl
    ├── c4_24_Brownian_motion_html.R
    ├── c4_25_bisectionMethod_html.R
    ├── c4_14_time_value_of_money.py
    └── c4_12_generate_Black_Scholes_formula.py
├── Chapter11
    ├── c11_03_dir_fincal.py
    ├── c11_01_qt_consol.py
    └── c11_02_myfincal.py
├── Chapter05
    ├── c5_10_isna.R
    ├── c5_13_critival_Tvalue.R
    ├── c5_22_ff5.R
    ├── c5_19_critical_Tvalue.py
    ├── c5_20_ff4_RData.R
    ├── c5_01_linear_graph.R
    ├── c5_21_cholesky_01.R
    ├── c5_24_critical_value_F_distribution.R
    ├── c5_12_replace_spna.py
    ├── c5_15_get_IBM_dailyFromQuandl.py
    ├── c5_02_linear_reg.R
    ├── c5_11_remove_spna.py
    ├── c5_14_OLS.jl
    ├── c5_08_remove_missing_data.R
    ├── c5_28_run_julia_program.jl
    ├── c5_26_f_ditribution_graph.R
    ├── c5_29_replace_na_with_mean.py
    ├── c5_23_number_outliers.R
    ├── c5_18_ff3_factor_ibm.R
    ├── c5_25_get_critical_value_F_test.py
    ├── c5_07_random_OLS.py
    ├── c5_17_ibm_beta.R
    ├── c5_30_run_linearRegressionOctave.m
    ├── c5_27_CAPM.jl
    ├── c5_31_CAPM.jl
    ├── c5_05_get_sp500Daily.py
    ├── c5_16_ibm_beta.py
    ├── c5_06_get_sp500monthly.py
    └── c5_09_annual_beta.py
├── Chapter02
    ├── c3_17_missing_code.py
    ├── c3_20_sort_R.R
    ├── c3_07_find_definitions_of_inputs.py
    ├── c3_15_cbsodata_list_of_data.py
    ├── c3_02_pandas_read_csv.py
    ├── c3_03_pandas_read_csv.py
    ├── c3_01.R
    ├── c3_27_datadotworld_1.py
    ├── c3_14_cbsodata.py
    ├── c3_10_R_package_foreign.R
    ├── c3_23_datadotworld.py
    ├── c3_04_save_RDatat.R
    ├── c3_16_missing_code_R.R
    ├── c3_24_get_iris.py
    ├── c3_22_sort_by2columns.R
    ├── c3_25_sort_Python.py
    ├── c3_11_R_package_dslabs.R
    ├── c3_30_merge_left_index.py
    ├── c3_29_merge_different_names.py
    ├── c3_12_merge_01.py
    ├── c3_26_ff3monthly2pickle.py
    ├── c3_31_merge_by2variables.py
    ├── c3_19_missing_code.py
    ├── c3_09_R_package_sjlabbeld.R
    ├── c3_18_missing_code_apropos.R
    ├── c3_08_merge_datasets.R
    ├── c3_13_merge_02_stock.py
    ├── c3_05_saveRDS.R
    ├── c3_21_sort_order.R
    └── c3_28_datadotworld_2good.py
├── Chapter03
    ├── c3_17_missing_code.py
    ├── c3_20_sort_R.R
    ├── c3_07_find_definitions_of_inputs.py
    ├── c3_15_cbsodata_list_of_data.py
    ├── c3_02_pandas_read_csv.py
    ├── c3_03_pandas_read_csv.py
    ├── c3_01.R
    ├── c3_27_datadotworld_1.py
    ├── c3_14_cbsodata.py
    ├── c3_34_read_ff3monthly_csv.py
    ├── c3_10_R_package_foreign.R
    ├── c3_23_datadotworld.py
    ├── c3_04_save_RDatat.R
    ├── c3_32_write_sas_write_spss_write_stata.R
    ├── c3_16_missing_code_R.R
    ├── c3_24_get_iris.py
    ├── c3_22_sort_by2columns.R
    ├── c3_25_sort_Python.py
    ├── c3_33_generate_z_csv.R
    ├── c3_11_R_package_dslabs.R
    ├── c3_30_merge_left_index.py
    ├── c3_29_merge_different_names.py
    ├── c3_19_missing_code.py
    ├── c3_12_merge_01.py
    ├── c3_26_ff3monthly2pickle.py
    ├── c3_31_merge_by2variables.py
    ├── c3_09_R_package_sjlabbeld.R
    ├── c3_18_missing_code_apropos.R
    ├── c3_08_merge_datasets.R
    ├── c3_13_merge_02_stock.py
    ├── c3_05_saveRDS.R
    ├── c3_21_sort_order.R
    └── c3_28_datadotworld_2good.py
└── LICENSE


/Chapter01/c1_02_julia_good.jl:
--------------------------------------------------------------------------------
1 | # Volume of a sphere of radius r, computed as 4/3*pi*r^3.
2 | function sphere_vol(r)
3 |    return 4/3*pi*r^3
4 | end
5 | 
6 | sphere_vol(2.5)
7 | 


--------------------------------------------------------------------------------
/Errata_03_52.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Errata_03_52.png


--------------------------------------------------------------------------------
/Chapter10/c10_25_ltfat.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter10/c10_25_ltfat.m


--------------------------------------------------------------------------------
/Chapter10/c10_07_abalone_data_set.txt:
--------------------------------------------------------------------------------
1 | library(AppliedPredictiveModeling)
2 | data(abalone)
3 | dim(abalone)
4 | head(abalone)
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/Chapter07/c7_05_tangent_line.R:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter07/c7_05_tangent_line.R


--------------------------------------------------------------------------------
/Chapter12/c12_18_taskview.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter12/c12_18_taskview.txt


--------------------------------------------------------------------------------
/Software and Hardware list.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Software and Hardware list.pdf


--------------------------------------------------------------------------------
/Chapter09/c9_28_install_Conda.jl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter09/c9_28_install_Conda.jl


--------------------------------------------------------------------------------
/Chapter09/c9_37_list_taskView.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter09/c9_37_list_taskView.txt


--------------------------------------------------------------------------------
/Chapter06/c6_16_conda_commands.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter06/c6_16_conda_commands.txt


--------------------------------------------------------------------------------
/Chapter07/c7_18_efficientFrontier.R:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter07/c7_18_efficientFrontier.R


--------------------------------------------------------------------------------
/Chapter07/c7_24_lqramsey_with_beta.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter07/c7_24_lqramsey_with_beta.txt


--------------------------------------------------------------------------------
/Chapter08/c8_42_number_of_packages_task_view.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter08/c8_42_number_of_packages_task_view.txt


--------------------------------------------------------------------------------
/Chapter10/c10_16_summarize_by_date.txt:
--------------------------------------------------------------------------------
1 | 
2 | library(plyr)
3 | year<-c(2000,2000,2001,2001,2004)
4 | values<-c(2, 3, 3, 5, 6)
5 | df <- data.frame(DATE=year,B =values )
6 | dfsum <- ddply(df, c("DATE"),summarize,B=sum(B))


--------------------------------------------------------------------------------
/Chapter01/c1_01_pv_function.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Sun Oct 29 15:42:25 2017
 4 | 
 5 | @author: yany
 6 | """
 7 | def pv_f(pv,r,n):
 8 |     # Discount a single amount `pv` back over n periods at per-period rate r.
 9 |     # NOTE(review): despite its name, `pv` is the amount being discounted
10 |     # (i.e. a future cash flow); the present value is what gets returned.
11 |     return pv/(1+r)**n
12 | #
13 | pv=pv_f(100,0.1,2)
14 | print(pv)
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter08/c8_43_webs.txt:
--------------------------------------------------------------------------------
1 | https://github.com/scipy/scipy/blob/master/scipy/cluster/vq.py
2 | 
3 | http://scikit-learn.org/stable/auto_examples/cluster/plot_digits_linkage.html#sphx-glr-auto-examples-cluster-plot-digits-linkage-py
4 | 
5 | http://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_iris.html


--------------------------------------------------------------------------------
/Chapter06/c6_13_Pkg_add.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c6_13_Pkg_add.jl
 3 |   Book     : Hands-on Data Science with Anaconda
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | 
11 | # Install a package with Julia's package manager (requires network access).
12 | # NOTE(review): on Julia 0.7 and later, run `using Pkg` before this call.
13 | # NOTE(review): confirm "AbstractTable" is the intended registered package name.
14 | Pkg.add("AbstractTable")


--------------------------------------------------------------------------------
/Chapter06/c6_23_sys_path.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c6_23_sys_path.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import sys
12 | sys.path


--------------------------------------------------------------------------------
/Chapter04/c4_02_sineFunction.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_02_sineFunction.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | plot(sin,-pi,pi)


--------------------------------------------------------------------------------
/Chapter06/c6_09_update_package.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_09_update_package.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # The first positional argument of update.packages() is lib.loc (a library
12 | # path), so the package to refresh has to be named explicitly via oldPkgs.
13 | update.packages(oldPkgs = "rattle")


--------------------------------------------------------------------------------
/Chapter06/c6_18_source.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_18_source.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | source("c:/temp/fincalCalculator.R")


--------------------------------------------------------------------------------
/Chapter04/c4_01_line.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_01_line.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | x<-1:10
13 | y<-2+2*x
14 | plot(x,y)
15 | 


--------------------------------------------------------------------------------
/Chapter06/c6_24_environmentVars.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_24_environmentVars.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | 
13 |  Sys.getenv()
14 | 


--------------------------------------------------------------------------------
/Chapter06/c6_25_environmentVars.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c6_25_environmentVars.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import sys
12 | sys.path
13 | 


--------------------------------------------------------------------------------
/Chapter06/c6_26_get_environmentVars.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c6_26_get_environmentVars.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 
11 | 
12 | 
13 | getenv('path')


--------------------------------------------------------------------------------
/Chapter08/c8_06_launch_rattle.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_06_launch_rattle.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(rattle)
12 | rattle()
13 | 


--------------------------------------------------------------------------------
/Chapter07/c7_12_load_optim.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c7_12_load_optim.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 
11 | pkg load optim
12 | pkg describe -verbose optim


--------------------------------------------------------------------------------
/Chapter06/c6_03_read_csv.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_03_read_csv.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | x<-read.csv("http://canisius.edu/~yany/data/ibmDaily.csv")


--------------------------------------------------------------------------------
/Chapter06/c6_07_path_rattle_package.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_07_path_rattle_package.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 |  library(rattle)
12 |  path.package('rattle')
13 | 


--------------------------------------------------------------------------------
/Chapter06/c6_08_install_package.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c6_08_install_package.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 
11 | 
12 | 
13 | pkg install statistics-1.3.0.tar.gz


--------------------------------------------------------------------------------
/Chapter08/c8_27_package_milk.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c8_27_package_milk.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import milk
13 | x=dir(milk)
14 | print(x)


--------------------------------------------------------------------------------
/Chapter09/c9_35_print_algorithms.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c9_35_print_algorithms.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | library(RTextTools)
13 | print_algorithms()


--------------------------------------------------------------------------------
/Chapter11/c11_03_dir_fincal.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c11_03_dir_fincal.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/8/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import fincal
12 | x=dir(fincal)
13 | print(x)
14 | 


--------------------------------------------------------------------------------
/Chapter08/c8_15_sklearn.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c8_15_sklearn.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | 
13 | import sklearn as sk
14 | x=dir(sk)
15 | print(x)


--------------------------------------------------------------------------------
/Chapter04/c4_20_save_pdf.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_20_save_pdf.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | pdf("c:/temp/myGraph.pdf")
13 | plot(cos,-2*pi,2*pi)
14 | dev.off()


--------------------------------------------------------------------------------
/Chapter06/c6_02_rattle.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_02_rattle.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | install.packages("rattle")
12 | library(rattle)
13 | rattle()
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter06/c6_27_manual_XLConnect.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_27_manual_XLConnect.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | 
13 | library(XLConnect)
14 | vignette("XLConnect")
15 | 


--------------------------------------------------------------------------------
/Chapter09/c9_31_intall_optiminterp.m:
--------------------------------------------------------------------------------
 1 | 
 2 | #{
 3 |   Name     : c9_31_install_optiminterp.m
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | #}
11 | 
12 | 
13 | 
14 | pkg install optiminterp-0.3.4.tar.gz


--------------------------------------------------------------------------------
/Chapter04/c4_08_straghtLine.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_08_straightLine.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 |  x<-seq(-3,3,by=0.05)
12 |  y<-2+2.5*x
13 |  plot(x,y,type="b")
14 | 
15 | 


--------------------------------------------------------------------------------
/Chapter05/c5_10_isna.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_10_isna.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | x<-c(NA,2,3,4,NA)
13 | y<-na.omit(x)
14 | m<-mean(y)
15 | 
16 | x[is.na(x)]<-m
17 | 


--------------------------------------------------------------------------------
/Chapter06/c6_01_QR_code_for_CNN.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_01_QR_code_for_CNN.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(qrcode)
12 | qrcode_gen("http://cnn.com")
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/Chapter02/c3_17_missing_code.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_17_missing_code.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | x={1,2,3,sp.nan,6,sp.nan}
12 | print(x)
13 | x.remove(sp.nan)
14 | print(x)


--------------------------------------------------------------------------------
/Chapter02/c3_20_sort_R.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_20_sort_R.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | set.seed(123)
12 | x<-rnorm(100)
13 | head(x)
14 | y<-sort(x)
15 | head(y)
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/Chapter03/c3_17_missing_code.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_17_missing_code.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | x={1,2,3,sp.nan,6,sp.nan}
12 | print(x)
13 | x.remove(sp.nan)
14 | print(x)


--------------------------------------------------------------------------------
/Chapter03/c3_20_sort_R.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_20_sort_R.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | set.seed(123)
12 | x<-rnorm(100)
13 | head(x)
14 | y<-sort(x)
15 | head(y)
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/Chapter06/c6_21_py_compile.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c6_21_py_compile.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import py_compile
13 | py_compile.compile('c:/temp/myPackage.py')
14 | 


--------------------------------------------------------------------------------
/Chapter04/c4_05_simpleDraw.py:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_05_simpleDraw.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | import matplotlib.pyplot as plt 
12 | plt.plot([2,3,8,12])
13 | plt.show()
14 | 
15 | 


--------------------------------------------------------------------------------
/Chapter06/c6_05_taskViewFinance.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_05_taskViewFinance.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | install.packages("ctv")       # helper package for CRAN Task Views
12 | library(ctv)
13 | install.views("Finance")      # install the packages of the Finance task view
14 | 


--------------------------------------------------------------------------------
/Chapter06/c6_06_taskView_update.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_06_taskView_update.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | install.packages("ctv")       # helper package for CRAN Task Views
12 | library(ctv)
13 | update.views("Finance")       # bring the Finance task view's packages up to date
14 | 


--------------------------------------------------------------------------------
/Chapter06/c6_15_load_unload_package.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c6_15_load_unload_package.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 
11 | 
12 | 
13 | pkg("load", "statistics")      # functional form of: pkg load statistics
14 | 
15 | pkg("unload", "statistics")    # functional form of: pkg unload statistics
16 | 


--------------------------------------------------------------------------------
/Chapter02/c3_07_find_definitions_of_inputs.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_07_find_definitions_of_inputs.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | pd.read_csv()
14 | 
15 | 


--------------------------------------------------------------------------------
/Chapter02/c3_15_cbsodata_list_of_data.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_15_cbsodata_list_of_data.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import cbsodata as cb
12 | list=cb.get_table_list()
13 | print(list)


--------------------------------------------------------------------------------
/Chapter03/c3_07_find_definitions_of_inputs.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_07_find_definitions_of_inputs.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | pd.read_csv()
14 | 
15 | 


--------------------------------------------------------------------------------
/Chapter03/c3_15_cbsodata_list_of_data.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_15_cbsodata_list_of_data.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import cbsodata as cb
12 | list=cb.get_table_list()
13 | print(list)


--------------------------------------------------------------------------------
/Chapter05/c5_13_critival_Tvalue.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_13_critical_Tvalue.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | alpha <- 0.01                                # significance level
12 | degreeFreedom <- 50
13 | qt(p = 1 - alpha/2, df = degreeFreedom)      # t quantile at probability 1 - alpha/2
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter06/c6_12_import_matplotlib.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c6_12_import_matplotlib.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import matplotlib as mat
12 | x=dir(mat)
13 | print(x)
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter06/c6_14_remove_update_packages.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c6_14_remove_update_packages.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | 
11 | # NOTE(review): Pkg is used without `using Pkg` — presumably written for a Julia version where Pkg is available by default; confirm.
12 | Pkg.rm("AbstractTable")
13 | 
14 | Pkg.update()
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter12/c12_17_taskView.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c12_17_taskView.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/14/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | install.packages("ctv")                  # helper package for CRAN Task Views
12 | library(ctv)
13 | install.views("HighPerformanceComputing") # install the packages of that task view
14 | 


--------------------------------------------------------------------------------
/Chapter05/c5_22_ff5.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_22_ff5.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | ff5Conn <- url("http://canisius.edu/~yany/RData/ff5monthly.RData")   # remote .RData file
13 | load(ff5Conn)              # restore the objects stored in that file
14 | head(.ff5monthly)
15 | 


--------------------------------------------------------------------------------
/Chapter04/c4_18.jl:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_18.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | Pkg.add("Gadfly")
12 | using Gadfly
13 | curves = plot([sin, cos], 0, 25)                  # sin and cos over [0, 25]
14 | draw(SVG("output.svg", 6inch, 3inch), curves)     # render to a 6in x 3in SVG file
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter05/c5_19_critical_Tvalue.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_19_critical_Tvalue.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | from scipy import stats
12 | alpha=0.05
13 | print(stats.t.ppf(1-alpha/2, 100))
14 | 


--------------------------------------------------------------------------------
/Chapter05/c5_20_ff4_RData.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_20_ff4_RData.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | ffc4Conn <- url("http://canisius.edu/~yany/RData/ffc4monthly.RData")   # remote .RData file
12 | load(ffc4Conn)             # restore the objects stored in that file
13 | head(.ffc4monthly)
14 | 


--------------------------------------------------------------------------------
/Chapter08/c8_04_dendogram_animals.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_04_dendogram_animals.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | require(cluster)
11 | data(animals)                   # load the `animals` data set
12 | tree <- agnes(animals)          # cluster it with agnes()
13 | pltree(tree)                    # plot the resulting clustering tree
14 | 
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/Chapter08/c8_07_dir_scipy_cluster.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c8_07_dir_scipy_cluster.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import scipy.cluster as cluster
13 | x=dir(cluster)
14 | print(x)
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter08/c8_14_install_taskViewCluster.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_14_install_taskViewCluster.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | install.packages("ctv")       # helper package for CRAN Task Views
12 | library(ctv)
13 | install.views("Cluster")      # install the packages of the Cluster task view
14 | 


--------------------------------------------------------------------------------
/Chapter08/c8_17_example_cluster.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c8_17_example_cluster.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | from sklearn.cluster.AgglomerativeCluster as cluster2
13 | help(cluster2)
14 | 


--------------------------------------------------------------------------------
/Chapter10/c10_27_Granger_test01.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_27_Granger_test01.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(lmtest)
12 | data("ChickEgg")               # load the ChickEgg data set
13 | dim(ChickEgg)                  # rows x columns
14 | ChickEgg[seq_len(5), ]         # first five rows
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/Chapter05/c5_01_linear_graph.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_01_linear_graph.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | x <- -10:10                               # integers from -10 to 10
12 | y <- 2 + 1.5 * x                          # intercept 2, slope 1.5
13 | title <- "A straight line"
14 | plot(x, y, type = "l", main = title)      # type "l" draws a line
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter06/c6_22_import_myPackage.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c6_22_import_myPackage.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | sys.path.append("c:/temp/")
12 | import myPackage as my
13 | x=dir(my)
14 | print(x)
15 | 


--------------------------------------------------------------------------------
/Chapter07/c7_09_help_optimize.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c7_09_help_optimize.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | #import numpy as np
13 | 
14 | from scipy.optimize import minimize
15 | help(minimize)
16 | 


--------------------------------------------------------------------------------
/Chapter08/c8_16_functions_sklearn_cluster.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c8_16_functions_sklearn_cluster.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | 
13 | from sklearn import cluster
14 | x=dir(cluster)
15 | print(x)


--------------------------------------------------------------------------------
/Chapter02/c3_02_pandas_read_csv.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_02_pandas_read_csv.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import pandas as pd
12 | data=pd.read_csv("c://temp/bezdekIris.data.txt",header=None)
13 | 
14 | 


--------------------------------------------------------------------------------
/Chapter03/c3_02_pandas_read_csv.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_02_pandas_read_csv.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import pandas as pd
12 | data=pd.read_csv("c://temp/bezdekIris.data.txt",header=None)
13 | 
14 | 


--------------------------------------------------------------------------------
/Chapter05/c5_21_cholesky_01.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_21_cholesky_01.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | set.seed(123)                                # reproducible random draw
12 | n <- 1000
13 | x <- matrix(rnorm(n), nrow = 200, ncol = 5)  # n draws arranged as 200 rows x 5 columns
14 | y <- cor(x)                                  # 5 x 5 correlation matrix of the columns
15 | cholesky <- chol(y)                          # Cholesky factor of the correlation matrix
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/Chapter05/c5_24_critical_value_F_distribution.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_24_critical_value_F_distribution.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 |  alpha <- 0.1                            # significance level
12 |  d1 <- 1                                 # first degrees-of-freedom argument
13 |  d2 <- 1                                 # second degrees-of-freedom argument
14 |  qf(p = 1 - alpha, df1 = d1, df2 = d2)   # F quantile at probability 1 - alpha
15 | 


--------------------------------------------------------------------------------
/Chapter07/c7_16_optimization_JuPM_Not_working.jl:
--------------------------------------------------------------------------------
 1 | using JuMP
 2 | using Clp
 3 | # NOTE(review): Model(solver=...), solve() and getvalue() look like an older JuMP API — confirm against the installed JuMP version (the file name says "Not_working").
 4 | m = Model(solver = ClpSolver())
 5 | @variable(m, 0 <= x <= 2 )        # x bounded to [0, 2]
 6 | @variable(m, 0 <= y <= 30 )       # y bounded to [0, 30]
 7 | 
 8 | @objective(m, Max, 5x + 3*y )     # maximise 5x + 3y
 9 | @constraint(m, 1x + 5y <= 3.0 )   # subject to x + 5y <= 3
10 | 
11 | print(m)
12 | 
13 | status = solve(m)
14 | 
15 | println("Objective value: ", getobjectivevalue(m))
16 | println("x = ", getvalue(x))
17 | println("y = ", getvalue(y))


--------------------------------------------------------------------------------
/Chapter10/c10_05_simdata.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_05_simData.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/23/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(eclust)
12 | data("simdata")
13 | dim(simdata)                           # rows x columns
14 | simdata[seq_len(5), seq_len(6)]        # top-left 5 x 6 corner
15 | table(simdata[, "E"])                  # frequency of each value in column "E"
16 | 
17 | 


--------------------------------------------------------------------------------
/Chapter10/c10_21_ffMonthly.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c10_21_ffMonthly.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | ff=pd.read_pickle("c:/temp/ffMonthly.pkl")
14 | print(ff.head())
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter08/c8_01_dist.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_01_dist.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | a<-c(2,5)
12 | b<-c(3,7)
13 | z<-rbind(a,b)                    # stack the two points as rows of a 2 x 2 matrix
14 | dist(z, method = "euclidean")    # Euclidean distance between the rows
15 | plot(z,lwd=20)                   # draw the two points
16 | 
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/Chapter08/c8_19_5points.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_19_5points.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | data <- rbind(c(180,20), c(160,5), c(60, 150), c(160,60), c(80,120))   # five 2-D points, one per row
12 | plot(data, col = "red", lwd = 20)                                      # draw them in red
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/Chapter09/c9_07_Iris.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c9_07_Iris.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(ggvis)
12 | # Petal length against petal width, fill colour mapped to Species.
13 | iris %>%
14 |   ggvis(~Petal.Length, ~Petal.Width, fill = ~Species) %>% layer_points()
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/Chapter09/c9_36_taskView_machineLearning.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c9_36_taskView_machineLearning.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | install.packages("ctv")            # helper package for CRAN Task Views
13 | library(ctv)
14 | install.views("MachineLearning")   # install the packages of that task view
15 | 


--------------------------------------------------------------------------------
/Chapter07/c7_01_quatradic_function.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c7_01_quatradic_function.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 |  x <- seq(from = -10, to = 10, by = 0.1)
12 |  a <- -2                          # negative leading coefficient
13 |  b <- 10
14 |  c <- 5
15 |  y <- a*x^2 + b*x + c             # quadratic in x
16 |  plot(x, y, type = "l")           # type "l" draws a line
17 | 


--------------------------------------------------------------------------------
/Chapter08/c8_22_load_iris_data.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c8_22_load_iris_data.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | 
13 | from sklearn import datasets
14 | import numpy as np
15 | x= datasets.load_iris()
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/Chapter05/c5_12_replace_spna.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_12_replace_spna.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import scipy as sp
12 | x=[2,3,sp.nan,sp.nan,4]
13 | y=x
14 | x.remove(sp.nan)
15 | m=sp.mean(x)
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/Chapter05/c5_15_get_IBM_dailyFromQuandl.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_15_getIBM_dailyFromQuandl.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import quandl as qd
12 | x=qd.get("WIKI/ibm")
13 | print(x.head(2))
14 | print(x.tail(2))
15 | 


--------------------------------------------------------------------------------
/Chapter05/c5_02_linear_reg.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_02_linear_reg.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | set.seed(12345)                       # reproducible noise
12 | x <- 1:100
13 | intercept <- 4
14 | slope <- 5
15 | noise <- rnorm(100)                   # standard-normal error term
16 | y <- intercept + slope * x + noise    # simulated linear relationship
17 | lm(y ~ x)                             # fit y on x by least squares
18 | 
19 | 


--------------------------------------------------------------------------------
/Chapter05/c5_11_remove_spna.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_11_remove_spna.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import scipy as sp
13 | x={2,4,3,sp.nan,6,sp.nan,7}
14 | print(x)
15 | x.remove(sp.nan)
16 | print(x)
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/Chapter07/c7_17_optim_example.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c7_17_optim_example.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | 
11 | # Objective: (1 - x1)^2 + 200 * (x2 - x1^2)^2, a function of a two-element vector.
12 | # optimize() is called with the starting point [0.0, 0.0].
13 | 
14 | using Optim
15 | f(x) = (1.0 - x[1])^2 + 200.0 * (x[2] - x[1]^2)^2
16 | optimize(f, [0.0, 0.0])
17 | 


--------------------------------------------------------------------------------
/Chapter05/c5_14_OLS.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c5_14_OLS.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | # Fit Y on X with a Normal family and identity link (`using`, not `use`, loads packages).
11 |  using GLM, DataFrames
12 |  data = DataFrame(X=[1,2,3], Y=[2,4,7])
13 |  OLS = glm(@formula(Y ~ X), data, Normal(), IdentityLink())
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter02/c3_03_pandas_read_csv.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_03_pandas_read_csv.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | inFile="http://canisius.edu/~yany/data/bezdekIris.data.txt"
12 | import pandas as pd
13 | d=pd.read_csv(inFile,header=None)
14 | 


--------------------------------------------------------------------------------
/Chapter03/c3_03_pandas_read_csv.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_03_pandas_read_csv.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | inFile="http://canisius.edu/~yany/data/bezdekIris.data.txt"
12 | import pandas as pd
13 | d=pd.read_csv(inFile,header=None)
14 | 


--------------------------------------------------------------------------------
/Chapter06/c6_11_taskView_machineLearning.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_11_taskView_machineLearning.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | 
13 | install.packages("ctv")            # helper package for CRAN Task Views
14 | library(ctv)
15 | install.views("MachineLearning")   # install the packages of that task view
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/Chapter10/c10_03_help_AppliedPredictiveModeling.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_03_help_AppliedPredictiveModeling.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/23/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(AppliedPredictiveModeling)
12 | 
13 | data("solubility")
14 | ls(pattern = "sol")      # workspace objects whose names match "sol"
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/Chapter04/c4_28_chi2distribution.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_28_chi2distribution.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(animation)
12 | n<-5                                     # degrees of freedom of the chi-square distribution
13 | set.seed(123)
14 | f<-function(size) rchisq(size,df=n)      # draw `size` values with df fixed at n; the old parameter name `n` shadowed it
15 | clt.ani(FUN = f,mean=n,sd = sqrt(2*n))   # chi-square(n) has mean n and variance 2n
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/Chapter07/c7_03_convex_function.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c7_03_convex_function.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | x <- seq(from = -10, to = 10, by = 0.1)
12 | a <- 4                                  # positive leading coefficient
13 | b <- -2
14 | c <- 10
15 | y <- a*x^2 + b*x + c                    # quadratic in x
16 | name <- "A convex function"
17 | plot(x, y, type = "l", main = name)     # type "l" draws a line
18 | 


--------------------------------------------------------------------------------
/Chapter07/c7_14_inline_fmins.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c7_14_inline_fmins.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 |   source:  https://octave.sourceforge.io/octave/function/fminsearch.html
11 | #}
12 | 
13 | 
14 | # Pass the objective (x1-5)^2 + (x2-8)^4 and the vector [0;0] to fmins.
15 | fmins(inline('(x(1)-5).^2+(x(2)-8).^4'),[0;0])
16 | 


--------------------------------------------------------------------------------
/Chapter10/c10_04_data_AppliedPredictiveModeling.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_04_data_AppliedPredictiveModeling.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/23/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(AppliedPredictiveModeling)
12 | data("solubility")
13 | ls(pattern = "sol")      # workspace objects whose names match "sol"
14 | 
15 | 
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/Chapter10/c10_22_businessCycle.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_22_businessCycle.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | path <- "http://canisius.edu/~yany/RData/"
12 | dataSet <- "businesscycle"
13 | link <- paste0(path, dataSet, ".RData")   # full URL of the .RData file
14 | load(url(link))                           # restore the objects stored in that file
15 | 


--------------------------------------------------------------------------------
/Chapter09/c9_22_reinforcementLearning_state_same_as_nextState.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c9_22_reinforcementLearning_state_same_as_nextState.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | # NOTE(review): `data` is not created in this script; presumably it is a data frame left in the workspace by an earlier example -- confirm.
13 | # Keep only the rows whose State equals NextState.
14 | x <- subset(data, State == NextState)
15 | head(x)
16 | unique(x$Reward)


--------------------------------------------------------------------------------
/Chapter12/c12_04_parallel04.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c12_04_parallel04.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/14/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Load the parallel package.
12 | require(parallel)
13 | # The worker count is fixed by hand (manually for non-cluster machines).
14 | nCores <- 8
15 | cl <- makeCluster(spec = nCores) # by default this uses the PSOCK mechanism


--------------------------------------------------------------------------------
/Chapter04/c4_09_python_fv.py:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_09_python_fv.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | import numpy as np
12 | import matplotlib.pyplot as mlt 
13 | n=np.linspace(0,10,10)
14 | pv=100
15 | R=0.1
16 | fv=pv*(1+R)**n
17 | mlt.plot(n,fv)
18 | mlt.show()


--------------------------------------------------------------------------------
/Chapter05/c5_08_remove_missing_data.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_08_remove_missing_data.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | # A vector with missing values at both ends.
13 | x<-c(NA,1,2,50,NA)
14 | # na.omit() drops the NA entries.
15 | y<-na.omit(x)
16 | mean(x)    # NA, because x still contains missing values
17 | mean(y)    # mean of the three remaining values
18 | 
19 | # na.exclude() also drops the NA entries; report the mean of ITS result (y2, not y).
20 | y2<-na.exclude(x)
21 | mean(y2)
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter05/c5_28_run_julia_program.jl:
--------------------------------------------------------------------------------
 1 | 
 2 | #=
 3 |   Name     : c5_28_run_julia_program.jl
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 1/25/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | =#
11 | 
12 | 
13 | # assume that helloWorld.jl has the following one line. 
14 | # println("Hello world")
15 | 
16 | # Run the other program by including its source file.
17 | include("c:/temp/helloWorld.jl")


--------------------------------------------------------------------------------
/Chapter04/c4_32_coin_grey.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_32_coin_grey.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(animation)
12 | # Probabilities of the two faces of the coin.
13 | myProb<-c(0.5,0.5)
14 | # Animation options: interval 0.2 between frames; nmax is 100 in an interactive session, otherwise 2.
15 | ani.options(interval=0.2,nmax = ifelse(interactive(), 100, 2))
16 | # Pass the face probabilities explicitly (myProb was defined but never used).
17 | flip.coin(prob=myProb,bg = "grey",col=c("black","grey"))


--------------------------------------------------------------------------------
/Chapter05/c5_26_f_ditribution_graph.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_26_f_distribution_graph.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Density curve of an F distribution with degrees of freedom d1 and d2.
12 | d1 <- 4
13 | d2 <- 2
14 | n <- 100   # number of grid points
15 | x <- seq(from = 0, to = 5, length.out = n)
16 | plot(x, df(x = x, df1 = d1, df2 = d2), type = "l")
17 | 
18 | 
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/Chapter08/c8_12_considerDirection.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_12_considerDirection.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Angles in degrees, from 0 to 360 in steps of 30.
12 | angles<-seq(0,360,30)
13 | # cos() works in radians, so convert the degrees before taking the cosine.
14 | y<-cos(angles*pi/180)
15 | y2<-round(y,2)
16 | z<-cbind(angles,y2)
17 | colnames(z)<-c("ANGLE","cos(angle)")
18 | print(z)


--------------------------------------------------------------------------------
/Chapter09/c9_23_example.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c9_23_example.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 
11 | 
12 | # Read the two data files from disk.
13 | x = load('c:/temp/ex2x.dat');
14 | y = load('c:/temp/ex2y.dat');
15 | # Plot y against x with circle markers in a new figure window.
16 | figure;
17 | plot(x, y, 'o');
18 | xlabel('Age in years');
19 | ylabel('Height in meters');


--------------------------------------------------------------------------------
/Chapter07/c7_08_optimize_help.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c7_08_optimize_help.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import scipy as sp
12 | x=dir(sp.optimize)
13 | print(x)
14 | 
15 | 
16 | #import numpy as np
17 | 
18 | from scipy.optimize import minimize
19 | help(minimize)
20 | 


--------------------------------------------------------------------------------
/Chapter10/c10_28_Granger_test02.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c10_28_Granger_test02.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/24/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | library(lmtest)
13 | data(ChickEgg)
14 | # Granger test with 3 lags: chicken explained by lagged egg ...
15 | grangertest(chicken ~ egg, order = 3, data = ChickEgg)
16 | # ... and the reverse direction: egg explained by lagged chicken.
17 | grangertest(egg ~ chicken, order = 3, data = ChickEgg)


--------------------------------------------------------------------------------
/Chapter04/c4_23_bisection_method.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_23_bisection_method.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(animation)
12 | # Set the plot margins.
13 | par(mar = c(4, 4, 1, 2))
14 | # Function handed to the bisection demonstration: f(x) = x^2 - 4.
15 | myFunction <- function(x) x^2 - 4
16 | title <- "Bisection method"
17 | bisection.method(FUN = myFunction, main = title)


--------------------------------------------------------------------------------
/Chapter07/c7_15_fminsearch.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c7_15_fminsearch.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 |   source:  https://octave.sourceforge.io/octave/function/fminsearch.html
11 | #}
12 | 
13 | 
14 | # Objective function of two variables, x(1) and x(2).
15 | fun = @(x) 50*(x(1)^2 - x(2))^2 + (x(1) - 3)^2;
16 | # Start the search at the origin; the result is echoed (no trailing semicolon).
17 | x0 = [0, 0];
18 | x = fminsearch(fun, x0)


--------------------------------------------------------------------------------
/Chapter08/c8_11_randomForest_plot.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_11_randomForest_plot.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(randomUniformForest)
12 | data(iris)
13 | # Short alias for the long function name.
14 | ff <- unsupervised.randomUniformForest
15 | # Fit on iris without its 5th column, then plot the fitted object.
16 | x <- ff(iris[, -5], mtry = 1, nodesize = 2)
17 | plot(x)
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/Chapter09/c9_09_Bayes_titanic.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c9_09_Bayes_titanic.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(e1071)
12 | data(Titanic)
13 | # Fit a naive Bayes model of Survived on all the other variables.
14 | m <- naiveBayes(Survived ~ ., data = Titanic)
15 | # Predict for the data-frame form of the same table.
16 | output <- predict(m, as.data.frame(Titanic))
17 | 
18 | print(m)
19 | print(output)


--------------------------------------------------------------------------------
/Chapter02/c3_01.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_01_iris_data.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Read the iris data file from the UCI repository; the file is read without a header row.
12 | path <- "http://archive.ics.uci.edu/ml/machine-learning-databases/"
13 | dataSet <- "iris/bezdekIris.data"
14 | a <- paste0(path, dataSet)
15 | x <- read.csv(a, header = FALSE)
16 | 
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/Chapter02/c3_27_datadotworld_1.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_27_datadotworld_1.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import datadotworld as dw
13 | dataset = 'jonloyens/an-intro-to-dataworld-dataset'
14 | data = dw.load_dataset(dataset, force_update=True) 
15 | list(dataset.dataframes)


--------------------------------------------------------------------------------
/Chapter03/c3_01.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_01_iris_data.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Read the iris data file from the UCI repository; the file is read without a header row.
12 | path <- "http://archive.ics.uci.edu/ml/machine-learning-databases/"
13 | dataSet <- "iris/bezdekIris.data"
14 | a <- paste0(path, dataSet)
15 | x <- read.csv(a, header = FALSE)
16 | 
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/Chapter03/c3_27_datadotworld_1.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_27_datadotworld_1.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import datadotworld as dw
13 | dataset = 'jonloyens/an-intro-to-dataworld-dataset'
14 | data = dw.load_dataset(dataset, force_update=True) 
15 | list(dataset.dataframes)


--------------------------------------------------------------------------------
/Chapter04/c4_03_pie.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_03_pie.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | results <- c(10, 8, 7, 4, 6)
12 | names <- c("1st", "2nd", "3rd", "4th", "5th")
13 | # Percentage share of each result, and the shares in decreasing order.
14 | # (Neither pct nor pct2 is used by the pie() call below.)
15 | pct <- round(results/sum(results)*100)
16 | pct2 <- sort(pct, decreasing = TRUE)
17 | pie(results, labels = names, main = "Pie Chart of results")


--------------------------------------------------------------------------------
/Chapter04/c4_29_annimation_flip_coin.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_29_annimation_flip_coin.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | library(animation)
11 | # NOTE(review): myProb is not passed to flip.coin() below, so it has no effect on the animation.
12 | myProb<-c(0.45,0.1, 0.45)
13 | # Change the animation options and remember the previous settings.
14 | oopt = ani.options(interval=0.2,nmax = ifelse(interactive(), 100, 2))
15 | flip.coin(bg = "yellow")
16 | # Put the previous animation options back.
17 | ani.options(oopt)


--------------------------------------------------------------------------------
/Chapter09/c9_06_print_iris.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c9_06_print_iris.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import numpy as np
13 | from sklearn import datasets
14 | iris = datasets.load_iris()
15 | print("Data\n",iris.data[0:4,])
16 | print("target\n",iris.target[0:4])
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/Chapter10/c10_13_timeSeries.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_13_timeSeries.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | library(timeSeries)
13 | data(MSFT)
14 | x <- MSFT
15 | # Weekly sequence of dates from the start to the end of the series.
16 | by <- timeSequence(from = start(x), to = end(x), by = "week")
17 | # Aggregate the series over that weekly sequence with mean().
18 | y <- aggregate(x, by, mean)
19 | # Compare the first rows of the original and of the aggregated series.
20 | head(x)
21 | head(y)


--------------------------------------------------------------------------------
/Chapter12/c12_01_lapply.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c12_01_lapply.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/14/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | # Apply one anonymous function to 1, 2 and 3.
13 | lapply(1:3,function(x) c(sin(x),x^2))
14 | 
15 | # Several formulas bundled into ONE function of x (x is undefined at top level, so a plain vector cannot be built here) ...
16 | myFunctions<-function(x) c(sin(x),x^2+2,4*x^2-x^3-2)
17 | inputValue<-1:10
18 | # ... and evaluated at every input value.
19 | output<-lapply(inputValue,myFunctions)
20 | 
21 | 


--------------------------------------------------------------------------------
/Chapter02/c3_14_cbsodata.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_14_cbsodata.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import pandas as pd
12 | import cbsodata as cb
13 | name='82070ENG'
14 | data = pd.DataFrame(cb.get_data(name))
15 | print(data.head())
16 | info=cb.get_info(name)
17 | print(info['Title'])
18 | 
19 | 


--------------------------------------------------------------------------------
/Chapter03/c3_14_cbsodata.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_14_cbsodata.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import pandas as pd
12 | import cbsodata as cb
13 | name='82070ENG'
14 | data = pd.DataFrame(cb.get_data(name))
15 | print(data.head())
16 | info=cb.get_info(name)
17 | print(info['Title'])
18 | 
19 | 


--------------------------------------------------------------------------------
/Chapter03/c3_34_read_ff3monthly_csv.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_34_read_ff3monthly_csv.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/16/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | inFile<-"http://canisius.edu/~yany/data/ff3monthly.csv"
12 | ff3monthly<-read.csv(inFile,skip=3)
13 | saveRDS(ff3monthly,file="c:/temp/ff3monthly.rds")
14 | 
15 | 
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/Chapter09/c9_01_titanic.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c9_01_titanic.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Address of the remote .RData file.
12 | path <- "http://canisius.edu/~yany/RData/"
13 | dataSet <- "titanic"
14 | link <- paste0(path, dataSet, ".RData")
15 | # Load the file through a URL connection.
16 | con <- url(link)
17 | load(con)
18 | # Size and first rows of .titanic, the object the script expects the file to provide.
19 | dim(.titanic)
20 | head(.titanic)
21 | 


--------------------------------------------------------------------------------
/Chapter02/c3_10_R_package_foreign.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_10_R_package_foreign.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # http://calcnet.mth.cmich.edu/org/spss/Prj_airlinePassengers.htm
12 | 
13 | library(foreign)
14 | # Read the SPSS .sav file and return it as a data frame.
15 | x <- read.spss(file = "c:/temp/airline_passengers.sav", to.data.frame = TRUE)
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/Chapter03/c3_10_R_package_foreign.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_10_R_package_foreign.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # http://calcnet.mth.cmich.edu/org/spss/Prj_airlinePassengers.htm
12 | 
13 | library(foreign)
14 | # Read the SPSS .sav file and return it as a data frame.
15 | x <- read.spss(file = "c:/temp/airline_passengers.sav", to.data.frame = TRUE)
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/Chapter05/c5_29_replace_na_with_mean.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_29_replace_na_with_mean.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import scipy as sp
13 | import pandas as pd
14 | df = pd.DataFrame({'A' : [2,sp.nan,3,4]})
15 | print(df)
16 | df.fillna(df.mean(), inplace=True)
17 | print(df)
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/Chapter07/c7_13_fminsearch.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c7_13_fminsearch.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 |   source:  https://octave.sourceforge.io/octave/function/fminsearch.html
11 | #}
12 | 
13 | 
14 | # Objective: f(x) = (x1-5)^2 + (x2-8)^4.  No trailing semicolon, so the handle is echoed.
15 | fcn = @(x) (x(1)-5).^2 + (x(2)-8).^4
16 | # Column-vector starting point.
17 | x0 = [0; 0];
18 | # Ask for both outputs of fminsearch; they are echoed as well.
19 | [xmin, fval] = fminsearch(fcn, x0)
20 | 


--------------------------------------------------------------------------------
/Chapter10/c10_08_get_UCIdatasets.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_08_get_UCIdatasets.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Build the address of the remote .RData file.
12 | dataSet <- "UCIdatasets"
13 | path <- "http://canisius.edu/~yany/RData/"
14 | con <- paste0(path, dataSet, ".RData")   # a character string, despite the name
15 | load(url(con))
16 | # Size and first rows of .UCIdatasets, the object the script expects the file to provide.
17 | dim(.UCIdatasets)
18 | head(.UCIdatasets)
19 | 


--------------------------------------------------------------------------------
/Chapter04/c4_04_Pyplot_julia.jl:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c4_04_PyPlot_julia.jl
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 1/25/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | # Pkg.add("PyPlot")
13 | 
14 | using PyPlot
15 | x = linspace(0,2*pi,1000)
16 | # x is a vector, so both sin and cos are applied element-wise (note the dots).
17 | y = sin.(2*x + 3*cos.(1.5*x));
18 | plot(x, y, color="green", linewidth=2.0, linestyle="--")
19 | 
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter08/c8_20_5pointsCluster.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_20_5pointsCluster.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | library(cluster)
13 | # Five observations (rows) with two coordinates each.
14 | data <- rbind(c(180, 20), c(160, 5), c(60, 150), c(160, 60), c(80, 120))
15 | # Partition the observations into 2 clusters.
16 | output <- clara(data, 2)
17 | output$clustering
18 | #output$clusinfo
19 | output$medoids
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/Chapter12/c12_05_snow_01.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c12_05_snow_01.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/14/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(snow)
12 | # Two socket workers on the local machine.
13 | cl <- makeSOCKcluster(c("localhost","localhost"))
14 | # Call "+" on the workers with the elements 1 and 2, each together with 3.
15 | clusterApply(cl, 1:2, get("+"), 3)
16 | # Load the boot package on every worker.
17 | clusterEvalQ(cl, library(boot))
18 | # Export x to the workers, then have each of them evaluate x + 2.
19 | x<-1
20 | clusterExport(cl, "x")
21 | clusterCall(cl, function(y) x + y, 2)
22 | # Shut the workers down; otherwise their R processes keep running.
23 | stopCluster(cl)


--------------------------------------------------------------------------------
/Chapter02/c3_23_datadotworld.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_23_datadotworld.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import datadotworld as dw
13 | dataset = dw.load_dataset('c:/temp/an-intro-to-dataworld-dataset')
14 | list(dataset.dataframes)
15 |     ['changelog', 'datadotworldbballstats', 'datadotworldbballteam']
16 | 
17 | 


--------------------------------------------------------------------------------
/Chapter03/c3_23_datadotworld.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_23_datadotworld.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import datadotworld as dw
13 | dataset = dw.load_dataset('c:/temp/an-intro-to-dataworld-dataset')
14 | list(dataset.dataframes)
15 |     ['changelog', 'datadotworldbballstats', 'datadotworldbballteam']
16 | 
17 | 


--------------------------------------------------------------------------------
/Chapter02/c3_04_save_RDatat.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_04_save_RData.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Read the iris data file (no header row) from the UCI repository ...
12 | path <- "http://archive.ics.uci.edu/ml/machine-learning-databases/"
13 | dataSet <- "iris/bezdekIris.data"
14 | a <- paste0(path, dataSet)
15 | iris <- read.csv(a, header = FALSE)
16 | # ... and store the data frame, under the name iris, in an .RData file.
17 | save(iris, file = "c:/temp/iris.RData")
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/Chapter03/c3_04_save_RDatat.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_04_save_RData.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Read the iris data file (no header row) from the UCI repository ...
12 | path <- "http://archive.ics.uci.edu/ml/machine-learning-databases/"
13 | dataSet <- "iris/bezdekIris.data"
14 | a <- paste0(path, dataSet)
15 | iris <- read.csv(a, header = FALSE)
16 | # ... and store the data frame, under the name iris, in an .RData file.
17 | save(iris, file = "c:/temp/iris.RData")
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/Chapter04/c4_21_plot_Julia.jl:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c4_21_plot_julia.jl
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 1/25/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | 
13 | using PyPlot
14 | # 200 evenly spaced points on [0, 10] and their sines.
15 | x = linspace(0, 10, 200)
16 | y = sin.(x)
17 | # LaTeX string used as the legend entry.
18 | name = L"$y = \sin(x)$"
19 | fig, ax = subplots()
20 | ax[:plot](x, y, "r-", linewidth=2, label=name, alpha=0.6)
21 | ax[:legend](loc="upper center")
22 | 


--------------------------------------------------------------------------------
/Chapter04/c4_13_add_trendLine.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_13_add_trendLine.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Simulated data: the line 2 + 3x plus standard normal noise.
12 | x=seq(-4,4,by=0.05)
13 | n<-length(x)
14 | y=2+3*x + rnorm(n)
15 | # Least-squares fit of y on x; k holds the intercept and the slope.
16 | k <- coef(lm(y ~ x))
17 | intercept <- k[1]
18 | slope <- k[2]
19 | y2 <- intercept + slope * x
20 | # Data points in red, with the fitted line drawn on top in green.
21 | plot(x, y, type = "p", col = "red")
22 | lines(x, y2, col = "green")
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter10/c10_06_fisher_Z_score.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_06_fisher_Z_score.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(eclust)
12 | data("simdata")
13 | # Drop the first two columns of simdata.
14 | X <- simdata[, c(-1, -2)]
15 | # Correlation matrices of rows 1-50 and of rows 51-100.
16 | firstCorr <- cor(X[1:50, ])
17 | secondCorr <- cor(X[51:100, ])
18 | # NOTE(review): n0 and n1 are 100 although each matrix is computed from 50 rows -- confirm the intended sample sizes.
19 | score <- u_fisherZ(n0 = 100, cor0 = firstCorr, n1 = 100, cor1 = secondCorr)
20 | dim(score)
21 | score[1:5, 1:5]


--------------------------------------------------------------------------------
/Chapter11/c11_01_qt_consol.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c11_01_qt_consol.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/8/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import numpy as np
12 | from scipy.special import jn
13 | import matplotlib.pyplot as plt
14 | from matplotlib.pyplot import plot
15 | #
16 | x=np.linspace(0,3*np.pi)
17 | for i in range(6):
18 |     plot(x,jn(i,x))
19 | #
20 | plt.show()
21 | 


--------------------------------------------------------------------------------
/Chapter08/c8_03_cluster_animals.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_03_cluster_animals.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | library(cluster)
13 | data(animals)
14 | # First look at the data: size, leading rows, column names.
15 | dim(animals)
16 | head(animals)
17 | colnames(animals)
18 | # Frequency table of every column (simple overview).
19 | apply(animals, 2, table)
20 | 
21 | # Run mona() on the data, print the result and plot it.
22 | ma <- mona(animals)
23 | ma
24 | plot(ma)
25 | 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/Chapter10/c10_31_timeUsed.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c10_31_timeUsed.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | 
11 | 
12 | using QuantEcon
13 | # 65 quadrature nodes and weights on [-2pi, 2pi].
14 | nodes, weights = qnwlege(65, -2pi, 2pi);
15 | # Broadcast cos over the nodes (cos.), matching the timed do_quad call below.
16 | integral = do_quad(x -> cos.(x), nodes, weights)
17 | # Time the same integral with quadgk and with the precomputed nodes and weights.
18 | @time quadgk(x -> cos.(x), -2pi, 2pi)
19 | @time do_quad(x -> cos.(x), nodes, weights)
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/Chapter09/c9_20_unique_value_iris.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 |   Name     : c9_20_unique_value_iris.py
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | """
11 | 
12 | import sklearn as sk
13 | from sklearn import datasets
14 | iris = datasets.load_iris()
15 | mylist=list(iris.target)
16 | used = []
17 | unique = [x for x in mylist if x not in used and used.append(x)]
18 | print(used)
19 | 
20 | 


--------------------------------------------------------------------------------
/Chapter04/c4_07_shaded_area_standard_normal_dist.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_07_shaded_area_standard_normal_dist.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 |  x<-seq(-3,3,length=100)
12 |  y<-dnorm(x,mean=0,sd=1)
13 |  plot(x,y,type="b",lwd=3,col="black")
14 |  x<-seq(-4,-2.33,length=100)
15 |  y<-dnorm(x,mean=0,sd=1)
16 |  polygon(c(-3,x,-2.33),c(0,y,0),col="red")
17 | 
18 | 


--------------------------------------------------------------------------------
/Chapter07/c7_02.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c7_02.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | # Maximize a*x^2+b*x+c by minimizing its negative.
13 | a<- -2
14 | b<-10
15 | c<-5
16 | f<-function(x)-(a*x^2+b*x+c)
17 | 
18 | optim(0.3,f)
19 | # optim's default Nelder-Mead warns on 1-D problems; "Brent" is the 1-D method:
20 | #optim(0,f,method="Brent",lower = -100, upper = 100)
21 | 
22 | 
23 | # Second example: minimize 3*x^2-4*x+1 (analytic minimum at x = 2/3).
24 | f<-function(x)3*x^2-4*x+1
25 | optim(0.3,f)
26 | optim(0,f,method="Brent",lower = -100, upper = 100)


--------------------------------------------------------------------------------
/Chapter07/c7_06_.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c7_06.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | A<-1.0
12 | mean<-0.10
13 | std<-0.5
14 | set.seed(123)
15 | nStock<-10
16 | nRet<-50
17 | #
18 | n<-nStock*nRet
19 | retMatrix<-matrix(rnorm(n,mean,std),nRet,nStock)
20 | names<-paste("RET",1:nStock,sep='')
21 | colnames(retMatrix)<-names
22 | w<-rep(1/nStock,nStock)
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/Chapter08/c8_24_01.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c8_24_01.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | 
11 | 
12 | using Base.Test
13 | using ClusterAnalysis
14 | 
15 | lbls = vec([1 1 1 2 2 2])
16 | expected = vec([2 2 2 3 3 3])
17 | 
18 | @test 1.0 == ClusterAnalysis.adjusted_rand_index(lbls, expected)
19 | 
20 | lbls = vec([1 1 1 2 2 2])
21 | expected = vec([1 2 1 1 2 1])
22 | 


--------------------------------------------------------------------------------
/Chapter08/c8_28_iris_kMean_sklearn.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c8_28_iris_kMean_sklearn.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | from sklearn import cluster
13 | from sklearn import datasets
14 | x=datasets.load_iris()
15 | iris = datasets.load_iris()
16 | x = iris.data
17 | k_means = cluster.KMeans(n_clusters=3)
18 | k_means.fit(x) 
19 | print(k_means.labels_[::10])


--------------------------------------------------------------------------------
/Chapter03/c3_32_write_sas_write_spss_write_stata.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_32_write_sas_write_spss_write_stata.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(haven)
12 | x<-1:100
13 | y<-matrix(x,50,2)
14 | z<-data.frame(y)
15 | colnames(z)<-c("a","b")
16 | write_sas(z,"c:/temp/tt.sas7bdat")
17 | write_spss(z,"c:/temp/tt.sav")
18 | write_stata(z,"c:/temp/tt.dta")
19 | 
20 | 


--------------------------------------------------------------------------------
/Chapter07/c7_10_3D_graph.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c7_10_3D.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(scatterplot3d)
12 | x<-seq(-2,2,0.05)
13 | y<-seq(-2,2,0.05)
14 | z<-(x^2-1)^2+(x^2*y-x-1)^2
15 | name<-"3 dimensional graph" 
16 | scatterplot3d(x, y, z, highlight.3d = TRUE, col.axis = "blue",
17 | col.grid = "lightblue", main =name, pch = 2)
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/Chapter08/c8_08_python_hierarchical.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c8_08_python_hierachical.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import numpy as np
13 | import scipy.cluster.hierarchy as hac
14 | import matplotlib.pyplot as plt
15 | #
16 | n=100
17 | x=np.random.normal(0,8,n)
18 | y=np.random.normal(10,8,n)
19 | a = [x,y]
20 | z = hac.linkage(a, method='single')
21 | plt.


--------------------------------------------------------------------------------
/Chapter08/c8_09_randomUniformForest_not_working.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c8_00_randomUniformForest.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 3/25/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | library(randomUniformForest)
13 | data(iris)
14 | x<-randomUniformForest(Species ~ ., data = iris, threads = 1, ntree = 20, BreimanBounds = FALSE) 
15 | # get the 10th tree
16 | OneTree <- getTree.randomUniformForest(x, 10)
17 | 
18 | 


--------------------------------------------------------------------------------
/Chapter08/c8_25_clustering.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c8_25_cluster.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | 
11 | 
12 | 
13 | using Clustering
14 | srand(12345)
15 | d = 10
16 | n = 500
17 | x = rand(d, n)
18 | S = -pairwise(Euclidean(), x, x)
19 | # set diagonal value to median value
20 | S = S - diagm(diag(S)) + median(S)*eye(size(S,1)) 
21 | R = affinityprop(S)
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter04/c4_30_annimation3flip_coin.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_30_annimation_flipCoin2.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | library(animation)
13 | myProb<-c(0.45,0.1, 0.45)
14 | names<- c("Head", "Stand", "Tail")
15 | oopt = ani.options(interval=0.2,nmax = ifelse(interactive(), 100, 2))
16 | flip.coin(faces =names,type="n",prob = myProb, col = c(1, 2, 4))
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/Chapter08/c8_13_mixMod_bar.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_13_Rmixmod_bar.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(Rmixmod)
12 | data(birds)
13 | x <- mixmodCluster(birds,2)
14 | bb<-barplotCluster
15 | bb(x["bestResult"], birds)
16 | bb(x["bestResult"], birds, variables=c(2,3,4))
17 | bb(x["bestResult"], birds, variables=c("eyebrow","collar"))
18 | 
19 | 
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter02/c3_16_missing_code_R.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_16_missing_code.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | > head(na_example,20)
12 |  [1]  2  1  3  2  1  3  1  4  3  2  2 NA  2  2  1  4 NA  1  1  2
13 | > length(na_example)
14 | [1] 1000
15 | > x<-na.exclude(na_example)
16 | > length(x)
17 | [1] 855
18 | > head(x,20)
19 |  [1] 2 1 3 2 1 3 1 4 3 2 2 2 2 1 4 1 1 2 1 2
20 | > 
21 | 


--------------------------------------------------------------------------------
/Chapter03/c3_16_missing_code_R.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_16_missing_code.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | > head(na_example,20)
12 |  [1]  2  1  3  2  1  3  1  4  3  2  2 NA  2  2  1  4 NA  1  1  2
13 | > length(na_example)
14 | [1] 1000
15 | > x<-na.exclude(na_example)
16 | > length(x)
17 | [1] 855
18 | > head(x,20)
19 |  [1] 2 1 3 2 1 3 1 4 3 2 2 2 2 1 4 1 1 2 1 2
20 | > 
21 | 


--------------------------------------------------------------------------------
/Chapter04/c4_16_plot_julia.jl:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_16_plot_julia.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Pkg.add("Plots")
12 | 
13 | using Plots
14 | srand(123)
15 | plot(rand(5,5),linewidth=2,title="Simple plot")
16 | 
17 | 
18 | using PyPlot
19 | x = linspace(0,2*pi,1000)
20 | y = sin.(2*x + 3*cos.(1.5*x));   # x is a vector, so sin/cos must be dot-broadcast
21 | plot(x, y, color="green", linewidth=2.0, linestyle="--")
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter04/c4_31_pie_grey.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_31_pie_grey.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | results <- c(10,8,7,4,6)
12 | names<-c("1st","2nd","3rd","4th","5th")
13 | pct<-round(results/sum(results)*100)
14 | pct2<-rev(sort(pct))
15 | name<-"Pie Chart of results"
16 | colors<-seq(0.4, 1.0, length = length(results))
17 | pie(results, labels = names, col = gray(colors),main=name)


--------------------------------------------------------------------------------
/Chapter06/c6_17_financialCalculator.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_17_financialCalculator.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Time-value-of-money helpers: fv/pv = future/present value, c = payment per period,
12 | # r = rate per period (the annuity/perpetuity formulas divide by r), n = number of periods.
13 | pv_f<-function(fv,r,n) fv/(1+r)^n
14 | fv_f<-function(pv,r,n)pv*(1+r)^n
15 | pv_annuity<-function(c,r,n)c/r*(1-1/(1+r)^n)
16 | fv_annuity<-function(c,r,n)c/r*((1+r)^n-1)
17 | pv_perptuity<-function(c,r)c/r
18 | pv_perptuityDue<-function(c,r)c/r*(1+r)
19 | pv_perpetuity<-pv_perptuity; pv_perpetuityDue<-pv_perptuityDue  # correctly spelled aliases; old names kept


--------------------------------------------------------------------------------
/Chapter08/c8_23_randomNumbersFrom2normal.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_23_randomNumbersFrom2normal.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(cluster)
12 | set.seed(123)
13 | n1<-200; mean1<-0; std1<-8
14 | n2<-300; mean2<-80; std2<-8
15 | set1<-cbind(rnorm(n1,mean1,std1), rnorm(n1,mean1,std1))
16 | set2<-cbind(rnorm(n2,mean2,std2), rnorm(n2,mean2,std2))
17 | x <- rbind(set1,set2)
18 | 


--------------------------------------------------------------------------------
/Chapter04/c4_19_scatter_plot_PyPlot.jl:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_19_scatter_plot_PyPlot.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | using PyPlot
11 | n=50
12 | x = 100*rand(n)
13 | y = 100*rand(n)
14 | areas = 800*rand(n)
15 | fig = figure("pyplot_scatterplot",figsize=(10,10))
16 | ax = axes()
17 | scatter(x,y,s=areas,alpha=0.5)
18 | title("Scatter Plot")
19 | xlabel("X")
20 | ylabel("Y")
21 | grid("on")


--------------------------------------------------------------------------------
/Chapter04/c4_27_3stock_connection.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_27_3stock_connection.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(qgraph)
12 | stocks<-c("IBM","MSFT","WMT")
13 | x<-rep(stocks, each = 3)
14 | y<-rep(stocks, 3)
15 | correlation<-c(0,10,3,10,0,3,3,3,0)
16 | data <- as.matrix(data.frame(from =x , to =y, width =correlation))
17 | qgraph(data, mode = "direct", edge.color = rainbow(9))


--------------------------------------------------------------------------------
/Chapter04/c4_33_plot_grey.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c4_33_plot_grey.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | plot(rand(5,5),color="grep",linewidth=2,title="Simple plot")
11 | #plot(rand(5,5),color="grep",linewidth=2,title=name)
12 | =#
13 | 
14 | # Pkg.add("Plots")
15 | 
16 | using Plots
17 | srand(123)
18 | name="Simple plot"
19 | plot(rand(5,5),color="gray",linewidth=2,title=name)
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter08/c8_31_generate_dendrogram_using20obsWine.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_31_generate_dendrogram_using20obsWine.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | library(rattle.data)
13 | data(wine)
14 | x<-head(wine,10)
15 | 
16 | library(rattle)
17 | rattle()
18 | 
19 | # choose R data set
20 | # choose x
21 | # hit Execute
22 | # choose cluster
23 | # hit Execute
24 | # choose Dendrogram
25 | 


--------------------------------------------------------------------------------
/Chapter09/c9_21_reinforcementLearning.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c9_21_reinforcementLearning.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | library("ReinforcementLearning")
13 | set.seed(123)
14 | data <- sampleGridSequence(1000)
15 | dim(data)
16 | head(data)
17 | unique(data$State)
18 | unique(data$Action)
19 | unique(data$NextState)
20 | unique(data$Reward)
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/Chapter02/c3_24_get_iris.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_24_get_iris.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import pandas as pd
12 | path="http://archive.ics.uci.edu/ml/machine-learning-databases/"
13 | dataset="iris/bezdekIris.data"
14 | inFile=path+dataset
15 | data=pd.read_csv(inFile,header=None)
16 | data.columns=["sepalLength","sepalWidth","petalLength","petalWidth","Class"]
17 | print(data.head(2))


--------------------------------------------------------------------------------
/Chapter03/c3_24_get_iris.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_24_get_iris.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import pandas as pd
12 | path="http://archive.ics.uci.edu/ml/machine-learning-databases/"
13 | dataset="iris/bezdekIris.data"
14 | inFile=path+dataset
15 | data=pd.read_csv(inFile,header=None)
16 | data.columns=["sepalLength","sepalWidth","petalLength","petalWidth","Class"]
17 | print(data.head(2))


--------------------------------------------------------------------------------
/Chapter04/c4_06_add_labels.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c4_06_add_labels.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import scipy as sp
13 | import matplotlib.pyplot as plt 
14 | #
15 | x=sp.linspace(-2*sp.pi,2*sp.pi,200,endpoint=True)
16 | y=sp.cos(x)
17 | plt.plot(x,y)
18 | plt.xlabel("x-value")
19 | plt.ylabel("Cosine function")
20 | plt.title("Cosine curve from -2pi to 2pi")
21 | plt.show()
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter10/c10_09_usGDP.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_09_usGDP.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | path<-"http://canisius.edu/~yany/RData/"
12 | dataSet<-"usGDPannual"
13 | con<-paste(path,dataSet,".RData",sep='')
14 | load(url(con))
15 | 
16 | 
17 | head(.usGDPannual)
18 | dataSet<-"usGDPquarterly"
19 | con<-paste(path,dataSet,".RData",sep='')
20 | load(url(con))
21 | head(.usGDPquarterly)
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter10/c10_30_QuantEcon_simulated.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c10_30_QuantEcon_simulated.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | 
11 | 
12 | using QuantEcon
13 | P = [0.4 0.6; 0.2 0.8];
14 | mc = MarkovChain(P)
15 | x = simulate(mc, 100000);
16 | mean(x .== 1) 
17 | #
18 | mc2 = MarkovChain(P, ["employed", "unemployed"])
19 | simulate(mc2, 4)
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/Chapter12/c12_10_plyr_arrange.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c12_10_plyr_arrange.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/14/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(datasets)
12 | library(plyr)
13 | data(mtcars)
14 | #
15 | d1<-mtcars[with(mtcars, order(cyl, disp)), ]
16 | d2<-arrange(mtcars, cyl, disp)
17 | myCars = cbind(vehicle=row.names(mtcars), mtcars)
18 | d3<-arrange(myCars, cyl, disp)
19 | d4<-arrange(myCars, cyl, desc(disp))
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/Chapter02/c3_22_sort_by2columns.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c3_22_sort_by2variables.R
 4 |   Book     : Hands-on Data Science with Anaconda)
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 1/15/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 |  x<-c(1,3,1, 0.1,0.3,-0.4,100,300,30)
13 |  y<-data.frame(matrix(x,3,3))
14 |  colnames(y)<-c("ID","RET","Data1")
15 | 
16 |  y
17 | 
18 |  z<-y[order(y$ID,y$RET),]
19 | 
20 | 
21 | 
22 |  z2<-y[order(y$ID,decreasing = TRUE,y$RET),]
23 | z
24 | 
25 | 
26 | 
27 | 
28 | decreasing = FALSE
29 | 


--------------------------------------------------------------------------------
/Chapter03/c3_22_sort_by2columns.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c3_22_sort_by2variables.R
 4 |   Book     : Hands-on Data Science with Anaconda)
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 1/15/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 |  x<-c(1,3,1, 0.1,0.3,-0.4,100,300,30)
13 |  y<-data.frame(matrix(x,3,3))
14 |  colnames(y)<-c("ID","RET","Data1")
15 | 
16 |  y
17 | 
18 |  z<-y[order(y$ID,y$RET),]
19 | 
20 | 
21 | 
22 |  z2<-y[order(y$ID,decreasing = TRUE,y$RET),]
23 | z
24 | 
25 | 
26 | 
27 | 
28 | decreasing = FALSE
29 | 


--------------------------------------------------------------------------------
/Chapter05/c5_23_number_outliers.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_23_number_outliers.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 |  distance<-3                       # outlier threshold, in standard deviations above the mean
12 |  x<-read.csv("c:/temp/^GSPCweekly.csv")
13 |  p<-x$Adj.Close
14 |  n<-length(p)                      # number of prices; n was used below but never defined
15 |  ret<-p[2:n]/p[1:(n-1)]-1          # simple period-over-period returns
16 |  m<-mean(ret)
17 |  std<-sd(ret)
18 |  ret2<-subset(ret,((ret-m)/std)>distance)
19 |  n2<-length(ret2)
20 | 
21 |  head(x,2)
22 |  m
23 |  std
24 |  length(ret)
25 |  n2
26 | 


--------------------------------------------------------------------------------
/Chapter07/c7_19_optimization.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c7_19_optimization.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | #}
11 | 
12 | 
13 | # Minimize fun2 with fminsearch starting from x0, displaying every iteration.
14 | OPTIONS = optimset('Display','iter');
15 | function f = fun2(x)
16 |    f = 0;
17 |    for k = -5:5   # NOTE(review): k is not used in the summand, so f is 11 copies of one term - confirm intent
18 |       f = f + exp(-(x(1)-x(2))^2 - 2*x(1)^2)*cos(x(2))*sin(2*x(2));
19 |    end
20 | endfunction 
21 | x0 = [0.5,-0.5];
22 | [x,fval] = fminsearch(@fun2,x0,OPTIONS)
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/Chapter04/c4_22_brownian_motion_animation.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_22_Brownian_motion_anmation.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(animation)
12 | ani.options(interval = 0.05, nmax = 30)
13 | a<- c(3,3,2,0.5)
14 | b<-c(2,0.5,0)
15 | name<-"Brownian Motion"
16 | par(mar=a,mgp=b,tcl=-0.3,cex.axis=0.8,cex.lab=0.8,cex.main=1)
17 | brownian.motion(pch=21,cex=5,col="red",bg="yellow",main=name)
18 | 
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/Chapter10/c10_14_movingAverage.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_14_movingAverage.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # Trailing n-point moving average of data; the first n-1 entries are returned unchanged.
12 | movingAverageFunction<- function(data,n=10){
13 |   out<-data
14 |   if(length(data)<n) return(out)   # window longer than data: n:length(data) would count down
15 |   for(i in n:length(data)) out[i]<-mean(data[(i-n+1):i])
16 |   return(out)
17 | }
18 | 
19 | library(timeSeries)
20 | data(MSFT)
21 | p<-MSFT$Close
22 | #
23 | ma<-movingAverageFunction(p,3)
24 | head(p)
25 | head(ma)
26 | 


--------------------------------------------------------------------------------
/Chapter02/c3_25_sort_Python.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_25_sort_python.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | a = pd.DataFrame([[8,3],[8,2],[1,-1]],columns=['X','Y'])
14 | print(a)
15 | # sort by X ascending, then Y descending
16 | b= a.sort_values(['X', 'Y'], ascending=[1, 0])
17 | print(b)
18 | # sort by X and Y, both ascending
19 | c= a.sort_values(['X', 'Y'], ascending=[1, 1])
20 | print(c)


--------------------------------------------------------------------------------
/Chapter03/c3_25_sort_Python.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_25_sort_python.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | a = pd.DataFrame([[8,3],[8,2],[1,-1]],columns=['X','Y'])
14 | print(a)
15 | # sort by X ascending, then Y descending
16 | b= a.sort_values(['X', 'Y'], ascending=[1, 0])
17 | print(b)
18 | # sort by X and Y, both ascending
19 | c= a.sort_values(['X', 'Y'], ascending=[1, 1])
20 | print(c)


--------------------------------------------------------------------------------
/Chapter05/c5_18_ff3_factor_ibm.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_18_ff3_factor_ibm.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | con<-url("http://canisius.edu/~yany/RData/ff3monthly.RData")
12 | load(con)
13 | head(.ff3monthly)
14 | x<-read.csv("http://canisius.edu/~yany/data/ibmMonthly.csv")
15 | stock<-ret_f(x)
16 | final<-merge(stock,.ff3monthly)
17 | y<-final$RET
18 | x<-as.matrix(data.frame(final[,3:5]))
19 | summary(lm(y~x))
20 | 
21 | 


--------------------------------------------------------------------------------
/Chapter03/c3_33_generate_z_csv.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_33_generate_z_csv.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | set.seed(123)
12 | n=500
13 | x<-rnorm(n)
14 | x2<-x
15 | m=100
16 | y<-as.integer(runif(m)*n)
17 | x[y]<-0
18 | z<-matrix(x,n/10,10)
19 | outFile<-"c:/temp/z.csv"
20 | write.table(z,file=outFile,quote=F,row.names=F,col.names=F,sep=',')
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/Chapter04/c4_11_histogram.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c4_11_histogram.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | data = np.random.RandomState(10) 
14 | n=5000
15 | n2=1000
16 | x=data.normal(size=n)
17 | y=data.normal(loc=5, scale=2,size=n2)
18 | a=(x,y)
19 | b = np.hstack(a)
20 | plt.hist(b, bins='auto') 
21 | plt.title("Histogram with 'auto bins'")
22 | plt.show()
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter05/c5_25_get_critical_value_F_test.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_25_get_critical_value_F_test.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import scipy as sp
13 | alpha=0.10
14 | d1=1
15 | d2=1
16 | critical=sp.stats.f.ppf(q=1-alpha, dfn=d1, dfd=d2)
17 | prob=sp.stats.f.cdf(critical, dfn=d1, dfd=d2)
18 | print("alpha, d1, d2,  critical value, prob")
19 | print(alpha, d1, d2,  critical, prob)
20 | 
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter09/c9_34_Kmean_randomNumbers.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c9_34_Kmean_randomNumbers.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | # k-means on uniform random data; the header above uses #= ... =# because `###` only comments one line.
11 | using Clustering
12 | srand(1234)        # fix the seed so the random matrix is reproducible
13 | nRow=5
14 | nCol=1000
15 | x = rand(nRow,nCol)   # nRow x nCol matrix of uniform random numbers
16 | maxInter=200  #max interation 
17 | nCluster=20
18 | R = kmeans(x,nCluster;maxiter=maxInter,display=:iter)
19 | @assert nclusters(R) ==nCluster
20 | c = counts(R)          # cluster sizes
21 | clusters= R.centers    # cluster centers
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/Chapter10/c10_11_seasonality_usGDPquarterly.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_11_seasonality_usGDPquarterly.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | library(astsa)
11 | path<-"http://canisius.edu/~yany/RData/"
12 | dataSet<-"usGDPquarterly"
13 | con<-paste(path,dataSet,".RData",sep='')
14 | load(url(con))
15 | x<-.usGDPquarterly$DATE
16 | y<-.usGDPquarterly$GDP_CURRENT
17 | plot(x,y)
18 | diff4 = diff(y,4)
19 | acf2(diff4,24)
20 | 
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter07/c7_22_optim.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c7_22_optim.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | # Box-constrained minimization of the Rosenbrock function with Optim.
11 | using Optim
12 | # objective: was missing in the original although it is passed to OnceDifferentiable below
13 | f(x) = (1.0-x[1])^2+100.0*(x[2]-x[1]^2)^2
14 | function g!(s, x)   # in-place gradient of f, written into s
15 |  s[1] = -2.0*(1.0-x[1])-400.0*(x[2]-x[1]^2)*x[1]
16 |  s[2] = 200.0*(x[2]-x[1]^2)
17 | end
18 | lower = [1.25, -2.1]
19 | upper = [Inf, Inf]
20 | initial_x = [2.0, 2.0]
21 | od = OnceDifferentiable(f, g!, initial_x)
22 | results = optimize(od, initial_x,lower,upper,Fminbox{GradientDescent}())


--------------------------------------------------------------------------------
/Chapter09/c9_12_load_iris.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c9_12_load_iris.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import sklearn as sk
12 | from sklearn import datasets
13 | iris = datasets.load_iris()
14 | print("data:\n",iris.data[0:4,])
15 | print("target",iris.target[0:2,])
16 | mylist=list(iris.target)
17 | used = []
18 | unique = [x for x in mylist if x not in used and used.append(x)]
19 | print("unique values for targets\n",used)
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter09/c9_19_logicReg.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c9_19_logicReg.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | library(LogicReg)
13 | data(logreg.testdat)
14 | y<-logreg.testdat[,1]
15 | x<-logreg.testdat[, 2:21]
16 | n=1000
17 | n2=25000
18 | set.seed(123)
19 | myanneal<-logreg.anneal.control(start=-1,end=-4,iter=n2,update=n)
20 | output<-logreg(resp=y,bin=x,type=2,select = 1,ntrees=2,anneal.control=myanneal)
21 | plot(output)
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/Chapter02/c3_11_R_package_dslabs.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_11_R_package_dslabs.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 |  murders
11 |                   state abb        region population total
12 | 1               Alabama  AL         South    4779736   135
13 | 2                Alaska  AK          West     710231    19
14 | 3               Arizona  AZ          West    6392017   232
15 | 
16 | "
17 | 
18 | library(dslabs)
19 | # load the murders data set shipped with dslabs
20 | data(murders)
21 | # show the first rows (the original misspelled the object name as 'merders')
22 | head(murders)
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter03/c3_11_R_package_dslabs.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_11_R_package_dslabs.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 |  murders
11 |                   state abb        region population total
12 | 1               Alabama  AL         South    4779736   135
13 | 2                Alaska  AK          West     710231    19
14 | 3               Arizona  AZ          West    6392017   232
15 | 
16 | "
17 | 
18 | library(dslabs)
19 | # load the murders data set shipped with dslabs
20 | data(murders)
21 | # show the first rows (the original misspelled the object name as 'merders')
22 | head(murders)
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter05/c5_07_random_OLS.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_07_random_OLS.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | from scipy import stats 
12 | import scipy as sp
13 | sp.random.seed(31233)
14 | alpha=2.0
15 | beta=3.8
16 | n=1000
17 | x=sp.arange(n)
18 | y=alpha+beta*x+sp.random.rand(n)
19 | (beta, alpha, r_value, p_value, std_err) = stats.linregress(y,x) 
20 | print("Alpha , Beta")
21 | print(alpha,beta) 
22 | print("R-squared=", r_value**2)
23 | print("p-value =", p_value)
24 | 


--------------------------------------------------------------------------------
/Chapter08/c8_10_wine_quality.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_10_wine_quality.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(randomUniformForest)
12 | data(wineQualityRed)
13 | x = wineQualityRed[, -ncol(wineQualityRed)]
14 | # run unsupervised analysis on the first half of dataset 
15 | data1 = 1:floor(nrow(x)/2)
16 | shortFunction<-unsupervised.randomUniformForest
17 | model1 =shortFunction(x,subset =data1,depth = 5) 
18 | plot(model1)
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/Chapter09/c9_08_naiveBayes.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c9_08_naiveBayes.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(mlbench)
12 | data(HouseVotes84)
13 | head(HouseVotes84)
14 | 
15 | library(e1071)
16 | data(HouseVotes84, package = "mlbench")
17 | model<-naiveBayes(Class ~ ., data = HouseVotes84)
18 | #
19 | predict(model, HouseVotes84[1:10,])
20 | predict(model, HouseVotes84[1:10,], type = "raw")
21 | pred <- predict(model, HouseVotes84)
22 | table(pred, HouseVotes84$Class)
23 | 


--------------------------------------------------------------------------------
/Chapter07/c7_04_convex_function2.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c7_04_convex_function2.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 3/15/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | x<-seq(-4,4,0.1)
13 | a<-1
14 | b<-2
15 | c<-3
16 | myFunction<-function(x)a*x^2+b*x+c
17 | y<-myFunction(x)
18 | name<-"For a convex function:chord is above"
19 | plot(x,y,type='l',main=name)
20 | x1<--2
21 | y1<-myFunction(x1)
22 | x2<-3
23 | y2<-myFunction(x2)
24 | segments(x1, y1, x2, y2,col = par("fg"), lty = par("lty"), xpd = FALSE)
25 | 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/Chapter09/c9_24_reinforcementLearning_example.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c9_24_reinforcementLearning_example.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | library(ReinforcementLearning)
13 | set.seed(123)
14 | data <- sampleGridSequence(1000)
15 | control <- list(alpha = 0.1, gamma = 0.1, epsilon = 0.1)
16 | model <- ReinforcementLearning(data,s="State",a="Action",r="Reward",s_new="NextState",control=control)
17 | print(model)
18 | 
19 | 
20 | # Plotting learning curve
21 | plot(model)
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter07/c7_07_optimization_01.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c7_07_optimization_01.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import numpy as np
13 | from scipy.optimize import minimize
14 | def rosen(x):
15 |     """The Rosenbrock function"""
16 |     return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)
17 | #
18 | x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
19 | solution= minimize(rosen, x0, method='nelder-mead',options={'xtol': 1e-8, 'disp': True})
20 | 
21 | print(solution.x)
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter04/c4_15_add_Greek_letters.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_15_add_Greek_letters.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | set.seed(12345) 
12 | mu=4
13 | std=2
14 | nRandom=2000
15 | x <- rnorm(mean =mu, sd =std, n =nRandom)
16 | name<- "Normal Probability Density Function"
17 | curve(dnorm, from = -3, to = 3, n = 1000, main = name)
18 | formula<-expression(f(x) ==paste(frac(1, sqrt(2 * pi * sigma^2))," ",e^{frac(-(x - mu)^2, 2 * sigma^2)}))
19 | text(2, 0.3, formula, cex = 1.3)
20 | 
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter09/c9_32_iris.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c9_32_iris.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | # k-means on the iris measurements; the header above uses #= ... =# because `###` only comments one line.
11 | using Gadfly
12 | using RDatasets
13 | using Clustering
14 | iris = dataset("datasets", "iris")
15 | head(iris)
16 | features=permutedims(convert(Array, iris[:,1:4]),[2, 1])   # first four columns, transposed
17 | result=kmeans(features,3)                          
18 | nameX="PetalLength"
19 | nameY="PetalWidth"
20 | assignments=result.assignments                              # cluster index of each observation
21 | plot(iris, x=nameX,y=nameY,color=assignments,Geom.point)
22 | 
23 | 
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/Chapter06/c6_10_table6_1.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c6_10_table6_1.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/1/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | library(rattle)
13 | 
14 | require(rattle)
15 | 
16 | install.packages("rattle")
17 | 
18 | update.packages("rattle")
19 | 
20 | search()
21 | 
22 | library()
23 | 
24 | "package:XML" %in% search()
25 | 
26 | "package:XML" %in% .packages(all.available=T)
27 | 
28 | detach(package:rattle)
29 | 
30 | detach("package:rattle",unload=TRUE)
31 | 
32 | help(package=rattle)
33 | 
34 | library(help="rattle")
35 | 


--------------------------------------------------------------------------------
/Chapter10/c10_10_usGDP_graph.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_10_usGDP_graph.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | path<-"http://canisius.edu/~yany/RData/"
12 | dataSet<-"usGDPannual"
13 | con<-paste(path,dataSet,".RData",sep='')
14 | load(url(con))
15 | title<-"US GDP"
16 | xTitle<-"Year"
17 | yTitle<-"US annual GDP"
18 | x<-.usGDPannual$YEAR
19 | y<-.usGDPannual$GDP
20 | plot(x,y,main=title,xlab=xTitle,ylab=yTitle)
21 | 
22 | 
23 | yTitle<-"Log US annual GDP"
24 | plot(x,log(y),main=title,xlab=xTitle,ylab=yTitle)


--------------------------------------------------------------------------------
/Chapter09/c9_38_iris_prediction.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 |   Name     : c9_38_iris_prediction.py
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | """
11 | 
12 | from sklearn import metrics
13 | from sklearn import datasets
14 | from sklearn.tree import DecisionTreeClassifier
15 | x=datasets.load_iris()
16 | model=DecisionTreeClassifier()
17 | model.fit(x.data, x.target)
18 | print(model)
19 | true=x.target
20 | predicted=model.predict(x.data)
21 | print(metrics.classification_report(true, predicted))
22 | print(metrics.confusion_matrix(true, predicted))


--------------------------------------------------------------------------------
/Chapter04/c4_26_qgraph_network.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_26_qgraph_network.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(qgraph)
12 | data(big5)
13 | data(big5groups)
14 | qgraph(cor(big5),minimum = 0.25,cut = 0.4,vsize = 1.5,
15 |     groups = big5groups,legend = TRUE, borders = FALSE,theme = 'gray')
16 | title("Correlations among 5 factors",line = 2.5)   # title() needs an existing plot, so it follows qgraph()
17 | 
18 | 
19 | # colorful one 
20 | "
21 |  qgraph(cor(big5),minimum = 0.25,cut = 0.4,vsize = 1.5,
22 |     groups = big5groups,legend = TRUE, borders = FALSE)
23 | 
24 | "
25 | 
26 | 


--------------------------------------------------------------------------------
/Chapter07/c7_20_JuMP01.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c7_20_JuMP01.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | # Small linear program solved with JuMP and the ECOS solver.
11 | # The header above uses #= ... =# because `###` only comments a single line.
12 | using JuMP
13 | using ECOS
14 | m= Model(solver =ECOSSolver())
15 | @variable(m, 0 <= x <= 2 )
16 | @variable(m, 0 <= y <= 30 )
17 | @setObjective(m, Max, 5x + 3*y )
18 | @addConstraint(m, 1x + 5y <= 3.0 )
19 | print(m)
20 | status = solve(m)
21 | println("Objective value: ", getObjectiveValue(m))
22 | println("x = ", getValue(x))
23 | println("y = ", getValue(y))
24 | 
25 | # NOTE(review): the macro/function names above appear to follow the old JuMP API documented here - confirm
26 | #https://jump.readthedocs.io/en/release-0.2/jump.html


--------------------------------------------------------------------------------
/Chapter04/c4_10_getHistram_IBMreturn.py:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_10_getHistram_IBMreturn.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | import numpy as np
12 | import quandl as qd
13 | import matplotlib.pyplot as plt
14 | #
15 | x=qd.get("WIKI/ibm")
16 | p=x[['Adj. Close']]
17 | ret=p.diff()/p
18 | ret2=ret.dropna()
19 | np.histogram(ret2,bins='auto')
20 | plt.show()
21 | 
22 | 
23 | [n,bins,patches]=np.histogram(ret,100)
24 | mu=np.mean(ret)
25 | sigma=np.std(ret)
26 | x=mlt.mlab.normpdf(binds,mu,sigma)
27 | mlt.plot(bins,x,color="red",lw=2)
28 | mlt.show()


--------------------------------------------------------------------------------
/Chapter12/c12_13_parallel.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c12_13_parallel.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/27/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 |   
12 | library(parallel)
13 | detectCores()
14 | myFunction<- function(iter=1,n=5){
15 |     x<- rnorm(n, mean=0, sd=1 )
16 |     eps <- runif(n,-2,2)
17 |     y <- 1 + 2*x + eps
18 |     result<-lm( y ~ x )
19 |     final<-cbind(result$coef,confint(result))
20 |     return(final) 
21 | }
22 | #
23 | m<-5000
24 | n2<-5000
25 | system.time(lapply(1:m,myFunction,n=n2))
26 | system.time(mclapply(1:m,myFunction,n=n2))
27 | 
28 | 
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/Chapter04/c4_17_QuantEcon_julia.jl:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_17_QuantEcon_julia.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 |             https://lectures.quantecon.org/jl/julia_plots.html
11 | "
12 | 
13 | using QuantEcon: meshgrid
14 | using PyPlot:surf
15 | using Plots
16 | n = 50
17 | x = linspace(-3, 3, n)
18 | y = x
19 | z = Array{Float64}(n, n)
20 | f(x, y) = cos(x^2 + y^2) / (1 + x^2 + y^2)
21 | for i in 1:n
22 |     for j in 1:n
23 |         z[j, i] = f(x[i], y[j])
24 |     end
25 | end
26 | xgrid, ygrid = meshgrid(x, y)
27 | surf(xgrid, ygrid, z',alpha=0.7)


--------------------------------------------------------------------------------
/Chapter10/c10_19_catwalk_not_complete.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c10_19_catwalk_not_complete.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/23/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import datetime
12 | import pandas
13 | from sqlalchemy import create_engine
14 | from metta import metta_io as metta
15 | from catwalk.storage import FSModelStorageEngine, CSVMatrixStore
16 | from catwalk.model_trainers import ModelTrainer
17 | from catwalk.predictors import Predictor
18 | from catwalk.evaluation import ModelEvaluator
19 | from catwalk.utils import save_experiment_and_get_hash
20 | help(FSModelStorageEngine)
21 | 


--------------------------------------------------------------------------------
/Chapter02/c3_30_merge_left_index.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_30_merge_left_index.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import pandas as pd
12 | import scipy as sp
13 | x= pd.DataFrame({'YEAR': [2010,2011, 2012, 2013],
14 |                  'IBM': [0.2, -0.3, 0.13, -0.2],
15 |                  'WMT': [0.1, 0, 0.05, 0.23]})
16 | y = pd.DataFrame({'date': [2011,2013,2014, 2015],
17 |                  'C': [0.12, 0.23, 0.11, -0.1],
18 |                  'SP500': [0.1,0.17, -0.05, 0.13]})
19 | print(pd.merge(x,y, right_index=True,left_index=True))
20 | 
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter03/c3_30_merge_left_index.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_30_merge_left_index.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import pandas as pd
12 | import scipy as sp
13 | x= pd.DataFrame({'YEAR': [2010,2011, 2012, 2013],
14 |                  'IBM': [0.2, -0.3, 0.13, -0.2],
15 |                  'WMT': [0.1, 0, 0.05, 0.23]})
16 | y = pd.DataFrame({'date': [2011,2013,2014, 2015],
17 |                  'C': [0.12, 0.23, 0.11, -0.1],
18 |                  'SP500': [0.1,0.17, -0.05, 0.13]})
19 | print(pd.merge(x,y, right_index=True,left_index=True))
20 | 
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter10/c10_18_annual_ret_sp500.txt:
--------------------------------------------------------------------------------
 1 | library(timeSeries)
 2 | path<-'http://canisius.edu/~yany/RData/'
 3 | dataSet<-'sp500monthly.RData'
 4 | link<-paste(path,dataSet,sep='')
 5 | load(url(link))
 6 | p<-.sp500monthly$ADJ.CLOSE
 7 | n<-length(p)
 8 | logRet<-log(p[2:n]/p[1:(n-1)])
 9 | 
10 | x<-data.frame(logRet,row.names=.sp500monthly$DATE[2:n])
11 | by <- timeSequence(from = rownames(x)[1],  to = rownames(x)[n-1], by = "year")
12 | y<-aggregate(x,by,sum)
13 | 
14 | 
15 | 
16 | library(plyr)
17 | df <- data.frame(DATE = c("1", "1", "2", "3", "3"), B = c(2, 3, 3, 5, 6))
18 | dfsum <- ddply(df, c("DATE"), summarize, B = sum(B))
19 | 
20 | 
21 | 
22 | 
23 |  xx <- MSFT
24 |  byx <- timeSequence(from = start(xx),  to = end(xx), by = "week")
25 |  yy<-aggregate(xx,byx,mean)
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/Chapter02/c3_29_merge_different_names.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_29_merge_different_names.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | import scipy as sp
14 | x= pd.DataFrame({'YEAR': [2010,2011, 2012, 2013],
15 |                  'IBM': [0.2, -0.3, 0.13, -0.2],
16 |                  'WMT': [0.1, 0, 0.05, 0.23]})
17 | y = pd.DataFrame({'date': [2011,2013,2014, 2015],
18 |                  'C': [0.12, 0.23, 0.11, -0.1],
19 |                  'SP500': [0.1,0.17, -0.05, 0.13]})
20 | print(pd.merge(x,y, left_on='YEAR',right_on='date'))
21 | 
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/Chapter03/c3_29_merge_different_names.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_29_merge_different_names.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | import scipy as sp
14 | x= pd.DataFrame({'YEAR': [2010,2011, 2012, 2013],
15 |                  'IBM': [0.2, -0.3, 0.13, -0.2],
16 |                  'WMT': [0.1, 0, 0.05, 0.23]})
17 | y = pd.DataFrame({'date': [2011,2013,2014, 2015],
18 |                  'C': [0.12, 0.23, 0.11, -0.1],
19 |                  'SP500': [0.1,0.17, -0.05, 0.13]})
20 | print(pd.merge(x,y, left_on='YEAR',right_on='date'))
21 | 
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/Chapter09/c9_45_same_as_c9_14_good.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # coding: utf-8
 3 | 
 4 | # In[14]:
 5 | 
 6 | 
 7 | import numpy as np
 8 | from sklearn import datasets
 9 | from sklearn.neighbors import KNeighborsClassifier as KNC
10 | iris = datasets.load_iris()
11 | x= iris.data
12 | y= iris.target
13 | np.unique(y)
14 | np.random.seed(123)
15 | indices = np.random.permutation(len(x))
16 | iris_x_train = x[indices[:-10]]
17 | iris_y_train = y[indices[:-10]]
18 | iris_x_test  = x[indices[-10:]]
19 | iris_y_test  = y[indices[-10:]]
20 | model=KNC()
21 | model.fit(iris_x_train, iris_y_train) 
22 | KNC(algorithm='auto',leaf_size=30, metric='minkowski',
23 |    metric_params=None,n_jobs=1,n_neighbors=5, p=2,weights='uniform')
24 | out=model.predict(iris_x_test)
25 | print("predicted:",out)
26 | print("True     :",iris_y_test)
27 | 
28 | 


--------------------------------------------------------------------------------
/Chapter03/c3_19_missing_code.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_19_missing_code.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/16/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import scipy as sp
13 | import pandas as pd
14 | path="https://canisius.edu/~yany/data/"
15 | dataSet="z.csv"
16 | infile=path+dataSet
17 | x=pd.read_csv(infile,header=None)
18 | print(x.head())
19 | print((x[[1,1,2,3,4,5]] ==0).sum())
20 | 
21 | 
22 | x2=x
23 | x2[[1,2,3,4,5]] = x2[[1,2,3,4,5]].replace(0, sp.NaN)
24 | print(x2.head())
25 | #print((x2== sp.NaN).sum())
26 | 
27 | x3=x2
28 | x3.fillna(x3.mean(), inplace=True)
29 | print(x3.head())
30 | 
31 | # print(x.describe())
32 | 
33 | 


--------------------------------------------------------------------------------
/Chapter07/c7_11_ff5industries.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c7_11_ff5industries.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | path<-"http://canisius.edu/~yany/RData/ff5industries.RData"
12 | load(url(path))
13 | retMatrix<-as.matrix(.ff5industries[,2:6]/100)   # columns 2-6, divided by 100
14 | n1<-ncol(retMatrix)
15 | w<-rep(1/n1,n1)                                   # start from equal weights
16 | A<-1.5
17 | bigValue=100                                      # penalty factor when weights do not sum to 1
18 | # objective to minimize: negative of (mean - 0.5*A*variance) plus the sum-to-one penalty
19 | utilityFunction<-function(w){
20 |     portfolioRet<-retMatrix%*%w
21 |     x<-as.vector(portfolioRet)                    # plain vector so var() returns a scalar, not a 1x1 matrix
22 |     loss<-(sum(w)-1)^2*bigValue
23 |     u=-(mean(x)-0.5*A*var(x))+loss
24 |     return(u)
25 | }
26 | optim(w,utilityFunction,method="L-BFGS-B",lower =0, upper =0.5)   # bounds require L-BFGS-B
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/Chapter09/c9_44_same_as_c9_14_good.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # coding: utf-8
 3 | 
 4 | # In[15]:
 5 | 
 6 | 
 7 | import numpy as np
 8 | from sklearn import datasets
 9 | from sklearn.neighbors import KNeighborsClassifier as KNC
10 | iris = datasets.load_iris()
11 | x= iris.data
12 | y= iris.target
13 | np.unique(y)
14 | np.random.seed(123)
15 | indices = np.random.permutation(len(x))
16 | iris_x_train = x[indices[:-10]]
17 | iris_y_train = y[indices[:-10]]
18 | iris_x_test = x[indices[-10:]]
19 | iris_y_test = y[indices[-10:]]
20 | knn = KNC()
21 | knn.fit(iris_x_train, iris_y_train)
22 | KNC(algorithm='auto',leaf_size=30, metric='minkowski',
23 | metric_params=None,n_jobs=1,n_neighbors=5, p=2,weights='uniform')
24 | knn.predict(iris_x_test)
25 | out=knn.predict(iris_x_test)
26 | print("predicted:",out)
27 | print("True :",iris_y_test)
28 | 
29 | 


--------------------------------------------------------------------------------
/Chapter02/c3_12_merge_01.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_12_merge_01.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import scipy as sp
12 | import pandas as pd
13 | 
14 | x= pd.DataFrame({'key': ["ID1","ID2", "ID3", "ID4"],
15 |                  'x': [0.1, 0.02 0.05, 0.02],
16 |                  'y': [0, 1, 2, 3]})
17 | y = pd.DataFrame({'key': ['ID1', 'ID2', 'ID5', 'ID7'],
18 |                   'z': [11, 12, 22, 23],
19 |                   'd': [23, 15',2, 3]})
20 | print(sp.shape(x))
21 | print(sp.shape(y))
22 | result = pd.merge(x,y, on='key')
23 | print(result)
24 | result2=pd.merge(x,y)
25 | print(result2)
26 | 
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/Chapter03/c3_12_merge_01.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_12_merge_01.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import scipy as sp
12 | import pandas as pd
13 | 
14 | x= pd.DataFrame({'key': ["ID1","ID2", "ID3", "ID4"],
15 |                  'x': [0.1, 0.02 0.05, 0.02],
16 |                  'y': [0, 1, 2, 3]})
17 | y = pd.DataFrame({'key': ['ID1', 'ID2', 'ID5', 'ID7'],
18 |                   'z': [11, 12, 22, 23],
19 |                   'd': [23, 15',2, 3]})
20 | print(sp.shape(x))
21 | print(sp.shape(y))
22 | result = pd.merge(x,y, on='key')
23 | print(result)
24 | result2=pd.merge(x,y)
25 | print(result2)
26 | 
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/Chapter05/c5_17_ibm_beta.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c5_17_ibm_beta.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | ret_f<-function(data){
12 |    # column 1 is parsed as the date, column 6 supplies the price series
13 |    dates<-as.Date(data[,1])
14 |    prices<-data[,6]
15 |    k<-nrow(data)
16 |    # simple return p[t]/p[t-1]-1, stamped with the date of row t
17 |    out<-data.frame(DATE=dates[2:k],RET=prices[2:k]/prices[1:(k-1)]-1,stringsAsFactors=F)
18 |    return(out)
19 | }
20 | #
21 | x<-read.csv("http://canisius.edu/~yany/data/ibmMonthly.csv")
22 | stock<-ret_f(x)   # DATE/RET table for the stock
23 | #
24 | y<-read.csv("http://canisius.edu/~yany/data/^gspcMonthly.csv")
25 | mkt<-ret_f(y)     # was ref_f(), a name that is never defined
26 | colnames(mkt)<-c("DATE","MKTRET")
27 | # merge() joins on the shared column DATE
28 | final<-merge(stock,mkt)
29 | 


--------------------------------------------------------------------------------
/Chapter05/c5_30_run_linearRegressionOctave.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c5_30_run_linearRegressionOctave.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 
11 | 
12 | rand('seed',123)
13 | n = 50;
14 | x = sort(rand(n,1)*5-1);
15 | y = 2+1.5*x + randn(size(x));
16 | figure % open a new figure window
17 | plot(x, y, 'o');
18 | 
19 | # OLS via the normal equations: b = inv(X'X) X'y
20 | n = length(y); 
21 | x2= [ones(n, 1), x];  % Add a column of ones to x
22 | b = inv(x2'*x2)*x2'*y;
23 | R = y - (x2 * b);        # residuals
24 | v = (R'*R)/(n - size(x2,2)); # residual variance, df = observations - parameters
25 | sigma = v * inv(x2'*x2); # variance covariance matrix
26 | se = sqrt(diag(sigma));  # std errors of parameters 
27 | 


--------------------------------------------------------------------------------
/Chapter02/c3_26_ff3monthly2pickle.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_26_ff3monthly2pickle.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | infile="http://canisius.edu/~yany/data/ff3monthly.csv"
14 | ff3=pd.read_csv(infile,skiprows=3)
15 | print(ff3.head(2))
16 | # output to pickle
17 | ff3.to_pickle("c:/temp/ff3.pkl")
18 | # output to a csv file 
19 | outfile=open("c:/temp/ff3.csv","w")
20 | ff3.to_csv(outfile,index=None)
21 | outfile.close()
22 | # output to text file
23 | outfile2=open("c:/temp/ff3.txt","w")
24 | ff3.to_csv(outfile2, header=True, index=None, sep=' ', mode='a')
25 | outfile2.close()
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/Chapter03/c3_26_ff3monthly2pickle.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_26_ff3monthly2pickle.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | infile="http://canisius.edu/~yany/data/ff3monthly.csv"
14 | ff3=pd.read_csv(infile,skiprows=3)
15 | print(ff3.head(2))
16 | # output to pickle
17 | ff3.to_pickle("c:/temp/ff3.pkl")
18 | # output to a csv file 
19 | outfile=open("c:/temp/ff3.csv","w")
20 | ff3.to_csv(outfile,index=None)
21 | outfile.close()
22 | # output to text file
23 | outfile2=open("c:/temp/ff3.txt","w")
24 | ff3.to_csv(outfile2, header=True, index=None, sep=' ', mode='a')
25 | outfile2.close()
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/Chapter12/c12_07_snow_parallel_Rmpi_UNIX.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c12_07_snow_parallel_UNIX.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/14/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(snow)
12 | library(parallel)
13 | #library(Rmpi)
14 | myFunction<-function(n) {
15 |     a<-rnorm(n)                    # n standard-normal draws
16 |     final<-log(abs(a))+a^3+2*a;
17 |     return(final)
18 | }
19 | #
20 | nCores=11;
21 | #Using multicore: mclapply with mc.cores worker processes
22 | system.time(mclapply(rep(5E6,11),myFunction,mc.cores=nCores))
23 | #Serial baseline for comparison (plain sapply, no parallelism)
24 | system.time(sapply(rep(5E6,11),myFunction))
25 | #Using a two-worker socket cluster (MPI variant: cl <- getMPIcluster())
26 | cl <- makeCluster(c("localhost","localhost"), type = "SOCK")
27 | system.time(parSapply(cl,rep(5E6,11),myFunction))
28 | stopCluster(cl)   # shut the workers down; they were previously left running


--------------------------------------------------------------------------------
/Chapter02/c3_31_merge_by2variables.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_31_merge_by2variables.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | x= pd.DataFrame({'ID': ['IBM', 'IBM', 'WMT', 'WMT'],
14 |                  'date': [2010, 2011, 2010, 2011],
15 |                   'SharesOut': [100, 40, 60, 90],
16 |                   'Asset': [20, 30, 10, 30]})
17 | 
18 | y = pd.DataFrame({'ID': ['IBM', 'IBM', 'C', 'WMT'],
19 |                   'date': [2010, 2014, 2010, 2010],
20 |                     'Ret': [0.1, 0.2, -0.1,0.2],
21 |                     'ROA': [0.04,-0.02,0.03,0.1]})
22 | 
23 | z= pd.merge(x,y, on=['ID', 'date'])


--------------------------------------------------------------------------------
/Chapter03/c3_31_merge_by2variables.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_31_merge_by2variables.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | 
12 | import pandas as pd
13 | x= pd.DataFrame({'ID': ['IBM', 'IBM', 'WMT', 'WMT'],
14 |                  'date': [2010, 2011, 2010, 2011],
15 |                   'SharesOut': [100, 40, 60, 90],
16 |                   'Asset': [20, 30, 10, 30]})
17 | 
18 | y = pd.DataFrame({'ID': ['IBM', 'IBM', 'C', 'WMT'],
19 |                   'date': [2010, 2014, 2010, 2010],
20 |                     'Ret': [0.1, 0.2, -0.1,0.2],
21 |                     'ROA': [0.04,-0.02,0.03,0.1]})
22 | 
23 | z= pd.merge(x,y, on=['ID', 'date'])


--------------------------------------------------------------------------------
/Chapter08/c8_05_kmeans01.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c8_05_kMeans01.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(readr) 
12 | library(corrplot)
13 | library(ggplot2)
14 | #
15 | path<-"http://canisius.edu/~yany/RData/wine.RData"
16 | load(url(path))   # the code below relies on this defining `red` and `white`
17 | # red: drop the quality column, then correlate the remaining columns
18 | red2<-red; red2$quality<-NULL
19 | red_cor<-cor(red2)
20 | # white: same steps
21 | white2<-white; white2$quality<-NULL
22 | white_cor<-cor(white2)
23 | class(red_cor)
24 | class(white_cor)
25 | # draw each correlation matrix with the coefficients printed as numbers
26 | corrplot(red_cor,method="number")
27 | 
28 | corrplot(white_cor,method="number")
29 | 
30 | 
31 | 
32 | #https://www.kaggle.com/maitree/kmeans-unsupervised-learning-using-wine-dataset/notebook
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/Chapter09/c9_10_RTextTools.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c9_10_RTextTools.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(RTextTools)
12 | data(NYTimes)
13 | # keep 100 rows drawn without replacement from rows 1..3100
14 | data <- NYTimes[sample(1:3100,size=100,replace=FALSE),]
15 | # document-term matrix from the Title and Subject columns, TF-IDF weighted
16 | matrix <- create_matrix(cbind(data["Title"],data["Subject"]),
17 |                         language="english", removeNumbers=TRUE,
18 |                         stemWords=FALSE, weighting=tm::weightTfIdf)
19 | # rows 1-75 are the training set, rows 76-100 the test set; labels are Topic.Code
20 | container <- create_container(matrix,data$Topic.Code,
21 |                               trainSize=1:75, testSize=76:100, virgin=FALSE)
22 | models <- train_models(container, algorithms=c("MAXENT","SVM"))
23 | results <- classify_models(container, models)
24 | analytics <- create_analytics(container, results)
25 | summary(analytics)


--------------------------------------------------------------------------------
/Chapter02/c3_19_missing_code.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_19_missing_code2.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import scipy as sp
12 | import pandas as pd
13 | path="https://archive.ics.uci.edu/ml/machine-learning-databases/"
14 | dataSet="pima-indians-diabetes/pima-indians-diabetes.data"
15 | infile=path+dataSet
16 | x=pd.read_csv(infile,header=None)
17 | print(x.head())
18 | print((x[[1,2,3,4,5]] == 0).sum())
19 | 
20 | x2=x
21 | x2[[1,2,3,4,5]] = x2[[1,2,3,4,5]].replace(0, sp.NaN)
22 | print(x2.head())
23 | #print((x2== sp.NaN).sum())
24 | 
25 | x3=x2
26 | x3.fillna(x3.mean(), inplace=True)
27 | print(x3.head())
28 | 
29 | # print(x.describe())
30 | 
31 | 


--------------------------------------------------------------------------------
/Chapter09/c9_11_RTextTool_2.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c9_11_RTextTool2.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | library(RTextTools)
13 | data(NYTimes)
14 | # keep 100 rows drawn without replacement from rows 1..3100
15 | data <- NYTimes[sample(1:3100,size=100,replace=FALSE),]
16 | # document-term matrix from the Title and Subject columns, TF-IDF weighted
17 | matrix <- create_matrix(cbind(data["Title"],data["Subject"]),
18 |                         language="english", removeNumbers=TRUE,
19 |                         stemWords=FALSE, weighting=tm::weightTfIdf)
20 | # same split as 1-75 / 76-100, but the container is built with virgin=TRUE
21 | container <- create_container(matrix,data$Topic.Code,
22 |                               trainSize=1:75, testSize=76:100, virgin=TRUE)
23 | models <- train_models(container, algorithms=c("MAXENT","SVM"))
24 | results <- classify_models(container, models)
25 | analytics <- create_analytics(container, results)
26 | summary(analytics)


--------------------------------------------------------------------------------
/Chapter12/c12_02_parallel_01.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #http://gforge.se/2015/02/how-to-go-parallel-in-r-basics-tips/
 3 | 
 4 | library(parallel)
 5 | n_cores <- max(1, detectCores() - 1, na.rm = TRUE)   # leave one core free, never request 0/NA workers
 6 | cl <- makeCluster(n_cores)
 7 | parLapply(cl, 2:4,function(exponent) 2^exponent)
 8 | stopCluster(cl)
 9 | 
10 | # Example 2: 'base' is defined only in this session, so the workers cannot find it
11 | c2<-makeCluster(n_cores)
12 | base <- 2
13 | try(parLapply(c2, 2:4, function(exponent) base^exponent))   # try(): the expected error must not skip stopCluster
14 | stopCluster(c2)
15 | 
16 | # Error in checkForRemoteErrors(val) : 
17 | #   3 nodes produced errors; first error: object 'base' not found
18 | 
19 | # Example 3: export 'base' to the workers before using it
20 | c3<-makeCluster(n_cores)
21 | base <- 2
22 | clusterExport(c3, "base")
23 | parLapply(c3, 2:4, function(exponent)  base^exponent)
24 | stopCluster(c3)
25 | # Example 4: clusterExport() sends the value 'base' has at that moment (2);
26 | # reassigning it afterwards in this session does not reach the workers
27 | c4<-makeCluster(n_cores)
28 | clusterExport(c4, "base")
29 | base <- 4
30 | # Run
31 | parLapply(c4, 2:4, function(exponent) 
32 |             base^exponent)
33 | 
34 | stopCluster(c4)


--------------------------------------------------------------------------------
/Chapter02/c3_09_R_package_sjlabbeld.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_09_package_sjlabbeld.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | library(sjlabelled)
13 | mydat <- read_spss("my_spss_data.sav")
14 | # retrieve variable labels
15 | mydat.var <- get_label(mydat)
16 | 
17 | 
18 | 
19 | # read the same file again, this time with atomic.to.fac = TRUE
20 | mydat <- read_spss("my_spss_data.sav", atomic.to.fac = TRUE)
21 | 
22 | # retrieve value labels
23 | mydat.val <- get_labels(mydat)
24 | ## End(Not run)
25 | 
26 | 
27 | # NOTE(review): the write_* helpers are documented as taking a data frame - confirm
28 | x<-data.frame(x=1:100)
29 | 
30 | write_spss(x,"c:/temp/t.spss", drop.na = FALSE)
31 | 
32 | write_stata(x, "c:/temp/stat.csv",drop.na = FALSE, version = 14)
33 | # path literal fixed: it was written as c"://temp/...", which does not parse
34 | write_sas(x, "c:/temp/t.sas7bdat", drop.na = FALSE)
35 | 
36 | 


--------------------------------------------------------------------------------
/Chapter03/c3_09_R_package_sjlabbeld.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_09_package_sjlabbeld.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | library(sjlabelled)
13 | mydat <- read_spss("my_spss_data.sav")
14 | # retrieve variable labels
15 | mydat.var <- get_label(mydat)
16 | 
17 | 
18 | 
19 | # read the same file again, this time with atomic.to.fac = TRUE
20 | mydat <- read_spss("my_spss_data.sav", atomic.to.fac = TRUE)
21 | 
22 | # retrieve value labels
23 | mydat.val <- get_labels(mydat)
24 | ## End(Not run)
25 | 
26 | 
27 | # NOTE(review): the write_* helpers are documented as taking a data frame - confirm
28 | x<-data.frame(x=1:100)
29 | 
30 | write_spss(x,"c:/temp/t.spss", drop.na = FALSE)
31 | 
32 | write_stata(x, "c:/temp/stat.csv",drop.na = FALSE, version = 14)
33 | # path literal fixed: it was written as c"://temp/...", which does not parse
34 | write_sas(x, "c:/temp/t.sas7bdat", drop.na = FALSE)
35 | 
36 | 


--------------------------------------------------------------------------------
/Chapter04/c4_24_Brownian_motion_html.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_24_brownian_motion_html.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(animation)
12 | a<-c(3, 3, 1, 0.5)   # passed to par(mar=)
13 | b<-c(2, 0.5, 0)      # passed to par(mgp=)
14 | part1<-"Random walk on the 2D plane: for each point"
15 | part2<-"(x, y), x = x + rnorm(1) and y = y + rnorm(1)."
16 | des<-c(part1,part2)
17 | titleName<-"Demonstration of Brownian Motion"
18 | # run the animation inside saveHTML(), which is given the title and description
19 | saveHTML({
20 |   par(mar = a, mgp = b, tcl = -0.3,
21 |       cex.axis = 0.8, cex.lab = 0.8, cex.main = 1)
22 |   # nmax is 150 in an interactive session, otherwise 10
23 |   ani.options(interval = 0.05, nmax = if (interactive()) 150 else 10)
24 |   brownian.motion(pch = 21, cex = 5, col = "red", bg = "yellow")
25 | }, description = des, title = titleName)


--------------------------------------------------------------------------------
/Chapter09/c9_17_others_1.R:
--------------------------------------------------------------------------------
 1 | # https://stats.stackexchange.com/questions/21572/how-to-plot-decision-boundary-of-a-k-nearest-neighbor-classifier-from-elements-o/21602#21602
 2 | 
 3 | library(ElemStatLearn)
 4 | require(class)
 5 | x <- mixture.example$x
 6 | g <- mixture.example$y
 7 | xnew <- mixture.example$xnew
 8 | px1 <- mixture.example$px1
 9 | px2 <- mixture.example$px2
10 | # classify every row of xnew by its 15 nearest neighbours in x, keeping the "prob" attribute
11 | mod15 <- knn(x, xnew, g, k=15, prob=TRUE)
12 | prob <- attr(mod15, "prob")
13 | prob <- ifelse(mod15=="1", prob, 1-prob)   # flip to 1-prob where the predicted class is not "1"
14 | prob15 <- matrix(prob, length(px1), length(px2))
15 | par(mar=rep(2,4))
16 | contour(px1, px2, prob15, levels=0.5, labels="", xlab="", ylab="",
17 |         main="15-nearest neighbour", axes=FALSE)
18 | points(x, col=ifelse(g==1, "coral", "cornflowerblue"))
19 | gd <- expand.grid(x=px1, y=px2)
20 | points(gd, pch=".", cex=1.2, col=ifelse(prob15>0.5, "coral", "cornflowerblue"))
21 | box()
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter10/c10_30_ltfat_example.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c10_30_ltfat_example.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | #}
11 | 
12 | 
13 | pkg load ltfat
14 | f = greasy;
15 | name1="sparsified coefficients";
16 | name2="dual system coefficients";
17 | F = frame('dgtreal','gauss',64,512);
18 | lambda = 0.1;
19 | % Solve the basis pursuit problem
20 | [c,~,~,frec,cd] = franalasso(F,f,lambda);
21 | figure(1); % Plot sparse coefficients
22 | plotframe(F,c,'dynrange',50);    % plain ASCII quotes; typographic quotes do not parse
23 | figure(2); % Plot coefficients 
24 | plotframe(F,cd,'dynrange',50);
25 | norm(f-frec)                     % printed: norm of f minus frec
26 | figure(3);
27 | semilogx([sort(abs(c),'descend')/max(abs(c)),...
28 | sort(abs(cd),'descend')/max(abs(cd))]);
29 | legend({name1,name2});
30 | 


--------------------------------------------------------------------------------
/Chapter12/c12_12_pi_01.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | import sympy
 4 | def plot_one_digit_freqs(f1):
 5 |     """
 6 |     Plot one digit frequency counts using matplotlib.
 7 |     """
 8 |     ax = plt.plot(f1,'bo-')
 9 |     plt.title('Single digit counts in pi')
10 |     plt.xlabel('Digit')
11 |     plt.ylabel('Count')
12 |     return ax
13 | #
14 | def one_digit_freqs(digits, normalize=False):
15 |     """
16 |     Consume digits of pi and compute 1 digit freq. counts.
17 |     """
18 |     freqs = np.zeros(10, dtype='i4')
19 |     for d in digits:
20 |         freqs[int(d)] += 1
21 |     if normalize:
22 |         freqs = freqs/freqs.sum()
23 |     return freqs
24 | #
25 | pi = sympy.pi.evalf(40)
26 | pi
27 | pi = sympy.pi.evalf(10000)
28 | digits = (d for d in str(pi)[2:])  # create a sequence of digits
29 | freqs = one_digit_freqs(digits)
30 | plot_one_digit_freqs(freqs)
31 | plt.show()
32 | 


--------------------------------------------------------------------------------
/Chapter02/c3_18_missing_code_apropos.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_18_missing_code_apropos.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | apropos("^na")   # names of objects on the search path that start with "na"
12 | #  [1] "na.action"              "na.contiguous"          "na.exclude"            
13 | #  [4] "na.fail"                "na.omit"                "na.pass"               
14 | #  [7] "na_example"             "names"                  "names.POSIXlt"         
15 | # [10] "names<-"                "names<-.POSIXlt"        "namespaceExport"       
16 | # [13] "namespaceImport"        "namespaceImportClasses" "namespaceImportFrom"   
17 | # [16] "namespaceImportMethods" "napredict"              "naprint"               
18 | # [19] "naresid"                "nargs"                 
19 | 


--------------------------------------------------------------------------------
/Chapter03/c3_18_missing_code_apropos.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_18_missing_code_apropos.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | apropos("^na")   # names of objects on the search path that start with "na"
12 | #  [1] "na.action"              "na.contiguous"          "na.exclude"            
13 | #  [4] "na.fail"                "na.omit"                "na.pass"               
14 | #  [7] "na_example"             "names"                  "names.POSIXlt"         
15 | # [10] "names<-"                "names<-.POSIXlt"        "namespaceExport"       
16 | # [13] "namespaceImport"        "namespaceImportClasses" "namespaceImportFrom"   
17 | # [16] "namespaceImportMethods" "napredict"              "naprint"               
18 | # [19] "naresid"                "nargs"                 
19 | 


--------------------------------------------------------------------------------
/Chapter02/c3_08_merge_datasets.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_08_merge_datasets.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | set.seed(123)
12 | 
13 | nStocks<-4
14 | nPeriods<-24
15 | 
16 | # nPeriods x nStocks matrix of uniform draws on [-0.1, 0.2]
17 | x<-runif(nStocks*nPeriods,min=-0.1,max=0.20)
18 | a<-matrix(x,nrow=nPeriods,ncol=nStocks)
19 | 
20 | # month-start dates 2000-01 .. 2001-12, one per row of `a`
21 | d1<-as.Date("2000-01-01")
22 | d2<-as.Date("2001-12-01")
23 | dd<-seq(from=d1,to=d2,by="months")
24 | stocks<-data.frame(dd,a)
25 | colnames(stocks)<-c("DATE",paste0("stock",1:nStocks))
26 | 
27 | # a longer monthly series (1999-01 .. 2010-12) of uniform draws on [-0.05, 0.1]
28 | d3<-as.Date("1999-01-01")
29 | d4<-as.Date("2010-12-01")
30 | dd2<-seq(from=d3,to=d4,by="months")
31 | y<-runif(length(dd2),min=-0.05,max=0.1)
32 | market<-data.frame(dd2,y)
33 | colnames(market)<-c("DATE","MKT")
34 | 
35 | # merge() joins on the shared column name, DATE
36 | final<-merge(stocks,market)


--------------------------------------------------------------------------------
/Chapter03/c3_08_merge_datasets.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_08_merge_datasets.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | set.seed(123)
12 | 
13 | nStocks<-4
14 | nPeriods<-24
15 | 
16 | # nPeriods x nStocks matrix of uniform draws on [-0.1, 0.2]
17 | x<-runif(nStocks*nPeriods,min=-0.1,max=0.20)
18 | a<-matrix(x,nrow=nPeriods,ncol=nStocks)
19 | 
20 | # month-start dates 2000-01 .. 2001-12, one per row of `a`
21 | d1<-as.Date("2000-01-01")
22 | d2<-as.Date("2001-12-01")
23 | dd<-seq(from=d1,to=d2,by="months")
24 | stocks<-data.frame(dd,a)
25 | colnames(stocks)<-c("DATE",paste0("stock",1:nStocks))
26 | 
27 | # a longer monthly series (1999-01 .. 2010-12) of uniform draws on [-0.05, 0.1]
28 | d3<-as.Date("1999-01-01")
29 | d4<-as.Date("2010-12-01")
30 | dd2<-seq(from=d3,to=d4,by="months")
31 | y<-runif(length(dd2),min=-0.05,max=0.1)
32 | market<-data.frame(dd2,y)
33 | colnames(market)<-c("DATE","MKT")
34 | 
35 | # merge() joins on the shared column name, DATE
36 | final<-merge(stocks,market)


--------------------------------------------------------------------------------
/Chapter09/c9_05_NYTime_01.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c9_05_NYTime_01.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(RTextTools)
12 | data(NYTimes)
13 | set.seed(123)
14 | data <- NYTimes[sample(1:3100,size=100,replace=FALSE),]
15 | head(data)
16 | 
17 | # document-term matrix from the Title and Subject columns, TF-IDF weighted
18 | matrix <- create_matrix(cbind(data["Title"],data["Subject"]),
19 |                         language="english", removeNumbers=TRUE,
20 |                         stemWords=FALSE, weighting=tm::weightTfIdf)
21 | # rows 1-75 are the training set, rows 76-100 the test set; labels are Topic.Code
22 | container <- create_container(matrix,data$Topic.Code,
23 |                               trainSize=1:75, testSize=76:100, virgin=FALSE)
24 | models <- train_models(container, algorithms=c("MAXENT","SVM"))
25 | results <- classify_models(container, models)
26 | score_summary <- create_scoreSummary(container, results)
27 | 
28 | 


--------------------------------------------------------------------------------
/Chapter09/c9_18_others_2.R:
--------------------------------------------------------------------------------
 1 | # https://stats.stackexchange.com/questions/21572/how-to-plot-decision-boundary-of-a-k-nearest-neighbor-classifier-from-elements-o/21602#21602
 2 | 
 3 | library(ElemStatLearn)
 4 | require(class)
 5 | x <- mixture.example$x
 6 | y <- mixture.example$y
 7 | xnew <- mixture.example$xnew
 8 | px1 <- mixture.example$px1
 9 | px2 <- mixture.example$px2
10 | # colours for the two classes and the number of neighbours
11 | color1<-"blue"
12 | color2<-"pink3"
13 | kNearest<-5  
14 | model<- knn(x, xnew,y,k=kNearest,prob=TRUE)
15 | title<-paste0(kNearest,"-nearest neighbour")   # paste0: no space before the hyphen
16 | prob <- attr(model,"prob")
17 | prob <- ifelse(model=="1",prob,1-prob)   # flip to 1-prob where the predicted class is not "1"
18 | prob15 <- matrix(prob,length(px1),length(px2))
19 | par(mar=rep(2,4))
20 | contour(px1,px2,prob15,levels=0.5,main=title,axes=FALSE)
21 | points(x, col=ifelse(y==1,color1,color2))   # labels are in y; `g` is not defined in this script
22 | gd <- expand.grid(x=px1, y=px2)
23 | points(gd,pch=".",cex=1.5,col=ifelse(prob15>0.5,color1,color2))
24 | box()
25 | 
26 | 
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/Chapter08/c8_02_cluster.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c8_02_cluster.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 3/25/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | ## generate 500 objects, divided into 2 clusters.
13 | 
14 | 
15 | library(cluster)
16 | set.seed(123)
17 | n1<-200; mean1<-0; std1<-8
18 | n2<-300; mean2<-80; std2<-8
19 | set1<-cbind(rnorm(n1,mean1,std1), rnorm(n1,mean1,std1))   # 200 points around (0, 0)
20 | set2<-cbind(rnorm(n2,mean2,std2), rnorm(n2,mean2,std2))   # 300 points around (80, 80)
21 | x <- rbind(set1,set2)
22 | # cluster into 2 groups with clara, using 50 samples
23 | data <- clara(x, 2, samples=50)
24 | plot(data)
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | data <- clara(x, 2, samples=50)
33 | #data
34 | #data$clusinfo
35 | ## using pamLike=TRUE  gives the same (apart from the 'call'):
36 | all.equal(data[-8],clara(x, 2, samples=50, pamLike = TRUE)[-8])   # second fit must call clara(), not data()
37 | plot(data)
38 | 


--------------------------------------------------------------------------------
/Chapter04/c4_25_bisectionMethod_html.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_25_bisection_html.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(animation)
12 | a<-c(4, 4, 1, 2)   # passed to par(mar=)
13 | part1<-"The bisection method is a root-finding algorithm"
14 | part2<-"which works by repeatedly dividing an interval in half and then"
15 | part3<-"selecting the subinterval in which a root exists."
16 | des<-c(part1,part2,part3)
17 | titleName<-"The Bisection Method for Root-finding on an Interval"
18 | # run the bisection demo inside saveHTML(); htmlfile names the output page
19 | saveHTML({
20 |     par(mar = a)
21 |     bisection.method(main = "")
22 | }, title = titleName, description = des,
23 |    img.name = "bisection.method", htmlfile = "bisection.method.html",
24 |    ani.height = 400, ani.width = 600, interval = 1)
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/Chapter09/c9_42_ff3factorDaily.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 |   Name     : c9_42_ff3factorDaily.py
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/16/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | """
11 | 
12 | import  scipy as sp
13 | import pandas as pd
14 | import quandl as qd
15 | import statsmodels.api as sm
16 | #quandl.ApiConfig.api_key = 'YOUR_API_KEY'
17 | a=qd.get("WIKI/IBM") 
18 | p=a['Adj. Close']
19 | n=len(p)
20 | ret=[]
21 | #
22 | for i in range(n-1):
23 |     ret.append(p[i+1]/p[i]-1)
24 | #
25 | c=pd.DataFrame(ret,a.index[1:n],columns=['RET'])
26 | ff=pd.read_pickle('c:/temp/ffDaily.pkl')
27 | final=pd.merge(c,ff,left_index=True,right_index=True)
28 | y=final['RET']
29 | x=final[['MKT_RF','SMB','HML']]
30 | #x=final[['MKT_RF']]
31 | x=sm.add_constant(x)
32 | results=sm.OLS(y,x).fit()
33 | print(results.summary())
34 | 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/Chapter04/c4_14_time_value_of_money.py:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c4_14_time_value_of_money.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | import matplotlib.pyplot as plt
12 | #
13 | fig = plt.figure(facecolor='white')
14 | dd = plt.axes(frameon=False)
15 | dd.set_frame_on(False)
16 | dd.get_xaxis().tick_bottom()
17 | dd.axes.get_yaxis().set_visible(False)
18 | x=range(0,11,2)
19 | x1=range(len(x),0,-1)
20 | y = [0]*len(x);
21 | plt.annotate("$100 received today",xy=(0,0),xytext=(2,0.15),arrowprops=dict(facecolor='black',shrink=2))
22 | plt.annotate("$100 received in 2 years",xy=(2,0),xytext=(3.5,0.10),arrowprops=dict(facecolor='black',shrink=2))
23 | s = [50*2.5**n for n in x1];
24 | plt.title("Time value of money ")
25 | plt.xlabel("Time (number of years)")
26 | plt.scatter(x,y,s=s);
27 | plt.show()


--------------------------------------------------------------------------------
/Chapter09/c9_16_generate_titanicRData.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c9_16_generate_titanicRData.R
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | "
11 | 
12 | x<-read.csv("c:/temp/titanic.csv")
13 | # Preview the data; the console output seen by the author is kept as comments
14 | head(x)
15 | #   CLASS   AGE GENDER SURVIVED
16 | # 1 First Adult   Male      Yes
17 | # 2 First Adult   Male      Yes
18 | # 3 First Adult   Male      Yes
19 | # 4 First Adult   Male      Yes
20 | # 5 First Adult   Male      Yes
21 | # 6 First Adult   Male      Yes
22 | 
23 | 
24 | # Store the data frame under the hidden name .titanic, as .rds and as .RData
25 | .titanic<-x
26 | saveRDS(.titanic,file="c:/temp/titanic.rds")
27 | 
28 | save(.titanic,file="c:/temp/titanic.RData")
29 | 
30 | # Load the published .RData from the web (presumably it defines .titanic again) and inspect it
31 | path<-"http://canisius.edu/~yany/RData/"
32 | dataSet<-"titanic"
33 | link<-paste(path,dataSet,".RData",sep='')
34 | con<-url(link)
35 | load(con)
36 | dim(.titanic)
37 | head(.titanic)
38 | 
39 | 
40 | 
41 | 


--------------------------------------------------------------------------------
/Chapter12/c12_06_plyr_example.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c12_06_plyr_example.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/14/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # plyr split-apply-combine examples: first on a simulated data frame, then on `baseball` (not defined here; presumably the data set shipped with plyr)
12 | library(plyr)
13 | d1<-c(rep('A', 8), rep('B', 15), rep('C', 6))
14 | d2<-sample(c("M", "F"), size = 29, replace = TRUE)
15 | d3<-runif(n = 29, min = 18, max = 54)
16 | # 29 rows: group label (8 A, 15 B, 6 C), random sex, uniform age between 18 and 54
17 | dfx <- data.frame(group =d1,sex=d2,age=d3)
18 | # Mean and standard deviation of age, rounded to 2 decimals, for every group/sex combination
19 | ddply(dfx, .(group, sex), summarize,
20 |  mean = round(mean(age), 2),
21 |  sd = round(sd(age), 2))
22 | # Same pattern on `baseball`: rows per year, nrow/ncol per lg, mean rbi per year (plotted), and a per-id career_year column
23 | ddply(baseball[1:100,], ~ year, nrow)
24 | ddply(baseball, .(lg), c("nrow", "ncol"))
25 | rbi<-ddply(baseball, .(year), summarise,
26 |       mean_rbi=mean(rbi, na.rm = TRUE))
27 | plot(mean_rbi~year,type="l",data = rbi)
28 | base2<-ddply(baseball,.(id),mutate,
29 |     career_year = year - min(year) + 1
30 | )
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/Chapter10/c10_20_grangerTest_IBM_sp500.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_20_grangerTest_IBM_sp500.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | # ret_f: simple returns p[t]/p[t-1]-1 from column 6 of x, paired with column 1 (first row dropped); the return column is named RET_<TICKER>
12 | ret_f<-function(x,ticker=""){
13 |     n<-nrow(x)
14 |     p<-x[,6]
15 |     ret<-p[2:n]/p[1:(n-1)]-1
16 |     output<-data.frame(x[2:n,1],ret)
17 |     name<-paste("RET_",toupper(ticker),sep='')
18 |     colnames(output)<-c("DATE",name)
19 |     return(output)
20 | }
21 | x<-read.csv("http://canisius.edu/~yany/data/ibmDaily.csv",header=T)
22 | ibmRet<-ret_f(x,"ibm")
23 | x<-read.csv("http://canisius.edu/~yany/data/^gspcDaily.csv",header=T)
24 | mktRet<-ret_f(x,"mkt")
25 | final<-merge(ibmRet,mktRet)
26 | # merge() with no `by` joins on the columns the two tables share, here DATE
27 | # Granger-causality tests with one lag, run in both directions
28 | library(lmtest)
29 | grangertest(RET_IBM ~ RET_MKT, order = 1, data =final)
30 | 
31 | grangertest(RET_MKT ~ RET_IBM, order = 1, data =final)
32 | 
33 | 


--------------------------------------------------------------------------------
/Chapter02/c3_13_merge_02_stock.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 |   Name     : c3_13_merge_02_stock.py
 4 |   Book     : Hands-on Data Science with Anaconda)
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 1/15/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | """
11 | 
12 | import pandas as pd
13 | import scipy as sp
14 | x= pd.DataFrame({'YEAR': [2010,2011, 2012, 2013],
15 |                  'FirmA': [0.2, -0.3, 0.13, -0.2],
16 |                  'FirmB': [0.1, 0, 0.05, 0.23]})
17 | y = pd.DataFrame({'YEAR': [2011,2013,2014, 2015],
18 |                  'FirmC': [0.12, 0.23, 0.11, -0.1],
19 |                  'SP500': [0.1,0.17, -0.05, 0.13]})
20 | #
21 | print("\n  inner  merge ")
22 | print(pd.merge(x,y, on='YEAR'))
23 | print(" \n  outer merge  ")
24 | print(pd.merge(x,y, on='YEAR',how='outer'))
25 | print("\n  left  merge  ")
26 | print(pd.merge(x,y, on='YEAR',how='left'))
27 | print("\n  right  merge ")
28 | print(pd.merge(x,y, on='YEAR',how='right'))
29 | 


--------------------------------------------------------------------------------
/Chapter03/c3_13_merge_02_stock.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 |   Name     : c3_13_merge_02_stock.py
 4 |   Book     : Hands-on Data Science with Anaconda)
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 1/15/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | """
11 | 
12 | import pandas as pd
13 | import scipy as sp
14 | x= pd.DataFrame({'YEAR': [2010,2011, 2012, 2013],
15 |                  'FirmA': [0.2, -0.3, 0.13, -0.2],
16 |                  'FirmB': [0.1, 0, 0.05, 0.23]})
17 | y = pd.DataFrame({'YEAR': [2011,2013,2014, 2015],
18 |                  'FirmC': [0.12, 0.23, 0.11, -0.1],
19 |                  'SP500': [0.1,0.17, -0.05, 0.13]})
20 | #
21 | print("\n  inner  merge ")
22 | print(pd.merge(x,y, on='YEAR'))
23 | print(" \n  outer merge  ")
24 | print(pd.merge(x,y, on='YEAR',how='outer'))
25 | print("\n  left  merge  ")
26 | print(pd.merge(x,y, on='YEAR',how='left'))
27 | print("\n  right  merge ")
28 | print(pd.merge(x,y, on='YEAR',how='right'))
29 | 


--------------------------------------------------------------------------------
/Chapter09/c9_33_bird_Kmeans.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c9_33_bird_Kmeans.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 
11 | 
12 | A = double(imread('bird_small.png'));
13 | A = A / 255; % Divide by 255, values in the range 0 - 1
14 | imgSize = size(A);
15 | X = reshape(A, imgSize(1) * imgSize(2), 3);
16 | k = 16; % using 4-bit (16) colors,minimize cost
17 | [Centroids,idx,cost]=generateKMeansClustersMinCost(X,k,10,10);
18 | fprintf('Cost/Distortion of computed clusters:%.3f\n', cost);
19 | % regenerate colors & image
20 | NewX = Centroids(idx, :);
21 | NewA = reshape(NewX, imgSize(1), imgSize(2), 3);
22 | % compare both the images
23 | fprintf('Comparing original & compressed images\n');
24 | subplot(1, 2, 1);
25 | imagesc(A);
26 | axis("square");
27 | title('Original');
28 | subplot(1, 2, 2);
29 | imagesc(NewA);
30 | axis("square");
31 | title('Compressed');


--------------------------------------------------------------------------------
/Chapter09/c9_14_iris_predicted_vs_trueOne.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 |   Name     : c9_14_iris_predicted_vs_trueOne.py
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 5/26/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | """
11 | import numpy as np
12 | from sklearn import datasets
13 | from sklearn.neighbors import KNeighborsClassifier as KNC
14 | iris = datasets.load_iris()
15 | x= iris.data
16 | y= iris.target
17 | np.unique(y)
18 | np.random.seed(123)
19 | indices = np.random.permutation(len(x))
20 | iris_x_train = x[indices[:-10]]
21 | iris_y_train = y[indices[:-10]]
22 | iris_x_test  = x[indices[-10:]]
23 | iris_y_test  = y[indices[-10:]]
24 | knn = KNC()
25 | knn.fit(iris_x_train, iris_y_train) 
26 | KNC(algorithm='auto',leaf_size=30, metric='minkowski',
27 |    metric_params=None,n_jobs=1,n_neighbors=5, p=2,weights='uniform')
28 | out=knn.predict(iris_x_test)
29 | print("predicted:",out)
30 | print("True     :",iris_y_test)


--------------------------------------------------------------------------------
/Chapter09/c9_04_simplist_One_tree_tinatic.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c9_04_simplist_tree_titanic.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(rattle, quietly=TRUE)   # provides fancyRpartPlot() used at the end
12 | library(rpart, quietly=TRUE)
13 | con<-url("http://canisius.edu/~yany/RData/titanic.RData")
14 | load(con)
15 | x<-.titanic
16 | scoring  <- FALSE
17 | set.seed(42)
18 | risk<-ident<-ignore<-weights<-numeric<-NULL
19 | str(x)                          # the data lives in x; no object called `dataset` exists
20 | n<- nrow(x)
21 | train  <- sample(n,0.7*n)       # 70% of the rows for training
22 | validate<- sample(setdiff(seq_len(n),train),0.15*n)
23 | test<- setdiff(setdiff(seq_len(n), train), validate)
24 | inputVars<-categoric<-c("CLASS","AGE","GENDER")
25 | target<-"SURVIVED"
26 | output<-rpart(SURVIVED~.,data=x[train, c(inputVars, target)],
27 |    method="class",parms=list(split="information"),control=
28 |    rpart.control(usesurrogate=0,maxsurrogate=0))
29 | fancyRpartPlot(output, main="Decision Tree for Titanic")
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/Chapter05/c5_27_CAPM.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c5_27_CAPM.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/30/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 |  Note #1: go to http://finance.yahoo.com to download the last 5-year monthly
11 |           data for IBM and S&P500. The ticker for S&P500 is ^GSPC
12 |       #2: delete the first observation which contains 'null'
13 |       #3: since readtable is deprecated, see c5_31_CAPM.jl which 
14 |           uses the CSV.read() function
15 | =#
16 | 
17 | 
18 | using DataFrames;
19 | using GLM, StatsModels
20 | #
21 | x = readtable("c:/temp/ibmMonthly5years.csv")
22 | p=x[:Adj_Close]
23 | n=length(p)
24 | stockRet=p[2:n]./p[1:(n-1)]-1
25 | #
26 | y = readtable("c:/temp/sp500Monthly5years.csv")
27 | p2=y[:Adj_Close]
28 | n2=length(p2)
29 | mktRet=p2[2:n2]./p2[1:(n2-1)]-1
30 | #
31 | n3=min(length(stockRet),length(mktRet))
32 | data = DataFrame(X=mktRet[1:n3], Y=stockRet[1:n3])
33 | OLS = glm(@formula(Y ~ X), data, Normal(), IdentityLink())


--------------------------------------------------------------------------------
/Chapter05/c5_31_CAPM.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c5_31_CAPM.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/30/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | Note #1: go to http://finance.yahoo.com to download the last 5-year monthly
11 |           data for IBM and S&P500. The ticker for S&P500 is ^GSPC
12 |       #2: delete the first observation which contains 'null'
13 |       #3: check if the last line is a blank line. If it is, delete it. 
14 | =#
15 | 
16 | using DataFrames, CSV
17 | using GLM, StatsModels
18 | #
19 | function f(x)
20 |   p=x[Symbol("Adj Close")]
21 |   n=length(p)
22 |   a=p[2:n]./p[1:(n-1)]-1.0
23 |   return convert(Array{Float64,1}, a)
24 | end
25 | #
26 | x = CSV.read("c:/temp/ibmMonthly5years.txt")
27 | stockRet=f(x)
28 | y = CSV.read("c:/temp/sp500Monthly5years.txt")
29 | mktRet=f(y)
30 | n3=min(length(stockRet),length(mktRet))
31 | data = DataFrame(X=mktRet[1:n3], Y=stockRet[1:n3])
32 | OLS = glm(@formula(Y ~ X), data, Normal(), IdentityLink())
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/Chapter09/c9_26_test.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c9_26_test.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 1; % script-file marker: without a leading statement Octave treats a file that starts with `function` as a function file
11 | function initEnv
12 |    % load packages
13 |    pkg load specfun;
14 |    % add the data/helper directory to the search path (absolute path, so it must not be prefixed with pwd())
15 |    addpath('c:/temp/');
16 |    % clear
17 |    clear ; close all; clc;
18 | end
19 | 
20 | 
21 | 
22 | %init
23 | initEnv();
24 | %  Loads the dataset - adds X, Xval, yval to environment
25 | load('c:/temp/anomaly.dat');
26 | 
27 | %print(X);
28 | 
29 | % compute gaussian parameters
30 | %[mu sigma2] = computeGaussianParams(X);
31 | 
32 | % compute epsilon/threshold for probability using validation-set
33 | %epsilon = computeThresholdForMultivarGaussian(Xval, yval, mu, sigma2);
34 | 
35 | % compute multivariate Gaussian distribution probability
36 | %probability = computeMultivarGaussianDistribution(X, mu, sigma2);
37 | 
38 | % count of outliers
39 | %fprintf("Number of outliers found: %d\n", sum(probability < epsilon));


--------------------------------------------------------------------------------
/Chapter05/c5_05_get_sp500Daily.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_05_get_sp500daily.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | Objective: Get daily ^GSPC data from Yan's webpage 
11 | 
12 | Date   Open   High    Low  Close  Adj Close   Volume
13 | 0  1950-01-03  16.66  16.66  16.66  16.66      16.66  1260000
14 | 1  1950-01-04  16.85  16.85  16.85  16.85      16.85  1890000
15 |              Date         Open         High          Low        Close  \
16 | 17117  2018-01-11  2752.969971  2767.560059  2752.780029  2767.560059   
17 | 17118  2018-01-12  2770.179932  2787.850098  2769.639893  2786.239990   
18 | 
19 |          Adj Close      Volume  
20 | 17117  2767.560059  3641320000  
21 | 17118  2786.239990  3573970000  
22 | 
23 | """
24 | import pandas as pd
25 | 
26 | inFile="http://canisius.edu/~yany/data/^gspcDaily.csv"
27 | d=pd.read_csv(inFile)
28 | print(d.head(2))
29 | print(d.tail(2))
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 |  
37 | 
38 | 


--------------------------------------------------------------------------------
/Chapter06/c6_19.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c6_19.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | 
11 | 
12 | Pkg.add("Gadfly")
13 | using Gadfly
14 | draw(SVG("output.svg", 6inch, 3inch), plot([sin, cos], 0, 25))
15 | 
16 | 
17 | function module_functions(modname)
18 |     list = Symbol[]
19 |     for nm in names(modname)
20 |         typeof(eval(nm)) == Function && push!(list,nm)
21 |     end
22 |     return list
23 | end
24 | 
25 | 
26 | using PyPlot
27 | module_functions(PyPlot)
28 | 
29 | 
30 | using MTH229  
31 | f(x) = exp(-2*x^2)
32 | plot(f, -3, 3) 			
33 | 
34 | 
35 | using QuantEcon: meshgrid
36 | n = 50
37 | x = linspace(-3, 3, n)
38 | y = x
39 | z = Array{Float64}(n, n)
40 | f(x, y) = cos(x^2 + y^2) / (1 + x^2 + y^2)
41 | for i in 1:n
42 |     for j in 1:n
43 |         z[j, i] = f(x[i], y[j])
44 |     end
45 | end
46 | xgrid, ygrid = meshgrid(x, y)
47 | surf(xgrid, ygrid, z', cmap=ColorMap("jet"), alpha=0.7)
48 | zlim(-0.5, 1.0)
49 | 
50 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Packt
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Chapter10/c10_17_sp500_annual_return_nextYear.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_17_sp500_annual_return_nextYear.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | library(data.table)
13 | path<-'http://canisius.edu/~yany/RData/'
14 | dataSet<-'sp500monthly.RData'
15 | link<-paste(path,dataSet,sep='')
16 | load(url(link))
17 | #head(.sp500monthly,2)
18 | p<-.sp500monthly$ADJ.CLOSE
19 | n<-length(p)
20 | logRet<-log(p[2:n]/p[1:(n-1)])
21 | years<-format(.sp500monthly$DATE[2:n],"%Y")
22 | y<-data.frame(.sp500monthly$DATE[2:n],years,logRet)
23 | colnames(y)<-c("DATE","YEAR","LOGRET")
24 | y2<- data.table(y)
25 | z<-y2[,sum(LOGRET),by=YEAR]
26 | z2<-na.omit(z)
27 | annualRet<-data.frame(z2$YEAR,exp(z2[,2])-1)
28 | n<-nrow(annualRet)
29 | std<-sd(annualRet[,2])
30 | stdErr<-std/sqrt(n)
31 | ourMean<-mean(annualRet[,2])
32 | min2<-ourMean-2*stdErr
33 | max2<-ourMean+2*stdErr
34 | cat("[min    mean     max ]\n")
35 | cat(min2,ourMean,max2,"\n")
36 | 
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/Chapter07/c7_21_JuMp02.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 |   Name     : c7_21_JuMP02.jl
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 3/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | =#
10 | 
11 | 
12 | 
13 | using JuMP
14 | using Gurobi
15 | #master = Model()
16 | #master = Model(solver = GLPKSolverLP(method=:Exact))
17 | master = Model(solver =GurobiSolver())
18 | @variable(master, x[1:2] >= 0)
19 | w=[14 31 36 45]
20 | A=[1 0; 1 0; 0 2; 1 0]
21 | b=[211; 395; 610; 97]
22 | @defConstrRef myCons[1:4]
23 | for i=1:4
24 |     myCons[i] = @addConstraint(master, dot(x, vec(A[i,:]))>=b[i])
25 | end
26 | @setObjective(master, Min, sum(x))   # minimise the total of the x variables
27 | master
28 | status=solve(master)
29 | getValue(x)
30 | println("\nOptimal Solution is:\n")
31 | println("width: ", w)
32 | epsilon=1e-6                         # values at or below this are treated as zero and not reported
33 | for i=1:size(A,2)
34 |     if getValue(x[i])>epsilon 
35 |         println("Cutting Pattern: ", A[:,i], ", Number of Paper Rolls Cut Using this Pattern: ", getValue(x[i]))
36 |     end
37 | end
38 | 
39 | 
40 | 
41 | 
42 | # http://www.juliaopt.org/notebooks/Chiwei%20Yan%20-%20Cutting%20Stock.html


--------------------------------------------------------------------------------
/Chapter11/c11_02_myfincal.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c11_02_myfincal.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 5/8/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | def pv_f(r,n,fv):
12 |     """
13 |     Objective: estimate present value
14 |            r : period rate
15 |            n : number of periods
16 |           fv : future value
17 |                            fv
18 |     formula used : pv = --------      
19 |                         (1+r)**n
20 |     Example 1: >>>pv_f(0.1,1,100)        # meanings of input variables 
21 |                  90.9090909090909        # based on their input order
22 | 
23 |     Example #2 >>>pv_f(r=0.1,fv=100,n=1) # meanings based on keywords
24 |                  90.9090909090909
25 |     """
26 |     return fv/(1+r)**n
27 | # pvGrowingPerpetuity: returns c/(r-q); raises ZeroDivisionError when r == q
28 | def pvGrowingPerpetuity(c,r,q):
29 |     return(c/(r-q))
30 | # fv_f: pv compounded at rate r over n periods. fvAnnuity: c/r*((1+r)**n-1), so r must be non-zero
31 | def fv_f(pv,r,n):
32 |     return pv*(1+r)**n
33 | def fvAnnuity(r,n,c):
34 |     return c/r*((1+r)**n-1)
35 | # fvAnnuityDue: the fvAnnuity value multiplied by one extra growth factor (1+r)
36 | def fvAnnuityDue(r,n,c):
37 |     return c/r*((1+r)**n-1)*(1+r)
39 | 
40 | 


--------------------------------------------------------------------------------
/Chapter05/c5_16_ibm_beta.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_16_ibm_beta.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import quandl as qd
12 | import pandas as pd
13 | from scipy import stats
14 | x=qd.get("WIKI/ibm",collapse='monthly')
15 | #
16 | p=x[['Adj. Close']]
17 | ret=p.diff()/p.shift(1)
18 | stockRet=ret.dropna()
19 | stockRet.columns=['stockRet']
20 | #stockRet.assign(yyyymm=stockRet.index.strftime("%Y%m"))
21 | #
22 | inFile="http://canisius.edu/~yany/data/sp500monthlyEndOfMonthDate.csv"
23 | y=pd.read_csv(inFile,index_col=0)
24 | d=y[['Adj.Close']]
25 | ret2=d.diff()/d.shift(1)
26 | mktRet=ret2.dropna()
27 | mktRet.columns=['mktRet']
28 | df= stockRet.merge(mktRet, how='inner', left_index=True, right_index=True)
29 | (beta,alpha,r_value,p_value,std_err)=stats.linregress(df.stockRet,df.mktRet)
30 | alpha=round(alpha,8)
31 | beta=round(beta,3)
32 | r_value=round(r_value,3)
33 | p_vaue=round(p_value,3)
34 | print("alpha, beta, R2 and P-value")
35 | print(alpha,beta,r_value,p_value)
36 | 


--------------------------------------------------------------------------------
/Chapter02/c3_05_saveRDS.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_05_saveRDS.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 |    1. sepal length in cm
11 |    2. sepal width in cm
12 |    3. petal length in cm
13 |    4. petal width in cm
14 |    5. class: 
15 |       -- Iris Setosa
16 |       -- Iris Versicolour
17 |       -- Iris Virginica
18 | "
19 | 
20 | inFile<-"http://canisius.edu/~yany/data/ff3monthly.csv"
21 | ff3monthly<-read.csv(inFile)
22 | saveRDS(ff3monthly,file="c:/temp/ff3monthly.rds")
23 | 
24 | # Read the .rds file back and compare it with the in-memory copy.
25 | # The console output seen by the author is kept as comments so the script can be sourced.
26 | abc<-readRDS("c:/temp/ff3monthly.rds")
27 | head(abc,3)
28 | #         DATE MKT_RF     SMB     HML     RF
29 | # 1 1926-07-01 0.0296 -0.0230 -0.0287 0.0022
30 | # 2 1926-08-01 0.0264 -0.0140  0.0419 0.0025
31 | # 3 1926-09-01 0.0036 -0.0132  0.0001 0.0023
32 | head(ff3monthly,3)
33 | #         DATE MKT_RF     SMB     HML     RF
34 | # 1 1926-07-01 0.0296 -0.0230 -0.0287 0.0022
35 | # 2 1926-08-01 0.0264 -0.0140  0.0419 0.0025
36 | # 3 1926-09-01 0.0036 -0.0132  0.0001 0.0023
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/Chapter03/c3_05_saveRDS.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c3_05_saveRDS.R
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 |    1. sepal length in cm
11 |    2. sepal width in cm
12 |    3. petal length in cm
13 |    4. petal width in cm
14 |    5. class: 
15 |       -- Iris Setosa
16 |       -- Iris Versicolour
17 |       -- Iris Virginica
18 | "
19 | 
20 | inFile<-"http://canisius.edu/~yany/data/ff3monthly.csv"
21 | ff3monthly<-read.csv(inFile)
22 | saveRDS(ff3monthly,file="c:/temp/ff3monthly.rds")
23 | 
24 | # Read the .rds file back and compare it with the in-memory copy.
25 | # The console output seen by the author is kept as comments so the script can be sourced.
26 | abc<-readRDS("c:/temp/ff3monthly.rds")
27 | head(abc,3)
28 | #         DATE MKT_RF     SMB     HML     RF
29 | # 1 1926-07-01 0.0296 -0.0230 -0.0287 0.0022
30 | # 2 1926-08-01 0.0264 -0.0140  0.0419 0.0025
31 | # 3 1926-09-01 0.0036 -0.0132  0.0001 0.0023
32 | head(ff3monthly,3)
33 | #         DATE MKT_RF     SMB     HML     RF
34 | # 1 1926-07-01 0.0296 -0.0230 -0.0287 0.0022
35 | # 2 1926-08-01 0.0264 -0.0140  0.0419 0.0025
36 | # 3 1926-09-01 0.0036 -0.0132  0.0001 0.0023
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/Chapter10/c10_12_datarobot_not_working.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_12_datarobot_not_working.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(datarobot)
12 | 
13 | # Notes kept as comments: they contain backslashes and nested double quotes, so they cannot sit inside a quoted string.
14 | # Did not connect to DataRobot on package startup. Use `ConnectToDataRobot`.
15 | # To connect by default on startup, you can put a config file at: C:\Users\yany\Documents/.config/datarobot/drconfig.yaml
16 | #
17 | #
18 | # https://cran.r-project.org/web/packages/datarobot/index.html
19 | #
20 | # loc<- "YOUR-ENDPOINT-HERE"
21 | # myToken<-"YOUR-API_TOKEN-HERE"
22 | # ConnectToDataRobot(endpoint=loc,token=myToken)
23 | #
24 | # ConnectToDataRobot(endpoint = "YOUR-ENDPOINT-HERE", token = "YOUR-API_TOKEN-HERE")
25 | #
26 | # https://app.datarobot.com/api/v2
27 | #
28 | # loc1<- "https://app.datarobot.com/api/v2"
29 | # loc2<-"C:/Users/yany/.config/datarobot/drconfig.yaml"
30 | # ConnectToDataRobot(endpoint =loc1, token = loc2)
31 | # NOTE(review): loc2 is a config-file path, not an API token; this call is the one that did not work.
32 | #
33 | # https://cran.r-project.org/web/packages/datarobot/vignettes/IntroductionToDataRobot.html
34 | #
35 | #
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/Chapter04/c4_12_generate_Black_Scholes_formula.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c4_12_generate_Black_Scholes_formula.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | """
10 | 
11 | import matplotlib
12 | import numpy as np
13 | import matplotlib.mathtext as mathtext
14 | import matplotlib.pyplot as plt
15 | matplotlib.rc('image', origin='upper')
16 | parser = mathtext.MathTextParser("Bitmap")
17 | #parser = mathtext.MathTextParser("Black")
18 | #
19 | x1=r'$d_2=\frac{ln(S_0/K)+(r-\sigma^2/2)T}{\sigma\sqrt{T}}=d_1-\sigma\sqrt{T}$'
20 | x2=r'$d_1=\frac{ln(S_0/K)+(r+\sigma^2/2)T}{\sigma\sqrt{T}}$'
21 | x3=r' $c=S_0N(d_1)- Ke^{-rT}N(d_2)$'
22 | rgba1, depth1 = parser.to_rgba(x1, color='blue',fontsize=12, dpi=200)
23 | rgba2, depth2 = parser.to_rgba(x2, color='blue', fontsize=12, dpi=200)
24 | rgba3, depth3 = parser.to_rgba(x3, color='red',fontsize=14, dpi=200)
25 | fig = plt.figure()
26 | fig.figimage(rgba1.astype(float)/255., 100, 100)
27 | fig.figimage(rgba2.astype(float)/255., 100, 200)
28 | fig.figimage(rgba3.astype(float)/255., 100, 300)
29 | plt.show()
30 | 


--------------------------------------------------------------------------------
/Chapter09/c9_27_bird.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c9_27_bird.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 
11 | #pkg install optiminterp-0.3.4.tar.gz
12 | pkg load optiminterp
13 | A = double(imread('c:/temp/bird_small.png'));
14 | A = A / 255; % Divide by 255 so that all values are in the range 0 - 1
15 | imgSize = size(A);
16 | X = reshape(A, imgSize(1) * imgSize(2), 3);
17 | 
18 | % compress image using 4-bit (16) colors & minimize cost
19 | k = 16;
20 | [Centroids, idx, cost] = generateKMeansClustersMinCost(X, k, 10, 10);
21 | fprintf('Cost/Distortion of computed clusters: %.3f\n', cost);
22 | fprintf('Program paused. Press enter to continue.\n');
23 | pause;
24 | 
25 | % regenerate colors & image
26 | NewX = Centroids(idx, :);
27 | NewA = reshape(NewX, imgSize(1), imgSize(2), 3);
28 | 
29 | % compare both the images
30 | fprintf('Comparing original and compressed images...\n');
31 | subplot(1, 2, 1);
32 | imagesc(A);
33 | axis("square");
34 | title('Original');
35 | subplot(1, 2, 2);
36 | imagesc(NewA);
37 | axis("square");
38 | title('Compressed');


--------------------------------------------------------------------------------
/Chapter05/c5_06_get_sp500monthly.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_06_get_sp500monthly.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | Objective: Get monthly S&P500 (^GSPC) data from Yan's webpage 
11 | 
12 |     Date       Open   High    Low      Close  Adj Close    Volume
13 | 0  1950-01-01  16.660000  17.09  16.65  17.049999  17.049999  42570000
14 | 1  1950-02-01  17.049999  17.35  16.99  17.219999  17.219999  33430000
15 |            Date         Open         High          Low        Close  \
16 | 815  2017-12-01  2645.100098  2694.969971  2605.520020  2673.610107   
17 | 816  2018-01-01  2683.729980  2787.850098  2682.360107  2786.239990   
18 | 817  2018-01-12  2770.179932  2787.850098  2769.639893  2786.239990   
19 | 
20 |        Adj Close       Volume  
21 | 815  2673.610107  65251190000  
22 | 816  2786.239990  27862080000  
23 | 817  2786.239990   2129410147
24 | """
25 | import pandas as pd
26 | 
27 | inFile="http://canisius.edu/~yany/data/^gspcMonthly.csv"
28 | d=pd.read_csv(inFile)
29 | print(d.head(2))
30 | print(d.tail(3))
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 |  
38 | 
39 | 


--------------------------------------------------------------------------------
/Chapter09/c9_15_FamaFrench3factorModel.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 |   Name     : c9_15_FamaFrench3factorModel.py
 4 |   Book     : Hands-on Data Science with Anaconda )
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 4/6/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | """
11 | 
12 | import scipy as sp
13 | import numpy as np
14 | import pandas as pd
15 | import statsmodels.api as sm
16 | from matplotlib.finance import quotes_historical_yahoo_ochl as getData
17 | ticker='IBM'
18 | begdate=(2012,1,1)
19 | enddate=(2016,12,31)
20 | p= getData(ticker, begdate, enddate,asobject=True, adjusted=True)
21 | logret = sp.log(p.aclose[1:]/p.aclose[:-1])
22 | ddate=[]
23 | d0=p.date
24 | for i in range(0,sp.size(logret)):
25 |     x=''.join([d0[i].strftime("%Y"),d0[i].strftime("%m"),"01"])
26 |     ddate.append(pd.to_datetime(x, format='%Y%m%d').date())
27 | #    
28 | t=pd.DataFrame(logret,np.array(ddate),columns=['RET'])
29 | ret=sp.exp(t.groupby(t.index).sum())-1
30 | #
31 | ff=pd.read_pickle('c:/temp/ffMonthly.pkl')
32 | final=pd.merge(ret,ff,left_index=True,right_index=True)
33 | y=final['RET']
34 | x=final[['MKT_RF','SMB','HML']]
35 | x=sm.add_constant(x)
36 | results=sm.OLS(y,x).fit()
37 | print(results.summary())
38 | 


--------------------------------------------------------------------------------
/Chapter08/c8_29_PCA.py:
--------------------------------------------------------------------------------
 1 | 
 2 | #http://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_iris.html
 3 | 
 4 | import numpy as np
 5 | import matplotlib.pyplot as plt
 6 | from mpl_toolkits.mplot3d import Axes3D
 7 | from sklearn import decomposition
 8 | from sklearn import datasets
 9 | np.random.seed(5)
10 | centers = [[1, 1], [-1, -1], [1, -1]]
11 | iris = datasets.load_iris()
12 | X = iris.data
13 | y = iris.target
14 | 
15 | fig = plt.figure(1, figsize=(4, 3))
16 | plt.clf()
17 | ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
18 | 
19 | plt.cla()
20 | pca = decomposition.PCA(n_components=3)
21 | pca.fit(X)
22 | X = pca.transform(X)
23 | 
24 | for name, label in [('Setosa', 0), ('Versicolour', 1), ('Virginica', 2)]:
25 |     ax.text3D(X[y == label, 0].mean(),
26 |               X[y == label, 1].mean() + 1.5,
27 |               X[y == label, 2].mean(), name,
28 |               horizontalalignment='center',
29 |               bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
30 | # Reorder the labels to have colors matching the cluster results
31 | y = np.choose(y, [1, 2, 0]).astype(np.float)
32 | ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.spectral,
33 |            edgecolor='k')
34 | 
35 | ax.w_xaxis.set_ticklabels([])
36 | ax.w_yaxis.set_ticklabels([])
37 | ax.w_zaxis.set_ticklabels([])
38 | 
39 | plt.show()


--------------------------------------------------------------------------------
/Chapter09/c9_03_simplefied_tree_tinatic.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c9_03_simplified_tree_titanic.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | 
12 | library(rpart, quietly=TRUE)
13 | building <- TRUE
14 | scoring  <- ! building
15 | set.seed(42)
16 | # The input data frame must already be in `x`; it needs the columns
17 | # CLASS, AGE, GENDER and SURVIVED used below.
18 | if (!exists("x")) stop("data frame 'x' not found: load the data set into 'x' first")
19 | # `crs` is never created in this script (presumably a Rattle session supplies
20 | # it); create it when missing so the `crs$...` assignments below do not fail.
21 | if (!exists("crs")) crs <- new.env()
22 | crs$dataset <- x
23 | str(crs$dataset)
24 | crs$nobs     <- nrow(crs$dataset)
25 | # 70% training rows, 15% validation rows, the remainder for testing
26 | crs$train    <- crs$sample <- sample(crs$nobs, 0.7*crs$nobs)
27 | crs$validate <- sample(setdiff(seq_len(crs$nobs), crs$train), 0.15*crs$nobs)
28 | crs$test     <- setdiff(setdiff(seq_len(crs$nobs), crs$train), crs$validate)
29 | crs$input    <- c("CLASS", "AGE", "GENDER")
30 | crs$numeric  <- NULL
31 | crs$categoric<- c("CLASS", "AGE", "GENDER")
32 | crs$target   <- "SURVIVED"
33 | crs$risk     <- NULL
34 | crs$ident    <- NULL
35 | crs$ignore   <- NULL
36 | crs$weights  <- NULL
37 | #set.seed(crv$seed)
38 | # Classification tree on the training rows, using only input + target columns
39 | crs$rpart <- rpart(SURVIVED ~ .,data=crs$dataset[crs$train, c(crs$input, crs$target)],
40 |     method="class",parms=list(split="information"),control=rpart.control(usesurrogate=0, 
41 |     maxsurrogate=0))
42 | print(crs$rpart)
43 | printcp(crs$rpart)
44 | cat("\n")
45 | # fancyRpartPlot lives in the rattle package, which this script never attaches;
46 | # qualify the call so it resolves without library(rattle).
47 | rattle::fancyRpartPlot(crs$rpart, main="Decision Tree x $ SURVIVED")


--------------------------------------------------------------------------------
/Chapter02/c3_21_sort_order.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c3_21_sort_order.R
 4 |   Book     : Hands-on Data Science with Anaconda)
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 1/15/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | 
11 | >  dim(nyseListing)
12 | [1] 3387    4
13 | >  head(nyseListing)
14 |   Symbol                         Name MarketCap Exchange
15 | 1      A   Agilent Technologies, Inc. $12,852.3     NYSE
16 | 2     AA                   Alcoa Inc. $28,234.5     NYSE
17 | 3    AAI       AirTran Holdings, Inc.    $156.9     NYSE
18 | 4    AAP       Advance Auto Parts Inc  $3,507.4     NYSE
19 | 5    AAR              AMR CORPORATION     $81.7     NYSE
20 | 6    AAV ADVANTAGE ENERGY INCOME FUND  $1,674.4     NYSE
21 | > 
22 | >  x<-nyseListing[order(nyseListing$Name),]
23 | >  head(x)
24 |      Symbol                        Name MarketCap Exchange
25 | 2017    MMM                  3M Company $48,398.7     NYSE
26 | 557     CFD 40/86 Strategic Income Fund     $56.8     NYSE
27 | 1721    KDE   4Kids Entertainment, Inc.     $99.7     NYSE
28 | 2164    NDN        99 CENTS ONLY STORES    $432.3     NYSE
29 | 87      AHC       A.H. Belo Corporation    $107.4     NYSE
30 | 1242    GFW    AAG Holding Company Inc.      <NA>     NYSE
31 | 
32 | "
33 | 
34 | # Attach fImport and load the nyseListing data set.
35 | library(fImport)
36 | data(nyseListing)
37 | # Inspect the table: dimensions, then the first rows.
38 | dim(nyseListing)
39 | head(nyseListing)
40 | # Row permutation that sorts by company name, applied to the whole table.
41 | x <- nyseListing[with(nyseListing, order(Name)), ]
42 | head(x)
43 | 
44 | 


--------------------------------------------------------------------------------
/Chapter03/c3_21_sort_order.R:
--------------------------------------------------------------------------------
 1 | 
 2 | "
 3 |   Name     : c3_21_sort_order.R
 4 |   Book     : Hands-on Data Science with Anaconda)
 5 |   Publisher: Packt Publishing Ltd. 
 6 |   Author   : Yuxing Yan and James Yan
 7 |   Date     : 1/15/2018
 8 |   email    : yany@canisius.edu
 9 |              paulyxy@hotmail.com
10 | 
11 | >  dim(nyseListing)
12 | [1] 3387    4
13 | >  head(nyseListing)
14 |   Symbol                         Name MarketCap Exchange
15 | 1      A   Agilent Technologies, Inc. $12,852.3     NYSE
16 | 2     AA                   Alcoa Inc. $28,234.5     NYSE
17 | 3    AAI       AirTran Holdings, Inc.    $156.9     NYSE
18 | 4    AAP       Advance Auto Parts Inc  $3,507.4     NYSE
19 | 5    AAR              AMR CORPORATION     $81.7     NYSE
20 | 6    AAV ADVANTAGE ENERGY INCOME FUND  $1,674.4     NYSE
21 | > 
22 | >  x<-nyseListing[order(nyseListing$Name),]
23 | >  head(x)
24 |      Symbol                        Name MarketCap Exchange
25 | 2017    MMM                  3M Company $48,398.7     NYSE
26 | 557     CFD 40/86 Strategic Income Fund     $56.8     NYSE
27 | 1721    KDE   4Kids Entertainment, Inc.     $99.7     NYSE
28 | 2164    NDN        99 CENTS ONLY STORES    $432.3     NYSE
29 | 87      AHC       A.H. Belo Corporation    $107.4     NYSE
30 | 1242    GFW    AAG Holding Company Inc.      <NA>     NYSE
31 | 
32 | "
33 | 
34 | # Attach fImport and load the nyseListing data set.
35 | library(fImport)
36 | data(nyseListing)
37 | # Inspect the table: dimensions, then the first rows.
38 | dim(nyseListing)
39 | head(nyseListing)
40 | # Row permutation that sorts by company name, applied to the whole table.
41 | x <- nyseListing[with(nyseListing, order(Name)), ]
42 | head(x)
43 | 
44 | 


--------------------------------------------------------------------------------
/Chapter10/c10_26_pca.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c10_26_pca.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | #}
11 | 
12 | 
13 | % environment set-up
14 | initEnv();
15 | 
16 | % faces.mat is expected to provide X (per the original note: 5000 grayscale
17 | % face images of 32 x 32 pixels, one image per row)
18 | load ('faces.mat');
19 | 
20 | % feature scaling
21 | [mu, sigma] = computeScalingParams(X);
22 | XNorm = scaleFeatures(X, mu, sigma);
23 | 
24 | % console output is flushed explicitly only when running under Octave
25 | runningOctave = exist('OCTAVE_VERSION');
26 | 
27 | % principal components of the scaled data
28 | fprintf('Computing PCA...\n');
29 | if runningOctave, fflush(stdout); end
30 | [U, S] = computePCA(XNorm);
31 | 
32 | % projection onto the first k components
33 | k = 100;
34 | fprintf('Projecting data to reduced dimensional space - from: %d to: %d\n', size(XNorm, 2), k);
35 | if runningOctave, fflush(stdout); end
36 | [Z, variance] = projectPCAData(XNorm, U, S, k);
37 | fprintf('Varianced retained by projecting to reduced-size: %f\n', variance);
38 | 
39 | % back-projection to the original number of dimensions
40 | fprintf('Recovering data to original dimensional space...\n');
41 | if runningOctave, fflush(stdout); end
42 | XRec = recoverPCAData(Z, U, k);
43 | 
44 | fprintf("Visualize face data before and after reduction\n");
45 | colormap(gray);
46 | nFaces = 10;
47 | for idx = 1:nFaces
48 |     % top row: input image
49 |     subplot(2, nFaces, idx);
50 |     imagesc(reshape(X(idx, :), 32, 32));
51 |     axis("square", "off");
52 |     % bottom row: the same image after projection and recovery
53 |     subplot(2, nFaces, nFaces + idx);
54 |     imagesc(reshape(XRec(idx, :), 32, 32));
55 |     axis("square", "off");
56 | end


--------------------------------------------------------------------------------
/Chapter09/c9_30_great_test.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c9_30_great_test.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 
11 | 
12 | % environment set-up (initEnv is disabled in this variant)
13 | %initEnv();
14 | pkg load specfun;
15 | % faces.mat is expected to provide X (per the original note: 5000 grayscale
16 | % face images of 32 x 32 pixels, one image per row)
17 | load ('c:/temp/faces.mat');
18 | 
19 | % feature scaling
20 | [mu, sigma] = computeScalingParams(X);
21 | XNorm = scaleFeatures(X, mu, sigma);
22 | 
23 | % console output is flushed explicitly only when running under Octave
24 | runningOctave = exist('OCTAVE_VERSION');
25 | 
26 | % principal components of the scaled data
27 | fprintf('Computing PCA...\n');
28 | if runningOctave, fflush(stdout); end
29 | [U, S] = computePCA(XNorm);
30 | 
31 | % projection onto the first k components
32 | k = 100;
33 | fprintf('Projecting data to reduced dimensional space - from: %d to: %d\n', size(XNorm, 2), k);
34 | if runningOctave, fflush(stdout); end
35 | [Z, variance] = projectPCAData(XNorm, U, S, k);
36 | fprintf('Varianced retained by projecting to reduced-size: %f\n', variance);
37 | 
38 | % back-projection to the original number of dimensions
39 | fprintf('Recovering data to original dimensional space...\n');
40 | if runningOctave, fflush(stdout); end
41 | XRec = recoverPCAData(Z, U, k);
42 | 
43 | fprintf("Visualize face data before and after reduction\n");
44 | colormap(gray);
45 | nFaces = 10;
46 | for idx = 1:nFaces
47 |     % top row: input image
48 |     subplot(2, nFaces, idx);
49 |     imagesc(reshape(X(idx, :), 32, 32));
50 |     axis("square", "off");
51 |     % bottom row: the same image after projection and recovery
52 |     subplot(2, nFaces, nFaces + idx);
53 |     imagesc(reshape(XRec(idx, :), 32, 32));
54 |     axis("square", "off");
55 | end


--------------------------------------------------------------------------------
/Chapter12/c12_03_makeCluster.R:
--------------------------------------------------------------------------------
 1 | ## makeCluster(), clusterApply(), parSapply() etc. come from the 'parallel'
 2 | ## package, which ships with R but is not attached by default.
 3 | library(parallel)
 4 | cl <- makeCluster(getOption("cl.cores", 2))
 5 | clusterApply(cl, 1:2, get("+"), 3)
 6 | xx <- 1
 7 | clusterExport(cl, "xx")
 8 | clusterCall(cl, function(y) xx + y, 2)
 9 | 
10 | ## Use clusterMap like an mapply example
11 | clusterMap(cl, function(x, y) seq_len(x) + y,
12 |           c(a =  1, b = 2, c = 3), c(A = 10, B = 0, C = -10))
13 | 
14 | ## Parallel counterpart of sapply(1:20, get("+"), 3)
15 | parSapply(cl, 1:20, get("+"), 3)
16 | 
17 | ## A bootstrapping example, which can be done in many ways:
18 | clusterEvalQ(cl, {
19 |   ## set up each worker.  Could also use clusterExport()
20 |   library(boot)
21 |   cd4.rg <- function(data, mle) MASS::mvrnorm(nrow(data), mle$m, mle$v)  # generator passed to boot() as ran.gen
22 |   cd4.mle <- list(m = colMeans(cd4), v = var(cd4))  # mean vector and covariance passed to boot() as mle
23 |   NULL  # value of the block, so each worker returns NULL from this set-up call
24 | })
25 | res <- clusterEvalQ(cl, boot(cd4, corr, R = 100,
26 |                     sim = "parametric", ran.gen = cd4.rg, mle = cd4.mle))
27 | library(boot)  # attach boot in this session as well, before combining the results
28 | cd4.boot <- do.call(c, res)  # c() over the per-worker results
29 | boot.ci(cd4.boot,  type = c("norm", "basic", "perc"),
30 |         conf = 0.9, h = atanh, hinv = tanh)
31 | stopCluster(cl)
32 | 
33 | ## or: wrap the whole bootstrap in a function and call it through parLapply
34 | library(boot)
35 | run1 <- function(...) {  # arguments are accepted but not used
36 |    library(boot)  # attach boot wherever this function runs
37 |    cd4.rg <- function(data, mle) MASS::mvrnorm(nrow(data), mle$m, mle$v)
38 |    cd4.mle <- list(m = colMeans(cd4), v = var(cd4))
39 |    boot(cd4, corr, R = 500, sim = "parametric",
40 |         ran.gen = cd4.rg, mle = cd4.mle)
41 | }
42 | cl <- makeCluster(mc <- getOption("cl.cores", 2))  # mc keeps the worker count for seq_len(mc) below
43 | ## to make this reproducible
44 | clusterSetRNGStream(cl, 123)
45 | cd4.boot <- do.call(c, parLapply(cl, seq_len(mc), run1))  # one run1() call per index 1..mc, combined with c()
46 | boot.ci(cd4.boot,  type = c("norm", "basic", "perc"),
47 |         conf = 0.9, h = atanh, hinv = tanh)
48 | stopCluster(cl)


--------------------------------------------------------------------------------
/Chapter02/c3_28_datadotworld_2good.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_28_datadotworld_2good.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | DataDotWorldBBallStats.name  pointspergame  assistspergame  \
11 | 0                         Jon           20.4             1.3   
12 | 1                         Rob           15.5             8.0   
13 | 2                      Sharon           30.1            11.2   
14 | 3                        Alex            8.2             0.5   
15 | 4                     Rebecca           12.3            17.0   
16 | 5                      Ariane           18.1             3.0   
17 | 6                       Bryon           16.0             8.5   
18 | 7                        Matt           13.0             2.1   
19 | 
20 |   DataDotWorldBBallTeam.name  height handedness  
21 | 0                        Jon    6'5"      Right  
22 | 1                        Rob  6'7.5"       Left  
23 | 2                     Sharon    6'3"      Right  
24 | 3                       Alex    6'2"      Right  
25 | 4                    Rebecca      7'      Right  
26 | 5                     Ariane    5'8"       Left  
27 | 6                      Bryon      7'      Right  
28 | 7                       Matt    5'5"      Right  
29 | """
30 | 
31 | import datadotworld as dw
32 | name='jonloyens/an-intro-to-dataworld-dataset'
33 | results = dw.query(name,
34 |     'SELECT * FROM `DataDotWorldBBallStats`, `DataDotWorldBBallTeam` '
35 |     'WHERE DataDotWorldBBallTeam.Name = DataDotWorldBBallStats.Name')
36 | df = results.dataframe
37 | print(df)


--------------------------------------------------------------------------------
/Chapter03/c3_28_datadotworld_2good.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c3_28_datadotworld_2good.py
 3 |   Book     : Hands-on Data Science with Anaconda)
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/15/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | DataDotWorldBBallStats.name  pointspergame  assistspergame  \
11 | 0                         Jon           20.4             1.3   
12 | 1                         Rob           15.5             8.0   
13 | 2                      Sharon           30.1            11.2   
14 | 3                        Alex            8.2             0.5   
15 | 4                     Rebecca           12.3            17.0   
16 | 5                      Ariane           18.1             3.0   
17 | 6                       Bryon           16.0             8.5   
18 | 7                        Matt           13.0             2.1   
19 | 
20 |   DataDotWorldBBallTeam.name  height handedness  
21 | 0                        Jon    6'5"      Right  
22 | 1                        Rob  6'7.5"       Left  
23 | 2                     Sharon    6'3"      Right  
24 | 3                       Alex    6'2"      Right  
25 | 4                    Rebecca      7'      Right  
26 | 5                     Ariane    5'8"       Left  
27 | 6                      Bryon      7'      Right  
28 | 7                       Matt    5'5"      Right  
29 | """
30 | 
31 | import datadotworld as dw
32 | name='jonloyens/an-intro-to-dataworld-dataset'
33 | results = dw.query(name,
34 |     'SELECT * FROM `DataDotWorldBBallStats`, `DataDotWorldBBallTeam` '
35 |     'WHERE DataDotWorldBBallTeam.Name = DataDotWorldBBallStats.Name')
36 | df = results.dataframe
37 | print(df)


--------------------------------------------------------------------------------
/Chapter09/c9_29_processing_email.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c9_29_processing_email.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | #}
10 | 
11 | function features = extractEmailFeatures(filename, vocabList)
12 | % EXTRACTEMAILFEATURES  Binary vocabulary-indicator vector for an email file.
13 | %   features = extractEmailFeatures(filename, vocabList) reads the text file
14 | %   FILENAME, normalizes and tokenizes its contents, stems every token with
15 | %   porterStemmer, and returns a 1-by-size(vocabList, 1) row vector in which
16 | %   entry i is 1 when the i-th vocabulary word matches a stemmed token and
17 | %   0 otherwise.
18 | %
19 | %   filename  : path of the email text file
20 | %   vocabList : vocabulary words, one per row
21 | %
22 | %   If the file cannot be opened, the all-zero vector is returned.
23 | vocablistSize = size(vocabList, 1);
24 | features = zeros(1, vocablistSize);
25 | 
26 | % read email contents from file
27 | fid = fopen(filename);
28 | if fid < 0
29 |     % fopen signals failure with -1, which is non-zero and therefore "true";
30 |     % the former "if fid" test could never reach this early return.
31 |     return;
32 | end
33 | email_contents = fscanf(fid, '%c', inf);
34 | fclose(fid);
35 | 
36 | % process email-contents
37 | email_contents = lower(email_contents); % lower-case
38 | email_contents = regexprep(email_contents, '<[^<>]+>', ' '); % strip HTML
39 | email_contents = regexprep(email_contents, '[0-9]+', 'number'); % replace numbers
40 | email_contents = regexprep(email_contents, '(http|https)://[^\s]*', 'httpaddr'); % replace URLs
41 | email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr'); % replace email-ids
42 | email_contents = regexprep(email_contents, '[$]+', 'dollar'); % replace dollar
43 | 
44 | % tokenize contents & extract features
45 | while ~isempty(email_contents)
46 |     [str, email_contents] = strtok(email_contents, [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]);
47 |     str = regexprep(str, '[^a-zA-Z0-9]', ''); % remove non-alphanumeric
48 |     % porter-stemmer; a token the stemmer fails on is skipped (best effort)
49 |     try
50 |         str = porterStemmer(strtrim(str));
51 |     catch
52 |         continue;
53 |     end
54 |     if length(str) < 1
55 |         continue;
56 |     end
57 |     % first vocabulary entry equal to the stemmed token, if any
58 |     idx = find(strcmp(vocabList, str), 1);
59 |     if ~isempty(idx)
60 |         features(idx) = 1;
61 |     end
62 | end
63 | 
64 | end


--------------------------------------------------------------------------------
/Chapter09/c9_13_short_version.py:
--------------------------------------------------------------------------------
 1 | # http://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html#sphx-glr-auto-examples-classification-plot-digits-classification-py
 2 | 
 3 | """
 4 |   Name     : c9_13_short_version.py
 5 |   Book     : Hands-on Data Science with Anaconda )
 6 |   Publisher: Packt Publishing Ltd. 
 7 |   Author   : Yuxing Yan and James Yan
 8 |   Date     : 4/6/2018
 9 |   email    : yany@canisius.edu
10 |              paulyxy@hotmail.com
11 | """
12 | 
13 | import matplotlib.pyplot as plt
14 | from sklearn import datasets, svm, metrics
15 | from sklearn.metrics import classification_report as report
16 | #
17 | format1="Classification report for classifier %s:\n%s\n"
18 | format2="Confusion matrix:\n%s"
19 | digits = datasets.load_digits()
20 | imageLabels = list(zip(digits.images, digits.target))
21 | for index,(image,label) in enumerate(imageLabels[:4]):
22 |     plt.subplot(2, 4, index + 1)
23 |     plt.axis('off')
24 |     plt.imshow(image,cmap=plt.cm.gray_r,interpolation='nearest')
25 |     plt.title('Training: %i' % label)
26 | n=len(digits.images)
27 | data2 = digits.images.reshape((n,-1))
28 | classifier = svm.SVC(gamma=0.001)
29 | classifier.fit(data2[:n//2],digits.target[:n//2])
30 | expected = digits.target[n//2:]
31 | predicted = classifier.predict(data[n//2:])
32 | print(format1 % (classifier,report(expected, predicted)))
33 | print(format2 % metrics.confusion_matrix(expected, predicted))
34 | imageAndPredictions=list(zip(digits.images[n//2:], predicted))
35 | for index,(image,prediction) in enumerate(imageAndPredictions[:4]):
36 |     plt.subplot(2,4,index+5)
37 |     plt.axis('off')
38 |     plt.imshow(image,cmap=plt.cm.gray_r,interpolation='nearest')
39 |     plt.title('Prediction: %i' % prediction)
40 | plt.show()
41 | 
42 | 


--------------------------------------------------------------------------------
/Chapter08/c8_41_plot_pca_iris.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """
 5 | =========================================================
 6 | PCA example with Iris Data-set
 7 | =========================================================
 8 | 
 9 | Principal Component Analysis applied to the Iris dataset.
10 | 
11 | See `here <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_ for more
12 | information on this dataset.
13 | 
14 | """
15 | print(__doc__)
16 | 
17 | 
18 | # Code source: Gaël Varoquaux
19 | # License: BSD 3 clause
20 | 
21 | import numpy as np
22 | import matplotlib.pyplot as plt
23 | from mpl_toolkits.mplot3d import Axes3D
24 | 
25 | 
26 | from sklearn import decomposition
27 | from sklearn import datasets
28 | 
29 | np.random.seed(5)
30 | 
31 | centers = [[1, 1], [-1, -1], [1, -1]]
32 | iris = datasets.load_iris()
33 | X = iris.data
34 | y = iris.target
35 | 
36 | fig = plt.figure(1, figsize=(4, 3))
37 | plt.clf()
38 | ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
39 | 
40 | plt.cla()
41 | pca = decomposition.PCA(n_components=3)
42 | pca.fit(X)
43 | X = pca.transform(X)
44 | 
45 | for name, label in [('Setosa', 0), ('Versicolour', 1), ('Virginica', 2)]:
46 |     ax.text3D(X[y == label, 0].mean(),
47 |               X[y == label, 1].mean() + 1.5,
48 |               X[y == label, 2].mean(), name,
49 |               horizontalalignment='center',
50 |               bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
51 | # Reorder the labels to have colors matching the cluster results
52 | y = np.choose(y, [1, 2, 0]).astype(np.float)
53 | ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.spectral,
54 |            edgecolor='k')
55 | 
56 | ax.w_xaxis.set_ticklabels([])
57 | ax.w_yaxis.set_ticklabels([])
58 | ax.w_zaxis.set_ticklabels([])
59 | 
60 | plt.show()
61 | 


--------------------------------------------------------------------------------
/Chapter10/c10_24_ddd.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c10_24_ddd.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/24/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | #}
11 | 
12 | 
13 | 
14 | %init
15 | %initEnv();
16 | % read spam-vocab list into a cell array
17 | % words which occur at least a 100 times in the spam corpus
18 | n = 1899; % list size
19 | vocabList = cell(n, 1);
20 | vocabFile = 'c:/temp/spam-vocab.txt';
21 | fid = fopen(vocabFile);
22 | if fid < 0
23 |     % fopen returns -1 on failure; stop with a clear message instead of
24 |     % failing later inside fscanf
25 |     error('Cannot open vocabulary file: %s', vocabFile);
26 | end
27 | for i=1:n
28 |     vocabList{i} = fscanf(fid, '%s', 1);
29 | end
30 | fclose(fid);
31 | % Load the Spam Email training & test datasets based on above spam-vocab list
32 | % X/Xtest: vector of length 1899 with 1 for known spam-words, and 0 otherwise
33 | % y/ytest: spam classification of 0 or 1
34 | load('c:/temp/spamTrain.mat'); % adds X, y to environment
35 | load('c:/temp/spamTest.mat'); % adds Xtest, ytest to environment
36 | 
37 | fprintf("Computing C & sigma for RBF-Kernel in SVM - this will take some time...\n");
38 | [C, gamma] = chooseRBFParamsForSVM(X, y, Xtest, ytest);
39 | 
40 | % SVM training
41 | % -s 0 : classification
42 | % -t 2 : RBF/Gaussian kernel
43 | % -c number : C
44 | % -g number : gamma
45 | fprintf("Training SVM with RBF-Kernel for C: %g and gamma: %g\n", C, gamma);
46 | model = svmtrain(y, X, sprintf('-s 0 -t 2 -c %g -g %g', C, gamma));
47 | 
48 | %% extract features from sample emails & predict
49 | num = 3;
50 | for i=1:num
51 |     fileName = sprintf('email-sample-%d.txt', i);
52 |     emailFeatures = extractEmailFeatures(fileName, vocabList);
53 |     % the label argument [0] is a placeholder; only the prediction is used
54 |     [pred, acc, prob] = svmpredict([0], emailFeatures, model, '-q');
55 |     if(pred == 1)
56 |         fprintf("%s is spam\n", fileName);
57 |     else
58 |         fprintf("%s is NOT spam\n", fileName);
59 |     end
60 | end


--------------------------------------------------------------------------------
/Chapter09/c9_25_octave_good_graph.m:
--------------------------------------------------------------------------------
 1 | #{
 2 |   Name     : c9_25_octave_good_graph.m
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/6/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 |  input data: http://canisius.edu/~yany/data/c9_input.csv
11 | #}
12 | 
13 | 
14 | 
15 | a=csvread("c:/temp/c9_input.csv");
16 | x=a(:,2);
17 | y=a(:,3);
18 | figure % open a new figure window
19 | plot(x, y, 'o');
20 | ylabel('Annual returns for S&P500')
21 | xlabel('Annual returns for IBM')
22 | #
23 | m = length(y);
24 | x = [ones(m, 1), x]; % add a column of ones to x (intercept term)
25 | theta = zeros(size(x,2),1);
26 | alpha = 0.07;             % learning rate
27 | delta = ones(size(theta));
28 | % Batch gradient descent. Stop when the largest gradient component is small
29 | % in magnitude: max(abs(.)) rather than abs(max(.)), which ends the loop too
30 | % early when the remaining large components are negative (e.g. [-5; 1e-6]).
31 | while max(abs(delta(:))) > 0.00001
32 |   h = x * theta;            % predictions, m-by-1
33 |   err = h - y;
34 |   delta = x' * err / m;     % gradient of the squared-error cost
35 |   theta = theta - alpha * delta;
36 | end
37 | hold on % Plot new data without clearing old plot
38 | plot(x(:,2), x*theta, '-') % x is now a matrix with 2 columns; the second
39 |                            % column holds the original regressor values
40 | legend('Training data', 'Linear regression')
41 | J_vals = zeros(100, 100);  % cost on a 100 x 100 grid of (theta0, theta1)
42 | theta0_vals = linspace(-3, 3, 100);
43 | theta1_vals = linspace(-1, 1, 100);
44 | for i = 1:length(theta0_vals)
45 |   for j = 1:length(theta1_vals)
46 |     t = [theta0_vals(i); theta1_vals(j)];
47 |     % per-observation predictions; the former sum(x * t) collapsed them to a
48 |     % single scalar, so the cost compared one number against every y
49 |     h = x * t;
50 |     J_vals(i,j) = sum((h - y).^2) / (2*m);
51 |   end
52 | end
53 | % Plot the surface plot
54 | % Because of the way meshgrids work in the surf command, we need to 
55 | % transpose J_vals before calling surf, or else the axes will be
56 | % flipped
57 | J_vals = J_vals';
58 | figure;
59 | surf(theta0_vals, theta1_vals, J_vals)
60 | xlabel('\theta_0'); ylabel('\theta_1')


--------------------------------------------------------------------------------
/Chapter05/c5_09_annual_beta.py:
--------------------------------------------------------------------------------
 1 | """
 2 |   Name     : c5_09_annual_beta.py
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 1/25/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | 
10 | Objective: using Quandl to replace Yahoo!Finance
11 |            since Yahoo has changed its data structure
12 | 
13 |  Book title: Python for Finance (2nd edition)
14 |  Author    : Yuxing Yan
15 |  Page #    : 192 
16 |  Date      : 1/23/2018  by Yuxing Yan
17 | 
18 |  Output of this program 
19 |   year,     alpha,   beta, R_value,  P_value
20 | (u'1962', 0.00012479, 0.411, 0.815, 6.1554743380330469e-61)
21 | (u'1963', 0.00033024, 0.342, 0.616, 1.3611738963159366e-27)
22 | """
23 | import numpy as np
24 | import scipy as sp
25 | import pandas as pd
26 | import quandl as qd
27 | from scipy import stats
28 | #
29 | ticker="wmt"
30 | x=qd.get("WIKI/"+ticker)
31 | p=x[['Adj. Close']]
32 | ret=p.diff()/p.shift(1)
33 | stockRet=ret.dropna()
34 | stockRet.columns=['stockRet']
35 | #
36 | inFile="http://canisius.edu/~yany/data/^gspcDaily.csv"
37 | y=pd.read_csv(inFile,index_col=0)
38 | d=y[['Adj Close']]
39 | ret2=d.diff()/d.shift(1)
40 | mktRet=ret2.dropna()
41 | mktRet.columns=['mktRet']
42 | 
43 | final= stockRet.merge(mktRet, how='inner', left_index=True, right_index=True)
44 | years=pd.unique(final.index.strftime("%Y"))
45 | 
46 | 
47 | print("    year,     alpha,   beta,R_value, P_value")
48 | for i in sp.arange(0,5):
49 | #for i in sp.arange(1,len(years)):
50 |     #print(years[i])
51 |     d=final[final.index.strftime("%Y")==years[i]]
52 |     (beta,alpha,r_value,p_value,std_err)=stats.linregress(d.stockRet,d.mktRet)
53 |     alpha=round(alpha,8)
54 |     beta=round(beta,3)
55 |     r_value=round(r_value,3)
56 |     p_vaue=round(p_value,3)
57 |     print(years[i],alpha,beta,r_value,p_value)
58 |   


--------------------------------------------------------------------------------
/Chapter10/c10_02_using_Liblinear02.R:
--------------------------------------------------------------------------------
 1 | "
 2 |   Name     : c10_02_using_Liblinear02.R
 3 |   Book     : Hands-on Data Science with Anaconda )
 4 |   Publisher: Packt Publishing Ltd. 
 5 |   Author   : Yuxing Yan and James Yan
 6 |   Date     : 4/23/2018
 7 |   email    : yany@canisius.edu
 8 |              paulyxy@hotmail.com
 9 | "
10 | 
11 | library(LiblineaR)   # the closing parenthesis was missing (syntax error)
12 | data(iris)
13 | attach(iris)
14 | x=iris[,1:4]          # columns 1-4: predictors
15 | y=factor(iris[,5])    # column 5: class label
16 | # random 100-row training sample; the remaining rows form the test set
17 | train=sample(1:dim(iris)[1],100)
18 | xTrain=x[train,];xTest=x[-train,]
19 | yTrain=y[train]; yTest=y[-train]
20 | # center and scale the training data; the same parameters are reused for the test data
21 | s=scale(xTrain,center=TRUE,scale=TRUE)
22 | #
23 | tryTypes=c(0:7)
24 | tryCosts=c(1000,1,0.001)
25 | bestCost=NA
26 | bestAcc=0
27 | bestType=NA
28 | # grid search over model type and cost, keeping the best result returned
29 | # by LiblineaR with cross=5
30 | for(ty in tryTypes){
31 |    for(co in tryCosts){
32 |      acc=LiblineaR(data=s,target=yTrain,type=ty,cost=co,bias=1,cross=5,verbose=FALSE)
33 |      cat("Results for C=",co,": ",acc," accuracy.\n",sep="")
34 |      if(acc>bestAcc){
35 |          bestCost=co
36 |          bestAcc=acc
37 |          bestType=ty
38 |      }
39 |    }
40 | }
41 | cat("Best model type is:",bestType,"\n")
42 | cat("Best cost is:",bestCost,"\n")
43 | cat("Best accuracy is:",bestAcc,"\n")
44 | # Re-train best model with best cost value.
45 | m=LiblineaR(data=s,target=yTrain,type=bestType,cost=bestCost,bias=1,verbose=FALSE)
46 | # Scale the test data
47 | s2=scale(xTest,attr(s,"scaled:center"),attr(s,"scaled:scale"))
48 | pr=FALSE; # Make prediction
49 | if(bestType==0 || bestType==7) pr=TRUE
50 | p=predict(m,s2,proba=pr,decisionValues=TRUE)
51 | res=table(p$predictions,yTest) # Display confusion matrix
52 | print(res)
53 | # Compute Balanced Classification Rate
54 | BCR=mean(c(res[1,1]/sum(res[,1]),res[2,2]/sum(res[,2]),res[3,3]/sum(res[,3])))
55 | #output
56 | print(BCR)
57 | 
58 | # final summary: repeats the figures printed above in one place
59 | cat("Best model type is:",bestType,"\n")
60 | cat("Best cost is:",bestCost,"\n")
61 | cat("Best accuracy is:",bestAcc,"\n")
62 | print(res)
63 | print(BCR)


--------------------------------------------------------------------------------