├── Chapter01 ├── c1_02_julia_good.jl └── c1_01_pv_function.py ├── Errata_03_52.png ├── Chapter10 ├── c10_25_ltfat.m ├── c10_07_abalone_data_set.txt ├── c10_16_summarize_by_date.txt ├── c10_27_Granger_test01.R ├── c10_05_simdata.R ├── c10_21_ffMonthly.py ├── c10_03_help_AppliedPredictiveModeling.R ├── c10_04_data_AppliedPredictiveModeling.R ├── c10_22_businessCycle.R ├── c10_28_Granger_test02.R ├── c10_13_timeSeries.R ├── c10_08_get_UCIdatasets.R ├── c10_06_fisher_Z_score.R ├── c10_31_timeUsed.jl ├── c10_09_usGDP.R ├── c10_30_QuantEcon_simulated.jl ├── c10_14_movingAverage.R ├── c10_11_seasonality_usGDPquarterly.R ├── c10_10_usGDP_graph.R ├── c10_19_catwalk_not_complete.py ├── c10_18_annual_ret_sp500.txt ├── c10_30_ltfat_example.m ├── c10_20_grangerTest_IBM_sp500.R ├── c10_17_sp500_annual_return_nextYear.R ├── c10_12_datarobot_not_working.R ├── c10_26_pca.m ├── c10_24_ddd.m └── c10_02_using_Liblinear02.R ├── Chapter07 ├── c7_05_tangent_line.R ├── c7_18_efficientFrontier.R ├── c7_24_lqramsey_with_beta.txt ├── c7_12_load_optim.m ├── c7_09_help_optimize.py ├── c7_16_optimization_JuPM_Not_working.jl ├── c7_01_quatradic_function.R ├── c7_17_optim_example.jl ├── c7_03_convex_function.R ├── c7_14_inline_fmins.m ├── c7_08_optimize_help.py ├── c7_15_fminsearch.m ├── c7_13_fminsearch.m ├── c7_02.R ├── c7_06_.R ├── c7_10_3D_graph.R ├── c7_19_optimization.m ├── c7_22_optim.jl ├── c7_04_convex_function2.R ├── c7_07_optimization_01.py ├── c7_20_JuMP01.jl ├── c7_11_ff5industries.R └── c7_21_JuMp02.jl ├── Chapter12 ├── c12_18_taskview.txt ├── c12_17_taskView.R ├── c12_04_parallel04.R ├── c12_01_lapply.R ├── c12_05_snow_01.R ├── c12_10_plyr_arrange.R ├── c12_13_parallel.R ├── c12_07_snow_parallel_Rmpi_UNIX.R ├── c12_02_parallel_01.R ├── c12_12_pi_01.py ├── c12_06_plyr_example.R └── c12_03_makeCluster.R ├── Software and Hardware list.pdf ├── Chapter09 ├── c9_28_install_Conda.jl ├── c9_37_list_taskView.txt ├── c9_35_print_algorithms.R ├── c9_31_intall_optiminterp.m ├── c9_07_Iris.R 
├── c9_36_taskView_machineLearning.R ├── c9_22_reinforcementLearning_state_same_as_nextState.R ├── c9_23_example.m ├── c9_09_Bayes_titanic.R ├── c9_06_print_iris.py ├── c9_01_titanic.R ├── c9_20_unique_value_iris.py ├── c9_21_reinforcementLearning.R ├── c9_34_Kmean_randomNumbers.jl ├── c9_12_load_iris.py ├── c9_19_logicReg.R ├── c9_08_naiveBayes.R ├── c9_24_reinforcementLearning_example.R ├── c9_32_iris.jl ├── c9_38_iris_prediction.py ├── c9_45_same_as_c9_14_good.py ├── c9_44_same_as_c9_14_good.py ├── c9_10_RTextTools.R ├── c9_11_RTextTool_2.R ├── c9_17_others_1.R ├── c9_05_NYTime_01.R ├── c9_18_others_2.R ├── c9_42_ff3factorDaily.py ├── c9_16_generate_titanicRData.R ├── c9_33_bird_Kmeans.m ├── c9_14_iris_predicted_vs_trueOne.py ├── c9_04_simplist_One_tree_tinatic.R ├── c9_26_test.m ├── c9_27_bird.m ├── c9_15_FamaFrench3factorModel.py ├── c9_03_simplefied_tree_tinatic.R ├── c9_30_great_test.m ├── c9_29_processing_email.m ├── c9_13_short_version.py └── c9_25_octave_good_graph.m ├── Chapter06 ├── c6_16_conda_commands.txt ├── c6_13_Pkg_add.jl ├── c6_23_sys_path.py ├── c6_09_update_package.R ├── c6_18_source.R ├── c6_24_environmentVars.R ├── c6_25_environmentVars.py ├── c6_26_get_environmentVars.m ├── c6_03_read_csv.R ├── c6_07_path_rattle_package.R ├── c6_08_install_package.m ├── c6_02_rattle.R ├── c6_27_manual_XLConnect.R ├── c6_01_QR_code_for_CNN.R ├── c6_21_py_compile.py ├── c6_05_taskViewFinance.R ├── c6_06_taskView_update.R ├── c6_15_load_unload_package.m ├── c6_12_import_matplotlib.py ├── c6_14_remove_update_packages.jl ├── c6_22_import_myPackage.py ├── c6_11_taskView_machineLearning.R ├── c6_17_financialCalculator.R ├── c6_10_table6_1.R └── c6_19.jl ├── Chapter08 ├── c8_42_number_of_packages_task_view.txt ├── c8_43_webs.txt ├── c8_06_launch_rattle.R ├── c8_27_package_milk.py ├── c8_15_sklearn.py ├── c8_04_dendogram_animals.R ├── c8_07_dir_scipy_cluster.py ├── c8_14_install_taskViewCluster.R ├── c8_17_example_cluster.py ├── c8_16_functions_sklearn_cluster.py ├── 
c8_01_dist.R ├── c8_19_5points.R ├── c8_22_load_iris_data.py ├── c8_12_considerDirection.R ├── c8_11_randomForest_plot.R ├── c8_20_5pointsCluster.R ├── c8_03_cluster_animals.R ├── c8_24_01.jl ├── c8_28_iris_kMean_sklearn.py ├── c8_08_python_hierarchical.py ├── c8_09_randomUniformForest_not_working.R ├── c8_25_clustering.jl ├── c8_13_mixMod_bar.R ├── c8_23_randomNumbersFrom2normal.R ├── c8_31_generate_dendrogram_using20obsWine.R ├── c8_10_wine_quality.R ├── c8_05_kmeans01.R ├── c8_02_cluster.R ├── c8_29_PCA.py └── c8_41_plot_pca_iris.py ├── Chapter04 ├── c4_02_sineFunction.R ├── c4_01_line.R ├── c4_20_save_pdf.R ├── c4_08_straghtLine.R ├── c4_05_simpleDraw.py ├── c4_18.jl ├── c4_28_chi2distribution.R ├── c4_09_python_fv.py ├── c4_32_coin_grey.R ├── c4_23_bisection_method.R ├── c4_03_pie.R ├── c4_29_annimation_flip_coin.R ├── c4_04_Pyplot_julia.jl ├── c4_21_plot_Julia.jl ├── c4_13_add_trendLine.R ├── c4_07_shaded_area_standard_normal_dist.R ├── c4_30_annimation3flip_coin.R ├── c4_16_plot_julia.jl ├── c4_31_pie_grey.R ├── c4_19_scatter_plot_PyPlot.jl ├── c4_27_3stock_connection.R ├── c4_33_plot_grey.jl ├── c4_06_add_labels.py ├── c4_22_brownian_motion_animation.R ├── c4_11_histogram.py ├── c4_15_add_Greek_letters.R ├── c4_26_qgraph_network.R ├── c4_10_getHistram_IBMreturn.py ├── c4_17_QuantEcon_julia.jl ├── c4_24_Brownian_motion_html.R ├── c4_25_bisectionMethod_html.R ├── c4_14_time_value_of_money.py └── c4_12_generate_Black_Scholes_formula.py ├── Chapter11 ├── c11_03_dir_fincal.py ├── c11_01_qt_consol.py └── c11_02_myfincal.py ├── Chapter05 ├── c5_10_isna.R ├── c5_13_critival_Tvalue.R ├── c5_22_ff5.R ├── c5_19_critical_Tvalue.py ├── c5_20_ff4_RData.R ├── c5_01_linear_graph.R ├── c5_21_cholesky_01.R ├── c5_24_critical_value_F_distribution.R ├── c5_12_replace_spna.py ├── c5_15_get_IBM_dailyFromQuandl.py ├── c5_02_linear_reg.R ├── c5_11_remove_spna.py ├── c5_14_OLS.jl ├── c5_08_remove_missing_data.R ├── c5_28_run_julia_program.jl ├── c5_26_f_ditribution_graph.R ├── 
c5_29_replace_na_with_mean.py ├── c5_23_number_outliers.R ├── c5_18_ff3_factor_ibm.R ├── c5_25_get_critical_value_F_test.py ├── c5_07_random_OLS.py ├── c5_17_ibm_beta.R ├── c5_30_run_linearRegressionOctave.m ├── c5_27_CAPM.jl ├── c5_31_CAPM.jl ├── c5_05_get_sp500Daily.py ├── c5_16_ibm_beta.py ├── c5_06_get_sp500monthly.py └── c5_09_annual_beta.py ├── Chapter02 ├── c3_17_missing_code.py ├── c3_20_sort_R.R ├── c3_07_find_definitions_of_inputs.py ├── c3_15_cbsodata_list_of_data.py ├── c3_02_pandas_read_csv.py ├── c3_03_pandas_read_csv.py ├── c3_01.R ├── c3_27_datadotworld_1.py ├── c3_14_cbsodata.py ├── c3_10_R_package_foreign.R ├── c3_23_datadotworld.py ├── c3_04_save_RDatat.R ├── c3_16_missing_code_R.R ├── c3_24_get_iris.py ├── c3_22_sort_by2columns.R ├── c3_25_sort_Python.py ├── c3_11_R_package_dslabs.R ├── c3_30_merge_left_index.py ├── c3_29_merge_different_names.py ├── c3_12_merge_01.py ├── c3_26_ff3monthly2pickle.py ├── c3_31_merge_by2variables.py ├── c3_19_missing_code.py ├── c3_09_R_package_sjlabbeld.R ├── c3_18_missing_code_apropos.R ├── c3_08_merge_datasets.R ├── c3_13_merge_02_stock.py ├── c3_05_saveRDS.R ├── c3_21_sort_order.R └── c3_28_datadotworld_2good.py ├── Chapter03 ├── c3_17_missing_code.py ├── c3_20_sort_R.R ├── c3_07_find_definitions_of_inputs.py ├── c3_15_cbsodata_list_of_data.py ├── c3_02_pandas_read_csv.py ├── c3_03_pandas_read_csv.py ├── c3_01.R ├── c3_27_datadotworld_1.py ├── c3_14_cbsodata.py ├── c3_34_read_ff3monthly_csv.py ├── c3_10_R_package_foreign.R ├── c3_23_datadotworld.py ├── c3_04_save_RDatat.R ├── c3_32_write_sas_write_spss_write_stata.R ├── c3_16_missing_code_R.R ├── c3_24_get_iris.py ├── c3_22_sort_by2columns.R ├── c3_25_sort_Python.py ├── c3_33_generate_z_csv.R ├── c3_11_R_package_dslabs.R ├── c3_30_merge_left_index.py ├── c3_29_merge_different_names.py ├── c3_19_missing_code.py ├── c3_12_merge_01.py ├── c3_26_ff3monthly2pickle.py ├── c3_31_merge_by2variables.py ├── c3_09_R_package_sjlabbeld.R ├── c3_18_missing_code_apropos.R ├── 
c3_08_merge_datasets.R ├── c3_13_merge_02_stock.py ├── c3_05_saveRDS.R ├── c3_21_sort_order.R └── c3_28_datadotworld_2good.py └── LICENSE /Chapter01/c1_02_julia_good.jl: -------------------------------------------------------------------------------- 1 | function sphere_vol(r) 2 | return 4/3*pi*r^3 3 | end 4 | 5 | sphere_vol(2.5) 6 | -------------------------------------------------------------------------------- /Errata_03_52.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Errata_03_52.png -------------------------------------------------------------------------------- /Chapter10/c10_25_ltfat.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter10/c10_25_ltfat.m -------------------------------------------------------------------------------- /Chapter10/c10_07_abalone_data_set.txt: -------------------------------------------------------------------------------- 1 | library(AppliedPredictiveModeling) 2 | data(abalone) 3 | dim(abalone) 4 | head(abalone) 5 | 6 | 7 | -------------------------------------------------------------------------------- /Chapter07/c7_05_tangent_line.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter07/c7_05_tangent_line.R -------------------------------------------------------------------------------- /Chapter12/c12_18_taskview.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter12/c12_18_taskview.txt -------------------------------------------------------------------------------- /Software and Hardware list.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Software and Hardware list.pdf -------------------------------------------------------------------------------- /Chapter09/c9_28_install_Conda.jl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter09/c9_28_install_Conda.jl -------------------------------------------------------------------------------- /Chapter09/c9_37_list_taskView.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter09/c9_37_list_taskView.txt -------------------------------------------------------------------------------- /Chapter06/c6_16_conda_commands.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter06/c6_16_conda_commands.txt -------------------------------------------------------------------------------- /Chapter07/c7_18_efficientFrontier.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter07/c7_18_efficientFrontier.R -------------------------------------------------------------------------------- /Chapter07/c7_24_lqramsey_with_beta.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter07/c7_24_lqramsey_with_beta.txt -------------------------------------------------------------------------------- /Chapter08/c8_42_number_of_packages_task_view.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-On-Data-Science-with-Anaconda/HEAD/Chapter08/c8_42_number_of_packages_task_view.txt -------------------------------------------------------------------------------- /Chapter10/c10_16_summarize_by_date.txt: -------------------------------------------------------------------------------- 1 | 2 | library(plyr) 3 | year<-c(2000,2000,2001,2001,2004) 4 | values<-c(2, 3, 3, 5, 6) 5 | df <- data.frame(DATE=year,B =values ) 6 | dfsum <- ddply(df, c("DATE"),summarize,B=sum(B)) -------------------------------------------------------------------------------- /Chapter01/c1_01_pv_function.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Oct 29 15:42:25 2017 4 | 5 | @author: yany 6 | """ 7 | def pv_f(pv,r,n): 8 | return pv/(1+r)**n 9 | # 10 | pv=pv_f(100,0.1,2) 11 | print(pv) 12 | 13 | -------------------------------------------------------------------------------- /Chapter08/c8_43_webs.txt: -------------------------------------------------------------------------------- 1 | https://github.com/scipy/scipy/blob/master/scipy/cluster/vq.py 2 | 3 | http://scikit-learn.org/stable/auto_examples/cluster/plot_digits_linkage.html#sphx-glr-auto-examples-cluster-plot-digits-linkage-py 4 | 5 | http://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_iris.html -------------------------------------------------------------------------------- /Chapter06/c6_13_Pkg_add.jl: -------------------------------------------------------------------------------- 1 | ### 2 | Name : c6_13_Pkg_add.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | ### 10 | 11 | Pkg.add("AbstractTable") -------------------------------------------------------------------------------- /Chapter06/c6_23_sys_path.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c6_23_sys_path.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import sys 12 | sys.path -------------------------------------------------------------------------------- /Chapter04/c4_02_sineFunction.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_02_sineFunction.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | plot(sin,-pi,pi) -------------------------------------------------------------------------------- /Chapter06/c6_09_update_package.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_09_update_package.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | update.packages("rattle") -------------------------------------------------------------------------------- /Chapter06/c6_18_source.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_18_source.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | source("c:/temp/fincalCalculator.R") -------------------------------------------------------------------------------- /Chapter04/c4_01_line.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_01_line.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | x<-1:10 13 | y<-2+2*x 14 | plot(x,y) 15 | -------------------------------------------------------------------------------- /Chapter06/c6_24_environmentVars.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_24_environmentVars.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | 13 | Sys.getenv() 14 | -------------------------------------------------------------------------------- /Chapter06/c6_25_environmentVars.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c6_25_environmentVars.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import sys 12 | sys.path 13 | -------------------------------------------------------------------------------- /Chapter06/c6_26_get_environmentVars.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c6_26_get_environmentVars.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | #} 10 | 11 | 12 | 13 | getenv('path') -------------------------------------------------------------------------------- /Chapter08/c8_06_launch_rattle.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_06_launch_rattle.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(rattle) 12 | rattle() 13 | -------------------------------------------------------------------------------- /Chapter07/c7_12_load_optim.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c7_12_load_optim.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | #} 10 | 11 | pkg load optim 12 | pkg describe -verbose optim -------------------------------------------------------------------------------- /Chapter06/c6_03_read_csv.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_03_read_csv.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | x<-read.csv("http://canisius.edu/~yany/data/ibmDaily.csv") -------------------------------------------------------------------------------- /Chapter06/c6_07_path_rattle_package.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_07_path_rattle.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(rattle) 12 | path.package('rattle') 13 | -------------------------------------------------------------------------------- /Chapter06/c6_08_install_package.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c6_08_install_package.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | #} 10 | 11 | 12 | 13 | pkg install statistics-1.3.0.tar.gz -------------------------------------------------------------------------------- /Chapter08/c8_27_package_milk.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c8_27_package_milk.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import milk 13 | x=dir(milk) 14 | print(x) -------------------------------------------------------------------------------- /Chapter09/c9_35_print_algorithms.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c9_35_print_algorithms.R 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 
6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | library(RTextTools) 13 | print_algorithms() -------------------------------------------------------------------------------- /Chapter11/c11_03_dir_fincal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c11_02_myfincal.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 5/8/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import fincal 12 | x=dir(fincal) 13 | print(x) 14 | -------------------------------------------------------------------------------- /Chapter08/c8_15_sklearn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c8_15_sklearn.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | 13 | import sklearn as sk 14 | x=dir(sk) 15 | print(x) -------------------------------------------------------------------------------- /Chapter04/c4_20_save_pdf.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_20_save_pdf.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | pdf("c:/temp/myGraph.pdf") 13 | plot(cos,-2*pi,2*pi) 14 | dev.off() -------------------------------------------------------------------------------- /Chapter06/c6_02_rattle.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_02_rattle.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | install.packages("rattle") 12 | library(rattle) 13 | rattle() 14 | 15 | 16 | -------------------------------------------------------------------------------- /Chapter06/c6_27_manual_XLConnect.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_27_manual_XLConnect.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | 13 | library(XLConnect) 14 | vignette("XLConnect") 15 | -------------------------------------------------------------------------------- /Chapter09/c9_31_intall_optiminterp.m: -------------------------------------------------------------------------------- 1 | 2 | #{ 3 | Name : c9_31_install_optiminterp.m 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | #} 11 | 12 | 13 | 14 | pkg install optiminterp-0.3.4.tar.gz -------------------------------------------------------------------------------- /Chapter04/c4_08_straghtLine.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_08_straightLine.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | x<-seq(-3,3,by=0.05) 12 | y<-2+2.5*x 13 | plot(x,y,type="b") 14 | 15 | -------------------------------------------------------------------------------- /Chapter05/c5_10_isna.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_10_isna.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | x<-c(NA,2,3,4,NA) 13 | y<-na.omit(x) 14 | m<-mean(y) 15 | 16 | x[is.na(x)]<-m 17 | -------------------------------------------------------------------------------- /Chapter06/c6_01_QR_code_for_CNN.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_01_QR_code_for_CNN.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(qrcode) 12 | qrcode_gen("http://cnn.com") 13 | 14 | 15 | -------------------------------------------------------------------------------- /Chapter02/c3_17_missing_code.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_17_missing_code.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | x={1,2,3,sp.nan,6,sp.nan} 12 | print(x) 13 | x.remove(sp.nan) 14 | print(x) -------------------------------------------------------------------------------- /Chapter02/c3_20_sort_R.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_20_sort.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | set.seed(123) 12 | x<-rnorm(100) 13 | head(x) 14 | y<-sort(x) 15 | head(y) 16 | 17 | 18 | -------------------------------------------------------------------------------- /Chapter03/c3_17_missing_code.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_17_missing_code.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | x={1,2,3,sp.nan,6,sp.nan} 12 | print(x) 13 | x.remove(sp.nan) 14 | print(x) -------------------------------------------------------------------------------- /Chapter03/c3_20_sort_R.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_20_sort.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | set.seed(123) 12 | x<-rnorm(100) 13 | head(x) 14 | y<-sort(x) 15 | head(y) 16 | 17 | 18 | -------------------------------------------------------------------------------- /Chapter06/c6_21_py_compile.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c6_21_py_compile.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import py_compile 13 | py_compile.compile('c:/temp/myPackage.py') 14 | -------------------------------------------------------------------------------- /Chapter04/c4_05_simpleDraw.py: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_05_simpleDraw.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | import matplotlib.pyplot as plt 12 | plt.plot([2,3,8,12]) 13 | plt.show() 14 | 15 | -------------------------------------------------------------------------------- /Chapter06/c6_05_taskViewFinance.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_05_taskViewFinance.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | install.packages("ctv") 12 | library("ctv") 13 | install.views("Finance") 14 | -------------------------------------------------------------------------------- /Chapter06/c6_06_taskView_update.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_06_taskView_Update.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | install.packages("ctv") 12 | library("ctv") 13 | update.views("Finance") 14 | -------------------------------------------------------------------------------- /Chapter06/c6_15_load_unload_package.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c6_15_load_unload_package.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | #} 10 | 11 | 12 | 13 | pkg load statistics 14 | 15 | pkg unload statistics 16 | -------------------------------------------------------------------------------- /Chapter02/c3_07_find_definitions_of_inputs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_07_find_definitions_of_inputs.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | pd.read_csv() 14 | 15 | -------------------------------------------------------------------------------- /Chapter02/c3_15_cbsodata_list_of_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_15_cbsodata_list_of_data.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import cbsodata as cb 12 | list=cb.get_table_list() 13 | print(list) -------------------------------------------------------------------------------- /Chapter03/c3_07_find_definitions_of_inputs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_07_find_definitions_of_inputs.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | pd.read_csv() 14 | 15 | -------------------------------------------------------------------------------- /Chapter03/c3_15_cbsodata_list_of_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_15_cbsodata_list_of_data.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import cbsodata as cb 12 | tables=cb.get_table_list() 13 | print(tables) -------------------------------------------------------------------------------- /Chapter05/c5_13_critival_Tvalue.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_13_critical_Tvalue.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | alpha<-0.01 12 | degreeFreedom<-50 13 | qt(1-alpha/2,degreeFreedom) 14 | 15 | 16 | -------------------------------------------------------------------------------- /Chapter06/c6_12_import_matplotlib.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c6_12_import_matplotlib.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import matplotlib as mat 12 | x=dir(mat) 13 | print(x) 14 | 15 | 16 | -------------------------------------------------------------------------------- /Chapter06/c6_14_remove_update_packages.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Name : c6_14_remove_update_packages.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | =# 10 | 11 | 12 | Pkg.rm("AbstractTable") 13 | 14 | Pkg.update() 15 | 16 | -------------------------------------------------------------------------------- /Chapter12/c12_17_taskView.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c12_17_taskView.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 5/14/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | install.packages("ctv") 12 | library("ctv") 13 | install.views("HighPerformanceComputing") 14 | -------------------------------------------------------------------------------- /Chapter05/c5_22_ff5.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_22_ff5.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | con<-url("http://canisius.edu/~yany/RData/ff5monthly.RData") 13 | load(con) 14 | head(.ff5monthly) 15 | -------------------------------------------------------------------------------- /Chapter04/c4_18.jl: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_18.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | Pkg.add("Gadfly") 12 | using Gadfly 13 | draw(SVG("output.svg", 6inch, 3inch), plot([sin, cos], 0, 25)) 14 | 15 | 16 | -------------------------------------------------------------------------------- /Chapter05/c5_19_critical_Tvalue.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c5_19_critical_Tvalue.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | from scipy import stats 12 | alpha=0.05 13 | print(stats.t.ppf(1-alpha/2, 100)) 14 | -------------------------------------------------------------------------------- /Chapter05/c5_20_ff4_RData.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_20_ff4_RData.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | con<-url("http://canisius.edu/~yany/RData/ffc4monthly.RData") 12 | load(con) 13 | head(.ffc4monthly) 14 | -------------------------------------------------------------------------------- /Chapter08/c8_04_dendogram_animals.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_04_dendogram_animals.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | require(cluster) 11 | data(animals) 12 | x<-agnes(animals) 13 | pltree(x) 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Chapter08/c8_07_dir_scipy_cluster.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c8_07_dir_scipy_cluster.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import scipy.cluster as cluster 13 | x=dir(cluster) 14 | print(x) 15 | 16 | -------------------------------------------------------------------------------- /Chapter08/c8_14_install_taskViewCluster.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_14_install_taskViewCluster.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | install.packages("ctv") 12 | library("ctv") 13 | install.views("Cluster") 14 | -------------------------------------------------------------------------------- /Chapter08/c8_17_example_cluster.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c8_17_example_cluster.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | from sklearn.cluster.AgglomerativeCluster as cluster2 13 | help(cluster2) 14 | -------------------------------------------------------------------------------- /Chapter10/c10_27_Granger_test01.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_27_Grander_test01.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(lmtest) 12 | data(ChickEgg) 13 | dim(ChickEgg) 14 | ChickEgg[1:5,] 15 | 16 | 17 | -------------------------------------------------------------------------------- /Chapter05/c5_01_linear_graph.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_01_linear_graph.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | x<--10:10 12 | y<-2+1.5*x 13 | title<-"A straight line" 14 | plot(x,y,type='l',main=title) 15 | 16 | -------------------------------------------------------------------------------- /Chapter06/c6_22_import_myPackage.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c6_22_import_myPackage.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | sys.path.append("c:/temp/") 12 | import myPackage as my 13 | x=dir(my) 14 | print(x) 15 | -------------------------------------------------------------------------------- /Chapter07/c7_09_help_optimize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c7_09_help_optimize.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | #import numpy as np 13 | 14 | from scipy.optimize import minimize 15 | help(minimize) 16 | -------------------------------------------------------------------------------- /Chapter08/c8_16_functions_sklearn_cluster.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c8_16_functions_sklearn_cluster.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | 13 | from sklearn import cluster 14 | x=dir(cluster) 15 | print(x) -------------------------------------------------------------------------------- /Chapter02/c3_02_pandas_read_csv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_02_pandas_read_csv.py 3 | Book : Hans-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import pandas as pd 12 | data=pd.read_csv("c://temp/bezdekIris.data.txt",header=None) 13 | 14 | -------------------------------------------------------------------------------- /Chapter03/c3_02_pandas_read_csv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_02_pandas_read_csv.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import pandas as pd 12 | data=pd.read_csv("c://temp/bezdekIris.data.txt",header=None) 13 | 14 | -------------------------------------------------------------------------------- /Chapter05/c5_21_cholesky_01.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_21_cholesky_01.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | set.seed(123) 12 | n=1000 13 | x<-matrix(rnorm(n),200,5) 14 | y<-cor(x) 15 | cholesky<-chol(y) 16 | 17 | 18 | -------------------------------------------------------------------------------- /Chapter05/c5_24_critical_value_F_distribution.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_24_critical_value_F_distribution.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | alpha<-0.1 12 | d1<-1 13 | d2<-1 14 | qf(1-alpha,df1=d1,df2=d2) 15 | -------------------------------------------------------------------------------- /Chapter07/c7_16_optimization_JuPM_Not_working.jl: -------------------------------------------------------------------------------- 1 | using JuMP 2 | using Clp 3 | 4 | m = Model(solver = ClpSolver()) 5 | @variable(m, 0 <= x <= 2 ) 6 | @variable(m, 0 <= y <= 30 ) 7 | 8 | @objective(m, Max, 5x + 3*y ) 9 | @constraint(m, 1x + 5y <= 3.0 ) 10 | 11 | print(m) 12 | 13 | status = solve(m) 14 | 15 | println("Objective value: ", getobjectivevalue(m)) 16 | println("x = ", getvalue(x)) 17 | println("y = ", getvalue(y)) -------------------------------------------------------------------------------- /Chapter10/c10_05_simdata.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_05_simData.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/23/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(eclust) 12 | data("simdata") 13 | dim(simdata) 14 | simdata[1:5, 1:6] 15 | table(simdata[,"E"]) 16 | 17 | -------------------------------------------------------------------------------- /Chapter10/c10_21_ffMonthly.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c10_21_ffMonthly.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | ff=pd.read_pickle("c:/temp/ffMonthly.pkl") 14 | print(ff.head()) 15 | 16 | -------------------------------------------------------------------------------- /Chapter08/c8_01_dist.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_01_dist.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | a<-c(2,5) 12 | b<-c(3,7) 13 | z<-rbind(a,b) 14 | dist(z, method = "euclidean") 15 | plot(z,lwd=20) 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Chapter08/c8_19_5points.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_19_5points.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | data <- rbind(c(180,20), c(160,5), c(60, 150), c(160,60), c(80,120)) 12 | plot(data, col = "red", lwd = 20) 13 | 14 | 15 | -------------------------------------------------------------------------------- /Chapter09/c9_07_Iris.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c9_07_Iris.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(ggvis) 12 | x<-ggvis 13 | y<-layer_points 14 | iris %>% x(~Petal.Length,~Petal.Width,fill=~Species) %>% y() 15 | 16 | 17 | -------------------------------------------------------------------------------- /Chapter09/c9_36_taskView_machineLearning.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c9_36_taskView_machineLearning.R 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | install.packages("ctv") 13 | library("ctv") 14 | install.views("MachineLearning") 15 | -------------------------------------------------------------------------------- /Chapter07/c7_01_quatradic_function.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c7_01_quatradic_function.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | x<-seq(-10,10,0.1) 12 | a<--2 13 | b<-10 14 | c<-5 15 | y<-a*x^2+b*x+c 16 | plot(x,y,type='l') 17 | -------------------------------------------------------------------------------- /Chapter08/c8_22_load_iris_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c8_22_load_iris_data.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | 13 | from sklearn import datasets 14 | import numpy as np 15 | x= datasets.load_iris() 16 | 17 | 18 | -------------------------------------------------------------------------------- /Chapter05/c5_12_replace_spna.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c5_12_replace_spna.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import scipy as sp 12 | x=[2,3,sp.nan,sp.nan,4] 13 | y=x 14 | x.remove(sp.nan) 15 | m=sp.mean(x) 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /Chapter05/c5_15_get_IBM_dailyFromQuandl.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c5_15_getIBM_dailyFromQuandl.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import quandl as qd 12 | x=qd.get("WIKI/ibm") 13 | print(x.head(2)) 14 | print(x.tail(2)) 15 | -------------------------------------------------------------------------------- /Chapter05/c5_02_linear_reg.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_02_linear_reg.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | set.seed(12345) 12 | x<-1:100 13 | a<-4 14 | beta<-5 15 | errorTerm<-rnorm(100) 16 | y<-a+beta*x+errorTerm 17 | lm(y~x) 18 | 19 | -------------------------------------------------------------------------------- /Chapter05/c5_11_remove_spna.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c5_11_remove_spna.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import scipy as sp 13 | x={2,4,3,sp.nan,6,sp.nan,7} 14 | print(x) 15 | x.remove(sp.nan) 16 | print(x) 17 | 18 | 19 | -------------------------------------------------------------------------------- /Chapter07/c7_17_optim_example.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Name : c7_17_optim_example.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | =# 10 | 11 | 12 | 13 | 14 | using Optim 15 | f(x) = (1.0 - x[1])^2 + 200.0 * (x[2] - x[1]^2)^2 16 | optimize(f, [0.0, 0.0]) 17 | -------------------------------------------------------------------------------- /Chapter05/c5_14_OLS.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Name : c5_14_OLS.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | =# 10 | 11 | using GLM, DataFrames 12 | data = DataFrame(X=[1,2,3], Y=[2,4,7]) 13 | OLS = glm(@formula(Y ~ X), data, Normal(), IdentityLink()) 14 | 15 | 16 | -------------------------------------------------------------------------------- /Chapter02/c3_03_pandas_read_csv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_03_pandas_read_csv.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | inFile="http://canisius.edu/~yany/data/bezdekIris.data.txt" 12 | import pandas as pd 13 | d=pd.read_csv(inFile,header=None) 14 | -------------------------------------------------------------------------------- /Chapter03/c3_03_pandas_read_csv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_03_pandas_read_csv.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | inFile="http://canisius.edu/~yany/data/bezdekIris.data.txt" 12 | import pandas as pd 13 | d=pd.read_csv(inFile,header=None) 14 | -------------------------------------------------------------------------------- /Chapter06/c6_11_taskView_machineLearning.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_11_taskView_machineLearning.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | 13 | install.packages("ctv") 14 | library("ctv") 15 | install.views("MachineLearning") 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /Chapter10/c10_03_help_AppliedPredictiveModeling.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_03_help_AppliedPrecictiveModeling.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/23/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(AppliedPredictiveModeling) 12 | 13 | data(solubility) 14 | ls(pattern="sol") 15 | 16 | 17 | -------------------------------------------------------------------------------- /Chapter04/c4_28_chi2distribution.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_28_chi2distribution.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(animation) 12 | n<-5 13 | set.seed(123) 14 | f<-function(n) rchisq(n,n) 15 | clt.ani(FUN = f,mean=n,sd = sqrt(2*n)) 16 | 17 | 18 | -------------------------------------------------------------------------------- /Chapter07/c7_03_convex_function.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c7_03_convex_function.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | x<-seq(-10,10,0.1) 12 | a<-4 13 | b<- -2 14 | c<-10 15 | y<-a*x^2+b*x+c 16 | name<-"A convex function" 17 | plot(x,y,type='l',main=name) 18 | -------------------------------------------------------------------------------- /Chapter07/c7_14_inline_fmins.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c7_14_inline_fmins.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | source: https://octave.sourceforge.io/octave/function/fminsearch.html 11 | #} 12 | 13 | 14 | 15 | fmins(inline('(x(1)-5).^2+(x(2)-8).^4'),[0;0]) 16 | -------------------------------------------------------------------------------- /Chapter10/c10_04_data_AppliedPredictiveModeling.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_04_data_AppliedPredictiveModeling.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/23/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(AppliedPredictiveModeling) 12 | data(solubility) 13 | ls(pattern="sol") 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /Chapter10/c10_22_businessCycle.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_22_businessCycle.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | path<-"http://canisius.edu/~yany/RData/" 12 | dataSet<-"businesscycle" 13 | link<-paste(path,dataSet,".RData",sep='') 14 | load(url(link)) 15 | -------------------------------------------------------------------------------- /Chapter09/c9_22_reinforcementLearning_state_same_as_nextState.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c9_22_reinforcementLearning_state_same_as_next.R 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | x<-subset(data,data$State==data$NextState) 13 | head(x) 14 | unique(x$Reward) 15 | 16 | -------------------------------------------------------------------------------- /Chapter12/c12_04_parallel04.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c12_04_parallel.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 5/14/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | require(parallel) 12 | nCores <- 8 # manually for non-cluster machines 13 | cl <- makeCluster(nCores) # by default this uses the PSOCK mechanism 14 | 15 | -------------------------------------------------------------------------------- /Chapter04/c4_09_python_fv.py: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_09_python_fv.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as mlt 13 | n=np.linspace(0,10,10) 14 | pv=100 15 | R=0.1 16 | fv=pv*(1+R)**n 17 | mlt.plot(n,fv) 18 | mlt.show() -------------------------------------------------------------------------------- /Chapter05/c5_08_remove_missing_data.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_08_remove_missing_data.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | x<-c(NA,1,2,50,NA) 13 | y<-na.omit(x) 14 | mean(x) 15 | mean(y) 16 | 17 | 18 | 19 | 20 | y2<-na.exclude(x) 21 | 22 | mean(y) 23 | 24 | -------------------------------------------------------------------------------- /Chapter05/c5_28_run_julia_program.jl: -------------------------------------------------------------------------------- 1 | 2 | ### 3 | Name : c5_28_run_program.jl 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 1/25/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | ### 11 | 12 | 13 | # assume that helloworld.jl has the following one line. 14 | # println("Hello world") 15 | 16 | 17 | include("c:/temp/helloWorld.jl") -------------------------------------------------------------------------------- /Chapter04/c4_32_coin_grey.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_32_coin_grey.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(animation) 12 | myProb<-c(0.5,0.5) 13 | ani.options(interval=0.2,nmax = ifelse(interactive(), 100, 2)) 14 | flip.coin(bg = "grey",col=c("black","grey")) 15 | 16 | -------------------------------------------------------------------------------- /Chapter05/c5_26_f_ditribution_graph.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_26_f_distribution_graph.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | d1<-4 12 | d2<-2 13 | n<-100 14 | x = seq(0, 5, length = n) 15 | plot(x, df(x = x, df1 = d1, df2 = d2),type='l') 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /Chapter08/c8_12_considerDirection.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_12_considerDirection.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | angles<-seq(0,380,30) 12 | y<-cos(angles*pi/180) 13 | y2<-round(y,2) 14 | z<-cbind(angles,y2) 15 | colnames(z)<-c("ANGLE","cos(angle)") 16 | print(z) 17 | 18 | -------------------------------------------------------------------------------- /Chapter09/c9_23_example.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c9_23_example.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | #} 10 | 11 | 12 | x = load('c:/temp/ex2x.dat'); 13 | y = load('c:/temp/ex2y.dat'); 14 | figure 15 | plot(x, y, 'o'); 16 | ylabel('Height in meters') 17 | xlabel('Age in years') 18 | -------------------------------------------------------------------------------- /Chapter07/c7_08_optimize_help.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c7_00_optimize_help.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import scipy as sp 12 | x=dir(sp.optimize) 13 | print(x) 14 | 15 | 16 | #import numpy as np 17 | 18 | from scipy.optimize import minimize 19 | help(minimize) 20 | -------------------------------------------------------------------------------- /Chapter10/c10_28_Granger_test02.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c10_28_Granger_test02.R 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 4/24/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | library(lmtest) 13 | data(ChickEgg) 14 | grangertest(chicken~egg, order = 3, data = ChickEgg) 15 | 16 | 17 | grangertest(egg~chicken, order = 3, data = ChickEgg) -------------------------------------------------------------------------------- /Chapter04/c4_23_bisection_method.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_23_bisection_method.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(animation) 12 | par(mar = c(4, 4, 1, 2)) 13 | myFunction<-function(x) x^2 - 4 14 | title<-"Bisection method" 15 | bisection.method(FUN=myFunction,main =title) 16 | -------------------------------------------------------------------------------- /Chapter07/c7_15_fminsearch.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c7_15_fminsearch.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | source: https://octave.sourceforge.io/octave/function/fminsearch.html 11 | #} 12 | 13 | 14 | fun = @(x)50*(x(1)^2-x(2))^2 + (x(1)-3)^2; 15 | x0 = [0,0]; 16 | x = fminsearch(fun,x0) 17 | -------------------------------------------------------------------------------- /Chapter08/c8_11_randomForest_plot.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_11_rondomForest_plot.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(randomUniformForest) 12 | data(iris) 13 | ff<-unsupervised.randomUniformForest 14 | x =ff(iris[,-5],mtry=1,nodesize = 2) 15 | plot(x) 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Chapter09/c9_09_Bayes_titanic.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c9_09_Bayes_titanic.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(e1071) 12 | data(Titanic) 13 | m <- naiveBayes(Survived ~ ., data = Titanic) 14 | output<-predict(m, as.data.frame(Titanic)) 15 | 16 | # 17 | print(m) 18 | print(output) -------------------------------------------------------------------------------- /Chapter02/c3_01.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_01_iris_data.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | path<-"http://archive.ics.uci.edu/ml/machine-learning-databases/" 12 | dataSet<-"iris/bezdekIris.data" 13 | a<-paste(path,dataSet,sep='') 14 | x<-read.csv(a,header=F) 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Chapter02/c3_27_datadotworld_1.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_27_datadotworld_1.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import datadotworld as dw 13 | dataset = 'jonloyens/an-intro-to-dataworld-dataset' 14 | data = dw.load_dataset(dataset, force_update=True) 15 | list(dataset.dataframes) -------------------------------------------------------------------------------- /Chapter03/c3_01.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_01_iris_data.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | path<-"http://archive.ics.uci.edu/ml/machine-learning-databases/" 12 | dataSet<-"iris/bezdekIris.data" 13 | a<-paste(path,dataSet,sep='') 14 | x<-read.csv(a,header=F) 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Chapter03/c3_27_datadotworld_1.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_27_datadotworld_1.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import datadotworld as dw 13 | dataset = 'jonloyens/an-intro-to-dataworld-dataset' 14 | data = dw.load_dataset(dataset, force_update=True) 15 | list(dataset.dataframes) -------------------------------------------------------------------------------- /Chapter04/c4_03_pie.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_03_pie.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | results <- c(10,8,7,4,6) 12 | names<-c("1st","2nd","3rd","4th","5th") 13 | pct<-round(results/sum(results)*100) 14 | pct2<-rev(sort(pct)) 15 | pie(results, labels = names, main="Pie Chart of results") 16 | -------------------------------------------------------------------------------- /Chapter04/c4_29_annimation_flip_coin.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_29_annimation_flip_coin.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | library(animation) 11 | myProb<-c(0.45,0.1, 0.45) 12 | oopt = ani.options(interval=0.2,nmax = ifelse(interactive(), 100, 2)) 13 | flip.coin(bg = "yellow") 14 | 15 | 16 | 17 | # -------------------------------------------------------------------------------- /Chapter09/c9_06_print_iris.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c9_06_print_iris.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import numpy as np 13 | from sklearn import datasets 14 | iris = datasets.load_iris() 15 | print("Data\n",iris.data[0:4,]) 16 | print("target\n",iris.target[0:4]) 17 | 18 | 19 | -------------------------------------------------------------------------------- /Chapter10/c10_13_timeSeries.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_13_timeSeries.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | library(timeSeries) 13 | data(MSFT) 14 | x <- MSFT 15 | by <- timeSequence(from = start(x), to = end(x), by = "week") 16 | y<-aggregate(x,by,mean) 17 | head(x) 18 | head(y) 19 | 20 | 21 | -------------------------------------------------------------------------------- /Chapter12/c12_01_lapply.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c12_02_lappy.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 5/14/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | lapply(1:3,function(x) c(sin(x),x^2)) 13 | 14 | 15 | myFunctions<-function(x) c(sin(x),x^2+2,4*x^2-x^3-2) # a function, so x is bound on every call instead of being looked up at top level 16 | inputValue<-1:10 17 | output<-lapply(inputValue,myFunctions) # one 3-element vector per input value 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /Chapter02/c3_14_cbsodata.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_14_cbsodata.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import pandas as pd 12 | import cbsodata as cb 13 | name='82070ENG' 14 | data = pd.DataFrame(cb.get_data(name)) 15 | print(data.head()) 16 | info=cb.get_info(name) 17 | print(info['Title']) 18 | 19 | -------------------------------------------------------------------------------- /Chapter03/c3_14_cbsodata.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_14_cbsodata.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import pandas as pd 12 | import cbsodata as cb 13 | name='82070ENG' 14 | data = pd.DataFrame(cb.get_data(name)) 15 | print(data.head()) 16 | info=cb.get_info(name) 17 | print(info['Title']) 18 | 19 | -------------------------------------------------------------------------------- /Chapter03/c3_34_read_ff3monthly_csv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_34_read_ffmonthly_csv.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 5/16/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | inFile<-"http://canisius.edu/~yany/data/ff3monthly.csv" 12 | ff3monthly<-read.csv(inFile,skip=3) 13 | saveRDS(ff3monthly,file="c:/temp/ff3monthly.rds") 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /Chapter09/c9_01_titanic.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c9_01_titanic.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | path<-"http://canisius.edu/~yany/RData/" 12 | dataSet<-"titanic" 13 | link<-paste(path,dataSet,".RData",sep='') 14 | con<-url(link) 15 | load(con) 16 | dim(.titanic) 17 | head(.titanic) 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /Chapter02/c3_10_R_package_foreign.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_10_R_package_foreigh.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | # http://calcnet.mth.cmich.edu/org/spss/Prj_airlinePassengers.htm 12 | 13 | library(foreign) 14 | x<-read.spss("c:/temp/airline_passengers.sav", to.data.frame=TRUE) 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /Chapter03/c3_10_R_package_foreign.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_10_R_package_foreigh.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | # http://calcnet.mth.cmich.edu/org/spss/Prj_airlinePassengers.htm 12 | 13 | library(foreign) 14 | x<-read.spss("c:/temp/airline_passengers.sav", to.data.frame=TRUE) 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /Chapter05/c5_29_replace_na_with_mean.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c5_29_repace_na_with_mean.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import scipy as sp 13 | import pandas as pd 14 | df = pd.DataFrame({'A' : [2,sp.nan,3,4]}) 15 | print(df) 16 | df.fillna(df.mean(), inplace=True) 17 | print(df) 18 | 19 | 20 | -------------------------------------------------------------------------------- /Chapter07/c7_13_fminsearch.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c7_13_fminsearch.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | source: https://octave.sourceforge.io/octave/function/fminsearch.html 11 | #} 12 | 13 | 14 | fcn = @(x) (x(1)-5).^2 + (x(2)-8).^4 15 | 16 | x0 = [0;0]; 17 | 18 | [xmin, fval] = fminsearch (fcn, x0) 19 | 20 | -------------------------------------------------------------------------------- /Chapter10/c10_08_get_UCIdatasets.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_08_get_UCIdatasets.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | dataSet<-"UCIdatasets" 12 | path<-"http://canisius.edu/~yany/RData/" 13 | con<-paste(path,dataSet,".RData",sep='') 14 | load(url(con)) 15 | dim(.UCIdatasets) 16 | head(.UCIdatasets) 17 | 18 | 19 | -------------------------------------------------------------------------------- /Chapter04/c4_04_Pyplot_julia.jl: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c4_04_PyPlot_julia.jl 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 1/25/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | # Pkg.add("PyPlot") 13 | 14 | using PyPlot 15 | x = linspace(0,2*pi,1000) 16 | y = sin.(2*x + 3*cos.(1.5*x)); # sin must be dot-broadcast over the vector, just like cos 17 | plot(x, y, color="green", linewidth=2.0, linestyle="--") 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /Chapter08/c8_20_5pointsCluster.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_20_5pointsCluter.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | library(cluster) 13 | data <- rbind(c(180,20), c(160,5), c(60, 150), c(160,60), c(80,120)) 14 | output<-clara(data,2) 15 | output$clustering 16 | #output$clusinfo 17 | output$medoids 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /Chapter12/c12_05_snow_01.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c12_05_snow_01.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 5/14/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(snow) 12 | cl <- makeSOCKcluster(c("localhost","localhost")) 13 | clusterApply(cl, 1:2, get("+"), 3) 14 | clusterEvalQ(cl, library(boot)) 15 | x<-1 16 | clusterExport(cl, "x") 17 | clusterCall(cl, function(y) x + y, 2) -------------------------------------------------------------------------------- /Chapter02/c3_23_datadotworld.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_23_datadotworld.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import datadotworld as dw 13 | dataset = dw.load_dataset('c:/temp/an-intro-to-dataworld-dataset') 14 | list(dataset.dataframes) 15 | ['changelog', 'datadotworldbballstats', 'datadotworldbballteam'] 16 | 17 | -------------------------------------------------------------------------------- /Chapter03/c3_23_datadotworld.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_23_datadotworld.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import datadotworld as dw 13 | dataset = dw.load_dataset('c:/temp/an-intro-to-dataworld-dataset') 14 | list(dataset.dataframes) 15 | ['changelog', 'datadotworldbballstats', 'datadotworldbballteam'] 16 | 17 | -------------------------------------------------------------------------------- /Chapter02/c3_04_save_RDatat.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_04_save_RData.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | path<-"http://archive.ics.uci.edu/ml/machine-learning-databases/" 12 | dataSet<-"iris/bezdekIris.data" 13 | a<-paste(path,dataSet,sep='') 14 | iris<-read.csv(a,header=F) 15 | save(iris,file="c:/temp/iris.RData") 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Chapter03/c3_04_save_RDatat.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_04_save_RData.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | path<-"http://archive.ics.uci.edu/ml/machine-learning-databases/" 12 | dataSet<-"iris/bezdekIris.data" 13 | a<-paste(path,dataSet,sep='') 14 | iris<-read.csv(a,header=F) 15 | save(iris,file="c:/temp/iris.RData") 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Chapter04/c4_21_plot_Julia.jl: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c4_21_plot_julia.jl 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 1/25/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | 13 | using PyPlot 14 | x = linspace(0, 10, 200) 15 | y = sin.(x) 16 | name=L"$y = \sin(x)$" 17 | fig, ax = subplots() 18 | ax[:plot](x, y, "r-", linewidth=2, label=name,alpha=0.6) 19 | ax[:legend](loc="upper center") 20 | 21 | 22 | -------------------------------------------------------------------------------- /Chapter04/c4_13_add_trendLine.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_13_add_trendLine.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | x=seq(-4,4,by=0.05) 12 | n<-length(x) 13 | y=2+3*x + rnorm(n) 14 | k<-coef(lm(y~x)) 15 | intercept<-k[1] 16 | slope<-k[2] 17 | y2<-intercept+slope*x 18 | # 19 | plot(x,y,type="p",col="red") 20 | lines(x,y2,col="green") 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Chapter10/c10_06_fisher_Z_score.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_05_fisher_z_score.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(eclust) 12 | data("simdata") 13 | X = simdata[,c(-1,-2)] 14 | firstCorr<-cor(X[1:50,]) 15 | secondCorr<-cor(X[51:100,]) 16 | score<-u_fisherZ(n0=100,cor0=firstCorr,n1=100,cor1=secondCorr) 17 | dim(score) 18 | score[1:5,1:5] 19 | -------------------------------------------------------------------------------- /Chapter11/c11_01_qt_consol.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c11_01_qt_consol.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 5/8/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import numpy as np 12 | from scipy.special import jn 13 | import matplotlib.pyplot as plt 14 | from matplotlib.pyplot import plot 15 | # 16 | x=np.linspace(0,3*np.pi) 17 | for i in range(6): 18 | plot(x,jn(i,x)) 19 | # 20 | plt.show() 21 | -------------------------------------------------------------------------------- /Chapter08/c8_03_cluster_animals.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_03_cluster_animals.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | library(cluster) 13 | data(animals) 14 | dim(animals) 15 | head(animals) 16 | colnames(animals) 17 | apply(animals,2, table) # simple overview 18 | 19 | 20 | 21 | 22 | 23 | ma <- mona(animals) 24 | ma 25 | plot(ma) 26 | 27 | 28 | -------------------------------------------------------------------------------- /Chapter10/c10_31_timeUsed.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Name : c10_31_timeUsed.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | =# 10 | 11 | 12 | using QuantEcon 13 | nodes, weights = qnwlege(65, -2pi, 2pi); 14 | integral = do_quad(x -> cos.(x), nodes, weights) # the integrand receives the whole node vector, so broadcast cos 15 | @time quadgk(x -> cos.(x), -2pi, 2pi) 16 | @time do_quad(x -> cos.(x), nodes, weights) 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /Chapter09/c9_20_unique_value_iris.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Name : c9_20_unique_value_iris.py 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | """ 11 | 12 | import sklearn as sk 13 | from sklearn import datasets 14 | iris = datasets.load_iris() 15 | mylist=list(iris.target) 16 | # dict.fromkeys keeps the first occurrence of each label, in original order 17 | unique = list(dict.fromkeys(mylist)) 18 | print(unique) 19 | 20 | -------------------------------------------------------------------------------- /Chapter04/c4_07_shaded_area_standard_normal_dist.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_07_shaded_area_standard_normal_dist.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | x<-seq(-3,3,length=100) 12 | y<-dnorm(x,mean=0,sd=1) 13 | plot(x,y,type="b",lwd=3,col="black") 14 | x<-seq(-4,-2.33,length=100) 15 | y<-dnorm(x,mean=0,sd=1) 16 | polygon(c(-3,x,-2.33),c(0,y,0),col="red") 17 | 18 | -------------------------------------------------------------------------------- /Chapter07/c7_02.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c7_02.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | a<--2 13 | b<-10 14 | c<-5 15 | f<-function(x)-(a*x^2+b*x+c) 16 | 17 | optim(0.3,f) 18 | 19 | 20 | #optim(0,f,method="Brent",lower = -100, upper = 100) 21 | 22 | 23 | 24 | f<-function(x)3*x^2-4*x+1 25 | optim(0.3,f) 26 | optim(0,f,method="Brent",lower = -100, upper = 100) # Brent is the bounded method intended for one-dimensional problems -------------------------------------------------------------------------------- /Chapter07/c7_06_.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c7_06.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | A<-1.0 12 | mean<-0.10 13 | std<-0.5 14 | set.seed(123) 15 | nStock<-10 16 | nRet<-50 17 | # 18 | n<-nStock*nRet 19 | retMatrix<-matrix(rnorm(n,mean,std),nRet,nStock) 20 | names<-paste("RET",1:nStock,sep='') 21 | colnames(retMatrix)<-names 22 | w<-rep(1/nStock,nStock) 23 | 24 | 25 | -------------------------------------------------------------------------------- /Chapter08/c8_24_01.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Name : c8_24_01.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | =# 10 | 11 | 12 | using Base.Test 13 | using ClusterAnalysis 14 | 15 | lbls = vec([1 1 1 2 2 2]) 16 | expected = vec([2 2 2 3 3 3]) 17 | # identical partitions under relabelling, so the adjusted Rand index is asserted to be exactly 1.0 18 | @test 1.0 == ClusterAnalysis.adjusted_rand_index(lbls, expected) 19 | 20 | lbls = vec([1 1 1 2 2 2]) 21 | expected = vec([1 2 1 1 2 1]) 22 | -------------------------------------------------------------------------------- /Chapter08/c8_28_iris_kMean_sklearn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c8_28_iris_kMean_sklearn.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | from sklearn import cluster 13 | from sklearn import datasets 14 | x=datasets.load_iris() 15 | iris = datasets.load_iris() 16 | x = iris.data 17 | k_means = cluster.KMeans(n_clusters=3) 18 | k_means.fit(x) 19 | print(k_means.labels_[::10]) -------------------------------------------------------------------------------- /Chapter03/c3_32_write_sas_write_spss_write_stata.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_32_write_sas_write_spss_write_stata.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 5/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(haven) 12 | x<-1:100 13 | y<-matrix(x,50,2) 14 | z<-data.frame(y) 15 | colnames(z)<-c("a","b") 16 | write_sas(z,"c:/temp/tt.sas7bdat") 17 | write_spss(z,"c:/temp/tt.sav") 18 | write_stata(z,"c:/temp/tt.dta") 19 | 20 | -------------------------------------------------------------------------------- /Chapter07/c7_10_3D_graph.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c7_10_3D.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(scatterplot3d) 12 | x<-seq(-2,2,0.05) 13 | y<-seq(-2,2,0.05) 14 | z<-(x^2-1)^2+(x^2*y-x-1)^2 15 | name<-"3 dimensional graph" 16 | scatterplot3d(x, y, z, highlight.3d = TRUE, col.axis = "blue", 17 | col.grid = "lightblue", main =name, pch = 2) 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /Chapter08/c8_08_python_hierarchical.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c8_08_python_hierachical.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import numpy as np 13 | import scipy.cluster.hierarchy as hac 14 | import matplotlib.pyplot as plt 15 | # 16 | n=100 17 | x=np.random.normal(0,8,n) 18 | y=np.random.normal(10,8,n) 19 | a = [x,y] 20 | z = hac.linkage(a, method='single') 21 | plt. -------------------------------------------------------------------------------- /Chapter08/c8_09_randomUniformForest_not_working.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c8_00_randomUniformForest.R 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 
6 | Author : Yuxing Yan and James Yan 7 | Date : 3/25/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | library(randomUniformForest) 13 | data(iris) 14 | x<-randomUniformForest(Species ~ ., data = iris, threads = 1, ntree = 20, BreimanBounds = FALSE) 15 | # get the 10th tree 16 | OneTree <- getTree.randomUniformForest(x, 10) 17 | 18 | -------------------------------------------------------------------------------- /Chapter08/c8_25_clustering.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Name : c8_25_clustering.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | =# 10 | 11 | 12 | # NOTE(review): pairwise/Euclidean look like Distances.jl names -- confirm they are in scope via Clustering 13 | using Clustering 14 | srand(12345) 15 | d = 10 16 | n = 500 17 | x = rand(d, n) 18 | S = -pairwise(Euclidean(), x, x) 19 | # set diagonal value to median value 20 | S = S - diagm(diag(S)) + median(S)*eye(size(S,1)) 21 | R = affinityprop(S) 22 | 23 | 24 | -------------------------------------------------------------------------------- /Chapter04/c4_30_annimation3flip_coin.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_30_annimation_flipCoin2.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | library(animation) 13 | myProb<-c(0.45,0.1, 0.45) 14 | names<- c("Head", "Stand", "Tail") 15 | oopt = ani.options(interval=0.2,nmax = ifelse(interactive(), 100, 2)) 16 | flip.coin(faces =names,type="n",prob = myProb, col = c(1, 2, 4)) 17 | 18 | 19 | -------------------------------------------------------------------------------- /Chapter08/c8_13_mixMod_bar.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_13_Rmixmod_bar.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(Rmixmod) 12 | data(birds) 13 | x <- mixmodCluster(birds,2) 14 | bb<-barplotCluster 15 | bb(x["bestResult"], birds) 16 | bb(x["bestResult"], birds, variables=c(2,3,4)) 17 | bb(x["bestResult"], birds, variables=c("eyebrow","collar")) 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /Chapter02/c3_16_missing_code_R.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_16_missing_code.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | # NOTE(review): na_example is not created in this script; presumably it is a packaged data set -- confirm and load it first 11 | head(na_example,20) 12 | # [1] 2 1 3 2 1 3 1 4 3 2 2 NA 2 2 1 4 NA 1 1 2 13 | length(na_example) 14 | # [1] 1000 15 | x<-na.exclude(na_example) 16 | length(x) 17 | # [1] 855 18 | head(x,20) 19 | # [1] 2 1 3 2 1 3 1 4 3 2 2 2 2 1 4 1 1 2 1 2 20 | 21 | -------------------------------------------------------------------------------- /Chapter03/c3_16_missing_code_R.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_16_missing_code.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | > head(na_example,20) 12 | [1] 2 1 3 2 1 3 1 4 3 2 2 NA 2 2 1 4 NA 1 1 2 13 | > length(na_example) 14 | [1] 1000 15 | > x<-na.exclude(na_example) 16 | > length(x) 17 | [1] 855 18 | > head(x,20) 19 | [1] 2 1 3 2 1 3 1 4 3 2 2 2 2 1 4 1 1 2 1 2 20 | > 21 | -------------------------------------------------------------------------------- /Chapter03/c3_16_missing_code_R.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_16_missing_code.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | > head(na_example,20) 12 | [1] 2 1 3 2 1 3 1 4 3 2 2 NA 2 2 1 4 NA 1 1 2 13 | > length(na_example) 14 | [1] 1000 15 | > x<-na.exclude(na_example) 16 | > length(x) 17 | [1] 855 18 | > head(x,20) 19 | [1] 2 1 3 2 1 3 1 4 3 2 2 2 2 1 4 1 1 2 1 2 20 | > 21 | -------------------------------------------------------------------------------- /Chapter04/c4_16_plot_julia.jl: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_16_plot_julia.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | # Pkg.add("Plots") 12 | 13 | using Plots 14 | srand(123) 15 | plot(rand(5,5),linewidth=2,title="Simple plot") 16 | 17 | 18 | using PyPlot 19 | x = linspace(0,2*pi,1000) 20 | y = sin(2*x + 3*cos(1.5*x)); 21 | plot(x, y, color="green", linewidth=2.0, linestyle="--") 22 | 23 | -------------------------------------------------------------------------------- /Chapter04/c4_31_pie_grey.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_31_pie_grey.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | results <- c(10,8,7,4,6) 12 | names<-c("1st","2nd","3rd","4th","5th") 13 | pct<-round(results/sum(results)*100) 14 | pct2<-rev(sort(pct)) 15 | name<-"Pie Chart of results" 16 | colors<-seq(0.4, 1.0, length = length(results)) 17 | pie(results, labels = names, col = gray(colors),main=name) -------------------------------------------------------------------------------- /Chapter06/c6_17_financialCalculator.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_17_financialCalculator.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | 13 | pv_f<-function(fv,r,n) fv/(1+r)^n 14 | fv_f<-function(pv,r,n)pv*(1+r)^n 15 | pv_annuity<-function(c,r,n)c/r*(1-1/(1+r)^n) 16 | fv_annuity<-function(c,r,n)c/r*((1+r)^n-1) 17 | pv_perptuity<-function(c,r)c/r 18 | pv_perptuityDue<-function(c,r)c/r*(1+r) 19 | -------------------------------------------------------------------------------- /Chapter08/c8_23_randomNumbersFrom2normal.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_23_randomNumbersFrom2normal.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(cluster) 12 | set.seed(123) 13 | n1<-200; mean1<-0; std1<-8 14 | n2<-300; mean2<-80; std2<-8 15 | set1<-cbind(rnorm(n1,mean1,std1), rnorm(n1,mean1,std1)) 16 | set2<-cbind(rnorm(n2,mean2,std2), rnorm(n2,mean2,std2)) 17 | x <- rbind(set1,set2) 18 | -------------------------------------------------------------------------------- /Chapter04/c4_19_scatter_plot_PyPlot.jl: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_19_scatter_plot_PyPlot.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | using PyPlot 11 | n=50 12 | x = 100*rand(n) 13 | y = 100*rand(n) 14 | areas = 800*rand(n) 15 | fig = figure("pyplot_scatterplot",figsize=(10,10)) 16 | ax = axes() 17 | scatter(x,y,s=areas,alpha=0.5) 18 | title("Scatter Plot") 19 | xlabel("X") 20 | ylabel("Y") 21 | grid("on") -------------------------------------------------------------------------------- /Chapter04/c4_27_3stock_connection.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_27_3stock_connection.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(qgraph) 12 | stocks<-c("IBM","MSFT","WMT") 13 | x<-rep(stocks, each = 3) 14 | y<-rep(stocks, 3) 15 | correlation<-c(0,10,3,10,0,3,3,3,0) 16 | data <- as.matrix(data.frame(from =x , to =y, width =correlation)) 17 | qgraph(data, mode = "direct", edge.color = rainbow(9)) -------------------------------------------------------------------------------- /Chapter04/c4_33_plot_grey.jl: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_16_plot_julia.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | plot(rand(5,5),color="grep",linewidth=2,title="Simple plot") 11 | #plot(rand(5,5),color="grep",linewidth=2,title=name) 12 | " 13 | 14 | # Pkg.add("Plots") 15 | 16 | using Plots 17 | srand(123) 18 | name="Simple plot" 19 | plot(rand(5,5),color="gray",linewidth=2,title=name) 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /Chapter08/c8_31_generate_dendrogram_using20obsWine.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_31_generate_dendrogram_using20obsWine.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | library(rattle.data) 13 | data(wine) 14 | x<-head(wine,10) 15 | 16 | library(rattle) 17 | rattle() 18 | 19 | # choose R data set 20 | # choose x 21 | # hit Execute 22 | # choose cluster 23 | # hit Execute 24 | # choose Dendrogram 25 | -------------------------------------------------------------------------------- /Chapter09/c9_21_reinforcementLearning.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c9_21_reinforcementLearning.R 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 
6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | library("ReinforcementLearning") 13 | set.seed(123) 14 | data <- sampleGridSequence(1000) 15 | dim(data) 16 | head(data) 17 | unique(data$State) 18 | unique(data$Action) 19 | unique(data$NextState) 20 | unique(data$Reward) 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /Chapter02/c3_24_get_iris.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_24_get_iris.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import pandas as pd 12 | path="http://archive.ics.uci.edu/ml/machine-learning-databases/" 13 | dataset="iris/bezdekIris.data" 14 | inFile=path+dataset 15 | data=pd.read_csv(inFile,header=None) 16 | data.columns=["sepalLength","sepalWidth","petalLength","petalWidth","Class"] 17 | print(data.head(2)) -------------------------------------------------------------------------------- /Chapter03/c3_24_get_iris.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_24_get_iris.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import pandas as pd 12 | path="http://archive.ics.uci.edu/ml/machine-learning-databases/" 13 | dataset="iris/bezdekIris.data" 14 | inFile=path+dataset 15 | data=pd.read_csv(inFile,header=None) 16 | data.columns=["sepalLength","sepalWidth","petalLength","petalWidth","Class"] 17 | print(data.head(2)) -------------------------------------------------------------------------------- /Chapter04/c4_06_add_labels.py: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_06_add_labels.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | import scipy as sp 13 | import matplotlib.pyplot as plt 14 | # 15 | x=sp.linspace(-2*sp.pi,2*sp.pi,200,endpoint=True) 16 | y=sp.cos(x) 17 | plt.plot(x,y) 18 | plt.xlabel("x-value") 19 | plt.ylabel("Cosine function") 20 | plt.title("Cosine curve from -2pi to 2pi") 21 | plt.show() 22 | 23 | -------------------------------------------------------------------------------- /Chapter10/c10_09_usGDP.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_09_usGDP.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | path<-"http://canisius.edu/~yany/RData/" 12 | dataSet<-"usGDPannual" 13 | con<-paste(path,dataSet,".RData",sep='') 14 | load(url(con)) 15 | 16 | 17 | head(.usGDPannual) 18 | dataSet<-"usGDPquarterly" 19 | con<-paste(path,dataSet,".RData",sep='') 20 | load(url(con)) 21 | head(.usGDPquarterly) 22 | 23 | -------------------------------------------------------------------------------- /Chapter10/c10_30_QuantEcon_simulated.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Name : c10_30_QuantEcon_simulated.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | =# 10 | 11 | 12 | using QuantEcon 13 | P = [0.4 0.6; 0.2 0.8]; 14 | mc = MarkovChain(P) 15 | x = simulate(mc, 100000); 16 | mean(x .== 1) 17 | # 18 | mc2 = MarkovChain(P, ["employed", "unemployed"]) 19 | simulate(mc2, 4) 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /Chapter12/c12_10_plyr_arrange.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c12_10_plyr_arrange.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 5/14/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(datasets) 12 | library(plyr) 13 | data(mtcars) 14 | # 15 | d1<-mtcars[with(mtcars, order(cyl, disp)), ] 16 | d2<-arrange(mtcars, cyl, disp) 17 | myCars = cbind(vehicle=row.names(mtcars), mtcars) 18 | d3<-arrange(myCars, cyl, disp) 19 | d4<-arrange(myCars, cyl, desc(disp)) 20 | 21 | 22 | -------------------------------------------------------------------------------- /Chapter02/c3_22_sort_by2columns.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c3_22_sort_by2variables.R 4 | Book : Hands-on Data Science with Anaconda) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 1/15/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | x<-c(1,3,1, 0.1,0.3,-0.4,100,300,30) 13 | y<-data.frame(matrix(x,3,3)) 14 | colnames(y)<-c("ID","RET","Data1") 15 | 16 | y 17 | 18 | z<-y[order(y$ID,y$RET),] 19 | 20 | 21 | 22 | z2<-y[order(y$ID,decreasing = TRUE,y$RET),] 23 | z 24 | 25 | 26 | 27 | 28 | decreasing = FALSE 29 | -------------------------------------------------------------------------------- /Chapter03/c3_22_sort_by2columns.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c3_22_sort_by2variables.R 4 | Book : Hands-on Data Science with Anaconda) 5 | Publisher: Packt Publishing Ltd. 
6 | Author : Yuxing Yan and James Yan 7 | Date : 1/15/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | x<-c(1,3,1, 0.1,0.3,-0.4,100,300,30) 13 | y<-data.frame(matrix(x,3,3)) 14 | colnames(y)<-c("ID","RET","Data1") 15 | 16 | y 17 | 18 | z<-y[order(y$ID,y$RET),] 19 | 20 | 21 | 22 | z2<-y[order(y$ID,decreasing = TRUE,y$RET),] 23 | z 24 | 25 | 26 | 27 | 28 | decreasing = FALSE 29 | -------------------------------------------------------------------------------- /Chapter05/c5_23_number_outliers.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_23_number_outliers.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | distance<-3 11 | x<-read.csv("c:/temp/^GSPCweekly.csv") 12 | p<-x$Adj.Close 13 | n<-length(p) # number of price observations; needed by the return formula below 14 | ret<-p[2:n]/p[1:(n-1)]-1 15 | m<-mean(ret) 16 | std<-sd(ret) 17 | ret2<-subset(ret,((ret-m)/std)>distance) 18 | n2<-length(ret2) 19 | 20 | 21 | head(x,2) 22 | m 23 | std 24 | length(ret) 25 | n2 26 | -------------------------------------------------------------------------------- /Chapter07/c7_19_optimization.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c7_19_optimization.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | #} 11 | 12 | 13 | 14 | OPTIONS = optimset('Display','iter'); 15 | function f = fun2(x) 16 | f = 0; 17 | for k = -5:5 18 | f = f + exp(-(x(1)-x(2))^2 - 2*x(1)^2)*cos(x(2))*sin(2*x(2)); 19 | end 20 | endfunction 21 | x0 = [0.5,-0.5]; 22 | [x,fval] = fminsearch(@fun2,x0,OPTIONS) 23 | 24 | 25 | -------------------------------------------------------------------------------- /Chapter04/c4_22_brownian_motion_animation.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_22_Brownian_motion_anmation.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(animation) 12 | ani.options(interval = 0.05, nmax = 30) 13 | a<- c(3,3,2,0.5) 14 | b<-c(2,0.5,0) 15 | name<-"Brownian Motion" 16 | par(mar=a,mgp=b,tcl=-0.3,cex.axis=0.8,cex.lab=0.8,cex.main=1) 17 | brownian.motion(pch=21,cex=5,col="red",bg="yellow",main=name) 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /Chapter10/c10_14_movingAverage.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_14_movingAverage.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | movingAverageFunction<- function(data,n=10){ 12 | out= data 13 | for(i in n:length(data)){ 14 | out[i] = mean(data[(i-n+1):i]) 15 | } 16 | return(out) 17 | } 18 | 19 | library(timeSeries) 20 | data(MSFT) 21 | p<-MSFT$Close 22 | # 23 | ma<-movingAverageFunction(p,3) 24 | head(p) 25 | head(ma) 26 | -------------------------------------------------------------------------------- /Chapter02/c3_25_sort_Python.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_25_sort_python.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | a = pd.DataFrame([[8,3],[8,2],[1,-1]],columns=['X','Y']) 14 | print(a) 15 | # sort by A ascedning, then B descending 16 | b= a.sort_values(['X', 'Y'], ascending=[1, 0]) 17 | print(b) 18 | # sort by A and B, both ascedning 19 | c= a.sort_values(['X', 'Y'], ascending=[1, 1]) 20 | print(c) -------------------------------------------------------------------------------- /Chapter03/c3_25_sort_Python.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_25_sort_python.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | a = pd.DataFrame([[8,3],[8,2],[1,-1]],columns=['X','Y']) 14 | print(a) 15 | # sort by A ascedning, then B descending 16 | b= a.sort_values(['X', 'Y'], ascending=[1, 0]) 17 | print(b) 18 | # sort by A and B, both ascedning 19 | c= a.sort_values(['X', 'Y'], ascending=[1, 1]) 20 | print(c) -------------------------------------------------------------------------------- /Chapter05/c5_18_ff3_factor_ibm.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_18_ff3_factor_ibm.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | con<-url("http://canisius.edu/~yany/RData/ff3monthly.RData") 12 | load(con) 13 | head(.ff3monthly) 14 | x<-read.csv("http://canisius.edu/~yany/data/ibmMonthly.csv") 15 | stock<-ret_f(x) 16 | final<-merge(stock,.ff3monthly) 17 | y<-final$RET 18 | x<-as.matrix(data.frame(final[,3:5])) 19 | summary(lm(y~x)) 20 | 21 | -------------------------------------------------------------------------------- /Chapter03/c3_33_generate_z_csv.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_33_generate_z_csv.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 5/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | set.seed(123) 12 | n=500 13 | x<-rnorm(n) 14 | x2<-x 15 | m=100 16 | y<-as.integer(runif(m)*n) 17 | x[y]<-0 18 | z<-matrix(x,n/10,10) 19 | outFile<-"c:/temp/z.csv" 20 | write.table(z,file=outFile,quote=F,row.names=F,col.names=F,sep=',') 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /Chapter04/c4_11_histogram.py: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_11_histogram.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | data = np.random.RandomState(10) 14 | n=5000 15 | n2=1000 16 | x=data.normal(size=n) 17 | y=data.normal(loc=5, scale=2,size=n2) 18 | a=(x,y) 19 | b = np.hstack(a) 20 | plt.hist(b, bins='auto') 21 | plt.title("Histogram with 'auto bins'") 22 | plt.show() 23 | 24 | -------------------------------------------------------------------------------- /Chapter05/c5_25_get_critical_value_F_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c5_25_get_critical_value_F_test.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import scipy as sp 13 | alpha=0.10 14 | d1=1 15 | d2=1 16 | critical=sp.stats.f.ppf(q=1-alpha, dfn=d1, dfd=d2) 17 | prob=sp.stats.f.cdf(critical, dfn=d1, dfd=d2) 18 | print("alpha, d1, d2, critical value, prob") 19 | print(alpha, d1, d2, critical, prob) 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Chapter09/c9_34_Kmean_randomNumbers.jl: -------------------------------------------------------------------------------- 1 | ### 2 | Name : c9_34_Kmean_randomNumbers.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | ### 10 | 11 | using Clustering 12 | srand(1234) 13 | nRow=5 14 | nCol=1000 15 | x = rand(nRow,nCol) 16 | maxInter=200 #max interation 17 | nCluster=20 18 | R = kmeans(x,nCluster;maxiter=maxInter,display=:iter) 19 | @assert nclusters(R) ==nCluster 20 | c = counts(R) 21 | clusters= R.centers 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /Chapter10/c10_11_seasonality_usGDPquarterly.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_11_seasonality_usGDPqiarterly.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | library(astsa) 11 | path<-"http://canisius.edu/~yany/RData/" 12 | dataSet<-"usGDPquarterly" 13 | con<-paste(path,dataSet,".RData",sep='') 14 | load(url(con)) 15 | x<-.usGDPquarterly$DATE 16 | y<-.usGDPquarterly$GDP_CURRENT 17 | plot(x,y) 18 | diff4 = diff(y,4) 19 | acf2(diff4,24) 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Chapter07/c7_22_optim.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Name : c7_22_optim.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | =# 10 | 11 | using Optim 12 | # Rosenbrock objective; g! below writes its gradient into s 13 | f(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2 14 | function g!(s, x) 15 | s[1] = -2.0*(1.0-x[1])-400.0*(x[2]-x[1]^2)*x[1] 16 | s[2] = 200.0*(x[2]-x[1]^2) 17 | end 18 | lower = [1.25, -2.1] 19 | upper = [Inf, Inf] 20 | initial_x = [2.0, 2.0] 21 | od = OnceDifferentiable(f, g!, initial_x) 22 | results = optimize(od, initial_x,lower,upper,Fminbox{GradientDescent}()) -------------------------------------------------------------------------------- /Chapter09/c9_12_load_iris.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c9_12_load_iris.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import sklearn as sk 12 | from sklearn import datasets 13 | iris = datasets.load_iris() 14 | print("data:\n",iris.data[0:4,]) 15 | print("target",iris.target[0:2,]) 16 | mylist=list(iris.target) 17 | used = [] 18 | unique = [x for x in mylist if x not in used and used.append(x)] 19 | print("unique values for targets\n",used) 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /Chapter09/c9_19_logicReg.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c9_19_logicReg.R 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | library(LogicReg) 13 | data(logreg.testdat) 14 | y<-logreg.testdat[,1] 15 | x<-logreg.testdat[, 2:21] 16 | n=1000 17 | n2=25000 18 | set.seed(123) 19 | myanneal<-logreg.anneal.control(start=-1,end=-4,iter=n2,update=n) 20 | output<-logreg(resp=y,bin=x,type=2,select = 1,ntrees=2,anneal.control=myanneal) 21 | plot(output) 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /Chapter02/c3_11_R_package_dslabs.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_11_R_package_dslabs.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | murders 11 | state abb region population total 12 | 1 Alabama AL South 4779736 135 13 | 2 Alaska AK West 710231 19 14 | 3 Arizona AZ West 6392017 232 15 | 16 | " 17 | 18 | library(dslabs) 19 | 20 | data(murders) 21 | 22 | head(murders) 23 | 24 | -------------------------------------------------------------------------------- /Chapter03/c3_11_R_package_dslabs.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_11_R_package_dslabs.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | murders 11 | state abb region population total 12 | 1 Alabama AL South 4779736 135 13 | 2 Alaska AK West 710231 19 14 | 3 Arizona AZ West 6392017 232 15 | 16 | " 17 | 18 | library(dslabs) 19 | 20 | data(murders) 21 | 22 | head(murders) 23 | 24 | -------------------------------------------------------------------------------- /Chapter05/c5_07_random_OLS.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c5_07_random_OLS.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | from scipy import stats 12 | import scipy as sp 13 | sp.random.seed(31233) 14 | alpha=2.0 15 | beta=3.8 16 | n=1000 17 | x=sp.arange(n) 18 | y=alpha+beta*x+sp.random.rand(n) 19 | (beta, alpha, r_value, p_value, std_err) = stats.linregress(y,x) 20 | print("Alpha , Beta") 21 | print(alpha,beta) 22 | print("R-squared=", r_value**2) 23 | print("p-value =", p_value) 24 | -------------------------------------------------------------------------------- /Chapter08/c8_10_wine_quality.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_10_wine_quality.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(randomUniformForest) 12 | data(wineQualityRed) 13 | x = wineQualityRed[, -ncol(wineQualityRed)] 14 | # run unsupervised analysis on the first half of dataset 15 | data1 = 1:floor(nrow(x)/2) 16 | shortFunction<-unsupervised.randomUniformForest 17 | model1 =shortFunction(x,subset =data1,depth = 5) 18 | plot(model1) 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /Chapter09/c9_08_naiveBayes.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c9_08_naiveBayes.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(mlbench) 12 | data(HouseVotes84) 13 | head(HouseVotes84) 14 | 15 | library(e1071) 16 | data(HouseVotes84, package = "mlbench") 17 | model<-naiveBayes(Class ~ ., data = HouseVotes84) 18 | # 19 | predict(model, HouseVotes84[1:10,]) 20 | predict(model, HouseVotes84[1:10,], type = "raw") 21 | pred <- predict(model, HouseVotes84) 22 | table(pred, HouseVotes84$Class) 23 | -------------------------------------------------------------------------------- /Chapter07/c7_04_convex_function2.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c7_04_convex_function2.R 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 3/15/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | x<-seq(-4,4,0.1) 13 | a<-1 14 | b<-2 15 | c<-3 16 | myFunction<-function(x)a*x^2+b*x+c 17 | y<-myFunction(x) 18 | name<-"For a convex function:chord is above" 19 | plot(x,y,type='l',main=name) 20 | x1<--2 21 | y1<-myFunction(x1) 22 | x2<-3 23 | y2<-myFunction(x2) 24 | segments(x1, y1, x2, y2,col = par("fg"), lty = par("lty"), xpd = FALSE) 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /Chapter09/c9_24_reinforcementLearning_example.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c9_124_reinforcementLearning_example.R 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 
6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | library(ReinforcementLearning) 13 | set.seed(123) 14 | data <- sampleGridSequence(1000) 15 | control <- list(alpha = 0.1, gamma = 0.1, epsilon = 0.1) 16 | model <- ReinforcementLearning(data,s="State",a="Action",r="Reward",s_new="NextState",control=control) 17 | print(model) 18 | 19 | 20 | # Plotting learning curve 21 | plot(model) 22 | 23 | -------------------------------------------------------------------------------- /Chapter07/c7_07_optimization_01.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c7_07_optimization_01.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import numpy as np 13 | from scipy.optimize import minimize 14 | def rosen(x): 15 | """The Rosenbrock function""" 16 | return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0) 17 | # 18 | x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2]) 19 | solution= minimize(rosen, x0, method='nelder-mead',options={'xtol': 1e-8, 'disp': True}) 20 | 21 | print(solution.x) 22 | 23 | -------------------------------------------------------------------------------- /Chapter04/c4_15_add_Greek_letters.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_15_add_Greek_letters.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | set.seed(12345) 12 | mu=4 13 | std=2 14 | nRandom=2000 15 | x <- rnorm(mean =mu, sd =std, n =nRandom) 16 | name<- "Normal Probability Density Function" 17 | curve(dnorm, from = -3, to = 3, n = 1000, main = name) 18 | formula<-expression(f(x) ==paste(frac(1, sqrt(2 * pi * sigma^2))," ",e^{frac(-(x - mu)^2, 2 * sigma^2)})) 19 | text(2, 0.3, formula, cex = 1.3) 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Chapter09/c9_32_iris.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Name : c9_32_iris.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | =# 10 | 11 | using Gadfly 12 | using RDatasets 13 | using Clustering 14 | iris = dataset("datasets", "iris") 15 | head(iris) 16 | features=permutedims(convert(Array, iris[:,1:4]),[2, 1]) 17 | result=kmeans(features,3) 18 | nameX="PetalLength" 19 | nameY="PetalWidth" 20 | assignments=result.assignments 21 | plot(iris, x=nameX,y=nameY,color=assignments,Geom.point) 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /Chapter06/c6_10_table6_1.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c6_10_table6_1.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/1/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | library(rattle) 13 | 14 | require(rattle) 15 | 16 | install.packages("rattle") 17 | 18 | update.packages("rattle") 19 | 20 | search() 21 | 22 | library() 23 | 24 | "package:XML" %in% search() 25 | 26 | "package:XML" %in% .packages(all.available=T) 27 | 28 | detach(package:rattle) 29 | 30 | detach("package:rattle",unload=TRUE) 31 | 32 | help(package=rattle) 33 | 34 | library(help="rattle") 35 | -------------------------------------------------------------------------------- /Chapter10/c10_10_usGDP_graph.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_10_usGDP_graph.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | path<-"http://canisius.edu/~yany/RData/" 12 | dataSet<-"usGDPannual" 13 | con<-paste(path,dataSet,".RData",sep='') 14 | load(url(con)) 15 | title<-"US GDP" 16 | xTitle<-"Year" 17 | yTitle<-"US annual GDP" 18 | x<-.usGDPannual$YEAR 19 | y<-.usGDPannual$GDP 20 | plot(x,y,main=title,xlab=xTitle,ylab=yTitle) 21 | 22 | 23 | yTitle<-"Log US annual GDP" 24 | plot(x,log(y),main=title,xlab=xTitle,ylab=yTitle) -------------------------------------------------------------------------------- /Chapter09/c9_38_iris_prediction.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Name : c9_38_iris_prediction.py 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 
6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | """ 11 | 12 | from sklearn import metrics 13 | from sklearn import datasets 14 | from sklearn.tree import DecisionTreeClassifier 15 | x=datasets.load_iris() 16 | model=DecisionTreeClassifier() 17 | model.fit(x.data, x.target) 18 | print(model) 19 | true=x.target 20 | predicted=model.predict(x.data) 21 | print(metrics.classification_report(true, predicted)) 22 | print(metrics.confusion_matrix(true, predicted)) -------------------------------------------------------------------------------- /Chapter04/c4_26_qgraph_network.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_26_qgraph_network.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(qgraph) 12 | data(big5) 13 | data(big5groups) 14 | title("Correlations among 5 factors",line = 2.5) 15 | qgraph(cor(big5),minimum = 0.25,cut = 0.4,vsize = 1.5, 16 | groups = big5groups,legend = TRUE, borders = FALSE,theme = 'gray') 17 | 18 | 19 | # colorful one 20 | " 21 | qgraph(cor(big5),minimum = 0.25,cut = 0.4,vsize = 1.5, 22 | groups = big5groups,legend = TRUE, borders = FALSE) 23 | 24 | " 25 | 26 | -------------------------------------------------------------------------------- /Chapter07/c7_20_JuMP01.jl: -------------------------------------------------------------------------------- 1 | ### 2 | Name : c7_20_JuMP01.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | ### 10 | 11 | 12 | using JuMP 13 | using ECOS 14 | m= Model(solver =ECOSSolver()) 15 | @variable(m, 0 <= x <= 2 ) 16 | @variable(m, 0 <= y <= 30 ) 17 | @setObjective(m, Max, 5x + 3*y ) 18 | @addConstraint(m, 1x + 5y <= 3.0 ) 19 | print(m) 20 | status = solve(m) 21 | println("Objective value: ", getObjectiveValue(m)) 22 | println("x = ", getValue(x)) 23 | println("y = ", getValue(y)) 24 | 25 | 26 | #https://jump.readthedocs.io/en/release-0.2/jump.html -------------------------------------------------------------------------------- /Chapter04/c4_10_getHistram_IBMreturn.py: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_10_getHistram_IBMreturn.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | import numpy as np 12 | import quandl as qd 13 | import matplotlib.pyplot as plt 14 | # 15 | x=qd.get("WIKI/ibm") 16 | p=x[['Adj. Close']] 17 | ret=p.diff()/p 18 | ret2=ret.dropna() 19 | np.histogram(ret2,bins='auto') 20 | plt.show() 21 | 22 | 23 | [n,bins,patches]=np.histogram(ret,100) 24 | mu=np.mean(ret) 25 | sigma=np.std(ret) 26 | x=mlt.mlab.normpdf(binds,mu,sigma) 27 | mlt.plot(bins,x,color="red",lw=2) 28 | mlt.show() -------------------------------------------------------------------------------- /Chapter12/c12_13_parallel.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c12_13_parallel.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 5/27/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | library(parallel) 13 | detectCores() 14 | myFunction<- function(iter=1,n=5){ 15 | x<- rnorm(n, mean=0, sd=1 ) 16 | eps <- runif(n,-2,2) 17 | y <- 1 + 2*x + eps 18 | result<-lm( y ~ x ) 19 | final<-cbind(result$coef,confint(result)) 20 | return(final) 21 | } 22 | # 23 | m<-5000 24 | n2<-5000 25 | system.time(lapply(1:m,myFunction,n=n2)) 26 | system.time(mclapply(1:m,myFunction,n=n2)) 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /Chapter04/c4_17_QuantEcon_julia.jl: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_17_QuantEco_julia.jl 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | https://lectures.quantecon.org/jl/julia_plots.html 11 | " 12 | 13 | using QuantEcon: meshgrid 14 | using PyPlot:surf 15 | using Plots 16 | n = 50 17 | x = linspace(-3, 3, n) 18 | y = x 19 | z = Array{Float64}(n, n) 20 | f(x, y) = cos(x^2 + y^2) / (1 + x^2 + y^2) 21 | for i in 1:n 22 | for j in 1:n 23 | z[j, i] = f(x[i], y[j]) 24 | end 25 | end 26 | xgrid, ygrid = meshgrid(x, y) 27 | surf(xgrid, ygrid, z',alpha=0.7) -------------------------------------------------------------------------------- /Chapter10/c10_19_catwalk_not_complete.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c10_16_catwalk_not_complete.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/23/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import datetime 12 | import pandas 13 | from sqlalchemy import create_engine 14 | from metta import metta_io as metta 15 | from catwalk.storage import FSModelStorageEngine, CSVMatrixStore 16 | from catwalk.model_trainers import ModelTrainer 17 | from catwalk.predictors import Predictor 18 | from catwalk.evaluation import ModelEvaluator 19 | from catwalk.utils import save_experiment_and_get_hash 20 | help(FSModelStorageEngine) 21 | -------------------------------------------------------------------------------- /Chapter02/c3_30_merge_left_index.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_30_merge_lef_index.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import pandas as pd 12 | import scipy as sp 13 | x= pd.DataFrame({'YEAR': [2010,2011, 2012, 2013], 14 | 'IBM': [0.2, -0.3, 0.13, -0.2], 15 | 'WMT': [0.1, 0, 0.05, 0.23]}) 16 | y = pd.DataFrame({'date': [2011,2013,2014, 2015], 17 | 'C': [0.12, 0.23, 0.11, -0.1], 18 | 'SP500': [0.1,0.17, -0.05, 0.13]}) 19 | print(pd.merge(x,y, right_index=True,left_index=True)) 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Chapter03/c3_30_merge_left_index.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_30_merge_lef_index.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import pandas as pd 12 | import scipy as sp 13 | x= pd.DataFrame({'YEAR': [2010,2011, 2012, 2013], 14 | 'IBM': [0.2, -0.3, 0.13, -0.2], 15 | 'WMT': [0.1, 0, 0.05, 0.23]}) 16 | y = pd.DataFrame({'date': [2011,2013,2014, 2015], 17 | 'C': [0.12, 0.23, 0.11, -0.1], 18 | 'SP500': [0.1,0.17, -0.05, 0.13]}) 19 | print(pd.merge(x,y, right_index=True,left_index=True)) 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Chapter10/c10_18_annual_ret_sp500.txt: -------------------------------------------------------------------------------- 1 | library(timeSeries) 2 | path<-'http://canisius.edu/~yany/RData/' 3 | dataSet<-'sp500monthly.RData' 4 | link<-paste(path,dataSet,sep='') 5 | load(url(link)) 6 | p<-.sp500monthly$ADJ.CLOSE 7 | n<-length(p) 8 | logRet<-log(p[2:n]/p[1:(n-1)]) 9 | 10 | x<-data.frame(logRet,row.names=.sp500monthly$DATE[2:n]) 11 | by <- timeSequence(from = rownames(x)[1], to = rownames(x)[n-1], by = "year") 12 | y<-aggregate(x,by,sum) 13 | 14 | 15 | 16 | library(plyr) 17 | df <- data.frame(DATE = c("1", "1", "2", "3", "3"), B = c(2, 3, 3, 5, 6)) 18 | dfsum <- ddply(df, c("DATE"), summarize, B = sum(B)) 19 | 20 | 21 | 22 | 23 | xx <- MSFT 24 | byx <- timeSequence(from = start(xx), to = end(xx), by = "week") 25 | yy<-aggregate(xx,byx,mean) 26 | 27 | 28 | -------------------------------------------------------------------------------- /Chapter02/c3_29_merge_different_names.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_28_merge_different_names.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | import scipy as sp 14 | x= pd.DataFrame({'YEAR': [2010,2011, 2012, 2013], 15 | 'IBM': [0.2, -0.3, 0.13, -0.2], 16 | 'WMT': [0.1, 0, 0.05, 0.23]}) 17 | y = pd.DataFrame({'date': [2011,2013,2014, 2015], 18 | 'C': [0.12, 0.23, 0.11, -0.1], 19 | 'SP500': [0.1,0.17, -0.05, 0.13]}) 20 | print(pd.merge(x,y, left_on='YEAR',right_on='date')) 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /Chapter03/c3_29_merge_different_names.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_28_merge_different_names.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | import scipy as sp 14 | x= pd.DataFrame({'YEAR': [2010,2011, 2012, 2013], 15 | 'IBM': [0.2, -0.3, 0.13, -0.2], 16 | 'WMT': [0.1, 0, 0.05, 0.23]}) 17 | y = pd.DataFrame({'date': [2011,2013,2014, 2015], 18 | 'C': [0.12, 0.23, 0.11, -0.1], 19 | 'SP500': [0.1,0.17, -0.05, 0.13]}) 20 | print(pd.merge(x,y, left_on='YEAR',right_on='date')) 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /Chapter09/c9_45_same_as_c9_14_good.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[14]: 5 | 6 | 7 | import numpy as np 8 | from sklearn import datasets 9 | from sklearn.neighbors import KNeighborsClassifier as KNC 10 | iris = datasets.load_iris() 11 | x= iris.data 12 | y= iris.target 13 | np.unique(y) 14 | np.random.seed(123) 15 | indices = np.random.permutation(len(x)) 16 | iris_x_train = x[indices[:-10]] 17 | iris_y_train = y[indices[:-10]] 18 | 
iris_x_test = x[indices[-10:]] 19 | iris_y_test = y[indices[-10:]] 20 | model=KNC() 21 | model.fit(iris_x_train, iris_y_train) 22 | KNC(algorithm='auto',leaf_size=30, metric='minkowski', 23 | metric_params=None,n_jobs=1,n_neighbors=5, p=2,weights='uniform') 24 | out=model.predict(iris_x_test) 25 | print("predicted:",out) 26 | print("True :",iris_y_test) 27 | 28 | -------------------------------------------------------------------------------- /Chapter03/c3_19_missing_code.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_19_missing_code2.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 5/16/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import scipy as sp 13 | import pandas as pd 14 | path="https://canisius.edu/~yany/data/" 15 | dataSet="z.csv" 16 | infile=path+dataSet 17 | x=pd.read_csv(infile,header=None) 18 | print(x.head()) 19 | print((x[[1,1,2,3,4,5]] ==0).sum()) 20 | 21 | 22 | x2=x 23 | x2[[1,2,3,4,5]] = x2[[1,2,3,4,5]].replace(0, sp.NaN) 24 | print(x2.head()) 25 | #print((x2== sp.NaN).sum()) 26 | 27 | x3=x2 28 | x3.fillna(x3.mean(), inplace=True) 29 | print(x3.head()) 30 | 31 | # print(x.describe()) 32 | 33 | -------------------------------------------------------------------------------- /Chapter07/c7_11_ff5industries.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c7_11_ff5industries.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 3/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | path<-"http://canisius.edu/~yany/RData/ff5industries.RData" 12 | load(url(path)) 13 | retMatrix<-as.matrix(.ff5industries[,2:6]/100) 14 | n1<-ncol(retMatrix) 15 | w<-rep(1/n1,n1) 16 | A<-1.5 17 | bigValue=100 18 | # 19 | utilityFunction<-function(w){ 20 | portfolioRet<-retMatrix%*%w 21 | x<-portfolioRet 22 | loss<-(sum(w)-1)^2*bigValue 23 | u=-(mean(x)-0.5*A*var(x))+loss 24 | return(u) 25 | } 26 | optim(w,utilityFunction,lower =0, upper =0.5) 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /Chapter09/c9_44_same_as_c9_14_good.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[15]: 5 | 6 | 7 | import numpy as np 8 | from sklearn import datasets 9 | from sklearn.neighbors import KNeighborsClassifier as KNC 10 | iris = datasets.load_iris() 11 | x= iris.data 12 | y= iris.target 13 | np.unique(y) 14 | np.random.seed(123) 15 | indices = np.random.permutation(len(x)) 16 | iris_x_train = x[indices[:-10]] 17 | iris_y_train = y[indices[:-10]] 18 | iris_x_test = x[indices[-10:]] 19 | iris_y_test = y[indices[-10:]] 20 | knn = KNC() 21 | knn.fit(iris_x_train, iris_y_train) 22 | KNC(algorithm='auto',leaf_size=30, metric='minkowski', 23 | metric_params=None,n_jobs=1,n_neighbors=5, p=2,weights='uniform') 24 | knn.predict(iris_x_test) 25 | out=knn.predict(iris_x_test) 26 | print("predicted:",out) 27 | print("True :",iris_y_test) 28 | 29 | -------------------------------------------------------------------------------- /Chapter02/c3_12_merge_01.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_12_merge_01.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import scipy as sp 12 | import pandas as pd 13 | 14 | x= pd.DataFrame({'key': ["ID1","ID2", "ID3", "ID4"], 15 | 'x': [0.1, 0.02 0.05, 0.02], 16 | 'y': [0, 1, 2, 3]}) 17 | y = pd.DataFrame({'key': ['ID1', 'ID2', 'ID5', 'ID7'], 18 | 'z': [11, 12, 22, 23], 19 | 'd': [23, 15',2, 3]}) 20 | print(sp.shape(x)) 21 | print(sp.shape(y)) 22 | result = pd.merge(x,y, on='key') 23 | print(result) 24 | result2=pd.merge(x,y) 25 | print(result2) 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /Chapter03/c3_12_merge_01.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_12_merge_01.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import scipy as sp 12 | import pandas as pd 13 | 14 | x= pd.DataFrame({'key': ["ID1","ID2", "ID3", "ID4"], 15 | 'x': [0.1, 0.02 0.05, 0.02], 16 | 'y': [0, 1, 2, 3]}) 17 | y = pd.DataFrame({'key': ['ID1', 'ID2', 'ID5', 'ID7'], 18 | 'z': [11, 12, 22, 23], 19 | 'd': [23, 15',2, 3]}) 20 | print(sp.shape(x)) 21 | print(sp.shape(y)) 22 | result = pd.merge(x,y, on='key') 23 | print(result) 24 | result2=pd.merge(x,y) 25 | print(result2) 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /Chapter05/c5_17_ibm_beta.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c5_17_ibm_beta.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | # ret_f: turns a price table (date in column 1, price in column 6) into a DATE/RET data frame of simple returns 11 | ret_f<-function(data){ 12 | ddate<-as.Date(data[,1]) 13 | n<-nrow(data) 14 | p<-data[,6] 15 | ret<-p[2:n]/p[1:(n-1)]-1 16 | final<-data.frame(ddate[2:n],ret,stringsAsFactors=F) 17 | colnames(final)<-c("DATE","RET") 18 | return(final) 19 | } 20 | # 21 | x<-read.csv("http://canisius.edu/~yany/data/ibmMonthly.csv") 22 | stock<-ret_f(x) 23 | # 24 | y<-read.csv("http://canisius.edu/~yany/data/^gspcMonthly.csv") 25 | mkt<-ret_f(y) 26 | colnames(mkt)<-c("DATE","MKTRET") 27 | # 28 | final<-merge(stock,mkt) 29 | -------------------------------------------------------------------------------- /Chapter05/c5_30_run_linearRegressionOctave.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c5_30_run_linearRegressionOctave.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | #} 10 | 11 | 12 | rand('seed',123) 13 | n = 50; 14 | x = sort(rand(n,1)*5-1); 15 | y = 2+1.5*x + randn(size(x)); 16 | figure % open a new figure window 17 | plot(x, y, 'o'); 18 | 19 | # 20 | n = length(y); 21 | x2= [ones(n, 1), x]; % Add a column of ones to x 22 | b = inv(x2'*x2)*x2'*y; 23 | R = y - (x2 * b); # residuals 24 | v = (R'*R)/(n - size(x2, 2)); # residual variance: RSS / (observations - fitted parameters) 25 | sigma = v * inv(x2'*x2); # variance covariance matrix 26 | se = sqrt(diag(sigma)); # std errors of parameters 27 | -------------------------------------------------------------------------------- /Chapter02/c3_26_ff3monthly2pickle.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_24_ff3monthly2pickle.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | infile="http://canisius.edu/~yany/data/ff3monthly.csv" 14 | ff3=pd.read_csv(infile,skiprows=3) 15 | print(ff3.head(2)) 16 | # output to pickle 17 | ff3.to_pickle("c:/temp/ff3.pkl") 18 | # output to a csv file 19 | outfile=open("c:/temp/ff3.csv","w") 20 | ff3.to_csv(outfile,index=None) 21 | outfile.close() 22 | # output to text file 23 | outfile2=open("c:/temp/ff3.txt","w") 24 | ff3.to_csv(outfile2, header=True, index=None, sep=' ', mode='a') 25 | outfile2.close() 26 | 27 | 28 | -------------------------------------------------------------------------------- /Chapter03/c3_26_ff3monthly2pickle.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_24_ff3monthly2pickle.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | infile="http://canisius.edu/~yany/data/ff3monthly.csv" 14 | ff3=pd.read_csv(infile,skiprows=3) 15 | print(ff3.head(2)) 16 | # output to pickle 17 | ff3.to_pickle("c:/temp/ff3.pkl") 18 | # output to a csv file 19 | outfile=open("c:/temp/ff3.csv","w") 20 | ff3.to_csv(outfile,index=None) 21 | outfile.close() 22 | # output to text file 23 | outfile2=open("c:/temp/ff3.txt","w") 24 | ff3.to_csv(outfile2, header=True, index=None, sep=' ', mode='a') 25 | outfile2.close() 26 | 27 | 28 | -------------------------------------------------------------------------------- /Chapter12/c12_07_snow_parallel_Rmpi_UNIX.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c12_07_snow_parallel_UNIX.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 5/14/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(snow) 12 | library(parallel) 13 | #library(Rmpi) 14 | myFunction<-function(n) { 15 | a<-rnorm(n) 16 | final<-log(abs(a))+a^3+2*a; 17 | return(final) 18 | } 19 | # 20 | nCores=11; 21 | #Using multicore 22 | system.time(mclapply(rep(5E6,11),myFunction,mc.cores=nCores)) 23 | #Using snow via MPI 24 | system.time(sapply(rep(5E6,11),myFunction)) 25 | #cl <- getMPIcluster() 26 | cl <- makeCluster(c("localhost","localhost"), type = "SOCK") 27 | system.time(parSapply(cl,rep(5E6,11),myFunction)) 28 | stopCluster(cl) # shut the two worker processes down once the timing is done -------------------------------------------------------------------------------- /Chapter02/c3_31_merge_by2variables.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_31_merge_by2variables.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | x= pd.DataFrame({'ID': ['IBM', 'IBM', 'WMT', 'WMT'], 14 | 'date': [2010, 2011, 2010, 2011], 15 | 'SharesOut': [100, 40, 60, 90], 16 | 'Asset': [20, 30, 10, 30]}) 17 | 18 | y = pd.DataFrame({'ID': ['IBM', 'IBM', 'C', 'WMT'], 19 | 'date': [2010, 2014, 2010, 2010], 20 | 'Ret': [0.1, 0.2, -0.1,0.2], 21 | 'ROA': [0.04,-0.02,0.03,0.1]}) 22 | 23 | z= pd.merge(x,y, on=['ID', 'date']) -------------------------------------------------------------------------------- /Chapter03/c3_31_merge_by2variables.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_31_merge_by2variables.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | 12 | import pandas as pd 13 | x= pd.DataFrame({'ID': ['IBM', 'IBM', 'WMT', 'WMT'], 14 | 'date': [2010, 2011, 2010, 2011], 15 | 'SharesOut': [100, 40, 60, 90], 16 | 'Asset': [20, 30, 10, 30]}) 17 | 18 | y = pd.DataFrame({'ID': ['IBM', 'IBM', 'C', 'WMT'], 19 | 'date': [2010, 2014, 2010, 2010], 20 | 'Ret': [0.1, 0.2, -0.1,0.2], 21 | 'ROA': [0.04,-0.02,0.03,0.1]}) 22 | 23 | z= pd.merge(x,y, on=['ID', 'date']) -------------------------------------------------------------------------------- /Chapter08/c8_05_kmeans01.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c8_05_kMeans01.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 3/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(readr) 12 | library(corrplot) 13 | library(ggplot2) 14 | # 15 | path<-"http://canisius.edu/~yany/RData/wine.RData" 16 | load(url(path)) 17 | red2<-red 18 | red2$quality<-NULL 19 | white2<-white 20 | white2$quality<-NULL 21 | red_cor<-cor(red2) 22 | white_cor<-cor(white2) 23 | class(red_cor) 24 | class(white_cor) 25 | # 26 | corrplot(red_cor,method="number") 27 | 28 | corrplot(white_cor,method="number") 29 | 30 | 31 | 32 | #https://www.kaggle.com/maitree/kmeans-unsupervised-learning-using-wine-dataset/notebook 33 | 34 | 35 | -------------------------------------------------------------------------------- /Chapter09/c9_10_RTextTools.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c9_10_RTetTools.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(RTextTools) 12 | data(NYTimes) 13 | data <- NYTimes[sample(1:3100,size=100,replace=FALSE),] 14 | matrix <- create_matrix(cbind(data["Title"],data["Subject"]), language="english", 15 | removeNumbers=TRUE, stemWords=FALSE, weighting=tm::weightTfIdf) 16 | container <- create_container(matrix,data$Topic.Code,trainSize=1:75, testSize=76:100, 17 | virgin=FALSE) 18 | models <- train_models(container, algorithms=c("MAXENT","SVM")) 19 | results <- classify_models(container, models) 20 | analytics <- create_analytics(container, results) 21 | summary(analytics) -------------------------------------------------------------------------------- /Chapter02/c3_19_missing_code.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_19_missing_code2.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | """ 10 | 11 | import scipy as sp 12 | import pandas as pd 13 | path="https://archive.ics.uci.edu/ml/machine-learning-databases/" 14 | dataSet="pima-indians-diabetes/pima-indians-diabetes.data" 15 | infile=path+dataSet 16 | x=pd.read_csv(infile,header=None) 17 | print(x.head()) 18 | print((x[[1,2,3,4,5]] == 0).sum()) 19 | 20 | x2=x 21 | x2[[1,2,3,4,5]] = x2[[1,2,3,4,5]].replace(0, sp.NaN) 22 | print(x2.head()) 23 | #print((x2== sp.NaN).sum()) 24 | 25 | x3=x2 26 | x3.fillna(x3.mean(), inplace=True) 27 | print(x3.head()) 28 | 29 | # print(x.describe()) 30 | 31 | -------------------------------------------------------------------------------- /Chapter09/c9_11_RTextTool_2.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c9_11_RTextTool2.R 4 | Book : Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 
6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | " 11 | 12 | library(RTextTools) 13 | data(NYTimes) 14 | data <- NYTimes[sample(1:3100,size=100,replace=FALSE),] 15 | matrix <- create_matrix(cbind(data["Title"],data["Subject"]), language="english", 16 | removeNumbers=TRUE, stemWords=FALSE, weighting=tm::weightTfIdf) 17 | container <- create_container(matrix,data$Topic.Code,trainSize=1:75, testSize=76:100, 18 | virgin=TRUE) 19 | models <- train_models(container, algorithms=c("MAXENT","SVM")) 20 | results <- classify_models(container, models) 21 | analytics <- create_analytics(container, results) 22 | summary(analytics) -------------------------------------------------------------------------------- /Chapter12/c12_02_parallel_01.R: -------------------------------------------------------------------------------- 1 | 2 | #http://gforge.se/2015/02/how-to-go-parallel-in-r-basics-tips/ 3 | 4 | library(parallel) 5 | n_cores <- detectCores() - 1 6 | cl <- makeCluster(n_cores) 7 | parLapply(cl, 2:4,function(exponent) 2^exponent) 8 | stopCluster(cl) 9 | 10 | 11 | c2<-makeCluster(n_cores) 12 | base <- 2 13 | try(parLapply(c2, 2:4, function(exponent) base^exponent)) # fails on purpose: 'base' was never exported to the workers; try() lets the script reach stopCluster 14 | stopCluster(c2) 15 | 16 | # Error in checkForRemoteErrors(val) : 17 | # 3 nodes produced errors; first error: object 'base' not found 18 | 19 | 20 | c3<-makeCluster(n_cores) 21 | base <- 2 22 | clusterExport(c3, "base") 23 | parLapply(c3, 2:4, function(exponent) base^exponent) 24 | stopCluster(c3) 25 | 26 | 27 | c4<-makeCluster(n_cores) 28 | clusterExport(c4, "base") 29 | base <- 4 # changed after the export, so the workers still hold the exported value 30 | # Run 31 | parLapply(c4, 2:4, function(exponent) 32 | base^exponent) 33 | 34 | stopCluster(c4) -------------------------------------------------------------------------------- 
/Chapter02/c3_09_R_package_sjlabbeld.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_09_package_sjlabbeld.R 3 | Book : Hands-on Data Science 
with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | library(sjlabelled) 13 | mydat <- read_spss("my_spss_data.sav") 14 | # retrieve variable labels 15 | mydat.var <- get_label(mydat) 16 | 17 | 18 | 19 | 20 | mydat <- read_spss("my_spss_data.sav", atomic.to.fac = TRUE) 21 | 22 | # retrieve value labels 23 | mydat.val <- get_labels(mydat) 24 | ## End(Not run) 25 | 26 | 27 | # the write_* helpers expect a data frame, not a bare vector 28 | x<-data.frame(x=1:100) 29 | 30 | write_spss(x,"c:/temp/t.spss", drop.na = FALSE) 31 | 32 | write_stata(x, "c:/temp/stat.csv",drop.na = FALSE, version = 14) 33 | 34 | write_sas(x, "c:/temp/t.sas7bdat", drop.na = FALSE) 35 | 36 | -------------------------------------------------------------------------------- /Chapter03/c3_09_R_package_sjlabbeld.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_09_package_sjlabbeld.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | library(sjlabelled) 13 | mydat <- read_spss("my_spss_data.sav") 14 | # retrieve variable labels 15 | mydat.var <- get_label(mydat) 16 | 17 | 18 | 19 | 20 | mydat <- read_spss("my_spss_data.sav", atomic.to.fac = TRUE) 21 | 22 | # retrieve value labels 23 | mydat.val <- get_labels(mydat) 24 | ## End(Not run) 25 | 26 | 27 | # the write_* helpers expect a data frame, not a bare vector 28 | x<-data.frame(x=1:100) 29 | 30 | write_spss(x,"c:/temp/t.spss", drop.na = FALSE) 31 | 32 | write_stata(x, "c:/temp/stat.csv",drop.na = FALSE, version = 14) 33 | 34 | write_sas(x, "c:/temp/t.sas7bdat", drop.na = FALSE) 35 | 36 | -------------------------------------------------------------------------------- /Chapter04/c4_24_Brownian_motion_html.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_24_brownian_motion_html.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(animation) 12 | a<- c(3, 3, 1, 0.5) 13 | b<- c(2, 0.5, 0) 14 | part1<-"Random walk on the 2D plane: for each point" 15 | part2<-"(x, y), x = x + rnorm(1) and y = y + rnorm(1)." 
16 | des<-c(part1,part2) 17 | titleName<-"Demonstration of Brownian Motion" 18 | # 19 | saveHTML({ 20 | par(mar =a, mgp =b, tcl = -0.3,cex.axis = 0.8, cex.lab = 0.8, cex.main = 1) 21 | ani.options(interval = 0.05, nmax = ifelse(interactive(),150, 10)) 22 | brownian.motion(pch = 21, cex = 5, col = "red", bg = "yellow") 23 | },description =des,title = titleName) -------------------------------------------------------------------------------- /Chapter09/c9_17_others_1.R: -------------------------------------------------------------------------------- 1 | # https://stats.stackexchange.com/questions/21572/how-to-plot-decision-boundary-of-a-k-nearest-neighbor-classifier-from-elements-o/21602#21602 2 | 3 | library(ElemStatLearn) 4 | require(class) 5 | x <- mixture.example$x 6 | g <- mixture.example$y 7 | xnew <- mixture.example$xnew 8 | mod15 <- knn(x, xnew, g, k=15, prob=TRUE) 9 | prob <- attr(mod15, "prob") 10 | prob <- ifelse(mod15=="1", prob, 1-prob) 11 | px1 <- mixture.example$px1 12 | px2 <- mixture.example$px2 13 | prob15 <- matrix(prob, length(px1), length(px2)) 14 | par(mar=rep(2,4)) 15 | contour(px1, px2, prob15, levels=0.5, labels="", xlab="", ylab="", main= 16 | "15-nearest neighbour", axes=FALSE) 17 | points(x, col=ifelse(g==1, "coral", "cornflowerblue")) 18 | gd <- expand.grid(x=px1, y=px2) 19 | points(gd, pch=".", cex=1.2, col=ifelse(prob15>0.5, "coral", "cornflowerblue")) 20 | box() 21 | 22 | 23 | -------------------------------------------------------------------------------- /Chapter10/c10_30_ltfat_example.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c10_30_ltfat_example.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
"""Tally the decimal digits of pi and plot how often each digit occurs."""
import numpy as np  # was missing: one_digit_freqs() uses np.zeros


def plot_one_digit_freqs(f1):
    """
    Plot one digit frequency counts using matplotlib.

    f1 : sequence of counts, plotted against its index.
    Returns the value returned by ``plt.plot``.
    """
    # Imported here so the counting helper can be used without matplotlib.
    import matplotlib.pyplot as plt

    ax = plt.plot(f1, 'bo-')
    plt.title('Single digit counts in pi')
    plt.xlabel('Digit')
    plt.ylabel('Count')
    return ax


def one_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 1 digit freq. counts.

    digits    : iterable of single-digit characters (or anything int() accepts
                that maps to 0..9).
    normalize : when True, return relative frequencies instead of counts.
    Returns a length-10 numpy array (int32 counts, or floats when normalised).
    """
    freqs = np.zeros(10, dtype='i4')
    for d in digits:
        freqs[int(d)] += 1
    if normalize:
        total = freqs.sum()
        if total == 0:
            # Nothing was counted: avoid 0/0 and return all-zero frequencies.
            return freqs.astype(float)
        freqs = freqs / total
    return freqs


def main():
    """Compute 10000 significant digits of pi, tally them and show the plot."""
    import matplotlib.pyplot as plt
    import sympy

    pi = sympy.pi.evalf(10000)
    # str(pi) starts with "3."; drop those two characters.
    digits = (d for d in str(pi)[2:])  # create a sequence of digits
    freqs = one_digit_freqs(digits)
    plot_one_digit_freqs(freqs)
    plt.show()


if __name__ == "__main__":
    main()
"
Name : c3_08_merge_datasets.R
Book : Hands-on Data Science with Anaconda)
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 1/15/2018
email : yany@canisius.edu
 paulyxy@hotmail.com
"

# Build two simulated monthly data sets (stock returns and a market return)
# and join them on their shared DATE column.

set.seed(123)

nStocks  <- 4
nPeriods <- 24

# Stock returns are drawn first so the random stream is consumed in the same
# order as before; one column per stock.
x <- runif(n = nStocks * nPeriods, min = -0.1, max = 0.20)
a <- matrix(x, nrow = nPeriods, ncol = nStocks)

d1 <- as.Date("2000-01-01")
d2 <- as.Date("2001-12-01")
dd <- seq(from = d1, to = d2, by = "months")

stocks <- data.frame(dd, a)
names(stocks) <- c("DATE", paste0('stock', seq_len(nStocks)))

# Market returns over a longer window than the stock data.
d3 <- as.Date("1999-01-01")
d4 <- as.Date("2010-12-01")
dd2 <- seq(from = d3, to = d4, by = "months")

y <- runif(n = length(dd2), min = -0.05, max = 0.1)
market <- data.frame(dd2, y)
names(market) <- c("DATE", "MKT")

# merge() joins on the columns the two frames have in common (DATE).
final <- merge(x = stocks, y = market)
"
Name : c3_08_merge_datasets.R
Book : Hands-on Data Science with Anaconda)
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 1/15/2018
email : yany@canisius.edu
 paulyxy@hotmail.com
"

# Simulate monthly returns for a few stocks and for the market, then merge the
# two tables on DATE.

set.seed(123)

nStocks  <- 4
nPeriods <- 24

# The stock draws come before the market draws, exactly as before, so the
# seeded values are unchanged.
x <- runif(n = nStocks * nPeriods, min = -0.1, max = 0.20)
a <- matrix(x, nrow = nPeriods, ncol = nStocks)

d1 <- as.Date("2000-01-01")
d2 <- as.Date("2001-12-01")
dd <- seq(from = d1, to = d2, by = "months")

stocks <- data.frame(dd, a)
names(stocks) <- c("DATE", paste0('stock', seq_len(nStocks)))

d3 <- as.Date("1999-01-01")
d4 <- as.Date("2010-12-01")
dd2 <- seq(from = d3, to = d4, by = "months")

y <- runif(n = length(dd2), min = -0.05, max = 0.1)
market <- data.frame(dd2, y)
names(market) <- c("DATE", "MKT")

# Only dates present in both tables survive the merge.
final <- merge(x = stocks, y = market)
# https://stats.stackexchange.com/questions/21572/how-to-plot-decision-boundary-of-a-k-nearest-neighbor-classifier-from-elements-o/21602#21602
#
# Same picture as the 15-NN example, with the neighbour count and the two
# class colours pulled out as variables.

library(ElemStatLearn)
require(class)

x <- mixture.example$x
y <- mixture.example$y
xnew <- mixture.example$xnew
px1 <- mixture.example$px1
px2 <- mixture.example$px2
#
color1 <- "blue"
color2 <- "pink3"
kNearest <- 5

model <- knn(x, xnew, y, k = kNearest, prob = TRUE)
# sep = "" keeps the title as "5-nearest neighbour"; the default separator
# produced "5 -nearest neighbour".
title <- paste(kNearest, "-nearest neighbour", sep = "")

prob <- attr(model, "prob")
# Convert "vote share of the winning class" into "vote share of class 1".
prob <- ifelse(model == "1", prob, 1 - prob)
# Name kept from the 15-NN script; it holds the kNearest-NN probabilities.
prob15 <- matrix(prob, length(px1), length(px2))

par(mar = rep(2, 4))
contour(px1, px2, prob15, levels = 0.5, main = title, axes = FALSE)
# The class labels live in `y`; the script used to reference an undefined `g`.
points(x, col = ifelse(y == 1, color1, color2))
gd <- expand.grid(x = px1, y = px2)
points(gd, pch = ".", cex = 1.5, col = ifelse(prob15 > 0.5, color1, color2))
box()
"
Name : c8_02_cluster.R
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 3/25/2018
email : yany@canisius.edu
 paulyxy@hotmail.com
"

## generate 500 objects, divided into 2 clusters.

library(cluster)
set.seed(123)
n1 <- 200; mean1 <- 0;  std1 <- 8
n2 <- 300; mean2 <- 80; std2 <- 8
set1 <- cbind(rnorm(n1, mean1, std1), rnorm(n1, mean1, std1))
set2 <- cbind(rnorm(n2, mean2, std2), rnorm(n2, mean2, std2))
x <- rbind(set1, set2)
#
# Fit CLARA with two clusters.
# NOTE(review): the result is stored in a variable called `data`, which shares
# its name with the base function data() -- consider renaming.
data <- clara(x, 2, samples = 50)
plot(data)

data <- clara(x, 2, samples = 50)
#data
#data$clusinfo
## using pamLike=TRUE gives the same (apart from the 'call'):
# The second fit must come from clara(); the script used to call data(...)
# here, which resolves to utils::data() rather than the clustering routine.
all.equal(data[-8], clara(x, 2, samples = 50, pamLike = TRUE)[-8])
plot(data)

"""
Name : c9_42_ff3factorDaily.py
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 4/16/2018
email : yany@canisius.edu
        paulyxy@hotmail.com
"""

import scipy as sp
import pandas as pd
import quandl as qd
import statsmodels.api as sm

# The module is imported as `qd`, so the key is set through that alias:
#qd.ApiConfig.api_key = 'YOUR_API_KEY'
a = qd.get("WIKI/IBM")
p = a['Adj. Close']

# Simple daily return p[t]/p[t-1] - 1, computed on the whole Series at once.
# This replaces a Python loop that indexed the date-indexed Series with
# integer keys (p[i]), which depends on pandas' positional fallback.
# The first row has no previous price and is dropped.
ret = (p / p.shift(1) - 1).iloc[1:]
c = ret.to_frame('RET')

# Daily factor table saved earlier as a pickle.
# NOTE(review): presumably indexed by date like `c`; the merge below joins on
# the index -- confirm.
ff = pd.read_pickle('c:/temp/ffDaily.pkl')
final = pd.merge(c, ff, left_index=True, right_index=True)

y = final['RET']
x = final[['MKT_RF', 'SMB', 'HML']]
#x=final[['MKT_RF']]
x = sm.add_constant(x)  # adds the intercept column
results = sm.OLS(y, x).fit()
print(results.summary())
"""
Name : c4_14_time_value_of_money.py
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 1/25/2018
email : yany@canisius.edu
        paulyxy@hotmail.com
"""
# The header above used to be wrapped in single double-quotes, which cannot
# span lines in Python; triple quotes make the file importable.

import matplotlib.pyplot as plt

# White figure with a frameless axis that only shows the x (time) axis.
fig = plt.figure(facecolor='white')
dd = plt.axes(frameon=False)
dd.set_frame_on(False)
dd.get_xaxis().tick_bottom()
dd.axes.get_yaxis().set_visible(False)

x = range(0, 11, 2)           # years 0, 2, ..., 10
x1 = range(len(x), 0, -1)     # descending exponents, one per marker
y = [0] * len(x)              # all markers sit on the time line

plt.annotate("$100 received today", xy=(0, 0), xytext=(2, 0.15),
             arrowprops=dict(facecolor='black', shrink=2))
plt.annotate("$100 received in 2 years", xy=(2, 0), xytext=(3.5, 0.10),
             arrowprops=dict(facecolor='black', shrink=2))

# Marker areas shrink geometrically from left to right.
s = [50 * 2.5 ** n for n in x1]
plt.title("Time value of money ")
plt.xlabel("Time (number of years)")
plt.scatter(x, y, s=s)
plt.show()

"
Name : c9_16_generate_titanicRData.R
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 4/6/2018
email : yany@canisius.edu
 paulyxy@hotmail.com
"

# Read the Titanic CSV, store it as .rds and .RData, then show how the
# published .RData copy is loaded back from the web.

x <- read.csv("c:/temp/titanic.csv")

# The console prompt and printed table pasted here made the file unparsable;
# the call is kept and its output preserved as a comment.
head(x)
#   CLASS   AGE GENDER SURVIVED
# 1 First Adult   Male      Yes
# 2 First Adult   Male      Yes
# 3 First Adult   Male      Yes
# 4 First Adult   Male      Yes
# 5 First Adult   Male      Yes
# 6 First Adult   Male      Yes

.titanic <- x
saveRDS(.titanic, file = "c:/temp/titanic.rds")

save(.titanic, file = "c:/temp/titanic.RData")

# Load the published copy; load() recreates the object under its saved name,
# .titanic.
path <- "http://canisius.edu/~yany/RData/"
dataSet <- "titanic"
link <- paste(path, dataSet, ".RData", sep = '')
con <- url(link)
load(con)
dim(.titanic)
head(.titanic)
"
Name : c12_06_plyr_example.R
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 5/14/2018
email : yany@canisius.edu
 paulyxy@hotmail.com
"

# Split-apply-combine examples with plyr::ddply, first on a small simulated
# data frame and then on the `baseball` data set.

library(plyr)

# 29 simulated people in three groups; sex is drawn before age so the random
# stream is consumed in the original order.
d1 <- rep(c('A', 'B', 'C'), times = c(8, 15, 6))
d2 <- sample(c("M", "F"), size = 29, replace = TRUE)
d3 <- runif(n = 29, min = 18, max = 54)
#
dfx <- data.frame(group = d1, sex = d2, age = d3)
#
# Mean and standard deviation of age per (group, sex) cell.
ddply(dfx, .(group, sex), summarize,
      mean = round(mean(age), 2),
      sd = round(sd(age), 2))
#
# Row counts per year for the first 100 rows, then rows/columns per `lg`.
ddply(baseball[seq_len(100), ], ~ year, nrow)
ddply(baseball, .(lg), c("nrow", "ncol"))

# Mean of the rbi column per year, drawn as a line.
rbi <- ddply(baseball, .(year), summarise,
             mean_rbi = mean(rbi, na.rm = TRUE))
plot(mean_rbi ~ year, type = "l", data = rbi)

# Per id, number the seasons starting at 1.
base2 <- ddply(baseball, .(id), mutate,
               career_year = year - min(year) + 1)
"
Name : c10_20_grangerTest_IBM_sp500.R
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 4/24/2018
email : yany@canisius.edu
 paulyxy@hotmail.com
"

# ret_f: turn a price table into a two-column return table.
#   x      : data frame whose 1st column is the date and whose 6th column is
#            the price used for returns (presumably the adjusted close --
#            verify against the CSV layout).
#   ticker : label appended, upper-cased, to "RET_" for the return column.
# Returns a data frame with columns DATE and RET_<TICKER>; the first
# observation is dropped because it has no previous price.
ret_f <- function(x, ticker = "") {
   n <- nrow(x)
   p <- x[, 6]
   current  <- p[2:n]
   previous <- p[1:(n - 1)]
   output <- data.frame(x[2:n, 1], current / previous - 1)
   names(output) <- c("DATE", paste0("RET_", toupper(ticker)))
   output
}

x <- read.csv("http://canisius.edu/~yany/data/ibmDaily.csv", header = TRUE)
ibmRet <- ret_f(x, "ibm")
x <- read.csv("http://canisius.edu/~yany/data/^gspcDaily.csv", header = TRUE)
mktRet <- ret_f(x, "mkt")
# Join the two return series on DATE.
final <- merge(ibmRet, mktRet)

library(lmtest)
# Granger-causality tests with one lag, in both directions.
grangertest(RET_IBM ~ RET_MKT, order = 1, data = final)

grangertest(RET_MKT ~ RET_IBM, order = 1, data = final)

"""
Name : c3_13_merge_02_stock.py
Book : Hands-on Data Science with Anaconda)
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 1/15/2018
email : yany@canisius.edu
        paulyxy@hotmail.com
"""

import pandas as pd
import scipy as sp

# Two small return tables that overlap only in 2011 and 2013.
x = pd.DataFrame({
    'YEAR': [2010, 2011, 2012, 2013],
    'FirmA': [0.2, -0.3, 0.13, -0.2],
    'FirmB': [0.1, 0, 0.05, 0.23],
})
y = pd.DataFrame({
    'YEAR': [2011, 2013, 2014, 2015],
    'FirmC': [0.12, 0.23, 0.11, -0.1],
    'SP500': [0.1, 0.17, -0.05, 0.13],
})
#
# Show the same join on YEAR under each merge strategy, in the original order
# (inner is pandas' default `how`).
for label, how in (("\n inner merge ", 'inner'),
                   (" \n outer merge ", 'outer'),
                   ("\n left merge ", 'left'),
                   ("\n right merge ", 'right')):
    print(label)
    print(x.merge(y, on='YEAR', how=how))

"""
Name : c3_13_merge_02_stock.py
Book : Hands-on Data Science with Anaconda)
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 1/15/2018
email : yany@canisius.edu
        paulyxy@hotmail.com
"""

import pandas as pd
import scipy as sp

# Firm returns for 2010-2013 and firm/index returns for 2011-2015; the two
# tables share the YEAR key.
x = pd.DataFrame({
    'YEAR': [2010, 2011, 2012, 2013],
    'FirmA': [0.2, -0.3, 0.13, -0.2],
    'FirmB': [0.1, 0, 0.05, 0.23],
})
y = pd.DataFrame({
    'YEAR': [2011, 2013, 2014, 2015],
    'FirmC': [0.12, 0.23, 0.11, -0.1],
    'SP500': [0.1, 0.17, -0.05, 0.13],
})
#
# One heading plus one merged table per join type; 'inner' is what pd.merge
# does when `how` is omitted.
for label, how in (("\n inner merge ", 'inner'),
                   (" \n outer merge ", 'outer'),
                   ("\n left merge ", 'left'),
                   ("\n right merge ", 'right')):
    print(label)
    print(x.merge(y, on='YEAR', how=how))
#{
Name : c9_33_birk_kMeans.m
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 4/6/2018
email : yany@canisius.edu
        paulyxy@hotmail.com
#}

% Compress an image to k colours with k-means and show it next to the
% original.

% Load the picture as doubles and scale the values by 1/255.
A = double(imread('bird_small.png'));
A = A / 255;
imgSize = size(A);

% One row per pixel, three columns.
X = reshape(A, imgSize(1) * imgSize(2), 3);

k = 16;   % using 4-bit (16) colors,minimize cost
% NOTE(review): generateKMeansClustersMinCost is defined elsewhere; the two
% trailing 10s are passed through unchanged -- confirm their meaning there.
[Centroids, idx, cost] = generateKMeansClustersMinCost(X, k, 10, 10);
fprintf('Cost/Distortion of computed clusters:%.3f\n', cost);

% Replace every pixel by the centroid it was assigned to, then restore the
% image shape.
NewX = Centroids(idx, :);
NewA = reshape(NewX, imgSize(1), imgSize(2), 3);

% Side-by-side comparison.
fprintf('Comparing original & compressed images\n');
subplot(1, 2, 1);
imagesc(A);
axis("square");
title('Original');

subplot(1, 2, 2);
imagesc(NewA);
axis("square");
title('Compressed');

"""
Name : c9_14_iris_predicted_vs_trueOne.py
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 5/26/2018
email : yany@canisius.edu
        paulyxy@hotmail.com
"""
import numpy as np
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier as KNC

iris = datasets.load_iris()
x = iris.data
y = iris.target

# Fixed seed so the shuffled split is the same on every run.
np.random.seed(123)
indices = np.random.permutation(len(x))

# Hold out the last 10 shuffled observations for testing.
iris_x_train = x[indices[:-10]]
iris_y_train = y[indices[:-10]]
iris_x_test = x[indices[-10:]]
iris_y_test = y[indices[-10:]]

# k-nearest-neighbour classifier with the library defaults.
# Two leftovers from an interactive session were removed: a bare np.unique(y)
# whose result was discarded, and a pasted KNC(algorithm='auto', ...) echo
# that built a second, unused classifier.
knn = KNC()
knn.fit(iris_x_train, iris_y_train)

out = knn.predict(iris_x_test)
print("predicted:", out)
print("True :", iris_y_test)
"
Name : c9_04_simplist_tree_titanic.R
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 4/6/2018
email : yany@canisius.edu
 paulyxy@hotmail.com
"

# Fit and draw a classification tree for SURVIVED on the Titanic data.

library(rpart, quietly = TRUE)

# load() recreates the saved object .titanic.
con <- url("http://canisius.edu/~yany/RData/titanic.RData")
load(con)
x <- .titanic

scoring <- FALSE
set.seed(42)
risk <- ident <- ignore <- weights <- numeric <- NULL

# The data lives in `x`; the script used to inspect an undefined `dataset`.
str(x)
n <- nrow(x)

# 70% training sample, 15% validation, remainder for testing.
train <- sample <- sample(n, 0.7 * n)
validate <- sample(setdiff(seq_len(n), train), 0.15 * n)
test <- setdiff(setdiff(seq_len(n), train), validate)

inputVars <- categoric <- c("CLASS", "AGE", "GENDER")
target <- "SURVIVED"

output <- rpart(SURVIVED ~ ., data = x[train, c(inputVars, target)],
                method = "class", parms = list(split = "information"),
                control = rpart.control(usesurrogate = 0, maxsurrogate = 0))

# fancyRpartPlot() is not part of rpart.
# NOTE(review): it is provided by the rattle package as far as I know; the
# call is namespace-qualified so it works without library(rattle) -- confirm.
rattle::fancyRpartPlot(output, main = "Decision Tree for Titanic")
#=
Name : c5_27_CAPM.jl
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 5/30/2018
email : yany@canisius.edu
        paulyxy@hotmail.com

Note #1: go to http://finance.yahoo.com to download the last 5-year monthly
         data for IBM and S&P500. The ticker for S&P500 is ^GSPC
     #2: delete the first observation which contains 'null'
     #3: since readtable is deprecated, see c5_31_CAPM.jl which
         uses the CSV.read() function
=#
# The header is a #= ... =# block comment: a "###" line only comments out
# itself, so the header text was being parsed as code.

using DataFrames
using GLM, StatsModels

# Stock prices -> simple monthly returns p[t]/p[t-1] - 1.
x = readtable("c:/temp/ibmMonthly5years.csv")
p = x[:Adj_Close]
n = length(p)
# `.- 1` broadcasts the scalar; plain `- 1` between an array and a number is
# not accepted by newer Julia versions.
stockRet = p[2:n] ./ p[1:(n-1)] .- 1

# Market prices -> simple monthly returns.
y = readtable("c:/temp/sp500Monthly5years.csv")
p2 = y[:Adj_Close]
n2 = length(p2)
mktRet = p2[2:n2] ./ p2[1:(n2-1)] .- 1

# Use the common length of the two series, then regress stock on market.
n3 = min(length(stockRet), length(mktRet))
data = DataFrame(X=mktRet[1:n3], Y=stockRet[1:n3])
OLS = glm(@formula(Y ~ X), data, Normal(), IdentityLink())
#{
Name : c9_26_test.m
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 4/6/2018
email : yany@canisius.edu
        paulyxy@hotmail.com
#}

% A script file must not start with a function definition, otherwise Octave
% treats it as a function file; this no-op statement marks it as a script.
1;

% initEnv: load the packages and search path the script needs, then reset the
% figure windows and the console.
function initEnv
  % load packages
  pkg load specfun;
  % set-path for octavelib
  % 'c:/temp/' is already absolute; prefixing pwd() produced a path that
  % cannot exist (<cwd>c:/temp/).
  addpath('c:/temp/');
  % clear
  % NOTE(review): `clear` inside a function only affects this function's own
  % workspace, not the caller's.
  clear ; close all; clc;
end

%init
initEnv();
% Loads the dataset - adds X, Xval, yval to environment
load('c:/temp/anomaly.dat');

%print(X);

% compute gaussian parameters
%[mu sigma2] = computeGaussianParams(X);

% compute epsilon/threshold for probability using validation-set
%epsilon = computeThresholdForMultivarGaussian(Xval, yval, mu, sigma2);

% compute multivariate Gaussian distribution probability
%probability = computeMultivarGaussianDistribution(X, mu, sigma2);

% count of outliers
%fprintf("Number of outliers found: %d\n", sum(probability < epsilon));
"""
Name : c5_05_get_sp500daily.py
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 1/25/2018
email : yany@canisius.edu
        paulyxy@hotmail.com

Objective: Get daily ^GSPC data from Yan's webpage

         Date   Open   High    Low  Close  Adj Close   Volume
0  1950-01-03  16.66  16.66  16.66  16.66      16.66  1260000
1  1950-01-04  16.85  16.85  16.85  16.85      16.85  1890000
             Date         Open         High          Low        Close  \
17117  2018-01-11  2752.969971  2767.560059  2752.780029  2767.560059
17118  2018-01-12  2770.179932  2787.850098  2769.639893  2786.239990

         Adj Close      Volume
17117  2767.560059  3641320000
17118  2786.239990  3573970000

"""
import pandas as pd

# Download the CSV straight from the URL into a DataFrame.
inFile = "http://canisius.edu/~yany/data/^gspcDaily.csv"
d = pd.read_csv(inFile)

# Print the first two and the last two rows.
for part in (d.head(2), d.tail(2)):
    print(part)
#=
Name : c6_19.jl
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 1/25/2018
email : yany@canisius.edu
        paulyxy@hotmail.com
=#
# The header is a #= ... =# block comment: a "###" line only comments out
# itself, so the header text was being parsed as code.
# NOTE(review): linspace and Array{Float64}(n, n) below look like pre-1.0
# Julia spellings -- confirm the target Julia version before running.

# --- Gadfly: draw sin and cos on [0, 25] into an SVG file -------------------
Pkg.add("Gadfly")
using Gadfly
draw(SVG("output.svg", 6inch, 3inch), plot([sin, cos], 0, 25))


"""
    module_functions(modname)

Return the names listed by `names(modname)` that are bound to functions in
`modname`, as a `Vector{Symbol}`.
"""
function module_functions(modname)
    list = Symbol[]
    for nm in names(modname)
        # Look the name up in `modname` itself (eval(nm) resolved it in the
        # current module) and test with isa: every function has its own
        # concrete type, so `typeof(f) == Function` does not detect functions.
        isdefined(modname, nm) || continue
        isa(getfield(modname, nm), Function) && push!(list, nm)
    end
    return list
end


# --- list the functions PyPlot makes available ------------------------------
using PyPlot
module_functions(PyPlot)


# --- plot a one-argument function -------------------------------------------
using MTH229
f(x) = exp(-2*x^2)
plot(f, -3, 3)


# --- surface plot of f(x, y) on an n-by-n grid ------------------------------
using QuantEcon: meshgrid
n = 50
x = linspace(-3, 3, n)
y = x
z = Array{Float64}(n, n)
# Adds a two-argument method to f; the one-argument method above is kept.
f(x, y) = cos(x^2 + y^2) / (1 + x^2 + y^2)
for i in 1:n
    for j in 1:n
        z[j, i] = f(x[i], y[j])
    end
end
xgrid, ygrid = meshgrid(x, y)
surf(xgrid, ygrid, z', cmap=ColorMap("jet"), alpha=0.7)
zlim(-0.5, 1.0)
"
Name : c10_17_sp500_annual_return_nextYear.R
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 4/24/2018
email : yany@canisius.edu
 paulyxy@hotmail.com
"

# Build annual S&P500 returns from monthly prices and print the mean annual
# return with a band of two standard errors around it.

library(data.table)

path <- 'http://canisius.edu/~yany/RData/'
dataSet <- 'sp500monthly.RData'
link <- paste0(path, dataSet)
# load() recreates the saved object .sp500monthly.
load(url(link))
#head(.sp500monthly,2)

# Monthly log returns; the first month has no previous price.
p <- .sp500monthly$ADJ.CLOSE
n <- length(p)
logRet <- log(p[2:n] / p[1:(n - 1)])

# Tag each return with its calendar year.
retDates <- .sp500monthly$DATE[2:n]
years <- format(retDates, "%Y")
y <- data.frame(retDates, years, logRet)
names(y) <- c("DATE", "YEAR", "LOGRET")

# Log returns add up within a year; exp(sum) - 1 is the annual simple return.
y2 <- data.table(y)
z <- y2[, sum(LOGRET), by = YEAR]
z2 <- na.omit(z)
annualRet <- data.frame(z2$YEAR, exp(z2[, 2]) - 1)

# Mean annual return plus/minus two standard errors.
n <- nrow(annualRet)
std <- sd(annualRet[, 2])
stdErr <- std / sqrt(n)
ourMean <- mean(annualRet[, 2])
min2 <- ourMean - 2 * stdErr
max2 <- ourMean + 2 * stdErr
cat("[min mean max ]\n")
cat(min2, ourMean, max2, "\n")
#=
Name : c7_21_JuMP02.jl
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 3/15/2018
email : yany@canisius.edu
        paulyxy@hotmail.com
=#
# The header is a #= ... =# block comment: a "###" line only comments out
# itself, so the header text was being parsed as code.
#
# Cutting-stock master problem, after
# http://www.juliaopt.org/notebooks/Chiwei%20Yan%20-%20Cutting%20Stock.html

using JuMP
using Gurobi
#master = Model()
#master = Model(solver = GLPKSolverLP(method=:Exact))
master = Model(solver = GurobiSolver())

# x[j] >= 0 : number of rolls cut with pattern j (one pattern per column of A).
@variable(master, x[1:2] >= 0)
w = [14 31 36 45]
A = [1 0; 1 0; 0 2; 1 0]
b = [211; 395; 610; 97]

# The script mixed @variable with the older camel-case names (@defConstrRef,
# @addConstraint, @setObjective, getValue); the lower-case names from the same
# JuMP generation as @variable are now used throughout.
# NOTE(review): verify the spellings against the installed JuMP version.
@constraintref myCons[1:4]
for i = 1:4
    # Row i of A: the chosen patterns must cover at least b[i].
    myCons[i] = @constraint(master, dot(x, vec(A[i, :])) >= b[i])
end

# Minimise the total number of rolls used.
@objective(master, Min, sum(x))
master
status = solve(master)
getvalue(x)

# A stray "s" after the closing parenthesis made this line a syntax error.
println("\nOptimal Solution is:\n")
println("width: ", w)
epsilon = 1e-6
for i = 1:size(A, 2)
    # Report only patterns that are used (value above the tolerance).
    if getvalue(x[i]) > epsilon
        println("Cutting Pattern: ", A[:, i], ", Number of Paper Rolls Cut Using this Pattern: ", getvalue(x[i]))
    end
end

# http://www.juliaopt.org/notebooks/Chiwei%20Yan%20-%20Cutting%20Stock.html
"""
Name : c11_02_myfincal.py
Book : Hands-on Data Science with Anaconda )
Publisher: Packt Publishing Ltd.
Author : Yuxing Yan and James Yan
Date : 5/8/2018
email : yany@canisius.edu
        paulyxy@hotmail.com
"""


def pv_f(r, n, fv):
    """
    Objective: estimate present value
           r : period rate
           n : number of periods
          fv : future value
                           fv
    formula used : pv = --------
                        (1+r)**n
    Example 1: >>>pv_f(0.1,1,100)        # meanings of input variables
                 90.9090909090909        # based on their input order

    Example #2 >>>pv_f(r=0.1,fv=100,n=1) # meanings based on keywords
                 90.9090909090909
    """
    return fv / (1 + r) ** n


def pvGrowingPerpetuity(c, r, q):
    """
    Objective: present value of a growing perpetuity
           c : first cash flow
           r : period discount rate
           q : period growth rate of the cash flow
    formula used : pv = c / (r - q)

    The formula is only meaningful for r > q; r == q raises
    ZeroDivisionError.
    Example: >>>pvGrowingPerpetuity(10, 0.1, 0.05)   # about 200
    """
    return c / (r - q)


def fv_f(pv, r, n):
    """
    Objective: estimate future value
          pv : present value
           r : period rate
           n : number of periods
    formula used : fv = pv * (1+r)**n
    Example: >>>fv_f(100, 0.1, 2)   # about 121
    """
    return pv * (1 + r) ** n


def fvAnnuity(r, n, c):
    """
    Objective: future value of an ordinary annuity (payments at period end)
           r : period rate
           n : number of periods
           c : payment per period
    formula used : fv = c/r * ((1+r)**n - 1)

    For r == 0 the formula is 0/0; its limit, c*n, is returned instead of
    raising ZeroDivisionError.
    Example: >>>fvAnnuity(0.1, 2, 100)   # about 210
    """
    if r == 0:
        return c * n
    return c / r * ((1 + r) ** n - 1)


def fvAnnuityDue(r, n, c):
    """
    Objective: future value of an annuity due (payments at period start)
           r : period rate
           n : number of periods
           c : payment per period
    formula used : fv = c/r * ((1+r)**n - 1) * (1+r)

    For r == 0 the limit of the formula, c*n, is returned.
    Example: >>>fvAnnuityDue(0.1, 2, 100)   # about 231
    """
    if r == 0:
        return c * n
    return c / r * ((1 + r) ** n - 1) * (1 + r)
Close']] 17 | ret=p.diff()/p.shift(1) 18 | stockRet=ret.dropna() 19 | stockRet.columns=['stockRet'] 20 | #stockRet.assign(yyyymm=stockRet.index.strftime("%Y%m")) 21 | # 22 | inFile="http://canisius.edu/~yany/data/sp500monthlyEndOfMonthDate.csv" 23 | y=pd.read_csv(inFile,index_col=0) 24 | d=y[['Adj.Close']] 25 | ret2=d.diff()/d.shift(1) 26 | mktRet=ret2.dropna() 27 | mktRet.columns=['mktRet'] 28 | df= stockRet.merge(mktRet, how='inner', left_index=True, right_index=True) 29 | (beta,alpha,r_value,p_value,std_err)=stats.linregress(df.stockRet,df.mktRet) 30 | alpha=round(alpha,8) 31 | beta=round(beta,3) 32 | r_value=round(r_value,3) 33 | p_vaue=round(p_value,3) 34 | print("alpha, beta, R2 and P-value") 35 | print(alpha,beta,r_value,p_value) 36 | -------------------------------------------------------------------------------- /Chapter02/c3_05_saveRDS.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_05_saveRDS.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | 1. sepal length in cm 11 | 2. sepal width in cm 12 | 3. petal length in cm 13 | 4. petal width in cm 14 | 5. 
class: 15 | -- Iris Setosa 16 | -- Iris Versicolour 17 | -- Iris Virginica 18 | " 19 | 20 | inFile<-"http://canisius.edu/~yany/data/ff3monthly.csv" 21 | ff3monthly<-read.csv(inFile) 22 | saveRDS(ff3monthly,file="c:/temp/ff3monthly.rds") 23 | 24 | # read the file back and compare with the original; expected output is kept as comments 25 | 26 | abc<-readRDS("c:/temp/ff3monthly.rds") 27 | head(abc,3) 28 | # DATE MKT_RF SMB HML RF 29 | # 1 1926-07-01 0.0296 -0.0230 -0.0287 0.0022 30 | # 2 1926-08-01 0.0264 -0.0140 0.0419 0.0025 31 | # 3 1926-09-01 0.0036 -0.0132 0.0001 0.0023 32 | head(ff3monthly,3) 33 | # DATE MKT_RF SMB HML RF 34 | # 1 1926-07-01 0.0296 -0.0230 -0.0287 0.0022 35 | # 2 1926-08-01 0.0264 -0.0140 0.0419 0.0025 36 | # 3 1926-09-01 0.0036 -0.0132 0.0001 0.0023 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /Chapter03/c3_05_saveRDS.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c3_05_saveRDS.R 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | 1. sepal length in cm 11 | 2. sepal width in cm 12 | 3. petal length in cm 13 | 4. petal width in cm 14 | 5.
class: 15 | -- Iris Setosa 16 | -- Iris Versicolour 17 | -- Iris Virginica 18 | " 19 | 20 | inFile<-"http://canisius.edu/~yany/data/ff3monthly.csv" 21 | ff3monthly<-read.csv(inFile) 22 | saveRDS(ff3monthly,file="c:/temp/ff3monthly.rds") 23 | 24 | # read the file back and compare with the original; expected output is kept as comments 25 | 26 | abc<-readRDS("c:/temp/ff3monthly.rds") 27 | head(abc,3) 28 | # DATE MKT_RF SMB HML RF 29 | # 1 1926-07-01 0.0296 -0.0230 -0.0287 0.0022 30 | # 2 1926-08-01 0.0264 -0.0140 0.0419 0.0025 31 | # 3 1926-09-01 0.0036 -0.0132 0.0001 0.0023 32 | head(ff3monthly,3) 33 | # DATE MKT_RF SMB HML RF 34 | # 1 1926-07-01 0.0296 -0.0230 -0.0287 0.0022 35 | # 2 1926-08-01 0.0264 -0.0140 0.0419 0.0025 36 | # 3 1926-09-01 0.0036 -0.0132 0.0001 0.0023 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /Chapter10/c10_12_datarobot_not_working.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_12_datarobot_not_working.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | library(datarobot) 12 | 13 | " 14 | Did not connect to DataRobot on package startup. Use `ConnectToDataRobot`.
15 | To connect by default on startup, you can put a config file at: C:\Users\yany\Documents/.config/datarobot/drconfig.yaml 16 | 17 | 18 | https://cran.r-project.org/web/packages/datarobot/index.html 19 | 20 | loc<- "YOUR-ENDPOINT-HERE" 21 | myToken<-"YOUR-API_TOKEN-HERE" 22 | onnectToDataRobot(endpoint=loc,token=myToken) 23 | 24 | ConnectToDataRobot(endpoint = "YOUR-ENDPOINT-HERE", token = "YOUR-API_TOKEN-HERE") 25 | 26 | https://app.datarobot.com/api/v2 27 | 28 | loc1<- "https://app.datarobot.com/api/v2" 29 | loc2<-"C:/Users/yany/.config/datarobot/drconfig.yaml" 30 | ConnectToDataRobot(endpoint =loc1, token = loc2) 31 | 32 | 33 | https://cran.r-project.org/web/packages/datarobot/vignettes/IntroductionToDataRobot.html 34 | 35 | " 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /Chapter04/c4_12_generate_Black_Scholes_formula.py: -------------------------------------------------------------------------------- 1 | " 2 | Name : c4_12_generate_Black_Scholes_formula.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | import matplotlib 12 | import numpy as np 13 | import matplotlib.mathtext as mathtext 14 | import matplotlib.pyplot as plt 15 | matplotlib.rc('image', origin='upper') 16 | parser = mathtext.MathTextParser("Bitmap") 17 | #parser = mathtext.MathTextParser("Black") 18 | # 19 | x1=r'$d_2=\frac{ln(S_0/K)+(r-\sigma^2/2)T}{\sigma\sqrt{T}}=d_1-\sigma\sqrt{T}$' 20 | x2=r'$d_1=\frac{ln(S_0/K)+(r+\sigma^2/2)T}{\sigma\sqrt{T}}$' 21 | x3=r' $c=S_0N(d_1)- Ke^{-rT}N(d_2)$' 22 | rgba1, depth1 = parser.to_rgba(x1, color='blue',fontsize=12, dpi=200) 23 | rgba2, depth2 = parser.to_rgba(x2, color='blue', fontsize=12, dpi=200) 24 | rgba3, depth3 = parser.to_rgba(x3, color='red',fontsize=14, dpi=200) 25 | fig = plt.figure() 26 | fig.figimage(rgba1.astype(float)/255., 100, 100) 27 | fig.figimage(rgba2.astype(float)/255., 100, 200) 28 | fig.figimage(rgba3.astype(float)/255., 100, 300) 29 | plt.show() 30 | -------------------------------------------------------------------------------- /Chapter09/c9_27_bird.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c9_27_bird.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | #} 10 | 11 | #pkg install optiminterp-0.3.4.tar.gz 12 | pkg load optiminterp 13 | A = double(imread('c:/temp/bird_small.png')); 14 | A = A / 255; % Divide by 255 so that all values are in the range 0 - 1 15 | imgSize = size(A); 16 | X = reshape(A, imgSize(1) * imgSize(2), 3); 17 | 18 | % compress image using 4-bit (16) colors & minimize cost 19 | k = 16; 20 | [Centroids, idx, cost] = generateKMeansClustersMinCost(X, k, 10, 10); 21 | fprintf('Cost/Distortion of computed clusters: %.3f\n', cost); 22 | fprintf('Program paused. 
Press enter to continue.\n'); 23 | pause; 24 | 25 | % regenerate colors & image 26 | NewX = Centroids(idx, :); % one centroid row per entry of idx 27 | NewA = reshape(NewX, imgSize(1), imgSize(2), 3); % back to the layout recorded in imgSize 28 | 29 | % compare both the images 30 | fprintf('Comparing original and compressed images...\n'); 31 | subplot(1, 2, 1); 32 | imagesc(A); 33 | axis("square"); 34 | title('Original'); 35 | subplot(1, 2, 2); 36 | imagesc(NewA); 37 | axis("square"); 38 | title('Compressed'); -------------------------------------------------------------------------------- /Chapter05/c5_06_get_sp500monthly.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c5_06_get_sp500monthly.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | Objective: Get monthly S&P500 (^GSPC) data from Yan's webpage 11 | 12 | Date Open High Low Close Adj Close Volume 13 | 0 1950-01-01 16.660000 17.09 16.65 17.049999 17.049999 42570000 14 | 1 1950-02-01 17.049999 17.35 16.99 17.219999 17.219999 33430000 15 | Date Open High Low Close \ 16 | 815 2017-12-01 2645.100098 2694.969971 2605.520020 2673.610107 17 | 816 2018-01-01 2683.729980 2787.850098 2682.360107 2786.239990 18 | 817 2018-01-12 2770.179932 2787.850098 2769.639893 2786.239990 19 | 20 | Adj Close Volume 21 | 815 2673.610107 65251190000 22 | 816 2786.239990 27862080000 23 | 817 2786.239990 2129410147 24 | """ 25 | import pandas as pd 26 | 27 | inFile="http://canisius.edu/~yany/data/^gspcMonthly.csv" 28 | d=pd.read_csv(inFile) # the CSV is read directly from the URL 29 | print(d.head(2)) # first two rows 30 | print(d.tail(3)) # last three rows 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /Chapter09/c9_15_FamaFrench3factorModel.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Name : c9_15_FamaFrench3factorModel.py 4 | Book :
Hands-on Data Science with Anaconda ) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 4/6/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | """ 11 | 12 | import scipy as sp 13 | import numpy as np 14 | import pandas as pd 15 | import statsmodels.api as sm 16 | from matplotlib.finance import quotes_historical_yahoo_ochl as getData 17 | ticker='IBM' 18 | begdate=(2012,1,1) 19 | enddate=(2016,12,31) 20 | p= getData(ticker, begdate, enddate,asobject=True, adjusted=True) 21 | logret = sp.log(p.aclose[1:]/p.aclose[:-1]) 22 | ddate=[] 23 | d0=p.date 24 | for i in range(0,sp.size(logret)): 25 | x=''.join([d0[i].strftime("%Y"),d0[i].strftime("%m"),"01"]) 26 | ddate.append(pd.to_datetime(x, format='%Y%m%d').date()) 27 | # 28 | t=pd.DataFrame(logret,np.array(ddate),columns=['RET']) 29 | ret=sp.exp(t.groupby(t.index).sum())-1 30 | # 31 | ff=pd.read_pickle('c:/temp/ffMonthly.pkl') 32 | final=pd.merge(ret,ff,left_index=True,right_index=True) 33 | y=final['RET'] 34 | x=final[['MKT_RF','SMB','HML']] 35 | x=sm.add_constant(x) 36 | results=sm.OLS(y,x).fit() 37 | print(results.summary()) 38 | -------------------------------------------------------------------------------- /Chapter08/c8_29_PCA.py: -------------------------------------------------------------------------------- 1 | 2 | #http://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_iris.html 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from mpl_toolkits.mplot3d import Axes3D 7 | from sklearn import decomposition 8 | from sklearn import datasets 9 | np.random.seed(5) 10 | centers = [[1, 1], [-1, -1], [1, -1]] 11 | iris = datasets.load_iris() 12 | X = iris.data 13 | y = iris.target 14 | 15 | fig = plt.figure(1, figsize=(4, 3)) 16 | plt.clf() 17 | ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134) 18 | 19 | plt.cla() 20 | pca = decomposition.PCA(n_components=3) 21 | pca.fit(X) 22 | X = pca.transform(X) 23 | 24 | for name, label in 
[('Setosa', 0), ('Versicolour', 1), ('Virginica', 2)]: 25 | ax.text3D(X[y == label, 0].mean(), 26 | X[y == label, 1].mean() + 1.5, 27 | X[y == label, 2].mean(), name, 28 | horizontalalignment='center', 29 | bbox=dict(alpha=.5, edgecolor='w', facecolor='w')) 30 | # Reorder the labels to have colors matching the cluster results 31 | y = np.choose(y, [1, 2, 0]).astype(np.float) 32 | ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.spectral, 33 | edgecolor='k') 34 | 35 | ax.w_xaxis.set_ticklabels([]) 36 | ax.w_yaxis.set_ticklabels([]) 37 | ax.w_zaxis.set_ticklabels([]) 38 | 39 | plt.show() -------------------------------------------------------------------------------- /Chapter09/c9_03_simplefied_tree_tinatic.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c9_03_simplified_tree_titanic.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | " 10 | 11 | 12 | library(rpart, quietly=TRUE) 13 | building <- TRUE 14 | scoring <- ! 
building 15 | set.seed(42) 16 | crs$dataset <- x # NOTE(review): x is not defined in this file; presumably the Titanic data frame loaded beforehand - confirm 17 | str(crs$dataset) 18 | crs$nobs <- nrow(crs$dataset) 19 | crs$train <- crs$sample <- sample(crs$nobs, 0.7*crs$nobs) # 70% of the row indices for training 20 | crs$validate <- sample(setdiff(seq_len(crs$nobs), crs$train), 0.15*crs$nobs) # 15% of all rows, drawn from the non-training rows 21 | crs$test <- setdiff(setdiff(seq_len(crs$nobs), crs$train), crs$validate) # every row in neither of the two sets above 22 | crs$input <- c("CLASS", "AGE", "GENDER") 23 | crs$numeric <- NULL 24 | crs$categoric<- c("CLASS", "AGE", "GENDER") 25 | crs$target <- "SURVIVED" 26 | crs$risk <- NULL 27 | crs$ident <- NULL 28 | crs$ignore <- NULL 29 | crs$weights <- NULL 30 | #set.seed(crv$seed) 31 | crs$rpart <- rpart(SURVIVED ~ .,data=crs$dataset[crs$train, c(crs$input, crs$target)], 32 | method="class",parms=list(split="information"),control=rpart.control(usesurrogate=0, 33 | maxsurrogate=0)) 34 | print(crs$rpart) 35 | printcp(crs$rpart) 36 | cat("\n") 37 | fancyRpartPlot(crs$rpart, main="Decision Tree x $ SURVIVED") 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /Chapter02/c3_21_sort_order.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c3_21_sort_order.R 4 | Book : Hands-on Data Science with Anaconda) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 1/15/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | 11 | > dim(nyseListing) 12 | [1] 3387 4 13 | > head(nyseListing) 14 | Symbol Name MarketCap Exchange 15 | 1 A Agilent Technologies, Inc. $12,852.3 NYSE 16 | 2 AA Alcoa Inc. $28,234.5 NYSE 17 | 3 AAI AirTran Holdings, Inc. $156.9 NYSE 18 | 4 AAP Advance Auto Parts Inc $3,507.4 NYSE 19 | 5 AAR AMR CORPORATION $81.7 NYSE 20 | 6 AAV ADVANTAGE ENERGY INCOME FUND $1,674.4 NYSE 21 | > 22 | > x<-nyseListing[order(nyseListing$Name),] 23 | > head(x) 24 | Symbol Name MarketCap Exchange 25 | 2017 MMM 3M Company $48,398.7 NYSE 26 | 557 CFD 40/86 Strategic Income Fund $56.8 NYSE 27 | 1721 KDE 4Kids Entertainment, Inc.
$99.7 NYSE 28 | 2164 NDN 99 CENTS ONLY STORES $432.3 NYSE 29 | 87 AHC A.H. Belo Corporation $107.4 NYSE 30 | 1242 GFW AAG Holding Company Inc. NYSE 31 | 32 | " 33 | 34 | library(fImport) 35 | data(nyseListing) # load the nyseListing data set 36 | dim(nyseListing) # number of rows and columns 37 | head(nyseListing) 38 | 39 | x<-nyseListing[order(nyseListing$Name),] # reorder the rows by the Name column 40 | head(x) # first rows of the reordered listing 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /Chapter03/c3_21_sort_order.R: -------------------------------------------------------------------------------- 1 | 2 | " 3 | Name : c3_21_sort_order.R 4 | Book : Hands-on Data Science with Anaconda) 5 | Publisher: Packt Publishing Ltd. 6 | Author : Yuxing Yan and James Yan 7 | Date : 1/15/2018 8 | email : yany@canisius.edu 9 | paulyxy@hotmail.com 10 | 11 | > dim(nyseListing) 12 | [1] 3387 4 13 | > head(nyseListing) 14 | Symbol Name MarketCap Exchange 15 | 1 A Agilent Technologies, Inc. $12,852.3 NYSE 16 | 2 AA Alcoa Inc. $28,234.5 NYSE 17 | 3 AAI AirTran Holdings, Inc. $156.9 NYSE 18 | 4 AAP Advance Auto Parts Inc $3,507.4 NYSE 19 | 5 AAR AMR CORPORATION $81.7 NYSE 20 | 6 AAV ADVANTAGE ENERGY INCOME FUND $1,674.4 NYSE 21 | > 22 | > x<-nyseListing[order(nyseListing$Name),] 23 | > head(x) 24 | Symbol Name MarketCap Exchange 25 | 2017 MMM 3M Company $48,398.7 NYSE 26 | 557 CFD 40/86 Strategic Income Fund $56.8 NYSE 27 | 1721 KDE 4Kids Entertainment, Inc. $99.7 NYSE 28 | 2164 NDN 99 CENTS ONLY STORES $432.3 NYSE 29 | 87 AHC A.H. Belo Corporation $107.4 NYSE 30 | 1242 GFW AAG Holding Company Inc.
NYSE 31 | 32 | " 33 | 34 | library(fImport) 35 | data(nyseListing) # load the nyseListing data set 36 | dim(nyseListing) # number of rows and columns 37 | head(nyseListing) 38 | 39 | x<-nyseListing[order(nyseListing$Name),] # reorder the rows by the Name column 40 | head(x) # first rows of the reordered listing 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /Chapter10/c10_26_pca.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c10_26_pca.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | #} 11 | 12 | 13 | %init 14 | initEnv(); 15 | 16 | % load data for 5000 face-images of 32 x 32 size in grayscale 17 | load ('faces.mat'); 18 | 19 | % normalize input-features 20 | [mu, sigma] = computeScalingParams(X); 21 | XNorm = scaleFeatures(X, mu, sigma); 22 | 23 | % perform PCA 24 | fprintf('Computing PCA...\n'); 25 | if exist('OCTAVE_VERSION') 26 | fflush(stdout); 27 | end 28 | [U, S] = computePCA(XNorm); 29 | 30 | % project data into reduced size 31 | k = 100; % target dimension, as reported by the message below 32 | fprintf('Projecting data to reduced dimensional space - from: %d to: %d\n', size(XNorm, 2), k); 33 | if exist('OCTAVE_VERSION') 34 | fflush(stdout); 35 | end 36 | [Z, variance] = projectPCAData(XNorm, U, S, k); 37 | fprintf('Varianced retained by projecting to reduced-size: %f\n', variance); 38 | 39 | % recover data from reduced-set 40 | fprintf('Recovering data to original dimensional space...\n'); 41 | if exist('OCTAVE_VERSION') 42 | fflush(stdout); 43 | end 44 | XRec = recoverPCAData(Z, U, k); 45 | % top row of the figure: first 10 rows of X; bottom row: the same rows of XRec 46 | fprintf("Visualize face data before and after reduction\n"); 47 | colormap(gray); 48 | for idx=1:10 49 | subplot(2, 10, idx); 50 | imagesc(reshape(X(idx, :), 32, 32)); 51 | axis("square", "off"); 52 | subplot(2, 10, 10+idx); 53 | imagesc(reshape(XRec(idx, :), 32, 32)); 54 | axis("square", "off"); 55 | end 56 |
-------------------------------------------------------------------------------- /Chapter09/c9_30_great_test.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c9_30_great_test.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | #} 10 | 11 | 12 | %init 13 | %initEnv(); 14 | pkg load specfun; 15 | % load data for 5000 face-images of 32 x 32 size in grayscale 16 | load ('c:/temp/faces.mat'); 17 | 18 | % normalize input-features 19 | [mu, sigma] = computeScalingParams(X); 20 | XNorm = scaleFeatures(X, mu, sigma); 21 | 22 | % perform PCA 23 | fprintf('Computing PCA...\n'); 24 | if exist('OCTAVE_VERSION') 25 | fflush(stdout); 26 | end 27 | [U, S] = computePCA(XNorm); 28 | 29 | % project data into reduced size 30 | k = 100; 31 | fprintf('Projecting data to reduced dimensional space - from: %d to: %d\n', size(XNorm, 2), k); 32 | if exist('OCTAVE_VERSION') 33 | fflush(stdout); 34 | end 35 | [Z, variance] = projectPCAData(XNorm, U, S, k); 36 | fprintf('Varianced retained by projecting to reduced-size: %f\n', variance); 37 | 38 | % recover data from reduced-set 39 | fprintf('Recovering data to original dimensional space...\n'); 40 | if exist('OCTAVE_VERSION') 41 | fflush(stdout); 42 | end 43 | XRec = recoverPCAData(Z, U, k); 44 | 45 | fprintf("Visualize face data before and after reduction\n"); 46 | colormap(gray); 47 | for idx=1:10 48 | subplot(2, 10, idx); 49 | imagesc(reshape(X(idx, :), 32, 32)); 50 | axis("square", "off"); 51 | subplot(2, 10, 10+idx); 52 | imagesc(reshape(XRec(idx, :), 32, 32)); 53 | axis("square", "off"); 54 | end -------------------------------------------------------------------------------- /Chapter12/c12_03_makeCluster.R: -------------------------------------------------------------------------------- 1 | 2 | library(parallel) # makeCluster and the cluster*/par* functions used below come from this package 3 | 4 | cl <-
makeCluster(getOption("cl.cores", 2)) 5 | clusterApply(cl, 1:2, get("+"), 3) # calls `+`(i, 3) for i in 1:2 6 | xx <- 1 7 | clusterExport(cl, "xx") # the worker-side function on the next line refers to xx 8 | clusterCall(cl, function(y) xx + y, 2) 9 | 10 | ## Use clusterMap like an mapply example 11 | clusterMap(cl, function(x, y) seq_len(x) + y, 12 | c(a = 1, b = 2, c = 3), c(A = 10, B = 0, C = -10)) 13 | 14 | 15 | parSapply(cl, 1:20, get("+"), 3) 16 | 17 | ## A bootstrapping example, which can be done in many ways: 18 | clusterEvalQ(cl, { 19 | ## set up each worker. Could also use clusterExport() 20 | library(boot) 21 | cd4.rg <- function(data, mle) MASS::mvrnorm(nrow(data), mle$m, mle$v) 22 | cd4.mle <- list(m = colMeans(cd4), v = var(cd4)) 23 | NULL 24 | }) 25 | res <- clusterEvalQ(cl, boot(cd4, corr, R = 100, 26 | sim = "parametric", ran.gen = cd4.rg, mle = cd4.mle)) 27 | library(boot) 28 | cd4.boot <- do.call(c, res) # combine the per-worker results with c() 29 | boot.ci(cd4.boot, type = c("norm", "basic", "perc"), 30 | conf = 0.9, h = atanh, hinv = tanh) 31 | stopCluster(cl) 32 | 33 | ## or 34 | library(boot) 35 | run1 <- function(...) { 36 | library(boot) 37 | cd4.rg <- function(data, mle) MASS::mvrnorm(nrow(data), mle$m, mle$v) 38 | cd4.mle <- list(m = colMeans(cd4), v = var(cd4)) 39 | boot(cd4, corr, R = 500, sim = "parametric", 40 | ran.gen = cd4.rg, mle = cd4.mle) 41 | } 42 | cl <- makeCluster(mc <- getOption("cl.cores", 2)) 43 | ## to make this reproducible 44 | clusterSetRNGStream(cl, 123) 45 | cd4.boot <- do.call(c, parLapply(cl, seq_len(mc), run1)) 46 | boot.ci(cd4.boot, type = c("norm", "basic", "perc"), 47 | conf = 0.9, h = atanh, hinv = tanh) 48 | stopCluster(cl) -------------------------------------------------------------------------------- /Chapter02/c3_28_datadotworld_2good.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_28_datadotworld_2good.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | DataDotWorldBBallStats.name pointspergame assistspergame \ 11 | 0 Jon 20.4 1.3 12 | 1 Rob 15.5 8.0 13 | 2 Sharon 30.1 11.2 14 | 3 Alex 8.2 0.5 15 | 4 Rebecca 12.3 17.0 16 | 5 Ariane 18.1 3.0 17 | 6 Bryon 16.0 8.5 18 | 7 Matt 13.0 2.1 19 | 20 | DataDotWorldBBallTeam.name height handedness 21 | 0 Jon 6'5" Right 22 | 1 Rob 6'7.5" Left 23 | 2 Sharon 6'3" Right 24 | 3 Alex 6'2" Right 25 | 4 Rebecca 7' Right 26 | 5 Ariane 5'8" Left 27 | 6 Bryon 7' Right 28 | 7 Matt 5'5" Right 29 | """ 30 | 31 | import datadotworld as dw 32 | name='jonloyens/an-intro-to-dataworld-dataset' # dataset key passed to dw.query; the SQL below joins the two tables on Name 33 | results = dw.query(name, 34 | 'SELECT * FROM `DataDotWorldBBallStats`, `DataDotWorldBBallTeam` ' 35 | 'WHERE DataDotWorldBBallTeam.Name = DataDotWorldBBallStats.Name') 36 | df = results.dataframe # the query result exposed through its dataframe attribute 37 | print(df) -------------------------------------------------------------------------------- /Chapter03/c3_28_datadotworld_2good.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c3_28_datadotworld_2good.py 3 | Book : Hands-on Data Science with Anaconda) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/15/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | DataDotWorldBBallStats.name pointspergame assistspergame \ 11 | 0 Jon 20.4 1.3 12 | 1 Rob 15.5 8.0 13 | 2 Sharon 30.1 11.2 14 | 3 Alex 8.2 0.5 15 | 4 Rebecca 12.3 17.0 16 | 5 Ariane 18.1 3.0 17 | 6 Bryon 16.0 8.5 18 | 7 Matt 13.0 2.1 19 | 20 | DataDotWorldBBallTeam.name height handedness 21 | 0 Jon 6'5" Right 22 | 1 Rob 6'7.5" Left 23 | 2 Sharon 6'3" Right 24 | 3 Alex 6'2" Right 25 | 4 Rebecca 7' Right 26 | 5 Ariane 5'8" Left 27 | 6 Bryon 7' Right 28 | 7 Matt 5'5" Right 29 | """ 30 | 31 | import datadotworld as dw 32 | name='jonloyens/an-intro-to-dataworld-dataset' # dataset key passed to dw.query; the SQL below joins the two tables on Name 33 | results = dw.query(name, 34 | 'SELECT * FROM `DataDotWorldBBallStats`, `DataDotWorldBBallTeam` ' 35 | 'WHERE DataDotWorldBBallTeam.Name = DataDotWorldBBallStats.Name') 36 | df = results.dataframe # the query result exposed through its dataframe attribute 37 | print(df) -------------------------------------------------------------------------------- /Chapter09/c9_29_processing_email.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c9_29_processing_email.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | #} 10 | % features(i) is set to 1 when vocabList{i} occurs among the stemmed tokens of the file, else stays 0 11 | function features = extractEmailFeatures(filename, vocabList) 12 | vocablistSize = size(vocabList, 1); 13 | features = zeros(1, vocablistSize); 14 | 15 | % read email contents from file 16 | fid = fopen(filename); 17 | if fid >= 0 % fopen returns -1 on failure, which a bare "if fid" treats as true 18 | email_contents = fscanf(fid, '%c', inf); 19 | fclose(fid); 20 | else 21 | return; 22 | end 23 | 24 | % process email-contents 25 | email_contents = lower(email_contents); % lower-case 26 | email_contents = regexprep(email_contents, '<[^<>]+>', ' '); # strip HTML 27 | email_contents = regexprep(email_contents, '[0-9]+', 'number'); # replace numbers 28 | email_contents = regexprep(email_contents, '(http|https)://[^\s]*', 'httpaddr'); % replace URLs 29 | email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr'); % replace email-ids 30 | email_contents = regexprep(email_contents, '[$]+', 'dollar'); % replace dollar 31 | 32 | % tokenize contents & extract features 33 | while ~isempty(email_contents) 34 | [str, email_contents] = strtok(email_contents, [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]); 35 | str = regexprep(str, '[^a-zA-Z0-9]', ''); % remove non-alphanumeric 36 | % porter-stemmer 37 | try str = porterStemmer(strtrim(str)); 38 | catch str = ''; continue; 39 | end; 40 | if length(str) < 1 41 | continue; 42 | end 43 | [val, idx] = max(ismember(vocabList, str)); 44 | if (val == 1) 45 | features(idx) = 1; 46 | endif 47 | end 48 | 49 | end -------------------------------------------------------------------------------- /Chapter09/c9_13_short_version.py: -------------------------------------------------------------------------------- 1 | # http://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html#sphx-glr-auto-examples-classification-plot-digits-classification-py 2 | 3 | """ 4 | Name : c9_13_short_version_iris.py 5 | Book : Hands-on Data Science with Anaconda
) 6 | Publisher: Packt Publishing Ltd. 7 | Author : Yuxing Yan and James Yan 8 | Date : 4/6/2018 9 | email : yany@canisius.edu 10 | paulyxy@hotmail.com 11 | """ 12 | 13 | import matplotlib.pyplot as plt 14 | from sklearn import datasets, svm, metrics 15 | from sklearn.metrics import classification_report as report 16 | # 17 | format1="Classification report for classifier %s:\n%s\n" 18 | format2="Confusion matrix:\n%s" 19 | digits = datasets.load_digits() 20 | imageLabels = list(zip(digits.images, digits.target)) 21 | for index,(image,label) in enumerate(imageLabels[:4]): 22 | plt.subplot(2, 4, index + 1) 23 | plt.axis('off') 24 | plt.imshow(image,cmap=plt.cm.gray_r,interpolation='nearest') 25 | plt.title('Training: %i' % label) 26 | n=len(digits.images) 27 | data2 = digits.images.reshape((n,-1)) 28 | classifier = svm.SVC(gamma=0.001) 29 | classifier.fit(data2[:n//2],digits.target[:n//2]) 30 | expected = digits.target[n//2:] 31 | predicted = classifier.predict(data[n//2:]) 32 | print(format1 % (classifier,report(expected, predicted))) 33 | print(format2 % metrics.confusion_matrix(expected, predicted)) 34 | imageAndPredictions=list(zip(digits.images[n//2:], predicted)) 35 | for index,(image,prediction) in enumerate(imageAndPredictions[:4]): 36 | plt.subplot(2,4,index+5) 37 | plt.axis('off') 38 | plt.imshow(image,cmap=plt.cm.gray_r,interpolation='nearest') 39 | plt.title('Prediction: %i' % prediction) 40 | plt.show() 41 | 42 | -------------------------------------------------------------------------------- /Chapter08/c8_41_plot_pca_iris.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | ========================================================= 6 | PCA example with Iris Data-set 7 | ========================================================= 8 | 9 | Principal Component Analysis applied to the Iris dataset. 10 | 11 | See `here `_ for more 12 | information on this dataset. 
13 | 14 | """ 15 | print(__doc__) 16 | 17 | 18 | # Code source: Gaël Varoquaux 19 | # License: BSD 3 clause 20 | 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | from mpl_toolkits.mplot3d import Axes3D 24 | 25 | 26 | from sklearn import decomposition 27 | from sklearn import datasets 28 | 29 | np.random.seed(5) 30 | 31 | centers = [[1, 1], [-1, -1], [1, -1]] 32 | iris = datasets.load_iris() 33 | X = iris.data 34 | y = iris.target 35 | 36 | fig = plt.figure(1, figsize=(4, 3)) 37 | plt.clf() 38 | ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134) 39 | 40 | plt.cla() 41 | pca = decomposition.PCA(n_components=3) 42 | pca.fit(X) 43 | X = pca.transform(X) 44 | 45 | for name, label in [('Setosa', 0), ('Versicolour', 1), ('Virginica', 2)]: 46 | ax.text3D(X[y == label, 0].mean(), 47 | X[y == label, 1].mean() + 1.5, 48 | X[y == label, 2].mean(), name, 49 | horizontalalignment='center', 50 | bbox=dict(alpha=.5, edgecolor='w', facecolor='w')) 51 | # Reorder the labels to have colors matching the cluster results 52 | y = np.choose(y, [1, 2, 0]).astype(np.float) 53 | ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.spectral, 54 | edgecolor='k') 55 | 56 | ax.w_xaxis.set_ticklabels([]) 57 | ax.w_yaxis.set_ticklabels([]) 58 | ax.w_zaxis.set_ticklabels([]) 59 | 60 | plt.show() 61 | -------------------------------------------------------------------------------- /Chapter10/c10_24_ddd.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c10_24_ddd.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/24/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | #} 11 | 12 | 13 | 14 | %init 15 | %initEnv(); 16 | % read spam-vocab list into a cell array 17 | % words which occur at least 100 times in the spam corpus 18 | n = 1899; % list size 19 | vocabList = cell(n, 1); 20 | fid = fopen('c:/temp/spam-vocab.txt'); 21 | for i=1:n 22 | vocabList{i} = fscanf(fid, '%s', 1); 23 | end 24 | fclose(fid); 25 | % Load the Spam Email training & test datasets based on above spam-vocab list 26 | % X/Xtest: vector of length 1899 with 1 for known spam-words, and 0 otherwise 27 | % y/ytest: spam classification of 0 or 1 28 | load('c:/temp/spamTrain.mat'); % adds X, y to environment 29 | load('c:/temp/spamTest.mat'); % adds Xtest, ytest to environment 30 | 31 | fprintf("Computing C & sigma for RBF-Kernel in SVM - this will take some time...\n"); 32 | [C, gamma] = chooseRBFParamsForSVM(X, y, Xtest, ytest); 33 | 34 | % SVM training 35 | % -s 0 : classification 36 | % -t 2 : RBF/Gaussian kernel 37 | % -c number : C 38 | % -g number : gamma 39 | fprintf("Training SVM with RBF-Kernel for C: %g and gamma: %g\n", C, gamma); 40 | model = svmtrain(y, X, sprintf('-s 0 -t 2 -c %g -g %g', C, gamma)); 41 | 42 | %% extract features from sample emails & predict 43 | num = 3; 44 | emailFeatures = zeros(1, n); % placeholder; overwritten on every loop iteration 45 | for i=1:num 46 | fileName = sprintf('email-sample-%d.txt', i); 47 | emailFeatures = extractEmailFeatures(fileName, vocabList); 48 | [pred, acc, prob] = svmpredict([0], emailFeatures, model, '-q'); 49 | if(pred == 1) 50 | fprintf("%s is spam\n", fileName); 51 | else 52 | fprintf("%s is NOT spam\n", fileName); 53 | end 54 | end -------------------------------------------------------------------------------- /Chapter09/c9_25_octave_good_graph.m: -------------------------------------------------------------------------------- 1 | #{ 2 | Name : c9_25_octave_good_graph.m 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher:
Packt Publishing Ltd. 5 | Author : Yuxing Yan and James Yan 6 | Date : 4/6/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | input data: http://canisius.edu/~yany/data/c9_input.csv 11 | #} 12 | 13 | 14 | 15 | a=csvread("c:/temp/c9_input.csv"); 16 | x=a(:,2); 17 | y=a(:,3); 18 | figure % open a new figure window 19 | plot(x, y, 'o'); 20 | ylabel('Annual returns for S&P500') 21 | xlabel('Annual returns for IBM') 22 | # 23 | m = length(y); 24 | x = [ones(m, 1), x]; %add a column of ones to x 25 | theta = zeros(size(x,2),1); 26 | alpha = 0.07; 27 | delta = ones(size(theta)); 28 | while max(abs(delta(:))) > 0.00001 % stop only when every gradient component is small in magnitude 29 | h = sum(x * theta,2); 30 | err = h - y; 31 | delta = x' * err / m; 32 | theta = theta - alpha * delta; 33 | end 34 | hold on % Plot new data without clearing old plot 35 | plot(x(:,2), x*theta, '-') % remember that x is now a matrix with 2 36 | % columns 37 | % and the second column contains the 38 | % time info 39 | legend('Training data', 'Linear regression') 40 | J_vals = zeros(100, 100); % initialize 41 | theta0_vals = linspace(-3, 3, 100); 42 | theta1_vals = linspace(-1, 1, 100); 43 | for i = 1:length(theta0_vals) 44 | for j = 1:length(theta1_vals) 45 | t = [theta0_vals(i); theta1_vals(j)]; 46 | h = x * t; % per-observation prediction; summing here would collapse it to a scalar 47 | J_vals(i,j) = sum((h - y).^2) / (2*m); 48 | end 49 | end 50 | % Plot the surface plot 51 | % Because of the way meshgrids work in the surf command, we need to 52 | % transpose J_vals before calling surf, or else the axes will be 53 | % flipped 54 | J_vals = J_vals'; 55 | figure; 56 | surf(theta0_vals, theta1_vals, J_vals) 57 | xlabel('\theta_0'); ylabel('\theta_1') -------------------------------------------------------------------------------- /Chapter05/c5_09_annual_beta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name : c5_09_annual_beta.py 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd.
5 | Author : Yuxing Yan and James Yan 6 | Date : 1/25/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | 10 | Objective: using Quandl to replace Yahoo!Finance 11 | since Yahoo has changed its data structure 12 | 13 | Book title: Python for Finance (2nd edition) 14 | Author : Yuxing Yan 15 | Page # : 192 16 | Date : 1/23/2018 by Yuxing Yan 17 | 18 | Output of this program 19 | year, alpha, beta, R_value, P_value 20 | (u'1962', 0.00012479, 0.411, 0.815, 6.1554743380330469e-61) 21 | (u'1963', 0.00033024, 0.342, 0.616, 1.3611738963159366e-27) 22 | """ 23 | import numpy as np 24 | import scipy as sp 25 | import pandas as pd 26 | import quandl as qd 27 | from scipy import stats 28 | # 29 | ticker="wmt" 30 | x=qd.get("WIKI/"+ticker) 31 | p=x[['Adj. Close']] 32 | ret=p.diff()/p.shift(1) 33 | stockRet=ret.dropna() 34 | stockRet.columns=['stockRet'] 35 | # 36 | inFile="http://canisius.edu/~yany/data/^gspcDaily.csv" 37 | y=pd.read_csv(inFile,index_col=0) 38 | d=y[['Adj Close']] 39 | ret2=d.diff()/d.shift(1) 40 | mktRet=ret2.dropna() 41 | mktRet.columns=['mktRet'] 42 | 43 | final= stockRet.merge(mktRet, how='inner', left_index=True, right_index=True) 44 | years=pd.unique(final.index.strftime("%Y")) 45 | 46 | 47 | print(" year, alpha, beta,R_value, P_value") 48 | for i in sp.arange(0,5): 49 | #for i in sp.arange(1,len(years)): 50 | #print(years[i]) 51 | d=final[final.index.strftime("%Y")==years[i]] 52 | (beta,alpha,r_value,p_value,std_err)=stats.linregress(d.stockRet,d.mktRet) 53 | alpha=round(alpha,8) 54 | beta=round(beta,3) 55 | r_value=round(r_value,3) 56 | p_vaue=round(p_value,3) 57 | print(years[i],alpha,beta,r_value,p_value) 58 | -------------------------------------------------------------------------------- /Chapter10/c10_02_using_Liblinear02.R: -------------------------------------------------------------------------------- 1 | " 2 | Name : c10_02_using_Liblinear02.R 3 | Book : Hands-on Data Science with Anaconda ) 4 | Publisher: Packt Publishing Ltd. 
5 | Author : Yuxing Yan and James Yan 6 | Date : 4/23/2018 7 | email : yany@canisius.edu 8 | paulyxy@hotmail.com 9 | "
# Model selection with LiblineaR on the iris data:
#   1. split iris into 100 random training rows and the remaining test rows
#   2. for every model type in tryTypes and cost in tryCosts, record the
#      value returned by LiblineaR(..., cross=5) and keep the best pair
#   3. refit with the best type/cost, predict on the scaled test set, and
#      print the confusion matrix and the balanced classification rate (BCR)
# The train/test split is random (no seed is set), so results vary by run.

library(LiblineaR)
data(iris)
attach(iris)   # kept from the original; columns are not referenced by bare name below
x=iris[,1:4]
y=factor(iris[,5])
train=sample(1:dim(iris)[1],100)
xTrain=x[train,];xTest=x[-train,]
yTrain=y[train]; yTest=y[-train]
# Center and scale the training data; the same center/scale is reused for the test data.
s=scale(xTrain,center=TRUE,scale=TRUE)
#
tryTypes=c(0:7)
tryCosts=c(1000,1,0.001)
bestCost=NA
bestAcc=0
bestType=NA
#
for(ty in tryTypes){
  for(co in tryCosts){
    # With cross=5 the call returns the cross-validated accuracy rather than a model.
    acc=LiblineaR(data=s,target=yTrain,type=ty,cost=co,bias=1,cross=5,verbose=FALSE)
    cat("Results for C=",co,": ",acc," accuracy.\n",sep="")
    if(acc>bestAcc){
      bestCost=co
      bestAcc=acc
      bestType=ty
    }
  }
}
cat("Best model type is:",bestType,"\n")
cat("Best cost is:",bestCost,"\n")
cat("Best accuracy is:",bestAcc,"\n")
# Re-train best model with best cost value.
m=LiblineaR(data=s,target=yTrain,type=bestType,cost=bestCost,bias=1,verbose=FALSE)
# Scale the test data with the training center and scale
s2=scale(xTest,attr(s,"scaled:center"),attr(s,"scaled:scale"))
pr=FALSE       # request class probabilities only for model types 0 and 7
if(bestType==0 || bestType==7) pr=TRUE
p=predict(m,s2,proba=pr,decisionValues=TRUE)   # Make prediction
# Confusion matrix. Forcing the predicted labels onto the levels of yTest
# keeps the table square even when some class is never predicted.
res=table(factor(p$predictions,levels=levels(yTest)),yTest)
print(res)
# Compute Balanced Classification Rate: mean of the per-class recalls
# (diagonal count divided by the column total), for any number of classes.
BCR=mean(diag(res)/colSums(res))
#output
print(BCR)


# Consolidated summary of the results printed above.
cat("Best model type is:",bestType,"\n")
cat("Best cost is:",bestCost,"\n")
cat("Best accuracy is:",bestAcc,"\n")
print(res)
print(BCR)

--------------------------------------------------------------------------------