├── .gitignore ├── BMI ├── BMI.Rmd ├── BMI.Rproj ├── BMI.md ├── BMI_files │ └── figure-markdown_github │ │ ├── unnamed-chunk-2-1.png │ │ └── unnamed-chunk-2-2.png └── bmi_data.RDS ├── BestOffers ├── BestOffers.Rmd ├── BestOffers.Rproj ├── BestOffers.md └── MonetDBLite_diagram.svg ├── Bookdata ├── README.md ├── bookdata.tsv.gz ├── bxBooks.RData ├── create_bookdata.R └── read_bookcrossing.R ├── Buzz ├── .gitignore ├── Buzz.Rproj ├── BuzzDataSetDoc.pdf ├── Buzz_score_example.Rmd ├── Buzz_score_example.html ├── Buzz_score_example.md ├── Buzz_score_example_files │ └── figure-markdown_github │ │ ├── unnamed-chunk-2-1.png │ │ └── unnamed-chunk-3-1.png ├── PeerPresentation.pdf ├── PeerPresentation_withNotes.pdf ├── ProjectSponsorPresentation.pdf ├── ProjectSponsorPresentation_withNotes.pdf ├── RCurl_client_example.Rmd ├── RCurl_client_example.md ├── README.md ├── TomsHardware-Absolute-Sigma-500.data.txt ├── TomsHardware-Absolute-Sigma-500.names.txt ├── TomsHardware-Relative-Sigma-500.data.txt ├── TomsHardware-Relative-Sigma-500.names.txt ├── UserPresentation.pdf ├── UserPresentation_withNotes.pdf ├── buzz_sample.csv ├── buzzapp │ ├── buzzapp.Rproj │ ├── buzzutils.R │ ├── server.R │ └── ui.R ├── buzzm.Rmd ├── buzzm.html ├── buzzm.md ├── buzzm_files │ └── figure-markdown_github │ │ ├── model-1.png │ │ ├── model-2.png │ │ ├── plottest-1.png │ │ └── plottrain-1.png ├── figure │ ├── unnamed-chunk-2-1.png │ └── unnamed-chunk-3-1.png ├── model_export.Rmd ├── model_export.md ├── model_export_files │ └── figure-markdown_github │ │ └── unnamed-chunk-4-1.png ├── plumber.R ├── rf_tree_1_plot.pdf ├── rfmodel.tsv └── thRS500.RDS ├── CDC ├── NatalBirthData.rData ├── NatalRiskData.rData ├── PrepNatalRiskData.R ├── README.md ├── UserGuide2010.pdf ├── gamSplinePlots.R ├── loadExample │ ├── README.md │ ├── SQLScrewdriver.jar │ ├── dbDef.xml │ ├── fieldRanges.tsv │ └── h2-1.3.170.jar ├── natal2010Sample.tsv.gz └── prepBirthWeightData.R ├── CodeExamples.zip ├── CodeExamples ├── README.txt ├── 
c01_The_data_science_process │ └── 00002_example_1.1_of_section_1.2.4.R ├── c02_Starting_with_R_and_data │ ├── 00003_informalexample_2.1_of_section_2.1.2.R │ ├── 00004_informalexample_2.2_of_section_2.1.2.R │ ├── 00005_informalexample_2.3_of_section_2.1.2.R │ ├── 00006_informalexample_2.4_of_section_2.1.2.R │ ├── 00007_informalexample_2.5_of_section_2.1.2.R │ ├── 00008_informalexample_2.6_of_section_2.1.2.R │ ├── 00009_informalexample_2.7_of_section_2.1.2.R │ ├── 00010_informalexample_2.8_of_section_2.1.2.R │ ├── 00011_informalexample_2.9_of_section_2.1.2.R │ ├── 00012_informalexample_2.10_of_section_2.1.2.R │ ├── 00013_informalexample_2.11_of_section_2.1.2.R │ ├── 00014_informalexample_2.12_of_section_2.2.1.txt │ ├── 00015_example_2.1_of_section_2.2.1.R │ ├── 00016_example_2.2_of_section_2.2.1.R │ ├── 00017_informalexample_2.13_of_section_2.2.2.txt │ ├── 00018_example_2.3_of_section_2.2.2.R │ ├── 00019_example_2.4_of_section_2.2.2.R │ ├── 00020_informalexample_2.14_of_section_2.2.2.txt │ ├── 00021_example_2.5_of_section_2.2.2.R │ ├── 00022_example_2.6_of_section_2.2.2.R │ ├── 00023_example_2.7_of_section_2.3.1.txt │ ├── 00024_example_2.8_of_section_2.3.1.R │ ├── 00025_example_2.9_of_section_2.3.1.R │ ├── 00026_example_2.10_of_section_2.3.1.R │ ├── 00027_informalexample_2.15_of_section_2.3.1.R │ ├── 00028_informalexample_2.16_of_section_2.3.1.R │ ├── 00029_informalexample_2.17_of_section_2.3.1.R │ ├── 00030_informalexample_2.18_of_section_2.3.1.R │ └── 00031_example_2.11_of_section_2.3.1.R ├── c03_Exploring_data │ ├── 00032_example_3.1_of_section_3.1.R │ ├── 00033_example_3.2_of_section_3.1.1.R │ ├── 00034_example_3.3_of_section_3.1.1.R │ ├── 00035_example_3.4_of_section_3.1.1.R │ ├── 00036_example_3.5_of_section_3.1.1.R │ ├── 00037_example_3.6_of_section_3.2.1.R │ ├── 00038_example_3.7_of_section_3.2.1.R │ ├── 00039_example_3.8_of_section_3.2.1.R │ ├── 00040_informalexample_3.1_of_section_3.2.1.txt │ ├── 00041_informalexample_3.2_of_section_3.2.1.R │ ├── 
00042_example_3.9_of_section_3.2.1.R │ ├── 00043_example_3.10_of_section_3.2.1.R │ ├── 00044_example_3.11_of_section_3.2.2.R │ ├── 00045_example_3.12_of_section_3.2.2.R │ ├── 00046_example_3.13_of_section_3.2.2.R │ ├── 00047_informalexample_3.3_of_section_3.2.2.R │ ├── 00048_informalexample_3.4_of_section_3.2.2.R │ ├── 00049_example_3.14_of_section_3.2.2.R │ ├── 00050_example_3.15_of_section_3.2.2.R │ ├── 00051_example_3.16_of_section_3.2.2.R │ ├── 00052_example_3.17_of_section_3.2.2.R │ ├── 00053_example_3.18_of_section_3.2.2.R │ └── 00054_informalexample_3.5_of_section_3.2.2.R ├── c04_Managing_data │ ├── 00055_example_4.1_of_section_4.1.1.R │ ├── 00056_example_4.2_of_section_4.1.1.R │ ├── 00057_example_4.3_of_section_4.1.2.R │ ├── 00058_informalexample_4.1_of_section_4.1.3.R │ ├── 00059_example_4.4_of_section_4.1.3.R │ ├── 00060_example_4.5_of_section_4.1.3.R │ ├── 00061_example_4.6_of_section_4.1.3.R │ ├── 00062_example_4.7_of_section_4.2.R │ ├── 00063_example_4.8_of_section_4.2.1.R │ ├── 00064_example_4.9_of_section_4.2.2.R │ ├── 00065_example_4.10_of_section_4.2.2.R │ ├── 00066_example_4.11_of_section_4.2.2.R │ ├── 00067_informalexample_4.2_of_section_4.2.3.R │ ├── 00068_example_4.12_of_section_4.3.2.R │ └── 00069_example_4.13_of_section_4.3.3.R ├── c05_Data_Engineering_and_Data_Shaping │ ├── 00070_informalexample_5.1_of_section_5.1.1.R │ ├── 00071_informalexample_5.2_of_section_5.1.1.R │ ├── 00072_informalexample_5.3_of_section_5.1.1.R │ ├── 00073_informalexample_5.4_of_section_5.1.1.R │ ├── 00074_informalexample_5.5_of_section_5.1.1.R │ ├── 00075_informalexample_5.6_of_section_5.1.1.R │ ├── 00076_informalexample_5.7_of_section_5.1.2.R │ ├── 00077_informalexample_5.8_of_section_5.1.2.R │ ├── 00078_informalexample_5.9_of_section_5.1.2.R │ ├── 00079_informalexample_5.10_of_section_5.1.2.R │ ├── 00080_informalexample_5.11_of_section_5.1.2.R │ ├── 00081_informalexample_5.12_of_section_5.1.3.R │ ├── 00082_informalexample_5.13_of_section_5.1.3.R │ ├── 
00083_informalexample_5.14_of_section_5.1.3.R │ ├── 00084_informalexample_5.15_of_section_5.1.3.R │ ├── 00085_informalexample_5.16_of_section_5.1.3.R │ ├── 00086_informalexample_5.17_of_section_5.1.3.R │ ├── 00087_informalexample_5.18_of_section_5.1.3.R │ ├── 00088_informalexample_5.19_of_section_5.2.1.R │ ├── 00089_informalexample_5.20_of_section_5.2.1.R │ ├── 00090_informalexample_5.21_of_section_5.2.1.R │ ├── 00091_informalexample_5.22_of_section_5.2.1.R │ ├── 00092_informalexample_5.23_of_section_5.2.1.R │ ├── 00093_informalexample_5.24_of_section_5.2.1.R │ ├── 00094_informalexample_5.25_of_section_5.2.1.R │ ├── 00095_informalexample_5.26_of_section_5.2.1.R │ ├── 00096_informalexample_5.27_of_section_5.2.1.R │ ├── 00097_informalexample_5.28_of_section_5.2.2.R │ ├── 00098_informalexample_5.29_of_section_5.3.1.R │ ├── 00099_informalexample_5.30_of_section_5.3.1.R │ ├── 00100_informalexample_5.31_of_section_5.3.1.R │ ├── 00101_informalexample_5.32_of_section_5.3.1.R │ ├── 00102_informalexample_5.33_of_section_5.3.1.R │ ├── 00103_informalexample_5.34_of_section_5.3.1.R │ ├── 00104_informalexample_5.35_of_section_5.3.1.R │ ├── 00105_informalexample_5.36_of_section_5.4.1.R │ ├── 00106_informalexample_5.37_of_section_5.4.1.R │ ├── 00107_informalexample_5.38_of_section_5.4.1.R │ ├── 00108_informalexample_5.39_of_section_5.4.1.R │ ├── 00109_informalexample_5.40_of_section_5.4.1.R │ ├── 00110_informalexample_5.41_of_section_5.4.1.R │ ├── 00111_informalexample_5.42_of_section_5.4.1.R │ ├── 00112_informalexample_5.43_of_section_5.4.1.R │ ├── 00113_informalexample_5.44_of_section_5.4.1.R │ ├── 00114_informalexample_5.45_of_section_5.4.1.R │ ├── 00115_informalexample_5.46_of_section_5.4.1.R │ ├── 00116_informalexample_5.47_of_section_5.4.1.R │ ├── 00117_informalexample_5.48_of_section_5.4.2.R │ ├── 00118_informalexample_5.49_of_section_5.4.2.R │ ├── 00119_informalexample_5.50_of_section_5.4.2.R │ ├── 00120_informalexample_5.51_of_section_5.4.2.R │ ├── 
00121_informalexample_5.52_of_section_5.4.2.R │ ├── 00122_informalexample_5.53_of_section_5.4.2.R │ ├── 00123_informalexample_5.54_of_section_5.4.2.R │ ├── 00124_informalexample_5.55_of_section_5.4.2.R │ ├── 00125_informalexample_5.56_of_section_5.4.2.R │ ├── 00126_informalexample_5.57_of_section_5.4.2.R │ ├── 00127_informalexample_5.58_of_section_5.4.2.R │ ├── 00128_informalexample_5.59_of_section_5.4.2.R │ ├── 00129_informalexample_5.60_of_section_5.4.2.R │ ├── 00130_informalexample_5.61_of_section_5.4.2.R │ ├── 00131_informalexample_5.62_of_section_5.5.1.R │ ├── 00132_informalexample_5.63_of_section_5.5.1.R │ ├── 00133_informalexample_5.64_of_section_5.5.1.R │ ├── 00134_informalexample_5.65_of_section_5.5.1.R │ ├── 00135_informalexample_5.66_of_section_5.5.1.R │ ├── 00136_informalexample_5.67_of_section_5.5.2.R │ ├── 00137_informalexample_5.68_of_section_5.5.2.R │ ├── 00138_informalexample_5.69_of_section_5.5.2.R │ ├── 00139_informalexample_5.70_of_section_5.5.2.R │ ├── 00140_informalexample_5.71_of_section_5.5.2.R │ └── 00141_informalexample_5.72_of_section_5.5.2.R ├── c06_Choosing_and_evaluating_models │ ├── 00142_example_6.1_of_section_6.2.3.R │ ├── 00143_example_6.2_of_section_6.2.3.R │ ├── 00144_example_6.3_of_section_6.2.3.R │ ├── 00145_informalexample_6.1_of_section_6.2.3.R │ ├── 00146_example_6.4_of_section_6.2.3.R │ ├── 00147_informalexample_6.2_of_section_6.2.3.R │ ├── 00148_informalexample_6.3_of_section_6.2.3.R │ ├── 00149_informalexample_6.4_of_section_6.2.3.R │ ├── 00150_informalexample_6.5_of_section_6.2.3.R │ ├── 00151_informalexample_6.6_of_section_6.2.3.R │ ├── 00152_example_6.5_of_section_6.2.3.R │ ├── 00153_informalexample_6.7_of_section_6.2.3.R │ ├── 00154_example_6.6_of_section_6.2.4.R │ ├── 00155_example_6.7_of_section_6.2.4.R │ ├── 00156_example_6.8_of_section_6.2.4.R │ ├── 00157_example_6.9_of_section_6.2.5.R │ ├── 00158_example_6.10_of_section_6.2.5.R │ ├── 00159_example_6.11_of_section_6.2.5.R │ ├── 
00160_example_6.12_of_section_6.2.5.R │ ├── 00161_example_6.13_of_section_6.2.5.R │ ├── 00162_example_6.14_of_section_6.3.2.R │ ├── 00163_example_6.15_of_section_6.3.2.R │ ├── 00164_example_6.16_of_section_6.3.2.R │ ├── 00165_example_6.17_of_section_6.3.2.R │ ├── 00166_example_6.18_of_section_6.3.2.R │ ├── 00167_example_6.19_of_section_6.3.2.R │ ├── 00168_informalexample_6.8_of_section_6.3.2.R │ ├── 00169_informalexample_6.9_of_section_6.3.2.txt │ ├── 00170_informalexample_6.10_of_section_6.3.2.txt │ ├── 00171_example_6.20_of_section_6.3.2.R │ ├── 00172_example_6.21_of_section_6.3.3.R │ ├── 00173_informalexample_6.11_of_section_6.3.3.R │ ├── 00174_informalexample_6.12_of_section_6.3.3.R │ ├── 00175_example_6.22_of_section_6.3.4.R │ ├── 00176_example_6.23_of_section_6.3.4.R │ ├── 00177_example_6.24_of_section_6.3.5.R │ ├── 00178_example_6.25_of_section_6.3.5.R │ ├── 00179_example_6.26_of_section_6.3.5.R │ ├── 00180_informalexample_6.13_of_section_6.3.5.R │ ├── 00181_example_6.27_of_section_6.3.5.R │ └── 00182_informalexample_6.14_of_section_6.3.5.R ├── c07_Linear_and_logistic_regression │ ├── 00183_informalexample_7.1_of_section_7.1.1.math │ ├── 00184_informalexample_7.2_of_section_7.1.1.math │ ├── 00185_equation_7.1_of_section_7.1.1.math │ ├── 00186_informalexample_7.3_of_section_7.1.1.math │ ├── 00187_informalexample_7.4_of_section_7.1.1.math │ ├── 00188_example_7.1_of_section_7.1.1.R │ ├── 00189_example_7.2_of_section_7.1.3.R │ ├── 00190_example_7.3_of_section_7.1.3.R │ ├── 00191_example_7.4_of_section_7.1.3.R │ ├── 00192_example_7.5_of_section_7.1.3.R │ ├── 00193_informalexample_7.5_of_section_7.1.4.math │ ├── 00194_informalexample_7.6_of_section_7.1.4.math │ ├── 00195_informalexample_7.7_of_section_7.1.5.txt │ ├── 00196_informalexample_7.8_of_section_7.1.5.txt │ ├── 00197_example_7.6_of_section_7.1.5.R │ ├── 00198_informalexample_7.9_of_section_7.1.5.txt │ ├── 00199_informalexample_7.10_of_section_7.1.5.R │ ├── 00200_informalexample_7.11_of_section_7.1.5.R │ 
├── 00201_informalexample_7.12_of_section_7.2.1.math │ ├── 00202_informalexample_7.13_of_section_7.2.1.math │ ├── 00203_informalexample_7.14_of_section_7.2.1.R │ ├── 00204_informalexample_7.15_of_section_7.2.1.math │ ├── 00205_informalexample_7.16_of_section_7.2.1.math │ ├── 00206_equation_7.2_of_section_7.2.1.math │ ├── 00207_example_7.7_of_section_7.2.1.R │ ├── 00208_example_7.8_of_section_7.2.2.R │ ├── 00209_example_7.9_of_section_7.2.2.R │ ├── 00210_example_7.10_of_section_7.2.3.R │ ├── 00211_example_7.11_of_section_7.2.3.R │ ├── 00212_example_7.12_of_section_7.2.3.R │ ├── 00213_example_7.13_of_section_7.2.3.R │ ├── 00214_example_7.14_of_section_7.2.3.R │ ├── 00215_example_7.15_of_section_7.2.4.R │ ├── 00216_informalexample_7.17_of_section_7.2.4.math │ ├── 00217_example_7.16_of_section_7.2.5.R │ ├── 00218_informalexample_7.18_of_section_7.2.5.text │ ├── 00219_informalexample_7.19_of_section_7.2.5.text │ ├── 00220_informalexample_7.20_of_section_7.2.5.text │ ├── 00221_informalexample_7.21_of_section_7.2.5.text │ ├── 00222_example_7.17_of_section_7.2.5.R │ ├── 00223_example_7.18_of_section_7.2.5.R │ ├── 00224_informalexample_7.22_of_section_7.2.5.text │ ├── 00225_informalexample_7.23_of_section_7.2.5.text │ ├── 00226_example_7.19_of_section_7.2.5.R │ ├── 00227_example_7.20_of_section_7.2.5.R │ ├── 00228_informalexample_7.24_of_section_7.2.5.text │ ├── 00229_informalexample_7.25_of_section_7.2.5.text │ ├── 00230_example_7.21_of_section_7.3.1.R │ ├── 00231_example_7.22_of_section_7.3.1.R │ ├── 00232_informalexample_7.26_of_section_7.3.1.R │ ├── 00233_example_7.23_of_section_7.3.1.R │ ├── 00234_example_7.24_of_section_7.3.1.R │ ├── 00235_example_7.25_of_section_7.3.1.R │ ├── 00236_informalexample_7.27_of_section_7.3.2.text │ ├── 00237_informalexample_7.28_of_section_7.3.2.text │ ├── 00238_informalexample_7.29_of_section_7.3.2.text │ ├── 00239_informalexample_7.30_of_section_7.3.2.text │ ├── 00240_example_7.26_of_section_7.3.3.R │ ├── 
00241_example_7.27_of_section_7.3.3.R │ ├── 00242_example_7.28_of_section_7.3.3.R │ ├── 00243_informalexample_7.31_of_section_7.3.3.R │ ├── 00244_example_7.29_of_section_7.3.3.R │ ├── 00245_example_7.30_of_section_7.3.3.R │ ├── 00246_example_7.31_of_section_7.3.3.R │ ├── 00247_example_7.32_of_section_7.3.3.R │ └── 00248_example_7.33_of_section_7.3.3.R ├── c08_Advanced_data_preparation │ ├── 00249_example_8.1_of_section_8.2.1.R │ ├── 00250_informalexample_8.1_of_section_8.2.1.R │ ├── 00251_informalexample_8.2_of_section_8.2.1.R │ ├── 00252_example_8.2_of_section_8.2.2.Rtxt │ ├── 00253_example_8.3_of_section_8.2.2.R │ ├── 00254_informalexample_8.3_of_section_8.2.2.R │ ├── 00255_example_8.4_of_section_8.3.R │ ├── 00256_example_8.5_of_section_8.3.R │ ├── 00257_informalexample_8.4_of_section_8.3.1.R │ ├── 00258_informalexample_8.5_of_section_8.3.1.R │ ├── 00259_informalexample_8.6_of_section_8.3.1.R │ ├── 00260_informalexample_8.7_of_section_8.3.1.R │ ├── 00261_informalexample_8.8_of_section_8.3.1.R │ ├── 00262_informalexample_8.9_of_section_8.3.2.R │ ├── 00263_informalexample_8.10_of_section_8.3.2.R │ ├── 00264_example_8.6_of_section_8.4.1.R │ ├── 00265_informalexample_8.11_of_section_8.4.1.R │ ├── 00266_informalexample_8.12_of_section_8.4.2.R │ ├── 00267_example_8.7_of_section_8.4.2.R │ ├── 00268_informalexample_8.13_of_section_8.4.2.R │ ├── 00269_informalexample_8.14_of_section_8.4.2.R │ ├── 00270_informalexample_8.15_of_section_8.4.2.R │ ├── 00271_informalexample_8.16_of_section_8.4.2.R │ ├── 00272_informalexample_8.17_of_section_8.5.R │ ├── 00273_informalexample_8.18_of_section_8.5.R │ ├── 00274_informalexample_8.19_of_section_8.5.R │ ├── 00275_informalexample_8.20_of_section_8.6.2.R │ ├── 00276_informalexample_8.21_of_section_8.6.2.R │ ├── 00277_informalexample_8.22_of_section_8.6.3.R │ ├── 00278_informalexample_8.23_of_section_8.6.4.R │ ├── 00279_informalexample_8.24_of_section_8.6.4.R │ ├── 00280_informalexample_8.25_of_section_8.6.5.R │ ├── 
00281_informalexample_8.26_of_section_8.6.5.R │ ├── 00282_example_8.8_of_section_8.6.6.R │ ├── 00283_example_8.9_of_section_8.6.6.R │ └── 00284_example_8.10_of_section_8.6.6.R ├── c09_Unsupervised_methods │ ├── 00285_informalexample_9.1_of_section_9.1.1.math │ ├── 00286_informalexample_9.2_of_section_9.1.1.math │ ├── 00287_informalexample_9.3_of_section_9.1.1.math │ ├── 00288_informalexample_9.4_of_section_9.1.1.math │ ├── 00289_example_9.1_of_section_9.1.2.R │ ├── 00290_example_9.2_of_section_9.1.2.R │ ├── 00291_example_9.3_of_section_9.1.3.R │ ├── 00292_informalexample_9.5_of_section_9.1.3.Rtxt │ ├── 00293_example_9.4_of_section_9.1.3.R │ ├── 00294_example_9.5_of_section_9.1.3.R │ ├── 00295_example_9.6_of_section_9.1.3.R │ ├── 00296_example_9.7_of_section_9.1.3.R │ ├── 00297_example_9.8_of_section_9.1.3.R │ ├── 00298_informalexample_9.6_of_section_9.1.3.math │ ├── 00299_example_9.9_of_section_9.1.3.R │ ├── 00300_informalexample_9.7_of_section_9.1.3.math │ ├── 00301_informalexample_9.8_of_section_9.1.3.math │ ├── 00302_example_9.10_of_section_9.1.3.R │ ├── 00303_example_9.11_of_section_9.1.4.R │ ├── 00304_example_9.12_of_section_9.1.4.R │ ├── 00305_example_9.13_of_section_9.1.4.R │ ├── 00306_example_9.14_of_section_9.1.5.R │ ├── 00307_example_9.15_of_section_9.1.5.R │ ├── 00308_example_9.16_of_section_9.1.5.R │ ├── 00309_example_9.17_of_section_9.1.5.R │ ├── 00310_informalexample_9.9_of_section_9.2.2.txt │ ├── 00311_example_9.18_of_section_9.2.3.R │ ├── 00312_example_9.19_of_section_9.2.3.R │ ├── 00313_informalexample_9.10_of_section_9.2.3.R │ ├── 00314_example_9.20_of_section_9.2.3.R │ ├── 00315_example_9.21_of_section_9.2.3.R │ ├── 00316_example_9.22_of_section_9.2.3.R │ ├── 00317_informalexample_9.11_of_section_9.2.3.R │ ├── 00318_example_9.23_of_section_9.2.3.R │ ├── 00319_example_9.24_of_section_9.2.3.R │ ├── 00320_example_9.25_of_section_9.2.3.R │ ├── 00321_example_9.26_of_section_9.2.3.R │ ├── 00322_example_9.27_of_section_9.2.3.R │ └── 
00323_example_9.28_of_section_9.2.3.R ├── c10_Exploring_advanced_methods │ ├── 00324_example_10.1_of_section_10.1.1.R │ ├── 00325_informalexample_10.1_of_section_10.1.1.R │ ├── 00326_example_10.2_of_section_10.1.2.R │ ├── 00327_example_10.3_of_section_10.1.3.R │ ├── 00328_informalexample_10.2_of_section_10.1.3.R │ ├── 00329_informalexample_10.3_of_section_10.1.3.R │ ├── 00330_informalexample_10.4_of_section_10.1.3.R │ ├── 00331_example_10.4_of_section_10.1.3.R │ ├── 00332_example_10.5_of_section_10.1.3.R │ ├── 00333_example_10.6_of_section_10.1.4.R │ ├── 00334_example_10.7_of_section_10.1.4.R │ ├── 00335_example_10.8_of_section_10.1.4.R │ ├── 00336_informalexample_10.5_of_section_10.1.4.R │ ├── 00337_informalexample_10.6_of_section_10.1.4.R │ ├── 00338_informalexample_10.7_of_section_10.1.4.R │ ├── 00339_informalexample_10.8_of_section_10.1.4.R │ ├── 00340_example_10.9_of_section_10.1.4.R │ ├── 00341_example_10.10_of_section_10.1.4.R │ ├── 00342_example_10.11_of_section_10.1.4.R │ ├── 00343_informalexample_10.9_of_section_10.2.1.math │ ├── 00344_informalexample_10.10_of_section_10.2.1.math │ ├── 00345_example_10.12_of_section_10.2.2.R │ ├── 00346_example_10.13_of_section_10.2.2.R │ ├── 00347_example_10.14_of_section_10.2.2.R │ ├── 00348_example_10.15_of_section_10.2.2.R │ ├── 00349_example_10.16_of_section_10.2.3.R │ ├── 00350_example_10.17_of_section_10.2.4.R │ ├── 00351_example_10.18_of_section_10.2.4.R │ ├── 00352_example_10.19_of_section_10.2.4.R │ ├── 00353_example_10.20_of_section_10.2.5.R │ ├── 00354_example_10.21_of_section_10.2.5.R │ ├── 00355_example_10.22_of_section_10.3.1.R │ ├── 00356_example_10.23_of_section_10.3.1.R │ ├── 00357_example_10.24_of_section_10.3.1.R │ ├── 00358_informalexample_10.11_of_section_10.3.2.math │ ├── 00359_informalexample_10.12_of_section_10.3.2.math │ ├── 00360_informalexample_10.13_of_section_10.3.2.math │ ├── 00361_example_10.25_of_section_10.3.3.R │ ├── 00362_informalexample_10.14_of_section_10.3.3.math │ └── 
00363_informalexample_10.15_of_section_10.3.3.math ├── c11_Documentation_and_deployment │ ├── 00364_example_11.1_of_section_11.2.1.Rmd │ ├── 00365_informalexample_11.1_of_section_11.2.1.R │ ├── 00366_example_11.2_of_section_11.2.3.Rmd │ ├── 00367_example_11.3_of_section_11.2.3.md │ ├── 00368_example_11.4_of_section_11.2.3.md │ ├── 00369_example_11.5_of_section_11.3.1.R │ ├── 00370_informalexample_11.2_of_section_11.3.2.bash │ ├── 00371_example_11.6_of_section_11.3.2.bash │ ├── 00372_example_11.7_of_section_11.3.2.bash │ ├── 00373_example_11.8_of_section_11.3.3.bash │ ├── 00374_example_11.9_of_section_11.3.3.bash │ ├── 00375_informalexample_11.3_of_section_11.3.3.bash │ ├── 00376_example_11.10_of_section_11.3.4.bash │ ├── 00377_example_11.11_of_section_11.4.2.Rtxt │ ├── 00378_informalexample_11.4_of_section_11.4.2.Rtxt │ ├── 00379_example_11.12_of_section_11.4.2.Rmd │ └── 00380_informalexample_11.5_of_section_11.4.3.SQL ├── x0A_Starting_with_R_and_other_tools │ ├── 00381_informalexample_A.1_of_section_A.1.1.Rtxt │ ├── 00382_informalexample_A.2_of_section_A.1.5.txt │ ├── 00383_example_A.1_of_section_A.2.R │ ├── 00384_informalexample_A.3_of_section_A.2.1.R │ ├── 00385_example_A.2_of_section_A.2.1.R │ ├── 00386_example_A.3_of_section_A.2.1.R │ ├── 00387_example_A.4_of_section_A.2.1.R │ ├── 00388_example_A.5_of_section_A.2.1.R │ ├── 00389_informalexample_A.4_of_section_A.2.2.R │ ├── 00390_example_A.6_of_section_A.2.2.R │ ├── 00391_example_A.7_of_section_A.2.2.R │ ├── 00392_example_A.8_of_section_A.2.2.R │ ├── 00393_informalexample_A.5_of_section_A.3.1.R │ ├── 00394_informalexample_A.6_of_section_A.3.1.R │ ├── 00395_informalexample_A.7_of_section_A.3.1.R │ └── 00396_informalexample_A.8_of_section_A.3.1.R ├── x0B_Important_statistical_concepts │ ├── 00397_example_B.1_of_section_B.1.1.R │ ├── 00398_example_B.2_of_section_B.1.1.R │ ├── 00399_example_B.3_of_section_B.1.1.R │ ├── 00400_example_B.4_of_section_B.1.1.R │ ├── 00401_example_B.5_of_section_B.1.3.R │ ├── 
00402_example_B.6_of_section_B.1.3.R │ ├── 00403_example_B.7_of_section_B.1.4.R │ ├── 00404_example_B.8_of_section_B.1.4.R │ ├── 00405_example_B.9_of_section_B.1.4.R │ ├── 00406_example_B.10_of_section_B.1.4.R │ ├── 00407_example_B.11_of_section_B.1.4.R │ ├── 00408_informalexample_B.1_of_section_B.2.1.math │ ├── 00409_example_B.12_of_section_B.2.2.R │ ├── 00410_example_B.13_of_section_B.2.2.R │ ├── 00411_example_B.14_of_section_B.2.2.R │ ├── 00412_example_B.15_of_section_B.2.2.R │ ├── 00413_example_B.16_of_section_B.2.2.R │ ├── 00414_informalexample_B.2_of_section_B.2.3.R │ ├── 00415_example_B.17_of_section_B.2.4.R │ ├── 00416_example_B.18_of_section_B.2.4.R │ ├── 00417_informalexample_B.3_of_section_B.2.4.R │ ├── 00418_example_B.19_of_section_B.3.1.R │ ├── 00419_example_B.20_of_section_B.3.1.R │ ├── 00420_example_B.21_of_section_B.3.2.R │ ├── 00421_example_B.22_of_section_B.3.2.R │ ├── 00422_example_B.23_of_section_B.3.2.R │ └── 00423_example_B.24_of_section_B.3.2.R └── xFront_Matter_Practical_Data_Science_with_R │ └── 00001_informalexample_Front_Matter.1_of_section_Front_Matter.5.6.bash ├── Custdata ├── README.txt ├── custdata.RDS ├── hhdata.RDS └── median_income.RDS ├── IMDB ├── IMDBtest.RDS ├── IMDBtrain.RDS ├── README.md ├── getIMDB.R └── lime_imdb_example.R ├── KDD2009 ├── AnalysisOfKDD2009.pdf ├── KDD2009.Rdata ├── KDD2009.Rproj ├── KDD2009vtreat.Rmd ├── KDD2009vtreat.md ├── KDD2009vtreat_files │ ├── figure-gfm │ │ ├── kddplot-1.png │ │ ├── kddplot-2.png │ │ ├── kddplot-3.png │ │ ├── kddplot-4.png │ │ ├── kddplot-5.png │ │ └── kddplot-6.png │ └── figure-markdown_github │ │ ├── kddplot-1.png │ │ ├── kddplot-2.png │ │ ├── kddplot-3.png │ │ ├── kddplot-4.png │ │ ├── kddplot-5.png │ │ └── kddplot-6.png ├── README.md ├── orange_small_train.data.gz ├── orange_small_train_appetency.labels.txt ├── orange_small_train_churn.labels.txt └── orange_small_train_upselling.labels.txt ├── LICENSE.md ├── LIME_iris ├── README_limeiris.md └── lime_iris_example.R ├── 
NotionalData ├── README.md └── exampleData.rData ├── PDSwR2.Rproj ├── PDSwR2_errata.html ├── PDSwR2_errata.md ├── PUMS ├── ACS2016_PUMS_README.pdf ├── PUMS1.Rmd ├── PUMS1.md ├── PUMS1_dplyr.Rmd ├── PUMS1_dplyr.md ├── PUMS1_dplyr_files │ └── figure-markdown_github │ │ └── unnamed-chunk-1-1.png ├── PUMS1_files │ └── figure-markdown_github │ │ └── unnamed-chunk-1-1.png ├── PUMS1_rquery.Rmd ├── PUMS1_rquery.md ├── PUMS1_rquery_files │ └── figure-markdown_github │ │ ├── unnamed-chunk-1-1.png │ │ └── unnamed-chunk-1-2.png ├── PUMSDataDict16.txt ├── PUMSsample.RDS ├── PUMSscatter1.pdf ├── README.md ├── data_dict.csv ├── download │ ├── .gitignore │ ├── ACS2016_PUMS_README.pdf │ ├── LoadPUMS.Rmd │ ├── LoadPUMS.md │ ├── LoadPUMSAll.Rmd │ ├── LoadPUMSAll.md │ ├── LoadPUMSAll_files │ │ └── figure-markdown_github │ │ │ ├── unnamed-chunk-1-1.png │ │ │ └── unnamed-chunk-1-2.png │ ├── LoadPUMS_h.Rmd │ ├── LoadPUMS_h.md │ ├── PUMSDataDict16.txt │ ├── README.txt │ ├── ReadDataDict.Rmd │ ├── ReadDataDict.md │ ├── data_dict.RDS │ └── download.Rproj ├── dpus_std_employee.RDS ├── incomedata.rds ├── makeSubSample.Rmd ├── psub.RDS ├── ss16hus.RDS ├── ss16hus_h.RDS ├── ss16pus.RDS └── ss16pus_h.RDS ├── Protein ├── README.md ├── protein.txt └── protein_README.txt ├── PseudoLog10 ├── .Rbuildignore ├── DESCRIPTION ├── NAMESPACE ├── PseudoLog10.Rproj ├── PseudoLog10.pdf ├── R │ └── pseudoLog10.R └── man │ └── pseudoLog10.Rd ├── README.md ├── RenderedExamples ├── .gitignore ├── c01_The_data_science_process.Rmd ├── c01_The_data_science_process.md ├── c02_Starting_with_R_and_data.Rmd ├── c02_Starting_with_R_and_data.md ├── c03_Exploring_data.Rmd ├── c03_Exploring_data.md ├── c04_Managing_data.Rmd ├── c04_Managing_data.md ├── c05_Data_Engineering_and_Data_Shaping.Rmd ├── c05_Data_Engineering_and_Data_Shaping.md ├── c06_Choosing_and_evaluating_models.Rmd ├── c06_Choosing_and_evaluating_models.md ├── c07_Linear_and_logistic_regression.Rmd ├── c07_Linear_and_logistic_regression.md ├── 
c08_Advanced_Data_Preparation.Rmd ├── c08_Advanced_Data_Preparation.md ├── c09_Unsupervised_methods.Rmd ├── c09_Unsupervised_methods.md ├── c10_Exploring_advanced_methods.Rmd ├── c10_Exploring_advanced_methods.md ├── c11_Documentation_and_deployment.Rmd ├── c11_Documentation_and_deployment.md ├── figure │ ├── 00031_example_2.11_of_section_2.3.1.R-1.png │ ├── 00037_example_3.6_of_section_3.2.1.R-1.png │ ├── 00038_example_3.7_of_section_3.2.1.R-1.png │ ├── 00039_example_3.8_of_section_3.2.1.R-1.png │ ├── 00041_informalexample_3.2_of_section_3.2.1.R-1.png │ ├── 00042_example_3.9_of_section_3.2.1.R-1.png │ ├── 00043_example_3.10_of_section_3.2.1.R-1.png │ ├── 00044_example_3.11_of_section_3.2.2.R-1.png │ ├── 00046_example_3.13_of_section_3.2.2.R-1.png │ ├── 00047_informalexample_3.3_of_section_3.2.2.R-1.png │ ├── 00048_informalexample_3.4_of_section_3.2.2.R-1.png │ ├── 00049_example_3.14_of_section_3.2.2.R-1.png │ ├── 00050_example_3.15_of_section_3.2.2.R-1.png │ ├── 00050_example_3.15_of_section_3.2.2.R-2.png │ ├── 00050_example_3.15_of_section_3.2.2.R-3.png │ ├── 00050_example_3.15_of_section_3.2.2.R-4.png │ ├── 00051_example_3.16_of_section_3.2.2.R-1.png │ ├── 00051_example_3.16_of_section_3.2.2.R-2.png │ ├── 00052_example_3.17_of_section_3.2.2.R-1.png │ ├── 00053_example_3.18_of_section_3.2.2.R-1.png │ ├── 00054_informalexample_3.5_of_section_3.2.2.R-1.png │ ├── 00071_informalexample_5.2_of_section_5.1.1.R-1.png │ ├── 00090_informalexample_5.21_of_section_5.2.1.R-1.png │ ├── 00094_informalexample_5.25_of_section_5.2.1.R-1.png │ ├── 00099_informalexample_5.30_of_section_5.3.1.R-1.png │ ├── 00132_informalexample_5.63_of_section_5.5.1.R-1.png │ ├── 00135_informalexample_5.66_of_section_5.5.1.R-1.png │ ├── 00138_informalexample_5.69_of_section_5.5.2.R-1.png │ ├── 00157_example_6.9_of_section_6.2.5.R-1.png │ ├── 00158_example_6.10_of_section_6.2.5.R-1.png │ ├── 00168_informalexample_6.8_of_section_6.3.2.R-1.png │ ├── 00171_example_6.20_of_section_6.3.2.R-1.png │ ├── 
00176_example_6.23_of_section_6.3.4.R-1.png │ ├── 00179_example_6.26_of_section_6.3.5.R-1.png │ ├── 00180_informalexample_6.13_of_section_6.3.5.R-1.png │ ├── 00181_example_6.27_of_section_6.3.5.R-1.png │ ├── 00181_example_6.27_of_section_6.3.5.R-2.png │ ├── 00189_example_7.2_of_section_7.1.3.R-1.png │ ├── 00190_example_7.3_of_section_7.1.3.R-1.png │ ├── 00212_example_7.12_of_section_7.2.3.R-1.png │ ├── 00213_example_7.13_of_section_7.2.3.R-1.png │ ├── 00234_example_7.24_of_section_7.3.1.R-1.png │ ├── 00241_example_7.27_of_section_7.3.3.R-1.png │ ├── 00247_example_7.32_of_section_7.3.3.R-1.png │ ├── 00271_informalexample_8.16_of_section_8.4.2.R-1.png │ ├── 00271_informalexample_8.16_of_section_8.4.2.R-2.png │ ├── 00291_example_9.3_of_section_9.1.3.R-1.png │ ├── 00294_example_9.5_of_section_9.1.3.R-1.png │ ├── 00297_example_9.8_of_section_9.1.3.R-1.png │ ├── 00299_example_9.9_of_section_9.1.3.R-1.png │ ├── 00302_example_9.10_of_section_9.1.3.R-1.png │ ├── 00314_example_9.20_of_section_9.2.3.R-1.png │ ├── 00324_example_10.1_of_section_10.1.1.R-1.png │ ├── 00331_example_10.4_of_section_10.1.3.R-1.png │ ├── 00334_example_10.7_of_section_10.1.4.R-1.png │ ├── 00346_example_10.13_of_section_10.2.2.R-1.png │ ├── 00347_example_10.14_of_section_10.2.2.R-1.png │ ├── 00349_example_10.16_of_section_10.2.3.R-1.png │ ├── 00351_example_10.18_of_section_10.2.4.R-1.png │ ├── 00355_example_10.22_of_section_10.3.1.R-1.png │ ├── 00356_example_10.23_of_section_10.3.1.R-1.png │ ├── 00357_example_10.24_of_section_10.3.1.R-1.png │ ├── 00397_example_B.1_of_section_B.1.1.R-1.png │ ├── 00398_example_B.2_of_section_B.1.1.R-1.png │ ├── 00400_example_B.4_of_section_B.1.1.R-1.png │ ├── 00401_example_B.5_of_section_B.1.3.R-1.png │ ├── 00402_example_B.6_of_section_B.1.3.R-1.png │ ├── 00403_example_B.7_of_section_B.1.4.R-1.png │ ├── 00404_example_B.8_of_section_B.1.4.R-1.png │ ├── 00419_example_B.20_of_section_B.3.1.R-1.png │ └── 00419_example_B.20_of_section_B.3.1.R-2.png ├── render_examples.bash 
├── x0A_Starting_with_R_and_other_tools.Rmd ├── x0A_Starting_with_R_and_other_tools.md ├── x0B_Important_statistical_concepts.Rmd └── x0B_Important_statistical_concepts.md ├── SQLExample ├── HotelRelation.pdf ├── README.Rmd ├── README.md ├── Workbook1.xlsx ├── figure │ └── allsteps.png ├── h2-1.3.170.jar ├── h2demodb_h2.h2.db └── h2demodb_h2.trace.db ├── Spambase ├── README.md └── spamD.tsv ├── Spirals ├── Spirals.Rproj ├── c10_SVM.Rmd ├── c10_SVM.md └── c10_SVM_files │ └── figure-markdown_github │ ├── 00433_example_10.22_of_section_10.3.1.R-1.png │ ├── 00434_example_10.23_of_section_10.3.1.R-1.png │ ├── 00435_example_10.24_of_section_10.3.1.R-1.png │ ├── large_mu-1.png │ ├── large_nu-1.png │ ├── small_mu-1.png │ ├── small_nu-1.png │ └── xgboost-1.png ├── Starting_with_R_and_Other_Tools.pdf ├── Statlog ├── Chapter_1_Example.Rmd ├── Chapter_1_Example.md ├── Chapter_1_Example_files │ └── figure-markdown_github │ │ └── present_model-1.png ├── GCDData.RData ├── GCDSteps.Rmd ├── GCDSteps.ipynb ├── GCDSteps.md ├── README.md ├── Statlog.Rproj ├── creditdata.RDS ├── german.data ├── loan_model_example.RData └── mapping.R ├── UCICar ├── README.md └── car.data.csv ├── auto_mpg ├── Data_Set_Description.txt ├── README.Rmd ├── README.md ├── UCI_Auto_MPG.pdf ├── auto-mpg.data-original.txt ├── auto-mpg.data.txt ├── auto-mpg.names.txt ├── auto_mpg.RDS ├── vtreat_example.Rmd └── vtreat_example.md ├── bioavailability ├── Caco-2 Permeability Assay.pdf ├── Figure4.gif ├── README.Rmd ├── README.md ├── WebPlotDigitizer.pdf ├── caco2.csv ├── figure │ ├── graph1.png │ ├── graphT.png │ ├── model1.png │ ├── synth1.png │ └── synthP.png ├── synth.RData └── synth.csv ├── cricketchirps ├── README.txt └── crickets.csv └── packages.R /BMI/BMI.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | 
NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /BMI/BMI_files/figure-markdown_github/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/BMI/BMI_files/figure-markdown_github/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /BMI/BMI_files/figure-markdown_github/unnamed-chunk-2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/BMI/BMI_files/figure-markdown_github/unnamed-chunk-2-2.png -------------------------------------------------------------------------------- /BMI/bmi_data.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/BMI/bmi_data.RDS -------------------------------------------------------------------------------- /BestOffers/BestOffers.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /Bookdata/bookdata.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Bookdata/bookdata.tsv.gz -------------------------------------------------------------------------------- /Bookdata/bxBooks.RData: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Bookdata/bxBooks.RData -------------------------------------------------------------------------------- /Bookdata/read_bookcrossing.R: -------------------------------------------------------------------------------- 1 | 2 | # first: replace \" with ' 3 | bxUsers <- read.table('BX-Users.csv',header=T,sep=';',comment.char='',stringsAsFactors=F) 4 | # first replace \" with blank 5 | bxBookRatings <- read.table('BX-Book-Ratings.csv',header=T,sep=';',comment.char='',stringsAsFactors=F) 6 | # first: replace \" with ' 7 | bxBooks <- read.table('BX-Books.csv',header=T,sep=';',comment.char='',stringsAsFactors=F) 8 | -------------------------------------------------------------------------------- /Buzz/.gitignore: -------------------------------------------------------------------------------- 1 | buzz.aux 2 | buzz.log 3 | buzz.out 4 | cache 5 | buzzm_cache 6 | -------------------------------------------------------------------------------- /Buzz/Buzz.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /Buzz/BuzzDataSetDoc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/BuzzDataSetDoc.pdf -------------------------------------------------------------------------------- /Buzz/Buzz_score_example.md: -------------------------------------------------------------------------------- 1 | Buzz scoring example 
2 | ================ 3 | 4 | Example scoring (making predictions with) the Buzz data set. 5 | 6 | First attach the `randomForest` package and load the model and test data. 7 | 8 | ``` r 9 | suppressPackageStartupMessages(library("randomForest")) 10 | 11 | lst <- readRDS("thRS500.RDS") 12 | varslist <- lst$varslist 13 | fmodel <- lst$fmodel 14 | buzztest <- lst$buzztest 15 | rm(list = "lst") 16 | ``` 17 | 18 | Now show the quality of our model on held-out test data. 19 | 20 | ``` r 21 | buzztest$prediction <- predict(fmodel, newdata = buzztest, type = "prob")[, 2, drop = TRUE] 22 | 23 | WVPlots::ROCPlot(buzztest, "prediction", 24 | "buzz", 1, 25 | "ROC curve estimating quality of model predictions on held-out data") 26 | ``` 27 | 28 | ![](Buzz_score_example_files/figure-markdown_github/unnamed-chunk-3-1.png) 29 | -------------------------------------------------------------------------------- /Buzz/Buzz_score_example_files/figure-markdown_github/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/Buzz_score_example_files/figure-markdown_github/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /Buzz/Buzz_score_example_files/figure-markdown_github/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/Buzz_score_example_files/figure-markdown_github/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /Buzz/PeerPresentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/PeerPresentation.pdf 
-------------------------------------------------------------------------------- /Buzz/PeerPresentation_withNotes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/PeerPresentation_withNotes.pdf -------------------------------------------------------------------------------- /Buzz/ProjectSponsorPresentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/ProjectSponsorPresentation.pdf -------------------------------------------------------------------------------- /Buzz/ProjectSponsorPresentation_withNotes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/ProjectSponsorPresentation_withNotes.pdf -------------------------------------------------------------------------------- /Buzz/UserPresentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/UserPresentation.pdf -------------------------------------------------------------------------------- /Buzz/UserPresentation_withNotes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/UserPresentation_withNotes.pdf -------------------------------------------------------------------------------- /Buzz/buzzapp/buzzapp.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 
| NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /Buzz/buzzm_files/figure-markdown_github/model-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/buzzm_files/figure-markdown_github/model-1.png -------------------------------------------------------------------------------- /Buzz/buzzm_files/figure-markdown_github/model-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/buzzm_files/figure-markdown_github/model-2.png -------------------------------------------------------------------------------- /Buzz/buzzm_files/figure-markdown_github/plottest-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/buzzm_files/figure-markdown_github/plottest-1.png -------------------------------------------------------------------------------- /Buzz/buzzm_files/figure-markdown_github/plottrain-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/buzzm_files/figure-markdown_github/plottrain-1.png -------------------------------------------------------------------------------- /Buzz/figure/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/figure/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /Buzz/figure/unnamed-chunk-3-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/figure/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /Buzz/model_export_files/figure-markdown_github/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/model_export_files/figure-markdown_github/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /Buzz/rf_tree_1_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/rf_tree_1_plot.pdf -------------------------------------------------------------------------------- /Buzz/thRS500.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/thRS500.RDS -------------------------------------------------------------------------------- /CDC/NatalBirthData.rData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/NatalBirthData.rData -------------------------------------------------------------------------------- /CDC/NatalRiskData.rData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/NatalRiskData.rData -------------------------------------------------------------------------------- /CDC/UserGuide2010.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/UserGuide2010.pdf -------------------------------------------------------------------------------- /CDC/loadExample/SQLScrewdriver.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/loadExample/SQLScrewdriver.jar -------------------------------------------------------------------------------- /CDC/loadExample/dbDef.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | testdb 5 | u 6 | u 7 | org.h2.Driver 8 | jdbc:h2:./NATAL;LOG=0;CACHE_SIZE=65536;LOCK_MODE=0;UNDO_LOG=0 9 | 10 | -------------------------------------------------------------------------------- /CDC/loadExample/h2-1.3.170.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/loadExample/h2-1.3.170.jar -------------------------------------------------------------------------------- /CDC/natal2010Sample.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/natal2010Sample.tsv.gz -------------------------------------------------------------------------------- /CodeExamples.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CodeExamples.zip -------------------------------------------------------------------------------- /CodeExamples/README.txt: -------------------------------------------------------------------------------- 1 | 2 | Example code and data for "Practical Data Science with R 2nd Edition" by Nina Zumel and John Mount, Manning 2019. 
3 | 4 | Code examples license: 5 | This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. 6 | http://creativecommons.org/licenses/by-nc-sa/4.0/ 7 | No guarantee, indemnification or claim of fitness is made regarding any of these items. 8 | No claim of license on works of others or derived data. 9 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00003_informalexample_2.1_of_section_2.1.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 2.1 of section 2.1.2 2 | # (informalexample 2.1 of section 2.1.2) : Starting with R and data : Starting with R : R programming 3 | 4 | print(seq_len(25)) 5 | # [1] 1 2 3 4 5 6 7 8 9 10 11 12 6 | # [13] 13 14 15 16 17 18 19 20 21 22 23 24 7 | # [25] 25 8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00006_informalexample_2.4_of_section_2.1.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 2.4 of section 2.1.2 2 | # (informalexample 2.4 of section 2.1.2) : Starting with R and data : Starting with R : R programming 3 | 4 | nchar("a string") 5 | # [1] 8 6 | 7 | nchar(c("a", "aa", "aaa", "aaaa")) 8 | # [1] 1 2 3 4 9 | 10 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00007_informalexample_2.5_of_section_2.1.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 2.5 of section 2.1.2 2 | # (informalexample 2.5 of section 2.1.2) : Starting with R and data : Starting with R : R programming 3 | 4 | 1 + 5 | 2 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00008_informalexample_2.6_of_section_2.1.2.R: 
-------------------------------------------------------------------------------- 1 | # informalexample 2.6 of section 2.1.2 2 | # (informalexample 2.6 of section 2.1.2) : Starting with R and data : Starting with R : R programming 3 | 4 | 1 5 | + 2 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00010_informalexample_2.8_of_section_2.1.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 2.8 of section 2.1.2 2 | # (informalexample 2.8 of section 2.1.2) : Starting with R and data : Starting with R : R programming 3 | 4 | d <- data.frame(x = 1, y = 2) # Note: 1 5 | d2 <- d # Note: 2 6 | d$x <- 5 # Note: 3 7 | 8 | print(d) 9 | # x y 10 | # 1 5 2 11 | 12 | print(d2) 13 | # x y 14 | # 1 1 2 15 | 16 | # Note 1: 17 | # Create some example data and refer to it by the name d. 18 | 19 | # Note 2: 20 | # Create an additional reference d2 to the same data. 21 | 22 | # Note 3: 23 | # Alter the value referred to by d. 
24 | 25 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00012_informalexample_2.10_of_section_2.1.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 2.10 of section 2.1.2 2 | # (informalexample 2.10 of section 2.1.2) : Starting with R and data : Starting with R : R programming 3 | 4 | library("dplyr") 5 | 6 | result <- data %>% 7 | arrange(., sort_key) %>% 8 | mutate(., ordered_sum_revenue = cumsum(revenue)) %>% 9 | mutate(., fraction_revenue_seen = ordered_sum_revenue/sum(revenue)) 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00013_informalexample_2.11_of_section_2.1.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 2.11 of section 2.1.2 2 | # (informalexample 2.11 of section 2.1.2) : Starting with R and data : Starting with R : R programming 3 | 4 | d <- data.frame(col1 = c(1, 2, 3), col2 = c(-1, 0, 1)) 5 | d$col3 <- d$col1 + d$col2 6 | print(d) 7 | # col1 col2 col3 8 | # 1 1 -1 0 9 | # 2 2 0 2 10 | # 3 3 1 4 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00017_informalexample_2.13_of_section_2.2.2.txt: -------------------------------------------------------------------------------- 1 | # informalexample 2.13 of section 2.2.2 2 | # (informalexample 2.13 of section 2.2.2) : Starting with R and data : Working with data from files : Using R with less-structured data 3 | 4 | A11 6 A34 A43 1169 A65 A75 4 A93 A101 4 ... 5 | A12 48 A32 A43 5951 A61 A73 2 A92 A101 2 ... 6 | A14 12 A34 A46 2096 A61 A74 2 A93 A101 3 ... 7 | ... 
8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00018_example_2.3_of_section_2.2.2.R: -------------------------------------------------------------------------------- 1 | # example 2.3 of section 2.2.2 2 | # (example 2.3 of section 2.2.2) : Starting with R and data : Working with data from files : Using R with less-structured data 3 | # Title: Loading the credit dataset 4 | 5 | setwd("PDSwR2/Statlog") # Note: 1 6 | d <- read.table('german.data', sep=' ', 7 | stringsAsFactors = FALSE, header = FALSE) 8 | 9 | # Note 1: 10 | # Replace this path with the actual path where you have saved PDSwR2. 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00020_informalexample_2.14_of_section_2.2.2.txt: -------------------------------------------------------------------------------- 1 | # informalexample 2.14 of section 2.2.2 2 | # (informalexample 2.14 of section 2.2.2) : Starting with R and data : Working with data from files : Using R with less-structured data 3 | 4 | mapping <- c('A11' = '... < 0 DM', 5 | 'A12' = '0 <= ... < 200 DM', 6 | 'A13' = '... >= 200 DM / salary assignments for at least 1 year', 7 | ... 
8 | ) 9 | 10 | -------------------------------------------------------------------------------- /CodeExamples/c02_Starting_with_R_and_data/00031_example_2.11_of_section_2.3.1.R: -------------------------------------------------------------------------------- 1 | # example 2.11 of section 2.3.1 2 | # (example 2.11 of section 2.3.1) : Starting with R and data : Working with relational databases : A production-size example 3 | # Title: Plotting the data 4 | 5 | WVPlots::ScatterHist( 6 | dpus, "AGEP", "PINCP", 7 | "Expected income (PINCP) as function age (AGEP)", 8 | smoothmethod = "lm", 9 | point_alpha = 0.025) 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00035_example_3.4_of_section_3.1.1.R: -------------------------------------------------------------------------------- 1 | # example 3.4 of section 3.1.1 2 | # (example 3.4 of section 3.1.1) : Exploring data : Using summary statistics to spot problems : Typical problems revealed by data summaries 3 | # Title: Looking at the data range of a variable 4 | 5 | summary(customer_data$income) 6 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 7 | ## -6900 10700 26200 41764 51700 1257000 # Note: 1 8 | 9 | # Note 1: 10 | # Income ranges from below zero (-6900) to over a million 11 | # dollars; a very wide range, and the negative values may indicate bad data. 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00036_example_3.5_of_section_3.1.1.R: -------------------------------------------------------------------------------- 1 | # example 3.5 of section 3.1.1 2 | # (example 3.5 of section 3.1.1) : Exploring data : Using summary statistics to spot problems : Typical problems revealed by data summaries 3 | # Title: Checking units sounds silly, but mistakes can lead to spectacular errors if not caught 4 | 5 | IncomeK = customer_data$income/1000 6 | summary(IncomeK) # Note: 1 7 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 
8 | ## -6.90 10.70 26.20 41.76 51.70 1257.00 9 | 10 | # Note 1: 11 | # The variable IncomeK is defined as IncomeK = customer_data$income/1000. But suppose you didn’t know 12 | # that. Looking only at the summary, the values could plausibly be 13 | # interpreted to mean either “hourly wage” or “yearly income in units 14 | # of $1000.” 15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00037_example_3.6_of_section_3.2.1.R: -------------------------------------------------------------------------------- 1 | # example 3.6 of section 3.2.1 2 | # (example 3.6 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable 3 | # Title: Plotting a histogram 4 | 5 | library(ggplot2) # Note: 1 6 | ggplot(customer_data, aes(x=gas_usage)) + 7 | geom_histogram(binwidth=10, fill="gray") # Note: 2 8 | 9 | # Note 1: 10 | # Load the ggplot2 library, if you haven’t 11 | # already done so. 12 | 13 | # Note 2: 14 | # The binwidth parameter tells the 15 | # geom_histogram call how to make bins of ten dollar intervals (default is 16 | # datarange/30). The fill parameter specifies the color of the histogram 17 | # bars (default: black). 
18 | 19 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00038_example_3.7_of_section_3.2.1.R: -------------------------------------------------------------------------------- 1 | # example 3.7 of section 3.2.1 2 | # (example 3.7 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable 3 | # Title: Producing a density plot 4 | 5 | library(scales) # Note: 1 6 | 7 | ggplot(customer_data, aes(x=income)) + geom_density() + 8 | scale_x_continuous(labels=dollar) # Note: 2 9 | 10 | # Note 1: 11 | # The scales package brings in the dollar 12 | # scale notation. 13 | 14 | # Note 2: 15 | # Set the x-axis labels to 16 | # dollars. 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00039_example_3.8_of_section_3.2.1.R: -------------------------------------------------------------------------------- 1 | # example 3.8 of section 3.2.1 2 | # (example 3.8 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable 3 | # Title: Creating a log-scaled density plot 4 | 5 | ggplot(customer_data, aes(x=income)) + 6 | geom_density() + 7 | scale_x_log10(breaks = c(10, 100, 1000, 10000, 100000, 1000000), labels=dollar) + # Note: 1 8 | annotation_logticks(sides="bt", color="gray") # Note: 2 9 | 10 | # Note 1: 11 | # Set the x-axis to be in log10 scale, with 12 | # manually set tick points and labels as dollars. 13 | 14 | # Note 2: 15 | # Add log-scaled tick marks to the top and 16 | # bottom of the graph. 
17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00040_informalexample_3.1_of_section_3.2.1.txt: -------------------------------------------------------------------------------- 1 | # informalexample 3.1 of section 3.2.1 2 | # (informalexample 3.1 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable 3 | 4 | ## Warning in self$trans$transform(x): NaNs produced 5 | ## Warning: Transformation introduced infinite values in continuous x-axis 6 | ## Warning: Removed 6856 rows containing non-finite values (stat_density). 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00041_informalexample_3.2_of_section_3.2.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 3.2 of section 3.2.1 2 | # (informalexample 3.2 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable 3 | 4 | ggplot(customer_data, aes(x=marital_status)) + geom_bar(fill="gray") 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00042_example_3.9_of_section_3.2.1.R: -------------------------------------------------------------------------------- 1 | # example 3.9 of section 3.2.1 2 | # (example 3.9 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable 3 | # Title: Producing a horizontal bar chart 4 | 5 | ggplot(customer_data, aes(x=state_of_res)) + 6 | geom_bar(fill="gray") + # Note: 1 7 | coord_flip() # Note: 2 8 | 9 | # Note 1: 10 | # Plot bar chart as before: state_of_res is on x-axis, count is on y-axis. 
11 | 12 | # Note 2: 13 | # Flip the x and y axes: state_of_res is 14 | # now on the y-axis. 15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00044_example_3.11_of_section_3.2.2.R: -------------------------------------------------------------------------------- 1 | # example 3.11 of section 3.2.2 2 | # (example 3.11 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables 3 | # Title: Producing a line plot 4 | 5 | x <- runif(100) # Note: 1 6 | y <- x^2 + 0.2*x # Note: 2 7 | ggplot(data.frame(x=x,y=y), aes(x=x,y=y)) + geom_line() # Note: 3 8 | 9 | # Note 1: 10 | # First, generate the data for this example. 11 | # The x variable is uniformly randomly distributed 12 | # between 0 and 1. 13 | 14 | # Note 2: 15 | # The y variable is a 16 | # quadratic function of x. 17 | 18 | # Note 3: 19 | # Plot the line plot. 20 | 21 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00047_informalexample_3.3_of_section_3.2.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 3.3 of section 3.2.2 2 | # (informalexample 3.3 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables 3 | 4 | ggplot(customer_data_samp, aes(x=age, y=income)) + 5 | geom_point() + geom_smooth() + 6 | ggtitle("Income as a function of age") 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00048_informalexample_3.4_of_section_3.2.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 3.4 of section 3.2.2 2 | # (informalexample 3.4 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually 
checking relationships between two variables 3 | 4 | BinaryYScatterPlot(customer_data_samp, "age", "health_ins", 5 | title = "Probability of health insurance by age") 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00049_example_3.14_of_section_3.2.2.R: -------------------------------------------------------------------------------- 1 | # example 3.14 of section 3.2.2 2 | # (example 3.14 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables 3 | # Title: Producing a hexbin plot 4 | 5 | library(WVPlots) # Note: 1 6 | 7 | HexBinPlot(customer_data2, "age", "income", "Income as a function of age") + # Note: 2 8 | geom_smooth(color="black", se=FALSE) # Note: 3 9 | 10 | # Note 1: 11 | # Load the WVPlots library 12 | 13 | # Note 2: 14 | # Plot the hexbin of income as a function of age 15 | 16 | # Note 3: 17 | # Add the smoothing line in black; suppress 18 | # standard error ribbon (se=FALSE). 19 | 20 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00052_example_3.17_of_section_3.2.2.R: -------------------------------------------------------------------------------- 1 | # example 3.17 of section 3.2.2 2 | # (example 3.17 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables 3 | # Title: Comparing population densities across categories 4 | 5 | customer_data3 = subset(customer_data2, marital_status %in% c("Never married", "Widowed")) # Note: 1 6 | ggplot(customer_data3, aes(x=age, color=marital_status, linetype=marital_status)) + # Note: 2 7 | geom_density() + scale_color_brewer(palette="Dark2") 8 | 9 | # Note 1: 10 | # Restrict to the data for widowed or never married people. 
11 | 12 | # Note 2: 13 | # Differentiate the color and line style of the plots by marital_status 14 | 15 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00053_example_3.18_of_section_3.2.2.R: -------------------------------------------------------------------------------- 1 | # example 3.18 of section 3.2.2 2 | # (example 3.18 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables 3 | # Title: Comparing population densities across categories with ShadowHist() 4 | 5 | ShadowHist(customer_data3, "age", "marital_status", 6 | "Age distribution for never married vs. widowed populations", 7 | binwidth=5) # Note: 1 8 | 9 | # Note 1: 10 | # Set the bin widths of the histogram to 5. 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c03_Exploring_data/00054_informalexample_3.5_of_section_3.2.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 3.5 of section 3.2.2 2 | # (informalexample 3.5 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables 3 | 4 | ggplot(customer_data2, aes(x=age)) + 5 | geom_density() + facet_wrap(~marital_status) 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c04_Managing_data/00055_example_4.1_of_section_4.1.1.R: -------------------------------------------------------------------------------- 1 | # example 4.1 of section 4.1.1 2 | # (example 4.1 of section 4.1.1) : Managing data : Cleaning data : Domain-specific data cleaning 3 | # Title: Treating the age and income variables 4 | 5 | library(dplyr) 6 | customer_data = readRDS("custdata.RDS") # Note: 1 7 | 8 | customer_data <- customer_data %>% 9 | mutate(age = na_if(age, 0), # Note: 2 10 | income 
= ifelse(income < 0, NA, income)) # Note: 3 11 | 12 | # Note 1: 13 | # Load the data. 14 | 15 | # Note 2: 16 | # The function mutate() from the dplyr package adds columns to a data frame, or modifies existing columns. 17 | # The function na_if(), also from dplyr, turns a specific problematic value (in this case, 0) to NA 18 | 19 | # Note 3: 20 | # Convert negative incomes to NA 21 | 22 | -------------------------------------------------------------------------------- /CodeExamples/c04_Managing_data/00056_example_4.2_of_section_4.1.1.R: -------------------------------------------------------------------------------- 1 | # example 4.2 of section 4.1.1 2 | # (example 4.2 of section 4.1.1) : Managing data : Cleaning data : Domain-specific data cleaning 3 | # Title: Treating the gas_usage variable 4 | 5 | customer_data <- customer_data %>% 6 | mutate(gas_with_rent = (gas_usage == 1), # Note: 1 7 | gas_with_electricity = (gas_usage == 2), 8 | no_gas_bill = (gas_usage == 3) ) %>% 9 | mutate(gas_usage = ifelse(gas_usage < 4, NA, gas_usage)) # Note: 2 10 | 11 | # Note 1: 12 | # Create the three indicator variables. 13 | 14 | # Note 2: 15 | # Convert the special codes in the gas_usage column to NA. 
16 | 17 | -------------------------------------------------------------------------------- /CodeExamples/c04_Managing_data/00058_informalexample_4.1_of_section_4.1.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 4.1 of section 4.1.3 2 | # (informalexample 4.1 of section 4.1.3) : Managing data : Cleaning data : The vtreat package for automatically treating missing variables 3 | 4 | varlist <- setdiff(colnames(customer_data), c("custid", "health_ins")) # every column name except custid and health_ins 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c04_Managing_data/00059_example_4.4_of_section_4.1.3.R: -------------------------------------------------------------------------------- 1 | # example 4.4 of section 4.1.3 2 | # (example 4.4 of section 4.1.3) : Managing data : Cleaning data : The vtreat package for automatically treating missing variables 3 | # Title: Creating and applying a treatment plan 4 | 5 | library(vtreat) 6 | treatment_plan <- design_missingness_treatment(customer_data, varlist = varlist) 7 | training_prepared <- prepare(treatment_plan, customer_data) # apply the plan to the same data frame it was designed from 8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/c04_Managing_data/00063_example_4.8_of_section_4.2.1.R: -------------------------------------------------------------------------------- 1 | # example 4.8 of section 4.2.1 2 | # (example 4.8 of section 4.2.1) : Managing data : Data transformations : Normalization 3 | # Title: Normalizing by mean age 4 | 5 | summary(training_prepared$age) 6 | 7 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 8 | ## 21.00 34.00 48.00 49.22 62.00 120.00 9 | 10 | mean_age <- mean(training_prepared$age) 11 | age_normalized <- training_prepared$age/mean_age # a value of 1 now corresponds to the mean age 12 | summary(age_normalized) 13 | 14 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 
15 | ## 0.4267 0.6908 0.9753 1.0000 1.2597 2.4382 16 | 17 | -------------------------------------------------------------------------------- /CodeExamples/c04_Managing_data/00067_informalexample_4.2_of_section_4.2.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 4.2 of section 4.2.3 2 | # (informalexample 4.2 of section 4.2.3) : Managing data : Data transformations : Log transformations for skewed and wide distributions 3 | 4 | signedlog10 <- function(x) { 5 | ifelse(abs(x) <= 1, 0, sign(x)*log10(abs(x))) # values with |x| <= 1 map to 0; otherwise log10 of the magnitude, with the sign of x restored 6 | } 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00075_informalexample_5.6_of_section_5.1.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.6 of section 5.1.1 2 | # (informalexample 5.6 of section 5.1.1) : Data engineering and data shaping : Data selection : Sub-setting rows and columns 3 | 4 | library("dplyr") 5 | 6 | iris_dplyr <- iris %>% 7 | select(., 8 | Petal.Length, Petal.Width, Species) %>% 9 | filter(., 10 | Petal.Length > 2) 11 | 12 | head(iris_dplyr) 13 | 14 | ## Petal.Length Petal.Width Species 15 | ## 1 4.7 1.4 versicolor 16 | ## 2 4.5 1.5 versicolor 17 | ## 3 4.9 1.5 versicolor 18 | ## 4 4.0 1.3 versicolor 19 | ## 5 4.6 1.5 versicolor 20 | ## 6 4.5 1.3 versicolor 21 | 22 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00079_informalexample_5.10_of_section_5.1.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.10 of section 5.1.2 2 | # (informalexample 5.10 of section 5.1.2) : Data engineering and data shaping : Data selection : Removing records with incomplete data 3 | 4 | library("data.table") 5 | 6 | msleep_data.table <- as.data.table(msleep) 7 | 8 | clean_data.table =
msleep_data.table[complete.cases(msleep_data.table), ] # keep only the rows that have no NA in any column 9 | 10 | nrow(clean_data.table) 11 | 12 | ## [1] 20 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00080_informalexample_5.11_of_section_5.1.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.11 of section 5.1.2 2 | # (informalexample 5.11 of section 5.1.2) : Data engineering and data shaping : Data selection : Removing records with incomplete data 3 | 4 | library("dplyr") 5 | 6 | clean_dplyr <- msleep %>% 7 | filter(., complete.cases(.)) 8 | 9 | nrow(clean_dplyr) 10 | 11 | ## [1] 20 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00081_informalexample_5.12_of_section_5.1.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.12 of section 5.1.3 2 | # (informalexample 5.12 of section 5.1.3) : Data engineering and data shaping : Data selection : Ordering rows 3 | 4 | purchases <- wrapr::build_frame( # Note: 1 5 | "day", "hour", "n_purchase" | 6 | 1 , 9 , 5 | 7 | 2 , 9 , 3 | 8 | 2 , 11 , 5 | 9 | 1 , 13 , 1 | 10 | 2 , 13 , 3 | 11 | 1 , 14 , 1 ) 12 | 13 | # Note 1: 14 | # Use wrapr::build_frame to type data in directly in legible column order. 
15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00083_informalexample_5.14_of_section_5.1.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.14 of section 5.1.3 2 | # (informalexample 5.14 of section 5.1.3) : Data engineering and data shaping : Data selection : Ordering rows 3 | 4 | library("data.table") 5 | 6 | DT_purchases <- as.data.table(purchases) 7 | 8 | order_cols <- c("day", "hour") # Note: 1 9 | setorderv(DT_purchases, order_cols) 10 | 11 | DT_purchases[ , running_total := cumsum(n_purchase)] 12 | 13 | # print(DT_purchases) 14 | 15 | # Note 1: 16 | # Re-order data by day, then by hour within each day. 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00084_informalexample_5.15_of_section_5.1.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.15 of section 5.1.3 2 | # (informalexample 5.15 of section 5.1.3) : Data engineering and data shaping : Data selection : Ordering rows 3 | 4 | library("dplyr") 5 | 6 | res <- purchases %>% 7 | arrange(., day, hour) %>% 8 | mutate(., running_total = cumsum(n_purchase)) # cumulative purchases over the whole sorted table 9 | 10 | # print(res) 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00087_informalexample_5.18_of_section_5.1.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.18 of section 5.1.3 2 | # (informalexample 5.18 of section 5.1.3) : Data engineering and data shaping : Data selection : Ordering rows 3 | 4 | library("dplyr") 5 | 6 | res <- purchases %>% 7 | arrange(., day, hour) %>% 8 | group_by(., day) %>% 9 | mutate(., running_total = cumsum(n_purchase)) %>% 10 | ungroup(.) # because of group_by(day), the running total above restarts for each day
11 | 12 | # print(res) 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00089_informalexample_5.20_of_section_5.2.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.20 of section 5.2.1 2 | # (informalexample 5.20 of section 5.2.1) : Data engineering and data shaping : Basic data transforms : Add new columns 3 | 4 | library("lubridate") 5 | library("ggplot2") 6 | 7 | # create a function to make the date string. 8 | datestr = function(day, month, year) { 9 | paste(day, month, year, sep="-") # e.g. datestr(1, 5, 1973) gives "1-5-1973" (day-month-year order) 10 | } 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00093_informalexample_5.24_of_section_5.2.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.24 of section 5.2.1 2 | # (informalexample 5.24 of section 5.2.1) : Data engineering and data shaping : Basic data transforms : Add new columns 3 | 4 | library("dplyr") 5 | 6 | airquality_with_date2 <- airquality %>% 7 | mutate(., date = dmy(datestr(Day, Month, 1973))) %>% 8 | select(., Ozone, date) # keep only the Ozone reading and the parsed date 9 | 10 | head(airquality_with_date2) 11 | 12 | ## Ozone date 13 | ## 1 41 1973-05-01 14 | ## 2 36 1973-05-02 15 | ## 3 12 1973-05-03 16 | ## 4 18 1973-05-04 17 | ## 5 NA 1973-05-05 18 | ## 6 28 1973-05-06 19 | 20 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00095_informalexample_5.26_of_section_5.2.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.26 of section 5.2.1 2 | # (informalexample 5.26 of section 5.2.1) : Data engineering and data shaping : Basic data transforms : Add new columns 3 | 4 | library("data.table") 5 | library("zoo") 6 | 7 | DT_airquality[, OzoneCorrected := na.locf(Ozone,
na.rm=FALSE)] # na.locf() (zoo) fills each NA with the last preceding non-NA Ozone value 8 | 9 | summary(DT_airquality) 10 | 11 | ## Ozone date OzoneCorrected 12 | ## Min. : 1.00 Min. :1973-05-01 Min. : 1.00 13 | ## 1st Qu.: 18.00 1st Qu.:1973-06-08 1st Qu.: 16.00 14 | ## Median : 31.50 Median :1973-07-16 Median : 30.00 15 | ## Mean : 42.13 Mean :1973-07-16 Mean : 39.78 16 | ## 3rd Qu.: 63.25 3rd Qu.:1973-08-23 3rd Qu.: 52.00 17 | ## Max. :168.00 Max. :1973-09-30 Max. :168.00 18 | ## NA's :37 19 | 20 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00096_informalexample_5.27_of_section_5.2.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.27 of section 5.2.1 2 | # (informalexample 5.27 of section 5.2.1) : Data engineering and data shaping : Basic data transforms : Add new columns 3 | 4 | library("dplyr") 5 | library("zoo") 6 | 7 | airquality_with_date %>% 8 | mutate(., 9 | OzoneCorrected = na.locf(Ozone, na.rm = FALSE)) %>% 10 | summary(.) 11 | 12 | ## Ozone date OzoneCorrected 13 | ## Min. : 1.00 Min. :1973-05-01 Min. : 1.00 14 | ## 1st Qu.: 18.00 1st Qu.:1973-06-08 1st Qu.: 16.00 15 | ## Median : 31.50 Median :1973-07-16 Median : 30.00 16 | ## Mean : 42.13 Mean :1973-07-16 Mean : 39.78 17 | ## 3rd Qu.: 63.25 3rd Qu.:1973-08-23 3rd Qu.: 52.00 18 | ## Max. :168.00 Max. :1973-09-30 Max. 
:168.00 19 | ## NA's :37 20 | 21 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00097_informalexample_5.28_of_section_5.2.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.28 of section 5.2.2 2 | # (informalexample 5.28 of section 5.2.2) : Data engineering and data shaping : Basic data transforms : Other simple operations 3 | 4 | d <- data.frame(x = 1:2, y = 3:4) 5 | print(d) 6 | #> x y 7 | #> 1 1 3 8 | #> 2 2 4 9 | 10 | colnames(d) <- c("BIGX", "BIGY") # rename both columns at once 11 | print(d) 12 | #> BIGX BIGY 13 | #> 1 1 3 14 | #> 2 2 4 15 | 16 | d$BIGX <- NULL # assigning NULL removes the column 17 | print(d) 18 | #> BIGY 19 | #> 1 3 20 | #> 2 4 21 | 22 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00098_informalexample_5.29_of_section_5.3.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.29 of section 5.3.1 2 | # (informalexample 5.29 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows 3 | 4 | library("datasets") 5 | library("ggplot2") 6 | 7 | head(iris) # show the first six rows 8 | 9 | ## Sepal.Length Sepal.Width Petal.Length Petal.Width Species 10 | ## 1 5.1 3.5 1.4 0.2 setosa 11 | ## 2 4.9 3.0 1.4 0.2 setosa 12 | ## 3 4.7 3.2 1.3 0.2 setosa 13 | ## 4 4.6 3.1 1.5 0.2 setosa 14 | ## 5 5.0 3.6 1.4 0.2 setosa 15 | ## 6 5.4 3.9 1.7 0.4 setosa 16 | 17 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00100_informalexample_5.31_of_section_5.3.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.31 of section 5.3.1 2 | # (informalexample 5.31 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary 
rows 3 | 4 | library("data.table") 5 | 6 | iris_data.table <- as.data.table(iris) 7 | iris_data.table <- iris_data.table[, 8 | .(Petal.Length = mean(Petal.Length), 9 | Petal.Width = mean(Petal.Width)), 10 | by = .(Species)] # one summary row per Species; the variable is overwritten with the summary 11 | 12 | # print(iris_data.table) 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00101_informalexample_5.32_of_section_5.3.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.32 of section 5.3.1 2 | # (informalexample 5.32 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows 3 | 4 | library("dplyr") 5 | 6 | iris_summary <- iris %>% group_by(., Species) %>% 7 | summarize(., 8 | Petal.Length = mean(Petal.Length), 9 | Petal.Width = mean(Petal.Width)) %>% 10 | ungroup(.) 11 | 12 | # print(iris_summary) 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00102_informalexample_5.33_of_section_5.3.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.33 of section 5.3.1 2 | # (informalexample 5.33 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows 3 | 4 | iris_copy <- iris 5 | iris_copy$mean_Petal.Length <- ave(iris$Petal.Length, iris$Species, FUN = mean) # ave() returns the per-Species mean repeated for every row, so no rows are collapsed 6 | iris_copy$mean_Petal.Width <- ave(iris$Petal.Width, iris$Species, FUN = mean) 7 | 8 | # head(iris_copy) 9 | # tail(iris_copy) 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00103_informalexample_5.34_of_section_5.3.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.34 of section 5.3.1 2 | # (informalexample 5.34 of 
section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows 3 | 4 | library("data.table") 5 | 6 | iris_data.table <- as.data.table(iris) 7 | 8 | iris_data.table[ , 9 | `:=`(mean_Petal.Length = mean(Petal.Length), 10 | mean_Petal.Width = mean(Petal.Width)), 11 | by = "Species"] # := adds the per-Species means as new columns and keeps every original row 12 | 13 | # print(iris_data.table) 14 | 15 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00104_informalexample_5.35_of_section_5.3.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.35 of section 5.3.1 2 | # (informalexample 5.35 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows 3 | 4 | library("dplyr") 5 | 6 | iris_dplyr <- iris %>% 7 | group_by(., Species) %>% 8 | mutate(., 9 | mean_Petal.Length = mean(Petal.Length), 10 | mean_Petal.Width = mean(Petal.Width)) %>% 11 | ungroup(.) # grouped mutate() keeps every row and attaches the per-Species means to each one
12 | 13 | # head(iris_dplyr) 14 | 15 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00106_informalexample_5.37_of_section_5.4.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.37 of section 5.4.1 2 | # (informalexample 5.37 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly 3 | 4 | rbind_base = rbind(productTable, 5 | productTable2) # stack the rows of productTable2 underneath those of productTable 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00107_informalexample_5.38_of_section_5.4.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.38 of section 5.4.1 2 | # (informalexample 5.38 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly 3 | 4 | str(rbind_base) # inspect the combined frame: 8 rows, 2 columns 5 | 6 | ## 'data.frame': 8 obs. of 2 variables: 7 | ## $ productID: Factor w/ 8 levels "p1","p2","p3",..: 1 2 3 4 5 6 7 8 8 | ## $ price : num 9.99 16.29 19.99 5.49 24.49 ... 
9 | 10 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00108_informalexample_5.39_of_section_5.4.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.39 of section 5.4.1 2 | # (informalexample 5.39 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly 3 | 4 | library("data.table") 5 | 6 | rbindlist(list(productTable, 7 | productTable2)) # data.table equivalent of rbind(): takes a list of tables 8 | 9 | ## productID price 10 | ## 1: p1 9.99 11 | ## 2: p2 16.29 12 | ## 3: p3 19.99 13 | ## 4: p4 5.49 14 | ## 5: p5 24.49 15 | ## 6: n1 25.49 16 | ## 7: n2 33.99 17 | ## 8: n3 17.99 18 | 19 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00114_informalexample_5.45_of_section_5.4.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.45 of section 5.4.1 2 | # (informalexample 5.45 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly 3 | 4 | cbind(productTable, salesTable[, -1]) # [, -1] drops salesTable's first column before the columns are bound 5 | 6 | ## productID price sold_store sold_online 7 | ## 1 p1 9.99 6 64 8 | ## 2 p2 16.29 31 1 9 | ## 3 p3 19.99 30 23 10 | ## 4 p4 5.49 31 67 11 | ## 5 p5 24.49 43 51 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00115_informalexample_5.46_of_section_5.4.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.46 of section 5.4.1 2 | # (informalexample 5.46 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly 3 | 4 | library("data.table") 5 | 6 | cbind(as.data.table(productTable), 7 | 
as.data.table(salesTable[, -1])) 8 | 9 | ## productID price sold_store sold_online 10 | ## 1: p1 9.99 6 64 11 | ## 2: p2 16.29 31 1 12 | ## 3: p3 19.99 30 23 13 | ## 4: p4 5.49 31 67 14 | ## 5: p5 24.49 43 51 15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00116_informalexample_5.47_of_section_5.4.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.47 of section 5.4.1 2 | # (informalexample 5.47 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly 3 | 4 | library("dplyr") 5 | 6 | # list of data frames calling convention 7 | dplyr::bind_cols(list(productTable, salesTable[, -1])) 8 | 9 | ## productID price sold_store sold_online 10 | ## 1 p1 9.99 6 64 11 | ## 2 p2 16.29 31 1 12 | ## 3 p3 19.99 30 23 13 | ## 4 p4 5.49 31 67 14 | ## 5 p5 24.49 43 51 15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00117_informalexample_5.48_of_section_5.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.48 of section 5.4.2 2 | # (informalexample 5.48 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | productTable <- wrapr::build_frame( 5 | "productID", "price" | 6 | "p1" , 9.99 | 7 | "p3" , 19.99 | 8 | "p4" , 5.49 | 9 | "p5" , 24.49 ) # note: there is no p2 row in productTable 10 | 11 | salesTable <- wrapr::build_frame( 12 | "productID", "unitsSold" | 13 | "p1" , 10 | 14 | "p2" , 43 | 15 | "p3" , 55 | 16 | "p4" , 8 ) # note: there is no p5 row in salesTable 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00118_informalexample_5.49_of_section_5.4.2.R: 
-------------------------------------------------------------------------------- 1 | # informalexample 5.49 of section 5.4.2 2 | # (informalexample 5.49 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | merge(productTable, salesTable, by = "productID", all.x = TRUE) # all.x = TRUE keeps every productTable row (left join) 5 | 6 | ## productID price unitsSold 7 | ## 1 p1 9.99 10 8 | ## 2 p3 19.99 55 9 | ## 3 p4 5.49 8 10 | ## 4 p5 24.49 NA 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00120_informalexample_5.51_of_section_5.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.51 of section 5.4.2 2 | # (informalexample 5.51 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | library("data.table") 5 | 6 | joined_table <- productTable 7 | joined_table$unitsSold <- salesTable$unitsSold[match(joined_table$productID, 8 | salesTable$productID)] # match() gives NA for a productID absent from salesTable, so unitsSold is NA for p5 9 | print(joined_table) 10 | 11 | ## productID price unitsSold 12 | ## 1 p1 9.99 10 13 | ## 2 p3 19.99 55 14 | ## 3 p4 5.49 8 15 | ## 4 p5 24.49 NA 16 | 17 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00121_informalexample_5.52_of_section_5.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.52 of section 5.4.2 2 | # (informalexample 5.52 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | library("dplyr") 5 | 6 | left_join(productTable, salesTable, by = "productID") 7 | 8 | ## productID price unitsSold 9 | ## 1 p1 9.99 10 10 | ## 2 p3 19.99 55 11 | ## 3 p4 5.49 8 12 | ## 4 p5 24.49 NA 13 | 14 | 
-------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00122_informalexample_5.53_of_section_5.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.53 of section 5.4.2 2 | # (informalexample 5.53 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | merge(productTable, salesTable, by = "productID") # default merge keeps only productIDs present in both tables (inner join) 5 | 6 | ## productID price unitsSold 7 | ## 1 p1 9.99 10 8 | ## 2 p3 19.99 55 9 | ## 3 p4 5.49 8 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00123_informalexample_5.54_of_section_5.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.54 of section 5.4.2 2 | # (informalexample 5.54 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | library("data.table") 5 | 6 | productTable_data.table <- as.data.table(productTable) 7 | salesTable_data.table <- as.data.table(salesTable) 8 | 9 | merge(productTable_data.table, salesTable_data.table, by = "productID") # merge the data.table copies built above (not the original data.frames) so the data.table join is the one demonstrated 10 | 11 | ## productID price unitsSold 12 | ## 1: p1 9.99 10 13 | ## 2: p3 19.99 55 14 | ## 3: p4 5.49 8 15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00124_informalexample_5.55_of_section_5.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.55 of section 5.4.2 2 | # (informalexample 5.55 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | library("dplyr") 5 | 6 | inner_join(productTable, salesTable, by = "productID") 7 | 
8 | ## productID price unitsSold 9 | ## 1 p1 9.99 10 10 | ## 2 p3 19.99 55 11 | ## 3 p4 5.49 8 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00125_informalexample_5.56_of_section_5.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.56 of section 5.4.2 2 | # (informalexample 5.56 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | # note that merge orders the result by key column by default 5 | # use sort=FALSE to skip the sorting 6 | merge(productTable, salesTable, by = "productID", all=TRUE) # all=TRUE keeps unmatched rows from both tables (full join); missing cells become NA 7 | 8 | ## productID price unitsSold 9 | ## 1 p1 9.99 10 10 | ## 2 p2 NA 43 11 | ## 3 p3 19.99 55 12 | ## 4 p4 5.49 8 13 | ## 5 p5 24.49 NA 14 | 15 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00126_informalexample_5.57_of_section_5.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.57 of section 5.4.2 2 | # (informalexample 5.57 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | library("data.table") 5 | 6 | productTable_data.table <- as.data.table(productTable) 7 | salesTable_data.table <- as.data.table(salesTable) 8 | 9 | merge(productTable_data.table, salesTable_data.table, 10 | by = "productID", all = TRUE) # same full join, performed on the data.table copies 11 | 12 | ## productID price unitsSold 13 | ## 1: p1 9.99 10 14 | ## 2: p2 NA 43 15 | ## 3: p3 19.99 55 16 | ## 4: p4 5.49 8 17 | ## 5: p5 24.49 NA 18 | 19 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00127_informalexample_5.58_of_section_5.4.2.R: 
-------------------------------------------------------------------------------- 1 | # informalexample 5.58 of section 5.4.2 2 | # (informalexample 5.58 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | library("dplyr") 5 | 6 | full_join(productTable, salesTable, by = "productID") # unlike merge(), the result is not sorted by key: the unmatched p2 row comes last 7 | 8 | ## productID price unitsSold 9 | ## 1 p1 9.99 10 10 | ## 2 p3 19.99 55 11 | ## 3 p4 5.49 8 12 | ## 4 p5 24.49 NA 13 | ## 5 p2 NA 43 14 | 15 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00129_informalexample_5.60_of_section_5.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.60 of section 5.4.2 2 | # (informalexample 5.60 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | trades <- data.table( 5 | trade_id = c(32525, 32526), 6 | price = c(5.5, 9), 7 | quantity = c(100, 200), 8 | when = as.POSIXct(strptime( 9 | c("2018-10-18 2:13:42", 10 | "2018-10-18 2:19:20"), 11 | "%Y-%m-%d %H:%M:%S"))) # this snippet has no library() call; data.table must already be attached 12 | 13 | print(trades) 14 | 15 | ## trade_id price quantity when 16 | ## 1: 32525 5.5 100 2018-10-18 02:13:42 17 | ## 2: 32526 9.0 200 2018-10-18 02:19:20 18 | 19 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00130_informalexample_5.61_of_section_5.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.61 of section 5.4.2 2 | # (informalexample 5.61 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables 3 | 4 | quotes[, quote_time := when] 5 | trades[ , trade_time := when ] 6 | quotes[ trades, on = "when", roll = TRUE ][ 7
| , .(quote_time, bid, price, ask, trade_id, trade_time) ] # rolling join: each trade is paired with the most recent quote at or before its time 8 | 9 | ## quote_time bid price ask trade_id trade_time 10 | ## 1: 2018-10-18 02:12:23 5 5.5 6 32525 2018-10-18 02:13:42 11 | ## 2: 2018-10-18 02:17:51 8 9.0 10 32526 2018-10-18 02:19:20 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00132_informalexample_5.63_of_section_5.5.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.63 of section 5.5.1 2 | # (informalexample 5.63 of section 5.5.1) : Data engineering and data shaping : Reshaping transforms : Moving data from wide to tall form 3 | 4 | # let's give an example of the kind of graph we have in mind, using just driver deaths 5 | library("ggplot2") 6 | 7 | ggplot(Seatbelts, 8 | aes(x = date, y = DriversKilled, color = law, shape = law)) + 9 | geom_point() + 10 | geom_smooth(se=FALSE) + 11 | ggtitle("UK car driver deaths by month") 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00133_informalexample_5.64_of_section_5.5.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.64 of section 5.5.1 2 | # (informalexample 5.64 of section 5.5.1) : Data engineering and data shaping : Reshaping transforms : Moving data from wide to tall form 3 | 4 | library("data.table") 5 | 6 | seatbelts_long2 <- 7 | melt.data.table(as.data.table(Seatbelts), 8 | id.vars = NULL, 9 | measure.vars = c("DriversKilled", "front", "rear"), 10 | variable.name = "victim_type", 11 | value.name = "nvictims") # the three measure columns are stacked into victim_type (name) / nvictims (value) 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00134_informalexample_5.65_of_section_5.5.1.R: -------------------------------------------------------------------------------- 1 | # 
informalexample 5.65 of section 5.5.1 2 | # (informalexample 5.65 of section 5.5.1) : Data engineering and data shaping : Reshaping transforms : Moving data from wide to tall form 3 | 4 | library("cdata") 5 | 6 | seatbelts_long3 <- unpivot_to_blocks( 7 | Seatbelts, 8 | nameForNewKeyColumn = "victim_type", 9 | nameForNewValueColumn = "nvictims", 10 | columnsToTakeFrom = c("DriversKilled", "front", "rear")) # these three columns are stacked into victim_type (name) / nvictims (value) 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00139_informalexample_5.70_of_section_5.5.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.70 of section 5.5.2 2 | # (informalexample 5.70 of section 5.5.2) : Data engineering and data shaping : Reshaping transforms : Moving data from tall to wide form 3 | 4 | library("data.table") 5 | 6 | ChickWeight_wide2 <- dcast.data.table( 7 | as.data.table(ChickWeight), 8 | Chick ~ Time, 9 | value.var = "weight") # one row per Chick, one column per Time value, cells filled from weight 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c05_Data_Engineering_and_Data_Shaping/00140_informalexample_5.71_of_section_5.5.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 5.71 of section 5.5.2 2 | # (informalexample 5.71 of section 5.5.2) : Data engineering and data shaping : Reshaping transforms : Moving data from tall to wide form 3 | 4 | library("cdata") 5 | 6 | ChickWeight_wide3 <- pivot_to_rowrecs( 7 | ChickWeight, 8 | columnToTakeKeysFrom = "Time", 9 | columnToTakeValuesFrom = "weight", 10 | rowKeyColumns = "Chick") # same reshape with cdata: Time values become columns, keyed by Chick 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00143_example_6.2_of_section_6.2.3.R: -------------------------------------------------------------------------------- 1 | # example 6.2 of section 6.2.3 2 | # (example 6.2 of section 6.2.3) 
: Choosing and evaluating models : Evaluating models : Evaluating classification models 3 | # Title: Spam classifications 4 | 5 | sample <- spamTest[c(7,35,224,327), c('spam','pred')] 6 | print(sample) 7 | ## spam pred # Note: 1 8 | ## 115 spam 0.9903246227 9 | ## 361 spam 0.4800498077 10 | ## 2300 non-spam 0.0006846551 11 | ## 3428 non-spam 0.0001434345 12 | 13 | # Note 1: 14 | # The first column gives the actual class 15 | # label (spam or non-spam). The second column gives 16 | # the predicted probability that an email is spam. 17 | # If the probability > 0.5 the email is labeled 18 | # “spam”; otherwise it is “non-spam”. 19 | 20 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00144_example_6.3_of_section_6.2.3.R: -------------------------------------------------------------------------------- 1 | # example 6.3 of section 6.2.3 2 | # (example 6.3 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models 3 | # Title: Spam confusion matrix 4 | 5 | confmat_spam <- table(truth = spamTest$spam, 6 | prediction = ifelse(spamTest$pred > 0.5, 7 | "spam", "non-spam")) 8 | print(confmat_spam) 9 | ## prediction 10 | ## truth non-spam spam 11 | ## non-spam 264 14 12 | ## spam 22 158 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00145_informalexample_6.1_of_section_6.2.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 6.1 of section 6.2.3 2 | # (informalexample 6.1 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models 3 | 4 | (confmat_spam[1,1] + confmat_spam[2,2]) / sum(confmat_spam) 5 | ## [1] 0.9213974 6 | 7 | --------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00146_example_6.4_of_section_6.2.3.R: -------------------------------------------------------------------------------- 1 | # example 6.4 of section 6.2.3 2 | # (example 6.4 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models 3 | # Title: Entering the Akismet confusion matrix by hand 4 | 5 | confmat_akismet <- as.table(matrix(data=c(288-1,17,1,13882-17),nrow=2,ncol=2)) 6 | rownames(confmat_akismet) <- rownames(confmat_spam) 7 | colnames(confmat_akismet) <- colnames(confmat_spam) 8 | print(confmat_akismet) 9 | ## non-spam spam 10 | ## non-spam 287 1 11 | ## spam 17 13865 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00147_informalexample_6.2_of_section_6.2.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 6.2 of section 6.2.3 2 | # (informalexample 6.2 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models 3 | 4 | (confmat_akismet[1,1] + confmat_akismet[2,2]) / sum(confmat_akismet) 5 | ## [1] 0.9987297 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00148_informalexample_6.3_of_section_6.2.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 6.3 of section 6.2.3 2 | # (informalexample 6.3 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models 3 | 4 | confmat_spam[2,2] / (confmat_spam[2,2]+ confmat_spam[1,2]) 5 | ## [1] 0.9186047 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00149_informalexample_6.4_of_section_6.2.3.R: 
-------------------------------------------------------------------------------- 1 | # informalexample 6.4 of section 6.2.3 2 | # (informalexample 6.4 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models 3 | 4 | confmat_akismet[2,2] / (confmat_akismet[2,2] + confmat_akismet[1,2]) 5 | ## [1] 0.9999279 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00150_informalexample_6.5_of_section_6.2.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 6.5 of section 6.2.3 2 | # (informalexample 6.5 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models 3 | 4 | confmat_spam[2,2] / (confmat_spam[2,2] + confmat_spam[2,1]) 5 | ## [1] 0.8777778 6 | 7 | confmat_akismet[2,2] / (confmat_akismet[2,2] + confmat_akismet[2,1]) 8 | ## [1] 0.9987754 9 | 10 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00151_informalexample_6.6_of_section_6.2.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 6.6 of section 6.2.3 2 | # (informalexample 6.6 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models 3 | 4 | precision <- confmat_spam[2,2] / (confmat_spam[2,2]+ confmat_spam[1,2]) 5 | recall <- confmat_spam[2,2] / (confmat_spam[2,2] + confmat_spam[2,1]) 6 | 7 | (F1 <- 2 * precision * recall / (precision + recall) ) 8 | ## [1] 0.8977273 9 | 10 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00153_informalexample_6.7_of_section_6.2.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 6.7 of section 6.2.3 2 | # (informalexample 6.7 
of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models 3 | 4 | confmat_spam[1,1] / (confmat_spam[1,1] + confmat_spam[1,2]) 5 | ## [1] 0.9496403 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00154_example_6.6_of_section_6.2.4.R: -------------------------------------------------------------------------------- 1 | # example 6.6 of section 6.2.4 2 | # (example 6.6 of section 6.2.4) : Choosing and evaluating models : Evaluating models : Evaluating scoring models 3 | # Title: Fit the cricket model and make predictions 4 | 5 | crickets <- read.csv("cricketchirps/crickets.csv") 6 | 7 | cricket_model <- lm(temperatureF ~ chirp_rate, data=crickets) 8 | crickets$temp_pred <- predict(cricket_model, newdata=crickets) 9 | 10 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00155_example_6.7_of_section_6.2.4.R: -------------------------------------------------------------------------------- 1 | # example 6.7 of section 6.2.4 2 | # (example 6.7 of section 6.2.4) : Choosing and evaluating models : Evaluating models : Evaluating scoring models 3 | # Title: Calculating RMSE 4 | 5 | error_sq <- (crickets$temp_pred - crickets$temperatureF)^2 6 | ( RMSE <- sqrt(mean(error_sq)) ) 7 | ## [1] 3.564149 8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00157_example_6.9_of_section_6.2.5.R: -------------------------------------------------------------------------------- 1 | # example 6.9 of section 6.2.5 2 | # (example 6.9 of section 6.2.5) : Choosing and evaluating models : Evaluating models : Evaluating probability models 3 | # Title: Making a double density plot 4 | 5 | library(WVPlots) 6 | DoubleDensityPlot(spamTest, 7 | xvar = "pred", 8 | truthVar = "spam", 9 | title = 
"Distribution of scores for spam filter") 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00158_example_6.10_of_section_6.2.5.R: -------------------------------------------------------------------------------- 1 | # example 6.10 of section 6.2.5 2 | # (example 6.10 of section 6.2.5) : Choosing and evaluating models : Evaluating models : Evaluating probability models 3 | # Title: Plotting the receiver operating characteristic curve 4 | 5 | library(WVPlots) 6 | ROCPlot(spamTest, # Note: 1 7 | xvar = 'pred', 8 | truthVar = 'spam', 9 | truthTarget = 'spam', 10 | title = 'Spam filter test performance') 11 | 12 | library(sigr) 13 | calcAUC(spamTest$pred, spamTest$spam=='spam') # Note: 2 14 | ## [1] 0.9660072 15 | 16 | # Note 1: 17 | # Plot the receiver operating characteristic (ROC) curve. 18 | 19 | # Note 2: 20 | # Calculate the area under the ROC curve explicitly. 21 | 22 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00160_example_6.12_of_section_6.2.5.R: -------------------------------------------------------------------------------- 1 | # example 6.12 of section 6.2.5 2 | # (example 6.12 of section 6.2.5) : Choosing and evaluating models : Evaluating models : Evaluating probability models 3 | # Title: Computing the null model’s log likelihood 4 | 5 | (pNull <- mean(spamTrain$spam == 'spam')) 6 | ## [1] 0.3941588 7 | 8 | sum(ylogpy(y, pNull) + ylogpy(1-y, 1-pNull)) 9 | ## [1] -306.8964 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00161_example_6.13_of_section_6.2.5.R: -------------------------------------------------------------------------------- 1 | # example 6.13 of section 6.2.5 2 | # (example 6.13 of section 6.2.5) : Choosing and evaluating models : Evaluating models : Evaluating probability 
models 3 | # Title: Computing the deviance and pseudo R-squared 4 | 5 | library(sigr) 6 | 7 | (deviance <- calcDeviance(spamTest$pred, spamTest$spam == 'spam')) 8 | ## [1] 253.8598 9 | (nullDeviance <- calcDeviance(pNull, spamTest$spam == 'spam')) 10 | ## [1] 613.7929 11 | 12 | (pseudoR2 <- 1 - deviance/nullDeviance) 13 | ## [1] 0.586408 14 | 15 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00163_example_6.15_of_section_6.3.2.R: -------------------------------------------------------------------------------- 1 | # example 6.15 of section 6.3.2 2 | # (example 6.15 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example 3 | # Title: Fit a model to the iris training data 4 | 5 | source("lime_iris_example.R") # Note: 1 6 | 7 | input <- as.matrix(train[, 1:4]) # Note: 2 8 | model <- fit_iris_example(input, train$class) 9 | 10 | # Note 1: 11 | # Load the convenience function. 12 | 13 | # Note 2: 14 | # The input to the model is the first four 15 | # columns of the training data, converted to a 16 | # matrix. 
17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00165_example_6.17_of_section_6.3.2.R: -------------------------------------------------------------------------------- 1 | # example 6.17 of section 6.3.2 2 | # (example 6.17 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example 3 | # Title: Build a LIME explainer from the model and training data 4 | 5 | library(lime) 6 | explainer <- lime(train[,1:4], # Note: 1 7 | model = model, 8 | bin_continuous = TRUE, # Note: 2 9 | n_bins = 10) # Note: 3 10 | 11 | # Note 1: 12 | # Build the explainer from the training data. 13 | 14 | # Note 2: 15 | # Bin the continuous variables when making explanations. 16 | 17 | # Note 3: 18 | # Use 10 bins. 19 | 20 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00167_example_6.19_of_section_6.3.2.R: -------------------------------------------------------------------------------- 1 | # example 6.19 of section 6.3.2 2 | # (example 6.19 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example 3 | # Title: Explain the iris example 4 | 5 | explanation <- lime::explain(example, 6 | explainer, 7 | n_labels = 1, # Note: 1 8 | n_features = 4) # Note: 2 9 | 10 | # Note 1: 11 | # The number of labels to explain; use 1 for binary classification. 12 | 13 | # Note 2: 14 | # The number of features to use when fitting the explanation. 
15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00168_informalexample_6.8_of_section_6.3.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 6.8 of section 6.3.2 2 | # (informalexample 6.8 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example 3 | 4 | plot_features(explanation) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00169_informalexample_6.9_of_section_6.3.2.txt: -------------------------------------------------------------------------------- 1 | # informalexample 6.9 of section 6.3.2 2 | # (informalexample 6.9 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example 3 | 4 | Sepal.Length Sepal.Width Petal.Length Petal.Width 5 | 5.1 3.5 1.4 0.2 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00170_informalexample_6.10_of_section_6.3.2.txt: -------------------------------------------------------------------------------- 1 | # informalexample 6.10 of section 6.3.2 2 | # (informalexample 6.10 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example 3 | 4 | Sepal.Length Sepal.Width Petal.Length Petal.Width 5 | 5.505938 3.422535 1.3551 0.4259682 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00175_example_6.22_of_section_6.3.4.R: 
-------------------------------------------------------------------------------- 1 | # example 6.22 of section 6.3.4 2 | # (example 6.22 of section 6.3.4) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Train the text classifier 3 | # Title: Convert the texts and fit the model 4 | 5 | source("lime_imdb_example.R") 6 | 7 | vocab <- create_pruned_vocabulary(texts) # Note: 1 8 | dtm_train <- make_matrix(texts, vocab) # Note: 2 9 | model <- fit_imdb_model(dtm_train, labels) # Note: 3 10 | 11 | # Note 1: 12 | # Create the vocabulary from the training data. 13 | 14 | # Note 2: 15 | # Create the document-term matrix of the training corpus. 16 | 17 | # Note 3: 18 | # Train the model. 19 | 20 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00177_example_6.24_of_section_6.3.5.R: -------------------------------------------------------------------------------- 1 | # example 6.24 of section 6.3.5 2 | # (example 6.24 of section 6.3.5) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Explaining the classifier’s predictions 3 | # Title: Build an explainer for a text classifier 4 | 5 | explainer <- lime(texts, model = model, 6 | preprocess = function(x) make_matrix(x, vocab)) 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00179_example_6.26_of_section_6.3.5.R: -------------------------------------------------------------------------------- 1 | # example 6.26 of section 6.3.5 2 | # (example 6.26 of section 6.3.5) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Explaining the classifier’s predictions 3 | # Title: Explain the model's prediction 4 | 5 | explanation <- 
lime::explain(sample_case, 6 | explainer, 7 | n_labels = 1, 8 | n_features = 5) 9 | 10 | plot_features(explanation) 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00180_informalexample_6.13_of_section_6.3.5.R: -------------------------------------------------------------------------------- 1 | # informalexample 6.13 of section 6.3.5 2 | # (informalexample 6.13 of section 6.3.5) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Explaining the classifier’s predictions 3 | 4 | plot_text_explanations(explanation) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c06_Choosing_and_evaluating_models/00182_informalexample_6.14_of_section_6.3.5.R: -------------------------------------------------------------------------------- 1 | # informalexample 6.14 of section 6.3.5 2 | # (informalexample 6.14 of section 6.3.5) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Explaining the classifier’s predictions 3 | 4 | predict(model, newdata=make_matrix(sample_cases[2], vocab)) 5 | ## [1] 0.6052929 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00183_informalexample_7.1_of_section_7.1.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 7.1 of section 7.1.1 2 | # (informalexample 7.1 of section 7.1.1) : Linear and logistic regression : Using linear regression : Understanding linear regression 3 | 4 | pounds_lost = bc0 + b.cals * daily_cals_down 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00184_informalexample_7.2_of_section_7.1.1.math: 
-------------------------------------------------------------------------------- 1 | # informalexample 7.2 of section 7.1.1 2 | # (informalexample 7.2 of section 7.1.1) : Linear and logistic regression : Using linear regression : Understanding linear regression 3 | 4 | pounds_lost[i] = b0 + b.cals * daily_cals_down[i] + b.exercise * daily_exercise[i] 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00185_equation_7.1_of_section_7.1.1.math: -------------------------------------------------------------------------------- 1 | # equation 7.1 of section 7.1.1 2 | # (equation 7.1 of section 7.1.1) : Linear and logistic regression : Using linear regression : Understanding linear regression 3 | # Title: Equation 7.1 The expression for a linear regression model 4 | 5 | y[i] ~ f(x[i,]) + e[i] = b[0] + b[1] * x[i,1] + ... + b[n] * x[i,n] + e[i] 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00186_informalexample_7.3_of_section_7.1.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 7.3 of section 7.1.1 2 | # (informalexample 7.3 of section 7.1.1) : Linear and logistic regression : Using linear regression : Understanding linear regression 3 | 4 | x[i]^2 nearly equals b[0] + b[1] * x[i] 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00187_informalexample_7.4_of_section_7.1.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 7.4 of section 7.1.1 2 | # (informalexample 7.4 of section 7.1.1) : Linear and logistic regression : Using linear regression : Understanding linear regression 3 | 4 | x[i]^2 nearly equals -22 + 11 * x[i] 5 | 6 | 
-------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00190_example_7.3_of_section_7.1.3.R: -------------------------------------------------------------------------------- 1 | # example 7.3 of section 7.1.3 2 | # (example 7.3 of section 7.1.3) : Linear and logistic regression : Using linear regression : Making predictions 3 | # Title: Plotting residuals as a function of predicted log income 4 | 5 | ggplot(data = dtest, aes(x = predLogPINCP, 6 | y = predLogPINCP - log10(PINCP))) + 7 | geom_point(alpha = 0.2, color = "darkgray") + 8 | geom_smooth(color = "darkblue") + 9 | ylab("residual error (prediction - actual)") 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00191_example_7.4_of_section_7.1.3.R: -------------------------------------------------------------------------------- 1 | # example 7.4 of section 7.1.3 2 | # (example 7.4 of section 7.1.3) : Linear and logistic regression : Using linear regression : Making predictions 3 | # Title: Computing R-squared 4 | 5 | rsq <- function(y, f) { 1 - sum((y - f)^2)/sum((y - mean(y))^2) } 6 | 7 | rsq(log10(dtrain$PINCP), dtrain$predLogPINCP) # Note: 1 8 | ## [1] 0.2976165 9 | 10 | rsq(log10(dtest$PINCP), dtest$predLogPINCP) # Note: 2 11 | ## [1] 0.2911965 12 | 13 | # Note 1: 14 | # R-squared of the model on the training data 15 | 16 | # Note 2: 17 | # R-squared of the model on the test data 18 | 19 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00192_example_7.5_of_section_7.1.3.R: -------------------------------------------------------------------------------- 1 | # example 7.5 of section 7.1.3 2 | # (example 7.5 of section 7.1.3) : Linear and logistic regression : Using linear regression : Making predictions 3 | # Title: Calculating root mean square error 4 | 5 | 
rmse <- function(y, f) { sqrt(mean( (y-f)^2 )) } 6 | 7 | rmse(log10(dtrain$PINCP), dtrain$predLogPINCP) # Note: 1 8 | ## [1] 0.2685855 9 | 10 | rmse(log10(dtest$PINCP), dtest$predLogPINCP) # Note: 2 11 | ## [1] 0.2675129 12 | 13 | # Note 1: 14 | # RMSE of the model on the training data 15 | 16 | # Note 2: 17 | # RMSE of the model on the test data 18 | 19 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00193_informalexample_7.5_of_section_7.1.4.math: -------------------------------------------------------------------------------- 1 | # informalexample 7.5 of section 7.1.4 2 | # (informalexample 7.5 of section 7.1.4) : Linear and logistic regression : Using linear regression : Finding relations and extracting advice 3 | 4 | log10(income_bachelors) = log10(income_no_hs_degree) + 0.36 5 | log10(income_bachelors) - log10(income_no_hs_degree) = 0.36 6 | (income_bachelors) / (income_no_hs_degree) = 10^(0.36) 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00194_informalexample_7.6_of_section_7.1.4.math: -------------------------------------------------------------------------------- 1 | # informalexample 7.6 of section 7.1.4 2 | # (informalexample 7.6 of section 7.1.4) : Linear and logistic regression : Using linear regression : Finding relations and extracting advice 3 | 4 | log10(income_bachelors) - log10(income_no_hs_degree) = 0.36 5 | log10(income_hs) - log10(income_no_hs_degree) = 0.11 6 | 7 | log10(income_bachelors) - log10(income_hs) = 0.36 - 0.11 # Note: 1 8 | (income_bachelors) / (income_hs) = 10^(0.36 - 0.11) 9 | 10 | # Note 1: 11 | # Subtract the second equation from the first 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00195_informalexample_7.7_of_section_7.1.5.txt: 
-------------------------------------------------------------------------------- 1 | # informalexample 7.7 of section 7.1.5 2 | # (informalexample 7.7 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality 3 | 4 | Call: 5 | lm(formula = log10(PINCP) ~ AGEP + SEX + COW + SCHL, 6 | data = dtrain) 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00196_informalexample_7.8_of_section_7.1.5.txt: -------------------------------------------------------------------------------- 1 | # informalexample 7.8 of section 7.1.5 2 | # (informalexample 7.8 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality 3 | 4 | Residuals: 5 | Min 1Q Median 3Q Max 6 | -1.5038 -0.1354 0.0187 0.1710 0.9741 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00197_example_7.6_of_section_7.1.5.R: -------------------------------------------------------------------------------- 1 | # example 7.6 of section 7.1.5 2 | # (example 7.6 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality 3 | # Title: Summarizing residuals 4 | 5 | ( resids_train <- summary(log10(dtrain$PINCP) - predict(model, newdata = dtrain)) ) 6 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 7 | ## -1.5038 -0.1354 0.0187 0.0000 0.1710 0.9741 8 | 9 | ( resids_test <- summary(log10(dtest$PINCP) - predict(model, newdata = dtest)) ) 10 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 
11 | ## -1.789150 -0.130733 0.027413 0.006359 0.175847 0.912646 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00198_informalexample_7.9_of_section_7.1.5.txt: -------------------------------------------------------------------------------- 1 | # informalexample 7.9 of section 7.1.5 2 | # (informalexample 7.9 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality 3 | 4 | Residual standard error: 0.2688 on 11186 degrees of freedom 5 | Multiple R-squared: 0.2976, Adjusted R-squared: 0.2966 6 | F-statistic: 296.2 on 16 and 11186 DF, p-value: < 2.2e-16 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00199_informalexample_7.10_of_section_7.1.5.R: -------------------------------------------------------------------------------- 1 | # informalexample 7.10 of section 7.1.5 2 | # (informalexample 7.10 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality 3 | 4 | (df <- nrow(dtrain) - nrow(summary(model)$coefficients) ) 5 | ## [1] 11186 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00200_informalexample_7.11_of_section_7.1.5.R: -------------------------------------------------------------------------------- 1 | # informalexample 7.11 of section 7.1.5 2 | # (informalexample 7.11 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality 3 | 4 | (modelResidualError <- sqrt(sum(residuals(model)^2) / df)) 5 | ## [1] 0.2687895 6 | 7 | -------------------------------------------------------------------------------- 
/CodeExamples/c07_Linear_and_logistic_regression/00201_informalexample_7.12_of_section_7.2.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 7.12 of section 7.2.1 2 | # (informalexample 7.12 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression 3 | 4 | odds[flight_delayed] = P[flight_delayed == TRUE] / P[flight_delayed == FALSE] 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00202_informalexample_7.13_of_section_7.2.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 7.13 of section 7.2.1 2 | # (informalexample 7.13 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression 3 | 4 | log_odds[flight_delayed] = log(P[flight_delayed == TRUE] / P[flight_delayed == FALSE]) 5 | 6 | Let: p = P[flight_delayed == TRUE]; then 7 | log_odds[flight_delayed] = log( p / (1 - p) ) 8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00203_informalexample_7.14_of_section_7.2.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 7.14 of section 7.2.1 2 | # (informalexample 7.14 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression 3 | 4 | logit <- function(p) { log(p/(1-p)) } 5 | s <- function(x) { 1/(1 + exp(-x))} 6 | 7 | s(logit(0.7)) 8 | # [1] 0.7 9 | 10 | logit(s(-2)) 11 | # [1] -2 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00204_informalexample_7.15_of_section_7.2.1.math: -------------------------------------------------------------------------------- 1 | # 
informalexample 7.15 of section 7.2.1 2 | # (informalexample 7.15 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression 3 | 4 | logit(P[flight_delayed[i] == TRUE]) = b0 + b_origin * origin[i] + ... 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00205_informalexample_7.16_of_section_7.2.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 7.16 of section 7.2.1 2 | # (informalexample 7.16 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression 3 | 4 | P[flight_delayed[i] == TRUE] = s(b0 + b_origin * origin[i] + ...) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00206_equation_7.2_of_section_7.2.1.math: -------------------------------------------------------------------------------- 1 | # equation 7.2 of section 7.2.1 2 | # (equation 7.2 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression 3 | # Title: Equation 7.2 The expression for a logistic regression model 4 | 5 | P[y[i] in class of interest] ~ f(x[i,]) = s(a + b[1] * x[i,1] + ... 
+ b[n] * x[i,n]) 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00207_example_7.7_of_section_7.2.1.R: -------------------------------------------------------------------------------- 1 | # example 7.7 of section 7.2.1 2 | # (example 7.7 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression 3 | # Title: Loading the CDC data 4 | 5 | load("NatalRiskData.rData") 6 | train <- sdata[sdata$ORIGRANDGROUP <= 5 , ] 7 | test <- sdata[sdata$ORIGRANDGROUP > 5, ] 8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00208_example_7.8_of_section_7.2.2.R: -------------------------------------------------------------------------------- 1 | # example 7.8 of section 7.2.2 2 | # (example 7.8 of section 7.2.2) : Linear and logistic regression : Using logistic regression : Building a logistic regression model 3 | # Title: Building the model formula 4 | 5 | complications <- c("ULD_MECO","ULD_PRECIP","ULD_BREECH") 6 | riskfactors <- c("URF_DIAB", "URF_CHYPER", "URF_PHYPER", 7 | "URF_ECLAM") 8 | y <- "atRisk" 9 | x <- c("PWGT", 10 | "UPREVIS", 11 | "CIG_REC", 12 | "GESTREC3", 13 | "DPLURAL", 14 | complications, 15 | riskfactors) 16 | library(wrapr) 17 | fmla <- mk_formula(y, x) 18 | 19 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00209_example_7.9_of_section_7.2.2.R: -------------------------------------------------------------------------------- 1 | # example 7.9 of section 7.2.2 2 | # (example 7.9 of section 7.2.2) : Linear and logistic regression : Using logistic regression : Building a logistic regression model 3 | # Title: Fitting the logistic regression model 4 | 5 | print(fmla) 6 | 7 | ## atRisk ~ PWGT + UPREVIS + CIG_REC + GESTREC3 + DPLURAL + ULD_MECO + 8 | ## 
ULD_PRECIP + ULD_BREECH + URF_DIAB + URF_CHYPER + URF_PHYPER + 9 | ## URF_ECLAM 10 | ## 11 | 12 | model <- glm(fmla, data = train, family = binomial(link = "logit")) 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00210_example_7.10_of_section_7.2.3.R: -------------------------------------------------------------------------------- 1 | # example 7.10 of section 7.2.3 2 | # (example 7.10 of section 7.2.3) : Linear and logistic regression : Using logistic regression : Making predictions 3 | # Title: Applying the logistic regression model 4 | 5 | train$pred <- predict(model, newdata=train, type = "response") 6 | test$pred <- predict(model, newdata=test, type="response") 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00212_example_7.12_of_section_7.2.3.R: -------------------------------------------------------------------------------- 1 | # example 7.12 of section 7.2.3 2 | # (example 7.12 of section 7.2.3) : Linear and logistic regression : Using logistic regression : Making predictions 3 | # Title: Plotting distribution of prediction score grouped by known outcome 4 | 5 | library(WVPlots) 6 | DoubleDensityPlot(train, "pred", "atRisk", 7 | title = "Distribution of natality risk scores") 8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00216_informalexample_7.17_of_section_7.2.4.math: -------------------------------------------------------------------------------- 1 | # informalexample 7.17 of section 7.2.4 2 | # (informalexample 7.17 of section 7.2.4) : Linear and logistic regression : Using logistic regression : Finding relations and extracting advice from logistic models 3 | 4 | p = odds * (1 - p) = odds - p * odds 5 | p * (1 + odds) = odds 6 | p = odds/(1 + odds) 7 | 8 | 
-------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00218_informalexample_7.18_of_section_7.2.5.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.18 of section 7.2.5 2 | # (informalexample 7.18 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients 3 | 4 | Call: 5 | glm(formula = fmla, family = binomial(link = "logit"), data = train) 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00219_informalexample_7.19_of_section_7.2.5.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.19 of section 7.2.5 2 | # (informalexample 7.19 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients 3 | 4 | Deviance Residuals: 5 | Min 1Q Median 3Q Max 6 | -0.9732 -0.1818 -0.1511 -0.1358 3.2641 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00221_informalexample_7.21_of_section_7.2.5.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.21 of section 7.2.5 2 | # (informalexample 7.21 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients 3 | 4 | Null deviance: 2698.7 on 14211 degrees of freedom 5 | Residual deviance: 2463.0 on 14198 degrees of freedom 6 | AIC: 2491 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00223_example_7.18_of_section_7.2.5.R: 
-------------------------------------------------------------------------------- 1 | # example 7.18 of section 7.2.5 2 | # (example 7.18 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients 3 | # Title: Calculating the pseudo R-squared 4 | 5 | pr2 <- 1 - (resid.dev / null.dev) 6 | 7 | print(pr2) 8 | ## [1] 0.08734674 9 | pr2.test <- 1 - (resid.dev.test / null.dev.test) 10 | print(pr2.test) 11 | ## [1] 0.07760427 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00224_informalexample_7.22_of_section_7.2.5.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.22 of section 7.2.5 2 | # (informalexample 7.22 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients 3 | 4 | df.null = dim(train)[[1]] - 1 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00225_informalexample_7.23_of_section_7.2.5.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.23 of section 7.2.5 2 | # (informalexample 7.23 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients 3 | 4 | df.model = dim(train)[[1]] - length(model$coefficients) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00227_example_7.20_of_section_7.2.5.R: -------------------------------------------------------------------------------- 1 | # example 7.20 of section 7.2.5 2 | # (example 7.20 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and 
characterizing coefficients 3 | # Title: Calculating the Akaike information criterion 4 | 5 | aic <- 2 * (length(model$coefficients) - 6 | loglikelihood(as.numeric(train$atRisk), pred)) 7 | aic 8 | ## [1] 2490.992 9 | 10 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00228_informalexample_7.24_of_section_7.2.5.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.24 of section 7.2.5 2 | # (informalexample 7.24 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients 3 | 4 | Number of Fisher Scoring iterations: 7 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00229_informalexample_7.25_of_section_7.2.5.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.25 of section 7.2.5 2 | # (informalexample 7.25 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients 3 | 4 | Warning message: 5 | glm.fit: fitted probabilities numerically 0 or 1 occurred 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00231_example_7.22_of_section_7.3.1.R: -------------------------------------------------------------------------------- 1 | # example 7.22 of section 7.3.1 2 | # (example 7.22 of section 7.3.1) : Linear and logistic regression : Regularization : An example of quasi-separation 3 | # Title: Fitting a logistic regression model 4 | 5 | library(wrapr) 6 | (fmla <- mk_formula(outcome, vars) ) 7 | 8 | ## fail ~ car_price + maint_price + doors + persons + lug_boot + 9 | ## safety 10 | ## 11 | 12 | model_glm <- glm(fmla, 13 | data = cars_train, 14 | 
family = binomial) 15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00232_informalexample_7.26_of_section_7.3.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 7.26 of section 7.3.1 2 | # (informalexample 7.26 of section 7.3.1) : Linear and logistic regression : Regularization : An example of quasi-separation 3 | 4 | ## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00234_example_7.24_of_section_7.3.1.R: -------------------------------------------------------------------------------- 1 | # example 7.24 of section 7.3.1 2 | # (example 7.24 of section 7.3.1) : Linear and logistic regression : Regularization : An example of quasi-separation 3 | # Title: Looking at the logistic model’s coefficients 4 | 5 | coefs <- coef(model_glm)[-1] # Note: 1 6 | coef_frame <- data.frame(coef = names(coefs), 7 | value = coefs) 8 | 9 | library(ggplot2) 10 | ggplot(coef_frame, aes(x = coef, y = value)) + 11 | geom_pointrange(aes(ymin = 0, ymax = value)) + 12 | ggtitle("Coefficients of logistic regression model") + 13 | coord_flip() 14 | 15 | # Note 1: 16 | # Get the coefficients (except the intercept) 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00236_informalexample_7.27_of_section_7.3.2.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.27 of section 7.3.2 2 | # (informalexample 7.27 of section 7.3.2) : Linear and logistic regression : Regularization : The types of regularized regression 3 | 4 | f(x[i,]) = b[0] + b[1] x[i,1] + ... 
+ b[n] x[i,n] 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00237_informalexample_7.28_of_section_7.3.2.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.28 of section 7.3.2 2 | # (informalexample 7.28 of section 7.3.2) : Linear and logistic regression : Regularization : The types of regularized regression 3 | 4 | (y - f(x))^2 + lambda * (b[1]^2 + ... + b[n]^2) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00238_informalexample_7.29_of_section_7.3.2.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.29 of section 7.3.2 2 | # (informalexample 7.29 of section 7.3.2) : Linear and logistic regression : Regularization : The types of regularized regression 3 | 4 | (y - f(x))^2 + lambda * ( abs(b[1]) + abs(b[2]) + ... + abs(b[n]) ) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00239_informalexample_7.30_of_section_7.3.2.text: -------------------------------------------------------------------------------- 1 | # informalexample 7.30 of section 7.3.2 2 | # (informalexample 7.30 of section 7.3.2) : Linear and logistic regression : Regularization : The types of regularized regression 3 | 4 | (1 - alpha) * (b[1]^2 + ... + b[n]^2) + 5 | alpha * ( abs(b[1]) + abs(b[2]) + ... +
abs(b[n]) ) 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00243_informalexample_7.31_of_section_7.3.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 7.31 of section 7.3.3 2 | # (informalexample 7.31 of section 7.3.3) : Linear and logistic regression : Regularization : Regularized regression with glmnet 3 | 4 | prediction <- predict(model_ridge, 5 | newdata = cars_test, 6 | type="response", 7 | s = model_ridge$lambda.min) 8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00245_example_7.30_of_section_7.3.3.R: -------------------------------------------------------------------------------- 1 | # example 7.30 of section 7.3.3 2 | # (example 7.30 of section 7.3.3) : Linear and logistic regression : Regularization : Regularized regression with glmnet 3 | # Title: The lasso model's test performance 4 | 5 | ## $confusion_matrix 6 | ## prediction 7 | ## truth passed unacceptable 8 | ## passed 150 9 9 | ## unacceptable 17 323 10 | ## 11 | ## $accuracy 12 | ## [1] 0.9478958 13 | ## 14 | ## $deviance 15 | ## [1] 112.7308 16 | 17 | -------------------------------------------------------------------------------- /CodeExamples/c07_Linear_and_logistic_regression/00246_example_7.31_of_section_7.3.3.R: -------------------------------------------------------------------------------- 1 | # example 7.31 of section 7.3.3 2 | # (example 7.31 of section 7.3.3) : Linear and logistic regression : Regularization : Regularized regression with glmnet 3 | # Title: Crossvalidating for both alpha and lambda 4 | 5 | (elastic_net <- cva.glmnet(fmla, 6 | cars_train, 7 | family = "binomial")) 8 | ## Call: 9 | ## cva.glmnet.formula(formula = fmla, data = cars_train, family = "binomial") 10 | ## 11 | ## Model fitting options: 12 | ## Sparse model matrix: FALSE 13 | 
## Use model.frame: FALSE 14 | ## Alpha values: 0 0.001 0.008 0.027 0.064 0.125 0.216 0.343 0.512 0.729 1 15 | ## Number of crossvalidation folds for lambda: 10 16 | 17 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00250_informalexample_8.1_of_section_8.2.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.1 of section 8.2.1 2 | # (informalexample 8.1 of section 8.2.1) : Advanced data preparation : KDD and KDD Cup 2009 : Getting started with KDD Cup 2009 data 3 | 4 | outcome_summary <- table( 5 | churn = dTrain[, outcome], # Note: 1 6 | useNA = 'ifany') # Note: 2 7 | 8 | knitr::kable(outcome_summary) 9 | 10 | # Note 1: 11 | # Tabulate levels of churn outcome. 12 | 13 | # Note 2: 14 | # Include NA values in tabulation. 15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00251_informalexample_8.2_of_section_8.2.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.2 of section 8.2.1 2 | # (informalexample 8.2 of section 8.2.1) : Advanced data preparation : KDD and KDD Cup 2009 : Getting started with KDD Cup 2009 data 3 | 4 | outcome_summary["1"] / sum(outcome_summary) # Note: 1 5 | # 1 6 | # 0.07347764 7 | 8 | # Note 1: 9 | # Estimate observed churn rate or prevalence. 
10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00254_informalexample_8.3_of_section_8.2.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.3 of section 8.2.2 2 | # (informalexample 8.3 of section 8.2.2) : Advanced data preparation : KDD and KDD Cup 2009 : The bull in the china shop approach 3 | 4 | head(dTrainAll$Var200) 5 | # [1] vynJTq9 0v21jmy 6 | # 15415 Levels: _84etK_ _9bTOWp _A3VKFm _bq4Nkb _ct4nkXBMp ... zzQ9udm 7 | 8 | length(unique(dTrainAll$Var200)) 9 | # [1] 14391 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00256_example_8.5_of_section_8.3.R: -------------------------------------------------------------------------------- 1 | # example 8.5 of section 8.3 2 | # (example 8.5 of section 8.3) : Advanced data preparation : Basic data preparation for classification 3 | # Title: Preparing data with vtreat 4 | 5 | dTrain_treated <- prepare(treatment_plan, 6 | dTrain, 7 | parallelCluster = parallel_cluster) 8 | 9 | head(colnames(dTrain)) 10 | ## [1] "Var1" "Var2" "Var3" "Var4" "Var5" "Var6" 11 | head(colnames(dTrain_treated)) # Note: 1 12 | ## [1] "Var1" "Var1_isBAD" "Var2" "Var2_isBAD" "Var3" 13 | ## [6] "Var3_isBAD" 14 | 15 | # Note 1: 16 | # Compare the columns of the original dTrain data to its treated counterpart. 
17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00259_informalexample_8.6_of_section_8.3.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.6 of section 8.3.1 2 | # (informalexample 8.6 of section 8.3.1) : Advanced data preparation : Basic data preparation for classification : The variable score frame 3 | 4 | comparison <- data.frame(original218 = dTrain$Var218, 5 | impact218 = dTrain_treated$Var218_catB) 6 | 7 | head(comparison) 8 | ## original218 impact218 9 | ## 1 cJvF -0.2180735 10 | ## 2 1.5155125 11 | ## 3 UYBR 0.1221393 12 | ## 4 UYBR 0.1221393 13 | ## 5 UYBR 0.1221393 14 | ## 6 UYBR 0.1221393 15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00261_informalexample_8.8_of_section_8.3.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.8 of section 8.3.1 2 | # (informalexample 8.8 of section 8.3.1) : Advanced data preparation : Basic data preparation for classification : The variable score frame 3 | 4 | score_frame[score_frame$origName == "Var200", , drop = FALSE] 5 | 6 | # varName varMoves rsq sig needsSplit extraModelDegrees origName code 7 | # 361 Var200_catP TRUE 0.005729835 4.902546e-28 TRUE 13323 Var200 catP 8 | # 362 Var200_catB TRUE 0.001476298 2.516703e-08 TRUE 13323 Var200 catB 9 | # 428 Var200_lev_NA TRUE 0.005729838 4.902365e-28 FALSE 0 Var200 lev 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00262_informalexample_8.9_of_section_8.3.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.9 of section 8.3.2 2 | # (informalexample 8.9 of section 8.3.2) : Advanced data preparation : Basic data preparation for classification : 
Properly using the treatment plan 3 | 4 | dCal_treated <- prepare(treatment_plan, 5 | dCal, 6 | parallelCluster = parallel_cluster) 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00263_informalexample_8.10_of_section_8.3.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.10 of section 8.3.2 2 | # (informalexample 8.10 of section 8.3.2) : Advanced data preparation : Basic data preparation for classification : Properly using the treatment plan 3 | 4 | library("sigr") 5 | 6 | calcAUC(dTrain_treated$Var200_catB, dTrain_treated$churn) 7 | 8 | # [1] 0.8279249 9 | 10 | calcAUC(dCal_treated$Var200_catB, dCal_treated$churn) 11 | 12 | # [1] 0.5505401 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00265_informalexample_8.11_of_section_8.4.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.11 of section 8.4.1 2 | # (informalexample 8.11 of section 8.4.1) : Advanced data preparation : Advanced data preparation for classification : Using mkCrossFrameCExperiment() 3 | 4 | library("sigr") 5 | 6 | calcAUC(dTrainAll_treated$Var200_catB, dTrainAll_treated$churn) 7 | 8 | # [1] 0.5450466 9 | 10 | calcAUC(dTest_treated$Var200_catB, dTest_treated$churn) 11 | 12 | # [1] 0.5290295 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00269_informalexample_8.14_of_section_8.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.14 of section 8.4.2 2 | # (informalexample 8.14 of section 8.4.2) : Advanced data preparation : Advanced data preparation for classification : Building a model 3 | 4 | table(prediction = dTest_treated$glm_pred >= 0.5, 5 | truth = dTest$churn) 6 
| # truth 7 | # prediction -1 1 8 | # FALSE 4591 375 9 | # TRUE 8 1 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00270_informalexample_8.15_of_section_8.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.15 of section 8.4.2 2 | # (informalexample 8.15 of section 8.4.2) : Advanced data preparation : Advanced data preparation for classification : Building a model 3 | 4 | table(prediction = dTest_treated$glm_pred>0.15, 5 | truth = dTest$churn) 6 | # truth 7 | # prediction -1 1 8 | # FALSE 4243 266 9 | # TRUE 356 110 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00271_informalexample_8.16_of_section_8.4.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.16 of section 8.4.2 2 | # (informalexample 8.16 of section 8.4.2) : Advanced data preparation : Advanced data preparation for classification : Building a model 3 | 4 | WVPlots::DoubleDensityPlot(dTest_treated, "glm_pred", "churn", 5 | "glm prediction on test, double density plot") 6 | 7 | WVPlots::PRTPlot(dTest_treated, "glm_pred", "churn", 8 | "glm prediction on test, enrichment plot", 9 | truthTarget = 1, 10 | plotvars = c("enrichment", "recall"), 11 | thresholdrange = c(0, 1.0)) 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00272_informalexample_8.17_of_section_8.5.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.17 of section 8.5 2 | # (informalexample 8.17 of section 8.5) : Advanced data preparation : Preparing data for regression modeling 3 | 4 | auto_mpg <- readRDS('auto_mpg.RDS') 5 | 6 | knitr::kable(head(auto_mpg)) # Note: 1 7 | 8 | # Note 1: 9 | # Take a quick look at the 
data. 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00275_informalexample_8.20_of_section_8.6.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.20 of section 8.6.2 2 | # (informalexample 8.20 of section 8.6.2) : Advanced data preparation : Mastering the vtreat package : Missing values 3 | 4 | library("wrapr") # Note: 1 5 | 6 | d <- build_frame( 7 | "x1" , "x2" , "x3", "y" | 8 | 1 , "a" , 6 , 10 | 9 | NA_real_, "b" , 7 , 20 | 10 | 3 , NA_character_, 8 , 30 ) 11 | 12 | knitr::kable(d) 13 | 14 | # Note 1: 15 | # Bring in the wrapr package for build_frame and the 16 | # wrapr “dot pipe”. 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00276_informalexample_8.21_of_section_8.6.2.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.21 of section 8.6.2 2 | # (informalexample 8.21 of section 8.6.2) : Advanced data preparation : Mastering the vtreat package : Missing values 3 | 4 | plan1 <- vtreat::design_missingness_treatment(d) 5 | vtreat::prepare(plan1, d) %.>% # Note: 1 6 | knitr::kable(.) 7 | 8 | # Note 1: 9 | # Here we are using wrapr’s dot pipe instead of 10 | # magrittr’s forward pipe. The dot pipe requires the 11 | # “explicit dot argument” notation discussed in 12 | # chapter 5. 
13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00279_informalexample_8.24_of_section_8.6.4.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.24 of section 8.6.4 2 | # (informalexample 8.24 of section 8.6.4) : Advanced data preparation : Mastering the vtreat package : Impact coding 3 | 4 | plan4 <- vtreat::designTreatmentsC(d, 5 | varlist = c("x1", "x2", "x3"), 6 | outcomename = "y", 7 | outcometarget = 20, 8 | codeRestriction = "catB", 9 | verbose = FALSE) 10 | vtreat::prepare(plan4, d) 11 | # x2_catB y 12 | # 1 -8.517343 10 13 | # 2 9.903538 20 14 | # 3 -8.517343 30 15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00280_informalexample_8.25_of_section_8.6.5.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.25 of section 8.6.5 2 | # (informalexample 8.25 of section 8.6.5) : Advanced data preparation : Mastering the vtreat package : The treatment plan 3 | 4 | class(plan4) 5 | # [1] "treatmentplan" 6 | 7 | names(plan4) 8 | 9 | # [1] "treatments" "scoreFrame" "outcomename" "vtreatVersion" "outcomeType" 10 | # [6] "outcomeTarget" "meanY" "splitmethod" 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c08_Advanced_data_preparation/00281_informalexample_8.26_of_section_8.6.5.R: -------------------------------------------------------------------------------- 1 | # informalexample 8.26 of section 8.6.5 2 | # (informalexample 8.26 of section 8.6.5) : Advanced data preparation : Mastering the vtreat package : The treatment plan 3 | 4 | plan4$scoreFrame 5 | 6 | # varName varMoves rsq sig needsSplit extraModelDegrees origName code 7 | # 1 x2_catB TRUE 1 0.0506719 TRUE 2 x2 catB 8 | 9 | 
-------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00285_informalexample_9.1_of_section_9.1.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 9.1 of section 9.1.1 2 | # (informalexample 9.1 of section 9.1.1) : Unsupervised methods : Cluster analysis : Distances 3 | 4 | edist(x, y) <- sqrt((x[1] - y[1])^2 + (x[2] - y[2])^2 + ...) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00286_informalexample_9.2_of_section_9.1.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 9.2 of section 9.1.1 2 | # (informalexample 9.2 of section 9.1.1) : Unsupervised methods : Cluster analysis : Distances 3 | 4 | hdist(x, y) <- sum((x[1] != y[1]) + (x[2] != y[2]) + ...) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00287_informalexample_9.3_of_section_9.1.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 9.3 of section 9.1.1 2 | # (informalexample 9.3 of section 9.1.1) : Unsupervised methods : Cluster analysis : Distances 3 | 4 | mdist(x, y) <- sum(abs(x[1] - y[1]) + abs(x[2] - y[2]) + ...) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00288_informalexample_9.4_of_section_9.1.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 9.4 of section 9.1.1 2 | # (informalexample 9.4 of section 9.1.1) : Unsupervised methods : Cluster analysis : Distances 3 | 4 | dot(x, y) <- sum(x[1] * y[1] + x[2] * y[2] + ...) 
5 | cossim(x, y) <- dot(x, y) / (sqrt(dot(x, x) * dot(y, y))) 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00291_example_9.3_of_section_9.1.3.R: -------------------------------------------------------------------------------- 1 | # example 9.3 of section 9.1.3 2 | # (example 9.3 of section 9.1.3) : Unsupervised methods : Cluster analysis : Hierarchical clustering with hclust 3 | # Title: Hierarchical clustering 4 | 5 | distmat <- dist(pmatrix, method = "euclidean") # Note: 1 6 | pfit <- hclust(distmat, method = "ward.D") # Note: 2 7 | plot(pfit, labels = protein$Country) # Note: 3 8 | 9 | # Note 1: 10 | # Create the distance matrix. 11 | 12 | # Note 2: 13 | # Do the clustering. 14 | 15 | # Note 3: 16 | # Plot the dendrogram. 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00292_informalexample_9.5_of_section_9.1.3.Rtxt: -------------------------------------------------------------------------------- 1 | # informalexample 9.5 of section 9.1.3 2 | # (informalexample 9.5 of section 9.1.3) : Unsupervised methods : Cluster analysis : Hierarchical clustering with hclust 3 | 4 | rect.hclust(pfit, k=5) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00298_informalexample_9.6_of_section_9.1.3.math: -------------------------------------------------------------------------------- 1 | # informalexample 9.6 of section 9.1.3 2 | # (informalexample 9.6 of section 9.1.3) : Unsupervised methods : Cluster analysis : Hierarchical clustering with hclust 3 | 4 | BSS = TSS - WSS 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00300_informalexample_9.7_of_section_9.1.3.math: -------------------------------------------------------------------------------- 1 | # 
informalexample 9.7 of section 9.1.3 2 | # (informalexample 9.7 of section 9.1.3) : Unsupervised methods : Cluster analysis : Hierarchical clustering with hclust 3 | 4 | W = WSS / (n - k) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00301_informalexample_9.8_of_section_9.1.3.math: -------------------------------------------------------------------------------- 1 | # informalexample 9.8 of section 9.1.3 2 | # (informalexample 9.8 of section 9.1.3) : Unsupervised methods : Cluster analysis : Hierarchical clustering with hclust 3 | 4 | B = BSS / (k - 1) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00306_example_9.14_of_section_9.1.5.R: -------------------------------------------------------------------------------- 1 | # example 9.14 of section 9.1.5 2 | # (example 9.14 of section 9.1.5) : Unsupervised methods : Cluster analysis : Assigning new points to clusters 3 | # Title: A function to assign points to a cluster 4 | 5 | assign_cluster <- function(newpt, centers, xcenter = 0, xscale = 1) { 6 | xpt <- (newpt - xcenter) / xscale # Note: 1 7 | dists <- apply(centers, 1, FUN = function(c0) { sqr_edist(c0, xpt) }) # Note: 2 8 | which.min(dists) # Note: 3 9 | } 10 | 11 | # Note 1: 12 | # Center and scale the new data point (the defaults xcenter = 0, xscale = 1 leave it unchanged). 13 | 14 | # Note 2: 15 | # Calculate how far the new data point is from 16 | # each row of centers, using sqr_edist() (defined outside this listing). 17 | 18 | # Note 3: 19 | # Return the index of the smallest distance, i.e. the cluster number of the closest 20 | # centroid. 
21 | 22 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00308_example_9.16_of_section_9.1.5.R: -------------------------------------------------------------------------------- 1 | # example 9.16 of section 9.1.5 2 | # (example 9.16 of section 9.1.5) : Unsupervised methods : Cluster analysis : Assigning new points to clusters 3 | # Title: Unscale the centers 4 | 5 | unscaled = scale(tclusters$centers, center = FALSE, scale = 1 / tscale) 6 | rm_scales(scale(unscaled, center = -tcenter, scale = FALSE)) 7 | 8 | ## [,1] [,2] [,3] 9 | ## 1 9.8234797 -3.005977 4.7662651 10 | ## 2 -4.9749654 -4.862436 -5.0577002 11 | ## 3 0.8926698 1.185734 0.8336977 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00310_informalexample_9.9_of_section_9.2.2.txt: -------------------------------------------------------------------------------- 1 | # informalexample 9.9 of section 9.2.2 2 | # (informalexample 9.9 of section 9.2.2) : Unsupervised methods : Association rules : The example problem 3 | 4 | |token | userid| rating|title | 5 | |:---------------------|------:|------:|:---------------------| 6 | |always have popsicles | 172742| 0|Always Have Popsicles | 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00313_informalexample_9.10_of_section_9.2.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 9.10 of section 9.2.3 2 | # (informalexample 9.10 of section 9.2.3) : Unsupervised methods : Association rules : Mining association rules with the arules package 3 | 4 | basketSizes <- size(bookbaskets) 5 | summary(basketSizes) 6 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 
7 | ## 1.0 1.0 1.0 11.1 4.0 10250.0 8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00314_example_9.20_of_section_9.2.3.R: -------------------------------------------------------------------------------- 1 | # example 9.20 of section 9.2.3 2 | # (example 9.20 of section 9.2.3) : Unsupervised methods : Association rules : Mining association rules with the arules package 3 | # Title: Examining the size distribution 4 | 5 | quantile(basketSizes, probs = seq(0, 1, 0.1)) # Note: 1 6 | ## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 7 | ## 1 1 1 1 1 1 2 3 5 13 10253 8 | library(ggplot2) # Note: 2 9 | ggplot(data.frame(count = basketSizes)) + 10 | geom_density(aes(x = count)) + 11 | scale_x_log10() 12 | 13 | # Note 1: 14 | # Look at the basket size distribution, in 10% 15 | # increments. 16 | 17 | # Note 2: 18 | # Plot the distribution to get a better 19 | # look. 20 | 21 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00315_example_9.21_of_section_9.2.3.R: -------------------------------------------------------------------------------- 1 | # example 9.21 of section 9.2.3 2 | # (example 9.21 of section 9.2.3) : Unsupervised methods : Association rules : Mining association rules with the arules package 3 | # Title: Count how often each book occurs 4 | 5 | bookCount <- itemFrequency(bookbaskets, "absolute") 6 | summary(bookCount) 7 | 8 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 
9 | ## 1.000 1.000 1.000 4.638 3.000 2502.000 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00317_informalexample_9.11_of_section_9.2.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 9.11 of section 9.2.3 2 | # (informalexample 9.11 of section 9.2.3) : Unsupervised methods : Association rules : Mining association rules with the arules package 3 | 4 | bookbaskets_use <- bookbaskets[basketSizes > 1] 5 | dim(bookbaskets_use) 6 | ## [1] 40822 220447 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c09_Unsupervised_methods/00320_example_9.25_of_section_9.2.3.R: -------------------------------------------------------------------------------- 1 | # example 9.25 of section 9.2.3 2 | # (example 9.25 of section 9.2.3) : Unsupervised methods : Association rules : Mining association rules with the arules package 3 | # Title: Get the five most confident rules 4 | 5 | library(magrittr) # Note: 1 6 | 7 | rules %>% 8 | sort(., by = "confidence") %>% # Note: 2 9 | head(., n = 5) %>% # Note: 3 10 | inspect(.) # Note: 4 11 | 12 | # Note 1: 13 | # Attach magrittr to get pipe notation. 14 | 15 | # Note 2: 16 | # Sort rules by confidence. 17 | 18 | # Note 3: 19 | # Get the first 5 rules. 20 | 21 | # Note 4: 22 | # Call inspect() to pretty-print the rules. 
23 | 24 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00328_informalexample_10.2_of_section_10.1.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 10.2 of section 10.1.3 2 | # (informalexample 10.2 of section 10.1.3) : Exploring advanced methods : Tree-based methods : Using random forests to further improve prediction 3 | 4 | trainf <- rbind(trainperf_tree, trainperf_bag, trainperf_rf) 5 | pandoc.table(trainf, justify = perf_justify) 6 | ## 7 | ## 8 | ## model accuracy f1 dev.norm 9 | ## ---------------------- ---------- -------- ---------- 10 | ## tree, training 0.8996 0.8691 0.6304 11 | ## bagging, training 0.9160 0.8906 0.5106 12 | ## random forest, train 0.9884 0.9852 0.1440 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00329_informalexample_10.3_of_section_10.1.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 10.3 of section 10.1.3 2 | # (informalexample 10.3 of section 10.1.3) : Exploring advanced methods : Tree-based methods : Using random forests to further improve prediction 3 | 4 | testf <- rbind(testperf_tree, testperf_bag, testperf_rf) 5 | pandoc.table(testf, justify = perf_justify) 6 | ## 7 | ## 8 | ## model accuracy f1 dev.norm 9 | ## --------------------- ---------- -------- ---------- 10 | ## tree, test 0.8712 0.8280 0.7531 11 | ## bagging, test 0.9105 0.8791 0.5834 12 | ## random forest, test 0.9498 0.9341 0.3011 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00330_informalexample_10.4_of_section_10.1.3.R: -------------------------------------------------------------------------------- 1 | # informalexample 10.4 of section 10.1.3 2 | # (informalexample 10.4 of section 10.1.3) : 
Exploring advanced methods : Tree-based methods : Using random forests to further improve prediction 3 | 4 | difff <- data.frame(model = c("tree", "bagging", "random forest"), 5 | accuracy = trainf$accuracy - testf$accuracy, 6 | f1 = trainf$f1 - testf$f1, 7 | dev.norm = trainf$dev.norm - testf$dev.norm) 8 | 9 | pandoc.table(difff, justify=perf_justify) 10 | 11 | ## 12 | ## 13 | ## model accuracy f1 dev.norm 14 | ## --------------- ---------- --------- ---------- 15 | ## tree 0.028411 0.04111 -0.12275 16 | ## bagging 0.005523 0.01158 -0.07284 17 | ## random forest 0.038633 0.05110 -0.15711 18 | 19 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00336_informalexample_10.5_of_section_10.1.4.R: -------------------------------------------------------------------------------- 1 | # informalexample 10.5 of section 10.1.4 2 | # (informalexample 10.5 of section 10.1.4) : Exploring advanced methods : Tree-based methods : Gradient-boosted trees 3 | 4 | library(zeallot) 5 | c(texts, labels) %<-% readRDS("IMDBtrain.RDS") 6 | 7 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00337_informalexample_10.6_of_section_10.1.4.R: -------------------------------------------------------------------------------- 1 | # informalexample 10.6 of section 10.1.4 2 | # (informalexample 10.6 of section 10.1.4) : Exploring advanced methods : Tree-based methods : Gradient-boosted trees 3 | 4 | source("lime_imdb_example.R") 5 | vocab <- create_pruned_vocabulary(texts) 6 | dtm_train <- make_matrix(texts, vocab) 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00338_informalexample_10.7_of_section_10.1.4.R: -------------------------------------------------------------------------------- 1 | # informalexample 10.7 of section 10.1.4 2 | # (informalexample 10.7 of 
section 10.1.4) : Exploring advanced methods : Tree-based methods : Gradient-boosted trees 3 | 4 | cv <- xgb.cv(dtm_train, 5 | label = labels, 6 | params = list( 7 | objective = "binary:logistic" 8 | ), 9 | nfold = 5, 10 | nrounds = 500, 11 | early_stopping_rounds = 20, # Note: 1 12 | print_every_n = 10, 13 | metrics = "logloss") 14 | 15 | evalframe <- as.data.frame(cv$evaluation_log) 16 | (NROUNDS <- which.min(evalframe$test_logloss_mean)) 17 | ## [1] 319 18 | 19 | # Note 1: 20 | # Stop early if performance doesn’t improve for 21 | # 20 rounds. 22 | 23 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00342_example_10.11_of_section_10.1.4.R: -------------------------------------------------------------------------------- 1 | # example 10.11 of section 10.1.4 2 | # (example 10.11 of section 10.1.4) : Exploring advanced methods : Tree-based methods : Gradient-boosted trees 3 | # Title: Fit and apply an xgboost model for birth weight 4 | 5 | birthwt_model <- xgboost(as.matrix(train_treated), 6 | train$DBWT, 7 | params = list( 8 | objective = "reg:linear", 9 | base_score = mean(train$DBWT) 10 | ), 11 | nrounds = 50, 12 | verbose = FALSE) 13 | 14 | test_treated <- prepare(treatplan, test) 15 | pred <- predict(birthwt_model, as.matrix(test_treated)) 16 | 17 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00343_informalexample_10.9_of_section_10.2.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 10.9 of section 10.2.1 2 | # (informalexample 10.9 of section 10.2.1) : Exploring advanced methods : Using generalized additive models (GAMs) to learn non-monotone relationships : Understanding GAMs 3 | 4 | f(x[i, ]) = b0 + b[1] * x[i, 1] + b[2] * x[i, 2] + ... 
b[n] * x[i, n] 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00344_informalexample_10.10_of_section_10.2.1.math: -------------------------------------------------------------------------------- 1 | # informalexample 10.10 of section 10.2.1 2 | # (informalexample 10.10 of section 10.2.1) : Exploring advanced methods : Using generalized additive models (GAMs) to learn non-monotone relationships : Understanding GAMs 3 | 4 | f(x[i,]) = a0 + s_1(x[i, 1]) + s_2(x[i, 2]) + ... s_n(x[i, n]) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00345_example_10.12_of_section_10.2.2.R: -------------------------------------------------------------------------------- 1 | # example 10.12 of section 10.2.2 2 | # (example 10.12 of section 10.2.2) : Exploring advanced methods : Using generalized additive models (GAMs) to learn non-monotone relationships : A one-dimensional regression example 3 | # Title: Preparing an artificial problem 4 | 5 | set.seed(602957) 6 | 7 | x <- rnorm(1000) 8 | noise <- rnorm(1000, sd = 1.5) 9 | 10 | y <- 3 * sin(2 * x) + cos(0.75 * x) - 1.5 * (x^2) + noise 11 | 12 | select <- runif(1000) 13 | frame <- data.frame(y = y, x = x) 14 | 15 | train <- frame[select > 0.1, ] 16 | test <-frame[select <= 0.1, ] 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00349_example_10.16_of_section_10.2.3.R: -------------------------------------------------------------------------------- 1 | # example 10.16 of section 10.2.3 2 | # (example 10.16 of section 10.2.3) : Exploring advanced methods : Using generalized additive models (GAMs) to learn non-monotone relationships : Extracting the non-linear relationships 3 | # Title: Extracting a learned spline from a GAM 4 | 5 | sx <- predict(gam_model, type = "terms") 6 | 
summary(sx) 7 | ## s(x) 8 | ## Min. :-17.527035 9 | ## 1st Qu.: -2.378636 10 | ## Median : 0.009427 11 | ## Mean : 0.000000 12 | ## 3rd Qu.: 2.869166 13 | ## Max. : 4.084999 14 | 15 | xframe <- cbind(train, sx = sx[,1]) 16 | 17 | ggplot(xframe, aes(x = x)) + 18 | geom_point(aes(y = y), alpha = 0.4) + 19 | geom_line(aes(y = sx)) 20 | 21 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00353_example_10.20_of_section_10.2.5.R: -------------------------------------------------------------------------------- 1 | # example 10.20 of section 10.2.5 2 | # (example 10.20 of section 10.2.5) : Exploring advanced methods : Using generalized additive models (GAMs) to learn non-monotone relationships : Using GAM for logistic regression 3 | # Title: GLM logistic regression 4 | 5 | form <- as.formula("DBWT < 2000 ~ PWGT + WTGAIN + MAGER + UPREVIS") 6 | logmod <- glm(form, data = train, family = binomial(link = "logit")) 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00358_informalexample_10.11_of_section_10.3.2.math: -------------------------------------------------------------------------------- 1 | # informalexample 10.11 of section 10.3.2 2 | # (informalexample 10.11 of section 10.3.2) : Exploring advanced methods : Solving “inseparable” problems using support vector machines : Understanding support vector machines 3 | 4 | w %*% phi(x) + b >= 0 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00359_informalexample_10.12_of_section_10.3.2.math: -------------------------------------------------------------------------------- 1 | # informalexample 10.12 of section 10.3.2 2 | # (informalexample 10.12 of section 10.3.2) : Exploring advanced methods : Solving “inseparable” problems using support vector machines : Understanding support 
vector machines 3 | 4 | w %*% phi(x) + b >= u 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00360_informalexample_10.13_of_section_10.3.2.math: -------------------------------------------------------------------------------- 1 | # informalexample 10.13 of section 10.3.2 2 | # (informalexample 10.13 of section 10.3.2) : Exploring advanced methods : Solving “inseparable” problems using support vector machines : Understanding support vector machines 3 | 4 | w %*% phi(x) + b <= v 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00362_informalexample_10.14_of_section_10.3.3.math: -------------------------------------------------------------------------------- 1 | # informalexample 10.14 of section 10.3.3 2 | # (informalexample 10.14 of section 10.3.3) : Exploring advanced methods : Solving “inseparable” problems using support vector machines : Understanding kernel functions 3 | 4 | w = sum(a1 * phi(s1), ... , am * phi(sm)) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c10_Exploring_advanced_methods/00363_informalexample_10.15_of_section_10.3.3.math: -------------------------------------------------------------------------------- 1 | # informalexample 10.15 of section 10.3.3 2 | # (informalexample 10.15 of section 10.3.3) : Exploring advanced methods : Solving “inseparable” problems using support vector machines : Understanding kernel functions 3 | 4 | w %*% phi(x) + b = sum(a1 * k(s1, x),... 
, am * k(sm, x)) + b 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00365_informalexample_11.1_of_section_11.2.1.R: -------------------------------------------------------------------------------- 1 | # informalexample 11.1 of section 11.2.1 2 | # (informalexample 11.1 of section 11.2.1) : Documentation and deployment : Using R markdown to produce milestone documentation : What is R markdown? 3 | 4 | rmarkdown::render("Buzz_score_example.Rmd", rmarkdown::html_document()) 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00366_example_11.2_of_section_11.2.3.Rmd: -------------------------------------------------------------------------------- 1 | # example 11.2 of section 11.2.3 2 | # (example 11.2 of section 11.2.3) : Documentation and deployment : Using R markdown to produce milestone documentation : Using knitr to document the Buzz data and produce the model 3 | # Title: Using the system() command to compute a file hash 4 | 5 | ```{r dataprep} 6 | infile <- "TomsHardware-Relative-Sigma-500.data.txt" 7 | paste('checked at', date()) 8 | system(paste('shasum', infile), intern = TRUE) # Note: 1 9 | buzzdata <- read.table(infile, header = FALSE, sep = ",") 10 | ... 11 | 12 | # Note 1: 13 | # Run a system-installed cryptographic hash 14 | # program (this program is outside of R’s install 15 | # image). 
16 | 17 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00367_example_11.3_of_section_11.2.3.md: -------------------------------------------------------------------------------- 1 | # example 11.3 of section 11.2.3 2 | # (example 11.3 of section 11.2.3) : Documentation and deployment : Using R markdown to produce milestone documentation : Using knitr to document the Buzz data and produce the model 3 | # Title: Calculating model performance 4 | 5 | ``` r 6 | rtest <- data.frame(truth = buzztest$buzz, 7 | pred = predict(fmodel, newdata = buzztest, type = "prob")[, 2, drop = TRUE]) 8 | print(accuracyMeasures(rtest$pred, rtest$truth)) 9 | ``` 10 | 11 | ## [1] "precision= 0.832402234636871 ; recall= 0.84180790960452" 12 | ## pred 13 | ## truth FALSE TRUE 14 | ## 0 584 30 15 | ## 1 28 149 16 | ## model accuracy f1 dev.norm AUC 17 | ## 1 model 0.9266751 0.8370787 0.42056 0.9702102 18 | 19 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00370_informalexample_11.2_of_section_11.3.2.bash: -------------------------------------------------------------------------------- 1 | # informalexample 11.2 of section 11.3.2 2 | # (informalexample 11.2 of section 11.3.2) : Documentation and deployment : Using comments and version control for running documentation : Using version control to record history 3 | 4 | git add -A . # Note: 1 5 | git commit # Note: 2 6 | 7 | # Note 1: 8 | # Stage results to commit (specify what files 9 | # should be committed). 10 | 11 | # Note 2: 12 | # Actually perform the commit. 
13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00371_example_11.6_of_section_11.3.2.bash: -------------------------------------------------------------------------------- 1 | # example 11.6 of section 11.3.2 2 | # (example 11.6 of section 11.3.2) : Documentation and deployment : Using comments and version control for running documentation : Using version control to record history 3 | # Title: Checking your project status 4 | 5 | $ git status 6 | On branch master 7 | Your branch is up to date with 'origin/master'. 8 | 9 | nothing to commit, working tree clean 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00372_example_11.7_of_section_11.3.2.bash: -------------------------------------------------------------------------------- 1 | # example 11.7 of section 11.3.2 2 | # (example 11.7 of section 11.3.2) : Documentation and deployment : Using comments and version control for running documentation : Using version control to record history 3 | # Title: Checking your project history 4 | 5 | $ git log 6 | commit d22572281d40522bc6ab524bbdee497964ff4af0 (HEAD -> master, origin/master) 7 | Author: John Mount 8 | Date: Tue Apr 16 16:24:23 2019 -0700 9 | 10 | technical edits ch7 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00373_example_11.8_of_section_11.3.3.bash: -------------------------------------------------------------------------------- 1 | # example 11.8 of section 11.3.3 2 | # (example 11.8 of section 11.3.3) : Documentation and deployment : Using comments and version control for running documentation : Using version control to explore your project 3 | # Title: Finding who committed what 4 | 5 | git blame Buzz/buzzapp/server.R 6 | 4efb2b78 (John Mount 2019-04-24 16:22:43 -0700 1) # 7 | 4efb2b78 
(John Mount 2019-04-24 16:22:43 -0700 2) # This is the server logic of a Shiny web application. You can run the 8 | 4efb2b78 (John Mount 2019-04-24 16:22:43 -0700 3) # application by clicking 'Run App' above. 9 | 4efb2b78 (John Mount 2019-04-24 16:22:43 -0700 4) # 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00374_example_11.9_of_section_11.3.3.bash: -------------------------------------------------------------------------------- 1 | # example 11.9 of section 11.3.3 2 | # (example 11.9 of section 11.3.3) : Documentation and deployment : Using comments and version control for running documentation : Using version control to explore your project 3 | # Title: Finding line-based differences between two committed versions 4 | 5 | diff --git a/CDC/NatalBirthData.rData b/CDC/NatalBirthData.rData 6 | ... 7 | +++ b/CDC/prepBirthWeightData.R 8 | @@ -0,0 +1,83 @@ 9 | +data <- read.table("natal2010Sample.tsv.gz", 10 | + sep="\t", header = TRUE, stringsAsFactors = FALSE) 11 | + 12 | +# make a boolean from Y/N data 13 | +makevarYN = function(col) { 14 | + ifelse(col %in% c("", "U"), NA, col=="Y") 15 | +} 16 | ... 
17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00375_informalexample_11.3_of_section_11.3.3.bash: -------------------------------------------------------------------------------- 1 | # informalexample 11.3 of section 11.3.3 2 | # (informalexample 11.3 of section 11.3.3) : Documentation and deployment : Using comments and version control for running documentation : Using version control to explore your project 3 | 4 | git log --name-status -- Buzz/buzz.pdf 5 | commit 96503d8ca35a61ed9765edff9800fc9302554a3b 6 | Author: John Mount 7 | Date: Wed Apr 17 16:41:48 2019 -0700 8 | 9 | fix links and re-build Buzz example 10 | 11 | D Buzz/buzz.pdf 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00376_example_11.10_of_section_11.3.4.bash: -------------------------------------------------------------------------------- 1 | # example 11.10 of section 11.3.4 2 | # (example 11.10 of section 11.3.4) : Documentation and deployment : Using comments and version control for running documentation : Using version control to share work 3 | # Title: git remote 4 | 5 | $ git remote --verbose 6 | origin git@github.com:WinVector/PDSwR2.git (fetch) 7 | origin git@github.com:WinVector/PDSwR2.git (push) 8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00377_example_11.11_of_section_11.4.2.Rtxt: -------------------------------------------------------------------------------- 1 | # example 11.11 of section 11.4.2 2 | # (example 11.11 of section 11.4.2) : Documentation and deployment : Deploying models : Deploying models as HTTP services 3 | # Title: Buzz model as an R-based HTTP service 4 | 5 | library("randomForest") # Note: 1 6 | 7 | lst <- readRDS("thRS500.RDS") 8 | varslist <- lst$varslist 9 | fmodel <- lst$fmodel 10 | buzztest 
<- lst$buzztest 11 | rm(list = "lst") 12 | 13 | #* Score a data frame. 14 | #* @param d data frame to score 15 | #* @post /score_data 16 | function(d) { 17 | predict(fmodel, newdata = d, type = "prob") 18 | } 19 | 20 | # Note 1: 21 | # Attach the randomForest package, so we can run our randomForest model. 22 | 23 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00378_informalexample_11.4_of_section_11.4.2.Rtxt: -------------------------------------------------------------------------------- 1 | # informalexample 11.4 of section 11.4.2 2 | # (informalexample 11.4 of section 11.4.2) : Documentation and deployment : Deploying models : Deploying models as HTTP services 3 | 4 | library("plumber") 5 | r <- plumb("plumber.R") 6 | r$run(port=8000) 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/c11_Documentation_and_deployment/00380_informalexample_11.5_of_section_11.4.3.SQL: -------------------------------------------------------------------------------- 1 | # informalexample 11.5 of section 11.4.3 2 | # (informalexample 11.5 of section 11.4.3) : Documentation and deployment : Deploying models : Deploying models by export 3 | 4 | CASE 5 | WHEN (`num.displays_06` >= 1517.5 AND 6 | `avg.auths.per.disc_00` < 2.25 AND 7 | `num.displays_06` < 2075.0) THEN ('0') 8 | WHEN (`num.displays_03` >= 1114.5 AND 9 | `atomic.containers_01` < 9.5 AND 10 | `avg.auths.per.disc_00` >= 2.25 AND 11 | `num.displays_06` < 2075.0) THEN ('0') 12 | WHEN ... 
13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/x0A_Starting_with_R_and_other_tools/00382_informalexample_A.2_of_section_A.1.5.txt: -------------------------------------------------------------------------------- 1 | # informalexample A.2 of section A.1.5 2 | # (informalexample A.2 of section A.1.5) : Starting with R and other tools : Installing the tools : R resources 3 | 4 | install.packages('ctv', repos = 'https://cran.r-project.org') 5 | library('ctv') 6 | # install.views('TimeSeries') # can take a LONG time 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/x0A_Starting_with_R_and_other_tools/00383_example_A.1_of_section_A.2.R: -------------------------------------------------------------------------------- 1 | # example A.1 of section A.2 2 | # (example A.1 of section A.2) : Starting with R and other tools : Starting with R 3 | # Title: Trying a few R commands 4 | 5 | 1 6 | ## [1] 1 7 | 1/2 8 | ## [1] 0.5 9 | 'Joe' 10 | ## [1] "Joe" 11 | "Joe" 12 | ## [1] "Joe" 13 | "Joe"=='Joe' 14 | ## [1] TRUE 15 | c() 16 | ## NULL 17 | is.null(c()) 18 | ## [1] TRUE 19 | is.null(5) 20 | ## [1] FALSE 21 | c(1) 22 | ## [1] 1 23 | c(1, 2) 24 | ## [1] 1 2 25 | c("Apple", 'Orange') 26 | ## [1] "Apple" "Orange" 27 | length(c(1, 2)) 28 | ## [1] 2 29 | vec <- c(1, 2) 30 | vec 31 | ## [1] 1 2 32 | 33 | -------------------------------------------------------------------------------- /CodeExamples/x0A_Starting_with_R_and_other_tools/00384_informalexample_A.3_of_section_A.2.1.R: -------------------------------------------------------------------------------- 1 | # informalexample A.3 of section A.2.1 2 | # (informalexample A.3 of section A.2.1) : Starting with R and other tools : Starting with R : Primary features of R 3 | 4 | x <- 2 5 | x < - 3 6 | ## [1] FALSE 7 | print(x) 8 | ## [1] 2 9 | 10 | -------------------------------------------------------------------------------- 
/CodeExamples/x0A_Starting_with_R_and_other_tools/00385_example_A.2_of_section_A.2.1.R: -------------------------------------------------------------------------------- 1 | # example A.2 of section A.2.1 2 | # (example A.2 of section A.2.1) : Starting with R and other tools : Starting with R : Primary features of R 3 | # Title: Binding values to function arguments 4 | 5 | divide <- function(numerator,denominator) { numerator/denominator } 6 | divide(1, 2) 7 | ## [1] 0.5 8 | 9 | divide(2, 1) 10 | ## [1] 2 11 | 12 | divide(denominator = 2, numerator = 1) 13 | ## [1] 0.5 14 | 15 | divide(denominator <- 2, numerator <- 1) # wrong symbol <-, yields 2, a wrong answer! 16 | ## [1] 2 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/x0A_Starting_with_R_and_other_tools/00386_example_A.3_of_section_A.2.1.R: -------------------------------------------------------------------------------- 1 | # example A.3 of section A.2.1 2 | # (example A.3 of section A.2.1) : Starting with R and other tools : Starting with R : Primary features of R 3 | # Title: Demonstrating side effects 4 | 5 | x<-1 6 | good <- function() { x <- 5} 7 | good() 8 | print(x) 9 | ## [1] 1 10 | 11 | bad <- function() { x <<- 5} 12 | bad() 13 | print(x) 14 | ## [1] 5 15 | 16 | -------------------------------------------------------------------------------- /CodeExamples/x0A_Starting_with_R_and_other_tools/00387_example_A.4_of_section_A.2.1.R: -------------------------------------------------------------------------------- 1 | # example A.4 of section A.2.1 2 | # (example A.4 of section A.2.1) : Starting with R and other tools : Starting with R : Primary features of R 3 | # Title: R truth tables for Boolean operators 4 | 5 | c(TRUE, TRUE, FALSE, FALSE) == c(TRUE, FALSE, TRUE, FALSE) 6 | ## [1] TRUE FALSE FALSE TRUE 7 | 8 | c(TRUE, TRUE, FALSE, FALSE) & c(TRUE, FALSE, TRUE, FALSE) 9 | ## [1] TRUE FALSE FALSE FALSE 10 | 11 | c(TRUE, TRUE, FALSE, FALSE) | c(TRUE, 
FALSE, TRUE, FALSE) 12 | ## [1] TRUE TRUE TRUE FALSE 13 | 14 | -------------------------------------------------------------------------------- /CodeExamples/x0A_Starting_with_R_and_other_tools/00388_example_A.5_of_section_A.2.1.R: -------------------------------------------------------------------------------- 1 | # example A.5 of section A.2.1 2 | # (example A.5 of section A.2.1) : Starting with R and other tools : Starting with R : Primary features of R 3 | # Title: Call-by-value effect 4 | 5 | a <- c(1, 2) 6 | b <- a 7 | 8 | print(b) 9 | 10 | a[[1]] <- 5 # Note: 1 11 | 12 | print(a) 13 | 14 | print(b) # Note: 2 15 | 16 | # Note 1: 17 | # “Alter a”. Actually this is 18 | # implemented by building an entirely new vector and 19 | # reassigning a to refer to this new vector. The old 20 | # value remains as it was, and any references 21 | # continue to see the old unaltered value. 22 | 23 | # Note 2: 24 | # Notice b’s value is not 25 | # changed. 26 | 27 | -------------------------------------------------------------------------------- /CodeExamples/x0A_Starting_with_R_and_other_tools/00389_informalexample_A.4_of_section_A.2.2.R: -------------------------------------------------------------------------------- 1 | # informalexample A.4 of section A.2.2 2 | # (informalexample A.4 of section A.2.2) : Starting with R and other tools : Starting with R : Primary R data types 3 | 4 | vec <- c(2, 3) 5 | vec[[2]] <- 5 6 | print(vec) 7 | ## [1] 2 5 8 | 9 | -------------------------------------------------------------------------------- /CodeExamples/x0A_Starting_with_R_and_other_tools/00390_example_A.6_of_section_A.2.2.R: -------------------------------------------------------------------------------- 1 | # example A.6 of section A.2.2 2 | # (example A.6 of section A.2.2) : Starting with R and other tools : Starting with R : Primary R data types 3 | # Title: Examples of R indexing operators 4 | 5 | x <- list('a' = 6, b = 'fred') 6 | names(x) 7 | ## [1] "a" "b" 8 | x$a 9 | ## 
[1] 6 10 | x$b 11 | ## [1] "fred" 12 | x[['a']] 13 | ## [1] 6 14 | 15 | 16 | x[c('a', 'a', 'b', 'b')] 17 | ## $a 18 | ## [1] 6 19 | ## 20 | ## $a 21 | ## [1] 6 22 | ## 23 | ## $b 24 | ## [1] "fred" 25 | ## 26 | ## $b 27 | ## [1] "fred" 28 | 29 | -------------------------------------------------------------------------------- /CodeExamples/x0A_Starting_with_R_and_other_tools/00391_example_A.7_of_section_A.2.2.R: -------------------------------------------------------------------------------- 1 | # example A.7 of section A.2.2 2 | # (example A.7 of section A.2.2) : Starting with R and other tools : Starting with R : Primary R data types 3 | # Title: R’s treatment of unexpected factor levels 4 | 5 | factor('red', levels = c('red', 'orange')) 6 | ## [1] red 7 | ## Levels: red orange 8 | 9 | factor('apple', levels = c('red', 'orange')) 10 | ## [1] <NA> 11 | ## Levels: red orange 12 | 13 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00397_example_B.1_of_section_B.1.1.R: -------------------------------------------------------------------------------- 1 | # example B.1 of section B.1.1 2 | # (example B.1 of section B.1.1) : Important statistical concepts : Distributions : Normal distribution 3 | # Title: Plotting the theoretical normal density 4 | 5 | library(ggplot2) 6 | 7 | x <- seq(from=-5, to=5, length.out=100) # the interval [-5 5] 8 | f <- dnorm(x) # normal with mean 0 and sd 1 9 | ggplot(data.frame(x=x,y=f), aes(x=x,y=y)) + geom_line() 10 | 11 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00398_example_B.2_of_section_B.1.1.R: -------------------------------------------------------------------------------- 1 | # example B.2 of section B.1.1 2 | # (example B.2 of section B.1.1) : Important statistical concepts : Distributions : Normal distribution 3 | # Title: Plotting an empirical normal density 4 
| 5 | library(ggplot2) 6 | 7 | # draw 1000 points from a normal with mean 0, sd 1 8 | u <- rnorm(1000) 9 | 10 | # plot the distribution of points, 11 | # compared to normal curve as computed by dnorm() (dashed line) 12 | ggplot(data.frame(x=u), aes(x=x)) + geom_density() + 13 | geom_line(data=data.frame(x=x,y=f), aes(x=x,y=y), linetype=2) 14 | 15 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00399_example_B.3_of_section_B.1.1.R: -------------------------------------------------------------------------------- 1 | # example B.3 of section B.1.1 2 | # (example B.3 of section B.1.1) : Important statistical concepts : Distributions : Normal distribution 3 | # Title: Working with the normal cdf 4 | 5 | # --- estimate probabilities (areas) under the curve --- 6 | 7 | # 50% of the observations will be less than the mean 8 | pnorm(0) 9 | # [1] 0.5 10 | 11 | # about 2.3% of all observations are more than 2 standard 12 | # deviations below the mean 13 | pnorm(-2) 14 | # [1] 0.02275013 15 | 16 | # about 95.4% of all observations are within 2 standard deviations 17 | # from the mean 18 | pnorm(2) - pnorm(-2) 19 | # [1] 0.9544997 20 | 21 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00407_example_B.11_of_section_B.1.4.R: -------------------------------------------------------------------------------- 1 | # example B.11 of section B.1.4 2 | # (example B.11 of section B.1.4) : Important statistical concepts : Distributions : Binomial distribution 3 | # Title: Working with the binomial CDF 4 | 5 | # because this is a discrete probability distribution, 6 | # pbinom and qbinom are not exact inverses of each other 7 | 8 | # this direction works 9 | pbinom(45, nflips, 0.5) 10 | # [1] 0.1841008 11 | qbinom(0.1841008, nflips, 0.5) 12 | # [1] 45 13 | 14 | # this direction won't be exact 15 | qbinom(0.75, nflips, 0.5) 16 
| # [1] 53 17 | pbinom(53, nflips, 0.5) 18 | # [1] 0.7579408 19 | 20 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00408_informalexample_B.1_of_section_B.2.1.math: -------------------------------------------------------------------------------- 1 | # informalexample B.1 of section B.2.1 2 | # (informalexample B.1 of section B.2.1) : Important statistical concepts : Statistical theory : Statistical philosophy 3 | 4 | E[(y[i] - f(x[i, ]))^2] = bias^2 + variance + irreducibleError 5 | 6 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00409_example_B.12_of_section_B.2.2.R: -------------------------------------------------------------------------------- 1 | # example B.12 of section B.2.2 2 | # (example B.12 of section B.2.2) : Important statistical concepts : Statistical theory : A/B tests 3 | # Title: Building simulated A/B test data 4 | 5 | set.seed(123515) 6 | d <- rbind( # Note: 1 7 | data.frame(group = 'A', converted = rbinom(100000, size = 1, p = 0.05)), # Note: 2 8 | data.frame(group = 'B', converted = rbinom(10000, size = 1, p = 0.055)) # Note: 3 9 | ) 10 | 11 | # Note 1: 12 | # Build a data frame to store simulated 13 | # examples. 14 | 15 | # Note 2: 16 | # Add 100,000 examples from the A group 17 | # simulating a conversion rate of 5%. 18 | 19 | # Note 3: 20 | # Add 10,000 examples from the B group 21 | # simulating a conversion rate of 5.5%. 
22 | 23 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00410_example_B.13_of_section_B.2.2.R: -------------------------------------------------------------------------------- 1 | # example B.13 of section B.2.2 2 | # (example B.13 of section B.2.2) : Important statistical concepts : Statistical theory : A/B tests 3 | # Title: Summarizing the A/B test into a contingency table 4 | 5 | tab <- table(d) 6 | print(tab) 7 | ## converted 8 | ## group 0 1 9 | ## A 94979 5021 10 | ## B 9398 602 11 | 12 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00411_example_B.14_of_section_B.2.2.R: -------------------------------------------------------------------------------- 1 | # example B.14 of section B.2.2 2 | # (example B.14 of section B.2.2) : Important statistical concepts : Statistical theory : A/B tests 3 | # Title: Calculating the observed A and B rates 4 | 5 | aConversionRate <- tab['A','1']/sum(tab['A',]) 6 | print(aConversionRate) 7 | ## [1] 0.05021 8 | 9 | bConversionRate <- tab['B', '1'] / sum(tab['B', ]) 10 | print(bConversionRate) 11 | ## [1] 0.0602 12 | 13 | commonRate <- sum(tab[, '1']) / sum(tab) 14 | print(commonRate) 15 | ## [1] 0.05111818 16 | 17 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00412_example_B.15_of_section_B.2.2.R: -------------------------------------------------------------------------------- 1 | # example B.15 of section B.2.2 2 | # (example B.15 of section B.2.2) : Important statistical concepts : Statistical theory : A/B tests 3 | # Title: Calculating the significance of the observed difference in rates 4 | 5 | fisher.test(tab) 6 | 7 | ## Fisher's Exact Test for Count Data 8 | ## 9 | ## data: tab 10 | ## p-value = 2.469e-05 11 | ## alternative hypothesis: true odds ratio is not equal to 1 
12 | ## 95 percent confidence interval: 13 | ## 1.108716 1.322464 14 | ## sample estimates: 15 | ## odds ratio 16 | ## 1.211706 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00414_informalexample_B.2_of_section_B.2.3.R: -------------------------------------------------------------------------------- 1 | # informalexample B.2 of section B.2.3 2 | # (informalexample B.2 of section B.2.3) : Important statistical concepts : Statistical theory : Power of tests 3 | 4 | library(pwr) 5 | pwr.p.test(h = ES.h(p1 = 0.045, p2 = 0.04), 6 | sig.level = 0.05, 7 | power = 0.8, 8 | alternative = "greater") 9 | 10 | # proportion power calculation for binomial distribution (arcsine transformation) 11 | # 12 | # h = 0.02479642 13 | # n = 10055.18 14 | # sig.level = 0.05 15 | # power = 0.8 16 | # alternative = greater 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00416_example_B.18_of_section_B.2.4.R: -------------------------------------------------------------------------------- 1 | # example B.18 of section B.2.4 2 | # (example B.18 of section B.2.4) : Important statistical concepts : Statistical theory : Specialized statistical tests 3 | # Title: Calculating the (non)significance of the observed correlation 4 | 5 | with(d, cor(EarnedIncome, CapitalGains, method = 'spearman')) 6 | 7 | # [1] 0.03083108 8 | 9 | (ctest <- with(d, cor.test(EarnedIncome, CapitalGains, method = 'spearman'))) 10 | 11 | # 12 | # Spearman's rank correlation rho 13 | # 14 | #data: EarnedIncome and CapitalGains 15 | #S = 161512, p-value = 0.7604 16 | #alternative hypothesis: true rho is not equal to 0 17 | #sample estimates: 18 | # rho 19 | #0.03083108 20 | 21 | -------------------------------------------------------------------------------- 
/CodeExamples/x0B_Important_statistical_concepts/00417_informalexample_B.3_of_section_B.2.4.R: -------------------------------------------------------------------------------- 1 | # informalexample B.3 of section B.2.4 2 | # (informalexample B.3 of section B.2.4) : Important statistical concepts : Statistical theory : Specialized statistical tests 3 | 4 | sigr::wrapCorTest(ctest) 5 | 6 | # [1] "Spearman's rank correlation rho: (r=0.03083, p=n.s.)." 7 | 8 | -------------------------------------------------------------------------------- /CodeExamples/x0B_Important_statistical_concepts/00418_example_B.19_of_section_B.3.1.R: -------------------------------------------------------------------------------- 1 | # example B.19 of section B.3.1 2 | # (example B.19 of section B.3.1) : Important statistical concepts : Examples of the statistical view of data : Sampling bias 3 | # Title: Misleading significance result from biased observations 4 | 5 | veryHighIncome <- subset(d, EarnedIncome+CapitalGains>=500000) 6 | print(with(veryHighIncome,cor.test(EarnedIncome,CapitalGains, 7 | method='spearman'))) 8 | # 9 | # Spearman's rank correlation rho 10 | # 11 | #data: EarnedIncome and CapitalGains 12 | #S = 1046, p-value < 2.2e-16 13 | #alternative hypothesis: true rho is not equal to 0 14 | #sample estimates: 15 | # rho 16 | #-0.8678571 17 | 18 | -------------------------------------------------------------------------------- /CodeExamples/xFront_Matter_Practical_Data_Science_with_R/00001_informalexample_Front_Matter.1_of_section_Front_Matter.5.6.bash: -------------------------------------------------------------------------------- 1 | # informalexample Front Matter.1 of section Front Matter.5.6 2 | # (informalexample Front Matter.1 of section Front Matter.5.6) : Practical Data Science with R : About this book : Working with this book 3 | 4 | git clone https://github.com/WinVector/PDSwR2.git 5 | 6 | -------------------------------------------------------------------------------- 
/Custdata/custdata.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Custdata/custdata.RDS -------------------------------------------------------------------------------- /Custdata/hhdata.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Custdata/hhdata.RDS -------------------------------------------------------------------------------- /Custdata/median_income.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Custdata/median_income.RDS -------------------------------------------------------------------------------- /IMDB/IMDBtest.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/IMDB/IMDBtest.RDS -------------------------------------------------------------------------------- /IMDB/IMDBtrain.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/IMDB/IMDBtrain.RDS -------------------------------------------------------------------------------- /IMDB/README.md: -------------------------------------------------------------------------------- 1 | 2 | Script and data to create example data files for section 6.3.3 of *Practical Data Science with R, Second Edition* 3 | 4 | Original data source:: 5 | 6 | Extraction script adapted from Listing 6.8 of Francois Chollet, J.J. 
Allaire, *Deep Learning with R*, Manning 2018 7 | -------------------------------------------------------------------------------- /KDD2009/AnalysisOfKDD2009.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/AnalysisOfKDD2009.pdf -------------------------------------------------------------------------------- /KDD2009/KDD2009.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009.Rdata -------------------------------------------------------------------------------- /KDD2009/KDD2009.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-1.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-2.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-3.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-4.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-5.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-6.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-1.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-2.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-3.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-4.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-5.png -------------------------------------------------------------------------------- /KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-6.png -------------------------------------------------------------------------------- /KDD2009/orange_small_train.data.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/orange_small_train.data.gz -------------------------------------------------------------------------------- /LIME_iris/README_limeiris.md: -------------------------------------------------------------------------------- 1 | 2 | Script for running iris example from section 6.3.2 of *Practical Data Science with R, Second Edition* 3 | -------------------------------------------------------------------------------- /NotionalData/README.md: -------------------------------------------------------------------------------- 1 | 2 | Notional data used to show how some code works. Not important where the data comes from. 3 | -------------------------------------------------------------------------------- /NotionalData/exampleData.rData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/NotionalData/exampleData.rData -------------------------------------------------------------------------------- /PDSwR2.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /PUMS/ACS2016_PUMS_README.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/ACS2016_PUMS_README.pdf -------------------------------------------------------------------------------- /PUMS/PUMS1_dplyr_files/figure-markdown_github/unnamed-chunk-1-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMS1_dplyr_files/figure-markdown_github/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /PUMS/PUMS1_files/figure-markdown_github/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMS1_files/figure-markdown_github/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /PUMS/PUMS1_rquery_files/figure-markdown_github/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMS1_rquery_files/figure-markdown_github/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /PUMS/PUMS1_rquery_files/figure-markdown_github/unnamed-chunk-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMS1_rquery_files/figure-markdown_github/unnamed-chunk-1-2.png -------------------------------------------------------------------------------- /PUMS/PUMSDataDict16.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMSDataDict16.txt -------------------------------------------------------------------------------- /PUMS/PUMSsample.RDS: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMSsample.RDS -------------------------------------------------------------------------------- /PUMS/PUMSscatter1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMSscatter1.pdf -------------------------------------------------------------------------------- /PUMS/download/.gitignore: -------------------------------------------------------------------------------- 1 | csv_hus.zip 2 | csv_pus.zip 3 | dowload.bash 4 | ss13husa.csv 5 | ss13husb.csv 6 | ss13pusa.csv 7 | ss13pusb.csv 8 | ss16husa.csv.gz 9 | ss16husb.csv.gz 10 | ss16pusa.csv.gz 11 | ss16pusb.csv.gz 12 | -------------------------------------------------------------------------------- /PUMS/download/ACS2016_PUMS_README.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/download/ACS2016_PUMS_README.pdf -------------------------------------------------------------------------------- /PUMS/download/LoadPUMSAll_files/figure-markdown_github/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/download/LoadPUMSAll_files/figure-markdown_github/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /PUMS/download/LoadPUMSAll_files/figure-markdown_github/unnamed-chunk-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/download/LoadPUMSAll_files/figure-markdown_github/unnamed-chunk-1-2.png 
-------------------------------------------------------------------------------- /PUMS/download/PUMSDataDict16.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/download/PUMSDataDict16.txt -------------------------------------------------------------------------------- /PUMS/download/README.txt: -------------------------------------------------------------------------------- 1 | 2 | Prepare Census Data sample for use. 3 | 4 | * ReadDataDict.Rmd : read the census data dictionary into machine readable form, write as: ../data_dict.csv 5 | * LoadPUMSAll.Rmd : load all census data into database. 6 | * LoadPUMS.Rmd : extract a weighted person based sample with matching households, write ../ss16pus.RDS and ../ss16hus.RDS . 7 | * LoadPUMS_h.Rmd : extract a weighted person based sample with matching households, write ../ss16hus_h.RDS and ../ss16pus_h.RDS. 8 | -------------------------------------------------------------------------------- /PUMS/download/data_dict.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/download/data_dict.RDS -------------------------------------------------------------------------------- /PUMS/download/download.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | -------------------------------------------------------------------------------- /PUMS/dpus_std_employee.RDS: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/dpus_std_employee.RDS -------------------------------------------------------------------------------- /PUMS/incomedata.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/incomedata.rds -------------------------------------------------------------------------------- /PUMS/psub.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/psub.RDS -------------------------------------------------------------------------------- /PUMS/ss16hus.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/ss16hus.RDS -------------------------------------------------------------------------------- /PUMS/ss16hus_h.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/ss16hus_h.RDS -------------------------------------------------------------------------------- /PUMS/ss16pus.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/ss16pus.RDS -------------------------------------------------------------------------------- /PUMS/ss16pus_h.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/ss16pus_h.RDS -------------------------------------------------------------------------------- /PseudoLog10/.Rbuildignore: 
-------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /PseudoLog10/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: PseudoLog10 2 | Type: Package 3 | Title: What the Package Does (Title Case) 4 | Version: 0.1.0 5 | Author: Who wrote it 6 | Maintainer: The package maintainer 7 | Description: More about what it does (maybe more than one line) 8 | Use four spaces when indenting paragraphs within the Description. 9 | License: What license is it under? 10 | Encoding: UTF-8 11 | LazyData: true 12 | RoxygenNote: 6.1.1 13 | -------------------------------------------------------------------------------- /PseudoLog10/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(pseudoLog10) 4 | -------------------------------------------------------------------------------- /PseudoLog10/PseudoLog10.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /PseudoLog10/PseudoLog10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PseudoLog10/PseudoLog10.pdf 
-------------------------------------------------------------------------------- /RenderedExamples/.gitignore: -------------------------------------------------------------------------------- 1 | dpus_std_employee.RDS 2 | -------------------------------------------------------------------------------- /RenderedExamples/figure/00031_example_2.11_of_section_2.3.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00031_example_2.11_of_section_2.3.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00037_example_3.6_of_section_3.2.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00037_example_3.6_of_section_3.2.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00038_example_3.7_of_section_3.2.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00038_example_3.7_of_section_3.2.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00039_example_3.8_of_section_3.2.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00039_example_3.8_of_section_3.2.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00041_informalexample_3.2_of_section_3.2.1.R-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00041_informalexample_3.2_of_section_3.2.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00042_example_3.9_of_section_3.2.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00042_example_3.9_of_section_3.2.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00043_example_3.10_of_section_3.2.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00043_example_3.10_of_section_3.2.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00044_example_3.11_of_section_3.2.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00044_example_3.11_of_section_3.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00046_example_3.13_of_section_3.2.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00046_example_3.13_of_section_3.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00047_informalexample_3.3_of_section_3.2.2.R-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00047_informalexample_3.3_of_section_3.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00048_informalexample_3.4_of_section_3.2.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00048_informalexample_3.4_of_section_3.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00049_example_3.14_of_section_3.2.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00049_example_3.14_of_section_3.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-2.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-3.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-4.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00051_example_3.16_of_section_3.2.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00051_example_3.16_of_section_3.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00051_example_3.16_of_section_3.2.2.R-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00051_example_3.16_of_section_3.2.2.R-2.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00052_example_3.17_of_section_3.2.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00052_example_3.17_of_section_3.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00053_example_3.18_of_section_3.2.2.R-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00053_example_3.18_of_section_3.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00054_informalexample_3.5_of_section_3.2.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00054_informalexample_3.5_of_section_3.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00071_informalexample_5.2_of_section_5.1.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00071_informalexample_5.2_of_section_5.1.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00090_informalexample_5.21_of_section_5.2.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00090_informalexample_5.21_of_section_5.2.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00094_informalexample_5.25_of_section_5.2.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00094_informalexample_5.25_of_section_5.2.1.R-1.png -------------------------------------------------------------------------------- 
/RenderedExamples/figure/00099_informalexample_5.30_of_section_5.3.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00099_informalexample_5.30_of_section_5.3.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00132_informalexample_5.63_of_section_5.5.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00132_informalexample_5.63_of_section_5.5.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00135_informalexample_5.66_of_section_5.5.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00135_informalexample_5.66_of_section_5.5.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00138_informalexample_5.69_of_section_5.5.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00138_informalexample_5.69_of_section_5.5.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00157_example_6.9_of_section_6.2.5.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00157_example_6.9_of_section_6.2.5.R-1.png 
-------------------------------------------------------------------------------- /RenderedExamples/figure/00158_example_6.10_of_section_6.2.5.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00158_example_6.10_of_section_6.2.5.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00168_informalexample_6.8_of_section_6.3.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00168_informalexample_6.8_of_section_6.3.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00171_example_6.20_of_section_6.3.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00171_example_6.20_of_section_6.3.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00176_example_6.23_of_section_6.3.4.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00176_example_6.23_of_section_6.3.4.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00179_example_6.26_of_section_6.3.5.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00179_example_6.26_of_section_6.3.5.R-1.png 
-------------------------------------------------------------------------------- /RenderedExamples/figure/00180_informalexample_6.13_of_section_6.3.5.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00180_informalexample_6.13_of_section_6.3.5.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00181_example_6.27_of_section_6.3.5.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00181_example_6.27_of_section_6.3.5.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00181_example_6.27_of_section_6.3.5.R-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00181_example_6.27_of_section_6.3.5.R-2.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00189_example_7.2_of_section_7.1.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00189_example_7.2_of_section_7.1.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00190_example_7.3_of_section_7.1.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00190_example_7.3_of_section_7.1.3.R-1.png 
-------------------------------------------------------------------------------- /RenderedExamples/figure/00212_example_7.12_of_section_7.2.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00212_example_7.12_of_section_7.2.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00213_example_7.13_of_section_7.2.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00213_example_7.13_of_section_7.2.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00234_example_7.24_of_section_7.3.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00234_example_7.24_of_section_7.3.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00241_example_7.27_of_section_7.3.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00241_example_7.27_of_section_7.3.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00247_example_7.32_of_section_7.3.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00247_example_7.32_of_section_7.3.3.R-1.png 
-------------------------------------------------------------------------------- /RenderedExamples/figure/00271_informalexample_8.16_of_section_8.4.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00271_informalexample_8.16_of_section_8.4.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00271_informalexample_8.16_of_section_8.4.2.R-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00271_informalexample_8.16_of_section_8.4.2.R-2.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00291_example_9.3_of_section_9.1.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00291_example_9.3_of_section_9.1.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00294_example_9.5_of_section_9.1.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00294_example_9.5_of_section_9.1.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00297_example_9.8_of_section_9.1.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00297_example_9.8_of_section_9.1.3.R-1.png 
-------------------------------------------------------------------------------- /RenderedExamples/figure/00299_example_9.9_of_section_9.1.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00299_example_9.9_of_section_9.1.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00302_example_9.10_of_section_9.1.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00302_example_9.10_of_section_9.1.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00314_example_9.20_of_section_9.2.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00314_example_9.20_of_section_9.2.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00324_example_10.1_of_section_10.1.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00324_example_10.1_of_section_10.1.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00331_example_10.4_of_section_10.1.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00331_example_10.4_of_section_10.1.3.R-1.png 
-------------------------------------------------------------------------------- /RenderedExamples/figure/00334_example_10.7_of_section_10.1.4.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00334_example_10.7_of_section_10.1.4.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00346_example_10.13_of_section_10.2.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00346_example_10.13_of_section_10.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00347_example_10.14_of_section_10.2.2.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00347_example_10.14_of_section_10.2.2.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00349_example_10.16_of_section_10.2.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00349_example_10.16_of_section_10.2.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00351_example_10.18_of_section_10.2.4.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00351_example_10.18_of_section_10.2.4.R-1.png 
-------------------------------------------------------------------------------- /RenderedExamples/figure/00355_example_10.22_of_section_10.3.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00355_example_10.22_of_section_10.3.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00356_example_10.23_of_section_10.3.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00356_example_10.23_of_section_10.3.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00357_example_10.24_of_section_10.3.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00357_example_10.24_of_section_10.3.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00397_example_B.1_of_section_B.1.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00397_example_B.1_of_section_B.1.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00398_example_B.2_of_section_B.1.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00398_example_B.2_of_section_B.1.1.R-1.png 
-------------------------------------------------------------------------------- /RenderedExamples/figure/00400_example_B.4_of_section_B.1.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00400_example_B.4_of_section_B.1.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00401_example_B.5_of_section_B.1.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00401_example_B.5_of_section_B.1.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00402_example_B.6_of_section_B.1.3.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00402_example_B.6_of_section_B.1.3.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00403_example_B.7_of_section_B.1.4.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00403_example_B.7_of_section_B.1.4.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00404_example_B.8_of_section_B.1.4.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00404_example_B.8_of_section_B.1.4.R-1.png 
-------------------------------------------------------------------------------- /RenderedExamples/figure/00419_example_B.20_of_section_B.3.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00419_example_B.20_of_section_B.3.1.R-1.png -------------------------------------------------------------------------------- /RenderedExamples/figure/00419_example_B.20_of_section_B.3.1.R-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00419_example_B.20_of_section_B.3.1.R-2.png -------------------------------------------------------------------------------- /SQLExample/HotelRelation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/SQLExample/HotelRelation.pdf -------------------------------------------------------------------------------- /SQLExample/Workbook1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/SQLExample/Workbook1.xlsx -------------------------------------------------------------------------------- /SQLExample/figure/allsteps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/SQLExample/figure/allsteps.png -------------------------------------------------------------------------------- /SQLExample/h2-1.3.170.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/SQLExample/h2-1.3.170.jar -------------------------------------------------------------------------------- /SQLExample/h2demodb_h2.h2.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/SQLExample/h2demodb_h2.h2.db -------------------------------------------------------------------------------- /Spirals/Spirals.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /Spirals/c10_SVM_files/figure-markdown_github/00433_example_10.22_of_section_10.3.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/00433_example_10.22_of_section_10.3.1.R-1.png -------------------------------------------------------------------------------- /Spirals/c10_SVM_files/figure-markdown_github/00434_example_10.23_of_section_10.3.1.R-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/00434_example_10.23_of_section_10.3.1.R-1.png -------------------------------------------------------------------------------- /Spirals/c10_SVM_files/figure-markdown_github/00435_example_10.24_of_section_10.3.1.R-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/00435_example_10.24_of_section_10.3.1.R-1.png -------------------------------------------------------------------------------- /Spirals/c10_SVM_files/figure-markdown_github/large_mu-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/large_mu-1.png -------------------------------------------------------------------------------- /Spirals/c10_SVM_files/figure-markdown_github/large_nu-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/large_nu-1.png -------------------------------------------------------------------------------- /Spirals/c10_SVM_files/figure-markdown_github/small_mu-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/small_mu-1.png -------------------------------------------------------------------------------- /Spirals/c10_SVM_files/figure-markdown_github/small_nu-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/small_nu-1.png -------------------------------------------------------------------------------- /Spirals/c10_SVM_files/figure-markdown_github/xgboost-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/xgboost-1.png -------------------------------------------------------------------------------- /Starting_with_R_and_Other_Tools.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Starting_with_R_and_Other_Tools.pdf -------------------------------------------------------------------------------- /Statlog/Chapter_1_Example_files/figure-markdown_github/present_model-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Statlog/Chapter_1_Example_files/figure-markdown_github/present_model-1.png -------------------------------------------------------------------------------- /Statlog/GCDData.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Statlog/GCDData.RData -------------------------------------------------------------------------------- /Statlog/Statlog.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /Statlog/creditdata.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Statlog/creditdata.RDS 
-------------------------------------------------------------------------------- /Statlog/loan_model_example.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Statlog/loan_model_example.RData -------------------------------------------------------------------------------- /auto_mpg/UCI_Auto_MPG.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/auto_mpg/UCI_Auto_MPG.pdf -------------------------------------------------------------------------------- /auto_mpg/auto_mpg.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/auto_mpg/auto_mpg.RDS -------------------------------------------------------------------------------- /bioavailability/Caco-2 Permeability Assay.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/Caco-2 Permeability Assay.pdf -------------------------------------------------------------------------------- /bioavailability/Figure4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/Figure4.gif -------------------------------------------------------------------------------- /bioavailability/WebPlotDigitizer.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/WebPlotDigitizer.pdf 
-------------------------------------------------------------------------------- /bioavailability/figure/graph1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/figure/graph1.png -------------------------------------------------------------------------------- /bioavailability/figure/graphT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/figure/graphT.png -------------------------------------------------------------------------------- /bioavailability/figure/model1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/figure/model1.png -------------------------------------------------------------------------------- /bioavailability/figure/synth1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/figure/synth1.png -------------------------------------------------------------------------------- /bioavailability/figure/synthP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/figure/synthP.png -------------------------------------------------------------------------------- /bioavailability/synth.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/synth.RData 
-------------------------------------------------------------------------------- /cricketchirps/README.txt: -------------------------------------------------------------------------------- 1 | Cricket Chirps Vs. Temperature 2 | 3 | In the following data: 4 | chirp_rate = chirps/sec for the striped ground cricket 5 | temperatureF = temperature in degrees Fahrenheit 6 | Reference: The Song of Insects by Dr. G.W. Pierce, Harvard College Press 7 | 8 | Data found (typed in) here: 9 | https://mathbits.com/MathBits/TISection/Statistics2/linearREAL.htm 10 | -------------------------------------------------------------------------------- /cricketchirps/crickets.csv: -------------------------------------------------------------------------------- 1 | chirp_rate,temperatureF 20,88.59999847 16,71.59999847 19.79999924,93.30000305 18.39999962,84.30000305 17.10000038,80.59999847 15.5,75.19999695 14.69999981,69.69999695 17.10000038,82 15.39999962,69.40000153 16.20000076,83.30000305 15,79.59999847 17.20000076,82.59999847 16,80.59999847 17,83.5 14.39999962,76.30000305 --------------------------------------------------------------------------------