├── .gitignore
├── BMI
├── BMI.Rmd
├── BMI.Rproj
├── BMI.md
├── BMI_files
│ └── figure-markdown_github
│ │ ├── unnamed-chunk-2-1.png
│ │ └── unnamed-chunk-2-2.png
└── bmi_data.RDS
├── BestOffers
├── BestOffers.Rmd
├── BestOffers.Rproj
├── BestOffers.md
└── MonetDBLite_diagram.svg
├── Bookdata
├── README.md
├── bookdata.tsv.gz
├── bxBooks.RData
├── create_bookdata.R
└── read_bookcrossing.R
├── Buzz
├── .gitignore
├── Buzz.Rproj
├── BuzzDataSetDoc.pdf
├── Buzz_score_example.Rmd
├── Buzz_score_example.html
├── Buzz_score_example.md
├── Buzz_score_example_files
│ └── figure-markdown_github
│ │ ├── unnamed-chunk-2-1.png
│ │ └── unnamed-chunk-3-1.png
├── PeerPresentation.pdf
├── PeerPresentation_withNotes.pdf
├── ProjectSponsorPresentation.pdf
├── ProjectSponsorPresentation_withNotes.pdf
├── RCurl_client_example.Rmd
├── RCurl_client_example.md
├── README.md
├── TomsHardware-Absolute-Sigma-500.data.txt
├── TomsHardware-Absolute-Sigma-500.names.txt
├── TomsHardware-Relative-Sigma-500.data.txt
├── TomsHardware-Relative-Sigma-500.names.txt
├── UserPresentation.pdf
├── UserPresentation_withNotes.pdf
├── buzz_sample.csv
├── buzzapp
│ ├── buzzapp.Rproj
│ ├── buzzutils.R
│ ├── server.R
│ └── ui.R
├── buzzm.Rmd
├── buzzm.html
├── buzzm.md
├── buzzm_files
│ └── figure-markdown_github
│ │ ├── model-1.png
│ │ ├── model-2.png
│ │ ├── plottest-1.png
│ │ └── plottrain-1.png
├── figure
│ ├── unnamed-chunk-2-1.png
│ └── unnamed-chunk-3-1.png
├── model_export.Rmd
├── model_export.md
├── model_export_files
│ └── figure-markdown_github
│ │ └── unnamed-chunk-4-1.png
├── plumber.R
├── rf_tree_1_plot.pdf
├── rfmodel.tsv
└── thRS500.RDS
├── CDC
├── NatalBirthData.rData
├── NatalRiskData.rData
├── PrepNatalRiskData.R
├── README.md
├── UserGuide2010.pdf
├── gamSplinePlots.R
├── loadExample
│ ├── README.md
│ ├── SQLScrewdriver.jar
│ ├── dbDef.xml
│ ├── fieldRanges.tsv
│ └── h2-1.3.170.jar
├── natal2010Sample.tsv.gz
└── prepBirthWeightData.R
├── CodeExamples.zip
├── CodeExamples
├── README.txt
├── c01_The_data_science_process
│ └── 00002_example_1.1_of_section_1.2.4.R
├── c02_Starting_with_R_and_data
│ ├── 00003_informalexample_2.1_of_section_2.1.2.R
│ ├── 00004_informalexample_2.2_of_section_2.1.2.R
│ ├── 00005_informalexample_2.3_of_section_2.1.2.R
│ ├── 00006_informalexample_2.4_of_section_2.1.2.R
│ ├── 00007_informalexample_2.5_of_section_2.1.2.R
│ ├── 00008_informalexample_2.6_of_section_2.1.2.R
│ ├── 00009_informalexample_2.7_of_section_2.1.2.R
│ ├── 00010_informalexample_2.8_of_section_2.1.2.R
│ ├── 00011_informalexample_2.9_of_section_2.1.2.R
│ ├── 00012_informalexample_2.10_of_section_2.1.2.R
│ ├── 00013_informalexample_2.11_of_section_2.1.2.R
│ ├── 00014_informalexample_2.12_of_section_2.2.1.txt
│ ├── 00015_example_2.1_of_section_2.2.1.R
│ ├── 00016_example_2.2_of_section_2.2.1.R
│ ├── 00017_informalexample_2.13_of_section_2.2.2.txt
│ ├── 00018_example_2.3_of_section_2.2.2.R
│ ├── 00019_example_2.4_of_section_2.2.2.R
│ ├── 00020_informalexample_2.14_of_section_2.2.2.txt
│ ├── 00021_example_2.5_of_section_2.2.2.R
│ ├── 00022_example_2.6_of_section_2.2.2.R
│ ├── 00023_example_2.7_of_section_2.3.1.txt
│ ├── 00024_example_2.8_of_section_2.3.1.R
│ ├── 00025_example_2.9_of_section_2.3.1.R
│ ├── 00026_example_2.10_of_section_2.3.1.R
│ ├── 00027_informalexample_2.15_of_section_2.3.1.R
│ ├── 00028_informalexample_2.16_of_section_2.3.1.R
│ ├── 00029_informalexample_2.17_of_section_2.3.1.R
│ ├── 00030_informalexample_2.18_of_section_2.3.1.R
│ └── 00031_example_2.11_of_section_2.3.1.R
├── c03_Exploring_data
│ ├── 00032_example_3.1_of_section_3.1.R
│ ├── 00033_example_3.2_of_section_3.1.1.R
│ ├── 00034_example_3.3_of_section_3.1.1.R
│ ├── 00035_example_3.4_of_section_3.1.1.R
│ ├── 00036_example_3.5_of_section_3.1.1.R
│ ├── 00037_example_3.6_of_section_3.2.1.R
│ ├── 00038_example_3.7_of_section_3.2.1.R
│ ├── 00039_example_3.8_of_section_3.2.1.R
│ ├── 00040_informalexample_3.1_of_section_3.2.1.txt
│ ├── 00041_informalexample_3.2_of_section_3.2.1.R
│ ├── 00042_example_3.9_of_section_3.2.1.R
│ ├── 00043_example_3.10_of_section_3.2.1.R
│ ├── 00044_example_3.11_of_section_3.2.2.R
│ ├── 00045_example_3.12_of_section_3.2.2.R
│ ├── 00046_example_3.13_of_section_3.2.2.R
│ ├── 00047_informalexample_3.3_of_section_3.2.2.R
│ ├── 00048_informalexample_3.4_of_section_3.2.2.R
│ ├── 00049_example_3.14_of_section_3.2.2.R
│ ├── 00050_example_3.15_of_section_3.2.2.R
│ ├── 00051_example_3.16_of_section_3.2.2.R
│ ├── 00052_example_3.17_of_section_3.2.2.R
│ ├── 00053_example_3.18_of_section_3.2.2.R
│ └── 00054_informalexample_3.5_of_section_3.2.2.R
├── c04_Managing_data
│ ├── 00055_example_4.1_of_section_4.1.1.R
│ ├── 00056_example_4.2_of_section_4.1.1.R
│ ├── 00057_example_4.3_of_section_4.1.2.R
│ ├── 00058_informalexample_4.1_of_section_4.1.3.R
│ ├── 00059_example_4.4_of_section_4.1.3.R
│ ├── 00060_example_4.5_of_section_4.1.3.R
│ ├── 00061_example_4.6_of_section_4.1.3.R
│ ├── 00062_example_4.7_of_section_4.2.R
│ ├── 00063_example_4.8_of_section_4.2.1.R
│ ├── 00064_example_4.9_of_section_4.2.2.R
│ ├── 00065_example_4.10_of_section_4.2.2.R
│ ├── 00066_example_4.11_of_section_4.2.2.R
│ ├── 00067_informalexample_4.2_of_section_4.2.3.R
│ ├── 00068_example_4.12_of_section_4.3.2.R
│ └── 00069_example_4.13_of_section_4.3.3.R
├── c05_Data_Engineering_and_Data_Shaping
│ ├── 00070_informalexample_5.1_of_section_5.1.1.R
│ ├── 00071_informalexample_5.2_of_section_5.1.1.R
│ ├── 00072_informalexample_5.3_of_section_5.1.1.R
│ ├── 00073_informalexample_5.4_of_section_5.1.1.R
│ ├── 00074_informalexample_5.5_of_section_5.1.1.R
│ ├── 00075_informalexample_5.6_of_section_5.1.1.R
│ ├── 00076_informalexample_5.7_of_section_5.1.2.R
│ ├── 00077_informalexample_5.8_of_section_5.1.2.R
│ ├── 00078_informalexample_5.9_of_section_5.1.2.R
│ ├── 00079_informalexample_5.10_of_section_5.1.2.R
│ ├── 00080_informalexample_5.11_of_section_5.1.2.R
│ ├── 00081_informalexample_5.12_of_section_5.1.3.R
│ ├── 00082_informalexample_5.13_of_section_5.1.3.R
│ ├── 00083_informalexample_5.14_of_section_5.1.3.R
│ ├── 00084_informalexample_5.15_of_section_5.1.3.R
│ ├── 00085_informalexample_5.16_of_section_5.1.3.R
│ ├── 00086_informalexample_5.17_of_section_5.1.3.R
│ ├── 00087_informalexample_5.18_of_section_5.1.3.R
│ ├── 00088_informalexample_5.19_of_section_5.2.1.R
│ ├── 00089_informalexample_5.20_of_section_5.2.1.R
│ ├── 00090_informalexample_5.21_of_section_5.2.1.R
│ ├── 00091_informalexample_5.22_of_section_5.2.1.R
│ ├── 00092_informalexample_5.23_of_section_5.2.1.R
│ ├── 00093_informalexample_5.24_of_section_5.2.1.R
│ ├── 00094_informalexample_5.25_of_section_5.2.1.R
│ ├── 00095_informalexample_5.26_of_section_5.2.1.R
│ ├── 00096_informalexample_5.27_of_section_5.2.1.R
│ ├── 00097_informalexample_5.28_of_section_5.2.2.R
│ ├── 00098_informalexample_5.29_of_section_5.3.1.R
│ ├── 00099_informalexample_5.30_of_section_5.3.1.R
│ ├── 00100_informalexample_5.31_of_section_5.3.1.R
│ ├── 00101_informalexample_5.32_of_section_5.3.1.R
│ ├── 00102_informalexample_5.33_of_section_5.3.1.R
│ ├── 00103_informalexample_5.34_of_section_5.3.1.R
│ ├── 00104_informalexample_5.35_of_section_5.3.1.R
│ ├── 00105_informalexample_5.36_of_section_5.4.1.R
│ ├── 00106_informalexample_5.37_of_section_5.4.1.R
│ ├── 00107_informalexample_5.38_of_section_5.4.1.R
│ ├── 00108_informalexample_5.39_of_section_5.4.1.R
│ ├── 00109_informalexample_5.40_of_section_5.4.1.R
│ ├── 00110_informalexample_5.41_of_section_5.4.1.R
│ ├── 00111_informalexample_5.42_of_section_5.4.1.R
│ ├── 00112_informalexample_5.43_of_section_5.4.1.R
│ ├── 00113_informalexample_5.44_of_section_5.4.1.R
│ ├── 00114_informalexample_5.45_of_section_5.4.1.R
│ ├── 00115_informalexample_5.46_of_section_5.4.1.R
│ ├── 00116_informalexample_5.47_of_section_5.4.1.R
│ ├── 00117_informalexample_5.48_of_section_5.4.2.R
│ ├── 00118_informalexample_5.49_of_section_5.4.2.R
│ ├── 00119_informalexample_5.50_of_section_5.4.2.R
│ ├── 00120_informalexample_5.51_of_section_5.4.2.R
│ ├── 00121_informalexample_5.52_of_section_5.4.2.R
│ ├── 00122_informalexample_5.53_of_section_5.4.2.R
│ ├── 00123_informalexample_5.54_of_section_5.4.2.R
│ ├── 00124_informalexample_5.55_of_section_5.4.2.R
│ ├── 00125_informalexample_5.56_of_section_5.4.2.R
│ ├── 00126_informalexample_5.57_of_section_5.4.2.R
│ ├── 00127_informalexample_5.58_of_section_5.4.2.R
│ ├── 00128_informalexample_5.59_of_section_5.4.2.R
│ ├── 00129_informalexample_5.60_of_section_5.4.2.R
│ ├── 00130_informalexample_5.61_of_section_5.4.2.R
│ ├── 00131_informalexample_5.62_of_section_5.5.1.R
│ ├── 00132_informalexample_5.63_of_section_5.5.1.R
│ ├── 00133_informalexample_5.64_of_section_5.5.1.R
│ ├── 00134_informalexample_5.65_of_section_5.5.1.R
│ ├── 00135_informalexample_5.66_of_section_5.5.1.R
│ ├── 00136_informalexample_5.67_of_section_5.5.2.R
│ ├── 00137_informalexample_5.68_of_section_5.5.2.R
│ ├── 00138_informalexample_5.69_of_section_5.5.2.R
│ ├── 00139_informalexample_5.70_of_section_5.5.2.R
│ ├── 00140_informalexample_5.71_of_section_5.5.2.R
│ └── 00141_informalexample_5.72_of_section_5.5.2.R
├── c06_Choosing_and_evaluating_models
│ ├── 00142_example_6.1_of_section_6.2.3.R
│ ├── 00143_example_6.2_of_section_6.2.3.R
│ ├── 00144_example_6.3_of_section_6.2.3.R
│ ├── 00145_informalexample_6.1_of_section_6.2.3.R
│ ├── 00146_example_6.4_of_section_6.2.3.R
│ ├── 00147_informalexample_6.2_of_section_6.2.3.R
│ ├── 00148_informalexample_6.3_of_section_6.2.3.R
│ ├── 00149_informalexample_6.4_of_section_6.2.3.R
│ ├── 00150_informalexample_6.5_of_section_6.2.3.R
│ ├── 00151_informalexample_6.6_of_section_6.2.3.R
│ ├── 00152_example_6.5_of_section_6.2.3.R
│ ├── 00153_informalexample_6.7_of_section_6.2.3.R
│ ├── 00154_example_6.6_of_section_6.2.4.R
│ ├── 00155_example_6.7_of_section_6.2.4.R
│ ├── 00156_example_6.8_of_section_6.2.4.R
│ ├── 00157_example_6.9_of_section_6.2.5.R
│ ├── 00158_example_6.10_of_section_6.2.5.R
│ ├── 00159_example_6.11_of_section_6.2.5.R
│ ├── 00160_example_6.12_of_section_6.2.5.R
│ ├── 00161_example_6.13_of_section_6.2.5.R
│ ├── 00162_example_6.14_of_section_6.3.2.R
│ ├── 00163_example_6.15_of_section_6.3.2.R
│ ├── 00164_example_6.16_of_section_6.3.2.R
│ ├── 00165_example_6.17_of_section_6.3.2.R
│ ├── 00166_example_6.18_of_section_6.3.2.R
│ ├── 00167_example_6.19_of_section_6.3.2.R
│ ├── 00168_informalexample_6.8_of_section_6.3.2.R
│ ├── 00169_informalexample_6.9_of_section_6.3.2.txt
│ ├── 00170_informalexample_6.10_of_section_6.3.2.txt
│ ├── 00171_example_6.20_of_section_6.3.2.R
│ ├── 00172_example_6.21_of_section_6.3.3.R
│ ├── 00173_informalexample_6.11_of_section_6.3.3.R
│ ├── 00174_informalexample_6.12_of_section_6.3.3.R
│ ├── 00175_example_6.22_of_section_6.3.4.R
│ ├── 00176_example_6.23_of_section_6.3.4.R
│ ├── 00177_example_6.24_of_section_6.3.5.R
│ ├── 00178_example_6.25_of_section_6.3.5.R
│ ├── 00179_example_6.26_of_section_6.3.5.R
│ ├── 00180_informalexample_6.13_of_section_6.3.5.R
│ ├── 00181_example_6.27_of_section_6.3.5.R
│ └── 00182_informalexample_6.14_of_section_6.3.5.R
├── c07_Linear_and_logistic_regression
│ ├── 00183_informalexample_7.1_of_section_7.1.1.math
│ ├── 00184_informalexample_7.2_of_section_7.1.1.math
│ ├── 00185_equation_7.1_of_section_7.1.1.math
│ ├── 00186_informalexample_7.3_of_section_7.1.1.math
│ ├── 00187_informalexample_7.4_of_section_7.1.1.math
│ ├── 00188_example_7.1_of_section_7.1.1.R
│ ├── 00189_example_7.2_of_section_7.1.3.R
│ ├── 00190_example_7.3_of_section_7.1.3.R
│ ├── 00191_example_7.4_of_section_7.1.3.R
│ ├── 00192_example_7.5_of_section_7.1.3.R
│ ├── 00193_informalexample_7.5_of_section_7.1.4.math
│ ├── 00194_informalexample_7.6_of_section_7.1.4.math
│ ├── 00195_informalexample_7.7_of_section_7.1.5.txt
│ ├── 00196_informalexample_7.8_of_section_7.1.5.txt
│ ├── 00197_example_7.6_of_section_7.1.5.R
│ ├── 00198_informalexample_7.9_of_section_7.1.5.txt
│ ├── 00199_informalexample_7.10_of_section_7.1.5.R
│ ├── 00200_informalexample_7.11_of_section_7.1.5.R
│ ├── 00201_informalexample_7.12_of_section_7.2.1.math
│ ├── 00202_informalexample_7.13_of_section_7.2.1.math
│ ├── 00203_informalexample_7.14_of_section_7.2.1.R
│ ├── 00204_informalexample_7.15_of_section_7.2.1.math
│ ├── 00205_informalexample_7.16_of_section_7.2.1.math
│ ├── 00206_equation_7.2_of_section_7.2.1.math
│ ├── 00207_example_7.7_of_section_7.2.1.R
│ ├── 00208_example_7.8_of_section_7.2.2.R
│ ├── 00209_example_7.9_of_section_7.2.2.R
│ ├── 00210_example_7.10_of_section_7.2.3.R
│ ├── 00211_example_7.11_of_section_7.2.3.R
│ ├── 00212_example_7.12_of_section_7.2.3.R
│ ├── 00213_example_7.13_of_section_7.2.3.R
│ ├── 00214_example_7.14_of_section_7.2.3.R
│ ├── 00215_example_7.15_of_section_7.2.4.R
│ ├── 00216_informalexample_7.17_of_section_7.2.4.math
│ ├── 00217_example_7.16_of_section_7.2.5.R
│ ├── 00218_informalexample_7.18_of_section_7.2.5.text
│ ├── 00219_informalexample_7.19_of_section_7.2.5.text
│ ├── 00220_informalexample_7.20_of_section_7.2.5.text
│ ├── 00221_informalexample_7.21_of_section_7.2.5.text
│ ├── 00222_example_7.17_of_section_7.2.5.R
│ ├── 00223_example_7.18_of_section_7.2.5.R
│ ├── 00224_informalexample_7.22_of_section_7.2.5.text
│ ├── 00225_informalexample_7.23_of_section_7.2.5.text
│ ├── 00226_example_7.19_of_section_7.2.5.R
│ ├── 00227_example_7.20_of_section_7.2.5.R
│ ├── 00228_informalexample_7.24_of_section_7.2.5.text
│ ├── 00229_informalexample_7.25_of_section_7.2.5.text
│ ├── 00230_example_7.21_of_section_7.3.1.R
│ ├── 00231_example_7.22_of_section_7.3.1.R
│ ├── 00232_informalexample_7.26_of_section_7.3.1.R
│ ├── 00233_example_7.23_of_section_7.3.1.R
│ ├── 00234_example_7.24_of_section_7.3.1.R
│ ├── 00235_example_7.25_of_section_7.3.1.R
│ ├── 00236_informalexample_7.27_of_section_7.3.2.text
│ ├── 00237_informalexample_7.28_of_section_7.3.2.text
│ ├── 00238_informalexample_7.29_of_section_7.3.2.text
│ ├── 00239_informalexample_7.30_of_section_7.3.2.text
│ ├── 00240_example_7.26_of_section_7.3.3.R
│ ├── 00241_example_7.27_of_section_7.3.3.R
│ ├── 00242_example_7.28_of_section_7.3.3.R
│ ├── 00243_informalexample_7.31_of_section_7.3.3.R
│ ├── 00244_example_7.29_of_section_7.3.3.R
│ ├── 00245_example_7.30_of_section_7.3.3.R
│ ├── 00246_example_7.31_of_section_7.3.3.R
│ ├── 00247_example_7.32_of_section_7.3.3.R
│ └── 00248_example_7.33_of_section_7.3.3.R
├── c08_Advanced_data_preparation
│ ├── 00249_example_8.1_of_section_8.2.1.R
│ ├── 00250_informalexample_8.1_of_section_8.2.1.R
│ ├── 00251_informalexample_8.2_of_section_8.2.1.R
│ ├── 00252_example_8.2_of_section_8.2.2.Rtxt
│ ├── 00253_example_8.3_of_section_8.2.2.R
│ ├── 00254_informalexample_8.3_of_section_8.2.2.R
│ ├── 00255_example_8.4_of_section_8.3.R
│ ├── 00256_example_8.5_of_section_8.3.R
│ ├── 00257_informalexample_8.4_of_section_8.3.1.R
│ ├── 00258_informalexample_8.5_of_section_8.3.1.R
│ ├── 00259_informalexample_8.6_of_section_8.3.1.R
│ ├── 00260_informalexample_8.7_of_section_8.3.1.R
│ ├── 00261_informalexample_8.8_of_section_8.3.1.R
│ ├── 00262_informalexample_8.9_of_section_8.3.2.R
│ ├── 00263_informalexample_8.10_of_section_8.3.2.R
│ ├── 00264_example_8.6_of_section_8.4.1.R
│ ├── 00265_informalexample_8.11_of_section_8.4.1.R
│ ├── 00266_informalexample_8.12_of_section_8.4.2.R
│ ├── 00267_example_8.7_of_section_8.4.2.R
│ ├── 00268_informalexample_8.13_of_section_8.4.2.R
│ ├── 00269_informalexample_8.14_of_section_8.4.2.R
│ ├── 00270_informalexample_8.15_of_section_8.4.2.R
│ ├── 00271_informalexample_8.16_of_section_8.4.2.R
│ ├── 00272_informalexample_8.17_of_section_8.5.R
│ ├── 00273_informalexample_8.18_of_section_8.5.R
│ ├── 00274_informalexample_8.19_of_section_8.5.R
│ ├── 00275_informalexample_8.20_of_section_8.6.2.R
│ ├── 00276_informalexample_8.21_of_section_8.6.2.R
│ ├── 00277_informalexample_8.22_of_section_8.6.3.R
│ ├── 00278_informalexample_8.23_of_section_8.6.4.R
│ ├── 00279_informalexample_8.24_of_section_8.6.4.R
│ ├── 00280_informalexample_8.25_of_section_8.6.5.R
│ ├── 00281_informalexample_8.26_of_section_8.6.5.R
│ ├── 00282_example_8.8_of_section_8.6.6.R
│ ├── 00283_example_8.9_of_section_8.6.6.R
│ └── 00284_example_8.10_of_section_8.6.6.R
├── c09_Unsupervised_methods
│ ├── 00285_informalexample_9.1_of_section_9.1.1.math
│ ├── 00286_informalexample_9.2_of_section_9.1.1.math
│ ├── 00287_informalexample_9.3_of_section_9.1.1.math
│ ├── 00288_informalexample_9.4_of_section_9.1.1.math
│ ├── 00289_example_9.1_of_section_9.1.2.R
│ ├── 00290_example_9.2_of_section_9.1.2.R
│ ├── 00291_example_9.3_of_section_9.1.3.R
│ ├── 00292_informalexample_9.5_of_section_9.1.3.Rtxt
│ ├── 00293_example_9.4_of_section_9.1.3.R
│ ├── 00294_example_9.5_of_section_9.1.3.R
│ ├── 00295_example_9.6_of_section_9.1.3.R
│ ├── 00296_example_9.7_of_section_9.1.3.R
│ ├── 00297_example_9.8_of_section_9.1.3.R
│ ├── 00298_informalexample_9.6_of_section_9.1.3.math
│ ├── 00299_example_9.9_of_section_9.1.3.R
│ ├── 00300_informalexample_9.7_of_section_9.1.3.math
│ ├── 00301_informalexample_9.8_of_section_9.1.3.math
│ ├── 00302_example_9.10_of_section_9.1.3.R
│ ├── 00303_example_9.11_of_section_9.1.4.R
│ ├── 00304_example_9.12_of_section_9.1.4.R
│ ├── 00305_example_9.13_of_section_9.1.4.R
│ ├── 00306_example_9.14_of_section_9.1.5.R
│ ├── 00307_example_9.15_of_section_9.1.5.R
│ ├── 00308_example_9.16_of_section_9.1.5.R
│ ├── 00309_example_9.17_of_section_9.1.5.R
│ ├── 00310_informalexample_9.9_of_section_9.2.2.txt
│ ├── 00311_example_9.18_of_section_9.2.3.R
│ ├── 00312_example_9.19_of_section_9.2.3.R
│ ├── 00313_informalexample_9.10_of_section_9.2.3.R
│ ├── 00314_example_9.20_of_section_9.2.3.R
│ ├── 00315_example_9.21_of_section_9.2.3.R
│ ├── 00316_example_9.22_of_section_9.2.3.R
│ ├── 00317_informalexample_9.11_of_section_9.2.3.R
│ ├── 00318_example_9.23_of_section_9.2.3.R
│ ├── 00319_example_9.24_of_section_9.2.3.R
│ ├── 00320_example_9.25_of_section_9.2.3.R
│ ├── 00321_example_9.26_of_section_9.2.3.R
│ ├── 00322_example_9.27_of_section_9.2.3.R
│ └── 00323_example_9.28_of_section_9.2.3.R
├── c10_Exploring_advanced_methods
│ ├── 00324_example_10.1_of_section_10.1.1.R
│ ├── 00325_informalexample_10.1_of_section_10.1.1.R
│ ├── 00326_example_10.2_of_section_10.1.2.R
│ ├── 00327_example_10.3_of_section_10.1.3.R
│ ├── 00328_informalexample_10.2_of_section_10.1.3.R
│ ├── 00329_informalexample_10.3_of_section_10.1.3.R
│ ├── 00330_informalexample_10.4_of_section_10.1.3.R
│ ├── 00331_example_10.4_of_section_10.1.3.R
│ ├── 00332_example_10.5_of_section_10.1.3.R
│ ├── 00333_example_10.6_of_section_10.1.4.R
│ ├── 00334_example_10.7_of_section_10.1.4.R
│ ├── 00335_example_10.8_of_section_10.1.4.R
│ ├── 00336_informalexample_10.5_of_section_10.1.4.R
│ ├── 00337_informalexample_10.6_of_section_10.1.4.R
│ ├── 00338_informalexample_10.7_of_section_10.1.4.R
│ ├── 00339_informalexample_10.8_of_section_10.1.4.R
│ ├── 00340_example_10.9_of_section_10.1.4.R
│ ├── 00341_example_10.10_of_section_10.1.4.R
│ ├── 00342_example_10.11_of_section_10.1.4.R
│ ├── 00343_informalexample_10.9_of_section_10.2.1.math
│ ├── 00344_informalexample_10.10_of_section_10.2.1.math
│ ├── 00345_example_10.12_of_section_10.2.2.R
│ ├── 00346_example_10.13_of_section_10.2.2.R
│ ├── 00347_example_10.14_of_section_10.2.2.R
│ ├── 00348_example_10.15_of_section_10.2.2.R
│ ├── 00349_example_10.16_of_section_10.2.3.R
│ ├── 00350_example_10.17_of_section_10.2.4.R
│ ├── 00351_example_10.18_of_section_10.2.4.R
│ ├── 00352_example_10.19_of_section_10.2.4.R
│ ├── 00353_example_10.20_of_section_10.2.5.R
│ ├── 00354_example_10.21_of_section_10.2.5.R
│ ├── 00355_example_10.22_of_section_10.3.1.R
│ ├── 00356_example_10.23_of_section_10.3.1.R
│ ├── 00357_example_10.24_of_section_10.3.1.R
│ ├── 00358_informalexample_10.11_of_section_10.3.2.math
│ ├── 00359_informalexample_10.12_of_section_10.3.2.math
│ ├── 00360_informalexample_10.13_of_section_10.3.2.math
│ ├── 00361_example_10.25_of_section_10.3.3.R
│ ├── 00362_informalexample_10.14_of_section_10.3.3.math
│ └── 00363_informalexample_10.15_of_section_10.3.3.math
├── c11_Documentation_and_deployment
│ ├── 00364_example_11.1_of_section_11.2.1.Rmd
│ ├── 00365_informalexample_11.1_of_section_11.2.1.R
│ ├── 00366_example_11.2_of_section_11.2.3.Rmd
│ ├── 00367_example_11.3_of_section_11.2.3.md
│ ├── 00368_example_11.4_of_section_11.2.3.md
│ ├── 00369_example_11.5_of_section_11.3.1.R
│ ├── 00370_informalexample_11.2_of_section_11.3.2.bash
│ ├── 00371_example_11.6_of_section_11.3.2.bash
│ ├── 00372_example_11.7_of_section_11.3.2.bash
│ ├── 00373_example_11.8_of_section_11.3.3.bash
│ ├── 00374_example_11.9_of_section_11.3.3.bash
│ ├── 00375_informalexample_11.3_of_section_11.3.3.bash
│ ├── 00376_example_11.10_of_section_11.3.4.bash
│ ├── 00377_example_11.11_of_section_11.4.2.Rtxt
│ ├── 00378_informalexample_11.4_of_section_11.4.2.Rtxt
│ ├── 00379_example_11.12_of_section_11.4.2.Rmd
│ └── 00380_informalexample_11.5_of_section_11.4.3.SQL
├── x0A_Starting_with_R_and_other_tools
│ ├── 00381_informalexample_A.1_of_section_A.1.1.Rtxt
│ ├── 00382_informalexample_A.2_of_section_A.1.5.txt
│ ├── 00383_example_A.1_of_section_A.2.R
│ ├── 00384_informalexample_A.3_of_section_A.2.1.R
│ ├── 00385_example_A.2_of_section_A.2.1.R
│ ├── 00386_example_A.3_of_section_A.2.1.R
│ ├── 00387_example_A.4_of_section_A.2.1.R
│ ├── 00388_example_A.5_of_section_A.2.1.R
│ ├── 00389_informalexample_A.4_of_section_A.2.2.R
│ ├── 00390_example_A.6_of_section_A.2.2.R
│ ├── 00391_example_A.7_of_section_A.2.2.R
│ ├── 00392_example_A.8_of_section_A.2.2.R
│ ├── 00393_informalexample_A.5_of_section_A.3.1.R
│ ├── 00394_informalexample_A.6_of_section_A.3.1.R
│ ├── 00395_informalexample_A.7_of_section_A.3.1.R
│ └── 00396_informalexample_A.8_of_section_A.3.1.R
├── x0B_Important_statistical_concepts
│ ├── 00397_example_B.1_of_section_B.1.1.R
│ ├── 00398_example_B.2_of_section_B.1.1.R
│ ├── 00399_example_B.3_of_section_B.1.1.R
│ ├── 00400_example_B.4_of_section_B.1.1.R
│ ├── 00401_example_B.5_of_section_B.1.3.R
│ ├── 00402_example_B.6_of_section_B.1.3.R
│ ├── 00403_example_B.7_of_section_B.1.4.R
│ ├── 00404_example_B.8_of_section_B.1.4.R
│ ├── 00405_example_B.9_of_section_B.1.4.R
│ ├── 00406_example_B.10_of_section_B.1.4.R
│ ├── 00407_example_B.11_of_section_B.1.4.R
│ ├── 00408_informalexample_B.1_of_section_B.2.1.math
│ ├── 00409_example_B.12_of_section_B.2.2.R
│ ├── 00410_example_B.13_of_section_B.2.2.R
│ ├── 00411_example_B.14_of_section_B.2.2.R
│ ├── 00412_example_B.15_of_section_B.2.2.R
│ ├── 00413_example_B.16_of_section_B.2.2.R
│ ├── 00414_informalexample_B.2_of_section_B.2.3.R
│ ├── 00415_example_B.17_of_section_B.2.4.R
│ ├── 00416_example_B.18_of_section_B.2.4.R
│ ├── 00417_informalexample_B.3_of_section_B.2.4.R
│ ├── 00418_example_B.19_of_section_B.3.1.R
│ ├── 00419_example_B.20_of_section_B.3.1.R
│ ├── 00420_example_B.21_of_section_B.3.2.R
│ ├── 00421_example_B.22_of_section_B.3.2.R
│ ├── 00422_example_B.23_of_section_B.3.2.R
│ └── 00423_example_B.24_of_section_B.3.2.R
└── xFront_Matter_Practical_Data_Science_with_R
│ └── 00001_informalexample_Front_Matter.1_of_section_Front_Matter.5.6.bash
├── Custdata
├── README.txt
├── custdata.RDS
├── hhdata.RDS
└── median_income.RDS
├── IMDB
├── IMDBtest.RDS
├── IMDBtrain.RDS
├── README.md
├── getIMDB.R
└── lime_imdb_example.R
├── KDD2009
├── AnalysisOfKDD2009.pdf
├── KDD2009.Rdata
├── KDD2009.Rproj
├── KDD2009vtreat.Rmd
├── KDD2009vtreat.md
├── KDD2009vtreat_files
│ ├── figure-gfm
│ │ ├── kddplot-1.png
│ │ ├── kddplot-2.png
│ │ ├── kddplot-3.png
│ │ ├── kddplot-4.png
│ │ ├── kddplot-5.png
│ │ └── kddplot-6.png
│ └── figure-markdown_github
│ │ ├── kddplot-1.png
│ │ ├── kddplot-2.png
│ │ ├── kddplot-3.png
│ │ ├── kddplot-4.png
│ │ ├── kddplot-5.png
│ │ └── kddplot-6.png
├── README.md
├── orange_small_train.data.gz
├── orange_small_train_appetency.labels.txt
├── orange_small_train_churn.labels.txt
└── orange_small_train_upselling.labels.txt
├── LICENSE.md
├── LIME_iris
├── README_limeiris.md
└── lime_iris_example.R
├── NotionalData
├── README.md
└── exampleData.rData
├── PDSwR2.Rproj
├── PDSwR2_errata.html
├── PDSwR2_errata.md
├── PUMS
├── ACS2016_PUMS_README.pdf
├── PUMS1.Rmd
├── PUMS1.md
├── PUMS1_dplyr.Rmd
├── PUMS1_dplyr.md
├── PUMS1_dplyr_files
│ └── figure-markdown_github
│ │ └── unnamed-chunk-1-1.png
├── PUMS1_files
│ └── figure-markdown_github
│ │ └── unnamed-chunk-1-1.png
├── PUMS1_rquery.Rmd
├── PUMS1_rquery.md
├── PUMS1_rquery_files
│ └── figure-markdown_github
│ │ ├── unnamed-chunk-1-1.png
│ │ └── unnamed-chunk-1-2.png
├── PUMSDataDict16.txt
├── PUMSsample.RDS
├── PUMSscatter1.pdf
├── README.md
├── data_dict.csv
├── download
│ ├── .gitignore
│ ├── ACS2016_PUMS_README.pdf
│ ├── LoadPUMS.Rmd
│ ├── LoadPUMS.md
│ ├── LoadPUMSAll.Rmd
│ ├── LoadPUMSAll.md
│ ├── LoadPUMSAll_files
│ │ └── figure-markdown_github
│ │ │ ├── unnamed-chunk-1-1.png
│ │ │ └── unnamed-chunk-1-2.png
│ ├── LoadPUMS_h.Rmd
│ ├── LoadPUMS_h.md
│ ├── PUMSDataDict16.txt
│ ├── README.txt
│ ├── ReadDataDict.Rmd
│ ├── ReadDataDict.md
│ ├── data_dict.RDS
│ └── download.Rproj
├── dpus_std_employee.RDS
├── incomedata.rds
├── makeSubSample.Rmd
├── psub.RDS
├── ss16hus.RDS
├── ss16hus_h.RDS
├── ss16pus.RDS
└── ss16pus_h.RDS
├── Protein
├── README.md
├── protein.txt
└── protein_README.txt
├── PseudoLog10
├── .Rbuildignore
├── DESCRIPTION
├── NAMESPACE
├── PseudoLog10.Rproj
├── PseudoLog10.pdf
├── R
│ └── pseudoLog10.R
└── man
│ └── pseudoLog10.Rd
├── README.md
├── RenderedExamples
├── .gitignore
├── c01_The_data_science_process.Rmd
├── c01_The_data_science_process.md
├── c02_Starting_with_R_and_data.Rmd
├── c02_Starting_with_R_and_data.md
├── c03_Exploring_data.Rmd
├── c03_Exploring_data.md
├── c04_Managing_data.Rmd
├── c04_Managing_data.md
├── c05_Data_Engineering_and_Data_Shaping.Rmd
├── c05_Data_Engineering_and_Data_Shaping.md
├── c06_Choosing_and_evaluating_models.Rmd
├── c06_Choosing_and_evaluating_models.md
├── c07_Linear_and_logistic_regression.Rmd
├── c07_Linear_and_logistic_regression.md
├── c08_Advanced_Data_Preparation.Rmd
├── c08_Advanced_Data_Preparation.md
├── c09_Unsupervised_methods.Rmd
├── c09_Unsupervised_methods.md
├── c10_Exploring_advanced_methods.Rmd
├── c10_Exploring_advanced_methods.md
├── c11_Documentation_and_deployment.Rmd
├── c11_Documentation_and_deployment.md
├── figure
│ ├── 00031_example_2.11_of_section_2.3.1.R-1.png
│ ├── 00037_example_3.6_of_section_3.2.1.R-1.png
│ ├── 00038_example_3.7_of_section_3.2.1.R-1.png
│ ├── 00039_example_3.8_of_section_3.2.1.R-1.png
│ ├── 00041_informalexample_3.2_of_section_3.2.1.R-1.png
│ ├── 00042_example_3.9_of_section_3.2.1.R-1.png
│ ├── 00043_example_3.10_of_section_3.2.1.R-1.png
│ ├── 00044_example_3.11_of_section_3.2.2.R-1.png
│ ├── 00046_example_3.13_of_section_3.2.2.R-1.png
│ ├── 00047_informalexample_3.3_of_section_3.2.2.R-1.png
│ ├── 00048_informalexample_3.4_of_section_3.2.2.R-1.png
│ ├── 00049_example_3.14_of_section_3.2.2.R-1.png
│ ├── 00050_example_3.15_of_section_3.2.2.R-1.png
│ ├── 00050_example_3.15_of_section_3.2.2.R-2.png
│ ├── 00050_example_3.15_of_section_3.2.2.R-3.png
│ ├── 00050_example_3.15_of_section_3.2.2.R-4.png
│ ├── 00051_example_3.16_of_section_3.2.2.R-1.png
│ ├── 00051_example_3.16_of_section_3.2.2.R-2.png
│ ├── 00052_example_3.17_of_section_3.2.2.R-1.png
│ ├── 00053_example_3.18_of_section_3.2.2.R-1.png
│ ├── 00054_informalexample_3.5_of_section_3.2.2.R-1.png
│ ├── 00071_informalexample_5.2_of_section_5.1.1.R-1.png
│ ├── 00090_informalexample_5.21_of_section_5.2.1.R-1.png
│ ├── 00094_informalexample_5.25_of_section_5.2.1.R-1.png
│ ├── 00099_informalexample_5.30_of_section_5.3.1.R-1.png
│ ├── 00132_informalexample_5.63_of_section_5.5.1.R-1.png
│ ├── 00135_informalexample_5.66_of_section_5.5.1.R-1.png
│ ├── 00138_informalexample_5.69_of_section_5.5.2.R-1.png
│ ├── 00157_example_6.9_of_section_6.2.5.R-1.png
│ ├── 00158_example_6.10_of_section_6.2.5.R-1.png
│ ├── 00168_informalexample_6.8_of_section_6.3.2.R-1.png
│ ├── 00171_example_6.20_of_section_6.3.2.R-1.png
│ ├── 00176_example_6.23_of_section_6.3.4.R-1.png
│ ├── 00179_example_6.26_of_section_6.3.5.R-1.png
│ ├── 00180_informalexample_6.13_of_section_6.3.5.R-1.png
│ ├── 00181_example_6.27_of_section_6.3.5.R-1.png
│ ├── 00181_example_6.27_of_section_6.3.5.R-2.png
│ ├── 00189_example_7.2_of_section_7.1.3.R-1.png
│ ├── 00190_example_7.3_of_section_7.1.3.R-1.png
│ ├── 00212_example_7.12_of_section_7.2.3.R-1.png
│ ├── 00213_example_7.13_of_section_7.2.3.R-1.png
│ ├── 00234_example_7.24_of_section_7.3.1.R-1.png
│ ├── 00241_example_7.27_of_section_7.3.3.R-1.png
│ ├── 00247_example_7.32_of_section_7.3.3.R-1.png
│ ├── 00271_informalexample_8.16_of_section_8.4.2.R-1.png
│ ├── 00271_informalexample_8.16_of_section_8.4.2.R-2.png
│ ├── 00291_example_9.3_of_section_9.1.3.R-1.png
│ ├── 00294_example_9.5_of_section_9.1.3.R-1.png
│ ├── 00297_example_9.8_of_section_9.1.3.R-1.png
│ ├── 00299_example_9.9_of_section_9.1.3.R-1.png
│ ├── 00302_example_9.10_of_section_9.1.3.R-1.png
│ ├── 00314_example_9.20_of_section_9.2.3.R-1.png
│ ├── 00324_example_10.1_of_section_10.1.1.R-1.png
│ ├── 00331_example_10.4_of_section_10.1.3.R-1.png
│ ├── 00334_example_10.7_of_section_10.1.4.R-1.png
│ ├── 00346_example_10.13_of_section_10.2.2.R-1.png
│ ├── 00347_example_10.14_of_section_10.2.2.R-1.png
│ ├── 00349_example_10.16_of_section_10.2.3.R-1.png
│ ├── 00351_example_10.18_of_section_10.2.4.R-1.png
│ ├── 00355_example_10.22_of_section_10.3.1.R-1.png
│ ├── 00356_example_10.23_of_section_10.3.1.R-1.png
│ ├── 00357_example_10.24_of_section_10.3.1.R-1.png
│ ├── 00397_example_B.1_of_section_B.1.1.R-1.png
│ ├── 00398_example_B.2_of_section_B.1.1.R-1.png
│ ├── 00400_example_B.4_of_section_B.1.1.R-1.png
│ ├── 00401_example_B.5_of_section_B.1.3.R-1.png
│ ├── 00402_example_B.6_of_section_B.1.3.R-1.png
│ ├── 00403_example_B.7_of_section_B.1.4.R-1.png
│ ├── 00404_example_B.8_of_section_B.1.4.R-1.png
│ ├── 00419_example_B.20_of_section_B.3.1.R-1.png
│ └── 00419_example_B.20_of_section_B.3.1.R-2.png
├── render_examples.bash
├── x0A_Starting_with_R_and_other_tools.Rmd
├── x0A_Starting_with_R_and_other_tools.md
├── x0B_Important_statistical_concepts.Rmd
└── x0B_Important_statistical_concepts.md
├── SQLExample
├── HotelRelation.pdf
├── README.Rmd
├── README.md
├── Workbook1.xlsx
├── figure
│ └── allsteps.png
├── h2-1.3.170.jar
├── h2demodb_h2.h2.db
└── h2demodb_h2.trace.db
├── Spambase
├── README.md
└── spamD.tsv
├── Spirals
├── Spirals.Rproj
├── c10_SVM.Rmd
├── c10_SVM.md
└── c10_SVM_files
│ └── figure-markdown_github
│ ├── 00433_example_10.22_of_section_10.3.1.R-1.png
│ ├── 00434_example_10.23_of_section_10.3.1.R-1.png
│ ├── 00435_example_10.24_of_section_10.3.1.R-1.png
│ ├── large_mu-1.png
│ ├── large_nu-1.png
│ ├── small_mu-1.png
│ ├── small_nu-1.png
│ └── xgboost-1.png
├── Starting_with_R_and_Other_Tools.pdf
├── Statlog
├── Chapter_1_Example.Rmd
├── Chapter_1_Example.md
├── Chapter_1_Example_files
│ └── figure-markdown_github
│ │ └── present_model-1.png
├── GCDData.RData
├── GCDSteps.Rmd
├── GCDSteps.ipynb
├── GCDSteps.md
├── README.md
├── Statlog.Rproj
├── creditdata.RDS
├── german.data
├── loan_model_example.RData
└── mapping.R
├── UCICar
├── README.md
└── car.data.csv
├── auto_mpg
├── Data_Set_Description.txt
├── README.Rmd
├── README.md
├── UCI_Auto_MPG.pdf
├── auto-mpg.data-original.txt
├── auto-mpg.data.txt
├── auto-mpg.names.txt
├── auto_mpg.RDS
├── vtreat_example.Rmd
└── vtreat_example.md
├── bioavailability
├── Caco-2 Permeability Assay.pdf
├── Figure4.gif
├── README.Rmd
├── README.md
├── WebPlotDigitizer.pdf
├── caco2.csv
├── figure
│ ├── graph1.png
│ ├── graphT.png
│ ├── model1.png
│ ├── synth1.png
│ └── synthP.png
├── synth.RData
└── synth.csv
├── cricketchirps
├── README.txt
└── crickets.csv
└── packages.R
/BMI/BMI.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/BMI/BMI_files/figure-markdown_github/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/BMI/BMI_files/figure-markdown_github/unnamed-chunk-2-1.png
--------------------------------------------------------------------------------
/BMI/BMI_files/figure-markdown_github/unnamed-chunk-2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/BMI/BMI_files/figure-markdown_github/unnamed-chunk-2-2.png
--------------------------------------------------------------------------------
/BMI/bmi_data.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/BMI/bmi_data.RDS
--------------------------------------------------------------------------------
/BestOffers/BestOffers.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/Bookdata/bookdata.tsv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Bookdata/bookdata.tsv.gz
--------------------------------------------------------------------------------
/Bookdata/bxBooks.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Bookdata/bxBooks.RData
--------------------------------------------------------------------------------
/Bookdata/read_bookcrossing.R:
--------------------------------------------------------------------------------
1 |
2 | # first: replace \" with '
3 | bxUsers <- read.table('BX-Users.csv',header=T,sep=';',comment.char='',stringsAsFactors=F)
4 | # first: replace \" with blank
5 | bxBookRatings <- read.table('BX-Book-Ratings.csv',header=T,sep=';',comment.char='',stringsAsFactors=F)
6 | # first: replace \" with '
7 | bxBooks <- read.table('BX-Books.csv',header=T,sep=';',comment.char='',stringsAsFactors=F)
8 |
--------------------------------------------------------------------------------
/Buzz/.gitignore:
--------------------------------------------------------------------------------
1 | buzz.aux
2 | buzz.log
3 | buzz.out
4 | cache
5 | buzzm_cache
6 |
--------------------------------------------------------------------------------
/Buzz/Buzz.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/Buzz/BuzzDataSetDoc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/BuzzDataSetDoc.pdf
--------------------------------------------------------------------------------
/Buzz/Buzz_score_example.md:
--------------------------------------------------------------------------------
1 | Buzz scoring example
2 | ================
3 |
4 | Example scoring (making predictions with) the Buzz data set.
5 |
6 | First attach the `randomForest` package and load the model and test data.
7 |
8 | ``` r
9 | suppressPackageStartupMessages(library("randomForest"))
10 |
11 | lst <- readRDS("thRS500.RDS")
12 | varslist <- lst$varslist
13 | fmodel <- lst$fmodel
14 | buzztest <- lst$buzztest
15 | rm(list = "lst")
16 | ```
17 |
18 | Now show the quality of our model on held-out test data.
19 |
20 | ``` r
21 | buzztest$prediction <- predict(fmodel, newdata = buzztest, type = "prob")[, 2, drop = TRUE]
22 |
23 | WVPlots::ROCPlot(buzztest, "prediction",
24 | "buzz", 1,
25 | "ROC curve estimating quality of model predictions on held-out data")
26 | ```
27 |
28 | 
29 |
--------------------------------------------------------------------------------
/Buzz/Buzz_score_example_files/figure-markdown_github/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/Buzz_score_example_files/figure-markdown_github/unnamed-chunk-2-1.png
--------------------------------------------------------------------------------
/Buzz/Buzz_score_example_files/figure-markdown_github/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/Buzz_score_example_files/figure-markdown_github/unnamed-chunk-3-1.png
--------------------------------------------------------------------------------
/Buzz/PeerPresentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/PeerPresentation.pdf
--------------------------------------------------------------------------------
/Buzz/PeerPresentation_withNotes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/PeerPresentation_withNotes.pdf
--------------------------------------------------------------------------------
/Buzz/ProjectSponsorPresentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/ProjectSponsorPresentation.pdf
--------------------------------------------------------------------------------
/Buzz/ProjectSponsorPresentation_withNotes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/ProjectSponsorPresentation_withNotes.pdf
--------------------------------------------------------------------------------
/Buzz/UserPresentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/UserPresentation.pdf
--------------------------------------------------------------------------------
/Buzz/UserPresentation_withNotes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/UserPresentation_withNotes.pdf
--------------------------------------------------------------------------------
/Buzz/buzzapp/buzzapp.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/Buzz/buzzm_files/figure-markdown_github/model-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/buzzm_files/figure-markdown_github/model-1.png
--------------------------------------------------------------------------------
/Buzz/buzzm_files/figure-markdown_github/model-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/buzzm_files/figure-markdown_github/model-2.png
--------------------------------------------------------------------------------
/Buzz/buzzm_files/figure-markdown_github/plottest-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/buzzm_files/figure-markdown_github/plottest-1.png
--------------------------------------------------------------------------------
/Buzz/buzzm_files/figure-markdown_github/plottrain-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/buzzm_files/figure-markdown_github/plottrain-1.png
--------------------------------------------------------------------------------
/Buzz/figure/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/figure/unnamed-chunk-2-1.png
--------------------------------------------------------------------------------
/Buzz/figure/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/figure/unnamed-chunk-3-1.png
--------------------------------------------------------------------------------
/Buzz/model_export_files/figure-markdown_github/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/model_export_files/figure-markdown_github/unnamed-chunk-4-1.png
--------------------------------------------------------------------------------
/Buzz/rf_tree_1_plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/rf_tree_1_plot.pdf
--------------------------------------------------------------------------------
/Buzz/thRS500.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Buzz/thRS500.RDS
--------------------------------------------------------------------------------
/CDC/NatalBirthData.rData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/NatalBirthData.rData
--------------------------------------------------------------------------------
/CDC/NatalRiskData.rData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/NatalRiskData.rData
--------------------------------------------------------------------------------
/CDC/UserGuide2010.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/UserGuide2010.pdf
--------------------------------------------------------------------------------
/CDC/loadExample/SQLScrewdriver.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/loadExample/SQLScrewdriver.jar
--------------------------------------------------------------------------------
/CDC/loadExample/dbDef.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | testdb
5 | u
6 | u
7 | org.h2.Driver
8 | jdbc:h2:./NATAL;LOG=0;CACHE_SIZE=65536;LOCK_MODE=0;UNDO_LOG=0
9 |
10 |
--------------------------------------------------------------------------------
/CDC/loadExample/h2-1.3.170.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/loadExample/h2-1.3.170.jar
--------------------------------------------------------------------------------
/CDC/natal2010Sample.tsv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CDC/natal2010Sample.tsv.gz
--------------------------------------------------------------------------------
/CodeExamples.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/CodeExamples.zip
--------------------------------------------------------------------------------
/CodeExamples/README.txt:
--------------------------------------------------------------------------------
1 |
2 | Example code and data for "Practical Data Science with R 2nd Edition" by Nina Zumel and John Mount, Manning 2019.
3 |
4 | Code examples license:
5 | This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
6 | http://creativecommons.org/licenses/by-nc-sa/4.0/
7 | No guarantee, indemnification or claim of fitness is made regarding any of these items.
8 | No claim of license on works of others or derived data.
9 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00003_informalexample_2.1_of_section_2.1.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 2.1 of section 2.1.2
2 | # (informalexample 2.1 of section 2.1.2) : Starting with R and data : Starting with R : R programming
3 |
4 | print(seq_len(25))
5 | # [1] 1 2 3 4 5 6 7 8 9 10 11 12
6 | # [13] 13 14 15 16 17 18 19 20 21 22 23 24
7 | # [25] 25
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00006_informalexample_2.4_of_section_2.1.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 2.4 of section 2.1.2
2 | # (informalexample 2.4 of section 2.1.2) : Starting with R and data : Starting with R : R programming
3 |
4 | nchar("a string")
5 | # [1] 8
6 |
7 | nchar(c("a", "aa", "aaa", "aaaa"))
8 | # [1] 1 2 3 4
9 |
10 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00007_informalexample_2.5_of_section_2.1.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 2.5 of section 2.1.2
2 | # (informalexample 2.5 of section 2.1.2) : Starting with R and data : Starting with R : R programming
3 |
4 | 1 +
5 | 2
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00008_informalexample_2.6_of_section_2.1.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 2.6 of section 2.1.2
2 | # (informalexample 2.6 of section 2.1.2) : Starting with R and data : Starting with R : R programming
3 |
4 | 1
5 | + 2
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00010_informalexample_2.8_of_section_2.1.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 2.8 of section 2.1.2
2 | # (informalexample 2.8 of section 2.1.2) : Starting with R and data : Starting with R : R programming
3 |
4 | d <- data.frame(x = 1, y = 2) # Note: 1
5 | d2 <- d # Note: 2
6 | d$x <- 5 # Note: 3
7 |
8 | print(d)
9 | # x y
10 | # 1 5 2
11 |
12 | print(d2)
13 | # x y
14 | # 1 1 2
15 |
16 | # Note 1:
17 | # Create some example data and refer to it by the name d.
18 |
19 | # Note 2:
20 | # Create an additional reference d2 to the same data.
21 |
22 | # Note 3:
23 | # Alter the value referred to by d.
24 |
25 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00012_informalexample_2.10_of_section_2.1.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 2.10 of section 2.1.2
2 | # (informalexample 2.10 of section 2.1.2) : Starting with R and data : Starting with R : R programming
3 |
4 | library("dplyr")
5 |
6 | result <- data %>%
7 | arrange(., sort_key) %>%
8 | mutate(., ordered_sum_revenue = cumsum(revenue)) %>%
9 | mutate(., fraction_revenue_seen = ordered_sum_revenue/sum(revenue))
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00013_informalexample_2.11_of_section_2.1.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 2.11 of section 2.1.2
2 | # (informalexample 2.11 of section 2.1.2) : Starting with R and data : Starting with R : R programming
3 |
4 | d <- data.frame(col1 = c(1, 2, 3), col2 = c(-1, 0, 1))
5 | d$col3 <- d$col1 + d$col2
6 | print(d)
7 | # col1 col2 col3
8 | # 1 1 -1 0
9 | # 2 2 0 2
10 | # 3 3 1 4
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00017_informalexample_2.13_of_section_2.2.2.txt:
--------------------------------------------------------------------------------
1 | # informalexample 2.13 of section 2.2.2
2 | # (informalexample 2.13 of section 2.2.2) : Starting with R and data : Working with data from files : Using R with less-structured data
3 |
4 | A11 6 A34 A43 1169 A65 A75 4 A93 A101 4 ...
5 | A12 48 A32 A43 5951 A61 A73 2 A92 A101 2 ...
6 | A14 12 A34 A46 2096 A61 A74 2 A93 A101 3 ...
7 | ...
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00018_example_2.3_of_section_2.2.2.R:
--------------------------------------------------------------------------------
1 | # example 2.3 of section 2.2.2
2 | # (example 2.3 of section 2.2.2) : Starting with R and data : Working with data from files : Using R with less-structured data
3 | # Title: Loading the credit dataset
4 |
5 | setwd("PDSwR2/Statlog") # Note: 1
6 | d <- read.table('german.data', sep=' ',
7 | stringsAsFactors = FALSE, header = FALSE)
8 |
9 | # Note 1:
10 | # Replace this path with the actual path where you have saved PDSwR2.
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00020_informalexample_2.14_of_section_2.2.2.txt:
--------------------------------------------------------------------------------
1 | # informalexample 2.14 of section 2.2.2
2 | # (informalexample 2.14 of section 2.2.2) : Starting with R and data : Working with data from files : Using R with less-structured data
3 |
4 | mapping <- c('A11' = '... < 0 DM',
5 | 'A12' = '0 <= ... < 200 DM',
6 | 'A13' = '... >= 200 DM / salary assignments for at least 1 year',
7 | ...
8 | )
9 |
10 |
--------------------------------------------------------------------------------
/CodeExamples/c02_Starting_with_R_and_data/00031_example_2.11_of_section_2.3.1.R:
--------------------------------------------------------------------------------
1 | # example 2.11 of section 2.3.1
2 | # (example 2.11 of section 2.3.1) : Starting with R and data : Working with relational databases : A production-size example
3 | # Title: Plotting the data
4 |
5 | WVPlots::ScatterHist(
6 | dpus, "AGEP", "PINCP",
7 | "Expected income (PINCP) as function age (AGEP)",
8 | smoothmethod = "lm",
9 | point_alpha = 0.025)
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00035_example_3.4_of_section_3.1.1.R:
--------------------------------------------------------------------------------
1 | # example 3.4 of section 3.1.1
2 | # (example 3.4 of section 3.1.1) : Exploring data : Using summary statistics to spot problems : Typical problems revealed by data summaries
3 | # Title: Looking at the data range of a variable
4 |
5 | summary(customer_data$income)
6 | ## Min. 1st Qu. Median Mean 3rd Qu. Max.
7 | ## -6900 10700 26200 41764 51700 1257000 # Note: 1
8 |
9 | # Note 1:
10 | # Income ranges from below zero (minimum of -6900) to over a million
11 | # dollars; a very wide range.
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00036_example_3.5_of_section_3.1.1.R:
--------------------------------------------------------------------------------
1 | # example 3.5 of section 3.1.1
2 | # (example 3.5 of section 3.1.1) : Exploring data : Using summary statistics to spot problems : Typical problems revealed by data summaries
3 | # Title: Checking units sounds silly, but mistakes can lead to spectacular errors if not caught
4 |
5 | IncomeK = customer_data$income/1000
6 | summary(IncomeK) # Note: 1
7 | ## Min. 1st Qu. Median Mean 3rd Qu. Max.
8 | ## -6.90 10.70 26.20 41.76 51.70 1257.00
9 |
10 | # Note 1:
11 | # The variable IncomeK is defined as IncomeK = customer_data$income/1000. But suppose you didn’t know
12 | # that. Looking only at the summary, the values could plausibly be
13 | # interpreted to mean either “hourly wage” or “yearly income in units
14 | # of $1000.”
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00037_example_3.6_of_section_3.2.1.R:
--------------------------------------------------------------------------------
1 | # example 3.6 of section 3.2.1
2 | # (example 3.6 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable
3 | # Title: Plotting a histogram
4 |
5 | library(ggplot2) # Note: 1
6 | ggplot(customer_data, aes(x=gas_usage)) +
7 | geom_histogram(binwidth=10, fill="gray") # Note: 2
8 |
9 | # Note 1:
10 | # Load the ggplot2 library, if you haven’t
11 | # already done so.
12 |
13 | # Note 2:
14 | # The binwidth parameter tells the
15 | # geom_histogram call how to make bins of ten dollar intervals (default is
16 | # datarange/30). The fill parameter specifies the color of the histogram
17 | # bars (default: dark gray).
18 |
19 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00038_example_3.7_of_section_3.2.1.R:
--------------------------------------------------------------------------------
1 | # example 3.7 of section 3.2.1
2 | # (example 3.7 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable
3 | # Title: Producing a density plot
4 |
5 | library(scales) # Note: 1
6 |
7 | ggplot(customer_data, aes(x=income)) + geom_density() +
8 | scale_x_continuous(labels=dollar) # Note: 2
9 |
10 | # Note 1:
11 | # The scales package brings in the dollar
12 | # scale notation.
13 |
14 | # Note 2:
15 | # Set the x-axis labels to
16 | # dollars.
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00039_example_3.8_of_section_3.2.1.R:
--------------------------------------------------------------------------------
1 | # example 3.8 of section 3.2.1
2 | # (example 3.8 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable
3 | # Title: Creating a log-scaled density plot
4 |
5 | ggplot(customer_data, aes(x=income)) +
6 | geom_density() +
7 | scale_x_log10(breaks = c(10, 100, 1000, 10000, 100000, 1000000), labels=dollar) + # Note: 1
8 | annotation_logticks(sides="bt", color="gray") # Note: 2
9 |
10 | # Note 1:
11 | # Set the x-axis to be in log10 scale, with
12 | # manually set tick points and labels as dollars.
13 |
14 | # Note 2:
15 | # Add log-scaled tick marks to the top and
16 | # bottom of the graph.
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00040_informalexample_3.1_of_section_3.2.1.txt:
--------------------------------------------------------------------------------
1 | # informalexample 3.1 of section 3.2.1
2 | # (informalexample 3.1 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable
3 |
4 | ## Warning in self$trans$transform(x): NaNs produced
5 | ## Warning: Transformation introduced infinite values in continuous x-axis
6 | ## Warning: Removed 6856 rows containing non-finite values (stat_density).
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00041_informalexample_3.2_of_section_3.2.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 3.2 of section 3.2.1
2 | # (informalexample 3.2 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable
3 |
4 | ggplot(customer_data, aes(x=marital_status)) + geom_bar(fill="gray")
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00042_example_3.9_of_section_3.2.1.R:
--------------------------------------------------------------------------------
1 | # example 3.9 of section 3.2.1
2 | # (example 3.9 of section 3.2.1) : Exploring data : Spotting problems using graphics and visualization : Visually checking distributions for a single variable
3 | # Title: Producing a horizontal bar chart
4 |
5 | ggplot(customer_data, aes(x=state_of_res)) +
6 | geom_bar(fill="gray") + # Note: 1
7 | coord_flip() # Note: 2
8 |
9 | # Note 1:
10 | # Plot bar chart as before: state_of_res is on x-axis, count is on y-axis.
11 |
12 | # Note 2:
13 | # Flip the x and y axes: state_of_res is
14 | # now on the y-axis.
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00044_example_3.11_of_section_3.2.2.R:
--------------------------------------------------------------------------------
1 | # example 3.11 of section 3.2.2
2 | # (example 3.11 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables
3 | # Title: Producing a line plot
4 |
5 | x <- runif(100) # Note: 1
6 | y <- x^2 + 0.2*x # Note: 2
7 | ggplot(data.frame(x=x,y=y), aes(x=x,y=y)) + geom_line() # Note: 3
8 |
9 | # Note 1:
10 | # First, generate the data for this example.
11 | # The x variable is uniformly randomly distributed
12 | # between 0 and 1.
13 |
14 | # Note 2:
15 | # The y variable is a
16 | # quadratic function of x.
17 |
18 | # Note 3:
19 | # Plot the line plot.
20 |
21 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00047_informalexample_3.3_of_section_3.2.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 3.3 of section 3.2.2
2 | # (informalexample 3.3 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables
3 |
4 | ggplot(customer_data_samp, aes(x=age, y=income)) +
5 | geom_point() + geom_smooth() +
6 | ggtitle("Income as a function of age")
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00048_informalexample_3.4_of_section_3.2.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 3.4 of section 3.2.2
2 | # (informalexample 3.4 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables
3 |
4 | BinaryYScatterPlot(customer_data_samp, "age", "health_ins",
5 | title = "Probability of health insurance by age")
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00049_example_3.14_of_section_3.2.2.R:
--------------------------------------------------------------------------------
1 | # example 3.14 of section 3.2.2
2 | # (example 3.14 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables
3 | # Title: Producing a hexbin plot
4 |
5 | library(WVPlots) # Note: 1
6 |
7 | HexBinPlot(customer_data2, "age", "income", "Income as a function of age") + # Note: 2
8 | geom_smooth(color="black", se=FALSE) # Note: 3
9 |
10 | # Note 1:
11 | # Load the WVPlots library
12 |
13 | # Note 2:
14 | # Plot the hexbin of income as a function of age
15 |
16 | # Note 3:
17 | # Add the smoothing line in black; suppress
18 | # standard error ribbon (se=FALSE).
19 |
20 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00052_example_3.17_of_section_3.2.2.R:
--------------------------------------------------------------------------------
1 | # example 3.17 of section 3.2.2
2 | # (example 3.17 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables
3 | # Title: Comparing population densities across categories
4 |
5 | customer_data3 = subset(customer_data2, marital_status %in% c("Never married", "Widowed")) # Note: 1
6 | ggplot(customer_data3, aes(x=age, color=marital_status, linetype=marital_status)) + # Note: 2
7 | geom_density() + scale_color_brewer(palette="Dark2")
8 |
9 | # Note 1:
10 | # Restrict to the data for widowed or never married people.
11 |
12 | # Note 2:
13 | # Differentiate the color and line style of the plots by marital_status
14 |
15 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00053_example_3.18_of_section_3.2.2.R:
--------------------------------------------------------------------------------
1 | # example 3.18 of section 3.2.2
2 | # (example 3.18 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables
3 | # Title: Comparing population densities across categories with ShadowHist()
4 |
5 | ShadowHist(customer_data3, "age", "marital_status",
6 | "Age distribution for never married vs. widowed populations",
7 | binwidth=5) # Note: 1
8 |
9 | # Note 1:
10 | # Set the bin widths of the histogram to 5.
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/c03_Exploring_data/00054_informalexample_3.5_of_section_3.2.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 3.5 of section 3.2.2
2 | # (informalexample 3.5 of section 3.2.2) : Exploring data : Spotting problems using graphics and visualization : Visually checking relationships between two variables
3 |
4 | ggplot(customer_data2, aes(x=age)) +
5 | geom_density() + facet_wrap(~marital_status)
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c04_Managing_data/00055_example_4.1_of_section_4.1.1.R:
--------------------------------------------------------------------------------
1 | # example 4.1 of section 4.1.1
2 | # (example 4.1 of section 4.1.1) : Managing data : Cleaning data : Domain-specific data cleaning
3 | # Title: Treating the age and income variables
4 |
5 | library(dplyr)
6 | customer_data = readRDS("custdata.RDS") # Note: 1
7 |
8 | customer_data <- customer_data %>%
9 | mutate(age = na_if(age, 0), # Note: 2
10 | income = ifelse(income < 0, NA, income)) # Note: 3
11 |
12 | # Note 1:
13 | # Load the data.
14 |
15 | # Note 2:
16 | # The function mutate() from the dplyr package adds columns to a data frame, or modifies existing columns.
17 | # The function na_if(), also from dplyr, turns a specific problematic value (in this case, 0) into NA.
18 |
19 | # Note 3:
20 | # Convert negative incomes to NA
21 |
22 |
--------------------------------------------------------------------------------
/CodeExamples/c04_Managing_data/00056_example_4.2_of_section_4.1.1.R:
--------------------------------------------------------------------------------
1 | # example 4.2 of section 4.1.1
2 | # (example 4.2 of section 4.1.1) : Managing data : Cleaning data : Domain-specific data cleaning
3 | # Title: Treating the gas_usage variable
4 |
5 | customer_data <- customer_data %>%
6 | mutate(gas_with_rent = (gas_usage == 1), # Note: 1
7 | gas_with_electricity = (gas_usage == 2),
8 | no_gas_bill = (gas_usage == 3) ) %>%
9 | mutate(gas_usage = ifelse(gas_usage < 4, NA, gas_usage)) # Note: 2
10 |
11 | # Note 1:
12 | # Create the three indicator variables.
13 |
14 | # Note 2:
15 | # Convert the special codes in the gas_usage column to NA.
16 |
17 |
--------------------------------------------------------------------------------
/CodeExamples/c04_Managing_data/00058_informalexample_4.1_of_section_4.1.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 4.1 of section 4.1.3
2 | # (informalexample 4.1 of section 4.1.3) : Managing data : Cleaning data : The vtreat package for automatically treating missing variables
3 |
4 | varlist <- setdiff(colnames(customer_data), c("custid", "health_ins")) # every column name of customer_data except custid and health_ins
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c04_Managing_data/00059_example_4.4_of_section_4.1.3.R:
--------------------------------------------------------------------------------
1 | # example 4.4 of section 4.1.3
2 | # (example 4.4 of section 4.1.3) : Managing data : Cleaning data : The vtreat package for automatically treating missing variables
3 | # Title: Creating and applying a treatment plan
4 |
5 | library(vtreat)
6 | treatment_plan <- design_missingness_treatment(customer_data, varlist = varlist) # design the plan on customer_data, restricted to the columns named in varlist
7 | training_prepared <- prepare(treatment_plan, customer_data) # apply the plan to the same data frame it was designed on
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c04_Managing_data/00063_example_4.8_of_section_4.2.1.R:
--------------------------------------------------------------------------------
1 | # example 4.8 of section 4.2.1
2 | # (example 4.8 of section 4.2.1) : Managing data : Data transformations : Normalization
3 | # Title: Normalizing by mean age
4 |
5 | summary(training_prepared$age)
6 |
7 | ## Min. 1st Qu. Median Mean 3rd Qu. Max.
8 | ## 21.00 34.00 48.00 49.22 62.00 120.00
9 |
10 | mean_age <- mean(training_prepared$age) # mean of the treated age column
11 | age_normalized <- training_prepared$age/mean_age # ratio to the mean: a value of 1 is exactly the mean age
12 | summary(age_normalized) # the mean of the ratio is 1 by construction (see output below)
13 |
14 | ## Min. 1st Qu. Median Mean 3rd Qu. Max.
15 | ## 0.4267 0.6908 0.9753 1.0000 1.2597 2.4382
16 |
17 |
--------------------------------------------------------------------------------
/CodeExamples/c04_Managing_data/00067_informalexample_4.2_of_section_4.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 4.2 of section 4.2.3
2 | # (informalexample 4.2 of section 4.2.3) : Managing data : Data transformations : Log transformations for skewed and wide distributions
3 |
4 | # Signed base-10 logarithm: returns sign(x) * log10(|x|) when |x| > 1,
5 | # and 0 for every value whose absolute value is at most 1.
6 | signedlog10 <- function(x) {
7 |   magnitude <- abs(x)
8 |   ifelse(magnitude > 1, sign(x) * log10(magnitude), 0)
9 | }
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00075_informalexample_5.6_of_section_5.1.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.6 of section 5.1.1
2 | # (informalexample 5.6 of section 5.1.1) : Data engineering and data shaping : Data selection : Sub-setting rows and columns
3 |
4 | library("dplyr")
5 |
6 | iris_dplyr <- iris %>%
7 | select(.,
8 | Petal.Length, Petal.Width, Species) %>%
9 | filter(.,
10 | Petal.Length > 2)
11 |
12 | head(iris_dplyr)
13 |
14 | ## Petal.Length Petal.Width Species
15 | ## 1 4.7 1.4 versicolor
16 | ## 2 4.5 1.5 versicolor
17 | ## 3 4.9 1.5 versicolor
18 | ## 4 4.0 1.3 versicolor
19 | ## 5 4.6 1.5 versicolor
20 | ## 6 4.5 1.3 versicolor
21 |
22 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00079_informalexample_5.10_of_section_5.1.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.10 of section 5.1.2
2 | # (informalexample 5.10 of section 5.1.2) : Data engineering and data shaping : Data selection : Removing records with incomplete data
3 |
4 | library("data.table")
5 |
6 | msleep_data.table <- as.data.table(msleep)
7 |
8 | clean_data.table = msleep_data.table[complete.cases(msleep_data.table), ]
9 |
10 | nrow(clean_data.table)
11 |
12 | ## [1] 20
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00080_informalexample_5.11_of_section_5.1.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.11 of section 5.1.2
2 | # (informalexample 5.11 of section 5.1.2) : Data engineering and data shaping : Data selection : Removing records with incomplete data
3 |
4 | library("dplyr")
5 |
6 | clean_dplyr <- msleep %>%
7 | filter(., complete.cases(.))
8 |
9 | nrow(clean_dplyr)
10 |
11 | ## [1] 20
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00081_informalexample_5.12_of_section_5.1.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.12 of section 5.1.3
2 | # (informalexample 5.12 of section 5.1.3) : Data engineering and data shaping : Data selection : Ordering rows
3 |
4 | purchases <- wrapr::build_frame( # Note: 1
5 | "day", "hour", "n_purchase" |
6 | 1 , 9 , 5 |
7 | 2 , 9 , 3 |
8 | 2 , 11 , 5 |
9 | 1 , 13 , 1 |
10 | 2 , 13 , 3 |
11 | 1 , 14 , 1 )
12 |
13 | # Note 1:
14 | # Use wrapr::build_frame to type data in directly in legible column order.
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00083_informalexample_5.14_of_section_5.1.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.14 of section 5.1.3
2 | # (informalexample 5.14 of section 5.1.3) : Data engineering and data shaping : Data selection : Ordering rows
3 |
4 | library("data.table")
5 |
6 | DT_purchases <- as.data.table(purchases)
7 |
8 | order_cols <- c("day", "hour") # Note: 1
9 | setorderv(DT_purchases, order_cols) # result is not assigned: setorderv re-orders DT_purchases itself, by day then hour
10 |
11 | DT_purchases[ , running_total := cumsum(n_purchase)] # add running_total: cumulative n_purchase over all rows in the new order (no grouping)
12 |
13 | # print(DT_purchases)
14 |
15 | # Note 1:
16 | # Re-order data
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00084_informalexample_5.15_of_section_5.1.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.15 of section 5.1.3
2 | # (informalexample 5.15 of section 5.1.3) : Data engineering and data shaping : Data selection : Ordering rows
3 |
4 | library("dplyr")
5 |
6 | res <- purchases %>%
7 | arrange(., day, hour) %>%
8 | mutate(., running_total = cumsum(n_purchase))
9 |
10 | # print(res)
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00087_informalexample_5.18_of_section_5.1.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.18 of section 5.1.3
2 | # (informalexample 5.18 of section 5.1.3) : Data engineering and data shaping : Data selection : Ordering rows
3 |
4 | library("dplyr")
5 |
6 | res <- purchases %>%
7 | arrange(., day, hour) %>% # sort by day, then hour, so cumsum() accumulates in time order
8 | group_by(., day) %>% # group by day so the running total restarts for each day
9 | mutate(., running_total = cumsum(n_purchase)) %>%
10 | ungroup(.) # remove the grouping so later steps on res are not applied per day
11 |
12 | # print(res)
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00089_informalexample_5.20_of_section_5.2.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.20 of section 5.2.1
2 | # (informalexample 5.20 of section 5.2.1) : Data engineering and data shaping : Basic data transforms : Add new columns
3 |
4 | library("lubridate")
5 | library("ggplot2")
6 |
7 | # Build a date string by joining day, month and year with dashes.
8 | datestr <- function(day, month, year) {
9 |   date_string <- paste(day, month, year, sep="-")
10 |   return(date_string)
11 | }
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00093_informalexample_5.24_of_section_5.2.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.24 of section 5.2.1
2 | # (informalexample 5.24 of section 5.2.1) : Data engineering and data shaping : Basic data transforms : Add new columns
3 |
4 | library("dplyr")
5 |
6 | airquality_with_date2 <- airquality %>%
7 | mutate(., date = dmy(datestr(Day, Month, 1973))) %>%
8 | select(., Ozone, date)
9 |
10 | head(airquality_with_date2)
11 |
12 | ## Ozone date
13 | ## 1 41 1973-05-01
14 | ## 2 36 1973-05-02
15 | ## 3 12 1973-05-03
16 | ## 4 18 1973-05-04
17 | ## 5 NA 1973-05-05
18 | ## 6 28 1973-05-06
19 |
20 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00095_informalexample_5.26_of_section_5.2.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.26 of section 5.2.1
2 | # (informalexample 5.26 of section 5.2.1) : Data engineering and data shaping : Basic data transforms : Add new columns
3 |
4 | library("data.table")
5 | library("zoo")
6 |
7 | DT_airquality[, OzoneCorrected := na.locf(Ozone, na.rm=FALSE)]
8 |
9 | summary(DT_airquality)
10 |
11 | ## Ozone date OzoneCorrected
12 | ## Min. : 1.00 Min. :1973-05-01 Min. : 1.00
13 | ## 1st Qu.: 18.00 1st Qu.:1973-06-08 1st Qu.: 16.00
14 | ## Median : 31.50 Median :1973-07-16 Median : 30.00
15 | ## Mean : 42.13 Mean :1973-07-16 Mean : 39.78
16 | ## 3rd Qu.: 63.25 3rd Qu.:1973-08-23 3rd Qu.: 52.00
17 | ## Max. :168.00 Max. :1973-09-30 Max. :168.00
18 | ## NA's :37
19 |
20 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00096_informalexample_5.27_of_section_5.2.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.27 of section 5.2.1
2 | # (informalexample 5.27 of section 5.2.1) : Data engineering and data shaping : Basic data transforms : Add new columns
3 |
4 | library("dplyr")
5 | library("zoo")
6 |
7 | airquality_with_date %>%
8 | mutate(.,
9 | OzoneCorrected = na.locf(Ozone, na.rm = FALSE)) %>%
10 | summary(.)
11 |
12 | ## Ozone date OzoneCorrected
13 | ## Min. : 1.00 Min. :1973-05-01 Min. : 1.00
14 | ## 1st Qu.: 18.00 1st Qu.:1973-06-08 1st Qu.: 16.00
15 | ## Median : 31.50 Median :1973-07-16 Median : 30.00
16 | ## Mean : 42.13 Mean :1973-07-16 Mean : 39.78
17 | ## 3rd Qu.: 63.25 3rd Qu.:1973-08-23 3rd Qu.: 52.00
18 | ## Max. :168.00 Max. :1973-09-30 Max. :168.00
19 | ## NA's :37
20 |
21 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00097_informalexample_5.28_of_section_5.2.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.28 of section 5.2.2
2 | # (informalexample 5.28 of section 5.2.2) : Data engineering and data shaping : Basic data transforms : Other simple operations
3 |
4 | d <- data.frame(x = 1:2, y = 3:4)
5 | print(d)
6 | #> x y
7 | #> 1 1 3
8 | #> 2 2 4
9 |
10 | colnames(d) <- c("BIGX", "BIGY")
11 | print(d)
12 | #> BIGX BIGY
13 | #> 1 1 3
14 | #> 2 2 4
15 |
16 | d$BIGX <- NULL
17 | print(d)
18 | #> BIGY
19 | #> 1 3
20 | #> 2 4
21 |
22 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00098_informalexample_5.29_of_section_5.3.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.29 of section 5.3.1
2 | # (informalexample 5.29 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows
3 |
4 | library("datasets")
5 | library("ggplot2")
6 |
7 | head(iris)
8 |
9 | ## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
10 | ## 1 5.1 3.5 1.4 0.2 setosa
11 | ## 2 4.9 3.0 1.4 0.2 setosa
12 | ## 3 4.7 3.2 1.3 0.2 setosa
13 | ## 4 4.6 3.1 1.5 0.2 setosa
14 | ## 5 5.0 3.6 1.4 0.2 setosa
15 | ## 6 5.4 3.9 1.7 0.4 setosa
16 |
17 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00100_informalexample_5.31_of_section_5.3.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.31 of section 5.3.1
2 | # (informalexample 5.31 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows
3 |
4 | library("data.table")
5 |
6 | iris_data.table <- as.data.table(iris)
7 | iris_data.table <- iris_data.table[,
8 | .(Petal.Length = mean(Petal.Length),
9 | Petal.Width = mean(Petal.Width)),
10 | by = .(Species)]
11 |
12 | # print(iris_data.table)
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00101_informalexample_5.32_of_section_5.3.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.32 of section 5.3.1
2 | # (informalexample 5.32 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows
3 |
4 | library("dplyr")
5 |
6 | iris_summary <- iris %>% group_by(., Species) %>%
7 | summarize(.,
8 | Petal.Length = mean(Petal.Length),
9 | Petal.Width = mean(Petal.Width)) %>%
10 | ungroup(.)
11 |
12 | # print(iris_summary)
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00102_informalexample_5.33_of_section_5.3.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.33 of section 5.3.1
2 | # (informalexample 5.33 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows
3 |
4 | iris_copy <- iris
5 | iris_copy$mean_Petal.Length <- ave(iris$Petal.Length, iris$Species, FUN = mean)
6 | iris_copy$mean_Petal.Width <- ave(iris$Petal.Width, iris$Species, FUN = mean)
7 |
8 | # head(iris_copy)
9 | # tail(iris_copy)
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00103_informalexample_5.34_of_section_5.3.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.34 of section 5.3.1
2 | # (informalexample 5.34 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows
3 |
4 | library("data.table")
5 |
6 | iris_data.table <- as.data.table(iris)
7 |
8 | iris_data.table[ , # no row filter: operate on all rows
9 | `:=`(mean_Petal.Length = mean(Petal.Length), # := adds columns to iris_data.table instead of collapsing it to summary rows
10 | mean_Petal.Width = mean(Petal.Width)),
11 | by = "Species"] # means are computed separately within each Species
12 |
13 | # print(iris_data.table)
14 |
15 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00104_informalexample_5.35_of_section_5.3.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.35 of section 5.3.1
2 | # (informalexample 5.35 of section 5.3.1) : Data engineering and data shaping : Aggregating transforms : Combining many rows into summary rows
3 |
4 | library("dplyr")
5 |
6 | iris_dplyr <- iris %>%
7 | group_by(., Species) %>%
8 | mutate(.,
9 | mean_Petal.Length = mean(Petal.Length),
10 | mean_Petal.Width = mean(Petal.Width)) %>%
11 | ungroup(.)
12 |
13 | # head(iris_dplyr)
14 |
15 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00106_informalexample_5.37_of_section_5.4.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.37 of section 5.4.1
2 | # (informalexample 5.37 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly
3 |
4 | rbind_base = rbind(productTable,
5 | productTable2)
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00107_informalexample_5.38_of_section_5.4.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.38 of section 5.4.1
2 | # (informalexample 5.38 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly
3 |
4 | str(rbind_base)
5 |
6 | ## 'data.frame': 8 obs. of 2 variables:
7 | ## $ productID: Factor w/ 8 levels "p1","p2","p3",..: 1 2 3 4 5 6 7 8
8 | ## $ price : num 9.99 16.29 19.99 5.49 24.49 ...
9 |
10 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00108_informalexample_5.39_of_section_5.4.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.39 of section 5.4.1
2 | # (informalexample 5.39 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly
3 |
4 | library("data.table")
5 |
6 | rbindlist(list(productTable,
7 | productTable2))
8 |
9 | ## productID price
10 | ## 1: p1 9.99
11 | ## 2: p2 16.29
12 | ## 3: p3 19.99
13 | ## 4: p4 5.49
14 | ## 5: p5 24.49
15 | ## 6: n1 25.49
16 | ## 7: n2 33.99
17 | ## 8: n3 17.99
18 |
19 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00114_informalexample_5.45_of_section_5.4.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.45 of section 5.4.1
2 | # (informalexample 5.45 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly
3 |
4 | cbind(productTable, salesTable[, -1])
5 |
6 | ## productID price sold_store sold_online
7 | ## 1 p1 9.99 6 64
8 | ## 2 p2 16.29 31 1
9 | ## 3 p3 19.99 30 23
10 | ## 4 p4 5.49 31 67
11 | ## 5 p5 24.49 43 51
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00115_informalexample_5.46_of_section_5.4.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.46 of section 5.4.1
2 | # (informalexample 5.46 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly
3 |
4 | library("data.table")
5 |
6 | cbind(as.data.table(productTable),
7 | as.data.table(salesTable[, -1]))
8 |
9 | ## productID price sold_store sold_online
10 | ## 1: p1 9.99 6 64
11 | ## 2: p2 16.29 31 1
12 | ## 3: p3 19.99 30 23
13 | ## 4: p4 5.49 31 67
14 | ## 5: p5 24.49 43 51
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00116_informalexample_5.47_of_section_5.4.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.47 of section 5.4.1
2 | # (informalexample 5.47 of section 5.4.1) : Data engineering and data shaping : Multi-table data transforms : Combining two or more ordered data frames quickly
3 |
4 | library("dplyr")
5 |
6 | # list of data frames calling convention
7 | dplyr::bind_cols(list(productTable, salesTable[, -1]))
8 |
9 | ## productID price sold_store sold_online
10 | ## 1 p1 9.99 6 64
11 | ## 2 p2 16.29 31 1
12 | ## 3 p3 19.99 30 23
13 | ## 4 p4 5.49 31 67
14 | ## 5 p5 24.49 43 51
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00117_informalexample_5.48_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.48 of section 5.4.2
2 | # (informalexample 5.48 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | productTable <- wrapr::build_frame(
5 | "productID", "price" |
6 | "p1" , 9.99 |
7 | "p3" , 19.99 |
8 | "p4" , 5.49 |
9 | "p5" , 24.49 )
10 |
11 | salesTable <- wrapr::build_frame(
12 | "productID", "unitsSold" |
13 | "p1" , 10 |
14 | "p2" , 43 |
15 | "p3" , 55 |
16 | "p4" , 8 )
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00118_informalexample_5.49_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.49 of section 5.4.2
2 | # (informalexample 5.49 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | merge(productTable, salesTable, by = "productID", all.x = TRUE)
5 |
6 | ## productID price unitsSold
7 | ## 1 p1 9.99 10
8 | ## 2 p3 19.99 55
9 | ## 3 p4 5.49 8
10 | ## 4 p5 24.49 NA
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00120_informalexample_5.51_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.51 of section 5.4.2
2 | # (informalexample 5.51 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | library("data.table")
5 |
6 | joined_table <- productTable # work on a new variable so productTable itself is not modified
7 | joined_table$unitsSold <- salesTable$unitsSold[match(joined_table$productID, # match() gives, per product, the position of its productID in salesTable
8 | salesTable$productID)] # a productID absent from salesTable yields NA (p5 in the output below)
9 | print(joined_table)
10 |
11 | ## productID price unitsSold
12 | ## 1 p1 9.99 10
13 | ## 2 p3 19.99 55
14 | ## 3 p4 5.49 8
15 | ## 4 p5 24.49 NA
16 |
17 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00121_informalexample_5.52_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.52 of section 5.4.2
2 | # (informalexample 5.52 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | library("dplyr")
5 |
6 | left_join(productTable, salesTable, by = "productID")
7 |
8 | ## productID price unitsSold
9 | ## 1 p1 9.99 10
10 | ## 2 p3 19.99 55
11 | ## 3 p4 5.49 8
12 | ## 4 p5 24.49 NA
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00122_informalexample_5.53_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.53 of section 5.4.2
2 | # (informalexample 5.53 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | merge(productTable, salesTable, by = "productID")
5 |
6 | ## productID price unitsSold
7 | ## 1 p1 9.99 10
8 | ## 2 p3 19.99 55
9 | ## 3 p4 5.49 8
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00123_informalexample_5.54_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.54 of section 5.4.2
2 | # (informalexample 5.54 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | library("data.table")
5 |
6 | productTable_data.table <- as.data.table(productTable) # data.table copies of both inputs
7 | salesTable_data.table <- as.data.table(salesTable)
8 |
9 | merge(productTable_data.table, salesTable_data.table, by = "productID") # merge the data.table copies (not the original data.frames); the default keeps only productIDs present in both tables
10 |
11 | ## productID price unitsSold
12 | ## 1: p1 9.99 10
13 | ## 2: p3 19.99 55
14 | ## 3: p4 5.49 8
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00124_informalexample_5.55_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.55 of section 5.4.2
2 | # (informalexample 5.55 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | library("dplyr")
5 |
6 | inner_join(productTable, salesTable, by = "productID")
7 |
8 | ## productID price unitsSold
9 | ## 1 p1 9.99 10
10 | ## 2 p3 19.99 55
11 | ## 3 p4 5.49 8
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00125_informalexample_5.56_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.56 of section 5.4.2
2 | # (informalexample 5.56 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | # note that merge orders the result by key column by default
5 | # use sort=FALSE to skip the sorting
6 | merge(productTable, salesTable, by = "productID", all=TRUE)
7 |
8 | ## productID price unitsSold
9 | ## 1 p1 9.99 10
10 | ## 2 p2 NA 43
11 | ## 3 p3 19.99 55
12 | ## 4 p4 5.49 8
13 | ## 5 p5 24.49 NA
14 |
15 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00126_informalexample_5.57_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.57 of section 5.4.2
2 | # (informalexample 5.57 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | library("data.table")
5 |
6 | productTable_data.table <- as.data.table(productTable)
7 | salesTable_data.table <- as.data.table(salesTable)
8 |
9 | merge(productTable_data.table, salesTable_data.table,
10 | by = "productID", all = TRUE)
11 |
12 | ## productID price unitsSold
13 | ## 1: p1 9.99 10
14 | ## 2: p2 NA 43
15 | ## 3: p3 19.99 55
16 | ## 4: p4 5.49 8
17 | ## 5: p5 24.49 NA
18 |
19 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00127_informalexample_5.58_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.58 of section 5.4.2
2 | # (informalexample 5.58 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | library("dplyr")
5 |
6 | full_join(productTable, salesTable, by = "productID")
7 |
8 | ## productID price unitsSold
9 | ## 1 p1 9.99 10
10 | ## 2 p3 19.99 55
11 | ## 3 p4 5.49 8
12 | ## 4 p5 24.49 NA
13 | ## 5 p2 NA 43
14 |
15 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00129_informalexample_5.60_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.60 of section 5.4.2
2 | # (informalexample 5.60 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | trades <- data.table(
5 | trade_id = c(32525, 32526),
6 | price = c(5.5, 9),
7 | quantity = c(100, 200),
8 | when = as.POSIXct(strptime(
9 | c("2018-10-18 2:13:42",
10 | "2018-10-18 2:19:20"),
11 | "%Y-%m-%d %H:%M:%S")))
12 |
13 | print(trades)
14 |
15 | ## trade_id price quantity when
16 | ## 1: 32525 5.5 100 2018-10-18 02:13:42
17 | ## 2: 32526 9.0 200 2018-10-18 02:19:20
18 |
19 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00130_informalexample_5.61_of_section_5.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.61 of section 5.4.2
2 | # (informalexample 5.61 of section 5.4.2) : Data engineering and data shaping : Multi-table data transforms : Principled methods to combine data from multiple tables
3 |
4 | quotes[, quote_time := when] # keep each table's own timestamp in a separate column,
5 | trades[ , trade_time := when ] # since the join below is keyed on the shared `when` column
6 | quotes[ trades, on = "when", roll = TRUE ][ # one row per trade; roll = TRUE pairs a trade with the latest quote at or before it (each quote_time in the output precedes its trade_time)
7 | , .(quote_time, bid, price, ask, trade_id, trade_time) ] # keep only the columns of interest, in this order
8 |
9 | ## quote_time bid price ask trade_id trade_time
10 | ## 1: 2018-10-18 02:12:23 5 5.5 6 32525 2018-10-18 02:13:42
11 | ## 2: 2018-10-18 02:17:51 8 9.0 10 32526 2018-10-18 02:19:20
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00132_informalexample_5.63_of_section_5.5.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.63 of section 5.5.1
2 | # (informalexample 5.63 of section 5.5.1) : Data engineering and data shaping : Reshaping transforms : Moving data from wide to tall form
3 |
4 | # let's give an example of the kind of graph we have in mind, using just driver deaths
5 | library("ggplot2")
6 |
7 | ggplot(Seatbelts,
8 | aes(x = date, y = DriversKilled, color = law, shape = law)) +
9 | geom_point() +
10 | geom_smooth(se=FALSE) +
11 | ggtitle("UK car driver deaths by month")
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00133_informalexample_5.64_of_section_5.5.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.64 of section 5.5.1
2 | # (informalexample 5.64 of section 5.5.1) : Data engineering and data shaping : Reshaping transforms : Moving data from wide to tall form
3 |
4 | library("data.table")
5 |
6 | seatbelts_long2 <-
7 | melt.data.table(as.data.table(Seatbelts),
8 | id.vars = NULL,
9 | measure.vars = c("DriversKilled", "front", "rear"),
10 | variable.name = "victim_type",
11 | value.name = "nvictims")
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00134_informalexample_5.65_of_section_5.5.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.65 of section 5.5.1
2 | # (informalexample 5.65 of section 5.5.1) : Data engineering and data shaping : Reshaping transforms : Moving data from wide to tall form
3 |
4 | library("cdata")
5 |
6 | seatbelts_long3 <- unpivot_to_blocks(
7 | Seatbelts,
8 | nameForNewKeyColumn = "victim_type",
9 | nameForNewValueColumn = "nvictims",
10 | columnsToTakeFrom = c("DriversKilled", "front", "rear"))
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00139_informalexample_5.70_of_section_5.5.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.70 of section 5.5.2
2 | # (informalexample 5.70 of section 5.5.2) : Data engineering and data shaping : Reshaping transforms : Moving data from tall to wide form
3 |
4 | library("data.table")
5 |
6 | ChickWeight_wide2 <- dcast.data.table(
7 | as.data.table(ChickWeight),
8 | Chick ~ Time,
9 | value.var = "weight")
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c05_Data_Engineering_and_Data_Shaping/00140_informalexample_5.71_of_section_5.5.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 5.71 of section 5.5.2
2 | # (informalexample 5.71 of section 5.5.2) : Data engineering and data shaping : Reshaping transforms : Moving data from tall to wide form
3 |
4 | library("cdata")
5 |
6 | ChickWeight_wide3 <- pivot_to_rowrecs(
7 | ChickWeight,
8 | columnToTakeKeysFrom = "Time",
9 | columnToTakeValuesFrom = "weight",
10 | rowKeyColumns = "Chick")
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00143_example_6.2_of_section_6.2.3.R:
--------------------------------------------------------------------------------
1 | # example 6.2 of section 6.2.3
2 | # (example 6.2 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models
3 | # Title: Spam classifications
4 |
5 | sample <- spamTest[c(7,35,224,327), c('spam','pred')]
6 | print(sample)
7 | ## spam pred # Note: 1
8 | ## 115 spam 0.9903246227
9 | ## 361 spam 0.4800498077
10 | ## 2300 non-spam 0.0006846551
11 | ## 3428 non-spam 0.0001434345
12 |
13 | # Note 1:
14 | # The first column gives the actual class
15 | # label (spam or non-spam). The second column gives
16 | # the predicted probability that an email is spam.
17 | # If the probability > 0.5 the email is labeled
18 | # “spam”; otherwise it is “non-spam”.
19 |
20 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00144_example_6.3_of_section_6.2.3.R:
--------------------------------------------------------------------------------
1 | # example 6.3 of section 6.2.3
2 | # (example 6.3 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models
3 | # Title: Spam confusion matrix
4 |
5 | confmat_spam <- table(truth = spamTest$spam,
6 | prediction = ifelse(spamTest$pred > 0.5,
7 | "spam", "non-spam"))
8 | print(confmat_spam)
9 | ## prediction
10 | ## truth non-spam spam
11 | ## non-spam 264 14
12 | ## spam 22 158
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00145_informalexample_6.1_of_section_6.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 6.1 of section 6.2.3
2 | # (informalexample 6.1 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models
3 |
4 | (confmat_spam[1,1] + confmat_spam[2,2]) / sum(confmat_spam)
5 | ## [1] 0.9213974
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00146_example_6.4_of_section_6.2.3.R:
--------------------------------------------------------------------------------
1 | # example 6.4 of section 6.2.3
2 | # (example 6.4 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models
3 | # Title: Entering the Akismet confusion matrix by hand
4 |
5 | confmat_akismet <- as.table(matrix(data=c(288-1,17,1,13882-17),nrow=2,ncol=2))
6 | rownames(confmat_akismet) <- rownames(confmat_spam)
7 | colnames(confmat_akismet) <- colnames(confmat_spam)
8 | print(confmat_akismet)
9 | ## non-spam spam
10 | ## non-spam 287 1
11 | ## spam 17 13865
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00147_informalexample_6.2_of_section_6.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 6.2 of section 6.2.3
2 | # (informalexample 6.2 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models
3 |
4 | (confmat_akismet[1,1] + confmat_akismet[2,2]) / sum(confmat_akismet)
5 | ## [1] 0.9987297
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00148_informalexample_6.3_of_section_6.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 6.3 of section 6.2.3
2 | # (informalexample 6.3 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models
3 |
4 | confmat_spam[2,2] / (confmat_spam[2,2]+ confmat_spam[1,2])
5 | ## [1] 0.9186047
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00149_informalexample_6.4_of_section_6.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 6.4 of section 6.2.3
2 | # (informalexample 6.4 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models
3 |
4 | confmat_akismet[2,2] / (confmat_akismet[2,2] + confmat_akismet[1,2])
5 | ## [1] 0.9999279
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00150_informalexample_6.5_of_section_6.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 6.5 of section 6.2.3
2 | # (informalexample 6.5 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models
3 |
4 | confmat_spam[2,2] / (confmat_spam[2,2] + confmat_spam[2,1])
5 | ## [1] 0.8777778
6 |
7 | confmat_akismet[2,2] / (confmat_akismet[2,2] + confmat_akismet[2,1])
8 | ## [1] 0.9987754
9 |
10 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00151_informalexample_6.6_of_section_6.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 6.6 of section 6.2.3
2 | # (informalexample 6.6 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models
3 |
4 | precision <- confmat_spam[2,2] / (confmat_spam[2,2]+ confmat_spam[1,2])
5 | recall <- confmat_spam[2,2] / (confmat_spam[2,2] + confmat_spam[2,1])
6 |
7 | (F1 <- 2 * precision * recall / (precision + recall) )
8 | ## [1] 0.8977273
9 |
10 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00153_informalexample_6.7_of_section_6.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 6.7 of section 6.2.3
2 | # (informalexample 6.7 of section 6.2.3) : Choosing and evaluating models : Evaluating models : Evaluating classification models
3 |
4 | confmat_spam[1,1] / (confmat_spam[1,1] + confmat_spam[1,2])
5 | ## [1] 0.9496403
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00154_example_6.6_of_section_6.2.4.R:
--------------------------------------------------------------------------------
1 | # example 6.6 of section 6.2.4
2 | # (example 6.6 of section 6.2.4) : Choosing and evaluating models : Evaluating models : Evaluating scoring models
3 | # Title: Fit the cricket model and make predictions
4 |
5 | crickets <- read.csv("cricketchirps/crickets.csv")
6 |
7 | cricket_model <- lm(temperatureF ~ chirp_rate, data=crickets)
8 | crickets$temp_pred <- predict(cricket_model, newdata=crickets)
9 |
10 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00155_example_6.7_of_section_6.2.4.R:
--------------------------------------------------------------------------------
1 | # example 6.7 of section 6.2.4
2 | # (example 6.7 of section 6.2.4) : Choosing and evaluating models : Evaluating models : Evaluating scoring models
3 | # Title: Calculating RMSE
4 |
5 | error_sq <- (crickets$temp_pred - crickets$temperatureF)^2
6 | ( RMSE <- sqrt(mean(error_sq)) )
7 | ## [1] 3.564149
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00157_example_6.9_of_section_6.2.5.R:
--------------------------------------------------------------------------------
1 | # example 6.9 of section 6.2.5
2 | # (example 6.9 of section 6.2.5) : Choosing and evaluating models : Evaluating models : Evaluating probability models
3 | # Title: Making a double density plot
4 |
5 | library(WVPlots)
6 | DoubleDensityPlot(spamTest,
7 | xvar = "pred",
8 | truthVar = "spam",
9 | title = "Distribution of scores for spam filter")
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00158_example_6.10_of_section_6.2.5.R:
--------------------------------------------------------------------------------
1 | # example 6.10 of section 6.2.5
2 | # (example 6.10 of section 6.2.5) : Choosing and evaluating models : Evaluating models : Evaluating probability models
3 | # Title: Plotting the receiver operating characteristic curve
4 |
5 | library(WVPlots)
6 | ROCPlot(spamTest, # Note: 1
7 | xvar = 'pred',
8 | truthVar = 'spam',
9 | truthTarget = 'spam',
10 | title = 'Spam filter test performance')
11 |
12 | library(sigr)
13 | calcAUC(spamTest$pred, spamTest$spam=='spam') # Note: 2
14 | ## [1] 0.9660072
15 |
16 | # Note 1:
17 | # Plot the receiver operating characteristic (ROC) curve.
18 |
19 | # Note 2:
20 | # Calculate the area under the ROC curve explicitly.
21 |
22 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00160_example_6.12_of_section_6.2.5.R:
--------------------------------------------------------------------------------
1 | # example 6.12 of section 6.2.5
2 | # (example 6.12 of section 6.2.5) : Choosing and evaluating models : Evaluating models : Evaluating probability models
3 | # Title: Computing the null model’s log likelihood
4 |
5 | (pNull <- mean(spamTrain$spam == 'spam'))
6 | ## [1] 0.3941588
7 |
8 | sum(ylogpy(y, pNull) + ylogpy(1-y, 1-pNull))
9 | ## [1] -306.8964
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00161_example_6.13_of_section_6.2.5.R:
--------------------------------------------------------------------------------
1 | # example 6.13 of section 6.2.5
2 | # (example 6.13 of section 6.2.5) : Choosing and evaluating models : Evaluating models : Evaluating probability models
3 | # Title: Computing the deviance and pseudo R-squared
4 |
5 | library(sigr)
6 |
7 | (deviance <- calcDeviance(spamTest$pred, spamTest$spam == 'spam'))
8 | ## [1] 253.8598
9 | (nullDeviance <- calcDeviance(pNull, spamTest$spam == 'spam'))
10 | ## [1] 613.7929
11 |
12 | (pseudoR2 <- 1 - deviance/nullDeviance)
13 | ## [1] 0.586408
14 |
15 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00163_example_6.15_of_section_6.3.2.R:
--------------------------------------------------------------------------------
1 | # example 6.15 of section 6.3.2
2 | # (example 6.15 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example
3 | # Title: Fit a model to the iris training data
4 |
5 | source("lime_iris_example.R") # Note: 1
6 |
7 | input <- as.matrix(train[, 1:4]) # Note: 2
8 | model <- fit_iris_example(input, train$class)
9 |
10 | # Note 1:
11 | # Load the convenience function.
12 |
13 | # Note 2:
14 | # The input to the model is the first four
15 | # columns of the training data, converted to a
16 | # matrix.
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00165_example_6.17_of_section_6.3.2.R:
--------------------------------------------------------------------------------
1 | # example 6.17 of section 6.3.2
2 | # (example 6.17 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example
3 | # Title: Build a LIME explainer from the model and training data
4 |
5 | library(lime)
6 | explainer <- lime(train[,1:4], # Note: 1
7 | model = model,
8 | bin_continuous = TRUE, # Note: 2
9 | n_bins = 10) # Note: 3
10 |
11 | # Note 1:
12 | # Build the explainer from the training data.
13 |
14 | # Note 2:
15 | # Bin the continuous variables when making explanations.
16 |
17 | # Note 3:
18 | # Use 10 bins.
19 |
20 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00167_example_6.19_of_section_6.3.2.R:
--------------------------------------------------------------------------------
1 | # example 6.19 of section 6.3.2
2 | # (example 6.19 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example
3 | # Title: Explain the iris example
4 |
5 | explanation <- lime::explain(example,
6 | explainer,
7 | n_labels = 1, # Note: 1
8 | n_features = 4) # Note: 2
9 |
10 | # Note 1:
11 | # The number of labels to explain; use 1 for binary classification.
12 |
13 | # Note 2:
14 | # The number of features to use when fitting the explanation.
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00168_informalexample_6.8_of_section_6.3.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 6.8 of section 6.3.2
2 | # (informalexample 6.8 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example
3 |
4 | plot_features(explanation)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00169_informalexample_6.9_of_section_6.3.2.txt:
--------------------------------------------------------------------------------
1 | # informalexample 6.9 of section 6.3.2
2 | # (informalexample 6.9 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example
3 |
4 | Sepal.Length Sepal.Width Petal.Length Petal.Width
5 | 5.1 3.5 1.4 0.2
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00170_informalexample_6.10_of_section_6.3.2.txt:
--------------------------------------------------------------------------------
1 | # informalexample 6.10 of section 6.3.2
2 | # (informalexample 6.10 of section 6.3.2) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Walking through LIME: a small example
3 |
4 | Sepal.Length Sepal.Width Petal.Length Petal.Width
5 | 5.505938 3.422535 1.3551 0.4259682
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00175_example_6.22_of_section_6.3.4.R:
--------------------------------------------------------------------------------
1 | # example 6.22 of section 6.3.4
2 | # (example 6.22 of section 6.3.4) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Train the text classifier
3 | # Title: Convert the texts and fit the model
4 |
5 | source("lime_imdb_example.R")
6 |
7 | vocab <- create_pruned_vocabulary(texts) # Note: 1
8 | dtm_train <- make_matrix(texts, vocab) # Note: 2
9 | model <- fit_imdb_model(dtm_train, labels) # Note: 3
10 |
11 | # Note 1:
12 | # Create the vocabulary from the training data.
13 |
14 | # Note 2:
15 | # Create the document-term matrix of the training corpus.
16 |
17 | # Note 3:
18 | # Train the model.
19 |
20 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00177_example_6.24_of_section_6.3.5.R:
--------------------------------------------------------------------------------
1 | # example 6.24 of section 6.3.5
2 | # (example 6.24 of section 6.3.5) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Explaining the classifier’s predictions
3 | # Title: Build an explainer for a text classifier
4 |
5 | explainer <- lime(texts, model = model,
6 | preprocess = function(x) make_matrix(x, vocab))
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00179_example_6.26_of_section_6.3.5.R:
--------------------------------------------------------------------------------
1 | # example 6.26 of section 6.3.5
2 | # (example 6.26 of section 6.3.5) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Explaining the classifier’s predictions
3 | # Title: Explain the model's prediction
4 |
5 | explanation <- lime::explain(sample_case,
6 | explainer,
7 | n_labels = 1,
8 | n_features = 5)
9 |
10 | plot_features(explanation)
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00180_informalexample_6.13_of_section_6.3.5.R:
--------------------------------------------------------------------------------
1 | # informalexample 6.13 of section 6.3.5
2 | # (informalexample 6.13 of section 6.3.5) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Explaining the classifier’s predictions
3 |
4 | plot_text_explanations(explanation)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c06_Choosing_and_evaluating_models/00182_informalexample_6.14_of_section_6.3.5.R:
--------------------------------------------------------------------------------
1 | # informalexample 6.14 of section 6.3.5
2 | # (informalexample 6.14 of section 6.3.5) : Choosing and evaluating models : Local Interpretable Model-Agnostic Explanations (LIME) for explaining model predictions : Explaining the classifier’s predictions
3 |
4 | predict(model, newdata=make_matrix(sample_cases[2], vocab))
5 | ## [1] 0.6052929
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00183_informalexample_7.1_of_section_7.1.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.1 of section 7.1.1
2 | # (informalexample 7.1 of section 7.1.1) : Linear and logistic regression : Using linear regression : Understanding linear regression
3 |
4 | pounds_lost = b0 + b.cals * daily_cals_down
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00184_informalexample_7.2_of_section_7.1.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.2 of section 7.1.1
2 | # (informalexample 7.2 of section 7.1.1) : Linear and logistic regression : Using linear regression : Understanding linear regression
3 |
4 | pounds_lost[i] = b0 + b.cals * daily_cals_down[i] + b.exercise * daily_exercise[i]
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00185_equation_7.1_of_section_7.1.1.math:
--------------------------------------------------------------------------------
1 | # equation 7.1 of section 7.1.1
2 | # (equation 7.1 of section 7.1.1) : Linear and logistic regression : Using linear regression : Understanding linear regression
3 | # Title: Equation 7.1 The expression for a linear regression model
4 |
5 | y[i] ~ f(x[i,]) + e[i] = b[0] + b[1] * x[i,1] + ... + b[n] * x[i,n] + e[i]
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00186_informalexample_7.3_of_section_7.1.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.3 of section 7.1.1
2 | # (informalexample 7.3 of section 7.1.1) : Linear and logistic regression : Using linear regression : Understanding linear regression
3 |
4 | x[i]^2 nearly equals b[0] + b[1] * x[i]
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00187_informalexample_7.4_of_section_7.1.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.4 of section 7.1.1
2 | # (informalexample 7.4 of section 7.1.1) : Linear and logistic regression : Using linear regression : Understanding linear regression
3 |
4 | x[i]^2 nearly equals -22 + 11 * x[i]
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00190_example_7.3_of_section_7.1.3.R:
--------------------------------------------------------------------------------
1 | # example 7.3 of section 7.1.3
2 | # (example 7.3 of section 7.1.3) : Linear and logistic regression : Using linear regression : Making predictions
3 | # Title: Plotting residual log income as a function of predicted log income
4 |
5 | ggplot(data = dtest, aes(x = predLogPINCP,
6 | y = predLogPINCP - log10(PINCP))) +
7 | geom_point(alpha = 0.2, color = "darkgray") +
8 | geom_smooth(color = "darkblue") +
9 | ylab("residual error (prediction - actual)")
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00191_example_7.4_of_section_7.1.3.R:
--------------------------------------------------------------------------------
1 | # example 7.4 of section 7.1.3
2 | # (example 7.4 of section 7.1.3) : Linear and logistic regression : Using linear regression : Making predictions
3 | # Title: Computing R-squared
4 |
5 | rsq <- function(y, f) { rss <- sum((y - f)^2); tss <- sum((y - mean(y))^2); 1 - rss / tss }  # 1 - (residual sum of squares / total sum of squares about mean(y))
6 |
7 | rsq(log10(dtrain$PINCP), dtrain$predLogPINCP) # Note: 1
8 | ## [1] 0.2976165
9 |
10 | rsq(log10(dtest$PINCP), dtest$predLogPINCP) # Note: 2
11 | ## [1] 0.2911965
12 |
13 | # Note 1:
14 | # R-squared of the model on the training data
15 |
16 | # Note 2:
17 | # R-squared of the model on the test data
18 |
19 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00192_example_7.5_of_section_7.1.3.R:
--------------------------------------------------------------------------------
1 | # example 7.5 of section 7.1.3
2 | # (example 7.5 of section 7.1.3) : Linear and logistic regression : Using linear regression : Making predictions
3 | # Title: Calculating root mean square error
4 |
5 | rmse <- function(y, f) { err <- y - f; sqrt(mean(err^2)) }  # square root of the mean squared difference between y and f
6 |
7 | rmse(log10(dtrain$PINCP), dtrain$predLogPINCP) # Note: 1
8 | ## [1] 0.2685855
9 |
10 | rmse(log10(dtest$PINCP), dtest$predLogPINCP) # Note: 2
11 | ## [1] 0.2675129
12 |
13 | # Note 1:
14 | # RMSE of the model on the training data
15 |
16 | # Note 2:
17 | # RMSE of the model on the test data
18 |
19 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00193_informalexample_7.5_of_section_7.1.4.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.5 of section 7.1.4
2 | # (informalexample 7.5 of section 7.1.4) : Linear and logistic regression : Using linear regression : Finding relations and extracting advice
3 |
4 | log10(income_bachelors) = log10(income_no_hs_degree) + 0.36
5 | log10(income_bachelors) - log10(income_no_hs_degree) = 0.36
6 | (income_bachelors) / (income_no_hs_degree) = 10^(0.36)
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00194_informalexample_7.6_of_section_7.1.4.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.6 of section 7.1.4
2 | # (informalexample 7.6 of section 7.1.4) : Linear and logistic regression : Using linear regression : Finding relations and extracting advice
3 |
4 | log10(income_bachelors) - log10(income_no_hs_degree) = 0.36
5 | log10(income_hs) - log10(income_no_hs_degree) = 0.11
6 |
7 | log10(income_bachelors) - log10(income_hs) = 0.36 - 0.11 # Note: 1
8 | (income_bachelors) / (income_hs) = 10^(0.36 - 0.11)
9 |
10 | # Note 1:
11 | # Subtract the second equation from the first
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00195_informalexample_7.7_of_section_7.1.5.txt:
--------------------------------------------------------------------------------
1 | # informalexample 7.7 of section 7.1.5
2 | # (informalexample 7.7 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality
3 |
4 | Call:
5 | lm(formula = log10(PINCP) ~ AGEP + SEX + COW + SCHL,
6 | data = dtrain)
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00196_informalexample_7.8_of_section_7.1.5.txt:
--------------------------------------------------------------------------------
1 | # informalexample 7.8 of section 7.1.5
2 | # (informalexample 7.8 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality
3 |
4 | Residuals:
5 | Min 1Q Median 3Q Max
6 | -1.5038 -0.1354 0.0187 0.1710 0.9741
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00197_example_7.6_of_section_7.1.5.R:
--------------------------------------------------------------------------------
1 | # example 7.6 of section 7.1.5
2 | # (example 7.6 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality
3 | # Title: Summarizing residuals
4 |
5 | ( resids_train <- summary(log10(dtrain$PINCP) - predict(model, newdata = dtrain)) )
6 | ## Min. 1st Qu. Median Mean 3rd Qu. Max.
7 | ## -1.5038 -0.1354 0.0187 0.0000 0.1710 0.9741
8 |
9 | ( resids_test <- summary(log10(dtest$PINCP) - predict(model, newdata = dtest)) )
10 | ## Min. 1st Qu. Median Mean 3rd Qu. Max.
11 | ## -1.789150 -0.130733 0.027413 0.006359 0.175847 0.912646
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00198_informalexample_7.9_of_section_7.1.5.txt:
--------------------------------------------------------------------------------
1 | # informalexample 7.9 of section 7.1.5
2 | # (informalexample 7.9 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality
3 |
4 | Residual standard error: 0.2688 on 11186 degrees of freedom
5 | Multiple R-squared: 0.2976, Adjusted R-squared: 0.2966
6 | F-statistic: 296.2 on 16 and 11186 DF, p-value: < 2.2e-16
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00199_informalexample_7.10_of_section_7.1.5.R:
--------------------------------------------------------------------------------
1 | # informalexample 7.10 of section 7.1.5
2 | # (informalexample 7.10 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality
3 |
4 | (df <- nrow(dtrain) - nrow(summary(model)$coefficients) )
5 | ## [1] 11186
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00200_informalexample_7.11_of_section_7.1.5.R:
--------------------------------------------------------------------------------
1 | # informalexample 7.11 of section 7.1.5
2 | # (informalexample 7.11 of section 7.1.5) : Linear and logistic regression : Using linear regression : Reading the model summary and characterizing coefficient quality
3 |
4 | (modelResidualError <- sqrt(sum(residuals(model)^2) / df))
5 | ## [1] 0.2687895
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00201_informalexample_7.12_of_section_7.2.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.12 of section 7.2.1
2 | # (informalexample 7.12 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression
3 |
4 | odds[flight_delayed] = P[flight_delayed == TRUE] / P[flight_delayed == FALSE]
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00202_informalexample_7.13_of_section_7.2.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.13 of section 7.2.1
2 | # (informalexample 7.13 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression
3 |
4 | log_odds[flight_delayed] = log(P[flight_delayed == TRUE] / P[flight_delayed == FALSE])
5 |
6 | Let: p = P[flight_delayed == TRUE]; then
7 | log_odds[flight_delayed] = log( p / (1 - p) )
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00203_informalexample_7.14_of_section_7.2.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 7.14 of section 7.2.1
2 | # (informalexample 7.14 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression
3 |
4 | logit <- function(p) { log(p/(1-p)) }
5 | s <- function(x) { 1/(1 + exp(-x))}
6 |
7 | s(logit(0.7))
8 | # [1] 0.7
9 |
10 | logit(s(-2))
11 | # [1] -2
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00204_informalexample_7.15_of_section_7.2.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.15 of section 7.2.1
2 | # (informalexample 7.15 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression
3 |
4 | logit(P[flight_delayed[i] == TRUE]) = b0 + b_origin * origin[i] + ...
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00205_informalexample_7.16_of_section_7.2.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.16 of section 7.2.1
2 | # (informalexample 7.16 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression
3 |
4 | P[flight_delayed[i] == TRUE] = s(b0 + b_origin * origin[i] + ...)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00206_equation_7.2_of_section_7.2.1.math:
--------------------------------------------------------------------------------
1 | # equation 7.2 of section 7.2.1
2 | # (equation 7.2 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression
3 | # Title: Equation 7.2 The expression for a logistic regression model
4 |
5 | P[y[i] in class of interest] ~ f(x[i,]) = s(a + b[1] * x[i,1] + ... + b[n] * x[i,n])
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00207_example_7.7_of_section_7.2.1.R:
--------------------------------------------------------------------------------
1 | # example 7.7 of section 7.2.1
2 | # (example 7.7 of section 7.2.1) : Linear and logistic regression : Using logistic regression : Understanding logistic regression
3 | # Title: Loading the CDC data
4 |
5 | load("NatalRiskData.rData")
6 | train <- sdata[sdata$ORIGRANDGROUP <= 5 , ]
7 | test <- sdata[sdata$ORIGRANDGROUP > 5, ]
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00208_example_7.8_of_section_7.2.2.R:
--------------------------------------------------------------------------------
1 | # example 7.8 of section 7.2.2
2 | # (example 7.8 of section 7.2.2) : Linear and logistic regression : Using logistic regression : Building a logistic regression model
3 | # Title: Building the model formula
4 |
5 | complications <- c("ULD_MECO","ULD_PRECIP","ULD_BREECH")
6 | riskfactors <- c("URF_DIAB", "URF_CHYPER", "URF_PHYPER",
7 | "URF_ECLAM")
8 | y <- "atRisk"
9 | x <- c("PWGT",
10 | "UPREVIS",
11 | "CIG_REC",
12 | "GESTREC3",
13 | "DPLURAL",
14 | complications,
15 | riskfactors)
16 | library(wrapr)
17 | fmla <- mk_formula(y, x)
18 |
19 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00209_example_7.9_of_section_7.2.2.R:
--------------------------------------------------------------------------------
1 | # example 7.9 of section 7.2.2
2 | # (example 7.9 of section 7.2.2) : Linear and logistic regression : Using logistic regression : Building a logistic regression model
3 | # Title: Fitting the logistic regression model
4 |
5 | print(fmla)
6 |
7 | ## atRisk ~ PWGT + UPREVIS + CIG_REC + GESTREC3 + DPLURAL + ULD_MECO +
8 | ## ULD_PRECIP + ULD_BREECH + URF_DIAB + URF_CHYPER + URF_PHYPER +
9 | ## URF_ECLAM
10 | ##
11 |
12 | model <- glm(fmla, data = train, family = binomial(link = "logit"))
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00210_example_7.10_of_section_7.2.3.R:
--------------------------------------------------------------------------------
1 | # example 7.10 of section 7.2.3
2 | # (example 7.10 of section 7.2.3) : Linear and logistic regression : Using logistic regression : Making predictions
3 | # Title: Applying the logistic regression model
4 |
5 | train$pred <- predict(model, newdata=train, type = "response")
6 | test$pred <- predict(model, newdata=test, type="response")
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00212_example_7.12_of_section_7.2.3.R:
--------------------------------------------------------------------------------
1 | # example 7.12 of section 7.2.3
2 | # (example 7.12 of section 7.2.3) : Linear and logistic regression : Using logistic regression : Making predictions
3 | # Title: Plotting distribution of prediction score grouped by known outcome
4 |
5 | library(WVPlots)
6 | DoubleDensityPlot(train, "pred", "atRisk",
7 | title = "Distribution of natality risk scores")
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00216_informalexample_7.17_of_section_7.2.4.math:
--------------------------------------------------------------------------------
1 | # informalexample 7.17 of section 7.2.4
2 | # (informalexample 7.17 of section 7.2.4) : Linear and logistic regression : Using logistic regression : Finding relations and extracting advice from logistic models
3 |
4 | p = odds * (1 - p) = odds - p * odds
5 | p * (1 + odds) = odds
6 | p = odds/(1 + odds)
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00218_informalexample_7.18_of_section_7.2.5.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.18 of section 7.2.5
2 | # (informalexample 7.18 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients
3 |
4 | Call:
5 | glm(formula = fmla, family = binomial(link = "logit"), data = train)
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00219_informalexample_7.19_of_section_7.2.5.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.19 of section 7.2.5
2 | # (informalexample 7.19 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients
3 |
4 | Deviance Residuals:
5 | Min 1Q Median 3Q Max
6 | -0.9732 -0.1818 -0.1511 -0.1358 3.2641
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00221_informalexample_7.21_of_section_7.2.5.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.21 of section 7.2.5
2 | # (informalexample 7.21 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients
3 |
4 | Null deviance: 2698.7 on 14211 degrees of freedom
5 | Residual deviance: 2463.0 on 14198 degrees of freedom
6 | AIC: 2491
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00223_example_7.18_of_section_7.2.5.R:
--------------------------------------------------------------------------------
1 | # example 7.18 of section 7.2.5
2 | # (example 7.18 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients
3 | # Title: Calculating the pseudo R-squared
4 |
5 | pr2 <- 1 - (resid.dev / null.dev)
6 |
7 | print(pr2)
8 | ## [1] 0.08734674
9 | pr2.test <- 1 - (resid.dev.test / null.dev.test)
10 | print(pr2.test)
11 | ## [1] 0.07760427
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00224_informalexample_7.22_of_section_7.2.5.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.22 of section 7.2.5
2 | # (informalexample 7.22 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients
3 |
4 | df.null = dim(train)[[1]] - 1
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00225_informalexample_7.23_of_section_7.2.5.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.23 of section 7.2.5
2 | # (informalexample 7.23 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients
3 |
4 | df.model = dim(train)[[1]] - length(model$coefficients)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00227_example_7.20_of_section_7.2.5.R:
--------------------------------------------------------------------------------
1 | # example 7.20 of section 7.2.5
2 | # (example 7.20 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients
3 | # Title: Calculating the Akaike information criterion
4 |
5 | aic <- 2 * (length(model$coefficients) -
6 | loglikelihood(as.numeric(train$atRisk), pred))
7 | aic
8 | ## [1] 2490.992
9 |
10 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00228_informalexample_7.24_of_section_7.2.5.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.24 of section 7.2.5
2 | # (informalexample 7.24 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients
3 |
4 | Number of Fisher Scoring iterations: 7
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00229_informalexample_7.25_of_section_7.2.5.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.25 of section 7.2.5
2 | # (informalexample 7.25 of section 7.2.5) : Linear and logistic regression : Using logistic regression : Reading the model summary and characterizing coefficients
3 |
4 | Warning message:
5 | glm.fit: fitted probabilities numerically 0 or 1 occurred
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00231_example_7.22_of_section_7.3.1.R:
--------------------------------------------------------------------------------
1 | # example 7.22 of section 7.3.1
2 | # (example 7.22 of section 7.3.1) : Linear and logistic regression : Regularization : An example of quasi-separation
3 | # Title: Fitting a logistic regression model
4 |
5 | library(wrapr)
6 | (fmla <- mk_formula(outcome, vars) )
7 |
8 | ## fail ~ car_price + maint_price + doors + persons + lug_boot +
9 | ## safety
10 | ##
11 |
12 | model_glm <- glm(fmla,
13 | data = cars_train,
14 | family = binomial)
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00232_informalexample_7.26_of_section_7.3.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 7.26 of section 7.3.1
2 | # (informalexample 7.26 of section 7.3.1) : Linear and logistic regression : Regularization : An example of quasi-separation
3 |
4 | ## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00234_example_7.24_of_section_7.3.1.R:
--------------------------------------------------------------------------------
1 | # example 7.24 of section 7.3.1
2 | # (example 7.24 of section 7.3.1) : Linear and logistic regression : Regularization : An example of quasi-separation
3 | # Title: Looking at the logistic model’s coefficients
4 |
5 | coefs <- coef(model_glm)[-1] # Note: 1
6 | coef_frame <- data.frame(coef = names(coefs),
7 | value = coefs)
8 |
9 | library(ggplot2)
10 | ggplot(coef_frame, aes(x = coef, y = value)) +
11 | geom_pointrange(aes(ymin = 0, ymax = value)) +
12 | ggtitle("Coefficients of logistic regression model") +
13 | coord_flip()
14 |
15 | # Note 1:
16 | # Get the coefficients (except the intercept)
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00236_informalexample_7.27_of_section_7.3.2.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.27 of section 7.3.2
2 | # (informalexample 7.27 of section 7.3.2) : Linear and logistic regression : Regularization : The types of regularized regression
3 |
4 | f(x[i,]) = b[0] + b[1] * x[i,1] + ... + b[n] * x[i,n]
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00237_informalexample_7.28_of_section_7.3.2.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.28 of section 7.3.2
2 | # (informalexample 7.28 of section 7.3.2) : Linear and logistic regression : Regularization : The types of regularized regression
3 |
4 | (y - f(x))^2 + lambda * (b[1]^2 + ... + b[n]^2)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00238_informalexample_7.29_of_section_7.3.2.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.29 of section 7.3.2
2 | # (informalexample 7.29 of section 7.3.2) : Linear and logistic regression : Regularization : The types of regularized regression
3 |
4 | (y - f(x))^2 + lambda * ( abs(b[1]) + abs(b[2]) + ... + abs(b[n]) )
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00239_informalexample_7.30_of_section_7.3.2.text:
--------------------------------------------------------------------------------
1 | # informalexample 7.30 of section 7.3.2
2 | # (informalexample 7.30 of section 7.3.2) : Linear and logistic regression : Regularization : The types of regularized regression
3 |
4 | (1 - alpha) * (b[1]^2 + ... + b[n]^2) +
5 | alpha * ( abs(b[1]) + abs(b[2]) + ... + abs(b[n]) )
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00243_informalexample_7.31_of_section_7.3.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 7.31 of section 7.3.3
2 | # (informalexample 7.31 of section 7.3.3) : Linear and logistic regression : Regularization : Regularized regression with glmnet
3 |
4 | prediction <- predict(model_ridge,
5 | newdata = cars_test,
6 | type="response",
7 | s = model_ridge$lambda.min)
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00245_example_7.30_of_section_7.3.3.R:
--------------------------------------------------------------------------------
1 | # example 7.30 of section 7.3.3
2 | # (example 7.30 of section 7.3.3) : Linear and logistic regression : Regularization : Regularized regression with glmnet
3 | # Title: The lasso model's test performance
4 |
5 | ## $confusion_matrix
6 | ## prediction
7 | ## truth passed unacceptable
8 | ## passed 150 9
9 | ## unacceptable 17 323
10 | ##
11 | ## $accuracy
12 | ## [1] 0.9478958
13 | ##
14 | ## $deviance
15 | ## [1] 112.7308
16 |
17 |
--------------------------------------------------------------------------------
/CodeExamples/c07_Linear_and_logistic_regression/00246_example_7.31_of_section_7.3.3.R:
--------------------------------------------------------------------------------
1 | # example 7.31 of section 7.3.3
2 | # (example 7.31 of section 7.3.3) : Linear and logistic regression : Regularization : Regularized regression with glmnet
3 | # Title: Crossvalidating for both alpha and lambda
4 |
5 | (elastic_net <- cva.glmnet(fmla,
6 | cars_train,
7 | family = "binomial"))
8 | ## Call:
9 | ## cva.glmnet.formula(formula = fmla, data = cars_train, family = "binomial")
10 | ##
11 | ## Model fitting options:
12 | ## Sparse model matrix: FALSE
13 | ## Use model.frame: FALSE
14 | ## Alpha values: 0 0.001 0.008 0.027 0.064 0.125 0.216 0.343 0.512 0.729 1
15 | ## Number of crossvalidation folds for lambda: 10
16 |
17 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00250_informalexample_8.1_of_section_8.2.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.1 of section 8.2.1
2 | # (informalexample 8.1 of section 8.2.1) : Advanced data preparation : KDD and KDD Cup 2009 : Getting started with KDD Cup 2009 data
3 |
4 | outcome_summary <- table(
5 | churn = dTrain[, outcome], # Note: 1
6 | useNA = 'ifany') # Note: 2
7 |
8 | knitr::kable(outcome_summary)
9 |
10 | # Note 1:
11 | # Tabulate levels of churn outcome.
12 |
13 | # Note 2:
14 | # Include NA values in tabulation.
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00251_informalexample_8.2_of_section_8.2.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.2 of section 8.2.1
2 | # (informalexample 8.2 of section 8.2.1) : Advanced data preparation : KDD and KDD Cup 2009 : Getting started with KDD Cup 2009 data
3 |
4 | outcome_summary["1"] / sum(outcome_summary) # Note: 1
5 | # 1
6 | # 0.07347764
7 |
8 | # Note 1:
9 | # Estimate observed churn rate or prevalence.
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00254_informalexample_8.3_of_section_8.2.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.3 of section 8.2.2
2 | # (informalexample 8.3 of section 8.2.2) : Advanced data preparation : KDD and KDD Cup 2009 : The bull in the china shop approach
3 |
4 | head(dTrainAll$Var200)
5 | # [1] vynJTq9 0v21jmy
6 | # 15415 Levels: _84etK_ _9bTOWp _A3VKFm _bq4Nkb _ct4nkXBMp ... zzQ9udm
7 |
8 | length(unique(dTrainAll$Var200))
9 | # [1] 14391
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00256_example_8.5_of_section_8.3.R:
--------------------------------------------------------------------------------
1 | # example 8.5 of section 8.3
2 | # (example 8.5 of section 8.3) : Advanced data preparation : Basic data preparation for classification
3 | # Title: Preparing data with vtreat
4 |
5 | dTrain_treated <- prepare(treatment_plan,
6 | dTrain,
7 | parallelCluster = parallel_cluster)
8 |
9 | head(colnames(dTrain))
10 | ## [1] "Var1" "Var2" "Var3" "Var4" "Var5" "Var6"
11 | head(colnames(dTrain_treated)) # Note: 1
12 | ## [1] "Var1" "Var1_isBAD" "Var2" "Var2_isBAD" "Var3"
13 | ## [6] "Var3_isBAD"
14 |
15 | # Note 1:
16 | # Compare the columns of the original dTrain data to its treated counterpart.
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00259_informalexample_8.6_of_section_8.3.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.6 of section 8.3.1
2 | # (informalexample 8.6 of section 8.3.1) : Advanced data preparation : Basic data preparation for classification : The variable score frame
3 |
4 | comparison <- data.frame(original218 = dTrain$Var218,
5 | impact218 = dTrain_treated$Var218_catB)
6 |
7 | head(comparison)
8 | ## original218 impact218
9 | ## 1 cJvF -0.2180735
10 | ## 2 1.5155125
11 | ## 3 UYBR 0.1221393
12 | ## 4 UYBR 0.1221393
13 | ## 5 UYBR 0.1221393
14 | ## 6 UYBR 0.1221393
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00261_informalexample_8.8_of_section_8.3.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.8 of section 8.3.1
2 | # (informalexample 8.8 of section 8.3.1) : Advanced data preparation : Basic data preparation for classification : The variable score frame
3 |
4 | score_frame[score_frame$origName == "Var200", , drop = FALSE]
5 |
6 | # varName varMoves rsq sig needsSplit extraModelDegrees origName code
7 | # 361 Var200_catP TRUE 0.005729835 4.902546e-28 TRUE 13323 Var200 catP
8 | # 362 Var200_catB TRUE 0.001476298 2.516703e-08 TRUE 13323 Var200 catB
9 | # 428 Var200_lev_NA TRUE 0.005729838 4.902365e-28 FALSE 0 Var200 lev
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00262_informalexample_8.9_of_section_8.3.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.9 of section 8.3.2
2 | # (informalexample 8.9 of section 8.3.2) : Advanced data preparation : Basic data preparation for classification : Properly using the treatment plan
3 |
4 | dCal_treated <- prepare(treatment_plan,
5 | dCal,
6 | parallelCluster = parallel_cluster)
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00263_informalexample_8.10_of_section_8.3.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.10 of section 8.3.2
2 | # (informalexample 8.10 of section 8.3.2) : Advanced data preparation : Basic data preparation for classification : Properly using the treatment plan
3 |
4 | library("sigr")
5 |
6 | calcAUC(dTrain_treated$Var200_catB, dTrain_treated$churn)
7 |
8 | # [1] 0.8279249
9 |
10 | calcAUC(dCal_treated$Var200_catB, dCal_treated$churn)
11 |
12 | # [1] 0.5505401
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00265_informalexample_8.11_of_section_8.4.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.11 of section 8.4.1
2 | # (informalexample 8.11 of section 8.4.1) : Advanced data preparation : Advanced data preparation for classification : Using mkCrossFrameCExperiment()
3 |
4 | library("sigr")
5 |
6 | calcAUC(dTrainAll_treated$Var200_catB, dTrainAll_treated$churn)
7 |
8 | # [1] 0.5450466
9 |
10 | calcAUC(dTest_treated$Var200_catB, dTest_treated$churn)
11 |
12 | # [1] 0.5290295
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00269_informalexample_8.14_of_section_8.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.14 of section 8.4.2
2 | # (informalexample 8.14 of section 8.4.2) : Advanced data preparation : Advanced data preparation for classification : Building a model
3 |
4 | table(prediction = dTest_treated$glm_pred >= 0.5,
5 | truth = dTest$churn)
6 | # truth
7 | # prediction -1 1
8 | # FALSE 4591 375
9 | # TRUE 8 1
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00270_informalexample_8.15_of_section_8.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.15 of section 8.4.2
2 | # (informalexample 8.15 of section 8.4.2) : Advanced data preparation : Advanced data preparation for classification : Building a model
3 |
4 | table(prediction = dTest_treated$glm_pred>0.15,
5 | truth = dTest$churn)
6 | # truth
7 | # prediction -1 1
8 | # FALSE 4243 266
9 | # TRUE 356 110
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00271_informalexample_8.16_of_section_8.4.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.16 of section 8.4.2
2 | # (informalexample 8.16 of section 8.4.2) : Advanced data preparation : Advanced data preparation for classification : Building a model
3 |
4 | WVPlots::DoubleDensityPlot(dTest_treated, "glm_pred", "churn",
5 | "glm prediction on test, double density plot")
6 |
7 | WVPlots::PRTPlot(dTest_treated, "glm_pred", "churn",
8 | "glm prediction on test, enrichment plot",
9 | truthTarget = 1,
10 | plotvars = c("enrichment", "recall"),
11 | thresholdrange = c(0, 1.0))
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00272_informalexample_8.17_of_section_8.5.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.17 of section 8.5
2 | # (informalexample 8.17 of section 8.5) : Advanced data preparation : Preparing data for regression modeling
3 |
4 | auto_mpg <- readRDS('auto_mpg.RDS')
5 |
6 | knitr::kable(head(auto_mpg)) # Note: 1
7 |
8 | # Note 1:
9 | # Take a quick look at the data.
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00275_informalexample_8.20_of_section_8.6.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.20 of section 8.6.2
2 | # (informalexample 8.20 of section 8.6.2) : Advanced data preparation : Mastering the vtreat package : Missing values
3 |
4 | library("wrapr") # Note: 1
5 |
6 | d <- build_frame(
7 | "x1" , "x2" , "x3", "y" |
8 | 1 , "a" , 6 , 10 |
9 | NA_real_, "b" , 7 , 20 |
10 | 3 , NA_character_, 8 , 30 )
11 |
12 | knitr::kable(d)
13 |
14 | # Note 1:
15 | # Bring in the wrapr package for build_frame and the
16 | # wrapr “dot pipe”.
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00276_informalexample_8.21_of_section_8.6.2.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.21 of section 8.6.2
2 | # (informalexample 8.21 of section 8.6.2) : Advanced data preparation : Mastering the vtreat package : Missing values
3 |
4 | plan1 <- vtreat::design_missingness_treatment(d)
5 | vtreat::prepare(plan1, d) %.>% # Note: 1
6 | knitr::kable(.)
7 |
8 | # Note 1:
9 | # Here we are using wrapr’s dot pipe instead of
10 | # magrittr’s forward pipe. The dot pipe requires the
11 | # “explicit dot argument” notation discussed in
12 | # chapter 5.
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00279_informalexample_8.24_of_section_8.6.4.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.24 of section 8.6.4
2 | # (informalexample 8.24 of section 8.6.4) : Advanced data preparation : Mastering the vtreat package : Impact coding
3 |
4 | plan4 <- vtreat::designTreatmentsC(d,
5 | varlist = c("x1", "x2", "x3"),
6 | outcomename = "y",
7 | outcometarget = 20,
8 | codeRestriction = "catB",
9 | verbose = FALSE)
10 | vtreat::prepare(plan4, d)
11 | # x2_catB y
12 | # 1 -8.517343 10
13 | # 2 9.903538 20
14 | # 3 -8.517343 30
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00280_informalexample_8.25_of_section_8.6.5.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.25 of section 8.6.5
2 | # (informalexample 8.25 of section 8.6.5) : Advanced data preparation : Mastering the vtreat package : The treatment plan
3 |
4 | class(plan4)
5 | # [1] "treatmentplan"
6 |
7 | names(plan4)
8 |
9 | # [1] "treatments" "scoreFrame" "outcomename" "vtreatVersion" "outcomeType"
10 | # [6] "outcomeTarget" "meanY" "splitmethod"
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/c08_Advanced_data_preparation/00281_informalexample_8.26_of_section_8.6.5.R:
--------------------------------------------------------------------------------
1 | # informalexample 8.26 of section 8.6.5
2 | # (informalexample 8.26 of section 8.6.5) : Advanced data preparation : Mastering the vtreat package : The treatment plan
3 |
4 | plan4$scoreFrame
5 |
6 | # varName varMoves rsq sig needsSplit extraModelDegrees origName code
7 | # 1 x2_catB TRUE 1 0.0506719 TRUE 2 x2 catB
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00285_informalexample_9.1_of_section_9.1.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 9.1 of section 9.1.1
2 | # (informalexample 9.1 of section 9.1.1) : Unsupervised methods : Cluster analysis : Distances
3 |
4 | edist(x, y) <- sqrt((x[1] - y[1])^2 + (x[2] - y[2])^2 + ...)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00286_informalexample_9.2_of_section_9.1.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 9.2 of section 9.1.1
2 | # (informalexample 9.2 of section 9.1.1) : Unsupervised methods : Cluster analysis : Distances
3 |
4 | hdist(x, y) <- sum((x[1] != y[1]) + (x[2] != y[2]) + ...)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00287_informalexample_9.3_of_section_9.1.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 9.3 of section 9.1.1
2 | # (informalexample 9.3 of section 9.1.1) : Unsupervised methods : Cluster analysis : Distances
3 |
4 | mdist(x, y) <- sum(abs(x[1] - y[1]) + abs(x[2] - y[2]) + ...)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00288_informalexample_9.4_of_section_9.1.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 9.4 of section 9.1.1
2 | # (informalexample 9.4 of section 9.1.1) : Unsupervised methods : Cluster analysis : Distances
3 |
4 | dot(x, y) <- sum(x[1] * y[1] + x[2] * y[2] + ...)
5 | cossim(x, y) <- dot(x, y) / (sqrt(dot(x, x) * dot(y, y)))
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00291_example_9.3_of_section_9.1.3.R:
--------------------------------------------------------------------------------
1 | # example 9.3 of section 9.1.3
2 | # (example 9.3 of section 9.1.3) : Unsupervised methods : Cluster analysis : Hierarchical clustering with hclust
3 | # Title: Hierarchical clustering
4 |
5 | distmat <- dist(pmatrix, method = "euclidean") # Note: 1
6 | pfit <- hclust(distmat, method = "ward.D") # Note: 2
7 | plot(pfit, labels = protein$Country) # Note: 3
8 |
9 | # Note 1:
10 | # Create the distance matrix.
11 |
12 | # Note 2:
13 | # Do the clustering.
14 |
15 | # Note 3:
16 | # Plot the dendrogram.
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00292_informalexample_9.5_of_section_9.1.3.Rtxt:
--------------------------------------------------------------------------------
1 | # informalexample 9.5 of section 9.1.3
2 | # (informalexample 9.5 of section 9.1.3) : Unsupervised methods : Cluster analysis : Hierarchical clustering with hclust
3 |
4 | rect.hclust(pfit, k=5)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00298_informalexample_9.6_of_section_9.1.3.math:
--------------------------------------------------------------------------------
1 | # informalexample 9.6 of section 9.1.3
2 | # (informalexample 9.6 of section 9.1.3) : Unsupervised methods : Cluster analysis : Hierarchical clustering with hclust
3 |
4 | BSS = TSS - WSS
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00300_informalexample_9.7_of_section_9.1.3.math:
--------------------------------------------------------------------------------
1 | # informalexample 9.7 of section 9.1.3
2 | # (informalexample 9.7 of section 9.1.3) : Unsupervised methods : Cluster analysis : Hierarchical clustering with hclust
3 |
4 | W = WSS / (n - k)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00301_informalexample_9.8_of_section_9.1.3.math:
--------------------------------------------------------------------------------
1 | # informalexample 9.8 of section 9.1.3
2 | # (informalexample 9.8 of section 9.1.3) : Unsupervised methods : Cluster analysis : Hierarchical clustering with hclust
3 |
4 | B = BSS / (k - 1)
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00306_example_9.14_of_section_9.1.5.R:
--------------------------------------------------------------------------------
1 | # example 9.14 of section 9.1.5
2 | # (example 9.14 of section 9.1.5) : Unsupervised methods : Cluster analysis : Assigning new points to clusters
3 | # Title: A function to assign points to a cluster
4 |
5 | assign_cluster <- function(newpt, centers, xcenter = 0, xscale = 1) {
6 |   scaled_pt <- (newpt - xcenter) / xscale # Note: 1 (center and scale newpt)
7 |   center_dists <- apply(centers, 1, FUN = function(ctr) sqr_edist(ctr, scaled_pt)) # Note: 2 (one distance per row of centers)
8 |   which.min(center_dists) # Note: 3 (index of the smallest distance)
9 | }
10 |
11 | # Note 1:
12 | # Center and scale the new data point.
13 |
14 | # Note 2:
15 | # Calculate how far the new data point is from
16 | # each of the cluster centers.
17 |
18 | # Note 3:
19 | # Return the cluster number of the closest
20 | # centroid.
21 |
22 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00308_example_9.16_of_section_9.1.5.R:
--------------------------------------------------------------------------------
1 | # example 9.16 of section 9.1.5
2 | # (example 9.16 of section 9.1.5) : Unsupervised methods : Cluster analysis : Assigning new points to clusters
3 | # Title: Unscale the centers
4 |
5 | unscaled = scale(tclusters$centers, center = FALSE, scale = 1 / tscale)
6 | rm_scales(scale(unscaled, center = -tcenter, scale = FALSE))
7 |
8 | ## [,1] [,2] [,3]
9 | ## 1 9.8234797 -3.005977 4.7662651
10 | ## 2 -4.9749654 -4.862436 -5.0577002
11 | ## 3 0.8926698 1.185734 0.8336977
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00310_informalexample_9.9_of_section_9.2.2.txt:
--------------------------------------------------------------------------------
1 | # informalexample 9.9 of section 9.2.2
2 | # (informalexample 9.9 of section 9.2.2) : Unsupervised methods : Association rules : The example problem
3 |
4 | |token | userid| rating|title |
5 | |:---------------------|------:|------:|:---------------------|
6 | |always have popsicles | 172742| 0|Always Have Popsicles |
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00313_informalexample_9.10_of_section_9.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 9.10 of section 9.2.3
2 | # (informalexample 9.10 of section 9.2.3) : Unsupervised methods : Association rules : Mining association rules with the arules package
3 |
4 | basketSizes <- size(bookbaskets)
5 | summary(basketSizes)
6 | ## Min. 1st Qu. Median Mean 3rd Qu. Max.
7 | ## 1.0 1.0 1.0 11.1 4.0 10250.0
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00314_example_9.20_of_section_9.2.3.R:
--------------------------------------------------------------------------------
1 | # example 9.20 of section 9.2.3
2 | # (example 9.20 of section 9.2.3) : Unsupervised methods : Association rules : Mining association rules with the arules package
3 | # Title: Examining the size distribution
4 |
5 | quantile(basketSizes, probs = seq(0, 1, 0.1)) # Note: 1
6 | ## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
7 | ## 1 1 1 1 1 1 2 3 5 13 10253
8 | library(ggplot2) # Note: 2
9 | ggplot(data.frame(count = basketSizes)) +
10 | geom_density(aes(x = count)) +
11 | scale_x_log10()
12 |
13 | # Note 1:
14 | # Look at the basket size distribution, in 10%
15 | # increments.
16 |
17 | # Note 2:
18 | # Plot the distribution to get a better
19 | # look.
20 |
21 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00315_example_9.21_of_section_9.2.3.R:
--------------------------------------------------------------------------------
1 | # example 9.21 of section 9.2.3
2 | # (example 9.21 of section 9.2.3) : Unsupervised methods : Association rules : Mining association rules with the arules package
3 | # Title: Count how often each book occurs
4 |
5 | bookCount <- itemFrequency(bookbaskets, "absolute")
6 | summary(bookCount)
7 |
8 | ## Min. 1st Qu. Median Mean 3rd Qu. Max.
9 | ## 1.000 1.000 1.000 4.638 3.000 2502.000
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00317_informalexample_9.11_of_section_9.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 9.11 of section 9.2.3
2 | # (informalexample 9.11 of section 9.2.3) : Unsupervised methods : Association rules : Mining association rules with the arules package
3 |
4 | bookbaskets_use <- bookbaskets[basketSizes > 1]
5 | dim(bookbaskets_use)
6 | ## [1] 40822 220447
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c09_Unsupervised_methods/00320_example_9.25_of_section_9.2.3.R:
--------------------------------------------------------------------------------
1 | # example 9.25 of section 9.2.3
2 | # (example 9.25 of section 9.2.3) : Unsupervised methods : Association rules : Mining association rules with the arules package
3 | # Title: Get the five most confident rules
4 |
5 | library(magrittr) # Note: 1
6 |
7 | rules %>%
8 | sort(., by = "confidence") %>% # Note: 2
9 | head(., n = 5) %>% # Note: 3
10 | inspect(.) # Note: 4
11 |
12 | # Note 1:
13 | # Attach magrittr to get pipe notation.
14 |
15 | # Note 2:
16 | # Sort rules by confidence.
17 |
18 | # Note 3:
19 | # Get the first 5 rules.
20 |
21 | # Note 4:
22 | # Call inspect() to pretty-print the rules.
23 |
24 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00328_informalexample_10.2_of_section_10.1.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 10.2 of section 10.1.3
2 | # (informalexample 10.2 of section 10.1.3) : Exploring advanced methods : Tree-based methods : Using random forests to further improve prediction
3 |
4 | trainf <- rbind(trainperf_tree, trainperf_bag, trainperf_rf)
5 | pandoc.table(trainf, justify = perf_justify)
6 | ##
7 | ##
8 | ## model accuracy f1 dev.norm
9 | ## ---------------------- ---------- -------- ----------
10 | ## tree, training 0.8996 0.8691 0.6304
11 | ## bagging, training 0.9160 0.8906 0.5106
12 | ## random forest, train 0.9884 0.9852 0.1440
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00329_informalexample_10.3_of_section_10.1.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 10.3 of section 10.1.3
2 | # (informalexample 10.3 of section 10.1.3) : Exploring advanced methods : Tree-based methods : Using random forests to further improve prediction
3 |
4 | testf <- rbind(testperf_tree, testperf_bag, testperf_rf)
5 | pandoc.table(testf, justify = perf_justify)
6 | ##
7 | ##
8 | ## model accuracy f1 dev.norm
9 | ## --------------------- ---------- -------- ----------
10 | ## tree, test 0.8712 0.8280 0.7531
11 | ## bagging, test 0.9105 0.8791 0.5834
12 | ## random forest, test 0.9498 0.9341 0.3011
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00330_informalexample_10.4_of_section_10.1.3.R:
--------------------------------------------------------------------------------
1 | # informalexample 10.4 of section 10.1.3
2 | # (informalexample 10.4 of section 10.1.3) : Exploring advanced methods : Tree-based methods : Using random forests to further improve prediction
3 |
4 | difff <- data.frame(model = c("tree", "bagging", "random forest"),
5 | accuracy = trainf$accuracy - testf$accuracy,
6 | f1 = trainf$f1 - testf$f1,
7 | dev.norm = trainf$dev.norm - testf$dev.norm)
8 |
9 | pandoc.table(difff, justify=perf_justify)
10 |
11 | ##
12 | ##
13 | ## model accuracy f1 dev.norm
14 | ## --------------- ---------- --------- ----------
15 | ## tree 0.028411 0.04111 -0.12275
16 | ## bagging 0.005523 0.01158 -0.07284
17 | ## random forest 0.038633 0.05110 -0.15711
18 |
19 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00336_informalexample_10.5_of_section_10.1.4.R:
--------------------------------------------------------------------------------
1 | # informalexample 10.5 of section 10.1.4
2 | # (informalexample 10.5 of section 10.1.4) : Exploring advanced methods : Tree-based methods : Gradient-boosted trees
3 |
4 | library(zeallot)
5 | c(texts, labels) %<-% readRDS("IMDBtrain.RDS")
6 |
7 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00337_informalexample_10.6_of_section_10.1.4.R:
--------------------------------------------------------------------------------
1 | # informalexample 10.6 of section 10.1.4
2 | # (informalexample 10.6 of section 10.1.4) : Exploring advanced methods : Tree-based methods : Gradient-boosted trees
3 |
4 | source("lime_imdb_example.R")
5 | vocab <- create_pruned_vocabulary(texts)
6 | dtm_train <- make_matrix(texts, vocab)
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00338_informalexample_10.7_of_section_10.1.4.R:
--------------------------------------------------------------------------------
1 | # informalexample 10.7 of section 10.1.4
2 | # (informalexample 10.7 of section 10.1.4) : Exploring advanced methods : Tree-based methods : Gradient-boosted trees
3 |
4 | cv <- xgb.cv(dtm_train,
5 | label = labels,
6 | params = list(
7 | objective = "binary:logistic"
8 | ),
9 | nfold = 5,
10 | nrounds = 500,
11 | early_stopping_rounds = 20, # Note: 1
12 | print_every_n = 10,
13 | metrics = "logloss")
14 |
15 | evalframe <- as.data.frame(cv$evaluation_log)
16 | (NROUNDS <- which.min(evalframe$test_logloss_mean))
17 | ## [1] 319
18 |
19 | # Note 1:
20 | # Stop early if performance doesn’t improve for
21 | # 20 rounds.
22 |
23 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00342_example_10.11_of_section_10.1.4.R:
--------------------------------------------------------------------------------
1 | # example 10.11 of section 10.1.4
2 | # (example 10.11 of section 10.1.4) : Exploring advanced methods : Tree-based methods : Gradient-boosted trees
3 | # Title: Fit and apply an xgboost model for birth weight
4 |
5 | birthwt_model <- xgboost(as.matrix(train_treated), # training inputs, converted to a matrix
6 | train$DBWT, # label vector: the DBWT column of train
7 | params = list(
8 | objective = "reg:squarederror", # squared-error regression; current name for the deprecated "reg:linear"
9 | base_score = mean(train$DBWT) # initial prediction is the training mean of DBWT
10 | ),
11 | nrounds = 50, # number of boosting rounds
12 | verbose = FALSE) # suppress per-round progress output
13 |
14 | test_treated <- prepare(treatplan, test)
15 | pred <- predict(birthwt_model, as.matrix(test_treated))
16 |
17 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00343_informalexample_10.9_of_section_10.2.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 10.9 of section 10.2.1
2 | # (informalexample 10.9 of section 10.2.1) : Exploring advanced methods : Using generalized additive models (GAMs) to learn non-monotone relationships : Understanding GAMs
3 |
4 | f(x[i, ]) = b0 + b[1] * x[i, 1] + b[2] * x[i, 2] + ... + b[n] * x[i, n]
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00344_informalexample_10.10_of_section_10.2.1.math:
--------------------------------------------------------------------------------
1 | # informalexample 10.10 of section 10.2.1
2 | # (informalexample 10.10 of section 10.2.1) : Exploring advanced methods : Using generalized additive models (GAMs) to learn non-monotone relationships : Understanding GAMs
3 |
4 | f(x[i, ]) = a0 + s_1(x[i, 1]) + s_2(x[i, 2]) + ... + s_n(x[i, n])
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00345_example_10.12_of_section_10.2.2.R:
--------------------------------------------------------------------------------
1 | # example 10.12 of section 10.2.2
2 | # (example 10.12 of section 10.2.2) : Exploring advanced methods : Using generalized additive models (GAMs) to learn non-monotone relationships : A one-dimensional regression example
3 | # Title: Preparing an artificial problem
4 |
5 | set.seed(602957) # fix the random seed so the draws below are reproducible
6 |
7 | x <- rnorm(1000) # 1000 standard-normal input values
8 | noise <- rnorm(1000, sd = 1.5) # additive normal noise with standard deviation 1.5
9 |
10 | y <- 3 * sin(2 * x) + cos(0.75 * x) - 1.5 * (x^2) + noise # non-linear signal in x plus noise
11 |
12 | select <- runif(1000) # one uniform(0, 1) draw per row, used to split the data
13 | frame <- data.frame(y = y, x = x)
14 |
15 | train <- frame[select > 0.1, ] # rows with select > 0.1 (about 90% in expectation) form the training set
16 | test <-frame[select <= 0.1, ] # the remaining rows form the test set
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00349_example_10.16_of_section_10.2.3.R:
--------------------------------------------------------------------------------
1 | # example 10.16 of section 10.2.3
2 | # (example 10.16 of section 10.2.3) : Exploring advanced methods : Using generalized additive models (GAMs) to learn non-monotone relationships : Extracting the non-linear relationships
3 | # Title: Extracting a learned spline from a GAM
4 |
5 | sx <- predict(gam_model, type = "terms")
6 | summary(sx)
7 | ## s(x)
8 | ## Min. :-17.527035
9 | ## 1st Qu.: -2.378636
10 | ## Median : 0.009427
11 | ## Mean : 0.000000
12 | ## 3rd Qu.: 2.869166
13 | ## Max. : 4.084999
14 |
15 | xframe <- cbind(train, sx = sx[,1])
16 |
17 | ggplot(xframe, aes(x = x)) +
18 | geom_point(aes(y = y), alpha = 0.4) +
19 | geom_line(aes(y = sx))
20 |
21 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00353_example_10.20_of_section_10.2.5.R:
--------------------------------------------------------------------------------
1 | # example 10.20 of section 10.2.5
2 | # (example 10.20 of section 10.2.5) : Exploring advanced methods : Using generalized additive models (GAMs) to learn non-monotone relationships : Using GAM for logistic regression
3 | # Title: GLM logistic regression
4 |
5 | form <- as.formula("DBWT < 2000 ~ PWGT + WTGAIN + MAGER + UPREVIS")
6 | logmod <- glm(form, data = train, family = binomial(link = "logit"))
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00358_informalexample_10.11_of_section_10.3.2.math:
--------------------------------------------------------------------------------
1 | # informalexample 10.11 of section 10.3.2
2 | # (informalexample 10.11 of section 10.3.2) : Exploring advanced methods : Solving “inseparable” problems using support vector machines : Understanding support vector machines
3 |
4 | w %*% phi(x) + b >= 0
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00359_informalexample_10.12_of_section_10.3.2.math:
--------------------------------------------------------------------------------
1 | # informalexample 10.12 of section 10.3.2
2 | # (informalexample 10.12 of section 10.3.2) : Exploring advanced methods : Solving “inseparable” problems using support vector machines : Understanding support vector machines
3 |
4 | w %*% phi(x) + b >= u
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00360_informalexample_10.13_of_section_10.3.2.math:
--------------------------------------------------------------------------------
1 | # informalexample 10.13 of section 10.3.2
2 | # (informalexample 10.13 of section 10.3.2) : Exploring advanced methods : Solving “inseparable” problems using support vector machines : Understanding support vector machines
3 |
4 | w %*% phi(x) + b <= v
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00362_informalexample_10.14_of_section_10.3.3.math:
--------------------------------------------------------------------------------
1 | # informalexample 10.14 of section 10.3.3
2 | # (informalexample 10.14 of section 10.3.3) : Exploring advanced methods : Solving “inseparable” problems using support vector machines : Understanding kernel functions
3 |
4 | w = sum(a1 * phi(s1), ... , am * phi(sm))
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c10_Exploring_advanced_methods/00363_informalexample_10.15_of_section_10.3.3.math:
--------------------------------------------------------------------------------
1 | # informalexample 10.15 of section 10.3.3
2 | # (informalexample 10.15 of section 10.3.3) : Exploring advanced methods : Solving “inseparable” problems using support vector machines : Understanding kernel functions
3 |
4 | w %*% phi(x) + b = sum(a1 * k(s1, x), ... , am * k(sm, x)) + b
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00365_informalexample_11.1_of_section_11.2.1.R:
--------------------------------------------------------------------------------
1 | # informalexample 11.1 of section 11.2.1
2 | # (informalexample 11.1 of section 11.2.1) : Documentation and deployment : Using R markdown to produce milestone documentation : What is R markdown?
3 |
4 | rmarkdown::render("Buzz_score_example.Rmd", rmarkdown::html_document())
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00366_example_11.2_of_section_11.2.3.Rmd:
--------------------------------------------------------------------------------
1 | # example 11.2 of section 11.2.3
2 | # (example 11.2 of section 11.2.3) : Documentation and deployment : Using R markdown to produce milestone documentation : Using knitr to document the Buzz data and produce the model
3 | # Title: Using the system() command to compute a file hash
4 |
5 | ```{r dataprep}
6 | infile <- "TomsHardware-Relative-Sigma-500.data.txt"
7 | paste('checked at', date())
8 | system(paste('shasum', infile), intern = TRUE) # Note: 1
9 | buzzdata <- read.table(infile, header = FALSE, sep = ",")
10 | ...
11 |
12 | # Note 1:
13 | # Run a system-installed cryptographic hash
14 | # program (this program is outside of R’s install
15 | # image).
16 |
17 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00367_example_11.3_of_section_11.2.3.md:
--------------------------------------------------------------------------------
1 | # example 11.3 of section 11.2.3
2 | # (example 11.3 of section 11.2.3) : Documentation and deployment : Using R markdown to produce milestone documentation : Using knitr to document the Buzz data and produce the model
3 | # Title: Calculating model performance
4 |
5 | ``` r
6 | rtest <- data.frame(truth = buzztest$buzz,
7 | pred = predict(fmodel, newdata = buzztest, type = "prob")[, 2, drop = TRUE])
8 | print(accuracyMeasures(rtest$pred, rtest$truth))
9 | ```
10 |
11 | ## [1] "precision= 0.832402234636871 ; recall= 0.84180790960452"
12 | ## pred
13 | ## truth FALSE TRUE
14 | ## 0 584 30
15 | ## 1 28 149
16 | ## model accuracy f1 dev.norm AUC
17 | ## 1 model 0.9266751 0.8370787 0.42056 0.9702102
18 |
19 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00370_informalexample_11.2_of_section_11.3.2.bash:
--------------------------------------------------------------------------------
1 | # informalexample 11.2 of section 11.3.2
2 | # (informalexample 11.2 of section 11.3.2) : Documentation and deployment : Using comments and version control for running documentation : Using version control to record history
3 |
4 | git add -A . # Note: 1
5 | git commit # Note: 2
6 |
7 | # Note 1:
8 | # Stage results to commit (specify what files
9 | # should be committed).
10 |
11 | # Note 2:
12 | # Actually perform the commit.
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00371_example_11.6_of_section_11.3.2.bash:
--------------------------------------------------------------------------------
1 | # example 11.6 of section 11.3.2
2 | # (example 11.6 of section 11.3.2) : Documentation and deployment : Using comments and version control for running documentation : Using version control to record history
3 | # Title: Checking your project status
4 |
5 | $ git status
6 | On branch master
7 | Your branch is up to date with 'origin/master'.
8 |
9 | nothing to commit, working tree clean
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00372_example_11.7_of_section_11.3.2.bash:
--------------------------------------------------------------------------------
1 | # example 11.7 of section 11.3.2
2 | # (example 11.7 of section 11.3.2) : Documentation and deployment : Using comments and version control for running documentation : Using version control to record history
3 | # Title: Checking your project history
4 |
5 | $ git log
6 | commit d22572281d40522bc6ab524bbdee497964ff4af0 (HEAD -> master, origin/master)
7 | Author: John Mount
8 | Date: Tue Apr 16 16:24:23 2019 -0700
9 |
10 | technical edits ch7
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00373_example_11.8_of_section_11.3.3.bash:
--------------------------------------------------------------------------------
1 | # example 11.8 of section 11.3.3
2 | # (example 11.8 of section 11.3.3) : Documentation and deployment : Using comments and version control for running documentation : Using version control to explore your project
3 | # Title: Finding who committed what
4 |
5 | git blame Buzz/buzzapp/server.R
6 | 4efb2b78 (John Mount 2019-04-24 16:22:43 -0700 1) #
7 | 4efb2b78 (John Mount 2019-04-24 16:22:43 -0700 2) # This is the server logic of a Shiny web application. You can run the
8 | 4efb2b78 (John Mount 2019-04-24 16:22:43 -0700 3) # application by clicking 'Run App' above.
9 | 4efb2b78 (John Mount 2019-04-24 16:22:43 -0700 4) #
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00374_example_11.9_of_section_11.3.3.bash:
--------------------------------------------------------------------------------
1 | # example 11.9 of section 11.3.3
2 | # (example 11.9 of section 11.3.3) : Documentation and deployment : Using comments and version control for running documentation : Using version control to explore your project
3 | # Title: Finding line-based differences between two committed versions
4 |
5 | diff --git a/CDC/NatalBirthData.rData b/CDC/NatalBirthData.rData
6 | ...
7 | +++ b/CDC/prepBirthWeightData.R
8 | @@ -0,0 +1,83 @@
9 | +data <- read.table("natal2010Sample.tsv.gz",
10 | + sep="\t", header = TRUE, stringsAsFactors = FALSE)
11 | +
12 | +# make a boolean from Y/N data
13 | +makevarYN = function(col) {
14 | + ifelse(col %in% c("", "U"), NA, col=="Y")
15 | +}
16 | ...
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00375_informalexample_11.3_of_section_11.3.3.bash:
--------------------------------------------------------------------------------
1 | # informalexample 11.3 of section 11.3.3
2 | # (informalexample 11.3 of section 11.3.3) : Documentation and deployment : Using comments and version control for running documentation : Using version control to explore your project
3 |
4 | git log --name-status -- Buzz/buzz.pdf
5 | commit 96503d8ca35a61ed9765edff9800fc9302554a3b
6 | Author: John Mount
7 | Date: Wed Apr 17 16:41:48 2019 -0700
8 |
9 | fix links and re-build Buzz example
10 |
11 | D Buzz/buzz.pdf
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00376_example_11.10_of_section_11.3.4.bash:
--------------------------------------------------------------------------------
1 | # example 11.10 of section 11.3.4
2 | # (example 11.10 of section 11.3.4) : Documentation and deployment : Using comments and version control for running documentation : Using version control to share work
3 | # Title: git remote
4 |
5 | $ git remote --verbose
6 | origin git@github.com:WinVector/PDSwR2.git (fetch)
7 | origin git@github.com:WinVector/PDSwR2.git (push)
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00377_example_11.11_of_section_11.4.2.Rtxt:
--------------------------------------------------------------------------------
1 | # example 11.11 of section 11.4.2
2 | # (example 11.11 of section 11.4.2) : Documentation and deployment : Deploying models : Deploying models as HTTP services
3 | # Title: Buzz model as an R-based HTTP service
4 |
5 | library("randomForest") # Note: 1
6 |
7 | lst <- readRDS("thRS500.RDS")
8 | varslist <- lst$varslist
9 | fmodel <- lst$fmodel
10 | buzztest <- lst$buzztest
11 | rm(list = "lst")
12 |
13 | #* Score a data frame.
14 | #* @param d data frame to score
15 | #* @post /score_data
16 | function(d) {
17 | predict(fmodel, newdata = d, type = "prob")
18 | }
19 |
20 | # Note 1:
21 | # Attach the randomForest package, so we can run our randomForest model.
22 |
23 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00378_informalexample_11.4_of_section_11.4.2.Rtxt:
--------------------------------------------------------------------------------
1 | # informalexample 11.4 of section 11.4.2
2 | # (informalexample 11.4 of section 11.4.2) : Documentation and deployment : Deploying models : Deploying models as HTTP services
3 |
4 | library("plumber")
5 | r <- plumb("plumber.R")
6 | r$run(port=8000)
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/c11_Documentation_and_deployment/00380_informalexample_11.5_of_section_11.4.3.SQL:
--------------------------------------------------------------------------------
1 | # informalexample 11.5 of section 11.4.3
2 | # (informalexample 11.5 of section 11.4.3) : Documentation and deployment : Deploying models : Deploying models by export
3 |
4 | CASE
5 | WHEN (`num.displays_06` >= 1517.5 AND
6 | `avg.auths.per.disc_00` < 2.25 AND
7 | `num.displays_06` < 2075.0) THEN ('0')
8 | WHEN (`num.displays_03` >= 1114.5 AND
9 | `atomic.containers_01` < 9.5 AND
10 | `avg.auths.per.disc_00` >= 2.25 AND
11 | `num.displays_06` < 2075.0) THEN ('0')
12 | WHEN ...
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/x0A_Starting_with_R_and_other_tools/00382_informalexample_A.2_of_section_A.1.5.txt:
--------------------------------------------------------------------------------
1 | # informalexample A.2 of section A.1.5
2 | # (informalexample A.2 of section A.1.5) : Starting with R and other tools : Installing the tools : R resources
3 |
4 | install.packages('ctv', repos = 'https://cran.r-project.org')
5 | library('ctv')
6 | # install.views('TimeSeries') # can take a LONG time
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/x0A_Starting_with_R_and_other_tools/00383_example_A.1_of_section_A.2.R:
--------------------------------------------------------------------------------
1 | # example A.1 of section A.2
2 | # (example A.1 of section A.2) : Starting with R and other tools : Starting with R
3 | # Title: Trying a few R commands
4 |
5 | 1
6 | ## [1] 1
7 | 1/2
8 | ## [1] 0.5
9 | 'Joe'
10 | ## [1] "Joe"
11 | "Joe"
12 | ## [1] "Joe"
13 | "Joe"=='Joe'
14 | ## [1] TRUE
15 | c()
16 | ## NULL
17 | is.null(c())
18 | ## [1] TRUE
19 | is.null(5)
20 | ## [1] FALSE
21 | c(1)
22 | ## [1] 1
23 | c(1, 2)
24 | ## [1] 1 2
25 | c("Apple", 'Orange')
26 | ## [1] "Apple" "Orange"
27 | length(c(1, 2))
28 | ## [1] 2
29 | vec <- c(1, 2)
30 | vec
31 | ## [1] 1 2
32 |
33 |
--------------------------------------------------------------------------------
/CodeExamples/x0A_Starting_with_R_and_other_tools/00384_informalexample_A.3_of_section_A.2.1.R:
--------------------------------------------------------------------------------
1 | # informalexample A.3 of section A.2.1
2 | # (informalexample A.3 of section A.2.1) : Starting with R and other tools : Starting with R : Primary features of R
3 |
4 | x <- 2
5 | x < - 3
6 | ## [1] FALSE
7 | print(x)
8 | ## [1] 2
9 |
10 |
--------------------------------------------------------------------------------
/CodeExamples/x0A_Starting_with_R_and_other_tools/00385_example_A.2_of_section_A.2.1.R:
--------------------------------------------------------------------------------
1 | # example A.2 of section A.2.1
2 | # (example A.2 of section A.2.1) : Starting with R and other tools : Starting with R : Primary features of R
3 | # Title: Binding values to function arguments
4 | # Arguments are bound by position unless they are named with "=".
5 | divide <- function(numerator,denominator) { numerator/denominator }
6 | divide(1, 2)
7 | ## [1] 0.5
8 |
9 | divide(2, 1)
10 | ## [1] 2
11 | # Named with "=", the arguments may be given in any order.
12 | divide(denominator = 2, numerator = 1)
13 | ## [1] 0.5
14 | # "<-" inside a call is an assignment, not argument naming: the values 2 and 1 are still bound by position.
15 | divide(denominator <- 2, numerator <- 1) # wrong symbol <-, yields 2, a wrong answer!
16 | ## [1] 2
17 | # Side effect of the call above: variables denominator and numerator now exist in the calling environment.
18 |
--------------------------------------------------------------------------------
/CodeExamples/x0A_Starting_with_R_and_other_tools/00386_example_A.3_of_section_A.2.1.R:
--------------------------------------------------------------------------------
1 | # example A.3 of section A.2.1
2 | # (example A.3 of section A.2.1) : Starting with R and other tools : Starting with R : Primary features of R
3 | # Title: Demonstrating side effects
4 | # "<-" inside a function binds a local name; "<<-" reaches outside the function.
5 | x<-1
6 | good <- function() { x <- 5}   # creates a new x local to the function
7 | good()
8 | print(x)   # the top-level x is untouched
9 | ## [1] 1
10 | # "<<-" assigns to an x found outside the function (here the top-level x): a side effect.
11 | bad <- function() { x <<- 5}
12 | bad()
13 | print(x)
14 | ## [1] 5
15 |
16 |
--------------------------------------------------------------------------------
/CodeExamples/x0A_Starting_with_R_and_other_tools/00387_example_A.4_of_section_A.2.1.R:
--------------------------------------------------------------------------------
1 | # example A.4 of section A.2.1
2 | # (example A.4 of section A.2.1) : Starting with R and other tools : Starting with R : Primary features of R
3 | # Title: R truth tables for Boolean operators
4 | # ==, & and | work element by element; the two vectors together cover all four TRUE/FALSE pairings.
5 | c(TRUE, TRUE, FALSE, FALSE) == c(TRUE, FALSE, TRUE, FALSE)
6 | ## [1] TRUE FALSE FALSE TRUE
7 |
8 | c(TRUE, TRUE, FALSE, FALSE) & c(TRUE, FALSE, TRUE, FALSE)
9 | ## [1] TRUE FALSE FALSE FALSE
10 |
11 | c(TRUE, TRUE, FALSE, FALSE) | c(TRUE, FALSE, TRUE, FALSE)
12 | ## [1] TRUE TRUE TRUE FALSE
13 |
14 |
--------------------------------------------------------------------------------
/CodeExamples/x0A_Starting_with_R_and_other_tools/00388_example_A.5_of_section_A.2.1.R:
--------------------------------------------------------------------------------
1 | # example A.5 of section A.2.1
2 | # (example A.5 of section A.2.1) : Starting with R and other tools : Starting with R : Primary features of R
3 | # Title: Call-by-value effect
4 |
5 | a <- c(1, 2)
6 | b <- a
7 |
8 | print(b)
9 | ## [1] 1 2
10 | a[[1]] <- 5 # Note: 1
11 |
12 | print(a)
13 | ## [1] 5 2
14 | print(b) # Note: 2
15 | ## [1] 1 2
16 | # Note 1:
17 | # “Alter a”. Actually this is
18 | # implemented by building an entirely new vector and
19 | # reassigning a to refer to this new vector. The old
20 | # value remains as it was, and any references
21 | # continue to see the old unaltered value.
22 |
23 | # Note 2:
24 | # Notice b’s value is not
25 | # changed.
26 |
27 |
--------------------------------------------------------------------------------
/CodeExamples/x0A_Starting_with_R_and_other_tools/00389_informalexample_A.4_of_section_A.2.2.R:
--------------------------------------------------------------------------------
1 | # informalexample A.4 of section A.2.2
2 | # (informalexample A.4 of section A.2.2) : Starting with R and other tools : Starting with R : Primary R data types
3 | # [[ ]] on the left of an assignment replaces a single element of the vector.
4 | vec <- c(2, 3)
5 | vec[[2]] <- 5
6 | print(vec)
7 | ## [1] 2 5
8 |
9 |
--------------------------------------------------------------------------------
/CodeExamples/x0A_Starting_with_R_and_other_tools/00390_example_A.6_of_section_A.2.2.R:
--------------------------------------------------------------------------------
1 | # example A.6 of section A.2.2
2 | # (example A.6 of section A.2.2) : Starting with R and other tools : Starting with R : Primary R data types
3 | # Title: Examples of R indexing operators
4 |
5 | x <- list('a' = 6, b = 'fred')
6 | names(x)
7 | ## [1] "a" "b"
8 | x$a
9 | ## [1] 6
10 | x$b
11 | ## [1] "fred"
12 | x[['a']]
13 | ## [1] 6
14 | # [[ ]] returns the element itself; single-bracket x['a'] would instead print a one-element list headed by $a.
15 |
16 | x[c('a', 'a', 'b', 'b')]
17 | ## $a
18 | ## [1] 6
19 | ##
20 | ## $a
21 | ## [1] 6
22 | ##
23 | ## $b
24 | ## [1] "fred"
25 | ##
26 | ## $b
27 | ## [1] "fred"
28 |
29 |
--------------------------------------------------------------------------------
/CodeExamples/x0A_Starting_with_R_and_other_tools/00391_example_A.7_of_section_A.2.2.R:
--------------------------------------------------------------------------------
1 | # example A.7 of section A.2.2
2 | # (example A.7 of section A.2.2) : Starting with R and other tools : Starting with R : Primary R data types
3 | # Title: R’s treatment of unexpected factor levels
4 |
5 | factor('red', levels = c('red', 'orange'))
6 | ## [1] red
7 | ## Levels: red orange
8 | # A value that is not among the declared levels is converted to NA without any warning.
9 | factor('apple', levels = c('red', 'orange'))
10 | ## [1] <NA>
11 | ## Levels: red orange
12 |
13 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00397_example_B.1_of_section_B.1.1.R:
--------------------------------------------------------------------------------
1 | # example B.1 of section B.1.1
2 | # (example B.1 of section B.1.1) : Important statistical concepts : Distributions : Normal distribution
3 | # Title: Plotting the theoretical normal density
4 |
5 | library(ggplot2)
6 |
7 | x <- seq(from=-5, to=5, length.out=100) # 100 evenly spaced points on the interval [-5, 5]
8 | f <- dnorm(x) # density of the normal with mean 0 and sd 1, evaluated at each x
9 | ggplot(data.frame(x=x,y=f), aes(x=x,y=y)) + geom_line()
10 |
11 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00398_example_B.2_of_section_B.1.1.R:
--------------------------------------------------------------------------------
1 | # example B.2 of section B.1.1
2 | # (example B.2 of section B.1.1) : Important statistical concepts : Distributions : Normal distribution
3 | # Title: Plotting an empirical normal density
4 |
5 | library(ggplot2)
6 |
7 | # draw 1000 points from a normal with mean 0, sd 1
8 | u <- rnorm(1000)
9 | # x and f are not defined in this listing: they are the grid and dnorm() values built in example B.1.
10 | # plot the distribution of points,
11 | # compared to normal curve as computed by dnorm() (dashed line)
12 | ggplot(data.frame(x=u), aes(x=x)) + geom_density() +
13 | geom_line(data=data.frame(x=x,y=f), aes(x=x,y=y), linetype=2)
14 |
15 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00399_example_B.3_of_section_B.1.1.R:
--------------------------------------------------------------------------------
1 | # example B.3 of section B.1.1
2 | # (example B.3 of section B.1.1) : Important statistical concepts : Distributions : Normal distribution
3 | # Title: Working with the normal cdf
4 | # pnorm(q) with default arguments is the CDF of the normal with mean 0 and sd 1: P(observation <= q).
5 | # --- estimate probabilities (areas) under the curve ---
6 |
7 | # 50% of the observations will be less than the mean
8 | pnorm(0)
9 | # [1] 0.5
10 |
11 | # about 2.3% of all observations are more than 2 standard
12 | # deviations below the mean
13 | pnorm(-2)
14 | # [1] 0.02275013
15 |
16 | # about 95.4% of all observations are within 2 standard deviations
17 | # from the mean
18 | pnorm(2) - pnorm(-2)
19 | # [1] 0.9544997
20 |
21 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00407_example_B.11_of_section_B.1.4.R:
--------------------------------------------------------------------------------
1 | # example B.11 of section B.1.4
2 | # (example B.11 of section B.1.4) : Important statistical concepts : Distributions : Binomial distribution
3 | # Title: Working with the binomial CDF
4 | # NOTE(review): nflips is not defined in this listing; presumably it carries over from an earlier example in section B.1.4 - confirm.
5 | # because this is a discrete probability distribution,
6 | # pbinom and qbinom are not exact inverses of each other
7 |
8 | # this direction works
9 | pbinom(45, nflips, 0.5)
10 | # [1] 0.1841008
11 | qbinom(0.1841008, nflips, 0.5)
12 | # [1] 45
13 |
14 | # this direction won't be exact: qbinom returns the smallest count whose cumulative probability is >= 0.75
15 | qbinom(0.75, nflips, 0.5)
16 | # [1] 53
17 | pbinom(53, nflips, 0.5)
18 | # [1] 0.7579408
19 |
20 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00408_informalexample_B.1_of_section_B.2.1.math:
--------------------------------------------------------------------------------
1 | # informalexample B.1 of section B.2.1
2 | # (informalexample B.1 of section B.2.1) : Important statistical concepts : Statistical theory : Statistical philosophy
3 |
4 | E[(y[i] - f(x[i, ]))^2] = bias^2 + variance + irreducibleError
5 |
6 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00409_example_B.12_of_section_B.2.2.R:
--------------------------------------------------------------------------------
1 | # example B.12 of section B.2.2
2 | # (example B.12 of section B.2.2) : Important statistical concepts : Statistical theory : A/B tests
3 | # Title: Building simulated A/B test data
4 |
5 | set.seed(123515)
6 | d <- rbind( # Note: 1
7 | data.frame(group = 'A', converted = rbinom(100000, size = 1, p = 0.05)), # Note: 2
8 | data.frame(group = 'B', converted = rbinom(10000, size = 1, p = 0.055)) # Note: 3
9 | )
10 | # rbinom's probability argument is named prob; "p" is accepted here only through R's partial argument matching.
11 | # Note 1:
12 | # Build a data frame to store simulated
13 | # examples.
14 |
15 | # Note 2:
16 | # Add 100,000 examples from the A group
17 | # simulating a conversion rate of 5%.
18 |
19 | # Note 3:
20 | # Add 10,000 examples from the B group
21 | # simulating a conversion rate of 5.5%.
22 |
23 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00410_example_B.13_of_section_B.2.2.R:
--------------------------------------------------------------------------------
1 | # example B.13 of section B.2.2
2 | # (example B.13 of section B.2.2) : Important statistical concepts : Statistical theory : A/B tests
3 | # Title: Summarizing the A/B test into a contingency table
4 | # d is the two-column (group, converted) data frame built in example B.12; table(d) cross-tabulates the two columns.
5 | tab <- table(d)
6 | print(tab)
7 | ##      converted
8 | ## group     0     1
9 | ##     A 94979  5021
10 | ##     B  9398   602
11 |
12 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00411_example_B.14_of_section_B.2.2.R:
--------------------------------------------------------------------------------
1 | # example B.14 of section B.2.2
2 | # (example B.14 of section B.2.2) : Important statistical concepts : Statistical theory : A/B tests
3 | # Title: Calculating the observed A and B rates
4 | # tab is the group-by-converted table from example B.13; column '1' holds the conversion counts.
5 | aConversionRate <- tab['A','1']/sum(tab['A',])
6 | print(aConversionRate)
7 | ## [1] 0.05021
8 |
9 | bConversionRate <- tab['B', '1'] / sum(tab['B', ])
10 | print(bConversionRate)
11 | ## [1] 0.0602
12 | # Pooled rate: all conversions divided by all examples, ignoring group.
13 | commonRate <- sum(tab[, '1']) / sum(tab)
14 | print(commonRate)
15 | ## [1] 0.05111818
16 |
17 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00412_example_B.15_of_section_B.2.2.R:
--------------------------------------------------------------------------------
1 | # example B.15 of section B.2.2
2 | # (example B.15 of section B.2.2) : Important statistical concepts : Statistical theory : A/B tests
3 | # Title: Calculating the significance of the observed difference in rates
4 | # Runs Fisher's exact test on the 2x2 table tab from example B.13; the reported alternative is "odds ratio is not equal to 1".
5 | fisher.test(tab)
6 |
7 | ## Fisher's Exact Test for Count Data
8 | ##
9 | ## data: tab
10 | ## p-value = 2.469e-05
11 | ## alternative hypothesis: true odds ratio is not equal to 1
12 | ## 95 percent confidence interval:
13 | ## 1.108716 1.322464
14 | ## sample estimates:
15 | ## odds ratio
16 | ## 1.211706
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00414_informalexample_B.2_of_section_B.2.3.R:
--------------------------------------------------------------------------------
1 | # informalexample B.2 of section B.2.3
2 | # (informalexample B.2 of section B.2.3) : Important statistical concepts : Statistical theory : Power of tests
3 | # n is left unspecified, so pwr.p.test solves for it: the sample size for a 4.5% vs 4% effect at the given sig.level and power.
4 | library(pwr)
5 | pwr.p.test(h = ES.h(p1 = 0.045, p2 = 0.04),
6 | sig.level = 0.05,
7 | power = 0.8,
8 | alternative = "greater")
9 |
10 | # proportion power calculation for binomial distribution (arcsine transformation)
11 | #
12 | # h = 0.02479642
13 | # n = 10055.18
14 | # sig.level = 0.05
15 | # power = 0.8
16 | # alternative = greater
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00416_example_B.18_of_section_B.2.4.R:
--------------------------------------------------------------------------------
1 | # example B.18 of section B.2.4
2 | # (example B.18 of section B.2.4) : Important statistical concepts : Statistical theory : Specialized statistical tests
3 | # Title: Calculating the (non)significance of the observed correlation
4 | # NOTE(review): d must have EarnedIncome and CapitalGains columns; it is not built in this listing - presumably an earlier section B.2.4 example creates it.
5 | with(d, cor(EarnedIncome, CapitalGains, method = 'spearman'))
6 |
7 | # [1] 0.03083108
8 | # The outer parentheses below print the test result as well as saving it in ctest.
9 | (ctest <- with(d, cor.test(EarnedIncome, CapitalGains, method = 'spearman')))
10 |
11 | #
12 | # Spearman's rank correlation rho
13 | #
14 | #data: EarnedIncome and CapitalGains
15 | #S = 161512, p-value = 0.7604
16 | #alternative hypothesis: true rho is not equal to 0
17 | #sample estimates:
18 | # rho
19 | #0.03083108
20 |
21 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00417_informalexample_B.3_of_section_B.2.4.R:
--------------------------------------------------------------------------------
1 | # informalexample B.3 of section B.2.4
2 | # (informalexample B.3 of section B.2.4) : Important statistical concepts : Statistical theory : Specialized statistical tests
3 | # Formats ctest (the cor.test result saved in example B.18) as a one-line summary; requires the sigr package.
4 | sigr::wrapCorTest(ctest)
5 |
6 | # [1] "Spearman's rank correlation rho: (r=0.03083, p=n.s.)."
7 |
8 |
--------------------------------------------------------------------------------
/CodeExamples/x0B_Important_statistical_concepts/00418_example_B.19_of_section_B.3.1.R:
--------------------------------------------------------------------------------
1 | # example B.19 of section B.3.1
2 | # (example B.19 of section B.3.1) : Important statistical concepts : Examples of the statistical view of data : Sampling bias
3 | # Title: Misleading significance result from biased observations
4 | # Keeps only the rows of d whose two incomes sum to at least 500000, then repeats the Spearman test on that subset.
5 | veryHighIncome <- subset(d, EarnedIncome+CapitalGains>=500000)
6 | print(with(veryHighIncome,cor.test(EarnedIncome,CapitalGains,
7 | method='spearman')))
8 | #
9 | # Spearman's rank correlation rho
10 | #
11 | #data: EarnedIncome and CapitalGains
12 | #S = 1046, p-value < 2.2e-16
13 | #alternative hypothesis: true rho is not equal to 0
14 | #sample estimates:
15 | # rho
16 | #-0.8678571
17 |
18 |
--------------------------------------------------------------------------------
/CodeExamples/xFront_Matter_Practical_Data_Science_with_R/00001_informalexample_Front_Matter.1_of_section_Front_Matter.5.6.bash:
--------------------------------------------------------------------------------
1 | # informalexample Front Matter.1 of section Front Matter.5.6
2 | # (informalexample Front Matter.1 of section Front Matter.5.6) : Practical Data Science with R : About this book : Working with this book
3 |
4 | git clone https://github.com/WinVector/PDSwR2.git
5 |
6 |
--------------------------------------------------------------------------------
/Custdata/custdata.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Custdata/custdata.RDS
--------------------------------------------------------------------------------
/Custdata/hhdata.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Custdata/hhdata.RDS
--------------------------------------------------------------------------------
/Custdata/median_income.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Custdata/median_income.RDS
--------------------------------------------------------------------------------
/IMDB/IMDBtest.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/IMDB/IMDBtest.RDS
--------------------------------------------------------------------------------
/IMDB/IMDBtrain.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/IMDB/IMDBtrain.RDS
--------------------------------------------------------------------------------
/IMDB/README.md:
--------------------------------------------------------------------------------
1 |
2 | Script and data to create example data files for section 6.3.3 of *Practical Data Science with R, Second Edition*
3 |
4 | Original data source::
5 |
6 | Extraction script adapted from Listing 6.8 of Francois Chollet, J.J. Allaire, *Deep Learning with R*, Manning 2018
7 |
--------------------------------------------------------------------------------
/KDD2009/AnalysisOfKDD2009.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/AnalysisOfKDD2009.pdf
--------------------------------------------------------------------------------
/KDD2009/KDD2009.Rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009.Rdata
--------------------------------------------------------------------------------
/KDD2009/KDD2009.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-1.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-2.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-3.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-4.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-5.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-gfm/kddplot-6.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-1.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-2.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-3.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-4.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-5.png
--------------------------------------------------------------------------------
/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/KDD2009vtreat_files/figure-markdown_github/kddplot-6.png
--------------------------------------------------------------------------------
/KDD2009/orange_small_train.data.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/KDD2009/orange_small_train.data.gz
--------------------------------------------------------------------------------
/LIME_iris/README_limeiris.md:
--------------------------------------------------------------------------------
1 |
2 | Script for running iris example from section 6.3.2 of *Practical Data Science with R, Second Edition*
3 |
--------------------------------------------------------------------------------
/NotionalData/README.md:
--------------------------------------------------------------------------------
1 |
2 | Notional data used to show how some code works. Not important where the data comes from.
3 |
--------------------------------------------------------------------------------
/NotionalData/exampleData.rData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/NotionalData/exampleData.rData
--------------------------------------------------------------------------------
/PDSwR2.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/PUMS/ACS2016_PUMS_README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/ACS2016_PUMS_README.pdf
--------------------------------------------------------------------------------
/PUMS/PUMS1_dplyr_files/figure-markdown_github/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMS1_dplyr_files/figure-markdown_github/unnamed-chunk-1-1.png
--------------------------------------------------------------------------------
/PUMS/PUMS1_files/figure-markdown_github/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMS1_files/figure-markdown_github/unnamed-chunk-1-1.png
--------------------------------------------------------------------------------
/PUMS/PUMS1_rquery_files/figure-markdown_github/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMS1_rquery_files/figure-markdown_github/unnamed-chunk-1-1.png
--------------------------------------------------------------------------------
/PUMS/PUMS1_rquery_files/figure-markdown_github/unnamed-chunk-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMS1_rquery_files/figure-markdown_github/unnamed-chunk-1-2.png
--------------------------------------------------------------------------------
/PUMS/PUMSDataDict16.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMSDataDict16.txt
--------------------------------------------------------------------------------
/PUMS/PUMSsample.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMSsample.RDS
--------------------------------------------------------------------------------
/PUMS/PUMSscatter1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/PUMSscatter1.pdf
--------------------------------------------------------------------------------
/PUMS/download/.gitignore:
--------------------------------------------------------------------------------
1 | csv_hus.zip
2 | csv_pus.zip
3 | dowload.bash
4 | ss13husa.csv
5 | ss13husb.csv
6 | ss13pusa.csv
7 | ss13pusb.csv
8 | ss16husa.csv.gz
9 | ss16husb.csv.gz
10 | ss16pusa.csv.gz
11 | ss16pusb.csv.gz
12 |
--------------------------------------------------------------------------------
/PUMS/download/ACS2016_PUMS_README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/download/ACS2016_PUMS_README.pdf
--------------------------------------------------------------------------------
/PUMS/download/LoadPUMSAll_files/figure-markdown_github/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/download/LoadPUMSAll_files/figure-markdown_github/unnamed-chunk-1-1.png
--------------------------------------------------------------------------------
/PUMS/download/LoadPUMSAll_files/figure-markdown_github/unnamed-chunk-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/download/LoadPUMSAll_files/figure-markdown_github/unnamed-chunk-1-2.png
--------------------------------------------------------------------------------
/PUMS/download/PUMSDataDict16.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/download/PUMSDataDict16.txt
--------------------------------------------------------------------------------
/PUMS/download/README.txt:
--------------------------------------------------------------------------------
1 |
2 | Prepare Census Data sample for use.
3 |
4 | * ReadDataDict.Rmd : read the census data dictionary into machine-readable form, write as: ../data_dict.csv
5 | * LoadPUMSAll.Rmd : load all census data into database.
6 | * LoadPUMS.Rmd : extract a weighted person based sample with matching households, write ../ss16pus.RDS and ../ss16hus.RDS .
7 | * LoadPUMS_h.Rmd : extract a weighted person based sample with matching households, write ../ss16hus_h.RDS and ../ss16pus_h.RDS.
8 |
--------------------------------------------------------------------------------
/PUMS/download/data_dict.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/download/data_dict.RDS
--------------------------------------------------------------------------------
/PUMS/download/download.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 |
--------------------------------------------------------------------------------
/PUMS/dpus_std_employee.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/dpus_std_employee.RDS
--------------------------------------------------------------------------------
/PUMS/incomedata.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/incomedata.rds
--------------------------------------------------------------------------------
/PUMS/psub.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/psub.RDS
--------------------------------------------------------------------------------
/PUMS/ss16hus.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/ss16hus.RDS
--------------------------------------------------------------------------------
/PUMS/ss16hus_h.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/ss16hus_h.RDS
--------------------------------------------------------------------------------
/PUMS/ss16pus.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/ss16pus.RDS
--------------------------------------------------------------------------------
/PUMS/ss16pus_h.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PUMS/ss16pus_h.RDS
--------------------------------------------------------------------------------
/PseudoLog10/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 |
--------------------------------------------------------------------------------
/PseudoLog10/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: PseudoLog10
2 | Type: Package
3 | Title: What the Package Does (Title Case)
4 | Version: 0.1.0
5 | Author: Who wrote it
6 | Maintainer: The package maintainer
7 | Description: More about what it does (maybe more than one line)
8 | Use four spaces when indenting paragraphs within the Description.
9 | License: What license is it under?
10 | Encoding: UTF-8
11 | LazyData: true
12 | RoxygenNote: 6.1.1
13 |
--------------------------------------------------------------------------------
/PseudoLog10/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(pseudoLog10)
4 |
--------------------------------------------------------------------------------
/PseudoLog10/PseudoLog10.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 |
--------------------------------------------------------------------------------
/PseudoLog10/PseudoLog10.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/PseudoLog10/PseudoLog10.pdf
--------------------------------------------------------------------------------
/RenderedExamples/.gitignore:
--------------------------------------------------------------------------------
1 | dpus_std_employee.RDS
2 |
--------------------------------------------------------------------------------
/RenderedExamples/figure/00031_example_2.11_of_section_2.3.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00031_example_2.11_of_section_2.3.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00037_example_3.6_of_section_3.2.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00037_example_3.6_of_section_3.2.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00038_example_3.7_of_section_3.2.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00038_example_3.7_of_section_3.2.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00039_example_3.8_of_section_3.2.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00039_example_3.8_of_section_3.2.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00041_informalexample_3.2_of_section_3.2.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00041_informalexample_3.2_of_section_3.2.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00042_example_3.9_of_section_3.2.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00042_example_3.9_of_section_3.2.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00043_example_3.10_of_section_3.2.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00043_example_3.10_of_section_3.2.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00044_example_3.11_of_section_3.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00044_example_3.11_of_section_3.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00046_example_3.13_of_section_3.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00046_example_3.13_of_section_3.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00047_informalexample_3.3_of_section_3.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00047_informalexample_3.3_of_section_3.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00048_informalexample_3.4_of_section_3.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00048_informalexample_3.4_of_section_3.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00049_example_3.14_of_section_3.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00049_example_3.14_of_section_3.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-2.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-3.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00050_example_3.15_of_section_3.2.2.R-4.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00051_example_3.16_of_section_3.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00051_example_3.16_of_section_3.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00051_example_3.16_of_section_3.2.2.R-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00051_example_3.16_of_section_3.2.2.R-2.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00052_example_3.17_of_section_3.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00052_example_3.17_of_section_3.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00053_example_3.18_of_section_3.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00053_example_3.18_of_section_3.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00054_informalexample_3.5_of_section_3.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00054_informalexample_3.5_of_section_3.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00071_informalexample_5.2_of_section_5.1.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00071_informalexample_5.2_of_section_5.1.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00090_informalexample_5.21_of_section_5.2.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00090_informalexample_5.21_of_section_5.2.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00094_informalexample_5.25_of_section_5.2.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00094_informalexample_5.25_of_section_5.2.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00099_informalexample_5.30_of_section_5.3.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00099_informalexample_5.30_of_section_5.3.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00132_informalexample_5.63_of_section_5.5.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00132_informalexample_5.63_of_section_5.5.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00135_informalexample_5.66_of_section_5.5.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00135_informalexample_5.66_of_section_5.5.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00138_informalexample_5.69_of_section_5.5.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00138_informalexample_5.69_of_section_5.5.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00157_example_6.9_of_section_6.2.5.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00157_example_6.9_of_section_6.2.5.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00158_example_6.10_of_section_6.2.5.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00158_example_6.10_of_section_6.2.5.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00168_informalexample_6.8_of_section_6.3.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00168_informalexample_6.8_of_section_6.3.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00171_example_6.20_of_section_6.3.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00171_example_6.20_of_section_6.3.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00176_example_6.23_of_section_6.3.4.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00176_example_6.23_of_section_6.3.4.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00179_example_6.26_of_section_6.3.5.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00179_example_6.26_of_section_6.3.5.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00180_informalexample_6.13_of_section_6.3.5.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00180_informalexample_6.13_of_section_6.3.5.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00181_example_6.27_of_section_6.3.5.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00181_example_6.27_of_section_6.3.5.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00181_example_6.27_of_section_6.3.5.R-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00181_example_6.27_of_section_6.3.5.R-2.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00189_example_7.2_of_section_7.1.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00189_example_7.2_of_section_7.1.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00190_example_7.3_of_section_7.1.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00190_example_7.3_of_section_7.1.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00212_example_7.12_of_section_7.2.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00212_example_7.12_of_section_7.2.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00213_example_7.13_of_section_7.2.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00213_example_7.13_of_section_7.2.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00234_example_7.24_of_section_7.3.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00234_example_7.24_of_section_7.3.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00241_example_7.27_of_section_7.3.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00241_example_7.27_of_section_7.3.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00247_example_7.32_of_section_7.3.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00247_example_7.32_of_section_7.3.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00271_informalexample_8.16_of_section_8.4.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00271_informalexample_8.16_of_section_8.4.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00271_informalexample_8.16_of_section_8.4.2.R-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00271_informalexample_8.16_of_section_8.4.2.R-2.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00291_example_9.3_of_section_9.1.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00291_example_9.3_of_section_9.1.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00294_example_9.5_of_section_9.1.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00294_example_9.5_of_section_9.1.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00297_example_9.8_of_section_9.1.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00297_example_9.8_of_section_9.1.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00299_example_9.9_of_section_9.1.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00299_example_9.9_of_section_9.1.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00302_example_9.10_of_section_9.1.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00302_example_9.10_of_section_9.1.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00314_example_9.20_of_section_9.2.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00314_example_9.20_of_section_9.2.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00324_example_10.1_of_section_10.1.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00324_example_10.1_of_section_10.1.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00331_example_10.4_of_section_10.1.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00331_example_10.4_of_section_10.1.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00334_example_10.7_of_section_10.1.4.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00334_example_10.7_of_section_10.1.4.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00346_example_10.13_of_section_10.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00346_example_10.13_of_section_10.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00347_example_10.14_of_section_10.2.2.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00347_example_10.14_of_section_10.2.2.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00349_example_10.16_of_section_10.2.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00349_example_10.16_of_section_10.2.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00351_example_10.18_of_section_10.2.4.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00351_example_10.18_of_section_10.2.4.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00355_example_10.22_of_section_10.3.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00355_example_10.22_of_section_10.3.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00356_example_10.23_of_section_10.3.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00356_example_10.23_of_section_10.3.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00357_example_10.24_of_section_10.3.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00357_example_10.24_of_section_10.3.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00397_example_B.1_of_section_B.1.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00397_example_B.1_of_section_B.1.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00398_example_B.2_of_section_B.1.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00398_example_B.2_of_section_B.1.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00400_example_B.4_of_section_B.1.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00400_example_B.4_of_section_B.1.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00401_example_B.5_of_section_B.1.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00401_example_B.5_of_section_B.1.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00402_example_B.6_of_section_B.1.3.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00402_example_B.6_of_section_B.1.3.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00403_example_B.7_of_section_B.1.4.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00403_example_B.7_of_section_B.1.4.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00404_example_B.8_of_section_B.1.4.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00404_example_B.8_of_section_B.1.4.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00419_example_B.20_of_section_B.3.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00419_example_B.20_of_section_B.3.1.R-1.png
--------------------------------------------------------------------------------
/RenderedExamples/figure/00419_example_B.20_of_section_B.3.1.R-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/RenderedExamples/figure/00419_example_B.20_of_section_B.3.1.R-2.png
--------------------------------------------------------------------------------
/SQLExample/HotelRelation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/SQLExample/HotelRelation.pdf
--------------------------------------------------------------------------------
/SQLExample/Workbook1.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/SQLExample/Workbook1.xlsx
--------------------------------------------------------------------------------
/SQLExample/figure/allsteps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/SQLExample/figure/allsteps.png
--------------------------------------------------------------------------------
/SQLExample/h2-1.3.170.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/SQLExample/h2-1.3.170.jar
--------------------------------------------------------------------------------
/SQLExample/h2demodb_h2.h2.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/SQLExample/h2demodb_h2.h2.db
--------------------------------------------------------------------------------
/Spirals/Spirals.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/Spirals/c10_SVM_files/figure-markdown_github/00433_example_10.22_of_section_10.3.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/00433_example_10.22_of_section_10.3.1.R-1.png
--------------------------------------------------------------------------------
/Spirals/c10_SVM_files/figure-markdown_github/00434_example_10.23_of_section_10.3.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/00434_example_10.23_of_section_10.3.1.R-1.png
--------------------------------------------------------------------------------
/Spirals/c10_SVM_files/figure-markdown_github/00435_example_10.24_of_section_10.3.1.R-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/00435_example_10.24_of_section_10.3.1.R-1.png
--------------------------------------------------------------------------------
/Spirals/c10_SVM_files/figure-markdown_github/large_mu-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/large_mu-1.png
--------------------------------------------------------------------------------
/Spirals/c10_SVM_files/figure-markdown_github/large_nu-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/large_nu-1.png
--------------------------------------------------------------------------------
/Spirals/c10_SVM_files/figure-markdown_github/small_mu-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/small_mu-1.png
--------------------------------------------------------------------------------
/Spirals/c10_SVM_files/figure-markdown_github/small_nu-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/small_nu-1.png
--------------------------------------------------------------------------------
/Spirals/c10_SVM_files/figure-markdown_github/xgboost-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Spirals/c10_SVM_files/figure-markdown_github/xgboost-1.png
--------------------------------------------------------------------------------
/Starting_with_R_and_Other_Tools.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Starting_with_R_and_Other_Tools.pdf
--------------------------------------------------------------------------------
/Statlog/Chapter_1_Example_files/figure-markdown_github/present_model-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Statlog/Chapter_1_Example_files/figure-markdown_github/present_model-1.png
--------------------------------------------------------------------------------
/Statlog/GCDData.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Statlog/GCDData.RData
--------------------------------------------------------------------------------
/Statlog/Statlog.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/Statlog/creditdata.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Statlog/creditdata.RDS
--------------------------------------------------------------------------------
/Statlog/loan_model_example.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/Statlog/loan_model_example.RData
--------------------------------------------------------------------------------
/auto_mpg/UCI_Auto_MPG.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/auto_mpg/UCI_Auto_MPG.pdf
--------------------------------------------------------------------------------
/auto_mpg/auto_mpg.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/auto_mpg/auto_mpg.RDS
--------------------------------------------------------------------------------
/bioavailability/Caco-2 Permeability Assay.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/Caco-2 Permeability Assay.pdf
--------------------------------------------------------------------------------
/bioavailability/Figure4.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/Figure4.gif
--------------------------------------------------------------------------------
/bioavailability/WebPlotDigitizer.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/WebPlotDigitizer.pdf
--------------------------------------------------------------------------------
/bioavailability/figure/graph1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/figure/graph1.png
--------------------------------------------------------------------------------
/bioavailability/figure/graphT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/figure/graphT.png
--------------------------------------------------------------------------------
/bioavailability/figure/model1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/figure/model1.png
--------------------------------------------------------------------------------
/bioavailability/figure/synth1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/figure/synth1.png
--------------------------------------------------------------------------------
/bioavailability/figure/synthP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/figure/synthP.png
--------------------------------------------------------------------------------
/bioavailability/synth.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/PDSwR2/331da8e127fbc86532f44434838e9408c039577b/bioavailability/synth.RData
--------------------------------------------------------------------------------
/cricketchirps/README.txt:
--------------------------------------------------------------------------------
1 | Cricket Chirps Vs. Temperature
2 |
3 | In the following data:
4 | chirp_rate = chirps/sec for the striped ground cricket
5 | temperatureF = temperature in degrees Fahrenheit
6 | Reference: The Song of Insects by Dr. G.W. Pierce, Harvard College Press
7 |
8 | Data found (typed in) here:
9 | https://mathbits.com/MathBits/TISection/Statistics2/linearREAL.htm
10 |
--------------------------------------------------------------------------------
/cricketchirps/crickets.csv:
--------------------------------------------------------------------------------
1 | chirp_rate,temperatureF
20,88.59999847
16,71.59999847
19.79999924,93.30000305
18.39999962,84.30000305
17.10000038,80.59999847
15.5,75.19999695
14.69999981,69.69999695
17.10000038,82
15.39999962,69.40000153
16.20000076,83.30000305
15,79.59999847
17.20000076,82.59999847
16,80.59999847
17,83.5
14.39999962,76.30000305
--------------------------------------------------------------------------------