├── requirements.txt ├── slides ├── 05-data-visualization │ ├── css │ ├── assets │ ├── includes │ └── images │ │ ├── minard.png │ │ ├── antv-logo.png │ │ ├── plotly-logo.png │ │ ├── echarts-logo.png │ │ ├── ggplot2-exts.png │ │ ├── ggplot2-logo.png │ │ ├── plotly-gallery.png │ │ ├── plotnine-logo.png │ │ ├── echarts-gallery.png │ │ ├── big-data-visualization.png │ │ ├── ggplot2-visualization-stat-bar.png │ │ ├── the-grammar-of-graphics-layers.png │ │ └── 1913-piqua-ohio-advertisement-one-look-is-worth-a-thousand-words.jpg ├── 07-feature-engineering │ ├── css │ ├── assets │ ├── includes │ └── images │ │ ├── age.png │ │ ├── pca.jpg │ │ ├── binned-age.png │ │ ├── data-monitor.png │ │ ├── anomaly-comparison.png │ │ ├── anomaly-example-1.jpg │ │ ├── anomaly-example-2.jpg │ │ ├── xkcd-machine-learning.png │ │ ├── linear-relationships-male-female.png │ │ ├── linear-relationships-verb-tense.png │ │ ├── boxplot.svg │ │ └── gradient_with_normalization.svg ├── 01-data-science-introduction │ ├── css │ ├── assets │ ├── includes │ ├── 01-data-science-introduction.Rmd │ └── images │ │ └── crisp-dm-cycle.svg ├── 02-r-language-introduction │ ├── css │ ├── assets │ ├── includes │ └── images │ │ └── rstudio-environments.png ├── 11-clustering-algorithms │ ├── css │ ├── assets │ ├── includes │ └── images │ │ ├── kmeans-example-round-1.png │ │ ├── kmeans-example-round-2.png │ │ ├── kmeans-example-round-3.png │ │ ├── kmeans-example-round-7.png │ │ ├── agglomerative-clustering.png │ │ └── man-and-ed-distance.asy ├── 12-time-series-algorithms │ ├── css │ ├── assets │ ├── includes │ ├── images │ │ ├── stock.png │ │ ├── weather.png │ │ ├── peyton-manning-diagnostics.png │ │ ├── peyton-manning-trend-changepoints-example.png │ │ ├── peyton-manning-potential-changepoints-sample.png │ │ └── arima.svg │ └── data │ │ ├── skirts.txt │ │ ├── example_air_passengers.csv │ │ ├── nybirths.txt │ │ └── example_retail_sales.csv ├── 13-deep-learning-algorithms │ ├── css │ ├── assets │ ├── includes │ └── 
images │ │ ├── rnn.png │ │ ├── lstm.png │ │ ├── lenet-5.png │ │ ├── rnn-loops.png │ │ ├── conv-example.png │ │ ├── digit-pixels.png │ │ ├── lena-filters.png │ │ ├── max-pooling.png │ │ ├── tanh-function.png │ │ ├── ann-bp-network.png │ │ ├── conv2d-kernels.png │ │ ├── conv3d-kernels.png │ │ ├── lstm-cell-state.png │ │ ├── ann-mp-perceptron.png │ │ ├── conv-zero-padding.png │ │ ├── parameter-sharing.png │ │ ├── rnn-loops-unrolled.png │ │ ├── lstm-cell-input-gate.png │ │ ├── sparse-interactions.png │ │ ├── indirect-interactions.png │ │ ├── lstm-cell-forget-gate.png │ │ ├── lstm-cell-output-gate.png │ │ ├── lstm-cell-state-update.png │ │ ├── lstm-operations-symbols.png │ │ ├── ann-multilayer-perceptron.png │ │ ├── deep-learning-frameworks.jpg │ │ ├── lstm-pointwise-operation.png │ │ ├── cnn-accuracy-and-parameters.png │ │ ├── ann-perceptron-comparision-area-1.png │ │ ├── ann-perceptron-comparision-area-2.png │ │ ├── ann-perceptron-comparision-area-3.png │ │ ├── ann-perceptron-comparision-eor-1.png │ │ ├── ann-perceptron-comparision-eor-2.png │ │ ├── ann-perceptron-comparision-eor-3.png │ │ ├── rnn-long-term-dependencies-long.png │ │ ├── rnn-long-term-dependencies-short.png │ │ ├── ann-perceptron-comparision-structure-1.png │ │ ├── ann-perceptron-comparision-structure-2.png │ │ ├── ann-perceptron-comparision-structure-3.png │ │ ├── ann-perceptron-comparision-area-1.tex │ │ ├── ann-perceptron-comparision-area-2.tex │ │ ├── ann-perceptron-comparision-area-3.tex │ │ ├── ann-perceptron-comparision-eor-1.tex │ │ ├── ann-perceptron-comparision-eor-2.tex │ │ ├── ann-perceptron-comparision-eor-3.tex │ │ ├── ann-perceptron-comparision-structure-1.tex │ │ ├── ann-perceptron-comparision-structure-2.tex │ │ ├── ann-mp-perceptron.tex │ │ ├── ann-perceptron-comparision-structure-3.tex │ │ ├── ann-bp-network.tex │ │ └── ann-multilayer-perceptron.tex ├── 14-reproducible-research │ ├── css │ ├── assets │ ├── includes │ └── images │ │ ├── quarto.png │ │ ├── git-logo.png │ │ ├── 
octocat.png │ │ ├── github-logo.png │ │ ├── jupyterlab.png │ │ ├── not-equal.jpg │ │ ├── draw-an-owl.jpeg │ │ ├── jupyter-notebook.png │ │ ├── reproducibility-layers.png │ │ └── the-spectrum-of-reproducibility.png ├── 03-data-analytics-introduction-part-1 │ ├── css │ ├── assets │ ├── includes │ └── images │ │ ├── hadley.jpg │ │ ├── tidyverse.png │ │ ├── tidyr-unite.png │ │ ├── tidyr-separate.png │ │ ├── tidyr-pivot-longer.png │ │ ├── tidyr-pivot-wider.png │ │ ├── transform-logical.png │ │ └── tidyr-column-row-cell.png ├── 04-data-analytics-introduction-part-2 │ ├── css │ ├── assets │ ├── includes │ └── images │ │ ├── dplyr-join-anti.png │ │ ├── dplyr-join-semi.png │ │ ├── dplyr-join-inner.png │ │ ├── dplyr-join-outer.png │ │ ├── dplyr-join-setup-1.png │ │ ├── dplyr-join-setup-2.png │ │ ├── purrr-lists-invoke.png │ │ ├── purrr-lists-map2.png │ │ ├── dplyr-join-semi-many.png │ │ ├── dplyr-join-many-to-many.png │ │ ├── dplyr-join-one-to-many.png │ │ ├── purrr-lists-pmap-named.png │ │ ├── relational-nycflights.png │ │ └── purrr-lists-pmap-unnamed.png ├── 06-statistical-analytics-introduction │ ├── css │ ├── assets │ ├── includes │ └── images │ │ ├── lm-normal-q-q.png │ │ ├── gradient-descent.png │ │ ├── lm-scale-location.png │ │ ├── lm-residuals-leverage.png │ │ ├── lm-residuals-vs-fitted.png │ │ ├── normal-distribution-pdf.png │ │ └── exponential-distribution-pdf.png ├── 09-classification-algorithms-part-1 │ ├── css │ ├── assets │ ├── includes │ └── images │ │ └── logistic-curve.svg ├── 10-classification-algorithms-part-2 │ ├── css │ ├── assets │ ├── includes │ └── images │ │ ├── stacking.png │ │ ├── clfs-decision-regions.png │ │ ├── gbm-workflow-gbm-step-2.svg │ │ ├── gbm-workflow-gbm-step-1.svg │ │ ├── gbm-workflow-original-dession-tree.svg │ │ ├── gbm-workflow-gbm-step-2.graphml │ │ ├── gbm-workflow-gbm-step-1.graphml │ │ └── stacking-workflow.graphml ├── 08-model-evaluation-and-hyperparameter-optimization │ ├── css │ ├── assets │ ├── includes │ └── images │ │ ├── 
auc.png │ │ ├── l1.png │ │ ├── l2.png │ │ ├── roc-curve.png │ │ ├── lp-norm-balls.png │ │ ├── grid-random-search.png │ │ ├── precision-accuracy.png │ │ ├── bias-variance-dilemma.png │ │ ├── grid-search-workflow.png │ │ ├── heuristic-algorithms.png │ │ ├── underfitting-overfitting.png │ │ ├── wine-lr-regularization.png │ │ └── grid-search-cross-validation.png └── _common │ ├── assets │ ├── logo.png │ ├── avatar.png │ ├── code-good.svg │ └── code-bad.svg │ ├── design │ └── data-science-introduction-with-r.psd │ ├── scripts │ ├── resize-lecture-cover.sh │ ├── link-directories.sh │ └── wine-lr-regularization.py │ ├── includes │ └── after_body.html │ └── css │ └── style.css ├── docs ├── css │ └── table.css ├── images │ ├── others │ │ ├── jupyterlab-macos.png │ │ └── jupyterlab-windows.png │ ├── lecture │ │ ├── 05-data-visualization.png │ │ ├── 07-feature-engineering.png │ │ ├── 11-clustering-algorithms.png │ │ ├── 12-time-series-algorithms.png │ │ ├── 14-reproducible-research.png │ │ ├── 02-r-language-introduction.png │ │ ├── 13-deep-learning-algorithms.png │ │ ├── 01-data-science-introduction.png │ │ ├── 09-classification-algorithms-part-1.png │ │ ├── 10-classification-algorithms-part-2.png │ │ ├── 03-data-analytics-introduction-part-1.png │ │ ├── 04-data-analytics-introduction-part-2.png │ │ ├── 06-statistical-analytics-introduction.png │ │ └── 08-model-evaluation-and-hyperparameter-optimization.png │ └── data-science-introduction-with-r.png ├── lecture │ ├── 05-data-visualization.md │ ├── 07-feature-engineering.md │ ├── 11-clustering-algorithms.md │ ├── 12-time-series-algorithms.md │ ├── 14-reproducible-research.md │ ├── 02-r-language-introduction.md │ ├── 13-deep-learning-algorithms.md │ ├── 01-data-science-introduction.md │ ├── 06-statistical-analytics-introduction.md │ ├── 09-classification-algorithms-part-1.md │ ├── 10-classification-algorithms-part-2.md │ ├── 03-data-analytics-introduction-part-1.md │ ├── 04-data-analytics-introduction-part-2.md │ └── 
08-model-evaluation-and-hyperparameter-optimization.md ├── index.md └── others │ └── 01-ide-installation-manual.md ├── .gitignore ├── CHANGELOG.md ├── README.md └── mkdocs.yml /requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs-material 2 | -------------------------------------------------------------------------------- /slides/05-data-visualization/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/07-feature-engineering/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/01-data-science-introduction/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/02-r-language-introduction/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/05-data-visualization/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/07-feature-engineering/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/11-clustering-algorithms/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/css: 
-------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/14-reproducible-research/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/02-r-language-introduction/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/05-data-visualization/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/07-feature-engineering/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/11-clustering-algorithms/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- 
/slides/14-reproducible-research/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/01-data-science-introduction/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/01-data-science-introduction/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/02-r-language-introduction/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/06-statistical-analytics-introduction/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/09-classification-algorithms-part-1/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/10-classification-algorithms-part-2/css: -------------------------------------------------------------------------------- 1 | ../_common/css 
-------------------------------------------------------------------------------- /slides/11-clustering-algorithms/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/14-reproducible-research/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/06-statistical-analytics-introduction/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/09-classification-algorithms-part-1/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- 
/slides/10-classification-algorithms-part-2/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/06-statistical-analytics-introduction/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/09-classification-algorithms-part-1/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/10-classification-algorithms-part-2/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/css: -------------------------------------------------------------------------------- 1 | ../_common/css -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/assets: -------------------------------------------------------------------------------- 1 | ../_common/assets -------------------------------------------------------------------------------- 
/slides/08-model-evaluation-and-hyperparameter-optimization/includes: -------------------------------------------------------------------------------- 1 | ../_common/includes -------------------------------------------------------------------------------- /docs/css/table.css: -------------------------------------------------------------------------------- 1 | .md-typeset__table { 2 | min-width: 100%; 3 | } 4 | 5 | .md-typeset table:not([class]) { 6 | display: table; 7 | } -------------------------------------------------------------------------------- /slides/_common/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/_common/assets/logo.png -------------------------------------------------------------------------------- /slides/_common/assets/avatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/_common/assets/avatar.png -------------------------------------------------------------------------------- /docs/images/others/jupyterlab-macos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/others/jupyterlab-macos.png -------------------------------------------------------------------------------- /docs/images/others/jupyterlab-windows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/others/jupyterlab-windows.png -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/age.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/07-feature-engineering/images/age.png -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/pca.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/07-feature-engineering/images/pca.jpg -------------------------------------------------------------------------------- /docs/images/lecture/05-data-visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/05-data-visualization.png -------------------------------------------------------------------------------- /docs/images/lecture/07-feature-engineering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/07-feature-engineering.png -------------------------------------------------------------------------------- /slides/05-data-visualization/images/minard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/minard.png -------------------------------------------------------------------------------- /docs/images/data-science-introduction-with-r.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/data-science-introduction-with-r.png -------------------------------------------------------------------------------- /docs/images/lecture/11-clustering-algorithms.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/11-clustering-algorithms.png -------------------------------------------------------------------------------- /docs/images/lecture/12-time-series-algorithms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/12-time-series-algorithms.png -------------------------------------------------------------------------------- /docs/images/lecture/14-reproducible-research.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/14-reproducible-research.png -------------------------------------------------------------------------------- /slides/05-data-visualization/images/antv-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/antv-logo.png -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/images/stock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/12-time-series-algorithms/images/stock.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/rnn.png 
-------------------------------------------------------------------------------- /slides/14-reproducible-research/images/quarto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/14-reproducible-research/images/quarto.png -------------------------------------------------------------------------------- /docs/images/lecture/02-r-language-introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/02-r-language-introduction.png -------------------------------------------------------------------------------- /docs/images/lecture/13-deep-learning-algorithms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/13-deep-learning-algorithms.png -------------------------------------------------------------------------------- /slides/05-data-visualization/images/plotly-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/plotly-logo.png -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/binned-age.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/07-feature-engineering/images/binned-age.png -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/images/weather.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/12-time-series-algorithms/images/weather.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/lstm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/lstm.png -------------------------------------------------------------------------------- /slides/14-reproducible-research/images/git-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/14-reproducible-research/images/git-logo.png -------------------------------------------------------------------------------- /slides/14-reproducible-research/images/octocat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/14-reproducible-research/images/octocat.png -------------------------------------------------------------------------------- /docs/images/lecture/01-data-science-introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/01-data-science-introduction.png -------------------------------------------------------------------------------- /slides/05-data-visualization/images/echarts-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/echarts-logo.png -------------------------------------------------------------------------------- 
/slides/05-data-visualization/images/ggplot2-exts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/ggplot2-exts.png -------------------------------------------------------------------------------- /slides/05-data-visualization/images/ggplot2-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/ggplot2-logo.png -------------------------------------------------------------------------------- /slides/05-data-visualization/images/plotly-gallery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/plotly-gallery.png -------------------------------------------------------------------------------- /slides/05-data-visualization/images/plotnine-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/plotnine-logo.png -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/data-monitor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/07-feature-engineering/images/data-monitor.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/lenet-5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/lenet-5.png -------------------------------------------------------------------------------- /slides/14-reproducible-research/images/github-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/14-reproducible-research/images/github-logo.png -------------------------------------------------------------------------------- /slides/14-reproducible-research/images/jupyterlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/14-reproducible-research/images/jupyterlab.png -------------------------------------------------------------------------------- /slides/14-reproducible-research/images/not-equal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/14-reproducible-research/images/not-equal.jpg -------------------------------------------------------------------------------- /slides/05-data-visualization/images/echarts-gallery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/echarts-gallery.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/rnn-loops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/rnn-loops.png -------------------------------------------------------------------------------- 
/slides/14-reproducible-research/images/draw-an-owl.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/14-reproducible-research/images/draw-an-owl.jpeg -------------------------------------------------------------------------------- /docs/images/lecture/09-classification-algorithms-part-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/09-classification-algorithms-part-1.png -------------------------------------------------------------------------------- /docs/images/lecture/10-classification-algorithms-part-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/10-classification-algorithms-part-2.png -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/anomaly-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/07-feature-engineering/images/anomaly-comparison.png -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/anomaly-example-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/07-feature-engineering/images/anomaly-example-1.jpg -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/anomaly-example-2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/07-feature-engineering/images/anomaly-example-2.jpg -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/conv-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/conv-example.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/digit-pixels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/digit-pixels.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/lena-filters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/lena-filters.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/max-pooling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/max-pooling.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/tanh-function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/tanh-function.png 
-------------------------------------------------------------------------------- /slides/14-reproducible-research/images/jupyter-notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/14-reproducible-research/images/jupyter-notebook.png -------------------------------------------------------------------------------- /slides/_common/design/data-science-introduction-with-r.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/_common/design/data-science-introduction-with-r.psd -------------------------------------------------------------------------------- /docs/images/lecture/03-data-analytics-introduction-part-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/03-data-analytics-introduction-part-1.png -------------------------------------------------------------------------------- /docs/images/lecture/04-data-analytics-introduction-part-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/04-data-analytics-introduction-part-2.png -------------------------------------------------------------------------------- /docs/images/lecture/06-statistical-analytics-introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/06-statistical-analytics-introduction.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-bp-network.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-bp-network.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/conv2d-kernels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/conv2d-kernels.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/conv3d-kernels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/conv3d-kernels.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/lstm-cell-state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/lstm-cell-state.png -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/images/hadley.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/03-data-analytics-introduction-part-1/images/hadley.jpg -------------------------------------------------------------------------------- /slides/05-data-visualization/images/big-data-visualization.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/big-data-visualization.png -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/xkcd-machine-learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/07-feature-engineering/images/xkcd-machine-learning.png -------------------------------------------------------------------------------- /slides/10-classification-algorithms-part-2/images/stacking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/10-classification-algorithms-part-2/images/stacking.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-mp-perceptron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-mp-perceptron.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/conv-zero-padding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/conv-zero-padding.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/parameter-sharing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/parameter-sharing.png 
-------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/rnn-loops-unrolled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/rnn-loops-unrolled.png -------------------------------------------------------------------------------- /slides/02-r-language-introduction/images/rstudio-environments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/02-r-language-introduction/images/rstudio-environments.png -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/images/tidyverse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/03-data-analytics-introduction-part-1/images/tidyverse.png -------------------------------------------------------------------------------- /slides/11-clustering-algorithms/images/kmeans-example-round-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/11-clustering-algorithms/images/kmeans-example-round-1.png -------------------------------------------------------------------------------- /slides/11-clustering-algorithms/images/kmeans-example-round-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/11-clustering-algorithms/images/kmeans-example-round-2.png -------------------------------------------------------------------------------- 
/slides/11-clustering-algorithms/images/kmeans-example-round-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/11-clustering-algorithms/images/kmeans-example-round-3.png -------------------------------------------------------------------------------- /slides/11-clustering-algorithms/images/kmeans-example-round-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/11-clustering-algorithms/images/kmeans-example-round-7.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/lstm-cell-input-gate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/lstm-cell-input-gate.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/sparse-interactions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/sparse-interactions.png -------------------------------------------------------------------------------- /slides/14-reproducible-research/images/reproducibility-layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/14-reproducible-research/images/reproducibility-layers.png -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/images/tidyr-unite.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/03-data-analytics-introduction-part-1/images/tidyr-unite.png -------------------------------------------------------------------------------- /slides/06-statistical-analytics-introduction/images/lm-normal-q-q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/06-statistical-analytics-introduction/images/lm-normal-q-q.png -------------------------------------------------------------------------------- /slides/11-clustering-algorithms/images/agglomerative-clustering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/11-clustering-algorithms/images/agglomerative-clustering.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/indirect-interactions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/indirect-interactions.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/lstm-cell-forget-gate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/lstm-cell-forget-gate.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/lstm-cell-output-gate.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/lstm-cell-output-gate.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/lstm-cell-state-update.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/lstm-cell-state-update.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/lstm-operations-symbols.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/lstm-operations-symbols.png -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/images/tidyr-separate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/03-data-analytics-introduction-part-1/images/tidyr-separate.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/dplyr-join-anti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/dplyr-join-anti.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/dplyr-join-semi.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/dplyr-join-semi.png -------------------------------------------------------------------------------- /slides/05-data-visualization/images/ggplot2-visualization-stat-bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/ggplot2-visualization-stat-bar.png -------------------------------------------------------------------------------- /slides/05-data-visualization/images/the-grammar-of-graphics-layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/the-grammar-of-graphics-layers.png -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/images/peyton-manning-diagnostics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/12-time-series-algorithms/images/peyton-manning-diagnostics.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-multilayer-perceptron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-multilayer-perceptron.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/deep-learning-frameworks.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/deep-learning-frameworks.jpg -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/lstm-pointwise-operation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/lstm-pointwise-operation.png -------------------------------------------------------------------------------- /docs/lecture/05-data-visualization.md: -------------------------------------------------------------------------------- 1 | # 数据可视化 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/lecture/07-feature-engineering.md: -------------------------------------------------------------------------------- 1 | # 特征工程 2 | 3 | 4 | -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/images/tidyr-pivot-longer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/03-data-analytics-introduction-part-1/images/tidyr-pivot-longer.png -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/images/tidyr-pivot-wider.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/03-data-analytics-introduction-part-1/images/tidyr-pivot-wider.png -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/images/transform-logical.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/03-data-analytics-introduction-part-1/images/transform-logical.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/dplyr-join-inner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/dplyr-join-inner.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/dplyr-join-outer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/dplyr-join-outer.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/dplyr-join-setup-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/dplyr-join-setup-1.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/dplyr-join-setup-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/dplyr-join-setup-2.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/purrr-lists-invoke.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/purrr-lists-invoke.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/purrr-lists-map2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/purrr-lists-map2.png -------------------------------------------------------------------------------- /slides/06-statistical-analytics-introduction/images/gradient-descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/06-statistical-analytics-introduction/images/gradient-descent.png -------------------------------------------------------------------------------- /slides/06-statistical-analytics-introduction/images/lm-scale-location.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/06-statistical-analytics-introduction/images/lm-scale-location.png -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/linear-relationships-male-female.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/07-feature-engineering/images/linear-relationships-male-female.png -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/linear-relationships-verb-tense.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/07-feature-engineering/images/linear-relationships-verb-tense.png -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/auc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/auc.png -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/l1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/l1.png -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/l2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/l2.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/cnn-accuracy-and-parameters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/cnn-accuracy-and-parameters.png -------------------------------------------------------------------------------- /slides/14-reproducible-research/images/the-spectrum-of-reproducibility.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/14-reproducible-research/images/the-spectrum-of-reproducibility.png -------------------------------------------------------------------------------- /docs/images/lecture/08-model-evaluation-and-hyperparameter-optimization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/docs/images/lecture/08-model-evaluation-and-hyperparameter-optimization.png -------------------------------------------------------------------------------- /docs/lecture/11-clustering-algorithms.md: -------------------------------------------------------------------------------- 1 | # 聚类算法 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/lecture/12-time-series-algorithms.md: -------------------------------------------------------------------------------- 1 | # 时间序列算法 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/lecture/14-reproducible-research.md: -------------------------------------------------------------------------------- 1 | # 可重复性研究 2 | 3 | 4 | -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/dplyr-join-semi-many.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/dplyr-join-semi-many.png -------------------------------------------------------------------------------- /slides/10-classification-algorithms-part-2/images/clfs-decision-regions.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/10-classification-algorithms-part-2/images/clfs-decision-regions.png -------------------------------------------------------------------------------- /docs/lecture/02-r-language-introduction.md: -------------------------------------------------------------------------------- 1 | # R 语言简介 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/lecture/13-deep-learning-algorithms.md: -------------------------------------------------------------------------------- 1 | # 深度学习算法 2 | 3 | 4 | -------------------------------------------------------------------------------- /slides/03-data-analytics-introduction-part-1/images/tidyr-column-row-cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/03-data-analytics-introduction-part-1/images/tidyr-column-row-cell.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/dplyr-join-many-to-many.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/dplyr-join-many-to-many.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/dplyr-join-one-to-many.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/dplyr-join-one-to-many.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/purrr-lists-pmap-named.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/purrr-lists-pmap-named.png -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/relational-nycflights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/relational-nycflights.png -------------------------------------------------------------------------------- /slides/06-statistical-analytics-introduction/images/lm-residuals-leverage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/06-statistical-analytics-introduction/images/lm-residuals-leverage.png -------------------------------------------------------------------------------- /slides/06-statistical-analytics-introduction/images/lm-residuals-vs-fitted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/06-statistical-analytics-introduction/images/lm-residuals-vs-fitted.png -------------------------------------------------------------------------------- /slides/06-statistical-analytics-introduction/images/normal-distribution-pdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/06-statistical-analytics-introduction/images/normal-distribution-pdf.png -------------------------------------------------------------------------------- 
/slides/08-model-evaluation-and-hyperparameter-optimization/images/roc-curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/roc-curve.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-area-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-area-1.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-area-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-area-2.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-area-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-area-3.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-eor-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-eor-1.png 
-------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-eor-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-eor-2.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-eor-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-eor-3.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/rnn-long-term-dependencies-long.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/rnn-long-term-dependencies-long.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/rnn-long-term-dependencies-short.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/rnn-long-term-dependencies-short.png -------------------------------------------------------------------------------- /docs/lecture/01-data-science-introduction.md: -------------------------------------------------------------------------------- 1 | # 数据科学简介 2 | 3 | 4 | -------------------------------------------------------------------------------- /slides/04-data-analytics-introduction-part-2/images/purrr-lists-pmap-unnamed.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/04-data-analytics-introduction-part-2/images/purrr-lists-pmap-unnamed.png -------------------------------------------------------------------------------- /slides/06-statistical-analytics-introduction/images/exponential-distribution-pdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/06-statistical-analytics-introduction/images/exponential-distribution-pdf.png -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/lp-norm-balls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/lp-norm-balls.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-structure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-structure-1.png -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-structure-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-structure-2.png -------------------------------------------------------------------------------- 
/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-structure-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-structure-3.png -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/images/peyton-manning-trend-changepoints-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/12-time-series-algorithms/images/peyton-manning-trend-changepoints-example.png -------------------------------------------------------------------------------- /docs/lecture/06-statistical-analytics-introduction.md: -------------------------------------------------------------------------------- 1 | # 统计分析基础 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/lecture/09-classification-algorithms-part-1.md: -------------------------------------------------------------------------------- 1 | # 分类算法(上) 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/lecture/10-classification-algorithms-part-2.md: -------------------------------------------------------------------------------- 1 | # 分类算法(下) 2 | 3 | 4 | -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/grid-random-search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/grid-random-search.png -------------------------------------------------------------------------------- 
/slides/08-model-evaluation-and-hyperparameter-optimization/images/precision-accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/precision-accuracy.png -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/images/peyton-manning-potential-changepoints-sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/12-time-series-algorithms/images/peyton-manning-potential-changepoints-sample.png -------------------------------------------------------------------------------- /docs/lecture/03-data-analytics-introduction-part-1.md: -------------------------------------------------------------------------------- 1 | # 数据分析基础(上) 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/lecture/04-data-analytics-introduction-part-2.md: -------------------------------------------------------------------------------- 1 | # 数据分析基础(下) 2 | 3 | 4 | -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/bias-variance-dilemma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/bias-variance-dilemma.png -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/grid-search-workflow.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/grid-search-workflow.png -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/heuristic-algorithms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/heuristic-algorithms.png -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/underfitting-overfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/underfitting-overfitting.png -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/wine-lr-regularization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/wine-lr-regularization.png -------------------------------------------------------------------------------- /slides/08-model-evaluation-and-hyperparameter-optimization/images/grid-search-cross-validation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/08-model-evaluation-and-hyperparameter-optimization/images/grid-search-cross-validation.png -------------------------------------------------------------------------------- 
/slides/05-data-visualization/images/1913-piqua-ohio-advertisement-one-look-is-worth-a-thousand-words.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leovan/data-science-introduction-with-r/HEAD/slides/05-data-visualization/images/1913-piqua-ohio-advertisement-one-look-is-worth-a-thousand-words.jpg -------------------------------------------------------------------------------- /docs/lecture/08-model-evaluation-and-hyperparameter-optimization.md: -------------------------------------------------------------------------------- 1 | # 模型评估 & 超参数优化 2 | 3 | 4 | -------------------------------------------------------------------------------- /slides/_common/assets/code-good.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /slides/_common/scripts/resize-lecture-cover.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # go to slides images directory; abort if it cannot be entered so the 4 | # in-place resize below never touches images in some other directory 5 | cd "$(dirname "$(dirname "$(dirname "$(dirname "$0")")")")" || exit 1 6 | cd static/images/lecture || exit 1 7 | 8 | # resize all images 9 | for image in ./*.png 10 | do 11 | # skip the unexpanded pattern when no PNG file matches 12 | [ -e "${image}" ] || continue 13 | convert -resize 800x "${image}" "${image}" 14 | done 15 | -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-area-1.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=100x,outext=.png}]{standalone} 2 | \begin{document} 3 | \begin{tikzpicture} 4 | \fill [white] (0, 0) rectangle (4, 4); 5 | \fill [black] (0, 0) -- (0, 3) -- (1, 3) -- (4, 1) -- (4, 0) -- cycle; 6 | \draw [black] (0, 0) rectangle (4, 4); 7 | \end{tikzpicture} 8 | \end{document} --------------------------------------------------------------------------------
/slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-area-2.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=100x,outext=.png}]{standalone} 2 | \begin{document} 3 | \begin{tikzpicture} 4 | \fill [white] (0, 0) rectangle (4, 4); 5 | \fill [black] (0, 0) -- (0, 4) -- (2, 4) -- (1, 2.5) -- (1.5, 1.5) -- (4, 0) -- cycle; 6 | \draw [black] (0, 0) rectangle (4, 4); 7 | \end{tikzpicture} 8 | \end{document} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # R 2 | .Rhistory 3 | .Rproj.user 4 | .RData 5 | .Ruserdata 6 | 7 | # VS Code 8 | .vscode 9 | 10 | # Python 11 | *.pyd 12 | *.pyc 13 | *.pyo 14 | .ipynb_checkpoints 15 | 16 | # JetBrains 17 | .idea 18 | 19 | # System 20 | .DS_Store 21 | *.log 22 | *.dump 23 | 24 | # Slide 25 | /slides/**/*_cache 26 | /slides/**/*_files 27 | /slides/**/libs 28 | /slides/**/generated 29 | /slides/**/*.html 30 | !/slides/_common/** 31 | 32 | # Others 33 | *.h5 34 | -------------------------------------------------------------------------------- /slides/11-clustering-algorithms/images/man-and-ed-distance.asy: -------------------------------------------------------------------------------- 1 | pen dps = linewidth(0.7) + fontsize(0); 2 | defaultpen(dps); 3 | draw((0, 100)--(0, 0), linewidth(2)); 4 | draw((0, 0)--(200, 0), linewidth(2)); 5 | draw((200, 0)--(0, 100), linewidth(2) + linetype("2 4")); 6 | pen dotstyle = black; 7 | dot((200, 0), dotstyle); 8 | dot((0, 100), dotstyle); 9 | label("$(2, 0)$", (220, 0)); 10 | label("$(0, 1)$", (0, 110)); 11 | label("Euclidean Distance", (160, 60)); 12 | label("Manhattan Distance", (0, -10)); -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-area-3.tex: 
-------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=100x,outext=.png}]{standalone} 2 | \begin{document} 3 | \begin{tikzpicture} 4 | \fill [black] (0, 0) rectangle (4, 4); 5 | \fill [white] (0.5, 3) -- (1, 3.25) -- (1.5, 2.75) -- (0.75, 2.25) -- cycle; 6 | \fill [white] (2, 1.5) -- (2.75, 1.75) -- (2.6, 2.3) -- (3.25, 2.5) -- (3.75, 1.5) -- (3.6, 0.75) -- (3.25, 0.5) -- (2.25, 0.75) -- cycle; 7 | \fill [black] (2.75, 1) -- (2.8, 1.5) -- (3.2, 1.7) -- (3.4, 1.3) -- (3.2, 0.9) -- cycle; 8 | \end{tikzpicture} 9 | \end{document} -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-eor-1.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=100x,outext=.png}]{standalone} 2 | \begin{document} 3 | \tikzstyle{a}=[circle,draw=green!50,fill=green!20,thick,minimum size=6mm] 4 | \tikzstyle{b}=[rectangle,draw=red!50,fill=red!20,thick,minimum size=6mm] 5 | \begin{tikzpicture} 6 | \fill [white] (0, 0) rectangle (4, 4); 7 | \draw [black] (0, 0) rectangle (4, 4); 8 | \filldraw [fill=gray!60] (0, 1) -- (0, 4) -- (3, 4) -- cycle; 9 | \node [a] (a1) at (1, 3) {A}; 10 | \node [a] (a2) at (3, 1) {A}; 11 | \node [b] (b1) at (1, 1) {B}; 12 | \node [b] (b2) at (3, 3) {B}; 13 | \end{tikzpicture} 14 | \end{document} -------------------------------------------------------------------------------- /slides/_common/assets/code-bad.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [1.2.0](https://github.com/leovan/data-science-introduction-with-r/compare/v1.1.0...v1.2.0) (2024-02-11) 2 | 3 | - Remove R 
Markdown and add quarto to reproducible research chapter. 4 | - Remove GBM model code with `mlr3extralearners` in classification algorithms part 2 chapter. 5 | - Add XGBoost introduction in classification algorithms part 2 chapter. 6 | - Update slide background and fix slide styles. 7 | 8 | ## [1.1.0](https://github.com/leovan/data-science-introduction-with-r/compare/v1.0.0...v1.1.0) (2023-02-09) 9 | 10 | - Update website based on Hugo. 11 | - Optimize slides folder structure. 12 | - Fix incompatibility problem with latest libraries. 13 | 14 | ## 1.0.0 (2021-01-11) 15 | 16 | - First release. 17 | -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-eor-2.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=100x,outext=.png}]{standalone} 2 | \begin{document} 3 | \tikzstyle{a}=[circle,draw=green!50,fill=green!20,thick,minimum size=6mm] 4 | \tikzstyle{b}=[rectangle,draw=red!50,fill=red!20,thick,minimum size=6mm] 5 | \begin{tikzpicture} 6 | \fill [white] (0, 0) rectangle (4, 4); 7 | \draw [black] (0, 0) rectangle (4, 4); 8 | \filldraw [fill=gray!60] (0, 4) -- (1, 4) -- (4, 1) -- (4, 0) -- (3, 0) -- (0, 3) -- cycle; 9 | \node [a] (a1) at (1, 3) {A}; 10 | \node [a] (a2) at (3, 1) {A}; 11 | \node [b] (b1) at (1, 1) {B}; 12 | \node [b] (b2) at (3, 3) {B}; 13 | \end{tikzpicture} 14 | \end{document} -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/data/skirts.txt: -------------------------------------------------------------------------------- 1 | Diameter of skirts at hem, 1866-1911, annual 2 | #see also beards.1 3 | #SEE MARIJA NORUSIS'S 1981 SPSS PRIMER FOR DETAILS AND 4 | #ADDITIONAL DATA EXTENDING BACK TO 1842 AND FORWARD TO 1953 5 | SKIRTS 6 | 608. 7 | 617. 8 | 625. 9 | 636. 10 | 657. 11 | 691. 12 | 728. 13 | 784. 14 | 816. 
15 | 876. 16 | 949. 17 | 997. 18 | 1027. 19 | 1047. 20 | 1049. 21 | 1018. 22 | 1021. 23 | 1012. 24 | 1018. 25 | 991. 26 | 962. 27 | 921. 28 | 871. 29 | 829. 30 | 822. 31 | 820. 32 | 802. 33 | 821. 34 | 819. 35 | 791. 36 | 746. 37 | 726. 38 | 661. 39 | 620. 40 | 588. 41 | 568. 42 | 542. 43 | 551. 44 | 541. 45 | 557. 46 | 556. 47 | 534. 48 | 528. 49 | 529. 50 | 523. 51 | 531. 52 | -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-eor-3.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=100x,outext=.png}]{standalone} 2 | \begin{document} 3 | \tikzstyle{a}=[circle,draw=green!50,fill=green!20,thick,minimum size=6mm] 4 | \tikzstyle{b}=[rectangle,draw=red!50,fill=red!20,thick,minimum size=6mm] 5 | \begin{tikzpicture} 6 | \fill [white] (0, 0) rectangle (4, 4); 7 | \draw [black] (0, 0) rectangle (4, 4); 8 | \filldraw [fill=gray!60] (0.25, 2.25) -- (0.5, 3.5) -- (1.5, 3.75) -- (1.75, 2.4) -- cycle; 9 | \filldraw [fill=gray!60] (2.25, 1.25) -- (3, 1.75) -- (4, 1.5) -- (4, 0) -- (2.5, 0) -- cycle; 10 | \node [a] (a1) at (1, 3) {A}; 11 | \node [a] (a2) at (3, 1) {A}; 12 | \node [b] (b1) at (1, 1) {B}; 13 | \node [b] (b2) at (3, 3) {B}; 14 | \end{tikzpicture} 15 | \end{document} -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-structure-1.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=100x,outext=.png}]{standalone} 2 | \usetikzlibrary{arrows} 3 | \begin{document} 4 | \tikzstyle{input}=[circle,draw=green!50,fill=green!20,thick,minimum size=6mm] 5 | \tikzstyle{output}=[circle,draw=blue!50,fill=blue!20,thick,minimum size=6mm] 6 | \tikzstyle{hidden}=[rectangle,draw=red!50,fill=red!20,thick,minimum size=6mm] 7 | 
\tikzstyle{transition}=[thick,draw=black!75,fill=black!20] 8 | \tikzstyle{pre}=[<-,shorten <=1pt,>=stealth',semithick] 9 | \tikzstyle{post}=[->,shorten >=1pt,>=stealth',semithick] 10 | \begin{tikzpicture}[node distance=1cm, auto] 11 | \node [output] (o1) {}; 12 | \node [input] (i1) [below of=o1, xshift=-10mm] {} 13 | edge [post] (o1); 14 | \node [input] (i2) [below of=o1, xshift=+10mm] {} 15 | edge [post] (o1); 16 | \end{tikzpicture} 17 | \end{document} -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # R 语言数据科学导论 logo 2 | 3 | ![Release](https://img.shields.io/github/release/leovan/data-science-introduction-with-r.svg) 4 | ![License](https://img.shields.io/badge/license-CC%20BY--NC--SA%204.0-blue.svg) 5 | ![Issues](https://img.shields.io/github/issues/leovan/data-science-introduction-with-r.svg) 6 | 7 | --- 8 | 9 | ## 简介 10 | 11 | 本项目是以 R 语言为基础的数据科学入门教程,源代码存储在 [GitHub](https://github.com/leovan/data-science-introduction-with-r) 中。 12 | 13 | ## 参考 14 | 15 | 1. 《R语言实战》(R in Action),Robert I. Kabacoff 著,王小宁、刘撷芯、黄俊文 等 译 16 | 2. 《R数据科学》(R for Data Science),Hadley Wickham & Garrett Grolemund 著,陈光欣 译 17 | 3. 《高级R语言编程指南》(Advanced R),Hadley Wickham 著,潘文捷、许金炜、李洪成 译 18 | 4. 《基于R语言的机器学习》(Introduction to Machine Learning with R),Scott V. Burger 著,马晶慧 译 19 | 5. 《机器学习与R语言》(Machine Learning with R),Brett Lantz 著,李洪成、许金炜、李舰 译 20 | 6. 《统计学习方法》李航 著 21 | 7. 《机器学习》周志华 著 22 | 8.
《深度学习》(Deep Learning),Ian Goodfellow, Yoshua Bengio & Aaron Courville 著,赵申剑、黎彧君、符天凡、李凯 译 23 | 24 | ## 授权 25 | 26 | 本项目遵守 [CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/) 协议。 27 | -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-structure-2.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=100x,outext=.png}]{standalone} 2 | \usetikzlibrary{arrows} 3 | \begin{document} 4 | \tikzstyle{input}=[circle,draw=green!50,fill=green!20,thick,minimum size=6mm] 5 | \tikzstyle{output}=[circle,draw=blue!50,fill=blue!20,thick,minimum size=6mm] 6 | \tikzstyle{hidden}=[rectangle,draw=red!50,fill=red!20,thick,minimum size=6mm] 7 | \tikzstyle{transition}=[thick,draw=black!75,fill=black!20] 8 | \tikzstyle{pre}=[<-,shorten <=1pt,>=stealth',semithick] 9 | \tikzstyle{post}=[->,shorten >=1pt,>=stealth',semithick] 10 | \begin{tikzpicture}[node distance=1.2cm, auto] 11 | \node [output] (o1) {}; 12 | \node [hidden] (h1) [below of=o1, xshift=-10mm] {} 13 | edge [post] (o1); 14 | \node [hidden] (h2) [below of=o1, xshift=+10mm] {} 15 | edge [post] (o1); 16 | % input layer: each input node gets its own name (i1, i2) 17 | \node [input] (i1) [below of=h1] {} 18 | edge [post] (h1) 19 | edge [post] (h2); 20 | \node [input] (i2) [below of=h2] {} 21 | edge [post] (h1) 22 | edge [post] (h2); 23 | \end{tikzpicture} 24 | \end{document} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # R 语言数据科学导论 logo 2 | 3 | ![Release](https://img.shields.io/github/release/leovan/data-science-introduction-with-r.svg) 4 | ![License](https://img.shields.io/badge/license-CC%20BY--NC--SA%204.0-blue.svg) 5 | ![Issues](https://img.shields.io/github/issues/leovan/data-science-introduction-with-r.svg) 6 | 7 | --- 8 | 9 | ## 简介 10 | 11 | 本项目是以 R
语言为基础的数据科学入门教程,在线版本托管在 上。 12 | 13 | ## 参考 14 | 15 | 1. 《R语言实战》(R in Action),Robert I. Kabacoff 著,王小宁、刘撷芯、黄俊文 等 译 16 | 2. 《R数据科学》(R for Data Science),Hadley Wickham & Garrett Grolemund 著,陈光欣 译 17 | 3. 《高级R语言编程指南》(Advanced R),Hadley Wickham 著,潘文捷、许金炜、李洪成 译 18 | 4. 《基于R语言的机器学习》(Introduction to Machine Learning with R),Scott V. Burger 著,马晶慧 译 19 | 5. 《机器学习与R语言》(Machine Learning with R),Brett Lantz 著,李洪成、许金炜、李舰译 译 20 | 6. 《统计学习方法》李航 著 21 | 7. 《机器学习》周志华 著 22 | 8. 《深度学习》(Deep Learning),Ian Goodfellow, Yoshua Bengio & Aaron Courville 著,赵申剑、黎彧君、符天凡、李凯 译 23 | 24 | ## 授权 25 | 26 | 本项目遵守 [CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/) 协议。 27 | 28 | 版权所有 © 2019-2025,范叶亮 | Leo Van 29 | -------------------------------------------------------------------------------- /slides/_common/scripts/link-directories.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Re-create the assets, css and includes symlinks of every slide directory. 4 | 5 | # go to root directory of slides; abort on failure so that no link is 6 | # created relative to the wrong directory 7 | cd "$(dirname "$(dirname "$(dirname "$0")")")" || exit 1 8 | 9 | # slides directories 10 | slides_directories=( 11 | "01-data-science-introduction" 12 | "02-r-language-introduction" 13 | "03-data-analytics-introduction-part-1" 14 | "04-data-analytics-introduction-part-2" 15 | "05-data-visualization" 16 | "06-statistical-analytics-introduction" 17 | "07-feature-engineering" 18 | "08-model-evaluation-and-hyperparameter-optimization" 19 | "09-classification-algorithms-part-1" 20 | "10-classification-algorithms-part-2" 21 | "11-clustering-algorithms" 22 | "12-time-series-algorithms" 23 | "13-deep-learning-algorithms" 24 | "14-reproducible-research" 25 | ) 26 | 27 | # shared resource directories under _common 28 | shared_directories=( 29 | "assets" 30 | "css" 31 | "includes" 32 | ) 33 | 34 | # link base resources 35 | for slide_dir in "${slides_directories[@]}" 36 | do 37 | for shared_dir in "${shared_directories[@]}" 38 | do 39 | # remove a stale symlink before re-creating it 40 | if [ -L "${slide_dir}/${shared_dir}" ] 41 | then 42 | rm "${slide_dir}/${shared_dir}" 43 | fi 44 | 45 | ln -s
"../_common/${shared_dir}" "${slide_dir}/${shared_dir}" 46 | done 47 | done 48 | -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-mp-perceptron.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=400x,outext=.png}]{standalone} 2 | \begin{document} 3 | \begin{tikzpicture} 4 | \node [left] at (0, 5) {$+1$}; 5 | \node [below left] at (1.5, 5) {$b_j$}; 6 | \draw [-] (0, 5) -- (1.5, 5); 7 | \draw [->] (1.5, 5) -- (2.5, 3.5); 8 | \node [above left] at (0, 4) {$x_1$}; 9 | \node [above] at (1, 3.5) {$w_{1j}$}; 10 | \node [] at (-0.3, 2.9) {$\cdot$}; 11 | \node [] at (-0.3, 3.1) {$\cdot$}; 12 | \node [] at (-0.3, 3.3) {$\cdot$}; 13 | \draw [->] (0, 0) -- (2, 1); 14 | \node [left] at (0, 2) {$x_i$}; 15 | \node [above] at (1, 2) {$w_{ij}$}; 16 | \node [] at (-0.3, 0.8) {$\cdot$}; 17 | \node [] at (-0.3, 1) {$\cdot$}; 18 | \node [] at (-0.3, 1.2) {$\cdot$}; 19 | \draw [->] (0, 2) -- (2, 2); 20 | \node [below left] at (0, 0) {$x_n$}; 21 | \node [above] at (1, 0.5) {$w_{nj}$}; 22 | \draw [->] (0, 4) -- (2, 3); 23 | \draw [fill = lightgray] (3.2, 1) -- (3.2, 3) arc (90:-90:1cm) -- cycle; 24 | \draw [fill = lightgray] (3.2, 1) arc (-90:-270:1cm) -- (3.2, 3) -- cycle; 25 | \node [] at (2.7, 2) {$\sum$}; 26 | \node [] at (3.7, 2) {$f$}; 27 | \node [above left] at (6.2, 2) {$O_{nj}$}; 28 | \draw [->] (4.2, 2) -- (6.2, 2); 29 | \end{tikzpicture} 30 | \end{document} -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-perceptron-comparision-structure-3.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=100x,outext=.png}]{standalone} 2 | \usetikzlibrary{arrows} 3 | \begin{document} 4 |
\tikzstyle{input}=[circle,draw=green!50,fill=green!20,thick,minimum size=6mm] 5 | \tikzstyle{output}=[circle,draw=blue!50,fill=blue!20,thick,minimum size=6mm] 6 | \tikzstyle{hidden}=[rectangle,draw=red!50,fill=red!20,thick,minimum size=6mm] 7 | \tikzstyle{transition}=[thick,draw=black!75,fill=black!20] 8 | \tikzstyle{pre}=[<-,shorten <=1pt,>=stealth',semithick] 9 | \tikzstyle{post}=[->,shorten >=1pt,>=stealth',semithick] 10 | \begin{tikzpicture}[node distance=1.2cm, auto] 11 | \node [output] (o1) {}; 12 | \node [hidden] (h21) [below of=o1, xshift=-10mm] {} 13 | edge [post] (o1); 14 | \node [hidden] (h22) [below of=o1, xshift=+10mm] {} 15 | edge [post] (o1); 16 | \node [hidden] (h11) [below of=h21, xshift=-10mm] {} 17 | edge [post] (h21) 18 | edge [post] (h22); 19 | \node [hidden] (h12) [below of=h21, xshift=+10mm] {} 20 | edge [post] (h21) 21 | edge [post] (h22); 22 | \node [hidden] (h13) [below of=h22, xshift=+10mm] {} 23 | edge [post] (h21) 24 | edge [post] (h22); 25 | % input layer: each input node gets its own name (i1, i2) 26 | \node [input] (i1) [below of=h12, xshift=-10mm] {} 27 | edge [post] (h11) 28 | edge [post] (h12) 29 | edge [post] (h13); 30 | \node [input] (i2) [below of=h12, xshift=+10mm] {} 31 | edge [post] (h11) 32 | edge [post] (h12) 33 | edge [post] (h13); 34 | \end{tikzpicture} 35 | \end{document} -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/data/example_air_passengers.csv: -------------------------------------------------------------------------------- 1 | ds,y 1949-01-01,112 1949-02-01,118 1949-03-01,132 1949-04-01,129 1949-05-01,121 1949-06-01,135 1949-07-01,148 1949-08-01,148 1949-09-01,136 1949-10-01,119 1949-11-01,104 1949-12-01,118 1950-01-01,115 1950-02-01,126 1950-03-01,141 1950-04-01,135 1950-05-01,125 1950-06-01,149 1950-07-01,170 1950-08-01,170 1950-09-01,158 1950-10-01,133 1950-11-01,114 1950-12-01,140 1951-01-01,145 1951-02-01,150 1951-03-01,178 1951-04-01,163 1951-05-01,172 1951-06-01,178 1951-07-01,199 1951-08-01,199
1951-09-01,184 1951-10-01,162 1951-11-01,146 1951-12-01,166 1952-01-01,171 1952-02-01,180 1952-03-01,193 1952-04-01,181 1952-05-01,183 1952-06-01,218 1952-07-01,230 1952-08-01,242 1952-09-01,209 1952-10-01,191 1952-11-01,172 1952-12-01,194 1953-01-01,196 1953-02-01,196 1953-03-01,236 1953-04-01,235 1953-05-01,229 1953-06-01,243 1953-07-01,264 1953-08-01,272 1953-09-01,237 1953-10-01,211 1953-11-01,180 1953-12-01,201 1954-01-01,204 1954-02-01,188 1954-03-01,235 1954-04-01,227 1954-05-01,234 1954-06-01,264 1954-07-01,302 1954-08-01,293 1954-09-01,259 1954-10-01,229 1954-11-01,203 1954-12-01,229 1955-01-01,242 1955-02-01,233 1955-03-01,267 1955-04-01,269 1955-05-01,270 1955-06-01,315 1955-07-01,364 1955-08-01,347 1955-09-01,312 1955-10-01,274 1955-11-01,237 1955-12-01,278 1956-01-01,284 1956-02-01,277 1956-03-01,317 1956-04-01,313 1956-05-01,318 1956-06-01,374 1956-07-01,413 1956-08-01,405 1956-09-01,355 1956-10-01,306 1956-11-01,271 1956-12-01,306 1957-01-01,315 1957-02-01,301 1957-03-01,356 1957-04-01,348 1957-05-01,355 1957-06-01,422 1957-07-01,465 1957-08-01,467 1957-09-01,404 1957-10-01,347 1957-11-01,305 1957-12-01,336 1958-01-01,340 1958-02-01,318 1958-03-01,362 1958-04-01,348 1958-05-01,363 1958-06-01,435 1958-07-01,491 1958-08-01,505 1958-09-01,404 1958-10-01,359 1958-11-01,310 1958-12-01,337 1959-01-01,360 1959-02-01,342 1959-03-01,406 1959-04-01,396 1959-05-01,420 1959-06-01,472 1959-07-01,548 1959-08-01,559 1959-09-01,463 1959-10-01,407 1959-11-01,362 1959-12-01,405 1960-01-01,417 1960-02-01,391 1960-03-01,419 1960-04-01,461 1960-05-01,472 1960-06-01,535 1960-07-01,622 1960-08-01,606 1960-09-01,508 1960-10-01,461 1960-11-01,390 1960-12-01,432 -------------------------------------------------------------------------------- /slides/_common/scripts/wine-lr-regularization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import matplotlib.style 5 | import matplotlib 
as mpl 6 | import matplotlib.pyplot as plt 7 | 8 | from sklearn.datasets import load_wine 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.preprocessing import StandardScaler 11 | from sklearn.linear_model import LogisticRegression 12 | 13 | X, y = load_wine(return_X_y=True) 14 | X_train, X_test, y_train, y_test = \ 15 | train_test_split(X, y, test_size=0.3, random_state=0, stratify=y) 16 | 17 | stdsc = StandardScaler() 18 | X_train_std = stdsc.fit_transform(X_train) 19 | X_test_std = stdsc.transform(X_test) 20 | 21 | colors = ['blue', 'green', 'red', 'cyan', 22 | 'magenta', 'yellow', 'black', 23 | 'pink', 'lightgreen', 'lightblue', 24 | 'gray', 'indigo', 'orange'] 25 | 26 | columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash', 27 | 'Alcalinity of ash', 'Magnesium', 'Total phenols', 28 | 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 29 | 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 30 | 'Proline'] 31 | 32 | weights, params = [], [] 33 | for c in np.arange(-4., 6.): 34 | lr = LogisticRegression( 35 | solver='liblinear', 36 | penalty='l1', 37 | C=10.**c, 38 | random_state=0) 39 | lr.fit(X_train_std, y_train) 40 | weights.append(lr.coef_[1]) 41 | params.append(10**c) 42 | 43 | weights = np.array(weights) 44 | 45 | plt.style.use('ggplot') 46 | 47 | mpl.rcParams['image.cmap'] = 'viridis' 48 | mpl.rcParams['font.serif'] = 'Source Han Serif' 49 | mpl.rcParams['font.sans-serif'] = 'Source Han Sans' 50 | 51 | fig = plt.figure(figsize=(6, 3.2)) 52 | ax = plt.subplot(111) 53 | 54 | for column, color in zip(range(weights.shape[1]), colors): 55 | plt.plot(params, weights[:, column], 56 | label=columns[column + 1], 57 | color=color) 58 | plt.axhline(0, color='black', linestyle='--', linewidth=3) 59 | plt.xlim([10**(-5), 10**5]) 60 | plt.ylabel('weight coefficient') 61 | plt.xlabel('C') 62 | plt.xscale('log') 63 | plt.legend(loc='upper left') 64 | ax.legend(loc='upper center', 65 | bbox_to_anchor=(1.4, 1), 66 | ncol=1, 
fancybox=True) 67 | 68 | plt.savefig('wine-lr-regularization.png', dpi=300, 69 | bbox_inches='tight', pad_inches=0.2) -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-bp-network.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=x400,outext=.png}]{standalone} 2 | \usetikzlibrary{arrows} 3 | \tikzstyle{input}=[circle,draw=black,fill=white,thick,minimum size=1cm] 4 | \tikzstyle{output}=[circle,draw=black,fill=white,thick,minimum size=1cm] 5 | \tikzstyle{bais}=[circle,draw=black,fill=white,thick,minimum size=1cm] 6 | \tikzstyle{hidden}=[circle,draw=black,fill=white,thick,minimum size=1cm] 7 | \tikzstyle{pre}=[<-,shorten <=1pt,>=stealth',semithick] 8 | \tikzstyle{post}=[->,shorten >=1pt,>=stealth',semithick] 9 | \begin{document} 10 | \begin{tikzpicture}[node distance=2cm, auto] 11 | \node [] (x_1) {$x_1$}; 12 | \node [] (x_2) [below of=x_1] {$x_2$}; 13 | \node [input] (a_11) [right of=x_1] {$a^{\left(1\right)}_1$}; 14 | \node [input] (a_12) [below of=a_11] {$a^{\left(1\right)}_2$}; 15 | \node [bais] (b_1) [below of=a_12] {$b^{\left(1\right)}$}; 16 | \node [hidden] (a_21) [right of=a_11, yshift=1cm] {$a^{\left(2\right)}_1$}; 17 | \node [hidden] (a_22) [below of=a_21] {$a^{\left(2\right)}_2$}; 18 | \node [hidden] (a_23) [below of=a_22] {$a^{\left(2\right)}_3$}; 19 | \node [bais] (b_2) [below of=a_23] {$b^{\left(2\right)}$}; 20 | \node [hidden] (a_31) [right of=a_21, yshift=-1cm] {$a^{\left(3\right)}_1$}; 21 | \node [hidden] (a_32) [below of=a_31] {$a^{\left(3\right)}_2$}; 22 | \node [bais] (b_3) [below of=a_32] {$b^{\left(3\right)}$}; 23 | \node [output] (a_41) [right of=a_31, yshift=-1cm] {$a^{\left(4\right)}_1$}; 24 | \node [] (y) [right of=a_41] {$y$}; 25 | 26 | 27 | \path[post] 28 | (x_1) edge (a_11) 29 | (x_2) edge (a_12) 30 | (a_11) edge (a_21) edge (a_22) edge (a_23) 31 | (a_12) edge (a_21) edge (a_22) 
edge (a_23) 32 | (b_1) edge (a_21) edge (a_22) edge (a_23) 33 | (a_21) edge (a_31) edge (a_32) 34 | (a_22) edge (a_31) edge (a_32) 35 | (a_23) edge (a_31) edge (a_32) 36 | (b_2) edge (a_31) edge (a_32) 37 | (a_31) edge (a_41) 38 | (a_32) edge (a_41) 39 | (b_3) edge (a_41) 40 | (a_41) edge (y) 41 | ; 42 | \end{tikzpicture} 43 | \end{document} -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/data/nybirths.txt: -------------------------------------------------------------------------------- 1 | NYBIRTHS 2 | 26.663 3 | 23.598 4 | 26.931 5 | 24.740 6 | 25.806 7 | 24.364 8 | 24.477 9 | 23.901 10 | 23.175 11 | 23.227 12 | 21.672 13 | 21.870 14 | 21.439 15 | 21.089 16 | 23.709 17 | 21.669 18 | 21.752 19 | 20.761 20 | 23.479 21 | 23.824 22 | 23.105 23 | 23.110 24 | 21.759 25 | 22.073 26 | 21.937 27 | 20.035 28 | 23.590 29 | 21.672 30 | 22.222 31 | 22.123 32 | 23.950 33 | 23.504 34 | 22.238 35 | 23.142 36 | 21.059 37 | 21.573 38 | 21.548 39 | 20.000 40 | 22.424 41 | 20.615 42 | 21.761 43 | 22.874 44 | 24.104 45 | 23.748 46 | 23.262 47 | 22.907 48 | 21.519 49 | 22.025 50 | 22.604 51 | 20.894 52 | 24.677 53 | 23.673 54 | 25.320 55 | 23.583 56 | 24.671 57 | 24.454 58 | 24.122 59 | 24.252 60 | 22.084 61 | 22.991 62 | 23.287 63 | 23.049 64 | 25.076 65 | 24.037 66 | 24.430 67 | 24.667 68 | 26.451 69 | 25.618 70 | 25.014 71 | 25.110 72 | 22.964 73 | 23.981 74 | 23.798 75 | 22.270 76 | 24.775 77 | 22.646 78 | 23.988 79 | 24.737 80 | 26.276 81 | 25.816 82 | 25.210 83 | 25.199 84 | 23.162 85 | 24.707 86 | 24.364 87 | 22.644 88 | 25.565 89 | 24.062 90 | 25.431 91 | 24.635 92 | 27.009 93 | 26.606 94 | 26.268 95 | 26.462 96 | 25.246 97 | 25.180 98 | 24.657 99 | 23.304 100 | 26.982 101 | 26.199 102 | 27.210 103 | 26.122 104 | 26.706 105 | 26.878 106 | 26.152 107 | 26.379 108 | 24.712 109 | 25.688 110 | 24.990 111 | 24.239 112 | 26.721 113 | 23.475 114 | 24.767 115 | 26.219 116 | 28.361 117 | 28.599 118 | 27.914 119 | 27.784 
120 | 25.693 121 | 26.881 122 | 26.217 123 | 24.218 124 | 27.914 125 | 26.975 126 | 28.527 127 | 27.139 128 | 28.982 129 | 28.169 130 | 28.056 131 | 29.136 132 | 26.291 133 | 26.987 134 | 26.589 135 | 24.848 136 | 27.543 137 | 26.896 138 | 28.878 139 | 27.390 140 | 28.065 141 | 28.141 142 | 29.048 143 | 28.484 144 | 26.634 145 | 27.735 146 | 27.132 147 | 24.924 148 | 28.963 149 | 26.589 150 | 27.931 151 | 28.009 152 | 29.229 153 | 28.759 154 | 28.405 155 | 27.945 156 | 25.912 157 | 26.619 158 | 26.076 159 | 25.286 160 | 27.660 161 | 25.951 162 | 26.398 163 | 25.565 164 | 28.865 165 | 30.000 166 | 29.261 167 | 29.012 168 | 26.992 169 | 27.897 170 | -------------------------------------------------------------------------------- /slides/09-classification-algorithms-part-1/images/logistic-curve.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | Graph of Logistics Curve 7 | Originally Produced by GNUPLOT 4.2 patchlevel 2, hand compressed 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 26 | 27 | 28 | 29 | 30 | 31 | 0 32 | 0.5 33 | 1 34 | 35 | 36 | −6 37 | −4 38 | −2 39 | 0 40 | 2 41 | 4 42 | 6 43 | 44 | 45 | 46 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /slides/13-deep-learning-algorithms/images/ann-multilayer-perceptron.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz, convert={density=300,size=x400,outext=.png}]{standalone} 2 | \usepackage[adobefonts]{ctex} 3 | \usetikzlibrary{arrows} 4 | \tikzstyle{input}=[circle,draw=green!50,fill=green!20,thick,minimum size=6mm] 5 | \tikzstyle{output}=[circle,draw=blue!50,fill=blue!20,thick,minimum size=6mm] 6 | \tikzstyle{blank}=[circle,thick,minimum size=6mm,font=\footnotesize] 7 | \tikzstyle{hidden}=[rectangle,draw=red!50,fill=red!20,thick,minimum size=6mm] 8 | \tikzstyle{transition}=[thick,draw=black!75,fill=black!20] 9 | 
\tikzstyle{pre}=[<-,shorten <=1pt,>=stealth',semithick] 10 | \tikzstyle{post}=[->,shorten >=1pt,>=stealth',semithick] 11 | \begin{document} 12 | \begin{tikzpicture}[node distance=1cm, auto] 13 | \node [blank] (il1) {$x_1$}; 14 | \node [blank] (il2) [below of=il1] {$x_2$}; 15 | \node [blank] (il3) [below of=il2] {...}; 16 | \node [blank] (il4) [below of=il3] {$x_n$}; 17 | 18 | \node [input] (i1) [right of=il1] {} 19 | edge [pre] (il1); 20 | \node [input] (i2) [right of=il2] {} 21 | edge [pre] (il2); 22 | \node [blank] (i3) [right of=il3] {...}; 23 | \node [input] (i4) [right of=il4] {} 24 | edge [pre] (il4); 25 | \node [blank] (iname) [below of=i4] {输入层}; 26 | 27 | \node [hidden] (h11) [right of=i1] {} 28 | edge [pre] (i1) 29 | edge [pre] (i2) 30 | edge [pre] (i4); 31 | \node [hidden] (h12) [right of=i2] {} 32 | edge [pre] (i1) 33 | edge [pre] (i2) 34 | edge [pre] (i4); 35 | \node [blank] (h13) [right of=i3] {...}; 36 | \node [hidden] (h14) [right of=i4] {} 37 | edge [pre] (i1) 38 | edge [pre] (i2) 39 | edge [pre] (i4); 40 | 41 | \node [hidden] (h21) [right of=h11] {} 42 | edge [pre] (h11) 43 | edge [pre] (h12) 44 | edge [pre] (h14); 45 | \node [hidden] (h22) [right of=h12] {} 46 | edge [pre] (h11) 47 | edge [pre] (h12) 48 | edge [pre] (h14); 49 | \node [blank] (h23) [right of=h13] {...}; 50 | \node [hidden] (h24) [right of=h14] {} 51 | edge [pre] (h11) 52 | edge [pre] (h12) 53 | edge [pre] (h14); 54 | \node [blank] (hname) [below of=h24] {隐含层}; 55 | 56 | \node [blank] (h31) [right of=h21] {...} 57 | edge [pre] (h21); 58 | \node [blank] (h32) [right of=h22] {...} 59 | edge [pre] (h22); 60 | \node [blank] (h33) [right of=h23] {...}; 61 | \node [blank] (h34) [right of=h24] {...} 62 | edge [pre] (h24); 63 | 64 | \node [output] (o1) [right of=h31] {} 65 | edge [pre] (h31); 66 | \node [output] (o2) [right of=h32] {} 67 | edge [pre] (h32); 68 | \node [blank] (o3) [right of=h33] {...}; 69 | \node [output] (o4) [right of=h34] {} 70 | edge [pre] (h34); 71 | \node [blank] 
(oname) [below of=o4] {输出层}; 72 | 73 | \node [blank] (ol1) [right of=o1] {$o_1$} 74 | edge [pre] (o1); 75 | \node [blank] (ol2) [right of=o2] {$o_2$} 76 | edge [pre] (o2); 77 | \node [blank] (ol3) [right of=o3] {...}; 78 | \node [blank] (ol4) [right of=o4] {$o_n$} 79 | edge [pre] (o4); 80 | \end{tikzpicture} 81 | \end{document} -------------------------------------------------------------------------------- /slides/10-classification-algorithms-part-2/images/gbm-workflow-gbm-step-2.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 0 18 | (-1, 1, -1, 1) 19 | 20 | -1 21 | (-1, -1) 22 | 残差:r21=0, r23=0 23 | 24 | 1 25 | (1, 1) 26 | 残差:r22=0, r24=0 27 | 28 | 29 | 经常到知乎提问 30 | 31 | 32 | 经常到知乎回答 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /slides/10-classification-algorithms-part-2/images/gbm-workflow-gbm-step-1.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 20 18 | (14, 16, 24, 26) 19 | 20 | 15 21 | (14, 16) 22 | 残差:r11=-1, r12=1 23 | 24 | 25 25 | (24, 26) 26 | 残差:r13=-1, r14=1 27 | 28 | 29 | 月均消费 ≤ 2000 30 | 31 | 32 | 月均消费 ≥ 2000 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /slides/_common/includes/after_body.html: -------------------------------------------------------------------------------- 1 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | # Website information 2 | site_name: R 语言数据科学导论 3 | site_url: https://ds-r.leovan.tech 4 | site_author: 范叶亮 | Leo Van 5 | site_description: R 语言数据科学导论 | Data Science Introduction with R 6 | 7 | # Repository 8 | repo_name: 
leovan/data-science-introduction-with-r 9 | repo_url: https://github.com/leovan/data-science-introduction-with-r 10 | edit_uri: edit/main/docs/ 11 | 12 | # Copyright 13 | copyright: 版权所有 © 2019-2025,范叶亮 | Leo Van 14 | 15 | # Navigation 16 | nav: 17 | - 主页: index.md 18 | - 讲义: 19 | - 数据科学简介: lecture/01-data-science-introduction.md 20 | - R 语言简介: lecture/02-r-language-introduction.md 21 | - 数据分析基础(上): lecture/03-data-analytics-introduction-part-1.md 22 | - 数据分析基础(下): lecture/04-data-analytics-introduction-part-2.md 23 | - 数据可视化: lecture/05-data-visualization.md 24 | - 统计分析基础: lecture/06-statistical-analytics-introduction.md 25 | - 特征工程: lecture/07-feature-engineering.md 26 | - 模型评估 & 超参数优化: lecture/08-model-evaluation-and-hyperparameter-optimization.md 27 | - 分类算法(上): lecture/09-classification-algorithms-part-1.md 28 | - 分类算法(下): lecture/10-classification-algorithms-part-2.md 29 | - 聚类算法: lecture/11-clustering-algorithms.md 30 | - 时间序列算法: lecture/12-time-series-algorithms.md 31 | - 深度学习算法: lecture/13-deep-learning-algorithms.md 32 | - 可重复性研究: lecture/14-reproducible-research.md 33 | - 其他: 34 | - 开发环境安装配置手册: others/01-ide-installation-manual.md 35 | 36 | # Theme 37 | theme: 38 | name: material 39 | include_search_page: false 40 | search_index_only: true 41 | language: zh 42 | features: 43 | - content.code.copy 44 | - navigation.indexes 45 | - navigation.top 46 | - search.highlight 47 | - search.suggest 48 | palette: 49 | - media: "(prefers-color-scheme: light)" 50 | scheme: default 51 | primary: indigo 52 | accent: indigo 53 | toggle: 54 | icon: material/weather-night 55 | name: 暗黑 56 | - media: "(prefers-color-scheme: dark)" 57 | scheme: slate 58 | primary: indigo 59 | accent: indigo 60 | toggle: 61 | icon: material/white-balance-sunny 62 | name: 明亮 63 | font: 64 | text: Roboto 65 | code: Roboto Mono 66 | favicon: images/data-science-introduction-with-r.png 67 | logo: images/data-science-introduction-with-r.png 68 | 69 | # Plugins 70 | plugins: 71 | - search: 72 | 
lang: en 73 | 74 | # Extra 75 | extra: 76 | social: 77 | - icon: material/home 78 | link: https://leovan.me 79 | name: 个人主页 80 | - icon: material/github 81 | link: https://github.com/leovan 82 | name: Github 83 | analytics: 84 | provider: google 85 | property: G-MCPZ7NTLEX 86 | extra_css: 87 | - css/table.css 88 | 89 | # Markdown extensions 90 | markdown_extensions: 91 | - abbr 92 | - admonition 93 | - attr_list 94 | - def_list 95 | - footnotes 96 | - md_in_html 97 | - meta 98 | - toc: 99 | permalink: true 100 | slugify: !!python/object/apply:pymdownx.slugs.slugify 101 | kwds: 102 | case: lower 103 | - pymdownx.arithmatex: 104 | generic: true 105 | - pymdownx.betterem: 106 | smart_enable: all 107 | - pymdownx.caret 108 | - pymdownx.critic 109 | - pymdownx.details 110 | - pymdownx.emoji: 111 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 112 | emoji_index: !!python/name:material.extensions.emoji.twemoji 113 | - pymdownx.highlight: 114 | anchor_linenums: true 115 | line_spans: __span 116 | pygments_lang_class: true 117 | - pymdownx.inlinehilite 118 | - pymdownx.keys 119 | - pymdownx.magiclink: 120 | normalize_issue_symbols: true 121 | repo_url_shorthand: true 122 | user: leovan 123 | repo: duckling-chinese 124 | - pymdownx.mark 125 | - pymdownx.snippets 126 | - pymdownx.smartsymbols 127 | - pymdownx.superfences: 128 | custom_fences: 129 | - name: mermaid 130 | class: mermaid 131 | format: !!python/name:pymdownx.superfences.fence_code_format 132 | - pymdownx.tabbed: 133 | alternate_style: true 134 | combine_header_slug: true 135 | slugify: !!python/object/apply:pymdownx.slugs.slugify 136 | kwds: 137 | case: lower 138 | - pymdownx.tasklist: 139 | custom_checkbox: true 140 | - pymdownx.tilde 141 | -------------------------------------------------------------------------------- /docs/others/01-ide-installation-manual.md: -------------------------------------------------------------------------------- 1 | # 开发环境安装配置手册 2 | 3 | ## Windows 4 | 5 | > 建议使用 
64 位 Windows 10 以上版本系统。 6 | 7 | ### 命令行 8 | 9 | 建议安装最新版的 PowerShell 作为命令行环境,相关下载和配置详见[官网](https://learn.microsoft.com/zh-cn/powershell/)。 10 | 11 | 打开 PowerShell 命令行,输入如下命令安装 scoop: 12 | 13 | ```powershell 14 | Set-ExecutionPolicy RemoteSigned -Scope CurrentUser 15 | irm get.scoop.sh | iex 16 | ``` 17 | 18 | [scoop](https://scoop.sh/) 是一个用于 Windows 的命令行包安装管理器,其可以方便用户在命令行中安装并管理应用和扩展包。 19 | 20 | 运行如下命令测试 scoop 是否成功安装: 21 | 22 | ```powershell 23 | scoop --version 24 | ``` 25 | 26 | 如果能得到类似如下输出,则说明安装成功: 27 | 28 | ``` 29 | Current Scoop version: 30 | v0.3.1 - Released at 2022-11-15 31 | ``` 32 | 33 | 运行如下命令为 scoop 添加额外的应用清单: 34 | 35 | ```powershell 36 | scoop bucket add extras 37 | ``` 38 | 39 | ### Git 40 | 41 | 输入如下命令安装 [Git](https://git-scm.com/): 42 | 43 | ```powershell 44 | scoop install git 45 | ``` 46 | 47 | 运行如下命令测试 Git 是否成功安装: 48 | 49 | ```powershell 50 | git --version 51 | ``` 52 | 53 | 如果能得到类似如下输出,则说明安装成功: 54 | 55 | ``` 56 | git version 2.39.0.windows.2 57 | ``` 58 | 59 | ### R 60 | 61 | 输入如下命令安装 [R](https://cloud.r-project.org/bin/windows/base/) 和 [Rtools](https://cloud.r-project.org/bin/windows/Rtools/): 62 | 63 | ```powershell 64 | scoop install r rtools 65 | ``` 66 | 67 | 运行如下命令测试 R 是否成功安装: 68 | 69 | ```powershell 70 | R.exe --version 71 | ``` 72 | 73 | 如果能得到类似如下输出,则说明安装成功: 74 | 75 | ``` 76 | R version 4.2.2 (2022-10-31 ucrt) -- "Innocent and Trusting" 77 | Copyright (C) 2022 The R Foundation for Statistical Computing 78 | ``` 79 | 80 | 输入如下命令安装 [RStudio](https://posit.co/download/rstudio-desktop/): 81 | 82 | ```powershell 83 | scoop install rstudio 84 | ``` 85 | 86 | 运行如下命令测试 RStudio 是否成功安装: 87 | 88 | ```powershell 89 | rstudio.exe --version 90 | ``` 91 | 92 | 如果能得到类似如下输出,则说明安装成功: 93 | 94 | ``` 95 | 2022.12.0+353 96 | ``` 97 | 98 | ### Python 99 | 100 | 输入如下命令安装 [Python](https://www.python.org/): 101 | 102 | ```powershell 103 | scoop install python 104 | ``` 105 | 106 | 运行如下命令测试 Python 是否成功安装: 107 | 108 | ```powershell 109 | python --version 110 | ``` 111
| 112 | 如果能得到类似如下输出,则说明安装成功: 113 | 114 | ``` 115 | Python 3.11.1 116 | ``` 117 | 118 | ### Jupyter 119 | 120 | 将 Python 相关文件路径添加到 `{HOME}\Documents\.Renviron` 文件的 `PATH` 环境变量中: 121 | 122 | ``` 123 | PATH={PYTHON_PATH}\\Scripts;%PATH% 124 | ``` 125 | 126 | 其中 `{HOME}` 为用户目录,`{PYTHON_PATH}` 为 Python 的根目录。 127 | 128 | 在命令行中输入 `R.exe` 启动 R 命令行,在 R 命令行中运行如下命令安装 [IRkernel](https://irkernel.github.io/): 129 | 130 | ```r 131 | install.packages('IRkernel') 132 | IRkernel::installspec() 133 | ``` 134 | 135 | 退出 R 命令行,在系统命令行中运行如下命令启动 JupyterLab: 136 | 137 | ```bash 138 | jupyter-lab.exe 139 | ``` 140 | 141 | 系统会自动打开浏览器并显示 JupyterLab 页面: 142 | 143 | ![JupyterLab on Windows](../images/others/jupyterlab-windows.png) 144 | 145 | ## macOS 146 | 147 | > 建议使用 macOS 11 以上版本系统。 148 | 149 | ### 命令行 150 | 151 | 打开终端,输入如下命令安装 Homebrew: 152 | 153 | ```bash 154 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" 155 | ``` 156 | 157 | [Homebrew](https://brew.sh/) 是一个用于 macOS 的命令行包安装管理器,其可以方便用户在命令行中安装并管理应用和扩展包。 158 | 159 | 运行如下命令测试 Homebrew 是否成功安装: 160 | 161 | ```bash 162 | brew --version 163 | ``` 164 | 165 | 如果能得到类似如下输出,则说明安装成功: 166 | 167 | ``` 168 | Homebrew 3.6.19 169 | ``` 170 | 171 | ### Git 172 | 173 | 输入如下命令安装 [Git](https://git-scm.com/): 174 | 175 | ```bash 176 | brew install git 177 | ``` 178 | 179 | 运行如下命令测试 Git 是否成功安装: 180 | 181 | ```bash 182 | git --version 183 | ``` 184 | 185 | 如果能得到类似如下输出,则说明安装成功: 186 | 187 | ``` 188 | git version 2.39.1 189 | ``` 190 | 191 | ### R 192 | 193 | 输入如下命令安装 [R](https://cloud.r-project.org/bin/macosx/): 194 | 195 | ```bash 196 | brew install --cask r 197 | ``` 198 | 199 | 运行如下命令测试 R 是否成功安装: 200 | 201 | ```bash 202 | R --version 203 | ``` 204 | 205 | 如果能得到类似如下输出,则说明安装成功: 206 | 207 | ``` 208 | R version 4.2.2 (2022-10-31) -- "Innocent and Trusting" 209 | Copyright (C) 2022 The R Foundation for Statistical Computing 210 | ``` 211 | 212 |
输入如下命令安装 [RStudio](https://posit.co/download/rstudio-desktop/): 213 | 214 | ```bash 215 | brew install rstudio 216 | ``` 217 | 218 | 打开 RStudio 以检查是否成功安装。 219 | 220 | ### Python 221 | 222 | 输入如下命令安装 [Python](https://www.python.org/): 223 | 224 | ```bash 225 | brew install python 226 | ``` 227 | 228 | 运行如下命令测试 Python 是否成功安装: 229 | 230 | ```bash 231 | python --version 232 | ``` 233 | 234 | 如果能得到类似如下输出,则说明安装成功: 235 | 236 | ``` 237 | Python 3.10.9 238 | ``` 239 | 240 | ### Jupyter 241 | 242 | 将 Python 相关文件路径添加到 `{HOME}/.Renviron` 文件的 `PATH` 环境变量中: 243 | 244 | ``` 245 | PATH={PYTHON_PATH}/bin:$PATH 246 | ``` 247 | 248 | 其中 `{HOME}` 为用户目录,`{PYTHON_PATH}` 为 Python 的根目录。 249 | 250 | 在命令行中输入 `R` 启动 R 命令行,在 R 命令行中运行如下命令安装 [IRkernel](https://irkernel.github.io/): 251 | 252 | ```r 253 | install.packages('IRkernel') 254 | IRkernel::installspec() 255 | ``` 256 | 257 | 退出 R 命令行,在系统命令行中运行如下命令启动 JupyterLab: 258 | 259 | ```bash 260 | jupyter-lab 261 | ``` 262 | 263 | 系统会自动打开浏览器并显示 JupyterLab 页面: 264 | 265 | ![JupyterLab on macOS](../images/others/jupyterlab-macos.png) 266 | -------------------------------------------------------------------------------- /slides/10-classification-algorithms-part-2/images/gbm-workflow-original-dession-tree.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 15 18 | (14, 16) 19 | 20 | 25 21 | (24, 26) 22 | 23 | 14 24 | (14) 25 | 26 | 16 27 | (16) 28 | 29 | 24 30 | (24) 31 | 32 | 26 33 | (26) 34 | 35 | 20 36 | (14, 16, 24, 26) 37 | 38 | 39 | 日均上网时间 ≥ 1.1h 40 | 41 | 42 | 日均上网时间 ≤ 1.1h 43 | 44 | 45 | 全天上网 46 | 47 | 48 | 晚上上网 49 | 50 | 51 | 月均消费 ≤ 2000 52 | 53 | 54 | 月均消费 ≥ 2000 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/images/arima.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | ARIMA(p, d, q) 19 | 20 | 21 | 22 | 23 | 24 | 25 | ARMA(p, q) 26 | 27 | 28 | 29 | 30 | 31 | 32 | AR(p) 33 | 34 | 35 | 36 | 37 | 38 | 39 | MA(q) 40 | 41 | 42 | 43 | 44 | 45 | 46 | 白噪声 47 | 48 | 49 | d次差分 50 | 51 | 52 | 无滑动平均项 53 | (q=0) 54 | 55 | 56 | 无自回归项 57 | (p=0) 58 | 59 | 60 | p=0 61 | 62 | 63 | q=0 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /slides/12-time-series-algorithms/data/example_retail_sales.csv: -------------------------------------------------------------------------------- 1 | ds,y 2 | 1992-01-01,146376 3 | 1992-02-01,147079 4 | 1992-03-01,159336 5 | 1992-04-01,163669 6 | 1992-05-01,170068 7 | 1992-06-01,168663 8 | 1992-07-01,169890 9 | 1992-08-01,170364 10 | 1992-09-01,164617 11 | 1992-10-01,173655 12 | 1992-11-01,171547 13 | 1992-12-01,208838 14 | 1993-01-01,153221 15 | 1993-02-01,150087 16 | 1993-03-01,170439 17 | 1993-04-01,176456 18 | 1993-05-01,182231 19 | 1993-06-01,181535 20 | 1993-07-01,183682 21 | 1993-08-01,183318 22 | 1993-09-01,177406 23 | 1993-10-01,182737 24 | 1993-11-01,187443 25 | 1993-12-01,224540 26 | 1994-01-01,161349 27 | 1994-02-01,162841 28 | 1994-03-01,192319 29 | 1994-04-01,189569 30 | 1994-05-01,194927 31 | 1994-06-01,197946 32 | 1994-07-01,193355 33 | 1994-08-01,202388 34 | 1994-09-01,193954 35 | 1994-10-01,197956 36 | 1994-11-01,202520 37 | 1994-12-01,241111 38 | 1995-01-01,175344 39 | 1995-02-01,172138 40 | 1995-03-01,201279 41 | 1995-04-01,196039 42 | 1995-05-01,210478 43 | 1995-06-01,211844 44 | 1995-07-01,203411 45 | 1995-08-01,214248 46 | 1995-09-01,202122 47 | 1995-10-01,204044 48 | 1995-11-01,212190 49 | 1995-12-01,247491 50 | 1996-01-01,185019 51 | 1996-02-01,192380 52 | 1996-03-01,212110 53 | 1996-04-01,211718 54 | 1996-05-01,226936 55 | 1996-06-01,217511 56 | 1996-07-01,218111 57 | 1996-08-01,226062 58 | 1996-09-01,209250 59 | 1996-10-01,222663 60 | 1996-11-01,223953 61 | 1996-12-01,258081 62 | 1997-01-01,200389 63 
| 1997-02-01,197556 64 | 1997-03-01,225133 65 | 1997-04-01,220329 66 | 1997-05-01,234190 67 | 1997-06-01,227365 68 | 1997-07-01,231521 69 | 1997-08-01,235252 70 | 1997-09-01,222807 71 | 1997-10-01,232251 72 | 1997-11-01,228284 73 | 1997-12-01,271054 74 | 1998-01-01,207853 75 | 1998-02-01,203863 76 | 1998-03-01,230313 77 | 1998-04-01,234503 78 | 1998-05-01,245027 79 | 1998-06-01,244067 80 | 1998-07-01,241431 81 | 1998-08-01,240462 82 | 1998-09-01,231243 83 | 1998-10-01,244234 84 | 1998-11-01,240991 85 | 1998-12-01,288969 86 | 1999-01-01,218126 87 | 1999-02-01,220650 88 | 1999-03-01,253550 89 | 1999-04-01,250783 90 | 1999-05-01,262113 91 | 1999-06-01,260918 92 | 1999-07-01,262051 93 | 1999-08-01,265089 94 | 1999-09-01,253905 95 | 1999-10-01,258040 96 | 1999-11-01,264106 97 | 1999-12-01,317659 98 | 2000-01-01,236422 99 | 2000-02-01,250580 100 | 2000-03-01,279515 101 | 2000-04-01,264417 102 | 2000-05-01,283706 103 | 2000-06-01,281288 104 | 2000-07-01,271146 105 | 2000-08-01,283944 106 | 2000-09-01,269155 107 | 2000-10-01,270899 108 | 2000-11-01,276507 109 | 2000-12-01,319958 110 | 2001-01-01,250746 111 | 2001-02-01,247772 112 | 2001-03-01,280449 113 | 2001-04-01,274925 114 | 2001-05-01,296013 115 | 2001-06-01,287881 116 | 2001-07-01,279098 117 | 2001-08-01,294763 118 | 2001-09-01,261924 119 | 2001-10-01,291596 120 | 2001-11-01,287537 121 | 2001-12-01,326202 122 | 2002-01-01,255598 123 | 2002-02-01,253086 124 | 2002-03-01,285261 125 | 2002-04-01,284747 126 | 2002-05-01,300402 127 | 2002-06-01,288854 128 | 2002-07-01,295433 129 | 2002-08-01,307256 130 | 2002-09-01,273189 131 | 2002-10-01,287540 132 | 2002-11-01,290705 133 | 2002-12-01,337006 134 | 2003-01-01,268328 135 | 2003-02-01,259051 136 | 2003-03-01,293693 137 | 2003-04-01,294251 138 | 2003-05-01,312389 139 | 2003-06-01,300998 140 | 2003-07-01,309923 141 | 2003-08-01,317056 142 | 2003-09-01,293890 143 | 2003-10-01,304036 144 | 2003-11-01,301265 145 | 2003-12-01,357577 146 | 2004-01-01,281460 147 | 2004-02-01,282444 
148 | 2004-03-01,319077 149 | 2004-04-01,315191 150 | 2004-05-01,328408 151 | 2004-06-01,321044 152 | 2004-07-01,328000 153 | 2004-08-01,326317 154 | 2004-09-01,313524 155 | 2004-10-01,319726 156 | 2004-11-01,324259 157 | 2004-12-01,387155 158 | 2005-01-01,293261 159 | 2005-02-01,295062 160 | 2005-03-01,339141 161 | 2005-04-01,335632 162 | 2005-05-01,345348 163 | 2005-06-01,350945 164 | 2005-07-01,351827 165 | 2005-08-01,355701 166 | 2005-09-01,333289 167 | 2005-10-01,336134 168 | 2005-11-01,343798 169 | 2005-12-01,405608 170 | 2006-01-01,318546 171 | 2006-02-01,314051 172 | 2006-03-01,361993 173 | 2006-04-01,351667 174 | 2006-05-01,373560 175 | 2006-06-01,366615 176 | 2006-07-01,362203 177 | 2006-08-01,375795 178 | 2006-09-01,346214 179 | 2006-10-01,348796 180 | 2006-11-01,356928 181 | 2006-12-01,417991 182 | 2007-01-01,328877 183 | 2007-02-01,323162 184 | 2007-03-01,374142 185 | 2007-04-01,358535 186 | 2007-05-01,391512 187 | 2007-06-01,376639 188 | 2007-07-01,372354 189 | 2007-08-01,388016 190 | 2007-09-01,353936 191 | 2007-10-01,368681 192 | 2007-11-01,377802 193 | 2007-12-01,426077 194 | 2008-01-01,342697 195 | 2008-02-01,343937 196 | 2008-03-01,372923 197 | 2008-04-01,368923 198 | 2008-05-01,397969 199 | 2008-06-01,378490 200 | 2008-07-01,383686 201 | 2008-08-01,382852 202 | 2008-09-01,350560 203 | 2008-10-01,349884 204 | 2008-11-01,335571 205 | 2008-12-01,384286 206 | 2009-01-01,310269 207 | 2009-02-01,299488 208 | 2009-03-01,328568 209 | 2009-04-01,329866 210 | 2009-05-01,347768 211 | 2009-06-01,344439 212 | 2009-07-01,348106 213 | 2009-08-01,353473 214 | 2009-09-01,324708 215 | 2009-10-01,338630 216 | 2009-11-01,339386 217 | 2009-12-01,400264 218 | 2010-01-01,314640 219 | 2010-02-01,311022 220 | 2010-03-01,360819 221 | 2010-04-01,356460 222 | 2010-05-01,365713 223 | 2010-06-01,358675 224 | 2010-07-01,362027 225 | 2010-08-01,362682 226 | 2010-09-01,346069 227 | 2010-10-01,355212 228 | 2010-11-01,365809 229 | 2010-12-01,426654 230 | 2011-01-01,335608 231 | 
2011-02-01,337352 232 | 2011-03-01,387092 233 | 2011-04-01,380754 234 | 2011-05-01,391970 235 | 2011-06-01,388636 236 | 2011-07-01,384600 237 | 2011-08-01,394548 238 | 2011-09-01,374895 239 | 2011-10-01,379364 240 | 2011-11-01,391081 241 | 2011-12-01,451669 242 | 2012-01-01,355058 243 | 2012-02-01,372523 244 | 2012-03-01,414275 245 | 2012-04-01,393035 246 | 2012-05-01,418648 247 | 2012-06-01,400996 248 | 2012-07-01,396020 249 | 2012-08-01,417911 250 | 2012-09-01,385597 251 | 2012-10-01,399341 252 | 2012-11-01,410992 253 | 2012-12-01,461994 254 | 2013-01-01,375537 255 | 2013-02-01,373938 256 | 2013-03-01,421638 257 | 2013-04-01,408381 258 | 2013-05-01,436985 259 | 2013-06-01,414701 260 | 2013-07-01,422357 261 | 2013-08-01,434950 262 | 2013-09-01,396199 263 | 2013-10-01,415740 264 | 2013-11-01,423611 265 | 2013-12-01,477205 266 | 2014-01-01,383399 267 | 2014-02-01,380315 268 | 2014-03-01,432806 269 | 2014-04-01,431415 270 | 2014-05-01,458822 271 | 2014-06-01,433152 272 | 2014-07-01,443005 273 | 2014-08-01,450913 274 | 2014-09-01,420871 275 | 2014-10-01,437702 276 | 2014-11-01,437910 277 | 2014-12-01,501232 278 | 2015-01-01,397252 279 | 2015-02-01,386935 280 | 2015-03-01,444110 281 | 2015-04-01,438217 282 | 2015-05-01,462615 283 | 2015-06-01,448229 284 | 2015-07-01,457710 285 | 2015-08-01,456340 286 | 2015-09-01,430917 287 | 2015-10-01,444959 288 | 2015-11-01,444507 289 | 2015-12-01,518253 290 | 2016-01-01,400928 291 | 2016-02-01,413554 292 | 2016-03-01,460093 293 | 2016-04-01,450935 294 | 2016-05-01,471421 295 | -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/boxplot.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 中位数 47 | 第1个四分位数 48 | 第3个四分位数 
49 | 上边界 50 | 离群点 51 | 52 | 53 | 54 | 离群点 55 | 下边界 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /slides/_common/css/style.css: -------------------------------------------------------------------------------- 1 | /* 字体 */ 2 | body { 3 | font-family: Palatino, 'Palatino Linotype', 'Palatino LT Std', Georgia, 'Latin Modern Roman', 'Linux Libertine', 'Liberation Serif', 'Noto Serif CJK SC', 'Noto Serif CJK TC', 'Source Han Serif SC', 'Source Han Serif CN', source-han-serif-sc, 'Source Han Serif TC', 'Source Han Serif TW', source-han-serif-tc, 'Source Han Serif', 'Songti SC', STSong, 'AR PL New Sung', 'AR PL SungtiL GB', NSimSun, SimSun, 'TW\-Sung', 'WenQuanYi Bitmap Song', 'AR PL UMing CN', 'AR PL UMing HK', 'AR PL UMing TW', 'AR PL UMing TW MBE', PMingLiU, MingLiU, serif; 4 | } 5 | 6 | h1, h2, h3 { 7 | font-family: 'Yanone Kaffeesatz', Palatino, 'Palatino Linotype', 'Palatino LT Std', Georgia, 'Latin Modern Roman', 'Linux Libertine', 'Liberation Serif', 'Noto Serif CJK SC', 'Noto Serif CJK TC', 'Source Han Serif SC', 'Source Han Serif CN', source-han-serif-sc, 'Source Han Serif TC', 'Source Han Serif TW', source-han-serif-tc, 'Source Han Serif', 'Songti SC', STSong, 'AR PL New Sung', 'AR PL SungtiL GB', NSimSun, SimSun, 'TW\-Sung', 'WenQuanYi Bitmap Song', 'AR PL UMing CN', 'AR PL UMing HK', 'AR PL UMing TW', 'AR PL UMing TW MBE', PMingLiU, MingLiU, serif; 8 | font-weight: bold; 9 | } 10 | 11 | h4, h5, h6 { 12 | font-family: Palatino, 'Palatino Linotype', 'Palatino LT Std', Georgia, 'Latin Modern Roman', 'Linux Libertine', 'Liberation Serif', 'Noto Serif CJK SC', 'Noto Serif CJK TC', 'Source Han Serif SC', 'Source Han Serif CN', source-han-serif-sc, 'Source Han Serif TC', 'Source Han Serif TW', source-han-serif-tc, 'Source Han Serif', 'Songti SC', STSong, 'AR PL New Sung', 'AR PL SungtiL GB', NSimSun, SimSun, 'TW\-Sung', 'WenQuanYi 
Bitmap Song', 'AR PL UMing CN', 'AR PL UMing HK', 'AR PL UMing TW', 'AR PL UMing TW MBE', PMingLiU, MingLiU, serif; 13 | font-weight: bold; 14 | } 15 | 16 | a { 17 | font-family: 'PT Sans Narrow', Palatino, 'Palatino Linotype', 'Palatino LT Std', Georgia, 'Latin Modern Roman', 'Linux Libertine', 'Liberation Serif', 'Noto Serif CJK SC', 'Noto Serif CJK TC', 'Source Han Serif SC', 'Source Han Serif CN', source-han-serif-sc, 'Source Han Serif TC', 'Source Han Serif TW', source-han-serif-tc, 'Source Han Serif', 'Songti SC', STSong, 'AR PL New Sung', 'AR PL SungtiL GB', NSimSun, SimSun, 'TW\-Sung', 'WenQuanYi Bitmap Song', 'AR PL UMing CN', 'AR PL UMing HK', 'AR PL UMing TW', 'AR PL UMing TW MBE', PMingLiU, MingLiU, serif; 18 | } 19 | 20 | .remark-code, .remark-inline-code { 21 | font-family: Iosevka, 'Sarasa Mono SC', 'Sarasa Mono TC', Consolas, 'Lucida Console', Menlo, Monaco, 'Andale Mono', 'Ubuntu Mono', 'Kaiti SC', STKaiti, 'AR PL UKai CN', 'AR PL UKai HK', 'AR PL UKai TW', 'AR PL UKai TW MBE', 'AR PL KaitiM GB', KaiTi, KaiTi_GB2312, DFKai-SB, 'TW\-Kai', monospace; 22 | } 23 | 24 | .caption, .footnote, tfoot { 25 | font-family: Palatino, 'Palatino Linotype', 'Palatino LT Std', Georgia, 'Latin Modern Roman', 'Linux Libertine', 'Liberation Serif', 'Kaiti SC', STKaiti, 'AR PL UKai CN', 'AR PL UKai HK', 'AR PL UKai TW', 'AR PL UKai TW MBE', 'AR PL KaitiM GB', KaiTi, KaiTi_GB2312, DFKai-SB, 'TW\-Kai', serif; 26 | font-size: 0.9em; 27 | } 28 | 29 | h1 { 30 | height: 79px; 31 | line-height: 79px; 32 | } 33 | 34 | /* 背景 */ 35 | .project-logo { 36 | background-image: url(../assets/logo.png); 37 | background-size: contain; 38 | background-repeat: no-repeat; 39 | position: absolute; 40 | opacity: 0.3; 41 | bottom: 80px; 42 | right: 80px; 43 | width: 256px; 44 | height: 256px; 45 | z-index: 0; 46 | } 47 | 48 | .content-logo { 49 | background-image: url(../assets/logo.png); 50 | background-size: contain; 51 | background-repeat: no-repeat; 52 | position: absolute; 53 | top: 
60px; 54 | right: 80px; 55 | width: 70px; 56 | height: 70px; 57 | z-index: 0; 58 | } 59 | 60 | /* 页码 */ 61 | .title-slide .remark-slide-number, 62 | .content .remark-slide-number, 63 | .section .remark-slide-number, 64 | .thanks .remark-slide-number { 65 | display: none; 66 | } 67 | 68 | /* 封面 */ 69 | .title-slide h1, 70 | .title-slide h2, 71 | .title-slide h3 { 72 | color: #000000; 73 | height: 0.5em; 74 | line-height: 0.5em; 75 | } 76 | 77 | .title-slide h1 { 78 | margin-top: 120px; 79 | } 80 | 81 | .title h1 { 82 | font-size: 45px; 83 | } 84 | 85 | .project-title h1, 86 | .project-subtitle h1 { 87 | color: #000000; 88 | height: 0.6em; 89 | line-height: 0.6em; 90 | } 91 | 92 | .project-title h1 { 93 | margin-top: 60px; 94 | } 95 | 96 | .project-subtitle h1 { 97 | margin-top: 20px; 98 | } 99 | 100 | /* 目录 */ 101 | .content-page { 102 | margin: 100px 100px auto 100px; 103 | } 104 | 105 | .content-page-title { 106 | font-size: 40pt; 107 | font-weight: bold; 108 | } 109 | 110 | .content-page-list { 111 | font-size: 20pt; 112 | height: 40pt; 113 | line-height: 40pt; 114 | } 115 | 116 | /* 表格 */ 117 | table { 118 | margin: auto; 119 | border-top: 2px solid #111111; 120 | border-bottom: 2px solid #111111; 121 | border-spacing: 6px 6px; 122 | border-collapse: separate; 123 | } 124 | 125 | th { 126 | border-bottom: none; 127 | } 128 | 129 | thead, tfoot, tr:nth-child(even) { 130 | background: #eeeeee 131 | } 132 | 133 | th, td { 134 | padding: 5px; 135 | } 136 | 137 | /* 代码 */ 138 | .remark-code-line { 139 | line-break: anywhere; 140 | } 141 | 142 | /* 区块 */ 143 | blockquote, 144 | .blockquote-note, 145 | .blockquote-warning, 146 | .blockquote-caution, 147 | .blockquote-tip { 148 | margin-left: 0; 149 | padding-left: 1em; 150 | } 151 | 152 | blockquote { 153 | border-left: 4px solid #777777; 154 | } 155 | 156 | .blockquote-note { 157 | border-left: 4px solid #2680e3; 158 | } 159 | 160 | .blockquote-warning { 161 | border-left: 4px solid #ff7518; 162 | } 163 | 164 | 
.blockquote-caution { 165 | border-left: 4px solid #fe2a39; 166 | } 167 | 168 | .blockquote-tip { 169 | border-left: 4px solid #3fb620; 170 | } 171 | 172 | .code-good pre:before, 173 | .code-bad pre:before { 174 | display: block; 175 | float: left; 176 | width: 1em; 177 | height: 1em; 178 | margin: 9px 9px; 179 | } 180 | 181 | .code-good pre:before { 182 | content: url(../assets/code-good.svg); 183 | } 184 | 185 | .code-bad pre:before { 186 | content: url(../assets/code-bad.svg); 187 | } 188 | 189 | /* 其他元素 */ 190 | img, video, iframe { 191 | max-width: 100%; 192 | } 193 | 194 | pre { 195 | white-space: pre-wrap; 196 | } 197 | 198 | a, a > code { 199 | color: #775695; 200 | text-decoration: none; 201 | } 202 | 203 | .footnote { 204 | position: absolute; 205 | bottom: 3em; 206 | padding-right: 4em; 207 | font-size: 90%; 208 | } 209 | 210 | h4, h5 { 211 | margin-top: 1em; 212 | margin-bottom: 1em; 213 | } 214 | 215 | /* 垂直布局 */ 216 | .v-middle { 217 | vertical-align: middle; 218 | } 219 | 220 | /* 两栏布局 */ 221 | .left-column { 222 | color: #777777; 223 | width: 20%; 224 | height: 92%; 225 | float: left; 226 | } 227 | 228 | .left-column h2:last-of-type, 229 | .left-column h3:last-child { 230 | color: #000000; 231 | } 232 | 233 | .right-column { 234 | width: 75%; 235 | float: right; 236 | padding-top: 1em; 237 | } 238 | 239 | .pull-left { 240 | float: left; 241 | width: 47%; 242 | } 243 | 244 | .pull-right { 245 | float: right; 246 | width: 47%; 247 | } 248 | 249 | .pull-right ~ * { 250 | clear: both; 251 | } 252 | 253 | /* Grid 布局 */ 254 | .grid { 255 | display: flex; 256 | justify-content: space-between; 257 | } 258 | 259 | .grid-cell-1of2 { 260 | flex: 0 0 calc((100% / 2) - 1rem); 261 | } 262 | 263 | .grid-cell-1of3 { 264 | flex: 0 0 calc((100% / 3) - 1rem); 265 | } 266 | 267 | .grid-cell-2of3 { 268 | flex: 0 0 calc((100% / 3 * 2) - 1rem); 269 | } 270 | 271 | .grid-cell-1of4 { 272 | flex: 0 0 calc((100% / 4) - 1rem); 273 | } 274 | 275 | .grid-cell-3of4 { 276 | flex: 
0 0 calc((100% / 4 * 3) - 1rem); 277 | } 278 | 279 | /* 打印样式 280 | 16:9 size: 1210px 681px; 281 | 4:3 size: 908px 681px; 282 | */ 283 | @page { 284 | size: 1210px 681px;; 285 | margin: 0; 286 | } 287 | 288 | @media print { 289 | .remark-slide-scaler { 290 | width: 100% !important; 291 | height: 100% !important; 292 | transform: scale(1) !important; 293 | top: 0 !important; 294 | left: 0 !important; 295 | } 296 | } -------------------------------------------------------------------------------- /slides/10-classification-algorithms-part-2/images/gbm-workflow-gbm-step-2.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 0 25 | (-1, 1, -1, 1) 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -1 44 | (-1, -1) 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 残差:r21=0, r23=0 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 1 72 | (1, 1) 73 | 74 | 75 | 76 | 77 | 78 | 79 | 残差:r22=0, r24=0 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 经常到知乎回答 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 经常到知乎提问 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /slides/10-classification-algorithms-part-2/images/gbm-workflow-gbm-step-1.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 20 25 | (14, 16, 24, 26) 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 15 44 | (14, 16) 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 残差:r11=-1, r12=1 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 25 72 | (24, 26) 73 | 74 | 
75 | 76 | 77 | 78 | 79 | 残差:r13=-1, r14=1 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 月均消费 ≥ 2000 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 月均消费 ≤ 2000 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /slides/07-feature-engineering/images/gradient_with_normalization.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 圆 c 75 | 圆 c: 圆心为 A 且经过 B 的圆 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 圆 d 86 | 圆 d: 圆心为 A 且经过 C 的圆 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 圆 e 97 | 圆 e: 圆心为 A 且经过 D 的圆 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 圆 f 108 | 圆 f: 圆心为 A 且经过 E 的圆 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 圆 g 119 | 圆 g: 圆心为 A 且经过 F 的圆 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 圆 h 130 | 圆 h: 圆心为 A 且经过 F 的圆 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 向量 u 141 | 向量 u: 向量[G, H] 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 向量 u 152 | 向量 u: 向量[G, H] 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 向量 v 163 | 向量 v: 向量[H, I] 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 向量 v 174 | 向量 v: 向量[H, I] 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 向量 w 185 | 向量 w: 向量[I, J] 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 向量 w 196 | 向量 w: 向量[I, J] 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 向量 a 207 | 向量 a: 向量[J, K] 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 向量 a 218 | 向量 a: 向量[J, K] 219 | 220 | 221 | 222 | 223 | 224 | 225 
| 226 | 227 | 228 | 点 K 229 | K = (1.88, 1.7) 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 点 K 240 | K = (1.88, 1.7) 241 | 242 | 243 | 244 | 245 | 246 | 247 | -------------------------------------------------------------------------------- /slides/01-data-science-introduction/01-data-science-introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "数据科学简介" 3 | subtitle: "Data Science Introduction" 4 | author: "范叶亮" 5 | date: "" 6 | output: 7 | xaringan::moon_reader: 8 | mathjax: https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js 9 | css: 10 | - https://cdn.jsdelivr.net/npm/pseudocode@latest/build/pseudocode.min.css 11 | - css/style.css 12 | includes: 13 | after_body: includes/after_body.html 14 | lib_dir: libs 15 | nature: 16 | ratio: "16:9" 17 | highlightStyle: github 18 | highlightLines: true 19 | countIncrementalSlides: false 20 | --- 21 | class: content 22 | 23 | ```{r init-r, include=FALSE} 24 | options(htmltools.dir.version = FALSE) 25 | ``` 26 | 27 |
28 |

目录

29 |
  • 数据科学简介
  • 30 |
  • 数据科学工具箱
  • 31 |
  • 数据科学分工与流程
  • 32 |
    33 | 34 | --- 35 | class: section, center, middle 36 | 37 | # 数据科学简介 38 | 39 | --- 40 | class: 41 | 42 | # 数据科学 43 | 44 | 1974 年,Peter Naur 出版了**“计算方法的简介调查”** [1] 一书。该书中**“数据科学” (Data Science)** 一词被大量使用,同时对其作出定义:“数据科学是一门专门处理数据的科学。它被授权处置与其他科学领域中有关数据的表现与关联”。定义中强调了数据同其他科学领域之间存在的关系。 45 | 46 | 1997 年,Jeff Wu 在**“统计=数据科学?”** [2] 一文中重新探索了“统计 (Statistics)”一词的含义,他认为统计工作应该是由数据收集,数据建模和分析以及决策制定三部分组成。同时他倡导将“统计”一词重命名为“数据科学”,将“统计学家 (Statisticians)”一词重命名为**“数据科学家 (Data Scientists)”**。 47 | 48 | .footnote[ 49 | [1] P. Naur, Concise Survey of Computer Methods. _Petrocelli Books_, 1974.
    50 | [2] C. J. Wu, “Statistics = data science?,” 1997. 51 | ] 52 | 53 | --- 54 | class: 55 | 56 | # 数据科学 57 | 58 | 2001 年,William S. Cleveland 发表**“数据科学:为扩大统计技术领域的行动计划”** [1]。文章计划扩大统计领域相关的技术工作范围,正是由于范围的扩张,作者将这一改变的领域称之为“数据科学”。计划中划分了 6 大技术范围,其具体内容和占比如下: 59 | 60 | 1. **(25%) 多学科调查**:包括在相关主题领域内的数据分析协作。 61 | 2. **(20%) 处理数据的模型和方法**:包括统计模型;建模方法;等。 62 | 3. **(15%) 数据计算**:包括硬件系统;软件系统;计算算法。 63 | 4. **(15%) 教学方法**:包括小学,中学,大学,研究生,继续教育和企业培训的教学课程规划。 64 | 5. **(5%) 工具评估**:包括实践中工具使用情况的调查,新工具需求的调查以及开发新工具的过程研究。 65 | 6. **(20%) 理论**:包括数据科学的基础;模型方法,数据计算,教学和工具评估的基本方法;模型方法,数据计算,教学和评估的数学调查。 66 | 67 | .footnote[ 68 | [1] Cleveland, W. S. (2001). Data science: an action plan for expanding the technical areas of the field of statistics. _International statistical review, 69_(1), 21-26. 69 | ] 70 | 71 | --- 72 | class: 73 | 74 | # 数据科学 75 | 76 | 2002 年,国际科学理事会的科技数据委员会 (CODATA) 创立 **Data Science Journal** 杂志。2003 年,**Journal of Data Science** 创立。杂志为所有的数据工作者提供了一个很好的交流平台。 77 | 78 | 2005 年,美国国家科学委员会发布了**“长期数字数据收集促成二十一世纪的研究与教育”** [1]。报告中将数据科学家 (Data scientists) 定义为信息和计算机科学家,数据库和软件工程师,程序员等那些对于成功管理信息数据至关重要的人们。 79 | 80 | 2012 年,Tom Davenport 和 D.J. Patil 在哈佛商业评论中发表**“数据科学家:21 世纪最性感工作”** [2]。文章中将数据科学家评为 21 世纪最性感的职业。 81 | 82 | .footnote[ 83 | [1] N. S. Board, “Long-lived digital data collections enabling research and education in the 21st century.” http://www.nsf.gov/pubs/2005/nsb0540/, 2005.
    84 | [2] T. H. Davenport and D. Patil, “Data Scientist: The Sexiest Job of the 21st Century,” Harvard Business Review Magazine: https://hbr.org/2012/10/data-scientist-the-sexiest-job-of-the-21st-century, 2012. 85 | ] 86 | 87 | --- 88 | class: 89 | 90 | # 数据产品 91 | 92 | Patil 在**“数据的柔术:将数据转化为产品的艺术”** [1] 一文中解释说“**数据产品**是通过使用数据促进最终目标的产品”。因此可以说数据产品并不仅仅是指数据分析 (Data Analysis), 向高管提供的建议或是导致业务流程改善的洞察,而应该是一套完整有形的问题解决系统。 93 | 94 | 为了方便大家清楚理解数据产品的概念,我们比较两款产品:Excel 和 PYMK。Excel 大家应该比较熟悉,是微软 Office 套件中用于数据处理、统计分析和辅助决策的表格处理软件。PYMK 相对比较陌生,PYMK 全称为 People You May Know,是 LinkedIn 一套人物关系预测系统。 95 | 96 | .footnote[ 97 | [1] D. Patil, “Data jujitsu: the art of turning data into product,” tech. rep., _O’Reilly Media, Inc._, 2012. 98 | ] 99 | 100 | --- 101 | class: 102 | 103 | # 数据产品 104 | 105 | .center[ 106 | ### Excel 和 PYMK 特性对比 107 | ] 108 | 109 | | 特性 | Excel | PYMK | 110 | | :------ | :---------------------------- | :---------------------------- | 111 | | 系统 | 否 (通用分析软件) | 是 (预测系统) | 112 | | 数据源 | 用户指定,无具体形式和内容要求 | 人员年龄,性别,工作等个人信息 | 113 | | 数据理解 | 视用户操作而定 | 对数据有较充分理解 | 114 | | 算法应用 | 视用户操作而定 | 使用相关智能算法 | 115 | | 目标 | 无具体目标 | 寻找出可能认识的人 | 116 | | 结果 | 不同操作产生不同结果 | 可能认识的人或人物关系网 | 117 | 118 | --- 119 | class: 120 | 121 | # 数据产品 122 | 123 | 在**“什么是数据科学?”** [1] 一文中,Mike Loukides 的第一句话就指出了**“未来是属于那些能将数据转化成产品的人和公司的”**,也就是说数据的真正价值只有在进行深度加工处理并形成产品之后才能够被体现出来。可以说有价值的数据是一个有待开发的金矿,需要人们利用“数据产品”这把利器去开采才能够得到金灿灿的黄金。同时,文章也指出了数据科学和数据产品之间的关系:数据科学使数据产品的创造成为可能,也就是数据科学在数据产品的创造开发过程中扮演着至关重要的角色。 124 | 125 | .footnote[ 126 | [1] M. Loukides, “What is data science?,” tech. rep., _O’Reilly Media, Inc._, 2010. 
127 | ] 128 | 129 | --- 130 | class: 131 | 132 | # 跨界 133 | 134 | **跨界 (Crossover)** 一词在不同的领域有着各自具体的含义。**跨界音乐 (Crossover Music)** [1] 是指一个音乐作品被诠释成两种或更多的品味或流派。**跨界营销 (Crossover Marketing)** [2] 意味着打破传统的营销思维模式,实现多个品牌从不同角度诠释同一个用户特征,发挥不同类别品牌的协同效应。因此,跨界可以称得上是多种资源的一种融合创新。 135 | 136 | 开发数据产品同样也是一场跨界知识的融合。无论是组建一个数据产品开发团队还是成长为一个真正的数据科学家,都要对所涉及到的各种知识及其技能有所涉猎。当然“全”也并不意味着不“专”,正如开发数据产品的核心是数据科学的应用一样,数据科学家应掌握扎实的数据科学理论和应用能力。 137 | 138 | .footnote[ 139 | [1] Wikipedia, “Crossover (music).” http://en.wikipedia.org/wiki/Crossover_music.
    140 | [2] 邓勇兵, “跨界营销: 体验的综合诠释,” 中国市场, 2007. 141 | ] 142 | 143 | --- 144 | class: 145 | 146 | # 跨界 147 | 148 | | 知识类型 | 知识名称 | 和“开发数据产品”的关系 | 重要程度 | 149 | | :------- | :------------------------- | :--------------------- | :------- | 150 | | 领域知识 | 行业知识 (管理,金融) | 业务理解 | ⭑⭑⭒⭒⭒ | 151 | | 理论知识 | 基础数学 (微积分,代数) | 数据科学 (基础) | ⭑⭑⭑⭒⭒ | 152 | | 理论知识 | 统计学 | 数据科学 (统计分析) | ⭑⭑⭑⭑⭒ | 153 | | 理论知识 | 应用数学 (机器学习) | 数据科学 (建模分析) | ⭑⭑⭑⭑⭒ | 154 | | 工程知识 | 统计编程 (Python,R) | 数据科学 (模型计算) | ⭑⭑⭑⭑⭒ | 155 | | 工程知识 | 数据库知识 (MySQL,HIVE) | 数据源获取 | ⭑⭑⭑⭒⭒ | 156 | | 工程知识 | 软件工程 (系统设计,Java) | 系统开发 (基础) | ⭑⭑⭑⭒⭒ | 157 | | 工程知识 | 计算框架 (Hadoop,Spark) | 系统开发 (框架选择) | ⭑⭑⭑⭒⭒ | 158 | | 工程知识 | 前端技术 (配色,HTML) | 数据可视化 | ⭑⭑⭑⭒⭒ | 159 | 160 | --- 161 | class: section, center, middle 162 | 163 | # 数据科学工具箱 164 | 165 | --- 166 | class: 167 | 168 | # 数据科学常用工具 169 | 170 | 在数据科学领域,我们会用到多种多样的编程语言和工具。而编程语言和工具的选择取决于多种因素,例如:项目需要 (目标,预算,时间等);项目负责人和成员的专业背景和偏好,工具成本,功能性,可用性,学习曲线等等。 171 | 172 | 一般而言,这些编程语言和工具可以划分为如下 5 类: 173 | 174 | 1. 统计编程语言:Python,R,SPSS,SAS 175 | 2. 数据挖掘和机器学习工具箱:scikit-learn (Python),mlr3 (R),Weka (Java) 176 | 3. 传统编程语言:C/C++,Java,Scala 177 | 4. 分析平台和框架:RapidMiner,KNIME,Hadoop,Spark,Hive 178 | 5. 
其他:SQL,Excel,Tableau 179 | 180 | KDnuggets 每年都会进行一项关于机器学习相关编程语言和工具使用的调研,在 2019 年,该项调查共有 1,800 个人参与,最终得票最高的 11 个编程语言和工具分别为:Python,RapidMiner,R,Excel,Anaconda,SQL,Tensorflow,Keras,scikit-learn,Tableau 和 Apache Spark。 181 | 182 | --- 183 | class: 184 | 185 | # 数据科学常用工具 186 | 187 | | 编程语言和工具 | 2019 占有率 | 2018 占有率 | 2017 占有率 | 2016 占有率 | 188 | | :------------- | :---------- | :---------- | :---------- | :---------- | 189 | | Python | 65.8% | 65.6% | 59.0% | 45.8% | 190 | | RapidMiner | 51.2% | 52.7% | 31.9% | 32.6% | 191 | | R | 46.6% | 48.5% | 56.6% | 49% | 192 | | Excel | 34.8% | 39.1% | 31.5% | 33.6% | 193 | | Anaconda | 33.9% | 33.4% | 24.3% | NA | 194 | | SQL | 32.8% | 39.6% | 39.2% | 35.5% | 195 | | Tensorflow | 31.7% | 29.9% | 22.7% | 6.8% | 196 | | Keras | 26.6% | 22.2% | 10.7% | NA | 197 | | scikit-learn | 25.5% | 24.4% | 21.9% | 17.2% | 198 | | Tableau | 22.1% | 21.5% | 21.8% | 18.5% | 199 | 200 | --- 201 | class: 202 | 203 | # 数据科学之战:R 与 Python 204 | 205 | .center[ 206 | ### 发展历史 207 | ] 208 | 209 | **R** [1] 是一套用于统计编程和绘图的自由软件编程语言与操作环境。R 语言是 S 语言的一种延伸和实现,由 Ross Ihaka 和 Robert Gentleman 于 1995 年设计开发的一种开源语言,因此称之为 R 语言。作为 S 语言的一种延伸,R 语言主要利用 C 语言,Fortran 和 R 语言开发完成。 210 | 211 | **Python** 是由 Guido van Rossum 于 1991 年创建的一门强调效率和代码可读性的编程语言。Python 由 Python 软件基金会 (PSF) 负责其发展,其开发灵感主要来自于 C 语言和 Modula-3,部分来自于 ABC 语言。Python 的名字取自喜剧蒙提·派森的飞行马戏团 (Monty Python's Flying Circus)。 212 | 213 | .footnote[ 214 | [1] R. Project, “What is r?.” http://www.r-project.org/about.html. 
215 | ] 216 | 217 | --- 218 | class: 219 | 220 | # 数据科学之战:R 与 Python 221 | 222 | .center[ 223 | ### 学习和使用 224 | ] 225 | 226 | **R** 语言可以使用简短的几行代码完成一个统计模型。R 语言也有其自己的代码样式表,但很少有人使用,不过保持一个良好的代码风格是一个很好的习惯。R 语言可以使用不同的方式实现相同的功能,例如显式的循环 (for) 和隐式的循环 (apply 方法) 等。在 R 语言中,还可以轻松地实现复杂的公式,同时一些常用的统计模型也是现成的方便使用。由于 R 语言的特点,开始学习时将会面临一个陡峭的学习曲线,不过一旦入门后就可以很容易的使用其高级特性。 227 | 228 | **Python** 是一个灵活的编程语言,由于其注重简便性和代码的易读性,Python 的学习曲线相对平缓,可以很好的用于编写一些简短代码。不过由于 Python 缩进式的代码风格,对于类 C 语言的使用者多少会影响其学习和使用。由于 Python 是一门更加通用的编程语言,其更多的优势在于编写网站和其他应用脚本。由于 Python 看重可读性和易用性,使得它的学习曲线相对比较低并且平缓。除了可以用于数据分析外,还可以帮助使用者快速高效的完成其他工作。 229 | 230 | --- 231 | class: 232 | 233 | # 数据科学之战:R 与 Python 234 | 235 | .center[ 236 | ### 代码库 237 | ] 238 | 239 | **R** 语言有一个庞大的扩展包库 [CRAN](https://cran.r-project.org/) (The Comprehensive R Archive Network),用户可自行贡献开源的扩展包供其他人员使用。R 语言提供最早的发布版本为 0.49 (1997 年 4 月 23 日),当时 CRAN 仅有 3 个镜像站点,仅提供 12 个包,仅编译了少量类 Unix 平台版本,Windows 和 macOS 版本在该版尚未提供。截止到 2019 年 10 月,CRAN 已有 96 个镜像站点,提供多达 15,121 个包。 240 | 241 | **Python** 提供一个代码库 [PyPI](https://pypi.org/) (Python Package Index),用户可以贡献自己的代码,截止到 2019 年 10 月,PyPI 共有 200,539 个项目。除此之外,[Conda](https://docs.conda.io/en/latest/) 为不同操作系统提供了一个环境和包的管理平台,除了能够管理 Python 以外,Conda 还能够管理 R,Ruby,Lua,Scala,Java,JavaScript,C/C++,FORTRAN 等多种其他语言。 242 | 243 | --- 244 | class: 245 | 246 | # 选择哪种语言 247 | 248 | .center[ 249 | ### 如何选择? 250 | ] 251 | 252 | 1. 你要解决的问题是什么? 253 | 2. 学习一门新语言的成本是多少? 254 | 3. 在你的领域,常用的工具有哪些? 255 | 4. 其他常用的工具又有哪些?它们和常用的工具又有什么关系? 256 | 257 | --- 258 | class: section, center, middle 259 | 260 | # 数据科学分工与流程 261 | 262 | --- 263 | class: 264 | 265 | # 数据科学分工 266 | 267 | 根据 Donoho 在**“数据科学 50 年”** [1] 一文中的观点,将数据科学分为了 6 个部分: 268 | 269 | 1. 数据探索和准备 (Data Exploration and Preparation) 270 | 2. 数据表示和转换 (Data Representation and Transformation) 271 | 3. 数据加工计算 (Computing with Data) 272 | 4. 数据建模 (Data Modeling) 273 | 5. 数据可视化和展现 (Data Visualization and Presentation) 274 | 6. 数据科学的科学性 (Science about Data Science) 275 | 276 | .footnote[ 277 | [1] Donoho, D. 
(2017). 50 years of data science. _Journal of Computational and Graphical Statistics, 26_(4), 745-766. 278 | ] 279 | 280 | --- 281 | class: 282 | 283 | # 数据分析和挖掘流程 284 | 285 | .center[ 286 | ### 工业界数据分析和挖掘工作者采用的方法 287 | ] 288 | 289 | | 年份/方法 | CRISP-DM | My Own | SEMMA | KDD Process | 290 | | :------------------ | :------- | :----- | :---- | :---------- | 291 | | 2002 [1] | 51% | 23% | 12% | NA | 292 | | 2004 [2] | 42% | 28% | 10% | NA | 293 | | 2007 [3] | 42% | 19% | 13% | 7% | 294 | | 2014 [4] | 43% | 27.5% | 8.5% | 7.5% | 295 | 296 | .footnote[ 297 | [1] http://www.kdnuggets.com/polls/2002/methodology.htm
    298 | [2] http://www.kdnuggets.com/polls/2004/data_mining_methodology.htm
    299 | [3] http://www.kdnuggets.com/polls/2007/data_mining_methodology.htm
    300 | [4] https://www.kdnuggets.com/2014/10/crisp-dm-top-methodology-analytics-data-mining-data-science-projects.html 301 | ] 302 | 303 | --- 304 | class: 305 | 306 | # 数据分析和挖掘流程 307 | 308 | .pull-left[ 309 | **CRISP-DM** [1] 全称为跨行业数据挖掘标准流程 (Cross Industry Standard Process for Data Mining) Shearer 于 2000 年提出。CRISP-DM 对一个数据分析和挖掘项目的生命周期提供一个总体的描述。 310 | 311 | - 业务理解 (Business understanding) 312 | - 数据理解 (Data understanding) 313 | - 数据准备 (Data preparation) 314 | - 建模 (Modeling) 315 | - 评估 (Evaluation) 316 | - 部署 (Deployment) 317 | ] 318 | 319 | .pull-right[ 320 | ```{r, echo=F, out.width='80%'} 321 | knitr::include_graphics('images/crisp-dm-cycle.svg') 322 | ``` 323 | ] 324 | 325 | .footnote[ 326 | [1] Shearer, C. (2000). The CRISP-DM model: the new blueprint for data mining. _Journal of data warehousing, 5_(4), 13-22. 327 | ] 328 | 329 | --- 330 | class: 331 | 332 | # CRISP-DM 333 | 334 | .center[ 335 | ```{r, echo=F, out.width='75%'} 336 | knitr::include_graphics('images/crisp-dm-process-tasks-and-output.svg') 337 | ``` 338 | ] 339 | 340 | --- 341 | class: thanks, center, middle 342 | 343 | # 感谢倾听 344 | 345 | ![CC BY-NC-SA 4.0](assets/by-nc-sa.svg) 346 | 347 | 本作品采用 [**CC BY-NC-SA 4.0**](https://github.com/leovan/data-science-introduction-with-r/blob/main/LICENSE) 授权 348 | 349 | 版权所有 © [**范叶亮**](https://leovan.me) -------------------------------------------------------------------------------- /slides/01-data-science-introduction/images/crisp-dm-cycle.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Produced by OmniGraffle 7.4.2 5 | 2017-09-29 09:59:27 +0000 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 页-1 31 | 32 | 33 | Layer 1 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 数据 52 | 53 | 54 | 55 | 56 | 业务理解 57 | 58 | 59 | 60 | 61 | 数据理解 62 | 63 | 64 | 65 | 66 | 数据准备 67 | 68 | 69 | 70 | 71 | 建模 
72 | 73 | 74 | 75 | 76 | 评估 77 | 78 | 79 | 80 | 81 | 部署 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /slides/10-classification-algorithms-part-2/images/stacking-workflow.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 数 25 | 据 26 | 集 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 初级 45 | 学习 46 | 方法 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | C1 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | ... 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | C2 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 次级 119 | 学习 120 | 方法 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 最终 139 | 输出 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | --------------------------------------------------------------------------------