├── .gitignore
├── figures
    ├── fig
    │   ├── 図1.2.pdf
    │   ├── 図1.4.pdf
    │   ├── 図10.1.pdf
    │   ├── 図10.2.pdf
    │   ├── 図11.1.pdf
    │   ├── 図11.2.pdf
    │   ├── 図11.3.pdf
    │   ├── 図11.4.pdf
    │   ├── 図11.5.png
    │   ├── 図2.2.pdf
    │   ├── 図2.3.pdf
    │   ├── 図2.4.png
    │   ├── 図2.5.png
    │   ├── 図2.6.png
    │   ├── 図2.7.png
    │   ├── 図3.1.pdf
    │   ├── 図6.1.pdf
    │   ├── 図7.1.pdf
    │   ├── 図7.2.pdf
    │   ├── 図7.3.pdf
    │   ├── 図8.1.pdf
    │   ├── 図A.1.png
    │   ├── 表11.1.pdf
    │   ├── 表11.b.pdf
    │   ├── 図1-last.pdf
    │   ├── 図1.1.a.png
    │   ├── 図1.1.b.png
    │   ├── 図2.1.a.png
    │   ├── 図2.1.b.png
    │   ├── 図2.8.a.png
    │   ├── 図2.8.b.png
    │   ├── 図2.9.a.png
    │   ├── 図2.9.b.png
    │   ├── 図1-preface.a.pdf
    │   ├── 図1-preface.b.pdf
    │   ├── 図1-last.md
    │   ├── 図3.1.md
    │   └── 図1.4.md
    ├── fig-p
    │   ├── 07-p-knn.pdf
    │   ├── 07-p-lm.pdf
    │   ├── 10-p-pr.pdf
    │   ├── 10-p-roc.pdf
    │   ├── 03-p-temp.pdf
    │   ├── 04-p-conf.pdf
    │   ├── 04-p-curve.pdf
    │   ├── 04-p-hist1.pdf
    │   ├── 04-p-hist2.pdf
    │   ├── 04-p-hist3.pdf
    │   ├── 04-p-iris.pdf
    │   ├── 04-p-mosaic.pdf
    │   ├── 04-p-rbinom.pdf
    │   ├── 04-p-rnorm.pdf
    │   ├── 04-p-runif.pdf
    │   ├── 07-p-plot.pdf
    │   ├── 07-p-tuning.pdf
    │   ├── 09-p-rpart.pdf
    │   ├── 09-p-rpart2.pdf
    │   ├── 09-p-varimp.pdf
    │   ├── 11-p-relu.pdf
    │   ├── 13-p-biplot.pdf
    │   ├── 13-p-elbow.pdf
    │   ├── 13-p-hclust.pdf
    │   ├── 04-p-boxplot.pdf
    │   ├── 04-p-pvalue1.pdf
    │   ├── 04-p-scatter.pdf
    │   ├── 07-p-boxplot.pdf
    │   ├── 07-p-residual.pdf
    │   ├── 08-p-boxplot.pdf
    │   ├── 08-p-sigmoid.pdf
    │   ├── 10-p-logistic.pdf
    │   ├── 11-p-h2o-wine.pdf
    │   ├── 13-p-heatmap.pdf
    │   ├── 04-p-boot-binom.pdf
    │   ├── 04-p-iris-group.pdf
    │   ├── 07-p-polynomial.pdf
    │   ├── 07-p-regression.pdf
    │   ├── 08-p-enet-path.pdf
    │   ├── 08-p-enet-tuning.pdf
    │   ├── 10-p-titanic-roc.pdf
    │   ├── 11-p-mnist-cnn.pdf
    │   ├── 11-p-mnist-id5.pdf
    │   ├── 11-p-mnist-lenet.pdf
    │   ├── 11-p-mnist-nnet.pdf
    │   ├── 11-p-regression.pdf
    │   ├── 04-p-random-sample.pdf
    │   ├── 07-p-tuning-train.pdf
    │   ├── 08-p-enet-tuning2.pdf
    │   ├── 10-p-titanic-tree.pdf
    │   ├── 13-p-pca-clusters.pdf
    │   ├── 08-p-boxplot-scaled.pdf
    │   ├── 11-p-classification.pdf
    │   ├── 11-p-mnist-lenet-miss.pdf
    │   ├── 12-p-airpassengers-lm.pdf
    │   ├── 12-p-airpassengers-arima.pdf
    │   ├── 12-p-airpassengers-prophet.pdf
    │   ├── 12-p-airpassengers-split.pdf
    │   ├── 04-p-runif.py
    │   ├── 04-p-curve.py
    │   ├── 10-p-logistic.py
    │   ├── 04-p-hist2.py
    │   ├── 08-p-sigmoid.py
    │   ├── 04-p-boxplot.py
    │   ├── 04-p-hist1.py
    │   ├── 07-p-plot.py
    │   ├── 11-p-mnist-id5.py
    │   ├── 04-p-rnorm.py
    │   ├── 11-p-relu.py
    │   ├── 04-p-scatter.py
    │   ├── 04-p-random-sample.py
    │   ├── 04-p-rbinom.py
    │   ├── 04-p-boot-binom.py
    │   ├── 04-p-hist3.py
    │   ├── Makefile
    │   ├── 08-p-boxplot.py
    │   ├── 04-p-iris.py
    │   ├── 13-p-hclust.py
    │   ├── 04-p-mosaic.py
    │   ├── 07-p-regression.py
    │   ├── 04-p-iris-group.py
    │   ├── 13-p-elbow.py
    │   ├── 09-p-varimp.py
    │   ├── 03-p-temp.py
    │   ├── 08-p-boxplot-scaled.py
    │   ├── 10-p-roc.py
    │   ├── 13-p-heatmap.py
    │   ├── 04-p-conf.py
    │   ├── 13-p-biplot.py
    │   ├── 07-p-residual.py
    │   ├── 10-p-pr.py
    │   ├── 09-p-rpart.py
    │   ├── 12-p-airpassengers-split.py
    │   ├── 07-p-knn.py
    │   ├── 07-p-lm.py
    │   ├── 11-p-h2o-wine.py
    │   ├── 04-p-pvalue1.py
    │   ├── 08-p-enet-path.py
    │   ├── 07-p-boxplot.py
    │   ├── 12-p-airpassengers-prophet.py
    │   ├── 13-p-pca-clusters.py
    │   ├── 12-p-airpassengers-lm.py
    │   ├── 09-p-rpart2.py
    │   ├── 07-p-tuning.py
    │   ├── 07-p-polynomial.py
    │   ├── 10-p-titanic-tree.py
    │   ├── 10-p-titanic-roc.py
    │   ├── 07-p-tuning-train.py
    │   ├── 11-p-regression.py
    │   ├── 12-p-airpassengers-arima.py
    │   ├── 08-p-enet-tuning.py
    │   ├── 11-p-classification.py
    │   ├── 11-p-mnist-nnet.py
    │   ├── 11-p-mnist-cnn.py
    │   ├── 11-p-mnist-lenet.py
    │   ├── 08-p-enet-tuning2.py
    │   └── 11-p-mnist-lenet-miss.py
    ├── fig-r
    │   ├── 07-r-knn.pdf
    │   ├── 07-r-lm.pdf
    │   ├── 10-r-pr.pdf
    │   ├── 10-r-roc.pdf
    │   ├── 03-r-temp.pdf
    │   ├── 04-r-conf.pdf
    │   ├── 04-r-curve.pdf
    │   ├── 04-r-hist1.pdf
    │   ├── 04-r-hist2.pdf
    │   ├── 04-r-hist3.pdf
    │   ├── 04-r-iris.pdf
    │   ├── 04-r-mosaic.pdf
    │   ├── 04-r-rbinom.pdf
    │   ├── 04-r-rnorm.pdf
    │   ├── 04-r-runif.pdf
    │   ├── 07-r-plot.pdf
    │   ├── 07-r-tuning.pdf
    │   ├── 09-r-rpart.pdf
    │   ├── 09-r-rpart2.pdf
    │   ├── 09-r-varimp.pdf
    │   ├── 10-r-rpart1.pdf
    │   ├── 10-r-rpart2.pdf
    │   ├── 10-r-rpart3.pdf
    │   ├── 11-r-relu.pdf
    │   ├── 13-r-biplot.pdf
    │   ├── 13-r-elbow.pdf
    │   ├── 13-r-hclust.pdf
    │   ├── 13-r-kmeans.pdf
    │   ├── 04-r-boxplot.R
    │   ├── 04-r-boxplot.pdf
    │   ├── 04-r-curve.R
    │   ├── 04-r-ggplot-f.pdf
    │   ├── 04-r-hist1.R
    │   ├── 04-r-pvalue1.pdf
    │   ├── 04-r-scatter.pdf
    │   ├── 07-r-boxplot.pdf
    │   ├── 07-r-residual.pdf
    │   ├── 08-r-boxplot.pdf
    │   ├── 08-r-nnet-3-2.pdf
    │   ├── 08-r-sigmoid.pdf
    │   ├── 10-r-logistic.pdf
    │   ├── 11-r-h2o-wine.pdf
    │   ├── 13-r-hclust2.pdf
    │   ├── 13-r-heatmap.pdf
    │   ├── 04-r-boot-binom.pdf
    │   ├── 04-r-ggplot-box.pdf
    │   ├── 04-r-ggplot-hist.pdf
    │   ├── 04-r-iris-group.pdf
    │   ├── 07-r-polynomial.pdf
    │   ├── 07-r-regression.pdf
    │   ├── 08-r-enet-path.pdf
    │   ├── 08-r-enet-tuning.pdf
    │   ├── 10-r-titanic-roc.pdf
    │   ├── 11-r-mnist-cnn.pdf
    │   ├── 11-r-mnist-id5.pdf
    │   ├── 11-r-mnist-lenet.pdf
    │   ├── 11-r-mnist-nnet.pdf
    │   ├── 11-r-regression.pdf
    │   ├── 04-r-ggplot-mosaic.pdf
    │   ├── 04-r-ggplot-point.pdf
    │   ├── 04-r-random-sample.pdf
    │   ├── 07-r-tuning-train.pdf
    │   ├── 08-r-enet-tuning2.pdf
    │   ├── 10-r-titanic-tree.pdf
    │   ├── 13-r-pca-clusters.pdf
    │   ├── 08-r-boxplot-scaled.pdf
    │   ├── 08-r-sigmoid.R
    │   ├── 10-r-logistic.R
    │   ├── 11-r-classification.pdf
    │   ├── 11-r-mnist-lenet-miss.pdf
    │   ├── 12-r-airpassengers-lm.pdf
    │   ├── 11-r-relu.R
    │   ├── 12-r-airpassengers-arima.pdf
    │   ├── 12-r-airpassengers-prophet.pdf
    │   ├── 12-r-airpassengers-split.pdf
    │   ├── 04-r-hist2.R
    │   ├── 04-r-scatter.R
    │   ├── 04-r-runif.R
    │   ├── 13-r-elbow.R
    │   ├── 04-r-hist3.R
    │   ├── 07-r-plot.R
    │   ├── 04-r-rnorm.R
    │   ├── 04-r-ggplot-point.R
    │   ├── 04-r-ggplot-f.R
    │   ├── 04-r-random-sample.R
    │   ├── 11-r-mnist-id5.R
    │   ├── 09-r-rpart.R
    │   ├── 09-r-varimp.R
    │   ├── 04-r-mosaic.R
    │   ├── 04-r-rbinom.R
    │   ├── 04-r-boot-binom.R
    │   ├── 04-r-ggplot-box.R
    │   ├── 04-r-ggplot-hist.R
    │   ├── 04-r-iris.R
    │   ├── 04-r-ggplot-mosaic.R
    │   ├── 07-r-tuning.R
    │   ├── 13-r-hclust.R
    │   ├── Makefile
    │   ├── 13-r-kmeans.R
    │   ├── 13-r-hclust2.R
    │   ├── 07-r-residual.R
    │   ├── 03-r-temp.R
    │   ├── 08-r-boxplot.R
    │   ├── 10-r-pr.R
    │   ├── 07-r-regression.R
    │   ├── 10-r-roc.R
    │   ├── 13-r-heatmap.R
    │   ├── 10-r-rpart3.R
    │   ├── 10-r-titanic-tree.R
    │   ├── 13-r-biplot.R
    │   ├── 07-r-lm.R
    │   ├── 08-r-boxplot-scaled.R
    │   ├── 09-r-rpart2.R
    │   ├── 07-r-knn.R
    │   ├── 08-r-enet-path.R
    │   ├── 13-r-pca-clusters.R
    │   ├── 04-r-conf.R
    │   ├── 07-r-boxplot.R
    │   ├── 04-r-iris-group.R
    │   ├── 10-r-rpart1.R
    │   ├── 11-r-h2o-wine.R
    │   ├── 12-r-airpassengers-split.R
    │   ├── 07-r-polynomial.R
    │   ├── 04-r-pvalue1.R
    │   ├── 10-r-rpart2.R
    │   ├── 08-r-nnet-3-2.R
    │   ├── 08-r-enet-tuning.R
    │   ├── 07-r-tuning-train.R
    │   ├── 10-r-titanic-roc.R
    │   ├── 12-r-airpassengers-arima.R
    │   ├── 12-r-airpassengers-prophet.R
    │   ├── 12-r-airpassengers-lm.R
    │   ├── 11-r-classification.R
    │   ├── 11-r-regression.R
    │   ├── 11-r-mnist-nnet.R
    │   ├── 11-r-mnist-id5.svg
    │   ├── 11-r-mnist-cnn.R
    │   ├── 11-r-mnist-lenet.R
    │   ├── 08-r-enet-tuning2.R
    │   └── 11-r-mnist-lenet-miss.R
    └── howtomake.md
├── data
    ├── exam.csv
    ├── exam.json
    ├── exam.xml
    └── wine.csv
├── docker
    ├── rstudio.sh
    ├── jupyter.sh
    ├── rstudio
    │   ├── README.md
    │   └── Dockerfile
    └── jupyter
    │   ├── README.md
    │   └── Dockerfile
├── addendum
    ├── 07.03.02
    │   ├── confidence_band_p.py
    │   ├── confidence_band_r.R
    │   ├── 1+3x+N(0,2x).csv
    │   └── README.md
    └── sagemaker
    │   ├── README.md
    │   └── sage-python.yml
├── docs
    └── exam.html
├── code
    ├── R-notebook
    │   ├── r-06.ipynb
    │   ├── README.md
    │   └── r-12.ipynb
    └── Python-notebook
    │   ├── README.md
    │   ├── python-06.ipynb
    │   ├── python-12.ipynb
    │   └── python-05.ipynb
├── README.md
└── update.md


/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/*
2 | tmp*
3 | 


--------------------------------------------------------------------------------
/figures/fig/図1.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.2.pdf


--------------------------------------------------------------------------------
/figures/fig/図1.4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.4.pdf


--------------------------------------------------------------------------------
/figures/fig/図10.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図10.1.pdf


--------------------------------------------------------------------------------
/figures/fig/図10.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図10.2.pdf


--------------------------------------------------------------------------------
/figures/fig/図11.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.1.pdf


--------------------------------------------------------------------------------
/figures/fig/図11.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.2.pdf


--------------------------------------------------------------------------------
/figures/fig/図11.3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.3.pdf


--------------------------------------------------------------------------------
/figures/fig/図11.4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.4.pdf


--------------------------------------------------------------------------------
/figures/fig/図11.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.5.png


--------------------------------------------------------------------------------
/figures/fig/図2.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.2.pdf


--------------------------------------------------------------------------------
/figures/fig/図2.3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.3.pdf


--------------------------------------------------------------------------------
/figures/fig/図2.4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.4.png


--------------------------------------------------------------------------------
/figures/fig/図2.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.5.png


--------------------------------------------------------------------------------
/figures/fig/図2.6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.6.png


--------------------------------------------------------------------------------
/figures/fig/図2.7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.7.png


--------------------------------------------------------------------------------
/figures/fig/図3.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図3.1.pdf


--------------------------------------------------------------------------------
/figures/fig/図6.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図6.1.pdf


--------------------------------------------------------------------------------
/figures/fig/図7.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図7.1.pdf


--------------------------------------------------------------------------------
/figures/fig/図7.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図7.2.pdf


--------------------------------------------------------------------------------
/figures/fig/図7.3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図7.3.pdf


--------------------------------------------------------------------------------
/figures/fig/図8.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図8.1.pdf


--------------------------------------------------------------------------------
/figures/fig/図A.1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図A.1.png


--------------------------------------------------------------------------------
/figures/fig/表11.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/表11.1.pdf


--------------------------------------------------------------------------------
/figures/fig/表11.b.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/表11.b.pdf


--------------------------------------------------------------------------------
/data/exam.csv:
--------------------------------------------------------------------------------
1 | name,english,math,gender
2 | A,60,70,f
3 | B,90,80,m
4 | C,70,90,m
5 | D,90,100,f
6 | 


--------------------------------------------------------------------------------
/figures/fig/図1-last.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1-last.pdf


--------------------------------------------------------------------------------
/figures/fig/図1.1.a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.1.a.png


--------------------------------------------------------------------------------
/figures/fig/図1.1.b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.1.b.png


--------------------------------------------------------------------------------
/figures/fig/図2.1.a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.1.a.png


--------------------------------------------------------------------------------
/figures/fig/図2.1.b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.1.b.png


--------------------------------------------------------------------------------
/figures/fig/図2.8.a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.8.a.png


--------------------------------------------------------------------------------
/figures/fig/図2.8.b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.8.b.png


--------------------------------------------------------------------------------
/figures/fig/図2.9.a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.9.a.png


--------------------------------------------------------------------------------
/figures/fig/図2.9.b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.9.b.png


--------------------------------------------------------------------------------
/figures/fig-p/07-p-knn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-knn.pdf


--------------------------------------------------------------------------------
/figures/fig-p/07-p-lm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-lm.pdf


--------------------------------------------------------------------------------
/figures/fig-p/10-p-pr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-pr.pdf


--------------------------------------------------------------------------------
/figures/fig-p/10-p-roc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-roc.pdf


--------------------------------------------------------------------------------
/figures/fig-r/07-r-knn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-knn.pdf


--------------------------------------------------------------------------------
/figures/fig-r/07-r-lm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-lm.pdf


--------------------------------------------------------------------------------
/figures/fig-r/10-r-pr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-pr.pdf


--------------------------------------------------------------------------------
/figures/fig-r/10-r-roc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-roc.pdf


--------------------------------------------------------------------------------
/figures/fig-p/03-p-temp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/03-p-temp.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-conf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-conf.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-curve.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-curve.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-hist1.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-hist2.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-hist3.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-iris.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-iris.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-mosaic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-mosaic.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-rbinom.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-rbinom.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-rnorm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-rnorm.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-runif.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-runif.pdf


--------------------------------------------------------------------------------
/figures/fig-p/07-p-plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-plot.pdf


--------------------------------------------------------------------------------
/figures/fig-p/07-p-tuning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-tuning.pdf


--------------------------------------------------------------------------------
/figures/fig-p/09-p-rpart.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/09-p-rpart.pdf


--------------------------------------------------------------------------------
/figures/fig-p/09-p-rpart2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/09-p-rpart2.pdf


--------------------------------------------------------------------------------
/figures/fig-p/09-p-varimp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/09-p-varimp.pdf


--------------------------------------------------------------------------------
/figures/fig-p/11-p-relu.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-relu.pdf


--------------------------------------------------------------------------------
/figures/fig-p/13-p-biplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-biplot.pdf


--------------------------------------------------------------------------------
/figures/fig-p/13-p-elbow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-elbow.pdf


--------------------------------------------------------------------------------
/figures/fig-p/13-p-hclust.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-hclust.pdf


--------------------------------------------------------------------------------
/figures/fig-r/03-r-temp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/03-r-temp.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-conf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-conf.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-curve.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-curve.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-hist1.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-hist2.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-hist3.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-iris.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-iris.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-mosaic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-mosaic.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-rbinom.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-rbinom.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-rnorm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-rnorm.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-runif.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-runif.pdf


--------------------------------------------------------------------------------
/figures/fig-r/07-r-plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-plot.pdf


--------------------------------------------------------------------------------
/figures/fig-r/07-r-tuning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-tuning.pdf


--------------------------------------------------------------------------------
/figures/fig-r/09-r-rpart.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/09-r-rpart.pdf


--------------------------------------------------------------------------------
/figures/fig-r/09-r-rpart2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/09-r-rpart2.pdf


--------------------------------------------------------------------------------
/figures/fig-r/09-r-varimp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/09-r-varimp.pdf


--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-rpart1.pdf


--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-rpart2.pdf


--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-rpart3.pdf


--------------------------------------------------------------------------------
/figures/fig-r/11-r-relu.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-relu.pdf


--------------------------------------------------------------------------------
/figures/fig-r/13-r-biplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-biplot.pdf


--------------------------------------------------------------------------------
/figures/fig-r/13-r-elbow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-elbow.pdf


--------------------------------------------------------------------------------
/figures/fig-r/13-r-hclust.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-hclust.pdf


--------------------------------------------------------------------------------
/figures/fig-r/13-r-kmeans.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-kmeans.pdf


--------------------------------------------------------------------------------
/figures/fig/図1-preface.a.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1-preface.a.pdf


--------------------------------------------------------------------------------
/figures/fig/図1-preface.b.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1-preface.b.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-boxplot.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-pvalue1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-pvalue1.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-scatter.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-scatter.pdf


--------------------------------------------------------------------------------
/figures/fig-p/07-p-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-boxplot.pdf


--------------------------------------------------------------------------------
/figures/fig-p/07-p-residual.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-residual.pdf


--------------------------------------------------------------------------------
/figures/fig-p/08-p-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-boxplot.pdf


--------------------------------------------------------------------------------
/figures/fig-p/08-p-sigmoid.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-sigmoid.pdf


--------------------------------------------------------------------------------
/figures/fig-p/10-p-logistic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-logistic.pdf


--------------------------------------------------------------------------------
/figures/fig-p/11-p-h2o-wine.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-h2o-wine.pdf


--------------------------------------------------------------------------------
/figures/fig-p/13-p-heatmap.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-heatmap.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-boxplot.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-boxplot.pdf", width = 6, height = 5.5)
2 | # Box plots of every column of iris except the fifth, written to the PDF opened above.
3 | boxplot(iris[, -5])
4 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-r/04-r-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-boxplot.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-curve.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-curve.pdf", width = 6, height = 5.5)
2 | # Plot the curve x^3 - x for x from -2 to 2 into the PDF opened above.
3 | curve(x^3 - x, -2, 2)
4 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-f.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-f.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist1.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-hist1.pdf", width = 6, height = 5.5)
2 | # Histogram of iris$Sepal.Length, written to the PDF opened above.
3 | hist(iris$Sepal.Length)
4 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-r/04-r-pvalue1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-pvalue1.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-scatter.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-scatter.pdf


--------------------------------------------------------------------------------
/figures/fig-r/07-r-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-boxplot.pdf


--------------------------------------------------------------------------------
/figures/fig-r/07-r-residual.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-residual.pdf


--------------------------------------------------------------------------------
/figures/fig-r/08-r-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-boxplot.pdf


--------------------------------------------------------------------------------
/figures/fig-r/08-r-nnet-3-2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-nnet-3-2.pdf


--------------------------------------------------------------------------------
/figures/fig-r/08-r-sigmoid.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-sigmoid.pdf


--------------------------------------------------------------------------------
/figures/fig-r/10-r-logistic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-logistic.pdf


--------------------------------------------------------------------------------
/figures/fig-r/11-r-h2o-wine.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-h2o-wine.pdf


--------------------------------------------------------------------------------
/figures/fig-r/13-r-hclust2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-hclust2.pdf


--------------------------------------------------------------------------------
/figures/fig-r/13-r-heatmap.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-heatmap.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-boot-binom.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-boot-binom.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-iris-group.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-iris-group.pdf


--------------------------------------------------------------------------------
/figures/fig-p/07-p-polynomial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-polynomial.pdf


--------------------------------------------------------------------------------
/figures/fig-p/07-p-regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-regression.pdf


--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-path.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-enet-path.pdf


--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-tuning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-enet-tuning.pdf


--------------------------------------------------------------------------------
/figures/fig-p/10-p-titanic-roc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-titanic-roc.pdf


--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-cnn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-cnn.pdf


--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-id5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-id5.pdf


--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-lenet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-lenet.pdf


--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-nnet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-nnet.pdf


--------------------------------------------------------------------------------
/figures/fig-p/11-p-regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-regression.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-boot-binom.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-boot-binom.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-box.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-box.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-hist.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-hist.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-iris-group.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-iris-group.pdf


--------------------------------------------------------------------------------
/figures/fig-r/07-r-polynomial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-polynomial.pdf


--------------------------------------------------------------------------------
/figures/fig-r/07-r-regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-regression.pdf


--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-path.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-enet-path.pdf


--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-tuning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-enet-tuning.pdf


--------------------------------------------------------------------------------
/figures/fig-r/10-r-titanic-roc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-titanic-roc.pdf


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-cnn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-cnn.pdf


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-id5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-id5.pdf


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-lenet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-lenet.pdf


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-nnet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-nnet.pdf


--------------------------------------------------------------------------------
/figures/fig-r/11-r-regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-regression.pdf


--------------------------------------------------------------------------------
/figures/fig-p/04-p-random-sample.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-random-sample.pdf


--------------------------------------------------------------------------------
/figures/fig-p/07-p-tuning-train.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-tuning-train.pdf


--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-tuning2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-enet-tuning2.pdf


--------------------------------------------------------------------------------
/figures/fig-p/10-p-titanic-tree.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-titanic-tree.pdf


--------------------------------------------------------------------------------
/figures/fig-p/13-p-pca-clusters.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-pca-clusters.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-mosaic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-mosaic.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-point.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-point.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-random-sample.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-random-sample.pdf


--------------------------------------------------------------------------------
/figures/fig-r/07-r-tuning-train.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-tuning-train.pdf


--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-tuning2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-enet-tuning2.pdf


--------------------------------------------------------------------------------
/figures/fig-r/10-r-titanic-tree.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-titanic-tree.pdf


--------------------------------------------------------------------------------
/figures/fig-r/13-r-pca-clusters.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-pca-clusters.pdf


--------------------------------------------------------------------------------
/figures/fig-p/08-p-boxplot-scaled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-boxplot-scaled.pdf


--------------------------------------------------------------------------------
/figures/fig-p/11-p-classification.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-classification.pdf


--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-lenet-miss.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-lenet-miss.pdf


--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-lm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-lm.pdf


--------------------------------------------------------------------------------
/figures/fig-r/08-r-boxplot-scaled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-boxplot-scaled.pdf


--------------------------------------------------------------------------------
/figures/fig-r/08-r-sigmoid.R:
--------------------------------------------------------------------------------
1 | pdf(file = "08-r-sigmoid.pdf", width = 6, height = 5.5)
2 | # Plot the sigmoid 1 / (1 + exp(-x)) for x from -6 to 6 into the PDF opened above.
3 | curve(1 / (1 + exp(-x)), -6, 6)
4 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-r/10-r-logistic.R:
--------------------------------------------------------------------------------
1 | pdf(file = "10-r-logistic.pdf", width = 6, height = 5.5)
2 | # Plot the logistic function 1 / (1 + exp(-x)) for x from -6 to 6 into the PDF opened above.
3 | curve(1 / (1 + exp(-x)), -6, 6)
4 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-r/11-r-classification.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-classification.pdf


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-lenet-miss.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-lenet-miss.pdf


--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-lm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-lm.pdf


--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-arima.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-arima.pdf


--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-prophet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-prophet.pdf


--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-split.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-split.pdf


--------------------------------------------------------------------------------
/figures/fig-r/11-r-relu.R:
--------------------------------------------------------------------------------
1 | pdf(file = "11-r-relu.pdf", width = 6, height = 5.5)
2 | # Plot activation_relu for x from -3 to 3 into the PDF opened above.
3 | library(keras) # activation_relu is expected to come from this package
4 | curve(activation_relu(x), -3, 3)
5 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-arima.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-arima.pdf


--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-prophet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-prophet.pdf


--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-split.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-split.pdf


--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist2.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-hist2.pdf", width = 6, height = 5.5)
2 | # Histogram of three values with breaks = 2, written to the PDF opened above.
3 | x <- c(10, 20, 30)
4 | hist(x, breaks = 2) # number of classes is 2
5 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-r/04-r-scatter.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-scatter.pdf", width = 6, height = 5.5)
2 | # Scatter plot of iris Sepal.Length (x axis) against Sepal.Width (y axis).
3 | plot(iris$Sepal.Length,
4 |      iris$Sepal.Width)
5 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-p/04-p-runif.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | 
4 | x = np.random.random(1000)
5 | plt.hist(x)
6 | 
7 | plt.savefig('04-p-runif.pdf')
8 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-runif.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-runif.pdf", width = 6, height = 5)
2 | # Histogram of 1000 uniform random numbers drawn between 0 and 1.
3 | x <- runif(min = 0,  # minimum
4 |            max = 1,  # maximum
5 |            n = 1000) # number of random values
6 | hist(x)
7 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-p/04-p-curve.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | 
4 | x = np.linspace(-2, 2, 100)
5 | y = x**3 - x
6 | plt.plot(x, y)
7 | 
8 | plt.savefig('04-p-curve.pdf')
9 | 


--------------------------------------------------------------------------------
/figures/fig-p/10-p-logistic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | x = np.arange(-6, 6, 0.1)
3 | y = 1 / (1 + np.exp(-x))
4 | import matplotlib.pyplot as plt
5 | plt.plot(x, y)
6 | plt.savefig('10-p-logistic.pdf')
7 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist2.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | my_df = pd.DataFrame({'x': [10, 20, 30]})
4 | my_df.hist('x', bins=2) # 階級数は2
5 | 
6 | import matplotlib.pyplot as plt
7 | plt.savefig('04-p-hist2.pdf')
8 | 


--------------------------------------------------------------------------------
/figures/fig-p/08-p-sigmoid.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | 
4 | x = np.linspace(-6, 6, 100)
5 | y = 1 / (1 + np.exp(-x))
6 | plt.plot(x, y)
7 | plt.savefig('08-p-sigmoid.pdf')
8 | 


--------------------------------------------------------------------------------
/figures/fig-r/13-r-elbow.R:
--------------------------------------------------------------------------------
1 | pdf(file = "13-r-elbow.pdf", width = 6, height = 4.5)
2 | 
3 | library(tidyverse)
4 | library(factoextra)
5 | 
6 | # Run fviz_nbclust with kmeans and method "wss" on iris without its fifth column.
7 | features <- iris[, -5]
8 | fviz_nbclust(x = features, FUNcluster = kmeans, method = "wss")
9 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist3.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-hist3.pdf", width = 6, height = 5.5)
2 | # Histogram of iris$Sepal.Length with explicitly computed class boundaries.
3 | x <- iris$Sepal.Length
4 | tmp <- seq(min(x), max(x), # 10 evenly spaced break points from the smallest to the largest value
5 |            length.out = 10)
6 | hist(x, breaks = tmp, right = FALSE) # right = FALSE: classes are closed on the left, open on the right
7 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-r/07-r-plot.R:
--------------------------------------------------------------------------------
1 | pdf(file = "07-r-plot.pdf", width = 6, height = 4.5)
2 | 
3 | library(tidyverse)
4 | 
5 | # Scatter plot of the cars data: speed on the x axis, dist on the y axis.
6 | cars_df <- cars
7 | ggplot(data = cars_df, mapping = aes(x = speed, y = dist)) +
8 |   geom_point()
9 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-boxplot.py:
--------------------------------------------------------------------------------
1 | import statsmodels.api as sm
2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
3 | 
4 | iris.boxplot()
5 | 
6 | import matplotlib.pyplot as plt
7 | plt.savefig('04-p-boxplot.pdf')
8 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-rnorm.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-rnorm.pdf", width = 6, height = 5)
2 | # Histogram of r normal random numbers (mean 50, standard deviation 5).
3 | r <- 10000
4 | x <- rnorm(mean = 50, # mean
5 |            sd = 5,    # standard deviation
6 |            n = r)     # number of random values
7 | hist(x, breaks = 40)
8 | # NOTE(review): no dev.off() here; presumably the device is closed when R exits — confirm.


--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist1.py:
--------------------------------------------------------------------------------
1 | import statsmodels.api as sm
2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
3 | 
4 | iris.hist('Sepal.Length')
5 | 
6 | import matplotlib.pyplot as plt
7 | plt.savefig('04-p-hist1.pdf')
8 | 


--------------------------------------------------------------------------------
/figures/fig-p/07-p-plot.py:
--------------------------------------------------------------------------------
1 | import statsmodels.api as sm
2 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
3 | my_data.plot(x='speed', style='o')
4 | import matplotlib.pyplot as plt
5 | plt.savefig('07-p-plot.pdf')
6 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-point.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-ggplot-point.pdf", width = 6, height = 4)
2 | 
3 | library(tidyverse)
4 | 
5 | # Scatter plot of iris: Sepal.Length on the x axis, Sepal.Width on the y axis.
6 | point_aes <- aes(x = Sepal.Length, y = Sepal.Width)
7 | ggplot(data = iris, mapping = point_aes) +
8 |   geom_point()
9 | 


--------------------------------------------------------------------------------
/docker/rstudio.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Start the taroyabuki/rstudio image as a detached container named "rs".
 3 | # Host port 8787 maps to container port 8787, and the current directory is
 4 | # mounted at /home/rstudio/work. PASSWORD and ROOT are passed through as
 5 | # environment variables (their meaning is defined by the image, not here).
 6 | docker run \
 7 |   --detach \
 8 |   --name rs \
 9 |   --platform linux/x86_64 \
10 |   --publish 8787:8787 \
11 |   --env PASSWORD=password \
12 |   --env ROOT=TRUE \
13 |   --volume "$(pwd):/home/rstudio/work" \
14 |   taroyabuki/rstudio
15 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-f.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-ggplot-f.pdf", width = 6, height = 4)
2 | 
3 | library(tidyverse)
4 | 
5 | f <- function(x) { x^3 - x }
6 | data.frame(x = c(-2, 2)) %>%
7 |   ggplot(aes(x = x)) +
8 |   stat_function(fun = f)
9 | 


--------------------------------------------------------------------------------
/docker/jupyter.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Start the taroyabuki/jupyter container in the background.
 3 | # The current directory is mounted at /home/jovyan/work, the container's
 4 | # port 8888 is published on host port 8888, and the notebook token is
 5 | # set to 'password'.
 6 | docker run \
 7 |   --detach \
 8 |   --publish 8888:8888 \
 9 |   --volume "$(pwd):/home/jovyan/work" \
10 |   --platform linux/x86_64 \
11 |   --name jr \
12 |   taroyabuki/jupyter \
13 |   start-notebook.sh \
14 |   --NotebookApp.token='password'
15 | 


--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-id5.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
3 | 
4 | import matplotlib.pyplot as plt
5 | plt.matshow(x_train[4, :, :])
6 | plt.savefig('11-p-mnist-id5.pdf')
7 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-random-sample.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-random-sample.pdf", width = 6, height = 5)
2 | 
3 | x <- sample(x = 1:6,        # range of values to draw from
4 |             size = 10000,   # number of random draws
5 |             replace = TRUE) # duplicates allowed (sampling with replacement)
6 | hist(x, breaks = 0:6) # histogram
7 | 


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-id5.R:
--------------------------------------------------------------------------------
1 | pdf(file = "11-r-mnist-id5.pdf", width = 5.83, height = 4.13)
2 | 
3 | library(keras)
4 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
5 | 
6 | plot(as.raster(x = x_train[5, , ], max = max(x_train)))
7 | 


--------------------------------------------------------------------------------
/figures/fig-r/09-r-rpart.R:
--------------------------------------------------------------------------------
1 | pdf(file = "09-r-rpart.pdf", width = 5.83, height = 4.13)
2 | 
3 | library(caret)
4 | my_data <- iris
5 | my_model <- train(form = Species ~ ., data = my_data, method = "rpart2")
6 | rpart.plot::rpart.plot(my_model$finalModel, extra = 1)
7 | 


--------------------------------------------------------------------------------
/figures/fig-r/09-r-varimp.R:
--------------------------------------------------------------------------------
1 | pdf(file = "09-r-varimp.pdf", width = 5.83, height = 4.13)
2 | 
3 | library(caret)
4 | library(tidyverse)
5 | my_data <- iris
6 | 
7 | my_model <- train(form = Species ~ ., data = my_data, method = "rf")
8 | ggplot(varImp(my_model))
9 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-mosaic.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "04-r-mosaic.pdf", width = 6, height = 4.5)
 2 | 
 3 | my_df <- data.frame(
 4 |   Species = iris$Species,
 5 |   w_Sepal = iris$Sepal.Width > 3)
 6 | 
 7 | mosaicplot(
 8 |   formula = ~ Species + w_Sepal,
 9 |   data = my_df)
10 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-rnorm.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | 
 4 | r = 10000
 5 | x = np.random.normal(
 6 |     loc=50,  # mean
 7 |     scale=5, # standard deviation
 8 |     size=r)  # number of random draws
 9 | plt.hist(x, bins=40)
10 | 
11 | plt.savefig('04-p-rnorm.pdf')
12 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-rbinom.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "04-r-rbinom.pdf", width = 6, height = 5)
 2 | 
 3 | n <- 100
 4 | p <- 0.5
 5 | r <- 10000
 6 | x <- rbinom(size = n, # number of trials
 7 |             prob = p, # probability
 8 |             n = r)    # number of random draws
 9 | hist(x, breaks = max(x) - min(x))
10 | 


--------------------------------------------------------------------------------
/figures/fig-p/11-p-relu.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | from keras import activations
 4 | 
 5 | x = np.linspace(-3, 3, 100)
 6 | plt.plot(x, activations.relu(x))
 7 | plt.xlabel('x')
 8 | plt.ylabel('ReLU(x)')
 9 | plt.savefig('11-p-relu.pdf')
10 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-boot-binom.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-boot-binom.pdf", width = 6, height = 5)
2 | 
3 | X <- rep(0:1, c(13, 2))
4 | n <- 10^5
5 | result <- replicate(n, sum(sample(X, size = length(X), replace = TRUE)))
6 | hist(x = result,
7 |      breaks = 0:15,
8 |      right = FALSE)
9 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-scatter.py:
--------------------------------------------------------------------------------
 1 | import statsmodels.api as sm
 2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
 3 | 
 4 | iris.plot('Sepal.Length',
 5 |           'Sepal.Width',
 6 |           kind='scatter')
 7 | 
 8 | import matplotlib.pyplot as plt
 9 | plt.savefig('04-p-scatter.pdf')
10 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-random-sample.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | 
 4 | x = np.random.choice(
 5 |     a=range(1, 7),  # 1 to 6
 6 |     size=10000,     # number of random draws
 7 |     replace=True)   # duplicates allowed (sampling with replacement)
 8 | plt.hist(x, bins=6) # histogram
 9 | 
10 | plt.savefig('04-p-random-sample.pdf')
11 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-rbinom.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | 
 4 | n = 100
 5 | p = 0.5
 6 | r = 10000
 7 | x = np.random.binomial(
 8 |     n=n,    # number of trials
 9 |     p=p,    # probability
10 |     size=r) # number of random draws
11 | plt.hist(x, bins=max(x) - min(x))  # NOTE(review): an integer bins gives max-min unit-width bins over [min, max], and the last bin includes its right edge, so max-1 and max are counted together — confirm this is intended
12 | 
13 | plt.savefig('04-p-rbinom.pdf')
14 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-box.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "04-r-ggplot-box.pdf", width = 6, height = 4)
 2 | 
 3 | library(tidyverse)
 4 | 
 5 | iris %>%
 6 |   pivot_longer(-Species) %>%
 7 |   ggplot(aes(
 8 |     x = factor(name,
 9 |                levels = names(iris)),
10 |     y = value)) +
11 |   geom_boxplot() +
12 |   xlab(NULL)
13 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-boot-binom.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | X = [0] * 13 + [1] * 2 # step 1: the data, 13 zeros and 2 ones
 3 | n = 10**5              # number of resamples
 4 | result = [sum(np.random.choice(X, len(X), replace=True)) # step 4: sum of one resample drawn with replacement
 5 |           for _ in range(n)]
 6 | 
 7 | import matplotlib.pyplot as plt
 8 | plt.hist(result,
 9 |          bins=range(0, 16))
10 | plt.savefig('04-p-boot-binom.pdf')
11 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-hist.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "04-r-ggplot-hist.pdf", width = 6, height = 4)
 2 | 
 3 | library(tidyverse)
 4 | 
 5 | x <- iris$Sepal.Length
 6 | tmp <- seq(min(x), max(x),
 7 |            length.out = 10)
 8 | iris %>%
 9 |   ggplot(aes(x = Sepal.Length)) +
10 |   geom_histogram(breaks = tmp,
11 |                  closed = "left")
12 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-iris.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "04-r-iris.pdf", width = 6, height = 4)
 2 | 
 3 | library(tidyverse)
 4 | my_df <- psych::describe(iris[, -5])
 5 | 
 6 | tmp <- rownames(my_df)
 7 | my_df %>% ggplot(aes(x = factor(tmp, levels = tmp), y = mean)) +
 8 |   geom_col() +
 9 |   geom_errorbar(aes(ymin = mean - se, ymax = mean + se)) +
10 |   xlab(NULL)
11 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist3.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import statsmodels.api as sm
 3 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
 4 | 
 5 | x = iris['Sepal.Length']
 6 | tmp = np.linspace(min(x), max(x), 10)
 7 | iris.hist('Sepal.Length',
 8 |           bins=tmp.round(2))
 9 | 
10 | import matplotlib.pyplot as plt
11 | plt.savefig('04-p-hist3.pdf')
12 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-mosaic.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "04-r-ggplot-mosaic.pdf", width = 6, height = 4)
 2 | 
 3 | library(tidyverse)
 4 | 
 5 | library(ggmosaic)
 6 | my_df <- data.frame(
 7 |   Species = iris$Species,
 8 |   w_Sepal = iris$Sepal.Width > 3)
 9 | my_df %>%
10 |   ggplot() +
11 |   geom_mosaic(
12 |     aes(x = product(w_Sepal,
13 |                     Species)))
14 | 


--------------------------------------------------------------------------------
/addendum/07.03.02/confidence_band_p.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import seaborn as sns
 3 | 
 4 | data = pd.read_csv('1+3x+N(0,2x).csv')
 5 | x = data.x
 6 | y = data.y
 7 | n = len(x)
 8 | 
 9 | alpha = 0.99
10 | n_boot = 10000
11 | 
12 | sns.regplot(x=x, y=y, ci=100 * alpha, n_boot=n_boot)
13 | 
14 | import matplotlib.pyplot as plt
15 | plt.savefig('confidence_band_p.pdf')
16 | 


--------------------------------------------------------------------------------
/figures/fig-p/Makefile:
--------------------------------------------------------------------------------
 1 | # Build one PDF per Python script in this directory.
 2 | SRC=$(wildcard *.py)
 3 | 
 4 | PDF=$(SRC:.py=.pdf)
 5 | 
 6 | # all and clean name actions, not files, so a stray file called
 7 | # "all" or "clean" must not stop them from running.
 8 | .PHONY: all clean
 9 | 
10 | all: $(PDF)
11 | 
12 | .SUFFIXES: .pdf .py
13 | 
14 | # Run the script; if /usr/bin/pdfcrop exists, crop the resulting PDF
15 | # and move the cropped file over the original.
16 | .py.pdf:
17 | 	if python3 $<; then\
18 | 		if [ -f /usr/bin/pdfcrop ]; then\
19 | 			pdfcrop $@;\
20 | 			rm $@;\
21 | 			mv $(basename $@)-crop.pdf $@;\
22 | 		fi;\
23 | 	else exit 1;\
24 | 	fi
25 | 
26 | clean:
27 | 	rm -f *.pdf *.log
28 | 


--------------------------------------------------------------------------------
/figures/fig-p/08-p-boxplot.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | my_url = ('https://raw.githubusercontent.com/taroyabuki/' +
 4 |           'fromzero/master/data/wine.csv')
 5 | my_data = pd.read_csv(my_url)
 6 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2']
 7 | 
 8 | X.boxplot(showmeans=True)
 9 | 
10 | import matplotlib.pyplot as plt
11 | plt.savefig('08-p-boxplot.pdf')
12 | 


--------------------------------------------------------------------------------
/figures/fig/図1-last.md:
--------------------------------------------------------------------------------
 1 | ```puml
 2 | @startuml
 3 | scale 0.8
 4 | skinparam {
 5 |   defaultFontName Hiragino Kaku Gothic ProN
 6 |   monochrome true
 7 |   shadowing false
 8 | }
 9 | 
10 | (リファレンス)
11 | (本書)-->(プログラミング入門)
12 | (本書)-->(データサイエンス入門)
13 | (本書)-->(統計学)
14 | プログラミング入門-->(言語についての高度な話題)
15 | データサイエンス入門-->(データサイエンスの理論と実践)
16 | 統計学-->(統計学の実践)
17 | @enduml
18 | ```
19 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-iris.py:
--------------------------------------------------------------------------------
 1 | import statsmodels.api as sm
 2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
 3 | 
 4 | my_df = iris.describe().transpose()[['mean', 'std']]
 5 | my_df['se'] = my_df['std'] / len(iris)**0.5
 6 | 
 7 | my_df.plot(y='mean', kind='bar', yerr='se', capsize=10)
 8 | 
 9 | import matplotlib.pyplot as plt
10 | plt.tight_layout()
11 | plt.savefig('04-p-iris.pdf')
12 | 


--------------------------------------------------------------------------------
/figures/fig-r/07-r-tuning.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "07-r-tuning.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | set.seed(0)
 4 | 
 5 | library(caret)
 6 | library(tidyverse)
 7 | my_data <- cars
 8 | my_model <- train(form = dist ~ speed, data = my_data, method = "knn",
 9 |                   tuneGrid = expand.grid(k = 1:15),
10 |                   trControl = trainControl(method = "LOOCV"))
11 | ggplot(my_model)
12 | 


--------------------------------------------------------------------------------
/figures/fig-r/13-r-hclust.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "13-r-hclust.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | my_data <- data.frame(
 4 |   x = c(  0, -16,  10,  10),
 5 |   y = c(  0,   0,  10, -15),
 6 |   row.names = c("A", "B", "C", "D"))
 7 | 
 8 | my_dist <- dist(my_data)
 9 | my_result <- hclust(my_dist)
10 | 
11 | factoextra::fviz_dend(              # draw the dendrogram of my_result
12 |   my_result,
13 |   k = 3,                            # cut the tree into 3 groups
14 |   rect = TRUE, rect_fill = TRUE)    # spelled out: T is an ordinary variable that can be reassigned
15 | 


--------------------------------------------------------------------------------
/figures/fig-r/Makefile:
--------------------------------------------------------------------------------
 1 | # Build one PDF per R script in this directory.
 2 | SRC=$(wildcard *.R)
 3 | 
 4 | PDF=$(SRC:.R=.pdf)
 5 | 
 6 | # all and clean name actions, not files, so a stray file called
 7 | # "all" or "clean" must not stop them from running.
 8 | .PHONY: all clean
 9 | 
10 | all: $(PDF)
11 | 
12 | .SUFFIXES: .pdf .R
13 | 
14 | # Run the script; if /usr/bin/pdfcrop exists, crop the resulting PDF
15 | # and move the cropped file over the original. If the script fails,
16 | # delete the target before reporting the error.
17 | .R.pdf:
18 | 	if Rscript $<; then\
19 | 		if [ -f /usr/bin/pdfcrop ]; then\
20 | 			pdfcrop $@;\
21 | 			rm $@;\
22 | 			mv $(basename $@)-crop.pdf $@;\
23 | 		fi;\
24 | 	else\
25 | 		rm -f $@;\
26 | 		exit 1;\
27 | 	fi
28 | 
29 | clean:
30 | 	rm -f *.pdf *.log
31 | 


--------------------------------------------------------------------------------
/addendum/07.03.02/confidence_band_r.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "confidence_band_r.pdf", width = 6, height = 4.5)
 2 | 
 3 | library(boot)
 4 | library(tidyverse)
 5 | 
 6 | data <- read_csv("1+3x+N(0,2x).csv")
 7 | x <- data$x
 8 | y <- data$y
 9 | n <- nrow(data)
10 | 
11 | alpha <- 0.99
12 | data %>% ggplot(aes(x = x, y = y)) +
13 |   geom_point() +
14 |   stat_smooth(formula = y ~ x, method = "lm", level = alpha)
15 | 


--------------------------------------------------------------------------------
/docker/rstudio/README.md:
--------------------------------------------------------------------------------
1 | # RStudio用のコンテナ
2 | 
3 | - Docker Hub: https://hub.docker.com/r/taroyabuki/rstudio
4 | - 起動方法（3種類）
5 |     - [rstudio.sh](../rstudio.sh)を実行する．
6 |     - `wget https://raw.githubusercontent.com/taroyabuki/fromzero/master/docker/rstudio.sh`の後で，`sh rstudio.sh`
7 |     - `git clone https://github.com/taroyabuki/fromzero.git`の後で，`sh fromzero/docker/rstudio.sh`
8 | - RStudio Serverへのアクセス：http://localhost:8787
9 | 


--------------------------------------------------------------------------------
/figures/fig-r/13-r-kmeans.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "13-r-kmeans.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | library(tidyverse)
 4 | library(factoextra)
 5 | 
 6 | my_data <- iris[, -5]
 7 | 
 8 | f <- 2:5 %>% map(function(k) {
 9 |   my_data %>% kmeans(k) %>%
10 |     fviz_cluster(data = my_data, geom = "point") +
11 |     ggtitle(sprintf("k = %s", k))
12 | })
13 | gridExtra::grid.arrange(f[[1]], f[[2]], f[[3]], f[[4]], ncol = 2)
14 | 


--------------------------------------------------------------------------------
/figures/fig-r/13-r-hclust2.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "13-r-hclust2.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | my_data <- data.frame(
 4 |   x = c(  0, -16,  10,  10),
 5 |   y = c(  0,   0,  10, -15),
 6 |   row.names = c("A", "B", "C", "D"))
 7 | 
 8 | my_dist <- dist(my_data)
 9 | my_result <- hclust(my_dist)
10 | 
11 | factoextra::fviz_dend(              # draw the dendrogram of my_result
12 |   my_result,
13 |   k = 3,                            # cut the tree into 3 groups
14 |   rect = TRUE, rect_fill = TRUE,    # spelled out: T is an ordinary variable that can be reassigned
15 |   type = "phylogenic")
16 | 


--------------------------------------------------------------------------------
/figures/fig-p/13-p-hclust.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | my_data = pd.DataFrame(
 3 |     {'x': [0, -16, 10, 10],
 4 |      'y': [0, 0, 10, -15]},
 5 |     index=['A', 'B', 'C', 'D'])
 6 | 
 7 | from scipy.cluster import hierarchy
 8 | my_result = hierarchy.linkage(my_data, metric='euclidean', method='complete')
 9 | hierarchy.dendrogram(my_result, labels=my_data.index)
10 | 
11 | import matplotlib.pyplot as plt
12 | plt.savefig('13-p-hclust.pdf')
13 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-mosaic.py:
--------------------------------------------------------------------------------
 1 | import statsmodels.api as sm
 2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
 3 | 
 4 | import pandas as pd
 5 | from statsmodels.graphics.mosaicplot \
 6 |     import mosaic
 7 | 
 8 | my_df = pd.DataFrame({
 9 |     'Species': iris.Species,
10 |     'w_Sepal': iris['Sepal.Width'] > 3})
11 | mosaic(my_df,
12 |        index=['Species', 'w_Sepal'])
13 | 
14 | import matplotlib.pyplot as plt
15 | plt.savefig('04-p-mosaic.pdf')
16 | 


--------------------------------------------------------------------------------
/figures/fig-p/07-p-regression.py:
--------------------------------------------------------------------------------
 1 | import seaborn as sns
 2 | import statsmodels.api as sm
 3 | 
 4 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
 5 | ax = sns.regplot(x='speed', y='dist', data=my_data)
 6 | ax.vlines(x=21.5, ymin=-5, ymax=67,   linestyles='dotted')
 7 | ax.hlines(y=67,   xmin=4,  xmax=21.5, linestyles='dotted')
 8 | ax.set_xlim(4, 25)
 9 | ax.set_ylim(-5, 125)
10 | 
11 | import matplotlib.pyplot as plt
12 | plt.savefig('07-p-regression.pdf')
13 | 


--------------------------------------------------------------------------------
/data/exam.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "name": "A",
 4 |     "english": 60,
 5 |     "math": 70,
 6 |     "gender": "f"
 7 |   },
 8 |   {
 9 |     "name": "B",
10 |     "english": 90,
11 |     "math": 80,
12 |     "gender": "m"
13 |   },
14 |   {
15 |     "name": "C",
16 |     "english": 70,
17 |     "math": 90,
18 |     "gender": "m"
19 |   },
20 |   {
21 |     "name": "D",
22 |     "english": 90,
23 |     "math": 100,
24 |     "gender": "f"
25 |   }
26 | ]


--------------------------------------------------------------------------------
/figures/fig-p/04-p-iris-group.py:
--------------------------------------------------------------------------------
 1 | import statsmodels.api as sm
 2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
 3 | 
 4 | my_group = iris.groupby('Species')                    # for each species,
 5 | my_df = my_group.agg('mean')                          # the mean of each variable and
 6 | my_se = my_group.agg(lambda x: x.std() / len(x)**0.5) # its standard error are computed.
 7 | 
 8 | my_df.plot(kind='bar', yerr=my_se, capsize=5)         # bar chart of the means with my_se as error bars
 9 | 
10 | import matplotlib.pyplot as plt
11 | plt.savefig('04-p-iris-group.pdf')
12 | 


--------------------------------------------------------------------------------
/figures/fig-p/13-p-elbow.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import statsmodels.api as sm
 3 | from sklearn.cluster import KMeans
 4 | 
 5 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
 6 | my_data = iris.iloc[:, 0:4]
 7 | 
 8 | k = range(1, 11)
 9 | my_df = pd.DataFrame({
10 |     'k': k,
11 |     'inertia': [KMeans(k).fit(my_data).inertia_ for k in range(1, 11)]})
12 | my_df.plot(x='k', style='o-', legend=False)
13 | 
14 | import matplotlib.pyplot as plt
15 | plt.savefig('13-p-elbow.pdf')
16 | 


--------------------------------------------------------------------------------
/figures/fig-p/09-p-varimp.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import statsmodels.api as sm
 3 | from sklearn.ensemble import RandomForestClassifier
 4 | 
 5 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
 6 | X, y = iris.iloc[:, 0:4], iris.Species
 7 | 
 8 | my_model = RandomForestClassifier().fit(X, y)
 9 | tmp = pd.Series(my_model.feature_importances_, index=X.columns)
10 | tmp.sort_values().plot(kind='barh')
11 | 
12 | import matplotlib.pyplot as plt
13 | plt.tight_layout()
14 | plt.savefig('09-p-varimp.pdf')
15 | 


--------------------------------------------------------------------------------
/figures/fig-r/07-r-residual.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "07-r-residual.pdf", width = 6, height = 4.5)
 2 | 
 3 | library(caret)
 4 | library(tidyverse)
 5 | my_data <- cars
 6 | my_model <- train(form = dist ~ speed, data = my_data, method = "lm")
 7 | y_ <- my_model %>% predict(my_data)
 8 | my_data$y_ <- y_
 9 | 
10 | my_data %>%
11 |   ggplot(aes(x = speed, y = dist)) +
12 |   geom_point() +
13 |   geom_line(aes(x = speed, y = y_)) +
14 |   geom_linerange(mapping = aes(ymin = y_, ymax = dist), linetype = "dotted")
15 | 


--------------------------------------------------------------------------------
/figures/fig-r/03-r-temp.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "03-r-temp.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | library(tidyverse)
 4 | 
 5 | my_wider <- data.frame(
 6 |   day = c(25, 26, 27),
 7 |   min = c(20, 21, 15),
 8 |   max = c(24, 27, 21))
 9 | 
10 | my_longer <- my_wider %>%
11 |   pivot_longer(-day)
12 | 
13 | my_longer %>%
14 |   ggplot(aes(x = day, y = value,
15 |              color = name)) +
16 |   geom_point() +
17 |   geom_line() +
18 |   ylab("temperature") +
19 |   scale_x_continuous(
20 |     breaks = my_longer$day)
21 | 


--------------------------------------------------------------------------------
/figures/fig-p/03-p-temp.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import pandas as pd
 3 | 
 4 | my_df = pd.DataFrame({
 5 |     'day': [25, 26, 27],
 6 |     'min': [20, 21, 15],
 7 |     'max': [24, 27, 21]})
 8 | 
 9 | my_longer = my_df.melt(id_vars='day')
10 | 
11 | my_wider = my_longer.pivot(
12 |     index='day',
13 |     columns='variable',
14 |     values='value')
15 | 
16 | my_wider.plot(style='o-',
17 |               xticks=my_wider.index,
18 |               ylabel='temperature')
19 | 
20 | plt.savefig('03-p-temp.pdf')
21 | 


--------------------------------------------------------------------------------
/figures/fig-r/08-r-boxplot.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "08-r-boxplot.pdf", width = 6, height = 4.5)
 2 | 
 3 | library(tidyverse)
 4 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki",
 5 |                 "/fromzero/master/data/wine.csv")
 6 | my_data <- read_csv(my_url)
 7 | 
 8 | my_data %>%
 9 |   pivot_longer(-LPRICE2) %>%
10 |   ggplot(aes(x = factor(name, levels = names(my_data[, -1])),
11 |              y = value)) +
12 |   geom_boxplot() +
13 |   stat_summary(fun = mean, geom = "point", size = 3) +
14 |   xlab(NULL)
15 | 


--------------------------------------------------------------------------------
/figures/fig-p/08-p-boxplot-scaled.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.preprocessing import StandardScaler
 3 | 
 4 | my_url = ('https://raw.githubusercontent.com/taroyabuki/' +
 5 |           'fromzero/master/data/wine.csv')
 6 | my_data = pd.read_csv(my_url)
 7 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2']
 8 | 
 9 | pd.DataFrame(StandardScaler().fit_transform(X),
10 |              columns=X.columns).boxplot(showmeans=True)
11 | 
12 | import matplotlib.pyplot as plt
13 | plt.savefig('08-p-boxplot-scaled.pdf')
14 | 


--------------------------------------------------------------------------------
/figures/fig-r/10-r-pr.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "10-r-pr.pdf", width = 6, height = 5)
 2 | 
 3 | library(PRROC)
 4 | library(tidyverse)
 5 | 
 6 | y       <- c(  0,   1,   1,   0,   1,   0,    1,   0,   0,   1)
 7 | y_score <- c(0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5)
 8 | 
 9 | my_pr <- pr.curve(scores.class0 = y_score[y == 1], # scores of the cases with y == 1
10 |                   scores.class1 = y_score[y == 0], # scores of the cases with y == 0
11 |                   curve = TRUE)
12 | plot(my_pr,
13 |      xlab = "Recall", ylab = "Precision",
14 |      legend = FALSE)
15 | 


--------------------------------------------------------------------------------
/figures/fig-p/10-p-roc.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | y       = np.array([  0,   1,   1,   0,   1,   0,    1,   0,   0,   1])
 3 | y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])
 4 | 
 5 | from sklearn.metrics import roc_curve, RocCurveDisplay
 6 | 
 7 | my_fpr, my_tpr, _ = roc_curve(y_true=y,
 8 |                               y_score=y_score,
 9 |                               pos_label=1)
10 | RocCurveDisplay(fpr=my_fpr, tpr=my_tpr).plot()
11 | 
12 | import matplotlib.pyplot as plt
13 | plt.savefig('10-p-roc.pdf')
14 | 


--------------------------------------------------------------------------------
/figures/fig-r/07-r-regression.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "07-r-regression.pdf", width = 6, height = 4.5)
 2 | 
 3 | library(tidyverse)
 4 | 
 5 | my_data <- cars
 6 | tmp <- data.frame(speed = 21.5, dist = 67)
 7 | my_data %>% ggplot(aes(x = speed, y = dist)) +
 8 |   coord_cartesian(xlim = c(4, 25), ylim = c(0, 120)) +
 9 |   geom_point() +
10 |   stat_smooth(formula = y ~ x, method = "lm") +
11 |   geom_linerange(data = tmp, aes(ymin = -9, ymax = dist),  linetype = "dotted") +
12 |   geom_linerange(data = tmp, aes(xmin =  0, xmax = speed), linetype = "dotted")
13 | 


--------------------------------------------------------------------------------
/figures/fig-r/10-r-roc.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "10-r-roc.pdf", width = 6, height = 5)
 2 | 
 3 | library(PRROC)
 4 | library(tidyverse)
 5 | 
 6 | y       <- c(  0,   1,   1,   0,   1,   0,    1,   0,   0,   1)
 7 | y_score <- c(0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5)
 8 | 
 9 | my_roc <- roc.curve(scores.class0 = y_score[y == 1],
10 |                     scores.class1 = y_score[y == 0],
11 |                     curve = TRUE)
12 | my_roc %>% plot(xlab = "False Positive Rate",
13 |                 ylab = "True Positive Rate",
14 |                 legend = FALSE)
15 | 


--------------------------------------------------------------------------------
/figures/fig-p/13-p-heatmap.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import seaborn as sns
 3 | 
 4 | my_data = pd.DataFrame(
 5 |     {'language': [  0,  20,  20,  25,  22,  17],
 6 |      'english':  [  0,  20,  40,  20,  24,  18],
 7 |      'math':     [100,  20,   5,  30,  17,  25],
 8 |      'science':  [  0,  20,   5,  25,  16,  23],
 9 |      'society':  [  0,  20,  30,   0,  21,  17]},
10 |     index=       ['A', 'B', 'C', 'D', 'E', 'F'])
11 | 
12 | sns.clustermap(my_data, z_score=1) # standardize each column
13 | 
14 | import matplotlib.pyplot as plt
15 | plt.savefig('13-p-heatmap.pdf')
16 | 


--------------------------------------------------------------------------------
/figures/fig-r/13-r-heatmap.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "13-r-heatmap.pdf", width = 6, height = 5.5)
 2 | 
 3 | library(tidyverse)
 4 | 
 5 | my_data <- data.frame(
 6 |   language  = c(  0,  20,  20,  25,  22,  17),
 7 |   english   = c(  0,  20,  40,  20,  24,  18),
 8 |   math      = c(100,  20,   5,  30,  17,  25),
 9 |   science   = c(  0,  20,   5,  25,  16,  23),
10 |   society   = c(  0,  20,  30,   0,  21,  17),
11 |   row.names = c("A", "B", "C", "D", "E", "F"))
12 | 
13 | my_data %>% scale %>% # standardize each column
14 |   gplots::heatmap.2(cexRow = 1, cexCol = 1) # draw with the label sizes specified
15 | 


--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart3.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "10-r-rpart3.pdf", width = 6, height = 5.5)
 2 | 
 3 | library(caret)
 4 | library(tidyverse)
 5 | 
 6 | my_url <- str_c("https://raw.githubusercontent.com",
 7 |                 "/taroyabuki/fromzero/master/data/titanic.csv")
 8 | my_data <- read_csv(my_url)
 9 | 
10 | my_model3 <- train(form = Survived ~ Class, data = my_data, method = "rpart2",
11 |                    tuneGrid = data.frame(maxdepth = 2),
12 |                    trControl = trainControl(method = "LOOCV"))
13 | rpart.plot::rpart.plot(my_model3$finalModel, extra = 1)
14 | 


--------------------------------------------------------------------------------
/figures/fig-r/10-r-titanic-tree.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "10-r-titanic-tree.pdf", width = 6, height = 5)
 2 | 
 3 | library(caret)
 4 | library(tidyverse)
 5 | 
 6 | my_url <- str_c("https://raw.githubusercontent.com",
 7 |                 "/taroyabuki/fromzero/master/data/titanic.csv")
 8 | my_data <- read_csv(my_url)
 9 | 
10 | my_model <- train(form = Survived ~ ., data = my_data, method = "rpart2",
11 |                   tuneGrid = data.frame(maxdepth = 2),
12 |                   trControl = trainControl(method = "none"))
13 | rpart.plot::rpart.plot(my_model$finalModel, extra = 1)
14 | 


--------------------------------------------------------------------------------
/figures/fig-r/13-r-biplot.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "13-r-biplot.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | library(tidyverse)
 4 | 
 5 | my_data <- data.frame(
 6 |   language = c(  0, 20, 20, 25, 22, 17),
 7 |   english  = c(  0, 20, 40, 20, 24, 18),
 8 |   math     = c(100, 20,  5, 30, 17, 25),
 9 |   science  = c(  0, 20,  5, 25, 16, 23),
10 |   society  = c(  0, 20, 30,  0, 21, 17))
11 | row.names(my_data) <- c("A", "B", "C", "D", "E", "F")
12 | 
13 | my_result <- my_data %>% prcomp # principal component analysis (no standardization)
14 | 
15 | my_result %>% ggbiplot::ggbiplot(labels = row.names(my_data), scale = 0)
16 | 


--------------------------------------------------------------------------------
/docker/jupyter/README.md:
--------------------------------------------------------------------------------
 1 | # Jupyter Notebook用のコンテナ
 2 | 
 3 | - Docker Hub: https://hub.docker.com/r/taroyabuki/jupyter
 4 | - 起動方法（3種類）
 5 |     - [jupyter.sh](../jupyter.sh)を実行する．
 6 |     - `wget https://raw.githubusercontent.com/taroyabuki/fromzero/master/docker/jupyter.sh`の後で，`sh jupyter.sh`
 7 |     - `git clone https://github.com/taroyabuki/fromzero.git`の後で，`sh fromzero/docker/jupyter.sh`
 8 | - Jupyter Notebookへのアクセス：http://localhost:8888
 9 | - Appleシリコン（Apple Silicon）についての注意
10 |     - Docker Desktop 4.4.2で動作を確認しました．（4.1から4.3では動作しませんでした．）
11 |     - 11章のコードは実行できません．11章を読む際には，Google Colabを使ってください．
12 | 


--------------------------------------------------------------------------------
/figures/fig-r/07-r-lm.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "07-r-lm.pdf", width = 6, height = 4.5)
 2 | # Plot the cars data together with the linear model fitted to it.
 3 | library(caret)
 4 | library(tidyverse)
 5 | my_data <- cars
 6 | 
 7 | my_model <- train(form = dist ~ speed, # model formula
 8 |                   data = my_data,      # data
 9 |                   method = "lm")       # method
10 | # f maps speed values to the model's predicted dist; stat_function draws it as a curve.
11 | f <- function(x) { my_model %>% predict(data.frame(speed = x)) }
12 | 
13 | my_data %>%
14 |   ggplot(aes(x = speed,
15 |              y = dist,
16 |              color = "data")) +
17 |   geom_point() +
18 |   stat_function(
19 |     fun = f,
20 |     mapping = aes(color = "model"))
21 | 


--------------------------------------------------------------------------------
/figures/fig-r/08-r-boxplot-scaled.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "08-r-boxplot-scaled.pdf", width = 6, height = 4.5)
 2 | # Box plots of the standardized wine.csv columns, with each column's mean overlaid as a point.
 3 | library(tidyverse)
 4 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki",
 5 |                 "/fromzero/master/data/wine.csv")
 6 | my_data <- read_csv(my_url)
 7 | 
 8 | my_data %>%
 9 |   mutate_if(is.numeric, scale) %>% # standardize the numeric columns
10 |   pivot_longer(-LPRICE2) %>%
11 |   ggplot(aes(x = factor(name, levels = names(my_data[, -1])), # keep the original column order on the x axis
12 |              y = value)) +
13 |   geom_boxplot() +
14 |   stat_summary(fun = mean, geom = "point", size = 3) +
15 |   xlab(NULL)
16 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-conf.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from statsmodels.stats.proportion import binom_test
 4 | # Plot the binomial-test p-value for 2 wins in 15 draws as a function of the hypothesized probability.
 5 | a = 0.05 # significance level
 6 | tmp = np.linspace(0, 1, 100)
 7 | 
 8 | my_df = pd.DataFrame({
 9 |     't': tmp,                                                  # probability of winning
10 |     'q': a,                                                    # horizontal line
11 |     'p': [binom_test(count=2, nobs=15, prop=t) for t in tmp]}) # p-value
12 | 
13 | my_df.plot(x='t', legend=None, xlabel=r'$\theta$', ylabel=r'p-value')
14 | 
15 | import matplotlib.pyplot as plt
16 | plt.savefig('04-p-conf.pdf')
17 | 


--------------------------------------------------------------------------------
/figures/fig-p/13-p-biplot.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | my_data = pd.DataFrame(
 3 |     {'language': (  0, 20, 20, 25, 22, 17),
 4 |      'english':  (  0, 20, 40, 20, 24, 18),
 5 |      'math':     (100, 20,  5, 30, 17, 25),
 6 |      'science':  (  0, 20,  5, 25, 16, 23),
 7 |      'society':  (  0, 20, 30,  0, 21, 17)},
 8 |     index=['A', 'B', 'C', 'D', 'E', 'F'])
 9 | # Fit a PCA with five components (one per column) and draw its biplot.
10 | from pca import pca
11 | my_model = pca(n_components=5)
12 | my_result = my_model.fit_transform(my_data)
13 | 
14 | my_model.biplot(legend=False) # biplot
15 | 
16 | import matplotlib.pyplot as plt
17 | plt.savefig('13-p-biplot.pdf')
18 | 


--------------------------------------------------------------------------------
/figures/fig-r/09-r-rpart2.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "09-r-rpart2.pdf", width = 6, height = 5.5)
 2 | # Plot a decision tree (maximum depth 3) for iris Species with explicit rpart control settings.
 3 | library(caret)
 4 | my_data <- iris
 5 | # The control argument is not a train() tuning parameter; presumably caret forwards it to rpart - confirm.
 6 | my_model <- train(form = Species ~ ., data = my_data, method = "rpart2",
 7 |                   trControl = trainControl(method = "none"),
 8 |                   tuneGrid = data.frame(maxdepth = 3),
 9 |                   control = rpart::rpart.control(cp = 0.01,
10 |                                                  minbucket = 5,
11 |                                                  minsplit = 2))
12 | 
13 | rpart.plot::rpart.plot(
14 |   my_model$finalModel, extra = 1)
15 | 


--------------------------------------------------------------------------------
/figures/fig-p/07-p-residual.py:
--------------------------------------------------------------------------------
 1 | import statsmodels.api as sm
 2 | from sklearn.linear_model import LinearRegression
 3 | 
 4 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
 5 | X, y = my_data[['speed']], my_data['dist']
 6 | 
 7 | my_model = LinearRegression()
 8 | my_model.fit(X, y)
 9 | y_ = my_model.predict(X)
10 | my_data['y_'] = y_
11 | 
12 | ax = my_data.plot(x='speed', y='dist', style='o', legend=False)
13 | my_data.plot(x='speed', y='y_', style='-', legend=False, ax=ax)
14 | ax.vlines(x=X, ymin=y, ymax=y_, linestyles='dotted')
15 | 
16 | import matplotlib.pyplot as plt
17 | plt.savefig('07-p-residual.pdf')
18 | 


--------------------------------------------------------------------------------
/data/exam.xml:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding='UTF-8'?>
 2 | <root xmlns='https://www.example.net/ns/1.0'>
 3 |  <description>experiment results</description>
 4 |  <records>
 5 |   <record english='60' math='70'>
 6 |    <gender>f</gender>
 7 |    <name>A</name>
 8 |   </record>
 9 |   <record english='90' math='80'>
10 |    <gender>m</gender>
11 |    <name>B</name>
12 |   </record>
13 |   <record english='70' math='90'>
14 |    <gender>m</gender>
15 |    <name>C</name>
16 |   </record>
17 |   <record english='90' math='100'>
18 |    <gender>f</gender>
19 |    <name>D</name>
20 |   </record>
21 |  </records>
22 | </root>


--------------------------------------------------------------------------------
/figures/fig-r/07-r-knn.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "07-r-knn.pdf", width = 6, height = 4.5)
 2 | # Plot the cars data together with a k-nearest-neighbours regression (k = 5) fitted to it.
 3 | library(caret)
 4 | library(tidyverse)
 5 | my_data <- cars
 6 | my_model <- train(form = dist ~ speed, # model formula
 7 |                   data = my_data,      # data
 8 |                   method = "knn",      # method
 9 |                   tuneGrid = data.frame(k = 5))
10 | # f maps speed values to the model's predicted dist; stat_function draws it as a curve.
11 | f <- function(x) { my_model %>% predict(data.frame(speed = x)) }
12 | 
13 | my_data %>%
14 |   ggplot(aes(x = speed,
15 |              y = dist,
16 |              color = "data")) +
17 |   geom_point() +
18 |   stat_function(
19 |     fun = f,
20 |     mapping = aes(color = "model"))
21 | 


--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-path.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "08-r-enet-path.pdf", width = 6, height = 4.5)
 2 | # Plot the glmnet coefficient paths against log lambda for the standardized wine data.
 3 | library(tidyverse)
 4 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki",
 5 |                 "/fromzero/master/data/wine.csv")
 6 | my_data <- read_csv(my_url)
 7 | 
 8 | library(ggfortify)
 9 | library(glmnetUtils)
10 | 
11 | my_data2 <- my_data %>%
12 |   mutate_all(scale) # standardize every column
13 | 
14 | B <- 0.1 # passed to glmnet as alpha below
15 | 
16 | glmnet(
17 |   form = LPRICE2 ~ .,
18 |   data = my_data2,
19 |   alpha = B) %>%
20 |   autoplot(xvar = "lambda") +
21 |   xlab("log A ( = log lambda)") +
22 |   theme(legend.position =
23 |           c(0.15, 0.25))
24 | 


--------------------------------------------------------------------------------
/figures/fig-r/13-r-pca-clusters.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "13-r-pca-clusters.pdf", width = 6, height = 4.5)
 2 | 
 3 | # Scatter plot of the first two principal components of the standardized
 4 | # iris measurements: colour encodes the cluster, point shape the species.
 5 | library(tidyverse)
 6 | my_data <- iris[, -5] %>% scale # drop Species, then standardize
 7 | 
 8 | my_result <- prcomp(my_data)$x %>% as.data.frame # principal component analysis
 9 | 
10 | # Non-hierarchical clustering (k-means, 3 clusters).
11 | # my_data is already standardized above, so it is not scaled a second time.
12 | my_result$cluster <- (my_data %>% kmeans(3))$cluster %>% as.factor
13 | 
14 | # Hierarchical clustering alternative
15 | #my_result$cluster <- my_data %>% dist %>% hclust %>% cutree(3) %>% as.factor
16 | 
17 | my_result %>%
18 |   ggplot(aes(x = PC1, y = PC2, color = cluster)) + # colour encodes the cluster
19 |   geom_point(shape = iris$Species) +               # shape encodes the species
20 |   theme(legend.position = "none")
18 | 


--------------------------------------------------------------------------------
/figures/fig-p/10-p-pr.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | y       = np.array([  0,   1,   1,   0,   1,   0,    1,   0,   0,   1])
 3 | y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])
 4 | 
 5 | from sklearn.metrics import precision_recall_curve, PrecisionRecallDisplay
 6 | 
 7 | my_precision, my_recall, _ = precision_recall_curve(y_true=y,
 8 |                                                     probas_pred=y_score,
 9 |                                                     pos_label=1)
10 | PrecisionRecallDisplay(precision=my_precision, recall=my_recall).plot()
11 | 
12 | import matplotlib.pyplot as plt
13 | plt.savefig('10-p-pr.pdf')
14 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-conf.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "04-r-conf.pdf", width = 6, height = 5.5)
 2 | # Exact binomial test for 2 wins in 15 draws, with the p-value plot enabled.
 3 | library(exactci)
 4 | 
 5 | a <- 0.05                              # significance level
 6 | binom.exact(x = 2,                     # number of wins
 7 |             n = 15,                    # number of draws
 8 |             p = 4 / 10,                # probability of winning (hypothesis)
 9 |             plot = TRUE,               # plot the p-values
10 |             conf.level = 1 - a,        # confidence level (default)
11 |             tsmethod = "minlike",      # method used for the two-sided p-value
12 |             alternative = "two.sided") # two-sided test (default)
13 |                                        # use 'less' for a left-tailed test
14 |                                        # use 'greater' for a right-tailed test
15 | 


--------------------------------------------------------------------------------
/figures/fig-r/07-r-boxplot.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "07-r-boxplot.pdf", width = 6, height = 5.5)
 2 | # Box plots comparing the squared LOOCV prediction errors of lm and knn (k = 5) on cars.
 3 | library(caret)
 4 | my_data <- cars
 5 | 
 6 | my_lm_model <- train(form = dist ~ speed, data = my_data, method = "lm",
 7 |                      trControl = trainControl(method = "LOOCV"))
 8 | 
 9 | my_knn_model <- train(form = dist ~ speed, data = my_data, method = "knn",
10 |                       tuneGrid = data.frame(k = 5),
11 |                       trControl = trainControl(method = "LOOCV"))
12 | y <- my_data$dist
13 | # NOTE(review): assumes $pred$pred is in the same row order as my_data - confirm.
14 | my_df <- data.frame(
15 |   lm  = (y - my_lm_model$pred$pred)^2,
16 |   knn = (y - my_knn_model$pred$pred)^2)
17 | 
18 | boxplot(my_df, ylab = "r^2")
19 | 


--------------------------------------------------------------------------------
/figures/fig-p/09-p-rpart.py:
--------------------------------------------------------------------------------
 1 | import graphviz
 2 | import statsmodels.api as sm
 3 | from sklearn import tree
 4 | # Fit a depth-2 decision tree on iris and render it with Graphviz.
 5 | my_data = sm.datasets.get_rdataset('iris', 'datasets').data
 6 | X, y = my_data.iloc[:, 0:4], my_data.Species # first four columns as features, Species as target
 7 | 
 8 | my_model = tree.DecisionTreeClassifier(max_depth=2, random_state=0)
 9 | my_model.fit(X, y)
10 | 
11 | my_dot = tree.export_graphviz(decision_tree=my_model,
12 |                               out_file=None, # return the DOT source as a string
13 |                               feature_names=X.columns,
14 |                               class_names=my_model.classes_,
15 |                               filled=True)
16 | my_graph = graphviz.Source(my_dot)
17 | my_graph.render('09-p-rpart') # presumably writes 09-p-rpart.pdf (graphviz default format) - confirm
18 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-iris-group.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "04-r-iris-group.pdf", width = 6, height = 5)
 2 | 
 3 | # Grouped bar chart of the per-species mean of every iris measurement,
 4 | # with error bars of one standard error.
 5 | library(tidyverse)
 6 | my_group <- iris %>% group_by(Species)
 7 | 
 8 | my_df <- my_group %>%
 9 |   summarize(across(everything(), mean)) %>% # mean of each column
10 |   pivot_longer(-Species)
11 | 
12 | # Standard error of the mean (sqrt() instead of the non-idiomatic `**0.5`)
13 | f <- function(x) { sd(x) / sqrt(length(x)) }
14 | 
15 | tmp <- my_group %>%
16 |   summarize(across(everything(), f)) %>% # standard error of each column
17 |   pivot_longer(-Species)
18 | 
19 | # tmp is built by the same group/summarize/pivot steps as my_df, so its rows
20 | # are assumed to line up with my_df's.
21 | my_df$se <- tmp$value
22 | my_df %>%
23 |   ggplot(aes(x = Species, y = value, fill = name)) +
24 |   geom_col(position = "dodge") +
25 |   geom_errorbar(aes(ymin = value - se, ymax = value + se), position = "dodge")
22 | 


--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart1.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "10-r-rpart1.pdf", width = 6, height = 5.5)
 2 | 
 3 | # Plot a decision tree (maximum depth 2) predicting Survived from Class alone,
 4 | # passing the predictor and the outcome to train() separately.
 5 | library(caret)
 6 | library(tidyverse)
 7 | 
 8 | my_url <- str_c("https://raw.githubusercontent.com",
 9 |                 "/taroyabuki/fromzero/master/data/titanic.csv")
10 | my_data <- read_csv(my_url)
11 | 
12 | X <- my_data %>% select(Class)
13 | y <- my_data$Survived
14 | 
15 | # Suppress warnings while training (the original comment attributes them to
16 | # "tribble", presumably a typo for tibble). options() returns the previous
17 | # settings, so the caller's warn level is restored instead of forcing 0.
18 | my_old_options <- options(warn = -1)
19 | my_model1 <- train(x = X, y = y, method = "rpart2",
20 |                    tuneGrid = data.frame(maxdepth = 2),
21 |                    trControl = trainControl(method = "LOOCV"))
22 | options(my_old_options) # show warnings again as before
23 | rpart.plot::rpart.plot(my_model1$finalModel, extra = 1)
19 | 


--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-split.py:
--------------------------------------------------------------------------------
 1 | from pmdarima.datasets import airpassengers
 2 | my_data = airpassengers.load_airpassengers()
 3 | # Plot the AirPassengers series split into the first k points (train) and the rest (test).
 4 | n = len(my_data)
 5 | k = 108 # size of the training part
 6 | 
 7 | import pandas as pd
 8 | my_ds = pd.date_range(
 9 |     start='1949/01/01',
10 |     end='1960/12/01',
11 |     freq='MS') # month-start dates
12 | my_df = pd.DataFrame({
13 |     'ds': my_ds,
14 |     'x': range(n),
15 |     'y': my_data},
16 |     index=my_ds)
17 | 
18 | my_train = my_df[        :k]
19 | my_test  = my_df[-(n - k): ]
20 | y = my_test.y # not referenced again in this script
21 | 
22 | import matplotlib.pyplot as plt
23 | plt.plot(my_train.y, label='train')
24 | plt.plot(my_test.y,  label='test')
25 | plt.legend()
26 | plt.savefig('12-p-airpassengers-split.pdf')
27 | 


--------------------------------------------------------------------------------
/figures/fig-r/11-r-h2o-wine.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "11-r-h2o-wine.pdf", width = 6, height = 5.5)
 2 | # Run H2O AutoML on wine.csv and plot predicted against actual LPRICE2.
 3 | library(h2o)
 4 | library(tidyverse)
 5 | 
 6 | h2o.init()
 7 | h2o.no_progress()
 8 | 
 9 | my_url <- str_c("https://raw.githubusercontent.com",
10 |                 "/taroyabuki/fromzero/master/data/wine.csv")
11 | my_data <- read_csv(my_url)
12 | my_frame <- as.h2o(my_data)
13 | 
14 | my_model <- h2o.automl(
15 |     y = "LPRICE2",             # name of the output variable
16 |     training_frame = my_frame, # H2OFrame
17 |     max_runtime_secs = 60)     # training time (seconds)
18 | 
19 | min(my_model@leaderboard$rmse)
20 | # Predictions are made on the same frame the model was trained on.
21 | tmp <- my_model %>% predict(my_frame) %>%
22 |   as.data.frame
23 | y_ <- tmp$predict
24 | y  <- my_data$LPRICE2
25 | 
26 | plot(y, y_)
27 | 


--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-split.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "12-r-airpassengers-split.pdf", width = 5.83, height = 4.13)
 2 | # Plot the AirPassengers series coloured by train/test membership.
 3 | my_data <- as.vector(AirPassengers)
 4 | 
 5 | n <- length(my_data) # number of observations (144)
 6 | k <- 108             # number of training observations
 7 | 
 8 | library(tidyverse)
 9 | library(tsibble)
10 | 
11 | my_ds <- seq(
12 |   from = yearmonth("1949/01"),
13 |   to   = yearmonth("1960/12"),
14 |   by   = 1)
15 | my_label <- rep(
16 |   c("train", "test"),
17 |   c(k, n - k))
18 | my_df <- tsibble(
19 |   ds    = my_ds,
20 |   x     = 0:(n - 1),
21 |   y     = my_data,
22 |   label = my_label,
23 |   index = ds) # specify the date-time column
24 | 
25 | my_plot <- my_df %>%
26 |   ggplot(aes(x = ds, y = y, color = label)) +
27 |   geom_line()
28 | my_plot
29 | 


--------------------------------------------------------------------------------
/figures/fig-r/07-r-polynomial.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "07-r-polynomial.pdf", width = 6, height = 4.5)
 2 | # Fit a degree-5 polynomial to six selected rows of cars and plot it over the full data.
 3 | library(caret)
 4 | library(tidyverse)
 5 | my_data <- cars
 6 | my_idx <- c(2, 11, 27, 34, 39, 44) # row indices of the six sample points
 7 | my_sample <- my_data[my_idx, ]
 8 | 
 9 | my_model <- train(form = dist ~ poly(speed, degree = 5, raw = TRUE),
10 |                   data = my_sample,
11 |                   method = "lm")
12 | # f maps speed values to the model's predicted dist; stat_function draws it as a curve.
13 | f <- function(x) { my_model %>% predict(data.frame(speed = x)) }
14 | 
15 | my_data %>%
16 |   ggplot(aes(x = speed, y = dist, color = "data")) +
17 |   geom_point() +
18 |   geom_point(data = my_sample, mapping = aes(color = "sample")) +
19 |   stat_function(fun = f, mapping = aes(color = "model")) +
20 |   coord_cartesian(ylim = c(0, 120)) # limit the visible y range without dropping data
21 | 


--------------------------------------------------------------------------------
/figures/fig-p/07-p-knn.py:
--------------------------------------------------------------------------------
 1 | # Preparation
 2 | import statsmodels.api as sm
 3 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
 4 | X, y = my_data[['speed']], my_data['dist']
 5 | 
 6 | # Training
 7 | from sklearn.neighbors import KNeighborsRegressor
 8 | my_model = KNeighborsRegressor()
 9 | my_model.fit(X, y)
10 | 
11 | # Prepare the visualization: predict on 100 evenly spaced speeds across the observed range
12 | import numpy as np
13 | import pandas as pd
14 | tmp = pd.DataFrame({'speed': np.linspace(min(my_data.speed),
15 |                                          max(my_data.speed),
16 |                                          num=100)})
17 | tmp['model'] = my_model.predict(tmp)
18 | 
19 | pd.concat([my_data, tmp]).plot(
20 |     x='speed', style=['o', '-']) # 'o' for the dist column, '-' for the model column
21 | import matplotlib.pyplot as plt
22 | plt.savefig('07-p-knn.pdf')
23 | 


--------------------------------------------------------------------------------
/figures/fig-r/04-r-pvalue1.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "04-r-pvalue1.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | # Plot the binomial probabilities for n draws and highlight the outcomes
 4 | # that are at most as probable as winning exactly twice.
 5 | library(tidyverse)
 6 | 
 7 | t <- 4 / 10               # probability of winning
 8 | n <- 15                   # number of draws
 9 | x <- 0:n                  # number of wins
10 | my_pr  <- dbinom(x, n, t) # probability of winning x times
11 | my_pr2 <- dbinom(2, n, t) # probability of winning twice
12 | 
13 | my_data <- data.frame(x = x) %>%
14 |   mutate(probability = my_pr) %>%
15 |   mutate(color = my_pr <= my_pr2) # probability is at most that of winning twice
16 | 
17 | # The stray empty argument after aes() in geom_linerange() has been removed.
18 | my_data %>% ggplot(aes(x = x, y = probability, color = color)) +
19 |   geom_point(size = 3) +
20 |   geom_linerange(aes(ymin = 0, ymax = probability)) + # vertical lines
21 |   geom_hline(yintercept = my_pr2) +                   # horizontal line
22 |   theme(legend.position = "none")
20 | 


--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart2.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "10-r-rpart2.pdf", width = 6, height = 5.5)
 2 | # Plot a decision tree (maximum depth 2) for Survived using dummy-encoded Class columns.
 3 | library(caret)
 4 | library(tidyverse)
 5 | 
 6 | my_url <- str_c("https://raw.githubusercontent.com",
 7 |                 "/taroyabuki/fromzero/master/data/titanic.csv")
 8 | my_data <- read_csv(my_url)
 9 | # Encode Class as dummy variables, then re-attach the Survived column.
10 | my_enc <- my_data %>% dummyVars(formula = Survived ~ Class)
11 | my_data2 <- my_enc %>%
12 |   predict(my_data) %>%
13 |   as.data.frame %>%
14 |   mutate(Survived = my_data$Survived)
15 | 
16 | my_model2 <- train(form = Survived ~ ., data = my_data2, method = "rpart2",
17 |                    tuneGrid = data.frame(maxdepth = 2),
18 |                    trControl = trainControl(method = "LOOCV"))
19 | rpart.plot::rpart.plot(my_model2$finalModel, extra = 1)
20 | 


--------------------------------------------------------------------------------
/figures/fig-p/07-p-lm.py:
--------------------------------------------------------------------------------
 1 | # Prepare the data
 2 | import statsmodels.api as sm
 3 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
 4 | X, y = my_data[['speed']], my_data['dist']
 5 | 
 6 | # Specify the model
 7 | from sklearn.linear_model import LinearRegression
 8 | my_model = LinearRegression()
 9 | 
10 | # Fit the model to the data.
11 | my_model.fit(X, y)
12 | # Predict on 100 evenly spaced speeds across the observed range for plotting.
13 | import numpy as np
14 | import pandas as pd
15 | tmp = pd.DataFrame({'speed': np.linspace(min(my_data.speed),
16 |                                          max(my_data.speed),
17 |                                          100)})
18 | tmp['model'] = my_model.predict(tmp)
19 | 
20 | pd.concat([my_data, tmp]).plot(
21 |     x='speed', style=['o', '-']) # 'o' for the dist column, '-' for the model column
22 | 
23 | import matplotlib.pyplot as plt
24 | plt.savefig('07-p-lm.pdf')
25 | 


--------------------------------------------------------------------------------
/figures/fig-p/11-p-h2o-wine.py:
--------------------------------------------------------------------------------
 1 | import h2o
 2 | import pandas as pd
 3 | from h2o.automl import H2OAutoML
 4 | # Run H2O AutoML on wine.csv and scatter-plot predicted against actual LPRICE2.
 5 | h2o.init()
 6 | h2o.no_progress()
 7 | 
 8 | my_url = ('https://raw.githubusercontent.com'
 9 |           '/taroyabuki/fromzero/master/data/wine.csv')
10 | my_data = pd.read_csv(my_url)
11 | my_frame = h2o.H2OFrame(my_data)
12 | 
13 | my_model = H2OAutoML(
14 |     max_runtime_secs=60) # training time limit (seconds)
15 | my_model.train(
16 |     y='LPRICE2', # name of the output variable
17 |     training_frame=my_frame)
18 | 
19 | print(my_model.leaderboard['rmse'].min())
20 | # Predictions are made on the same frame the model was trained on.
21 | tmp = h2o.as_list(
22 |     my_model.predict(my_frame))
23 | 
24 | pd.DataFrame({
25 |     'y': my_data['LPRICE2'],
26 |     'y_': tmp['predict']}
27 | ).plot('y', 'y_', kind='scatter')
28 | 
29 | import matplotlib.pyplot as plt
30 | plt.savefig('11-p-h2o-wine.pdf')
31 | 


--------------------------------------------------------------------------------
/figures/fig-p/04-p-pvalue1.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from scipy import stats
 4 | 
 5 | t = 4 / 10                        # 当たる確率
 6 | n = 15                            # くじを引いた回数
 7 | x = np.array(range(0, n + 1))     # 当たった回数
 8 | my_pr  = stats.binom.pmf(x, n, t) # x回当たる確率
 9 | my_pr2 = stats.binom.pmf(2, n, t) # 2回当たる確率
10 | 
11 | my_data = pd.DataFrame({'x': x, 'y1': my_pr, 'y2': my_pr})
12 | my_data.loc[my_pr >  my_pr2, 'y1'] = np.nan # 当たる確率が，2回当たる確率超過
13 | my_data.loc[my_pr <= my_pr2, 'y2'] = np.nan # 当たる確率が，2回当たる確率以下
14 | ax = my_data.plot(x='x', style='o', ylabel='probability', legend=False)
15 | ax.hlines(y=my_pr2, xmin=0, xmax=15)    # 水平線
16 | ax.vlines(x=x,      ymin=0, ymax=my_pr) # 垂直線
17 | 
18 | import matplotlib.pyplot as plt
19 | plt.savefig('04-p-pvalue1.pdf')
20 | 


--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-path.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | my_url = ('https://raw.githubusercontent.com/taroyabuki/' +
 3 |           'fromzero/master/data/wine.csv')
 4 | my_data = pd.read_csv(my_url)
 5 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2']
 6 | # Plot the elastic-net coefficient paths against log alpha for the standardized wine data.
 7 | import numpy as np
 8 | from scipy.stats import zscore
 9 | from sklearn.linear_model import enet_path
10 | 
11 | As = np.e**np.arange(2, -5.5, -0.1) # grid passed as alphas; exponents decrease from 2 in steps of 0.1
12 | B  = 0.1                            # passed as l1_ratio
13 | 
14 | _, my_path, _ = enet_path(
15 |     zscore(X), zscore(y), # standardize inputs and output
16 |     alphas=As,
17 |     l1_ratio=B)
18 | 
19 | pd.DataFrame(
20 |     my_path.T,
21 |     columns=X.columns,
22 |     index=np.log(As)
23 | ).plot(
24 |     xlabel='log A ( = log alpha)',
25 |     ylabel='Coefficients')
26 | 
27 | import matplotlib.pyplot as plt
28 | plt.savefig('08-p-enet-path.pdf')
29 | 


--------------------------------------------------------------------------------
/figures/fig-r/08-r-nnet-3-2.R:
--------------------------------------------------------------------------------
 1 | library(tidyverse)
 2 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki",
 3 |                 "/fromzero/master/data/wine.csv")
 4 | my_data <- read_csv(my_url)
 5 | # Train a neural network with hidden layers of 3 and 2 units on wine.csv and plot the final model.
 6 | library(caret)
 7 | my_model <- train(form = LPRICE2 ~ .,
 8 |                   data = my_data,
 9 |                   method = "neuralnet",
10 |                   preProcess = c("center", "scale"),
11 |                   tuneGrid = data.frame(layer1 = 3,
12 |                                         layer2 = 2,
13 |                                         layer3 = 0),
14 |                   trControl = trainControl(method = "repeatedcv",
15 |                                            number = 5, repeats = 10))
16 | plot(my_model$finalModel)
17 | file.rename("Rplots.pdf", "08-r-nnet-3-2.pdf") # no pdf() call here; presumably Rplots.pdf is the default device output when run non-interactively - confirm
18 | 


--------------------------------------------------------------------------------
/addendum/07.03.02/1+3x+N(0,2x).csv:
--------------------------------------------------------------------------------
 1 | x,y
 2 | 1,2.4362828056041783
 3 | 2,13.320701642205943
 4 | 3,6.254185478549559
 5 | 4,12.158111887716473
 6 | 5,-7.54294281288999
 7 | 6,20.03682705412517
 8 | 7,-20.367878122873076
 9 | 8,37.62187087244209
10 | 9,28.888791768212027
11 | 10,23.865697903729448
12 | 11,20.35783532598032
13 | 12,24.90149878334255
14 | 13,40.93320462020407
15 | 14,83.5879864420934
16 | 15,45.15357518820319
17 | 16,8.733098913685623
18 | 17,82.25121873688809
19 | 18,64.23168654178178
20 | 19,58.72577858048793
21 | 20,12.867497576908818
22 | 21,52.88624028231115
23 | 22,154.2860167538335
24 | 23,176.96876331325072
25 | 24,158.57606765038622
26 | 25,59.5796921671421
27 | 26,26.85539442891543
28 | 27,73.23288430129338
29 | 28,51.56442153204847
30 | 29,49.82876803737508
31 | 30,148.41719344129336
32 | 


--------------------------------------------------------------------------------
/figures/fig/図3.1.md:
--------------------------------------------------------------------------------
 1 | ```puml
 2 | @startuml
 3 | skinparam {
 4 |   defaultFontName Hiragino Kaku Gothic ProN
 5 |   monochrome true
 6 |   shadowing false
 7 | }
 8 | 
 9 | package R {
10 | rectangle y as y1 #white
11 | rectangle x as x1 #white
12 | rectangle list1a #white;line:white as "
13 |   |  foo  |  bar  |  baz  |"
14 | rectangle list1b #white;line:white as "
15 |   |  foo  |  bar  |  baz  |"
16 | 
17 | x1 --> list1a
18 | y1 --> list1b
19 | x1 -[dotted]> y1 : "y <- x"
20 | list1a -[dotted]> list1b: copy
21 | }
22 | 
23 | package Python {
24 | rectangle y as y2 #white
25 | rectangle x as x2 #white
26 | rectangle list2 #white;line:white as "
27 |   |  foo  |  bar  |  baz  |"
28 | 
29 | x2 --> list2
30 | y2 --> list2
31 | x2 -[dotted]> y2 : "y = x"
32 | }
33 | @enduml
34 | ```
35 | 


--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-tuning.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "08-r-enet-tuning.pdf", width = 6, height = 4.5)
 2 | # Plot the LOOCV tuning results of glmnet over a grid of lambda (A) and alpha (B) on wine.csv.
 3 | library(caret)
 4 | library(tidyverse)
 5 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki",
 6 |                 "/fromzero/master/data/wine.csv")
 7 | my_data <- read_csv(my_url)
 8 | 
 9 | As <- seq(0, 0.1, length.out = 21) # candidate values for lambda
10 | Bs <- seq(0, 0.1, length.out =  6) # candidate values for alpha
11 | 
12 | my_model <- train(
13 |   form = LPRICE2 ~ ., data = my_data, method = "glmnet", standardize = TRUE,
14 |   trControl = trainControl(method = "LOOCV"),
15 |   tuneGrid = expand.grid(lambda = As, alpha  = Bs))
16 | 
17 | tmp <- "B ( = alpha)" # legend title shared by the shape and colour guides
18 | ggplot(my_model) +
19 |   theme(legend.position = c(0, 1), legend.justification = c(0, 1)) +
20 |   xlab("A ( = lambda)") +
21 |   guides(shape = guide_legend(tmp), color = guide_legend(tmp))
22 | 


--------------------------------------------------------------------------------
/figures/howtomake.md:
--------------------------------------------------------------------------------
 1 | # 画像の生成方法
 2 | 
 3 | コンテナjupyterかrstudioを使います（コンテナの生成方法は2.3節を参照）．
 4 | 
 5 | ```bash
 6 | docker exec -it jr bash
 7 | # あるいは
 8 | docker exec -it rs bash
 9 | ```
10 | 
11 | 以下はコンテナでの作業です．
12 | 
13 | ## 準備
14 | 
15 | ```bash
16 | apt update && apt install -y texlive-extra-utils pdf2svg
17 | 
18 | #cd work # 結果をホスト側に保存する場合
19 | git clone https://github.com/taroyabuki/fromzero.git
20 | cd fromzero/figures
21 | ```
22 | 
23 | 画像（PDFとSVG）を作ります．
24 | `-j`のあとにはCPUコアの数程度の数値を指定してください（例：`make -j4`）．
25 | ファイル（`*.R`や`*.py`）を更新したら，`make`以下を実行します．
26 | 更新されたものだけが，再生成されます．
27 | 
28 | 
29 | ## Rの図を作る場合
30 | 
31 | ```bash
32 | cd fig-r
33 | #make clean # すべて生成し直す場合
34 | make -j
35 | cd ..
36 | ```
37 | 
38 | ## Pythonの図を作る場合
39 | 
40 | ```bash
41 | cd fig-p
42 | #make clean # すべて生成し直す場合
43 | make -j
44 | cd ..
45 | ```
46 | 


--------------------------------------------------------------------------------
/data/wine.csv:
--------------------------------------------------------------------------------
 1 | LPRICE2,WRAIN,DEGREES,HRAIN,TIME_SV
 2 | -0.99868,600,17.1167,160,31
 3 | -0.4544,690,16.7333,80,30
 4 | -0.80796,502,17.15,130,28
 5 | -1.50926,420,16.1333,110,26
 6 | -1.71655,582,16.4167,187,25
 7 | -0.418,485,17.4833,187,24
 8 | -1.97491,763,16.4167,290,23
 9 | 0,830,17.3333,38,22
10 | -1.10572,697,16.3,52,21
11 | -1.78098,608,15.7167,155,20
12 | -1.18435,402,17.2667,96,19
13 | -2.24194,602,15.3667,267,18
14 | -0.74943,819,16.5333,86,17
15 | -1.65388,714,16.2333,118,16
16 | -2.25018,610,16.2,292,15
17 | -2.14784,575,16.55,244,14
18 | -0.90544,622,16.6667,89,13
19 | -1.30031,551,16.7667,112,12
20 | -2.28879,536,14.9833,158,11
21 | -1.857,376,17.0667,123,10
22 | -2.19958,574,16.3,184,9
23 | -1.20168,572,16.95,171,8
24 | -1.37264,418,17.65,247,7
25 | -2.23503,821,15.5833,87,6
26 | -1.30769,763,15.8167,51,5
27 | -1.5396,717,16.1667,122,4
28 | -1.99582,578,16,74,3
29 | 


--------------------------------------------------------------------------------
/figures/fig-p/07-p-boxplot.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import statsmodels.api as sm
 3 | from sklearn.linear_model import LinearRegression
 4 | from sklearn.model_selection import cross_val_score, LeaveOneOut
 5 | from sklearn.neighbors import KNeighborsRegressor
 6 | 
 7 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
 8 | X, y = my_data[['speed']], my_data['dist']
 9 | 
10 | my_lm_scores = cross_val_score(
11 |     LinearRegression(), X, y, cv=LeaveOneOut(), scoring='neg_mean_squared_error')
12 | 
13 | my_knn_socres = cross_val_score(
14 |     KNeighborsRegressor(n_neighbors=5), X, y, cv=LeaveOneOut(),
15 |     scoring='neg_mean_squared_error')
16 | 
17 | my_df = pd.DataFrame({
18 |     'lm': -my_lm_scores,
19 |     'knn': -my_knn_socres})
20 | 
21 | my_df.boxplot().set_ylabel("$r^2$")
22 | 
23 | import matplotlib.pyplot as plt
24 | plt.savefig('07-p-boxplot.pdf')
25 | 


--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-prophet.py:
--------------------------------------------------------------------------------
 1 | from pmdarima.datasets import airpassengers
 2 | my_data = airpassengers.load_airpassengers()
 3 | 
 4 | n = len(my_data)
 5 | k = 108
 6 | 
 7 | import pandas as pd
 8 | my_ds = pd.date_range(
 9 |     start='1949/01/01',
10 |     end='1960/12/01',
11 |     freq='MS')
12 | my_df = pd.DataFrame({
13 |     'ds': my_ds,
14 |     'x': range(n),
15 |     'y': my_data},
16 |     index=my_ds)
17 | 
18 | my_train = my_df[        :k]
19 | my_test  = my_df[-(n - k): ]
20 | 
21 | from fbprophet import Prophet
22 | my_prophet_model = Prophet(seasonality_mode='multiplicative')
23 | my_prophet_model.fit(my_train)
24 | 
25 | tmp = my_prophet_model.predict(my_test)
26 | 
27 | fig = my_prophet_model.plot(tmp)
28 | fig.axes[0].plot(my_train.ds, my_train.y)
29 | fig.axes[0].plot(my_test.ds, my_test.y, color='red')
30 | 
31 | import matplotlib.pyplot as plt
32 | plt.savefig('12-p-airpassengers-prophet.pdf')
33 | 


--------------------------------------------------------------------------------
/figures/fig-p/13-p-pca-clusters.py:
--------------------------------------------------------------------------------
 1 | import seaborn as sns
 2 | import statsmodels.api as sm
 3 | from pca import pca
 4 | from scipy.cluster import hierarchy
 5 | from scipy.stats import zscore
 6 | from sklearn.cluster import KMeans
 7 | 
 8 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
 9 | my_data = zscore(iris.iloc[:, 0:4])
10 | 
11 | my_model = pca() # 主成分分析
12 | my_result = my_model.fit_transform(my_data)['PC']
13 | my_result['Species'] = list(iris.Species)
14 | 
15 | # 非階層的クラスタ分析の場合
16 | my_result['cluster'] = KMeans(n_clusters=3).fit(my_data).labels_
17 | 
18 | # 階層的クラスタ分析の場合
19 | #my_result['cluster'] = hierarchy.cut_tree(
20 | #    hierarchy.linkage(my_data, method='complete'), 3)[:,0]
21 | 
22 | sns.scatterplot(x='PC1', y='PC2', data=my_result,
23 |                 hue='cluster', style='Species', palette='bright', legend=False)
24 | 
25 | import matplotlib.pyplot as plt
26 | plt.savefig('13-p-pca-clusters.pdf')
27 | 


--------------------------------------------------------------------------------
/figures/fig-r/07-r-tuning-train.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "07-r-tuning-train.pdf", width = 6, height = 4.5)
 2 | 
 3 | library(caret)
 4 | library(tidyverse)
 5 | my_data <- cars
 6 | 
 7 | # Fit k-NN for one value of k and report the training RMSE together with
 8 | # the leave-one-out validation RMSE computed by caret.
 9 | my_loocv <- function(k) {
10 |   my_grid <- data.frame(k = k)
11 |   my_ctrl <- trainControl(method = "LOOCV")
12 |   my_model <- train(form = dist ~ speed, data = my_data, method = "knn",
13 |                     tuneGrid = my_grid, trControl = my_ctrl)
14 |   y_ <- predict(my_model, my_data)
15 |   list(k = k,
16 |        training = RMSE(y_, my_data$dist),  # RMSE (training)
17 |        validation = my_model$results$RMSE) # RMSE (validation)
18 | }
19 | 
20 | my_results <- map_dfr(1:15, my_loocv)
21 | 
22 | # Long format: one row per (k, training/validation) pair, one line each.
23 | my_long <- pivot_longer(my_results, -k)
24 | ggplot(my_long, aes(x = k, y = value, color = name)) +
25 |   geom_line() + geom_point() +
26 |   xlab("#Neighbors") + ylab("RMSE") +
27 |   theme(legend.position = c(1, 0),
28 |         legend.justification = c(1, 0))
29 | 


--------------------------------------------------------------------------------
/figures/fig-r/10-r-titanic-roc.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "10-r-titanic-roc.pdf", width = 6, height = 5)
 2 | 
 3 | library(caret)
 4 | library(PRROC)
 5 | library(tidyverse)
 6 | 
 7 | my_url <- str_c("https://raw.githubusercontent.com",
 8 |                 "/taroyabuki/fromzero/master/data/titanic.csv")
 9 | my_data <- read_csv(my_url)
10 | 
11 | # Depth-2 classification tree fitted once on all rows (no resampling).
12 | my_ctrl <- trainControl(method = "none")
13 | my_model <- train(form = Survived ~ ., data = my_data, method = "rpart2",
14 |                   tuneGrid = data.frame(maxdepth = 2),
15 |                   trControl = my_ctrl)
16 | 
17 | y <- my_data$Survived
18 | # Predicted probability of the "Yes" class for every row.
19 | y_score <- predict(my_model, newdata = my_data, type = "prob")$Yes
20 | 
21 | # roc.curve takes the scores of the "Yes" and "No" cases separately.
22 | my_pos <- y_score[y == "Yes"]
23 | my_neg <- y_score[y == "No"]
24 | my_roc <- roc.curve(scores.class0 = my_pos, scores.class1 = my_neg,
25 |                     curve = TRUE)
26 | my_roc %>% plot(xlab = "False Positive Rate",
27 |                 ylab = "True Positive Rate",
28 |                 legend = FALSE)
29 | 


--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-arima.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "12-r-airpassengers-arima.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | my_data <- as.vector(AirPassengers)
 4 | 
 5 | n <- length(my_data) # number of observations (144)
 6 | k <- 108             # number of training observations
 7 | 
 8 | library(tidyverse)
 9 | library(tsibble)
10 | 
11 | my_ds <- seq(from = yearmonth("1949/01"), to = yearmonth("1960/12"), by = 1)
12 | my_label <- rep(c("train", "test"), times = c(k, n - k))
13 | my_df <- tsibble(
14 |   ds    = my_ds,
15 |   x     = 0:(n - 1),
16 |   y     = my_data,
17 |   label = my_label,
18 |   index = ds) # column that holds the time index
19 | 
20 | # Chronological split into the first k rows and the remainder.
21 | my_train <- my_df[ seq_len(k), ]
22 | my_test  <- my_df[-seq_len(k), ]
23 | 
24 | library(fable)
25 | my_arima_model <- model(my_train, ARIMA(y))
26 | 
27 | # Forecast three years ahead.
28 | tmp <- forecast(my_arima_model, h = "3 years")
29 | 
30 | # Forecast plot with the observed series overlaid, coloured by label.
31 | autoplot(tmp) +
32 |   geom_line(data = my_df,
33 |             aes(x = ds, y = y, color = label))
34 | 


--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-lm.py:
--------------------------------------------------------------------------------
 1 | from pmdarima.datasets import airpassengers
 2 | my_data = airpassengers.load_airpassengers()
 3 | 
 4 | n = len(my_data)
 5 | k = 108
 6 | 
 7 | import pandas as pd
 8 | my_ds = pd.date_range(
 9 |     start='1949/01/01',
10 |     end='1960/12/01',
11 |     freq='MS')
12 | my_df = pd.DataFrame({
13 |     'ds': my_ds,
14 |     'x': range(n),
15 |     'y': my_data},
16 |     index=my_ds)
17 | 
18 | my_train = my_df[        :k]
19 | my_test  = my_df[-(n - k): ]
20 | 
21 | import matplotlib.pyplot as plt
22 | from sklearn.linear_model import LinearRegression
23 | 
24 | my_lm_model = LinearRegression()
25 | my_lm_model.fit(my_train[['x']], my_train.y)
26 | 
27 | y_ = my_lm_model.predict(my_df[['x']])
28 | tmp = pd.DataFrame(y_,
29 |                    index=my_df.index)
30 | plt.plot(my_train.y, label='train')
31 | plt.plot(my_test.y,  label='test')
32 | plt.plot(tmp,        label='model')
33 | plt.legend()
34 | plt.savefig('12-p-airpassengers-lm.pdf')
35 | 


--------------------------------------------------------------------------------
/figures/fig-p/09-p-rpart2.py:
--------------------------------------------------------------------------------
 1 | import graphviz
 2 | import statsmodels.api as sm
 3 | from sklearn import tree
 4 | from sklearn.model_selection import GridSearchCV, LeaveOneOut
 5 | 
 6 | my_data = sm.datasets.get_rdataset('iris', 'datasets').data
 7 | X, y = my_data.iloc[:, 0:4], my_data.Species
 8 | 
 9 | my_params = {
10 |     'max_depth': range(2, 6),
11 |     'min_samples_split': [2, 20],
12 |     'min_samples_leaf': range(1, 8)}
13 | 
14 | my_search = GridSearchCV(
15 |     estimator=tree.DecisionTreeClassifier(min_impurity_decrease=0.01,
16 |                                           random_state=0),
17 |     param_grid=my_params,
18 |     cv=LeaveOneOut(),
19 |     n_jobs=-1).fit(X, y)
20 | 
21 | my_model = my_search.best_estimator_
22 | my_dot = tree.export_graphviz(
23 |     decision_tree=my_model,
24 |     out_file=None,
25 |     feature_names=X.columns,
26 |     class_names=my_model.classes_,
27 |     filled=True)
28 | my_graph = graphviz.Source(my_dot)
29 | my_graph.render('09-p-rpart2')
30 | 


--------------------------------------------------------------------------------
/figures/fig-p/07-p-tuning.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import statsmodels.api as sm
 3 | from sklearn.model_selection import GridSearchCV, LeaveOneOut
 4 | from sklearn.neighbors import KNeighborsRegressor
 5 | 
 6 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
 7 | X, y = my_data[['speed']], my_data['dist']
 8 | 
 9 | my_params = {'n_neighbors': range(1, 16)} # 探索範囲（1以上16未満の整数）
10 | 
11 | my_search = GridSearchCV(estimator=KNeighborsRegressor(),
12 |                          param_grid=my_params,
13 |                          cv=LeaveOneOut(),
14 |                          scoring='neg_mean_squared_error')
15 | my_search.fit(X, y)
16 | 
17 | tmp = my_search.cv_results_                # チューニングの詳細
18 | my_scores = (-tmp['mean_test_score'])**0.5 # RMSE
19 | my_results = pd.DataFrame(tmp['params']).assign(validation=my_scores)
20 | 
21 | my_results.plot(x='n_neighbors',
22 |                 style='o-',
23 |                 ylabel='RMSE')
24 | 
25 | import matplotlib.pyplot as plt
26 | plt.savefig('07-p-tuning.pdf')
27 | 


--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-prophet.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "12-r-airpassengers-prophet.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | my_data <- as.vector(AirPassengers)
 4 | 
 5 | n <- length(my_data) # number of observations (144)
 6 | k <- 108             # number of training observations
 7 | 
 8 | library(tidyverse)
 9 | library(tsibble)
10 | 
11 | my_ds <- seq(from = yearmonth("1949/01"), to = yearmonth("1960/12"), by = 1)
12 | my_label <- rep(c("train", "test"), times = c(k, n - k))
13 | my_df <- tsibble(
14 |   ds    = my_ds,
15 |   x     = 0:(n - 1),
16 |   y     = my_data,
17 |   label = my_label,
18 |   index = ds) # column that holds the time index
19 | 
20 | # Chronological split into the first k rows and the remainder.
21 | my_train <- my_df[ seq_len(k), ]
22 | my_test  <- my_df[-seq_len(k), ]
23 | 
24 | library(prophet)
25 | my_prophet_model <- prophet(my_train, seasonality.mode = "multiplicative")
26 | 
27 | tmp <- predict(my_prophet_model, my_test)
28 | 
29 | # Prophet's forecast plot with the observed series drawn on top;
30 | # the held-out part is red.
31 | plot(my_prophet_model, tmp) +
32 |   geom_line(data = my_train, aes(x = as.POSIXct(ds))) +
33 |   geom_line(data = my_test,  aes(x = as.POSIXct(ds)), color = "red")
34 | 


--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-lm.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "12-r-airpassengers-lm.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | my_data <- as.vector(AirPassengers)
 4 | 
 5 | n <- length(my_data) # number of observations (144)
 6 | k <- 108             # number of training observations
 7 | 
 8 | library(tidyverse)
 9 | library(tsibble)
10 | 
11 | my_ds <- seq(from = yearmonth("1949/01"), to = yearmonth("1960/12"), by = 1)
12 | my_label <- rep(c("train", "test"), times = c(k, n - k))
13 | my_df <- tsibble(
14 |   ds    = my_ds,
15 |   x     = 0:(n - 1),
16 |   y     = my_data,
17 |   label = my_label,
18 |   index = ds) # column that holds the time index
19 | 
20 | # Chronological split into the first k rows and the remainder.
21 | my_train <- my_df[ seq_len(k), ]
22 | my_test  <- my_df[-seq_len(k), ]
23 | 
24 | library(caret)
25 | my_lm_model <- train(form = y ~ x, data = my_train, method = "lm")
26 | 
27 | # Fitted line over the whole period, labelled "model" for the legend.
28 | y_ <- predict(my_lm_model, my_df)
29 | tmp <- mutate(my_df, y = y_, label = "model")
30 | 
31 | my_plot <- ggplot(my_df, aes(x = ds, y = y, color = label)) +
32 |   geom_line()
33 | my_plot + geom_line(data = tmp)
34 | 


--------------------------------------------------------------------------------
/figures/fig-p/07-p-polynomial.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import statsmodels.api as sm
 4 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
 5 | 
 6 | my_idx = [1, 10, 26, 33, 38, 43]
 7 | my_sample = my_data.iloc[my_idx, ]
 8 | X, y = my_sample[['speed']], my_sample['dist']
 9 | 
10 | from sklearn.preprocessing import PolynomialFeatures
11 | d = 5
12 | X5 = PolynomialFeatures(d).fit_transform(X) # Xの1乗から5乗の変数
13 | 
14 | from sklearn.linear_model import LinearRegression
15 | my_model = LinearRegression()
16 | my_model.fit(X5, y)
17 | 
18 | tmp = pd.DataFrame({'speed': np.linspace(min(my_data.speed),
19 |                                          max(my_data.speed),
20 |                                          100)})
21 | X5 = PolynomialFeatures(d).fit_transform(tmp)
22 | tmp['model'] = my_model.predict(X5)
23 | 
24 | my_sample = my_sample.assign(sample=y)
25 | my_df = pd.concat([my_data, my_sample, tmp])
26 | my_df.plot(x='speed', style=['o', 'o', '-'], ylim=(0, 130))
27 | 
28 | import matplotlib.pyplot as plt
29 | plt.savefig('07-p-polynomial.pdf')
30 | 


--------------------------------------------------------------------------------
/figures/fig-p/10-p-titanic-tree.py:
--------------------------------------------------------------------------------
 1 | import graphviz
 2 | import pandas as pd
 3 | from sklearn import tree
 4 | from sklearn.pipeline import Pipeline
 5 | from sklearn.preprocessing import OneHotEncoder
 6 | 
 7 | my_url = ('https://raw.githubusercontent.com'
 8 |           '/taroyabuki/fromzero/master/data/titanic.csv')
 9 | my_data = pd.read_csv(my_url)
10 | 
11 | X, y = my_data.iloc[:, 0:3], my_data.Survived
12 | 
13 | my_pipeline = Pipeline([
14 |     ('ohe', OneHotEncoder(drop='first')),
15 |     ('tree', tree.DecisionTreeClassifier(max_depth=2, random_state=0,
16 |                                          min_impurity_decrease=0.01))])
17 | my_pipeline.fit(X, y)
18 | 
19 | my_enc  = my_pipeline.named_steps['ohe']
20 | my_tree = my_pipeline.named_steps['tree']
21 | 
22 | my_dot = tree.export_graphviz(
23 |     decision_tree=my_tree,
24 |     out_file=None,
25 |     feature_names=my_enc.get_feature_names(),
26 |     class_names=my_pipeline.classes_,
27 |     filled=True)
28 | graphviz.Source(my_dot)
29 | my_graph = graphviz.Source(my_dot)
30 | my_graph.render('10-p-titanic-tree')
31 | 


--------------------------------------------------------------------------------
/figures/fig-r/11-r-classification.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "11-r-classification.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | library(keras)
 4 | library(tidyverse)
 5 | 
 6 | my_data <- iris[sample(nrow(iris)), ] # shuffle the rows
 7 | 
 8 | # Standardised predictors and zero-based integer class labels.
 9 | X <- scale(select(my_data, -Species))
10 | y <- as.integer(my_data$Species) - 1
11 | 
12 | # One hidden layer of 3 ReLU units, softmax output over 3 classes.
13 | my_model <- keras_model_sequential() %>%
14 |   layer_dense(units = 3, activation = "relu", input_shape = c(4)) %>%
15 |   layer_dense(units = 3, activation = "softmax")
16 | 
17 | my_model %>% compile(loss = "sparse_categorical_crossentropy",
18 |                      optimizer = "rmsprop",
19 |                      metrics = c("accuracy"))
20 | 
21 | my_cb <- callback_early_stopping(patience = 20,
22 |                                  restore_best_weights = TRUE)
23 | 
24 | my_history <- my_model %>% fit(
25 |   x = X,
26 |   y = y,
27 |   validation_split = 0.25,
28 |   batch_size = 10,
29 |   epochs = 500,
30 |   callbacks = list(my_cb),
31 |   verbose = 0)
32 | 
33 | plot(my_history)
34 | 
35 | my_history
36 | 
37 | # Accuracy over all rows of X: the predicted class is the column with the
38 | # largest probability, shifted to match the zero-based labels.
39 | tmp <- predict(my_model, X)
40 | y_ <- apply(tmp, 1, which.max) - 1
41 | mean(y_ == y)
42 | 


--------------------------------------------------------------------------------
/figures/fig-r/11-r-regression.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "11-r-regression.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | library(keras)
 4 | library(tidyverse)
 5 | 
 6 | my_url <- str_c("https://raw.githubusercontent.com",
 7 |                 "/taroyabuki/fromzero/master/data/wine.csv")
 8 | tmp <- read_csv(my_url)
 9 | 
10 | my_data <- tmp[sample(nrow(tmp)), ] # shuffle the rows
11 | 
12 | # Standardised predictors; LPRICE2 is the response.
13 | X <- scale(select(my_data, -LPRICE2))
14 | y <- my_data$LPRICE2
15 | 
16 | # One hidden layer of 3 ReLU units and a single linear output unit.
17 | my_model <- keras_model_sequential() %>%
18 |   layer_dense(units = 3, activation = "relu", input_shape = c(4)) %>%
19 |   layer_dense(units = 1)
20 | 
21 | my_model %>% compile(loss = "mse", optimizer = "rmsprop")
22 | 
23 | my_cb <- callback_early_stopping(patience = 20,
24 |                                  restore_best_weights = TRUE)
25 | 
26 | my_history <- my_model %>% fit(
27 |   x = X,
28 |   y = y,
29 |   validation_split = 0.25,
30 |   batch_size = 10,
31 |   epochs = 500,
32 |   callbacks = list(my_cb),
33 |   verbose = 0)
34 | 
35 | plot(my_history)
36 | 
37 | my_history
38 | 
39 | # RMSE over all rows of X.
40 | y_ <- predict(my_model, X)
41 | sqrt(mean((y_ - y)^2))
42 | 


--------------------------------------------------------------------------------
/docs/exam.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <title>Exam</title>
 5 |   </head>
 6 |   <body>
 7 |     <table>
 8 |       <thead>
 9 |         <tr>
10 |           <td></td>
11 |           <th>name</th>
12 |           <th>english</th>
13 |           <th>math</th>
14 |           <th>gender</th>
15 |         </tr>
16 |       </thead>
17 |       <tbody>
18 |         <tr>
19 |           <td></td>
20 |           <td>A</td>
21 |           <td>60</td>
22 |           <td>70</td>
23 |           <td>f</td>
24 |         </tr>
25 |         <tr>
26 |           <td></td>
27 |           <td>B</td>
28 |           <td>90</td>
29 |           <td>80</td>
30 |           <td>m</td>
31 |         </tr>
32 |         <tr>
33 |           <td></td>
34 |           <td>C</td>
35 |           <td>70</td>
36 |           <td>90</td>
37 |           <td>m</td>
38 |         </tr>
39 |         <tr>
40 |           <td></td>
41 |           <td>D</td>
42 |           <td>90</td>
43 |           <td>100</td>
44 |           <td>f</td>
45 |         </tr>
46 |       </tbody>
47 |     </table>
48 |   </body>
49 | </html>
50 | 


--------------------------------------------------------------------------------
/figures/fig-p/10-p-titanic-roc.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn import tree
 3 | from sklearn.metrics import roc_curve, RocCurveDisplay, auc
 4 | from sklearn.pipeline import Pipeline
 5 | from sklearn.preprocessing import OneHotEncoder
 6 | 
 7 | my_url = ('https://raw.githubusercontent.com'
 8 |           '/taroyabuki/fromzero/master/data/titanic.csv')
 9 | my_data = pd.read_csv(my_url)
10 | 
11 | X, y = my_data.iloc[:, 0:3], my_data.Survived
12 | 
13 | my_pipeline = Pipeline([
14 |     ('ohe', OneHotEncoder(drop='first')),
15 |     ('tree', tree.DecisionTreeClassifier(max_depth=2,
16 |                                          min_impurity_decrease=0.01))])
17 | my_pipeline.fit(X, y)
18 | 
19 | tmp = pd.DataFrame(
20 |     my_pipeline.predict_proba(X),
21 |     columns=my_pipeline.classes_)
22 | y_score = tmp.Yes
23 | 
24 | my_fpr, my_tpr, _ = roc_curve(y_true=y,
25 |                               y_score=y_score,
26 |                               pos_label='Yes')
27 | my_auc = auc(x=my_fpr, y=my_tpr)
28 | RocCurveDisplay(fpr=my_fpr, tpr=my_tpr, roc_auc=my_auc).plot()
29 | 
30 | import matplotlib.pyplot as plt
31 | plt.savefig('10-p-titanic-roc.pdf')
32 | 


--------------------------------------------------------------------------------
/addendum/07.03.02/README.md:
--------------------------------------------------------------------------------
 1 | # 予測値の期待値の信頼区間
 2 | 
 3 | **本稿は本書の想定レベルを超えています．**
 4 | 
 5 | 7.3.2項で次のような絵を描いています（184頁）．これは，「speedが21.5のときのdistを予測する」というのがどういうことなのかを説明するためのものです．
 6 | 
 7 | R|Python
 8 | :--|:--
 9 | <img src="https://github.com/taroyabuki/fromzero/raw/main/figures/fig-r/07-r-regression.svg"/>|<img src="https://github.com/taroyabuki/fromzero/raw/main/figures/fig-p/07-p-regression.svg"/>
10 | 
11 | 直線から読み取れるのは，speedが21.5のときのdistの期待値が67になることです．しかし，直線が少し違ったものになる可能性を考慮すると，網掛け部分くらいになるかもしれません．この網掛けの部分を，予測値の期待値の**信頼区間**といいます．
12 | 
13 | 実現値として得られるのは，これに誤差が加わった結果で，それを考慮したものを**予測区間**といいます．（ここでは予測区間についてはこれ以上触れません．）
14 | 
15 | 本書のレベルではこれで終わりでいいのですが，上の絵の「RとPythonの網掛け部分が少し違っていること」に気付く方がいたので，少し補足します．
16 | 
17 | ## 簡単な説明
18 | 
19 | Rの`ggplot2::stat_smooth`で描いた結果は，「誤差はxによらず，同一の正規分布に従う」という仮定に基づく，理論的なものです（線形**正規**回帰モデル）．
20 | 
21 | Pythonの`seaborn.regplot`で描いた結果は，そういう仮定に基づかない，シミュレーション（ブートストラップ）によるものです（線形回帰モデル）．
22 | 
23 | データが仮定に合わないと違いが際立ちます．
24 | 
25 | R|Python
26 | :--|:--
27 | <img src="confidence_band_r.svg"/>|<img src="confidence_band_p.svg"/>
28 | 
29 | ## 詳しい説明
30 | 
31 | - [R側からの補足](confidence_band_R.ipynb)
32 | - [Python側からの補足](confidence_band_python.ipynb)


--------------------------------------------------------------------------------
/figures/fig-p/07-p-tuning-train.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import statsmodels.api as sm
 3 | from sklearn.metrics import mean_squared_error
 4 | from sklearn.model_selection import cross_val_score, LeaveOneOut
 5 | from sklearn.neighbors import KNeighborsRegressor
 6 | 
 7 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
 8 | X, y = my_data[['speed']], my_data['dist']
 9 | 
10 | def my_loocv(k):
11 |     my_model = KNeighborsRegressor(n_neighbors=k)
12 |     my_scores = cross_val_score(estimator=my_model, X=X, y=y,
13 |                                 cv=LeaveOneOut(),
14 |                                 scoring='neg_mean_squared_error')
15 |     y_ = my_model.fit(X, y).predict(X)
16 |     return pd.Series([k,
17 |                       (-my_scores.mean())**0.5,        # RMSE（検証）
18 |                       mean_squared_error(y_, y)**0.5], # RMSE（訓練）
19 |                      index=['n_neighbors', 'validation', 'training'])
20 | 
21 | my_results = pd.Series(range(1, 16)).apply(my_loocv)
22 | 
23 | my_results.plot(x='n_neighbors',
24 |                 style='o-',
25 |                 ylabel='RMSE')
26 | 
27 | import matplotlib.pyplot as plt
28 | plt.savefig('07-p-tuning-train.pdf')
29 | 


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-nnet.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "11-r-mnist-nnet.pdf", width = 5.83, height = 4.13)
 2 | 
 3 | library(keras)
 4 | 
 5 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
 6 | # Train on a random subset of 6000 of the 60000 training images.
 7 | my_index <- sample(1:60000, 6000)
 8 | x_train <- x_train[my_index, , ]
 9 | y_train <- y_train[my_index]
10 | # NOTE(review): pixel values are used unscaled here, whereas
11 | # 11-r-mnist-cnn.R divides by 255 first -- confirm this is intended.
12 | 
13 | # Flatten each 28x28 image, one hidden layer of 256 ReLU units,
14 | # softmax output over the 10 digits.
15 | my_model <- keras_model_sequential() %>%
16 |   layer_flatten(input_shape = c(28, 28)) %>%
17 |   layer_dense(units = 256, activation = "relu") %>%
18 |   layer_dense(units = 10, activation = "softmax")
19 | 
20 | my_model %>% compile(
21 |   loss = "sparse_categorical_crossentropy",
22 |   optimizer = "rmsprop",
23 |   metrics = c("accuracy"))
24 | 
25 | my_cb <- callback_early_stopping(
26 |   patience = 5,
27 |   restore_best_weights = TRUE)
28 | 
29 | my_history <- my_model %>% fit(
30 |   x = x_train,
31 |   y = y_train,
32 |   validation_split = 0.2,
33 |   batch_size = 128,
34 |   epochs = 20,
35 |   callbacks = list(my_cb),
36 |   verbose = 0)
37 | 
38 | plot(my_history)
39 | 
40 | # Confusion table and accuracy on the test images: the predicted digit is
41 | # the column with the largest probability, shifted to be zero-based.
42 | tmp <- predict(my_model, x_test)
43 | y_ <- apply(tmp, 1, which.max) - 1
44 | table(y_, y_test)
45 | 
46 | mean(y_ == y_test)
47 | 
48 | evaluate(my_model, x = x_test, y = y_test)
49 | 


--------------------------------------------------------------------------------
/figures/fig-p/11-p-regression.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import sklearn
 3 | from keras import callbacks, layers, models
 4 | from sklearn.preprocessing import StandardScaler
 5 | 
 6 | my_url = ('https://raw.githubusercontent.com'
 7 |           '/taroyabuki/fromzero/master/data/wine.csv')
 8 | tmp = pd.read_csv(my_url)
 9 | 
10 | my_data = sklearn.utils.shuffle(tmp)
11 | 
12 | my_scaler = StandardScaler()
13 | X = my_scaler.fit_transform(
14 |     my_data.drop(columns=['LPRICE2']))
15 | y = my_data['LPRICE2']
16 | 
17 | my_model = models.Sequential()
18 | my_model.add(layers.Dense(units=3, activation='relu', input_shape=[4]))
19 | my_model.add(layers.Dense(units=1))
20 | 
21 | my_model.compile(
22 |     loss='mse',
23 |     optimizer='rmsprop')
24 | 
25 | my_cb = callbacks.EarlyStopping(
26 |     patience=20,
27 |     restore_best_weights=True)
28 | 
29 | my_history = my_model.fit(
30 |     x=X,
31 |     y=y,
32 |     validation_split=0.25,
33 |     batch_size=10,
34 |     epochs=500,
35 |     callbacks=[my_cb],
36 |     verbose=0)
37 | 
38 | tmp = pd.DataFrame(my_history.history)
39 | tmp.plot(xlabel='epoch')
40 | 
41 | import matplotlib.pyplot as plt
42 | plt.savefig('11-p-regression.pdf')
43 | 
44 | print(tmp.iloc[-1, ])
45 | 
46 | y_ = my_model.predict(X)
47 | print(((y_.ravel() - y)**2).mean())
48 | 


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-id5.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="166pt" height="166pt" viewBox="0 0 166 166" version="1.1">
 3 | <defs>
 4 | <clipPath id="clip1">
 5 |   <path d="M 0 0.679688 L 166 0.679688 L 166 165.558594 L 0 165.558594 Z M 0 0.679688 "/>
 6 | </clipPath>
 7 | <image id="image5" width="28" height="28" xlink:href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAIAAAD9b0jDAAAABmJLR0QA/wD/AP+gvaeTAAABpElEQVRIie3Tr4sCQRQH8HVPQVEWi4vJsqb9B0aLYNrkGrcY/Ac2CIJNEEwWQfePUNMmtSiCGATTFtGmhhU2qYjB9/DCcHJw4M14cslvm8fMZx7zQxDeeef/QwixLMtxHERExFKpZBiGZVmEkCdFwzD2+z0AIOJoNHIcBwDosNPpcHN+vz+dTh+PRwAYj8fZbDYQCEQikX6/T9FyucyNFotF2tRgMJAkiRYLhQItbjabWCzGJ9brdUQEgFardRcFQVgulxTN5/N8YrVaRcTL5WLbdigUosVgMKjr+vl8RsRarcYnRqNRejO2bd+LyWRyPp/THrvdbjgc5kNlWaaLE4mELMuVSmU2mx0OB3oa1+s1l8vxibRT13Xp/cJXttvtbrcDANd1uUUaQojneYi4Wq0ajYaqqvF4fDKZAECz2XwS/ZlMJnO73RDRNM2XoZqm0dPgfpuPw46KjKKmaezbs6KKorwenU6noij6fD52minr9RoAUqnUrzM/2NHT6aTruqIoi8XC87w/tPctkiQNh0MA6PV63H//sdtutwFAVdWXoYz5BHKmARpwaBENAAAAAElFTkSuQmCC"/>
 8 | </defs>
 9 | <g id="surface1">
10 | <g clip-path="url(#clip1)" clip-rule="nonzero">
11 | <use xlink:href="#image5" transform="matrix(5.888571,0,0,5.888571,0.84,0.68)"/>
12 | </g>
13 | </g>
14 | </svg>
15 | 


--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-arima.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | from pmdarima.datasets import airpassengers
 3 | my_data = airpassengers.load_airpassengers()
 4 | 
 5 | n = len(my_data)
 6 | k = 108
 7 | 
 8 | import pandas as pd
 9 | my_ds = pd.date_range(
10 |     start='1949/01/01',
11 |     end='1960/12/01',
12 |     freq='MS')
13 | my_df = pd.DataFrame({
14 |     'ds': my_ds,
15 |     'x': range(n),
16 |     'y': my_data},
17 |     index=my_ds)
18 | 
19 | my_train = my_df[        :k]
20 | my_test  = my_df[-(n - k): ]
21 | 
22 | import pmdarima as pm
23 | my_arima_model = pm.auto_arima(my_train.y, m=12, trace=True)
24 | 
25 | y_, my_ci = my_arima_model.predict(len(my_test),         # 期間はテストデータと同じ．
26 |                                    alpha=0.05,           # 有意水準（デフォルト）
27 |                                    return_conf_int=True) # 信頼区間を求める．
28 | tmp = pd.DataFrame({'y': y_,
29 |                     'Lo': my_ci[:, 0],
30 |                     'Hi': my_ci[:, 1]},
31 |                    index=my_test.index)
32 | 
33 | plt.plot(my_train.y, label='train')
34 | plt.plot(my_test.y,  label='test')
35 | plt.plot(tmp.y,      label='model')
36 | plt.fill_between(tmp.index,
37 |                  tmp.Lo,
38 |                  tmp.Hi,
39 |                  alpha=0.25)
40 | plt.legend(loc='upper left')
41 | 
42 | plt.savefig('12-p-airpassengers-arima.pdf')
43 | 


--------------------------------------------------------------------------------
/figures/fig/図1.4.md:
--------------------------------------------------------------------------------
 1 | ```puml
 2 | @startuml
 3 | scale 0.8
 4 | skinparam {
 5 |   defaultFontName Hiragino Kaku Gothic ProN
 6 |   monochrome true
 7 |   shadowing false
 8 | }
 9 | 
10 | cloud HOMELAN as "家庭内LAN\nネットワーク：192.168.1.0\nサブネットマスク：255.255.255.0" {
11 |   rectangle ホストPC as "ホストPC\nIPアドレス：192.168.1.2" {
12 |     cloud ホストPC内LAN as "ホストPC内LAN\nネットワーク：172.17.0.0\nサブネットマスク：255.255.0.0" {
13 |       rectangle コンテナ as "Dockerコンテナ\nIPアドレス：172.17.43.181" {
14 |         rectangle コンテナ8787 as "ポート8787"
15 |         rectangle コンテナ8888 as "ポート8888"
16 |       }
17 |     }
18 |     rectangle ホスト8787 as "ポート8787"
19 |     rectangle ホスト8888 as "ポート8888"
20 |   }
21 |   rectangle PC3 as "PC\nIPアドレス：192.168.1.3"
22 |   rectangle Gateway as "Gateway, DNS Server\nIPアドレス：192.168.1.1"
23 |   ホストPC--Gateway
24 |   ホストPC-PC3
25 |   PC3--Gateway
26 |   コンテナ8787--ホスト8787
27 |   コンテナ8888--ホスト8888
28 | }
29 | 
30 | usecase http8787 as "localhost:8787"
31 | usecase http8888 as "localhost:8888"
32 | 
33 | http8787-up-ホスト8787
34 | http8888-up-ホスト8888
35 | 
36 | ホストPCのユーザ-up-http8787
37 | ホストPCのユーザ-up-http8888
38 | 
39 | cloud 組織AのLAN {
40 |     rectangle PC as "PC\nIPアドレス：192.168.1.2"
41 | }
42 | 
43 | cloud 組織BのLAN {
44 |     rectangle PC2 as "PC\nIPアドレス：192.168.1.2"
45 | }
46 | 組織AのLAN-Gateway
47 | 組織BのLAN-Gateway
48 | 組織AのLAN--組織BのLAN
49 | 
50 | @enduml
51 | ```
52 | 


--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-tuning.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from sklearn.linear_model import ElasticNet
 4 | from sklearn.model_selection import GridSearchCV, LeaveOneOut
 5 | from sklearn.pipeline import Pipeline
 6 | from sklearn.preprocessing import StandardScaler
 7 | 
 8 | my_url = ('https://raw.githubusercontent.com/taroyabuki/' +
 9 |           'fromzero/master/data/wine.csv')
10 | my_data = pd.read_csv(my_url)
11 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2']
12 | 
13 | As = np.linspace(0, 0.1, 21)
14 | Bs = np.linspace(0, 0.1,  6)
15 | 
16 | my_pipeline = Pipeline([('sc', StandardScaler()),
17 |                         ('enet', ElasticNet())])
18 | my_search = GridSearchCV(
19 |     estimator=my_pipeline,
20 |     param_grid={'enet__alpha': As, 'enet__l1_ratio': Bs},
21 |     cv=LeaveOneOut(),
22 |     scoring='neg_mean_squared_error',
23 |     n_jobs=-1).fit(X, y)
24 | 
25 | tmp = my_search.cv_results_                # チューニング結果の詳細
26 | my_scores = (-tmp['mean_test_score'])**0.5 # MSEからRMSEへの変換
27 | 
28 | my_results = pd.DataFrame(tmp['params']).assign(RMSE=my_scores).pivot(
29 |     index='enet__alpha',
30 |     columns='enet__l1_ratio',
31 |     values='RMSE')
32 | 
33 | my_results.plot(style='o-', xlabel='A ( = alpha)', ylabel='RMSE').legend(
34 |     title='B ( = l1_ratio)')
35 | 
36 | import matplotlib.pyplot as plt
37 | plt.savefig('08-p-enet-tuning.pdf')
38 | 


--------------------------------------------------------------------------------
/figures/fig-p/11-p-classification.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import sklearn
 4 | import statsmodels.api as sm
 5 | from keras import callbacks, layers, models
 6 | from sklearn.preprocessing import StandardScaler, LabelEncoder
 7 | 
 8 | tmp = sm.datasets.get_rdataset('iris', 'datasets').data  # fetch the R 'iris' data set
 9 | my_data = sklearn.utils.shuffle(tmp)  # shuffle the rows (presumably so validation_split sees mixed species -- confirm)
10 | 
11 | my_scaler = StandardScaler()
12 | X = my_scaler.fit_transform(  # standardize every column except Species
13 |     my_data.drop(columns=['Species']))
14 | my_enc = LabelEncoder()
15 | y = my_enc.fit_transform(  # encode the Species labels as integers
16 |     my_data['Species'])
17 | 
18 | my_model = models.Sequential()
19 | my_model.add(layers.Dense(units=3, activation='relu', input_shape=[4]))  # hidden layer on the 4 inputs
20 | my_model.add(layers.Dense(units=3, activation='softmax'))  # output layer with 3 units
21 | 
22 | my_model.compile(loss='sparse_categorical_crossentropy',  # sparse loss: y holds integer labels
23 |                  optimizer='rmsprop',
24 |                  metrics=['accuracy'])
25 | 
26 | my_cb = callbacks.EarlyStopping(  # early stopping that restores the best weights
27 |     patience=20,
28 |     restore_best_weights=True)
29 | 
30 | my_history = my_model.fit(
31 |     x=X,
32 |     y=y,
33 |     validation_split=0.25,  # fraction of the rows held out for validation
34 |     batch_size=10,
35 |     epochs=500,  # upper bound; my_cb may stop training earlier
36 |     callbacks=[my_cb],
37 |     verbose=0)
38 | 
39 | tmp = pd.DataFrame(my_history.history)  # per-epoch metrics as a data frame
40 | tmp.plot(xlabel='epoch')
41 | 
42 | import matplotlib.pyplot as plt
43 | plt.savefig('11-p-classification.pdf')  # save the learning-curve figure
44 | 
45 | print(tmp.iloc[-1, ])  # metrics of the last recorded epoch
46 | 
47 | tmp = my_model.predict(X)  # per-class probabilities for every row
48 | y_ = np.argmax(tmp, axis=-1)  # predicted class = index of the largest probability
49 | print((y_ == y).mean())  # accuracy over all rows (training and validation alike)
50 | 


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-cnn.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "11-r-mnist-cnn.pdf", width = 5.83, height = 4.13) # open the PDF device that receives the plot
 2 | 
 3 | library(keras)
 4 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist() # unpack the MNIST train/test arrays
 5 | 
 6 | my_index <- sample(1:60000, 6000) # draw 6000 of the indices 1..60000 at random
 7 | x_train <- x_train[my_index, , ]  # keep only the sampled training images
 8 | y_train <- y_train[my_index]      # and the matching labels
 9 | 
10 | x_train <- x_train / 255 # divide by 255 (presumably rescales 0-255 pixel values to [0, 1])
11 | x_test  <- x_test  / 255
12 | 
13 | x_train2d <- x_train %>% array_reshape(c(-1, 28, 28, 1)) # reshape to (n, 28, 28, 1)
14 | x_test2d  <- x_test  %>% array_reshape(c(-1, 28, 28, 1))
15 | 
16 | my_model <- keras_model_sequential() %>%
17 |   layer_conv_2d(filters = 32, kernel_size = 3,  # convolution layer
18 |                 activation = "relu",
19 |                 input_shape = c(28, 28, 1)) %>%
20 |   layer_max_pooling_2d(pool_size = 2) %>%       # pooling layer
21 |   layer_flatten() %>%
22 |   layer_dense(units = 128, activation = "relu") %>%
23 |   layer_dense(units = 10, activation = "softmax") # 10 output units with softmax
24 | 
25 | my_model %>% compile(
26 |   loss = "sparse_categorical_crossentropy",
27 |   optimizer = "rmsprop",
28 |   metrics = c("accuracy"))
29 | 
30 | my_cb <- callback_early_stopping(patience = 5, # early stopping that restores the best weights
31 |                                  restore_best_weights = TRUE)
32 | 
33 | my_history <- my_model %>%
34 |   fit(x = x_train2d,
35 |       y = y_train,
36 |       validation_split = 0.2, # fraction of the training data used for validation
37 |       batch_size = 128,
38 |       epochs = 20,
39 |       callbacks = list(my_cb),
40 |       verbose = 0)
41 | 
42 | plot(my_history) # plot the training history
43 | 
44 | my_model %>% evaluate(x = x_test2d, y = y_test) # evaluate on the test data
45 | 


--------------------------------------------------------------------------------
/code/R-notebook/r-06.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |   "cells": [
 3 |     {
 4 |       "id": "fe515360",
 5 |       "cell_type": "markdown",
 6 |       "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n",
 7 |       "metadata": {}
 8 |     },
 9 |     {
10 |       "id": "e5fca29e",
11 |       "cell_type": "markdown",
12 |       "source": "## 6.1 \u6a5f\u68b0\u5b66\u7fd2\u306e\u76ee\u7684\uff08\u672c\u66f8\u306e\u5834\u5408\uff09\n\n\n",
13 |       "metadata": {}
14 |     },
15 |     {
16 |       "id": "f7848f95",
17 |       "cell_type": "markdown",
18 |       "source": "## 6.2 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u30c7\u30fc\u30bf",
19 |       "metadata": {}
20 |     },
21 |     {
22 |       "cell_type": "code",
23 |       "execution_count": null,
24 |       "metadata": {},
25 |       "outputs": [],
26 |       "source": [
27 |         "iris\n",
28 |         "# \u4ee5\u4e0b\u7701\u7565"
29 |       ],
30 |       "id": "ce116acc-11c8-4cd4-bfdf-ab9b9a7c4142"
31 |     },
32 |     {
33 |       "id": "9da0985a",
34 |       "cell_type": "markdown",
35 |       "source": "## 6.3 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u624b\u6cd5",
36 |       "metadata": {}
37 |     }
38 |   ],
39 |   "nbformat": 4,
40 |   "nbformat_minor": 5,
41 |   "metadata": {
42 |     "kernelspec": {
43 |       "name": "ir",
44 |       "display_name": "R"
45 |     }
46 |   }
47 | }


--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-nnet.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import tensorflow as tf
 4 | from random import sample
 5 | from keras import callbacks, layers, models
 6 | from sklearn.metrics import confusion_matrix
 7 | 
 8 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()  # load MNIST
 9 | 
10 | my_index = sample(range(60000), 6000)  # 6000 distinct random indices below 60000
11 | x_train = x_train[my_index, :, :]  # keep only the sampled training images
12 | y_train = y_train[my_index]  # and the matching labels
13 | 
14 | x_train = x_train / 255  # divide by 255 (presumably rescales 0-255 pixel values to [0, 1])
15 | x_test  = x_test  / 255
16 | 
17 | my_model = models.Sequential()
18 | my_model.add(layers.Flatten(input_shape=[28, 28]))  # flatten each 28x28 input
19 | my_model.add(layers.Dense(units=256, activation="relu"))
20 | my_model.add(layers.Dense(units=10, activation="softmax"))  # 10 output units with softmax
21 | 
22 | my_model.compile(loss='sparse_categorical_crossentropy',
23 |                  optimizer='rmsprop',
24 |                  metrics=['accuracy'])
25 | 
26 | my_cb = callbacks.EarlyStopping(patience=5,  # early stopping that restores the best weights
27 |                                 restore_best_weights=True)
28 | 
29 | my_history = my_model.fit(
30 |     x=x_train,
31 |     y=y_train,
32 |     validation_split=0.2,  # fraction of the training data used for validation
33 |     batch_size=128,
34 |     epochs=20,  # upper bound; my_cb may stop training earlier
35 |     callbacks=[my_cb],
36 |     verbose=0)
37 | 
38 | tmp = pd.DataFrame(my_history.history)  # per-epoch metrics as a data frame
39 | tmp.plot(xlabel='epoch', style='o-')
40 | 
41 | import matplotlib.pyplot as plt
42 | plt.savefig('11-p-mnist-nnet.pdf')  # save the learning-curve figure
43 | 
44 | tmp = my_model.predict(x_test)  # per-class probabilities for the test images
45 | y_ = np.argmax(tmp, axis=-1)  # predicted class = index of the largest probability
46 | print(confusion_matrix(y_true=y_test, y_pred=y_))
47 | 
48 | print((y_test == y_).mean())  # test accuracy computed by hand
49 | 
50 | print(my_model.evaluate(x=x_test, y=y_test))  # loss and metrics as reported by Keras
51 | 


--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-cnn.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from random import sample
 3 | import tensorflow as tf
 4 | from keras import callbacks, layers, models
 5 | 
 6 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()  # load MNIST
 7 | 
 8 | my_index = sample(range(60000), 6000)  # 6000 distinct random indices below 60000
 9 | x_train = x_train[my_index, :, :]  # keep only the sampled training images
10 | y_train = y_train[my_index]  # and the matching labels
11 | 
12 | x_train = x_train / 255  # divide by 255 (presumably rescales 0-255 pixel values to [0, 1])
13 | x_test  = x_test  / 255
14 | 
15 | x_train2d = x_train.reshape(-1, 28, 28, 1)  # reshape to (n, 28, 28, 1)
16 | x_test2d = x_test.reshape(-1, 28, 28, 1)
17 | 
18 | my_model = models.Sequential()
19 | my_model.add(layers.Conv2D(filters=32, kernel_size=3, # convolution layer
20 |                            activation='relu',
21 |                            input_shape=[28, 28, 1]))
22 | my_model.add(layers.MaxPooling2D(pool_size=2))        # pooling layer
23 | my_model.add(layers.Flatten())
24 | my_model.add(layers.Dense(128, activation='relu'))
25 | my_model.add(layers.Dense(10, activation='softmax'))  # 10 output units with softmax
26 | 
27 | my_model.compile(loss='sparse_categorical_crossentropy',
28 |                  optimizer='rmsprop',
29 |                  metrics=['accuracy'])
30 | 
31 | my_cb = callbacks.EarlyStopping(patience=5,  # early stopping that restores the best weights
32 |                                 restore_best_weights=True)
33 | 
34 | my_history = my_model.fit(
35 |     x=x_train2d,
36 |     y=y_train,
37 |     validation_split=0.2,  # fraction of the training data used for validation
38 |     batch_size=128,
39 |     epochs=20,  # upper bound; my_cb may stop training earlier
40 |     callbacks=[my_cb],
41 |     verbose=0)
42 | 
43 | tmp = pd.DataFrame(my_history.history)  # per-epoch metrics as a data frame
44 | tmp.plot(xlabel='epoch', style='o-')
45 | 
46 | import matplotlib.pyplot as plt
47 | plt.savefig('11-p-mnist-cnn.pdf')  # save the learning-curve figure
48 | 
49 | print(my_model.evaluate(x=x_test2d, y=y_test))  # loss and metrics on the test data
50 | 


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-lenet.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "11-r-mnist-lenet.pdf", width = 5.83, height = 4.13) # open the PDF device that receives the plot
 2 | 
 3 | library(keras)
 4 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist() # unpack the MNIST train/test arrays
 5 | 
 6 | my_index <- sample(1:60000, 6000) # draw 6000 of the indices 1..60000 at random
 7 | x_train <- x_train[my_index, , ]  # keep only the sampled training images
 8 | y_train <- y_train[my_index]      # and the matching labels
 9 | 
10 | x_train <- x_train / 255 # divide by 255 (presumably rescales 0-255 pixel values to [0, 1])
11 | x_test  <- x_test  / 255
12 | 
13 | x_train2d <- x_train %>% array_reshape(c(-1, 28, 28, 1)) # reshape to (n, 28, 28, 1)
14 | x_test2d  <- x_test  %>% array_reshape(c(-1, 28, 28, 1))
15 | 
16 | my_model <- keras_model_sequential() %>%
17 |   layer_conv_2d(filters = 20, kernel_size = 5, activation = "relu", # first convolution: 20 filters
18 |                 input_shape = c(28, 28, 1)) %>%
19 |   layer_max_pooling_2d(pool_size = 2, strides = 2) %>%
20 |   layer_conv_2d(filters = 50, kernel_size = 5, activation = "relu") %>% # second convolution: 50 filters
21 |   layer_max_pooling_2d(pool_size = 2, strides = 2) %>%
22 |   layer_dropout(rate = 0.25) %>%
23 |   layer_flatten() %>%
24 |   layer_dense(units = 500, activation = "relu") %>%
25 |   layer_dropout(rate = 0.5) %>%
26 |   layer_dense(units = 10, activation = "softmax") # 10 output units with softmax
27 | 
28 | my_model %>% compile(
29 |   loss = "sparse_categorical_crossentropy",
30 |   optimizer = "rmsprop",
31 |   metrics = c("accuracy"))
32 | 
33 | my_cb <- callback_early_stopping(patience = 5, # early stopping that restores the best weights
34 |                                  restore_best_weights = TRUE)
35 | 
36 | my_history <- my_model %>%
37 |   fit(x = x_train2d,
38 |       y = y_train,
39 |       validation_split = 0.2, # fraction of the training data used for validation
40 |       batch_size = 128,
41 |       epochs = 20,
42 |       callbacks = list(my_cb),
43 |       verbose = 0)
44 | 
45 | plot(my_history) # plot the training history
46 | 
47 | my_model %>% evaluate(x = x_test2d, y = y_test) # evaluate on the test data
48 | 


--------------------------------------------------------------------------------
/docker/rstudio/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rocker/tidyverse
 2 | 
 3 | USER root
 4 | # As root: remove the dpkg excludes file, point apt at the Ubuntu mirror list, install extra tools, reinstall man-db/coreutils/manpages, clean the apt caches, then run unminimize answering "y".
 5 | RUN rm /etc/dpkg/dpkg.cfg.d/excludes \
 6 |   && sed -i -e 's%http://[^ ]\+%mirror://mirrors.ubuntu.com/mirrors.txt%g' /etc/apt/sources.list \
 7 |   && apt-get update \
 8 |   && apt-get install -y --no-install-recommends \
 9 |     curl \
10 |     default-jdk \
11 |     dnsutils \
12 |     iputils-ping \
13 |     less \
14 |     libglpk-dev \
15 |     libnode64 \
16 |     libtbb2 \
17 |     net-tools \
18 |     vim-tiny \
19 |   && apt-get --reinstall install -y man-db coreutils manpages \
20 |   && apt-get clean \
21 |   && rm -rf /var/lib/apt/lists/* \
22 |   && yes | unminimize
23 | 
24 | USER rstudio
25 | # As the rstudio user: install the R packages listed below from the cran.ism.ac.jp mirror, pin xgboost to 1.4.1.1, install ggbiplot from GitHub, then install Miniconda and Keras.
26 | RUN Rscript -e ' \
27 |   options(Ncpus = 32); \
28 |   options(repos = "https://cran.ism.ac.jp"); \
29 |   Sys.setenv(DOWNLOAD_STATIC_LIBV8=1); \
30 |   install.packages(c( \
31 |     "caret", \
32 |     "doParallel", \
33 |     "epitools", \
34 |     "exactci", \
35 |     "fable", \
36 |     "factoextra", \
37 |     "feasts", \
38 |     "furrr", \
39 |     "ggfortify", \
40 |     "ggmosaic", \
41 |     "glmnetUtils", \
42 |     "gplots", \
43 |     "h2o", \
44 |     "igraph", \
45 |     "keras", \
46 |     "leaps", \
47 |     "lintr", \
48 |     "neuralnet", \
49 |     "pastecs", \
50 |     "prophet", \
51 |     "PRROC", \
52 |     "psych", \
53 |     "proxy", \
54 |     "randomForest", \
55 |     "reticulate", \
56 |     "rpart.plot", \
57 |     "tsibble", \
58 |     "urca", \
59 |     "vcd" \
60 |   )); \
61 |   remotes::install_version("xgboost", version = "1.4.1.1"); \
62 |   remotes::install_github(c("vqv/ggbiplot")); \
63 |   reticulate::install_miniconda(); \
64 |   keras::install_keras();'
65 | 
66 | WORKDIR /home/rstudio
67 | 
68 | USER root
69 | 


--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-tuning2.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "08-r-enet-tuning2.pdf", width = 6, height = 4.5) # open the PDF device that receives the plot
 2 | 
 3 | library(furrr)
 4 | plan(multisession) # select the multisession plan for the future_* call used later
 5 | 
 6 | library(tidyverse)
 7 | my_url <- str_c("https://raw.githubusercontent.com",
 8 |                 "/taroyabuki/fromzero/master/data/wine.csv")
 9 | my_data <- read_csv(my_url) # download the wine data
10 | 
11 | my_sd <- function(x) { # square root of the divide-by-n variance of x
12 |   size <- length(x)
13 |   sqrt((size - 1) / size) * sd(x)
14 | }
15 | 
16 | my_loocv <- function(A, B) { # leave-one-out RMSE of an elastic net with lambda = A, alpha = B
17 |   my_predict <- function(id) { # predict row `id` from a model fitted without that row
18 |     my_train <- my_data[-id, ]
19 |     my_valid <- my_data[ id, ]
20 |     y <- my_train$LPRICE2
21 |     u <- mean(y)
22 |     s <- my_sd(y)
23 |     my_train2 <- my_train %>% mutate(LPRICE2 = (y - u) / s) # standardize the response with the training mean and SD
24 |     my_model <-
25 |       glmnetUtils::glmnet(
26 |         form = LPRICE2 ~ ., data = my_train2,
27 |         lambda = A,  alpha = B, standardize = TRUE)
28 |     (my_model %>% predict(my_valid, exact = TRUE) * s + u)[1] # map the prediction back to the original scale
29 |   }
30 |   y  <- my_data$LPRICE2
31 |   y_ <- seq_len(length(y)) %>% map_dbl(my_predict) # one held-out prediction per row
32 |   rmse <- mean((y_ - y)^2)^0.5
33 |   list(A = A, B = B, RMSE = rmse) # result as a named list
34 | }
35 | 
36 | As <- seq(0, 0.1, length.out = 21) # candidate values of A
37 | Bs <- seq(0, 0.1, length.out = 6)  # candidate values of B
38 | my_params <- expand.grid(A = As, B = Bs) # every (A, B) combination
39 | 
40 | tmp <- my_params %>% future_pmap_dfr(my_loocv) # call my_loocv for each row and bind the results by row
41 | 
42 | my_result <- tmp %>%
43 |   mutate(B = as.factor(B)) %>% # factor; presumably so the plot colors B as discrete groups
44 |   group_by(A, B) %>%
45 |   summarise(RMSE = mean(RMSE), .groups = "drop")
46 | 
47 | my_result %>% filter(RMSE == min(RMSE)) # row(s) with the smallest RMSE
48 | 
49 | my_result %>% ggplot(aes(x = A, y = RMSE, color = B)) +
50 |   geom_point() +
51 |   geom_line() +
52 |   theme(legend.position = c(0, 0), # legend position and justification both set to (0, 0)
53 |         legend.justification = c(0, 0)) +
54 |   xlab("A ( = lambda)") +
55 |   guides(color = guide_legend("B ( = alpha)"))
56 | 


--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-lenet.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from random import sample
 3 | import tensorflow as tf
 4 | from keras import callbacks, layers, models
 5 | 
 6 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
 7 | 
 8 | my_index = sample(range(60000), 6000)
 9 | x_train = x_train[my_index, :, :]
10 | y_train = y_train[my_index]
11 | 
12 | x_train = x_train / 255
13 | x_test  = x_test  / 255
14 | 
15 | x_train2d = x_train.reshape(-1, 28, 28, 1)
16 | x_test2d = x_test.reshape(-1, 28, 28, 1)
17 | 
18 | my_model = models.Sequential()
19 | my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu',
20 |                            input_shape=(28, 28, 1)))
21 | my_model.add(layers.MaxPooling2D(pool_size=2, strides=2))
22 | my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu'))
23 | my_model.add(layers.MaxPooling2D(pool_size=2, strides=2))
24 | my_model.add(layers.Dropout(rate=0.25))
25 | my_model.add(layers.Flatten())
26 | my_model.add(layers.Dense(500, activation='relu'))
27 | my_model.add(layers.Dropout(rate=0.5))
28 | my_model.add(layers.Dense(10, activation='softmax'))
29 | 
30 | my_model.compile(loss='sparse_categorical_crossentropy',
31 |                  optimizer='rmsprop',
32 |                  metrics=['accuracy'])
33 | 
34 | my_cb = callbacks.EarlyStopping(patience=5,
35 |                                 restore_best_weights=True)
36 | 
37 | my_history = my_model.fit(
38 |     x=x_train2d,
39 |     y=y_train,
40 |     validation_split=0.2,
41 |     batch_size=128,
42 |     epochs=20,
43 |     callbacks=[my_cb],
44 |     verbose=0)
45 | 
46 | tmp = pd.DataFrame(my_history.history)
47 | tmp.plot(xlabel='epoch', style='o-')
48 | 
49 | import matplotlib.pyplot as plt
50 | plt.savefig('11-p-mnist-lenet.pdf')
51 | 
52 | print(my_model.evaluate(x=x_test2d, y=y_test))
53 | 


--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-tuning2.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | import numpy as np
 3 | import pandas as pd
 4 | from pandarallel import pandarallel
 5 | from scipy.stats import zscore
 6 | from sklearn.linear_model import ElasticNet
 7 | from sklearn.metrics import mean_squared_error
 8 | from sklearn.pipeline import Pipeline
 9 | from sklearn.preprocessing import StandardScaler
10 | 
11 | my_url = ('https://raw.githubusercontent.com'
12 |           '/taroyabuki/fromzero/master/data/wine.csv')
13 | my_data = pd.read_csv(my_url)  # download the wine data
14 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2']  # features / response; my_loocv reads this module-level y
15 | 
16 | def my_loocv(A, B):  # leave-one-out RMSE of an elastic net with alpha=A, l1_ratio=B
17 |     def my_predict(id):  # predict row `id` from a model fitted without that row
18 |         my_train = my_data.drop([id])  # drop selects by index label ...
19 |         my_valid = my_data.take([id])  # ... take selects by position; they agree only for a 0..n-1 index (presumably the case here)
20 |         X, y = my_train.drop(columns=['LPRICE2']), my_train.LPRICE2  # local to my_predict
21 |         u = y.mean()
22 |         s = y.std(ddof=0)
23 |         my_model = Pipeline([
24 |             ('sc', StandardScaler()),
25 |             ('enet', ElasticNet(alpha=A, l1_ratio=B))]).fit(X, zscore(y))  # fit against the standardized response
26 |         X = my_valid.drop(columns=['LPRICE2'])
27 |         return (my_model.predict(X) * s + u)[0]  # map the prediction back to the original scale
28 | 
29 |     y_ = [my_predict(id) for id in range(len(my_data))]  # one held-out prediction per row
30 |     rmse = mean_squared_error(y_, y)**0.5  # y here is the module-level response, not my_predict's local y
31 |     return pd.Series([A, B, rmse], index=['A', 'B', 'RMSE'])
32 | 
33 | As = np.linspace(0, 0.1, 21)  # candidate values of A
34 | Bs = np.linspace(0, 0.1,  6)  # candidate values of B
35 | my_plan = pd.DataFrame(itertools.product(As, Bs), columns=['A', 'B'])  # every (A, B) combination
36 | 
37 | pandarallel.initialize()
38 | my_results = my_plan.parallel_apply(lambda row: my_loocv(*row), axis=1)  # call my_loocv once per row
39 | 
40 | print(my_results[my_results.RMSE == my_results.RMSE.min()])  # row(s) with the smallest RMSE
41 | 
42 | my_results.pivot(index='A', columns='B', values='RMSE').plot(  # A as index, one column per B
43 |     style='o-', xlabel='A ( = alpha)', ylabel='RMSE').legend(
44 |     title='B ( = l1_ratio)')
45 | 
46 | import matplotlib.pyplot as plt
47 | plt.savefig('08-p-enet-tuning2.pdf')  # save the figure
48 | 


--------------------------------------------------------------------------------
/docker/jupyter/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM jupyter/datascience-notebook:python-3.8.8
 2 | 
 3 | USER root
 4 | # As root: remove the dpkg excludes file, point apt at the Ubuntu mirror list, install extra tools, reinstall man-db/coreutils/manpages, clean the apt caches, then run unminimize answering "y".
 5 | RUN rm /etc/dpkg/dpkg.cfg.d/excludes \
 6 |   && sed -i -e 's%http://[^ ]\+%mirror://mirrors.ubuntu.com/mirrors.txt%g' /etc/apt/sources.list \
 7 |   && apt-get update \
 8 |   && apt-get install -y --no-install-recommends \
 9 |     default-jdk \
10 |     dnsutils \
11 |     graphviz \
12 |     iputils-ping \
13 |     less \
14 |     net-tools \
15 |   && apt-get --reinstall install -y man-db coreutils manpages \
16 |   && apt-get clean \
17 |   && rm -rf /var/lib/apt/lists/* \
18 |   && echo y | unminimize
19 | 
20 | USER jovyan
21 | 
22 | # Python packages; pystan is pinned to 2.* for fbprophet, which is installed in its own step afterwards
23 | RUN pip install \
24 |   graphviz \
25 |   h2o \
26 |   japanize_matplotlib \
27 |   keras \
28 |   lxml \
29 |   pandarallel \
30 |   pca \
31 |   pmdarima \
32 |   pycodestyle \
33 |   pystan==2.* \
34 |   tensorflow \
35 |   xgboost
36 | # fbprophet is installed separately, after the pinned pystan above
37 | RUN pip install fbprophet
38 | # Write an IPython profile setting ast_node_interactivity = 'all'
39 | RUN mkdir -p /home/jovyan/.ipython/profile_default && echo "c.InteractiveShell.ast_node_interactivity = 'all'" > /home/jovyan/.ipython/profile_default/ipython_config.py
40 | # Install the R packages listed below from the cran.ism.ac.jp mirror, pin xgboost to 1.4.1.1, and install ggbiplot from GitHub.
41 | RUN Rscript -e ' \
42 |   options(Ncpus = 32); \
43 |   options(repos = "https://cran.ism.ac.jp"); \
44 |   Sys.setenv(DOWNLOAD_STATIC_LIBV8=1); \
45 |   install.packages(c( \
46 |     "doParallel", \
47 |     "e1071", \
48 |     "epitools", \
49 |     "exactci", \
50 |     "fable", \
51 |     "factoextra", \
52 |     "feasts", \
53 |     "furrr", \
54 |     "ggfortify", \
55 |     "ggmosaic", \
56 |     "gplots", \
57 |     "glmnetUtils", \
58 |     "h2o", \
59 |     "igraph", \
60 |     "keras", \
61 |     "leaps", \
62 |     "lintr", \
63 |     "neuralnet", \
64 |     "pastecs", \
65 |     "prophet", \
66 |     "proxy", \
67 |     "PRROC", \
68 |     "psych", \
69 |     "rpart.plot", \
70 |     "tsibble", \
71 |     "vcd" \
72 |   )); \
73 |   remotes::install_version("xgboost", version = "1.4.1.1"); \
74 |   remotes::install_github(c("vqv/ggbiplot"));'
75 | 
76 | WORKDIR /home/jovyan
77 | 
78 | USER root
79 | 


--------------------------------------------------------------------------------
/code/R-notebook/README.md:
--------------------------------------------------------------------------------
 1 | # Jupyter Notebooks for R
 2 | 
 3 | chapter|Open in Colab
 4 | --|--
 5 | 03|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-03.ipynb)
 6 | 04|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-04.ipynb)
 7 | 05|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-05.ipynb)
 8 | 06|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-06.ipynb)
 9 | 07|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-07.ipynb)
10 | 08|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-08.ipynb)
11 | 09|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-09.ipynb)
12 | 10|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-10.ipynb)
13 | 11|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-11.ipynb)
14 | 12|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-12.ipynb)
15 | 13|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-13.ipynb)
16 | 


--------------------------------------------------------------------------------
/code/Python-notebook/README.md:
--------------------------------------------------------------------------------
 1 | # Jupyter Notebooks for Python
 2 | 
 3 | chapter|Open in Colab
 4 | --|--
 5 | 03|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-03.ipynb)
 6 | 04|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-04.ipynb)
 7 | 05|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-05.ipynb)
 8 | 06|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-06.ipynb)
 9 | 07|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-07.ipynb)
10 | 08|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-08.ipynb)
11 | 09|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-09.ipynb)
12 | 10|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-10.ipynb)
13 | 11|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-11.ipynb)
14 | 12|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-12.ipynb)
15 | 13|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-13.ipynb)
16 | 


--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-lenet-miss.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | import pandas as pd
 4 | import tensorflow as tf
 5 | from random import sample
 6 | from keras import callbacks, layers, models
 7 | 
 8 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
 9 | 
10 | #my_index = sample(range(60000), 6000)
11 | #x_train = x_train[my_index, :, :]
12 | #y_train = y_train[my_index]
13 | 
14 | x_train = x_train / 255
15 | x_test  = x_test  / 255
16 | 
17 | x_train2d = x_train.reshape(-1, 28, 28, 1)
18 | x_test2d = x_test.reshape(-1, 28, 28, 1)
19 | 
20 | my_model = models.Sequential()
21 | my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu',
22 |                            input_shape=(28, 28, 1)))
23 | my_model.add(layers.MaxPooling2D(pool_size=2, strides=2))
24 | my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu'))
25 | my_model.add(layers.MaxPooling2D(pool_size=2, strides=2))
26 | my_model.add(layers.Dropout(rate=0.25))
27 | my_model.add(layers.Flatten())
28 | my_model.add(layers.Dense(500, activation='relu'))
29 | my_model.add(layers.Dropout(rate=0.5))
30 | my_model.add(layers.Dense(10, activation='softmax'))
31 | 
32 | my_model.compile(loss='sparse_categorical_crossentropy',
33 |                  optimizer='rmsprop',
34 |                  metrics=['accuracy'])
35 | 
36 | my_cb = callbacks.EarlyStopping(patience=5,
37 |                                 restore_best_weights=True)
38 | 
39 | my_history = my_model.fit(
40 |     x=x_train2d,
41 |     y=y_train,
42 |     validation_split=0.2,
43 |     batch_size=128,
44 |     epochs=20,
45 |     callbacks=[my_cb],
46 |     verbose=0)
47 | 
48 | y_prob = my_model.predict(x_test2d)                    # カテゴリに属する確率
49 | 
50 | tmp = pd.DataFrame({
51 |     'y_prob': np.max(y_prob, axis=1),                  # 確率の最大値
52 |     'y_': np.argmax(y_prob, axis=1),                   # 予測カテゴリ
53 |     'y': y_test,                                       # 正解
54 |     'id': range(len(y_test))})                         # 番号
55 | 
56 | tmp = tmp[tmp.y_ != tmp.y]                             # 予測がはずれたものを残す
57 | my_result = tmp.sort_values('y_prob', ascending=False) # 確率の大きい順に並び替える
58 | print(my_result.head())
59 | 
60 | for i in range(5):
61 |     plt.subplot(1, 5, i + 1)
62 |     ans = my_result['y'].iloc[i]
63 |     id = my_result['id'].iloc[i]
64 |     plt.title(f'{ans} ({id})')
65 |     plt.imshow(x_test[id])
66 |     plt.axis('off')
67 | 
68 | plt.savefig('11-p-mnist-lenet-miss.pdf')
69 | 


--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-lenet-miss.R:
--------------------------------------------------------------------------------
 1 | pdf(file = "11-r-mnist-lenet-miss.pdf", width = 5.83, height = 4.13) # open the PDF device that receives the plot
 2 | 
 3 | library(keras)
 4 | library(tidyverse)
 5 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist() # unpack the MNIST train/test arrays
 6 | 
 7 | #my_index <- sample(1:60000, 6000)
 8 | #x_train <- x_train[my_index, , ]
 9 | #y_train <- y_train[my_index]
10 | 
11 | x_train <- x_train / 255 # divide by 255 (presumably rescales 0-255 pixel values to [0, 1])
12 | x_test  <- x_test  / 255
13 | 
14 | x_train2d <- x_train %>% array_reshape(c(-1, 28, 28, 1)) # reshape to (n, 28, 28, 1)
15 | x_test2d  <- x_test  %>% array_reshape(c(-1, 28, 28, 1))
16 | 
17 | my_model <- keras_model_sequential() %>%
18 |   layer_conv_2d(filters = 20, kernel_size = 5, activation = "relu", # first convolution: 20 filters
19 |                 input_shape = c(28, 28, 1)) %>%
20 |   layer_max_pooling_2d(pool_size = 2, strides = 2) %>%
21 |   layer_conv_2d(filters = 50, kernel_size = 5, activation = "relu") %>% # second convolution: 50 filters
22 |   layer_max_pooling_2d(pool_size = 2, strides = 2) %>%
23 |   layer_dropout(rate = 0.25) %>%
24 |   layer_flatten() %>%
25 |   layer_dense(units = 500, activation = "relu") %>%
26 |   layer_dropout(rate = 0.5) %>%
27 |   layer_dense(units = 10, activation = "softmax") # 10 output units with softmax
28 | 
29 | my_model %>% compile(
30 |   loss = "sparse_categorical_crossentropy",
31 |   optimizer = "rmsprop",
32 |   metrics = c("accuracy"))
33 | 
34 | my_cb <- callback_early_stopping(patience = 5, # early stopping that restores the best weights
35 |                                  restore_best_weights = TRUE)
36 | 
37 | my_history <- my_model %>%
38 |   fit(x = x_train2d,
39 |       y = y_train,
40 |       validation_split = 0.2, # fraction of the training data used for validation
41 |       batch_size = 128,
42 |       epochs = 20,
43 |       callbacks = list(my_cb),
44 |       verbose = 0)
45 | 
46 | y_prob <- my_model %>% predict(x_test2d) # probability of belonging to each category
47 | 
48 | my_result <- data.frame(
49 |   y_prob = apply(y_prob, 1, max),           # maximum probability
50 |   y_     = apply(y_prob, 1, which.max) - 1, # predicted category (which.max is 1-based, hence the - 1)
51 |   y      = y_test,                          # correct answer
52 |   id     = seq_len(length(y_test))) %>%     # row number
53 |   filter(y_ != y) %>%                       # keep the misclassified cases
54 |   arrange(desc(y_prob))                     # sort by probability, largest first
55 | head(my_result)
56 | 
57 | tmp <- my_result[1:5, ]$id # row numbers of the first five rows
58 | my_labels <- sprintf("%s (%s)", # label text: correct answer, then the row number
59 |   my_result[1:5, ]$y, tmp)
60 | my_fig <- expand.grid( # one row per (label, y, x) combination, label varying fastest
61 |   label = my_labels,
62 |   y = 28:1,
63 |   x = 1:28)
64 | my_fig$z <- as.vector( # pixel values of the five selected test images, flattened
65 |   x_test[tmp, , ])
66 | 
67 | my_fig %>% ggplot(
68 |   aes(x = x, y = y, fill = z)) +
69 |   geom_raster() +
70 |   coord_fixed() +
71 |   theme_void() +
72 |   theme(legend.position = "none") +
73 |   facet_grid(. ~ label) # one panel per label
74 | 


--------------------------------------------------------------------------------
/code/Python-notebook/python-06.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |   "cells": [
 3 |     {
 4 |       "id": "c5f5660f",
 5 |       "cell_type": "markdown",
 6 |       "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n",
 7 |       "metadata": {}
 8 |     },
 9 |     {
10 |       "id": "4e6dc4c2",
11 |       "cell_type": "markdown",
12 |       "source": "## 6.1 \u6a5f\u68b0\u5b66\u7fd2\u306e\u76ee\u7684\uff08\u672c\u66f8\u306e\u5834\u5408\uff09\n\n\n",
13 |       "metadata": {}
14 |     },
15 |     {
16 |       "id": "11686fa0",
17 |       "cell_type": "markdown",
18 |       "source": "## 6.2 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u30c7\u30fc\u30bf",
19 |       "metadata": {}
20 |     },
21 |     {
22 |       "cell_type": "code",
23 |       "execution_count": null,
24 |       "metadata": {},
25 |       "outputs": [],
26 |       "source": [
27 |         "import statsmodels.api as sm\n",
28 |         "iris = sm.datasets.get_rdataset('iris', 'datasets').data\n",
29 |         "iris.head()\n",
30 |         "# \u4ee5\u4e0b\u7701\u7565"
31 |       ],
32 |       "id": "8fc0d772-605e-46ee-b679-2603d838c891"
33 |     },
34 |     {
35 |       "cell_type": "code",
36 |       "execution_count": null,
37 |       "metadata": {},
38 |       "outputs": [],
39 |       "source": [
40 |         "import seaborn as sns\n",
41 |         "iris = sns.load_dataset('iris')\n",
42 |         "iris.head()\n",
43 |         "# \u4ee5\u4e0b\u7701\u7565"
44 |       ],
45 |       "id": "c506c249-f4ca-4057-af97-58037c02a6ae"
46 |     },
47 |     {
48 |       "cell_type": "code",
49 |       "execution_count": null,
50 |       "metadata": {},
51 |       "outputs": [],
52 |       "source": [
53 |         "import pandas as pd\n",
54 |         "from sklearn.datasets import load_iris\n",
55 |         "tmp = load_iris()\n",
56 |         "iris = pd.DataFrame(tmp.data, columns=tmp.feature_names)\n",
57 |         "iris['target'] = tmp.target_names[tmp.target]\n",
58 |         "iris.head()\n",
59 |         "# \u4ee5\u4e0b\u7701\u7565"
60 |       ],
61 |       "id": "94e44eb0-09ae-4573-8eb8-7d76662ca5ea"
62 |     },
63 |     {
64 |       "id": "9edbd001",
65 |       "cell_type": "markdown",
66 |       "source": "## 6.3 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u624b\u6cd5",
67 |       "metadata": {}
68 |     }
69 |   ],
70 |   "nbformat": 4,
71 |   "nbformat_minor": 5,
72 |   "metadata": {
73 |     "kernelspec": {
74 |       "name": "python3",
75 |       "display_name": "Python 3"
76 |     }
77 |   }
78 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [講談社サイエンティフィク](https://www.kspub.co.jp/) ／ [実践Data Scienceシリーズ](https://www.kspub.co.jp/book/series/S069.html) ／ [ゼロからはじめるデータサイエンス入門](https://www.kspub.co.jp/book/detail/5132326.html)
 2 | 
 3 | # ゼロからはじめるデータサイエンス入門（講談社, 2021）サポートサイト
 4 | 
 5 | - [書店へのリンク集（版元ドットコム）](https://www.hanmoto.com/bd/isbn/9784065132326)
 6 | - [国会図書館](https://ndlsearch.ndl.go.jp/books/R100000002-I031834151)
 7 | 
 8 | <img src="https://www.kspub.co.jp/book/detail/images/8e2cee80a3e43a0cbbecef67a945b93613c656b0.jpg" alt="書影" width="300"/>
 9 | 
10 | 著者：**辻真吾**（[@tsjshg](https://twitter.com/tsjshg)）・**矢吹太朗**（[@yabuki](https://twitter.com/yabuki)）
11 | 
12 | RやPythonのコード（具体的なコンピュータプログラム）の読み書きを通じてデータサイエンスについて学ぶための一冊です．
13 | コードなしで学びたいという人には，別の書籍にあたることをお勧めします．
14 | 
15 | 本書には，次の三つの特徴があります．
16 | 
17 | 1. 第1部「データサイエンスの準備」で，準備に時間をかけています．
18 | 1. ほぼ全ての例をコードに基づいて説明しています．本書掲載のコードはサポートサイト（[ここ](#コード)）でも公開しています（使用方法は2.6節を参照）．
19 | 1. 第2部「機械学習」では，ほぼ全ての課題をRとPythonで解決し，同じ結果を得ることを試みています．
20 | 
21 | ## [更新情報・正誤表](update.md)
22 | 
23 | ## 目次
24 | 
25 | - はじめに
26 | - 第1部
27 |     - 第1章 コンピュータとネットワーク
28 |     - 第2章 データサイエンスのための環境
29 |     - 第3章 RとPython
30 |     - 第4章 統計入門
31 |     - 第5章 前処理
32 | - 第2部
33 |     - 第6章 機械学習の目的・データ・手法
34 |     - 第7章 回帰1（単回帰）
35 |     - 第8章 回帰2（重回帰）
36 |     - 第9章 分類1（多値分類）
37 |     - 第10章 分類2（2値分類）
38 |     - 第11章 深層学習とAutoML
39 |     - 第12章 時系列予測
40 |     - 第13章 教師なし学習
41 | - 付録A 環境構築
42 | - おわりに
43 | - 参考文献
44 | - 索引
45 | 
46 | ## コード
47 | 
48 | 言語|システム|コード|実行結果
49 | --|--|--|--
50 | R|Google Colab|[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/r.ipynb)|[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/r-results.ipynb)
51 | R|Jupyter|[r.ipynb](code/r.ipynb)|[r-results.ipynb](code/r-results.ipynb)
52 | R|RStudio|[r.Rmd](code/r.Rmd)|[r.html](https://taroyabuki.github.io/fromzero/r.html)
53 | Python|Google Colab|[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/python.ipynb)|[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/python-results.ipynb)
54 | Python|Jupyter|[python.ipynb](code/python.ipynb)|[python-results.ipynb](code/python-results.ipynb)
55 | Python|RStudio|[python.Rmd](code/python.Rmd)|[python.html](https://taroyabuki.github.io/fromzero/python.html)
56 | 
57 | コードの使い方は，2.6節を参照してください[^1][^2]．
58 | 
59 | [^1]: [Amazon SageMaker Studio Lab](addendum/sagemaker)での動作も確認済みです．
60 | 
61 | [^2]: Appleシリコン搭載のMacでは，JupyterとRStudio上で第11章のコードが動作しません．第11章のコードを試す場合は，Google Colabを利用してください．
62 | 
63 | ## Docker
64 | 
65 | 環境|言語|説明
66 | --|--|--
67 | Jupyter Notebook|R, Python|[Jupyter Notebook](docker/jupyter)
68 | RStudio|R|[RStudio](docker/rstudio)
69 | 
70 | Dockerの使い方は，2.3節を参照してください．
71 | 
72 | ## [画像とそのソースコード](figures)
73 | 
74 | ## ライセンス
75 | 
76 | The contents of https://github.com/taroyabuki/fromzero by Shingo Tsuji and Taro Yabuki are licensed under the [Apache License, Version 2.0](LICENSE).
77 | 


--------------------------------------------------------------------------------
/addendum/sagemaker/README.md:
--------------------------------------------------------------------------------
  1 | # Amazon SageMaker Studio Lab
  2 | 
  3 | 無料の[Amazon SageMaker Studio Lab](https://studiolab.sagemaker.aws/)（以下，Studio Lab）で本書のコードを動かすための環境を作ります．Studio Labの概要は，[Amazon SageMaker Studio Lab入門](https://atmarkit.itmedia.co.jp/ait/subtop/features/di/sagemakerstudiolab_index.html)を参照してください．
  4 | 
  5 | TerminalでGitHubリポジトリをクローンします．
  6 | 
  7 | ```bash
  8 | git clone https://github.com/taroyabuki/fromzero.git
  9 | ```
 10 | 
 11 | ## 仮想環境の構築
 12 | 
 13 | ```bash
 14 | # Rの場合
 15 | conda env create --file fromzero/addendum/sagemaker/sage-r.yml
 16 | 
 17 | # Pythonの場合
 18 | conda env create --file fromzero/addendum/sagemaker/sage-python.yml
 19 | ```
 20 | 
 21 | ## Jupyter Notebookの利用
 22 | 
 23 | 画面左にファイルブラウザーがあります．そこから，次のノートブックを開いてください．
 24 | 
 25 | 言語|カーネル|全体のノートブック|各章のノートブック
 26 | --|--|--|--
 27 | R|sage-r:R|fromzero/code/r.ipynb|fromzero/code/R-notebook
 28 | Python|sage-python:Python|fromzero/code/python.ipynb|fromzero/code/Python-notebook
 29 | 
 30 | ノートブックのファイル（.ipynb）をダブルクリックするとカーネル選択のダイアログが出るので，Rの場合はsage-r:R，Pythonの場合はsage-python:Pythonを選択してください．
 31 | 
 32 | 補足：Jupyter Notebook（Python）の出力を本書と同じにするためには，最初に次のコードを実行してください．54頁の脚註24のようにしてもかまいません．
 33 | 
 34 | ```python
 35 | from IPython.core.interactiveshell import InteractiveShell
 36 | InteractiveShell.ast_node_interactivity = "all"
 37 | ```
 38 | 
 39 | ## 仮想環境の削除
 40 | 
 41 | ```bash
 42 | # Rの場合
 43 | conda remove -n sage-r --all -y
 44 | 
 45 | # Pythonの場合
 46 | conda remove -n sage-python --all -y
 47 | ```
 48 | 
 49 | すべてを削除してやり直す方法は，[Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/studio-lab-use-manage.html#:~:text=Start%20runtime.-,Reset%20environment,-To%20remove%20all)に掲載されています．
 50 | 
 51 | ## 補足
 52 | 
 53 | 環境構築に使った.ymlは次のように作成しました（このコードを実行する必要はありません）．
 54 | 
 55 | ```bash
 56 | # Rの場合
 57 | conda create -y -n sage-r python=3.8.8
 58 | conda activate sage-r
 59 | 
 60 | conda install -y -c conda-forge \
 61 |   r-caret \
 62 |   r-doparallel \
 63 |   r-exactci \
 64 |   r-fable \
 65 |   r-factoextra \
 66 |   r-feasts \
 67 |   r-furrr \
 68 |   r-ggfortify \
 69 |   r-ggmosaic \
 70 |   r-glmnetutils \
 71 |   r-h2o==3.34.0.3 \
 72 |   r-igraph \
 73 |   r-irkernel \
 74 |   r-keras \
 75 |   r-neuralnet \
 76 |   r-pastecs \
 77 |   r-prophet \
 78 |   r-prroc \
 79 |   r-psych \
 80 |   r-randomforest \
 81 |   r-remotes \
 82 |   r-rpart.plot \
 83 |   r-tidyverse \
 84 |   r-urca \
 85 |   r-vcd \
 86 |   r-xgboost==1.4.1
 87 | 
 88 | conda install -y -c bioconda r-ggbiplot
 89 | 
 90 | Rscript -e 'keras::install_keras()'
 91 | 
 92 | conda env export -n sage-r > sage-r.yml
 93 | ```
 94 | 
 95 | ```bash
 96 | # Pythonの場合
 97 | conda create -y -n sage-python python=3.8.8
 98 | conda activate sage-python
 99 | 
100 | conda install -y \
101 |   fbprophet \
102 |   ipykernel \
103 |   keras \
104 |   lxml \
105 |   matplotlib \
106 |   pandarallel \
107 |   pmdarima \
108 |   python-graphviz \
109 |   seaborn \
110 |   scikit-learn \
111 |   scipy==1.6.3 \
112 |   statsmodels \
113 |   tensorflow-gpu \
114 |   xgboost==1.5.1
115 | 
116 | conda install -y -c anaconda h2o h2o-py
117 | 
118 | pip install pca
119 | 
120 | conda env export -n sage-python > sage-python.yml
121 | ```
122 | 


--------------------------------------------------------------------------------
/update.md:
--------------------------------------------------------------------------------
 1 | # 更新情報・正誤表
 2 | 
 3 | 公開しているコードでは，以下の内容を反映しています．
 4 | 
 5 | ## 更新情報
 6 | 
 7 | 場所|説明
 8 | --|--
 9 | p. 6|Windows 11には，脚註4で紹介しているWindows Terminalが搭載されています．
10 | p. 20|[Amazon SageMaker Studio Lab](addendum/sagemaker)での動作も確認済みです．表2.1のクラウド・ノートブックに相当します．
11 | p. 22 脚註3|Google Colabでノートブックを新規作成した後で，ランタイム→ランタイムのタイプを変更で，Rを選択できるようになりました．
12 | p. 77|**（バージョン依存）** 3.4.2.1のPythonのコードの`my_df2 = my_df.append(tmp)`を`my_df2 = pd.concat([my_df, tmp])`としなければならないことがあります．
13 | p. 112 脚註6|**（バージョン依存）** 対象を数値の列に限定するオプション`numeric_only=True`が必要な場合があります．
14 | p. 113|**（バージョン依存）** Pythonのコードを，`my_df.var(numeric_only=True)`や`my_df.apply('var', numeric_only=True)`としなければならないことがあります．
15 | p. 115|**（バージョン依存）** Pythonのコードを，`my_df.groupby('gender').mean(numeric_only=True)`あるいは`my_df.groupby('gender').agg('mean', numeric_only=True)`あるいは`my_df.drop(['name'], axis=1).groupby('gender').agg(np.mean)`としなければならないことがあります．
16 | p. 151, 152|GitHub上でのCSVファイルの表示方法が変更されたので，https://github.com/taroyabuki/fromzero/blob/master/data/exam.csv の代わりにhttps://taroyabuki.github.io/fromzero/exam.html を使ってください．
17 | p. 160, 161|**（バージョン依存）** Pythonのコードの`get_feature_names()`を`get_feature_names_out()`としなければならないことがあります．
18 | p. 184|[予測値の期待値の信頼区間](addendum/07.03.02/)
19 | p. 194|[「7.4.3 当てはまりの良さの指標の問題点」についての補足](addendum/07.04.03.ipynb)
20 | p. 271, 275|XGBoostで`ValueError: Invalid classes inferred from unique values of y. Expected: [0 1 2], got ['setosa' 'versicolor' 'virginica']`というエラーが出る場合は，`LabelEncoder`を使ってラベルを数値に変換してください．
21 | p. 271, 275|9.4.2, 9.5.3項のPythonのコードで警告がたくさん出る場合は，`warnings.simplefilter`の引数の「`, UserWarning`」を削除してみてください．
22 | p. 277|9.6.2項のPythonのコードで警告がたくさん出る場合は，`MLPClassifier()`を`MLPClassifier(max_iter=1000)`に変更してみてください．
23 | p. 292, 298|**（バージョン依存）** Pythonのコードの`get_feature_names()`を`get_feature_names_out()`としなければならないことがあります．
24 | p. 297|**（バージョン依存）** Pythonのコードの`LogisticRegression(penalty='none')`を`LogisticRegression(penalty=None)`としなければならないことがあります．
25 | 第11章|Google Colabでは，Kerasの代わりにKeras3をインストールして，`library(keras3)`で読み込んでください（公開コードは対応済み）．
26 | p. 309|**（バージョン依存）** Rでエラーが出る場合は，`list`を`rbind`に変更してください．
27 | p. 309|**（バージョン依存）** Pythonでは`y, y_1, y_2 = np.array(y), np.array(y_1), np.array(y_2)`として，リストをアレイに変換しなければならないことがあります．
28 | p. 310, 329|Rのコード`左辺 %<-% 右辺`が正しく動作しない場合は，事前に`library(zeallot)`を実行してください．
29 | p. 342|**（バージョン依存）** Pythonのコードの`from fbprophet import Prophet`を`from prophet import Prophet`としなければならないことがあります．
30 | 
31 | ## 正誤表
32 | 
33 | 次の誤りは第6刷で修正しました．
34 | 
35 | 場所|誤|正
36 | --|--|--
37 | p. 138 本文1行目|確率（約0.22）|確率（約0.022）
38 | 
39 | 次の誤りは第5刷で修正しました．
40 | 
41 | 場所|誤|正
42 | --|--|--
43 | p. 258 本文3行目|グラフの中で|連結グラフ（任意の2点を線をつないで結べるグラフ）の中で
44 | p. 351 Pythonのコード|`vals, vecs = np.linalg.eig(S)   # 固有値と固有ベクトル`|`vals, vecs = np.linalg.eig(S)   # 固有値と固有ベクトル`<br>`idx = np.argsort(-vals)              # 固有値の大きい順の番号`<br>`vals, vecs = vals[idx], vecs[:, idx] # 固有値の大きい順での並べ替え`
45 | 
46 | 次の誤りは第4刷で修正しました．
47 | 
48 | 場所|誤|正
49 | --|--|--
50 | p. 56 最初のコード（R）|`0.3333333`|`3.333333`
51 | p. 56 最初のコード（Python）|`0.3333333333333333`|`3.3333333333333335`
52 | p. 56 脚註1|0.3333333|3.333333
53 | p. 119 脚註9|[4.3, 4.7, 5.1, 5.5, 5.9, 6.300000000000001, 6.7, 7.1000000000000005, 7.5, 7.9]です．小さな誤差が，観測値6.3や7.1が属する階級に影響し，このままではヒストグラムがRと同じになりません．同じにするために，ここでは，`round`で誤差を消しています．|[4.3, 4.7, 5.1, 5.5, 5.9, 6.3, 6.7, 7.1, 7.5, 7.9]から少しずれます．Rも同様なのですが，Rではそのずれを丸めて消してから数を数えます．ここでは，Pythonでもそうなるように，`round`で数値を丸めています．
54 | p. 184 脚註4|回帰直線|予測値の期待値
55 | p. 194 Pythonのコード（2箇所）|`PolynomialFeatures(d)`|`PolynomialFeatures(d, include_bias=False)`
56 | p. 233 旁註|`sc`や`lm`|`sc`や`lr`
57 | p. 233 旁註|`my_model.named_steps.lm`|`my_pipeline.named_steps.lr`
58 | p. 240 旁註|`sfs`と`lm`|`sfs`と`lr`
59 | p. 272 下から2行目|Sepal.With|Sepal.Width
60 | p. 341 脚註5|`autoplot(level = c(80, 90))`|`autoplot(level = c(80, 95))`
61 | p. 349 本文上から3行目|描かれいます|描かれています
62 | 


--------------------------------------------------------------------------------
/code/R-notebook/r-12.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "id": "bc5770da",
  5 |       "cell_type": "markdown",
  6 |       "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)",
  7 |       "metadata": {}
  8 |     },
  9 |     {
 10 |       "cell_type": "code",
 11 |       "execution_count": null,
 12 |       "metadata": {},
 13 |       "outputs": [],
 14 |       "source": [
 15 |         "# Google Colaboratory\u306e\u74b0\u5883\u8a2d\u5b9a\n",
 16 |         "if (Sys.getenv(\"COLAB_JUPYTER_IP\") != \"\") {\n",
 17 |         "  options(Ncpus = parallel::detectCores())\n",
 18 |         "  installed_packages <- rownames(installed.packages())\n",
 19 |         "  packages_to_install <- c(\"caret\", \"fable\", \"feasts\", \"prophet\", \"tsibble\", \"urca\")\n",
 20 |         "  install.packages(setdiff(packages_to_install, installed_packages))\n",
 21 |         "  install.packages(c(\"ggplot2\"))\n",
 22 |         "}"
 23 |       ],
 24 |       "id": "464ec67c-16a7-4275-83d5-52bb7831ad0d"
 25 |     },
 26 |     {
 27 |       "id": "ce518daf",
 28 |       "cell_type": "markdown",
 29 |       "source": "## 12.1 \u65e5\u6642\u3068\u65e5\u6642\u306e\u5217",
 30 |       "metadata": {}
 31 |     },
 32 |     {
 33 |       "cell_type": "code",
 34 |       "execution_count": null,
 35 |       "metadata": {},
 36 |       "outputs": [],
 37 |       "source": [
 38 |         "as.POSIXct(\"2021-01-01\")"
 39 |       ],
 40 |       "id": "1924ebe8-4882-4ff3-b33f-d4153e8015cd"
 41 |     },
 42 |     {
 43 |       "cell_type": "code",
 44 |       "execution_count": null,
 45 |       "metadata": {},
 46 |       "outputs": [],
 47 |       "source": [
 48 |         "library(tsibble)\n",
 49 |         "\n",
 50 |         "seq(from = 2021, to = 2023, by = 1)\n",
 51 |         "\n",
 52 |         "seq(from = yearmonth(\"202101\"), to = yearmonth(\"202103\"), by = 2)\n",
 53 |         "\n",
 54 |         "seq(from = as.POSIXct(\"2021-01-01\"), to = as.POSIXct(\"2021-01-03\"), by = \"1 day\")\n",
 55 |         "\n",
 56 |         "seq(from = as.POSIXct(\"2021-01-01 00:00:00\"),\n",
 57 |         "    to   = as.POSIXct(\"2021-01-01 03:00:00\"), by = \"2 hour\")"
 58 |       ],
 59 |       "id": "0f224c5a-78c5-45e8-abf2-7904a5c2319f"
 60 |     },
 61 |     {
 62 |       "id": "29cbc74a",
 63 |       "cell_type": "markdown",
 64 |       "source": "## 12.2 \u6642\u7cfb\u5217\u30c7\u30fc\u30bf\u306e\u4e88\u6e2c",
 65 |       "metadata": {}
 66 |     },
 67 |     {
 68 |       "cell_type": "code",
 69 |       "execution_count": null,
 70 |       "metadata": {},
 71 |       "outputs": [],
 72 |       "source": [
 73 |         "my_data <- as.vector(AirPassengers)"
 74 |       ],
 75 |       "id": "2cb040ee-9b23-4d7e-b0c7-6d39baf86c47"
 76 |     },
 77 |     {
 78 |       "cell_type": "code",
 79 |       "execution_count": null,
 80 |       "metadata": {},
 81 |       "outputs": [],
 82 |       "source": [
 83 |         "n <- length(my_data) # \u30c7\u30fc\u30bf\u6570\uff08144\uff09\n",
 84 |         "k <- 108             # \u8a13\u7df4\u30c7\u30fc\u30bf\u6570"
 85 |       ],
 86 |       "id": "551c2b5a-8f7b-474a-984d-d785fab1a107"
 87 |     },
 88 |     {
 89 |       "cell_type": "code",
 90 |       "execution_count": null,
 91 |       "metadata": {},
 92 |       "outputs": [],
 93 |       "source": [
 94 |         "library(tidyverse)\n",
 95 |         "library(tsibble)\n",
 96 |         "\n",
 97 |         "my_ds <- seq(\n",
 98 |         "  from = yearmonth(\"1949/01\"),\n",
 99 |         "  to   = yearmonth(\"1960/12\"),\n",
100 |         "  by   = 1)\n",
101 |         "my_label <- rep(\n",
102 |         "  c(\"train\", \"test\"),\n",
103 |         "  c(k, n - k))\n",
104 |         "my_df <- tsibble(\n",
105 |         "  ds    = my_ds,\n",
106 |         "  x     = 0:(n - 1),\n",
107 |         "  y     = my_data,\n",
108 |         "  label = my_label,\n",
109 |         "  index = ds) # \u65e5\u6642\u306e\u5217\u306e\u6307\u5b9a\n",
110 |         "\n",
111 |         "head(my_df)"
112 |       ],
113 |       "id": "2c3c9a07-fce3-4f34-8411-3f99100d4580"
114 |     },
115 |     {
116 |       "cell_type": "code",
117 |       "execution_count": null,
118 |       "metadata": {},
119 |       "outputs": [],
120 |       "source": [
121 |         "my_train <- my_df[  1:k , ]\n",
122 |         "my_test  <- my_df[-(1:k), ]\n",
123 |         "y <- my_test$y"
124 |       ],
125 |       "id": "a830e70a-136d-40f4-b30a-36635556d054"
126 |     },
127 |     {
128 |       "cell_type": "code",
129 |       "execution_count": null,
130 |       "metadata": {},
131 |       "outputs": [],
132 |       "source": [
133 |         "my_plot <- my_df %>%\n",
134 |         "  ggplot(aes(x = ds,\n",
135 |         "             y = y,\n",
136 |         "             color = label)) +\n",
137 |         "  geom_line()\n",
138 |         "my_plot"
139 |       ],
140 |       "id": "84617760-c969-4691-87d4-35e9b3c37d2d"
141 |     },
142 |     {
143 |       "cell_type": "code",
144 |       "execution_count": null,
145 |       "metadata": {},
146 |       "outputs": [],
147 |       "source": [
148 |         "library(caret)\n",
149 |         "my_lm_model <- train(form = y ~ x, data = my_train, method = \"lm\")\n",
150 |         "y_ <- my_lm_model %>% predict(my_test)\n",
151 |         "caret::RMSE(y, y_) # RMSE\uff08\u30c6\u30b9\u30c8\uff09"
152 |       ],
153 |       "id": "1c2cd6a3-13a6-48f4-9615-0602d3e60958"
154 |     },
155 |     {
156 |       "cell_type": "code",
157 |       "execution_count": null,
158 |       "metadata": {},
159 |       "outputs": [],
160 |       "source": [
161 |         "y_ <- my_lm_model %>% predict(my_df)\n",
162 |         "tmp <- my_df %>%\n",
163 |         "  mutate(y = y_, label = \"model\")\n",
164 |         "my_plot + geom_line(data = tmp)"
165 |       ],
166 |       "id": "bf914a75-a3dc-40cb-8d0f-8d5dfd2ab11c"
167 |     },
168 |     {
169 |       "cell_type": "code",
170 |       "execution_count": null,
171 |       "metadata": {},
172 |       "outputs": [],
173 |       "source": [
174 |         "library(fable)\n",
175 |         "my_arima_model <- my_train %>% model(ARIMA(y))\n",
176 |         "my_arima_model"
177 |       ],
178 |       "id": "b7c8e8cb-940b-4eee-8fc8-6bb16e043927"
179 |     },
180 |     {
181 |       "cell_type": "code",
182 |       "execution_count": null,
183 |       "metadata": {},
184 |       "outputs": [],
185 |       "source": [
186 |         "tmp <- my_arima_model %>% forecast(h = \"3 years\")\n",
187 |         "head(tmp)"
188 |       ],
189 |       "id": "6fc63b0e-666e-4a05-90ac-88e5210500ff"
190 |     },
191 |     {
192 |       "cell_type": "code",
193 |       "execution_count": null,
194 |       "metadata": {},
195 |       "outputs": [],
196 |       "source": [
197 |         "y_ <- tmp$.mean\n",
198 |         "caret::RMSE(y_, y)"
199 |       ],
200 |       "id": "19166890-94bf-4442-b5d8-e45dc405d0e4"
201 |     },
202 |     {
203 |       "cell_type": "code",
204 |       "execution_count": null,
205 |       "metadata": {},
206 |       "outputs": [],
207 |       "source": [
208 |         "# \u4e88\u6e2c\u7d50\u679c\u306e\u307f\u3067\u3088\u3044\u5834\u5408\n",
209 |         "#tmp %>% autoplot\n",
210 |         "\n",
211 |         "tmp %>% autoplot +\n",
212 |         "  geom_line(data = my_df,\n",
213 |         "            aes(x = ds,\n",
214 |         "                y = y,\n",
215 |         "                color = label))"
216 |       ],
217 |       "id": "6f82290c-51f8-4523-a225-46542fe46055"
218 |     },
219 |     {
220 |       "cell_type": "code",
221 |       "execution_count": null,
222 |       "metadata": {},
223 |       "outputs": [],
224 |       "source": [
225 |         "library(prophet)\n",
226 |         "my_prophet_model <- my_train %>%\n",
227 |         "  prophet(seasonality.mode = \"multiplicative\")"
228 |       ],
229 |       "id": "285cd4a1-cc34-4a04-8671-12ab80c52ca0"
230 |     },
231 |     {
232 |       "cell_type": "code",
233 |       "execution_count": null,
234 |       "metadata": {},
235 |       "outputs": [],
236 |       "source": [
237 |         "tmp <- my_prophet_model %>% predict(my_test)\n",
238 |         "head(tmp[, c(\"ds\", \"yhat\", \"yhat_lower\", \"yhat_upper\")])"
239 |       ],
240 |       "id": "a24e324e-882d-4d09-91c8-37632736c396"
241 |     },
242 |     {
243 |       "cell_type": "code",
244 |       "execution_count": null,
245 |       "metadata": {},
246 |       "outputs": [],
247 |       "source": [
248 |         "y_ <- tmp$yhat\n",
249 |         "caret::RMSE(y_, y)"
250 |       ],
251 |       "id": "c532ee13-3b8d-407f-89a1-7c0668486f15"
252 |     },
253 |     {
254 |       "cell_type": "code",
255 |       "execution_count": null,
256 |       "metadata": {},
257 |       "outputs": [],
258 |       "source": [
259 |         "# my_prophet_model %>% plot(tmp) # \u4e88\u6e2c\u7d50\u679c\u306e\u307f\u3067\u3088\u3044\u5834\u5408\n",
260 |         "\n",
261 |         "my_prophet_model %>% plot(tmp) +\n",
262 |         "  geom_line(data = my_train, aes(x = as.POSIXct(ds))) +\n",
263 |         "  geom_line(data = my_test,  aes(x = as.POSIXct(ds)), color = \"red\")"
264 |       ],
265 |       "id": "b4b145ed-eef4-4399-bf35-7948c988f688"
266 |     }
267 |   ],
268 |   "nbformat": 4,
269 |   "nbformat_minor": 5,
270 |   "metadata": {
271 |     "kernelspec": {
272 |       "name": "ir",
273 |       "display_name": "R"
274 |     }
275 |   }
276 | }


--------------------------------------------------------------------------------
/code/Python-notebook/python-12.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "id": "f86449f9",
  5 |       "cell_type": "markdown",
  6 |       "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)",
  7 |       "metadata": {}
  8 |     },
  9 |     {
 10 |       "cell_type": "code",
 11 |       "execution_count": null,
 12 |       "metadata": {},
 13 |       "outputs": [],
 14 |       "source": [
 15 |         "# Google Colaboratory\u306e\u74b0\u5883\u8a2d\u5b9a\n",
 16 |         "import os\n",
 17 |         "if 'COLAB_GPU' in os.environ:\n",
 18 |         "  !python -m pip install pmdarima | tail -n 1"
 19 |       ],
 20 |       "id": "01c13eaf-b572-4570-979a-15591bc77674"
 21 |     },
 22 |     {
 23 |       "id": "efe8c46d",
 24 |       "cell_type": "markdown",
 25 |       "source": "## 12.1 \u65e5\u6642\u3068\u65e5\u6642\u306e\u5217",
 26 |       "metadata": {}
 27 |     },
 28 |     {
 29 |       "cell_type": "code",
 30 |       "execution_count": null,
 31 |       "metadata": {},
 32 |       "outputs": [],
 33 |       "source": [
 34 |         "import pandas as pd\n",
 35 |         "pd.to_datetime('2020-01-01')"
 36 |       ],
 37 |       "id": "ad2111fa-8b0d-4ccb-a872-de1004bb5ea2"
 38 |     },
 39 |     {
 40 |       "cell_type": "code",
 41 |       "execution_count": null,
 42 |       "metadata": {},
 43 |       "outputs": [],
 44 |       "source": [
 45 |         "pd.date_range(start='2021-01-01', end='2023-01-01', freq='1A')\n",
 46 |         "\n",
 47 |         "pd.date_range(start='2021-01-01', end='2023-01-01', freq='1AS')\n",
 48 |         "\n",
 49 |         "pd.date_range(start='2021-01-01', end='2021-03-01', freq='2M')\n",
 50 |         "\n",
 51 |         "pd.date_range(start='2021-01-01', end='2021-03-01', freq='2MS')\n",
 52 |         "\n",
 53 |         "pd.date_range(start='2021-01-01', end='2021-01-03', freq='1D')\n",
 54 |         "\n",
 55 |         "pd.date_range(start='2021-01-01 00:00:00', end='2021-01-01 03:00:00', freq='2H')"
 56 |       ],
 57 |       "id": "9a125fa4-d6d9-456b-a27c-39aed4f96653"
 58 |     },
 59 |     {
 60 |       "id": "ee17dc29",
 61 |       "cell_type": "markdown",
 62 |       "source": "## 12.2 \u6642\u7cfb\u5217\u30c7\u30fc\u30bf\u306e\u4e88\u6e2c",
 63 |       "metadata": {}
 64 |     },
 65 |     {
 66 |       "cell_type": "code",
 67 |       "execution_count": null,
 68 |       "metadata": {},
 69 |       "outputs": [],
 70 |       "source": [
 71 |         "import matplotlib.pyplot as plt\n",
 72 |         "import pandas as pd\n",
 73 |         "from pmdarima.datasets import airpassengers\n",
 74 |         "from sklearn.metrics import mean_squared_error\n",
 75 |         "\n",
 76 |         "my_data = airpassengers.load_airpassengers()"
 77 |       ],
 78 |       "id": "8518eedc-0cf0-473b-9588-5ec3cc4c6dc3"
 79 |     },
 80 |     {
 81 |       "cell_type": "code",
 82 |       "execution_count": null,
 83 |       "metadata": {},
 84 |       "outputs": [],
 85 |       "source": [
 86 |         "n = len(my_data) # \u30c7\u30fc\u30bf\u6570\uff08144\uff09\n",
 87 |         "k = 108          # \u8a13\u7df4\u30c7\u30fc\u30bf\u6570"
 88 |       ],
 89 |       "id": "b055dc4e-65b6-4cc7-852e-50cc6a87f318"
 90 |     },
 91 |     {
 92 |       "cell_type": "code",
 93 |       "execution_count": null,
 94 |       "metadata": {},
 95 |       "outputs": [],
 96 |       "source": [
 97 |         "my_ds = pd.date_range(\n",
 98 |         "    start='1949/01/01',\n",
 99 |         "    end='1960/12/01',\n",
100 |         "    freq='MS')\n",
101 |         "my_df = pd.DataFrame({\n",
102 |         "    'ds': my_ds,\n",
103 |         "    'x': range(n),\n",
104 |         "    'y': my_data},\n",
105 |         "    index=my_ds)\n",
106 |         "my_df.head()"
107 |       ],
108 |       "id": "091ac6e3-3429-4730-b480-0b17fbd5173f"
109 |     },
110 |     {
111 |       "cell_type": "code",
112 |       "execution_count": null,
113 |       "metadata": {},
114 |       "outputs": [],
115 |       "source": [
116 |         "my_train = my_df[        :k]\n",
117 |         "my_test  = my_df[-(n - k): ]\n",
118 |         "y = my_test.y"
119 |       ],
120 |       "id": "6dd363fe-7392-4bb6-86aa-5f92e2b685e5"
121 |     },
122 |     {
123 |       "cell_type": "code",
124 |       "execution_count": null,
125 |       "metadata": {},
126 |       "outputs": [],
127 |       "source": [
128 |         "plt.plot(my_train.y, label='train')\n",
129 |         "plt.plot(my_test.y,  label='test')\n",
130 |         "plt.legend()"
131 |       ],
132 |       "id": "9d0cc675-c03f-4c58-893d-ddd04cbfd8c6"
133 |     },
134 |     {
135 |       "cell_type": "code",
136 |       "execution_count": null,
137 |       "metadata": {},
138 |       "outputs": [],
139 |       "source": [
140 |         "from sklearn.linear_model import LinearRegression\n",
141 |         "\n",
142 |         "my_lm_model = LinearRegression()\n",
143 |         "my_lm_model.fit(my_train[['x']], my_train.y)\n",
144 |         "\n",
145 |         "X = my_test[['x']]\n",
146 |         "y_ = my_lm_model.predict(X)\n",
147 |         "mean_squared_error(y, y_)**0.5 # RMSE\uff08\u30c6\u30b9\u30c8\uff09"
148 |       ],
149 |       "id": "8f54ed6b-04c1-4ec4-adee-d9845c60a5c9"
150 |     },
151 |     {
152 |       "cell_type": "code",
153 |       "execution_count": null,
154 |       "metadata": {},
155 |       "outputs": [],
156 |       "source": [
157 |         "y_ = my_lm_model.predict(my_df[['x']])\n",
158 |         "tmp = pd.DataFrame(y_,\n",
159 |         "                   index=my_df.index)\n",
160 |         "plt.plot(my_train.y, label='train')\n",
161 |         "plt.plot(my_test.y,  label='test')\n",
162 |         "plt.plot(tmp, label='model')\n",
163 |         "plt.legend()"
164 |       ],
165 |       "id": "49eed027-c6d5-46dc-9d2a-f6ee12eb49ee"
166 |     },
167 |     {
168 |       "cell_type": "code",
169 |       "execution_count": null,
170 |       "metadata": {},
171 |       "outputs": [],
172 |       "source": [
173 |         "import pmdarima as pm\n",
174 |         "my_arima_model = pm.auto_arima(my_train.y, m=12, trace=True)"
175 |       ],
176 |       "id": "3869df3e-9fb3-491d-926f-991b43092303"
177 |     },
178 |     {
179 |       "cell_type": "code",
180 |       "execution_count": null,
181 |       "metadata": {},
182 |       "outputs": [],
183 |       "source": [
184 |         "y_, my_ci = my_arima_model.predict(len(my_test),         # \u671f\u9593\u306f\u30c6\u30b9\u30c8\u30c7\u30fc\u30bf\u3068\u540c\u3058\uff0e\n",
185 |         "                                   alpha=0.05,           # \u6709\u610f\u6c34\u6e96\uff08\u30c7\u30d5\u30a9\u30eb\u30c8\uff09\n",
186 |         "                                   return_conf_int=True) # \u4fe1\u983c\u533a\u9593\u3092\u6c42\u3081\u308b\uff0e\n",
187 |         "tmp = pd.DataFrame({'y': y_,\n",
188 |         "                    'Lo': my_ci[:, 0],\n",
189 |         "                    'Hi': my_ci[:, 1]},\n",
190 |         "                   index=my_test.index)\n",
191 |         "tmp.head()"
192 |       ],
193 |       "id": "f1c97130-5ab0-4cdc-a8fb-8f9288bdbdba"
194 |     },
195 |     {
196 |       "cell_type": "code",
197 |       "execution_count": null,
198 |       "metadata": {},
199 |       "outputs": [],
200 |       "source": [
201 |         "mean_squared_error(y, y_)**0.5"
202 |       ],
203 |       "id": "0e230755-e4fd-4098-ba68-aa9eb51e4021"
204 |     },
205 |     {
206 |       "cell_type": "code",
207 |       "execution_count": null,
208 |       "metadata": {},
209 |       "outputs": [],
210 |       "source": [
211 |         "plt.plot(my_train.y, label='train')\n",
212 |         "plt.plot(my_test.y,  label='test')\n",
213 |         "plt.plot(tmp.y,      label='model')\n",
214 |         "plt.fill_between(tmp.index,\n",
215 |         "                 tmp.Lo,\n",
216 |         "                 tmp.Hi,\n",
217 |         "                 alpha=0.25) # \u4e0d\u900f\u660e\u5ea6\n",
218 |         "plt.legend(loc='upper left')"
219 |       ],
220 |       "id": "1904f642-1352-48e4-b998-59505e434131"
221 |     },
222 |     {
223 |       "cell_type": "code",
224 |       "execution_count": null,
225 |       "metadata": {},
226 |       "outputs": [],
227 |       "source": [
228 |         "try: from fbprophet import Prophet\n",
229 |         "except ImportError: from prophet import Prophet\n",
230 |         "my_prophet_model = Prophet(seasonality_mode='multiplicative')\n",
231 |         "my_prophet_model.fit(my_train)"
232 |       ],
233 |       "id": "b8cd2056-fd59-47e8-8023-f6e4a5d77e51"
234 |     },
235 |     {
236 |       "cell_type": "code",
237 |       "execution_count": null,
238 |       "metadata": {},
239 |       "outputs": [],
240 |       "source": [
241 |         "tmp = my_prophet_model.predict(my_test)\n",
242 |         "tmp[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head()"
243 |       ],
244 |       "id": "4c06ee6c-4454-4c37-87ad-ceaf0f1f159c"
245 |     },
246 |     {
247 |       "cell_type": "code",
248 |       "execution_count": null,
249 |       "metadata": {},
250 |       "outputs": [],
251 |       "source": [
252 |         "y_ = tmp.yhat\n",
253 |         "mean_squared_error(y, y_)**0.5"
254 |       ],
255 |       "id": "f89c7ebe-4ef5-48fd-8fe9-d3f7def4b513"
256 |     },
257 |     {
258 |       "cell_type": "code",
259 |       "execution_count": null,
260 |       "metadata": {},
261 |       "outputs": [],
262 |       "source": [
263 |         "# my_prophet_model.plot(tmp) # \u4e88\u6e2c\u7d50\u679c\u306e\u307f\u3067\u3088\u3044\u5834\u5408\n",
264 |         "\n",
265 |         "fig = my_prophet_model.plot(tmp)\n",
266 |         "fig.axes[0].plot(my_train.ds, my_train.y)\n",
267 |         "fig.axes[0].plot(my_test.ds, my_test.y, color='red')"
268 |       ],
269 |       "id": "ff77ca1c-2efb-4362-92ac-74c33dcd2b38"
270 |     }
271 |   ],
272 |   "nbformat": 4,
273 |   "nbformat_minor": 5,
274 |   "metadata": {
275 |     "kernelspec": {
276 |       "name": "python3",
277 |       "display_name": "Python 3"
278 |     }
279 |   }
280 | }


--------------------------------------------------------------------------------
/addendum/sagemaker/sage-python.yml:
--------------------------------------------------------------------------------
  1 | name: sage-python
  2 | channels:
  3 |   - anaconda
  4 |   - conda-forge
  5 | dependencies:
  6 |   - _libgcc_mutex=0.1=conda_forge
  7 |   - _openmp_mutex=4.5=2_gnu
  8 |   - _py-xgboost-mutex=2.0=cpu_0
  9 |   - abseil-cpp=20210324.2=h9c3ff4c_0
 10 |   - absl-py=0.15.0=pyhd8ed1ab_0
 11 |   - aiohttp=3.8.1=py38h0a891b7_1
 12 |   - aiosignal=1.2.0=pyhd8ed1ab_0
 13 |   - alsa-lib=1.2.3.2=h166bdaf_0
 14 |   - arviz=0.12.0=pyhd8ed1ab_0
 15 |   - asttokens=2.0.5=pyhd8ed1ab_0
 16 |   - astunparse=1.6.3=pyhd8ed1ab_0
 17 |   - async-timeout=4.0.2=pyhd8ed1ab_0
 18 |   - atk-1.0=2.36.0=h3371d22_4
 19 |   - attrs=21.4.0=pyhd8ed1ab_0
 20 |   - backcall=0.2.0=pyh9f0ad1d_0
 21 |   - backports=1.0=py_2
 22 |   - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
 23 |   - binutils_impl_linux-64=2.36.1=h193b22a_2
 24 |   - binutils_linux-64=2.36=hf3e587d_33
 25 |   - blinker=1.4=py_1
 26 |   - brotli=1.0.9=h166bdaf_7
 27 |   - brotli-bin=1.0.9=h166bdaf_7
 28 |   - bzip2=1.0.8=h7f98852_4
 29 |   - c-ares=1.18.1=h7f98852_0
 30 |   - ca-certificates=2022.4.26=h06a4308_0
 31 |   - cached-property=1.5.2=hd8ed1ab_1
 32 |   - cached_property=1.5.2=pyha770c72_1
 33 |   - cachetools=5.0.0=pyhd8ed1ab_0
 34 |   - cairo=1.16.0=h6cf1ce9_1008
 35 |   - certifi=2021.10.8=py38h06a4308_2
 36 |   - cftime=1.6.0=py38h71d37f0_1
 37 |   - charset-normalizer=2.0.12=pyhd8ed1ab_0
 38 |   - click=8.1.3=py38h578d9bd_0
 39 |   - colorama=0.4.4=pyhd3eb1b0_0
 40 |   - convertdate=2.4.0=pyhd8ed1ab_0
 41 |   - cryptography=36.0.2=py38h2b5fc30_1
 42 |   - cudatoolkit=11.6.0=habf752d_10
 43 |   - cudnn=8.2.1.32=h86fa8c9_0
 44 |   - curl=7.83.0=h7bff187_0
 45 |   - cycler=0.11.0=pyhd8ed1ab_0
 46 |   - cython=0.29.28=py38hfa26641_2
 47 |   - dbus=1.13.18=hb2f20db_0
 48 |   - debugpy=1.6.0=py38hfa26641_0
 49 |   - decorator=5.1.1=pyhd8ed1ab_0
 50 |   - dill=0.3.4=pyhd8ed1ab_0
 51 |   - entrypoints=0.4=pyhd8ed1ab_0
 52 |   - ephem=4.1.3=py38h0a891b7_4
 53 |   - executing=0.8.3=pyhd8ed1ab_0
 54 |   - expat=2.4.8=h27087fc_0
 55 |   - fbprophet=0.7.1=py38h950e882_0
 56 |   - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
 57 |   - font-ttf-inconsolata=3.000=h77eed37_0
 58 |   - font-ttf-source-code-pro=2.038=h77eed37_0
 59 |   - font-ttf-ubuntu=0.83=hab24e00_0
 60 |   - fontconfig=2.14.0=h8e229c2_0
 61 |   - fonts-conda-ecosystem=1=0
 62 |   - fonts-conda-forge=1=0
 63 |   - fonttools=4.33.3=py38h0a891b7_0
 64 |   - freetype=2.11.0=h70c0345_0
 65 |   - fribidi=1.0.10=h36c2ea0_0
 66 |   - frozenlist=1.3.0=py38h0a891b7_1
 67 |   - future=0.18.2=py38_1
 68 |   - gast=0.4.0=pyh9f0ad1d_0
 69 |   - gcc_impl_linux-64=7.5.0=habd7529_20
 70 |   - gcc_linux-64=7.5.0=h47867f9_33
 71 |   - gdk-pixbuf=2.42.6=h04a7f16_0
 72 |   - gettext=0.19.8.1=h0b5b191_1005
 73 |   - giflib=5.2.1=h36c2ea0_2
 74 |   - glib=2.69.1=h4ff587b_1
 75 |   - glib-tools=2.68.4=h9c3ff4c_0
 76 |   - google-auth=2.6.6=pyh6c4a22f_0
 77 |   - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0
 78 |   - google-pasta=0.2.0=pyh8c360ce_0
 79 |   - graphite2=1.3.13=h58526e2_1001
 80 |   - graphviz=2.47.2=h85b4f2f_0
 81 |   - grpc-cpp=1.39.1=h850795e_1
 82 |   - grpcio=1.39.0=py38hdd6454d_0
 83 |   - gst-plugins-base=1.18.5=hf529b03_0
 84 |   - gstreamer=1.18.5=h76c114f_0
 85 |   - gtk2=2.24.33=h539f30e_1
 86 |   - gts=0.7.6=h64030ff_2
 87 |   - gxx_impl_linux-64=7.5.0=hd0bb8aa_20
 88 |   - gxx_linux-64=7.5.0=h555fc39_33
 89 |   - h2o=3.18.0.2=0
 90 |   - h2o-py=3.36.1.1=pyhd8ed1ab_0
 91 |   - h5py=3.1.0=nompi_py38hafa665b_100
 92 |   - harfbuzz=3.0.0=h83ec7ef_1
 93 |   - hdf4=4.2.15=h10796ff_3
 94 |   - hdf5=1.10.6=nompi_h6a2412b_1114
 95 |   - hijri-converter=2.2.3=pyhd8ed1ab_0
 96 |   - holidays=0.13=pyhd8ed1ab_0
 97 |   - icu=68.2=h9c3ff4c_0
 98 |   - idna=3.3=pyhd3eb1b0_0
 99 |   - importlib-metadata=4.11.3=py38h578d9bd_1
100 |   - importlib_metadata=4.11.3=hd8ed1ab_1
101 |   - ipykernel=6.13.0=py38h7f3c49e_0
102 |   - ipython=8.3.0=py38h578d9bd_0
103 |   - jbig=2.1=h7f98852_2003
104 |   - jedi=0.18.1=py38h578d9bd_1
105 |   - joblib=1.1.0=pyhd8ed1ab_0
106 |   - jpeg=9e=h166bdaf_1
107 |   - jupyter_client=7.3.0=pyhd8ed1ab_0
108 |   - jupyter_core=4.9.2=py38h578d9bd_0
109 |   - keras=2.6.0=pyhd8ed1ab_1
110 |   - keras-preprocessing=1.1.2=pyhd8ed1ab_0
111 |   - kernel-headers_linux-64=2.6.32=he073ed8_15
112 |   - keyutils=1.6.1=h166bdaf_0
113 |   - kiwisolver=1.4.2=py38h43d8883_1
114 |   - korean_lunar_calendar=0.2.1=pyh9f0ad1d_0
115 |   - krb5=1.19.3=h3790be6_0
116 |   - lcms2=2.12=hddcbb42_0
117 |   - ld_impl_linux-64=2.36.1=hea4e1c9_2
118 |   - lerc=3.0=h9c3ff4c_0
119 |   - libblas=3.9.0=14_linux64_openblas
120 |   - libbrotlicommon=1.0.9=h166bdaf_7
121 |   - libbrotlidec=1.0.9=h166bdaf_7
122 |   - libbrotlienc=1.0.9=h166bdaf_7
123 |   - libcblas=3.9.0=14_linux64_openblas
124 |   - libclang=11.1.0=default_ha53f305_1
125 |   - libcurl=7.83.0=h7bff187_0
126 |   - libdeflate=1.10=h7f98852_0
127 |   - libedit=3.1.20191231=he28a2e2_2
128 |   - libev=4.33=h516909a_1
129 |   - libevent=2.1.10=h9b69904_4
130 |   - libffi=3.3=h58526e2_2
131 |   - libgcc-devel_linux-64=7.5.0=hda03d7c_20
132 |   - libgcc-ng=11.2.0=h1d223b6_16
133 |   - libgd=2.3.3=h6ad9fb6_0
134 |   - libgfortran-ng=11.2.0=h69a702a_16
135 |   - libgfortran5=11.2.0=h5c6108e_16
136 |   - libglib=2.68.4=h3e27bee_0
137 |   - libgomp=11.2.0=h1d223b6_16
138 |   - libiconv=1.16=h516909a_0
139 |   - liblapack=3.9.0=14_linux64_openblas
140 |   - libllvm11=11.1.0=hf817b99_3
141 |   - libnetcdf=4.8.1=nompi_hcd642e3_100
142 |   - libnghttp2=1.47.0=h727a467_0
143 |   - libogg=1.3.4=h7f98852_1
144 |   - libopenblas=0.3.20=pthreads_h78a6416_0
145 |   - libopus=1.3.1=h7f98852_1
146 |   - libpng=1.6.37=hbc83047_0
147 |   - libpq=13.5=hd57d9b9_1
148 |   - libprotobuf=3.16.0=h780b84a_0
149 |   - librsvg=2.50.5=hc3c00ef_0
150 |   - libsodium=1.0.18=h36c2ea0_1
151 |   - libssh2=1.10.0=ha56f1ee_2
152 |   - libstdcxx-devel_linux-64=7.5.0=hb016644_20
153 |   - libstdcxx-ng=11.2.0=he4da1e4_16
154 |   - libtiff=4.3.0=h542a066_3
155 |   - libtool=2.4.6=h9c3ff4c_1008
156 |   - libuuid=2.32.1=h7f98852_1000
157 |   - libvorbis=1.3.7=h9c3ff4c_0
158 |   - libwebp=1.2.2=h3452ae3_0
159 |   - libwebp-base=1.2.2=h7f98852_1
160 |   - libxcb=1.14=h7b6447c_0
161 |   - libxgboost=1.5.1=cpu_h3d145d1_2
162 |   - libxkbcommon=1.0.3=he3ba5ed_0
163 |   - libxml2=2.9.10=h72842e0_4
164 |   - libxslt=1.1.33=h15afd5d_2
165 |   - libzip=1.8.0=h4de3113_1
166 |   - libzlib=1.2.11=h166bdaf_1014
167 |   - lunarcalendar=0.0.9=py_0
168 |   - lxml=4.8.0=py38h0a891b7_3
169 |   - lz4-c=1.9.3=h9c3ff4c_1
170 |   - markdown=3.3.7=pyhd8ed1ab_0
171 |   - matplotlib=3.5.2=py38h578d9bd_0
172 |   - matplotlib-base=3.5.2=py38h826bfd8_0
173 |   - matplotlib-inline=0.1.3=pyhd8ed1ab_0
174 |   - multidict=6.0.2=py38h0a891b7_1
175 |   - munkres=1.1.4=pyh9f0ad1d_0
176 |   - mysql-common=8.0.29=haf5c9bc_0
177 |   - mysql-libs=8.0.29=h28c427c_0
178 |   - nccl=2.12.10.1=h0800d71_0
179 |   - ncurses=6.3=h27087fc_1
180 |   - nest-asyncio=1.5.5=pyhd8ed1ab_0
181 |   - netcdf4=1.5.7=nompi_py38hcc16cfe_101
182 |   - nspr=4.32=h9c3ff4c_1
183 |   - nss=3.77=h2350873_0
184 |   - oauthlib=3.2.0=pyhd8ed1ab_0
185 |   - openjdk=11.0.13=h87a67e3_0
186 |   - openjpeg=2.4.0=hb52868f_1
187 |   - openssl=1.1.1o=h166bdaf_0
188 |   - opt_einsum=3.3.0=pyhd8ed1ab_1
189 |   - packaging=21.3=pyhd8ed1ab_0
190 |   - pandarallel=1.6.1=pyhd8ed1ab_0
191 |   - pandas=1.4.2=py38h47df419_1
192 |   - pango=1.48.10=h54213e6_2
193 |   - parso=0.8.3=pyhd8ed1ab_0
194 |   - patsy=0.5.2=pyhd8ed1ab_0
195 |   - pcre=8.45=h295c915_0
196 |   - pexpect=4.8.0=pyh9f0ad1d_2
197 |   - pickleshare=0.7.5=py_1003
198 |   - pillow=9.1.0=py38h0ee0e06_2
199 |   - pip=22.0.4=pyhd8ed1ab_0
200 |   - pixman=0.40.0=h36c2ea0_0
201 |   - pmdarima=1.8.2=py38h497a2fe_3
202 |   - prompt-toolkit=3.0.29=pyha770c72_0
203 |   - protobuf=3.16.0=py38h709712a_0
204 |   - psutil=5.9.0=py38h0a891b7_1
205 |   - pthread-stubs=0.4=h36c2ea0_1001
206 |   - ptyprocess=0.7.0=pyhd3deb0d_0
207 |   - pure_eval=0.2.2=pyhd8ed1ab_0
208 |   - py-xgboost=1.5.1=cpu_py38h66f0ec1_2
209 |   - pyasn1=0.4.8=py_0
210 |   - pyasn1-modules=0.2.7=py_0
211 |   - pycparser=2.21=pyhd3eb1b0_0
212 |   - pygments=2.12.0=pyhd8ed1ab_0
213 |   - pyjwt=2.3.0=pyhd8ed1ab_1
214 |   - pymeeus=0.5.10=pyhd8ed1ab_0
215 |   - pyopenssl=22.0.0=pyhd3eb1b0_0
216 |   - pyparsing=3.0.8=pyhd8ed1ab_0
217 |   - pyqt=5.12.3=py38h578d9bd_8
218 |   - pyqt-impl=5.12.3=py38h0ffb2e6_8
219 |   - pyqt5-sip=4.19.18=py38h709712a_8
220 |   - pyqtchart=5.12=py38h7400c14_8
221 |   - pyqtwebengine=5.12.1=py38h7400c14_8
222 |   - pysocks=1.7.1=py38h06a4308_0
223 |   - pystan=2.19.1.1=py38hc5bc63f_2
224 |   - python=3.8.8=hffdb5ce_0_cpython
225 |   - python-dateutil=2.8.2=pyhd8ed1ab_0
226 |   - python-flatbuffers=1.12=pyhd8ed1ab_1
227 |   - python-graphviz=0.20=pyhaef67bd_0
228 |   - python_abi=3.8=2_cp38
229 |   - pytz=2022.1=pyhd8ed1ab_0
230 |   - pyu2f=0.1.5=pyhd8ed1ab_0
231 |   - pyzmq=22.3.0=py38hfc09fa9_2
232 |   - qt=5.12.9=hda022c4_4
233 |   - re2=2021.09.01=h9c3ff4c_0
234 |   - readline=8.1=h46c0cb4_0
235 |   - requests=2.27.1=pyhd3eb1b0_0
236 |   - requests-oauthlib=1.3.1=pyhd8ed1ab_0
237 |   - rsa=4.8=pyhd8ed1ab_0
238 |   - scikit-learn=1.0.2=py38h1561384_0
239 |   - scipy=1.6.3=py38h7b17777_0
240 |   - seaborn=0.11.2=hd8ed1ab_0
241 |   - seaborn-base=0.11.2=pyhd8ed1ab_0
242 |   - setuptools=49.6.0=py38h578d9bd_3
243 |   - six=1.15.0=pyh9f0ad1d_0
244 |   - snappy=1.1.9=hbd366e4_0
245 |   - sqlite=3.38.4=h4ff8645_0
246 |   - stack_data=0.2.0=pyhd8ed1ab_0
247 |   - statsmodels=0.13.2=py38h6c62de6_0
248 |   - sysroot_linux-64=2.12=he073ed8_15
249 |   - tabulate=0.8.9=py38h06a4308_0
250 |   - tensorboard=2.9.0=pyhd8ed1ab_0
251 |   - tensorboard-data-server=0.6.0=py38h2b5fc30_2
252 |   - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
253 |   - tensorflow=2.6.0=cuda112py38hbe5352d_2
254 |   - tensorflow-base=2.6.0=cuda112py38heae9c4c_2
255 |   - tensorflow-estimator=2.6.0=cuda112py38hb2194ef_2
256 |   - tensorflow-gpu=2.6.0=cuda112py38h0bbbad9_2
257 |   - termcolor=1.1.0=py_2
258 |   - threadpoolctl=3.1.0=pyh8a188c0_0
259 |   - tk=8.6.12=h27826a3_0
260 |   - tornado=6.1=py38h0a891b7_3
261 |   - tqdm=4.64.0=pyhd8ed1ab_0
262 |   - traitlets=5.1.1=pyhd8ed1ab_0
263 |   - typing-extensions=3.7.4.3=0
264 |   - typing_extensions=3.7.4.3=py_0
265 |   - unicodedata2=14.0.0=py38h0a891b7_1
266 |   - urllib3=1.26.9=py38h06a4308_0
267 |   - wcwidth=0.2.5=pyh9f0ad1d_2
268 |   - werkzeug=2.1.2=pyhd8ed1ab_1
269 |   - wheel=0.37.1=pyhd8ed1ab_0
270 |   - wrapt=1.12.1=py38h497a2fe_3
271 |   - xarray=2022.3.0=pyhd8ed1ab_0
272 |   - xgboost=1.5.1=cpu_py38h66f0ec1_2
273 |   - xorg-kbproto=1.0.7=h7f98852_1002
274 |   - xorg-libice=1.0.10=h7f98852_0
275 |   - xorg-libsm=1.2.3=hd9c2040_1000
276 |   - xorg-libx11=1.7.2=h7f98852_0
277 |   - xorg-libxau=1.0.9=h7f98852_0
278 |   - xorg-libxdmcp=1.1.3=h7f98852_0
279 |   - xorg-libxext=1.3.4=h7f98852_1
280 |   - xorg-libxrender=0.9.10=h7f98852_1003
281 |   - xorg-renderproto=0.11.1=h7f98852_1002
282 |   - xorg-xextproto=7.3.0=h7f98852_1002
283 |   - xorg-xproto=7.0.31=h7f98852_1007
284 |   - xz=5.2.5=h516909a_1
285 |   - yarl=1.7.2=py38h0a891b7_2
286 |   - zeromq=4.3.4=h9c3ff4c_1
287 |   - zipp=3.8.0=pyhd8ed1ab_0
288 |   - zlib=1.2.11=h166bdaf_1014
289 |   - zstd=1.5.2=ha95c52a_0
290 |   - pip:
291 |     - brotlipy==0.7.0
292 |     - cffi==1.14.6
293 |     - colourmap==1.1.4
294 |     - numpy==1.19.5
295 |     - pca==1.8.0
296 |     - scatterd==1.1.1
297 |     - scikit-learn==1.0.2  # was sklearn==0.0, a deprecated PyPI stub whose install now fails; same version as the conda pin
298 |     - wget==3.2
299 | prefix: /home/studio-lab-user/.conda/envs/sage-python
300 | 


--------------------------------------------------------------------------------
/code/Python-notebook/python-05.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "id": "1438c612",
  5 |       "cell_type": "markdown",
  6 |       "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n",
  7 |       "metadata": {}
  8 |     },
  9 |     {
 10 |       "id": "edf8369d",
 11 |       "cell_type": "markdown",
 12 |       "source": "## 5.1 \u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f",
 13 |       "metadata": {}
 14 |     },
 15 |     {
 16 |       "cell_type": "code",
 17 |       "execution_count": null,
 18 |       "metadata": {},
 19 |       "outputs": [],
 20 |       "source": [
 21 |         "!wget https://raw.githubusercontent.com/taroyabuki/fromzero/master/data/exam.csv"
 22 |       ],
 23 |       "id": "dde757dc-9d6e-451c-8dae-235f5d11837f"
 24 |     },
 25 |     {
 26 |       "cell_type": "code",
 27 |       "execution_count": null,
 28 |       "metadata": {},
 29 |       "outputs": [],
 30 |       "source": [
 31 |         "import pandas as pd\n",
 32 |         "my_df = pd.read_csv('exam.csv')\n",
 33 |         "my_df"
 34 |       ],
 35 |       "id": "5e3cc804-49d8-4385-b3a0-add7fdb06dbf"
 36 |     },
 37 |     {
 38 |       "cell_type": "code",
 39 |       "execution_count": null,
 40 |       "metadata": {},
 41 |       "outputs": [],
 42 |       "source": [
 43 |         "my_url = ('https://raw.githubusercontent.com/taroyabuki'\n",
 44 |         "          '/fromzero/master/data/exam.csv')\n",
 45 |         "my_df = pd.read_csv(my_url)"
 46 |       ],
 47 |       "id": "a650bedc-240c-4d20-b683-6f533797c093"
 48 |     },
 49 |     {
 50 |       "cell_type": "code",
 51 |       "execution_count": null,
 52 |       "metadata": {},
 53 |       "outputs": [],
 54 |       "source": [
 55 |         "my_df2 = pd.read_csv('exam.csv',\n",
 56 |         "    index_col='name')\n",
 57 |         "my_df2"
 58 |       ],
 59 |       "id": "917f3b47-9d4d-489c-8f66-edb8f7482d90"
 60 |     },
 61 |     {
 62 |       "cell_type": "code",
 63 |       "execution_count": null,
 64 |       "metadata": {},
 65 |       "outputs": [],
 66 |       "source": [
 67 |         "my_df.to_csv('exam2.csv', index=False)"
 68 |       ],
 69 |       "id": "30d65be6-8ad2-455d-8251-ff1e1417275f"
 70 |     },
 71 |     {
 72 |       "cell_type": "code",
 73 |       "execution_count": null,
 74 |       "metadata": {},
 75 |       "outputs": [],
 76 |       "source": [
 77 |         "my_df2.to_csv('exam3.csv')"
 78 |       ],
 79 |       "id": "e1df7d45-417b-462d-9c41-f573dacda2d7"
 80 |     },
 81 |     {
 82 |       "cell_type": "code",
 83 |       "execution_count": null,
 84 |       "metadata": {},
 85 |       "outputs": [],
 86 |       "source": [
 87 |         "my_df = pd.read_csv('exam.csv',\n",
 88 |         "    encoding='UTF-8')"
 89 |       ],
 90 |       "id": "1a639f8c-4c4a-4328-ab51-8b8570914508"
 91 |     },
 92 |     {
 93 |       "cell_type": "code",
 94 |       "execution_count": null,
 95 |       "metadata": {},
 96 |       "outputs": [],
 97 |       "source": [
 98 |         "my_df.to_csv('exam2.csv', index=False, encoding='UTF-8')"
 99 |       ],
100 |       "id": "8ff33ada-1100-4f30-99ef-39a4d6ad69e8"
101 |     },
102 |     {
103 |       "cell_type": "code",
104 |       "execution_count": null,
105 |       "metadata": {},
106 |       "outputs": [],
107 |       "source": [
108 |         "my_url = 'https://taroyabuki.github.io/fromzero/exam.html'\n",
109 |         "my_tables = pd.read_html(my_url)"
110 |       ],
111 |       "id": "4a779db4-1630-403f-a8e5-8423aa4a7193"
112 |     },
113 |     {
114 |       "cell_type": "code",
115 |       "execution_count": null,
116 |       "metadata": {},
117 |       "outputs": [],
118 |       "source": [
119 |         "my_tables"
120 |       ],
121 |       "id": "3b61881e-cd4b-4bf4-87f7-e35cea5fd7fc"
122 |     },
123 |     {
124 |       "cell_type": "code",
125 |       "execution_count": null,
126 |       "metadata": {},
127 |       "outputs": [],
128 |       "source": [
129 |         "my_tables[0]"
130 |       ],
131 |       "id": "f9476c63-cc57-4771-9788-a185445edaa4"
132 |     },
133 |     {
134 |       "cell_type": "code",
135 |       "execution_count": null,
136 |       "metadata": {},
137 |       "outputs": [],
138 |       "source": [
139 |         "# 1\u5217\u76ee\u4ee5\u964d\u3092\u53d6\u308a\u51fa\u3059\uff0e\n",
140 |         "my_data = my_tables[0].iloc[:, 1:]\n",
141 |         "my_data"
142 |       ],
143 |       "id": "16e538cf-013a-4b4b-9c45-0e4ff0897bdf"
144 |     },
145 |     {
146 |       "cell_type": "code",
147 |       "execution_count": null,
148 |       "metadata": {},
149 |       "outputs": [],
150 |       "source": [
151 |         "my_url = ('https://raw.githubusercontent.com/taroyabuki'\n",
152 |         "          '/fromzero/master/data/exam.json')\n",
153 |         "my_data = pd.read_json(my_url)\n",
154 |         "#my_data = pd.read_json('exam.json') # \uff08\u30d5\u30a1\u30a4\u30eb\u3092\u4f7f\u3046\u5834\u5408\uff09\n",
155 |         "my_data"
156 |       ],
157 |       "id": "13e9c6e1-47ab-467b-b851-0091a203a907"
158 |     },
159 |     {
160 |       "cell_type": "code",
161 |       "execution_count": null,
162 |       "metadata": {},
163 |       "outputs": [],
164 |       "source": [
165 |         "import xml.etree.ElementTree as ET\n",
166 |         "from urllib.request import urlopen\n",
167 |         "\n",
168 |         "my_url = ('https://raw.githubusercontent.com/taroyabuki'\n",
169 |         "          '/fromzero/master/data/exam.xml')\n",
170 |         "with urlopen(my_url) as f:\n",
171 |         "    my_tree = ET.parse(f)       # XML\u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f\n",
172 |         "\n",
173 |         "#my_tree = ET.parse('exam.xml') # \uff08\u30d5\u30a1\u30a4\u30eb\u3092\u4f7f\u3046\u5834\u5408\uff09\n",
174 |         "my_ns = '{https://www.example.net/ns/1.0}' # \u540d\u524d\u7a7a\u9593"
175 |       ],
176 |       "id": "93fcbfaf-c73b-4f53-844e-56dd5ed485c1"
177 |     },
178 |     {
179 |       "cell_type": "code",
180 |       "execution_count": null,
181 |       "metadata": {},
182 |       "outputs": [],
183 |       "source": [
184 |         "my_records = my_tree.findall(f'.//{my_ns}record')"
185 |       ],
186 |       "id": "ada9000a-d26f-4870-a1e0-a316c10bb88b"
187 |     },
188 |     {
189 |       "cell_type": "code",
190 |       "execution_count": null,
191 |       "metadata": {},
192 |       "outputs": [],
193 |       "source": [
194 |         "def f(record):\n",
195 |         "    attrs = record.attrib # Attributes of the record element.\n",
196 |         "    # Map each child's tag (namespace prefix stripped) to its text content.\n",
197 |         "    fields = {node.tag.replace(my_ns, ''): node.text for node in record}\n",
198 |         "    return {**attrs, **fields} # Merge both; child entries win on duplicate keys."
199 |       ],
200 |       "id": "c9f2b840-74b1-455c-9068-a589fbb4d4fa"
201 |     },
202 |     {
203 |       "cell_type": "code",
204 |       "execution_count": null,
205 |       "metadata": {},
206 |       "outputs": [],
207 |       "source": [
208 |         "my_data = pd.DataFrame([f(record) for record in my_records])\n",
209 |         "my_data['english'] = pd.to_numeric(my_data['english'])\n",
210 |         "my_data['math']    = pd.to_numeric(my_data['math'])\n",
211 |         "my_data"
212 |       ],
213 |       "id": "49c1e7fe-aff6-48a0-8132-d5003d79ca6c"
214 |     },
215 |     {
216 |       "id": "ca69954f",
217 |       "cell_type": "markdown",
218 |       "source": "## 5.2 \u30c7\u30fc\u30bf\u306e\u5909\u63db",
219 |       "metadata": {}
220 |     },
221 |     {
222 |       "cell_type": "code",
223 |       "execution_count": null,
224 |       "metadata": {},
225 |       "outputs": [],
226 |       "source": [
227 |         "import numpy as np\n",
228 |         "from scipy.stats import zscore\n",
229 |         "\n",
230 |         "x1 = [1, 2, 3]\n",
231 |         "\n",
232 |         "z1 = ((x1 - np.mean(x1)) /\n",
233 |         "      np.std(x1, ddof=1))\n",
234 |         "# \u3042\u308b\u3044\u306f\n",
235 |         "z1 = zscore(x1, ddof=1)\n",
236 |         "\n",
237 |         "z1"
238 |       ],
239 |       "id": "90ff1c01-02d5-40c4-9f07-56c52e64a48e"
240 |     },
241 |     {
242 |       "cell_type": "code",
243 |       "execution_count": null,
244 |       "metadata": {},
245 |       "outputs": [],
246 |       "source": [
247 |         "z1.mean(), np.std(z1, ddof=1)"
248 |       ],
249 |       "id": "87140628-936c-4d2c-a4b8-f9caea5322b4"
250 |     },
251 |     {
252 |       "cell_type": "code",
253 |       "execution_count": null,
254 |       "metadata": {},
255 |       "outputs": [],
256 |       "source": [
257 |         "z1 * np.std(x1, ddof=1) + np.mean(x1)"
258 |       ],
259 |       "id": "48505728-2c9a-48da-bc13-7dc6e316a1f3"
260 |     },
261 |     {
262 |       "cell_type": "code",
263 |       "execution_count": null,
264 |       "metadata": {},
265 |       "outputs": [],
266 |       "source": [
267 |         "x2 = [1, 3, 5]\n",
268 |         "z2 = ((x2 - np.mean(x1)) /\n",
269 |         "      np.std(x1, ddof=1))\n",
270 |         "z2.mean(), np.std(z2, ddof=1)"
271 |       ],
272 |       "id": "2169f911-ba5f-4d45-8d4d-02adda1adfb2"
273 |     },
274 |     {
275 |       "cell_type": "code",
276 |       "execution_count": null,
277 |       "metadata": {},
278 |       "outputs": [],
279 |       "source": [
280 |         "import pandas as pd\n",
281 |         "from sklearn.preprocessing import (\n",
282 |         "    OneHotEncoder)\n",
283 |         "\n",
284 |         "my_df = pd.DataFrame({\n",
285 |         "    'id':    [ 1 ,  2 ,  3 ],\n",
286 |         "    'class': ['A', 'B', 'C']})\n",
287 |         "\n",
288 |         "my_enc = OneHotEncoder()\n",
289 |         "tmp = my_enc.fit_transform(\n",
290 |         "    my_df[['class']]).toarray()\n",
291 |         "my_names = (my_enc.get_feature_names_out() # non-deprecated API (scikit-learn >= 1.0)\n",
292 |         "if hasattr(my_enc, 'get_feature_names_out')\n",
293 |         "else my_enc.get_feature_names()) # fallback for scikit-learn < 1.0\n",
294 |         "pd.DataFrame(tmp, columns=my_names)"
295 |       ],
296 |       "id": "b97df278-4912-490f-9518-146ef7171868"
297 |     },
298 |     {
299 |       "cell_type": "code",
300 |       "execution_count": null,
301 |       "metadata": {},
302 |       "outputs": [],
303 |       "source": [
304 |         "my_df2 = pd.DataFrame({\n",
305 |         "    'id':    [ 4 ,  5,   6 ],\n",
306 |         "    'class': ['B', 'C', 'B']})\n",
307 |         "tmp = my_enc.transform(\n",
308 |         "    my_df2[['class']]).toarray()\n",
309 |         "pd.DataFrame(tmp, columns=my_names)"
310 |       ],
311 |       "id": "61707b06-bef2-466b-8eee-2612578af36d"
312 |     },
313 |     {
314 |       "cell_type": "code",
315 |       "execution_count": null,
316 |       "metadata": {},
317 |       "outputs": [],
318 |       "source": [
319 |         "my_enc = OneHotEncoder(drop='first')\n",
320 |         "\n",
321 |         "tmp = my_enc.fit_transform(\n",
322 |         "    my_df[['class']]).toarray()\n",
323 |         "my_names = (my_enc.get_feature_names_out() # non-deprecated API (scikit-learn >= 1.0)\n",
324 |         "if hasattr(my_enc, 'get_feature_names_out')\n",
325 |         "else my_enc.get_feature_names()) # fallback for scikit-learn < 1.0\n",
326 |         "pd.DataFrame(tmp, columns=my_names)\n",
327 |         "\n",
328 |         "tmp = my_enc.transform(\n",
329 |         "    my_df2[['class']]).toarray()\n",
330 |         "pd.DataFrame(tmp, columns=my_names)"
331 |       ],
332 |       "id": "d551e6a4-ef05-44ff-b5ef-1d337077850d"
333 |     }
334 |   ],
335 |   "nbformat": 4,
336 |   "nbformat_minor": 5,
337 |   "metadata": {
338 |     "kernelspec": {
339 |       "name": "python3",
340 |       "display_name": "Python 3"
341 |     }
342 |   }
343 | }


--------------------------------------------------------------------------------