├── .gitignore
├── figures
├── fig
│ ├── 図1.2.pdf
│ ├── 図1.4.pdf
│ ├── 図10.1.pdf
│ ├── 図10.2.pdf
│ ├── 図11.1.pdf
│ ├── 図11.2.pdf
│ ├── 図11.3.pdf
│ ├── 図11.4.pdf
│ ├── 図11.5.png
│ ├── 図2.2.pdf
│ ├── 図2.3.pdf
│ ├── 図2.4.png
│ ├── 図2.5.png
│ ├── 図2.6.png
│ ├── 図2.7.png
│ ├── 図3.1.pdf
│ ├── 図6.1.pdf
│ ├── 図7.1.pdf
│ ├── 図7.2.pdf
│ ├── 図7.3.pdf
│ ├── 図8.1.pdf
│ ├── 図A.1.png
│ ├── 表11.1.pdf
│ ├── 表11.b.pdf
│ ├── 図1-last.pdf
│ ├── 図1.1.a.png
│ ├── 図1.1.b.png
│ ├── 図2.1.a.png
│ ├── 図2.1.b.png
│ ├── 図2.8.a.png
│ ├── 図2.8.b.png
│ ├── 図2.9.a.png
│ ├── 図2.9.b.png
│ ├── 図1-preface.a.pdf
│ ├── 図1-preface.b.pdf
│ ├── 図1-last.md
│ ├── 図3.1.md
│ └── 図1.4.md
├── fig-p
│ ├── 07-p-knn.pdf
│ ├── 07-p-lm.pdf
│ ├── 10-p-pr.pdf
│ ├── 10-p-roc.pdf
│ ├── 03-p-temp.pdf
│ ├── 04-p-conf.pdf
│ ├── 04-p-curve.pdf
│ ├── 04-p-hist1.pdf
│ ├── 04-p-hist2.pdf
│ ├── 04-p-hist3.pdf
│ ├── 04-p-iris.pdf
│ ├── 04-p-mosaic.pdf
│ ├── 04-p-rbinom.pdf
│ ├── 04-p-rnorm.pdf
│ ├── 04-p-runif.pdf
│ ├── 07-p-plot.pdf
│ ├── 07-p-tuning.pdf
│ ├── 09-p-rpart.pdf
│ ├── 09-p-rpart2.pdf
│ ├── 09-p-varimp.pdf
│ ├── 11-p-relu.pdf
│ ├── 13-p-biplot.pdf
│ ├── 13-p-elbow.pdf
│ ├── 13-p-hclust.pdf
│ ├── 04-p-boxplot.pdf
│ ├── 04-p-pvalue1.pdf
│ ├── 04-p-scatter.pdf
│ ├── 07-p-boxplot.pdf
│ ├── 07-p-residual.pdf
│ ├── 08-p-boxplot.pdf
│ ├── 08-p-sigmoid.pdf
│ ├── 10-p-logistic.pdf
│ ├── 11-p-h2o-wine.pdf
│ ├── 13-p-heatmap.pdf
│ ├── 04-p-boot-binom.pdf
│ ├── 04-p-iris-group.pdf
│ ├── 07-p-polynomial.pdf
│ ├── 07-p-regression.pdf
│ ├── 08-p-enet-path.pdf
│ ├── 08-p-enet-tuning.pdf
│ ├── 10-p-titanic-roc.pdf
│ ├── 11-p-mnist-cnn.pdf
│ ├── 11-p-mnist-id5.pdf
│ ├── 11-p-mnist-lenet.pdf
│ ├── 11-p-mnist-nnet.pdf
│ ├── 11-p-regression.pdf
│ ├── 04-p-random-sample.pdf
│ ├── 07-p-tuning-train.pdf
│ ├── 08-p-enet-tuning2.pdf
│ ├── 10-p-titanic-tree.pdf
│ ├── 13-p-pca-clusters.pdf
│ ├── 08-p-boxplot-scaled.pdf
│ ├── 11-p-classification.pdf
│ ├── 11-p-mnist-lenet-miss.pdf
│ ├── 12-p-airpassengers-lm.pdf
│ ├── 12-p-airpassengers-arima.pdf
│ ├── 12-p-airpassengers-prophet.pdf
│ ├── 12-p-airpassengers-split.pdf
│ ├── 04-p-runif.py
│ ├── 04-p-curve.py
│ ├── 10-p-logistic.py
│ ├── 04-p-hist2.py
│ ├── 08-p-sigmoid.py
│ ├── 04-p-boxplot.py
│ ├── 04-p-hist1.py
│ ├── 07-p-plot.py
│ ├── 11-p-mnist-id5.py
│ ├── 04-p-rnorm.py
│ ├── 11-p-relu.py
│ ├── 04-p-scatter.py
│ ├── 04-p-random-sample.py
│ ├── 04-p-rbinom.py
│ ├── 04-p-boot-binom.py
│ ├── 04-p-hist3.py
│ ├── Makefile
│ ├── 08-p-boxplot.py
│ ├── 04-p-iris.py
│ ├── 13-p-hclust.py
│ ├── 04-p-mosaic.py
│ ├── 07-p-regression.py
│ ├── 04-p-iris-group.py
│ ├── 13-p-elbow.py
│ ├── 09-p-varimp.py
│ ├── 03-p-temp.py
│ ├── 08-p-boxplot-scaled.py
│ ├── 10-p-roc.py
│ ├── 13-p-heatmap.py
│ ├── 04-p-conf.py
│ ├── 13-p-biplot.py
│ ├── 07-p-residual.py
│ ├── 10-p-pr.py
│ ├── 09-p-rpart.py
│ ├── 12-p-airpassengers-split.py
│ ├── 07-p-knn.py
│ ├── 07-p-lm.py
│ ├── 11-p-h2o-wine.py
│ ├── 04-p-pvalue1.py
│ ├── 08-p-enet-path.py
│ ├── 07-p-boxplot.py
│ ├── 12-p-airpassengers-prophet.py
│ ├── 13-p-pca-clusters.py
│ ├── 12-p-airpassengers-lm.py
│ ├── 09-p-rpart2.py
│ ├── 07-p-tuning.py
│ ├── 07-p-polynomial.py
│ ├── 10-p-titanic-tree.py
│ ├── 10-p-titanic-roc.py
│ ├── 07-p-tuning-train.py
│ ├── 11-p-regression.py
│ ├── 12-p-airpassengers-arima.py
│ ├── 08-p-enet-tuning.py
│ ├── 11-p-classification.py
│ ├── 11-p-mnist-nnet.py
│ ├── 11-p-mnist-cnn.py
│ ├── 11-p-mnist-lenet.py
│ ├── 08-p-enet-tuning2.py
│ └── 11-p-mnist-lenet-miss.py
├── fig-r
│ ├── 07-r-knn.pdf
│ ├── 07-r-lm.pdf
│ ├── 10-r-pr.pdf
│ ├── 10-r-roc.pdf
│ ├── 03-r-temp.pdf
│ ├── 04-r-conf.pdf
│ ├── 04-r-curve.pdf
│ ├── 04-r-hist1.pdf
│ ├── 04-r-hist2.pdf
│ ├── 04-r-hist3.pdf
│ ├── 04-r-iris.pdf
│ ├── 04-r-mosaic.pdf
│ ├── 04-r-rbinom.pdf
│ ├── 04-r-rnorm.pdf
│ ├── 04-r-runif.pdf
│ ├── 07-r-plot.pdf
│ ├── 07-r-tuning.pdf
│ ├── 09-r-rpart.pdf
│ ├── 09-r-rpart2.pdf
│ ├── 09-r-varimp.pdf
│ ├── 10-r-rpart1.pdf
│ ├── 10-r-rpart2.pdf
│ ├── 10-r-rpart3.pdf
│ ├── 11-r-relu.pdf
│ ├── 13-r-biplot.pdf
│ ├── 13-r-elbow.pdf
│ ├── 13-r-hclust.pdf
│ ├── 13-r-kmeans.pdf
│ ├── 04-r-boxplot.R
│ ├── 04-r-boxplot.pdf
│ ├── 04-r-curve.R
│ ├── 04-r-ggplot-f.pdf
│ ├── 04-r-hist1.R
│ ├── 04-r-pvalue1.pdf
│ ├── 04-r-scatter.pdf
│ ├── 07-r-boxplot.pdf
│ ├── 07-r-residual.pdf
│ ├── 08-r-boxplot.pdf
│ ├── 08-r-nnet-3-2.pdf
│ ├── 08-r-sigmoid.pdf
│ ├── 10-r-logistic.pdf
│ ├── 11-r-h2o-wine.pdf
│ ├── 13-r-hclust2.pdf
│ ├── 13-r-heatmap.pdf
│ ├── 04-r-boot-binom.pdf
│ ├── 04-r-ggplot-box.pdf
│ ├── 04-r-ggplot-hist.pdf
│ ├── 04-r-iris-group.pdf
│ ├── 07-r-polynomial.pdf
│ ├── 07-r-regression.pdf
│ ├── 08-r-enet-path.pdf
│ ├── 08-r-enet-tuning.pdf
│ ├── 10-r-titanic-roc.pdf
│ ├── 11-r-mnist-cnn.pdf
│ ├── 11-r-mnist-id5.pdf
│ ├── 11-r-mnist-lenet.pdf
│ ├── 11-r-mnist-nnet.pdf
│ ├── 11-r-regression.pdf
│ ├── 04-r-ggplot-mosaic.pdf
│ ├── 04-r-ggplot-point.pdf
│ ├── 04-r-random-sample.pdf
│ ├── 07-r-tuning-train.pdf
│ ├── 08-r-enet-tuning2.pdf
│ ├── 10-r-titanic-tree.pdf
│ ├── 13-r-pca-clusters.pdf
│ ├── 08-r-boxplot-scaled.pdf
│ ├── 08-r-sigmoid.R
│ ├── 10-r-logistic.R
│ ├── 11-r-classification.pdf
│ ├── 11-r-mnist-lenet-miss.pdf
│ ├── 12-r-airpassengers-lm.pdf
│ ├── 11-r-relu.R
│ ├── 12-r-airpassengers-arima.pdf
│ ├── 12-r-airpassengers-prophet.pdf
│ ├── 12-r-airpassengers-split.pdf
│ ├── 04-r-hist2.R
│ ├── 04-r-scatter.R
│ ├── 04-r-runif.R
│ ├── 13-r-elbow.R
│ ├── 04-r-hist3.R
│ ├── 07-r-plot.R
│ ├── 04-r-rnorm.R
│ ├── 04-r-ggplot-point.R
│ ├── 04-r-ggplot-f.R
│ ├── 04-r-random-sample.R
│ ├── 11-r-mnist-id5.R
│ ├── 09-r-rpart.R
│ ├── 09-r-varimp.R
│ ├── 04-r-mosaic.R
│ ├── 04-r-rbinom.R
│ ├── 04-r-boot-binom.R
│ ├── 04-r-ggplot-box.R
│ ├── 04-r-ggplot-hist.R
│ ├── 04-r-iris.R
│ ├── 04-r-ggplot-mosaic.R
│ ├── 07-r-tuning.R
│ ├── 13-r-hclust.R
│ ├── Makefile
│ ├── 13-r-kmeans.R
│ ├── 13-r-hclust2.R
│ ├── 07-r-residual.R
│ ├── 03-r-temp.R
│ ├── 08-r-boxplot.R
│ ├── 10-r-pr.R
│ ├── 07-r-regression.R
│ ├── 10-r-roc.R
│ ├── 13-r-heatmap.R
│ ├── 10-r-rpart3.R
│ ├── 10-r-titanic-tree.R
│ ├── 13-r-biplot.R
│ ├── 07-r-lm.R
│ ├── 08-r-boxplot-scaled.R
│ ├── 09-r-rpart2.R
│ ├── 07-r-knn.R
│ ├── 08-r-enet-path.R
│ ├── 13-r-pca-clusters.R
│ ├── 04-r-conf.R
│ ├── 07-r-boxplot.R
│ ├── 04-r-iris-group.R
│ ├── 10-r-rpart1.R
│ ├── 11-r-h2o-wine.R
│ ├── 12-r-airpassengers-split.R
│ ├── 07-r-polynomial.R
│ ├── 04-r-pvalue1.R
│ ├── 10-r-rpart2.R
│ ├── 08-r-nnet-3-2.R
│ ├── 08-r-enet-tuning.R
│ ├── 07-r-tuning-train.R
│ ├── 10-r-titanic-roc.R
│ ├── 12-r-airpassengers-arima.R
│ ├── 12-r-airpassengers-prophet.R
│ ├── 12-r-airpassengers-lm.R
│ ├── 11-r-classification.R
│ ├── 11-r-regression.R
│ ├── 11-r-mnist-nnet.R
│ ├── 11-r-mnist-id5.svg
│ ├── 11-r-mnist-cnn.R
│ ├── 11-r-mnist-lenet.R
│ ├── 08-r-enet-tuning2.R
│ └── 11-r-mnist-lenet-miss.R
└── howtomake.md
├── data
├── exam.csv
├── exam.json
├── exam.xml
└── wine.csv
├── docker
├── rstudio.sh
├── jupyter.sh
├── rstudio
│ ├── README.md
│ └── Dockerfile
└── jupyter
│ ├── README.md
│ └── Dockerfile
├── addendum
├── 07.03.02
│ ├── confidence_band_p.py
│ ├── confidence_band_r.R
│ ├── 1+3x+N(0,2x).csv
│ └── README.md
└── sagemaker
│ ├── README.md
│ └── sage-python.yml
├── docs
└── exam.html
├── code
├── R-notebook
│ ├── r-06.ipynb
│ ├── README.md
│ └── r-12.ipynb
└── Python-notebook
│ ├── README.md
│ ├── python-06.ipynb
│ ├── python-12.ipynb
│ └── python-05.ipynb
├── README.md
└── update.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/*
2 | tmp*
3 |
--------------------------------------------------------------------------------
/figures/fig/図1.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.2.pdf
--------------------------------------------------------------------------------
/figures/fig/図1.4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.4.pdf
--------------------------------------------------------------------------------
/figures/fig/図10.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図10.1.pdf
--------------------------------------------------------------------------------
/figures/fig/図10.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図10.2.pdf
--------------------------------------------------------------------------------
/figures/fig/図11.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.1.pdf
--------------------------------------------------------------------------------
/figures/fig/図11.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.2.pdf
--------------------------------------------------------------------------------
/figures/fig/図11.3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.3.pdf
--------------------------------------------------------------------------------
/figures/fig/図11.4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.4.pdf
--------------------------------------------------------------------------------
/figures/fig/図11.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.5.png
--------------------------------------------------------------------------------
/figures/fig/図2.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.2.pdf
--------------------------------------------------------------------------------
/figures/fig/図2.3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.3.pdf
--------------------------------------------------------------------------------
/figures/fig/図2.4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.4.png
--------------------------------------------------------------------------------
/figures/fig/図2.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.5.png
--------------------------------------------------------------------------------
/figures/fig/図2.6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.6.png
--------------------------------------------------------------------------------
/figures/fig/図2.7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.7.png
--------------------------------------------------------------------------------
/figures/fig/図3.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図3.1.pdf
--------------------------------------------------------------------------------
/figures/fig/図6.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図6.1.pdf
--------------------------------------------------------------------------------
/figures/fig/図7.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図7.1.pdf
--------------------------------------------------------------------------------
/figures/fig/図7.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図7.2.pdf
--------------------------------------------------------------------------------
/figures/fig/図7.3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図7.3.pdf
--------------------------------------------------------------------------------
/figures/fig/図8.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図8.1.pdf
--------------------------------------------------------------------------------
/figures/fig/図A.1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図A.1.png
--------------------------------------------------------------------------------
/figures/fig/表11.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/表11.1.pdf
--------------------------------------------------------------------------------
/figures/fig/表11.b.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/表11.b.pdf
--------------------------------------------------------------------------------
/data/exam.csv:
--------------------------------------------------------------------------------
1 | name,english,math,gender
2 | A,60,70,f
3 | B,90,80,m
4 | C,70,90,m
5 | D,90,100,f
6 |
--------------------------------------------------------------------------------
/figures/fig/図1-last.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1-last.pdf
--------------------------------------------------------------------------------
/figures/fig/図1.1.a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.1.a.png
--------------------------------------------------------------------------------
/figures/fig/図1.1.b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.1.b.png
--------------------------------------------------------------------------------
/figures/fig/図2.1.a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.1.a.png
--------------------------------------------------------------------------------
/figures/fig/図2.1.b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.1.b.png
--------------------------------------------------------------------------------
/figures/fig/図2.8.a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.8.a.png
--------------------------------------------------------------------------------
/figures/fig/図2.8.b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.8.b.png
--------------------------------------------------------------------------------
/figures/fig/図2.9.a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.9.a.png
--------------------------------------------------------------------------------
/figures/fig/図2.9.b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.9.b.png
--------------------------------------------------------------------------------
/figures/fig-p/07-p-knn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-knn.pdf
--------------------------------------------------------------------------------
/figures/fig-p/07-p-lm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-lm.pdf
--------------------------------------------------------------------------------
/figures/fig-p/10-p-pr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-pr.pdf
--------------------------------------------------------------------------------
/figures/fig-p/10-p-roc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-roc.pdf
--------------------------------------------------------------------------------
/figures/fig-r/07-r-knn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-knn.pdf
--------------------------------------------------------------------------------
/figures/fig-r/07-r-lm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-lm.pdf
--------------------------------------------------------------------------------
/figures/fig-r/10-r-pr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-pr.pdf
--------------------------------------------------------------------------------
/figures/fig-r/10-r-roc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-roc.pdf
--------------------------------------------------------------------------------
/figures/fig-p/03-p-temp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/03-p-temp.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-conf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-conf.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-curve.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-curve.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-hist1.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-hist2.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-hist3.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-iris.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-iris.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-mosaic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-mosaic.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-rbinom.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-rbinom.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-rnorm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-rnorm.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-runif.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-runif.pdf
--------------------------------------------------------------------------------
/figures/fig-p/07-p-plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-plot.pdf
--------------------------------------------------------------------------------
/figures/fig-p/07-p-tuning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-tuning.pdf
--------------------------------------------------------------------------------
/figures/fig-p/09-p-rpart.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/09-p-rpart.pdf
--------------------------------------------------------------------------------
/figures/fig-p/09-p-rpart2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/09-p-rpart2.pdf
--------------------------------------------------------------------------------
/figures/fig-p/09-p-varimp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/09-p-varimp.pdf
--------------------------------------------------------------------------------
/figures/fig-p/11-p-relu.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-relu.pdf
--------------------------------------------------------------------------------
/figures/fig-p/13-p-biplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-biplot.pdf
--------------------------------------------------------------------------------
/figures/fig-p/13-p-elbow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-elbow.pdf
--------------------------------------------------------------------------------
/figures/fig-p/13-p-hclust.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-hclust.pdf
--------------------------------------------------------------------------------
/figures/fig-r/03-r-temp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/03-r-temp.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-conf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-conf.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-curve.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-curve.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-hist1.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-hist2.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-hist3.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-iris.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-iris.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-mosaic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-mosaic.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-rbinom.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-rbinom.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-rnorm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-rnorm.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-runif.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-runif.pdf
--------------------------------------------------------------------------------
/figures/fig-r/07-r-plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-plot.pdf
--------------------------------------------------------------------------------
/figures/fig-r/07-r-tuning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-tuning.pdf
--------------------------------------------------------------------------------
/figures/fig-r/09-r-rpart.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/09-r-rpart.pdf
--------------------------------------------------------------------------------
/figures/fig-r/09-r-rpart2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/09-r-rpart2.pdf
--------------------------------------------------------------------------------
/figures/fig-r/09-r-varimp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/09-r-varimp.pdf
--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-rpart1.pdf
--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-rpart2.pdf
--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-rpart3.pdf
--------------------------------------------------------------------------------
/figures/fig-r/11-r-relu.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-relu.pdf
--------------------------------------------------------------------------------
/figures/fig-r/13-r-biplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-biplot.pdf
--------------------------------------------------------------------------------
/figures/fig-r/13-r-elbow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-elbow.pdf
--------------------------------------------------------------------------------
/figures/fig-r/13-r-hclust.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-hclust.pdf
--------------------------------------------------------------------------------
/figures/fig-r/13-r-kmeans.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-kmeans.pdf
--------------------------------------------------------------------------------
/figures/fig/図1-preface.a.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1-preface.a.pdf
--------------------------------------------------------------------------------
/figures/fig/図1-preface.b.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1-preface.b.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-boxplot.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-pvalue1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-pvalue1.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-scatter.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-scatter.pdf
--------------------------------------------------------------------------------
/figures/fig-p/07-p-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-boxplot.pdf
--------------------------------------------------------------------------------
/figures/fig-p/07-p-residual.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-residual.pdf
--------------------------------------------------------------------------------
/figures/fig-p/08-p-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-boxplot.pdf
--------------------------------------------------------------------------------
/figures/fig-p/08-p-sigmoid.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-sigmoid.pdf
--------------------------------------------------------------------------------
/figures/fig-p/10-p-logistic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-logistic.pdf
--------------------------------------------------------------------------------
/figures/fig-p/11-p-h2o-wine.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-h2o-wine.pdf
--------------------------------------------------------------------------------
/figures/fig-p/13-p-heatmap.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-heatmap.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-boxplot.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-boxplot.pdf", width = 6, height = 5.5)  # open PDF graphics device for the figure
2 |
3 | boxplot(iris[, -5])  # box plots of the four numeric iris columns (drops column 5, Species)
4 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-boxplot.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-curve.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-curve.pdf", width = 6, height = 5.5)  # open PDF graphics device for the figure
2 |
3 | curve(x^3 - x, -2, 2)  # plot y = x^3 - x over [-2, 2]
4 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-f.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-f.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist1.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-hist1.pdf", width = 6, height = 5.5)  # open PDF graphics device for the figure
2 |
3 | hist(iris$Sepal.Length)  # histogram of sepal length, default breaks
4 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-pvalue1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-pvalue1.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-scatter.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-scatter.pdf
--------------------------------------------------------------------------------
/figures/fig-r/07-r-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-boxplot.pdf
--------------------------------------------------------------------------------
/figures/fig-r/07-r-residual.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-residual.pdf
--------------------------------------------------------------------------------
/figures/fig-r/08-r-boxplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-boxplot.pdf
--------------------------------------------------------------------------------
/figures/fig-r/08-r-nnet-3-2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-nnet-3-2.pdf
--------------------------------------------------------------------------------
/figures/fig-r/08-r-sigmoid.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-sigmoid.pdf
--------------------------------------------------------------------------------
/figures/fig-r/10-r-logistic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-logistic.pdf
--------------------------------------------------------------------------------
/figures/fig-r/11-r-h2o-wine.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-h2o-wine.pdf
--------------------------------------------------------------------------------
/figures/fig-r/13-r-hclust2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-hclust2.pdf
--------------------------------------------------------------------------------
/figures/fig-r/13-r-heatmap.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-heatmap.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-boot-binom.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-boot-binom.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-iris-group.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-iris-group.pdf
--------------------------------------------------------------------------------
/figures/fig-p/07-p-polynomial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-polynomial.pdf
--------------------------------------------------------------------------------
/figures/fig-p/07-p-regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-regression.pdf
--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-path.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-enet-path.pdf
--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-tuning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-enet-tuning.pdf
--------------------------------------------------------------------------------
/figures/fig-p/10-p-titanic-roc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-titanic-roc.pdf
--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-cnn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-cnn.pdf
--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-id5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-id5.pdf
--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-lenet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-lenet.pdf
--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-nnet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-nnet.pdf
--------------------------------------------------------------------------------
/figures/fig-p/11-p-regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-regression.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-boot-binom.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-boot-binom.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-box.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-box.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-hist.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-hist.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-iris-group.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-iris-group.pdf
--------------------------------------------------------------------------------
/figures/fig-r/07-r-polynomial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-polynomial.pdf
--------------------------------------------------------------------------------
/figures/fig-r/07-r-regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-regression.pdf
--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-path.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-enet-path.pdf
--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-tuning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-enet-tuning.pdf
--------------------------------------------------------------------------------
/figures/fig-r/10-r-titanic-roc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-titanic-roc.pdf
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-cnn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-cnn.pdf
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-id5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-id5.pdf
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-lenet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-lenet.pdf
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-nnet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-nnet.pdf
--------------------------------------------------------------------------------
/figures/fig-r/11-r-regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-regression.pdf
--------------------------------------------------------------------------------
/figures/fig-p/04-p-random-sample.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-random-sample.pdf
--------------------------------------------------------------------------------
/figures/fig-p/07-p-tuning-train.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-tuning-train.pdf
--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-tuning2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-enet-tuning2.pdf
--------------------------------------------------------------------------------
/figures/fig-p/10-p-titanic-tree.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-titanic-tree.pdf
--------------------------------------------------------------------------------
/figures/fig-p/13-p-pca-clusters.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-pca-clusters.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-mosaic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-mosaic.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-point.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-point.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-random-sample.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-random-sample.pdf
--------------------------------------------------------------------------------
/figures/fig-r/07-r-tuning-train.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-tuning-train.pdf
--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-tuning2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-enet-tuning2.pdf
--------------------------------------------------------------------------------
/figures/fig-r/10-r-titanic-tree.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-titanic-tree.pdf
--------------------------------------------------------------------------------
/figures/fig-r/13-r-pca-clusters.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-pca-clusters.pdf
--------------------------------------------------------------------------------
/figures/fig-p/08-p-boxplot-scaled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-boxplot-scaled.pdf
--------------------------------------------------------------------------------
/figures/fig-p/11-p-classification.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-classification.pdf
--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-lenet-miss.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-lenet-miss.pdf
--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-lm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-lm.pdf
--------------------------------------------------------------------------------
/figures/fig-r/08-r-boxplot-scaled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-boxplot-scaled.pdf
--------------------------------------------------------------------------------
/figures/fig-r/08-r-sigmoid.R:
--------------------------------------------------------------------------------
1 | pdf(file = "08-r-sigmoid.pdf", width = 6, height = 5.5)  # open PDF graphics device for the figure
2 |
3 | curve(1 / (1 + exp(-x)), -6, 6)  # standard logistic (sigmoid) function on [-6, 6]
4 |
--------------------------------------------------------------------------------
/figures/fig-r/10-r-logistic.R:
--------------------------------------------------------------------------------
1 | pdf(file = "10-r-logistic.pdf", width = 6, height = 5.5)  # open PDF graphics device for the figure
2 |
3 | curve(1 / (1 + exp(-x)), -6, 6)  # standard logistic (sigmoid) function on [-6, 6]
4 |
--------------------------------------------------------------------------------
/figures/fig-r/11-r-classification.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-classification.pdf
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-lenet-miss.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-lenet-miss.pdf
--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-lm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-lm.pdf
--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-arima.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-arima.pdf
--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-prophet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-prophet.pdf
--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-split.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-split.pdf
--------------------------------------------------------------------------------
/figures/fig-r/11-r-relu.R:
--------------------------------------------------------------------------------
1 | pdf(file = "11-r-relu.pdf", width = 6, height = 5.5)  # open PDF graphics device for the figure
2 |
3 | library(keras)
4 | curve(activation_relu(x), -3, 3)  # plot Keras' ReLU activation on [-3, 3]
5 |
--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-arima.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-arima.pdf
--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-prophet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-prophet.pdf
--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-split.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-split.pdf
--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist2.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-hist2.pdf", width = 6, height = 5.5)  # open PDF graphics device for the figure
2 |
3 | x <- c(10, 20, 30)
4 | hist(x, breaks = 2) # number of bins is 2
5 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-scatter.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-scatter.pdf", width = 6, height = 5.5)  # open PDF graphics device for the figure
2 |
3 | plot(iris$Sepal.Length,  # x-axis: sepal length
4 |      iris$Sepal.Width)   # y-axis: sepal width
5 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-runif.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | x = np.random.random(1000)  # 1000 uniform draws on [0, 1)
5 | plt.hist(x)  # histogram with matplotlib's default bins
6 |
7 | plt.savefig('04-p-runif.pdf')
8 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-runif.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-runif.pdf", width = 6, height = 5)  # open PDF graphics device for the figure
2 |
3 | x <- runif(min = 0, # minimum
4 |            max = 1, # maximum
5 |            n = 1000) # number of random values
6 | hist(x)
7 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-curve.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | x = np.linspace(-2, 2, 100)  # 100 evenly spaced points on [-2, 2]
5 | y = x**3 - x
6 | plt.plot(x, y)  # curve y = x^3 - x
7 |
8 | plt.savefig('04-p-curve.pdf')
9 |
--------------------------------------------------------------------------------
/figures/fig-p/10-p-logistic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | x = np.arange(-6, 6, 0.1)  # grid on [-6, 6) with step 0.1
3 | y = 1 / (1 + np.exp(-x))  # standard logistic (sigmoid) function
4 | import matplotlib.pyplot as plt
5 | plt.plot(x, y)
6 | plt.savefig('10-p-logistic.pdf')
7 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist2.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | my_df = pd.DataFrame({'x': [10, 20, 30]})
4 | my_df.hist('x', bins=2) # number of bins is 2
5 |
6 | import matplotlib.pyplot as plt
7 | plt.savefig('04-p-hist2.pdf')
8 |
--------------------------------------------------------------------------------
/figures/fig-p/08-p-sigmoid.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 |
4 | x = np.linspace(-6, 6, 100)  # 100 evenly spaced points on [-6, 6]
5 | y = 1 / (1 + np.exp(-x))  # standard logistic (sigmoid) function
6 | plt.plot(x, y)
7 | plt.savefig('08-p-sigmoid.pdf')
8 |
--------------------------------------------------------------------------------
/figures/fig-r/13-r-elbow.R:
--------------------------------------------------------------------------------
1 | pdf(file = "13-r-elbow.pdf", width = 6, height = 4.5)  # open PDF graphics device for the figure
2 |
3 | library(tidyverse)
4 | library(factoextra)
5 |
6 | my_data <- iris[, -5]  # numeric columns only (drop Species)
7 | fviz_nbclust(my_data, kmeans, method = "wss")  # elbow plot: within-cluster sum of squares vs. number of clusters
8 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-hist3.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-hist3.pdf", width = 6, height = 5.5)  # open PDF graphics device for the figure
2 |
3 | x <- iris$Sepal.Length
4 | tmp <- seq(min(x), max(x),  # 10 equally spaced break points over the data range
5 |            length.out = 10)
6 | hist(x, breaks = tmp, right = FALSE)  # intervals closed on the left
7 |
--------------------------------------------------------------------------------
/figures/fig-r/07-r-plot.R:
--------------------------------------------------------------------------------
1 | pdf(file = "07-r-plot.pdf", width = 6, height = 4.5)  # open PDF graphics device for the figure
2 |
3 | library(tidyverse)
4 | my_data <- cars
5 |
6 | my_data %>%
7 |   ggplot(aes(x = speed, y = dist)) +  # dist vs. speed
8 |   geom_point()  # scatter plot
9 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-boxplot.py:
--------------------------------------------------------------------------------
1 | import statsmodels.api as sm
2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
3 |
4 | iris.boxplot()  # box plots of the numeric iris columns
5 |
6 | import matplotlib.pyplot as plt
7 | plt.savefig('04-p-boxplot.pdf')
8 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-rnorm.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-rnorm.pdf", width = 6, height = 5)  # open PDF graphics device for the figure
2 |
3 | r <- 10000
4 | x <- rnorm(mean = 50, # mean
5 |            sd = 5, # standard deviation
6 |            n = r) # number of random values
7 | hist(x, breaks = 40)
8 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist1.py:
--------------------------------------------------------------------------------
1 | import statsmodels.api as sm
2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
3 |
4 | iris.hist('Sepal.Length')  # histogram of sepal length
5 |
6 | import matplotlib.pyplot as plt
7 | plt.savefig('04-p-hist1.pdf')
8 |
--------------------------------------------------------------------------------
/figures/fig-p/07-p-plot.py:
--------------------------------------------------------------------------------
1 | import statsmodels.api as sm
2 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
3 | my_data.plot(x='speed', style='o')  # 'o' markers, speed on the x-axis
4 | import matplotlib.pyplot as plt
5 | plt.savefig('07-p-plot.pdf')
6 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-point.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-ggplot-point.pdf", width = 6, height = 4)  # open PDF graphics device for the figure
2 |
3 | library(tidyverse)
4 |
5 | iris %>%
6 |   ggplot(aes(x = Sepal.Length,  # x-axis: sepal length
7 |              y = Sepal.Width)) +  # y-axis: sepal width
8 |   geom_point()  # scatter plot
9 |
--------------------------------------------------------------------------------
/docker/rstudio.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | docker run \
3 |   -d \
4 |   -e PASSWORD=password \
5 |   -e ROOT=TRUE \
6 |   -p 8787:8787 \
7 |   -v "$(pwd):/home/rstudio/work" \
8 |   --platform linux/x86_64 \
9 |   --name rs \
10 |   taroyabuki/rstudio  # detached container; RStudio on localhost:8787, current dir mounted at /home/rstudio/work
11 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-f.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-ggplot-f.pdf", width = 6, height = 4)  # open PDF graphics device for the figure
2 |
3 | library(tidyverse)
4 |
5 | f <- function(x) { x^3 - x }  # function to draw
6 | data.frame(x = c(-2, 2)) %>%  # x range [-2, 2]
7 |   ggplot(aes(x = x)) +
8 |   stat_function(fun = f)  # plot f over the range
9 |
--------------------------------------------------------------------------------
/docker/jupyter.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | docker run \
3 |   -d \
4 |   -p 8888:8888 \
5 |   -v "$(pwd):/home/jovyan/work" \
6 |   --platform linux/x86_64 \
7 |   --name jr \
8 |   taroyabuki/jupyter \
9 |   start-notebook.sh \
10 |   --NotebookApp.token='password'  # detached container; Jupyter on localhost:8888, current dir mounted at /home/jovyan/work
11 |
--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-id5.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
3 |
4 | import matplotlib.pyplot as plt
5 | plt.matshow(x_train[4, :, :])  # 5th training image (0-based index 4)
6 | plt.savefig('11-p-mnist-id5.pdf')
7 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-random-sample.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-random-sample.pdf", width = 6, height = 5)  # open PDF graphics device for the figure
2 |
3 | x <- sample(x = 1:6, # range
4 |             size = 10000, # number of random values
5 |             replace = TRUE) # with replacement
6 | hist(x, breaks = 0:6) # histogram
7 |
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-id5.R:
--------------------------------------------------------------------------------
1 | pdf(file = "11-r-mnist-id5.pdf", width = 5.83, height = 4.13)  # open PDF graphics device for the figure
2 |
3 | library(keras)
4 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()  # multi-assign MNIST train/test arrays
5 |
6 | plot(as.raster(x = x_train[5, , ], max = max(x_train)))  # 5th training image, scaled by the max pixel value
7 |
--------------------------------------------------------------------------------
/figures/fig-r/09-r-rpart.R:
--------------------------------------------------------------------------------
1 | pdf(file = "09-r-rpart.pdf", width = 5.83, height = 4.13)  # open PDF graphics device for the figure
2 |
3 | library(caret)
4 | my_data <- iris
5 | my_model <- train(form = Species ~ ., data = my_data, method = "rpart2")  # decision tree predicting Species
6 | rpart.plot::rpart.plot(my_model$finalModel, extra = 1)  # draw the fitted tree
7 |
--------------------------------------------------------------------------------
/figures/fig-r/09-r-varimp.R:
--------------------------------------------------------------------------------
# Figure: random-forest variable importance for the iris data.
pdf(file = "09-r-varimp.pdf", width = 5.83, height = 4.13)

# Random forests are stochastic; fix the seed so the generated figure is
# reproducible (the sibling script 07-r-tuning.R does the same).
set.seed(0)

library(caret)
library(tidyverse)
my_data <- iris

my_model <- train(form = Species ~ ., data = my_data, method = "rf")
ggplot(varImp(my_model))
9 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-mosaic.R:
--------------------------------------------------------------------------------
# Figure: mosaic plot of species vs. whether Sepal.Width exceeds 3.
pdf(file = "04-r-mosaic.pdf", width = 6, height = 4.5)

my_df <- data.frame(
  Species = iris$Species,
  w_Sepal = iris$Sepal.Width > 3)  # TRUE when sepal width > 3

mosaicplot(
  formula = ~ Species + w_Sepal,
  data = my_df)
10 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-rnorm.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np

# Figure: histogram of 10,000 normal random numbers (mean 50, sd 5).
n_samples = 10000
samples = np.random.normal(loc=50,           # mean
                           scale=5,          # standard deviation
                           size=n_samples)   # number of random values
plt.hist(samples, bins=40)

plt.savefig('04-p-rnorm.pdf')
12 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-rbinom.R:
--------------------------------------------------------------------------------
# Figure: histogram of 10,000 binomial random numbers (n = 100, p = 0.5).
pdf(file = "04-r-rbinom.pdf", width = 6, height = 5)

n <- 100
p <- 0.5
r <- 10000
x <- rbinom(size = n, # number of trials
            prob = p, # success probability
            n = r)    # number of random values
hist(x, breaks = max(x) - min(x))  # roughly one bin per integer value
10 |
--------------------------------------------------------------------------------
/figures/fig-p/11-p-relu.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
from keras import activations

# Figure: the ReLU activation function plotted on [-3, 3].
xs = np.linspace(-3, 3, 100)
ys = activations.relu(xs)
plt.plot(xs, ys)
plt.xlabel('x')
plt.ylabel('ReLU(x)')
plt.savefig('11-p-relu.pdf')
10 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-boot-binom.R:
--------------------------------------------------------------------------------
# Figure: bootstrap distribution of the success count when resampling a
# sample of 15 observations (2 successes, 13 failures) 10^5 times.
pdf(file = "04-r-boot-binom.pdf", width = 6, height = 5)

X <- rep(0:1, c(13, 2))  # the original sample: thirteen 0s and two 1s
n <- 10^5
result <- replicate(n, sum(sample(X, size = length(X), replace = TRUE)))
hist(x = result,
     breaks = 0:15,
     right = FALSE)  # left-closed bins, one per possible count
9 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-scatter.py:
--------------------------------------------------------------------------------
import statsmodels.api as sm

# Figure: scatter plot of sepal length vs. sepal width (iris data).
iris = sm.datasets.get_rdataset('iris', 'datasets').data

iris.plot(x='Sepal.Length',
          y='Sepal.Width',
          kind='scatter')

import matplotlib.pyplot as plt
plt.savefig('04-p-scatter.pdf')
10 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-random-sample.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np

# Figure: histogram of 10,000 rolls of a fair six-sided die.
faces = range(1, 7)
rolls = np.random.choice(
    a=faces,        # values 1 through 6
    size=10000,     # number of random values
    replace=True)   # with replacement
plt.hist(rolls, bins=6)  # histogram

plt.savefig('04-p-random-sample.pdf')
11 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-rbinom.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np

# Figure: histogram of 10,000 binomial random numbers (n = 100, p = 0.5).
n = 100
p = 0.5
r = 10000
x = np.random.binomial(n=n,      # number of trials
                       p=p,      # success probability
                       size=r)   # number of random values

spread = x.max() - x.min()       # roughly one bin per integer value
plt.hist(x, bins=spread)

plt.savefig('04-p-rbinom.pdf')
14 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-box.R:
--------------------------------------------------------------------------------
# Figure: box plots of the four iris measurements (ggplot2).
pdf(file = "04-r-ggplot-box.pdf", width = 6, height = 4)

library(tidyverse)

iris %>%
  pivot_longer(-Species) %>%
  ggplot(aes(
    x = factor(name,
               levels = names(iris)),  # keep the original column order
    y = value)) +
  geom_boxplot() +
  xlab(NULL)
13 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-boot-binom.py:
--------------------------------------------------------------------------------
import numpy as np

# Bootstrap: resample 15 observations (2 successes, 13 failures) with
# replacement 10^5 times and count the successes in each resample.
X = [0] * 13 + [1] * 2  # step 1: the original sample
n = 10**5
# One vectorized (n, len(X)) draw instead of n separate choice() calls:
# same sampling distribution, without 10^5 Python-level iterations.
result = np.random.choice(X, size=(n, len(X)), replace=True).sum(axis=1)

import matplotlib.pyplot as plt
plt.hist(result,
         bins=range(0, 16))  # one bin per possible success count
plt.savefig('04-p-boot-binom.pdf')
11 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-hist.R:
--------------------------------------------------------------------------------
# Figure: histogram of sepal length with 9 equal-width bins (ggplot2).
pdf(file = "04-r-ggplot-hist.pdf", width = 6, height = 4)

library(tidyverse)

x <- iris$Sepal.Length
# 10 break points spanning the data range give 9 equal-width bins.
tmp <- seq(min(x), max(x),
           length.out = 10)
iris %>%
  ggplot(aes(x = Sepal.Length)) +
  geom_histogram(breaks = tmp,
                 closed = "left")  # each bin includes its left edge
12 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-iris.R:
--------------------------------------------------------------------------------
# Figure: mean of each iris measurement with standard-error bars.
pdf(file = "04-r-iris.pdf", width = 6, height = 4)

library(tidyverse)
my_df <- psych::describe(iris[, -5])  # summary stats; species column dropped

tmp <- rownames(my_df)  # variable names, in original column order
my_df %>% ggplot(aes(x = factor(tmp, levels = tmp), y = mean)) +
  geom_col() +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se)) +
  xlab(NULL)
11 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-hist3.py:
--------------------------------------------------------------------------------
import numpy as np
import statsmodels.api as sm

# Figure: histogram of sepal length with 9 equal-width bins.
iris = sm.datasets.get_rdataset('iris', 'datasets').data

sepal_length = iris['Sepal.Length']
# 10 break points spanning the data range, rounded to 2 decimals.
edges = np.linspace(min(sepal_length), max(sepal_length), 10)
iris.hist('Sepal.Length',
          bins=edges.round(2))

import matplotlib.pyplot as plt
plt.savefig('04-p-hist3.pdf')
12 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-ggplot-mosaic.R:
--------------------------------------------------------------------------------
# Figure: mosaic plot (ggmosaic) of species vs. whether Sepal.Width > 3.
pdf(file = "04-r-ggplot-mosaic.pdf", width = 6, height = 4)

library(tidyverse)

library(ggmosaic)
my_df <- data.frame(
  Species = iris$Species,
  w_Sepal = iris$Sepal.Width > 3)  # TRUE when sepal width > 3
my_df %>%
  ggplot() +
  geom_mosaic(
    aes(x = product(w_Sepal,
                    Species)))
14 |
--------------------------------------------------------------------------------
/addendum/07.03.02/confidence_band_p.py:
--------------------------------------------------------------------------------
import pandas as pd
import seaborn as sns

# Figure: regression line with a 99% (bootstrap-based) confidence band.
data = pd.read_csv('1+3x+N(0,2x).csv')
x = data.x
y = data.y

alpha = 0.99    # confidence level
n_boot = 10000  # bootstrap resamples used by regplot

# seaborn's ci parameter expects a percentage, hence 100 * alpha.
# (The unused local `n = len(x)` from the original has been removed.)
sns.regplot(x=x, y=y, ci=100 * alpha, n_boot=n_boot)

import matplotlib.pyplot as plt
plt.savefig('confidence_band_p.pdf')
16 |
--------------------------------------------------------------------------------
/figures/fig-p/Makefile:
--------------------------------------------------------------------------------
SRC=$(wildcard *.py)

PDF=$(SRC:.py=.pdf)

all: $(PDF)

.SUFFIXES: .pdf .py

# Run each script; on success, crop the PDF in place when pdfcrop is
# available.  On failure, delete the partial PDF before exiting (matching
# fig-r/Makefile) so make does not treat a broken target as up to date.
.py.pdf:
	if python3 $<; then\
	if [ -f /usr/bin/pdfcrop ]; then\
	pdfcrop $@;\
	rm $@;\
	mv $(basename $@)-crop.pdf $@;\
	fi;\
	else\
	rm -f $@;\
	exit 1;\
	fi

clean:
	rm -f *.pdf *.log
21 |
--------------------------------------------------------------------------------
/figures/fig-p/08-p-boxplot.py:
--------------------------------------------------------------------------------
import pandas as pd

# Figure: box plots (with mean markers) of the wine predictors.
my_url = ('https://raw.githubusercontent.com/taroyabuki/'
          'fromzero/master/data/wine.csv')
my_data = pd.read_csv(my_url)
X = my_data.drop(columns=['LPRICE2'])
y = my_data['LPRICE2']

X.boxplot(showmeans=True)

import matplotlib.pyplot as plt
plt.savefig('08-p-boxplot.pdf')
12 |
--------------------------------------------------------------------------------
/figures/fig/図1-last.md:
--------------------------------------------------------------------------------
1 | ```puml
2 | @startuml
3 | scale 0.8
4 | skinparam {
5 | defaultFontName Hiragino Kaku Gothic ProN
6 | monochrome true
7 | shadowing false
8 | }
9 |
10 | (リファレンス)
11 | (本書)-->(プログラミング入門)
12 | (本書)-->(データサイエンス入門)
13 | (本書)-->(統計学)
14 | プログラミング入門-->(言語についての高度な話題)
15 | データサイエンス入門-->(データサイエンスの理論と実践)
16 | 統計学-->(統計学の実践)
17 | @enduml
18 | ```
19 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-iris.py:
--------------------------------------------------------------------------------
import statsmodels.api as sm

# Figure: mean of each iris measurement with standard-error bars.
iris = sm.datasets.get_rdataset('iris', 'datasets').data

stats = iris.describe().transpose()
my_df = stats[['mean', 'std']]
my_df['se'] = my_df['std'] / len(iris)**0.5  # standard error of the mean

my_df.plot(y='mean', kind='bar', yerr='se', capsize=10)

import matplotlib.pyplot as plt
plt.tight_layout()
plt.savefig('04-p-iris.pdf')
12 |
--------------------------------------------------------------------------------
/figures/fig-r/07-r-tuning.R:
--------------------------------------------------------------------------------
# Figure: LOOCV error of k-NN on the cars data for k = 1..15.
pdf(file = "07-r-tuning.pdf", width = 5.83, height = 4.13)

set.seed(0)  # for reproducibility

library(caret)
library(tidyverse)
my_data <- cars
my_model <- train(form = dist ~ speed, data = my_data, method = "knn",
                  tuneGrid = expand.grid(k = 1:15),
                  trControl = trainControl(method = "LOOCV"))
ggplot(my_model)  # caret's built-in tuning-profile plot
12 |
--------------------------------------------------------------------------------
/figures/fig-r/13-r-hclust.R:
--------------------------------------------------------------------------------
# Figure: dendrogram of hierarchical clustering of four labeled 2-D
# points, cut into 3 clusters.
pdf(file = "13-r-hclust.pdf", width = 5.83, height = 4.13)

my_data <- data.frame(
  x = c( 0, -16, 10, 10),
  y = c( 0, 0, 10, -15),
  row.names = c("A", "B", "C", "D"))

my_dist <- dist(my_data)      # pairwise Euclidean distances
my_result <- hclust(my_dist)  # hierarchical clustering (hclust defaults)

factoextra::fviz_dend(
  my_result,
  k = 3,                     # highlight 3 clusters
  rect = T, rect_fill = T)   # draw filled rectangles around clusters
15 |
--------------------------------------------------------------------------------
/figures/fig-r/Makefile:
--------------------------------------------------------------------------------
SRC=$(wildcard *.R)

PDF=$(SRC:.R=.pdf)

all: $(PDF)

.SUFFIXES: .pdf .R

# Run each script; on success, crop the PDF in place when pdfcrop is
# available.  On failure, delete the partial PDF so make does not treat
# it as up to date, then propagate the error.
.R.pdf:
	if Rscript $<; then\
	if [ -f /usr/bin/pdfcrop ]; then\
	pdfcrop $@;\
	rm $@;\
	mv $(basename $@)-crop.pdf $@;\
	fi;\
	else\
	rm -f $@;\
	exit 1;\
	fi

clean:
	rm -f *.pdf *.log
23 |
--------------------------------------------------------------------------------
/addendum/07.03.02/confidence_band_r.R:
--------------------------------------------------------------------------------
# Figure: regression line with a 99% confidence band.
pdf(file = "confidence_band_r.pdf", width = 6, height = 4.5)

library(boot)
library(tidyverse)

data <- read_csv("1+3x+N(0,2x).csv")

alpha <- 0.99  # confidence level
# (The unused locals x, y, n from the original have been removed;
# ggplot reads the columns directly from `data`.)
data %>% ggplot(aes(x = x, y = y)) +
  geom_point() +
  stat_smooth(formula = y ~ x, method = "lm", level = alpha)
15 |
--------------------------------------------------------------------------------
/docker/rstudio/README.md:
--------------------------------------------------------------------------------
1 | # RStudio用のコンテナ
2 |
3 | - Docker Hub: https://hub.docker.com/r/taroyabuki/rstudio
4 | - 起動方法(3種類)
5 | - [rstudio.sh](../rstudio.sh)を実行する.
    - `wget https://raw.githubusercontent.com/taroyabuki/fromzero/master/docker/rstudio.sh`の後で,`sh rstudio.sh`
7 | - `git clone https://github.com/taroyabuki/fromzero.git`の後で,`sh fromzero/docker/rstudio.sh`
8 | - RStudio Serverへのアクセス:http://localhost:8787
9 |
--------------------------------------------------------------------------------
/figures/fig-r/13-r-kmeans.R:
--------------------------------------------------------------------------------
# Figure: k-means clusterings of the iris data for k = 2..5, in a 2x2 grid.
pdf(file = "13-r-kmeans.pdf", width = 5.83, height = 4.13)

# k-means starts from random centers; fix the seed so the generated
# figure is reproducible (the sibling script 07-r-tuning.R does the same).
set.seed(0)

library(tidyverse)
library(factoextra)

my_data <- iris[, -5]  # drop the species column

f <- 2:5 %>% map(function(k) {
  my_data %>% kmeans(k) %>%
    fviz_cluster(data = my_data, geom = "point") +
    ggtitle(sprintf("k = %s", k))
})
gridExtra::grid.arrange(f[[1]], f[[2]], f[[3]], f[[4]], ncol = 2)
14 |
--------------------------------------------------------------------------------
/figures/fig-r/13-r-hclust2.R:
--------------------------------------------------------------------------------
# Figure: phylogenic-style dendrogram of hierarchical clustering of four
# labeled 2-D points, cut into 3 clusters.
pdf(file = "13-r-hclust2.pdf", width = 5.83, height = 4.13)

my_data <- data.frame(
  x = c( 0, -16, 10, 10),
  y = c( 0, 0, 10, -15),
  row.names = c("A", "B", "C", "D"))

my_dist <- dist(my_data)      # pairwise Euclidean distances
my_result <- hclust(my_dist)  # hierarchical clustering (hclust defaults)

factoextra::fviz_dend(
  my_result,
  k = 3,                     # highlight 3 clusters
  rect = T, rect_fill = T,
  type = "phylogenic")       # unrooted tree layout
16 |
--------------------------------------------------------------------------------
/figures/fig-p/13-p-hclust.py:
--------------------------------------------------------------------------------
import pandas as pd
from scipy.cluster import hierarchy

# Figure: dendrogram of complete-linkage hierarchical clustering of four
# labeled 2-D points.
my_data = pd.DataFrame(
    {'x': [0, -16, 10, 10],
     'y': [0, 0, 10, -15]},
    index=['A', 'B', 'C', 'D'])

linkage = hierarchy.linkage(my_data,
                            metric='euclidean',
                            method='complete')
hierarchy.dendrogram(linkage, labels=my_data.index)

import matplotlib.pyplot as plt
plt.savefig('13-p-hclust.pdf')
13 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-mosaic.py:
--------------------------------------------------------------------------------
import pandas as pd
import statsmodels.api as sm
from statsmodels.graphics.mosaicplot import mosaic

# Figure: mosaic plot of species vs. whether Sepal.Width exceeds 3.
iris = sm.datasets.get_rdataset('iris', 'datasets').data

my_df = pd.DataFrame({
    'Species': iris.Species,
    'w_Sepal': iris['Sepal.Width'] > 3})
mosaic(my_df,
       index=['Species', 'w_Sepal'])

import matplotlib.pyplot as plt
plt.savefig('04-p-mosaic.pdf')
16 |
--------------------------------------------------------------------------------
/figures/fig-p/07-p-regression.py:
--------------------------------------------------------------------------------
import seaborn as sns
import statsmodels.api as sm

# Figure: regression of stopping distance on speed, with dotted guide
# lines meeting at (speed, dist) = (21.5, 67).
my_data = sm.datasets.get_rdataset('cars', 'datasets').data

ax = sns.regplot(x='speed', y='dist', data=my_data)
ax.vlines(x=21.5, ymin=-5, ymax=67, linestyles='dotted')
ax.hlines(y=67, xmin=4, xmax=21.5, linestyles='dotted')
ax.set_xlim(4, 25)
ax.set_ylim(-5, 125)

import matplotlib.pyplot as plt
plt.savefig('07-p-regression.pdf')
13 |
--------------------------------------------------------------------------------
/data/exam.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "A",
4 | "english": 60,
5 | "math": 70,
6 | "gender": "f"
7 | },
8 | {
9 | "name": "B",
10 | "english": 90,
11 | "math": 80,
12 | "gender": "m"
13 | },
14 | {
15 | "name": "C",
16 | "english": 70,
17 | "math": 90,
18 | "gender": "m"
19 | },
20 | {
21 | "name": "D",
22 | "english": 90,
23 | "math": 100,
24 | "gender": "f"
25 | }
26 | ]
--------------------------------------------------------------------------------
/figures/fig-p/04-p-iris-group.py:
--------------------------------------------------------------------------------
import statsmodels.api as sm

# Figure: per-species means of the iris measurements with standard-error
# bars.
iris = sm.datasets.get_rdataset('iris', 'datasets').data

grouped = iris.groupby('Species')                  # group by species,
means = grouped.agg('mean')                        # take each variable's mean
se = grouped.agg(lambda x: x.std() / len(x)**0.5)  # and its standard error.

means.plot(kind='bar', yerr=se, capsize=5)

import matplotlib.pyplot as plt
plt.savefig('04-p-iris-group.pdf')
12 |
--------------------------------------------------------------------------------
/figures/fig-p/13-p-elbow.py:
--------------------------------------------------------------------------------
import pandas as pd
import statsmodels.api as sm
from sklearn.cluster import KMeans

# Figure: elbow plot — k-means inertia for k = 1..10 on the iris data.
iris = sm.datasets.get_rdataset('iris', 'datasets').data
my_data = iris.iloc[:, 0:4]  # the four numeric measurements

# Single range, reused below; the original defined `k = range(1, 11)` and
# then shadowed it with the comprehension's loop variable while repeating
# the literal range.
ks = range(1, 11)
my_df = pd.DataFrame({
    'k': ks,
    'inertia': [KMeans(n_clusters=k).fit(my_data).inertia_ for k in ks]})
my_df.plot(x='k', style='o-', legend=False)

import matplotlib.pyplot as plt
plt.savefig('13-p-elbow.pdf')
16 |
--------------------------------------------------------------------------------
/figures/fig-p/09-p-varimp.py:
--------------------------------------------------------------------------------
import pandas as pd
import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier

# Figure: random-forest feature importances for the iris data.
iris = sm.datasets.get_rdataset('iris', 'datasets').data
X, y = iris.iloc[:, 0:4], iris.Species

# Fix random_state so the importances (and hence the figure) are
# reproducible across runs.
my_model = RandomForestClassifier(random_state=0).fit(X, y)
tmp = pd.Series(my_model.feature_importances_, index=X.columns)
tmp.sort_values().plot(kind='barh')

import matplotlib.pyplot as plt
plt.tight_layout()
plt.savefig('09-p-varimp.pdf')
15 |
--------------------------------------------------------------------------------
/figures/fig-r/07-r-residual.R:
--------------------------------------------------------------------------------
# Figure: linear regression of dist on speed with residuals drawn as
# dotted vertical segments.
pdf(file = "07-r-residual.pdf", width = 6, height = 4.5)

library(caret)
library(tidyverse)
my_data <- cars
my_model <- train(form = dist ~ speed, data = my_data, method = "lm")
y_ <- my_model %>% predict(my_data)  # fitted values
my_data$y_ <- y_

my_data %>%
  ggplot(aes(x = speed, y = dist)) +
  geom_point() +
  geom_line(aes(x = speed, y = y_)) +
  geom_linerange(mapping = aes(ymin = y_, ymax = dist), linetype = "dotted")
15 |
--------------------------------------------------------------------------------
/figures/fig-r/03-r-temp.R:
--------------------------------------------------------------------------------
# Figure: daily minimum and maximum temperatures as a line chart.
pdf(file = "03-r-temp.pdf", width = 5.83, height = 4.13)

library(tidyverse)

my_wider <- data.frame(
  day = c(25, 26, 27),
  min = c(20, 21, 15),
  max = c(24, 27, 21))

# Long format: one row per (day, min/max) observation.
my_longer <- my_wider %>%
  pivot_longer(-day)

my_longer %>%
  ggplot(aes(x = day, y = value,
             color = name)) +
  geom_point() +
  geom_line() +
  ylab("temperature") +
  scale_x_continuous(
    breaks = my_longer$day)  # ticks only at the observed days
21 |
--------------------------------------------------------------------------------
/figures/fig-p/03-p-temp.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import pandas as pd

# Figure: daily minimum and maximum temperatures as a line chart.
my_df = pd.DataFrame({
    'day': [25, 26, 27],
    'min': [20, 21, 15],
    'max': [24, 27, 21]})

# Long format (one row per day/variable) ...
my_longer = my_df.melt(id_vars='day')

# ... then back to wide with 'day' as the index so plot() uses it as x.
my_wider = my_longer.pivot(index='day',
                           columns='variable',
                           values='value')

my_wider.plot(style='o-',
              xticks=my_wider.index,
              ylabel='temperature')

plt.savefig('03-p-temp.pdf')
21 |
--------------------------------------------------------------------------------
/figures/fig-r/08-r-boxplot.R:
--------------------------------------------------------------------------------
# Figure: box plots (with mean points) of the wine predictors.
pdf(file = "08-r-boxplot.pdf", width = 6, height = 4.5)

library(tidyverse)
my_url <- str_c("https://raw.githubusercontent.com/taroyabuki",
                "/fromzero/master/data/wine.csv")
my_data <- read_csv(my_url)

my_data %>%
  pivot_longer(-LPRICE2) %>%
  ggplot(aes(x = factor(name, levels = names(my_data[, -1])),  # keep column order
             y = value)) +
  geom_boxplot() +
  stat_summary(fun = mean, geom = "point", size = 3) +  # mean markers
  xlab(NULL)
15 |
--------------------------------------------------------------------------------
/figures/fig-p/08-p-boxplot-scaled.py:
--------------------------------------------------------------------------------
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Figure: box plots (with means) of the standardized wine predictors.
my_url = ('https://raw.githubusercontent.com/taroyabuki/'
          'fromzero/master/data/wine.csv')
my_data = pd.read_csv(my_url)
X = my_data.drop(columns=['LPRICE2'])
y = my_data['LPRICE2']

X_scaled = pd.DataFrame(StandardScaler().fit_transform(X),
                        columns=X.columns)
X_scaled.boxplot(showmeans=True)

import matplotlib.pyplot as plt
plt.savefig('08-p-boxplot-scaled.pdf')
14 |
--------------------------------------------------------------------------------
/figures/fig-r/10-r-pr.R:
--------------------------------------------------------------------------------
# Figure: precision-recall curve for a small set of scored labels.
pdf(file = "10-r-pr.pdf", width = 6, height = 5)

library(PRROC)
library(tidyverse)

y <- c( 0, 1, 1, 0, 1, 0, 1, 0, 0, 1)
y_score <- c(0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5)

# PRROC convention: scores.class0 = positive class, scores.class1 = negative.
my_pr <- pr.curve(scores.class0 = y_score[y == 1],
                  scores.class1 = y_score[y == 0],
                  curve = TRUE)
my_pr %>% plot(xlab = "Recall",
               ylab = "Precision",
               legend = FALSE)
15 |
--------------------------------------------------------------------------------
/figures/fig-p/10-p-roc.py:
--------------------------------------------------------------------------------
import numpy as np

# Figure: ROC curve for a small set of scored labels.
y = np.array([0, 1, 1, 0, 1, 0, 1, 0, 0, 1])
y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])

from sklearn.metrics import roc_curve, RocCurveDisplay

fpr, tpr, _ = roc_curve(y_true=y, y_score=y_score, pos_label=1)
RocCurveDisplay(fpr=fpr, tpr=tpr).plot()

import matplotlib.pyplot as plt
plt.savefig('10-p-roc.pdf')
14 |
--------------------------------------------------------------------------------
/figures/fig-r/07-r-regression.R:
--------------------------------------------------------------------------------
# Figure: regression of dist on speed with a confidence band and dotted
# guide lines meeting at (speed, dist) = (21.5, 67).
pdf(file = "07-r-regression.pdf", width = 6, height = 4.5)

library(tidyverse)

my_data <- cars
tmp <- data.frame(speed = 21.5, dist = 67)  # the highlighted point
my_data %>% ggplot(aes(x = speed, y = dist)) +
  coord_cartesian(xlim = c(4, 25), ylim = c(0, 120)) +
  geom_point() +
  stat_smooth(formula = y ~ x, method = "lm") +
  geom_linerange(data = tmp, aes(ymin = -9, ymax = dist), linetype = "dotted") +
  geom_linerange(data = tmp, aes(xmin = 0, xmax = speed), linetype = "dotted")
13 |
--------------------------------------------------------------------------------
/figures/fig-r/10-r-roc.R:
--------------------------------------------------------------------------------
# Figure: ROC curve for a small set of scored labels.
pdf(file = "10-r-roc.pdf", width = 6, height = 5)

library(PRROC)
library(tidyverse)

y <- c( 0, 1, 1, 0, 1, 0, 1, 0, 0, 1)
y_score <- c(0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5)

# PRROC convention: scores.class0 = positive class, scores.class1 = negative.
my_roc <- roc.curve(scores.class0 = y_score[y == 1],
                    scores.class1 = y_score[y == 0],
                    curve = TRUE)
my_roc %>% plot(xlab = "False Positive Rate",
                ylab = "True Positive Rate",
                legend = FALSE)
15 |
--------------------------------------------------------------------------------
/figures/fig-p/13-p-heatmap.py:
--------------------------------------------------------------------------------
import pandas as pd
import seaborn as sns

# Figure: clustered heat map of six students' test scores.
my_data = pd.DataFrame(
    {'language': [0, 20, 20, 25, 22, 17],
     'english': [0, 20, 40, 20, 24, 18],
     'math': [100, 20, 5, 30, 17, 25],
     'science': [0, 20, 5, 25, 16, 23],
     'society': [0, 20, 30, 0, 21, 17]},
    index=['A', 'B', 'C', 'D', 'E', 'F'])

sns.clustermap(my_data, z_score=1)  # standardize each column

import matplotlib.pyplot as plt
plt.savefig('13-p-heatmap.pdf')
16 |
--------------------------------------------------------------------------------
/figures/fig-r/13-r-heatmap.R:
--------------------------------------------------------------------------------
# Figure: clustered heat map of six students' test scores.
pdf(file = "13-r-heatmap.pdf", width = 6, height = 5.5)

library(tidyverse)

my_data <- data.frame(
  language = c( 0, 20, 20, 25, 22, 17),
  english = c( 0, 20, 40, 20, 24, 18),
  math = c(100, 20, 5, 30, 17, 25),
  science = c( 0, 20, 5, 25, 16, 23),
  society = c( 0, 20, 30, 0, 21, 17),
  row.names = c("A", "B", "C", "D", "E", "F"))

my_data %>% scale %>%                        # standardize each column
  gplots::heatmap.2(cexRow = 1, cexCol = 1)  # draw with given label sizes
15 |
--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart3.R:
--------------------------------------------------------------------------------
# Figure: decision tree predicting Titanic survival from Class only
# (depth 2, tuned with LOOCV).
pdf(file = "10-r-rpart3.pdf", width = 6, height = 5.5)

library(caret)
library(tidyverse)

my_url <- str_c("https://raw.githubusercontent.com",
                "/taroyabuki/fromzero/master/data/titanic.csv")
my_data <- read_csv(my_url)

my_model3 <- train(form = Survived ~ Class, data = my_data, method = "rpart2",
                   tuneGrid = data.frame(maxdepth = 2),
                   trControl = trainControl(method = "LOOCV"))
# extra = 1 shows the per-class observation counts in each node.
rpart.plot::rpart.plot(my_model3$finalModel, extra = 1)
14 |
--------------------------------------------------------------------------------
/figures/fig-r/10-r-titanic-tree.R:
--------------------------------------------------------------------------------
# Figure: depth-2 decision tree predicting Titanic survival from all
# predictors (no resampling).
pdf(file = "10-r-titanic-tree.pdf", width = 6, height = 5)

library(caret)
library(tidyverse)

my_url <- str_c("https://raw.githubusercontent.com",
                "/taroyabuki/fromzero/master/data/titanic.csv")
my_data <- read_csv(my_url)

my_model <- train(form = Survived ~ ., data = my_data, method = "rpart2",
                  tuneGrid = data.frame(maxdepth = 2),
                  trControl = trainControl(method = "none"))  # no resampling
# extra = 1 shows the per-class observation counts in each node.
rpart.plot::rpart.plot(my_model$finalModel, extra = 1)
14 |
--------------------------------------------------------------------------------
/figures/fig-r/13-r-biplot.R:
--------------------------------------------------------------------------------
# Figure: PCA biplot of six students' test scores.
pdf(file = "13-r-biplot.pdf", width = 5.83, height = 4.13)

library(tidyverse)

my_data <- data.frame(
  language = c( 0, 20, 20, 25, 22, 17),
  english = c( 0, 20, 40, 20, 24, 18),
  math = c(100, 20, 5, 30, 17, 25),
  science = c( 0, 20, 5, 25, 16, 23),
  society = c( 0, 20, 30, 0, 21, 17))
row.names(my_data) <- c("A", "B", "C", "D", "E", "F")

my_result <- my_data %>% prcomp # principal component analysis (no scaling)

my_result %>% ggbiplot::ggbiplot(labels = row.names(my_data), scale = 0)
16 |
--------------------------------------------------------------------------------
/docker/jupyter/README.md:
--------------------------------------------------------------------------------
1 | # Jupyter Notebook用のコンテナ
2 |
3 | - Docker Hub: https://hub.docker.com/r/taroyabuki/jupyter
4 | - 起動方法(3種類)
    - [jupyter.sh](../jupyter.sh)を実行する.
    - `wget https://raw.githubusercontent.com/taroyabuki/fromzero/master/docker/jupyter.sh`の後で,`sh jupyter.sh`
7 | - `git clone https://github.com/taroyabuki/fromzero.git`の後で,`sh fromzero/docker/jupyter.sh`
8 | - Jupyter Notebookへのアクセス:http://localhost:8888
9 | - Apple Chipについての注意
10 | - Docker desktop 4.4.2で動作を確認しました.(4.1から4.3では動作しませんでした.)
11 | - 11章のコードは実行できません.11章を読む際には,Google Colabを使ってください.
12 |
--------------------------------------------------------------------------------
/figures/fig-r/07-r-lm.R:
--------------------------------------------------------------------------------
# Figure: linear regression of dist on speed, with data points and the
# fitted model drawn in different colors.
pdf(file = "07-r-lm.pdf", width = 6, height = 4.5)

library(caret)
library(tidyverse)
my_data <- cars

my_model <- train(form = dist ~ speed, # model formula
                  data = my_data,      # data
                  method = "lm")       # method

# Prediction as a function of speed, for stat_function below.
f <- function(x) { my_model %>% predict(data.frame(speed = x)) }

my_data %>%
  ggplot(aes(x = speed,
             y = dist,
             color = "data")) +
  geom_point() +
  stat_function(
    fun = f,
    mapping = aes(color = "model"))
21 |
--------------------------------------------------------------------------------
/figures/fig-r/08-r-boxplot-scaled.R:
--------------------------------------------------------------------------------
# Figure: box plots (with mean points) of the standardized wine predictors.
pdf(file = "08-r-boxplot-scaled.pdf", width = 6, height = 4.5)

library(tidyverse)
my_url <- str_c("https://raw.githubusercontent.com/taroyabuki",
                "/fromzero/master/data/wine.csv")
my_data <- read_csv(my_url)

my_data %>%
  mutate_if(is.numeric, scale) %>% # standardize the numeric columns
  pivot_longer(-LPRICE2) %>%
  ggplot(aes(x = factor(name, levels = names(my_data[, -1])),  # keep column order
             y = value)) +
  geom_boxplot() +
  stat_summary(fun = mean, geom = "point", size = 3) +  # mean markers
  xlab(NULL)
16 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-conf.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
from scipy.stats import binomtest

# Figure: p-value of an exact binomial test (2 successes in 15 trials)
# as a function of the hypothesized success probability, with the
# significance level drawn as a horizontal line.

a = 0.05  # significance level
tmp = np.linspace(0, 1, 100)

my_df = pd.DataFrame({
    't': tmp,  # hypothesized probability of success
    'q': a,    # horizontal line at the significance level
    # statsmodels' binom_test is deprecated; scipy.stats.binomtest is the
    # maintained equivalent (two-sided by default, like the original).
    'p': [binomtest(k=2, n=15, p=t).pvalue for t in tmp]})  # p-values

my_df.plot(x='t', legend=None, xlabel=r'$\theta$', ylabel=r'p-value')

import matplotlib.pyplot as plt
plt.savefig('04-p-conf.pdf')
17 |
--------------------------------------------------------------------------------
/figures/fig-p/13-p-biplot.py:
--------------------------------------------------------------------------------
import pandas as pd
# Figure: PCA biplot of six students' test scores (via the `pca` package).
my_data = pd.DataFrame(
    {'language': ( 0, 20, 20, 25, 22, 17),
     'english': ( 0, 20, 40, 20, 24, 18),
     'math': (100, 20, 5, 30, 17, 25),
     'science': ( 0, 20, 5, 25, 16, 23),
     'society': ( 0, 20, 30, 0, 21, 17)},
    index=['A', 'B', 'C', 'D', 'E', 'F'])

from pca import pca
my_model = pca(n_components=5)
my_result = my_model.fit_transform(my_data)

my_model.biplot(legend=False)  # biplot

import matplotlib.pyplot as plt
plt.savefig('13-p-biplot.pdf')
18 |
--------------------------------------------------------------------------------
/figures/fig-r/09-r-rpart2.R:
--------------------------------------------------------------------------------
# Figure: depth-3 decision tree for iris with custom rpart control
# parameters (no resampling).
pdf(file = "09-r-rpart2.pdf", width = 6, height = 5.5)

library(caret)
my_data <- iris

my_model <- train(form = Species ~ ., data = my_data, method = "rpart2",
                  trControl = trainControl(method = "none"),  # no resampling
                  tuneGrid = data.frame(maxdepth = 3),
                  control = rpart::rpart.control(cp = 0.01,
                                                 minbucket = 5,
                                                 minsplit = 2))

# extra = 1 shows the per-class observation counts in each node.
rpart.plot::rpart.plot(
  my_model$finalModel, extra = 1)
15 |
--------------------------------------------------------------------------------
/figures/fig-p/07-p-residual.py:
--------------------------------------------------------------------------------
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

# Figure: fitted regression line with residuals as dotted vertical
# segments.
my_data = sm.datasets.get_rdataset('cars', 'datasets').data
X, y = my_data[['speed']], my_data['dist']

model = LinearRegression().fit(X, y)
fitted = model.predict(X)
my_data['y_'] = fitted

ax = my_data.plot(x='speed', y='dist', style='o', legend=False)
my_data.plot(x='speed', y='y_', style='-', legend=False, ax=ax)
ax.vlines(x=X, ymin=y, ymax=fitted, linestyles='dotted')

import matplotlib.pyplot as plt
plt.savefig('07-p-residual.pdf')
18 |
--------------------------------------------------------------------------------
/data/exam.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <body>
3 | <title>experiment results</title>
4 | <records>
5 | <record english="60" math="70">
6 | <gender>f</gender>
7 | <name>A</name>
8 | </record>
9 | <record english="90" math="80">
10 | <gender>m</gender>
11 | <name>B</name>
12 | </record>
13 | <record english="70" math="90">
14 | <gender>m</gender>
15 | <name>C</name>
16 | </record>
17 | <record english="90" math="100">
18 | <gender>f</gender>
19 | <name>D</name>
20 | </record>
21 | </records>
22 | </body>
--------------------------------------------------------------------------------
/figures/fig-r/07-r-knn.R:
--------------------------------------------------------------------------------
1 | pdf(file = "07-r-knn.pdf", width = 6, height = 4.5)
2 |
3 | library(caret)
4 | library(tidyverse)
5 | my_data <- cars
6 | my_model <- train(form = dist ~ speed, # モデル式
7 | data = my_data, # データ
8 | method = "knn", # 手法
9 | tuneGrid = data.frame(k = 5))
10 |
11 | f <- function(x) { my_model %>% predict(data.frame(speed = x)) }
12 |
13 | my_data %>%
14 | ggplot(aes(x = speed,
15 | y = dist,
16 | color = "data")) +
17 | geom_point() +
18 | stat_function(
19 | fun = f,
20 | mapping = aes(color = "model"))
21 |
--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-path.R:
--------------------------------------------------------------------------------
1 | pdf(file = "08-r-enet-path.pdf", width = 6, height = 4.5)
2 |
3 | library(tidyverse)
4 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki",
5 | "/fromzero/master/data/wine.csv")
6 | my_data <- read_csv(my_url)
7 |
8 | library(ggfortify)
9 | library(glmnetUtils)
10 |
11 | my_data2 <- my_data %>%
12 | mutate_all(scale) # 標準化
13 |
14 | B <- 0.1
15 |
16 | glmnet(
17 | form = LPRICE2 ~ .,
18 | data = my_data2,
19 | alpha = B) %>%
20 | autoplot(xvar = "lambda") +
21 | xlab("log A ( = log lambda)") +
22 | theme(legend.position =
23 | c(0.15, 0.25))
24 |
--------------------------------------------------------------------------------
/figures/fig-r/13-r-pca-clusters.R:
--------------------------------------------------------------------------------
1 | pdf(file = "13-r-pca-clusters.pdf", width = 6, height = 4.5)
2 |
3 | library(tidyverse)
4 | my_data <- iris[, -5] %>% scale
5 |
6 | my_result <- prcomp(my_data)$x %>% as.data.frame # 主成分分析
7 |
8 | # 非階層的クラスタ分析の場合
9 | my_result$cluster <- (my_data %>% scale %>% kmeans(3))$cluster %>% as.factor
10 |
11 | # 階層的クラスタ分析の場合
12 | #my_result$cluster <- my_data %>% dist %>% hclust %>% cutree(3) %>% as.factor
13 |
14 | my_result %>%
15 | ggplot(aes(x = PC1, y = PC2, color = cluster)) + # 色でクラスタを表現する.
16 | geom_point(shape = iris$Species) + # 形で品種を表現する.
17 | theme(legend.position = "none")
18 |
--------------------------------------------------------------------------------
/figures/fig-p/10-p-pr.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | y = np.array([ 0, 1, 1, 0, 1, 0, 1, 0, 0, 1])
3 | y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])
4 |
5 | from sklearn.metrics import precision_recall_curve, PrecisionRecallDisplay
6 |
7 | my_precision, my_recall, _ = precision_recall_curve(y_true=y,
8 | probas_pred=y_score,
9 | pos_label=1)
10 | PrecisionRecallDisplay(precision=my_precision, recall=my_recall).plot()
11 |
12 | import matplotlib.pyplot as plt
13 | plt.savefig('10-p-pr.pdf')
14 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-conf.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-conf.pdf", width = 6, height = 5.5)
2 |
3 | library(exactci)
4 |
5 | a <- 0.05 # 有意水準
6 | binom.exact(x = 2, # 当たった回数
7 | n = 15, # くじを引いた回数
8 | p = 4 / 10, # 当たる確率(仮説)
9 | plot = TRUE, # p値の描画
10 | conf.level = 1 - a, # 信頼係数(デフォルト)
11 | tsmethod = "minlike", # 両側p値の使用
12 | alternative = "two.sided") # 両側検定(デフォルト)
13 | # 左片側検定なら'less'
14 | # 右片側検定なら'greater'
15 |
--------------------------------------------------------------------------------
/figures/fig-r/07-r-boxplot.R:
--------------------------------------------------------------------------------
1 | pdf(file = "07-r-boxplot.pdf", width = 6, height = 5.5)
2 |
3 | library(caret)
4 | my_data <- cars
5 |
6 | my_lm_model <- train(form = dist ~ speed, data = my_data, method = "lm",
7 | trControl = trainControl(method = "LOOCV"))
8 |
9 | my_knn_model <- train(form = dist ~ speed, data = my_data, method = "knn",
10 | tuneGrid = data.frame(k = 5),
11 | trControl = trainControl(method = "LOOCV"))
12 | y <- my_data$dist
13 |
14 | my_df <- data.frame(
15 | lm = (y - my_lm_model$pred$pred)^2,
16 | knn = (y - my_knn_model$pred$pred)^2)
17 |
18 | boxplot(my_df, ylab = "r^2")
19 |
--------------------------------------------------------------------------------
/figures/fig-p/09-p-rpart.py:
--------------------------------------------------------------------------------
1 | import graphviz
2 | import statsmodels.api as sm
3 | from sklearn import tree
4 |
5 | my_data = sm.datasets.get_rdataset('iris', 'datasets').data
6 | X, y = my_data.iloc[:, 0:4], my_data.Species
7 |
8 | my_model = tree.DecisionTreeClassifier(max_depth=2, random_state=0)
9 | my_model.fit(X, y)
10 |
11 | my_dot = tree.export_graphviz(decision_tree=my_model,
12 | out_file=None,
13 | feature_names=X.columns,
14 | class_names=my_model.classes_,
15 | filled=True)
16 | my_graph = graphviz.Source(my_dot)
17 | my_graph.render('09-p-rpart')
18 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-iris-group.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-iris-group.pdf", width = 6, height = 5)
2 |
3 | library(tidyverse)
4 | my_group <- iris %>% group_by(Species)
5 |
6 | my_df <- my_group %>%
7 | summarize(across(everything(), mean)) %>% # 各列の平均
8 | pivot_longer(-Species)
9 |
10 | # 標準誤差を求める関数
11 | f <- function(x) { sd(x) / length(x)**0.5 }
12 |
13 | tmp <- my_group %>%
14 | summarize(across(everything(), f)) %>% # 各列の標準誤差
15 | pivot_longer(-Species)
16 |
17 | my_df$se <- tmp$value
18 | my_df %>%
19 | ggplot(aes(x = Species, y = value, fill = name)) +
20 | geom_col(position = "dodge") +
21 | geom_errorbar(aes(ymin = value - se, ymax = value + se), position = "dodge")
22 |
--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart1.R:
--------------------------------------------------------------------------------
1 | pdf(file = "10-r-rpart1.pdf", width = 6, height = 5.5)
2 |
3 | library(caret)
4 | library(tidyverse)
5 |
6 | my_url <- str_c("https://raw.githubusercontent.com",
7 | "/taroyabuki/fromzero/master/data/titanic.csv")
8 | my_data <- read_csv(my_url)
9 |
10 | X <- my_data %>% select(Class)
11 | y <- my_data$Survived
12 |
13 | options(warn = -1) # 警告を非表示にする.(tribbleに関する警告)
14 | my_model1 <- train(x = X, y = y, method = "rpart2",
15 | tuneGrid = data.frame(maxdepth = 2),
16 | trControl = trainControl(method = "LOOCV"))
17 | options(warn = 0) # 警告を表示する.
18 | rpart.plot::rpart.plot(my_model1$finalModel, extra = 1)
19 |
--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-split.py:
--------------------------------------------------------------------------------
1 | from pmdarima.datasets import airpassengers
2 | my_data = airpassengers.load_airpassengers()
3 |
4 | n = len(my_data)
5 | k = 108
6 |
7 | import pandas as pd
8 | my_ds = pd.date_range(
9 | start='1949/01/01',
10 | end='1960/12/01',
11 | freq='MS')
12 | my_df = pd.DataFrame({
13 | 'ds': my_ds,
14 | 'x': range(n),
15 | 'y': my_data},
16 | index=my_ds)
17 |
18 | my_train = my_df[ :k]
19 | my_test = my_df[-(n - k): ]
20 | y = my_test.y
21 |
22 | import matplotlib.pyplot as plt
23 | plt.plot(my_train.y, label='train')
24 | plt.plot(my_test.y, label='test')
25 | plt.legend()
26 | plt.savefig('12-p-airpassengers-split.pdf')
27 |
--------------------------------------------------------------------------------
/figures/fig-r/11-r-h2o-wine.R:
--------------------------------------------------------------------------------
1 | pdf(file = "11-r-h2o-wine.pdf", width = 6, height = 5.5)
2 |
3 | library(h2o)
4 | library(tidyverse)
5 |
6 | h2o.init()
7 | h2o.no_progress()
8 |
9 | my_url <- str_c("https://raw.githubusercontent.com",
10 | "/taroyabuki/fromzero/master/data/wine.csv")
11 | my_data <- read_csv(my_url)
12 | my_frame <- as.h2o(my_data)
13 |
14 | my_model <- h2o.automl(
15 | y = "LPRICE2", # 出力変数名
16 | training_frame = my_frame, # H2OFrame
17 | max_runtime_secs = 60) # 訓練時間(秒)
18 |
19 | min(my_model@leaderboard$rmse)
20 |
21 | tmp <- my_model %>% predict(my_frame) %>%
22 | as.data.frame
23 | y_ <- tmp$predict
24 | y <- my_data$LPRICE2
25 |
26 | plot(y, y_)
27 |
--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-split.R:
--------------------------------------------------------------------------------
1 | pdf(file = "12-r-airpassengers-split.pdf", width = 5.83, height = 4.13)
2 |
3 | my_data <- as.vector(AirPassengers)
4 |
5 | n <- length(my_data) # データ数(144)
6 | k <- 108 # 訓練データ数
7 |
8 | library(tidyverse)
9 | library(tsibble)
10 |
11 | my_ds <- seq(
12 | from = yearmonth("1949/01"),
13 | to = yearmonth("1960/12"),
14 | by = 1)
15 | my_label <- rep(
16 | c("train", "test"),
17 | c(k, n - k))
18 | my_df <- tsibble(
19 | ds = my_ds,
20 | x = 0:(n - 1),
21 | y = my_data,
22 | label = my_label,
23 | index = ds) # 日時の列の指定
24 |
25 | my_plot <- my_df %>%
26 | ggplot(aes(x = ds, y = y, color = label)) +
27 | geom_line()
28 | my_plot
29 |
--------------------------------------------------------------------------------
/figures/fig-r/07-r-polynomial.R:
--------------------------------------------------------------------------------
1 | pdf(file = "07-r-polynomial.pdf", width = 6, height = 4.5)
2 |
3 | library(caret)
4 | library(tidyverse)
5 | my_data <- cars
6 | my_idx <- c(2, 11, 27, 34, 39, 44)
7 | my_sample <- my_data[my_idx, ]
8 |
9 | my_model <- train(form = dist ~ poly(speed, degree = 5, raw = TRUE),
10 | data = my_sample,
11 | method = "lm")
12 |
13 | f <- function(x) { my_model %>% predict(data.frame(speed = x)) }
14 |
15 | my_data %>%
16 | ggplot(aes(x = speed, y = dist, color = "data")) +
17 | geom_point() +
18 | geom_point(data = my_sample, mapping = aes(color = "sample")) +
19 | stat_function(fun = f, mapping = aes(color = "model")) +
20 | coord_cartesian(ylim = c(0, 120))
21 |
--------------------------------------------------------------------------------
/figures/fig-p/07-p-knn.py:
--------------------------------------------------------------------------------
1 | # 準備
2 | import statsmodels.api as sm
3 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
4 | X, y = my_data[['speed']], my_data['dist']
5 |
6 | # 訓練
7 | from sklearn.neighbors import KNeighborsRegressor
8 | my_model = KNeighborsRegressor()
9 | my_model.fit(X, y)
10 |
11 | # 可視化の準備
12 | import numpy as np
13 | import pandas as pd
14 | tmp = pd.DataFrame({'speed': np.linspace(min(my_data.speed),
15 | max(my_data.speed),
16 | num=100)})
17 | tmp['model'] = my_model.predict(tmp)
18 |
19 | pd.concat([my_data, tmp]).plot(
20 | x='speed', style=['o', '-'])
21 | import matplotlib.pyplot as plt
22 | plt.savefig('07-p-knn.pdf')
23 |
--------------------------------------------------------------------------------
/figures/fig-r/04-r-pvalue1.R:
--------------------------------------------------------------------------------
1 | pdf(file = "04-r-pvalue1.pdf", width = 5.83, height = 4.13)
2 |
3 | library(tidyverse)
4 |
5 | t <- 4 / 10 # 当たる確率
6 | n <- 15 # くじを引いた回数
7 | x <- 0:n # 当たった回数
8 | my_pr <- dbinom(x, n, t) # x回当たる確率
9 | my_pr2 <- dbinom(2, n, t) # 2回当たる確率
10 |
11 | my_data <- data.frame(x = x) %>%
12 | mutate(probability = my_pr) %>%
13 | mutate(color = my_pr <= my_pr2) # 当たる確率が,2回当たる確率以下
14 |
15 | my_data %>% ggplot(aes(x = x, y = probability, color = color)) +
16 | geom_point(size = 3) +
17 | geom_linerange(aes(ymin = 0, ymax = probability), ) + # 垂直線
18 | geom_hline(yintercept = my_pr2) + # 水平線
19 | theme(legend.position = "none")
20 |
--------------------------------------------------------------------------------
/figures/fig-r/10-r-rpart2.R:
--------------------------------------------------------------------------------
1 | pdf(file = "10-r-rpart2.pdf", width = 6, height = 5.5)
2 |
3 | library(caret)
4 | library(tidyverse)
5 |
6 | my_url <- str_c("https://raw.githubusercontent.com",
7 | "/taroyabuki/fromzero/master/data/titanic.csv")
8 | my_data <- read_csv(my_url)
9 |
10 | my_enc <- my_data %>% dummyVars(formula = Survived ~ Class)
11 | my_data2 <- my_enc %>%
12 | predict(my_data) %>%
13 | as.data.frame %>%
14 | mutate(Survived = my_data$Survived)
15 |
16 | my_model2 <- train(form = Survived ~ ., data = my_data2, method = "rpart2",
17 | tuneGrid = data.frame(maxdepth = 2),
18 | trControl = trainControl(method = "LOOCV"))
19 | rpart.plot::rpart.plot(my_model2$finalModel, extra = 1)
20 |
--------------------------------------------------------------------------------
/figures/fig-p/07-p-lm.py:
--------------------------------------------------------------------------------
1 | # データの準備
2 | import statsmodels.api as sm
3 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
4 | X, y = my_data[['speed']], my_data['dist']
5 |
6 | # モデルの指定
7 | from sklearn.linear_model import LinearRegression
8 | my_model = LinearRegression()
9 |
10 | # モデルをデータにフィットさせる.
11 | my_model.fit(X, y)
12 |
13 | import numpy as np
14 | import pandas as pd
15 | tmp = pd.DataFrame({'speed': np.linspace(min(my_data.speed),
16 | max(my_data.speed),
17 | 100)})
18 | tmp['model'] = my_model.predict(tmp)
19 |
20 | pd.concat([my_data, tmp]).plot(
21 | x='speed', style=['o', '-'])
22 |
23 | import matplotlib.pyplot as plt
24 | plt.savefig('07-p-lm.pdf')
25 |
--------------------------------------------------------------------------------
/figures/fig-p/11-p-h2o-wine.py:
--------------------------------------------------------------------------------
1 | import h2o
2 | import pandas as pd
3 | from h2o.automl import H2OAutoML
4 |
5 | h2o.init()
6 | h2o.no_progress()
7 |
8 | my_url = ('https://raw.githubusercontent.com'
9 | '/taroyabuki/fromzero/master/data/wine.csv')
10 | my_data = pd.read_csv(my_url)
11 | my_frame = h2o.H2OFrame(my_data)
12 |
13 | my_model = H2OAutoML(
14 | max_runtime_secs=60)
15 | my_model.train(
16 | y='LPRICE2',
17 | training_frame=my_frame)
18 |
19 | print(my_model.leaderboard['rmse'].min())
20 |
21 | tmp = h2o.as_list(
22 | my_model.predict(my_frame))
23 |
24 | pd.DataFrame({
25 | 'y': my_data['LPRICE2'],
26 | 'y_': tmp['predict']}
27 | ).plot('y', 'y_', kind='scatter')
28 |
29 | import matplotlib.pyplot as plt
30 | plt.savefig('11-p-h2o-wine.pdf')
31 |
--------------------------------------------------------------------------------
/figures/fig-p/04-p-pvalue1.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from scipy import stats
4 |
5 | t = 4 / 10 # 当たる確率
6 | n = 15 # くじを引いた回数
7 | x = np.array(range(0, n + 1)) # 当たった回数
8 | my_pr = stats.binom.pmf(x, n, t) # x回当たる確率
9 | my_pr2 = stats.binom.pmf(2, n, t) # 2回当たる確率
10 |
11 | my_data = pd.DataFrame({'x': x, 'y1': my_pr, 'y2': my_pr})
12 | my_data.loc[my_pr > my_pr2, 'y1'] = np.nan # 当たる確率が,2回当たる確率超過
13 | my_data.loc[my_pr <= my_pr2, 'y2'] = np.nan # 当たる確率が,2回当たる確率以下
14 | ax = my_data.plot(x='x', style='o', ylabel='probability', legend=False)
15 | ax.hlines(y=my_pr2, xmin=0, xmax=15) # 水平線
16 | ax.vlines(x=x, ymin=0, ymax=my_pr) # 垂直線
17 |
18 | import matplotlib.pyplot as plt
19 | plt.savefig('04-p-pvalue1.pdf')
20 |
--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-path.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | my_url = ('https://raw.githubusercontent.com/taroyabuki/' +
3 | 'fromzero/master/data/wine.csv')
4 | my_data = pd.read_csv(my_url)
5 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2']
6 |
7 | import numpy as np
8 | from scipy.stats import zscore
9 | from sklearn.linear_model import enet_path
10 |
11 | As = np.e**np.arange(2, -5.5, -0.1)
12 | B = 0.1
13 |
14 | _, my_path, _ = enet_path(
15 | zscore(X), zscore(y),
16 | alphas=As,
17 | l1_ratio=B)
18 |
19 | pd.DataFrame(
20 | my_path.T,
21 | columns=X.columns,
22 | index=np.log(As)
23 | ).plot(
24 | xlabel='log A ( = log alpha)',
25 | ylabel='Coefficients')
26 |
27 | import matplotlib.pyplot as plt
28 | plt.savefig('08-p-enet-path.pdf')
29 |
--------------------------------------------------------------------------------
/figures/fig-r/08-r-nnet-3-2.R:
--------------------------------------------------------------------------------
1 | library(tidyverse)
2 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki",
3 | "/fromzero/master/data/wine.csv")
4 | my_data <- read_csv(my_url)
5 |
6 | library(caret)
7 | my_model <- train(form = LPRICE2 ~ .,
8 | data = my_data,
9 | method = "neuralnet",
10 | preProcess = c("center", "scale"),
11 | tuneGrid = data.frame(layer1 = 3,
12 | layer2 = 2,
13 | layer3 = 0),
14 | trControl = trainControl(method = "repeatedcv",
15 | number = 5, repeats = 10))
16 | plot(my_model$finalModel)
17 | file.rename("Rplots.pdf", "08-r-nnet-3-2.pdf")
18 |
--------------------------------------------------------------------------------
/addendum/07.03.02/1+3x+N(0,2x).csv:
--------------------------------------------------------------------------------
1 | x,y
2 | 1,2.4362828056041783
3 | 2,13.320701642205943
4 | 3,6.254185478549559
5 | 4,12.158111887716473
6 | 5,-7.54294281288999
7 | 6,20.03682705412517
8 | 7,-20.367878122873076
9 | 8,37.62187087244209
10 | 9,28.888791768212027
11 | 10,23.865697903729448
12 | 11,20.35783532598032
13 | 12,24.90149878334255
14 | 13,40.93320462020407
15 | 14,83.5879864420934
16 | 15,45.15357518820319
17 | 16,8.733098913685623
18 | 17,82.25121873688809
19 | 18,64.23168654178178
20 | 19,58.72577858048793
21 | 20,12.867497576908818
22 | 21,52.88624028231115
23 | 22,154.2860167538335
24 | 23,176.96876331325072
25 | 24,158.57606765038622
26 | 25,59.5796921671421
27 | 26,26.85539442891543
28 | 27,73.23288430129338
29 | 28,51.56442153204847
30 | 29,49.82876803737508
31 | 30,148.41719344129336
32 |
--------------------------------------------------------------------------------
/figures/fig/図3.1.md:
--------------------------------------------------------------------------------
1 | ```puml
2 | @startuml
3 | skinparam {
4 | defaultFontName Hiragino Kaku Gothic ProN
5 | monochrome true
6 | shadowing false
7 | }
8 |
9 | package R {
10 | rectangle y as y1 #white
11 | rectangle x as x1 #white
12 | rectangle list1a #white;line:white as "
13 | | foo | bar | baz |"
14 | rectangle list1b #white;line:white as "
15 | | foo | bar | baz |"
16 |
17 | x1 --> list1a
18 | y1 --> list1b
19 | x1 -[dotted]> y1 : "y <- x"
20 | list1a -[dotted]> list1b: copy
21 | }
22 |
23 | package Python {
24 | rectangle y as y2 #white
25 | rectangle x as x2 #white
26 | rectangle list2 #white;line:white as "
27 | | foo | bar | baz |"
28 |
29 | x2 --> list2
30 | y2 --> list2
31 | x2 -[dotted]> y2 : "y = x"
32 | }
33 | @enduml
34 | ```
35 |
--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-tuning.R:
--------------------------------------------------------------------------------
1 | pdf(file = "08-r-enet-tuning.pdf", width = 6, height = 4.5)
2 |
3 | library(caret)
4 | library(tidyverse)
5 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki",
6 | "/fromzero/master/data/wine.csv")
7 | my_data <- read_csv(my_url)
8 |
9 | As <- seq(0, 0.1, length.out = 21)
10 | Bs <- seq(0, 0.1, length.out = 6)
11 |
12 | my_model <- train(
13 | form = LPRICE2 ~ ., data = my_data, method = "glmnet", standardize = TRUE,
14 | trControl = trainControl(method = "LOOCV"),
15 | tuneGrid = expand.grid(lambda = As, alpha = Bs))
16 |
17 | tmp <- "B ( = alpha)"
18 | ggplot(my_model) +
19 | theme(legend.position = c(0, 1), legend.justification = c(0, 1)) +
20 | xlab("A ( = lambda)") +
21 | guides(shape = guide_legend(tmp), color = guide_legend(tmp))
22 |
--------------------------------------------------------------------------------
/figures/howtomake.md:
--------------------------------------------------------------------------------
1 | # 画像の生成方法
2 |
3 | コンテナjupyterかrstudioを使います(コンテナの生成方法は2.3節を参照).
4 |
5 | ```bash
6 | docker exec -it jr bash
7 | # あるいは
8 | docker exec -it rs bash
9 | ```
10 |
11 | 以下はコンテナでの作業です.
12 |
13 | ## 準備
14 |
15 | ```bash
16 | apt update && apt install -y texlive-extra-utils pdf2svg
17 |
18 | #cd work # 結果をホスト側に保存する場合
19 | git clone https://github.com/taroyabuki/fromzero.git
20 | cd fromzero/figures
21 | ```
22 |
23 | 画像(PDFとSVG)を作ります.
24 | `-j`のあとの数値はCPUコアの数程度にしてください.
25 | ファイル(`*.R`や`*.py`)を更新したら,`make`以下を実行します.
26 | 更新されたものだけが,再生成されます.
27 |
28 |
29 | ## Rの図を作る場合
30 |
31 | ```bash
32 | cd fig-r
33 | #make clean # すべて生成し直す場合
34 | make -j
35 | cd ..
36 | ```
37 |
38 | ## Pythonの図を作る場合
39 |
40 | ```bash
41 | cd fig-p
42 | #make clean # すべて生成し直す場合
43 | make -j
44 | cd ..
45 | ```
46 |
--------------------------------------------------------------------------------
/data/wine.csv:
--------------------------------------------------------------------------------
1 | LPRICE2,WRAIN,DEGREES,HRAIN,TIME_SV
2 | -0.99868,600,17.1167,160,31
3 | -0.4544,690,16.7333,80,30
4 | -0.80796,502,17.15,130,28
5 | -1.50926,420,16.1333,110,26
6 | -1.71655,582,16.4167,187,25
7 | -0.418,485,17.4833,187,24
8 | -1.97491,763,16.4167,290,23
9 | 0,830,17.3333,38,22
10 | -1.10572,697,16.3,52,21
11 | -1.78098,608,15.7167,155,20
12 | -1.18435,402,17.2667,96,19
13 | -2.24194,602,15.3667,267,18
14 | -0.74943,819,16.5333,86,17
15 | -1.65388,714,16.2333,118,16
16 | -2.25018,610,16.2,292,15
17 | -2.14784,575,16.55,244,14
18 | -0.90544,622,16.6667,89,13
19 | -1.30031,551,16.7667,112,12
20 | -2.28879,536,14.9833,158,11
21 | -1.857,376,17.0667,123,10
22 | -2.19958,574,16.3,184,9
23 | -1.20168,572,16.95,171,8
24 | -1.37264,418,17.65,247,7
25 | -2.23503,821,15.5833,87,6
26 | -1.30769,763,15.8167,51,5
27 | -1.5396,717,16.1667,122,4
28 | -1.99582,578,16,74,3
29 |
--------------------------------------------------------------------------------
/figures/fig-p/07-p-boxplot.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import statsmodels.api as sm
3 | from sklearn.linear_model import LinearRegression
4 | from sklearn.model_selection import cross_val_score, LeaveOneOut
5 | from sklearn.neighbors import KNeighborsRegressor
6 |
7 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
8 | X, y = my_data[['speed']], my_data['dist']
9 |
10 | my_lm_scores = cross_val_score(
11 | LinearRegression(), X, y, cv=LeaveOneOut(), scoring='neg_mean_squared_error')
12 |
13 | my_knn_scores = cross_val_score(
14 | KNeighborsRegressor(n_neighbors=5), X, y, cv=LeaveOneOut(),
15 | scoring='neg_mean_squared_error')
16 |
17 | my_df = pd.DataFrame({
18 | 'lm': -my_lm_scores,
19 | 'knn': -my_knn_scores})
20 |
21 | my_df.boxplot().set_ylabel("$r^2$")
22 |
23 | import matplotlib.pyplot as plt
24 | plt.savefig('07-p-boxplot.pdf')
25 |
--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-prophet.py:
--------------------------------------------------------------------------------
1 | from pmdarima.datasets import airpassengers
2 | my_data = airpassengers.load_airpassengers()
3 |
4 | n = len(my_data)
5 | k = 108
6 |
7 | import pandas as pd
8 | my_ds = pd.date_range(
9 | start='1949/01/01',
10 | end='1960/12/01',
11 | freq='MS')
12 | my_df = pd.DataFrame({
13 | 'ds': my_ds,
14 | 'x': range(n),
15 | 'y': my_data},
16 | index=my_ds)
17 |
18 | my_train = my_df[ :k]
19 | my_test = my_df[-(n - k): ]
20 |
21 | from fbprophet import Prophet
22 | my_prophet_model = Prophet(seasonality_mode='multiplicative')
23 | my_prophet_model.fit(my_train)
24 |
25 | tmp = my_prophet_model.predict(my_test)
26 |
27 | fig = my_prophet_model.plot(tmp)
28 | fig.axes[0].plot(my_train.ds, my_train.y)
29 | fig.axes[0].plot(my_test.ds, my_test.y, color='red')
30 |
31 | import matplotlib.pyplot as plt
32 | plt.savefig('12-p-airpassengers-prophet.pdf')
33 |
--------------------------------------------------------------------------------
/figures/fig-p/13-p-pca-clusters.py:
--------------------------------------------------------------------------------
1 | import seaborn as sns
2 | import statsmodels.api as sm
3 | from pca import pca
4 | from scipy.cluster import hierarchy
5 | from scipy.stats import zscore
6 | from sklearn.cluster import KMeans
7 |
8 | iris = sm.datasets.get_rdataset('iris', 'datasets').data
9 | my_data = zscore(iris.iloc[:, 0:4])
10 |
11 | my_model = pca() # 主成分分析
12 | my_result = my_model.fit_transform(my_data)['PC']
13 | my_result['Species'] = list(iris.Species)
14 |
15 | # 非階層的クラスタ分析の場合
16 | my_result['cluster'] = KMeans(n_clusters=3).fit(my_data).labels_
17 |
18 | # 階層的クラスタ分析の場合
19 | #my_result['cluster'] = hierarchy.cut_tree(
20 | # hierarchy.linkage(my_data, method='complete'), 3)[:,0]
21 |
22 | sns.scatterplot(x='PC1', y='PC2', data=my_result,
23 | hue='cluster', style='Species', palette='bright', legend=False)
24 |
25 | import matplotlib.pyplot as plt
26 | plt.savefig('13-p-pca-clusters.pdf')
27 |
--------------------------------------------------------------------------------
/figures/fig-r/07-r-tuning-train.R:
--------------------------------------------------------------------------------
1 | pdf(file = "07-r-tuning-train.pdf", width = 6, height = 4.5)
2 |
3 | library(caret)
4 | library(tidyverse)
5 | my_data <- cars
6 |
7 | my_loocv <- function(k) {
8 | my_model <- train(form = dist ~ speed, data = my_data, method = "knn",
9 | tuneGrid = data.frame(k = k),
10 | trControl = trainControl(method = "LOOCV"))
11 | y <- my_data$dist
12 | y_ <- my_model %>% predict(my_data)
13 | list(k = k,
14 | training = RMSE(y_, y), # RMSE(訓練)
15 | validation = my_model$results$RMSE) # RMSE(検証)
16 | }
17 |
18 | my_results <- 1:15 %>% map_dfr(my_loocv)
19 |
20 | my_results %>%
21 | pivot_longer(-k) %>%
22 | ggplot(aes(x = k, y = value,
23 | color = name)) +
24 | geom_line() + geom_point() +
25 | xlab("#Neighbors") + ylab("RMSE") +
26 | theme(legend.position = c(1, 0),
27 | legend.justification = c(1, 0))
28 |
--------------------------------------------------------------------------------
/figures/fig-r/10-r-titanic-roc.R:
--------------------------------------------------------------------------------
1 | pdf(file = "10-r-titanic-roc.pdf", width = 6, height = 5)
2 |
3 | library(caret)
4 | library(PRROC)
5 | library(tidyverse)
6 |
7 | my_url <- str_c("https://raw.githubusercontent.com",
8 | "/taroyabuki/fromzero/master/data/titanic.csv")
9 | my_data <- read_csv(my_url)
10 |
11 | my_model <- train(form = Survived ~ ., data = my_data, method = "rpart2",
12 | tuneGrid = data.frame(maxdepth = 2),
13 | trControl = trainControl(method = "none"))
14 |
15 | y <- my_data$Survived
16 | tmp <- my_model %>% predict(newdata = my_data, type = "prob")
17 | y_score <- tmp$Yes
18 |
19 | my_roc <- roc.curve(scores.class0 = y_score[y == "Yes"],
20 | scores.class1 = y_score[y == "No"],
21 | curve = TRUE)
22 | my_roc %>% plot(xlab = "False Positive Rate",
23 | ylab = "True Positive Rate",
24 | legend = FALSE)
25 |
--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-arima.R:
--------------------------------------------------------------------------------
1 | pdf(file = "12-r-airpassengers-arima.pdf", width = 5.83, height = 4.13)
2 |
3 | my_data <- as.vector(AirPassengers)
4 |
5 | n <- length(my_data) # データ数(144)
6 | k <- 108 # 訓練データ数
7 |
8 | library(tidyverse)
9 | library(tsibble)
10 |
11 | my_ds <- seq(
12 | from = yearmonth("1949/01"),
13 | to = yearmonth("1960/12"),
14 | by = 1)
15 | my_label <- rep(
16 | c("train", "test"),
17 | c(k, n - k))
18 | my_df <- tsibble(
19 | ds = my_ds,
20 | x = 0:(n - 1),
21 | y = my_data,
22 | label = my_label,
23 | index = ds) # 日時の列の指定
24 |
25 | my_train <- my_df[ 1:k, ]
26 | my_test <- my_df[- (1:k), ]
27 |
28 | library(fable)
29 | my_arima_model <- my_train %>% model(ARIMA(y))
30 |
31 | tmp <- my_arima_model %>% forecast(h = "3 years")
32 |
33 | tmp %>% autoplot +
34 | geom_line(data = my_df,
35 | aes(x = ds,
36 | y = y,
37 | color = label))
38 |
--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-lm.py:
--------------------------------------------------------------------------------
1 | from pmdarima.datasets import airpassengers
2 | my_data = airpassengers.load_airpassengers()
3 |
4 | n = len(my_data)
5 | k = 108
6 |
7 | import pandas as pd
8 | my_ds = pd.date_range(
9 | start='1949/01/01',
10 | end='1960/12/01',
11 | freq='MS')
12 | my_df = pd.DataFrame({
13 | 'ds': my_ds,
14 | 'x': range(n),
15 | 'y': my_data},
16 | index=my_ds)
17 |
18 | my_train = my_df[ :k]
19 | my_test = my_df[-(n - k): ]
20 |
21 | import matplotlib.pyplot as plt
22 | from sklearn.linear_model import LinearRegression
23 |
24 | my_lm_model = LinearRegression()
25 | my_lm_model.fit(my_train[['x']], my_train.y)
26 |
27 | y_ = my_lm_model.predict(my_df[['x']])
28 | tmp = pd.DataFrame(y_,
29 | index=my_df.index)
30 | plt.plot(my_train.y, label='train')
31 | plt.plot(my_test.y, label='test')
32 | plt.plot(tmp, label='model')
33 | plt.legend()
34 | plt.savefig('12-p-airpassengers-lm.pdf')
35 |
--------------------------------------------------------------------------------
/figures/fig-p/09-p-rpart2.py:
--------------------------------------------------------------------------------
1 | import graphviz
2 | import statsmodels.api as sm
3 | from sklearn import tree
4 | from sklearn.model_selection import GridSearchCV, LeaveOneOut
5 |
6 | my_data = sm.datasets.get_rdataset('iris', 'datasets').data
7 | X, y = my_data.iloc[:, 0:4], my_data.Species
8 |
9 | my_params = {
10 | 'max_depth': range(2, 6),
11 | 'min_samples_split': [2, 20],
12 | 'min_samples_leaf': range(1, 8)}
13 |
14 | my_search = GridSearchCV(
15 | estimator=tree.DecisionTreeClassifier(min_impurity_decrease=0.01,
16 | random_state=0),
17 | param_grid=my_params,
18 | cv=LeaveOneOut(),
19 | n_jobs=-1).fit(X, y)
20 |
21 | my_model = my_search.best_estimator_
22 | my_dot = tree.export_graphviz(
23 | decision_tree=my_model,
24 | out_file=None,
25 | feature_names=X.columns,
26 | class_names=my_model.classes_,
27 | filled=True)
28 | my_graph = graphviz.Source(my_dot)
29 | my_graph.render('09-p-rpart2')
30 |
--------------------------------------------------------------------------------
/figures/fig-p/07-p-tuning.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import statsmodels.api as sm
3 | from sklearn.model_selection import GridSearchCV, LeaveOneOut
4 | from sklearn.neighbors import KNeighborsRegressor
5 |
6 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
7 | X, y = my_data[['speed']], my_data['dist']
8 |
9 | my_params = {'n_neighbors': range(1, 16)} # 探索範囲(1以上16未満の整数)
10 |
11 | my_search = GridSearchCV(estimator=KNeighborsRegressor(),
12 | param_grid=my_params,
13 | cv=LeaveOneOut(),
14 | scoring='neg_mean_squared_error')
15 | my_search.fit(X, y)
16 |
17 | tmp = my_search.cv_results_ # チューニングの詳細
18 | my_scores = (-tmp['mean_test_score'])**0.5 # RMSE
19 | my_results = pd.DataFrame(tmp['params']).assign(validation=my_scores)
20 |
21 | my_results.plot(x='n_neighbors',
22 | style='o-',
23 | ylabel='RMSE')
24 |
25 | import matplotlib.pyplot as plt
26 | plt.savefig('07-p-tuning.pdf')
27 |
--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-prophet.R:
--------------------------------------------------------------------------------
1 | pdf(file = "12-r-airpassengers-prophet.pdf", width = 5.83, height = 4.13)
2 |
3 | my_data <- as.vector(AirPassengers)
4 |
5 | n <- length(my_data) # データ数(144)
6 | k <- 108 # 訓練データ数
7 |
8 | library(tidyverse)
9 | library(tsibble)
10 |
11 | my_ds <- seq(
12 | from = yearmonth("1949/01"),
13 | to = yearmonth("1960/12"),
14 | by = 1)
15 | my_label <- rep(
16 | c("train", "test"),
17 | c(k, n - k))
18 | my_df <- tsibble(
19 | ds = my_ds,
20 | x = 0:(n - 1),
21 | y = my_data,
22 | label = my_label,
23 | index = ds) # 日時の列の指定
24 |
25 | my_train <- my_df[ 1:k, ]
26 | my_test <- my_df[- (1:k), ]
27 |
28 | library(prophet)
29 | my_prophet_model <- my_train %>%
30 | prophet(seasonality.mode = "multiplicative")
31 |
32 | tmp <- my_prophet_model %>% predict(my_test)
33 |
34 | my_prophet_model %>% plot(tmp) +
35 | geom_line(data = my_train, aes(x = as.POSIXct(ds))) +
36 | geom_line(data = my_test, aes(x = as.POSIXct(ds)), color = "red")
37 |
--------------------------------------------------------------------------------
/figures/fig-r/12-r-airpassengers-lm.R:
--------------------------------------------------------------------------------
1 | pdf(file = "12-r-airpassengers-lm.pdf", width = 5.83, height = 4.13)
2 |
3 | my_data <- as.vector(AirPassengers)
4 |
5 | n <- length(my_data) # データ数(144)
6 | k <- 108 # 訓練データ数
7 |
8 | library(tidyverse)
9 | library(tsibble)
10 |
11 | my_ds <- seq(
12 | from = yearmonth("1949/01"),
13 | to = yearmonth("1960/12"),
14 | by = 1)
15 | my_label <- rep(
16 | c("train", "test"),
17 | c(k, n - k))
18 | my_df <- tsibble(
19 | ds = my_ds,
20 | x = 0:(n - 1),
21 | y = my_data,
22 | label = my_label,
23 | index = ds) # 日時の列の指定
24 |
25 | my_train <- my_df[ 1:k, ]
26 | my_test <- my_df[- (1:k), ]
27 |
28 | library(caret)
29 | my_lm_model <- train(form = y ~ x, data = my_train, method = "lm")
30 |
31 | y_ <- my_lm_model %>% predict(my_df)
32 | tmp <- my_df %>%
33 | mutate(y = y_, label = "model")
34 | my_plot <- my_df %>%
35 | ggplot(aes(x = ds,
36 | y = y,
37 | color = label)) +
38 | geom_line()
39 | my_plot + geom_line(data = tmp)
40 |
--------------------------------------------------------------------------------
/figures/fig-p/07-p-polynomial.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import statsmodels.api as sm
4 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
5 |
6 | my_idx = [1, 10, 26, 33, 38, 43]
7 | my_sample = my_data.iloc[my_idx, ]
8 | X, y = my_sample[['speed']], my_sample['dist']
9 |
10 | from sklearn.preprocessing import PolynomialFeatures
11 | d = 5
12 | X5 = PolynomialFeatures(d).fit_transform(X) # Xの1乗から5乗の変数
13 |
14 | from sklearn.linear_model import LinearRegression
15 | my_model = LinearRegression()
16 | my_model.fit(X5, y)
17 |
18 | tmp = pd.DataFrame({'speed': np.linspace(min(my_data.speed),
19 | max(my_data.speed),
20 | 100)})
21 | X5 = PolynomialFeatures(d).fit_transform(tmp)
22 | tmp['model'] = my_model.predict(X5)
23 |
24 | my_sample = my_sample.assign(sample=y)
25 | my_df = pd.concat([my_data, my_sample, tmp])
26 | my_df.plot(x='speed', style=['o', 'o', '-'], ylim=(0, 130))
27 |
28 | import matplotlib.pyplot as plt
29 | plt.savefig('07-p-polynomial.pdf')
30 |
--------------------------------------------------------------------------------
/figures/fig-p/10-p-titanic-tree.py:
--------------------------------------------------------------------------------
1 | import graphviz
2 | import pandas as pd
3 | from sklearn import tree
4 | from sklearn.pipeline import Pipeline
5 | from sklearn.preprocessing import OneHotEncoder
6 |
7 | my_url = ('https://raw.githubusercontent.com'
8 | '/taroyabuki/fromzero/master/data/titanic.csv')
9 | my_data = pd.read_csv(my_url)
10 |
11 | X, y = my_data.iloc[:, 0:3], my_data.Survived
12 |
13 | my_pipeline = Pipeline([
14 | ('ohe', OneHotEncoder(drop='first')),
15 | ('tree', tree.DecisionTreeClassifier(max_depth=2, random_state=0,
16 | min_impurity_decrease=0.01))])
17 | my_pipeline.fit(X, y)
18 |
19 | my_enc = my_pipeline.named_steps['ohe']
20 | my_tree = my_pipeline.named_steps['tree']
21 |
22 | my_dot = tree.export_graphviz(
23 | decision_tree=my_tree,
24 | out_file=None,
25 | feature_names=my_enc.get_feature_names(),
26 | class_names=my_pipeline.classes_,
27 | filled=True)
28 | graphviz.Source(my_dot)
29 | my_graph = graphviz.Source(my_dot)
30 | my_graph.render('10-p-titanic-tree')
31 |
--------------------------------------------------------------------------------
/figures/fig-r/11-r-classification.R:
--------------------------------------------------------------------------------
1 | pdf(file = "11-r-classification.pdf", width = 5.83, height = 4.13)
2 |
3 | library(keras)
4 | library(tidyverse)
5 |
6 | my_data <- iris[sample(nrow(iris)), ]
7 |
8 | X <- my_data %>%
9 | select(-Species) %>% scale
10 | y <- as.integer(my_data$Species) - 1
11 |
12 | my_model <- keras_model_sequential() %>%
13 | layer_dense(units = 3, activation = "relu", input_shape = c(4)) %>%
14 | layer_dense(units = 3, activation = "softmax")
15 |
16 | my_model %>% compile(
17 | loss = "sparse_categorical_crossentropy",
18 | optimizer = "rmsprop",
19 | metrics = c("accuracy"))
20 |
21 | my_cb <- callback_early_stopping(
22 | patience = 20,
23 | restore_best_weights = TRUE)
24 |
25 | my_history <- my_model %>%
26 | fit(x = X,
27 | y = y,
28 | validation_split = 0.25,
29 | batch_size = 10,
30 | epochs = 500,
31 | callbacks = list(my_cb),
32 | verbose = 0)
33 |
34 | plot(my_history)
35 |
36 | my_history
37 |
38 | tmp <- my_model %>% predict(X)
39 | y_ <- apply(tmp, 1, which.max) - 1
40 | mean(y_ == y)
41 |
--------------------------------------------------------------------------------
/figures/fig-r/11-r-regression.R:
--------------------------------------------------------------------------------
1 | pdf(file = "11-r-regression.pdf", width = 5.83, height = 4.13)
2 |
3 | library(keras)
4 | library(tidyverse)
5 |
6 | my_url <- str_c("https://raw.githubusercontent.com",
7 | "/taroyabuki/fromzero/master/data/wine.csv")
8 | tmp <- read_csv(my_url)
9 |
10 | my_data <- tmp[sample(nrow(tmp)), ]
11 |
12 | X <- my_data %>%
13 | select(-LPRICE2) %>% scale
14 | y <- my_data$LPRICE2
15 |
16 | my_model <- keras_model_sequential() %>%
17 | layer_dense(units = 3, activation = "relu", input_shape = c(4)) %>%
18 | layer_dense(units = 1)
19 |
20 | my_model %>% compile(
21 | loss = "mse",
22 | optimizer = "rmsprop")
23 |
24 | my_cb <- callback_early_stopping(
25 | patience = 20,
26 | restore_best_weights = TRUE)
27 |
28 | my_history <- my_model %>%
29 | fit(x = X,
30 | y = y,
31 | validation_split = 0.25,
32 | batch_size = 10,
33 | epochs = 500,
34 | callbacks = list(my_cb),
35 | verbose = 0)
36 |
37 | plot(my_history)
38 |
39 | my_history
40 |
41 | y_ <- my_model %>% predict(X)
42 | mean((y_ - y)^2)**0.5
43 |
--------------------------------------------------------------------------------
/docs/exam.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exam
5 |
6 |
7 |
8 |
9 |
10 | |
11 | name |
12 | english |
13 | math |
14 | gender |
15 |
16 |
17 |
18 |
19 | |
20 | A |
21 | 60 |
22 | 70 |
23 | f |
24 |
25 |
26 | |
27 | B |
28 | 90 |
29 | 80 |
30 | m |
31 |
32 |
33 | |
34 | C |
35 | 70 |
36 | 90 |
37 | m |
38 |
39 |
40 | |
41 | D |
42 | 90 |
43 | 100 |
44 | f |
45 |
46 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/figures/fig-p/10-p-titanic-roc.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from sklearn import tree
3 | from sklearn.metrics import roc_curve, RocCurveDisplay, auc
4 | from sklearn.pipeline import Pipeline
5 | from sklearn.preprocessing import OneHotEncoder
6 |
7 | my_url = ('https://raw.githubusercontent.com'
8 | '/taroyabuki/fromzero/master/data/titanic.csv')
9 | my_data = pd.read_csv(my_url)
10 |
11 | X, y = my_data.iloc[:, 0:3], my_data.Survived
12 |
13 | my_pipeline = Pipeline([
14 | ('ohe', OneHotEncoder(drop='first')),
15 | ('tree', tree.DecisionTreeClassifier(max_depth=2,
16 | min_impurity_decrease=0.01))])
17 | my_pipeline.fit(X, y)
18 |
19 | tmp = pd.DataFrame(
20 | my_pipeline.predict_proba(X),
21 | columns=my_pipeline.classes_)
22 | y_score = tmp.Yes
23 |
24 | my_fpr, my_tpr, _ = roc_curve(y_true=y,
25 | y_score=y_score,
26 | pos_label='Yes')
27 | my_auc = auc(x=my_fpr, y=my_tpr)
28 | RocCurveDisplay(fpr=my_fpr, tpr=my_tpr, roc_auc=my_auc).plot()
29 |
30 | import matplotlib.pyplot as plt
31 | plt.savefig('10-p-titanic-roc.pdf')
32 |
--------------------------------------------------------------------------------
/addendum/07.03.02/README.md:
--------------------------------------------------------------------------------
1 | # 予測値の期待値の信頼区間
2 |
3 | **本稿は本書の想定レベルを超えています.**
4 |
5 | 7.3.2項で次のような絵を描いています(184頁).これは,「speedが21.5のときのdistを予測する」というのがどういうことなのかを説明するためのものです.
6 |
7 | R|Python
8 | :--|:--
9 |
|
10 |
11 | 直線から読み取れるのは,speedが21.5のときのdistの期待値が67になることです.しかし,直線が少し違ったものになる可能性を考慮すると,網掛け部分くらいになるかもしれません.この網掛けの部分を,予測値の期待値の**信頼区間**といいます.
12 |
13 | 実現値として得られるのは,これに誤差が加わった結果で,それを考慮したものを**予測区間**といいます.(ここでは予測区間についてはこれ以上触れません.)
14 |
15 | 本書のレベルではこれで終わりでいいのですが,上の絵の「RとPythonの網掛け部分が少し違っていること」に気付く方がいたので,少し補足します.
16 |
17 | ## 簡単な説明
18 |
19 | Rの`ggplot2::stat_smooth`で描いた結果は,「誤差はxによらず,同一の正規分布に従う」という仮定に基づく,理論的なものです(線形**正規**回帰モデル).
20 |
21 | Pythonの`seaborn.regplot`で描いた結果は,そういう仮定に基づかない,シミュレーション(ブートストラップ)によるものです(線形回帰モデル).
22 |
23 | データが仮定に合わないと違いが際立ちます.
24 |
25 | R|Python
26 | :--|:--
27 |
|
28 |
29 | ## 詳しい説明
30 |
31 | - [R側からの補足](confidence_band_R.ipynb)
32 | - [Python側からの補足](confidence_band_python.ipynb)
--------------------------------------------------------------------------------
/figures/fig-p/07-p-tuning-train.py:
--------------------------------------------------------------------------------
# For each k, compute LOOCV RMSE and training RMSE of k-NN regression
# (cars data) and plot both curves.
1 | import pandas as pd
2 | import statsmodels.api as sm
3 | from sklearn.metrics import mean_squared_error
4 | from sklearn.model_selection import cross_val_score, LeaveOneOut
5 | from sklearn.neighbors import KNeighborsRegressor
6 |
7 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data
8 | X, y = my_data[['speed']], my_data['dist']
9 |
10 | def my_loocv(k):
11 | my_model = KNeighborsRegressor(n_neighbors=k)
12 | my_scores = cross_val_score(estimator=my_model, X=X, y=y,
13 | cv=LeaveOneOut(),
14 | scoring='neg_mean_squared_error')
15 | y_ = my_model.fit(X, y).predict(X)
16 | return pd.Series([k,
17 | (-my_scores.mean())**0.5, # RMSE (validation)
18 | mean_squared_error(y_, y)**0.5], # RMSE (training)
19 | index=['n_neighbors', 'validation', 'training'])
20 |
21 | my_results = pd.Series(range(1, 16)).apply(my_loocv)
22 |
23 | my_results.plot(x='n_neighbors',
24 | style='o-',
25 | ylabel='RMSE')
26 |
27 | import matplotlib.pyplot as plt
28 | plt.savefig('07-p-tuning-train.pdf')
29 |
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-nnet.R:
--------------------------------------------------------------------------------
1 | pdf(file = "11-r-mnist-nnet.pdf", width = 5.83, height = 4.13)
2 |
3 | library(keras)
4 |
5 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
6 | my_index <- sample(1:60000, 6000)
7 | x_train <- x_train[my_index, , ]
8 | y_train <- y_train[my_index]
9 |
10 | my_model <- keras_model_sequential() %>%
11 | layer_flatten(input_shape = c(28, 28)) %>%
12 | layer_dense(units = 256, activation = "relu") %>%
13 | layer_dense(units = 10, activation = "softmax")
14 |
15 | my_model %>% compile(loss = "sparse_categorical_crossentropy",
16 | optimizer = "rmsprop",
17 | metrics = c("accuracy"))
18 |
19 | my_cb <- callback_early_stopping(patience = 5,
20 | restore_best_weights = TRUE)
21 |
22 | my_history <- my_model %>%
23 | fit(x = x_train,
24 | y = y_train,
25 | validation_split = 0.2,
26 | batch_size = 128,
27 | epochs = 20,
28 | callbacks = list(my_cb),
29 | verbose = 0)
30 |
31 | plot(my_history)
32 |
33 | tmp <- my_model %>% predict(x_test)
34 | y_ <- apply(tmp, 1, which.max) - 1
35 | table(y_, y_test)
36 |
37 | mean(y_ == y_test)
38 |
39 | my_model %>% evaluate(x = x_test, y = y_test)
40 |
--------------------------------------------------------------------------------
/figures/fig-p/11-p-regression.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import sklearn
3 | from keras import callbacks, layers, models
4 | from sklearn.preprocessing import StandardScaler
5 |
6 | my_url = ('https://raw.githubusercontent.com'
7 | '/taroyabuki/fromzero/master/data/wine.csv')
8 | tmp = pd.read_csv(my_url)
9 |
10 | my_data = sklearn.utils.shuffle(tmp)
11 |
12 | my_scaler = StandardScaler()
13 | X = my_scaler.fit_transform(
14 | my_data.drop(columns=['LPRICE2']))
15 | y = my_data['LPRICE2']
16 |
17 | my_model = models.Sequential()
18 | my_model.add(layers.Dense(units=3, activation='relu', input_shape=[4]))
19 | my_model.add(layers.Dense(units=1))
20 |
21 | my_model.compile(
22 | loss='mse',
23 | optimizer='rmsprop')
24 |
25 | my_cb = callbacks.EarlyStopping(
26 | patience=20,
27 | restore_best_weights=True)
28 |
29 | my_history = my_model.fit(
30 | x=X,
31 | y=y,
32 | validation_split=0.25,
33 | batch_size=10,
34 | epochs=500,
35 | callbacks=[my_cb],
36 | verbose=0)
37 |
38 | tmp = pd.DataFrame(my_history.history)
39 | tmp.plot(xlabel='epoch')
40 |
41 | import matplotlib.pyplot as plt
42 | plt.savefig('11-p-regression.pdf')
43 |
44 | print(tmp.iloc[-1, ])
45 |
46 | y_ = my_model.predict(X)
47 | print(((y_.ravel() - y)**2).mean())
48 |
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-id5.svg:
--------------------------------------------------------------------------------
1 |
2 |
15 |
--------------------------------------------------------------------------------
/figures/fig-p/12-p-airpassengers-arima.py:
--------------------------------------------------------------------------------
# Fit an auto-ARIMA model to the AirPassengers training window and plot
# the forecast with its confidence band against the train/test series.
1 | import matplotlib.pyplot as plt
2 | from pmdarima.datasets import airpassengers
3 | my_data = airpassengers.load_airpassengers()
4 |
5 | n = len(my_data)
6 | k = 108
7 |
8 | import pandas as pd
9 | my_ds = pd.date_range(
10 | start='1949/01/01',
11 | end='1960/12/01',
12 | freq='MS')
13 | my_df = pd.DataFrame({
14 | 'ds': my_ds,
15 | 'x': range(n),
16 | 'y': my_data},
17 | index=my_ds)
18 |
19 | my_train = my_df[ :k]
20 | my_test = my_df[-(n - k): ]
21 |
22 | import pmdarima as pm
23 | my_arima_model = pm.auto_arima(my_train.y, m=12, trace=True)
24 |
25 | y_, my_ci = my_arima_model.predict(len(my_test), # same horizon as the test data
26 | alpha=0.05, # significance level (the default)
27 | return_conf_int=True) # also return the confidence interval
28 | tmp = pd.DataFrame({'y': y_,
29 | 'Lo': my_ci[:, 0],
30 | 'Hi': my_ci[:, 1]},
31 | index=my_test.index)
32 |
33 | plt.plot(my_train.y, label='train')
34 | plt.plot(my_test.y, label='test')
35 | plt.plot(tmp.y, label='model')
36 | plt.fill_between(tmp.index,
37 | tmp.Lo,
38 | tmp.Hi,
39 | alpha=0.25)
40 | plt.legend(loc='upper left')
41 |
42 | plt.savefig('12-p-airpassengers-arima.pdf')
43 |
--------------------------------------------------------------------------------
/figures/fig/図1.4.md:
--------------------------------------------------------------------------------
1 | ```puml
2 | @startuml
3 | scale 0.8
4 | skinparam {
5 | defaultFontName Hiragino Kaku Gothic ProN
6 | monochrome true
7 | shadowing false
8 | }
9 |
10 | cloud HOMELAN as "家庭内LAN\nネットワーク:192.168.1.0\nサブネットマスク:255.255.255.0" {
11 | rectangle ホストPC as "ホストPC\nIPアドレス:192.168.1.2" {
12 | cloud ホストPC内LAN as "ホストPC内LAN\nネットワーク:172.17.0.0\nサブネットマスク:255.255.0.0" {
13 | rectangle コンテナ as "Dockerコンテナ\nIPアドレス:172.17.43.181" {
14 | rectangle コンテナ8787 as "ポート8787"
15 | rectangle コンテナ8888 as "ポート8888"
16 | }
17 | }
18 | rectangle ホスト8787 as "ポート8787"
19 | rectangle ホスト8888 as "ポート8888"
20 | }
21 | rectangle PC3 as "PC\nIPアドレス:192.168.1.3"
22 | rectangle Gateway as "Gateway, DNS Server\nIPアドレス:192.168.1.1"
23 | ホストPC--Gateway
24 | ホストPC-PC3
25 | PC3--Gateway
26 | コンテナ8787--ホスト8787
27 | コンテナ8888--ホスト8888
28 | }
29 |
30 | usecase http8787 as "localhost:8787"
31 | usecase http8888 as "localhost:8888"
32 |
33 | http8787-up-ホスト8787
34 | http8888-up-ホスト8888
35 |
36 | ホストPCのユーザ-up-http8787
37 | ホストPCのユーザ-up-http8888
38 |
39 | cloud 組織AのLAN {
40 | rectangle PC as "PC\nIPアドレス:192.168.1.2"
41 | }
42 |
43 | cloud 組織BのLAN {
44 | rectangle PC2 as "PC\nIPアドレス:192.168.1.2"
45 | }
46 | 組織AのLAN-Gateway
47 | 組織BのLAN-Gateway
48 | 組織AのLAN--組織BのLAN
49 |
50 | @enduml
51 | ```
52 |
--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-tuning.py:
--------------------------------------------------------------------------------
# Grid-search elastic-net hyperparameters (alpha, l1_ratio) by LOOCV on
# the wine data and plot RMSE per parameter combination.
1 | import numpy as np
2 | import pandas as pd
3 | from sklearn.linear_model import ElasticNet
4 | from sklearn.model_selection import GridSearchCV, LeaveOneOut
5 | from sklearn.pipeline import Pipeline
6 | from sklearn.preprocessing import StandardScaler
7 |
8 | my_url = ('https://raw.githubusercontent.com/taroyabuki/' +
9 | 'fromzero/master/data/wine.csv')
10 | my_data = pd.read_csv(my_url)
11 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2']
12 |
13 | As = np.linspace(0, 0.1, 21)
14 | Bs = np.linspace(0, 0.1, 6)
15 |
16 | my_pipeline = Pipeline([('sc', StandardScaler()),
17 | ('enet', ElasticNet())])
18 | my_search = GridSearchCV(
19 | estimator=my_pipeline,
20 | param_grid={'enet__alpha': As, 'enet__l1_ratio': Bs},
21 | cv=LeaveOneOut(),
22 | scoring='neg_mean_squared_error',
23 | n_jobs=-1).fit(X, y)
24 |
25 | tmp = my_search.cv_results_ # details of the tuning results
26 | my_scores = (-tmp['mean_test_score'])**0.5 # convert MSE to RMSE
27 |
28 | my_results = pd.DataFrame(tmp['params']).assign(RMSE=my_scores).pivot(
29 | index='enet__alpha',
30 | columns='enet__l1_ratio',
31 | values='RMSE')
32 |
33 | my_results.plot(style='o-', xlabel='A ( = alpha)', ylabel='RMSE').legend(
34 | title='B ( = l1_ratio)')
35 |
36 | import matplotlib.pyplot as plt
37 | plt.savefig('08-p-enet-tuning.pdf')
38 |
--------------------------------------------------------------------------------
/figures/fig-p/11-p-classification.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import sklearn
4 | import statsmodels.api as sm
5 | from keras import callbacks, layers, models
6 | from sklearn.preprocessing import StandardScaler, LabelEncoder
7 |
8 | tmp = sm.datasets.get_rdataset('iris', 'datasets').data
9 | my_data = sklearn.utils.shuffle(tmp)
10 |
11 | my_scaler = StandardScaler()
12 | X = my_scaler.fit_transform(
13 | my_data.drop(columns=['Species']))
14 | my_enc = LabelEncoder()
15 | y = my_enc.fit_transform(
16 | my_data['Species'])
17 |
18 | my_model = models.Sequential()
19 | my_model.add(layers.Dense(units=3, activation='relu', input_shape=[4]))
20 | my_model.add(layers.Dense(units=3, activation='softmax'))
21 |
22 | my_model.compile(loss='sparse_categorical_crossentropy',
23 | optimizer='rmsprop',
24 | metrics=['accuracy'])
25 |
26 | my_cb = callbacks.EarlyStopping(
27 | patience=20,
28 | restore_best_weights=True)
29 |
30 | my_history = my_model.fit(
31 | x=X,
32 | y=y,
33 | validation_split=0.25,
34 | batch_size=10,
35 | epochs=500,
36 | callbacks=[my_cb],
37 | verbose=0)
38 |
39 | tmp = pd.DataFrame(my_history.history)
40 | tmp.plot(xlabel='epoch')
41 |
42 | import matplotlib.pyplot as plt
43 | plt.savefig('11-p-classification.pdf')
44 |
45 | print(tmp.iloc[-1, ])
46 |
47 | tmp = my_model.predict(X)
48 | y_ = np.argmax(tmp, axis=-1)
49 | print((y_ == y).mean())
50 |
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-cnn.R:
--------------------------------------------------------------------------------
# Train a simple CNN on a 6000-image subsample of MNIST, plot the training
# history (11-r-mnist-cnn.pdf), and evaluate on the test set.
1 | pdf(file = "11-r-mnist-cnn.pdf", width = 5.83, height = 4.13)
2 |
3 | library(keras)
4 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
5 |
6 | my_index <- sample(1:60000, 6000)
7 | x_train <- x_train[my_index, , ]
8 | y_train <- y_train[my_index]
9 |
10 | x_train <- x_train / 255
11 | x_test <- x_test / 255
12 |
13 | x_train2d <- x_train %>% array_reshape(c(-1, 28, 28, 1))
14 | x_test2d <- x_test %>% array_reshape(c(-1, 28, 28, 1))
15 |
16 | my_model <- keras_model_sequential() %>%
17 | layer_conv_2d(filters = 32, kernel_size = 3, # convolution layer
18 | activation = "relu",
19 | input_shape = c(28, 28, 1)) %>%
20 | layer_max_pooling_2d(pool_size = 2) %>% # pooling layer
21 | layer_flatten() %>%
22 | layer_dense(units = 128, activation = "relu") %>%
23 | layer_dense(units = 10, activation = "softmax")
24 |
25 | my_model %>% compile(
26 | loss = "sparse_categorical_crossentropy",
27 | optimizer = "rmsprop",
28 | metrics = c("accuracy"))
29 |
30 | my_cb <- callback_early_stopping(patience = 5,
31 | restore_best_weights = TRUE)
32 |
33 | my_history <- my_model %>%
34 | fit(x = x_train2d,
35 | y = y_train,
36 | validation_split = 0.2,
37 | batch_size = 128,
38 | epochs = 20,
39 | callbacks = list(my_cb),
40 | verbose = 0)
41 |
42 | plot(my_history)
43 |
44 | my_model %>% evaluate(x = x_test2d, y = y_test)
45 |
--------------------------------------------------------------------------------
/code/R-notebook/r-06.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "id": "fe515360",
5 | "cell_type": "markdown",
6 | "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n",
7 | "metadata": {}
8 | },
9 | {
10 | "id": "e5fca29e",
11 | "cell_type": "markdown",
12 | "source": "## 6.1 \u6a5f\u68b0\u5b66\u7fd2\u306e\u76ee\u7684\uff08\u672c\u66f8\u306e\u5834\u5408\uff09\n\n\n",
13 | "metadata": {}
14 | },
15 | {
16 | "id": "f7848f95",
17 | "cell_type": "markdown",
18 | "source": "## 6.2 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u30c7\u30fc\u30bf",
19 | "metadata": {}
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "iris\n",
28 | "# \u4ee5\u4e0b\u7701\u7565"
29 | ],
30 | "id": "ce116acc-11c8-4cd4-bfdf-ab9b9a7c4142"
31 | },
32 | {
33 | "id": "9da0985a",
34 | "cell_type": "markdown",
35 | "source": "## 6.3 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u624b\u6cd5",
36 | "metadata": {}
37 | }
38 | ],
39 | "nbformat": 4,
40 | "nbformat_minor": 5,
41 | "metadata": {
42 | "kernelspec": {
43 | "name": "ir",
44 | "display_name": "R"
45 | }
46 | }
47 | }
--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-nnet.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import tensorflow as tf
4 | from random import sample
5 | from keras import callbacks, layers, models
6 | from sklearn.metrics import confusion_matrix
7 |
8 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
9 |
10 | my_index = sample(range(60000), 6000)
11 | x_train = x_train[my_index, :, :]
12 | y_train = y_train[my_index]
13 |
14 | x_train = x_train / 255
15 | x_test = x_test / 255
16 |
17 | my_model = models.Sequential()
18 | my_model.add(layers.Flatten(input_shape=[28, 28]))
19 | my_model.add(layers.Dense(units=256, activation="relu"))
20 | my_model.add(layers.Dense(units=10, activation="softmax"))
21 |
22 | my_model.compile(loss='sparse_categorical_crossentropy',
23 | optimizer='rmsprop',
24 | metrics=['accuracy'])
25 |
26 | my_cb = callbacks.EarlyStopping(patience=5,
27 | restore_best_weights=True)
28 |
29 | my_history = my_model.fit(
30 | x=x_train,
31 | y=y_train,
32 | validation_split=0.2,
33 | batch_size=128,
34 | epochs=20,
35 | callbacks=[my_cb],
36 | verbose=0)
37 |
38 | tmp = pd.DataFrame(my_history.history)
39 | tmp.plot(xlabel='epoch', style='o-')
40 |
41 | import matplotlib.pyplot as plt
42 | plt.savefig('11-p-mnist-nnet.pdf')
43 |
44 | tmp = my_model.predict(x_test)
45 | y_ = np.argmax(tmp, axis=-1)
46 | print(confusion_matrix(y_true=y_test, y_pred=y_))
47 |
48 | print((y_test == y_).mean())
49 |
50 | print(my_model.evaluate(x=x_test, y=y_test))
51 |
--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-cnn.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from random import sample
3 | import tensorflow as tf
4 | from keras import callbacks, layers, models
5 |
6 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
7 |
8 | my_index = sample(range(60000), 6000)
9 | x_train = x_train[my_index, :, :]
10 | y_train = y_train[my_index]
11 |
12 | x_train = x_train / 255
13 | x_test = x_test / 255
14 |
15 | x_train2d = x_train.reshape(-1, 28, 28, 1)
16 | x_test2d = x_test.reshape(-1, 28, 28, 1)
17 |
18 | my_model = models.Sequential()
19 | my_model.add(layers.Conv2D(filters=32, kernel_size=3, # 畳み込み層
20 | activation='relu',
21 | input_shape=[28, 28, 1]))
22 | my_model.add(layers.MaxPooling2D(pool_size=2)) # プーリング層
23 | my_model.add(layers.Flatten())
24 | my_model.add(layers.Dense(128, activation='relu'))
25 | my_model.add(layers.Dense(10, activation='softmax'))
26 |
27 | my_model.compile(loss='sparse_categorical_crossentropy',
28 | optimizer='rmsprop',
29 | metrics=['accuracy'])
30 |
31 | my_cb = callbacks.EarlyStopping(patience=5,
32 | restore_best_weights=True)
33 |
34 | my_history = my_model.fit(
35 | x=x_train2d,
36 | y=y_train,
37 | validation_split=0.2,
38 | batch_size=128,
39 | epochs=20,
40 | callbacks=[my_cb],
41 | verbose=0)
42 |
43 | tmp = pd.DataFrame(my_history.history)
44 | tmp.plot(xlabel='epoch', style='o-')
45 |
46 | import matplotlib.pyplot as plt
47 | plt.savefig('11-p-mnist-cnn.pdf')
48 |
49 | print(my_model.evaluate(x=x_test2d, y=y_test))
50 |
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-lenet.R:
--------------------------------------------------------------------------------
1 | pdf(file = "11-r-mnist-lenet.pdf", width = 5.83, height = 4.13)
2 |
3 | library(keras)
4 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
5 |
6 | my_index <- sample(1:60000, 6000)
7 | x_train <- x_train[my_index, , ]
8 | y_train <- y_train[my_index]
9 |
10 | x_train <- x_train / 255
11 | x_test <- x_test / 255
12 |
13 | x_train2d <- x_train %>% array_reshape(c(-1, 28, 28, 1))
14 | x_test2d <- x_test %>% array_reshape(c(-1, 28, 28, 1))
15 |
16 | my_model <- keras_model_sequential() %>%
17 | layer_conv_2d(filters = 20, kernel_size = 5, activation = "relu",
18 | input_shape = c(28, 28, 1)) %>%
19 | layer_max_pooling_2d(pool_size = 2, strides = 2) %>%
20 | layer_conv_2d(filters = 50, kernel_size = 5, activation = "relu") %>%
21 | layer_max_pooling_2d(pool_size = 2, strides = 2) %>%
22 | layer_dropout(rate = 0.25) %>%
23 | layer_flatten() %>%
24 | layer_dense(units = 500, activation = "relu") %>%
25 | layer_dropout(rate = 0.5) %>%
26 | layer_dense(units = 10, activation = "softmax")
27 |
28 | my_model %>% compile(
29 | loss = "sparse_categorical_crossentropy",
30 | optimizer = "rmsprop",
31 | metrics = c("accuracy"))
32 |
33 | my_cb <- callback_early_stopping(patience = 5,
34 | restore_best_weights = TRUE)
35 |
36 | my_history <- my_model %>%
37 | fit(x = x_train2d,
38 | y = y_train,
39 | validation_split = 0.2,
40 | batch_size = 128,
41 | epochs = 20,
42 | callbacks = list(my_cb),
43 | verbose = 0)
44 |
45 | plot(my_history)
46 |
47 | my_model %>% evaluate(x = x_test2d, y = y_test)
48 |
--------------------------------------------------------------------------------
/docker/rstudio/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM rocker/tidyverse
2 |
3 | USER root
4 |
5 | RUN rm /etc/dpkg/dpkg.cfg.d/excludes \
6 | && sed -i -e 's%http://[^ ]\+%mirror://mirrors.ubuntu.com/mirrors.txt%g' /etc/apt/sources.list \
7 | && apt-get update \
8 | && apt-get install -y --no-install-recommends \
9 | curl \
10 | default-jdk \
11 | dnsutils \
12 | iputils-ping \
13 | less \
14 | libglpk-dev \
15 | libnode64 \
16 | libtbb2 \
17 | net-tools \
18 | vim-tiny \
19 | && apt-get --reinstall install -y man-db coreutils manpages \
20 | && apt-get clean \
21 | && rm -rf /var/lib/apt/lists/* \
22 | && yes | unminimize
23 |
24 | USER rstudio
25 |
26 | RUN Rscript -e ' \
27 | options(Ncpus = 32); \
28 | options(repos = "https://cran.ism.ac.jp"); \
29 | Sys.setenv(DOWNLOAD_STATIC_LIBV8=1); \
30 | install.packages(c( \
31 | "caret", \
32 | "doParallel", \
33 | "epitools", \
34 | "exactci", \
35 | "fable", \
36 | "factoextra", \
37 | "feasts", \
38 | "furrr", \
39 | "ggfortify", \
40 | "ggmosaic", \
41 | "glmnetUtils", \
42 | "gplots", \
43 | "h2o", \
44 | "igraph", \
45 | "keras", \
46 | "leaps", \
47 | "lintr", \
48 | "neuralnet", \
49 | "pastecs", \
50 | "prophet", \
51 | "PRROC", \
52 | "psych", \
53 | "proxy", \
54 | "randomForest", \
55 | "reticulate", \
56 | "rpart.plot", \
57 | "tsibble", \
58 | "urca", \
59 | "vcd" \
60 | )); \
61 | remotes::install_version("xgboost", version = "1.4.1.1"); \
62 | remotes::install_github(c("vqv/ggbiplot")); \
63 | reticulate::install_miniconda(); \
64 | keras::install_keras();'
65 |
66 | WORKDIR /home/rstudio
67 |
68 | USER root
69 |
--------------------------------------------------------------------------------
/figures/fig-r/08-r-enet-tuning2.R:
--------------------------------------------------------------------------------
1 | pdf(file = "08-r-enet-tuning2.pdf", width = 6, height = 4.5)
2 |
3 | library(furrr)
4 | plan(multisession)
5 |
6 | library(tidyverse)
7 | my_url <- str_c("https://raw.githubusercontent.com",
8 | "/taroyabuki/fromzero/master/data/wine.csv")
9 | my_data <- read_csv(my_url)
10 |
11 | my_sd <- function(x) { # √標本分散を計算する関数
12 | n <- length(x)
13 | sd(x) * sqrt((n - 1) / n)
14 | }
15 |
16 | my_loocv <- function(A, B) {
17 | my_predict <- function(id) {
18 | my_train <- my_data[-id, ]
19 | my_valid <- my_data[ id, ]
20 | y <- my_train$LPRICE2
21 | u <- mean(y)
22 | s <- my_sd(y)
23 | my_train2 <- my_train %>% mutate(LPRICE2 = (y - u) / s)
24 | my_model <-
25 | glmnetUtils::glmnet(
26 | form = LPRICE2 ~ ., data = my_train2,
27 | lambda = A, alpha = B, standardize = TRUE)
28 | (my_model %>% predict(my_valid, exact = TRUE) * s + u)[1]
29 | }
30 | y <- my_data$LPRICE2
31 | y_ <- seq_len(length(y)) %>% map_dbl(my_predict)
32 | rmse <- mean((y_ - y)^2)^0.5
33 | list(A = A, B = B, RMSE = rmse)
34 | }
35 |
36 | As <- seq(0, 0.1, length.out = 21)
37 | Bs <- seq(0, 0.1, length.out = 6)
38 | my_params <- expand.grid(A = As, B = Bs)
39 |
40 | tmp <- my_params %>% future_pmap_dfr(my_loocv)
41 |
42 | my_result <- tmp %>%
43 | mutate(B = as.factor(B)) %>%
44 | group_by(A, B) %>%
45 | summarise(RMSE = mean(RMSE), .groups = "drop")
46 |
47 | my_result %>% filter(RMSE == min(RMSE))
48 |
49 | my_result %>% ggplot(aes(x = A, y = RMSE, color = B)) +
50 | geom_point() +
51 | geom_line() +
52 | theme(legend.position = c(0, 0),
53 | legend.justification = c(0, 0)) +
54 | xlab("A ( = lambda)") +
55 | guides(color = guide_legend("B ( = alpha)"))
56 |
--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-lenet.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from random import sample
3 | import tensorflow as tf
4 | from keras import callbacks, layers, models
5 |
6 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
7 |
8 | my_index = sample(range(60000), 6000) # subsample 6,000 of the 60,000 training images
9 | x_train = x_train[my_index, :, :]
10 | y_train = y_train[my_index]
11 |
12 | x_train = x_train / 255 # scale pixel values to [0, 1]
13 | x_test = x_test / 255
14 |
15 | x_train2d = x_train.reshape(-1, 28, 28, 1) # add a channel axis for Conv2D
16 | x_test2d = x_test.reshape(-1, 28, 28, 1)
17 |
18 | my_model = models.Sequential() # LeNet-style CNN: two conv/pool stages, then dense layers
19 | my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu',
20 | input_shape=(28, 28, 1)))
21 | my_model.add(layers.MaxPooling2D(pool_size=2, strides=2))
22 | my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu'))
23 | my_model.add(layers.MaxPooling2D(pool_size=2, strides=2))
24 | my_model.add(layers.Dropout(rate=0.25))
25 | my_model.add(layers.Flatten())
26 | my_model.add(layers.Dense(500, activation='relu'))
27 | my_model.add(layers.Dropout(rate=0.5))
28 | my_model.add(layers.Dense(10, activation='softmax')) # 10 digit classes
29 |
30 | my_model.compile(loss='sparse_categorical_crossentropy', # integer labels, not one-hot
31 | optimizer='rmsprop',
32 | metrics=['accuracy'])
33 |
34 | my_cb = callbacks.EarlyStopping(patience=5, # stop after 5 epochs without val improvement
35 | restore_best_weights=True)
36 |
37 | my_history = my_model.fit(
38 | x=x_train2d,
39 | y=y_train,
40 | validation_split=0.2,
41 | batch_size=128,
42 | epochs=20,
43 | callbacks=[my_cb],
44 | verbose=0)
45 |
46 | tmp = pd.DataFrame(my_history.history)
47 | tmp.plot(xlabel='epoch', style='o-') # learning curves: loss/accuracy per epoch
48 |
49 | import matplotlib.pyplot as plt
50 | plt.savefig('11-p-mnist-lenet.pdf')
51 |
52 | print(my_model.evaluate(x=x_test2d, y=y_test)) # [test loss, test accuracy]
53 |
--------------------------------------------------------------------------------
/figures/fig-p/08-p-enet-tuning2.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import numpy as np
3 | import pandas as pd
4 | from pandarallel import pandarallel
5 | from scipy.stats import zscore
6 | from sklearn.linear_model import ElasticNet
7 | from sklearn.metrics import mean_squared_error
8 | from sklearn.pipeline import Pipeline
9 | from sklearn.preprocessing import StandardScaler
10 |
11 | my_url = ('https://raw.githubusercontent.com'
12 | '/taroyabuki/fromzero/master/data/wine.csv')
13 | my_data = pd.read_csv(my_url)
14 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2']
15 |
16 | def my_loocv(A, B): # leave-one-out CV RMSE for ElasticNet(alpha=A, l1_ratio=B)
17 | def my_predict(id): # refit without row id, then predict that held-out row
18 | my_train = my_data.drop([id])
19 | my_valid = my_data.take([id])
20 | X, y = my_train.drop(columns=['LPRICE2']), my_train.LPRICE2
21 | u = y.mean()
22 | s = y.std(ddof=0) # population std, matching zscore's default ddof=0 below
23 | my_model = Pipeline([
24 | ('sc', StandardScaler()),
25 | ('enet', ElasticNet(alpha=A, l1_ratio=B))]).fit(X, zscore(y))
26 | X = my_valid.drop(columns=['LPRICE2'])
27 | return (my_model.predict(X) * s + u)[0] # undo the target standardization
28 |
29 | y_ = [my_predict(id) for id in range(len(my_data))]
30 | rmse = mean_squared_error(y_, y)**0.5 # y here is the module-level full target vector
31 | return pd.Series([A, B, rmse], index=['A', 'B', 'RMSE'])
32 |
33 | As = np.linspace(0, 0.1, 21) # candidate alpha values
34 | Bs = np.linspace(0, 0.1, 6) # candidate l1_ratio values
35 | my_plan = pd.DataFrame(itertools.product(As, Bs), columns=['A', 'B'])
36 |
37 | pandarallel.initialize()
38 | my_results = my_plan.parallel_apply(lambda row: my_loocv(*row), axis=1) # one result row per (A, B)
39 |
40 | print(my_results[my_results.RMSE == my_results.RMSE.min()]) # best combination(s)
41 |
42 | my_results.pivot(index='A', columns='B', values='RMSE').plot(
43 | style='o-', xlabel='A ( = alpha)', ylabel='RMSE').legend(
44 | title='B ( = l1_ratio)')
45 |
46 | import matplotlib.pyplot as plt
47 | plt.savefig('08-p-enet-tuning2.pdf')
48 |
--------------------------------------------------------------------------------
/docker/jupyter/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM jupyter/datascience-notebook:python-3.8.8
2 |
3 | USER root
4 |
5 | RUN rm /etc/dpkg/dpkg.cfg.d/excludes \
6 | && sed -i -e 's%http://[^ ]\+%mirror://mirrors.ubuntu.com/mirrors.txt%g' /etc/apt/sources.list \
7 | && apt-get update \
8 | && apt-get install -y --no-install-recommends \
9 | default-jdk \
10 | dnsutils \
11 | graphviz \
12 | iputils-ping \
13 | less \
14 | net-tools \
15 | && apt-get --reinstall install -y man-db coreutils manpages \
16 | && apt-get clean \
17 | && rm -rf /var/lib/apt/lists/* \
18 | && echo y | unminimize
19 |
20 | USER jovyan
21 |
22 | # pystan is pinned to 2.* because fbprophet (installed below) requires the PyStan 2 API
23 | RUN pip install \
24 | graphviz \
25 | h2o \
26 | japanize_matplotlib \
27 | keras \
28 | lxml \
29 | pandarallel \
30 | pca \
31 | pmdarima \
32 | pycodestyle \
33 | pystan==2.* \
34 | tensorflow \
35 | xgboost
36 |
37 | RUN pip install fbprophet
38 |
39 | RUN mkdir -p /home/jovyan/.ipython/profile_default && echo "c.InteractiveShell.ast_node_interactivity = 'all'" > /home/jovyan/.ipython/profile_default/ipython_config.py
40 |
41 | RUN Rscript -e ' \
42 | options(Ncpus = 32); \
43 | options(repos = "https://cran.ism.ac.jp"); \
44 | Sys.setenv(DOWNLOAD_STATIC_LIBV8=1); \
45 | install.packages(c( \
46 | "doParallel", \
47 | "e1071", \
48 | "epitools", \
49 | "exactci", \
50 | "fable", \
51 | "factoextra", \
52 | "feasts", \
53 | "furrr", \
54 | "ggfortify", \
55 | "ggmosaic", \
56 | "gplots", \
57 | "glmnetUtils", \
58 | "h2o", \
59 | "igraph", \
60 | "keras", \
61 | "leaps", \
62 | "lintr", \
63 | "neuralnet", \
64 | "pastecs", \
65 | "prophet", \
66 | "proxy", \
67 | "PRROC", \
68 | "psych", \
69 | "rpart.plot", \
70 | "tsibble", \
71 | "vcd" \
72 | )); \
73 | remotes::install_version("xgboost", version = "1.4.1.1"); \
74 | remotes::install_github(c("vqv/ggbiplot"));'
75 |
76 | WORKDIR /home/jovyan
77 |
78 | USER root
79 |
--------------------------------------------------------------------------------
/code/R-notebook/README.md:
--------------------------------------------------------------------------------
1 | # Jupyter Notebooks for R
2 |
3 | chapter|Open in Colab
4 | --|--
5 | 03|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-03.ipynb)
6 | 04|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-04.ipynb)
7 | 05|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-05.ipynb)
8 | 06|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-06.ipynb)
9 | 07|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-07.ipynb)
10 | 08|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-08.ipynb)
11 | 09|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-09.ipynb)
12 | 10|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-10.ipynb)
13 | 11|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-11.ipynb)
14 | 12|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-12.ipynb)
15 | 13|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-13.ipynb)
16 |
--------------------------------------------------------------------------------
/code/Python-notebook/README.md:
--------------------------------------------------------------------------------
1 | # Jupyter Notebooks for Python
2 |
3 | chapter|Open in Colab
4 | --|--
5 | 03|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-03.ipynb)
6 | 04|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-04.ipynb)
7 | 05|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-05.ipynb)
8 | 06|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-06.ipynb)
9 | 07|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-07.ipynb)
10 | 08|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-08.ipynb)
11 | 09|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-09.ipynb)
12 | 10|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-10.ipynb)
13 | 11|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-11.ipynb)
14 | 12|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-12.ipynb)
15 | 13|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-13.ipynb)
16 |
--------------------------------------------------------------------------------
/figures/fig-p/11-p-mnist-lenet-miss.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import pandas as pd
4 | import tensorflow as tf
5 | from random import sample
6 | from keras import callbacks, layers, models
7 |
8 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
9 |
10 | #my_index = sample(range(60000), 6000)
11 | #x_train = x_train[my_index, :, :]
12 | #y_train = y_train[my_index]
13 |
14 | x_train = x_train / 255 # scale pixel values to [0, 1]
15 | x_test = x_test / 255
16 |
17 | x_train2d = x_train.reshape(-1, 28, 28, 1) # add a channel axis for Conv2D
18 | x_test2d = x_test.reshape(-1, 28, 28, 1)
19 |
20 | my_model = models.Sequential()
21 | my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu',
22 | input_shape=(28, 28, 1)))
23 | my_model.add(layers.MaxPooling2D(pool_size=2, strides=2))
24 | my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu')) # NOTE(review): the R version of this figure uses filters = 50 here — confirm which is intended
25 | my_model.add(layers.MaxPooling2D(pool_size=2, strides=2))
26 | my_model.add(layers.Dropout(rate=0.25))
27 | my_model.add(layers.Flatten())
28 | my_model.add(layers.Dense(500, activation='relu'))
29 | my_model.add(layers.Dropout(rate=0.5))
30 | my_model.add(layers.Dense(10, activation='softmax'))
31 |
32 | my_model.compile(loss='sparse_categorical_crossentropy',
33 | optimizer='rmsprop',
34 | metrics=['accuracy'])
35 |
36 | my_cb = callbacks.EarlyStopping(patience=5,
37 | restore_best_weights=True)
38 |
39 | my_history = my_model.fit(
40 | x=x_train2d,
41 | y=y_train,
42 | validation_split=0.2,
43 | batch_size=128,
44 | epochs=20,
45 | callbacks=[my_cb],
46 | verbose=0)
47 |
48 | y_prob = my_model.predict(x_test2d) # class-membership probabilities
49 |
50 | tmp = pd.DataFrame({
51 | 'y_prob': np.max(y_prob, axis=1), # highest predicted probability
52 | 'y_': np.argmax(y_prob, axis=1), # predicted class
53 | 'y': y_test, # ground truth
54 | 'id': range(len(y_test))}) # index in the test set
55 |
56 | tmp = tmp[tmp.y_ != tmp.y] # keep only the misclassified examples
57 | my_result = tmp.sort_values('y_prob', ascending=False) # sort by confidence, descending
58 | print(my_result.head())
59 |
60 | for i in range(5): # show the five most confidently wrong predictions
61 | plt.subplot(1, 5, i + 1)
62 | ans = my_result['y'].iloc[i]
63 | id = my_result['id'].iloc[i]
64 | plt.title(f'{ans} ({id})') # "true label (test index)"
65 | plt.imshow(x_test[id])
66 | plt.axis('off')
67 |
68 | plt.savefig('11-p-mnist-lenet-miss.pdf')
69 |
--------------------------------------------------------------------------------
/figures/fig-r/11-r-mnist-lenet-miss.R:
--------------------------------------------------------------------------------
1 | pdf(file = "11-r-mnist-lenet-miss.pdf", width = 5.83, height = 4.13)
2 |
3 | library(keras)
4 | library(tidyverse)
5 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
6 |
7 | #my_index <- sample(1:60000, 6000)
8 | #x_train <- x_train[my_index, , ]
9 | #y_train <- y_train[my_index]
10 |
11 | x_train <- x_train / 255 # scale pixel values to [0, 1]
12 | x_test <- x_test / 255
13 |
14 | x_train2d <- x_train %>% array_reshape(c(-1, 28, 28, 1)) # add a channel axis for conv layers
15 | x_test2d <- x_test %>% array_reshape(c(-1, 28, 28, 1))
16 |
17 | my_model <- keras_model_sequential() %>%
18 | layer_conv_2d(filters = 20, kernel_size = 5, activation = "relu",
19 | input_shape = c(28, 28, 1)) %>%
20 | layer_max_pooling_2d(pool_size = 2, strides = 2) %>%
21 | layer_conv_2d(filters = 50, kernel_size = 5, activation = "relu") %>% # NOTE(review): the Python version of this figure uses filters=20 here — confirm which is intended
22 | layer_max_pooling_2d(pool_size = 2, strides = 2) %>%
23 | layer_dropout(rate = 0.25) %>%
24 | layer_flatten() %>%
25 | layer_dense(units = 500, activation = "relu") %>%
26 | layer_dropout(rate = 0.5) %>%
27 | layer_dense(units = 10, activation = "softmax")
28 |
29 | my_model %>% compile(
30 | loss = "sparse_categorical_crossentropy",
31 | optimizer = "rmsprop",
32 | metrics = c("accuracy"))
33 |
34 | my_cb <- callback_early_stopping(patience = 5,
35 | restore_best_weights = TRUE)
36 |
37 | my_history <- my_model %>%
38 | fit(x = x_train2d,
39 | y = y_train,
40 | validation_split = 0.2,
41 | batch_size = 128,
42 | epochs = 20,
43 | callbacks = list(my_cb),
44 | verbose = 0)
45 |
46 | y_prob <- my_model %>% predict(x_test2d) # class-membership probabilities
47 |
48 | my_result <- data.frame(
49 | y_prob = apply(y_prob, 1, max), # highest predicted probability
50 | y_ = apply(y_prob, 1, which.max) - 1, # predicted class (which.max is 1-based, so subtract 1)
51 | y = y_test, # ground truth
52 | id = seq_len(length(y_test))) %>% # index in the test set
53 | filter(y_ != y) %>% # keep only the misclassified examples
54 | arrange(desc(y_prob)) # sort by confidence, descending
55 | head(my_result)
56 |
57 | tmp <- my_result[1:5, ]$id
58 | my_labels <- sprintf("%s (%s)",
59 | my_result[1:5, ]$y, tmp) # "true label (test index)"
60 | my_fig <- expand.grid(
61 | label = my_labels,
62 | y = 28:1, # flip so image row 1 is drawn at the top
63 | x = 1:28)
64 | my_fig$z <- as.vector(
65 | x_test[tmp, , ])
66 |
67 | my_fig %>% ggplot(
68 | aes(x = x, y = y, fill = z)) +
69 | geom_raster() +
70 | coord_fixed() +
71 | theme_void() +
72 | theme(legend.position = "none") +
73 | facet_grid(. ~ label)
74 |
--------------------------------------------------------------------------------
/code/Python-notebook/python-06.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "id": "c5f5660f",
5 | "cell_type": "markdown",
6 | "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n",
7 | "metadata": {}
8 | },
9 | {
10 | "id": "4e6dc4c2",
11 | "cell_type": "markdown",
12 | "source": "## 6.1 \u6a5f\u68b0\u5b66\u7fd2\u306e\u76ee\u7684\uff08\u672c\u66f8\u306e\u5834\u5408\uff09\n\n\n",
13 | "metadata": {}
14 | },
15 | {
16 | "id": "11686fa0",
17 | "cell_type": "markdown",
18 | "source": "## 6.2 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u30c7\u30fc\u30bf",
19 | "metadata": {}
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "import statsmodels.api as sm\n",
28 | "iris = sm.datasets.get_rdataset('iris', 'datasets').data\n",
29 | "iris.head()\n",
30 | "# \u4ee5\u4e0b\u7701\u7565"
31 | ],
32 | "id": "8fc0d772-605e-46ee-b679-2603d838c891"
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "import seaborn as sns\n",
41 | "iris = sns.load_dataset('iris')\n",
42 | "iris.head()\n",
43 | "# \u4ee5\u4e0b\u7701\u7565"
44 | ],
45 | "id": "c506c249-f4ca-4057-af97-58037c02a6ae"
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "import pandas as pd\n",
54 | "from sklearn.datasets import load_iris\n",
55 | "tmp = load_iris()\n",
56 | "iris = pd.DataFrame(tmp.data, columns=tmp.feature_names)\n",
57 | "iris['target'] = tmp.target_names[tmp.target]\n",
58 | "iris.head()\n",
59 | "# \u4ee5\u4e0b\u7701\u7565"
60 | ],
61 | "id": "94e44eb0-09ae-4573-8eb8-7d76662ca5ea"
62 | },
63 | {
64 | "id": "9edbd001",
65 | "cell_type": "markdown",
66 | "source": "## 6.3 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u624b\u6cd5",
67 | "metadata": {}
68 | }
69 | ],
70 | "nbformat": 4,
71 | "nbformat_minor": 5,
72 | "metadata": {
73 | "kernelspec": {
74 | "name": "python3",
75 | "display_name": "Python 3"
76 | }
77 | }
78 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [講談社サイエンティフィク](https://www.kspub.co.jp/) / [実践Data Scienceシリーズ](https://www.kspub.co.jp/book/series/S069.html) / [ゼロからはじめるデータサイエンス入門](https://www.kspub.co.jp/book/detail/5132326.html)
2 |
3 | # ゼロからはじめるデータサイエンス入門(講談社, 2021)サポートサイト
4 |
5 | - [書店へのリンク集(版元ドットコム)](https://www.hanmoto.com/bd/isbn/9784065132326)
6 | - [国会図書館](https://ndlsearch.ndl.go.jp/books/R100000002-I031834151)
7 |
8 |
9 |
10 | 著者:**辻真吾**([@tsjshg](https://twitter.com/tsjshg))・**矢吹太朗**([@yabuki](https://twitter.com/yabuki))
11 |
12 | RやPythonのコード(具体的なコンピュータプログラム)の読み書きを通じてデータサイエンスについて学ぶための一冊です.
13 | コードなしで学びたいという人には,別の書籍にあたることをお勧めします.
14 |
15 | 本書には,次の三つの特徴があります.
16 |
17 | 1. 第1部「データサイエンスの準備」で,準備に時間をかけています.
18 | 1. ほぼ全ての例をコードに基づいて説明しています.本書掲載のコードはサポートサイト([ここ](#コード))でも公開しています(使用方法は2.6節を参照).
19 | 1. 第2部「機械学習」では,ほぼ全ての課題をRとPythonで解決し,同じ結果を得ることを試みています.
20 |
21 | ## [更新情報・正誤表](update.md)
22 |
23 | ## 目次
24 |
25 | - はじめに
26 | - 第1部
27 | - 第1章 コンピュータとネットワーク
28 | - 第2章 データサイエンスのための環境
29 | - 第3章 RとPython
30 | - 第4章 統計入門
31 | - 第5章 前処理
32 | - 第2部
33 | - 第6章 機械学習の目的・データ・手法
34 | - 第7章 回帰1(単回帰)
35 | - 第8章 回帰2(重回帰)
36 | - 第9章 分類1(多値分類)
37 | - 第10章 分類2(2値分類)
38 | - 第11章 深層学習とAutoML
39 | - 第12章 時系列予測
40 | - 第13章 教師なし学習
41 | - 付録A 環境構築
42 | - おわりに
43 | - 参考文献
44 | - 索引
45 |
46 | ## コード
47 |
48 | 言語|システム|コード|実行結果
49 | --|--|--|--
50 | R|Google Colab|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/r.ipynb)|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/r-results.ipynb)
51 | R|Jupyter|[r.ipynb](code/r.ipynb)|[r-results.ipynb](code/r-results.ipynb)
52 | R|RStudio|[r.Rmd](code/r.Rmd)|[r.html](https://taroyabuki.github.io/fromzero/r.html)
53 | Python|Google Colab|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/python.ipynb)|[](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/python-results.ipynb)
54 | Python|Jupyter|[python.ipynb](code/python.ipynb)|[python-results.ipynb](code/python-results.ipynb)
55 | Python|RStudio|[python.Rmd](code/python.Rmd)|[python.html](https://taroyabuki.github.io/fromzero/python.html)
56 |
57 | コードの使い方は,2.6節を参照してください[^1][^2].
58 |
59 | [^1]: [Amazon SageMaker Studio Lab](https://github.com/taroyabuki/fromzero/tree/master/addendum/sagemaker)での動作も確認済みです.
60 |
61 | [^2]: Apple Chipでは,JupyterとRStudio上では第11章のコードが動作しません.第11章のコードを試す場合は,Google Colabを利用してください.
62 |
63 | ## Docker
64 |
65 | 環境|言語|説明
66 | --|--|--
67 | Jupyter Notebook|R, Python|[Jupyter Notebook](docker/jupyter)
68 | RStudio|R|[RStudio](docker/rstudio)
69 |
70 | Dockerの使い方は,2.3節を参照してください.
71 |
72 | ## [画像とそのソースコード](figures)
73 |
74 | ## ライセンス
75 |
76 | The contents of https://github.com/taroyabuki/fromzero by Shingo Tsuji and Taro Yabuki is licensed under the [Apache License, Version 2.0](LICENSE).
77 |
--------------------------------------------------------------------------------
/addendum/sagemaker/README.md:
--------------------------------------------------------------------------------
1 | # Amazon SageMaker Studio Lab
2 |
3 | 無料の[Amazon SageMaker Studio Lab](https://studiolab.sagemaker.aws/)(以下,Studio Lab)で本書のコードを動かすための環境を作ります.Studio Labの概要は,[Amazon SageMaker Studio Lab入門](https://atmarkit.itmedia.co.jp/ait/subtop/features/di/sagemakerstudiolab_index.html)を参照してください.
4 |
5 | TerminalでGitHubリポジトリをクローンします.
6 |
7 | ```bash
8 | git clone https://github.com/taroyabuki/fromzero.git
9 | ```
10 |
11 | ## 仮想環境の構築
12 |
13 | ```bash
14 | # Rの場合
15 | conda env create --file fromzero/addendum/sagemaker/sage-r.yml
16 |
17 | # Pythonの場合
18 | conda env create --file fromzero/addendum/sagemaker/sage-python.yml
19 | ```
20 |
21 | ## Jupyter Notebookの利用
22 |
23 | 画面左にファイルブラウザーがあります.そこから,次のノートブックを開いてください.
24 |
25 | 言語|カーネル|全体のノートブック|各章のノートブック
26 | --|--|--|--
27 | R|sage-r:R|fromzero/code/r.ipynb|fromzero/code/R-notebook
28 | Python|sage-python:Python|fromzero/code/python.ipynb|fromzero/code/Python-notebook
29 |
30 | ノートブックのファイル(.ipynb)をダブルクリックするとカーネル選択のダイアログが出るので,Rの場合はsage-r:R,Pythonの場合はsage-python:Pythonを選択してください.
31 |
32 | 補足:Jupyter Notebook(Python)の出力を本書と同じにするためには,最初に次のコードを実行してください.54頁の脚註24のようにしてもかまいません.
33 |
34 | ```python
35 | from IPython.core.interactiveshell import InteractiveShell
36 | InteractiveShell.ast_node_interactivity = "all"
37 | ```
38 |
39 | ## 仮想環境の削除
40 |
41 | ```bash
42 | # Rの場合
43 | conda remove -n sage-r --all -y
44 |
45 | # Pythonの場合
46 | conda remove -n sage-python --all -y
47 | ```
48 |
49 | すべてを削除してやり直す方法は,[Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/studio-lab-use-manage.html#:~:text=Start%20runtime.-,Reset%20environment,-To%20remove%20all)に掲載されています.
50 |
51 | ## 補足
52 |
53 | 環境構築に使った.ymlは次のように作成しました(このコードを実行する必要はありません).
54 |
55 | ```bash
56 | # Rの場合
57 | conda create -y -n sage-r python=3.8.8
58 | conda activate sage-r
59 |
60 | conda install -y -c conda-forge \
61 | r-caret \
62 | r-doparallel \
63 | r-exactci \
64 | r-fable \
65 | r-factoextra \
66 | r-feasts \
67 | r-furrr \
68 | r-ggfortify \
69 | r-ggmosaic \
70 | r-glmnetutils \
71 | r-h2o==3.34.0.3 \
72 | r-igraph \
73 | r-irkernel \
74 | r-keras \
75 | r-neuralnet \
76 | r-pastecs \
77 | r-prophet \
78 | r-prroc \
79 | r-psych \
80 | r-randomforest \
81 | r-remotes \
82 | r-rpart.plot \
83 | r-tidyverse \
84 | r-urca \
85 | r-vcd \
86 | r-xgboost==1.4.1
87 |
88 | conda install -y -c bioconda r-ggbiplot
89 |
90 | Rscript -e 'keras::install_keras()'
91 |
92 | conda env export -n sage-r > sage-r.yml
93 | ```
94 |
95 | ```bash
96 | # Pythonの場合
97 | conda create -y -n sage-python python=3.8.8
98 | conda activate sage-python
99 |
100 | conda install -y \
101 | fbprophet \
102 | ipykernel \
103 | keras \
104 | lxml \
105 | matplotlib \
106 | pandarallel \
107 | pmdarima \
108 | python-graphviz \
109 | seaborn \
110 | scikit-learn \
111 | scipy==1.6.3 \
112 | statsmodels \
113 | tensorflow-gpu \
114 | xgboost==1.5.1
115 |
116 | conda install -y -c anaconda h2o h2o-py
117 |
118 | pip install pca
119 |
120 | conda env export -n sage-python > sage-python.yml
121 | ```
122 |
--------------------------------------------------------------------------------
/update.md:
--------------------------------------------------------------------------------
1 | # 更新情報・正誤表
2 |
3 | 公開しているコードでは,以下の内容を反映しています.
4 |
5 | ## 更新情報
6 |
7 | 場所|説明
8 | --|--
9 | p. 6|Windows 11には,脚註4で紹介しているWindows Terminalが搭載されています.
10 | p. 20|[Amazon SageMaker Studio Lab](addendum/sagemaker)での動作も確認済みです.表2.1のクラウド・ノートブックに相当します.
11 | p. 22 脚註3|Google Colabでノートブックを新規作成した後で,ランタイム→ランタイムのタイプを変更で,Rを選択できるようになりました.
12 | p. 77|**(バージョン依存)** 3.4.2.1のPythonのコードの`my_df2 = my_df.append(tmp)`を`my_df2 = pd.concat([my_df, tmp])`としなければならないことがあります.
13 | p. 112 脚註6|**(バージョン依存)** 対象を数値の列に限定するオプション`numeric_only=True`が必要な場合があります.
14 | p. 113|**(バージョン依存)** Pythonのコードを,`my_df.var(numeric_only=True)`や`my_df.apply('var', numeric_only=True)`としなければならないことがあります.
15 | p. 115|**(バージョン依存)** Pythonのコードを,`my_df.groupby('gender').mean(numeric_only=True)`あるいは`my_df.groupby('gender').agg('mean', numeric_only=True)`あるいは`my_df.drop(['name'], axis=1).groupby('gender').agg(np.mean)`としなければならないことがあります.
16 | p. 151, 152|GitHub上でのCSVファイルの表示方法が変更されたので,https://github.com/taroyabuki/fromzero/blob/master/data/exam.csv の代わりにhttps://taroyabuki.github.io/fromzero/exam.html を使ってください.
17 | p. 160, 161|**(バージョン依存)** Pythonのコードの`get_feature_names()`を`get_feature_names_out()`としなければならないことがあります.
18 | p. 184|[予測値の期待値の信頼区間](addendum/07.03.02/)
19 | p. 194|[「7.4.3 当てはまりの良さの指標の問題点」についての補足](addendum/07.04.03.ipynb)
20 | p. 271, 275|XGBoostで`ValueError: Invalid classes inferred from unique values of y. Expected: [0 1 2], got ['setosa' 'versicolor' 'virginica']`というエラーが出る場合は,`LabelEncoder`を使ってラベルを数値に変換してください.
21 | p. 271, 275|9.4.2, 9.5.3項のPythonのコードで警告がたくさん出る場合は,`warnings.simplefilter`の引数の「`, UserWarning`」を削除してみてください.
22 | p. 277|9.6.2項のPythonのコードで警告がたくさん出る場合は,`MLPClassifier()`を`MLPClassifier(max_iter=1000)`に変更してみてください.
23 | p. 292, 298|**(バージョン依存)** Pythonのコードの`get_feature_names()`を`get_feature_names_out()`としなければならないことがあります.
24 | p. 297|**(バージョン依存)** Pythonのコードの`LogisticRegression(penalty='none')`を`LogisticRegression(penalty=None)`としなければならないことがあります.
25 | 第11章|Google Colabでは,Kerasの代わりにKeras3をインストールして,`library(keras3)`で読み込んでください(公開コードは対応済み).
26 | p. 309|**(バージョン依存)** Rでエラーが出る場合は,`list`を`rbind`に変更してください.
27 | p. 309|**(バージョン依存)** Pythonでは`y, y_1, y_2 = np.array(y), np.array(y_1), np.array(y_2)`として,リストをアレイに変換しなければならないことがあります.
28 | p. 310, 329|Rのコード`左辺 %<-% 右辺`が正しく動作しない場合は,事前に`library(zeallot)`を実行してください.
29 | p. 342|**(バージョン依存)** Pythonのコードの`from fbprophet import Prophet`を`from prophet import Prophet`としなければならないことがあります.
30 |
31 | ## 正誤表
32 |
33 | 次の誤りは第6刷で修正しました.
34 |
35 | 場所|誤|正
36 | --|--|--
37 | p. 138 本文1行目|確率(約0.22)|確率(約0.022)
38 |
39 | 次の誤りは第5刷で修正しました.
40 |
41 | 場所|誤|正
42 | --|--|--
43 | p. 258 本文3行目|グラフの中で|連結グラフ(任意の2点を線をつないで結べるグラフ)の中で
44 | p. 351 Pythonのコード|`vals, vecs = np.linalg.eig(S) # 固有値と固有ベクトル`|`vals, vecs = np.linalg.eig(S) # 固有値と固有ベクトル`<br>`idx = np.argsort(-vals) # 固有値の大きい順の番号`<br>`vals, vecs = vals[idx], vecs[:, idx] # 固有値の大きい順での並べ替え`
45 |
46 | 次の誤りは第4刷で修正しました.
47 |
48 | 場所|誤|正
49 | --|--|--
50 | p. 56 最初のコード(R)|`0.3333333`|`3.333333`
51 | p. 56 最初のコード(Python)|`0.3333333333333333`|`3.3333333333333335`
52 | p. 56 脚註1|0.3333333|3.333333
53 | p. 119 脚註9|[4.3, 4.7, 5.1, 5.5, 5.9, 6.300000000000001, 6.7, 7.1000000000000005, 7.5, 7.9]です.小さな誤差が,観測値6.3や7.1が属する階級に影響し,このままではヒストグラムがRと同じになりません.同じにするために,ここでは,`round`で誤差を消しています.|[4.3, 4.7, 5.1, 5.5, 5.9, 6.3, 6.7, 7.1, 7.5, 7.9]から少しずれます.Rも同様なのですが,Rではそのずれを丸めて消してから数を数えます.ここでは,Pythonでもそうなるように,`round`で数値を丸めています.
54 | p. 184 脚註4|回帰直線|予測値の期待値
55 | p. 194 Pythonのコード(2箇所)|`PolynomialFeatures(d)`|`PolynomialFeatures(d, include_bias=False)`
56 | p. 233 旁註|`sc`や`lm`|`sc`や`lr`
57 | p. 233 旁註|`my_model.named_steps.lm`|`my_pipeline.named_steps.lr`
58 | p. 240 旁註|`sfs`と`lm`|`sfs`と`lr`
59 | p. 272 下から2行目|Sepal.With|Sepal.Width
60 | p. 341 脚註5|`autoplot(level = c(80, 90))`|`autoplot(level = c(80, 95))`
61 | p. 349 本文上から3行目|描かれいます|描かれています
62 |
--------------------------------------------------------------------------------
/code/R-notebook/r-12.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "id": "bc5770da",
5 | "cell_type": "markdown",
6 | "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)",
7 | "metadata": {}
8 | },
9 | {
10 | "cell_type": "code",
11 | "execution_count": null,
12 | "metadata": {},
13 | "outputs": [],
14 | "source": [
15 | "# Google Colaboratory\u306e\u74b0\u5883\u8a2d\u5b9a\n",
16 | "if (Sys.getenv(\"COLAB_JUPYTER_IP\") != \"\") {\n",
17 | " options(Ncpus = parallel::detectCores())\n",
18 | " installed_packages <- rownames(installed.packages())\n",
19 | " packages_to_install <- c(\"caret\", \"fable\", \"feasts\", \"prophet\", \"tsibble\", \"urca\")\n",
20 | " install.packages(setdiff(packages_to_install, installed_packages))\n",
21 | " install.packages(c(\"ggplot2\"))\n",
22 | "}"
23 | ],
24 | "id": "464ec67c-16a7-4275-83d5-52bb7831ad0d"
25 | },
26 | {
27 | "id": "ce518daf",
28 | "cell_type": "markdown",
29 | "source": "## 12.1 \u65e5\u6642\u3068\u65e5\u6642\u306e\u5217",
30 | "metadata": {}
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": null,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "as.POSIXct(\"2021-01-01\")"
39 | ],
40 | "id": "1924ebe8-4882-4ff3-b33f-d4153e8015cd"
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "library(tsibble)\n",
49 | "\n",
50 | "seq(from = 2021, to = 2023, by = 1)\n",
51 | "\n",
52 | "seq(from = yearmonth(\"202101\"), to = yearmonth(\"202103\"), by = 2)\n",
53 | "\n",
54 | "seq(from = as.POSIXct(\"2021-01-01\"), to = as.POSIXct(\"2021-01-03\"), by = \"1 day\")\n",
55 | "\n",
56 | "seq(from = as.POSIXct(\"2021-01-01 00:00:00\"),\n",
57 | " to = as.POSIXct(\"2021-01-01 03:00:00\"), by = \"2 hour\")"
58 | ],
59 | "id": "0f224c5a-78c5-45e8-abf2-7904a5c2319f"
60 | },
61 | {
62 | "id": "29cbc74a",
63 | "cell_type": "markdown",
64 | "source": "## 12.2 \u6642\u7cfb\u5217\u30c7\u30fc\u30bf\u306e\u4e88\u6e2c",
65 | "metadata": {}
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "my_data <- as.vector(AirPassengers)"
74 | ],
75 | "id": "2cb040ee-9b23-4d7e-b0c7-6d39baf86c47"
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": null,
80 | "metadata": {},
81 | "outputs": [],
82 | "source": [
83 | "n <- length(my_data) # \u30c7\u30fc\u30bf\u6570\uff08144\uff09\n",
84 | "k <- 108 # \u8a13\u7df4\u30c7\u30fc\u30bf\u6570"
85 | ],
86 | "id": "551c2b5a-8f7b-474a-984d-d785fab1a107"
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "library(tidyverse)\n",
95 | "library(tsibble)\n",
96 | "\n",
97 | "my_ds <- seq(\n",
98 | " from = yearmonth(\"1949/01\"),\n",
99 | " to = yearmonth(\"1960/12\"),\n",
100 | " by = 1)\n",
101 | "my_label <- rep(\n",
102 | " c(\"train\", \"test\"),\n",
103 | " c(k, n - k))\n",
104 | "my_df <- tsibble(\n",
105 | " ds = my_ds,\n",
106 | " x = 0:(n - 1),\n",
107 | " y = my_data,\n",
108 | " label = my_label,\n",
109 | " index = ds) # \u65e5\u6642\u306e\u5217\u306e\u6307\u5b9a\n",
110 | "\n",
111 | "head(my_df)"
112 | ],
113 | "id": "2c3c9a07-fce3-4f34-8411-3f99100d4580"
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": null,
118 | "metadata": {},
119 | "outputs": [],
120 | "source": [
121 | "my_train <- my_df[ 1:k , ]\n",
122 | "my_test <- my_df[-(1:k), ]\n",
123 | "y <- my_test$y"
124 | ],
125 | "id": "a830e70a-136d-40f4-b30a-36635556d054"
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "my_plot <- my_df %>%\n",
134 | " ggplot(aes(x = ds,\n",
135 | " y = y,\n",
136 | " color = label)) +\n",
137 | " geom_line()\n",
138 | "my_plot"
139 | ],
140 | "id": "84617760-c969-4691-87d4-35e9b3c37d2d"
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": null,
145 | "metadata": {},
146 | "outputs": [],
147 | "source": [
148 | "library(caret)\n",
149 | "my_lm_model <- train(form = y ~ x, data = my_train, method = \"lm\")\n",
150 | "y_ <- my_lm_model %>% predict(my_test)\n",
151 | "caret::RMSE(y, y_) # RMSE\uff08\u30c6\u30b9\u30c8\uff09"
152 | ],
153 | "id": "1c2cd6a3-13a6-48f4-9615-0602d3e60958"
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "metadata": {},
159 | "outputs": [],
160 | "source": [
161 | "y_ <- my_lm_model %>% predict(my_df)\n",
162 | "tmp <- my_df %>%\n",
163 | " mutate(y = y_, label = \"model\")\n",
164 | "my_plot + geom_line(data = tmp)"
165 | ],
166 | "id": "bf914a75-a3dc-40cb-8d0f-8d5dfd2ab11c"
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "library(fable)\n",
175 | "my_arima_model <- my_train %>% model(ARIMA(y))\n",
176 | "my_arima_model"
177 | ],
178 | "id": "b7c8e8cb-940b-4eee-8fc8-6bb16e043927"
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": null,
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "tmp <- my_arima_model %>% forecast(h = \"3 years\")\n",
187 | "head(tmp)"
188 | ],
189 | "id": "6fc63b0e-666e-4a05-90ac-88e5210500ff"
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": null,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "y_ <- tmp$.mean\n",
198 | "caret::RMSE(y_, y)"
199 | ],
200 | "id": "19166890-94bf-4442-b5d8-e45dc405d0e4"
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": null,
205 | "metadata": {},
206 | "outputs": [],
207 | "source": [
208 | "# \u4e88\u6e2c\u7d50\u679c\u306e\u307f\u3067\u3088\u3044\u5834\u5408\n",
209 | "#tmp %>% autoplot\n",
210 | "\n",
211 | "tmp %>% autoplot +\n",
212 | " geom_line(data = my_df,\n",
213 | " aes(x = ds,\n",
214 | " y = y,\n",
215 | " color = label))"
216 | ],
217 | "id": "6f82290c-51f8-4523-a225-46542fe46055"
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "library(prophet)\n",
226 | "my_prophet_model <- my_train %>%\n",
227 | " prophet(seasonality.mode = \"multiplicative\")"
228 | ],
229 | "id": "285cd4a1-cc34-4a04-8671-12ab80c52ca0"
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {},
235 | "outputs": [],
236 | "source": [
237 | "tmp <- my_prophet_model %>% predict(my_test)\n",
238 | "head(tmp[, c(\"ds\", \"yhat\", \"yhat_lower\", \"yhat_upper\")])"
239 | ],
240 | "id": "a24e324e-882d-4d09-91c8-37632736c396"
241 | },
242 | {
243 | "cell_type": "code",
244 | "execution_count": null,
245 | "metadata": {},
246 | "outputs": [],
247 | "source": [
248 | "y_ <- tmp$yhat\n",
249 | "caret::RMSE(y_, y)"
250 | ],
251 | "id": "c532ee13-3b8d-407f-89a1-7c0668486f15"
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": null,
256 | "metadata": {},
257 | "outputs": [],
258 | "source": [
259 | "# my_prophet_model %>% plot(tmp) # \u4e88\u6e2c\u7d50\u679c\u306e\u307f\u3067\u3088\u3044\u5834\u5408\n",
260 | "\n",
261 | "my_prophet_model %>% plot(tmp) +\n",
262 | " geom_line(data = my_train, aes(x = as.POSIXct(ds))) +\n",
263 | " geom_line(data = my_test, aes(x = as.POSIXct(ds)), color = \"red\")"
264 | ],
265 | "id": "b4b145ed-eef4-4399-bf35-7948c988f688"
266 | }
267 | ],
268 | "nbformat": 4,
269 | "nbformat_minor": 5,
270 | "metadata": {
271 | "kernelspec": {
272 | "name": "ir",
273 | "display_name": "R"
274 | }
275 | }
276 | }
--------------------------------------------------------------------------------
/code/Python-notebook/python-12.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "id": "f86449f9",
5 | "cell_type": "markdown",
6 | "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)",
7 | "metadata": {}
8 | },
9 | {
10 | "cell_type": "code",
11 | "execution_count": null,
12 | "metadata": {},
13 | "outputs": [],
14 | "source": [
15 | "# Google Colaboratory\u306e\u74b0\u5883\u8a2d\u5b9a\n",
16 | "import os\n",
17 | "if 'COLAB_GPU' in os.environ:\n",
18 | " !python -m pip install pmdarima | tail -n 1"
19 | ],
20 | "id": "01c13eaf-b572-4570-979a-15591bc77674"
21 | },
22 | {
23 | "id": "efe8c46d",
24 | "cell_type": "markdown",
25 | "source": "## 12.1 \u65e5\u6642\u3068\u65e5\u6642\u306e\u5217",
26 | "metadata": {}
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": null,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "import pandas as pd\n",
35 | "pd.to_datetime('2020-01-01')"
36 | ],
37 | "id": "ad2111fa-8b0d-4ccb-a872-de1004bb5ea2"
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "pd.date_range(start='2021-01-01', end='2023-01-01', freq='1A')\n",
46 | "\n",
47 | "pd.date_range(start='2021-01-01', end='2023-01-01', freq='1AS')\n",
48 | "\n",
49 | "pd.date_range(start='2021-01-01', end='2021-03-01', freq='2M')\n",
50 | "\n",
51 | "pd.date_range(start='2021-01-01', end='2021-03-01', freq='2MS')\n",
52 | "\n",
53 | "pd.date_range(start='2021-01-01', end='2021-01-03', freq='1D')\n",
54 | "\n",
55 | "pd.date_range(start='2021-01-01 00:00:00', end='2021-01-01 03:00:00', freq='2H')"
56 | ],
57 | "id": "9a125fa4-d6d9-456b-a27c-39aed4f96653"
58 | },
59 | {
60 | "id": "ee17dc29",
61 | "cell_type": "markdown",
62 | "source": "## 12.2 \u6642\u7cfb\u5217\u30c7\u30fc\u30bf\u306e\u4e88\u6e2c",
63 | "metadata": {}
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "import matplotlib.pyplot as plt\n",
72 | "import pandas as pd\n",
73 | "from pmdarima.datasets import airpassengers\n",
74 | "from sklearn.metrics import mean_squared_error\n",
75 | "\n",
76 | "my_data = airpassengers.load_airpassengers()"
77 | ],
78 | "id": "8518eedc-0cf0-473b-9588-5ec3cc4c6dc3"
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": [
86 | "n = len(my_data) # \u30c7\u30fc\u30bf\u6570\uff08144\uff09\n",
87 | "k = 108 # \u8a13\u7df4\u30c7\u30fc\u30bf\u6570"
88 | ],
89 | "id": "b055dc4e-65b6-4cc7-852e-50cc6a87f318"
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {},
95 | "outputs": [],
96 | "source": [
97 | "my_ds = pd.date_range(\n",
98 | " start='1949/01/01',\n",
99 | " end='1960/12/01',\n",
100 | " freq='MS')\n",
101 | "my_df = pd.DataFrame({\n",
102 | " 'ds': my_ds,\n",
103 | " 'x': range(n),\n",
104 | " 'y': my_data},\n",
105 | " index=my_ds)\n",
106 | "my_df.head()"
107 | ],
108 | "id": "091ac6e3-3429-4730-b480-0b17fbd5173f"
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": null,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "my_train = my_df[ :k]\n",
117 | "my_test = my_df[-(n - k): ]\n",
118 | "y = my_test.y"
119 | ],
120 | "id": "6dd363fe-7392-4bb6-86aa-5f92e2b685e5"
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "plt.plot(my_train.y, label='train')\n",
129 | "plt.plot(my_test.y, label='test')\n",
130 | "plt.legend()"
131 | ],
132 | "id": "9d0cc675-c03f-4c58-893d-ddd04cbfd8c6"
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "from sklearn.linear_model import LinearRegression\n",
141 | "\n",
142 | "my_lm_model = LinearRegression()\n",
143 | "my_lm_model.fit(my_train[['x']], my_train.y)\n",
144 | "\n",
145 | "X = my_test[['x']]\n",
146 | "y_ = my_lm_model.predict(X)\n",
147 | "mean_squared_error(y, y_)**0.5 # RMSE\uff08\u30c6\u30b9\u30c8\uff09"
148 | ],
149 | "id": "8f54ed6b-04c1-4ec4-adee-d9845c60a5c9"
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": null,
154 | "metadata": {},
155 | "outputs": [],
156 | "source": [
157 | "y_ = my_lm_model.predict(my_df[['x']])\n",
158 | "tmp = pd.DataFrame(y_,\n",
159 | " index=my_df.index)\n",
160 | "plt.plot(my_train.y, label='train')\n",
161 | "plt.plot(my_test.y, label='test')\n",
162 | "plt.plot(tmp, label='model')\n",
163 | "plt.legend()"
164 | ],
165 | "id": "49eed027-c6d5-46dc-9d2a-f6ee12eb49ee"
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": null,
170 | "metadata": {},
171 | "outputs": [],
172 | "source": [
173 | "import pmdarima as pm\n",
174 | "my_arima_model = pm.auto_arima(my_train.y, m=12, trace=True)"
175 | ],
176 | "id": "3869df3e-9fb3-491d-926f-991b43092303"
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "y_, my_ci = my_arima_model.predict(len(my_test), # \u671f\u9593\u306f\u30c6\u30b9\u30c8\u30c7\u30fc\u30bf\u3068\u540c\u3058\uff0e\n",
185 | " alpha=0.05, # \u6709\u610f\u6c34\u6e96\uff08\u30c7\u30d5\u30a9\u30eb\u30c8\uff09\n",
186 | " return_conf_int=True) # \u4fe1\u983c\u533a\u9593\u3092\u6c42\u3081\u308b\uff0e\n",
187 | "tmp = pd.DataFrame({'y': y_,\n",
188 | " 'Lo': my_ci[:, 0],\n",
189 | " 'Hi': my_ci[:, 1]},\n",
190 | " index=my_test.index)\n",
191 | "tmp.head()"
192 | ],
193 | "id": "f1c97130-5ab0-4cdc-a8fb-8f9288bdbdba"
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": null,
198 | "metadata": {},
199 | "outputs": [],
200 | "source": [
201 | "mean_squared_error(y, y_)**0.5"
202 | ],
203 | "id": "0e230755-e4fd-4098-ba68-aa9eb51e4021"
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "metadata": {},
209 | "outputs": [],
210 | "source": [
211 | "plt.plot(my_train.y, label='train')\n",
212 | "plt.plot(my_test.y, label='test')\n",
213 | "plt.plot(tmp.y, label='model')\n",
214 | "plt.fill_between(tmp.index,\n",
215 | " tmp.Lo,\n",
216 | " tmp.Hi,\n",
217 | " alpha=0.25) # \u4e0d\u900f\u660e\u5ea6\n",
218 | "plt.legend(loc='upper left')"
219 | ],
220 | "id": "1904f642-1352-48e4-b998-59505e434131"
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {},
226 | "outputs": [],
227 | "source": [
228 | "try: from fbprophet import Prophet\n",
229 | "except ImportError: from prophet import Prophet\n",
230 | "my_prophet_model = Prophet(seasonality_mode='multiplicative')\n",
231 | "my_prophet_model.fit(my_train)"
232 | ],
233 | "id": "b8cd2056-fd59-47e8-8023-f6e4a5d77e51"
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": null,
238 | "metadata": {},
239 | "outputs": [],
240 | "source": [
241 | "tmp = my_prophet_model.predict(my_test)\n",
242 | "tmp[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head()"
243 | ],
244 | "id": "4c06ee6c-4454-4c37-87ad-ceaf0f1f159c"
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": null,
249 | "metadata": {},
250 | "outputs": [],
251 | "source": [
252 | "y_ = tmp.yhat\n",
253 | "mean_squared_error(y, y_)**0.5"
254 | ],
255 | "id": "f89c7ebe-4ef5-48fd-8fe9-d3f7def4b513"
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": null,
260 | "metadata": {},
261 | "outputs": [],
262 | "source": [
263 | "# my_prophet_model.plot(tmp) # \u4e88\u6e2c\u7d50\u679c\u306e\u307f\u3067\u3088\u3044\u5834\u5408\n",
264 | "\n",
265 | "fig = my_prophet_model.plot(tmp)\n",
266 | "fig.axes[0].plot(my_train.ds, my_train.y)\n",
267 | "fig.axes[0].plot(my_test.ds, my_test.y, color='red')"
268 | ],
269 | "id": "ff77ca1c-2efb-4362-92ac-74c33dcd2b38"
270 | }
271 | ],
272 | "nbformat": 4,
273 | "nbformat_minor": 5,
274 | "metadata": {
275 | "kernelspec": {
276 | "name": "python3",
277 | "display_name": "Python 3"
278 | }
279 | }
280 | }
--------------------------------------------------------------------------------
/addendum/sagemaker/sage-python.yml:
--------------------------------------------------------------------------------
1 | name: sage-python
2 | channels:
3 | - anaconda
4 | - conda-forge
5 | dependencies:
6 | - _libgcc_mutex=0.1=conda_forge
7 | - _openmp_mutex=4.5=2_gnu
8 | - _py-xgboost-mutex=2.0=cpu_0
9 | - abseil-cpp=20210324.2=h9c3ff4c_0
10 | - absl-py=0.15.0=pyhd8ed1ab_0
11 | - aiohttp=3.8.1=py38h0a891b7_1
12 | - aiosignal=1.2.0=pyhd8ed1ab_0
13 | - alsa-lib=1.2.3.2=h166bdaf_0
14 | - arviz=0.12.0=pyhd8ed1ab_0
15 | - asttokens=2.0.5=pyhd8ed1ab_0
16 | - astunparse=1.6.3=pyhd8ed1ab_0
17 | - async-timeout=4.0.2=pyhd8ed1ab_0
18 | - atk-1.0=2.36.0=h3371d22_4
19 | - attrs=21.4.0=pyhd8ed1ab_0
20 | - backcall=0.2.0=pyh9f0ad1d_0
21 | - backports=1.0=py_2
22 | - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
23 | - binutils_impl_linux-64=2.36.1=h193b22a_2
24 | - binutils_linux-64=2.36=hf3e587d_33
25 | - blinker=1.4=py_1
26 | - brotli=1.0.9=h166bdaf_7
27 | - brotli-bin=1.0.9=h166bdaf_7
28 | - bzip2=1.0.8=h7f98852_4
29 | - c-ares=1.18.1=h7f98852_0
30 | - ca-certificates=2022.4.26=h06a4308_0
31 | - cached-property=1.5.2=hd8ed1ab_1
32 | - cached_property=1.5.2=pyha770c72_1
33 | - cachetools=5.0.0=pyhd8ed1ab_0
34 | - cairo=1.16.0=h6cf1ce9_1008
35 | - certifi=2021.10.8=py38h06a4308_2
36 | - cftime=1.6.0=py38h71d37f0_1
37 | - charset-normalizer=2.0.12=pyhd8ed1ab_0
38 | - click=8.1.3=py38h578d9bd_0
39 | - colorama=0.4.4=pyhd3eb1b0_0
40 | - convertdate=2.4.0=pyhd8ed1ab_0
41 | - cryptography=36.0.2=py38h2b5fc30_1
42 | - cudatoolkit=11.6.0=habf752d_10
43 | - cudnn=8.2.1.32=h86fa8c9_0
44 | - curl=7.83.0=h7bff187_0
45 | - cycler=0.11.0=pyhd8ed1ab_0
46 | - cython=0.29.28=py38hfa26641_2
47 | - dbus=1.13.18=hb2f20db_0
48 | - debugpy=1.6.0=py38hfa26641_0
49 | - decorator=5.1.1=pyhd8ed1ab_0
50 | - dill=0.3.4=pyhd8ed1ab_0
51 | - entrypoints=0.4=pyhd8ed1ab_0
52 | - ephem=4.1.3=py38h0a891b7_4
53 | - executing=0.8.3=pyhd8ed1ab_0
54 | - expat=2.4.8=h27087fc_0
55 | - fbprophet=0.7.1=py38h950e882_0
56 | - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
57 | - font-ttf-inconsolata=3.000=h77eed37_0
58 | - font-ttf-source-code-pro=2.038=h77eed37_0
59 | - font-ttf-ubuntu=0.83=hab24e00_0
60 | - fontconfig=2.14.0=h8e229c2_0
61 | - fonts-conda-ecosystem=1=0
62 | - fonts-conda-forge=1=0
63 | - fonttools=4.33.3=py38h0a891b7_0
64 | - freetype=2.11.0=h70c0345_0
65 | - fribidi=1.0.10=h36c2ea0_0
66 | - frozenlist=1.3.0=py38h0a891b7_1
67 | - future=0.18.2=py38_1
68 | - gast=0.4.0=pyh9f0ad1d_0
69 | - gcc_impl_linux-64=7.5.0=habd7529_20
70 | - gcc_linux-64=7.5.0=h47867f9_33
71 | - gdk-pixbuf=2.42.6=h04a7f16_0
72 | - gettext=0.19.8.1=h0b5b191_1005
73 | - giflib=5.2.1=h36c2ea0_2
74 | - glib=2.69.1=h4ff587b_1
75 | - glib-tools=2.68.4=h9c3ff4c_0
76 | - google-auth=2.6.6=pyh6c4a22f_0
77 | - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0
78 | - google-pasta=0.2.0=pyh8c360ce_0
79 | - graphite2=1.3.13=h58526e2_1001
80 | - graphviz=2.47.2=h85b4f2f_0
81 | - grpc-cpp=1.39.1=h850795e_1
82 | - grpcio=1.39.0=py38hdd6454d_0
83 | - gst-plugins-base=1.18.5=hf529b03_0
84 | - gstreamer=1.18.5=h76c114f_0
85 | - gtk2=2.24.33=h539f30e_1
86 | - gts=0.7.6=h64030ff_2
87 | - gxx_impl_linux-64=7.5.0=hd0bb8aa_20
88 | - gxx_linux-64=7.5.0=h555fc39_33
89 | - h2o=3.18.0.2=0
90 | - h2o-py=3.36.1.1=pyhd8ed1ab_0
91 | - h5py=3.1.0=nompi_py38hafa665b_100
92 | - harfbuzz=3.0.0=h83ec7ef_1
93 | - hdf4=4.2.15=h10796ff_3
94 | - hdf5=1.10.6=nompi_h6a2412b_1114
95 | - hijri-converter=2.2.3=pyhd8ed1ab_0
96 | - holidays=0.13=pyhd8ed1ab_0
97 | - icu=68.2=h9c3ff4c_0
98 | - idna=3.3=pyhd3eb1b0_0
99 | - importlib-metadata=4.11.3=py38h578d9bd_1
100 | - importlib_metadata=4.11.3=hd8ed1ab_1
101 | - ipykernel=6.13.0=py38h7f3c49e_0
102 | - ipython=8.3.0=py38h578d9bd_0
103 | - jbig=2.1=h7f98852_2003
104 | - jedi=0.18.1=py38h578d9bd_1
105 | - joblib=1.1.0=pyhd8ed1ab_0
106 | - jpeg=9e=h166bdaf_1
107 | - jupyter_client=7.3.0=pyhd8ed1ab_0
108 | - jupyter_core=4.9.2=py38h578d9bd_0
109 | - keras=2.6.0=pyhd8ed1ab_1
110 | - keras-preprocessing=1.1.2=pyhd8ed1ab_0
111 | - kernel-headers_linux-64=2.6.32=he073ed8_15
112 | - keyutils=1.6.1=h166bdaf_0
113 | - kiwisolver=1.4.2=py38h43d8883_1
114 | - korean_lunar_calendar=0.2.1=pyh9f0ad1d_0
115 | - krb5=1.19.3=h3790be6_0
116 | - lcms2=2.12=hddcbb42_0
117 | - ld_impl_linux-64=2.36.1=hea4e1c9_2
118 | - lerc=3.0=h9c3ff4c_0
119 | - libblas=3.9.0=14_linux64_openblas
120 | - libbrotlicommon=1.0.9=h166bdaf_7
121 | - libbrotlidec=1.0.9=h166bdaf_7
122 | - libbrotlienc=1.0.9=h166bdaf_7
123 | - libcblas=3.9.0=14_linux64_openblas
124 | - libclang=11.1.0=default_ha53f305_1
125 | - libcurl=7.83.0=h7bff187_0
126 | - libdeflate=1.10=h7f98852_0
127 | - libedit=3.1.20191231=he28a2e2_2
128 | - libev=4.33=h516909a_1
129 | - libevent=2.1.10=h9b69904_4
130 | - libffi=3.3=h58526e2_2
131 | - libgcc-devel_linux-64=7.5.0=hda03d7c_20
132 | - libgcc-ng=11.2.0=h1d223b6_16
133 | - libgd=2.3.3=h6ad9fb6_0
134 | - libgfortran-ng=11.2.0=h69a702a_16
135 | - libgfortran5=11.2.0=h5c6108e_16
136 | - libglib=2.68.4=h3e27bee_0
137 | - libgomp=11.2.0=h1d223b6_16
138 | - libiconv=1.16=h516909a_0
139 | - liblapack=3.9.0=14_linux64_openblas
140 | - libllvm11=11.1.0=hf817b99_3
141 | - libnetcdf=4.8.1=nompi_hcd642e3_100
142 | - libnghttp2=1.47.0=h727a467_0
143 | - libogg=1.3.4=h7f98852_1
144 | - libopenblas=0.3.20=pthreads_h78a6416_0
145 | - libopus=1.3.1=h7f98852_1
146 | - libpng=1.6.37=hbc83047_0
147 | - libpq=13.5=hd57d9b9_1
148 | - libprotobuf=3.16.0=h780b84a_0
149 | - librsvg=2.50.5=hc3c00ef_0
150 | - libsodium=1.0.18=h36c2ea0_1
151 | - libssh2=1.10.0=ha56f1ee_2
152 | - libstdcxx-devel_linux-64=7.5.0=hb016644_20
153 | - libstdcxx-ng=11.2.0=he4da1e4_16
154 | - libtiff=4.3.0=h542a066_3
155 | - libtool=2.4.6=h9c3ff4c_1008
156 | - libuuid=2.32.1=h7f98852_1000
157 | - libvorbis=1.3.7=h9c3ff4c_0
158 | - libwebp=1.2.2=h3452ae3_0
159 | - libwebp-base=1.2.2=h7f98852_1
160 | - libxcb=1.14=h7b6447c_0
161 | - libxgboost=1.5.1=cpu_h3d145d1_2
162 | - libxkbcommon=1.0.3=he3ba5ed_0
163 | - libxml2=2.9.10=h72842e0_4
164 | - libxslt=1.1.33=h15afd5d_2
165 | - libzip=1.8.0=h4de3113_1
166 | - libzlib=1.2.11=h166bdaf_1014
167 | - lunarcalendar=0.0.9=py_0
168 | - lxml=4.8.0=py38h0a891b7_3
169 | - lz4-c=1.9.3=h9c3ff4c_1
170 | - markdown=3.3.7=pyhd8ed1ab_0
171 | - matplotlib=3.5.2=py38h578d9bd_0
172 | - matplotlib-base=3.5.2=py38h826bfd8_0
173 | - matplotlib-inline=0.1.3=pyhd8ed1ab_0
174 | - multidict=6.0.2=py38h0a891b7_1
175 | - munkres=1.1.4=pyh9f0ad1d_0
176 | - mysql-common=8.0.29=haf5c9bc_0
177 | - mysql-libs=8.0.29=h28c427c_0
178 | - nccl=2.12.10.1=h0800d71_0
179 | - ncurses=6.3=h27087fc_1
180 | - nest-asyncio=1.5.5=pyhd8ed1ab_0
181 | - netcdf4=1.5.7=nompi_py38hcc16cfe_101
182 | - nspr=4.32=h9c3ff4c_1
183 | - nss=3.77=h2350873_0
184 | - oauthlib=3.2.0=pyhd8ed1ab_0
185 | - openjdk=11.0.13=h87a67e3_0
186 | - openjpeg=2.4.0=hb52868f_1
187 | - openssl=1.1.1o=h166bdaf_0
188 | - opt_einsum=3.3.0=pyhd8ed1ab_1
189 | - packaging=21.3=pyhd8ed1ab_0
190 | - pandarallel=1.6.1=pyhd8ed1ab_0
191 | - pandas=1.4.2=py38h47df419_1
192 | - pango=1.48.10=h54213e6_2
193 | - parso=0.8.3=pyhd8ed1ab_0
194 | - patsy=0.5.2=pyhd8ed1ab_0
195 | - pcre=8.45=h295c915_0
196 | - pexpect=4.8.0=pyh9f0ad1d_2
197 | - pickleshare=0.7.5=py_1003
198 | - pillow=9.1.0=py38h0ee0e06_2
199 | - pip=22.0.4=pyhd8ed1ab_0
200 | - pixman=0.40.0=h36c2ea0_0
201 | - pmdarima=1.8.2=py38h497a2fe_3
202 | - prompt-toolkit=3.0.29=pyha770c72_0
203 | - protobuf=3.16.0=py38h709712a_0
204 | - psutil=5.9.0=py38h0a891b7_1
205 | - pthread-stubs=0.4=h36c2ea0_1001
206 | - ptyprocess=0.7.0=pyhd3deb0d_0
207 | - pure_eval=0.2.2=pyhd8ed1ab_0
208 | - py-xgboost=1.5.1=cpu_py38h66f0ec1_2
209 | - pyasn1=0.4.8=py_0
210 | - pyasn1-modules=0.2.7=py_0
211 | - pycparser=2.21=pyhd3eb1b0_0
212 | - pygments=2.12.0=pyhd8ed1ab_0
213 | - pyjwt=2.3.0=pyhd8ed1ab_1
214 | - pymeeus=0.5.10=pyhd8ed1ab_0
215 | - pyopenssl=22.0.0=pyhd3eb1b0_0
216 | - pyparsing=3.0.8=pyhd8ed1ab_0
217 | - pyqt=5.12.3=py38h578d9bd_8
218 | - pyqt-impl=5.12.3=py38h0ffb2e6_8
219 | - pyqt5-sip=4.19.18=py38h709712a_8
220 | - pyqtchart=5.12=py38h7400c14_8
221 | - pyqtwebengine=5.12.1=py38h7400c14_8
222 | - pysocks=1.7.1=py38h06a4308_0
223 | - pystan=2.19.1.1=py38hc5bc63f_2
224 | - python=3.8.8=hffdb5ce_0_cpython
225 | - python-dateutil=2.8.2=pyhd8ed1ab_0
226 | - python-flatbuffers=1.12=pyhd8ed1ab_1
227 | - python-graphviz=0.20=pyhaef67bd_0
228 | - python_abi=3.8=2_cp38
229 | - pytz=2022.1=pyhd8ed1ab_0
230 | - pyu2f=0.1.5=pyhd8ed1ab_0
231 | - pyzmq=22.3.0=py38hfc09fa9_2
232 | - qt=5.12.9=hda022c4_4
233 | - re2=2021.09.01=h9c3ff4c_0
234 | - readline=8.1=h46c0cb4_0
235 | - requests=2.27.1=pyhd3eb1b0_0
236 | - requests-oauthlib=1.3.1=pyhd8ed1ab_0
237 | - rsa=4.8=pyhd8ed1ab_0
238 | - scikit-learn=1.0.2=py38h1561384_0
239 | - scipy=1.6.3=py38h7b17777_0
240 | - seaborn=0.11.2=hd8ed1ab_0
241 | - seaborn-base=0.11.2=pyhd8ed1ab_0
242 | - setuptools=49.6.0=py38h578d9bd_3
243 | - six=1.15.0=pyh9f0ad1d_0
244 | - snappy=1.1.9=hbd366e4_0
245 | - sqlite=3.38.4=h4ff8645_0
246 | - stack_data=0.2.0=pyhd8ed1ab_0
247 | - statsmodels=0.13.2=py38h6c62de6_0
248 | - sysroot_linux-64=2.12=he073ed8_15
249 | - tabulate=0.8.9=py38h06a4308_0
250 | - tensorboard=2.9.0=pyhd8ed1ab_0
251 | - tensorboard-data-server=0.6.0=py38h2b5fc30_2
252 | - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
253 | - tensorflow=2.6.0=cuda112py38hbe5352d_2
254 | - tensorflow-base=2.6.0=cuda112py38heae9c4c_2
255 | - tensorflow-estimator=2.6.0=cuda112py38hb2194ef_2
256 | - tensorflow-gpu=2.6.0=cuda112py38h0bbbad9_2
257 | - termcolor=1.1.0=py_2
258 | - threadpoolctl=3.1.0=pyh8a188c0_0
259 | - tk=8.6.12=h27826a3_0
260 | - tornado=6.1=py38h0a891b7_3
261 | - tqdm=4.64.0=pyhd8ed1ab_0
262 | - traitlets=5.1.1=pyhd8ed1ab_0
263 | - typing-extensions=3.7.4.3=0
264 | - typing_extensions=3.7.4.3=py_0
265 | - unicodedata2=14.0.0=py38h0a891b7_1
266 | - urllib3=1.26.9=py38h06a4308_0
267 | - wcwidth=0.2.5=pyh9f0ad1d_2
268 | - werkzeug=2.1.2=pyhd8ed1ab_1
269 | - wheel=0.37.1=pyhd8ed1ab_0
270 | - wrapt=1.12.1=py38h497a2fe_3
271 | - xarray=2022.3.0=pyhd8ed1ab_0
272 | - xgboost=1.5.1=cpu_py38h66f0ec1_2
273 | - xorg-kbproto=1.0.7=h7f98852_1002
274 | - xorg-libice=1.0.10=h7f98852_0
275 | - xorg-libsm=1.2.3=hd9c2040_1000
276 | - xorg-libx11=1.7.2=h7f98852_0
277 | - xorg-libxau=1.0.9=h7f98852_0
278 | - xorg-libxdmcp=1.1.3=h7f98852_0
279 | - xorg-libxext=1.3.4=h7f98852_1
280 | - xorg-libxrender=0.9.10=h7f98852_1003
281 | - xorg-renderproto=0.11.1=h7f98852_1002
282 | - xorg-xextproto=7.3.0=h7f98852_1002
283 | - xorg-xproto=7.0.31=h7f98852_1007
284 | - xz=5.2.5=h516909a_1
285 | - yarl=1.7.2=py38h0a891b7_2
286 | - zeromq=4.3.4=h9c3ff4c_1
287 | - zipp=3.8.0=pyhd8ed1ab_0
288 | - zlib=1.2.11=h166bdaf_1014
289 | - zstd=1.5.2=ha95c52a_0
290 | - pip:
291 | - brotlipy==0.7.0
292 | - cffi==1.14.6
293 | - colourmap==1.1.4
294 | - numpy==1.19.5
295 | - pca==1.8.0
296 | - scatterd==1.1.1
297 | - sklearn==0.0
298 | - wget==3.2
299 | prefix: /home/studio-lab-user/.conda/envs/sage-python
300 |
--------------------------------------------------------------------------------
/code/Python-notebook/python-05.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "id": "1438c612",
5 | "cell_type": "markdown",
6 | "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n",
7 | "metadata": {}
8 | },
9 | {
10 | "id": "edf8369d",
11 | "cell_type": "markdown",
12 | "source": "## 5.1 \u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f",
13 | "metadata": {}
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "!wget https://raw.githubusercontent.com/taroyabuki/fromzero/master/data/exam.csv"
22 | ],
23 | "id": "dde757dc-9d6e-451c-8dae-235f5d11837f"
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "import pandas as pd\n",
32 | "my_df = pd.read_csv('exam.csv')\n",
33 | "my_df"
34 | ],
35 | "id": "5e3cc804-49d8-4385-b3a0-add7fdb06dbf"
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "my_url = ('https://raw.githubusercontent.com/taroyabuki'\n",
44 | " '/fromzero/master/data/exam.csv')\n",
45 | "my_df = pd.read_csv(my_url)"
46 | ],
47 | "id": "a650bedc-240c-4d20-b683-6f533797c093"
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "my_df2 = pd.read_csv('exam.csv',\n",
56 | " index_col='name')\n",
57 | "my_df2"
58 | ],
59 | "id": "917f3b47-9d4d-489c-8f66-edb8f7482d90"
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "my_df.to_csv('exam2.csv', index=False)"
68 | ],
69 | "id": "30d65be6-8ad2-455d-8251-ff1e1417275f"
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {},
75 | "outputs": [],
76 | "source": [
77 | "my_df2.to_csv('exam3.csv')"
78 | ],
79 | "id": "e1df7d45-417b-462d-9c41-f573dacda2d7"
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "my_df = pd.read_csv('exam.csv',\n",
88 | " encoding='UTF-8')"
89 | ],
90 | "id": "1a639f8c-4c4a-4328-ab51-8b8570914508"
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": null,
95 | "metadata": {},
96 | "outputs": [],
97 | "source": [
98 | "my_df.to_csv('exam2.csv', index=False, encoding='UTF-8')"
99 | ],
100 | "id": "8ff33ada-1100-4f30-99ef-39a4d6ad69e8"
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "my_url = 'https://taroyabuki.github.io/fromzero/exam.html'\n",
109 | "my_tables = pd.read_html(my_url)"
110 | ],
111 | "id": "4a779db4-1630-403f-a8e5-8423aa4a7193"
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {},
117 | "outputs": [],
118 | "source": [
119 | "my_tables"
120 | ],
121 | "id": "3b61881e-cd4b-4bf4-87f7-e35cea5fd7fc"
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": null,
126 | "metadata": {},
127 | "outputs": [],
128 | "source": [
129 | "my_tables[0]"
130 | ],
131 | "id": "f9476c63-cc57-4771-9788-a185445edaa4"
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "# 1\u5217\u76ee\u4ee5\u964d\u3092\u53d6\u308a\u51fa\u3059\uff0e\n",
140 | "my_data = my_tables[0].iloc[:, 1:]\n",
141 | "my_data"
142 | ],
143 | "id": "16e538cf-013a-4b4b-9c45-0e4ff0897bdf"
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": null,
148 | "metadata": {},
149 | "outputs": [],
150 | "source": [
151 | "my_url = ('https://raw.githubusercontent.com/taroyabuki'\n",
152 | " '/fromzero/master/data/exam.json')\n",
153 | "my_data = pd.read_json(my_url)\n",
154 | "#my_data = pd.read_json('exam.json') # \uff08\u30d5\u30a1\u30a4\u30eb\u3092\u4f7f\u3046\u5834\u5408\uff09\n",
155 | "my_data"
156 | ],
157 | "id": "13e9c6e1-47ab-467b-b851-0091a203a907"
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": null,
162 | "metadata": {},
163 | "outputs": [],
164 | "source": [
165 | "import xml.etree.ElementTree as ET\n",
166 | "from urllib.request import urlopen\n",
167 | "\n",
168 | "my_url = ('https://raw.githubusercontent.com/taroyabuki'\n",
169 | " '/fromzero/master/data/exam.xml')\n",
170 | "with urlopen(my_url) as f:\n",
171 | " my_tree = ET.parse(f) # XML\u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f\n",
172 | "\n",
173 | "#my_tree = ET.parse('exam.xml') # \uff08\u30d5\u30a1\u30a4\u30eb\u3092\u4f7f\u3046\u5834\u5408\uff09\n",
174 | "my_ns = '{https://www.example.net/ns/1.0}' # \u540d\u524d\u7a7a\u9593"
175 | ],
176 | "id": "93fcbfaf-c73b-4f53-844e-56dd5ed485c1"
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "my_records = my_tree.findall(f'.//{my_ns}record')"
185 | ],
186 | "id": "ada9000a-d26f-4870-a1e0-a316c10bb88b"
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {},
192 | "outputs": [],
193 | "source": [
194 | "def f(record):\n",
195 | " my_dic1 = record.attrib # \u5c5e\u6027\u3092\u53d6\u308a\u51fa\u3059\uff0e\n",
196 | " # \u5b50\u8981\u7d20\u306e\u540d\u524d\u3068\u5185\u5bb9\u306e\u30da\u30a2\u3092\u8f9e\u66f8\u306b\u3059\u308b\uff0e\n",
197 | " my_dic2 = {child.tag.replace(my_ns, ''): child.text for child in list(record)}\n",
198 | " return {**my_dic1, **my_dic2} # \u8f9e\u66f8\u3092\u7d50\u5408\u3059\u308b\uff0e"
199 | ],
200 | "id": "c9f2b840-74b1-455c-9068-a589fbb4d4fa"
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": null,
205 | "metadata": {},
206 | "outputs": [],
207 | "source": [
208 | "my_data = pd.DataFrame([f(record) for record in my_records])\n",
209 | "my_data['english'] = pd.to_numeric(my_data['english'])\n",
210 | "my_data['math'] = pd.to_numeric(my_data['math'])\n",
211 | "my_data"
212 | ],
213 | "id": "49c1e7fe-aff6-48a0-8132-d5003d79ca6c"
214 | },
215 | {
216 | "id": "ca69954f",
217 | "cell_type": "markdown",
218 | "source": "## 5.2 \u30c7\u30fc\u30bf\u306e\u5909\u63db",
219 | "metadata": {}
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": null,
224 | "metadata": {},
225 | "outputs": [],
226 | "source": [
227 | "import numpy as np\n",
228 | "from scipy.stats import zscore\n",
229 | "\n",
230 | "x1 = [1, 2, 3]\n",
231 | "\n",
232 | "z1 = ((x1 - np.mean(x1)) /\n",
233 | " np.std(x1, ddof=1))\n",
234 | "# \u3042\u308b\u3044\u306f\n",
235 | "z1 = zscore(x1, ddof=1)\n",
236 | "\n",
237 | "z1"
238 | ],
239 | "id": "90ff1c01-02d5-40c4-9f07-56c52e64a48e"
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": null,
244 | "metadata": {},
245 | "outputs": [],
246 | "source": [
247 | "z1.mean(), np.std(z1, ddof=1)"
248 | ],
249 | "id": "87140628-936c-4d2c-a4b8-f9caea5322b4"
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": null,
254 | "metadata": {},
255 | "outputs": [],
256 | "source": [
257 | "z1 * np.std(x1, ddof=1) + np.mean(x1)"
258 | ],
259 | "id": "48505728-2c9a-48da-bc13-7dc6e316a1f3"
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": null,
264 | "metadata": {},
265 | "outputs": [],
266 | "source": [
267 | "x2 = [1, 3, 5]\n",
268 | "z2 = ((x2 - np.mean(x1)) /\n",
269 | " np.std(x1, ddof=1))\n",
270 | "z2.mean(), np.std(z2, ddof=1)"
271 | ],
272 | "id": "2169f911-ba5f-4d45-8d4d-02adda1adfb2"
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {},
278 | "outputs": [],
279 | "source": [
280 | "import pandas as pd\n",
281 | "from sklearn.preprocessing import (\n",
282 | " OneHotEncoder)\n",
283 | "\n",
284 | "my_df = pd.DataFrame({\n",
285 | " 'id': [ 1 , 2 , 3 ],\n",
286 | " 'class': ['A', 'B', 'C']})\n",
287 | "\n",
288 | "my_enc = OneHotEncoder()\n",
289 | "tmp = my_enc.fit_transform(\n",
290 | " my_df[['class']]).toarray()\n",
291 | "my_names = my_enc.get_feature_names() \\\n",
292 | "if hasattr(my_enc, 'get_feature_names') \\\n",
293 | "else my_enc.get_feature_names_out()\n",
294 | "pd.DataFrame(tmp, columns=my_names)"
295 | ],
296 | "id": "b97df278-4912-490f-9518-146ef7171868"
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": null,
301 | "metadata": {},
302 | "outputs": [],
303 | "source": [
304 | "my_df2 = pd.DataFrame({\n",
305 | " 'id': [ 4 , 5, 6 ],\n",
306 | " 'class': ['B', 'C', 'B']})\n",
307 | "tmp = my_enc.transform(\n",
308 | " my_df2[['class']]).toarray()\n",
309 | "pd.DataFrame(tmp, columns=my_names)"
310 | ],
311 | "id": "61707b06-bef2-466b-8eee-2612578af36d"
312 | },
313 | {
314 | "cell_type": "code",
315 | "execution_count": null,
316 | "metadata": {},
317 | "outputs": [],
318 | "source": [
319 | "my_enc = OneHotEncoder(drop='first')\n",
320 | "\n",
321 | "tmp = my_enc.fit_transform(\n",
322 | " my_df[['class']]).toarray()\n",
323 | "my_names = my_enc.get_feature_names() \\\n",
324 | "if hasattr(my_enc, 'get_feature_names') \\\n",
325 | "else my_enc.get_feature_names_out()\n",
326 | "pd.DataFrame(tmp, columns=my_names)\n",
327 | "\n",
328 | "tmp = my_enc.transform(\n",
329 | " my_df2[['class']]).toarray()\n",
330 | "pd.DataFrame(tmp, columns=my_names)"
331 | ],
332 | "id": "d551e6a4-ef05-44ff-b5ef-1d337077850d"
333 | }
334 | ],
335 | "nbformat": 4,
336 | "nbformat_minor": 5,
337 | "metadata": {
338 | "kernelspec": {
339 | "name": "python3",
340 | "display_name": "Python 3"
341 | }
342 | }
343 | }
--------------------------------------------------------------------------------