├── .gitignore ├── figures ├── fig │ ├── 図1.2.pdf │ ├── 図1.4.pdf │ ├── 図10.1.pdf │ ├── 図10.2.pdf │ ├── 図11.1.pdf │ ├── 図11.2.pdf │ ├── 図11.3.pdf │ ├── 図11.4.pdf │ ├── 図11.5.png │ ├── 図2.2.pdf │ ├── 図2.3.pdf │ ├── 図2.4.png │ ├── 図2.5.png │ ├── 図2.6.png │ ├── 図2.7.png │ ├── 図3.1.pdf │ ├── 図6.1.pdf │ ├── 図7.1.pdf │ ├── 図7.2.pdf │ ├── 図7.3.pdf │ ├── 図8.1.pdf │ ├── 図A.1.png │ ├── 表11.1.pdf │ ├── 表11.b.pdf │ ├── 図1-last.pdf │ ├── 図1.1.a.png │ ├── 図1.1.b.png │ ├── 図2.1.a.png │ ├── 図2.1.b.png │ ├── 図2.8.a.png │ ├── 図2.8.b.png │ ├── 図2.9.a.png │ ├── 図2.9.b.png │ ├── 図1-preface.a.pdf │ ├── 図1-preface.b.pdf │ ├── 図1-last.md │ ├── 図3.1.md │ └── 図1.4.md ├── fig-p │ ├── 07-p-knn.pdf │ ├── 07-p-lm.pdf │ ├── 10-p-pr.pdf │ ├── 10-p-roc.pdf │ ├── 03-p-temp.pdf │ ├── 04-p-conf.pdf │ ├── 04-p-curve.pdf │ ├── 04-p-hist1.pdf │ ├── 04-p-hist2.pdf │ ├── 04-p-hist3.pdf │ ├── 04-p-iris.pdf │ ├── 04-p-mosaic.pdf │ ├── 04-p-rbinom.pdf │ ├── 04-p-rnorm.pdf │ ├── 04-p-runif.pdf │ ├── 07-p-plot.pdf │ ├── 07-p-tuning.pdf │ ├── 09-p-rpart.pdf │ ├── 09-p-rpart2.pdf │ ├── 09-p-varimp.pdf │ ├── 11-p-relu.pdf │ ├── 13-p-biplot.pdf │ ├── 13-p-elbow.pdf │ ├── 13-p-hclust.pdf │ ├── 04-p-boxplot.pdf │ ├── 04-p-pvalue1.pdf │ ├── 04-p-scatter.pdf │ ├── 07-p-boxplot.pdf │ ├── 07-p-residual.pdf │ ├── 08-p-boxplot.pdf │ ├── 08-p-sigmoid.pdf │ ├── 10-p-logistic.pdf │ ├── 11-p-h2o-wine.pdf │ ├── 13-p-heatmap.pdf │ ├── 04-p-boot-binom.pdf │ ├── 04-p-iris-group.pdf │ ├── 07-p-polynomial.pdf │ ├── 07-p-regression.pdf │ ├── 08-p-enet-path.pdf │ ├── 08-p-enet-tuning.pdf │ ├── 10-p-titanic-roc.pdf │ ├── 11-p-mnist-cnn.pdf │ ├── 11-p-mnist-id5.pdf │ ├── 11-p-mnist-lenet.pdf │ ├── 11-p-mnist-nnet.pdf │ ├── 11-p-regression.pdf │ ├── 04-p-random-sample.pdf │ ├── 07-p-tuning-train.pdf │ ├── 08-p-enet-tuning2.pdf │ ├── 10-p-titanic-tree.pdf │ ├── 13-p-pca-clusters.pdf │ ├── 08-p-boxplot-scaled.pdf │ ├── 11-p-classification.pdf │ ├── 11-p-mnist-lenet-miss.pdf │ ├── 12-p-airpassengers-lm.pdf │ ├── 12-p-airpassengers-arima.pdf 
│ ├── 12-p-airpassengers-prophet.pdf │ ├── 12-p-airpassengers-split.pdf │ ├── 04-p-runif.py │ ├── 04-p-curve.py │ ├── 10-p-logistic.py │ ├── 04-p-hist2.py │ ├── 08-p-sigmoid.py │ ├── 04-p-boxplot.py │ ├── 04-p-hist1.py │ ├── 07-p-plot.py │ ├── 11-p-mnist-id5.py │ ├── 04-p-rnorm.py │ ├── 11-p-relu.py │ ├── 04-p-scatter.py │ ├── 04-p-random-sample.py │ ├── 04-p-rbinom.py │ ├── 04-p-boot-binom.py │ ├── 04-p-hist3.py │ ├── Makefile │ ├── 08-p-boxplot.py │ ├── 04-p-iris.py │ ├── 13-p-hclust.py │ ├── 04-p-mosaic.py │ ├── 07-p-regression.py │ ├── 04-p-iris-group.py │ ├── 13-p-elbow.py │ ├── 09-p-varimp.py │ ├── 03-p-temp.py │ ├── 08-p-boxplot-scaled.py │ ├── 10-p-roc.py │ ├── 13-p-heatmap.py │ ├── 04-p-conf.py │ ├── 13-p-biplot.py │ ├── 07-p-residual.py │ ├── 10-p-pr.py │ ├── 09-p-rpart.py │ ├── 12-p-airpassengers-split.py │ ├── 07-p-knn.py │ ├── 07-p-lm.py │ ├── 11-p-h2o-wine.py │ ├── 04-p-pvalue1.py │ ├── 08-p-enet-path.py │ ├── 07-p-boxplot.py │ ├── 12-p-airpassengers-prophet.py │ ├── 13-p-pca-clusters.py │ ├── 12-p-airpassengers-lm.py │ ├── 09-p-rpart2.py │ ├── 07-p-tuning.py │ ├── 07-p-polynomial.py │ ├── 10-p-titanic-tree.py │ ├── 10-p-titanic-roc.py │ ├── 07-p-tuning-train.py │ ├── 11-p-regression.py │ ├── 12-p-airpassengers-arima.py │ ├── 08-p-enet-tuning.py │ ├── 11-p-classification.py │ ├── 11-p-mnist-nnet.py │ ├── 11-p-mnist-cnn.py │ ├── 11-p-mnist-lenet.py │ ├── 08-p-enet-tuning2.py │ └── 11-p-mnist-lenet-miss.py ├── fig-r │ ├── 07-r-knn.pdf │ ├── 07-r-lm.pdf │ ├── 10-r-pr.pdf │ ├── 10-r-roc.pdf │ ├── 03-r-temp.pdf │ ├── 04-r-conf.pdf │ ├── 04-r-curve.pdf │ ├── 04-r-hist1.pdf │ ├── 04-r-hist2.pdf │ ├── 04-r-hist3.pdf │ ├── 04-r-iris.pdf │ ├── 04-r-mosaic.pdf │ ├── 04-r-rbinom.pdf │ ├── 04-r-rnorm.pdf │ ├── 04-r-runif.pdf │ ├── 07-r-plot.pdf │ ├── 07-r-tuning.pdf │ ├── 09-r-rpart.pdf │ ├── 09-r-rpart2.pdf │ ├── 09-r-varimp.pdf │ ├── 10-r-rpart1.pdf │ ├── 10-r-rpart2.pdf │ ├── 10-r-rpart3.pdf │ ├── 11-r-relu.pdf │ ├── 13-r-biplot.pdf │ ├── 13-r-elbow.pdf │ ├── 
13-r-hclust.pdf │ ├── 13-r-kmeans.pdf │ ├── 04-r-boxplot.R │ ├── 04-r-boxplot.pdf │ ├── 04-r-curve.R │ ├── 04-r-ggplot-f.pdf │ ├── 04-r-hist1.R │ ├── 04-r-pvalue1.pdf │ ├── 04-r-scatter.pdf │ ├── 07-r-boxplot.pdf │ ├── 07-r-residual.pdf │ ├── 08-r-boxplot.pdf │ ├── 08-r-nnet-3-2.pdf │ ├── 08-r-sigmoid.pdf │ ├── 10-r-logistic.pdf │ ├── 11-r-h2o-wine.pdf │ ├── 13-r-hclust2.pdf │ ├── 13-r-heatmap.pdf │ ├── 04-r-boot-binom.pdf │ ├── 04-r-ggplot-box.pdf │ ├── 04-r-ggplot-hist.pdf │ ├── 04-r-iris-group.pdf │ ├── 07-r-polynomial.pdf │ ├── 07-r-regression.pdf │ ├── 08-r-enet-path.pdf │ ├── 08-r-enet-tuning.pdf │ ├── 10-r-titanic-roc.pdf │ ├── 11-r-mnist-cnn.pdf │ ├── 11-r-mnist-id5.pdf │ ├── 11-r-mnist-lenet.pdf │ ├── 11-r-mnist-nnet.pdf │ ├── 11-r-regression.pdf │ ├── 04-r-ggplot-mosaic.pdf │ ├── 04-r-ggplot-point.pdf │ ├── 04-r-random-sample.pdf │ ├── 07-r-tuning-train.pdf │ ├── 08-r-enet-tuning2.pdf │ ├── 10-r-titanic-tree.pdf │ ├── 13-r-pca-clusters.pdf │ ├── 08-r-boxplot-scaled.pdf │ ├── 08-r-sigmoid.R │ ├── 10-r-logistic.R │ ├── 11-r-classification.pdf │ ├── 11-r-mnist-lenet-miss.pdf │ ├── 12-r-airpassengers-lm.pdf │ ├── 11-r-relu.R │ ├── 12-r-airpassengers-arima.pdf │ ├── 12-r-airpassengers-prophet.pdf │ ├── 12-r-airpassengers-split.pdf │ ├── 04-r-hist2.R │ ├── 04-r-scatter.R │ ├── 04-r-runif.R │ ├── 13-r-elbow.R │ ├── 04-r-hist3.R │ ├── 07-r-plot.R │ ├── 04-r-rnorm.R │ ├── 04-r-ggplot-point.R │ ├── 04-r-ggplot-f.R │ ├── 04-r-random-sample.R │ ├── 11-r-mnist-id5.R │ ├── 09-r-rpart.R │ ├── 09-r-varimp.R │ ├── 04-r-mosaic.R │ ├── 04-r-rbinom.R │ ├── 04-r-boot-binom.R │ ├── 04-r-ggplot-box.R │ ├── 04-r-ggplot-hist.R │ ├── 04-r-iris.R │ ├── 04-r-ggplot-mosaic.R │ ├── 07-r-tuning.R │ ├── 13-r-hclust.R │ ├── Makefile │ ├── 13-r-kmeans.R │ ├── 13-r-hclust2.R │ ├── 07-r-residual.R │ ├── 03-r-temp.R │ ├── 08-r-boxplot.R │ ├── 10-r-pr.R │ ├── 07-r-regression.R │ ├── 10-r-roc.R │ ├── 13-r-heatmap.R │ ├── 10-r-rpart3.R │ ├── 10-r-titanic-tree.R │ ├── 13-r-biplot.R │ ├── 
07-r-lm.R │ ├── 08-r-boxplot-scaled.R │ ├── 09-r-rpart2.R │ ├── 07-r-knn.R │ ├── 08-r-enet-path.R │ ├── 13-r-pca-clusters.R │ ├── 04-r-conf.R │ ├── 07-r-boxplot.R │ ├── 04-r-iris-group.R │ ├── 10-r-rpart1.R │ ├── 11-r-h2o-wine.R │ ├── 12-r-airpassengers-split.R │ ├── 07-r-polynomial.R │ ├── 04-r-pvalue1.R │ ├── 10-r-rpart2.R │ ├── 08-r-nnet-3-2.R │ ├── 08-r-enet-tuning.R │ ├── 07-r-tuning-train.R │ ├── 10-r-titanic-roc.R │ ├── 12-r-airpassengers-arima.R │ ├── 12-r-airpassengers-prophet.R │ ├── 12-r-airpassengers-lm.R │ ├── 11-r-classification.R │ ├── 11-r-regression.R │ ├── 11-r-mnist-nnet.R │ ├── 11-r-mnist-id5.svg │ ├── 11-r-mnist-cnn.R │ ├── 11-r-mnist-lenet.R │ ├── 08-r-enet-tuning2.R │ └── 11-r-mnist-lenet-miss.R └── howtomake.md ├── data ├── exam.csv ├── exam.json ├── exam.xml └── wine.csv ├── docker ├── rstudio.sh ├── jupyter.sh ├── rstudio │ ├── README.md │ └── Dockerfile └── jupyter │ ├── README.md │ └── Dockerfile ├── addendum ├── 07.03.02 │ ├── confidence_band_p.py │ ├── confidence_band_r.R │ ├── 1+3x+N(0,2x).csv │ └── README.md └── sagemaker │ ├── README.md │ └── sage-python.yml ├── docs └── exam.html ├── code ├── R-notebook │ ├── r-06.ipynb │ ├── README.md │ └── r-12.ipynb └── Python-notebook │ ├── README.md │ ├── python-06.ipynb │ ├── python-12.ipynb │ └── python-05.ipynb ├── README.md └── update.md /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/* 2 | tmp* 3 | -------------------------------------------------------------------------------- /figures/fig/図1.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.2.pdf -------------------------------------------------------------------------------- /figures/fig/図1.4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.4.pdf 
-------------------------------------------------------------------------------- /figures/fig/図10.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図10.1.pdf -------------------------------------------------------------------------------- /figures/fig/図10.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図10.2.pdf -------------------------------------------------------------------------------- /figures/fig/図11.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.1.pdf -------------------------------------------------------------------------------- /figures/fig/図11.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.2.pdf -------------------------------------------------------------------------------- /figures/fig/図11.3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.3.pdf -------------------------------------------------------------------------------- /figures/fig/図11.4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.4.pdf -------------------------------------------------------------------------------- /figures/fig/図11.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図11.5.png -------------------------------------------------------------------------------- /figures/fig/図2.2.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.2.pdf -------------------------------------------------------------------------------- /figures/fig/図2.3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.3.pdf -------------------------------------------------------------------------------- /figures/fig/図2.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.4.png -------------------------------------------------------------------------------- /figures/fig/図2.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.5.png -------------------------------------------------------------------------------- /figures/fig/図2.6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.6.png -------------------------------------------------------------------------------- /figures/fig/図2.7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.7.png -------------------------------------------------------------------------------- /figures/fig/図3.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図3.1.pdf -------------------------------------------------------------------------------- /figures/fig/図6.1.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図6.1.pdf -------------------------------------------------------------------------------- /figures/fig/図7.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図7.1.pdf -------------------------------------------------------------------------------- /figures/fig/図7.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図7.2.pdf -------------------------------------------------------------------------------- /figures/fig/図7.3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図7.3.pdf -------------------------------------------------------------------------------- /figures/fig/図8.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図8.1.pdf -------------------------------------------------------------------------------- /figures/fig/図A.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図A.1.png -------------------------------------------------------------------------------- /figures/fig/表11.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/表11.1.pdf -------------------------------------------------------------------------------- /figures/fig/表11.b.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/表11.b.pdf 
-------------------------------------------------------------------------------- /data/exam.csv: -------------------------------------------------------------------------------- 1 | name,english,math,gender 2 | A,60,70,f 3 | B,90,80,m 4 | C,70,90,m 5 | D,90,100,f 6 | -------------------------------------------------------------------------------- /figures/fig/図1-last.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1-last.pdf -------------------------------------------------------------------------------- /figures/fig/図1.1.a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.1.a.png -------------------------------------------------------------------------------- /figures/fig/図1.1.b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1.1.b.png -------------------------------------------------------------------------------- /figures/fig/図2.1.a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.1.a.png -------------------------------------------------------------------------------- /figures/fig/図2.1.b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.1.b.png -------------------------------------------------------------------------------- /figures/fig/図2.8.a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.8.a.png -------------------------------------------------------------------------------- /figures/fig/図2.8.b.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.8.b.png -------------------------------------------------------------------------------- /figures/fig/図2.9.a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.9.a.png -------------------------------------------------------------------------------- /figures/fig/図2.9.b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図2.9.b.png -------------------------------------------------------------------------------- /figures/fig-p/07-p-knn.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-knn.pdf -------------------------------------------------------------------------------- /figures/fig-p/07-p-lm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-lm.pdf -------------------------------------------------------------------------------- /figures/fig-p/10-p-pr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-pr.pdf -------------------------------------------------------------------------------- /figures/fig-p/10-p-roc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-roc.pdf -------------------------------------------------------------------------------- /figures/fig-r/07-r-knn.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-knn.pdf -------------------------------------------------------------------------------- /figures/fig-r/07-r-lm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-lm.pdf -------------------------------------------------------------------------------- /figures/fig-r/10-r-pr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-pr.pdf -------------------------------------------------------------------------------- /figures/fig-r/10-r-roc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-roc.pdf -------------------------------------------------------------------------------- /figures/fig-p/03-p-temp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/03-p-temp.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-conf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-conf.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-curve.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-hist1.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-hist1.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-hist2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-hist2.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-hist3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-hist3.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-iris.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-iris.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-mosaic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-mosaic.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-rbinom.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-rbinom.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-rnorm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-rnorm.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-runif.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-runif.pdf -------------------------------------------------------------------------------- /figures/fig-p/07-p-plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-plot.pdf -------------------------------------------------------------------------------- /figures/fig-p/07-p-tuning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-tuning.pdf -------------------------------------------------------------------------------- /figures/fig-p/09-p-rpart.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/09-p-rpart.pdf -------------------------------------------------------------------------------- /figures/fig-p/09-p-rpart2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/09-p-rpart2.pdf -------------------------------------------------------------------------------- /figures/fig-p/09-p-varimp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/09-p-varimp.pdf -------------------------------------------------------------------------------- /figures/fig-p/11-p-relu.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-relu.pdf -------------------------------------------------------------------------------- /figures/fig-p/13-p-biplot.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-biplot.pdf -------------------------------------------------------------------------------- /figures/fig-p/13-p-elbow.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-elbow.pdf -------------------------------------------------------------------------------- /figures/fig-p/13-p-hclust.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-hclust.pdf -------------------------------------------------------------------------------- /figures/fig-r/03-r-temp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/03-r-temp.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-conf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-conf.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-curve.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-hist1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-hist1.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-hist2.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-hist2.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-hist3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-hist3.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-iris.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-iris.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-mosaic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-mosaic.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-rbinom.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-rbinom.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-rnorm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-rnorm.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-runif.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-runif.pdf -------------------------------------------------------------------------------- /figures/fig-r/07-r-plot.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-plot.pdf -------------------------------------------------------------------------------- /figures/fig-r/07-r-tuning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-tuning.pdf -------------------------------------------------------------------------------- /figures/fig-r/09-r-rpart.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/09-r-rpart.pdf -------------------------------------------------------------------------------- /figures/fig-r/09-r-rpart2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/09-r-rpart2.pdf -------------------------------------------------------------------------------- /figures/fig-r/09-r-varimp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/09-r-varimp.pdf -------------------------------------------------------------------------------- /figures/fig-r/10-r-rpart1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-rpart1.pdf -------------------------------------------------------------------------------- /figures/fig-r/10-r-rpart2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-rpart2.pdf -------------------------------------------------------------------------------- /figures/fig-r/10-r-rpart3.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-rpart3.pdf -------------------------------------------------------------------------------- /figures/fig-r/11-r-relu.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-relu.pdf -------------------------------------------------------------------------------- /figures/fig-r/13-r-biplot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-biplot.pdf -------------------------------------------------------------------------------- /figures/fig-r/13-r-elbow.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-elbow.pdf -------------------------------------------------------------------------------- /figures/fig-r/13-r-hclust.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-hclust.pdf -------------------------------------------------------------------------------- /figures/fig-r/13-r-kmeans.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-kmeans.pdf -------------------------------------------------------------------------------- /figures/fig/図1-preface.a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1-preface.a.pdf -------------------------------------------------------------------------------- /figures/fig/図1-preface.b.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig/図1-preface.b.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-boxplot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-boxplot.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-pvalue1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-pvalue1.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-scatter.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-scatter.pdf -------------------------------------------------------------------------------- /figures/fig-p/07-p-boxplot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-boxplot.pdf -------------------------------------------------------------------------------- /figures/fig-p/07-p-residual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-residual.pdf -------------------------------------------------------------------------------- /figures/fig-p/08-p-boxplot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-boxplot.pdf -------------------------------------------------------------------------------- /figures/fig-p/08-p-sigmoid.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-sigmoid.pdf -------------------------------------------------------------------------------- /figures/fig-p/10-p-logistic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-logistic.pdf -------------------------------------------------------------------------------- /figures/fig-p/11-p-h2o-wine.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-h2o-wine.pdf -------------------------------------------------------------------------------- /figures/fig-p/13-p-heatmap.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-heatmap.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-boxplot.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-boxplot.pdf", width = 6, height = 5.5) 2 | 3 | boxplot(iris[, -5]) 4 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-boxplot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-boxplot.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-curve.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-curve.pdf", width = 6, height = 5.5) 2 | 3 | curve(x^3 - x, -2, 2) 4 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-ggplot-f.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-f.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-hist1.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-hist1.pdf", width = 6, height = 5.5) 2 | 3 | hist(iris$Sepal.Length) 4 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-pvalue1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-pvalue1.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-scatter.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-scatter.pdf -------------------------------------------------------------------------------- /figures/fig-r/07-r-boxplot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-boxplot.pdf -------------------------------------------------------------------------------- /figures/fig-r/07-r-residual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-residual.pdf -------------------------------------------------------------------------------- /figures/fig-r/08-r-boxplot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-boxplot.pdf -------------------------------------------------------------------------------- /figures/fig-r/08-r-nnet-3-2.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-nnet-3-2.pdf -------------------------------------------------------------------------------- /figures/fig-r/08-r-sigmoid.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-sigmoid.pdf -------------------------------------------------------------------------------- /figures/fig-r/10-r-logistic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-logistic.pdf -------------------------------------------------------------------------------- /figures/fig-r/11-r-h2o-wine.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-h2o-wine.pdf -------------------------------------------------------------------------------- /figures/fig-r/13-r-hclust2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-hclust2.pdf -------------------------------------------------------------------------------- /figures/fig-r/13-r-heatmap.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-heatmap.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-boot-binom.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-boot-binom.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-iris-group.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-iris-group.pdf -------------------------------------------------------------------------------- /figures/fig-p/07-p-polynomial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-polynomial.pdf -------------------------------------------------------------------------------- /figures/fig-p/07-p-regression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-regression.pdf -------------------------------------------------------------------------------- /figures/fig-p/08-p-enet-path.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-enet-path.pdf -------------------------------------------------------------------------------- /figures/fig-p/08-p-enet-tuning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-enet-tuning.pdf -------------------------------------------------------------------------------- /figures/fig-p/10-p-titanic-roc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-titanic-roc.pdf -------------------------------------------------------------------------------- /figures/fig-p/11-p-mnist-cnn.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-cnn.pdf 
-------------------------------------------------------------------------------- /figures/fig-p/11-p-mnist-id5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-id5.pdf -------------------------------------------------------------------------------- /figures/fig-p/11-p-mnist-lenet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-lenet.pdf -------------------------------------------------------------------------------- /figures/fig-p/11-p-mnist-nnet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-nnet.pdf -------------------------------------------------------------------------------- /figures/fig-p/11-p-regression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-regression.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-boot-binom.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-boot-binom.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-ggplot-box.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-box.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-ggplot-hist.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-hist.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-iris-group.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-iris-group.pdf -------------------------------------------------------------------------------- /figures/fig-r/07-r-polynomial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-polynomial.pdf -------------------------------------------------------------------------------- /figures/fig-r/07-r-regression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-regression.pdf -------------------------------------------------------------------------------- /figures/fig-r/08-r-enet-path.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-enet-path.pdf -------------------------------------------------------------------------------- /figures/fig-r/08-r-enet-tuning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-enet-tuning.pdf -------------------------------------------------------------------------------- /figures/fig-r/10-r-titanic-roc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-titanic-roc.pdf -------------------------------------------------------------------------------- /figures/fig-r/11-r-mnist-cnn.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-cnn.pdf -------------------------------------------------------------------------------- /figures/fig-r/11-r-mnist-id5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-id5.pdf -------------------------------------------------------------------------------- /figures/fig-r/11-r-mnist-lenet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-lenet.pdf -------------------------------------------------------------------------------- /figures/fig-r/11-r-mnist-nnet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-nnet.pdf -------------------------------------------------------------------------------- /figures/fig-r/11-r-regression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-regression.pdf -------------------------------------------------------------------------------- /figures/fig-p/04-p-random-sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/04-p-random-sample.pdf -------------------------------------------------------------------------------- /figures/fig-p/07-p-tuning-train.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/07-p-tuning-train.pdf 
-------------------------------------------------------------------------------- /figures/fig-p/08-p-enet-tuning2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-enet-tuning2.pdf -------------------------------------------------------------------------------- /figures/fig-p/10-p-titanic-tree.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/10-p-titanic-tree.pdf -------------------------------------------------------------------------------- /figures/fig-p/13-p-pca-clusters.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/13-p-pca-clusters.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-ggplot-mosaic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-mosaic.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-ggplot-point.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-ggplot-point.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-random-sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/04-r-random-sample.pdf -------------------------------------------------------------------------------- /figures/fig-r/07-r-tuning-train.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/07-r-tuning-train.pdf -------------------------------------------------------------------------------- /figures/fig-r/08-r-enet-tuning2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-enet-tuning2.pdf -------------------------------------------------------------------------------- /figures/fig-r/10-r-titanic-tree.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/10-r-titanic-tree.pdf -------------------------------------------------------------------------------- /figures/fig-r/13-r-pca-clusters.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/13-r-pca-clusters.pdf -------------------------------------------------------------------------------- /figures/fig-p/08-p-boxplot-scaled.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/08-p-boxplot-scaled.pdf -------------------------------------------------------------------------------- /figures/fig-p/11-p-classification.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-classification.pdf -------------------------------------------------------------------------------- /figures/fig-p/11-p-mnist-lenet-miss.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/11-p-mnist-lenet-miss.pdf -------------------------------------------------------------------------------- 
/figures/fig-p/12-p-airpassengers-lm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-lm.pdf -------------------------------------------------------------------------------- /figures/fig-r/08-r-boxplot-scaled.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/08-r-boxplot-scaled.pdf -------------------------------------------------------------------------------- /figures/fig-r/08-r-sigmoid.R: -------------------------------------------------------------------------------- 1 | pdf(file = "08-r-sigmoid.pdf", width = 6, height = 5.5) 2 | 3 | curve(1 / (1 + exp(-x)), -6, 6) 4 | -------------------------------------------------------------------------------- /figures/fig-r/10-r-logistic.R: -------------------------------------------------------------------------------- 1 | pdf(file = "10-r-logistic.pdf", width = 6, height = 5.5) 2 | 3 | curve(1 / (1 + exp(-x)), -6, 6) 4 | -------------------------------------------------------------------------------- /figures/fig-r/11-r-classification.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-classification.pdf -------------------------------------------------------------------------------- /figures/fig-r/11-r-mnist-lenet-miss.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/11-r-mnist-lenet-miss.pdf -------------------------------------------------------------------------------- /figures/fig-r/12-r-airpassengers-lm.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-lm.pdf -------------------------------------------------------------------------------- /figures/fig-p/12-p-airpassengers-arima.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-arima.pdf -------------------------------------------------------------------------------- /figures/fig-p/12-p-airpassengers-prophet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-prophet.pdf -------------------------------------------------------------------------------- /figures/fig-p/12-p-airpassengers-split.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-p/12-p-airpassengers-split.pdf -------------------------------------------------------------------------------- /figures/fig-r/11-r-relu.R: -------------------------------------------------------------------------------- 1 | pdf(file = "11-r-relu.pdf", width = 6, height = 5.5) 2 | 3 | library(keras) 4 | curve(activation_relu(x), -3, 3) 5 | -------------------------------------------------------------------------------- /figures/fig-r/12-r-airpassengers-arima.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-arima.pdf -------------------------------------------------------------------------------- /figures/fig-r/12-r-airpassengers-prophet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-prophet.pdf 
-------------------------------------------------------------------------------- /figures/fig-r/12-r-airpassengers-split.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taroyabuki/fromzero/HEAD/figures/fig-r/12-r-airpassengers-split.pdf -------------------------------------------------------------------------------- /figures/fig-r/04-r-hist2.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-hist2.pdf", width = 6, height = 5.5) 2 | 3 | x <- c(10, 20, 30) 4 | hist(x, breaks = 2) # 階級数は2 5 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-scatter.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-scatter.pdf", width = 6, height = 5.5) 2 | 3 | plot(iris$Sepal.Length, 4 | iris$Sepal.Width) 5 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-runif.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | x = np.random.random(1000) 5 | plt.hist(x) 6 | 7 | plt.savefig('04-p-runif.pdf') 8 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-runif.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-runif.pdf", width = 6, height = 5) 2 | 3 | x <- runif(min = 0, # 最小 4 | max = 1, # 最大 5 | n = 1000) # 乱数の数 6 | hist(x) 7 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-curve.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | x = np.linspace(-2, 2, 100) 5 | y = x**3 - x 6 | plt.plot(x, y) 7 | 8 | plt.savefig('04-p-curve.pdf') 9 | 
-------------------------------------------------------------------------------- /figures/fig-p/10-p-logistic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | x = np.arange(-6, 6, 0.1) 3 | y = 1 / (1 + np.exp(-x)) 4 | import matplotlib.pyplot as plt 5 | plt.plot(x, y) 6 | plt.savefig('10-p-logistic.pdf') 7 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-hist2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | my_df = pd.DataFrame({'x': [10, 20, 30]}) 4 | my_df.hist('x', bins=2) # 階級数は2 5 | 6 | import matplotlib.pyplot as plt 7 | plt.savefig('04-p-hist2.pdf') 8 | -------------------------------------------------------------------------------- /figures/fig-p/08-p-sigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | x = np.linspace(-6, 6, 100) 5 | y = 1 / (1 + np.exp(-x)) 6 | plt.plot(x, y) 7 | plt.savefig('08-p-sigmoid.pdf') 8 | -------------------------------------------------------------------------------- /figures/fig-r/13-r-elbow.R: -------------------------------------------------------------------------------- 1 | pdf(file = "13-r-elbow.pdf", width = 6, height = 4.5) 2 | 3 | library(tidyverse) 4 | library(factoextra) 5 | 6 | my_data <- iris[, -5] 7 | fviz_nbclust(my_data, kmeans, method = "wss") 8 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-hist3.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-hist3.pdf", width = 6, height = 5.5) 2 | 3 | x <- iris$Sepal.Length 4 | tmp <- seq(min(x), max(x), 5 | length.out = 10) 6 | hist(x, breaks = tmp, right = FALSE) 7 | -------------------------------------------------------------------------------- /figures/fig-r/07-r-plot.R: 
-------------------------------------------------------------------------------- 1 | pdf(file = "07-r-plot.pdf", width = 6, height = 4.5) 2 | 3 | library(tidyverse) 4 | my_data <- cars 5 | 6 | my_data %>% 7 | ggplot(aes(x = speed, y = dist)) + 8 | geom_point() 9 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-boxplot.py: -------------------------------------------------------------------------------- 1 | import statsmodels.api as sm 2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data 3 | 4 | iris.boxplot() 5 | 6 | import matplotlib.pyplot as plt 7 | plt.savefig('04-p-boxplot.pdf') 8 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-rnorm.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-rnorm.pdf", width = 6, height = 5) 2 | 3 | r <- 10000 4 | x <- rnorm(mean = 50, # 平均 5 | sd = 5, # 標準偏差 6 | n = r) # 乱数の数 7 | hist(x, breaks = 40) 8 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-hist1.py: -------------------------------------------------------------------------------- 1 | import statsmodels.api as sm 2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data 3 | 4 | iris.hist('Sepal.Length') 5 | 6 | import matplotlib.pyplot as plt 7 | plt.savefig('04-p-hist1.pdf') 8 | -------------------------------------------------------------------------------- /figures/fig-p/07-p-plot.py: -------------------------------------------------------------------------------- 1 | import statsmodels.api as sm 2 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data 3 | my_data.plot(x='speed', style='o') 4 | import matplotlib.pyplot as plt 5 | plt.savefig('07-p-plot.pdf') 6 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-ggplot-point.R: 
-------------------------------------------------------------------------------- 1 | pdf(file = "04-r-ggplot-point.pdf", width = 6, height = 4) 2 | 3 | library(tidyverse) 4 | 5 | iris %>% 6 | ggplot(aes(x = Sepal.Length, 7 | y = Sepal.Width)) + 8 | geom_point() 9 | -------------------------------------------------------------------------------- /docker/rstudio.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | docker run \ 3 | -d \ 4 | -e PASSWORD=password \ 5 | -e ROOT=TRUE \ 6 | -p 8787:8787 \ 7 | -v "$(pwd):/home/rstudio/work" \ 8 | --platform linux/x86_64 \ 9 | --name rs \ 10 | taroyabuki/rstudio 11 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-ggplot-f.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-ggplot-f.pdf", width = 6, height = 4) 2 | 3 | library(tidyverse) 4 | 5 | f <- function(x) { x^3 - x } 6 | data.frame(x = c(-2, 2)) %>% 7 | ggplot(aes(x = x)) + 8 | stat_function(fun = f) 9 | -------------------------------------------------------------------------------- /docker/jupyter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | docker run \ 3 | -d \ 4 | -p 8888:8888 \ 5 | -v "$(pwd):/home/jovyan/work" \ 6 | --platform linux/x86_64 \ 7 | --name jr \ 8 | taroyabuki/jupyter \ 9 | start-notebook.sh \ 10 | --NotebookApp.token='password' 11 | -------------------------------------------------------------------------------- /figures/fig-p/11-p-mnist-id5.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 3 | 4 | import matplotlib.pyplot as plt 5 | plt.matshow(x_train[4, :, :]) 6 | plt.savefig('11-p-mnist-id5.pdf') 7 | -------------------------------------------------------------------------------- 
/figures/fig-r/04-r-random-sample.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-random-sample.pdf", width = 6, height = 5) 2 | 3 | x <- sample(x = 1:6, # 範囲 4 | size = 10000, # 乱数の数 5 | replace = TRUE) # 重複あり 6 | hist(x, breaks = 0:6) # ヒストグラム 7 | -------------------------------------------------------------------------------- /figures/fig-r/11-r-mnist-id5.R: -------------------------------------------------------------------------------- 1 | pdf(file = "11-r-mnist-id5.pdf", width = 5.83, height = 4.13) 2 | 3 | library(keras) 4 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist() 5 | 6 | plot(as.raster(x = x_train[5, , ], max = max(x_train))) 7 | -------------------------------------------------------------------------------- /figures/fig-r/09-r-rpart.R: -------------------------------------------------------------------------------- 1 | pdf(file = "09-r-rpart.pdf", width = 5.83, height = 4.13) 2 | 3 | library(caret) 4 | my_data <- iris 5 | my_model <- train(form = Species ~ ., data = my_data, method = "rpart2") 6 | rpart.plot::rpart.plot(my_model$finalModel, extra = 1) 7 | -------------------------------------------------------------------------------- /figures/fig-r/09-r-varimp.R: -------------------------------------------------------------------------------- 1 | pdf(file = "09-r-varimp.pdf", width = 5.83, height = 4.13) 2 | 3 | library(caret) 4 | library(tidyverse) 5 | my_data <- iris 6 | 7 | my_model <- train(form = Species ~ ., data = my_data, method = "rf") 8 | ggplot(varImp(my_model)) 9 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-mosaic.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-mosaic.pdf", width = 6, height = 4.5) 2 | 3 | my_df <- data.frame( 4 | Species = iris$Species, 5 | w_Sepal = iris$Sepal.Width > 3) 6 | 7 | mosaicplot( 8 | formula = ~ Species + w_Sepal, 9 | 
data = my_df) 10 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-rnorm.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | r = 10000 5 | x = np.random.normal( 6 | loc=50, # 平均 7 | scale=5, # 標準偏差 8 | size=r) # 乱数の数 9 | plt.hist(x, bins=40) 10 | 11 | plt.savefig('04-p-rnorm.pdf') 12 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-rbinom.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-rbinom.pdf", width = 6, height = 5) 2 | 3 | n <- 100 4 | p <- 0.5 5 | r <- 10000 6 | x <- rbinom(size = n, # 試行回数 7 | prob = p, # 確率 8 | n = r) # 乱数の数 9 | hist(x, breaks = max(x) - min(x)) 10 | -------------------------------------------------------------------------------- /figures/fig-p/11-p-relu.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from keras import activations 4 | 5 | x = np.linspace(-3, 3, 100) 6 | plt.plot(x, activations.relu(x)) 7 | plt.xlabel('x') 8 | plt.ylabel('ReLU(x)') 9 | plt.savefig('11-p-relu.pdf') 10 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-boot-binom.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-boot-binom.pdf", width = 6, height = 5) 2 | 3 | X <- rep(0:1, c(13, 2)) 4 | n <- 10^5 5 | result <- replicate(n, sum(sample(X, size = length(X), replace = TRUE))) 6 | hist(x = result, 7 | breaks = 0:15, 8 | right = FALSE) 9 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-scatter.py: -------------------------------------------------------------------------------- 1 | import statsmodels.api as sm 2 | iris = 
sm.datasets.get_rdataset('iris', 'datasets').data 3 | 4 | iris.plot('Sepal.Length', 5 | 'Sepal.Width', 6 | kind='scatter') 7 | 8 | import matplotlib.pyplot as plt 9 | plt.savefig('04-p-scatter.pdf') 10 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-random-sample.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | x = np.random.choice( 5 | a=range(1, 7), # 1から6 6 | size=10000, # 乱数の数 7 | replace=True) # 重複あり 8 | plt.hist(x, bins=6) # ヒストグラム 9 | 10 | plt.savefig('04-p-random-sample.pdf') 11 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-rbinom.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | n = 100 5 | p = 0.5 6 | r = 10000 7 | x = np.random.binomial( 8 | n=n, # 試行回数 9 | p=p, # 確率 10 | size=r) # 乱数の数 11 | plt.hist(x, bins=max(x) - min(x)) 12 | 13 | plt.savefig('04-p-rbinom.pdf') 14 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-ggplot-box.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-ggplot-box.pdf", width = 6, height = 4) 2 | 3 | library(tidyverse) 4 | 5 | iris %>% 6 | pivot_longer(-Species) %>% 7 | ggplot(aes( 8 | x = factor(name, 9 | levels = names(iris)), 10 | y = value)) + 11 | geom_boxplot() + 12 | xlab(NULL) 13 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-boot-binom.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | X = [0] * 13 + [1] * 2 # 手順1 3 | n = 10**5 4 | result = [sum(np.random.choice(X, len(X), replace=True)) # 手順4 5 | for _ in range(n)] 6 | 7 | import matplotlib.pyplot as plt 8 | plt.hist(result, 9 | 
bins=range(0, 16)) 10 | plt.savefig('04-p-boot-binom.pdf') 11 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-ggplot-hist.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-ggplot-hist.pdf", width = 6, height = 4) 2 | 3 | library(tidyverse) 4 | 5 | x <- iris$Sepal.Length 6 | tmp <- seq(min(x), max(x), 7 | length.out = 10) 8 | iris %>% 9 | ggplot(aes(x = Sepal.Length)) + 10 | geom_histogram(breaks = tmp, 11 | closed = "left") 12 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-iris.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-iris.pdf", width = 6, height = 4) 2 | 3 | library(tidyverse) 4 | my_df <- psych::describe(iris[, -5]) 5 | 6 | tmp <- rownames(my_df) 7 | my_df %>% ggplot(aes(x = factor(tmp, levels = tmp), y = mean)) + 8 | geom_col() + 9 | geom_errorbar(aes(ymin = mean - se, ymax = mean + se)) + 10 | xlab(NULL) 11 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-hist3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import statsmodels.api as sm 3 | iris = sm.datasets.get_rdataset('iris', 'datasets').data 4 | 5 | x = iris['Sepal.Length'] 6 | tmp = np.linspace(min(x), max(x), 10) 7 | iris.hist('Sepal.Length', 8 | bins=tmp.round(2)) 9 | 10 | import matplotlib.pyplot as plt 11 | plt.savefig('04-p-hist3.pdf') 12 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-ggplot-mosaic.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-ggplot-mosaic.pdf", width = 6, height = 4) 2 | 3 | library(tidyverse) 4 | 5 | library(ggmosaic) 6 | my_df <- data.frame( 7 | Species = iris$Species, 8 | w_Sepal = iris$Sepal.Width > 3) 9 | my_df %>% 10 | 
ggplot() + 11 | geom_mosaic( 12 | aes(x = product(w_Sepal, 13 | Species))) 14 | -------------------------------------------------------------------------------- /addendum/07.03.02/confidence_band_p.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import seaborn as sns 3 | 4 | data = pd.read_csv('1+3x+N(0,2x).csv') 5 | x = data.x 6 | y = data.y 7 | n = len(x) 8 | 9 | alpha = 0.99 10 | n_boot = 10000 11 | 12 | sns.regplot(x=x, y=y, ci=100 * alpha, n_boot=n_boot) 13 | 14 | import matplotlib.pyplot as plt 15 | plt.savefig('confidence_band_p.pdf') 16 | -------------------------------------------------------------------------------- /figures/fig-p/Makefile: -------------------------------------------------------------------------------- 1 | SRC=$(wildcard *.py) 2 | 3 | PDF=$(SRC:.py=.pdf) 4 | 5 | all: $(PDF) 6 | 7 | .SUFFIXES: .pdf .py 8 | 9 | .py.pdf: 10 | if python3 $<; then\ 11 | if [ -f /usr/bin/pdfcrop ]; then\ 12 | pdfcrop $@;\ 13 | rm $@;\ 14 | mv $(basename $@)-crop.pdf $@;\ 15 | fi;\ 16 | else exit 1;\ 17 | fi 18 | 19 | clean: 20 | rm -f *.pdf *.log 21 | -------------------------------------------------------------------------------- /figures/fig-p/08-p-boxplot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | my_url = ('https://raw.githubusercontent.com/taroyabuki/' + 4 | 'fromzero/master/data/wine.csv') 5 | my_data = pd.read_csv(my_url) 6 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2'] 7 | 8 | X.boxplot(showmeans=True) 9 | 10 | import matplotlib.pyplot as plt 11 | plt.savefig('08-p-boxplot.pdf') 12 | -------------------------------------------------------------------------------- /figures/fig/図1-last.md: -------------------------------------------------------------------------------- 1 | ```puml 2 | @startuml 3 | scale 0.8 4 | skinparam { 5 | defaultFontName Hiragino Kaku Gothic ProN 6 | monochrome true 7 | shadowing 
false 8 | } 9 | 10 | (リファレンス) 11 | (本書)-->(プログラミング入門) 12 | (本書)-->(データサイエンス入門) 13 | (本書)-->(統計学) 14 | プログラミング入門-->(言語についての高度な話題) 15 | データサイエンス入門-->(データサイエンスの理論と実践) 16 | 統計学-->(統計学の実践) 17 | @enduml 18 | ``` 19 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-iris.py: -------------------------------------------------------------------------------- 1 | import statsmodels.api as sm 2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data 3 | 4 | my_df = iris.describe().transpose()[['mean', 'std']] 5 | my_df['se'] = my_df['std'] / len(iris)**0.5 6 | 7 | my_df.plot(y='mean', kind='bar', yerr='se', capsize=10) 8 | 9 | import matplotlib.pyplot as plt 10 | plt.tight_layout() 11 | plt.savefig('04-p-iris.pdf') 12 | -------------------------------------------------------------------------------- /figures/fig-r/07-r-tuning.R: -------------------------------------------------------------------------------- 1 | pdf(file = "07-r-tuning.pdf", width = 5.83, height = 4.13) 2 | 3 | set.seed(0) 4 | 5 | library(caret) 6 | library(tidyverse) 7 | my_data <- cars 8 | my_model <- train(form = dist ~ speed, data = my_data, method = "knn", 9 | tuneGrid = expand.grid(k = 1:15), 10 | trControl = trainControl(method = "LOOCV")) 11 | ggplot(my_model) 12 | -------------------------------------------------------------------------------- /figures/fig-r/13-r-hclust.R: -------------------------------------------------------------------------------- 1 | pdf(file = "13-r-hclust.pdf", width = 5.83, height = 4.13) 2 | 3 | my_data <- data.frame( 4 | x = c( 0, -16, 10, 10), 5 | y = c( 0, 0, 10, -15), 6 | row.names = c("A", "B", "C", "D")) 7 | 8 | my_dist <- dist(my_data) 9 | my_result <- hclust(my_dist) 10 | 11 | factoextra::fviz_dend( 12 | my_result, 13 | k = 3, 14 | rect = T, rect_fill = T) 15 | -------------------------------------------------------------------------------- /figures/fig-r/Makefile: 
-------------------------------------------------------------------------------- 1 | SRC=$(wildcard *.R) 2 | 3 | PDF=$(SRC:.R=.pdf) 4 | 5 | all: $(PDF) 6 | 7 | .SUFFIXES: .pdf .R 8 | 9 | .R.pdf: 10 | if Rscript $<; then\ 11 | if [ -f /usr/bin/pdfcrop ]; then\ 12 | pdfcrop $@;\ 13 | rm $@;\ 14 | mv $(basename $@)-crop.pdf $@;\ 15 | fi;\ 16 | else\ 17 | rm -f $@;\ 18 | exit 1;\ 19 | fi 20 | 21 | clean: 22 | rm -f *.pdf *.log 23 | -------------------------------------------------------------------------------- /addendum/07.03.02/confidence_band_r.R: -------------------------------------------------------------------------------- 1 | pdf(file = "confidence_band_r.pdf", width = 6, height = 4.5) 2 | 3 | library(boot) 4 | library(tidyverse) 5 | 6 | data <- read_csv("1+3x+N(0,2x).csv") 7 | x <- data$x 8 | y <- data$y 9 | n <- nrow(data) 10 | 11 | alpha <- 0.99 12 | data %>% ggplot(aes(x = x, y = y)) + 13 | geom_point() + 14 | stat_smooth(formula = y ~ x, method = "lm", level = alpha) 15 | -------------------------------------------------------------------------------- /docker/rstudio/README.md: -------------------------------------------------------------------------------- 1 | # RStudio用のコンテナ 2 | 3 | - Docker Hub: https://hub.docker.com/r/taroyabuki/rstudio 4 | - 起動方法(3種類) 5 | - [rstudio.sh](../rstudio.sh)を実行する. 
6 | - `wget https://raw.githubusercontent.com/taroyabuki/fromzero/master/docker/rstudio.sh`の後で,`sh rstudio.sh` 7 | - `git clone https://github.com/taroyabuki/fromzero.git`の後で,`sh fromzero/docker/rstudio.sh` 8 | - RStudio Serverへのアクセス:http://localhost:8787 9 | -------------------------------------------------------------------------------- /figures/fig-r/13-r-kmeans.R: -------------------------------------------------------------------------------- 1 | pdf(file = "13-r-kmeans.pdf", width = 5.83, height = 4.13) 2 | 3 | library(tidyverse) 4 | library(factoextra) 5 | 6 | my_data <- iris[, -5] 7 | 8 | f <- 2:5 %>% map(function(k) { 9 | my_data %>% kmeans(k) %>% 10 | fviz_cluster(data = my_data, geom = "point") + 11 | ggtitle(sprintf("k = %s", k)) 12 | }) 13 | gridExtra::grid.arrange(f[[1]], f[[2]], f[[3]], f[[4]], ncol = 2) 14 | -------------------------------------------------------------------------------- /figures/fig-r/13-r-hclust2.R: -------------------------------------------------------------------------------- 1 | pdf(file = "13-r-hclust2.pdf", width = 5.83, height = 4.13) 2 | 3 | my_data <- data.frame( 4 | x = c( 0, -16, 10, 10), 5 | y = c( 0, 0, 10, -15), 6 | row.names = c("A", "B", "C", "D")) 7 | 8 | my_dist <- dist(my_data) 9 | my_result <- hclust(my_dist) 10 | 11 | factoextra::fviz_dend( 12 | my_result, 13 | k = 3, 14 | rect = T, rect_fill = T, 15 | type = "phylogenic") 16 | -------------------------------------------------------------------------------- /figures/fig-p/13-p-hclust.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | my_data = pd.DataFrame( 3 | {'x': [0, -16, 10, 10], 4 | 'y': [0, 0, 10, -15]}, 5 | index=['A', 'B', 'C', 'D']) 6 | 7 | from scipy.cluster import hierarchy 8 | my_result = hierarchy.linkage(my_data, metric='euclidean', method='complete') 9 | hierarchy.dendrogram(my_result, labels=my_data.index) 10 | 11 | import matplotlib.pyplot as plt 12 | plt.savefig('13-p-hclust.pdf') 13 | 
-------------------------------------------------------------------------------- /figures/fig-p/04-p-mosaic.py: -------------------------------------------------------------------------------- 1 | import statsmodels.api as sm 2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data 3 | 4 | import pandas as pd 5 | from statsmodels.graphics.mosaicplot \ 6 | import mosaic 7 | 8 | my_df = pd.DataFrame({ 9 | 'Species': iris.Species, 10 | 'w_Sepal': iris['Sepal.Width'] > 3}) 11 | mosaic(my_df, 12 | index=['Species', 'w_Sepal']) 13 | 14 | import matplotlib.pyplot as plt 15 | plt.savefig('04-p-mosaic.pdf') 16 | -------------------------------------------------------------------------------- /figures/fig-p/07-p-regression.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import statsmodels.api as sm 3 | 4 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data 5 | ax = sns.regplot(x='speed', y='dist', data=my_data) 6 | ax.vlines(x=21.5, ymin=-5, ymax=67, linestyles='dotted') 7 | ax.hlines(y=67, xmin=4, xmax=21.5, linestyles='dotted') 8 | ax.set_xlim(4, 25) 9 | ax.set_ylim(-5, 125) 10 | 11 | import matplotlib.pyplot as plt 12 | plt.savefig('07-p-regression.pdf') 13 | -------------------------------------------------------------------------------- /data/exam.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "A", 4 | "english": 60, 5 | "math": 70, 6 | "gender": "f" 7 | }, 8 | { 9 | "name": "B", 10 | "english": 90, 11 | "math": 80, 12 | "gender": "m" 13 | }, 14 | { 15 | "name": "C", 16 | "english": 70, 17 | "math": 90, 18 | "gender": "m" 19 | }, 20 | { 21 | "name": "D", 22 | "english": 90, 23 | "math": 100, 24 | "gender": "f" 25 | } 26 | ] -------------------------------------------------------------------------------- /figures/fig-p/04-p-iris-group.py: -------------------------------------------------------------------------------- 1 | import 
statsmodels.api as sm 2 | iris = sm.datasets.get_rdataset('iris', 'datasets').data 3 | 4 | my_group = iris.groupby('Species') # 品種ごとに, 5 | my_df = my_group.agg('mean') # 各変数の,平均と 6 | my_se = my_group.agg(lambda x: x.std() / len(x)**0.5) # 標準誤差を求める. 7 | 8 | my_df.plot(kind='bar', yerr=my_se, capsize=5) 9 | 10 | import matplotlib.pyplot as plt 11 | plt.savefig('04-p-iris-group.pdf') 12 | -------------------------------------------------------------------------------- /figures/fig-p/13-p-elbow.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import statsmodels.api as sm 3 | from sklearn.cluster import KMeans 4 | 5 | iris = sm.datasets.get_rdataset('iris', 'datasets').data 6 | my_data = iris.iloc[:, 0:4] 7 | 8 | k = range(1, 11) 9 | my_df = pd.DataFrame({ 10 | 'k': k, 11 | 'inertia': [KMeans(k).fit(my_data).inertia_ for k in range(1, 11)]}) 12 | my_df.plot(x='k', style='o-', legend=False) 13 | 14 | import matplotlib.pyplot as plt 15 | plt.savefig('13-p-elbow.pdf') 16 | -------------------------------------------------------------------------------- /figures/fig-p/09-p-varimp.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import statsmodels.api as sm 3 | from sklearn.ensemble import RandomForestClassifier 4 | 5 | iris = sm.datasets.get_rdataset('iris', 'datasets').data 6 | X, y = iris.iloc[:, 0:4], iris.Species 7 | 8 | my_model = RandomForestClassifier().fit(X, y) 9 | tmp = pd.Series(my_model.feature_importances_, index=X.columns) 10 | tmp.sort_values().plot(kind='barh') 11 | 12 | import matplotlib.pyplot as plt 13 | plt.tight_layout() 14 | plt.savefig('09-p-varimp.pdf') 15 | -------------------------------------------------------------------------------- /figures/fig-r/07-r-residual.R: -------------------------------------------------------------------------------- 1 | pdf(file = "07-r-residual.pdf", width = 6, height = 4.5) 2 | 3 | 
library(caret) 4 | library(tidyverse) 5 | my_data <- cars 6 | my_model <- train(form = dist ~ speed, data = my_data, method = "lm") 7 | y_ <- my_model %>% predict(my_data) 8 | my_data$y_ <- y_ 9 | 10 | my_data %>% 11 | ggplot(aes(x = speed, y = dist)) + 12 | geom_point() + 13 | geom_line(aes(x = speed, y = y_)) + 14 | geom_linerange(mapping = aes(ymin = y_, ymax = dist), linetype = "dotted") 15 | -------------------------------------------------------------------------------- /figures/fig-r/03-r-temp.R: -------------------------------------------------------------------------------- 1 | pdf(file = "03-r-temp.pdf", width = 5.83, height = 4.13) 2 | 3 | library(tidyverse) 4 | 5 | my_wider <- data.frame( 6 | day = c(25, 26, 27), 7 | min = c(20, 21, 15), 8 | max = c(24, 27, 21)) 9 | 10 | my_longer <- my_wider %>% 11 | pivot_longer(-day) 12 | 13 | my_longer %>% 14 | ggplot(aes(x = day, y = value, 15 | color = name)) + 16 | geom_point() + 17 | geom_line() + 18 | ylab("temperature") + 19 | scale_x_continuous( 20 | breaks = my_longer$day) 21 | -------------------------------------------------------------------------------- /figures/fig-p/03-p-temp.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | my_df = pd.DataFrame({ 5 | 'day': [25, 26, 27], 6 | 'min': [20, 21, 15], 7 | 'max': [24, 27, 21]}) 8 | 9 | my_longer = my_df.melt(id_vars='day') 10 | 11 | my_wider = my_longer.pivot( 12 | index='day', 13 | columns='variable', 14 | values='value') 15 | 16 | my_wider.plot(style='o-', 17 | xticks=my_wider.index, 18 | ylabel='temperature') 19 | 20 | plt.savefig('03-p-temp.pdf') 21 | -------------------------------------------------------------------------------- /figures/fig-r/08-r-boxplot.R: -------------------------------------------------------------------------------- 1 | pdf(file = "08-r-boxplot.pdf", width = 6, height = 4.5) 2 | 3 | library(tidyverse) 4 | my_url <- 
str_c("https://raw.githubusercontent.com/taroyabuki", 5 | "/fromzero/master/data/wine.csv") 6 | my_data <- read_csv(my_url) 7 | 8 | my_data %>% 9 | pivot_longer(-LPRICE2) %>% 10 | ggplot(aes(x = factor(name, levels = names(my_data[, -1])), 11 | y = value)) + 12 | geom_boxplot() + 13 | stat_summary(fun = mean, geom = "point", size = 3) + 14 | xlab(NULL) 15 | -------------------------------------------------------------------------------- /figures/fig-p/08-p-boxplot-scaled.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import StandardScaler 3 | 4 | my_url = ('https://raw.githubusercontent.com/taroyabuki/' + 5 | 'fromzero/master/data/wine.csv') 6 | my_data = pd.read_csv(my_url) 7 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2'] 8 | 9 | pd.DataFrame(StandardScaler().fit_transform(X), 10 | columns=X.columns).boxplot(showmeans=True) 11 | 12 | import matplotlib.pyplot as plt 13 | plt.savefig('08-p-boxplot-scaled.pdf') 14 | -------------------------------------------------------------------------------- /figures/fig-r/10-r-pr.R: -------------------------------------------------------------------------------- 1 | pdf(file = "10-r-pr.pdf", width = 6, height = 5) 2 | 3 | library(PRROC) 4 | library(tidyverse) 5 | 6 | y <- c( 0, 1, 1, 0, 1, 0, 1, 0, 0, 1) 7 | y_score <- c(0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5) 8 | 9 | my_pr <- pr.curve(scores.class0 = y_score[y == 1], 10 | scores.class1 = y_score[y == 0], 11 | curve = TRUE) 12 | my_pr %>% plot(xlab = "Recall", 13 | ylab = "Precision", 14 | legend = FALSE) 15 | -------------------------------------------------------------------------------- /figures/fig-p/10-p-roc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | y = np.array([ 0, 1, 1, 0, 1, 0, 1, 0, 0, 1]) 3 | y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5]) 4 | 5 | from 
sklearn.metrics import roc_curve, RocCurveDisplay 6 | 7 | my_fpr, my_tpr, _ = roc_curve(y_true=y, 8 | y_score=y_score, 9 | pos_label=1) 10 | RocCurveDisplay(fpr=my_fpr, tpr=my_tpr).plot() 11 | 12 | import matplotlib.pyplot as plt 13 | plt.savefig('10-p-roc.pdf') 14 | -------------------------------------------------------------------------------- /figures/fig-r/07-r-regression.R: -------------------------------------------------------------------------------- 1 | pdf(file = "07-r-regression.pdf", width = 6, height = 4.5) 2 | 3 | library(tidyverse) 4 | 5 | my_data <- cars 6 | tmp <- data.frame(speed = 21.5, dist = 67) 7 | my_data %>% ggplot(aes(x = speed, y = dist)) + 8 | coord_cartesian(xlim = c(4, 25), ylim = c(0, 120)) + 9 | geom_point() + 10 | stat_smooth(formula = y ~ x, method = "lm") + 11 | geom_linerange(data = tmp, aes(ymin = -9, ymax = dist), linetype = "dotted") + 12 | geom_linerange(data = tmp, aes(xmin = 0, xmax = speed), linetype = "dotted") 13 | -------------------------------------------------------------------------------- /figures/fig-r/10-r-roc.R: -------------------------------------------------------------------------------- 1 | pdf(file = "10-r-roc.pdf", width = 6, height = 5) 2 | 3 | library(PRROC) 4 | library(tidyverse) 5 | 6 | y <- c( 0, 1, 1, 0, 1, 0, 1, 0, 0, 1) 7 | y_score <- c(0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5) 8 | 9 | my_roc <- roc.curve(scores.class0 = y_score[y == 1], 10 | scores.class1 = y_score[y == 0], 11 | curve = TRUE) 12 | my_roc %>% plot(xlab = "False Positive Rate", 13 | ylab = "True Positive Rate", 14 | legend = FALSE) 15 | -------------------------------------------------------------------------------- /figures/fig-p/13-p-heatmap.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import seaborn as sns 3 | 4 | my_data = pd.DataFrame( 5 | {'language': [ 0, 20, 20, 25, 22, 17], 6 | 'english': [ 0, 20, 40, 20, 24, 18], 7 | 'math': [100, 20, 5, 30, 17, 
25], 8 | 'science': [ 0, 20, 5, 25, 16, 23], 9 | 'society': [ 0, 20, 30, 0, 21, 17]}, 10 | index= ['A', 'B', 'C', 'D', 'E', 'F']) 11 | 12 | sns.clustermap(my_data, z_score=1) # 列ごとの標準化 13 | 14 | import matplotlib.pyplot as plt 15 | plt.savefig('13-p-heatmap.pdf') 16 | -------------------------------------------------------------------------------- /figures/fig-r/13-r-heatmap.R: -------------------------------------------------------------------------------- 1 | pdf(file = "13-r-heatmap.pdf", width = 6, height = 5.5) 2 | 3 | library(tidyverse) 4 | 5 | my_data <- data.frame( 6 | language = c( 0, 20, 20, 25, 22, 17), 7 | english = c( 0, 20, 40, 20, 24, 18), 8 | math = c(100, 20, 5, 30, 17, 25), 9 | science = c( 0, 20, 5, 25, 16, 23), 10 | society = c( 0, 20, 30, 0, 21, 17), 11 | row.names = c("A", "B", "C", "D", "E", "F")) 12 | 13 | my_data %>% scale %>% # 列ごとの標準化 14 | gplots::heatmap.2(cexRow = 1, cexCol = 1) # ラベルのサイズを指定して描画する. 15 | -------------------------------------------------------------------------------- /figures/fig-r/10-r-rpart3.R: -------------------------------------------------------------------------------- 1 | pdf(file = "10-r-rpart3.pdf", width = 6, height = 5.5) 2 | 3 | library(caret) 4 | library(tidyverse) 5 | 6 | my_url <- str_c("https://raw.githubusercontent.com", 7 | "/taroyabuki/fromzero/master/data/titanic.csv") 8 | my_data <- read_csv(my_url) 9 | 10 | my_model3 <- train(form = Survived ~ Class, data = my_data, method = "rpart2", 11 | tuneGrid = data.frame(maxdepth = 2), 12 | trControl = trainControl(method = "LOOCV")) 13 | rpart.plot::rpart.plot(my_model3$finalModel, extra = 1) 14 | -------------------------------------------------------------------------------- /figures/fig-r/10-r-titanic-tree.R: -------------------------------------------------------------------------------- 1 | pdf(file = "10-r-titanic-tree.pdf", width = 6, height = 5) 2 | 3 | library(caret) 4 | library(tidyverse) 5 | 6 | my_url <- 
str_c("https://raw.githubusercontent.com", 7 | "/taroyabuki/fromzero/master/data/titanic.csv") 8 | my_data <- read_csv(my_url) 9 | 10 | my_model <- train(form = Survived ~ ., data = my_data, method = "rpart2", 11 | tuneGrid = data.frame(maxdepth = 2), 12 | trControl = trainControl(method = "none")) 13 | rpart.plot::rpart.plot(my_model$finalModel, extra = 1) 14 | -------------------------------------------------------------------------------- /figures/fig-r/13-r-biplot.R: -------------------------------------------------------------------------------- 1 | pdf(file = "13-r-biplot.pdf", width = 5.83, height = 4.13) 2 | 3 | library(tidyverse) 4 | 5 | my_data <- data.frame( 6 | language = c( 0, 20, 20, 25, 22, 17), 7 | english = c( 0, 20, 40, 20, 24, 18), 8 | math = c(100, 20, 5, 30, 17, 25), 9 | science = c( 0, 20, 5, 25, 16, 23), 10 | society = c( 0, 20, 30, 0, 21, 17)) 11 | row.names(my_data) <- c("A", "B", "C", "D", "E", "F") 12 | 13 | my_result <- my_data %>% prcomp # 主成分分析(標準化なし) 14 | 15 | my_result %>% ggbiplot::ggbiplot(labels = row.names(my_data), scale = 0) 16 | -------------------------------------------------------------------------------- /docker/jupyter/README.md: -------------------------------------------------------------------------------- 1 | # Jupyter Notebook用のコンテナ 2 | 3 | - Docker Hub: https://hub.docker.com/r/taroyabuki/jupyter 4 | - 起動方法(3種類) 5 | - [jupyter.sh](../jupyter.sh)を実行する. 6 | - `wget https://raw.githubusercontent.com/taroyabuki/fromzero/master/docker/jupyter.sh`の後で,`sh jupyter.sh` 7 | - `git clone https://github.com/taroyabuki/fromzero.git`の後で,`sh fromzero/docker/jupyter.sh` 8 | - Jupyter Notebookへのアクセス:http://localhost:8888 9 | - Apple Chipについての注意 10 | - Docker desktop 4.4.2で動作を確認しました.(4.1から4.3では動作しませんでした.) 11 | - 11章のコードは実行できません.11章を読む際には,Google Colabを使ってください. 
12 | -------------------------------------------------------------------------------- /figures/fig-r/07-r-lm.R: -------------------------------------------------------------------------------- 1 | pdf(file = "07-r-lm.pdf", width = 6, height = 4.5) 2 | 3 | library(caret) 4 | library(tidyverse) 5 | my_data <- cars 6 | 7 | my_model <- train(form = dist ~ speed, # モデル式 8 | data = my_data, # データ 9 | method = "lm") # 手法 10 | 11 | f <- function(x) { my_model %>% predict(data.frame(speed = x)) } 12 | 13 | my_data %>% 14 | ggplot(aes(x = speed, 15 | y = dist, 16 | color = "data")) + 17 | geom_point() + 18 | stat_function( 19 | fun = f, 20 | mapping = aes(color = "model")) 21 | -------------------------------------------------------------------------------- /figures/fig-r/08-r-boxplot-scaled.R: -------------------------------------------------------------------------------- 1 | pdf(file = "08-r-boxplot-scaled.pdf", width = 6, height = 4.5) 2 | 3 | library(tidyverse) 4 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki", 5 | "/fromzero/master/data/wine.csv") 6 | my_data <- read_csv(my_url) 7 | 8 | my_data %>% 9 | mutate_if(is.numeric, scale) %>% # 数値の列の標準化 10 | pivot_longer(-LPRICE2) %>% 11 | ggplot(aes(x = factor(name, levels = names(my_data[, -1])), 12 | y = value)) + 13 | geom_boxplot() + 14 | stat_summary(fun = mean, geom = "point", size = 3) + 15 | xlab(NULL) 16 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-conf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from statsmodels.stats.proportion import binom_test 4 | 5 | a = 0.05 # 有意水準 6 | tmp = np.linspace(0, 1, 100) 7 | 8 | my_df = pd.DataFrame({ 9 | 't': tmp, # 当たる確率 10 | 'q': a, # 水平線 11 | 'p': [binom_test(count=2, nobs=15, prop=t) for t in tmp]}) # p値 12 | 13 | my_df.plot(x='t', legend=None, xlabel=r'$\theta$', ylabel=r'p-value') 14 | 15 | import 
matplotlib.pyplot as plt 16 | plt.savefig('04-p-conf.pdf') 17 | -------------------------------------------------------------------------------- /figures/fig-p/13-p-biplot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | my_data = pd.DataFrame( 3 | {'language': ( 0, 20, 20, 25, 22, 17), 4 | 'english': ( 0, 20, 40, 20, 24, 18), 5 | 'math': (100, 20, 5, 30, 17, 25), 6 | 'science': ( 0, 20, 5, 25, 16, 23), 7 | 'society': ( 0, 20, 30, 0, 21, 17)}, 8 | index=['A', 'B', 'C', 'D', 'E', 'F']) 9 | 10 | from pca import pca 11 | my_model = pca(n_components=5) 12 | my_result = my_model.fit_transform(my_data) 13 | 14 | my_model.biplot(legend=False) # バイプロット 15 | 16 | import matplotlib.pyplot as plt 17 | plt.savefig('13-p-biplot.pdf') 18 | -------------------------------------------------------------------------------- /figures/fig-r/09-r-rpart2.R: -------------------------------------------------------------------------------- 1 | pdf(file = "09-r-rpart2.pdf", width = 6, height = 5.5) 2 | 3 | library(caret) 4 | my_data <- iris 5 | 6 | my_model <- train(form = Species ~ ., data = my_data, method = "rpart2", 7 | trControl = trainControl(method = "none"), 8 | tuneGrid = data.frame(maxdepth = 3), 9 | control = rpart::rpart.control(cp = 0.01, 10 | minbucket = 5, 11 | minsplit = 2)) 12 | 13 | rpart.plot::rpart.plot( 14 | my_model$finalModel, extra = 1) 15 | -------------------------------------------------------------------------------- /figures/fig-p/07-p-residual.py: -------------------------------------------------------------------------------- 1 | import statsmodels.api as sm 2 | from sklearn.linear_model import LinearRegression 3 | 4 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data 5 | X, y = my_data[['speed']], my_data['dist'] 6 | 7 | my_model = LinearRegression() 8 | my_model.fit(X, y) 9 | y_ = my_model.predict(X) 10 | my_data['y_'] = y_ 11 | 12 | ax = my_data.plot(x='speed', y='dist', style='o', 
legend=False) 13 | my_data.plot(x='speed', y='y_', style='-', legend=False, ax=ax) 14 | ax.vlines(x=X, ymin=y, ymax=y_, linestyles='dotted') 15 | 16 | import matplotlib.pyplot as plt 17 | plt.savefig('07-p-residual.pdf') 18 | -------------------------------------------------------------------------------- /data/exam.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | experiment results 4 | 5 | 6 | f 7 | A 8 | 9 | 10 | m 11 | B 12 | 13 | 14 | m 15 | C 16 | 17 | 18 | f 19 | D 20 | 21 | 22 | -------------------------------------------------------------------------------- /figures/fig-r/07-r-knn.R: -------------------------------------------------------------------------------- 1 | pdf(file = "07-r-knn.pdf", width = 6, height = 4.5) 2 | 3 | library(caret) 4 | library(tidyverse) 5 | my_data <- cars 6 | my_model <- train(form = dist ~ speed, # モデル式 7 | data = my_data, # データ 8 | method = "knn", # 手法 9 | tuneGrid = data.frame(k = 5)) 10 | 11 | f <- function(x) { my_model %>% predict(data.frame(speed = x)) } 12 | 13 | my_data %>% 14 | ggplot(aes(x = speed, 15 | y = dist, 16 | color = "data")) + 17 | geom_point() + 18 | stat_function( 19 | fun = f, 20 | mapping = aes(color = "model")) 21 | -------------------------------------------------------------------------------- /figures/fig-r/08-r-enet-path.R: -------------------------------------------------------------------------------- 1 | pdf(file = "08-r-enet-path.pdf", width = 6, height = 4.5) 2 | 3 | library(tidyverse) 4 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki", 5 | "/fromzero/master/data/wine.csv") 6 | my_data <- read_csv(my_url) 7 | 8 | library(ggfortify) 9 | library(glmnetUtils) 10 | 11 | my_data2 <- my_data %>% 12 | mutate_all(scale) # 標準化 13 | 14 | B <- 0.1 15 | 16 | glmnet( 17 | form = LPRICE2 ~ ., 18 | data = my_data2, 19 | alpha = B) %>% 20 | autoplot(xvar = "lambda") + 21 | xlab("log A ( = log lambda)") + 22 | theme(legend.position = 23 | c(0.15, 
0.25)) 24 | -------------------------------------------------------------------------------- /figures/fig-r/13-r-pca-clusters.R: -------------------------------------------------------------------------------- 1 | pdf(file = "13-r-pca-clusters.pdf", width = 6, height = 4.5) 2 | 3 | library(tidyverse) 4 | my_data <- iris[, -5] %>% scale 5 | 6 | my_result <- prcomp(my_data)$x %>% as.data.frame # 主成分分析 7 | 8 | # 非階層的クラスタ分析の場合 9 | my_result$cluster <- (my_data %>% scale %>% kmeans(3))$cluster %>% as.factor 10 | 11 | # 階層的クラスタ分析の場合 12 | #my_result$cluster <- my_data %>% dist %>% hclust %>% cutree(3) %>% as.factor 13 | 14 | my_result %>% 15 | ggplot(aes(x = PC1, y = PC2, color = cluster)) + # 色でクラスタを表現する. 16 | geom_point(shape = iris$Species) + # 形で品種を表現する. 17 | theme(legend.position = "none") 18 | -------------------------------------------------------------------------------- /figures/fig-p/10-p-pr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | y = np.array([ 0, 1, 1, 0, 1, 0, 1, 0, 0, 1]) 3 | y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5]) 4 | 5 | from sklearn.metrics import precision_recall_curve, PrecisionRecallDisplay 6 | 7 | my_precision, my_recall, _ = precision_recall_curve(y_true=y, 8 | probas_pred=y_score, 9 | pos_label=1) 10 | PrecisionRecallDisplay(precision=my_precision, recall=my_recall).plot() 11 | 12 | import matplotlib.pyplot as plt 13 | plt.savefig('10-p-pr.pdf') 14 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-conf.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-conf.pdf", width = 6, height = 5.5) 2 | 3 | library(exactci) 4 | 5 | a <- 0.05 # 有意水準 6 | binom.exact(x = 2, # 当たった回数 7 | n = 15, # くじを引いた回数 8 | p = 4 / 10, # 当たる確率(仮説) 9 | plot = TRUE, # p値の描画 10 | conf.level = 1 - a, # 信頼係数(デフォルト) 11 | tsmethod = "minlike", # 両側p値の使用 12 | alternative = 
"two.sided") # 両側検定(デフォルト) 13 | # 左片側検定なら'less' 14 | # 右片側検定なら'greater' 15 | -------------------------------------------------------------------------------- /figures/fig-r/07-r-boxplot.R: -------------------------------------------------------------------------------- 1 | pdf(file = "07-r-boxplot.pdf", width = 6, height = 5.5) 2 | 3 | library(caret) 4 | my_data <- cars 5 | 6 | my_lm_model <- train(form = dist ~ speed, data = my_data, method = "lm", 7 | trControl = trainControl(method = "LOOCV")) 8 | 9 | my_knn_model <- train(form = dist ~ speed, data = my_data, method = "knn", 10 | tuneGrid = data.frame(k = 5), 11 | trControl = trainControl(method = "LOOCV")) 12 | y <- my_data$dist 13 | 14 | my_df <- data.frame( 15 | lm = (y - my_lm_model$pred$pred)^2, 16 | knn = (y - my_knn_model$pred$pred)^2) 17 | 18 | boxplot(my_df, ylab = "r^2") 19 | -------------------------------------------------------------------------------- /figures/fig-p/09-p-rpart.py: -------------------------------------------------------------------------------- 1 | import graphviz 2 | import statsmodels.api as sm 3 | from sklearn import tree 4 | 5 | my_data = sm.datasets.get_rdataset('iris', 'datasets').data 6 | X, y = my_data.iloc[:, 0:4], my_data.Species 7 | 8 | my_model = tree.DecisionTreeClassifier(max_depth=2, random_state=0) 9 | my_model.fit(X, y) 10 | 11 | my_dot = tree.export_graphviz(decision_tree=my_model, 12 | out_file=None, 13 | feature_names=X.columns, 14 | class_names=my_model.classes_, 15 | filled=True) 16 | my_graph = graphviz.Source(my_dot) 17 | my_graph.render('09-p-rpart') 18 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-iris-group.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-iris-group.pdf", width = 6, height = 5) 2 | 3 | library(tidyverse) 4 | my_group <- iris %>% group_by(Species) 5 | 6 | my_df <- my_group %>% 7 | summarize(across(everything(), mean)) %>% # 各列の平均 
8 | pivot_longer(-Species) 9 | 10 | # 標準誤差を求める関数 11 | f <- function(x) { sd(x) / length(x)**0.5 } 12 | 13 | tmp <- my_group %>% 14 | summarize(across(everything(), f)) %>% # 各列の標準誤差 15 | pivot_longer(-Species) 16 | 17 | my_df$se <- tmp$value 18 | my_df %>% 19 | ggplot(aes(x = Species, y = value, fill = name)) + 20 | geom_col(position = "dodge") + 21 | geom_errorbar(aes(ymin = value - se, ymax = value + se), position = "dodge") 22 | -------------------------------------------------------------------------------- /figures/fig-r/10-r-rpart1.R: -------------------------------------------------------------------------------- 1 | pdf(file = "10-r-rpart1.pdf", width = 6, height = 5.5) 2 | 3 | library(caret) 4 | library(tidyverse) 5 | 6 | my_url <- str_c("https://raw.githubusercontent.com", 7 | "/taroyabuki/fromzero/master/data/titanic.csv") 8 | my_data <- read_csv(my_url) 9 | 10 | X <- my_data %>% select(Class) 11 | y <- my_data$Survived 12 | 13 | options(warn = -1) # 警告を非表示にする.(tribbleに関する警告) 14 | my_model1 <- train(x = X, y = y, method = "rpart2", 15 | tuneGrid = data.frame(maxdepth = 2), 16 | trControl = trainControl(method = "LOOCV")) 17 | options(warn = 0) # 警告を表示する. 
18 | rpart.plot::rpart.plot(my_model1$finalModel, extra = 1) 19 | -------------------------------------------------------------------------------- /figures/fig-p/12-p-airpassengers-split.py: -------------------------------------------------------------------------------- 1 | from pmdarima.datasets import airpassengers 2 | my_data = airpassengers.load_airpassengers() 3 | 4 | n = len(my_data) 5 | k = 108 6 | 7 | import pandas as pd 8 | my_ds = pd.date_range( 9 | start='1949/01/01', 10 | end='1960/12/01', 11 | freq='MS') 12 | my_df = pd.DataFrame({ 13 | 'ds': my_ds, 14 | 'x': range(n), 15 | 'y': my_data}, 16 | index=my_ds) 17 | 18 | my_train = my_df[ :k] 19 | my_test = my_df[-(n - k): ] 20 | y = my_test.y 21 | 22 | import matplotlib.pyplot as plt 23 | plt.plot(my_train.y, label='train') 24 | plt.plot(my_test.y, label='test') 25 | plt.legend() 26 | plt.savefig('12-p-airpassengers-split.pdf') 27 | -------------------------------------------------------------------------------- /figures/fig-r/11-r-h2o-wine.R: -------------------------------------------------------------------------------- 1 | pdf(file = "11-r-h2o-wine.pdf", width = 6, height = 5.5) 2 | 3 | library(h2o) 4 | library(tidyverse) 5 | 6 | h2o.init() 7 | h2o.no_progress() 8 | 9 | my_url <- str_c("https://raw.githubusercontent.com", 10 | "/taroyabuki/fromzero/master/data/wine.csv") 11 | my_data <- read_csv(my_url) 12 | my_frame <- as.h2o(my_data) 13 | 14 | my_model <- h2o.automl( 15 | y = "LPRICE2", # 出力変数名 16 | training_frame = my_frame, # H2OFrame 17 | max_runtime_secs = 60) # 訓練時間(秒) 18 | 19 | min(my_model@leaderboard$rmse) 20 | 21 | tmp <- my_model %>% predict(my_frame) %>% 22 | as.data.frame 23 | y_ <- tmp$predict 24 | y <- my_data$LPRICE2 25 | 26 | plot(y, y_) 27 | -------------------------------------------------------------------------------- /figures/fig-r/12-r-airpassengers-split.R: -------------------------------------------------------------------------------- 1 | pdf(file = 
"12-r-airpassengers-split.pdf", width = 5.83, height = 4.13) 2 | 3 | my_data <- as.vector(AirPassengers) 4 | 5 | n <- length(my_data) # データ数(144) 6 | k <- 108 # 訓練データ数 7 | 8 | library(tidyverse) 9 | library(tsibble) 10 | 11 | my_ds <- seq( 12 | from = yearmonth("1949/01"), 13 | to = yearmonth("1960/12"), 14 | by = 1) 15 | my_label <- rep( 16 | c("train", "test"), 17 | c(k, n - k)) 18 | my_df <- tsibble( 19 | ds = my_ds, 20 | x = 0:(n - 1), 21 | y = my_data, 22 | label = my_label, 23 | index = ds) # 日時の列の指定 24 | 25 | my_plot <- my_df %>% 26 | ggplot(aes(x = ds, y = y, color = label)) + 27 | geom_line() 28 | my_plot 29 | -------------------------------------------------------------------------------- /figures/fig-r/07-r-polynomial.R: -------------------------------------------------------------------------------- 1 | pdf(file = "07-r-polynomial.pdf", width = 6, height = 4.5) 2 | 3 | library(caret) 4 | library(tidyverse) 5 | my_data <- cars 6 | my_idx <- c(2, 11, 27, 34, 39, 44) 7 | my_sample <- my_data[my_idx, ] 8 | 9 | my_model <- train(form = dist ~ poly(speed, degree = 5, raw = TRUE), 10 | data = my_sample, 11 | method = "lm") 12 | 13 | f <- function(x) { my_model %>% predict(data.frame(speed = x)) } 14 | 15 | my_data %>% 16 | ggplot(aes(x = speed, y = dist, color = "data")) + 17 | geom_point() + 18 | geom_point(data = my_sample, mapping = aes(color = "sample")) + 19 | stat_function(fun = f, mapping = aes(color = "model")) + 20 | coord_cartesian(ylim = c(0, 120)) 21 | -------------------------------------------------------------------------------- /figures/fig-p/07-p-knn.py: -------------------------------------------------------------------------------- 1 | # 準備 2 | import statsmodels.api as sm 3 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data 4 | X, y = my_data[['speed']], my_data['dist'] 5 | 6 | # 訓練 7 | from sklearn.neighbors import KNeighborsRegressor 8 | my_model = KNeighborsRegressor() 9 | my_model.fit(X, y) 10 | 11 | # 可視化の準備 12 | import 
numpy as np 13 | import pandas as pd 14 | tmp = pd.DataFrame({'speed': np.linspace(min(my_data.speed), 15 | max(my_data.speed), 16 | num=100)}) 17 | tmp['model'] = my_model.predict(tmp) 18 | 19 | pd.concat([my_data, tmp]).plot( 20 | x='speed', style=['o', '-']) 21 | import matplotlib.pyplot as plt 22 | plt.savefig('07-p-knn.pdf') 23 | -------------------------------------------------------------------------------- /figures/fig-r/04-r-pvalue1.R: -------------------------------------------------------------------------------- 1 | pdf(file = "04-r-pvalue1.pdf", width = 5.83, height = 4.13) 2 | 3 | library(tidyverse) 4 | 5 | t <- 4 / 10 # 当たる確率 6 | n <- 15 # くじを引いた回数 7 | x <- 0:n # 当たった回数 8 | my_pr <- dbinom(x, n, t) # x回当たる確率 9 | my_pr2 <- dbinom(2, n, t) # 2回当たる確率 10 | 11 | my_data <- data.frame(x = x) %>% 12 | mutate(probability = my_pr) %>% 13 | mutate(color = my_pr <= my_pr2) # 当たる確率が,2回当たる確率以下 14 | 15 | my_data %>% ggplot(aes(x = x, y = probability, color = color)) + 16 | geom_point(size = 3) + 17 | geom_linerange(aes(ymin = 0, ymax = probability), ) + # 垂直線 18 | geom_hline(yintercept = my_pr2) + # 水平線 19 | theme(legend.position = "none") 20 | -------------------------------------------------------------------------------- /figures/fig-r/10-r-rpart2.R: -------------------------------------------------------------------------------- 1 | pdf(file = "10-r-rpart2.pdf", width = 6, height = 5.5) 2 | 3 | library(caret) 4 | library(tidyverse) 5 | 6 | my_url <- str_c("https://raw.githubusercontent.com", 7 | "/taroyabuki/fromzero/master/data/titanic.csv") 8 | my_data <- read_csv(my_url) 9 | 10 | my_enc <- my_data %>% dummyVars(formula = Survived ~ Class) 11 | my_data2 <- my_enc %>% 12 | predict(my_data) %>% 13 | as.data.frame %>% 14 | mutate(Survived = my_data$Survived) 15 | 16 | my_model2 <- train(form = Survived ~ ., data = my_data2, method = "rpart2", 17 | tuneGrid = data.frame(maxdepth = 2), 18 | trControl = trainControl(method = "LOOCV")) 19 | 
rpart.plot::rpart.plot(my_model2$finalModel, extra = 1) 20 | -------------------------------------------------------------------------------- /figures/fig-p/07-p-lm.py: -------------------------------------------------------------------------------- 1 | # データの準備 2 | import statsmodels.api as sm 3 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data 4 | X, y = my_data[['speed']], my_data['dist'] 5 | 6 | # モデルの指定 7 | from sklearn.linear_model import LinearRegression 8 | my_model = LinearRegression() 9 | 10 | # モデルをデータにフィットさせる. 11 | my_model.fit(X, y) 12 | 13 | import numpy as np 14 | import pandas as pd 15 | tmp = pd.DataFrame({'speed': np.linspace(min(my_data.speed), 16 | max(my_data.speed), 17 | 100)}) 18 | tmp['model'] = my_model.predict(tmp) 19 | 20 | pd.concat([my_data, tmp]).plot( 21 | x='speed', style=['o', '-']) 22 | 23 | import matplotlib.pyplot as plt 24 | plt.savefig('07-p-lm.pdf') 25 | -------------------------------------------------------------------------------- /figures/fig-p/11-p-h2o-wine.py: -------------------------------------------------------------------------------- 1 | import h2o 2 | import pandas as pd 3 | from h2o.automl import H2OAutoML 4 | 5 | h2o.init() 6 | h2o.no_progress() 7 | 8 | my_url = ('https://raw.githubusercontent.com' 9 | '/taroyabuki/fromzero/master/data/wine.csv') 10 | my_data = pd.read_csv(my_url) 11 | my_frame = h2o.H2OFrame(my_data) 12 | 13 | my_model = H2OAutoML( 14 | max_runtime_secs=60) 15 | my_model.train( 16 | y='LPRICE2', 17 | training_frame=my_frame) 18 | 19 | print(my_model.leaderboard['rmse'].min()) 20 | 21 | tmp = h2o.as_list( 22 | my_model.predict(my_frame)) 23 | 24 | pd.DataFrame({ 25 | 'y': my_data['LPRICE2'], 26 | 'y_': tmp['predict']} 27 | ).plot('y', 'y_', kind='scatter') 28 | 29 | import matplotlib.pyplot as plt 30 | plt.savefig('11-p-h2o-wine.pdf') 31 | -------------------------------------------------------------------------------- /figures/fig-p/04-p-pvalue1.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from scipy import stats 4 | 5 | t = 4 / 10 # 当たる確率 6 | n = 15 # くじを引いた回数 7 | x = np.array(range(0, n + 1)) # 当たった回数 8 | my_pr = stats.binom.pmf(x, n, t) # x回当たる確率 9 | my_pr2 = stats.binom.pmf(2, n, t) # 2回当たる確率 10 | 11 | my_data = pd.DataFrame({'x': x, 'y1': my_pr, 'y2': my_pr}) 12 | my_data.loc[my_pr > my_pr2, 'y1'] = np.nan # 当たる確率が,2回当たる確率超過 13 | my_data.loc[my_pr <= my_pr2, 'y2'] = np.nan # 当たる確率が,2回当たる確率以下 14 | ax = my_data.plot(x='x', style='o', ylabel='probability', legend=False) 15 | ax.hlines(y=my_pr2, xmin=0, xmax=15) # 水平線 16 | ax.vlines(x=x, ymin=0, ymax=my_pr) # 垂直線 17 | 18 | import matplotlib.pyplot as plt 19 | plt.savefig('04-p-pvalue1.pdf') 20 | -------------------------------------------------------------------------------- /figures/fig-p/08-p-enet-path.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | my_url = ('https://raw.githubusercontent.com/taroyabuki/' + 3 | 'fromzero/master/data/wine.csv') 4 | my_data = pd.read_csv(my_url) 5 | X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2'] 6 | 7 | import numpy as np 8 | from scipy.stats import zscore 9 | from sklearn.linear_model import enet_path 10 | 11 | As = np.e**np.arange(2, -5.5, -0.1) 12 | B = 0.1 13 | 14 | _, my_path, _ = enet_path( 15 | zscore(X), zscore(y), 16 | alphas=As, 17 | l1_ratio=B) 18 | 19 | pd.DataFrame( 20 | my_path.T, 21 | columns=X.columns, 22 | index=np.log(As) 23 | ).plot( 24 | xlabel='log A ( = log alpha)', 25 | ylabel='Coefficients') 26 | 27 | import matplotlib.pyplot as plt 28 | plt.savefig('08-p-enet-path.pdf') 29 | -------------------------------------------------------------------------------- /figures/fig-r/08-r-nnet-3-2.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | my_url <- 
str_c("https://raw.githubusercontent.com/taroyabuki", 3 | "/fromzero/master/data/wine.csv") 4 | my_data <- read_csv(my_url) 5 | 6 | library(caret) 7 | my_model <- train(form = LPRICE2 ~ ., 8 | data = my_data, 9 | method = "neuralnet", 10 | preProcess = c("center", "scale"), 11 | tuneGrid = data.frame(layer1 = 3, 12 | layer2 = 2, 13 | layer3 = 0), 14 | trControl = trainControl(method = "repeatedcv", 15 | number = 5, repeats = 10)) 16 | plot(my_model$finalModel) 17 | file.rename("Rplots.pdf", "08-r-nnet-3-2.pdf") 18 | -------------------------------------------------------------------------------- /addendum/07.03.02/1+3x+N(0,2x).csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 1,2.4362828056041783 3 | 2,13.320701642205943 4 | 3,6.254185478549559 5 | 4,12.158111887716473 6 | 5,-7.54294281288999 7 | 6,20.03682705412517 8 | 7,-20.367878122873076 9 | 8,37.62187087244209 10 | 9,28.888791768212027 11 | 10,23.865697903729448 12 | 11,20.35783532598032 13 | 12,24.90149878334255 14 | 13,40.93320462020407 15 | 14,83.5879864420934 16 | 15,45.15357518820319 17 | 16,8.733098913685623 18 | 17,82.25121873688809 19 | 18,64.23168654178178 20 | 19,58.72577858048793 21 | 20,12.867497576908818 22 | 21,52.88624028231115 23 | 22,154.2860167538335 24 | 23,176.96876331325072 25 | 24,158.57606765038622 26 | 25,59.5796921671421 27 | 26,26.85539442891543 28 | 27,73.23288430129338 29 | 28,51.56442153204847 30 | 29,49.82876803737508 31 | 30,148.41719344129336 32 | -------------------------------------------------------------------------------- /figures/fig/図3.1.md: -------------------------------------------------------------------------------- 1 | ```puml 2 | @startuml 3 | skinparam { 4 | defaultFontName Hiragino Kaku Gothic ProN 5 | monochrome true 6 | shadowing false 7 | } 8 | 9 | package R { 10 | rectangle y as y1 #white 11 | rectangle x as x1 #white 12 | rectangle list1a #white;line:white as " 13 | | foo | bar | baz |" 14 | rectangle list1b 
#white;line:white as " 15 | | foo | bar | baz |" 16 | 17 | x1 --> list1a 18 | y1 --> list1b 19 | x1 -[dotted]> y1 : "y <- x" 20 | list1a -[dotted]> list1b: copy 21 | } 22 | 23 | package Python { 24 | rectangle y as y2 #white 25 | rectangle x as x2 #white 26 | rectangle list2 #white;line:white as " 27 | | foo | bar | baz |" 28 | 29 | x2 --> list2 30 | y2 --> list2 31 | x2 -[dotted]> y2 : "y = x" 32 | } 33 | @enduml 34 | ``` 35 | -------------------------------------------------------------------------------- /figures/fig-r/08-r-enet-tuning.R: -------------------------------------------------------------------------------- 1 | pdf(file = "08-r-enet-tuning.pdf", width = 6, height = 4.5) 2 | 3 | library(caret) 4 | library(tidyverse) 5 | my_url <- str_c("https://raw.githubusercontent.com/taroyabuki", 6 | "/fromzero/master/data/wine.csv") 7 | my_data <- read_csv(my_url) 8 | 9 | As <- seq(0, 0.1, length.out = 21) 10 | Bs <- seq(0, 0.1, length.out = 6) 11 | 12 | my_model <- train( 13 | form = LPRICE2 ~ ., data = my_data, method = "glmnet", standardize = TRUE, 14 | trControl = trainControl(method = "LOOCV"), 15 | tuneGrid = expand.grid(lambda = As, alpha = Bs)) 16 | 17 | tmp <- "B ( = alpha)" 18 | ggplot(my_model) + 19 | theme(legend.position = c(0, 1), legend.justification = c(0, 1)) + 20 | xlab("A ( = lambda)") + 21 | guides(shape = guide_legend(tmp), color = guide_legend(tmp)) 22 | -------------------------------------------------------------------------------- /figures/howtomake.md: -------------------------------------------------------------------------------- 1 | # 画像の生成方法 2 | 3 | コンテナjupyterかrstudioを使います(コンテナの生成方法は2.3節を参照). 4 | 5 | ```bash 6 | docker exec -it jr bash 7 | # あるいは 8 | docker exec -it rs bash 9 | ``` 10 | 11 | 以下はコンテナでの作業です. 
12 | 13 | ## 準備 14 | 15 | ```bash 16 | apt update && apt install -y texlive-extra-utils pdf2svg 17 | 18 | #cd work # 結果をホスト側に保存する場合 19 | git clone https://github.com/taroyabuki/fromzero.git 20 | cd fromzero/figures 21 | ``` 22 | 23 | 画像(PDFとSVG)を作ります. 24 | `-j`のあとの数値はCPUコアの数程度にしてください. 25 | ファイル(`*.R`や`*.py`)を更新したら,`make`以下を実行します. 26 | 更新されたものだけが,再生成されます. 27 | 28 | 29 | ## Rの図を作る場合 30 | 31 | ```bash 32 | cd fig-r 33 | #make clean # すべて生成し直す場合 34 | make -j 35 | cd .. 36 | ``` 37 | 38 | ## Pythonの図を作る場合 39 | 40 | ```bash 41 | cd fig-p 42 | #make clean # すべて生成し直す場合 43 | make -j 44 | cd .. 45 | ``` 46 | -------------------------------------------------------------------------------- /data/wine.csv: -------------------------------------------------------------------------------- 1 | LPRICE2,WRAIN,DEGREES,HRAIN,TIME_SV 2 | -0.99868,600,17.1167,160,31 3 | -0.4544,690,16.7333,80,30 4 | -0.80796,502,17.15,130,28 5 | -1.50926,420,16.1333,110,26 6 | -1.71655,582,16.4167,187,25 7 | -0.418,485,17.4833,187,24 8 | -1.97491,763,16.4167,290,23 9 | 0,830,17.3333,38,22 10 | -1.10572,697,16.3,52,21 11 | -1.78098,608,15.7167,155,20 12 | -1.18435,402,17.2667,96,19 13 | -2.24194,602,15.3667,267,18 14 | -0.74943,819,16.5333,86,17 15 | -1.65388,714,16.2333,118,16 16 | -2.25018,610,16.2,292,15 17 | -2.14784,575,16.55,244,14 18 | -0.90544,622,16.6667,89,13 19 | -1.30031,551,16.7667,112,12 20 | -2.28879,536,14.9833,158,11 21 | -1.857,376,17.0667,123,10 22 | -2.19958,574,16.3,184,9 23 | -1.20168,572,16.95,171,8 24 | -1.37264,418,17.65,247,7 25 | -2.23503,821,15.5833,87,6 26 | -1.30769,763,15.8167,51,5 27 | -1.5396,717,16.1667,122,4 28 | -1.99582,578,16,74,3 29 | -------------------------------------------------------------------------------- /figures/fig-p/07-p-boxplot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import statsmodels.api as sm 3 | from sklearn.linear_model import LinearRegression 4 | from sklearn.model_selection 
import cross_val_score, LeaveOneOut 5 | from sklearn.neighbors import KNeighborsRegressor 6 | 7 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data 8 | X, y = my_data[['speed']], my_data['dist'] 9 | 10 | my_lm_scores = cross_val_score( 11 | LinearRegression(), X, y, cv=LeaveOneOut(), scoring='neg_mean_squared_error') 12 | 13 | my_knn_socres = cross_val_score( 14 | KNeighborsRegressor(n_neighbors=5), X, y, cv=LeaveOneOut(), 15 | scoring='neg_mean_squared_error') 16 | 17 | my_df = pd.DataFrame({ 18 | 'lm': -my_lm_scores, 19 | 'knn': -my_knn_socres}) 20 | 21 | my_df.boxplot().set_ylabel("$r^2$") 22 | 23 | import matplotlib.pyplot as plt 24 | plt.savefig('07-p-boxplot.pdf') 25 | -------------------------------------------------------------------------------- /figures/fig-p/12-p-airpassengers-prophet.py: -------------------------------------------------------------------------------- 1 | from pmdarima.datasets import airpassengers 2 | my_data = airpassengers.load_airpassengers() 3 | 4 | n = len(my_data) 5 | k = 108 6 | 7 | import pandas as pd 8 | my_ds = pd.date_range( 9 | start='1949/01/01', 10 | end='1960/12/01', 11 | freq='MS') 12 | my_df = pd.DataFrame({ 13 | 'ds': my_ds, 14 | 'x': range(n), 15 | 'y': my_data}, 16 | index=my_ds) 17 | 18 | my_train = my_df[ :k] 19 | my_test = my_df[-(n - k): ] 20 | 21 | from fbprophet import Prophet 22 | my_prophet_model = Prophet(seasonality_mode='multiplicative') 23 | my_prophet_model.fit(my_train) 24 | 25 | tmp = my_prophet_model.predict(my_test) 26 | 27 | fig = my_prophet_model.plot(tmp) 28 | fig.axes[0].plot(my_train.ds, my_train.y) 29 | fig.axes[0].plot(my_test.ds, my_test.y, color='red') 30 | 31 | import matplotlib.pyplot as plt 32 | plt.savefig('12-p-airpassengers-prophet.pdf') 33 | -------------------------------------------------------------------------------- /figures/fig-p/13-p-pca-clusters.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import 
statsmodels.api as sm 3 | from pca import pca 4 | from scipy.cluster import hierarchy 5 | from scipy.stats import zscore 6 | from sklearn.cluster import KMeans 7 | 8 | iris = sm.datasets.get_rdataset('iris', 'datasets').data 9 | my_data = zscore(iris.iloc[:, 0:4]) 10 | 11 | my_model = pca() # 主成分分析 12 | my_result = my_model.fit_transform(my_data)['PC'] 13 | my_result['Species'] = list(iris.Species) 14 | 15 | # 非階層的クラスタ分析の場合 16 | my_result['cluster'] = KMeans(n_clusters=3).fit(my_data).labels_ 17 | 18 | # 階層的クラスタ分析の場合 19 | #my_result['cluster'] = hierarchy.cut_tree( 20 | # hierarchy.linkage(my_data, method='complete'), 3)[:,0] 21 | 22 | sns.scatterplot(x='PC1', y='PC2', data=my_result, 23 | hue='cluster', style='Species', palette='bright', legend=False) 24 | 25 | import matplotlib.pyplot as plt 26 | plt.savefig('13-p-pca-clusters.pdf') 27 | -------------------------------------------------------------------------------- /figures/fig-r/07-r-tuning-train.R: -------------------------------------------------------------------------------- 1 | pdf(file = "07-r-tuning-train.pdf", width = 6, height = 4.5) 2 | 3 | library(caret) 4 | library(tidyverse) 5 | my_data <- cars 6 | 7 | my_loocv <- function(k) { 8 | my_model <- train(form = dist ~ speed, data = my_data, method = "knn", 9 | tuneGrid = data.frame(k = k), 10 | trControl = trainControl(method = "LOOCV")) 11 | y <- my_data$dist 12 | y_ <- my_model %>% predict(my_data) 13 | list(k = k, 14 | training = RMSE(y_, y), # RMSE(訓練) 15 | validation = my_model$results$RMSE) # RMSE(検証) 16 | } 17 | 18 | my_results <- 1:15 %>% map_dfr(my_loocv) 19 | 20 | my_results %>% 21 | pivot_longer(-k) %>% 22 | ggplot(aes(x = k, y = value, 23 | color = name)) + 24 | geom_line() + geom_point() + 25 | xlab("#Neighbors") + ylab("RMSE") + 26 | theme(legend.position = c(1, 0), 27 | legend.justification = c(1, 0)) 28 | -------------------------------------------------------------------------------- /figures/fig-r/10-r-titanic-roc.R: 
-------------------------------------------------------------------------------- 1 | pdf(file = "10-r-titanic-roc.pdf", width = 6, height = 5) 2 | 3 | library(caret) 4 | library(PRROC) 5 | library(tidyverse) 6 | 7 | my_url <- str_c("https://raw.githubusercontent.com", 8 | "/taroyabuki/fromzero/master/data/titanic.csv") 9 | my_data <- read_csv(my_url) 10 | 11 | my_model <- train(form = Survived ~ ., data = my_data, method = "rpart2", 12 | tuneGrid = data.frame(maxdepth = 2), 13 | trControl = trainControl(method = "none")) 14 | 15 | y <- my_data$Survived 16 | tmp <- my_model %>% predict(newdata = my_data, type = "prob") 17 | y_score <- tmp$Yes 18 | 19 | my_roc <- roc.curve(scores.class0 = y_score[y == "Yes"], 20 | scores.class1 = y_score[y == "No"], 21 | curve = TRUE) 22 | my_roc %>% plot(xlab = "False Positive Rate", 23 | ylab = "True Positive Rate", 24 | legend = FALSE) 25 | -------------------------------------------------------------------------------- /figures/fig-r/12-r-airpassengers-arima.R: -------------------------------------------------------------------------------- 1 | pdf(file = "12-r-airpassengers-arima.pdf", width = 5.83, height = 4.13) 2 | 3 | my_data <- as.vector(AirPassengers) 4 | 5 | n <- length(my_data) # データ数(144) 6 | k <- 108 # 訓練データ数 7 | 8 | library(tidyverse) 9 | library(tsibble) 10 | 11 | my_ds <- seq( 12 | from = yearmonth("1949/01"), 13 | to = yearmonth("1960/12"), 14 | by = 1) 15 | my_label <- rep( 16 | c("train", "test"), 17 | c(k, n - k)) 18 | my_df <- tsibble( 19 | ds = my_ds, 20 | x = 0:(n - 1), 21 | y = my_data, 22 | label = my_label, 23 | index = ds) # 日時の列の指定 24 | 25 | my_train <- my_df[ 1:k, ] 26 | my_test <- my_df[- (1:k), ] 27 | 28 | library(fable) 29 | my_arima_model <- my_train %>% model(ARIMA(y)) 30 | 31 | tmp <- my_arima_model %>% forecast(h = "3 years") 32 | 33 | tmp %>% autoplot + 34 | geom_line(data = my_df, 35 | aes(x = ds, 36 | y = y, 37 | color = label)) 38 | 
-------------------------------------------------------------------------------- /figures/fig-p/12-p-airpassengers-lm.py: -------------------------------------------------------------------------------- 1 | from pmdarima.datasets import airpassengers 2 | my_data = airpassengers.load_airpassengers() 3 | 4 | n = len(my_data) 5 | k = 108 6 | 7 | import pandas as pd 8 | my_ds = pd.date_range( 9 | start='1949/01/01', 10 | end='1960/12/01', 11 | freq='MS') 12 | my_df = pd.DataFrame({ 13 | 'ds': my_ds, 14 | 'x': range(n), 15 | 'y': my_data}, 16 | index=my_ds) 17 | 18 | my_train = my_df[ :k] 19 | my_test = my_df[-(n - k): ] 20 | 21 | import matplotlib.pyplot as plt 22 | from sklearn.linear_model import LinearRegression 23 | 24 | my_lm_model = LinearRegression() 25 | my_lm_model.fit(my_train[['x']], my_train.y) 26 | 27 | y_ = my_lm_model.predict(my_df[['x']]) 28 | tmp = pd.DataFrame(y_, 29 | index=my_df.index) 30 | plt.plot(my_train.y, label='train') 31 | plt.plot(my_test.y, label='test') 32 | plt.plot(tmp, label='model') 33 | plt.legend() 34 | plt.savefig('12-p-airpassengers-lm.pdf') 35 | -------------------------------------------------------------------------------- /figures/fig-p/09-p-rpart2.py: -------------------------------------------------------------------------------- 1 | import graphviz 2 | import statsmodels.api as sm 3 | from sklearn import tree 4 | from sklearn.model_selection import GridSearchCV, LeaveOneOut 5 | 6 | my_data = sm.datasets.get_rdataset('iris', 'datasets').data 7 | X, y = my_data.iloc[:, 0:4], my_data.Species 8 | 9 | my_params = { 10 | 'max_depth': range(2, 6), 11 | 'min_samples_split': [2, 20], 12 | 'min_samples_leaf': range(1, 8)} 13 | 14 | my_search = GridSearchCV( 15 | estimator=tree.DecisionTreeClassifier(min_impurity_decrease=0.01, 16 | random_state=0), 17 | param_grid=my_params, 18 | cv=LeaveOneOut(), 19 | n_jobs=-1).fit(X, y) 20 | 21 | my_model = my_search.best_estimator_ 22 | my_dot = tree.export_graphviz( 23 | decision_tree=my_model, 
24 | out_file=None, 25 | feature_names=X.columns, 26 | class_names=my_model.classes_, 27 | filled=True) 28 | my_graph = graphviz.Source(my_dot) 29 | my_graph.render('09-p-rpart2') 30 | -------------------------------------------------------------------------------- /figures/fig-p/07-p-tuning.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import statsmodels.api as sm 3 | from sklearn.model_selection import GridSearchCV, LeaveOneOut 4 | from sklearn.neighbors import KNeighborsRegressor 5 | 6 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data 7 | X, y = my_data[['speed']], my_data['dist'] 8 | 9 | my_params = {'n_neighbors': range(1, 16)} # 探索範囲(1以上16未満の整数) 10 | 11 | my_search = GridSearchCV(estimator=KNeighborsRegressor(), 12 | param_grid=my_params, 13 | cv=LeaveOneOut(), 14 | scoring='neg_mean_squared_error') 15 | my_search.fit(X, y) 16 | 17 | tmp = my_search.cv_results_ # チューニングの詳細 18 | my_scores = (-tmp['mean_test_score'])**0.5 # RMSE 19 | my_results = pd.DataFrame(tmp['params']).assign(validation=my_scores) 20 | 21 | my_results.plot(x='n_neighbors', 22 | style='o-', 23 | ylabel='RMSE') 24 | 25 | import matplotlib.pyplot as plt 26 | plt.savefig('07-p-tuning.pdf') 27 | -------------------------------------------------------------------------------- /figures/fig-r/12-r-airpassengers-prophet.R: -------------------------------------------------------------------------------- 1 | pdf(file = "12-r-airpassengers-prophet.pdf", width = 5.83, height = 4.13) 2 | 3 | my_data <- as.vector(AirPassengers) 4 | 5 | n <- length(my_data) # データ数(144) 6 | k <- 108 # 訓練データ数 7 | 8 | library(tidyverse) 9 | library(tsibble) 10 | 11 | my_ds <- seq( 12 | from = yearmonth("1949/01"), 13 | to = yearmonth("1960/12"), 14 | by = 1) 15 | my_label <- rep( 16 | c("train", "test"), 17 | c(k, n - k)) 18 | my_df <- tsibble( 19 | ds = my_ds, 20 | x = 0:(n - 1), 21 | y = my_data, 22 | label = my_label, 23 | index = ds) # 日時の列の指定 
24 | 25 | my_train <- my_df[ 1:k, ] 26 | my_test <- my_df[- (1:k), ] 27 | 28 | library(prophet) 29 | my_prophet_model <- my_train %>% 30 | prophet(seasonality.mode = "multiplicative") 31 | 32 | tmp <- my_prophet_model %>% predict(my_test) 33 | 34 | my_prophet_model %>% plot(tmp) + 35 | geom_line(data = my_train, aes(x = as.POSIXct(ds))) + 36 | geom_line(data = my_test, aes(x = as.POSIXct(ds)), color = "red") 37 | -------------------------------------------------------------------------------- /figures/fig-r/12-r-airpassengers-lm.R: -------------------------------------------------------------------------------- 1 | pdf(file = "12-r-airpassengers-lm.pdf", width = 5.83, height = 4.13) 2 | 3 | my_data <- as.vector(AirPassengers) 4 | 5 | n <- length(my_data) # データ数(144) 6 | k <- 108 # 訓練データ数 7 | 8 | library(tidyverse) 9 | library(tsibble) 10 | 11 | my_ds <- seq( 12 | from = yearmonth("1949/01"), 13 | to = yearmonth("1960/12"), 14 | by = 1) 15 | my_label <- rep( 16 | c("train", "test"), 17 | c(k, n - k)) 18 | my_df <- tsibble( 19 | ds = my_ds, 20 | x = 0:(n - 1), 21 | y = my_data, 22 | label = my_label, 23 | index = ds) # 日時の列の指定 24 | 25 | my_train <- my_df[ 1:k, ] 26 | my_test <- my_df[- (1:k), ] 27 | 28 | library(caret) 29 | my_lm_model <- train(form = y ~ x, data = my_train, method = "lm") 30 | 31 | y_ <- my_lm_model %>% predict(my_df) 32 | tmp <- my_df %>% 33 | mutate(y = y_, label = "model") 34 | my_plot <- my_df %>% 35 | ggplot(aes(x = ds, 36 | y = y, 37 | color = label)) + 38 | geom_line() 39 | my_plot + geom_line(data = tmp) 40 | -------------------------------------------------------------------------------- /figures/fig-p/07-p-polynomial.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import statsmodels.api as sm 4 | my_data = sm.datasets.get_rdataset('cars', 'datasets').data 5 | 6 | my_idx = [1, 10, 26, 33, 38, 43] 7 | my_sample = my_data.iloc[my_idx, ] 8 | X, y = 
my_sample[['speed']], my_sample['dist'] 9 | 10 | from sklearn.preprocessing import PolynomialFeatures 11 | d = 5 12 | X5 = PolynomialFeatures(d).fit_transform(X) # Xの1乗から5乗の変数 13 | 14 | from sklearn.linear_model import LinearRegression 15 | my_model = LinearRegression() 16 | my_model.fit(X5, y) 17 | 18 | tmp = pd.DataFrame({'speed': np.linspace(min(my_data.speed), 19 | max(my_data.speed), 20 | 100)}) 21 | X5 = PolynomialFeatures(d).fit_transform(tmp) 22 | tmp['model'] = my_model.predict(X5) 23 | 24 | my_sample = my_sample.assign(sample=y) 25 | my_df = pd.concat([my_data, my_sample, tmp]) 26 | my_df.plot(x='speed', style=['o', 'o', '-'], ylim=(0, 130)) 27 | 28 | import matplotlib.pyplot as plt 29 | plt.savefig('07-p-polynomial.pdf') 30 | -------------------------------------------------------------------------------- /figures/fig-p/10-p-titanic-tree.py: -------------------------------------------------------------------------------- 1 | import graphviz 2 | import pandas as pd 3 | from sklearn import tree 4 | from sklearn.pipeline import Pipeline 5 | from sklearn.preprocessing import OneHotEncoder 6 | 7 | my_url = ('https://raw.githubusercontent.com' 8 | '/taroyabuki/fromzero/master/data/titanic.csv') 9 | my_data = pd.read_csv(my_url) 10 | 11 | X, y = my_data.iloc[:, 0:3], my_data.Survived 12 | 13 | my_pipeline = Pipeline([ 14 | ('ohe', OneHotEncoder(drop='first')), 15 | ('tree', tree.DecisionTreeClassifier(max_depth=2, random_state=0, 16 | min_impurity_decrease=0.01))]) 17 | my_pipeline.fit(X, y) 18 | 19 | my_enc = my_pipeline.named_steps['ohe'] 20 | my_tree = my_pipeline.named_steps['tree'] 21 | 22 | my_dot = tree.export_graphviz( 23 | decision_tree=my_tree, 24 | out_file=None, 25 | feature_names=my_enc.get_feature_names(), 26 | class_names=my_pipeline.classes_, 27 | filled=True) 28 | graphviz.Source(my_dot) 29 | my_graph = graphviz.Source(my_dot) 30 | my_graph.render('10-p-titanic-tree') 31 | 
# Train a small dense network on the shuffled iris data, plot the training
# history into 11-r-classification.pdf and print the accuracy on the data.
pdf(file = "11-r-classification.pdf", width = 5.83, height = 4.13)

library(keras)
library(tidyverse)

# Shuffle the rows so that validation_split does not cut off a single species.
shuffled_rows <- sample(nrow(iris))
my_data <- iris[shuffled_rows, ]

# Standardised predictors and 0-based integer class labels.
X <- scale(select(my_data, -Species))
y <- as.integer(my_data$Species) - 1

my_model <- keras_model_sequential()
my_model <- layer_dense(my_model, units = 3, activation = "relu",
                        input_shape = c(4))
my_model <- layer_dense(my_model, units = 3, activation = "softmax")

compile(my_model,
        loss = "sparse_categorical_crossentropy",
        optimizer = "rmsprop",
        metrics = c("accuracy"))

# Stop when the monitored value has not improved for 20 epochs and
# restore the best weights seen so far.
my_cb <- callback_early_stopping(patience = 20,
                                 restore_best_weights = TRUE)

my_history <- fit(my_model,
                  x = X,
                  y = y,
                  validation_split = 0.25,
                  batch_size = 10,
                  epochs = 500,
                  callbacks = list(my_cb),
                  verbose = 0)

plot(my_history)

my_history

# Predicted class = column with the largest probability (0-based).
class_prob <- predict(my_model, X)
y_ <- apply(class_prob, 1, which.max) - 1
mean(y_ == y)
optimizer = "rmsprop") 23 | 24 | my_cb <- callback_early_stopping( 25 | patience = 20, 26 | restore_best_weights = TRUE) 27 | 28 | my_history <- my_model %>% 29 | fit(x = X, 30 | y = y, 31 | validation_split = 0.25, 32 | batch_size = 10, 33 | epochs = 500, 34 | callbacks = list(my_cb), 35 | verbose = 0) 36 | 37 | plot(my_history) 38 | 39 | my_history 40 | 41 | y_ <- my_model %>% predict(X) 42 | mean((y_ - y)^2)**0.5 43 | -------------------------------------------------------------------------------- /docs/exam.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exam 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 |
nameenglishmathgender
A6070f
B9080m
C7090m
D90100f
# Plot the ROC curve of a shallow decision tree fitted to the Titanic data
# and save it as 10-p-titanic-roc.pdf.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import tree
from sklearn.metrics import roc_curve, RocCurveDisplay, auc
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

my_url = ('https://raw.githubusercontent.com'
          '/taroyabuki/fromzero/master/data/titanic.csv')
titanic = pd.read_csv(my_url)

# First three columns are the predictors; Survived is the label.
X = titanic.iloc[:, 0:3]
y = titanic.Survived

encoder = OneHotEncoder(drop='first')
classifier = tree.DecisionTreeClassifier(max_depth=2,
                                         min_impurity_decrease=0.01)
my_pipeline = Pipeline([('ohe', encoder), ('tree', classifier)])
my_pipeline.fit(X, y)

# Label the probability columns with the class names so that the score of
# the positive class can be selected by name.
class_proba = pd.DataFrame(my_pipeline.predict_proba(X),
                           columns=my_pipeline.classes_)
y_score = class_proba['Yes']

my_fpr, my_tpr, _ = roc_curve(y_true=y, y_score=y_score, pos_label='Yes')
my_auc = auc(x=my_fpr, y=my_tpr)

roc_display = RocCurveDisplay(fpr=my_fpr, tpr=my_tpr, roc_auc=my_auc)
roc_display.plot()

plt.savefig('10-p-titanic-roc.pdf')
# Train a fully connected network on a 6000-image subsample of MNIST,
# plot the training history into 11-r-mnist-nnet.pdf and report the
# confusion matrix and accuracy on the test set.
pdf(file = "11-r-mnist-nnet.pdf", width = 5.83, height = 4.13)

library(keras)

c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()

# Use a random tenth of the training images.
my_index <- sample(1:60000, 6000)
x_train <- x_train[my_index, , ]
y_train <- y_train[my_index]

# Rescale the inputs by 1/255, as the Python counterpart and the R CNN/LeNet
# scripts do; the original script fed the unscaled values to the network.
x_train <- x_train / 255
x_test <- x_test / 255

my_model <- keras_model_sequential() %>%
  layer_flatten(input_shape = c(28, 28)) %>%
  layer_dense(units = 256, activation = "relu") %>%
  layer_dense(units = 10, activation = "softmax")

my_model %>% compile(loss = "sparse_categorical_crossentropy",
                     optimizer = "rmsprop",
                     metrics = c("accuracy"))

# Stop after 5 epochs without improvement and restore the best weights.
my_cb <- callback_early_stopping(patience = 5,
                                 restore_best_weights = TRUE)

my_history <- my_model %>%
  fit(x = x_train,
      y = y_train,
      validation_split = 0.2,
      batch_size = 128,
      epochs = 20,
      callbacks = list(my_cb),
      verbose = 0)

plot(my_history)

# Predicted digit = index of the largest probability, shifted to 0-based.
tmp <- my_model %>% predict(x_test)
y_ <- apply(tmp, 1, which.max) - 1
table(y_, y_test)

mean(y_ == y_test)

my_model %>% evaluate(x = x_test, y = y_test)
epochs=500, 35 | callbacks=[my_cb], 36 | verbose=0) 37 | 38 | tmp = pd.DataFrame(my_history.history) 39 | tmp.plot(xlabel='epoch') 40 | 41 | import matplotlib.pyplot as plt 42 | plt.savefig('11-p-regression.pdf') 43 | 44 | print(tmp.iloc[-1, ]) 45 | 46 | y_ = my_model.predict(X) 47 | print(((y_.ravel() - y)**2).mean()) 48 | -------------------------------------------------------------------------------- /figures/fig-r/11-r-mnist-id5.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /figures/fig-p/12-p-airpassengers-arima.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from pmdarima.datasets import airpassengers 3 | my_data = airpassengers.load_airpassengers() 4 | 5 | n = len(my_data) 6 | k = 108 7 | 8 | import pandas as pd 9 | my_ds = pd.date_range( 10 | start='1949/01/01', 11 | end='1960/12/01', 12 | freq='MS') 13 | my_df = pd.DataFrame({ 14 | 'ds': my_ds, 15 | 'x': range(n), 16 | 'y': my_data}, 17 | index=my_ds) 18 | 19 | my_train = my_df[ :k] 20 | my_test = my_df[-(n - k): ] 21 | 22 | import pmdarima as pm 23 | my_arima_model = pm.auto_arima(my_train.y, m=12, trace=True) 24 | 25 | y_, my_ci = my_arima_model.predict(len(my_test), # 期間はテストデータと同じ. 26 | alpha=0.05, # 有意水準(デフォルト) 27 | return_conf_int=True) # 信頼区間を求める. 
28 | tmp = pd.DataFrame({'y': y_, 29 | 'Lo': my_ci[:, 0], 30 | 'Hi': my_ci[:, 1]}, 31 | index=my_test.index) 32 | 33 | plt.plot(my_train.y, label='train') 34 | plt.plot(my_test.y, label='test') 35 | plt.plot(tmp.y, label='model') 36 | plt.fill_between(tmp.index, 37 | tmp.Lo, 38 | tmp.Hi, 39 | alpha=0.25) 40 | plt.legend(loc='upper left') 41 | 42 | plt.savefig('12-p-airpassengers-arima.pdf') 43 | -------------------------------------------------------------------------------- /figures/fig/図1.4.md: -------------------------------------------------------------------------------- 1 | ```puml 2 | @startuml 3 | scale 0.8 4 | skinparam { 5 | defaultFontName Hiragino Kaku Gothic ProN 6 | monochrome true 7 | shadowing false 8 | } 9 | 10 | cloud HOMELAN as "家庭内LAN\nネットワーク:192.168.1.0\nサブネットマスク:255.255.255.0" { 11 | rectangle ホストPC as "ホストPC\nIPアドレス:192.168.1.2" { 12 | cloud ホストPC内LAN as "ホストPC内LAN\nネットワーク:172.17.0.0\nサブネットマスク:255.255.0.0" { 13 | rectangle コンテナ as "Dockerコンテナ\nIPアドレス:172.17.43.181" { 14 | rectangle コンテナ8787 as "ポート8787" 15 | rectangle コンテナ8888 as "ポート8888" 16 | } 17 | } 18 | rectangle ホスト8787 as "ポート8787" 19 | rectangle ホスト8888 as "ポート8888" 20 | } 21 | rectangle PC3 as "PC\nIPアドレス:192.168.1.3" 22 | rectangle Gateway as "Gateway, DNS Server\nIPアドレス:192.168.1.1" 23 | ホストPC--Gateway 24 | ホストPC-PC3 25 | PC3--Gateway 26 | コンテナ8787--ホスト8787 27 | コンテナ8888--ホスト8888 28 | } 29 | 30 | usecase http8787 as "localhost:8787" 31 | usecase http8888 as "localhost:8888" 32 | 33 | http8787-up-ホスト8787 34 | http8888-up-ホスト8888 35 | 36 | ホストPCのユーザ-up-http8787 37 | ホストPCのユーザ-up-http8888 38 | 39 | cloud 組織AのLAN { 40 | rectangle PC as "PC\nIPアドレス:192.168.1.2" 41 | } 42 | 43 | cloud 組織BのLAN { 44 | rectangle PC2 as "PC\nIPアドレス:192.168.1.2" 45 | } 46 | 組織AのLAN-Gateway 47 | 組織BのLAN-Gateway 48 | 組織AのLAN--組織BのLAN 49 | 50 | @enduml 51 | ``` 52 | -------------------------------------------------------------------------------- /figures/fig-p/08-p-enet-tuning.py: 
# Grid-search the elastic-net penalty (alpha, l1_ratio) on the wine data with
# leave-one-out cross-validation and plot the RMSE for every combination.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

my_url = ('https://raw.githubusercontent.com/taroyabuki/' +
          'fromzero/master/data/wine.csv')
my_data = pd.read_csv(my_url)
X = my_data.drop(columns=['LPRICE2'])
y = my_data['LPRICE2']

# Candidate values: A is the penalty strength, B the L1 mixing ratio.
As = np.linspace(0, 0.1, 21)
Bs = np.linspace(0, 0.1, 6)
param_grid = {'enet__alpha': As, 'enet__l1_ratio': Bs}

my_pipeline = Pipeline([('sc', StandardScaler()),
                        ('enet', ElasticNet())])
my_search = GridSearchCV(estimator=my_pipeline,
                         param_grid=param_grid,
                         cv=LeaveOneOut(),
                         scoring='neg_mean_squared_error',
                         n_jobs=-1)
my_search.fit(X, y)

cv_results = my_search.cv_results_                 # detailed tuning results
my_scores = (-cv_results['mean_test_score'])**0.5  # negated MSE -> RMSE

# One row per alpha, one column per l1_ratio.
long_table = pd.DataFrame(cv_results['params']).assign(RMSE=my_scores)
my_results = long_table.pivot(index='enet__alpha',
                              columns='enet__l1_ratio',
                              values='RMSE')

ax = my_results.plot(style='o-', xlabel='A ( = alpha)', ylabel='RMSE')
ax.legend(title='B ( = l1_ratio)')

plt.savefig('08-p-enet-tuning.pdf')
my_data.drop(columns=['Species'])) 14 | my_enc = LabelEncoder() 15 | y = my_enc.fit_transform( 16 | my_data['Species']) 17 | 18 | my_model = models.Sequential() 19 | my_model.add(layers.Dense(units=3, activation='relu', input_shape=[4])) 20 | my_model.add(layers.Dense(units=3, activation='softmax')) 21 | 22 | my_model.compile(loss='sparse_categorical_crossentropy', 23 | optimizer='rmsprop', 24 | metrics=['accuracy']) 25 | 26 | my_cb = callbacks.EarlyStopping( 27 | patience=20, 28 | restore_best_weights=True) 29 | 30 | my_history = my_model.fit( 31 | x=X, 32 | y=y, 33 | validation_split=0.25, 34 | batch_size=10, 35 | epochs=500, 36 | callbacks=[my_cb], 37 | verbose=0) 38 | 39 | tmp = pd.DataFrame(my_history.history) 40 | tmp.plot(xlabel='epoch') 41 | 42 | import matplotlib.pyplot as plt 43 | plt.savefig('11-p-classification.pdf') 44 | 45 | print(tmp.iloc[-1, ]) 46 | 47 | tmp = my_model.predict(X) 48 | y_ = np.argmax(tmp, axis=-1) 49 | print((y_ == y).mean()) 50 | -------------------------------------------------------------------------------- /figures/fig-r/11-r-mnist-cnn.R: -------------------------------------------------------------------------------- 1 | pdf(file = "11-r-mnist-cnn.pdf", width = 5.83, height = 4.13) 2 | 3 | library(keras) 4 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist() 5 | 6 | my_index <- sample(1:60000, 6000) 7 | x_train <- x_train[my_index, , ] 8 | y_train <- y_train[my_index] 9 | 10 | x_train <- x_train / 255 11 | x_test <- x_test / 255 12 | 13 | x_train2d <- x_train %>% array_reshape(c(-1, 28, 28, 1)) 14 | x_test2d <- x_test %>% array_reshape(c(-1, 28, 28, 1)) 15 | 16 | my_model <- keras_model_sequential() %>% 17 | layer_conv_2d(filters = 32, kernel_size = 3, # 畳み込み層 18 | activation = "relu", 19 | input_shape = c(28, 28, 1)) %>% 20 | layer_max_pooling_2d(pool_size = 2) %>% # プーリング層 21 | layer_flatten() %>% 22 | layer_dense(units = 128, activation = "relu") %>% 23 | layer_dense(units = 10, activation = "softmax") 24 | 25 | 
my_model %>% compile( 26 | loss = "sparse_categorical_crossentropy", 27 | optimizer = "rmsprop", 28 | metrics = c("accuracy")) 29 | 30 | my_cb <- callback_early_stopping(patience = 5, 31 | restore_best_weights = TRUE) 32 | 33 | my_history <- my_model %>% 34 | fit(x = x_train2d, 35 | y = y_train, 36 | validation_split = 0.2, 37 | batch_size = 128, 38 | epochs = 20, 39 | callbacks = list(my_cb), 40 | verbose = 0) 41 | 42 | plot(my_history) 43 | 44 | my_model %>% evaluate(x = x_test2d, y = y_test) 45 | -------------------------------------------------------------------------------- /code/R-notebook/r-06.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "id": "fe515360", 5 | "cell_type": "markdown", 6 | "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n", 7 | "metadata": {} 8 | }, 9 | { 10 | "id": "e5fca29e", 11 | "cell_type": "markdown", 12 | "source": "## 6.1 \u6a5f\u68b0\u5b66\u7fd2\u306e\u76ee\u7684\uff08\u672c\u66f8\u306e\u5834\u5408\uff09\n\n\n", 13 | "metadata": {} 14 | }, 15 | { 16 | "id": "f7848f95", 17 | "cell_type": "markdown", 18 | "source": "## 6.2 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u30c7\u30fc\u30bf", 19 | "metadata": {} 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "iris\n", 28 | "# \u4ee5\u4e0b\u7701\u7565" 29 | ], 30 | "id": "ce116acc-11c8-4cd4-bfdf-ab9b9a7c4142" 31 | }, 32 | { 33 | "id": "9da0985a", 34 | "cell_type": "markdown", 35 | "source": "## 6.3 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u624b\u6cd5", 36 | "metadata": {} 37 | } 38 | ], 39 | "nbformat": 4, 40 | "nbformat_minor": 5, 41 | "metadata": { 42 | "kernelspec": { 43 | "name": "ir", 44 | 
"display_name": "R" 45 | } 46 | } 47 | } -------------------------------------------------------------------------------- /figures/fig-p/11-p-mnist-nnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import tensorflow as tf 4 | from random import sample 5 | from keras import callbacks, layers, models 6 | from sklearn.metrics import confusion_matrix 7 | 8 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 9 | 10 | my_index = sample(range(60000), 6000) 11 | x_train = x_train[my_index, :, :] 12 | y_train = y_train[my_index] 13 | 14 | x_train = x_train / 255 15 | x_test = x_test / 255 16 | 17 | my_model = models.Sequential() 18 | my_model.add(layers.Flatten(input_shape=[28, 28])) 19 | my_model.add(layers.Dense(units=256, activation="relu")) 20 | my_model.add(layers.Dense(units=10, activation="softmax")) 21 | 22 | my_model.compile(loss='sparse_categorical_crossentropy', 23 | optimizer='rmsprop', 24 | metrics=['accuracy']) 25 | 26 | my_cb = callbacks.EarlyStopping(patience=5, 27 | restore_best_weights=True) 28 | 29 | my_history = my_model.fit( 30 | x=x_train, 31 | y=y_train, 32 | validation_split=0.2, 33 | batch_size=128, 34 | epochs=20, 35 | callbacks=[my_cb], 36 | verbose=0) 37 | 38 | tmp = pd.DataFrame(my_history.history) 39 | tmp.plot(xlabel='epoch', style='o-') 40 | 41 | import matplotlib.pyplot as plt 42 | plt.savefig('11-p-mnist-nnet.pdf') 43 | 44 | tmp = my_model.predict(x_test) 45 | y_ = np.argmax(tmp, axis=-1) 46 | print(confusion_matrix(y_true=y_test, y_pred=y_)) 47 | 48 | print((y_test == y_).mean()) 49 | 50 | print(my_model.evaluate(x=x_test, y=y_test)) 51 | -------------------------------------------------------------------------------- /figures/fig-p/11-p-mnist-cnn.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from random import sample 3 | import tensorflow as tf 4 | from keras 
import callbacks, layers, models 5 | 6 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 7 | 8 | my_index = sample(range(60000), 6000) 9 | x_train = x_train[my_index, :, :] 10 | y_train = y_train[my_index] 11 | 12 | x_train = x_train / 255 13 | x_test = x_test / 255 14 | 15 | x_train2d = x_train.reshape(-1, 28, 28, 1) 16 | x_test2d = x_test.reshape(-1, 28, 28, 1) 17 | 18 | my_model = models.Sequential() 19 | my_model.add(layers.Conv2D(filters=32, kernel_size=3, # 畳み込み層 20 | activation='relu', 21 | input_shape=[28, 28, 1])) 22 | my_model.add(layers.MaxPooling2D(pool_size=2)) # プーリング層 23 | my_model.add(layers.Flatten()) 24 | my_model.add(layers.Dense(128, activation='relu')) 25 | my_model.add(layers.Dense(10, activation='softmax')) 26 | 27 | my_model.compile(loss='sparse_categorical_crossentropy', 28 | optimizer='rmsprop', 29 | metrics=['accuracy']) 30 | 31 | my_cb = callbacks.EarlyStopping(patience=5, 32 | restore_best_weights=True) 33 | 34 | my_history = my_model.fit( 35 | x=x_train2d, 36 | y=y_train, 37 | validation_split=0.2, 38 | batch_size=128, 39 | epochs=20, 40 | callbacks=[my_cb], 41 | verbose=0) 42 | 43 | tmp = pd.DataFrame(my_history.history) 44 | tmp.plot(xlabel='epoch', style='o-') 45 | 46 | import matplotlib.pyplot as plt 47 | plt.savefig('11-p-mnist-cnn.pdf') 48 | 49 | print(my_model.evaluate(x=x_test2d, y=y_test)) 50 | -------------------------------------------------------------------------------- /figures/fig-r/11-r-mnist-lenet.R: -------------------------------------------------------------------------------- 1 | pdf(file = "11-r-mnist-lenet.pdf", width = 5.83, height = 4.13) 2 | 3 | library(keras) 4 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist() 5 | 6 | my_index <- sample(1:60000, 6000) 7 | x_train <- x_train[my_index, , ] 8 | y_train <- y_train[my_index] 9 | 10 | x_train <- x_train / 255 11 | x_test <- x_test / 255 12 | 13 | x_train2d <- x_train %>% array_reshape(c(-1, 28, 28, 1)) 14 | x_test2d <- 
x_test %>% array_reshape(c(-1, 28, 28, 1)) 15 | 16 | my_model <- keras_model_sequential() %>% 17 | layer_conv_2d(filters = 20, kernel_size = 5, activation = "relu", 18 | input_shape = c(28, 28, 1)) %>% 19 | layer_max_pooling_2d(pool_size = 2, strides = 2) %>% 20 | layer_conv_2d(filters = 50, kernel_size = 5, activation = "relu") %>% 21 | layer_max_pooling_2d(pool_size = 2, strides = 2) %>% 22 | layer_dropout(rate = 0.25) %>% 23 | layer_flatten() %>% 24 | layer_dense(units = 500, activation = "relu") %>% 25 | layer_dropout(rate = 0.5) %>% 26 | layer_dense(units = 10, activation = "softmax") 27 | 28 | my_model %>% compile( 29 | loss = "sparse_categorical_crossentropy", 30 | optimizer = "rmsprop", 31 | metrics = c("accuracy")) 32 | 33 | my_cb <- callback_early_stopping(patience = 5, 34 | restore_best_weights = TRUE) 35 | 36 | my_history <- my_model %>% 37 | fit(x = x_train2d, 38 | y = y_train, 39 | validation_split = 0.2, 40 | batch_size = 128, 41 | epochs = 20, 42 | callbacks = list(my_cb), 43 | verbose = 0) 44 | 45 | plot(my_history) 46 | 47 | my_model %>% evaluate(x = x_test2d, y = y_test) 48 | -------------------------------------------------------------------------------- /docker/rstudio/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rocker/tidyverse 2 | 3 | USER root 4 | 5 | RUN rm /etc/dpkg/dpkg.cfg.d/excludes \ 6 | && sed -i -e 's%http://[^ ]\+%mirror://mirrors.ubuntu.com/mirrors.txt%g' /etc/apt/sources.list \ 7 | && apt-get update \ 8 | && apt-get install -y --no-install-recommends \ 9 | curl \ 10 | default-jdk \ 11 | dnsutils \ 12 | iputils-ping \ 13 | less \ 14 | libglpk-dev \ 15 | libnode64 \ 16 | libtbb2 \ 17 | net-tools \ 18 | vim-tiny \ 19 | && apt-get --reinstall install -y man-db coreutils manpages \ 20 | && apt-get clean \ 21 | && rm -rf /var/lib/apt/lists/* \ 22 | && yes | unminimize 23 | 24 | USER rstudio 25 | 26 | RUN Rscript -e ' \ 27 | options(Ncpus = 32); \ 28 | options(repos = 
"https://cran.ism.ac.jp"); \ 29 | Sys.setenv(DOWNLOAD_STATIC_LIBV8=1); \ 30 | install.packages(c( \ 31 | "caret", \ 32 | "doParallel", \ 33 | "epitools", \ 34 | "exactci", \ 35 | "fable", \ 36 | "factoextra", \ 37 | "feasts", \ 38 | "furrr", \ 39 | "ggfortify", \ 40 | "ggmosaic", \ 41 | "glmnetUtils", \ 42 | "gplots", \ 43 | "h2o", \ 44 | "igraph", \ 45 | "keras", \ 46 | "leaps", \ 47 | "lintr", \ 48 | "neuralnet", \ 49 | "pastecs", \ 50 | "prophet", \ 51 | "PRROC", \ 52 | "psych", \ 53 | "proxy", \ 54 | "randomForest", \ 55 | "reticulate", \ 56 | "rpart.plot", \ 57 | "tsibble", \ 58 | "urca", \ 59 | "vcd" \ 60 | )); \ 61 | remotes::install_version("xgboost", version = "1.4.1.1"); \ 62 | remotes::install_github(c("vqv/ggbiplot")); \ 63 | reticulate::install_miniconda(); \ 64 | keras::install_keras();' 65 | 66 | WORKDIR /home/rstudio 67 | 68 | USER root 69 | -------------------------------------------------------------------------------- /figures/fig-r/08-r-enet-tuning2.R: -------------------------------------------------------------------------------- 1 | pdf(file = "08-r-enet-tuning2.pdf", width = 6, height = 4.5) 2 | 3 | library(furrr) 4 | plan(multisession) 5 | 6 | library(tidyverse) 7 | my_url <- str_c("https://raw.githubusercontent.com", 8 | "/taroyabuki/fromzero/master/data/wine.csv") 9 | my_data <- read_csv(my_url) 10 | 11 | my_sd <- function(x) { # √標本分散を計算する関数 12 | n <- length(x) 13 | sd(x) * sqrt((n - 1) / n) 14 | } 15 | 16 | my_loocv <- function(A, B) { 17 | my_predict <- function(id) { 18 | my_train <- my_data[-id, ] 19 | my_valid <- my_data[ id, ] 20 | y <- my_train$LPRICE2 21 | u <- mean(y) 22 | s <- my_sd(y) 23 | my_train2 <- my_train %>% mutate(LPRICE2 = (y - u) / s) 24 | my_model <- 25 | glmnetUtils::glmnet( 26 | form = LPRICE2 ~ ., data = my_train2, 27 | lambda = A, alpha = B, standardize = TRUE) 28 | (my_model %>% predict(my_valid, exact = TRUE) * s + u)[1] 29 | } 30 | y <- my_data$LPRICE2 31 | y_ <- seq_len(length(y)) %>% map_dbl(my_predict) 32 
# Train a LeNet-style CNN on a 6000-image subsample of MNIST, save the
# training-history plot as 11-p-mnist-lenet.pdf and print the test metrics.
import pandas as pd
from random import sample
import tensorflow as tf
from keras import callbacks, layers, models

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Use a random tenth of the training images.
my_index = sample(range(60000), 6000)
x_train = x_train[my_index, :, :]
y_train = y_train[my_index]

x_train = x_train / 255
x_test = x_test / 255

# Add the trailing channel axis expected by Conv2D.
x_train2d = x_train.reshape(-1, 28, 28, 1)
x_test2d = x_test.reshape(-1, 28, 28, 1)

my_model = models.Sequential()
my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu',
                           input_shape=(28, 28, 1)))
my_model.add(layers.MaxPooling2D(pool_size=2, strides=2))
# 50 filters in the second convolution, matching 11-r-mnist-lenet.R
# (the original used 20 here, so the R and Python figures showed
# different networks).
my_model.add(layers.Conv2D(filters=50, kernel_size=5, activation='relu'))
my_model.add(layers.MaxPooling2D(pool_size=2, strides=2))
my_model.add(layers.Dropout(rate=0.25))
my_model.add(layers.Flatten())
my_model.add(layers.Dense(500, activation='relu'))
my_model.add(layers.Dropout(rate=0.5))
my_model.add(layers.Dense(10, activation='softmax'))

my_model.compile(loss='sparse_categorical_crossentropy',
                 optimizer='rmsprop',
                 metrics=['accuracy'])

# Stop after 5 epochs without improvement and restore the best weights.
my_cb = callbacks.EarlyStopping(patience=5,
                                restore_best_weights=True)

my_history = my_model.fit(
    x=x_train2d,
    y=y_train,
    validation_split=0.2,
    batch_size=128,
    epochs=20,
    callbacks=[my_cb],
    verbose=0)

tmp = pd.DataFrame(my_history.history)
tmp.plot(xlabel='epoch', style='o-')

import matplotlib.pyplot as plt
plt.savefig('11-p-mnist-lenet.pdf')

print(my_model.evaluate(x=x_test2d, y=y_test))
33 | As = np.linspace(0, 0.1, 21) 34 | Bs = np.linspace(0, 0.1, 6) 35 | my_plan = pd.DataFrame(itertools.product(As, Bs), columns=['A', 'B']) 36 | 37 | pandarallel.initialize() 38 | my_results = my_plan.parallel_apply(lambda row: my_loocv(*row), axis=1) 39 | 40 | print(my_results[my_results.RMSE == my_results.RMSE.min()]) 41 | 42 | my_results.pivot(index='A', columns='B', values='RMSE').plot( 43 | style='o-', xlabel='A ( = alpha)', ylabel='RMSE').legend( 44 | title='B ( = l1_ratio)') 45 | 46 | import matplotlib.pyplot as plt 47 | plt.savefig('08-p-enet-tuning2.pdf') 48 | -------------------------------------------------------------------------------- /docker/jupyter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jupyter/datascience-notebook:python-3.8.8 2 | 3 | USER root 4 | 5 | RUN rm /etc/dpkg/dpkg.cfg.d/excludes \ 6 | && sed -i -e 's%http://[^ ]\+%mirror://mirrors.ubuntu.com/mirrors.txt%g' /etc/apt/sources.list \ 7 | && apt-get update \ 8 | && apt-get install -y --no-install-recommends \ 9 | default-jdk \ 10 | dnsutils \ 11 | graphviz \ 12 | iputils-ping \ 13 | less \ 14 | net-tools \ 15 | && apt-get --reinstall install -y man-db coreutils manpages \ 16 | && apt-get clean \ 17 | && rm -rf /var/lib/apt/lists/* \ 18 | && echo y | unminimize 19 | 20 | USER jovyan 21 | 22 | # pystan==2.* for fbprophet 23 | RUN pip install \ 24 | graphviz \ 25 | h2o \ 26 | japanize_matplotlib \ 27 | keras \ 28 | lxml \ 29 | pandarallel \ 30 | pca \ 31 | pmdarima \ 32 | pycodestyle \ 33 | pystan==2.* \ 34 | tensorflow \ 35 | xgboost 36 | 37 | RUN pip install fbprophet 38 | 39 | RUN mkdir -p /home/jovyan/.ipython/profile_default && echo "c.InteractiveShell.ast_node_interactivity = 'all'" > /home/jovyan/.ipython/profile_default/ipython_config.py 40 | 41 | RUN Rscript -e ' \ 42 | options(Ncpus = 32); \ 43 | options(repos = "https://cran.ism.ac.jp"); \ 44 | Sys.setenv(DOWNLOAD_STATIC_LIBV8=1); \ 45 | install.packages(c( \ 46 | 
"doParallel", \ 47 | "e1071", \ 48 | "epitools", \ 49 | "exactci", \ 50 | "fable", \ 51 | "factoextra", \ 52 | "feasts", \ 53 | "furrr", \ 54 | "ggfortify", \ 55 | "ggmosaic", \ 56 | "gplots", \ 57 | "glmnetUtils", \ 58 | "h2o", \ 59 | "igraph", \ 60 | "keras", \ 61 | "leaps", \ 62 | "lintr", \ 63 | "neuralnet", \ 64 | "pastecs", \ 65 | "prophet", \ 66 | "proxy", \ 67 | "PRROC", \ 68 | "psych", \ 69 | "rpart.plot", \ 70 | "tsibble", \ 71 | "vcd" \ 72 | )); \ 73 | remotes::install_version("xgboost", version = "1.4.1.1"); \ 74 | remotes::install_github(c("vqv/ggbiplot"));' 75 | 76 | WORKDIR /home/jovyan 77 | 78 | USER root 79 | -------------------------------------------------------------------------------- /code/R-notebook/README.md: -------------------------------------------------------------------------------- 1 | # Jupyter Notebooks for R 2 | 3 | chapter|Open in Colab 4 | --|-- 5 | 03|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-03.ipynb) 6 | 04|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-04.ipynb) 7 | 05|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-05.ipynb) 8 | 06|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-06.ipynb) 9 | 07|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-07.ipynb) 10 | 08|[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-08.ipynb) 11 | 09|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-09.ipynb) 12 | 10|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-10.ipynb) 13 | 11|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-11.ipynb) 14 | 12|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-12.ipynb) 15 | 13|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/R-notebook/r-13.ipynb) 16 | -------------------------------------------------------------------------------- /code/Python-notebook/README.md: -------------------------------------------------------------------------------- 1 | # Jupyter Notebooks for Python 2 | 3 | chapter|Open in Colab 4 | --|-- 5 | 03|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-03.ipynb) 6 | 04|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-04.ipynb) 7 | 05|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-05.ipynb) 8 | 06|[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-06.ipynb) 9 | 07|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-07.ipynb) 10 | 08|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-08.ipynb) 11 | 09|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-09.ipynb) 12 | 10|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-10.ipynb) 13 | 11|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-11.ipynb) 14 | 12|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-12.ipynb) 15 | 13|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/Python-notebook/python-13.ipynb) 16 | -------------------------------------------------------------------------------- /figures/fig-p/11-p-mnist-lenet-miss.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pandas as pd 4 | import tensorflow as tf 5 | from random import sample 6 | from keras import callbacks, layers, models 7 | 8 | (x_train, y_train), (x_test, y_test) = 
tf.keras.datasets.mnist.load_data() 9 | 10 | #my_index = sample(range(60000), 6000) 11 | #x_train = x_train[my_index, :, :] 12 | #y_train = y_train[my_index] 13 | 14 | x_train = x_train / 255 15 | x_test = x_test / 255 16 | 17 | x_train2d = x_train.reshape(-1, 28, 28, 1) 18 | x_test2d = x_test.reshape(-1, 28, 28, 1) 19 | 20 | my_model = models.Sequential() 21 | my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu', 22 | input_shape=(28, 28, 1))) 23 | my_model.add(layers.MaxPooling2D(pool_size=2, strides=2)) 24 | my_model.add(layers.Conv2D(filters=20, kernel_size=5, activation='relu')) 25 | my_model.add(layers.MaxPooling2D(pool_size=2, strides=2)) 26 | my_model.add(layers.Dropout(rate=0.25)) 27 | my_model.add(layers.Flatten()) 28 | my_model.add(layers.Dense(500, activation='relu')) 29 | my_model.add(layers.Dropout(rate=0.5)) 30 | my_model.add(layers.Dense(10, activation='softmax')) 31 | 32 | my_model.compile(loss='sparse_categorical_crossentropy', 33 | optimizer='rmsprop', 34 | metrics=['accuracy']) 35 | 36 | my_cb = callbacks.EarlyStopping(patience=5, 37 | restore_best_weights=True) 38 | 39 | my_history = my_model.fit( 40 | x=x_train2d, 41 | y=y_train, 42 | validation_split=0.2, 43 | batch_size=128, 44 | epochs=20, 45 | callbacks=[my_cb], 46 | verbose=0) 47 | 48 | y_prob = my_model.predict(x_test2d) # カテゴリに属する確率 49 | 50 | tmp = pd.DataFrame({ 51 | 'y_prob': np.max(y_prob, axis=1), # 確率の最大値 52 | 'y_': np.argmax(y_prob, axis=1), # 予測カテゴリ 53 | 'y': y_test, # 正解 54 | 'id': range(len(y_test))}) # 番号 55 | 56 | tmp = tmp[tmp.y_ != tmp.y] # 予測がはずれたものを残す 57 | my_result = tmp.sort_values('y_prob', ascending=False) # 確率の大きい順に並び替える 58 | print(my_result.head()) 59 | 60 | for i in range(5): 61 | plt.subplot(1, 5, i + 1) 62 | ans = my_result['y'].iloc[i] 63 | id = my_result['id'].iloc[i] 64 | plt.title(f'{ans} ({id})') 65 | plt.imshow(x_test[id]) 66 | plt.axis('off') 67 | 68 | plt.savefig('11-p-mnist-lenet-miss.pdf') 69 | 
-------------------------------------------------------------------------------- /figures/fig-r/11-r-mnist-lenet-miss.R: -------------------------------------------------------------------------------- 1 | pdf(file = "11-r-mnist-lenet-miss.pdf", width = 5.83, height = 4.13) 2 | 3 | library(keras) 4 | library(tidyverse) 5 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist() 6 | 7 | #my_index <- sample(1:60000, 6000) 8 | #x_train <- x_train[my_index, , ] 9 | #y_train <- y_train[my_index] 10 | 11 | x_train <- x_train / 255 12 | x_test <- x_test / 255 13 | 14 | x_train2d <- x_train %>% array_reshape(c(-1, 28, 28, 1)) 15 | x_test2d <- x_test %>% array_reshape(c(-1, 28, 28, 1)) 16 | 17 | my_model <- keras_model_sequential() %>% 18 | layer_conv_2d(filters = 20, kernel_size = 5, activation = "relu", 19 | input_shape = c(28, 28, 1)) %>% 20 | layer_max_pooling_2d(pool_size = 2, strides = 2) %>% 21 | layer_conv_2d(filters = 50, kernel_size = 5, activation = "relu") %>% 22 | layer_max_pooling_2d(pool_size = 2, strides = 2) %>% 23 | layer_dropout(rate = 0.25) %>% 24 | layer_flatten() %>% 25 | layer_dense(units = 500, activation = "relu") %>% 26 | layer_dropout(rate = 0.5) %>% 27 | layer_dense(units = 10, activation = "softmax") 28 | 29 | my_model %>% compile( 30 | loss = "sparse_categorical_crossentropy", 31 | optimizer = "rmsprop", 32 | metrics = c("accuracy")) 33 | 34 | my_cb <- callback_early_stopping(patience = 5, 35 | restore_best_weights = TRUE) 36 | 37 | my_history <- my_model %>% 38 | fit(x = x_train2d, 39 | y = y_train, 40 | validation_split = 0.2, 41 | batch_size = 128, 42 | epochs = 20, 43 | callbacks = list(my_cb), 44 | verbose = 0) 45 | 46 | y_prob <- my_model %>% predict(x_test2d) # カテゴリに属する確率 47 | 48 | my_result <- data.frame( 49 | y_prob = apply(y_prob, 1, max), # 確率の最大値 50 | y_ = apply(y_prob, 1, which.max) - 1, # 予測カテゴリ 51 | y = y_test, # 正解 52 | id = seq_len(length(y_test))) %>% # 番号 53 | filter(y_ != y) %>% # 予測がはずれたものを残す 54 | 
arrange(desc(y_prob)) # 確率の大きい順に並び替える 55 | head(my_result) 56 | 57 | tmp <- my_result[1:5, ]$id 58 | my_labels <- sprintf("%s (%s)", 59 | my_result[1:5, ]$y, tmp) 60 | my_fig <- expand.grid( 61 | label = my_labels, 62 | y = 28:1, 63 | x = 1:28) 64 | my_fig$z <- as.vector( 65 | x_test[tmp, , ]) 66 | 67 | my_fig %>% ggplot( 68 | aes(x = x, y = y, fill = z)) + 69 | geom_raster() + 70 | coord_fixed() + 71 | theme_void() + 72 | theme(legend.position = "none") + 73 | facet_grid(. ~ label) 74 | -------------------------------------------------------------------------------- /code/Python-notebook/python-06.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "id": "c5f5660f", 5 | "cell_type": "markdown", 6 | "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n", 7 | "metadata": {} 8 | }, 9 | { 10 | "id": "4e6dc4c2", 11 | "cell_type": "markdown", 12 | "source": "## 6.1 \u6a5f\u68b0\u5b66\u7fd2\u306e\u76ee\u7684\uff08\u672c\u66f8\u306e\u5834\u5408\uff09\n\n\n", 13 | "metadata": {} 14 | }, 15 | { 16 | "id": "11686fa0", 17 | "cell_type": "markdown", 18 | "source": "## 6.2 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u30c7\u30fc\u30bf", 19 | "metadata": {} 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import statsmodels.api as sm\n", 28 | "iris = sm.datasets.get_rdataset('iris', 'datasets').data\n", 29 | "iris.head()\n", 30 | "# \u4ee5\u4e0b\u7701\u7565" 31 | ], 32 | "id": "8fc0d772-605e-46ee-b679-2603d838c891" 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import seaborn as sns\n", 41 | "iris = sns.load_dataset('iris')\n", 
42 | "iris.head()\n", 43 | "# \u4ee5\u4e0b\u7701\u7565" 44 | ], 45 | "id": "c506c249-f4ca-4057-af97-58037c02a6ae" 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "import pandas as pd\n", 54 | "from sklearn.datasets import load_iris\n", 55 | "tmp = load_iris()\n", 56 | "iris = pd.DataFrame(tmp.data, columns=tmp.feature_names)\n", 57 | "iris['target'] = tmp.target_names[tmp.target]\n", 58 | "iris.head()\n", 59 | "# \u4ee5\u4e0b\u7701\u7565" 60 | ], 61 | "id": "94e44eb0-09ae-4573-8eb8-7d76662ca5ea" 62 | }, 63 | { 64 | "id": "9edbd001", 65 | "cell_type": "markdown", 66 | "source": "## 6.3 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u624b\u6cd5", 67 | "metadata": {} 68 | } 69 | ], 70 | "nbformat": 4, 71 | "nbformat_minor": 5, 72 | "metadata": { 73 | "kernelspec": { 74 | "name": "python3", 75 | "display_name": "Python 3" 76 | } 77 | } 78 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [講談社サイエンティフィク](https://www.kspub.co.jp/) / [実践Data Scienceシリーズ](https://www.kspub.co.jp/book/series/S069.html) / [ゼロからはじめるデータサイエンス入門](https://www.kspub.co.jp/book/detail/5132326.html) 2 | 3 | # ゼロからはじめるデータサイエンス入門(講談社, 2021)サポートサイト 4 | 5 | - [書店へのリンク集(版元ドットコム)](https://www.hanmoto.com/bd/isbn/9784065132326) 6 | - [国会図書館](https://ndlsearch.ndl.go.jp/books/R100000002-I031834151) 7 | 8 | 書影 9 | 10 | 著者:**辻真吾**([@tsjshg](https://twitter.com/tsjshg))・**矢吹太朗**([@yabuki](https://twitter.com/yabuki)) 11 | 12 | RやPythonのコード(具体的なコンピュータプログラム)の読み書きを通じてデータサイエンスについて学ぶための一冊です. 13 | コードなしで学びたいという人には,別の書籍にあたることをお勧めします. 14 | 15 | 本書には,次の三つの特徴があります. 16 | 17 | 1. 第1部「データサイエンスの準備」で,準備に時間をかけています. 18 | 1. ほぼ全ての例をコードに基づいて説明しています.本書掲載のコードはサポートサイト([ここ](#コード))でも公開しています(使用方法は2.6節を参照). 19 | 1. 第2部「機械学習」では,ほぼ全ての課題をRとPythonで解決し,同じ結果を得ることを試みています. 
20 | 21 | ## [更新情報・正誤表](update.md) 22 | 23 | ## 目次 24 | 25 | - はじめに 26 | - 第1部 27 | - 第1章 コンピュータとネットワーク 28 | - 第2章 データサイエンスのための環境 29 | - 第3章 RとPython 30 | - 第4章 統計入門 31 | - 第5章 前処理 32 | - 第2部 33 | - 第6章 機械学習の目的・データ・手法 34 | - 第7章 回帰1(単回帰) 35 | - 第8章 回帰2(重回帰) 36 | - 第9章 分類1(多値分類) 37 | - 第10章 分類2(2値分類) 38 | - 第11章 深層学習とAutoML 39 | - 第12章 時系列予測 40 | - 第13章 教師なし学習 41 | - 付録A 環境構築 42 | - おわりに 43 | - 参考文献 44 | - 索引 45 | 46 | ## コード 47 | 48 | 言語|システム|コード|実行結果 49 | --|--|--|-- 50 | R|Google Colab|[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/r.ipynb)|[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/r-results.ipynb) 51 | R|Jupyter|[r.ipynb](code/r.ipynb)|[r-results.ipynb](code/r-results.ipynb) 52 | R|RStudio|[r.Rmd](code/r.Rmd)|[r.html](https://taroyabuki.github.io/fromzero/r.html) 53 | Python|Google Colab|[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/python.ipynb)|[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taroyabuki/fromzero/blob/master/code/python-results.ipynb) 54 | Python|Jupyter|[python.ipynb](code/python.ipynb)|[python-results.ipynb](code/python-results.ipynb) 55 | Python|RStudio|[python.Rmd](code/python.Rmd)|[python.html](https://taroyabuki.github.io/fromzero/python.html) 56 | 57 | コードの使い方は,2.6節を参照してください[^1][^2]. 58 | 59 | [^1]: [Amazon SageMaker Studio Lab](https://github.com/taroyabuki/fromzero/tree/master/addendum/sagemaker)での動作も確認済みです. 60 | 61 | [^2]: Apple Chipでは,JupyterとRStudio上では第11章のコードが動作しません.第11章のコードを試す場合は,Google Colabを利用してください. 
62 | 63 | ## Docker 64 | 65 | 環境|言語|説明 66 | --|--|-- 67 | Jupyter Notebook|R, Python|[Jupyter Notebook](docker/jupyter) 68 | RStudio|R|[RStudio](docker/rstudio) 69 | 70 | Dockerの使い方は,2.3節を参照してください. 71 | 72 | ## [画像とそのソースコード](figures) 73 | 74 | ## ライセンス 75 | 76 | The contents of https://github.com/taroyabuki/fromzero by Shingo Tsuji and Taro Yabuki are licensed under the [Apache License, Version 2.0](LICENSE). 77 | -------------------------------------------------------------------------------- /addendum/sagemaker/README.md: -------------------------------------------------------------------------------- 1 | # Amazon SageMaker Studio Lab 2 | 3 | 無料の[Amazon SageMaker Studio Lab](https://studiolab.sagemaker.aws/)(以下,Studio Lab)で本書のコードを動かすための環境を作ります.Studio Labの概要は,[Amazon SageMaker Studio Lab入門](https://atmarkit.itmedia.co.jp/ait/subtop/features/di/sagemakerstudiolab_index.html)を参照してください. 4 | 5 | TerminalでGitHubリポジトリをクローンします. 6 | 7 | ```bash 8 | git clone https://github.com/taroyabuki/fromzero.git 9 | ``` 10 | 11 | ## 仮想環境の構築 12 | 13 | ```bash 14 | # Rの場合 15 | conda env create --file fromzero/addendum/sagemaker/sage-r.yml 16 | 17 | # Pythonの場合 18 | conda env create --file fromzero/addendum/sagemaker/sage-python.yml 19 | ``` 20 | 21 | ## Jupyter Notebookの利用 22 | 23 | 画面左にファイルブラウザーがあります.そこから,次のノートブックを開いてください. 24 | 25 | 言語|カーネル|全体のノートブック|各章のノートブック 26 | --|--|--|-- 27 | R|sage-r:R|fromzero/code/r.ipynb|fromzero/code/R-notebook 28 | Python|sage-python:Python|fromzero/code/python.ipynb|fromzero/code/Python-notebook 29 | 30 | ノートブックのファイル(.ipynb)をダブルクリックするとカーネル選択のダイアログが出るので,Rの場合はsage-r:R,Pythonの場合はsage-python:Pythonを選択してください. 31 | 32 | 補足:Jupyter Notebook(Python)の出力を本書と同じにするためには,最初に次のコードを実行してください.54頁の脚註24のようにしてもかまいません. 
33 | 34 | ```python 35 | from IPython.core.interactiveshell import InteractiveShell 36 | InteractiveShell.ast_node_interactivity = "all" 37 | ``` 38 | 39 | ## 仮想環境の削除 40 | 41 | ```bash 42 | # Rの場合 43 | conda remove -n sage-r --all -y 44 | 45 | # Pythonの場合 46 | conda remove -n sage-python --all -y 47 | ``` 48 | 49 | すべてを削除してやり直す方法は,[Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/studio-lab-use-manage.html#:~:text=Start%20runtime.-,Reset%20environment,-To%20remove%20all)に掲載されています. 50 | 51 | ## 補足 52 | 53 | 環境構築に使った.ymlは次のように作成しました(このコードを実行する必要はありません). 54 | 55 | ```bash 56 | # Rの場合 57 | conda create -y -n sage-r python=3.8.8 58 | conda activate sage-r 59 | 60 | conda install -y -c conda-forge \ 61 | r-caret \ 62 | r-doparallel \ 63 | r-exactci \ 64 | r-fable \ 65 | r-factoextra \ 66 | r-feasts \ 67 | r-furrr \ 68 | r-ggfortify \ 69 | r-ggmosaic \ 70 | r-glmnetutils \ 71 | r-h2o==3.34.0.3 \ 72 | r-igraph \ 73 | r-irkernel \ 74 | r-keras \ 75 | r-neuralnet \ 76 | r-pastecs \ 77 | r-prophet \ 78 | r-prroc \ 79 | r-psych \ 80 | r-randomforest \ 81 | r-remotes \ 82 | r-rpart.plot \ 83 | r-tidyverse \ 84 | r-urca \ 85 | r-vcd \ 86 | r-xgboost==1.4.1 87 | 88 | conda install -y -c bioconda r-ggbiplot 89 | 90 | Rscript -e 'keras::install_keras()' 91 | 92 | conda env export -n sage-r > sage-r.yml 93 | ``` 94 | 95 | ```bash 96 | # Pythonの場合 97 | conda create -y -n sage-python python=3.8.8 98 | conda activate sage-python 99 | 100 | conda install -y \ 101 | fbprophet \ 102 | ipykernel \ 103 | keras \ 104 | lxml \ 105 | matplotlib \ 106 | pandarallel \ 107 | pmdarima \ 108 | python-graphviz \ 109 | seaborn \ 110 | scikit-learn \ 111 | scipy==1.6.3 \ 112 | statsmodels \ 113 | tensorflow-gpu \ 114 | xgboost==1.5.1 115 | 116 | conda install -y -c anaconda h2o h2o-py 117 | 118 | pip install pca 119 | 120 | conda env export -n sage-python > sage-python.yml 121 | ``` 122 | -------------------------------------------------------------------------------- 
/update.md: -------------------------------------------------------------------------------- 1 | # 更新情報・正誤表 2 | 3 | 公開しているコードでは,以下の内容を反映しています. 4 | 5 | ## 更新情報 6 | 7 | 場所|説明 8 | --|-- 9 | p. 6|Windows 11には,脚註4で紹介しているWindows Terminalが搭載されています. 10 | p. 20|[Amazon SageMaker Studio Lab](addendum/sagemaker)での動作も確認済みです.表2.1のクラウド・ノートブックに相当します. 11 | p. 22 脚註3|Google Colabでノートブックを新規作成した後で,ランタイム→ランタイムのタイプを変更で,Rを選択できるようになりました. 12 | p. 77|**(バージョン依存)** 3.4.2.1のPythonのコードの`my_df2 = my_df.append(tmp)`を`my_df2 = pd.concat([my_df, tmp])`としなければならないことがあります. 13 | p. 112 脚註6|**(バージョン依存)** 対象を数値の列に限定するオプション`numeric_only=True`が必要な場合があります. 14 | p. 113|**(バージョン依存)** Pythonのコードを,`my_df.var(numeric_only=True)`や`my_df.apply('var', numeric_only=True)`としなければならないことがあります. 15 | p. 115|**(バージョン依存)** Pythonのコードを,`my_df.groupby('gender').mean(numeric_only=True)`あるいは`my_df.groupby('gender').agg('mean', numeric_only=True)`あるいは`my_df.drop(['name'], axis=1).groupby('gender').agg(np.mean)`としなければならないことがあります. 16 | p. 151, 152|GitHub上でのCSVファイルの表示方法が変更されたので,https://github.com/taroyabuki/fromzero/blob/master/data/exam.csv の代わりにhttps://taroyabuki.github.io/fromzero/exam.html を使ってください. 17 | p. 160, 161|**(バージョン依存)** Pythonのコードの`get_feature_names()`を`get_feature_names_out()`としなければならないことがあります. 18 | p. 184|[予測値の期待値の信頼区間](addendum/07.03.02/) 19 | p. 194|[「7.4.3 当てはまりの良さの指標の問題点」についての補足](addendum/07.04.03.ipynb) 20 | p. 271, 275|XGBoostで`ValueError: Invalid classes inferred from unique values of y. Expected: [0 1 2], got ['setosa' 'versicolor' 'virginica']`というエラーが出る場合は,`LabelEncoder`を使ってラベルを数値に変換してください. 21 | p. 271, 275|9.4.2, 9.5.3項のPythonのコードで警告がたくさん出る場合は,`warnings.simplefilter`の引数の「`, UserWarning`」を削除してみてください. 22 | p. 277|9.6.2項のPythonのコードで警告がたくさん出る場合は,`MLPClassifier()`を`MLPClassifier(max_iter=1000)`に変更してみてください. 23 | p. 292, 298|**(バージョン依存)** Pythonのコードの`get_feature_names()`を`get_feature_names_out()`としなければならないことがあります. 24 | p. 
297|**(バージョン依存)** Pythonのコードの`LogisticRegression(penalty='none')`を`LogisticRegression(penalty=None)`としなければならないことがあります. 25 | 第11章|Google Colabでは,Kerasの代わりにKeras3をインストールして,`library(keras3)`で読み込んでください(公開コードは対応済み). 26 | p. 309|**(バージョン依存)** Rでエラーが出る場合は,`list`を`rbind`に変更してください. 27 | p. 309|**(バージョン依存)** Pythonでは`y, y_1, y_2 = np.array(y), np.array(y_1), np.array(y_2)`として,リストをアレイに変換しなければならないことがあります. 28 | p. 310, 329|Rのコード`左辺 %<-% 右辺`が正しく動作しない場合は,事前に`library(zeallot)`を実行してください. 29 | p. 342|**(バージョン依存)** Pythonのコードの`from fbprophet import Prophet`を`from prophet import Prophet`としなければならないことがあります. 30 | 31 | ## 正誤表 32 | 33 | 次の誤りは第6刷で修正しました. 34 | 35 | 場所|誤|正 36 | --|--|-- 37 | p. 138 本文1行目|確率(約0.22)|確率(約0.022) 38 | 39 | 次の誤りは第5刷で修正しました. 40 | 41 | 場所|誤|正 42 | --|--|-- 43 | p. 258 本文3行目|グラフの中で|連結グラフ(任意の2点を線をつないで結べるグラフ)の中で 44 | p. 351 Pythonのコード|`vals, vecs = np.linalg.eig(S) # 固有値と固有ベクトル`|`vals, vecs = np.linalg.eig(S) # 固有値と固有ベクトル`
`idx = np.argsort(-vals) # 固有値の大きい順の番号`
`vals, vecs = vals[idx], vecs[:, idx] # 固有値の大きい順での並べ替え` 45 | 46 | 次の誤りは第4刷で修正しました. 47 | 48 | 場所|誤|正 49 | --|--|-- 50 | p. 56 最初のコード(R)|`0.3333333`|`3.333333` 51 | p. 56 最初のコード(Python)|`0.3333333333333333`|`3.3333333333333335` 52 | p. 56 脚註1|0.3333333|3.333333 53 | p. 119 脚註9|[4.3, 4.7, 5.1, 5.5, 5.9, 6.300000000000001, 6.7, 7.1000000000000005, 7.5, 7.9]です.小さな誤差が,観測値6.3や7.1が属する階級に影響し,このままではヒストグラムがRと同じになりません.同じにするために,ここでは,`round`で誤差を消しています.|[4.3, 4.7, 5.1, 5.5, 5.9, 6.3, 6.7, 7.1, 7.5, 7.9]から少しずれます.Rも同様なのですが,Rではそのずれを丸めて消してから数を数えます.ここでは,Pythonでもそうなるように,`round`で数値を丸めています. 54 | p. 184 脚註4|回帰直線|予測値の期待値 55 | p. 194 Pythonのコード(2箇所)|`PolynomialFeatures(d)`|`PolynomialFeatures(d, include_bias=False)` 56 | p. 233 旁註|`sc`や`lm`|`sc`や`lr` 57 | p. 233 旁註|`my_model.named_steps.lm`|`my_pipeline.named_steps.lr` 58 | p. 240 旁註|`sfs`と`lm`|`sfs`と`lr` 59 | p. 272 下から2行目|Sepal.With|Sepal.Width 60 | p. 341 脚註5|`autoplot(level = c(80, 90))`|`autoplot(level = c(80, 95))` 61 | p. 349 本文上から3行目|描かれいます|描かれています 62 | -------------------------------------------------------------------------------- /code/R-notebook/r-12.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "id": "bc5770da", 5 | "cell_type": "markdown", 6 | "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)", 7 | "metadata": {} 8 | }, 9 | { 10 | "cell_type": "code", 11 | "execution_count": null, 12 | "metadata": {}, 13 | "outputs": [], 14 | "source": [ 15 | "# Google Colaboratory\u306e\u74b0\u5883\u8a2d\u5b9a\n", 16 | "if (Sys.getenv(\"COLAB_JUPYTER_IP\") != \"\") {\n", 17 | " options(Ncpus = parallel::detectCores())\n", 18 | " installed_packages <- rownames(installed.packages())\n", 19 | " packages_to_install <- c(\"caret\", \"fable\", \"feasts\", \"prophet\", 
\"tsibble\", \"urca\")\n", 20 | " install.packages(setdiff(packages_to_install, installed_packages))\n", 21 | " install.packages(c(\"ggplot2\"))\n", 22 | "}" 23 | ], 24 | "id": "464ec67c-16a7-4275-83d5-52bb7831ad0d" 25 | }, 26 | { 27 | "id": "ce518daf", 28 | "cell_type": "markdown", 29 | "source": "## 12.1 \u65e5\u6642\u3068\u65e5\u6642\u306e\u5217", 30 | "metadata": {} 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "as.POSIXct(\"2021-01-01\")" 39 | ], 40 | "id": "1924ebe8-4882-4ff3-b33f-d4153e8015cd" 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "library(tsibble)\n", 49 | "\n", 50 | "seq(from = 2021, to = 2023, by = 1)\n", 51 | "\n", 52 | "seq(from = yearmonth(\"202101\"), to = yearmonth(\"202103\"), by = 2)\n", 53 | "\n", 54 | "seq(from = as.POSIXct(\"2021-01-01\"), to = as.POSIXct(\"2021-01-03\"), by = \"1 day\")\n", 55 | "\n", 56 | "seq(from = as.POSIXct(\"2021-01-01 00:00:00\"),\n", 57 | " to = as.POSIXct(\"2021-01-01 03:00:00\"), by = \"2 hour\")" 58 | ], 59 | "id": "0f224c5a-78c5-45e8-abf2-7904a5c2319f" 60 | }, 61 | { 62 | "id": "29cbc74a", 63 | "cell_type": "markdown", 64 | "source": "## 12.2 \u6642\u7cfb\u5217\u30c7\u30fc\u30bf\u306e\u4e88\u6e2c", 65 | "metadata": {} 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "my_data <- as.vector(AirPassengers)" 74 | ], 75 | "id": "2cb040ee-9b23-4d7e-b0c7-6d39baf86c47" 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "n <- length(my_data) # \u30c7\u30fc\u30bf\u6570\uff08144\uff09\n", 84 | "k <- 108 # \u8a13\u7df4\u30c7\u30fc\u30bf\u6570" 85 | ], 86 | "id": "551c2b5a-8f7b-474a-984d-d785fab1a107" 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | 
"metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "library(tidyverse)\n", 95 | "library(tsibble)\n", 96 | "\n", 97 | "my_ds <- seq(\n", 98 | " from = yearmonth(\"1949/01\"),\n", 99 | " to = yearmonth(\"1960/12\"),\n", 100 | " by = 1)\n", 101 | "my_label <- rep(\n", 102 | " c(\"train\", \"test\"),\n", 103 | " c(k, n - k))\n", 104 | "my_df <- tsibble(\n", 105 | " ds = my_ds,\n", 106 | " x = 0:(n - 1),\n", 107 | " y = my_data,\n", 108 | " label = my_label,\n", 109 | " index = ds) # \u65e5\u6642\u306e\u5217\u306e\u6307\u5b9a\n", 110 | "\n", 111 | "head(my_df)" 112 | ], 113 | "id": "2c3c9a07-fce3-4f34-8411-3f99100d4580" 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "my_train <- my_df[ 1:k , ]\n", 122 | "my_test <- my_df[-(1:k), ]\n", 123 | "y <- my_test$y" 124 | ], 125 | "id": "a830e70a-136d-40f4-b30a-36635556d054" 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "my_plot <- my_df %>%\n", 134 | " ggplot(aes(x = ds,\n", 135 | " y = y,\n", 136 | " color = label)) +\n", 137 | " geom_line()\n", 138 | "my_plot" 139 | ], 140 | "id": "84617760-c969-4691-87d4-35e9b3c37d2d" 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "library(caret)\n", 149 | "my_lm_model <- train(form = y ~ x, data = my_train, method = \"lm\")\n", 150 | "y_ <- my_lm_model %>% predict(my_test)\n", 151 | "caret::RMSE(y, y_) # RMSE\uff08\u30c6\u30b9\u30c8\uff09" 152 | ], 153 | "id": "1c2cd6a3-13a6-48f4-9615-0602d3e60958" 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "y_ <- my_lm_model %>% predict(my_df)\n", 162 | "tmp <- my_df %>%\n", 163 | " mutate(y = y_, label = \"model\")\n", 164 | "my_plot + geom_line(data = tmp)" 165 | ], 
166 | "id": "bf914a75-a3dc-40cb-8d0f-8d5dfd2ab11c" 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "library(fable)\n", 175 | "my_arima_model <- my_train %>% model(ARIMA(y))\n", 176 | "my_arima_model" 177 | ], 178 | "id": "b7c8e8cb-940b-4eee-8fc8-6bb16e043927" 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "tmp <- my_arima_model %>% forecast(h = \"3 years\")\n", 187 | "head(tmp)" 188 | ], 189 | "id": "6fc63b0e-666e-4a05-90ac-88e5210500ff" 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "y_ <- tmp$.mean\n", 198 | "caret::RMSE(y_, y)" 199 | ], 200 | "id": "19166890-94bf-4442-b5d8-e45dc405d0e4" 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "# \u4e88\u6e2c\u7d50\u679c\u306e\u307f\u3067\u3088\u3044\u5834\u5408\n", 209 | "#tmp %>% autoplot\n", 210 | "\n", 211 | "tmp %>% autoplot +\n", 212 | " geom_line(data = my_df,\n", 213 | " aes(x = ds,\n", 214 | " y = y,\n", 215 | " color = label))" 216 | ], 217 | "id": "6f82290c-51f8-4523-a225-46542fe46055" 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "library(prophet)\n", 226 | "my_prophet_model <- my_train %>%\n", 227 | " prophet(seasonality.mode = \"multiplicative\")" 228 | ], 229 | "id": "285cd4a1-cc34-4a04-8671-12ab80c52ca0" 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "tmp <- my_prophet_model %>% predict(my_test)\n", 238 | "head(tmp[, c(\"ds\", \"yhat\", \"yhat_lower\", \"yhat_upper\")])" 239 | ], 240 | "id": "a24e324e-882d-4d09-91c8-37632736c396" 241 | }, 242 | { 
243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "y_ <- tmp$yhat\n", 249 | "caret::RMSE(y_, y)" 250 | ], 251 | "id": "c532ee13-3b8d-407f-89a1-7c0668486f15" 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "# my_prophet_model %>% plot(tmp) # \u4e88\u6e2c\u7d50\u679c\u306e\u307f\u3067\u3088\u3044\u5834\u5408\n", 260 | "\n", 261 | "my_prophet_model %>% plot(tmp) +\n", 262 | " geom_line(data = my_train, aes(x = as.POSIXct(ds))) +\n", 263 | " geom_line(data = my_test, aes(x = as.POSIXct(ds)), color = \"red\")" 264 | ], 265 | "id": "b4b145ed-eef4-4399-bf35-7948c988f688" 266 | } 267 | ], 268 | "nbformat": 4, 269 | "nbformat_minor": 5, 270 | "metadata": { 271 | "kernelspec": { 272 | "name": "ir", 273 | "display_name": "R" 274 | } 275 | } 276 | } -------------------------------------------------------------------------------- /code/Python-notebook/python-12.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "id": "f86449f9", 5 | "cell_type": "markdown", 6 | "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)", 7 | "metadata": {} 8 | }, 9 | { 10 | "cell_type": "code", 11 | "execution_count": null, 12 | "metadata": {}, 13 | "outputs": [], 14 | "source": [ 15 | "# Google Colaboratory\u306e\u74b0\u5883\u8a2d\u5b9a\n", 16 | "import os\n", 17 | "if 'COLAB_GPU' in os.environ:\n", 18 | " !python -m pip install pmdarima | tail -n 1" 19 | ], 20 | "id": "01c13eaf-b572-4570-979a-15591bc77674" 21 | }, 22 | { 23 | "id": "efe8c46d", 24 | "cell_type": "markdown", 25 | "source": "## 12.1 \u65e5\u6642\u3068\u65e5\u6642\u306e\u5217", 26 | "metadata": {} 
27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import pandas as pd\n", 35 | "pd.to_datetime('2020-01-01')" 36 | ], 37 | "id": "ad2111fa-8b0d-4ccb-a872-de1004bb5ea2" 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "pd.date_range(start='2021-01-01', end='2023-01-01', freq='1A')\n", 46 | "\n", 47 | "pd.date_range(start='2021-01-01', end='2023-01-01', freq='1AS')\n", 48 | "\n", 49 | "pd.date_range(start='2021-01-01', end='2021-03-01', freq='2M')\n", 50 | "\n", 51 | "pd.date_range(start='2021-01-01', end='2021-03-01', freq='2MS')\n", 52 | "\n", 53 | "pd.date_range(start='2021-01-01', end='2021-01-03', freq='1D')\n", 54 | "\n", 55 | "pd.date_range(start='2021-01-01 00:00:00', end='2021-01-01 03:00:00', freq='2H')" 56 | ], 57 | "id": "9a125fa4-d6d9-456b-a27c-39aed4f96653" 58 | }, 59 | { 60 | "id": "ee17dc29", 61 | "cell_type": "markdown", 62 | "source": "## 12.2 \u6642\u7cfb\u5217\u30c7\u30fc\u30bf\u306e\u4e88\u6e2c", 63 | "metadata": {} 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "import matplotlib.pyplot as plt\n", 72 | "import pandas as pd\n", 73 | "from pmdarima.datasets import airpassengers\n", 74 | "from sklearn.metrics import mean_squared_error\n", 75 | "\n", 76 | "my_data = airpassengers.load_airpassengers()" 77 | ], 78 | "id": "8518eedc-0cf0-473b-9588-5ec3cc4c6dc3" 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "n = len(my_data) # \u30c7\u30fc\u30bf\u6570\uff08144\uff09\n", 87 | "k = 108 # \u8a13\u7df4\u30c7\u30fc\u30bf\u6570" 88 | ], 89 | "id": "b055dc4e-65b6-4cc7-852e-50cc6a87f318" 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | 
"my_ds = pd.date_range(\n", 98 | " start='1949/01/01',\n", 99 | " end='1960/12/01',\n", 100 | " freq='MS')\n", 101 | "my_df = pd.DataFrame({\n", 102 | " 'ds': my_ds,\n", 103 | " 'x': range(n),\n", 104 | " 'y': my_data},\n", 105 | " index=my_ds)\n", 106 | "my_df.head()" 107 | ], 108 | "id": "091ac6e3-3429-4730-b480-0b17fbd5173f" 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "my_train = my_df[ :k]\n", 117 | "my_test = my_df[-(n - k): ]\n", 118 | "y = my_test.y" 119 | ], 120 | "id": "6dd363fe-7392-4bb6-86aa-5f92e2b685e5" 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "plt.plot(my_train.y, label='train')\n", 129 | "plt.plot(my_test.y, label='test')\n", 130 | "plt.legend()" 131 | ], 132 | "id": "9d0cc675-c03f-4c58-893d-ddd04cbfd8c6" 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "from sklearn.linear_model import LinearRegression\n", 141 | "\n", 142 | "my_lm_model = LinearRegression()\n", 143 | "my_lm_model.fit(my_train[['x']], my_train.y)\n", 144 | "\n", 145 | "X = my_test[['x']]\n", 146 | "y_ = my_lm_model.predict(X)\n", 147 | "mean_squared_error(y, y_)**0.5 # RMSE\uff08\u30c6\u30b9\u30c8\uff09" 148 | ], 149 | "id": "8f54ed6b-04c1-4ec4-adee-d9845c60a5c9" 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "y_ = my_lm_model.predict(my_df[['x']])\n", 158 | "tmp = pd.DataFrame(y_,\n", 159 | " index=my_df.index)\n", 160 | "plt.plot(my_train.y, label='train')\n", 161 | "plt.plot(my_test.y, label='test')\n", 162 | "plt.plot(tmp, label='model')\n", 163 | "plt.legend()" 164 | ], 165 | "id": "49eed027-c6d5-46dc-9d2a-f6ee12eb49ee" 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | 
"metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "import pmdarima as pm\n", 174 | "my_arima_model = pm.auto_arima(my_train.y, m=12, trace=True)" 175 | ], 176 | "id": "3869df3e-9fb3-491d-926f-991b43092303" 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "y_, my_ci = my_arima_model.predict(len(my_test), # \u671f\u9593\u306f\u30c6\u30b9\u30c8\u30c7\u30fc\u30bf\u3068\u540c\u3058\uff0e\n", 185 | " alpha=0.05, # \u6709\u610f\u6c34\u6e96\uff08\u30c7\u30d5\u30a9\u30eb\u30c8\uff09\n", 186 | " return_conf_int=True) # \u4fe1\u983c\u533a\u9593\u3092\u6c42\u3081\u308b\uff0e\n", 187 | "tmp = pd.DataFrame({'y': y_,\n", 188 | " 'Lo': my_ci[:, 0],\n", 189 | " 'Hi': my_ci[:, 1]},\n", 190 | " index=my_test.index)\n", 191 | "tmp.head()" 192 | ], 193 | "id": "f1c97130-5ab0-4cdc-a8fb-8f9288bdbdba" 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "mean_squared_error(y, y_)**0.5" 202 | ], 203 | "id": "0e230755-e4fd-4098-ba68-aa9eb51e4021" 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "plt.plot(my_train.y, label='train')\n", 212 | "plt.plot(my_test.y, label='test')\n", 213 | "plt.plot(tmp.y, label='model')\n", 214 | "plt.fill_between(tmp.index,\n", 215 | " tmp.Lo,\n", 216 | " tmp.Hi,\n", 217 | " alpha=0.25) # \u4e0d\u900f\u660e\u5ea6\n", 218 | "plt.legend(loc='upper left')" 219 | ], 220 | "id": "1904f642-1352-48e4-b998-59505e434131" 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "try: from fbprophet import Prophet\n", 229 | "except ImportError: from prophet import Prophet\n", 230 | "my_prophet_model = Prophet(seasonality_mode='multiplicative')\n", 231 | "my_prophet_model.fit(my_train)" 232 | ], 233 | "id": 
"b8cd2056-fd59-47e8-8023-f6e4a5d77e51" 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "tmp = my_prophet_model.predict(my_test)\n", 242 | "tmp[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head()" 243 | ], 244 | "id": "4c06ee6c-4454-4c37-87ad-ceaf0f1f159c" 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "y_ = tmp.yhat\n", 253 | "mean_squared_error(y, y_)**0.5" 254 | ], 255 | "id": "f89c7ebe-4ef5-48fd-8fe9-d3f7def4b513" 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "# my_prophet_model.plot(tmp) # \u4e88\u6e2c\u7d50\u679c\u306e\u307f\u3067\u3088\u3044\u5834\u5408\n", 264 | "\n", 265 | "fig = my_prophet_model.plot(tmp)\n", 266 | "fig.axes[0].plot(my_train.ds, my_train.y)\n", 267 | "fig.axes[0].plot(my_test.ds, my_test.y, color='red')" 268 | ], 269 | "id": "ff77ca1c-2efb-4362-92ac-74c33dcd2b38" 270 | } 271 | ], 272 | "nbformat": 4, 273 | "nbformat_minor": 5, 274 | "metadata": { 275 | "kernelspec": { 276 | "name": "python3", 277 | "display_name": "Python 3" 278 | } 279 | } 280 | } -------------------------------------------------------------------------------- /addendum/sagemaker/sage-python.yml: -------------------------------------------------------------------------------- 1 | name: sage-python 2 | channels: 3 | - anaconda 4 | - conda-forge 5 | dependencies: 6 | - _libgcc_mutex=0.1=conda_forge 7 | - _openmp_mutex=4.5=2_gnu 8 | - _py-xgboost-mutex=2.0=cpu_0 9 | - abseil-cpp=20210324.2=h9c3ff4c_0 10 | - absl-py=0.15.0=pyhd8ed1ab_0 11 | - aiohttp=3.8.1=py38h0a891b7_1 12 | - aiosignal=1.2.0=pyhd8ed1ab_0 13 | - alsa-lib=1.2.3.2=h166bdaf_0 14 | - arviz=0.12.0=pyhd8ed1ab_0 15 | - asttokens=2.0.5=pyhd8ed1ab_0 16 | - astunparse=1.6.3=pyhd8ed1ab_0 17 | - async-timeout=4.0.2=pyhd8ed1ab_0 18 | - 
atk-1.0=2.36.0=h3371d22_4 19 | - attrs=21.4.0=pyhd8ed1ab_0 20 | - backcall=0.2.0=pyh9f0ad1d_0 21 | - backports=1.0=py_2 22 | - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 23 | - binutils_impl_linux-64=2.36.1=h193b22a_2 24 | - binutils_linux-64=2.36=hf3e587d_33 25 | - blinker=1.4=py_1 26 | - brotli=1.0.9=h166bdaf_7 27 | - brotli-bin=1.0.9=h166bdaf_7 28 | - bzip2=1.0.8=h7f98852_4 29 | - c-ares=1.18.1=h7f98852_0 30 | - ca-certificates=2022.4.26=h06a4308_0 31 | - cached-property=1.5.2=hd8ed1ab_1 32 | - cached_property=1.5.2=pyha770c72_1 33 | - cachetools=5.0.0=pyhd8ed1ab_0 34 | - cairo=1.16.0=h6cf1ce9_1008 35 | - certifi=2021.10.8=py38h06a4308_2 36 | - cftime=1.6.0=py38h71d37f0_1 37 | - charset-normalizer=2.0.12=pyhd8ed1ab_0 38 | - click=8.1.3=py38h578d9bd_0 39 | - colorama=0.4.4=pyhd3eb1b0_0 40 | - convertdate=2.4.0=pyhd8ed1ab_0 41 | - cryptography=36.0.2=py38h2b5fc30_1 42 | - cudatoolkit=11.6.0=habf752d_10 43 | - cudnn=8.2.1.32=h86fa8c9_0 44 | - curl=7.83.0=h7bff187_0 45 | - cycler=0.11.0=pyhd8ed1ab_0 46 | - cython=0.29.28=py38hfa26641_2 47 | - dbus=1.13.18=hb2f20db_0 48 | - debugpy=1.6.0=py38hfa26641_0 49 | - decorator=5.1.1=pyhd8ed1ab_0 50 | - dill=0.3.4=pyhd8ed1ab_0 51 | - entrypoints=0.4=pyhd8ed1ab_0 52 | - ephem=4.1.3=py38h0a891b7_4 53 | - executing=0.8.3=pyhd8ed1ab_0 54 | - expat=2.4.8=h27087fc_0 55 | - fbprophet=0.7.1=py38h950e882_0 56 | - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 57 | - font-ttf-inconsolata=3.000=h77eed37_0 58 | - font-ttf-source-code-pro=2.038=h77eed37_0 59 | - font-ttf-ubuntu=0.83=hab24e00_0 60 | - fontconfig=2.14.0=h8e229c2_0 61 | - fonts-conda-ecosystem=1=0 62 | - fonts-conda-forge=1=0 63 | - fonttools=4.33.3=py38h0a891b7_0 64 | - freetype=2.11.0=h70c0345_0 65 | - fribidi=1.0.10=h36c2ea0_0 66 | - frozenlist=1.3.0=py38h0a891b7_1 67 | - future=0.18.2=py38_1 68 | - gast=0.4.0=pyh9f0ad1d_0 69 | - gcc_impl_linux-64=7.5.0=habd7529_20 70 | - gcc_linux-64=7.5.0=h47867f9_33 71 | - gdk-pixbuf=2.42.6=h04a7f16_0 72 | - 
gettext=0.19.8.1=h0b5b191_1005 73 | - giflib=5.2.1=h36c2ea0_2 74 | - glib=2.69.1=h4ff587b_1 75 | - glib-tools=2.68.4=h9c3ff4c_0 76 | - google-auth=2.6.6=pyh6c4a22f_0 77 | - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0 78 | - google-pasta=0.2.0=pyh8c360ce_0 79 | - graphite2=1.3.13=h58526e2_1001 80 | - graphviz=2.47.2=h85b4f2f_0 81 | - grpc-cpp=1.39.1=h850795e_1 82 | - grpcio=1.39.0=py38hdd6454d_0 83 | - gst-plugins-base=1.18.5=hf529b03_0 84 | - gstreamer=1.18.5=h76c114f_0 85 | - gtk2=2.24.33=h539f30e_1 86 | - gts=0.7.6=h64030ff_2 87 | - gxx_impl_linux-64=7.5.0=hd0bb8aa_20 88 | - gxx_linux-64=7.5.0=h555fc39_33 89 | - h2o=3.18.0.2=0 90 | - h2o-py=3.36.1.1=pyhd8ed1ab_0 91 | - h5py=3.1.0=nompi_py38hafa665b_100 92 | - harfbuzz=3.0.0=h83ec7ef_1 93 | - hdf4=4.2.15=h10796ff_3 94 | - hdf5=1.10.6=nompi_h6a2412b_1114 95 | - hijri-converter=2.2.3=pyhd8ed1ab_0 96 | - holidays=0.13=pyhd8ed1ab_0 97 | - icu=68.2=h9c3ff4c_0 98 | - idna=3.3=pyhd3eb1b0_0 99 | - importlib-metadata=4.11.3=py38h578d9bd_1 100 | - importlib_metadata=4.11.3=hd8ed1ab_1 101 | - ipykernel=6.13.0=py38h7f3c49e_0 102 | - ipython=8.3.0=py38h578d9bd_0 103 | - jbig=2.1=h7f98852_2003 104 | - jedi=0.18.1=py38h578d9bd_1 105 | - joblib=1.1.0=pyhd8ed1ab_0 106 | - jpeg=9e=h166bdaf_1 107 | - jupyter_client=7.3.0=pyhd8ed1ab_0 108 | - jupyter_core=4.9.2=py38h578d9bd_0 109 | - keras=2.6.0=pyhd8ed1ab_1 110 | - keras-preprocessing=1.1.2=pyhd8ed1ab_0 111 | - kernel-headers_linux-64=2.6.32=he073ed8_15 112 | - keyutils=1.6.1=h166bdaf_0 113 | - kiwisolver=1.4.2=py38h43d8883_1 114 | - korean_lunar_calendar=0.2.1=pyh9f0ad1d_0 115 | - krb5=1.19.3=h3790be6_0 116 | - lcms2=2.12=hddcbb42_0 117 | - ld_impl_linux-64=2.36.1=hea4e1c9_2 118 | - lerc=3.0=h9c3ff4c_0 119 | - libblas=3.9.0=14_linux64_openblas 120 | - libbrotlicommon=1.0.9=h166bdaf_7 121 | - libbrotlidec=1.0.9=h166bdaf_7 122 | - libbrotlienc=1.0.9=h166bdaf_7 123 | - libcblas=3.9.0=14_linux64_openblas 124 | - libclang=11.1.0=default_ha53f305_1 125 | - libcurl=7.83.0=h7bff187_0 126 | - 
libdeflate=1.10=h7f98852_0 127 | - libedit=3.1.20191231=he28a2e2_2 128 | - libev=4.33=h516909a_1 129 | - libevent=2.1.10=h9b69904_4 130 | - libffi=3.3=h58526e2_2 131 | - libgcc-devel_linux-64=7.5.0=hda03d7c_20 132 | - libgcc-ng=11.2.0=h1d223b6_16 133 | - libgd=2.3.3=h6ad9fb6_0 134 | - libgfortran-ng=11.2.0=h69a702a_16 135 | - libgfortran5=11.2.0=h5c6108e_16 136 | - libglib=2.68.4=h3e27bee_0 137 | - libgomp=11.2.0=h1d223b6_16 138 | - libiconv=1.16=h516909a_0 139 | - liblapack=3.9.0=14_linux64_openblas 140 | - libllvm11=11.1.0=hf817b99_3 141 | - libnetcdf=4.8.1=nompi_hcd642e3_100 142 | - libnghttp2=1.47.0=h727a467_0 143 | - libogg=1.3.4=h7f98852_1 144 | - libopenblas=0.3.20=pthreads_h78a6416_0 145 | - libopus=1.3.1=h7f98852_1 146 | - libpng=1.6.37=hbc83047_0 147 | - libpq=13.5=hd57d9b9_1 148 | - libprotobuf=3.16.0=h780b84a_0 149 | - librsvg=2.50.5=hc3c00ef_0 150 | - libsodium=1.0.18=h36c2ea0_1 151 | - libssh2=1.10.0=ha56f1ee_2 152 | - libstdcxx-devel_linux-64=7.5.0=hb016644_20 153 | - libstdcxx-ng=11.2.0=he4da1e4_16 154 | - libtiff=4.3.0=h542a066_3 155 | - libtool=2.4.6=h9c3ff4c_1008 156 | - libuuid=2.32.1=h7f98852_1000 157 | - libvorbis=1.3.7=h9c3ff4c_0 158 | - libwebp=1.2.2=h3452ae3_0 159 | - libwebp-base=1.2.2=h7f98852_1 160 | - libxcb=1.14=h7b6447c_0 161 | - libxgboost=1.5.1=cpu_h3d145d1_2 162 | - libxkbcommon=1.0.3=he3ba5ed_0 163 | - libxml2=2.9.10=h72842e0_4 164 | - libxslt=1.1.33=h15afd5d_2 165 | - libzip=1.8.0=h4de3113_1 166 | - libzlib=1.2.11=h166bdaf_1014 167 | - lunarcalendar=0.0.9=py_0 168 | - lxml=4.8.0=py38h0a891b7_3 169 | - lz4-c=1.9.3=h9c3ff4c_1 170 | - markdown=3.3.7=pyhd8ed1ab_0 171 | - matplotlib=3.5.2=py38h578d9bd_0 172 | - matplotlib-base=3.5.2=py38h826bfd8_0 173 | - matplotlib-inline=0.1.3=pyhd8ed1ab_0 174 | - multidict=6.0.2=py38h0a891b7_1 175 | - munkres=1.1.4=pyh9f0ad1d_0 176 | - mysql-common=8.0.29=haf5c9bc_0 177 | - mysql-libs=8.0.29=h28c427c_0 178 | - nccl=2.12.10.1=h0800d71_0 179 | - ncurses=6.3=h27087fc_1 180 | - 
nest-asyncio=1.5.5=pyhd8ed1ab_0 181 | - netcdf4=1.5.7=nompi_py38hcc16cfe_101 182 | - nspr=4.32=h9c3ff4c_1 183 | - nss=3.77=h2350873_0 184 | - oauthlib=3.2.0=pyhd8ed1ab_0 185 | - openjdk=11.0.13=h87a67e3_0 186 | - openjpeg=2.4.0=hb52868f_1 187 | - openssl=1.1.1o=h166bdaf_0 188 | - opt_einsum=3.3.0=pyhd8ed1ab_1 189 | - packaging=21.3=pyhd8ed1ab_0 190 | - pandarallel=1.6.1=pyhd8ed1ab_0 191 | - pandas=1.4.2=py38h47df419_1 192 | - pango=1.48.10=h54213e6_2 193 | - parso=0.8.3=pyhd8ed1ab_0 194 | - patsy=0.5.2=pyhd8ed1ab_0 195 | - pcre=8.45=h295c915_0 196 | - pexpect=4.8.0=pyh9f0ad1d_2 197 | - pickleshare=0.7.5=py_1003 198 | - pillow=9.1.0=py38h0ee0e06_2 199 | - pip=22.0.4=pyhd8ed1ab_0 200 | - pixman=0.40.0=h36c2ea0_0 201 | - pmdarima=1.8.2=py38h497a2fe_3 202 | - prompt-toolkit=3.0.29=pyha770c72_0 203 | - protobuf=3.16.0=py38h709712a_0 204 | - psutil=5.9.0=py38h0a891b7_1 205 | - pthread-stubs=0.4=h36c2ea0_1001 206 | - ptyprocess=0.7.0=pyhd3deb0d_0 207 | - pure_eval=0.2.2=pyhd8ed1ab_0 208 | - py-xgboost=1.5.1=cpu_py38h66f0ec1_2 209 | - pyasn1=0.4.8=py_0 210 | - pyasn1-modules=0.2.7=py_0 211 | - pycparser=2.21=pyhd3eb1b0_0 212 | - pygments=2.12.0=pyhd8ed1ab_0 213 | - pyjwt=2.3.0=pyhd8ed1ab_1 214 | - pymeeus=0.5.10=pyhd8ed1ab_0 215 | - pyopenssl=22.0.0=pyhd3eb1b0_0 216 | - pyparsing=3.0.8=pyhd8ed1ab_0 217 | - pyqt=5.12.3=py38h578d9bd_8 218 | - pyqt-impl=5.12.3=py38h0ffb2e6_8 219 | - pyqt5-sip=4.19.18=py38h709712a_8 220 | - pyqtchart=5.12=py38h7400c14_8 221 | - pyqtwebengine=5.12.1=py38h7400c14_8 222 | - pysocks=1.7.1=py38h06a4308_0 223 | - pystan=2.19.1.1=py38hc5bc63f_2 224 | - python=3.8.8=hffdb5ce_0_cpython 225 | - python-dateutil=2.8.2=pyhd8ed1ab_0 226 | - python-flatbuffers=1.12=pyhd8ed1ab_1 227 | - python-graphviz=0.20=pyhaef67bd_0 228 | - python_abi=3.8=2_cp38 229 | - pytz=2022.1=pyhd8ed1ab_0 230 | - pyu2f=0.1.5=pyhd8ed1ab_0 231 | - pyzmq=22.3.0=py38hfc09fa9_2 232 | - qt=5.12.9=hda022c4_4 233 | - re2=2021.09.01=h9c3ff4c_0 234 | - readline=8.1=h46c0cb4_0 235 | - 
requests=2.27.1=pyhd3eb1b0_0 236 | - requests-oauthlib=1.3.1=pyhd8ed1ab_0 237 | - rsa=4.8=pyhd8ed1ab_0 238 | - scikit-learn=1.0.2=py38h1561384_0 239 | - scipy=1.6.3=py38h7b17777_0 240 | - seaborn=0.11.2=hd8ed1ab_0 241 | - seaborn-base=0.11.2=pyhd8ed1ab_0 242 | - setuptools=49.6.0=py38h578d9bd_3 243 | - six=1.15.0=pyh9f0ad1d_0 244 | - snappy=1.1.9=hbd366e4_0 245 | - sqlite=3.38.4=h4ff8645_0 246 | - stack_data=0.2.0=pyhd8ed1ab_0 247 | - statsmodels=0.13.2=py38h6c62de6_0 248 | - sysroot_linux-64=2.12=he073ed8_15 249 | - tabulate=0.8.9=py38h06a4308_0 250 | - tensorboard=2.9.0=pyhd8ed1ab_0 251 | - tensorboard-data-server=0.6.0=py38h2b5fc30_2 252 | - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0 253 | - tensorflow=2.6.0=cuda112py38hbe5352d_2 254 | - tensorflow-base=2.6.0=cuda112py38heae9c4c_2 255 | - tensorflow-estimator=2.6.0=cuda112py38hb2194ef_2 256 | - tensorflow-gpu=2.6.0=cuda112py38h0bbbad9_2 257 | - termcolor=1.1.0=py_2 258 | - threadpoolctl=3.1.0=pyh8a188c0_0 259 | - tk=8.6.12=h27826a3_0 260 | - tornado=6.1=py38h0a891b7_3 261 | - tqdm=4.64.0=pyhd8ed1ab_0 262 | - traitlets=5.1.1=pyhd8ed1ab_0 263 | - typing-extensions=3.7.4.3=0 264 | - typing_extensions=3.7.4.3=py_0 265 | - unicodedata2=14.0.0=py38h0a891b7_1 266 | - urllib3=1.26.9=py38h06a4308_0 267 | - wcwidth=0.2.5=pyh9f0ad1d_2 268 | - werkzeug=2.1.2=pyhd8ed1ab_1 269 | - wheel=0.37.1=pyhd8ed1ab_0 270 | - wrapt=1.12.1=py38h497a2fe_3 271 | - xarray=2022.3.0=pyhd8ed1ab_0 272 | - xgboost=1.5.1=cpu_py38h66f0ec1_2 273 | - xorg-kbproto=1.0.7=h7f98852_1002 274 | - xorg-libice=1.0.10=h7f98852_0 275 | - xorg-libsm=1.2.3=hd9c2040_1000 276 | - xorg-libx11=1.7.2=h7f98852_0 277 | - xorg-libxau=1.0.9=h7f98852_0 278 | - xorg-libxdmcp=1.1.3=h7f98852_0 279 | - xorg-libxext=1.3.4=h7f98852_1 280 | - xorg-libxrender=0.9.10=h7f98852_1003 281 | - xorg-renderproto=0.11.1=h7f98852_1002 282 | - xorg-xextproto=7.3.0=h7f98852_1002 283 | - xorg-xproto=7.0.31=h7f98852_1007 284 | - xz=5.2.5=h516909a_1 285 | - yarl=1.7.2=py38h0a891b7_2 286 | - 
zeromq=4.3.4=h9c3ff4c_1 287 | - zipp=3.8.0=pyhd8ed1ab_0 288 | - zlib=1.2.11=h166bdaf_1014 289 | - zstd=1.5.2=ha95c52a_0 290 | - pip: 291 | - brotlipy==0.7.0 292 | - cffi==1.14.6 293 | - colourmap==1.1.4 294 | - numpy==1.19.5 295 | - pca==1.8.0 296 | - scatterd==1.1.1 297 | - sklearn==0.0 298 | - wget==3.2 299 | prefix: /home/studio-lab-user/.conda/envs/sage-python 300 | -------------------------------------------------------------------------------- /code/Python-notebook/python-05.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "id": "1438c612", 5 | "cell_type": "markdown", 6 | "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n", 7 | "metadata": {} 8 | }, 9 | { 10 | "id": "edf8369d", 11 | "cell_type": "markdown", 12 | "source": "## 5.1 \u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f", 13 | "metadata": {} 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "!wget https://raw.githubusercontent.com/taroyabuki/fromzero/master/data/exam.csv" 22 | ], 23 | "id": "dde757dc-9d6e-451c-8dae-235f5d11837f" 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import pandas as pd\n", 32 | "my_df = pd.read_csv('exam.csv')\n", 33 | "my_df" 34 | ], 35 | "id": "5e3cc804-49d8-4385-b3a0-add7fdb06dbf" 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "my_url = ('https://raw.githubusercontent.com/taroyabuki'\n", 44 | " '/fromzero/master/data/exam.csv')\n", 45 | "my_df = pd.read_csv(my_url)" 46 | ], 47 | "id": "a650bedc-240c-4d20-b683-6f533797c093" 48 | }, 49 | 
{ 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "my_df2 = pd.read_csv('exam.csv',\n", 56 | " index_col='name')\n", 57 | "my_df2" 58 | ], 59 | "id": "917f3b47-9d4d-489c-8f66-edb8f7482d90" 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "my_df.to_csv('exam2.csv', index=False)" 68 | ], 69 | "id": "30d65be6-8ad2-455d-8251-ff1e1417275f" 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "my_df2.to_csv('exam3.csv')" 78 | ], 79 | "id": "e1df7d45-417b-462d-9c41-f573dacda2d7" 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "my_df = pd.read_csv('exam.csv',\n", 88 | " encoding='UTF-8')" 89 | ], 90 | "id": "1a639f8c-4c4a-4328-ab51-8b8570914508" 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "my_df.to_csv('exam2.csv', index=False, encoding='UTF-8')" 99 | ], 100 | "id": "8ff33ada-1100-4f30-99ef-39a4d6ad69e8" 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "my_url = 'https://taroyabuki.github.io/fromzero/exam.html'\n", 109 | "my_tables = pd.read_html(my_url)" 110 | ], 111 | "id": "4a779db4-1630-403f-a8e5-8423aa4a7193" 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "my_tables" 120 | ], 121 | "id": "3b61881e-cd4b-4bf4-87f7-e35cea5fd7fc" 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "my_tables[0]" 130 | ], 131 | "id": "f9476c63-cc57-4771-9788-a185445edaa4" 132 | }, 133 | { 134 | "cell_type": 
"code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# 1\u5217\u76ee\u4ee5\u964d\u3092\u53d6\u308a\u51fa\u3059\uff0e\n", 140 | "my_data = my_tables[0].iloc[:, 1:]\n", 141 | "my_data" 142 | ], 143 | "id": "16e538cf-013a-4b4b-9c45-0e4ff0897bdf" 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "my_url = ('https://raw.githubusercontent.com/taroyabuki'\n", 152 | " '/fromzero/master/data/exam.json')\n", 153 | "my_data = pd.read_json(my_url)\n", 154 | "#my_data = pd.read_json('exam.json') # \uff08\u30d5\u30a1\u30a4\u30eb\u3092\u4f7f\u3046\u5834\u5408\uff09\n", 155 | "my_data" 156 | ], 157 | "id": "13e9c6e1-47ab-467b-b851-0091a203a907" 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "import xml.etree.ElementTree as ET\n", 166 | "from urllib.request import urlopen\n", 167 | "\n", 168 | "my_url = ('https://raw.githubusercontent.com/taroyabuki'\n", 169 | " '/fromzero/master/data/exam.xml')\n", 170 | "with urlopen(my_url) as f:\n", 171 | " my_tree = ET.parse(f) # XML\u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f\n", 172 | "\n", 173 | "#my_tree = ET.parse('exam.xml') # \uff08\u30d5\u30a1\u30a4\u30eb\u3092\u4f7f\u3046\u5834\u5408\uff09\n", 174 | "my_ns = '{https://www.example.net/ns/1.0}' # \u540d\u524d\u7a7a\u9593" 175 | ], 176 | "id": "93fcbfaf-c73b-4f53-844e-56dd5ed485c1" 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "my_records = my_tree.findall(f'.//{my_ns}record')" 185 | ], 186 | "id": "ada9000a-d26f-4870-a1e0-a316c10bb88b" 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "def f(record):\n", 195 | " my_dic1 = record.attrib # 
\u5c5e\u6027\u3092\u53d6\u308a\u51fa\u3059\uff0e\n", 196 | " # \u5b50\u8981\u7d20\u306e\u540d\u524d\u3068\u5185\u5bb9\u306e\u30da\u30a2\u3092\u8f9e\u66f8\u306b\u3059\u308b\uff0e\n", 197 | " my_dic2 = {child.tag.replace(my_ns, ''): child.text for child in list(record)}\n", 198 | " return {**my_dic1, **my_dic2} # \u8f9e\u66f8\u3092\u7d50\u5408\u3059\u308b\uff0e" 199 | ], 200 | "id": "c9f2b840-74b1-455c-9068-a589fbb4d4fa" 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "my_data = pd.DataFrame([f(record) for record in my_records])\n", 209 | "my_data['english'] = pd.to_numeric(my_data['english'])\n", 210 | "my_data['math'] = pd.to_numeric(my_data['math'])\n", 211 | "my_data" 212 | ], 213 | "id": "49c1e7fe-aff6-48a0-8132-d5003d79ca6c" 214 | }, 215 | { 216 | "id": "ca69954f", 217 | "cell_type": "markdown", 218 | "source": "## 5.2 \u30c7\u30fc\u30bf\u306e\u5909\u63db", 219 | "metadata": {} 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "import numpy as np\n", 228 | "from scipy.stats import zscore\n", 229 | "\n", 230 | "x1 = [1, 2, 3]\n", 231 | "\n", 232 | "z1 = ((x1 - np.mean(x1)) /\n", 233 | " np.std(x1, ddof=1))\n", 234 | "# \u3042\u308b\u3044\u306f\n", 235 | "z1 = zscore(x1, ddof=1)\n", 236 | "\n", 237 | "z1" 238 | ], 239 | "id": "90ff1c01-02d5-40c4-9f07-56c52e64a48e" 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "z1.mean(), np.std(z1, ddof=1)" 248 | ], 249 | "id": "87140628-936c-4d2c-a4b8-f9caea5322b4" 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "z1 * np.std(x1, ddof=1) + np.mean(x1)" 258 | ], 259 | "id": "48505728-2c9a-48da-bc13-7dc6e316a1f3" 260 | }, 261 | { 262 | "cell_type": "code", 263 | 
"execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "x2 = [1, 3, 5]\n", 268 | "z2 = ((x2 - np.mean(x1)) /\n", 269 | " np.std(x1, ddof=1))\n", 270 | "z2.mean(), np.std(z2, ddof=1)" 271 | ], 272 | "id": "2169f911-ba5f-4d45-8d4d-02adda1adfb2" 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "import pandas as pd\n", 281 | "from sklearn.preprocessing import (\n", 282 | " OneHotEncoder)\n", 283 | "\n", 284 | "my_df = pd.DataFrame({\n", 285 | " 'id': [ 1 , 2 , 3 ],\n", 286 | " 'class': ['A', 'B', 'C']})\n", 287 | "\n", 288 | "my_enc = OneHotEncoder()\n", 289 | "tmp = my_enc.fit_transform(\n", 290 | " my_df[['class']]).toarray()\n", 291 | "my_names = my_enc.get_feature_names() \\\n", 292 | "if hasattr(my_enc, 'get_feature_names') \\\n", 293 | "else my_enc.get_feature_names_out()\n", 294 | "pd.DataFrame(tmp, columns=my_names)" 295 | ], 296 | "id": "b97df278-4912-490f-9518-146ef7171868" 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "my_df2 = pd.DataFrame({\n", 305 | " 'id': [ 4 , 5, 6 ],\n", 306 | " 'class': ['B', 'C', 'B']})\n", 307 | "tmp = my_enc.transform(\n", 308 | " my_df2[['class']]).toarray()\n", 309 | "pd.DataFrame(tmp, columns=my_names)" 310 | ], 311 | "id": "61707b06-bef2-466b-8eee-2612578af36d" 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "my_enc = OneHotEncoder(drop='first')\n", 320 | "\n", 321 | "tmp = my_enc.fit_transform(\n", 322 | " my_df[['class']]).toarray()\n", 323 | "my_names = my_enc.get_feature_names() \\\n", 324 | "if hasattr(my_enc, 'get_feature_names') \\\n", 325 | "else my_enc.get_feature_names_out()\n", 326 | "pd.DataFrame(tmp, columns=my_names)\n", 327 | "\n", 328 | "tmp = my_enc.transform(\n", 329 | " 
my_df2[['class']]).toarray()\n", 330 | "pd.DataFrame(tmp, columns=my_names)" 331 | ], 332 | "id": "d551e6a4-ef05-44ff-b5ef-1d337077850d" 333 | } 334 | ], 335 | "nbformat": 4, 336 | "nbformat_minor": 5, 337 | "metadata": { 338 | "kernelspec": { 339 | "name": "python3", 340 | "display_name": "Python 3" 341 | } 342 | } 343 | } --------------------------------------------------------------------------------