├── kato&hoshino └── kato_hoshino.R └── tachimori ├── reg.R └── str.R /kato&hoshino/kato_hoshino.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwanami-datascience/vol3/HEAD/kato&hoshino/kato_hoshino.R -------------------------------------------------------------------------------- /tachimori/reg.R: -------------------------------------------------------------------------------- 1 | #******************************************************** 2 | # 回帰モデルの利用 3 | #******************************************************** 4 | 5 | #======================================================== 6 | # データの生成 7 | #======================================================== 8 | # 相関のある2つの乱数 x, z を発生させる関数 9 | rcorr <- function(n=1000, r=1) { 10 | x <<- rnorm(n) 11 | z <- rnorm(n) 12 | z <<- r*x + sqrt(1-r^2)*z 13 | } 14 | 15 | # 相関が0.8の乱数を500個発生させた 16 | set.seed(202) 17 | rcorr(500, 0.8) 18 | # 確認 19 | plot(x, z) 20 | cor(x, z) 21 | 22 | # x, zからyを作成 23 | e <- rnorm(500) 24 | y <- 1.5*x + 1.1*z + e 25 | 26 | #======================================================== 27 | # 回帰 28 | #======================================================== 29 | # xだけで回帰 30 | resultX <- lm(y~x) 31 | summary(resultX) 32 | 33 | # 交絡因子zをモデルに追加 34 | resultXZ <- lm(y~x+z) 35 | summary(resultXZ) 36 | 37 | # 図9 38 | # (a) 39 | par(mfrow=c(1,2)) 40 | curve(1.5*x, from =-1, to =1, lwd = 8, ylab = "y",xaxt="n", yaxt="n", col='gray',main="(a)") 41 | abline(resultX, lwd = 4) 42 | # (b) 43 | curve(1.5*x, from =-1, to =1, lwd = 8, ylab = "y",xaxt="n", yaxt="n", col='gray',main="(b)") 44 | abline(resultXZ, lwd = 4) 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /tachimori/str.R: -------------------------------------------------------------------------------- 1 | #******************************************************** 2 | # 層別解析 3 | #******************************************************** 4 | 5 | #======================================================== 6 | # データの生成 7 | #======================================================== 8 | # 相関のある2つの乱数 x, z を発生させる関数 9 | rcorr <- function(n=1000, r=1) { 10 | x <<- rnorm(n) 11 | z <- rnorm(n) 12 | z <<- r*x + sqrt(1-r^2)*z 13 | } 14 | 15 | # 相関が0.8の乱数を500個発生させた 16 | set.seed(202) 17 | rcorr(500, 0.8) 18 | # 確認 19 | plot(x, z) 20 | cor(x, z) 21 | 22 | # x, zからyを作成 23 | e <- rnorm(500) 24 | y <- 1.5*x + 1.1*z + e 25 | 26 | #======================================================== 27 | # 層別 28 | #======================================================== 29 | # zを離散化してデータに追加 30 | z.str <- cut(z, c(-Inf, -1, 0, 1, Inf), labels=c("1","2","3","4")) 31 | data <- data.frame(x, y, z, z.str) 32 | 33 | # 層に分けない場合 34 | resultX <- lm(data$y~data$x) 35 | summary(resultX) 36 | 37 | #--------------------------------------------------------- 38 | # 4層に分けて層別解析 39 | df.str <- split(data, z.str) 40 | 41 | resultX1 <- lm(df.str$'1'$y~df.str$'1'$x) 42 | summary(resultX1) 43 | 44 | resultX2 <- lm(df.str$'2'$y~df.str$'2'$x) 45 | summary(resultX2) 46 | 47 | resultX3 <- lm(df.str$'3'$y~df.str$'3'$x) 48 | summary(resultX3) 49 | 50 | resultX4 <- lm(df.str$'4'$y~df.str$'4'$x) 51 | summary(resultX4) 52 | 53 | # xの回帰係数を統合 54 | numer <- 0 55 | denom <- 0 56 | 57 | for (i in 1:4) { 58 | eval(parse(text=paste("beta <- summary(resultX", i, ")$coefficients[2,1]", sep=""))) 59 | eval(parse(text=paste("se <- summary(resultX", i, ")$coefficients[2,2]", sep=""))) 60 | numer <- numer + beta*(1/se^2) 61 | denom <- denom + 1/se^2 62 | } 63 | 64 | pooled_beta <- numer/denom 65 | pooled_beta 66 | 67 | #--------------------------------------------------------- 68 | # 図7 69 | plot(data$x, data$y, pch=as.numeric(data$z.str), xlim=c(min(data$x),max(data$x)), ylim=c(min(data$y),max(data$y)), xlab = "x",ylab = "y",cex=1.1) 70 | abline(0,1.5, lwd = 8, col='gray') 71 | abline(resultX, lwd = 4) 72 | 73 | # 図8 74 | par(mfrow=c(2,2), mar = c(3, 3, 2, 2), mgp = c(1, 0, 0)) 75 | plot(df.str$'1'$x, df.str$'1'$y, pch=1, xlim=c(min(data$x),max(data$x)), ylim=c(min(data$y),max(data$y)), xlab = "x",ylab = "y",xaxt="n", yaxt="n") 76 | abline(resultX1) 77 | abline(1.1*mean(df.str$'1'$z),1.5, lwd = 4, col='gray') 78 | 79 | plot(df.str$'2'$x, df.str$'2'$y, pch=2, xlim=c(min(data$x),max(data$x)), ylim=c(min(data$y),max(data$y)), xlab = "x",ylab = "y",xaxt="n", yaxt="n") 80 | abline(resultX2) 81 | abline(1.1*mean(df.str$'2'$z),1.5, lwd = 4, col='gray') 82 | 83 | plot(df.str$'3'$x, df.str$'3'$y, pch=3, xlim=c(min(data$x),max(data$x)), ylim=c(min(data$y),max(data$y)), xlab = "x",ylab = "y",xaxt="n", yaxt="n") 84 | abline(resultX3) 85 | abline(1.1*mean(df.str$'3'$z),1.5, lwd = 4, col='gray') 86 | 87 | plot(df.str$'4'$x, df.str$'4'$y, pch=4, xlim=c(min(data$x),max(data$x)), ylim=c(min(data$y),max(data$y)), xlab = "x",ylab = "y",xaxt="n", yaxt="n") 88 | abline(resultX4) 89 | abline(1.1*mean(df.str$'4'$z),1.5, lwd = 4, col='gray') --------------------------------------------------------------------------------