# ------------------------------------------------------------------------------
# ijcai-17-top1-single-mole-solution
#
# Repository layout:
#   ├── README.md
#   ├── file
#   │   ├── ijcai17-CAT.pptx
#   │   └── 基于加权回归模型预测商家未来客流量.pdf
#   └── weighted_model.R
#
# README.md:
#   - The `file` directory contains the defense slides (ppt) and the detailed
#     solution document.
#   - Competition link:
#     https://tianchi.aliyun.com/competition/introduction.htm?spm=5176.11163580.0.0.74c864f0mJ1mhl&raceId=231591
#
# /file/ijcai17-CAT.pptx:
#   https://raw.githubusercontent.com/YouChouNoBB/ijcai-17-top1-single-mole-solution/bfb6d0dea9afdf351c3098d830457bae12e2b599/file/ijcai17-CAT.pptx
# /file/基于加权回归模型预测商家未来客流量.pdf:
#   https://raw.githubusercontent.com/YouChouNoBB/ijcai-17-top1-single-mole-solution/bfb6d0dea9afdf351c3098d830457bae12e2b599/file/基于加权回归模型预测商家未来客流量.pdf
#
# /weighted_model.R
# ------------------------------------------------------------------------------

library(data.table)
library(parallel)

# Input tables ----

# Payment log: one row per (yh_id, sj_id, sj) record. `sj` is parsed as a date.
Yhzf <- fread("user_pay.txt", col.names = c("yh_id", "sj_id", "sj"))
# rx: day index relative to 2016-11-01 (rx = 0 on that day, negative before).
Yhzf$rx <- as.integer(
  as.Date(Yhzf$sj, "%Y-%m-%d") - as.Date("2016-11-01", "%Y-%m-%d")
)
# yr: position in a 7-day cycle, 1..7. With the 904 offset, rx = 0
# (2016-11-01, a Tuesday) maps to 2, i.e. 1 = Monday ... 7 = Sunday.
Yhzf$yr <- (Yhzf$rx + 904) %% 7 + 1

# Shop attributes. Only `sj_id` and `sm` are used below; `sm` is the key that
# joins shops to the weather table.
Sjtz <- fread(
  "shop_info.txt",
  col.names = c(
    "sj_id", "sm", "wzbh", "rjxf", "pf", "pls", "mddj",
    "yjplmc", "ejflmc", "sjflmc"
  ),
  encoding = "UTF-8"
)

# Weather table keyed by (sm, rq). `tq` is used as a numeric factor
# (1 + 0.005 * tq) -- presumably a pre-encoded weather score; confirm.
Tq <- fread("weather.txt", col.names = c("sm", "rq", "tq"))
Tq$rx <- as.integer(
  as.Date(Tq$rq, "%Y-%m-%d") - as.Date("2016-11-01", "%Y-%m-%d")
)

# Daily flow per shop: sjkll = number of payment rows per (sj_id, rx, yr).
Xl <- Yhzf[, .(sjkll = length(yh_id)), .(sj_id, rx, yr)]

# Model ----

# Predict 14 days (rx = 0..13) of flow for every shop in `sjtza`.
#
# Args:
#   xla:   data.table with columns sj_id, rx, yr, sjkll (daily flow history).
#   sjtza: shop table; must contain sj_id and sm.
#   tqa:   weather table; must contain rx, sm, tq.
#   qsrx:  offset subtracted from the weather day index and added when
#          deriving the day-of-cycle of target days. Note that xla$rx itself is
#          not shifted here.
#   fw:    vector of (negative) day indices used as the sample window for the
#          final prediction.
#   fwa:   vector of day indices used to estimate the per-shop day-of-cycle
#          ratios and correction coefficients; defaults to `fw`.
#   r:     strength of the day-of-cycle down-weighting in Qdyrqz (0 = none).
#   s:     exponent applied to the per-shop day-of-cycle factor; with s = 0 the
#          factor is 1 and has no effect.
#
# Returns:
#   Numeric vector of predictions ordered by sj_id, then rx (14 per shop).
Yc <- function(xla, sjtza, tqa, qsrx, fw, fwa = fw, r = 0, s = 0) {
  # Weighted "best representative" of x: among the candidates
  # min(x), min(x) + 1, ..., up to max(x), pick the one minimising
  # sum(|a - x| / (a + x) * qz). Ties are averaged. Returns 0 when x has no
  # non-NA value.
  Qdzy <- function(x, qz = 1) {
    if (length(x[!is.na(x)]) == 0) {
      return(0)
    }

    hx <- min(x, na.rm = TRUE):max(x, na.rm = TRUE)
    # vapply instead of growing a vector with c() inside a loop.
    df <- vapply(
      hx,
      function(a) sum(abs((a - x) / (a + x)) * qz, na.rm = TRUE),
      numeric(1)
    )

    mean(hx[df == min(df)])
  }

  # Correction coefficient: the multiplier a in seq(r, s, bc) that minimises
  # sum(|yy * a - y| / (yy * a + y) * qz). Ties are averaged.
  Jsjzxs <- function(yy, y, qz, r, s, bc) {
    xs <- seq(r, s, bc)
    df <- vapply(
      xs,
      function(a) sum(abs((yy * a - y) / (yy * a + y) * qz), na.rm = TRUE),
      numeric(1)
    )

    mean(xs[df == min(df)])
  }

  # Sample weights by day-of-cycle. `yr` is the target day's position (scalar),
  # `yra` the positions of the sample days. Samples in the same group as the
  # target get weight 1; other groups are down-weighted by a multiple of r.
  # Groups are 1:4, 5, 6 and 7 (weekdays / Friday / Saturday / Sunday under the
  # Monday = 1 mapping).
  Qdyrqz <- function(yr, yra, r) {
    qz <- rep(0, length(yra))
    if (yr %in% 1:4) {
      qz[yra %in% 1:4] <- 1
      qz[yra == 5] <- 1
      qz[yra %in% 6:7] <- 1 - 0.95 * r
    }
    if (yr == 5) {
      qz[yra %in% 1:4] <- 1 - 0.95 * r
      qz[yra == 5] <- 1
      qz[yra %in% 6:7] <- 1 - 0.95 * r
    }
    if (yr == 6) {
      qz[yra %in% 1:5] <- 1 - 0.95 * r
      qz[yra == 6] <- 1
      qz[yra == 7] <- 1 - 0.32 * r
    }
    if (yr == 7) {
      qz[yra %in% 1:5] <- 1 - 0.95 * r
      qz[yra == 6] <- 1 - 0.32 * r
      qz[yra == 7] <- 1
    }
    qz
  }

  # Attach each shop's `sm` and that day's weather, then divide the weather
  # factor out of the observed flow (missing weather counts as tq = 0).
  xla <- merge(xla, sjtza[, .(sj_id, sm)], by = "sj_id", all.x = TRUE)
  xla <- merge(
    xla,
    tqa[, .(rx = rx - qsrx, sm, tq)],
    by = c("rx", "sm"), all.x = TRUE
  )
  xla$tq[is.na(xla$tq)] <- 0
  xla$sjkll <- xla$sjkll / (1 + 0.005 * xla$tq)

  # Per-shop day-of-cycle ratio (sjyrb): weighted mean flow on that day of the
  # cycle divided by the shop's overall weighted mean, over window `fwa`.
  # Weights are 1 / |week index|^0.2, so recent weeks count slightly more.
  sjyr <- data.table(
    sj_id = rep(sjtza$sj_id, each = 7)
    , yr = 1:7
  )

  sjyr <- merge(
    sjyr,
    xla[
      rx %in% fwa,
      .(sjyrpjkll = sum(sjkll / abs(rx %/% 7)^0.2) /
          sum(1 / abs(rx %/% 7)^0.2)),
      .(sj_id, yr)
    ],
    by = c("sj_id", "yr"), all.x = TRUE
  )
  sjyr <- merge(
    sjyr,
    xla[
      rx %in% fwa,
      .(sjpjkll = sum(sjkll / abs(rx %/% 7)^0.2) /
          sum(1 / abs(rx %/% 7)^0.2)),
      sj_id
    ],
    by = "sj_id", all.x = TRUE
  )
  sjyr$sjyrb <- sjyr$sjyrpjkll / sjyr$sjpjkll
  sjyr$sjyrb[is.na(sjyr$sjyrb)] <- 1
  sjyr <- sjyr[, .(sj_id, yr, sjyrb)]

  # Per-shop day-of-cycle correction coefficient (sjyrjzxs): the multiplier in
  # [0.3, 1.8] (step 0.01) that best maps each week's representative flow
  # (Qdzy over the week) onto the flow observed on that day of the cycle.
  sjyrjzxs <- xla[rx %in% fwa, .(sj_id, rx, zx = rx %/% 7, yr, sjkll)]
  sjyrjzxs <- merge(
    sjyrjzxs,
    xla[rx %in% fwa, .(sjzzykll = Qdzy(sjkll)), .(sj_id, zx = rx %/% 7)],
    by = c("sj_id", "zx")
  )
  sjyrjzxs <- sjyrjzxs[
    ,
    .(sjyrjzxs = Jsjzxs(sjzzykll, sjkll, 1 / abs(zx)^0.3, 0.3, 1.8, 0.01)),
    .(sj_id, yr)
  ]

  # Samples in window `fw`, rescaled so that each week's total flow over all
  # shops matches the total flow of the rows with rx >= -7.
  xlb <- xla[rx %in% fw, .(sj_id, rx, zx = rx %/% 7, yr, sjkll)]
  xlb <- merge(
    xlb,
    xla[, .(zkll = sum(sjkll)), .(zx = rx %/% 7)],
    by = "zx", all.x = TRUE
  )
  xlb$yczkll <- sum(xla[rx >= -7]$sjkll)
  xlb$sjkll <- xlb$sjkll / xlb$zkll * xlb$yczkll
  xlb[is.na(xlb)] <- 0

  # sjzxrx: average of the shop's earliest day overall and its earliest day
  # inside `fw`; later used as a reference point for distance-based weights.
  xlc <- merge(xlb, xla[, .(sjzxrxa = min(rx)), sj_id], by = "sj_id")
  xlc <- merge(xlc, xla[rx %in% fw, .(sjzxrxb = min(rx)), sj_id], by = "sj_id")
  xlc$sjzxrx <- 0.5 * xlc$sjzxrxa + 0.5 * xlc$sjzxrxb

  # Remove the day-of-cycle effect from the samples (re-applied to the
  # prediction at the end). Missing factors default to 1.
  xlc <- merge(xlc, sjyrjzxs, by = c("sj_id", "yr"), all.x = TRUE)
  xlc <- merge(xlc, sjyr, by = c("sj_id", "yr"), all.x = TRUE)
  xlc[is.na(xlc)] <- 1
  xlc$sjkll <- xlc$sjkll / (xlc$sjyrjzxs^0.3 * xlc$sjyrb^0.7)^s

  # Cross every shop x target day (rx 0..13) with every sample day in `fw`.
  # Shorter columns are recycled by data.table().
  ycxl <- data.table(
    sj_id = rep(sjtza$sj_id, each = 14 * length(fw))
    , sm = rep(sjtza$sm, each = 14 * length(fw))
    , rx = rep(0:13, each = length(fw))
    , yr = rep((qsrx + 904:917) %% 7 + 1, each = length(fw))
    , xlrx = fw
    , xlyr = (fw + qsrx + 904) %% 7 + 1
  )
  ycxl <- merge(
    ycxl,
    xlc[, .(sj_id, xlrx = rx, xlzx = zx, xlyr = yr, sjkll, sjzxrx)],
    by = c("sj_id", "xlrx", "xlyr")
  )
  ycxl[is.na(ycxl)] <- 0
  # Blend of three weighted representatives per (shop, target day):
  #   0.5 * day-of-cycle weight x distance from sjzxrx
  #   0.3 * the same, additionally x (position within fw)^3
  #   0.2 * day-of-cycle weight x 1 / |sample day index|
  ycxl <- ycxl[, .(
    yc = 0.5 * Qdzy(sjkll, Qdyrqz(yr, xlyr, r) * abs(xlrx - sjzxrx))
    + 0.3 * Qdzy(
      sjkll,
      (xlrx - min(fw) + 1)^3 * Qdyrqz(yr, xlyr, r) * abs(xlrx - sjzxrx)
    )
    + 0.2 * Qdzy(sjkll, 1 / abs(xlrx) * Qdyrqz(yr, xlyr, r))
  ), .(sj_id, rx, yr, sjzxrx)]

  # One row per shop x target day; shops without samples get yc = 0.
  yca <- data.table(
    sj_id = rep(sjtza$sj_id, each = 14)
    , rx = 0:13
    , yr = (qsrx + 904:917) %% 7 + 1
  )
  yca <- merge(yca, ycxl, by = c("sj_id", "rx", "yr"), all.x = TRUE)
  yca[is.na(yca)] <- 0
  yca <- merge(yca, sjyrjzxs, by = c("sj_id", "yr"), all.x = TRUE)
  yca <- merge(yca, sjyr, by = c("sj_id", "yr"), all.x = TRUE)
  yca[is.na(yca)] <- 1
  # Re-apply the day-of-cycle factor that was divided out of the samples.
  yca$yc <- yca$yc * (yca$sjyrjzxs^0.3 * yca$sjyrb^0.7)^s
  # Two stable sorts: final order is sj_id, then rx within each shop.
  setorder(yca, rx)
  setorder(yca, sj_id)
  yca$yc
}

# Ensemble ----

Xla <- Xl

# Run ensemble member `a` (1..8) and return its prediction vector already
# multiplied by its blend weight. The eight weights sum to 1. Members 1-4 use
# day-of-cycle sample weighting (r = 1); members 5-8 use the per-shop
# day-of-cycle factor (s = 1). Returns NULL for any other `a`.
# `Yc` and the tables are passed explicitly because this runs on a worker.
Qdyc <- function(a, Yc, Xla, Sjtz, Tq) {
  # Loaded here as well: each cluster worker is a fresh R session.
  library(data.table)
  rj <- c(-1:-21, -36:-42, -120:-133, -148:-175, -190:-203, -365:-378)
  if (a == 1) {
    return(0.30 * Yc(Xla, Sjtz, Tq, 0, c(-1:-21, -36:-42), rj, r = 1))
  }
  if (a == 2) {
    return(0.10 * Yc(Xla, Sjtz, Tq, 0, c(-1:-21, -29:-42), rj, r = 1))
  }
  if (a == 3) {
    return(0.10 * Yc(Xla, Sjtz, Tq, 0, -1:-21, rj, r = 1))
  }
  if (a == 4) {
    return(0.10 * Yc(Xla, Sjtz, Tq, 0, -1:-28, rj, r = 1))
  }
  if (a == 5) {
    return(0.07 * Yc(Xla, Sjtz, Tq, 0, -1:-7, rj, s = 1))
  }
  if (a == 6) {
    return(0.09 * Yc(Xla, Sjtz, Tq, 0, -1:-14, rj, s = 1))
  }
  if (a == 7) {
    return(0.11 * Yc(Xla, Sjtz, Tq, 0, -1:-21, rj, s = 1))
  }
  if (a == 8) {
    return(0.13 * Yc(Xla, Sjtz, Tq, 0, c(-1:-21, -36:-42), rj, s = 1))
  }
}

Cl <- makeCluster(getOption("cl.cores", 4))
# `finally` guarantees the workers are shut down even if a member fails.
Ycl <- tryCatch(
  parLapply(Cl, 1:8, Qdyc, Yc = Yc, Xla = Xla, Sjtz = Sjtz, Tq = Tq),
  finally = stopCluster(Cl)
)

# Sum the weighted members. Yc() returns values ordered by sj_id then rx, so
# the accumulator is put in that same order before adding; this is a no-op
# when shop_info.txt is already sorted by sj_id.
Yca <- data.table(sj_id = rep(Sjtz$sj_id, each = 14), rx = 0:13, yc = 0)
setorder(Yca, sj_id, rx)
for (a in 1:8) {
  Yca$yc <- Yca$yc + Ycl[[a]]
}

# Post-processing and output ----

# Re-apply the weather factor for the target days (missing weather -> tq = 0).
Yca <- merge(Yca, Sjtz[, .(sj_id, sm)], by = c("sj_id"))
Yca <- merge(
  Yca,
  Tq[, .(rx = rx, sm, tq)],
  by = c("rx", "sm"), all.x = TRUE
)
Yca$tq[is.na(Yca$tq)] <- 0
Yca$yc <- Yca$yc * (1 + 0.005 * Yca$tq)

# 10% uplift on day index 10 (2016-11-11) -- presumably a promotion-day
# adjustment; confirm with the solution document.
Yca$yc[Yca$rx == 10] <- Yca$yc[Yca$rx == 10] * 1.1

# Truncate towards zero to integer counts.
Yca$yc <- as.integer(Yca$yc)
setorder(Yca, rx)
setorder(Yca, sj_id)

# Wide layout: one row per shop, columns r0..r13.
Tj <- data.table(sj_id = unique(Yca$sj_id))
for (a in 0:13) {
  Tj <- cbind(Tj, Yca[rx == a, .(yc)])
  names(Tj)[ncol(Tj)] <- paste0("r", a)
}

write.table(
  Tj, "0.csv",
  col.names = FALSE, row.names = FALSE, sep = ",", quote = FALSE
)