├── .gitignore ├── Power.ipynb ├── Randomization inference in networks.ipynb ├── Randomization inference.ipynb ├── Using covariates.ipynb ├── cai_data ├── cai.adjacency.RData └── cai.main.tsv ├── prep_data.R └── pseudo_facebook_small.tsv /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # iPython 60 | .ipynb_checkpoints 61 | 62 | pseudo_facebook.tsv 63 | -------------------------------------------------------------------------------- /Power.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Consequences of low power" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [ 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "Loading required package: iterators\n", 22 | "Loading required package: parallel\n" 23 | ] 24 | 
} 25 | ], 26 | "source": [ 27 | "library(foreach)\n", 28 | "library(doMC)\n", 29 | "registerDoMC(4)\n", 30 | "library(ggplot2)\n", 31 | "theme_set(theme_bw())\n", 32 | "library(repr)\n", 33 | "options(repr.plot.width=6, repr.plot.height=4)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "\n", 47 | " Two-sample t test power calculation \n", 48 | "\n", 49 | " n = 50\n", 50 | " delta = 0.1\n", 51 | " sd = 1\n", 52 | " sig.level = 0.05\n", 53 | " power = 0.0715\n", 54 | " alternative = two.sided\n", 55 | "\n", 56 | "NOTE: n is number in *each* group\n" 57 | ] 58 | }, 59 | "execution_count": 2, 60 | "metadata": {}, 61 | "output_type": "execute_result" 62 | } 63 | ], 64 | "source": [ 65 | "\n", 66 | "power.t.test(n = 50, delta = .1, sig.level = .05)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/html": [ 79 | "\n", 80 | "\n", 81 | "\n", 82 | "\t\n", 83 | "\n", 84 | "
estp
1-0.0002160.999
\n" 85 | ], 86 | "text/latex": [ 87 | "\\begin{tabular}{r|ll}\n", 88 | " & est & p\\\\\n", 89 | "\\hline\n", 90 | "\t1 & -0.000216 & 0.999\\\\\n", 91 | "\\end{tabular}\n" 92 | ], 93 | "text/plain": [ 94 | " est p\n", 95 | "1 -0.000216 0.999" 96 | ] 97 | }, 98 | "execution_count": 3, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "do.sim <- function(n, delta) {\n", 105 | " z <- rep(0:1, each = n)\n", 106 | " y <- z * delta + rnorm(n * 2)\n", 107 | " \n", 108 | " r <- t.test(y ~ z)\n", 109 | " data.frame(est = unname(r$estimate[2] - r$estimate[1]), p = r$p.value)\n", 110 | "}\n", 111 | "do.sim(50, .1)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 4, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "results <- foreach(i = 1:1e4, .combine = rbind) %dopar% do.sim(50, .1)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 5, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | " est p \n", 136 | " Min. :-0.694 Min. :0.000 \n", 137 | " 1st Qu.:-0.036 1st Qu.:0.199 \n", 138 | " Median : 0.100 Median :0.447 \n", 139 | " Mean : 0.099 Mean :0.461 \n", 140 | " 3rd Qu.: 0.231 3rd Qu.:0.711 \n", 141 | " Max. : 0.980 Max. :1.000 " 142 | ] 143 | }, 144 | "execution_count": 5, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "summary(results)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 6, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [ 160 | { 161 | "name": "stderr", 162 | "output_type": "stream", 163 | "text": [ 164 | "stat_bin: binwidth defaulted to range/30. 
Use 'binwidth = x' to adjust this.\n" 165 | ] 166 | }, 167 | { 168 | "data": { 169 | "image/png": "", 170 | "image/svg+xml": [ 171 | "\n", 172 | "\n", 173 | "\n", 174 | "\n", 175 | "\n", 176 | "\n", 177 | "\n", 178 | "\n", 179 | "\n", 180 | "\n", 181 | "\n", 182 | "\n", 183 | "\n", 184 | "\n", 185 | "\n", 186 | "\n", 187 | "\n", 188 | "\n", 189 | "\n", 190 | "\n", 191 | "\n", 192 | "\n", 193 | "\n", 194 | "\n", 195 | "\n", 196 | "\n", 197 | "\n", 198 | "\n", 199 | "\n", 200 | "\n", 201 | "\n", 202 | "\n", 203 | "\n", 204 | "\n", 205 | "\n", 206 | "\n", 207 | "\n", 208 | "\n", 209 | "\n", 210 | "\n", 211 | "\n", 212 | "\n", 213 | "\n", 214 | "\n", 215 | "\n", 216 | "\n", 217 | "\n", 218 | "\n", 219 | "\n", 220 | "\n", 221 | "\n", 222 | "\n", 223 | "\n", 224 | "\n", 225 | "\n", 226 | "\n", 227 | "\n", 228 | "\n", 229 | "\n", 230 | "\n", 231 | "\n", 232 | "\n", 233 | "\n", 234 | "\n", 235 | "\n", 236 | "\n", 237 | "\n", 238 | "\n", 239 | "\n", 240 | "\n", 241 | "\n", 242 | "\n", 243 | "\n", 244 | "\n", 245 | "\n", 246 | "\n", 247 | "\n", 248 | "\n", 249 | "\n", 250 | "\n", 251 | "\n", 252 | "\n", 253 | "\n", 254 | "\n", 255 | "\n", 256 | "\n", 257 | "\n", 258 | "\n", 259 | "\n", 260 | "\n", 261 | "\n", 262 | "\n", 263 | "\n", 264 | "\n", 265 | "\n", 266 | "\n", 267 | "\n", 268 | "\n", 269 | "\n", 270 | "\n", 271 | "\n", 272 | "\n", 273 | "\n", 274 | "\n", 275 | "\n", 276 | "\n", 277 | "\n", 278 | "\n", 279 | "\n", 280 | "\n", 281 | "\n", 282 | " \n", 283 | "\n", 284 | "\n", 285 | " \n", 286 | "\n", 287 | "\n", 288 | " \n", 289 | "\n", 290 | "\n", 291 | " \n", 292 | "\n", 293 | "\n", 294 | " \n", 295 | "\n", 296 | "\n", 297 | " \n", 298 | "\n", 299 | "\n", 300 | " \n", 301 | "\n", 302 | "\n", 303 | " \n", 304 | "\n", 305 | "\n", 306 | " \n", 307 | "\n", 308 | "\n", 309 | " \n", 310 | "\n", 311 | "\n", 312 | " \n", 313 | "\n", 314 | "\n", 315 | " \n", 316 | "\n", 317 | "\n", 318 | " \n", 319 | "\n", 320 | "\n", 321 | " \n", 322 | "\n", 323 | "\n", 324 | " \n", 325 | 
"\n", 326 | "\n", 327 | " \n", 328 | "\n", 329 | "\n", 330 | " \n", 331 | "\n", 332 | "\n", 333 | " \n", 334 | "\n", 335 | "\n", 336 | " \n", 337 | "\n", 338 | "\n", 339 | "\n", 340 | "\n", 341 | "\n", 342 | "\n", 343 | "\n", 344 | "\n", 345 | "\n", 346 | "\n", 347 | "\n", 348 | "\n", 349 | "\n", 350 | "\n", 351 | "\n", 352 | "\n", 353 | "\n", 354 | "\n", 355 | "\n", 356 | "\n", 357 | "\n", 358 | "\n", 359 | "\n", 360 | "\n", 361 | "\n", 362 | "\n", 363 | "\n", 364 | "\n", 365 | "\n", 366 | "\n", 367 | "\n", 368 | "\n", 369 | "\n", 370 | "\n", 371 | "\n", 372 | "\n", 373 | "\n", 374 | "\n", 375 | "\n", 376 | "\n", 377 | "\n", 378 | "\n", 379 | "\n", 380 | "\n", 381 | "\n", 382 | "\n", 383 | "\n", 384 | "\n", 385 | "\n", 386 | "\n", 387 | "\n", 388 | "\n", 389 | "\n", 390 | "\n", 391 | "\n", 392 | "\n", 393 | "\n", 394 | "\n", 395 | "\n", 396 | "\n", 397 | "\n", 398 | "\n", 399 | "\n", 400 | "\n", 401 | "\n", 402 | "\n", 403 | "\n", 404 | "\n", 405 | "\n", 406 | "\n", 407 | "\n", 408 | "\n", 409 | "\n", 410 | "\n", 411 | "\n", 412 | "\n", 413 | "\n", 414 | "\n", 415 | "\n", 416 | "\n", 417 | "\n", 418 | "\n", 419 | "\n", 420 | "\n", 421 | "\n", 422 | "\n", 423 | "\n", 424 | "\n", 425 | "\n", 426 | "\n", 427 | "\n", 428 | "\n", 429 | "\n", 430 | "\n", 431 | "\n", 432 | "\n", 433 | "\n", 434 | "\n", 435 | "\n", 436 | "\n", 437 | " \n", 438 | "\n", 439 | "\n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | "\n", 444 | "\n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | "\n", 449 | "\n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | "\n", 454 | "\n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | "\n", 460 | "\n", 461 | "\n", 462 | "\n", 463 | "\n", 464 | "\n", 465 | "\n", 466 | "\n", 467 | "\n", 468 | "\n", 469 | "\n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | "\n", 475 | "\n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | "\n", 480 | "\n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | "\n", 485 | "\n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | 
"\n", 490 | "\n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | "\n", 495 | "\n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | "\n", 502 | "\n", 503 | "\n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | "\n", 513 | "\n", 514 | "\n", 515 | "\n", 516 | "\n", 517 | "\n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | "\n", 524 | "\n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | "\n", 530 | "\n", 531 | "\n" 532 | ], 533 | "text/plain": [ 534 | "plot without title" 535 | ] 536 | }, 537 | "metadata": { 538 | "image/svg+xml": { 539 | "isolated": true 540 | } 541 | }, 542 | "output_type": "display_data" 543 | } 544 | ], 545 | "source": [ 546 | "ggplot(\n", 547 | " aes(x = est, fill = p < 0.05),\n", 548 | " data = results\n", 549 | ") +\n", 550 | "geom_histogram()" 551 | ] 552 | }, 553 | { 554 | "cell_type": "markdown", 555 | "metadata": {}, 556 | "source": [ 557 | "## Overestimating effects" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 7, 563 | "metadata": { 564 | "collapsed": false 565 | }, 566 | "outputs": [ 567 | { 568 | "data": { 569 | "text/plain": [ 570 | " Min. 1st Qu. Median Mean 3rd Qu. Max. \n", 571 | " -0.694 -0.036 0.100 0.099 0.231 0.980 " 572 | ] 573 | }, 574 | "execution_count": 7, 575 | "metadata": {}, 576 | "output_type": "execute_result" 577 | } 578 | ], 579 | "source": [ 580 | "summary(results$est)" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": 8, 586 | "metadata": { 587 | "collapsed": false 588 | }, 589 | "outputs": [ 590 | { 591 | "data": { 592 | "text/plain": [ 593 | " Min. 1st Qu. Median Mean 3rd Qu. Max. 
\n", 594 | " -0.694 0.407 0.452 0.395 0.514 0.980 " 595 | ] 596 | }, 597 | "execution_count": 8, 598 | "metadata": {}, 599 | "output_type": "execute_result" 600 | } 601 | ], 602 | "source": [ 603 | "summary(results$est[results$p < 0.05])" 604 | ] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": {}, 609 | "source": [ 610 | "Gelman and Carlin call this the \"exaggeration factor: the expected (absolute) value of the estimate divided by the effect size, if it is statistically significantly different from zero.\"" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 9, 616 | "metadata": { 617 | "collapsed": false 618 | }, 619 | "outputs": [ 620 | { 621 | "data": { 622 | "text/html": [ 623 | "0.476724715735216" 624 | ], 625 | "text/latex": [ 626 | "0.476724715735216" 627 | ], 628 | "text/markdown": [ 629 | "0.476724715735216" 630 | ], 631 | "text/plain": [ 632 | "[1] 0.477" 633 | ] 634 | }, 635 | "execution_count": 9, 636 | "metadata": {}, 637 | "output_type": "execute_result" 638 | } 639 | ], 640 | "source": [ 641 | "mean(abs(results$est[results$p < 0.05]))" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": 10, 647 | "metadata": { 648 | "collapsed": false 649 | }, 650 | "outputs": [ 651 | { 652 | "data": { 653 | "text/html": [ 654 | "4.76724715735216" 655 | ], 656 | "text/latex": [ 657 | "4.76724715735216" 658 | ], 659 | "text/markdown": [ 660 | "4.76724715735216" 661 | ], 662 | "text/plain": [ 663 | "[1] 4.77" 664 | ] 665 | }, 666 | "execution_count": 10, 667 | "metadata": {}, 668 | "output_type": "execute_result" 669 | } 670 | ], 671 | "source": [ 672 | "mean(abs(results$est[results$p < 0.05])) / .1" 673 | ] 674 | }, 675 | { 676 | "cell_type": "markdown", 677 | "metadata": {}, 678 | "source": [ 679 | "That is, we get estimates that are almost 5 times too large.\n", 680 | "\n", 681 | "We also get estimates with the wrong sign." 
682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": 11, 687 | "metadata": { 688 | "collapsed": false 689 | }, 690 | "outputs": [ 691 | { 692 | "data": { 693 | "text/plain": [ 694 | "\n", 695 | " -1 1 \n", 696 | " 75 740 " 697 | ] 698 | }, 699 | "execution_count": 11, 700 | "metadata": {}, 701 | "output_type": "execute_result" 702 | } 703 | ], 704 | "source": [ 705 | "table(sign(results$est[results$p < 0.05]))" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 12, 711 | "metadata": { 712 | "collapsed": false 713 | }, 714 | "outputs": [ 715 | { 716 | "data": { 717 | "text/html": [ 718 | "0.0920245398773006" 719 | ], 720 | "text/latex": [ 721 | "0.0920245398773006" 722 | ], 723 | "text/markdown": [ 724 | "0.0920245398773006" 725 | ], 726 | "text/plain": [ 727 | "[1] 0.092" 728 | ] 729 | }, 730 | "execution_count": 12, 731 | "metadata": {}, 732 | "output_type": "execute_result" 733 | } 734 | ], 735 | "source": [ 736 | "mean(results$est[results$p < 0.05] < 0)" 737 | ] 738 | }, 739 | { 740 | "cell_type": "markdown", 741 | "metadata": {}, 742 | "source": [ 743 | "This isn't the worst possible case. Many experiments have even lower power.\n", 744 | "\n", 745 | "### In combination with bad stopping rules\n", 746 | "\n", 747 | "Low power also makes the use of stopping rules more problematic.\n", 748 | "\n", 749 | "Here we do a simulation where we start with some n in each of treatment and control. Then check whether we have a significant result. If not, add some incremental n to each condition. Repeat until there is a significant result or we hit a maximum n." 
750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 13, 755 | "metadata": { 756 | "collapsed": true 757 | }, 758 | "outputs": [], 759 | "source": [ 760 | "add.data <- function(n, d, z = c(), y = c()) {\n", 761 | " z.new <- rep(0:1, each = n)\n", 762 | " y.new <- z.new * d + rnorm(n * 2)\n", 763 | " z <- c(z, z.new)\n", 764 | " y <- c(y, y.new)\n", 765 | " t <- t.test(y ~ z)\n", 766 | " list(z = z, y = y, t = t)\n", 767 | "}\n", 768 | "\n", 769 | "do.stopping.rule.sim <- function(n, d, n.inc, n.max) {\n", 770 | " r <- list(z = c(), y = c())\n", 771 | " while (TRUE) {\n", 772 | " nc <- length(r$z) / 2\n", 773 | " if (nc + n.inc >= n.max)\n", 774 | " break;\n", 775 | " ni <- if(nc == 0) n else n.inc\n", 776 | " r <- add.data(ni, d, r$z, r$y)\n", 777 | " if (r$t$p.value < 0.05)\n", 778 | " break;\n", 779 | " }\n", 780 | " r\n", 781 | "}" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": 14, 787 | "metadata": { 788 | "collapsed": false 789 | }, 790 | "outputs": [ 791 | { 792 | "data": { 793 | "text/plain": [ 794 | "\n", 795 | "\tWelch Two Sample t-test\n", 796 | "\n", 797 | "data: y by z\n", 798 | "t = -0.7, df = 200, p-value = 0.5\n", 799 | "alternative hypothesis: true difference in means is not equal to 0\n", 800 | "95 percent confidence interval:\n", 801 | " -0.361 0.172\n", 802 | "sample estimates:\n", 803 | "mean in group 0 mean in group 1 \n", 804 | " -0.0166 0.0780 \n" 805 | ] 806 | }, 807 | "execution_count": 14, 808 | "metadata": {}, 809 | "output_type": "execute_result" 810 | } 811 | ], 812 | "source": [ 813 | "# one run, with no true effect\n", 814 | "do.stopping.rule.sim(n = 20, d = 0, n.inc = 2, n.max = 120)$t" 815 | ] 816 | }, 817 | { 818 | "cell_type": "markdown", 819 | "metadata": {}, 820 | "source": [ 821 | "Run many simulations of experiments run with this stopping rule:" 822 | ] 823 | }, 824 | { 825 | "cell_type": "code", 826 | "execution_count": 18, 827 | "metadata": { 828 | "collapsed": true 829 | 
}, 830 | "outputs": [], 831 | "source": [ 832 | "set.seed(8001)\n", 833 | "sr <- foreach(i = 1:1e3, .combine = rbind) %dopar% {\n", 834 | " r <- do.stopping.rule.sim(20, 0, 5, 50)\n", 835 | " data.frame(est = r$t$estimate[2] - r$t$estimate[1],\n", 836 | " p = r$t$p.value,\n", 837 | " n = length(r$z)\n", 838 | " )\n", 839 | " }" 840 | ] 841 | }, 842 | { 843 | "cell_type": "code", 844 | "execution_count": 19, 845 | "metadata": { 846 | "collapsed": false 847 | }, 848 | "outputs": [ 849 | { 850 | "data": { 851 | "text/plain": [ 852 | " est p n \n", 853 | " Min. :-1.073 Min. :0.000 Min. :40.0 \n", 854 | " 1st Qu.:-0.154 1st Qu.:0.216 1st Qu.:90.0 \n", 855 | " Median :-0.022 Median :0.506 Median :90.0 \n", 856 | " Mean :-0.015 Mean :0.486 Mean :85.7 \n", 857 | " 3rd Qu.: 0.128 3rd Qu.:0.751 3rd Qu.:90.0 \n", 858 | " Max. : 1.003 Max. :0.998 Max. :90.0 " 859 | ] 860 | }, 861 | "execution_count": 19, 862 | "metadata": {}, 863 | "output_type": "execute_result" 864 | } 865 | ], 866 | "source": [ 867 | "summary(sr)" 868 | ] 869 | }, 870 | { 871 | "cell_type": "code", 872 | "execution_count": 20, 873 | "metadata": { 874 | "collapsed": false 875 | }, 876 | "outputs": [ 877 | { 878 | "data": { 879 | "text/html": [ 880 | "0.13" 881 | ], 882 | "text/latex": [ 883 | "0.13" 884 | ], 885 | "text/markdown": [ 886 | "0.13" 887 | ], 888 | "text/plain": [ 889 | "[1] 0.13" 890 | ] 891 | }, 892 | "execution_count": 20, 893 | "metadata": {}, 894 | "output_type": "execute_result" 895 | } 896 | ], 897 | "source": [ 898 | "mean(sr$p < 0.05)" 899 | ] 900 | }, 901 | { 902 | "cell_type": "markdown", 903 | "metadata": {}, 904 | "source": [ 905 | "That is, rather than a Type I error rate of 5%, we have 13%." 
906 | ] 907 | }, 908 | { 909 | "cell_type": "code", 910 | "execution_count": null, 911 | "metadata": { 912 | "collapsed": true 913 | }, 914 | "outputs": [], 915 | "source": [] 916 | } 917 | ], 918 | "metadata": { 919 | "kernelspec": { 920 | "display_name": "R", 921 | "language": "R", 922 | "name": "ir" 923 | }, 924 | "language_info": { 925 | "codemirror_mode": "r", 926 | "file_extension": ".r", 927 | "mimetype": "text/x-r-source", 928 | "name": "R", 929 | "pygments_lexer": "r", 930 | "version": "3.2.2" 931 | } 932 | }, 933 | "nbformat": 4, 934 | "nbformat_minor": 0 935 | } 936 | -------------------------------------------------------------------------------- /Randomization inference in networks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Randomization inference for spillovers in networks\n", 8 | "\n", 9 | "This uses data from:\n", 10 | "Cai, Jing, Alain De Janvry, and Elisabeth Sadoulet. 2015. \"Social Networks and the Decision to Insure.\" American Economic Journal: Applied Economics, 7(2): 81-108.\n", 11 | "https://www.aeaweb.org/articles.php?doi=10.1257/app.20130442\n", 12 | "\n", 13 | "That paper examines spillover effects in rural Chinese farmers being encouraged to sign up for insurance. Households were randomly assigned to different periods in which to be encouraged to get insurance and whether that encouragement was 'intensive'.\n", 14 | "\n", 15 | "\"The social network survey asked household heads to list five close friends, either within or outside the village, with whom they most frequently discuss rice production or financial issues. 
Respondents were asked to rank these friends based on which one would be consulted first, second, etc.\"\n", 16 | "\n", 17 | "We are essentially re-doing Table 2 column 2 (there are some minor differences because of how we have simplified things a bit).\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 8, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "options(digits = 3)\n", 29 | "library(ggplot2)\n", 30 | "theme_set(theme_bw())\n", 31 | "options(repr.plot.width = 6)\n", 32 | "options(repr.plot.height = 4)\n", 33 | "\n", 34 | "library(icsw)\n", 35 | "library(foreach)\n", 36 | "library(Matrix)\n", 37 | "library(lfe)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 9, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "cai <- read.table(\"cai_data/cai.main.tsv\", sep = \"\\t\")" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 10, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/html": [ 61 | "\n", 62 | "\n", 63 | "\n", 64 | "\t\n", 65 | "\t\n", 66 | "\t\n", 67 | "\t\n", 68 | "\t\n", 69 | "\t\n", 70 | "\n", 71 | "
idaddressregionvillagetakeup_surveyagemaledelayintensiveinfo_noneintensive.nondelay.peersn.peers
11111385fusheng671fusheng137101104
21111035fusheng211fusheng160101100
31111363fusheng51fusheng056100103
41111042fusheng211fusheng157100111
51111045fusheng211fusheng145110124
61111038fusheng211fusheng161111104
\n" 72 | ], 73 | "text/latex": [ 74 | "\\begin{tabular}{r|llllllllllll}\n", 75 | " & id & address & region & village & takeup_survey & age & male & delay & intensive & info_none & intensive.nondelay.peers & n.peers\\\\\n", 76 | "\\hline\n", 77 | "\t1 & 1111385 & fusheng67 & 1 & fusheng & 1 & 37 & 1 & 0 & 1 & 1 & 0 & 4\\\\\n", 78 | "\t2 & 1111035 & fusheng21 & 1 & fusheng & 1 & 60 & 1 & 0 & 1 & 1 & 0 & 0\\\\\n", 79 | "\t3 & 1111363 & fusheng5 & 1 & fusheng & 0 & 56 & 1 & 0 & 0 & 1 & 0 & 3\\\\\n", 80 | "\t4 & 1111042 & fusheng21 & 1 & fusheng & 1 & 57 & 1 & 0 & 0 & 1 & 1 & 1\\\\\n", 81 | "\t5 & 1111045 & fusheng21 & 1 & fusheng & 1 & 45 & 1 & 1 & 0 & 1 & 2 & 4\\\\\n", 82 | "\t6 & 1111038 & fusheng21 & 1 & fusheng & 1 & 61 & 1 & 1 & 1 & 1 & 0 & 4\\\\\n", 83 | "\\end{tabular}\n" 84 | ], 85 | "text/plain": [ 86 | " id address region village takeup_survey age male delay intensive info_none intensive.nondelay.peers n.peers\n", 87 | "1 1111385 fusheng67 1 fusheng 1 37 1 0 1 1 0 4\n", 88 | "2 1111035 fusheng21 1 fusheng 1 60 1 0 1 1 0 0\n", 89 | "3 1111363 fusheng5 1 fusheng 0 56 1 0 0 1 0 3\n", 90 | "4 1111042 fusheng21 1 fusheng 1 57 1 0 0 1 1 1\n", 91 | "5 1111045 fusheng21 1 fusheng 1 45 1 1 0 1 2 4\n", 92 | "6 1111038 fusheng21 1 fusheng 1 61 1 1 1 1 0 4" 93 | ] 94 | }, 95 | "execution_count": 10, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "head(cai)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "The main outcome is whether they sign for insurance `takeup_survey`." 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 11, 114 | "metadata": { 115 | "collapsed": false, 116 | "scrolled": true 117 | }, 118 | "outputs": [ 119 | { 120 | "name": "stderr", 121 | "output_type": "stream", 122 | "text": [ 123 | " [[ suppressing 10 column names ‘1111385’, ‘1111035’, ‘1111363’ ... 
]]\n", 124 | " [[ suppressing 10 column names ‘1111385’, ‘1111035’, ‘1111363’ ... ]]\n" 125 | ] 126 | }, 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "10 x 10 sparse Matrix of class \"dgCMatrix\"\n", 131 | " \n", 132 | "1111385 . . . . . . . . . .\n", 133 | "1111035 . . . . . . . . . .\n", 134 | "1111363 . . . . . . . . . .\n", 135 | "1111042 . . . . . . . . . .\n", 136 | "1111045 . . . . . . . 1 . .\n", 137 | "1111038 . . . . . . . 1 1 .\n", 138 | "1111034 . . . . . . . . . 1\n", 139 | "1111055 . . . . 1 . . . . .\n", 140 | "1111050 . . . . 1 1 . 1 . 1\n", 141 | "1111031 . . . . 1 . . 1 . ." 142 | ] 143 | }, 144 | "execution_count": 11, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "load(\"cai_data/cai.adjacency.RData\")\n", 151 | "A[1:10,1:10]" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "Now let's estimate the relationship between how many peers were given the strong encouragement to sign up for insurance in the prior period, `intensive.nondelay.peers` and the outcome `takeup_survey`. We will only do this for egos who didn't get the treatment in the prior period and who didn't receive information, as part of their treatment, about the adoption rates in their area. 
(This is also what the paper does.)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "Now we can get a point estimate for the effects of peer treatments:" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 12, 171 | "metadata": { 172 | "collapsed": false 173 | }, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "\n", 179 | "Call:\n", 180 | "lm(formula = takeup_survey ~ intensive + I(intensive.nondelay.peers/n.peers) + \n", 181 | " factor(n.peers), data = cai)\n", 182 | "\n", 183 | "Residuals:\n", 184 | " Min 1Q Median 3Q Max \n", 185 | "-0.576 -0.451 -0.373 0.533 0.688 \n", 186 | "\n", 187 | "Coefficients:\n", 188 | " Estimate Std. Error t value Pr(>|t|) \n", 189 | "(Intercept) 0.3116 0.0340 9.16 < 2e-16 ***\n", 190 | "intensive 0.0779 0.0148 5.26 1.5e-07 ***\n", 191 | "I(intensive.nondelay.peers/n.peers) 0.0508 0.0295 1.72 0.08469 . \n", 192 | "factor(n.peers)2 0.0563 0.0389 1.45 0.14796 \n", 193 | "factor(n.peers)3 0.0610 0.0361 1.69 0.09071 . \n", 194 | "factor(n.peers)4 0.0833 0.0350 2.38 0.01746 * \n", 195 | "factor(n.peers)5 0.1356 0.0354 3.83 0.00013 ***\n", 196 | "---\n", 197 | "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n", 198 | "\n", 199 | "Residual standard error: 0.494 on 4514 degrees of freedom\n", 200 | " (256 observations deleted due to missingness)\n", 201 | "Multiple R-squared: 0.0124,\tAdjusted R-squared: 0.0111 \n", 202 | "F-statistic: 9.42 on 6 and 4514 DF, p-value: 2.62e-10\n" 203 | ] 204 | }, 205 | "execution_count": 12, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "lm.1 <- lm(\n", 212 | " takeup_survey ~ intensive + I(intensive.nondelay.peers/n.peers) + factor(n.peers),\n", 213 | " data = cai\n", 214 | ")\n", 215 | "summary(lm.1)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "There could be something here. 
We see both an effect of one's own treatment, and an effect of the fraction of peers treated in the early period.\n", 223 | "\n", 224 | "Note that depending on your number of peers, the fraction of treated peers can only take on some values. This violates the 'positivity' support assumption for causal inference. Even if this worked with the number of treated peers, and considered only 0 or more than 0 treated peers, the propensity scores would be heterogeneous. The authors attempt to deal with this by adding the indicators for each number of friends.\n", 225 | "\n", 226 | "Make a function that, given the data (or permuted data), computes our test statistic -- the regression coefficient from above. We can see that it gives the same results as before:" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 13, 232 | "metadata": { 233 | "collapsed": false 234 | }, 235 | "outputs": [ 236 | { 237 | "name": "stdout", 238 | "output_type": "stream", 239 | "text": [ 240 | "I(z.peers/n.peers) \n", 241 | " 0.0508 \n" 242 | ] 243 | } 244 | ], 245 | "source": [ 246 | "peer.regression.coef <- function(z, z.peers, n.peers, y) {\n", 247 | " coef(lm(y ~ z + I(z.peers / n.peers) + factor(n.peers)))[3]\n", 248 | "}\n", 249 | "\n", 250 | "obs.coef <- with(\n", 251 | " cai,\n", 252 | " peer.regression.coef(intensive, intensive.nondelay.peers, n.peers, takeup_survey)\n", 253 | ")\n", 254 | "print(obs.coef)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "Now we write a function to do the focal-auxiliary permutation and compute the test statistic for each permutation." 
262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 14, 267 | "metadata": { 268 | "collapsed": true 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "do.focal.aux.permutation <- function(adj.mat, z, n.peers, y,\n", 273 | " is.focal, R = 1e3,\n", 274 | " fnc = peer.regression.coef) {\n", 275 | " foreach(i = 1:R, .combine = 'c') %do% {\n", 276 | " zp <- z\n", 277 | " zp[!is.focal] <- sample(z[!is.focal]) # permute treatments for auxillary vertices only\n", 278 | " zp.peers <- as.vector(adj.mat %*% zp) # re-compute number of peers treated\n", 279 | " fnc(z[is.focal], zp.peers[is.focal], n.peers[is.focal], y[is.focal])\n", 280 | " }\n", 281 | "}\n" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "We can now call this function to draw from the distribution of the test statistic under the null of no spillovers (but possible direct effects).\n", 289 | "\n", 290 | "Just for illustration, let's start by just selecting a random 2000 units as focal units." 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "Actually in the paper, they mainly don't focus on contemporaneous influence. Rather the authors look for effects of the assignment of peers treated in period 1 on egos only assigned in round 2." 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 15, 303 | "metadata": { 304 | "collapsed": false 305 | }, 306 | "outputs": [ 307 | { 308 | "data": { 309 | "text/plain": [ 310 | " Min. 1st Qu. Median Mean 3rd Qu. Max. 
\n", 311 | "-0.0371 0.0434 0.0661 0.0650 0.0883 0.1760 " 312 | ] 313 | }, 314 | "execution_count": 15, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "\n", 321 | "cai$is.focal <- sample(c(rep(TRUE, 2000), rep(FALSE, nrow(cai) - 2000)))\n", 322 | "\n", 323 | "null.coefs <- do.focal.aux.permutation(\n", 324 | " A,\n", 325 | " cai$intensive,\n", 326 | " cai$n.peers, cai$takeup_survey,\n", 327 | " cai$is.focal,\n", 328 | " R = 1e3\n", 329 | ")\n", 330 | "\n", 331 | "summary(null.coefs)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 16, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "I(z.peers/n.peers) \n", 346 | " 0.0449 \n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "obs.coef <- with(\n", 352 | " subset(cai, is.focal),\n", 353 | " peer.regression.coef(intensive, intensive.nondelay.peers, n.peers, takeup_survey)\n", 354 | ")\n", 355 | "print(obs.coef)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 17, 361 | "metadata": { 362 | "collapsed": false 363 | }, 364 | "outputs": [ 365 | { 366 | "data": { 367 | "text/html": [ 368 | "0.526" 369 | ], 370 | "text/latex": [ 371 | "0.526" 372 | ], 373 | "text/markdown": [ 374 | "0.526" 375 | ], 376 | "text/plain": [ 377 | "[1] 0.526" 378 | ] 379 | }, 380 | "execution_count": 17, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "two.sided.p.value.perm <- function(obs, null.draws) {\n", 387 | " lower.p <- mean(obs > null.draws)\n", 388 | " upper.p <- mean(obs < null.draws)\n", 389 | " 2 * min(lower.p, upper.p)\n", 390 | "}\n", 391 | "\n", 392 | "two.sided.p.value.perm(obs.coef, null.coefs)" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "So actually this doesn't look statistically significant...\n", 
400 | "\n", 401 | "But, in fact, in the paper they focus their analysis a bit more in a couple of ways. One way is to restrict attention to households treated in the second period.\n", 402 | "\n", 403 | "Another way is to restrict attention to households that were not, as part of the treatment, randomly assigned to get information about insurance adoption in their area. Perhaps that social information would reduce the impact of other social info. These are identified with `info_none == 1` if you want to try that." 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 18, 409 | "metadata": { 410 | "collapsed": false 411 | }, 412 | "outputs": [ 413 | { 414 | "name": "stdout", 415 | "output_type": "stream", 416 | "text": [ 417 | "I(z.peers/n.peers) \n", 418 | " 0.0508 \n" 419 | ] 420 | } 421 | ], 422 | "source": [ 423 | "# a relevant subset\n", 424 | "cai$relevant.case <- with(cai, delay == 1 )\n", 425 | "\n", 426 | "obs.coef <- with(\n", 427 | " cai,\n", 428 | " peer.regression.coef(intensive, intensive.nondelay.peers, n.peers, takeup_survey)\n", 429 | ")\n", 430 | "print(obs.coef)" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": {}, 436 | "source": [ 437 | "Since only some units have relevant outcomes now, we can make all of them the focal units." 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 19, 443 | "metadata": { 444 | "collapsed": false 445 | }, 446 | "outputs": [ 447 | { 448 | "data": { 449 | "text/plain": [ 450 | " Min. 1st Qu. Median Mean 3rd Qu. Max. 
\n", 451 | "-0.0155 0.0467 0.0632 0.0630 0.0787 0.1340 " 452 | ] 453 | }, 454 | "execution_count": 19, 455 | "metadata": {}, 456 | "output_type": "execute_result" 457 | } 458 | ], 459 | "source": [ 460 | "cai$is.focal <- cai$relevant.case\n", 461 | "\n", 462 | "null.coefs <- do.focal.aux.permutation(\n", 463 | " A,\n", 464 | " cai$intensive,\n", 465 | " cai$n.peers, cai$takeup_survey,\n", 466 | " cai$is.focal,\n", 467 | " R = 1e3\n", 468 | ")\n", 469 | "\n", 470 | "summary(null.coefs)" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 20, 476 | "metadata": { 477 | "collapsed": false 478 | }, 479 | "outputs": [ 480 | { 481 | "name": "stdout", 482 | "output_type": "stream", 483 | "text": [ 484 | "I(z.peers/n.peers) \n", 485 | " 0.169 \n" 486 | ] 487 | } 488 | ], 489 | "source": [ 490 | "obs.coef <- with(\n", 491 | " subset(cai, is.focal),\n", 492 | " peer.regression.coef(intensive, intensive.nondelay.peers, n.peers, takeup_survey)\n", 493 | ")\n", 494 | "print(obs.coef)" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 21, 500 | "metadata": { 501 | "collapsed": false 502 | }, 503 | "outputs": [ 504 | { 505 | "data": { 506 | "text/html": [ 507 | "0" 508 | ], 509 | "text/latex": [ 510 | "0" 511 | ], 512 | "text/markdown": [ 513 | "0" 514 | ], 515 | "text/plain": [ 516 | "[1] 0" 517 | ] 518 | }, 519 | "execution_count": 21, 520 | "metadata": {}, 521 | "output_type": "execute_result" 522 | } 523 | ], 524 | "source": [ 525 | "two.sided.p.value.perm(obs.coef, null.coefs)" 526 | ] 527 | }, 528 | { 529 | "cell_type": "markdown", 530 | "metadata": {}, 531 | "source": [ 532 | "So the p-value is very close to 0. For this subpopulation, there is strong evidence of a spillover effect." 
533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": null, 538 | "metadata": { 539 | "collapsed": true 540 | }, 541 | "outputs": [], 542 | "source": [] 543 | } 544 | ], 545 | "metadata": { 546 | "kernelspec": { 547 | "display_name": "R", 548 | "language": "R", 549 | "name": "ir" 550 | }, 551 | "language_info": { 552 | "codemirror_mode": "r", 553 | "file_extension": ".r", 554 | "mimetype": "text/x-r-source", 555 | "name": "R", 556 | "pygments_lexer": "r", 557 | "version": "3.2.2" 558 | } 559 | }, 560 | "nbformat": 4, 561 | "nbformat_minor": 0 562 | } 563 | -------------------------------------------------------------------------------- /Using covariates.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using covariates to increase precision" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 67, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "options(digits = 2)\n", 19 | "library(foreach)\n", 20 | "library(doMC)\n", 21 | "registerDoMC(cores = 4)\n", 22 | "library(ggplot2)\n", 23 | "theme_set(theme_bw())\n", 24 | "library(repr)\n", 25 | "options(repr.plot.width=6, repr.plot.height=4)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Synthetic Facebook data set from https://www.udacity.com/wiki/ud651#!#data-sets.\n", 33 | "We are working with a subset of this to make the simulations fast." 
34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 42, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "ps <- read.table(\"pseudo_facebook_small.tsv\", header = TRUE)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 72, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
    \n", 58 | "\t
  1. 10000
  2. \n", 59 | "\t
  3. 15
  4. \n", 60 | "
\n" 61 | ], 62 | "text/latex": [ 63 | "\\begin{enumerate*}\n", 64 | "\\item 10000\n", 65 | "\\item 15\n", 66 | "\\end{enumerate*}\n" 67 | ], 68 | "text/markdown": [ 69 | "1. 10000\n", 70 | "2. 15\n", 71 | "\n", 72 | "\n" 73 | ], 74 | "text/plain": [ 75 | "[1] 10000 15" 76 | ] 77 | }, 78 | "execution_count": 72, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | }, 82 | { 83 | "data": { 84 | "text/plain": [ 85 | " Min. 1st Qu. Median Mean 3rd Qu. Max. \n", 86 | " 0 1 10 149 76 13600 " 87 | ] 88 | }, 89 | "execution_count": 72, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | }, 93 | { 94 | "data": { 95 | "text/plain": [ 96 | " Min. 1st Qu. Median Mean 3rd Qu. Max. \n", 97 | " 0 229 411 539 672 2820 " 98 | ] 99 | }, 100 | "execution_count": 72, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "dim(ps)\n", 107 | "summary(ps$likes)\n", 108 | "summary(ps$tenure)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "# Simulate estimates under the sharp null\n", 116 | "Simulation that computes simple difference in means and regression adjustment estimators. We use two regression adjustments: one with a single linear term and one with an interaction term with the covariate centered. See Lin (2013) in Annals of Applied Statistics." 
117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 73, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "do.sim <- function(x, y, nt = round(n / 2)) {\n", 128 | " n <- length(x)\n", 129 | " \n", 130 | " z.c <- rep(0, n)\n", 131 | " z.c[sample.int(n, nt)] <- 1\n", 132 | " \n", 133 | " tau.sd <- unname(coef(lm(y ~ z.c))[2]) # unadjusted\n", 134 | " tau.adj <- unname(coef(lm(y ~ z.c + x))[2]) # regression adjustment\n", 135 | " \n", 136 | " # interaction w centered covariate:\n", 137 | " x0 <- x - mean(x)\n", 138 | " tau.adj.int <- unname(coef(lm(y ~ z.c * x0))[2])\n", 139 | " \n", 140 | " c(th.sd = tau.sd, th.adj = tau.adj, th.adj.int = tau.adj.int)\n", 141 | "}" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "Do a single simulation:" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 66, 154 | "metadata": { 155 | "collapsed": false, 156 | "scrolled": true 157 | }, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/html": [ 162 | "
\n", 163 | "\t
th.sd
\n", 164 | "\t\t
0.0513337038631001
\n", 165 | "\t
th.adj
\n", 166 | "\t\t
0.0596912108653627
\n", 167 | "\t
th.adj.int
\n", 168 | "\t\t
0.058737335222844
\n", 169 | "
\n" 170 | ], 171 | "text/latex": [ 172 | "\\begin{description*}\n", 173 | "\\item[th.sd] 0.0513337038631001\n", 174 | "\\item[th.adj] 0.0596912108653627\n", 175 | "\\item[th.adj.int] 0.058737335222844\n", 176 | "\\end{description*}\n" 177 | ], 178 | "text/markdown": [ 179 | "th.sd\n", 180 | ": 0.0513337038631001th.adj\n", 181 | ": 0.0596912108653627th.adj.int\n", 182 | ": 0.058737335222844\n", 183 | "\n" 184 | ], 185 | "text/plain": [ 186 | " th.sd th.adj th.adj.int \n", 187 | " 0.051 0.060 0.059 " 188 | ] 189 | }, 190 | "execution_count": 66, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "with(ps, do.sim(tenure, log1p(likes), nt = 2000))" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "The estimates, as we should expect, are quite similar. But we really want to compare their variance and error across many possible randomizations.\n", 204 | "\n", 205 | "Do many simulations:" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 59, 211 | "metadata": { 212 | "collapsed": false 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "sr <- foreach(i = 1:1e3, .combine = rbind) %dopar%\n", 217 | " with(ps, do.sim(tenure, log1p(likes), nt = 2000))" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 60, 223 | "metadata": { 224 | "collapsed": false 225 | }, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | " th.sd th.adj th.adj.int \n", 231 | " Min. :-0.182 Min. :-0.182 Min. :-0.184 \n", 232 | " 1st Qu.:-0.039 1st Qu.:-0.038 1st Qu.:-0.038 \n", 233 | " Median : 0.000 Median : 0.000 Median : 0.000 \n", 234 | " Mean : 0.000 Mean : 0.000 Mean : 0.000 \n", 235 | " 3rd Qu.: 0.038 3rd Qu.: 0.036 3rd Qu.: 0.036 \n", 236 | " Max. : 0.191 Max. : 0.190 Max. 
: 0.190 " 237 | ] 238 | }, 239 | "execution_count": 60, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "summary(sr)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 61, 251 | "metadata": { 252 | "collapsed": false 253 | }, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/html": [ 258 | "
\n", 259 | "\t
th.sd
\n", 260 | "\t\t
0.0580209274316362
\n", 261 | "\t
th.adj
\n", 262 | "\t\t
0.0573698971473993
\n", 263 | "\t
th.adj.int
\n", 264 | "\t\t
0.0574078054448429
\n", 265 | "
\n" 266 | ], 267 | "text/latex": [ 268 | "\\begin{description*}\n", 269 | "\\item[th.sd] 0.0580209274316362\n", 270 | "\\item[th.adj] 0.0573698971473993\n", 271 | "\\item[th.adj.int] 0.0574078054448429\n", 272 | "\\end{description*}\n" 273 | ], 274 | "text/markdown": [ 275 | "th.sd\n", 276 | ": 0.0580209274316362th.adj\n", 277 | ": 0.0573698971473993th.adj.int\n", 278 | ": 0.0574078054448429\n", 279 | "\n" 280 | ], 281 | "text/plain": [ 282 | " th.sd th.adj th.adj.int \n", 283 | " 0.058 0.057 0.057 " 284 | ] 285 | }, 286 | "execution_count": 61, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "apply(sr, 2, sd)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 62, 298 | "metadata": { 299 | "collapsed": false 300 | }, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/html": [ 305 | "
\n", 306 | "\t
th.sd
\n", 307 | "\t\t
0.000182805467572636
\n", 308 | "\t
th.adj
\n", 309 | "\t\t
0.000250078723388885
\n", 310 | "\t
th.adj.int
\n", 311 | "\t\t
0.000304896568073275
\n", 312 | "
\n" 313 | ], 314 | "text/latex": [ 315 | "\\begin{description*}\n", 316 | "\\item[th.sd] 0.000182805467572636\n", 317 | "\\item[th.adj] 0.000250078723388885\n", 318 | "\\item[th.adj.int] 0.000304896568073275\n", 319 | "\\end{description*}\n" 320 | ], 321 | "text/markdown": [ 322 | "th.sd\n", 323 | ": 0.000182805467572636th.adj\n", 324 | ": 0.000250078723388885th.adj.int\n", 325 | ": 0.000304896568073275\n", 326 | "\n" 327 | ], 328 | "text/plain": [ 329 | " th.sd th.adj th.adj.int \n", 330 | " 0.00018 0.00025 0.00030 " 331 | ] 332 | }, 333 | "execution_count": 62, 334 | "metadata": {}, 335 | "output_type": "execute_result" 336 | } 337 | ], 338 | "source": [ 339 | "apply(sr, 2, mean)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 63, 345 | "metadata": { 346 | "collapsed": false 347 | }, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/html": [ 352 | "
\n", 353 | "\t
th.sd
\n", 354 | "\t\t
0.0579921978359688
\n", 355 | "\t
th.adj
\n", 356 | "\t\t
0.0573417503480001
\n", 357 | "\t
th.adj.int
\n", 358 | "\t\t
0.0573799044246692
\n", 359 | "
\n" 360 | ], 361 | "text/latex": [ 362 | "\\begin{description*}\n", 363 | "\\item[th.sd] 0.0579921978359688\n", 364 | "\\item[th.adj] 0.0573417503480001\n", 365 | "\\item[th.adj.int] 0.0573799044246692\n", 366 | "\\end{description*}\n" 367 | ], 368 | "text/markdown": [ 369 | "th.sd\n", 370 | ": 0.0579921978359688th.adj\n", 371 | ": 0.0573417503480001th.adj.int\n", 372 | ": 0.0573799044246692\n", 373 | "\n" 374 | ], 375 | "text/plain": [ 376 | " th.sd th.adj th.adj.int \n", 377 | " 0.058 0.057 0.057 " 378 | ] 379 | }, 380 | "execution_count": 63, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "rmses <- sqrt(apply(sr, 2, function(x) mean(x^2)))\n", 387 | "rmses" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "Reduction in root mean squared error for tau:" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 64, 400 | "metadata": { 401 | "collapsed": false 402 | }, 403 | "outputs": [ 404 | { 405 | "data": { 406 | "text/html": [ 407 | "
\n", 408 | "\t
th.sd
\n", 409 | "\t\t
0
\n", 410 | "\t
th.adj
\n", 411 | "\t\t
0.0112161206548594
\n", 412 | "\t
th.adj.int
\n", 413 | "\t\t
0.0105582032436756
\n", 414 | "
\n" 415 | ], 416 | "text/latex": [ 417 | "\\begin{description*}\n", 418 | "\\item[th.sd] 0\n", 419 | "\\item[th.adj] 0.0112161206548594\n", 420 | "\\item[th.adj.int] 0.0105582032436756\n", 421 | "\\end{description*}\n" 422 | ], 423 | "text/markdown": [ 424 | "th.sd\n", 425 | ": 0th.adj\n", 426 | ": 0.0112161206548594th.adj.int\n", 427 | ": 0.0105582032436756\n", 428 | "\n" 429 | ], 430 | "text/plain": [ 431 | " th.sd th.adj th.adj.int \n", 432 | " 0.000 0.011 0.011 " 433 | ] 434 | }, 435 | "execution_count": 64, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "1 - rmses / rmses[1]" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | "This isn't much reduction. That doesn't mean there isn't clear evidence of tenure being related to likes; we can clearly detect a relationship, but mainly because we have a large N, not because there is a strong association:" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 65, 454 | "metadata": { 455 | "collapsed": false 456 | }, 457 | "outputs": [ 458 | { 459 | "data": { 460 | "text/plain": [ 461 | "\n", 462 | "Call:\n", 463 | "lm(formula = log1p(likes) ~ tenure, data = ps)\n", 464 | "\n", 465 | "Residuals:\n", 466 | " Min 1Q Median 3Q Max \n", 467 | "-3.925 -2.120 -0.217 1.659 6.745 \n", 468 | "\n", 469 | "Coefficients:\n", 470 | " Estimate Std. Error t value Pr(>|t|) \n", 471 | "(Intercept) 2.233249 0.033869 65.9 <2e-16 ***\n", 472 | "tenure 0.000795 0.000048 16.6 <2e-16 ***\n", 473 | "---\n", 474 | "Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n", 475 | "\n", 476 | "Residual standard error: 2.2 on 9998 degrees of freedom\n", 477 | "Multiple R-squared: 0.0268,\tAdjusted R-squared: 0.0267 \n", 478 | "F-statistic: 275 on 1 and 9998 DF, p-value: <2e-16\n" 479 | ] 480 | }, 481 | "execution_count": 65, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [ 487 | "summary(lm(log1p(likes) ~ tenure, data = ps))" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 68, 493 | "metadata": { 494 | "collapsed": false 495 | }, 496 | "outputs": [ 497 | { 498 | "name": "stderr", 499 | "output_type": "stream", 500 | "text": [ 501 | "geom_smooth: method=\"auto\" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = \"cs\"). Use 'method = x' to change the smoothing method.\n" 502 | ] 503 | }, 504 | { 505 | "data": { 506 | "image/png": "", 507 | "image/svg+xml": [ 508 | "\n", 509 | "\n", 510 | "\n", 511 | "\n", 512 | "\n", 513 | "\n", 514 | "\n", 515 | "\n", 516 | "\n", 517 | "\n", 518 | "\n", 519 | "\n", 520 | "\n", 521 | "\n", 522 | "\n", 523 | "\n", 524 | "\n", 525 | "\n", 526 | "\n", 527 | "\n", 528 | "\n", 529 | "\n", 530 | "\n", 531 | "\n", 532 | "\n", 533 | "\n", 534 | "\n", 535 | "\n", 536 | "\n", 537 | "\n", 538 | "\n", 539 | "\n", 540 | "\n", 541 | "\n", 542 | "\n", 543 | "\n", 544 | "\n", 545 | "\n", 546 | "\n", 547 | "\n", 548 | "\n", 549 | "\n", 550 | "\n", 551 | "\n", 552 | "\n", 553 | "\n", 554 | "\n", 555 | "\n", 556 | "\n", 557 | "\n", 558 | "\n", 559 | "\n", 560 | "\n", 561 | "\n", 562 | "\n", 563 | "\n", 564 | "\n", 565 | "\n", 566 | "\n", 567 | "\n", 568 | "\n", 569 | "\n", 570 | "\n", 571 | "\n", 572 | "\n", 573 | "\n", 574 | "\n", 575 | "\n", 576 | "\n", 577 | "\n", 578 | "\n", 579 | "\n", 580 | "\n", 581 | "\n", 582 | "\n", 583 | "\n", 584 | "\n", 585 | "\n", 586 | "\n", 587 | "\n", 588 | "\n", 589 | "\n", 590 | "\n", 591 | "\n", 592 | "\n", 593 | "\n", 594 | 
"\n", 595 | "\n", 596 | "\n", 597 | "\n", 598 | "\n", 599 | "\n", 600 | "\n", 601 | "\n", 602 | "\n", 603 | "\n", 604 | "\n", 605 | "\n", 606 | "\n", 607 | "\n", 608 | "\n", 609 | "\n", 610 | "\n", 611 | "\n", 612 | "\n", 613 | " \n", 614 | "\n", 615 | "\n", 616 | " \n", 617 | "\n", 618 | "\n", 619 | " \n", 620 | "\n", 621 | "\n", 622 | " \n", 623 | "\n", 624 | "\n", 625 | " \n", 626 | "\n", 627 | "\n", 628 | " \n", 629 | "\n", 630 | "\n", 631 | " \n", 632 | "\n", 633 | "\n", 634 | " \n", 635 | "\n", 636 | "\n", 637 | " \n", 638 | "\n", 639 | "\n", 640 | " \n", 641 | "\n", 642 | "\n", 643 | " \n", 644 | "\n", 645 | "\n", 646 | " \n", 647 | "\n", 648 | "\n", 649 | " \n", 650 | "\n", 651 | "\n", 652 | " \n", 653 | "\n", 654 | "\n", 655 | " \n", 656 | "\n", 657 | "\n", 658 | " \n", 659 | "\n", 660 | "\n", 661 | " \n", 662 | "\n", 663 | "\n", 664 | " \n", 665 | "\n", 666 | "\n", 667 | " \n", 668 | "\n", 669 | "\n", 670 | " \n", 671 | "\n", 672 | "\n", 673 | " \n", 674 | "\n", 675 | "\n", 676 | "\n", 677 | "\n", 678 | "\n", 679 | "\n", 680 | "\n", 681 | "\n", 682 | "\n", 683 | "\n", 684 | "\n", 685 | "\n", 686 | "\n", 687 | "\n", 688 | "\n", 689 | "\n", 690 | "\n", 691 | "\n", 692 | "\n", 693 | "\n", 694 | "\n", 695 | "\n", 696 | "\n", 697 | "\n", 698 | "\n", 699 | "\n", 700 | "\n", 701 | "\n", 702 | "\n", 703 | "\n", 704 | "\n", 705 | "\n", 706 | "\n", 707 | "\n", 708 | "\n", 709 | "\n", 710 | "\n", 711 | "\n", 712 | "\n", 713 | "\n", 714 | "\n", 715 | "\n", 716 | "\n", 717 | "\n", 718 | "\n", 719 | "\n", 720 | "\n", 721 | "\n", 722 | "\n", 723 | "\n", 724 | "\n", 725 | "\n", 726 | "\n", 727 | "\n", 728 | "\n", 729 | "\n", 730 | "\n", 731 | "\n", 732 | "\n", 733 | "\n", 734 | "\n", 735 | "\n", 736 | "\n", 737 | "\n", 738 | "\n", 739 | "\n", 740 | "\n", 741 | "\n", 742 | "\n", 743 | "\n", 744 | "\n", 745 | "\n", 746 | " \n", 747 | "\n", 748 | "\n", 749 | " \n", 750 | "\n", 751 | "\n", 752 | " \n", 753 | "\n", 754 | "\n", 755 | " \n", 756 | "\n", 757 | "\n", 758 | " \n", 
759 | "\n", 760 | "\n", 761 | "\n", 762 | "\n", 763 | "\n", 764 | "\n", 765 | "\n", 766 | "\n", 767 | "\n", 768 | "\n", 769 | "\n", 770 | "\n", 771 | " \n", 772 | "\n", 773 | "\n", 774 | " \n", 775 | "\n", 776 | "\n", 777 | " \n", 778 | "\n", 779 | "\n", 780 | " \n", 781 | "\n", 782 | "\n", 783 | " \n", 784 | "\n", 785 | "\n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | "\n", 800 | "\n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | "\n", 814 | "\n", 815 | "\n" 816 | ], 817 | "text/plain": [ 818 | "plot without title" 819 | ] 820 | }, 821 | "metadata": { 822 | "image/svg+xml": { 823 | "isolated": true 824 | } 825 | }, 826 | "output_type": "display_data" 827 | } 828 | ], 829 | "source": [ 830 | "ggplot(\n", 831 | " aes(x = log1p(tenure), y = log1p(likes)),\n", 832 | " data = ps\n", 833 | ") + \n", 834 | "geom_smooth()" 835 | ] 836 | }, 837 | { 838 | "cell_type": "markdown", 839 | "metadata": {}, 840 | "source": [ 841 | "## Exercises\n", 842 | "Compare regression adjustment with post-stratification and blocking (ie pre-stratification).\n", 843 | "\n", 844 | "Conduct similar analysis of another data set." 
845 | ] 846 | }, 847 | { 848 | "cell_type": "code", 849 | "execution_count": null, 850 | "metadata": { 851 | "collapsed": true 852 | }, 853 | "outputs": [], 854 | "source": [] 855 | } 856 | ], 857 | "metadata": { 858 | "kernelspec": { 859 | "display_name": "R", 860 | "language": "R", 861 | "name": "ir" 862 | }, 863 | "language_info": { 864 | "codemirror_mode": "r", 865 | "file_extension": ".r", 866 | "mimetype": "text/x-r-source", 867 | "name": "R", 868 | "pygments_lexer": "r", 869 | "version": "3.2.2" 870 | } 871 | }, 872 | "nbformat": 4, 873 | "nbformat_minor": 0 874 | } 875 | -------------------------------------------------------------------------------- /cai_data/cai.adjacency.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deaneckles/randomization_inference/97b851c079853b1d546acc68db7c5fd45cc82e3e/cai_data/cai.adjacency.RData -------------------------------------------------------------------------------- /prep_data.R: --------------------------------------------------------------------------------
# prep_data.R
#
# Builds the two derived files written at the bottom of this script from the
# raw files under cai_data/:
#   * cai_data/cai.main.tsv         one row per vertex of the network, with
#                                   treatment, outcome and peer-count columns
#   * cai_data/cai.adjacency.RData  the sparse adjacency matrix `A`
#
# Paths are relative, so run this from the repository root.

library(igraph)
library(foreign)
library(Matrix)
library(lfe)
library(dplyr)
library(foreach)

# ---- Load raw data ----------------------------------------------------------

cain.all <- read.dta("cai_data/0422allinforawnet.dta")
cais.all <- read.dta("cai_data/0422survey.dta")

cai.all <- read.csv("cai_data/0422analysis.csv")
# ids are compared against (character) vertex names below
cai.all$id <- as.character(cai.all$id)

# Keep only rows that name a peer.
# NOTE(review): 99 looks like a placeholder code for network_id -- confirm
# against the data documentation.
cain.all <- subset(cain.all, !is.na(network_id) & network_id != 99)

cain <- cain.all

# ---- Edge list and id bookkeeping -------------------------------------------

# Two-column character matrix: column 1 is the ego (`id`), column 2 is the
# peer the ego named (`network_id`).
cain.el <- apply(as.matrix(cain[, c("id", "network_id")]), 2, as.character)
# drop = FALSE keeps a one-row edge list as a matrix
cain.el <- cain.el[!is.na(cain.el[, 2]), , drop = FALSE]

ids.in.net <- unique(c(cain.el[, 1], cain.el[, 2]))
egos.in.net <- unique(cain.el[, 1])
ids.in.survey <- unique(cai.all$id)
# The three *.only sets are not used further down; they are kept for
# inspecting the overlap between the network file and the survey file.
ids.in.net.only <- ids.in.net[!ids.in.net %in% ids.in.survey]
egos.in.net.only <- egos.in.net[!egos.in.net %in% ids.in.survey]
ids.in.survey.only <- ids.in.survey[!ids.in.survey %in% ids.in.net]

# Survey rows for ids that appear as an ego in the edge list
cai <- subset(cai.all, id %in% egos.in.net)

# Directed graph with one edge ego -> named peer per edge-list row
g <- graph_from_edgelist(cain.el, directed = TRUE)

# ---- Attributes for peers that are missing from the survey file -------------

# One row per distinct network_id (first occurrence), built from the
# network_* columns of the network file; rows without `intensive` are dropped.
cain.peer.summary <- cain[!duplicated(cain$network_id), ] %>%
  select(
    id = network_id,
    village = network_village,
    address = network_address,
    takeup_survey, delay, intensive, understanding
  ) %>%
  mutate(id = as.character(id)) %>%
  filter(!is.na(intensive))

peers.in.net <- unique(cain.peer.summary$id)
# `ids.in.survey` was computed above and `cai.all` has not changed since.
peers.in.net.only <- peers.in.net[!peers.in.net %in% ids.in.survey]

# Ego rows from the survey plus summary rows for peers not in the survey
caic <- bind_rows(
  cai,
  cain.peer.summary[cain.peer.summary$id %in% peers.in.net.only, ]
)

# ---- Attach attributes to the graph -----------------------------------------

head(vertex_attr(g, 'name'))
# The bulk assignment below also writes a `name` attribute (caic$name), so the
# original vertex names are stashed here and restored afterwards.
tmp.name <- vertex_attr(g, 'name')
caic$name <- caic$id
vertex_attr(g, index = as.character(caic$id)) <- as.list(caic)
vertex_attr(g, 'name') <- tmp.name
head(vertex_attr(g, 'id'))
head(vertex_attr(g, 'name'))
head(vertex_attr(g, 'village'))

# Keep only vertices with a non-missing `intensive` value
g1 <- induced_subgraph(g, V(g)[which(!is.na(V(g)$intensive))])

# ---- Adjacency matrix and peer counts ---------------------------------------

# make adjacency matrix
# as_adjacency_matrix() is the long-form name of as_adj(), which newer igraph
# releases flag as deprecated; TRUE is spelled out because `T` can be
# reassigned.
A <- as_adjacency_matrix(g1, sparse = TRUE, names = TRUE)
A.df <- as.data.frame(vertex_attr(g1))

# NA -> 0 so the matrix products below are defined. `intensive` is already
# non-missing on g1, so in practice this only matters for `delay`.
A.df$intensive.0 <- ifelse(is.na(A.df$intensive), 0, A.df$intensive)
A.df$delay.0 <- ifelse(is.na(A.df$delay), 0, A.df$delay)

# Each product sums a vertex-level indicator over the vertices a row's ego
# points to.
A.df$intensive.peers <- as.vector(A %*% A.df$intensive.0)
A.df$intensive.nond.peers <- as.vector(
  A %*% (A.df$intensive.0 * (1 - A.df$delay.0))
)
A.df$default.peers <- as.vector(A %*% A.df$default)
A.df$n.peers <- rowSums(A)
A.df$n.elig.peers <- as.vector(A %*% !is.na(A.df$intensive))

# ---- Sanity checks against columns shipped with the data --------------------

# compare my counts with the data
with(A.df, table(
  round(network_obs * network_rate_preintensive),
  intensive.nond.peers,
  useNA = "ifany"
))

with(A.df, table(
  network_obs,
  n.peers,
  useNA = "ifany"
))

with(A.df, table(
  n.peers,
  n.elig.peers
))

# `cain.s` (and its `n.intensive.pre` column) is not created anywhere in this
# script. Unguarded, the merge() below stops the script before the output
# files are written, so this optional check only runs when the object has
# been supplied from elsewhere.
if (exists("cain.s")) {
  tmp <- merge(
    cain.s,
    A.df %>%
      select(id, intensive.nond.peers, network_obs, network_rate_preintensive)
  )

  print(with(tmp, table(
    round(network_obs * network_rate_preintensive),
    n.intensive.pre,
    useNA = "ifany"
  )))
} else {
  message("Skipping comparison with `cain.s`: object is not defined.")
}

###
# write simplified data
A.df.to.write <- A.df %>%
  select(
    id, address, region, village, takeup_survey, age, male,
    delay, intensive, info_none,
    intensive.nondelay.peers = intensive.nond.peers, n.peers
  )

# row.names = TRUE is kept from the original, so the row names of A.df are
# written as a leading column.
write.table(
  A.df.to.write,
  file = "cai_data/cai.main.tsv",
  row.names = TRUE,
  sep = "\t"
)

save(A, file = "cai_data/cai.adjacency.RData")

--------------------------------------------------------------------------------