├── Macros.sas
├── README.md
├── RunMacros.sas
└── license.md

/Macros.sas:
--------------------------------------------------------------------------------
/******************************************************************************/
/**This code contains the macros for: *****************************************/
/**%SIMULATE: creates simulated cohort studies*********************************/
/**%ABM: runs an ABM taking parameter inputs from any specified source*********/
/**%METHOD_COMPARISON: runs %simulate, %abm, and %gformula under given inputs**/
/**%SEEDGEN: generates matrix of random number seeds***************************/
/**%LABELS2: creates labels for ABM results************************************/
/**%PRINTRESULTS: labels and outputs results of each method********************/
/**%CREATEPARAMS: creates input parameter datasets for creation of the cohorts*/
/**%FINAL_RESULTS: combines macros into a single run of the simulation study***/
/******************************************************************************/


/******************************************************************************/
/* Macro SIMULATE: simulate a cohort with time-varying treatment A, covariate */
/* CD and outcome Y from a latent failure time.                               */
/*                                                                            */
/* Models actually used by the code below:                                    */
/*   P(CD=1) at simulated time 3 (baseline) = 1 - pCD0                        */
/*   P(CD=1) after baseline = sgamma4*A_l1                                    */
/*        + (1-sgamma4)*expit(sgamma1 + kappa*log(IT0 + tau))                 */
/*   logit P(A=1) = salpha0 + salpha1*(1-CD) + salpha2*(1-CD_l1)              */
/*                  + salpha3*(1-CD_l2), and A stays 1 once it has been 1     */
/* sgamma0, sgamma2, sgamma3, sgamma5 and sgamma6 are accepted but not used   */
/* in this macro.                                                             */
/*                                                                            */
/* Reads the seed macro variables sT0, sCD, sA and sY from the calling scope  */
/* (they are set by METHOD_COMPARISON); they are not parameters.              */
/*                                                                            */
/* Outputs:                                                                   */
/*   &dataout - one row per subject and time point (tpoint shifted to start   */
/*              at 0 in the last step)                                        */
/*   &datasum - per time point hazard p_Y and cumulative survival s_tpoint    */
/*   temp     - baseline means pA_0 and pCD_0 (read by the caller)            */
/******************************************************************************/
%macro simulate(
    subjects = ,  /* number of subjects in the simulated data sets */
    tpoints  = ,  /* maximum number of measurements for each subject */
    psi1     = ,  /* true psi = AFT parameter for causal effect of treatment on survival */

    sgamma0 = , sgamma1 = , sgamma2 = , sgamma3 = , sgamma4 = , sgamma5 = , sgamma6 = ,
    kappa = , tau = ,

    salpha0 = , salpha1 = , salpha2 = , salpha3 = ,

    cut   = ,     /* cutpoint on T0 used to define binary U (contU = 0) */
    lam   = ,     /* scale parameter for weibull */
    shape = 1,    /* shape parameter for weibull; set to 1 for T~exp(lam) */

    dataout = Simdata_all,
    datasum = Simdata_surv,
    contU   = 0,  /* 0: U = I(T0 < cut); 1: U = T0 */
    pCD0    =
);

    /* Two additional time points are simulated before baseline so that lagged */
    /* values exist at baseline. Times 1 and 2 have A = 0 and CD = 1.          */
    %local tpoints_sim;
    %let tpoints_sim = %sysevalf(&tpoints + 2);

    data &dataout;
        /* Only the 0/1 indicators are shortened. T is a non-integer time and */
        /* keeps the default length so that it is not truncated.              */
        length A A_l1 A_l2 CD CD_l1 CD_l2 Y 3;

        /* random number stream seeds, taken from the calling scope */
        sT0 = %sysevalf(&sT0);
        sCD = %sysevalf(&sCD);
        sA  = %sysevalf(&sA);
        sY  = %sysevalf(&sY);

        cut = %sysevalf(&cut);

        do id = 1 to %sysevalf(&subjects);
            call ranexp(sT0, expvar);
            T0 = (expvar/(&lam**&shape))**(1/&shape);  /* T0 ~ exp(lambda) when shape = 1 */

            if &contU = 0 then do;
                if T0 < cut then IT0 = 1;  /* U = 1 if T0 < cut */
                else IT0 = 0;              /* U = 0 if T0 >= cut */
            end;
            else if &contU = 1 then do;
                IT0 = T0;
            end;

            maxT  = &tpoints_sim;
            initA = .;

            /* element 1 and 2 of each array are the two pre-baseline times */
            array CDarray  CDp2  CDp1  CD1-CD&tpoints;
            array pCDarray pCDp2 pCDp1 pCD1-pCD&tpoints;
            array Aarray   Ap2   Ap1   A1-A&tpoints;
            array pAarray  pAp2  pAp1  pA1-pA&tpoints;
            array Yarray   Yp2   Yp1   Y1-Y&tpoints;
            array Ymarray  Ymp2  Ymp1  Ym1-Ym&tpoints;
            array tarray   tp2   tp1   t1-t&tpoints;

            time = 1.0;

            do j = 1 to &tpoints_sim;

                /* lagged treatment and covariate values */
                if j = 1 then do;
                    A_l1 = 0; A_l2 = 0; CD_l1 = 0; CD_l2 = 0;
                end;
                else if j = 2 then do;
                    A_l1 = Aarray(j-1); A_l2 = 0; CD_l1 = CDarray(j-1); CD_l2 = 0;
                end;
                else do;
                    A_l1 = Aarray(j-1);   A_l2 = Aarray(j-2);
                    CD_l1 = CDarray(j-1); CD_l2 = CDarray(j-2);
                end;

                /* covariate CD */
                if j le 2 then do;
                    pCDarray(j) = 1;
                    CDarray(j)  = 1;
                end;
                else if j = 3 then do;
                    pCDarray(j) = 1-&pCD0;
                    call ranbin(sCD, 1, pCDarray(j), CDarray(j));
                end;
                else if j > 3 then do;
                    expeta = exp(&sgamma1 + &kappa*(log(IT0 + &tau)));
                    pCDarray(j) = (&sgamma4)*A_l1 + (1-&sgamma4)*expeta/(1+expeta);

                    /* a degenerate probability is assigned directly, without a draw */
                    if pCDarray(j) = 1 or pCDarray(j) = 0 then CDarray(j) = pCDarray(j);
                    else do;
                        call ranbin(sCD, 1, pCDarray(j), CDarray(j));
                    end;
                end;

                CD = CDarray(j);

                /* treatment A: once treated, always treated */
                if A_l1 = 1 then do;
                    Aarray(j)  = 1;
                    pAarray(j) = 1;  /* otherwise the previous subject's value would carry over */
                end;
                else do;
                    if j in (1,2) then do;
                        Aarray(j) = 0; pAarray(j) = 0;
                    end;
                    else do;
                        logitA = &salpha0 + &salpha1*(1-CD) + &salpha2*(1-CD_l1) + &salpha3*(1-CD_l2);
                        pAarray(j) = 1/(1+exp(-logitA));
                        call ranbin(sA, 1, pAarray(j), Aarray(j));
                    end;
                end;

                A = Aarray(j);

                /* After the full treatment history is known, map IT0 to the   */
                /* failure time Tfunc by accumulating exp(psi1*A) per interval. */
                if j = maxT then do;
                    Tcounter = 0;
                    Tholder  = 0;
                    TAholder = exp(&psi1*Aarray(1));

                    do while (Tholder + TAholder <= IT0 and (Tcounter+1) < maxT);
                        Tcounter + 1;
                        Tholder  = Tholder + TAholder;
                        TAholder = exp(&psi1*Aarray(Tcounter+1));
                    end;
                    Tfunc = Tcounter + (1/TAholder)*(IT0-Tholder);
                end;
            end; /* end of j loop */

            /* one output row per follow-up time; baseline is simulated time 3 */
            do tloop = 3 to &tpoints_sim;
                tpoint  = tloop - 2;
                tpoint2 = tpoint - 1;

                CD    = CDarray(tloop);
                pCD   = pCDarray(tloop);
                CD_0  = CDarray(3);
                CD_l1 = CDarray(tloop - 1);
                CD_l2 = CDarray(tloop - 2);

                A_0  = Aarray(3);
                A    = Aarray(tloop);
                pA   = pAarray(tloop);
                A_l1 = Aarray(tloop - 1);
                A_l2 = Aarray(tloop - 2);

                T = Tfunc - 3;

                if tpoint < Tfunc <= tpoint + 1 and tpoint < maxT then Y = 1;
                else Y = 0;

                output;
            end; /* end of tloop loop */
        end; /* end of id loop */

        keep id tpoint tpoint2 time T0 IT0 Y A A_l1 A_l2 A_0 CD CD_0 CD_l1 CD_l2 pA T maxT pCD;
    run;

    data &dataout;
        set &dataout;
        if tpoint ne . and y ne .;
    run;

    proc means data = &dataout;
        var T0 IT0;
    run;

    /* baseline prevalence of A and CD, stored in dataset temp for the caller */
    proc sort data = &dataout;
        by tpoint;
    run;

    data temp1;
        set &dataout;
        where tpoint = 1;
    run;

    proc means data = temp1 /*noprint*/;
        var A CD;
        output out = temp mean = pA_0 pCD_0;
    run;

    proc means data = &dataout;
        var A CD Y;
    run;

    /* Create dataset with overall survival over time */
    proc freq data = &dataout /*noprint*/;
        tables tpoint*CD tpoint*A / nopercent nocol;
    run;

    /* SPARSE keeps zero-count cells in COUNTS, so a time point without events */
    /* gives a hazard of 0 instead of a missing value that would propagate     */
    /* through the cumulative survival product.                                */
    proc freq data = &dataout /*noprint*/;
        tables tpoint*Y / out = counts sparse nopercent nocol;
    run;

    proc transpose data = counts out = output (drop = _name_ _label_) prefix = Y_;
        by tpoint;
        id Y;
        var COUNT;
    run;

    data output;
        set output;
        id = 1;
    run;

    proc transpose data = output out = output2 prefix = Y_0_;
        by id;
        id tpoint;
        var Y_0;
    run;

    proc transpose data = output out = output3 prefix = Y_1_;
        by id;
        id tpoint;
        var Y_1;
    run;

    data wide;
        merge output2(drop = _name_) output3(drop = _name_);
        by id;
    run;

    data &datasum;
        set wide;
        s0 = 1;
        array sY(&tpoints) sY1 - sY&tpoints;
        array pY(&tpoints) pY1 - pY&tpoints;
        array Y0(&tpoints) Y_0_1 - Y_0_&tpoints;
        array Y1(&tpoints) Y_1_1 - Y_1_&tpoints;

        /* discrete hazard and cumulative survival at each time point */
        do i = 1 to &tpoints;
            pY(i) = (Y1(i))/(Y0(i)+Y1(i));
            if i = 1 then sY(i) = s0*(1-pY(i));
            else sY(i) = sY(i-1)*(1-pY(i));
        end;

        do i = 1 to &tpoints;
            tpoint   = i;
            p_Y      = pY(i);
            s_tpoint = sY(i);
            output;
        end;

        keep tpoint p_Y s_tpoint;
    run;

    /* dataset for g-formula (time starts at 0) */
    data &dataout;
        set &dataout;
        tpoint  = tpoint - 1;
        tpoint2 = tpoint2 - 1;
        keep id tpoint tpoint2 Y A A_l1 A_l2 A_0 CD CD_l1 CD_l2 CD_0 IT0 T maxT;
    run;

    proc datasets library = work nolist;
        delete counts output output2 output3 wide temp1;
    quit;

%mend simulate;


/******************************************************************************/
/* Private helper for METHOD_COMPARISON: run one ABM, i.e. one %gformula call */
/* that uses supplied coefficients (usebetadata=1, no bootstrap), keep the    */
/* summary columns in &outdata, and print the first row of the coefficient    */
/* dataset to &paramdest.                                                     */
/* The intervention definitions interv1 and interv2 are resolved by %gformula */
/* from the calling macro's scope.                                            */
/******************************************************************************/
%macro _mc_run_abm(
    betadata    = ,  /* coefficient dataset passed to %gformula */
    resultsdata = ,  /* scratch dataset receiving the %gformula results; deleted afterwards */
    outdata     = ,  /* dataset holding the kept summary columns */
    ptitle      = ,  /* title text for the parameter print */
    tpoints     = ,
    scenario    = ,
    refint      = ,
    paramdest   =
);

    %gformula(
        data = Simdata_all,
        id = id,
        time = tpoint,
        timeptype = conspl,
        timeknots = 1 3 6 9 12,
        timeinc = 1,
        timepoints = &tpoints,

        outc = Y,
        outctype = binsurv,
        outcinteract = 1*2,

        ncov = 2,
        cov1 = CD, cov1otype = 1, cov1ptype = lag2bin,
        cov2 = A,  cov2otype = 1, cov2ptype = lag2bin,

        seed = %eval(1234+%sysevalf(&scenario)),

        nsimul = ,
        nsamples = 0,

        numint = 2,
        intervname = interv,

        betadata = &betadata,
        usebetadata = 1,
        survdata = work.survdth,
        savelib = work,
        covmeandata = covmeandth,
        observed_surv = obsurvdth,
        refint = &refint,
        print_stats = 0,
        resultsdata = &resultsdata
    );

    data &outdata;
        set &resultsdata;
        keep int int2 pD pD_llim95 pD_ulim95 rd rd_llim95 rd_ulim95 rr rr_llim95 rr_ulim95 pD_mean pD_std intervened averinterv;
    run;

    proc datasets library = work nolist;
        delete &resultsdata;
    quit;

    proc printto print = &paramdest;
    run;
    proc print data = &betadata (obs = 1);
        title "&ptitle";
    run;
    proc printto;
    run;

%mend _mc_run_abm;


/*******************************************************************************************************************************/
/* Macro METHOD_COMPARISON: simulates data under AFT, runs g-formula on data, runs agent-based model with specified parameters */
/*                                                                                                                             */
/* Steps: generate seeds (SEEDGEN), simulate the cohort (SIMULATE), run %gformula without and with the covariate CD,           */
/* save the fitted coefficients as betas_<scenario>, run the ABMs from stored coefficients, and write the final_* datasets     */
/* that PRINTRESULTS reads.                                                                                                    */
/*                                                                                                                             */
/* NOTE: &outputdest is not a parameter; it is resolved from the calling macro's scope (FINAL_RESULTS defines it).             */
/* NOTE: the betadata parameter is accepted but the code always uses the dataset name betas.                                   */
/* %gformula is defined outside this file.                                                                                     */
/*******************************************************************************************************************************/
%macro method_comparison(
    tpoints     = ,  /* number of time points for simulation */
    subjects    = ,  /* sample size for g-formula and ABMs */
    subjectsSim = ,  /* sample size of simulated cohort */
    nsamples    = ,  /* number of bootstrap samples for g-formula */

    lam = , cut = , contU = 0,  /* parameters for prevalence of U in simulated data */
    psi1 = 0,                   /* true effect of A on Y; psi1 = 0 for null effect */

    sgamma0 = , sgamma1 = , sgamma2 = , sgamma3 = , sgamma4 = , sgamma5 = , sgamma6 = ,
    kappa = , tau = ,

    salpha0 = , salpha1 = , salpha2 = , salpha3 = ,

    refint = ,

    pCD0 = ,

    betadata = betas,
    abm_paramdata = ,  /* name of external dataset containing parameter estimate(s) for use in ABM simulation */
    scenario = ,       /* scenario number for record keeping */
    paramdest =        /* output file for beta parameters */
);

    /* Record start time and date */
    %let timenow = %sysfunc(time(), time.);
    %let datenow = %sysfunc(date(), date9.);
    %put Beginning Simulation;
    %put Start time is &datenow / &timenow ;
    %put ;

    /* set up interventions for g-formula */
    %let interv1 =
        intno = 1,
        intlabel = 'Always treat',
        nintvar = 1,
        intvar1 = A,
        inttype1 = 1,
        intvalue1 = 1,
        intpr1 = 1,
        inttimes1 = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20;
    %let interv2 =
        intno = 2,
        intlabel = 'Never treat',
        nintvar = 1,
        intvar1 = A,
        inttype1 = 1,
        intvalue1 = 0,
        intpr1 = 1,
        inttimes1 = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20;

    /* simulate random seeds */
    %let lstream = %sysevalf(&subjects * &tpoints);
    %let fseed   = %eval(123 + %sysevalf(&scenario));

    %seedgen(
        fseed = &fseed,       /* initial seed for seed generator */
        lstream = &lstream,   /* number of seeds needed for one stream */
        seeds_per_line = 10,  /* number of seeds per row */
        nseeds = 1
    );

    /* read the first row of work.Seeds into macro variables seed1, seed2, ... */
    %let seeds = %sysfunc(open(work.Seeds));
    %let NObs  = %sysfunc(attrn(&seeds, NOBS));
    %syscall set(seeds);

    %let rc = %sysfunc(fetchobs(&seeds, 1));

    /* Assign seed values for data simulation and ABM */
    %let sT0 = %sysevalf(&seed1);
    %let sCD = %sysevalf(&seed2);
    %let sA  = %sysevalf(&seed3);
    %let sY  = %sysevalf(&seed4);

    %let sABM1 = %sysevalf(&seed5);
    %let sABM2 = %sysevalf(&seed6);
    %let sABM3 = %sysevalf(&seed7);
    %let sABM4 = %sysevalf(&seed8);
    %let sABM5 = %sysevalf(&seed9);

    %put Simulating initial data;
    %put ;

    /* simulate dataset, store in Simdata_all */
    %simulate(
        subjects = &subjectsSim,
        tpoints = &tpoints,
        sgamma0 = &sgamma0, sgamma1 = &sgamma1, sgamma2 = &sgamma2, sgamma3 = &sgamma3,
        sgamma4 = &sgamma4, sgamma5 = &sgamma5, sgamma6 = &sgamma6,
        salpha0 = &salpha0, salpha1 = &salpha1, salpha2 = &salpha2, salpha3 = &salpha3,
        kappa = &kappa, tau = &tau,
        psi1 = &psi1,
        lam = &lam,
        cut = &cut,
        contU = &contU, pCD0 = &pCD0
    );

    /* baseline means produced by SIMULATE */
    data _null_;
        set temp;
        call symput("pA0", pA_0);
        call symput("pCD0", pCD_0);
    run;

    data Simdata_combined;
        set temp;
        keep pA_0 pCD_0;
    run;

    data results_obs;
        set Simdata_surv;
    run;

    proc sort data = Simdata_all;
        by id;
    run;

    data first;
        set Simdata_all end = _end_;
        by id;
        if first.id then output;
    run;

    /* describe U; &outputdest comes from the calling scope */
    proc printto print = &outputdest;
    run;
    %if &contU = 0 %then %do;
        proc freq data = first;
            tables IT0;
            title "Prevalence of U (cut = &cut), Scenario: &scenario";
        run;
    %end;
    %else %if &contU = 1 %then %do;
        proc means data = first mean std min max p5 p95;
            var IT0;
            title "Distribution of continuous U, Scenario: &scenario";
        run;
    %end;
    proc printto;
    run;

    %put Data simulation complete;
    %put ;


    /* Run G-formula on Simulated Dataset */
    %put Running G-formula;
    %put ;

    /* g-formula without confounding adjustment */
    title 'GFORMULA SAMPLE';
    %gformula(
        data = Simdata_all,
        id = id,
        time = tpoint,
        timeptype = conspl,
        timeknots = 1 3 6 9 12,
        timeinc = 1,
        timepoints = &tpoints,

        outc = Y,
        outctype = binsurv,

        ncov = 1,
        cov1 = A, cov1otype = 1, cov1ptype = lag1bin,

        seed = %eval(1234+%sysevalf(&scenario)),

        nsimul = ,
        nsamples = &nsamples,

        numint = 2,
        intervname = interv,

        betadata = betas_conf,
        survdata = work.survdth,
        savelib = work,
        covmeandata = covmeandth,
        observed_surv = obsurvdth,
        refint = &refint,  /* never treat as reference */
        print_stats = 0,
        resultsdata = results_gform_conf
    );

    data results_gform_conf;
        set results_gform_conf;
        keep int int2 pD pD_llim95 pD_ulim95 rd rd_llim95 rd_ulim95 rr rr_llim95 rr_ulim95 pD_mean pD_std intervened averinterv;
    run;

    /* G-formula with confounding adjustment */
    title 'GFORMULA SAMPLE';
    %gformula(
        data = Simdata_all,
        id = id,
        time = tpoint,
        timeptype = conspl,
        timeknots = 1 3 6 9 12,
        timeinc = 1,
        timepoints = &tpoints,

        outc = Y,
        outctype = binsurv,
        outcinteract = 1*2,

        ncov = 2,
        cov1 = CD, cov1otype = 1, cov1ptype = lag2bin,
        cov2 = A,  cov2otype = 1, cov2ptype = lag2bin,

        seed = %eval(1234+%sysevalf(&scenario)),

        nsimul = ,
        nsamples = &nsamples,

        numint = 2,
        intervname = interv,

        betadata = betas,
        survdata = work.survdth,
        savelib = work,
        covmeandata = covmeandth,
        observed_surv = obsurvdth,
        refint = &refint,  /* never treat as reference */
        print_stats = 0,
        resultsdata = results_gform
    );

    data results_gform;
        set results_gform;
        keep int int2 pD pD_llim95 pD_ulim95 rd rd_llim95 rd_ulim95 rr rr_llim95 rr_ulim95 pD_mean pD_std intervened averinterv;
    run;

    /* output beta parameter estimates as betas_<scenario> */
    %let n = betas_;
    %let x = &n.&scenario;

    data &x;
        set betas /*(drop=_sample_)*/;
    run;

    proc datasets library = work nolist;
        delete betas _beta_ _paramdata_ _simuldata_ _inputd_ _ref_ fin finfin;
    quit;

    proc printto print = &paramdest;
    run;
    proc print data = &x (obs = 1);
        title "G-formula parameter estimates";
    run;
    proc printto;
    run;

    %put G-formula complete;
    %put ;


    %put Running ABM;
    %put ;

    /* Run ABM */

    /* ABM type 1 only for base case scenario */
    %if &scenario = 1 %then %do;

        %_mc_run_abm(
            betadata = work.&x,
            resultsdata = ABM_summary,
            outdata = results_ABM,
            ptitle = ABM1 parameter estimates,
            tpoints = &tpoints, scenario = &scenario, refint = &refint, paramdest = &paramdest
        );

    %end;

    /* several ABM types for other scenarios */
    %else %if &scenario > 1 %then %do;

        %put Running ABM - all params current pop;
        %put ;

        %_mc_run_abm(
            betadata = work.&x,
            resultsdata = ABM_summary_a,
            outdata = results_ABM_a,
            ptitle = ABM1 parameter estimates,
            tpoints = &tpoints, scenario = &scenario, refint = &refint, paramdest = &paramdest
        );

        %put Running ABM - all params base case;
        %put ;

        %_mc_run_abm(
            betadata = work.&abm_paramdata,
            resultsdata = ABM_summary_b,
            outdata = results_ABM_b,
            ptitle = ABM2 parameter estimates,
            tpoints = &tpoints, scenario = &scenario, refint = &refint, paramdest = &paramdest
        );

        %put Running ABM - only past A params base case;
        %put ;

        /* all coefficients from this scenario, except boutc7 from &abm_paramdata */
        data abm5_l_a;
            set work.&x;
            keep _sample_ bvar1_0-bvar1_8 bvar2_0 - bvar2_9 boutc0-boutc6 boutc8-boutc9;
        run;

        data abm5_y;
            set work.&abm_paramdata;
            keep _sample_ boutc7;
        run;

        data abm5;
            merge abm5_l_a abm5_y;
            by _sample_;
        run;

        %_mc_run_abm(
            betadata = work.abm5,
            resultsdata = ABM_summary_e,
            outdata = results_ABM_e,
            ptitle = ABM3 parameter estimates,
            tpoints = &tpoints, scenario = &scenario, refint = &refint, paramdest = &paramdest
        );

        %put Running ABM - only past A params cohort of interest;
        %put ;

        /* all coefficients from &abm_paramdata, except boutc7 from this scenario */
        data abm4_l_a;
            set work.&abm_paramdata;
            keep _sample_ bvar1_0-bvar1_8 bvar2_0 - bvar2_9 boutc0-boutc6 boutc8-boutc9;
        run;

        data abm4_y;
            set work.&x;
            keep _sample_ boutc7;
        run;

        data abm4;
            merge abm4_l_a abm4_y;
            by _sample_;
        run;

        %_mc_run_abm(
            betadata = work.abm4,
            resultsdata = ABM_summary_d,
            outdata = results_ABM_d,
            ptitle = ABM4 parameter estimates,
            tpoints = &tpoints, scenario = &scenario, refint = &refint, paramdest = &paramdest
        );

        proc datasets library = work nolist;
            delete &x;
        quit;
    %end;

    %put ABM complete;
    %put ;

    %let rc2 = %sysfunc(close(&seeds));

    /* Names of the final output datasets; PRINTRESULTS rebuilds the same names */
    %let nametemp12  = cuminc_sim&scenario;
    %let namefinal1  = final_OBScuminc&scenario;
    %let namefinal2  = final_gformula&scenario;
    %let namefinal2b = final_gformula_conf&scenario;

    %if &scenario = 1 %then %do;
        %let namefinal3 = final_abmcuminc&scenario;
    %end;
    %else %if &scenario > 1 %then %do;
        %let namefinal3a = final_abm1cuminc&scenario;
        %let namefinal3b = final_abm2cuminc&scenario;
        %let namefinal3e = final_abm5cuminc&scenario;
        %let namefinal3d = final_abm4cuminc&scenario;
    %end;

    /* Simulated Data: risk and survival (in percent) at the last time point */
    data &nametemp12;
        set results_obs;
        if tpoint = &tpoints then do;
            pY = (1-s_tpoint)*100;
            St = s_tpoint*100;
        end;
    run;

    proc univariate data = &nametemp12 noprint;
        var pY St;
        output out = &namefinal1
            mean = pY_mean St_mean;
    run;

    data &namefinal1;
        set &namefinal1;
        %labels2;
    run;

    /* G-formula */
    data &namefinal2;
        set results_gform;
    run;

    data &namefinal2b;
        set results_gform_conf;
    run;

    /* ABM */
    %if &scenario = 1 %then %do;
        data &namefinal3;
            set results_ABM;
        run;
    %end;
    %else %if &scenario > 1 %then %do;
        data &namefinal3a;
            set results_ABM_a;
            %labels2;
        run;

        data &namefinal3b;
            set results_ABM_b;
            %labels2;
        run;

        data &namefinal3e;
            set results_ABM_e;
            %labels2;
        run;

        data &namefinal3d;
            set results_ABM_d;
            %labels2;
        run;
    %end;

    %let timenow2 = %sysfunc(time(), time.);
    %let datenow2 = %sysfunc(date(), date9.);
    %put ;
    %put End time is &datenow2 / &timenow2 ;
    %put ;

%mend method_comparison;


/******************************************************************************/
/******************************************************************************/
/* Macro SEEDGEN: Generate random seeds                                       */
/*                                                                            */
/* Writes dataset SEEDS (and prints it) with columns bsample and              */
/* seed1-seed<seeds_per_line>. One row is written per step of the outer loop, */
/* which runs from 1 to min(nseeds*lstream, 2**31-1) in steps of lstream.     */
/* Each seed is the state of the RANUNI stream after another lstream draws.   */
/*   fseed          - initial seed for the generator                          */
/*   lstream        - number of draws between consecutive seeds               */
/*   seeds_per_line - number of seed columns per row (1 if blank)             */
/*   nseeds         - number of rows requested                                */
/******************************************************************************/
%macro seedgen(fseed=, lstream=, seeds_per_line=, nseeds=);
    /* keep the macro loop index out of the caller's symbol table */
    %local i;

    %if %bquote(&seeds_per_line) = %then %let seeds_per_line = 1;

    data seeds(keep = bsample %do i = 1 %to %eval(&seeds_per_line); seed&i %end;);
        retain seed &fseed;
        bsample = 0;
        do i = 1 to min((&nseeds)*&lstream, 2**31-1) by &lstream;
            do index = 1 to %eval(&seeds_per_line);

                /* advance the stream by lstream draws */
                do j = 1 to &lstream;
                    call ranuni(seed, x);
                end;

                /* On the very first row seed1 is the initial seed itself, and */
                /* the current stream state is stored in seed2 instead.        */
                if i = 1 and index = 1 then do;
                    index = 2;
                    seed1 = &fseed;
                end;

                %do i = 1 %to %eval(&seeds_per_line);
                    if index = &i then seed&i = seed;
                %end;
            end;
            bsample = bsample + 1;
            output;
        end;
    run;

    proc print data = seeds;
    run;

%mend seedgen;


/******************************************************************************/
/******************************************************************************/
/* Macro LABELS2: format output for simulation and agent-based model.         */
/* Emits LABEL statements only; call it inside a DATA step.                   */
%macro labels2;
    label int  = 'Intervention';
    label int2 = 'Description';

    label tpoint = 'Time';

    label p_Y       = 'Risk (%)';
    label pY_mean   = 'Risk (%)';
    label pY_std    = 'Risk (%), SD';
    label pY_llim95 = 'Lower limit 95% CI';
    label pY_ulim95 = 'Upper limit 95% CI';

    label rd        = 'Risk Difference (%)';
    label rd_mean   = 'Risk Difference (%)';
    label rd_std    = 'Risk Difference (%), SD';
    label rd_ulim95 = 'Upper limit 95% CI';
    label rd_llim95 = 'Lower limit 95% CI';
%mend labels2;


/******************************************************************************/
/******************************************************************************/
/* Macro PRINTRESULTS: formats and prints results from specified number of    */
/* scenarios.                                                                 */
/*   scenarios  - number of scenarios; scenarios 1..scenarios are printed     */
/*   basecase   - scenario number treated as the base case                    */
/*   outputdest - file that receives the printed output                       */
/* Reads the final_* datasets written by METHOD_COMPARISON.                   */
/* NOTE: the base-case branch prints final_abmcuminc<scenario>, which         */
/* METHOD_COMPARISON only creates for scenario 1, so basecase is expected to  */
/* be 1. The print destination is left pointing at &outputdest on exit; the   */
/* caller resets it.                                                          */
/******************************************************************************/
%macro printresults(
    scenarios = , basecase = , outputdest =
);
    /* keep loop and working variables out of the caller's symbol table */
    %local i scenario namesuff varSim varGform varABM
           namefinal1 namefinal2 namefinal2b
           namefinal3 namefinal3a namefinal3b namefinal3e namefinal3d;

    %let varSim   = pY_mean;
    %let varGform = int int2 pd pd_llim95 pd_ulim95 rd RD_llim95 RD_ulim95 rr RR_llim95 RR_ulim95 intervened averinterv;
    %let varABM   = int int2 pd rd rr intervened averinterv;

    %do i = 1 %to &scenarios;
        %let scenario = &i;

        /* population label used in the non-base-case titles; set on every */
        /* pass so that it is never undefined or left over from before     */
        %if &i = 2 %then %let namesuff = High-risk;
        %else %if &i = 3 %then %let namesuff = Low-risk;
        %else %let namesuff = Non-base-case;

        %let namefinal1  = final_OBScuminc&scenario;
        %let namefinal2  = final_gformula&scenario;
        %let namefinal2b = final_gformula_conf&scenario;

        %if &scenario = 1 %then %do;
            %let namefinal3 = final_abmcuminc&scenario;
        %end;
        %else %if &scenario > 1 %then %do;
            %let namefinal3a = final_abm1cuminc&scenario;
            %let namefinal3b = final_abm2cuminc&scenario;
            %let namefinal3e = final_abm5cuminc&scenario;
            %let namefinal3d = final_abm4cuminc&scenario;
        %end;

        /* send procedure output to the results file */
        proc printto print = &outputdest;
        run;

        %if &basecase = &scenario %then %do;
            proc print data = &namefinal1 label;
                var &varSim;
                title "Summary of Simulated Data: Basecase, Scenario &scenario";
            run;
            proc print data = &namefinal2 label;
                var &varGform;
                title "Summary of G-formula Results: Basecase, Scenario &scenario";
            run;
            proc print data = &namefinal2b label;
                var &varGform;
                title "Summary of G-formula Results, without confounding adjustment: Basecase, Scenario &scenario";
            run;
            proc print data = &namefinal3 label;
                var &varABM;
                title "Summary of ABM type 1 Results: Basecase, Scenario &scenario";
                title2 "All parameters from cohort of interest";
            run;
        %end;
        %else %do;
            proc print data = &namefinal1 label;
                var &varSim;
                title "Summary of Simulated Data: &namesuff population, Scenario &scenario ";
            run;
            proc print data = &namefinal2 label;
                var &varGform;
                title "Summary of G-formula Results: &namesuff population, Scenario &scenario";
            run;
            proc print data = &namefinal2b label;
                var &varGform;
                title "Summary of G-formula Results, without confounding adjustment: &namesuff population, Scenario &scenario";
            run;
            proc print data = &namefinal3a label;
                var &varABM;
                title "Summary of ABM TYPE 1 Results: &namesuff population, Scenario &scenario";
                title2 "All parameters from cohort of interest";
            run;
            proc print data = &namefinal3b label;
                var &varABM;
                title "Summary of ABM TYPE 2 Results: &namesuff population, Scenario &scenario";
                title2 "All parameters from base-case cohort";
            run;
            proc print data = &namefinal3e label;
                var &varABM;
                title "Summary of ABM TYPE 3 Results: &namesuff population, Scenario &scenario";
                title2 "All past A parameters from base-case cohort";
            run;
            proc print data = &namefinal3d label;
                var &varABM;
                title "Summary of ABM TYPE 4 Results: &namesuff population, Scenario &scenario";
                title2 "All past A parameters from cohort of interest";
            run;
        %end;
    %end;
%mend printresults;


/************************************************/
/*Run Method Comparison under range of scenarios*/
/************************************************/
/* Macro FINAL_RESULTS: builds the scenario inputs with CREATEPARAMS, runs     */
/* METHOD_COMPARISON once per scenario (3 scenarios, 12 time points, reference */
/* intervention 2) and prints everything with PRINTRESULTS.                    */
/* Each row of work.simulinputs is read into macro variables named after its   */
/* columns (g0, g1, ..., a3, lambda) by %syscall set plus fetchobs.            */
/* METHOD_COMPARISON and CREATEPARAMS read &outputdest from this scope.        */
%macro final_results(outputdest="output.rtf", logdest="log.rtf", paramdest = "params.rtf", subjects = 0, nsamples =0 ,
    psi1 = 0, int1lam = 0, int2lam=0, int3lam =0,
    int1U =1.10, g2 = 0, g3 = 0, g4 = 0, g5 =0 , g6 = 0, kappa =0 , tau = 0,
    a0 =0 , a1 =0 , a2 =0 , a3 = 0, cut = 0, contU =0, pCD0 = );

    proc printto log = &logdest;
    run;

    %let timenow = %sysfunc(time(), time.);
    %let datenow = %sysfunc(date(), date9.);
    %put Initiating Program;
    %put Start time is &datenow / &timenow ;
    %put ;

    /* Specify macro variables for use in all scenarios */
    %let scenarios = 3;
    %let refint    = 2;
    %let tpoints   = 12;

    %createparams(int1U = &int1U, int1lam = &int1lam, int2lam = &int2lam, int3lam = &int3lam,
                  scen = &scenarios, kappa = &kappa, tau = &tau,
                  g2 = &g2, g3 = &g3, g4 = &g4, g5 = &g5, g6 = &g6,
                  a0 = &a0, a1 = &a1, a2 = &a2, a3 = &a3);

    %let simulinp = %sysfunc(open(work.simulinputs));
    %let NObs3    = %sysfunc(attrn(&simulinp, NOBS));
    %syscall set(simulinp);

    %do scen = 1 %to &scenarios;
        %put &scen;

        /* load row &scen of work.simulinputs into the linked macro variables */
        %let set = %sysfunc(fetchobs(&simulinp, &scen));

        %let sgamma0 = %sysevalf(&g0);
        %let sgamma1 = %sysevalf(&g1);  /* U->L */
        %let sgamma2 = %sysevalf(&g2);
        %let sgamma3 = %sysevalf(&g3);
        %let sgamma4 = %sysevalf(&g4);
        %let sgamma5 = %sysevalf(&g5);
        %let sgamma6 = %sysevalf(&g6);
        %let kappa   = %sysevalf(&kappa);
        %let tau     = %sysevalf(&tau);

        %let salpha0 = %sysevalf(&a0);
        %let salpha1 = %sysevalf(&a1);
        %let salpha2 = %sysevalf(&a2);
        %let salpha3 = %sysevalf(&a3);

        %let lam = %sysevalf(&lambda);  /* prev U */

        /* coefficients saved by the scenario 1 run */
        %let abm_paramdata = Betas_1;

        %method_comparison(
            lam = &lam, cut = &cut,  /* parameters for prevalence of U in simulated data */
            contU = &contU,          /* switch for binary or continuous U in simulated data */
            psi1 = &psi1,
            abm_paramdata = &abm_paramdata,
            scenario = &scen,        /* scenario number for record keeping */

            tpoints = &tpoints,
            subjects = &subjects,
            subjectsSim = &subjects,
            nsamples = &nsamples,

            sgamma0 = &sgamma0, sgamma1 = &sgamma1, sgamma2 = &sgamma2,
            sgamma3 = &sgamma3, sgamma4 = &sgamma4, sgamma5 = &sgamma5, sgamma6 = &sgamma6,
            kappa = &kappa, tau = &tau,

            salpha0 = &salpha0, salpha1 = &salpha1, salpha2 = &salpha2, salpha3 = &salpha3,

            refint = &refint,
            paramdest = &paramdest,
            pCD0 = &pCD0
        );

        /* the bare proc printto steps run above reset the log destination */
        proc printto log = &logdest;
        run;
        %put Scenario &scen complete;
        %put %sysfunc(time(), time.);

    %end;

    %let set2 = %sysfunc(close(&simulinp));

    %printresults(basecase = 1, scenarios = &scenarios, outputdest = &outputdest);

    proc printto log = &logdest;
    run;

    %let timenow2 = %sysfunc(time(), time.);
    %let datenow2 = %sysfunc(date(), date9.);
    %put Program Complete;
    %put End time is &datenow2 / &timenow2 ;
    %put ;

    /* restore the default log and print destinations */
    proc printto;
    run;

%mend final_results;




/**********************************************************************************/
/****Input parameters for creating simulated population to dataset*/

/*
   Macro CREATEPARAMS: writes data set SIMULINPUTS with one observation per
   scenario (i = 1 to scen) and prints it.

   Parameters:
     int1U       - stored as g1 (effect of U on L)
     int2U, int3U
                 - accepted but not referenced anywhere in the body
     int1lam, int2lam, int3lam
                 - lambda for scenarios 1, 2 and 3; no branch assigns
                   lambda when i exceeds 3
     scen        - number of scenario rows to write
     g2-g6       - copied to variables of the same name
     a0          - a probability; stored as its logit, log(a0/(1-a0))
     a1-a3       - copied to variables of the same name
     kappa, tau  - copied to variables of the same name

   outputdest is not a parameter of this macro: the PROC PRINTTO step
   resolves it from the calling environment (final_results defines it as
   one of its own parameters).
*/
%macro createparams(int1U = , int2U = , int3U = , int1lam = , int2lam=, int3lam =, scen = ,
    g2 = 4.00, g3 = 2.50, g4 = 0.1, g5 =0.1, g6 = 1, a0 = 0.33, a1 = 5.0, a2 =3.00 , a3 = 1.50, kappa = , tau = );
    data simulinputs;
        do i = 1 to &scen;
            /*constant parameters*/
            pvalue = cdf('normal', ((200-588)/300), 0, 1);
            /*NOTE(review): pvalue is a probability and is stored as-is, while the
              comment below says "logit" and a0 further down is explicitly
              logit-transformed - confirm which scale sgamma0 is expected on.*/
            g0 = pvalue; /*sgamma0 - logit prob of CD4<200*: p(CD4<200) = 0.09 - Althoff 2007 (CEPAC ref) */

            g1 = &int1U;/*sgamma1 - Effect of U on L*/

            /*constant parameters*/
            g2 = &g2; /*sgamma2 - log RR low CD4 given low CD4 last month*/
            g3 = &g3; /*sgamma3 - log RR low CD4 given low CD4 2 months ago*/
            g4 = &g4; /*sgamma4 - log RR low CD4 given on treatment last month*/
            g5 = &g5; /*sgamma5 - log RR low CD4 given on treatment 2 months ago*/
            g6 = &g6;
            kappa = &kappa;
            tau = &tau;

            a0 = log(&a0/(1-&a0)); /*salpha0 - logit prob of on treatment*/
            a1 = &a1; /*salpha1 - log RR for on treatment when CD4 <200 in current month*/
            a2 = &a2; /*salpha2 - log RR for on treatment when CD4 <200 in last month*/
            a3 = &a3; /*salpha3 - log RR for on treatment when CD4 <200 in 2 months ago*/

            /*scenario dependent parameter: lambda*/
            if i = 1 then
                lambda = &int1lam; /*Prevalence of U*/
            else if i = 2 then
                lambda = &int2lam;
            else if i =3 then
                lambda = &int3lam;
            output;
        end;
    run;

    /*Send the listing of the parameter table to the caller's output file*/
    proc printto print = &outputdest;
    run;

    proc print data=simulinputs;
    run;

    /*Restore default log and print destinations*/
    proc printto;
    run;

%mend createparams;

--------------------------------------------------------------------------------
/README.md:
-------------------------------------------------------------------------------- 1 | # ABMs-for-causal-inference 2 | The repository 'ABMs for causal inference' archives the SAS code for comparing agent-based models and the parametric g-formula, published in the American Journal of Epidemiology in 2017 [1]. The code is also included as an appendix to the American Journal of Epidemiology publication. 3 | 4 | This repository includes code for simulating data with time-varying treatment-confounder feedback, and estimating the effect of treatment using an agent-based model and the parametric g-formula. This code demonstrates the potential for biases when using agent-based models for causal inference. SAS 9.4 was used for all analyses. 5 | 6 | The code appendix contains the following programs: 7 | 1. RunMacros.sas 8 | This program takes as input the parameters used to simulate the dataset for analyses, the number of subjects in the desired simulated cohort, and the number of bootstrap samples to run, and feeds this information into the macros which perform the analyses. 9 | 2. Macros.sas 10 | This program contains the macros required to execute the simulation, except the previously published g-formula macro[2] (version: June-2015), restricted cubic spline macro[3,4], and SAS %lst macro[5]. 11 | 12 | References: 13 | 14 | [1] Murray EJ, Robins JM, George R. Seage III, Freedberg KA, Hernán MA. A Comparison of Agent-Based Models and the Parametric G-Formula for Causal Inference. American Journal of Epidemiology. 2017;186(2):131-42 [http://bit.ly/2tKYt75]. 15 | 16 | [2] Taubman SL, Young JG, Picciotto S, Logan R, Hernán MA. G-formula SAS macro 2012. Version June-2015. Available from: http://www.hsph.harvard.edu/causal/software/. 17 | 18 | [3] Harrell FE. %rcspline macro. Clinical Biostatistics Duke University Medical Center, 1988. 19 | 20 | [4] Devlin TF, Weeks BJ, editors. Spline functions for logistic regression modeling. 
Proceedings of the Eleventh Annual SAS Users Group International; 1986 February 9-12; Atlanta, Georgia: Cary NC: SAS Institute. 21 | 22 | [5] Sample 44124: Counting the number of missing and non-missing values for each variable in a data set: SAS Institute Inc; [11/30/2016]. Available from: http://support.sas.com/kb/44/124.html. 23 | 24 | 25 |
--------------------------------------------------------------------------------
/RunMacros.sas:
--------------------------------------------------------------------------------

/*
   Driver program: defines the input parameter table, then calls
   final_results once per row through the factorial macro below.
*/

libname abm '/ABM';

/*Macro libraries: study macros, g-formula macro, spline macro, lst macro*/
%include 'macros.sas';
%include '/gformula/June-2015/gformula.sas';
%include 'rcspline.sas';
%include 'lst.sas';

options minoperator mindelimiter=',';
options nonotes nocenter ;



/**Main analyses**/

/*First row to run and number of additional rows after it*/
%let start = 1;
%let numruns = 0;

/*Prefix for every output file name, and the causal parameter psi1*/
%let name =null.;
%let psi1 = 0;

/*File name suffixes for the output, log and parameter files*/
%let nameA = out.rtf;
%let nameB = log.rtf;
%let nameC = params.rtf;

%let logdest = &name.&nameB;
%let paramdest = &name.&nameC;

/*One observation per parameter combination to run*/
data simparams_test;
    int1U =25;
    g2 = 0;
    g3 = 0;
    g4 = 0.675;
    g5 = 0;
    g6 = 0;
    a0 = 0.1;
    a1 = 0.3;
    a2 = 0.25;
    a3 = 0.10;
    kappa = -11.2;
    tau =0.38 ;
    pCD0 = 0.20;
run;

proc print data = simparams_test;
run;


/*Open the parameter table and link its variables to macro variables of
  the same name, so that each FETCHOBS inside factorial refreshes int1U,
  g2-g6, a0-a3, kappa, tau and pCD0 from the fetched row.*/
%let newsim = %sysfunc(open(simparams_test));
%let NObs3 = %sysfunc(attrn(&newsim,NOBS));
%syscall set(newsim);

/*
   Macro FACTORIAL: runs final_results for rows startrun through
   startrun + num of simparams_test.

   Parameters:
     startrun - first observation number to fetch
     num      - number of further observations to run after startrun
     name1    - prefix for the per-run output file name
     psi1     - forwarded to final_results
     nsamples - forwarded to final_results (default 500)

   logdest, paramdest and nameA are read from the enclosing (global)
   scope. The data set handle newsim is closed when the loop finishes.

   NOTE(review): g2, g3, g5 and g6 are read from the table but are not
   forwarded to final_results, which then uses its own defaults for
   them - confirm this is intended.
*/
%macro factorial(startrun = &start, num = &numruns, name1 = , psi1 = , nsamples = 500);

    %let endrun = %sysevalf(&startrun + &num);

    %do blockrun = &startrun %to &endrun;

        %let set99 = %sysfunc(fetchobs(&newsim,&blockrun));

        %let int1U_new = %sysevalf(&int1U);
        %let g4_new = %sysevalf(&g4);
        %let kappa_new = %sysevalf(&kappa);
        %let tau_new = %sysevalf(&tau);
        %let a0_new = %sysevalf(&a0);
        %let a1_new = %sysevalf(&a1);
        %let a2_new = %sysevalf(&a2);
        %let a3_new = %sysevalf(&a3);

        %let pCD0_new = %sysevalf(&pCD0);

        /*%let name1 = Benef;*/
        /*Per-run output file name: prefix, then run number, then suffix*/
        %let name2 = &name1.&blockrun;
        %let outputdest = &name2.&nameA;

        %final_results(outputdest="&outputdest", logdest= "&logdest", paramdest = "&paramdest",
            subjects =100000, nsamples = &nsamples, psi1 = &psi1, int1lam = 0.010 , int2lam=0.1, int3lam = 0.001,
            int1U = &int1U_new, g4 = &g4_new, kappa = &kappa_new, tau = &tau_new,
            a0 = &a0_new, a1 = &a1_new, a2 = &a2_new, a3 = &a3_new, contU = 1, pCD0 = &pCD0_new);

    %end;

    %let set299=%sysfunc(close(&newsim));
%mend;

%factorial(startrun = &start, num = &numruns, name1 = &name, psi1 = &psi1);

--------------------------------------------------------------------------------
/license.md:
--------------------------------------------------------------------------------
1 | MIT License 2 | 3 | Copyright (c) 2017 Eleanor (Ellie) Murray 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | --------------------------------------------------------------------------------