├── SEERaBomb
    ├── .gitignore
    ├── .Rbuildignore
    ├── src
    │   ├── .gitignore
    │   ├── SEERaBomb_init.c
    │   ├── RcppExports.cpp
    │   └── fillPYM.cpp
    ├── data
    │   ├── nvsr.rda
    │   ├── nvsr01.rda
    │   ├── stdUS.rda
    │   └── Sigurdson.rda
    ├── inst
    │   └── docs
    │   │   ├── overview.pdf
    │   │   ├── gettingData.pdf
    │   │   ├── papers
    │   │       ├── CMLmysteries
    │   │       │   ├── readme.docx
    │   │       │   ├── common.R
    │   │       │   └── thyr2CMLfig1d.R
    │   │       ├── uveal
    │   │       │   ├── seerEAR
    │   │       │   │   ├── data
    │   │       │   │   │   └── D1.RData
    │   │       │   │   ├── cmpRisk.R
    │   │       │   │   └── fig2Ealone.R
    │   │       │   └── attic
    │   │       │   │   ├── common
    │   │       │   │       ├── UVdemog.R
    │   │       │   │       ├── setup.R
    │   │       │   │       └── UVallFields.R
    │   │       │   │   ├── seerEARmort
    │   │       │   │       ├── figure1.R
    │   │       │   │       └── survByAgeG.R
    │   │       │   │   └── other
    │   │       │   │       ├── UVcsdAllSecs.R
    │   │       │   │       ├── uvealOverview.R
    │   │       │   │       ├── EyeTables.R
    │   │       │   │       └── UVcsd.R
    │   │       ├── renal
    │   │       │   ├── incidNdemog
    │   │       │   │   ├── fieldNamesExcel.R
    │   │       │   │   ├── demog.R
    │   │       │   │   ├── buildWithPrimsite.R
    │   │       │   │   ├── allFields.R
    │   │       │   │   └── csd.R
    │   │       │   ├── common
    │   │       │   │   ├── load.R
    │   │       │   │   └── acros.R
    │   │       │   └── surv
    │   │       │   │   └── surv.R
    │   │       ├── tutorial
    │   │       │   ├── tMDSrepair.R
    │   │       │   ├── incidEx2.R
    │   │       │   ├── install.R
    │   │       │   ├── msdEx1.R
    │   │       │   ├── attic
    │   │       │   │   ├── cmlMortEx1.R
    │   │       │   │   ├── incidCML.R
    │   │       │   │   ├── abombLeu.R
    │   │       │   │   ├── breast2leu.R
    │   │       │   │   ├── brain2leu.R
    │   │       │   │   ├── cmlMort.R
    │   │       │   │   ├── seerLeu.R
    │   │       │   │   └── csdDemo.R
    │   │       │   ├── abombEx5.R
    │   │       │   ├── common.R
    │   │       │   ├── ageTherapyEx4.R
    │   │       │   └── csdEx3.R
    │   │       ├── PMF
    │   │       │   ├── demogCheese.R
    │   │       │   ├── demog.R
    │   │       │   ├── README.md
    │   │       │   ├── mkAllFields.R
    │   │       │   └── mkMaxRes.R
    │   │       └── attic
    │   │       │   ├── blood2012
    │   │       │       ├── blood2012fig5bc.R
    │   │       │       ├── blood2012fig4.R
    │   │       │       └── blood2012fig6.R
    │   │       │   ├── leukemiaRes2016
    │   │       │       ├── allTimeRRtab1.R
    │   │       │       ├── prosBreastTimeCrs.R
    │   │       │       └── timeCrs.R
    │   │       │   ├── AAPSJ2016
    │   │       │       └── CMLmortRRtimeCrs.R
    │   │       │   ├── leukemia2016
    │   │       │       ├── interactions2D.R
    │   │       │       ├── amlAbombAge.R
    │   │       │       ├── lymphoidFirsts.R
    │   │       │       ├── mkExcelDemo.R
    │   │       │       ├── PYnMDSvsDBs.R
    │   │       │       └── incidAllFirsts.R
    │   │       │   └── REB2014
    │   │       │       └── baseGraphics
    │   │       │           ├── CMLagexDev.R
    │   │       │           ├── racePvalues.R
    │   │       │           ├── CMLwait.R
    │   │       │           └── CMLwaitCI.R
    │   │   ├── extraFuncs
    │   │       ├── mkPYM.R
    │   │       ├── mkPYMR.R
    │   │       └── mkPYM.c
    │   │   ├── index.html
    │   │   ├── examples
    │   │       ├── mk2Dsplines.R
    │   │       ├── mort.R
    │   │       └── attic
    │   │       │   ├── mkSEERcrt.R
    │   │       │   └── lungSurv.R
    │   │   └── changes.txt
    ├── man
    │   ├── figures
    │   │   └── mkSEERfig.png
    │   ├── ICD10cod.Rd
    │   ├── mapCancsW.Rd
    │   ├── mapCancsP.Rd
    │   ├── incidSEER.Rd
    │   ├── foldDF.Rd
    │   ├── mapCOD2.Rd
    │   ├── mapCODs.Rd
    │   ├── mapCancs.Rd
    │   ├── getBinInfo.Rd
    │   ├── mapTrts.Rd
    │   ├── mkLT.Rd
    │   ├── rad_noRad.Rd
    │   ├── riskVsAge.Rd
    │   ├── incidAbomb.Rd
    │   ├── nvsr01.Rd
    │   ├── Sigurdson.Rd
    │   ├── seerStats.Rd
    │   ├── simSurv.Rd
    │   ├── mkAbomb.Rd
    │   ├── post1PYOm.Rd
    │   ├── getFields.Rd
    │   ├── mapRegs.Rd
    │   ├── nvsr.Rd
    │   ├── mkMrt.Rd
    │   ├── getPY.Rd
    │   ├── foldD.Rd
    │   ├── stdUS.Rd
    │   ├── plot2D.Rd
    │   ├── print.seerSet.summary.Rd
    │   ├── getE.Rd
    │   ├── mkDemographics.Rd
    │   ├── p2s.Rd
    │   ├── esd.Rd
    │   ├── fillPYM.Rd
    │   ├── mkMrtLocal.Rd
    │   ├── mk2D.Rd
    │   ├── summary.seerSet.Rd
    │   ├── mkDF.Rd
    │   ├── msd.Rd
    │   ├── mkSEERold.Rd
    │   ├── post1PYO.Rd
    │   ├── getDF.Rd
    │   ├── mkExcelTsd.Rd
    │   ├── mkExcelCsd.Rd
    │   ├── SEERaBomb-package.Rd
    │   ├── tsd.Rd
    │   ├── seerSet2.Rd
    │   ├── seerSet.Rd
    │   └── mkSEER.Rd
    ├── R
    │   ├── RcppExports.R
    │   ├── getPY.R
    │   ├── foldDF.R
    │   ├── getBinInfo.R
    │   ├── mkMrt.R
    │   ├── foldD.R
    │   ├── rad_noRad.R
    │   ├── print.seerSet.summary.R
    │   ├── mapRegs.R
    │   ├── p2s.R
    │   ├── post1PYOm.R
    │   ├── mapTrts.R
    │   ├── incidAbomb.R
    │   ├── mkLT.R
    │   ├── esd.R
    │   ├── mkMrtLocal.R
    │   ├── incidSEER.R
    │   ├── simSurv.R
    │   ├── seerStats.R
    │   ├── summary.seerSet.R
    │   ├── seerSet2.R
    │   ├── seerSet.R
    │   └── canc2py.R
    ├── DESCRIPTION
    └── NAMESPACE
├── .gitignore
├── docs
    ├── mkSEER.pptx
    ├── mkSEER2.pptx
    ├── tutFig1.pdf
    ├── tutFig1.png
    ├── tutFig2.pdf
    ├── tutFig2.png
    ├── tutFig3.pdf
    ├── tutFig3.png
    ├── tutFig4.pdf
    ├── tutFig4.png
    ├── tutFig5.pdf
    ├── tutFig5.png
    ├── tutFig6.pdf
    ├── tutFig6.png
    ├── mkSEER2v2.pptx
    ├── mkSEERfig.pptx
    ├── pmfFigure1.png
    ├── pmfTable1.png
    ├── gettingData.docx
    ├── mkSEERfigOld.pptx
    └── knitOverview.R
├── youtube
    └── meetupJune5.pptx
└── SEERaBomb.Rproj


/SEERaBomb/.gitignore:
--------------------------------------------------------------------------------
1 | src-i386/
2 | src-x64/
3 | 


--------------------------------------------------------------------------------
/SEERaBomb/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .DS_Store
5 | 


--------------------------------------------------------------------------------
/SEERaBomb/src/.gitignore:
--------------------------------------------------------------------------------
1 | SEERaBomb.so
2 | SEERaBomb.dll
3 | *.[oa]
4 | 


--------------------------------------------------------------------------------
/docs/mkSEER.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/mkSEER.pptx


--------------------------------------------------------------------------------
/docs/mkSEER2.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/mkSEER2.pptx


--------------------------------------------------------------------------------
/docs/tutFig1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig1.pdf


--------------------------------------------------------------------------------
/docs/tutFig1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig1.png


--------------------------------------------------------------------------------
/docs/tutFig2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig2.pdf


--------------------------------------------------------------------------------
/docs/tutFig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig2.png


--------------------------------------------------------------------------------
/docs/tutFig3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig3.pdf


--------------------------------------------------------------------------------
/docs/tutFig3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig3.png


--------------------------------------------------------------------------------
/docs/tutFig4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig4.pdf


--------------------------------------------------------------------------------
/docs/tutFig4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig4.png


--------------------------------------------------------------------------------
/docs/tutFig5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig5.pdf


--------------------------------------------------------------------------------
/docs/tutFig5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig5.png


--------------------------------------------------------------------------------
/docs/tutFig6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig6.pdf


--------------------------------------------------------------------------------
/docs/tutFig6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/tutFig6.png


--------------------------------------------------------------------------------
/docs/mkSEER2v2.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/mkSEER2v2.pptx


--------------------------------------------------------------------------------
/docs/mkSEERfig.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/mkSEERfig.pptx


--------------------------------------------------------------------------------
/docs/pmfFigure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/pmfFigure1.png


--------------------------------------------------------------------------------
/docs/pmfTable1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/pmfTable1.png


--------------------------------------------------------------------------------
/docs/gettingData.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/gettingData.docx


--------------------------------------------------------------------------------
/docs/mkSEERfigOld.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/docs/mkSEERfigOld.pptx


--------------------------------------------------------------------------------
/SEERaBomb/data/nvsr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/SEERaBomb/data/nvsr.rda


--------------------------------------------------------------------------------
/SEERaBomb/data/nvsr01.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/SEERaBomb/data/nvsr01.rda


--------------------------------------------------------------------------------
/SEERaBomb/data/stdUS.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/SEERaBomb/data/stdUS.rda


--------------------------------------------------------------------------------
/youtube/meetupJune5.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/youtube/meetupJune5.pptx


--------------------------------------------------------------------------------
/SEERaBomb/data/Sigurdson.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/SEERaBomb/data/Sigurdson.rda


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/overview.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/SEERaBomb/inst/docs/overview.pdf


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/gettingData.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/SEERaBomb/inst/docs/gettingData.pdf


--------------------------------------------------------------------------------
/SEERaBomb/man/figures/mkSEERfig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/SEERaBomb/man/figures/mkSEERfig.png


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/CMLmysteries/readme.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/SEERaBomb/inst/docs/papers/CMLmysteries/readme.docx


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/seerEAR/data/D1.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radivot/SEERaBomb/HEAD/SEERaBomb/inst/docs/papers/uveal/seerEAR/data/D1.RData


--------------------------------------------------------------------------------
/SEERaBomb/R/RcppExports.R:
--------------------------------------------------------------------------------
1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 | 
4 | fillPYM <- function(PYin, PYM) {
5 |     .Call(`_SEERaBomb_fillPYM`, PYin, PYM)
6 | }
7 | 
8 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/getPY.R:
--------------------------------------------------------------------------------
1 | getPY=function(surv,bin,binS,brks) {
2 |   binLengths=diff(brks)
3 |   LL=getBinInfo(bin,binS)["LL"]
4 |   UL=getBinInfo(bin,binS)["UL"]
5 |   indx=getBinInfo(bin,binS)["index"]
6 |   ifelse(surv>=UL,binLengths[indx],ifelse(surv>LL,surv-LL,0))
7 | }
8 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/renal/incidNdemog/fieldNamesExcel.R:
--------------------------------------------------------------------------------
1 | rm(list=ls())
2 | require(dplyr)
3 | library(SEERaBomb)
4 | (df=getFields("~/data/SEER")) # this doesn't fly in windows since ~ maps to /users/radivot/documents
5 | library(WriteXLS)
6 | WriteXLS(df,ExcelFileName="renal/outs/fieldDefs2019.xlsx")
7 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/extraFuncs/mkPYM.R:
--------------------------------------------------------------------------------
 1 | mkPYM <- function(PY,nyears) {
 2 |     PYM=.Call('mkPYM', PACKAGE = 'SEERaBomb', PY[,1], PY[,2], PY[,3],nyears)
 3 | #     print(nyears)
 4 |     dim(PYM)=c(126,nyears)
 5 |     colnames(PYM)=1973:(1973+nyears-1)
 6 |     rownames(PYM)=0.5:125.5
 7 |     PYM
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/foldDF.R:
--------------------------------------------------------------------------------
1 | foldDF=function(DF,keep=c("cancer1","cancer2","int","trt")) { #sum over sex
2 |   O=E=RR=all_of=NULL
3 |   DF=DF%>%group_by_at(vars(all_of(keep)))%>%summarize(O=sum(O),E=sum(E),t=mean(t))
4 |   DF%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))%>%filter(E>0)%>%ungroup 
5 | }
6 | 
7 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/getBinInfo.R:
--------------------------------------------------------------------------------
1 | getBinInfo=function(binLab,binS) {
2 |   w=regexpr("[0-9]+[\\.]?[0-9]*", binLab,perl=T)  
3 |   LL=as.numeric(substring(binLab, w, w+attr(w, "match.length")-1))
4 |   rest=substring(binLab,w+attr(w, "match.length"))
5 |   w=regexpr("[0-9]+[\\.]?[0-9]*", rest,perl=T)  
6 |   UL=as.numeric(substring(rest, w, w+attr(w, "match.length")-1))
7 |   c(LL=LL,UL=UL,index=which(binLab==binS))
8 | }
9 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/mkMrt.R:
--------------------------------------------------------------------------------
1 | mkMrt<-function(username,passwd,country="USA",mrtHome="~/data/usMort"){
2 |   mrtHome=path.expand(mrtHome)
3 |   if(!dir.exists(mrtHome)) dir.create(mrtHome,recursive=T)
4 |   mrt=demography::hmd.mx(country, username, passwd)$rate 
5 |   names(mrt)=c("Female","Male","Total")
6 |   save(mrt,file=f<-file.path(mrtHome,"mrt.RData"))
7 |   cat("The dataframe mrt has been written to:",f,"\n")
8 | }
9 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/tMDSrepair.R:
--------------------------------------------------------------------------------
1 | ###tMDSrepair.R
2 | (sc=canc%>%filter(histo3==9920|histo3==9987,yrdx>2000))#subset canc
3 | (d=sc%>%count(sex,yrdx,histo3))#n in d holds sex-year-ICDO3 group row counts 
4 | d%>%ggplot(aes(x=yrdx,y=n))+facet_grid(sex~histo3)+ylab("Cases")+gp+
5 |  xlab("Year of Diagnosis")+scale_x_continuous(breaks=seq(2005,2015,5))+tc(11)+sbb
6 | ggsave("~/Results/tutorial/tMDSrepaired.pdf",width=3,height=2.5) 
7 | 


--------------------------------------------------------------------------------
/SEERaBomb.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: No
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackagePath: SEERaBomb
18 | PackageInstallArgs: --no-multiarch --with-keep.source
19 | PackageBuildBinaryArgs: '--no-lock'
20 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/foldD.R:
--------------------------------------------------------------------------------
1 | # utils::globalVariables(c("O","E","EAR","PY","all_of")) # instead of NULLs inside the function
2 | foldD=function(D,keep=c("int")) {
3 |   O=E=EAR=PY=all_of=NULL
4 |   D=D%>%group_by_at(vars(all_of(keep)))%>%summarize(O=sum(O),E=sum(E),PY=sum(PY),t=mean(t))
5 |   D%>%mutate(EAR=(O-E)/PY,LL=EAR-1.96*sqrt(O)/PY,UL=EAR+1.96*sqrt(O)/PY,
6 |              RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))%>%filter(E>0)%>%ungroup 
7 | }
8 | 
9 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/renal/incidNdemog/demog.R:
--------------------------------------------------------------------------------
1 | graphics.off();rm(list=ls())#clear plots and environment
2 | # library(tidyverse);library(SEERaBomb)
3 | library(SEERaBomb)
4 | load("renal/data/d.RData") #see load.R in common to make this
5 | load("~/data/SEER/mrgd/popsae.RData")#load SEER population data
6 | d$age=cut(d$agedx,c(0,40,60,80,126),include.lowest = T)
7 | d$year=cut(d$yrdx,c(1975,1995,2016),include.lowest = T,dig.lab=4)
8 | mkDemographics(d,outDir="renal/outs")
9 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/rad_noRad.R:
--------------------------------------------------------------------------------
 1 | rad_noRad=function(DF) {# collapse chemo in trt and remove unknown rads
 2 |   trt=NULL
 3 |   DF$trt=fct_collapse(DF$trt, rad = c("rad.chemo", "rad.noChemo"),
 4 |                       noRad = c("noRad.chemo", "noRad.noChemo"),
 5 |                       unk = c("unk.chemo", "unk.noChemo") )
 6 |   DF%>%filter(trt!="unk")
 7 |   # df=as.data.table(DF) # namespace warning hassle, so skip data.table for now
 8 |   # df=df[trt!="unk"]
 9 |   # as_tibble(df)
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/renal/incidNdemog/buildWithPrimsite.R:
--------------------------------------------------------------------------------
 1 | # ############ this builds the SEER data to include primSite
 2 | library(SEERaBomb)
 3 | (df=getFields("~/data/SEER")) # this doesn't fly in windows since ~ maps to /users/radivot/documents
 4 | picks=c("casenum","reg","race","sex","agedx",
 5 |         "yrbrth","seqnum","modx","yrdx","histo3",
 6 |         "ICD9","primsite","COD","surv","radiatn","chemo")
 7 | (rdf=pickFields(df,picks))
 8 | mkSEER(rdf,outFile="cancPrim",writePops=F)
 9 | 
10 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/ICD10cod.Rd:
--------------------------------------------------------------------------------
 1 | \name{ICD10cod}
 2 | \alias{ICD10cod}
 3 | \title{Tibble of CODs}
 4 | \description{ Returns a tibble with  cause of death (COD) names, codes in SEER, and ICD10 codes.} 
 5 | \usage{ICD10cod()} 
 6 | 
 7 | \value{Tibble with COD names, SEER codes and ICD10 codes.}
 8 | 
 9 | 
10 | \author{Tom Radivoyevitch (radivot@ccf.org)}
11 | \seealso{\code{\link{SEERaBomb-package} }}
12 | 
13 | \examples{
14 | library(SEERaBomb)
15 | ICD10cod() # returns the tibble
16 | }
17 | 
18 | 
19 | \keyword{IO}
20 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/index.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 | <title>SEERaBomb files</a></title>
 4 | </head>
 5 | <h3>SEERaBomb package documentation </h3>
 6 | <ul>
 7 |     <li> <a href="overview.pdf">
 8 |          Overview of Creating SEER Data R Binaries
 9 |          </a> 
10 |     <li> <a href="fieldNames.html">Field names</a>
11 |     <li> <a href="gettingData.pdf">Getting data</a>
12 | </ul>
13 | Here is the website for the <a href="http://epbi-radivot.cwru.edu/SEERaBomb/SEERaBomb.html">SEERaBomb package</a>.
14 | </html>
15 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/examples/mk2Dsplines.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls()) 
 2 | library(SEERaBomb)
 3 | load("~/data/SEER/mrgd/cancDef.RData")
 4 | load("~/data/SEER/mrgd/popsae.RData")
 5 | pm=seerSet(canc,popsae,Sex="Male")
 6 | pm=mk2D(pm,secondS=c("CML","CMML","MDS","AML","CLL","ALL")) # pooled race male leukemias 
 7 | plot2D(pm)
 8 | pm
 9 | 
10 | t(canc%>%filter(cancer=="MDS")%>%group_by(yrdx)%>%summarise(count=n()))
11 | t(canc%>%filter(cancer=="CMML")%>%group_by(yrdx)%>%summarise(count=n()))
12 | t(canc%>%filter(cancer=="CML")%>%group_by(yrdx)%>%summarise(count=n()))
13 | 


--------------------------------------------------------------------------------
/docs/knitOverview.R:
--------------------------------------------------------------------------------
 1 | # note: to make a png from AI use export
 2 | # install.packages('knitr', dep=T)
 3 | #setwd("/Users/radivot/case/active/ccems/ccems/inst/doc")
 4 | #setwd("/Users/radivot/soft/rpacks/sbmlr/inst/doc")
 5 | #knit2pdf("quick-start.Rnw")         ## in 1 step
 6 | #knit2pdf("ccems.Rnw")         ## in 1 step
 7 | #purl("ccems.Rnw")
 8 | #purl("quick-start.Rnw")
 9 | 
10 | require(knitr)
11 | setwd("/Users/radivot/githubs/SEERaBomb/docs")
12 | knit2pdf("overview.Rnw",clean=TRUE)      
13 | file.remove("overview.tex")
14 | #purl("overview.Rnw")
15 | 
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/print.seerSet.summary.R:
--------------------------------------------------------------------------------
 1 | print.seerSet.summary<-function(x, ...) {
 2 | #   cf=function (x) comma_format()(x)
 3 |   PY=year=NULL
 4 |   cat(x$title)
 5 |   print(x$d) #,row.names=FALSE)
 6 |   cat("Notes:\n")
 7 |   for (i in 1:length(x$notes))
 8 |     cat(i,") ",x$notes[i],"\n")
 9 | #   N=cf(sum(x$P$PY))
10 |   N=sum(x$P$PY)
11 |   tit=paste(N,"Million",ifelse(x$sex=="male","Male","Female"),"Person-Years")
12 |   p=qplot(year,PY,ylab="Person-Years (Millions)",main=tit,data=x$P)+
13 |     theme(axis.text=element_text(size=rel(2)),axis.title=element_text(size=rel(2)))
14 |   print(p)
15 | }
16 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/renal/incidNdemog/allFields.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(tidyverse);library(SEERaBomb)
 3 | system.time(load("~/data/SEER/mrgd/cancALL.RData")) # which takes ~65 secs
 4 | d=canc%>%filter(primsite%in%c("C649"),histo3==8310)
 5 | nms=names(d)
 6 | L=lapply(nms,function(x) {dd=as.data.frame(table(d[,x]))
 7 |                           if (dim(dd)[2]==2) names(dd)=c("Variable","Frequency")
 8 |                           dd} )
 9 | names(L)=nms
10 | L
11 | library(WriteXLS)
12 | WriteXLS(L,ExcelFileName="renal/outs/ccRCCallFields.xlsx",AdjWidth=T)
13 | 


--------------------------------------------------------------------------------
/SEERaBomb/src/SEERaBomb_init.c:
--------------------------------------------------------------------------------
 1 | #include <R.h>
 2 | #include <Rinternals.h>
 3 | #include <stdlib.h> // for NULL
 4 | #include <R_ext/Rdynload.h>
 5 | 
 6 | /* FIXME: 
 7 | Check these declarations against the C/Fortran source code.
 8 | */
 9 | 
10 | /* .Call calls */
11 | extern SEXP _SEERaBomb_fillPYM(SEXP, SEXP);
12 | 
13 | static const R_CallMethodDef CallEntries[] = {
14 |   {"_SEERaBomb_fillPYM", (DL_FUNC) &_SEERaBomb_fillPYM, 2},
15 |   {NULL, NULL, 0}
16 | };
17 | 
18 | void R_init_SEERaBomb(DllInfo *dll)
19 | {
20 |   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
21 |   R_useDynamicSymbols(dll, FALSE);
22 | }


--------------------------------------------------------------------------------
/SEERaBomb/man/mapCancsW.Rd:
--------------------------------------------------------------------------------
 1 | \name{mapCancsW}
 2 | \alias{mapCancsW}
 3 | \title{Map SEER Who Sitecodes to cancers}
 4 | \description{ Adds a factor cancer with easily recognizable levels to a SEER cancer data.frame.}
 5 | \usage{mapCancsW(D)} 
 6 | \arguments{
 7 |   \item{D}{A data frame that includes who and histo3 as columns.  }
 8 | }
 9 | 
10 | \value{The input data frame with an additional cancerW column added on.}
11 | 
12 | \author{Tom Radivoyevitch (radivot@ccf.org)}
13 | \seealso{\code{\link{SEERaBomb-package}} }
14 | 
15 | \examples{
16 | library(SEERaBomb)
17 | mapCancsW # shows default definitions
18 | }
19 | 
20 | 
21 | \keyword{IO}
22 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/PMF/demogCheese.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | load("pmf/data/d7.RData")
 3 | d7%>%group_by(sex)%>%summarize(n=n())
 4 | library(cheese)
 5 | d7%>%univariate_table()
 6 | d7%>%group_by(reg)%>%summarize(cases=n(),O=sum(status))
 7 | d7%>%group_by(reg,stcnty)%>%summarize(cases=n(),O=sum(status))%>%filter(cases>20)%>%print(n=100)
 8 | # names(canc)
 9 | # system.time(load("~/data/SEER/mrgd/cancALL.RData")) # which takes ~65 secs
10 | # (ids=unique(canc$stcnty))  # 626 counties looking at all cancers
11 | load("pmf/data/allfields2019.RData")
12 | (dids=unique(d$stcnty))  # 436 counties with at least one PMF
13 | 
14 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mapCancsP.Rd:
--------------------------------------------------------------------------------
 1 | \name{mapCancsP}
 2 | \alias{mapCancsP}
 3 | \title{Map Primary Site and ICD-O3 histology codes to cancers}
 4 | \description{ Adds a factor cancer with easily recognizable levels to a SEER cancer data.frame.}
 5 | \usage{mapCancsP(D)} 
 6 | \arguments{
 7 |   \item{D}{A data frame that includes site and histo3 as columns.  }
 8 | }
 9 | 
10 | \value{The input data frame with an additional cancer column added on.}
11 | 
12 | \author{Tom Radivoyevitch (radivot@ccf.org)}
13 | \seealso{\code{\link{SEERaBomb-package}} }
14 | 
15 | \examples{
16 | library(SEERaBomb)
17 | mapCancsP # shows default definitions
18 | }
19 | 
20 | 
21 | \keyword{IO}
22 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/renal/common/load.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(SEERaBomb)
 3 | library(tidyverse)
 4 | library(ggsci)
 5 | library(survival);library(survminer)
 6 | # load("~/data/SEER/mrgd/cancPrim.RData")#load SEER cancer data
 7 | # d=canc%>%filter(primsite%in%c("C649"),histo3==8310)
 8 | # table(d$histo3)
 9 | # d$cancer="ccRCC"
10 | # d=d%>%filter(!is.na(surv))%>%print(n=4)
11 | # d=d%>%mutate(status=as.numeric(COD>0),surv=(surv+0.5)/12)
12 | # d5=d%>%select(yrdx,agedx,sex,surv,status)
13 | # save(d,d5,file="renal/data/d.RData")
14 | load("renal/data/d.RData")
15 | load("~/data/mrt/mrtUSA.RData")#loads US mortality data
16 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/incidSEER.Rd:
--------------------------------------------------------------------------------
 1 | \name{incidSEER}
 2 | \alias{incidSEER}
 3 | \title{Computes SEER incidences}
 4 | \description{Creates SEER incidence rates and confidence intervals. }
 5 | \usage{incidSEER(canc,popsae,cancers)} 
 6 | \arguments{
 7 |   \item{canc}{data frame of cancer cases}
 8 |   \item{popsae}{data frame of person years at risk}
 9 |   \item{cancers}{character vector of cancer types}
10 | }
11 | 
12 | \details{This left joins popsae and cancers in canc.}
13 | 
14 | \value{A data frame with observed cases (O), incid, and incid 95\% CI limits LL and UL.}
15 | 
16 | \author{Tom Radivoyevitch (radivot@ccf.org)}
17 | \seealso{\code{\link{SEERaBomb-package} }}
18 | %\examples{
19 | %library(SEERaBomb)
20 | %}
21 | \keyword{IO}
22 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/PMF/demog.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(tidyverse);library(SEERaBomb)
 3 | load("~/data/SEER/mrgd/cancPrim.RData")#load SEER cancer data
 4 | load("~/data/SEER/mrgd/popsae.RData")#load SEER population data
 5 | head(canc,2)
 6 | d=canc%>%filter(primsite=="C421",histo3==9961)%>%print(n=2)
 7 | d=d%>%filter(yrdx>2000)%>%print(n=2)
 8 | d=d%>%filter(!is.na(surv))%>%mutate(status=as.numeric(COD>0),surv=(surv+0.5)/12)
 9 | d$age=cut(d$agedx,c(0,40,60,80,126),include.lowest = T)
10 | d$year=cut(d$yrdx,c(2001,2008,2016),include.lowest = T,dig.lab=4)
11 | d$cancer="PMF"
12 | mkDemographics(d,outDir="pmf/outs")
13 | d5=d%>%select(yrdx,agedx,sex,surv,status)
14 | save(d,d5,file="pmf/data/d.RData")
15 | 
16 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/foldDF.Rd:
--------------------------------------------------------------------------------
 1 | \name{foldDF}
 2 | \alias{foldDF}
 3 | \title{Fold DF Outputs of Cancer Since Diagnosis function csd}
 4 | \description{Collapses sex of row binded DF list elements of csd output computing new Relative Risks (RR).  }
 5 | \usage{foldDF(DF,keep=c("cancer1","cancer2","int","trt")) } 
 6 | \arguments{
 7 |   \item{DF}{Row bindings of csd DF with e.g. sex to collapse.}
 8 |   \item{keep}{Idea is to keep time intervals (int) and fold variables like sex. }
 9 | }
10 | \value{Tibble with new summed observed and expected cases and new RR with CI for each interval. }
11 | \author{Tom Radivoyevitch (radivot@ccf.org)}
12 | \seealso{\code{\link{SEERaBomb-package}, \link{csd} } }
13 | 
14 | \examples{ # nothing for now
15 | }
16 | 
17 | \keyword{IO}
18 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mapCOD2.Rd:
--------------------------------------------------------------------------------
 1 | \name{mapCOD2}
 2 | \alias{mapCOD2}
 3 | \title{Map CODs to strings}
 4 | \description{ Maps integer cause of death (COD) codes in COD of a SEER cancer data frame 
 5 | to a factor CODS with recognizable levels. Adapted to work with SEER release of April 2022 }
 6 | \usage{mapCOD2(D)} 
 7 | \arguments{
 8 |   \item{D}{A data frame that includes COD as a column.  }
 9 | }
10 | 
11 | \value{The input data frame with an additional CODS column added on.}
12 | 
13 | \note{Typing mapCOD2 dumps the function definition and thus the mapping used.}
14 | 
15 | \author{Tom Radivoyevitch (radivot@ccf.org)}
16 | \seealso{\code{\link{SEERaBomb-package} }}
17 | 
18 | \examples{
19 | library(SEERaBomb)
20 | mapCOD2 # shows default definitions
21 | }
22 | 
23 | 
24 | \keyword{IO}
25 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mapCODs.Rd:
--------------------------------------------------------------------------------
 1 | \name{mapCODs}
 2 | \alias{mapCODs}
 3 | \title{Map CODs to strings}
 4 | \description{ Maps integer cause of death (COD) codes in COD of a SEER cancer data frame 
 5 | to a factor CODS with recognizable levels. This is a bit slow, so it is called within mkSEER.}
 6 | \usage{mapCODs(D)} 
 7 | \arguments{
 8 |   \item{D}{A data frame that includes COD as a column.  }
 9 | }
10 | 
11 | \value{The input data frame with an additional CODS column added on.}
12 | 
13 | \note{Typing mapCODs dumps the function definition and thus the mapping used.}
14 | 
15 | \author{Tom Radivoyevitch (radivot@ccf.org)}
16 | \seealso{\code{\link{SEERaBomb-package}, \link{mapRegs}} }
17 | 
18 | \examples{
19 | library(SEERaBomb)
20 | mapCODs # shows default definitions
21 | }
22 | 
23 | 
24 | \keyword{IO}
25 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/changes.txt:
--------------------------------------------------------------------------------
 1 | This package aims to make SEER and A-bomb data analyses more readily reproducible.
 2 | 
 3 | Ver 0.01.1  5/27/2012. First Draft containing mkSEER, getFields and pickFields 
 4 |             Set up separate repository so installation works without submission to CRAN via 
 5 |             install.packages(c("LaF","RSQLite","bbmle","SEERaBomb"),
 6 |             repos=c("http://cran.cwru.edu/","http://epbi-radivot.cwru.edu/"))   
 7 |             6/17/2012. Submitted to CRAN 
 8 | 
 9 |                    
10 | Ver 0.2.1  New version now allows this (i.e. getting all fields) without error
11 |            (df=getFields())
12 |            (dff=pickFields(df,picks=df$names))
13 |                 
14 | Ver 0.3.0  Hereafter changes are documented in the NEWS file 
15 | 
16 |                    


--------------------------------------------------------------------------------
/SEERaBomb/man/mapCancs.Rd:
--------------------------------------------------------------------------------
 1 | \name{mapCancs}
 2 | \alias{mapCancs}
 3 | \title{Map ICD9 and ICD-O3 codes to cancers}
 4 | \description{ Adds a factor cancer with easily recognizable levels to a SEER cancer data.frame.}
 5 | \usage{mapCancs(D)} 
 6 | \arguments{
 7 |   \item{D}{A data frame that includes ICD9 and histo3 as columns.  }
 8 | }
 9 | 
10 | \value{The input data frame with an additional cancer column added on.}
11 | 
12 | \note{This is used by mkSEER() when it generates R binaries of the SEER data. Otherwise it provides current cancer 
13 | definitions (seen by looking at the function definition).}
14 | 
15 | \author{Tom Radivoyevitch (radivot@ccf.org)}
16 | \seealso{\code{\link{SEERaBomb-package}, \link{mapRegs}} }
17 | 
18 | \examples{
19 | library(SEERaBomb)
20 | mapCancs # shows default definitions
21 | }
22 | 
23 | 
24 | \keyword{IO}
25 | 


--------------------------------------------------------------------------------
/SEERaBomb/src/RcppExports.cpp:
--------------------------------------------------------------------------------
 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | #include <Rcpp.h>
 5 | 
 6 | using namespace Rcpp;
 7 | 
 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM
 9 | Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
10 | Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
11 | #endif
12 | 
13 | // fillPYM
14 | NumericMatrix fillPYM(SEXP PYin, SEXP PYM);
15 | RcppExport SEXP _SEERaBomb_fillPYM(SEXP PYinSEXP, SEXP PYMSEXP) {
16 | BEGIN_RCPP
17 |     Rcpp::RObject rcpp_result_gen;
18 |     Rcpp::RNGScope rcpp_rngScope_gen;
19 |     Rcpp::traits::input_parameter< SEXP >::type PYin(PYinSEXP);
20 |     Rcpp::traits::input_parameter< SEXP >::type PYM(PYMSEXP);
21 |     rcpp_result_gen = Rcpp::wrap(fillPYM(PYin, PYM));
22 |     return rcpp_result_gen;
23 | END_RCPP
24 | }
25 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/getBinInfo.Rd:
--------------------------------------------------------------------------------
 1 | \name{getBinInfo}
 2 | \alias{getBinInfo}
 3 | \title{Gets the lower and upper limit and index of a tsd bin}
 4 | \description{ Extract time since diagnosis (tsd) interval information 
 5 | in strings produced by cut.}
 6 | \usage{getBinInfo(binLab, binS)} 
 7 | \arguments{
 8 |   \item{binLab}{The label of the specific bin of interest.  }
 9 |   \item{binS}{The character vector of bin labels in which binLab exists.  }
10 | }
11 | 
12 | \value{A numeric vector containting the lower limit (LL), upper limit (UL), and position (index) in the parent vector binS.}
13 | 
14 | \author{Tom Radivoyevitch (radivot@ccf.org)}
15 | \seealso{\code{\link{SEERaBomb-package}} }
16 | \examples{
17 | library(SEERaBomb)
18 | brks=c(0,0.25,1,3,5)  
19 | (binS=levels(cut(brks+0.1,breaks=c(brks,100)))) #make a vector of intervals 
20 | getBinInfo(binS[4],binS) # test getBinInfo
21 | }
22 | \keyword{internal}
23 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mapTrts.Rd:
--------------------------------------------------------------------------------
 1 | \name{mapTrts}
 2 | \alias{mapTrts}
 3 | \title{Map treatment codes to factor}
 4 | \description{Uses SEER codes in the SEER field radiatn to add a
 5 | factor named trt with levels "noRad","rad", and "unk" to a cancer data frame.}
 6 | \usage{mapTrts(D)} 
 7 | \arguments{
 8 |   \item{D}{A SEER cancer data frame that includes the field radiatn as a column.  }
 9 | }
10 | 
11 | \value{The input data frame with an additional trt column added to its end.}
12 | 
13 | \note{This function is used by mkSEER when it generates merged R binaries. It is exposed to state
14 | the default definition of trt and, by way of example, 
15 | to show how to override it.}
16 | \author{Tom Radivoyevitch (radivot@ccf.org)}
17 | \seealso{\code{\link{SEERaBomb-package}, \link{mapTrts}} }
18 | 
19 | \examples{
20 | library(SEERaBomb)
21 | mapTrts # exposes default definition of trt
22 | }
23 | 
24 | 
25 | \keyword{IO}
26 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mkLT.Rd:
--------------------------------------------------------------------------------
 1 | \name{mkLT}
 2 | \alias{mkLT}
 3 | \title{Make Life Tables}
 4 | \description{Makes life tables from mortality data binaries produced by mkMrt() and places the files in the same folder.}
 5 | \usage{ mkLT(mrtHome="~/data/usMort",input="mrt.RData",output="ltb.RData")}
 6 | 
 7 | \arguments{
 8 |   \item{mrtHome}{ Directory that contains the mortality data binary.  }
 9 |   \item{input}{ File that contains the mortality data binary.  }
10 |   \item{output}{ File that will contain the life tables.  }
11 | }
12 | 
13 | \value{None. This function is called for its side-effect of producing male and female life tables in files.  }
14 | \author{ Tom Radivoyevitch (\email{radivot@ccf.org}) }
15 | \seealso{\code{\link{SEERaBomb-package},\link{mkSEER}} }
16 | \examples{
17 | \dontrun{
18 | library(SEERaBomb)
19 | mkLT()
20 | load("~/data/usMort/ltb.RData")
21 | tail(ltb$Female)
22 | }
23 | }
24 | \keyword{IO}
25 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/rad_noRad.Rd:
--------------------------------------------------------------------------------
 1 | \name{rad_noRad}
 2 | \alias{rad_noRad}
 3 | \title{Collapse trt to only rad and noRad}
 4 | \description{In csd() output DF,this collapses trt to just rad and noRad and removes rad unknowns.    }
 5 | \usage{rad_noRad(DF) } 
 6 | \arguments{
 7 |   \item{DF}{DF in list output of csd() with default trt field levels 
 8 |   "rad.chemo", "rad.noChemo", "noRad.chemo", "noRad.noChemo", "unk.chemo", and "unk.noChemo".} 
 9 | }
10 | \value{DF with trt reduced to rad and noRad, removing rows with rad unknown. }
11 | \note{ Filtering unknowns before csd() to save time is not recommended since second 
12 |       cancer treatments do not matter and can thus be unknown. After csd() trt is only the 
13 |       1st cancer trt. }
14 | 
15 | \author{Tom Radivoyevitch (radivot@ccf.org)}
16 | \seealso{\code{\link{SEERaBomb-package}, \link{csd} } }
17 | 
18 | \examples{ # nothing for now
19 | }
20 | 
21 | \keyword{IO}
22 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/riskVsAge.Rd:
--------------------------------------------------------------------------------
 1 | \name{riskVsAge}
 2 | \alias{riskVsAge}
 3 | \title{Second cancer risk vs attained age after first cancer}
 4 | \description{Computes absolute risk of 2nd cancers as a function of attained age after first cancer. }
 5 | \usage{riskVsAge(canc,firstS=c("NHL","HL","MM"),
 6 |                 secondS=c("AML","MDS"),brksa=c(0,30,50,60,70,80)) }  
 7 | \arguments{
 8 |   \item{canc}{canc made by mkSEER().}
 9 |   \item{firstS}{Character vector of first cancers of interest.}
10 |   \item{secondS}{Character vector of second cancers of interest.}
11 |   \item{brksa}{Vector of breaks in years used to form attained age intervals.}
12 | }
13 | \value{data.frame with incidence vs age. }
14 | 
15 | \note{ This function was developed with support from the Cleveland Clinic Foundation.}
16 | \author{Tom Radivoyevitch (radivot@ccf.org)}
17 | \seealso{\code{\link{SEERaBomb-package}, \link{mkSEER}} }
18 | 
19 | \keyword{IO}
20 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/blood2012/blood2012fig5bc.R:
--------------------------------------------------------------------------------
 1 | library(SEERaBomb)
 2 | if(length(grep("linux",R.Version()$os))) windows <- function( ... ) X11( ... )
 3 | if(length(grep("darwin",R.Version()$os))) windows <- function( ... ) quartz( ... )
 4 | windows(width=10,height=5)
 5 | par(mfrow=c(1,2),mar=c(4.3,2,.2,1.2),oma=c(0,0,2,0),cex=1.2,cex.lab=1.2,cex.axis=1.1)
 6 | d=Sigurdson[Sigurdson$age>20,]
 7 | d$lgTlcn=log(d$tlcn)
 8 | lm1=lm(lgTlcn~age,data=d)
 9 | lm2=lm(lgTlcn~age+I(age^2),data=d)
10 | print(summary(lm1))
11 | print(summary(lm2))
12 | print(anova(lm1,lm2))
13 | lm3=lm(lgTlcn~log(age),data=d)
14 | plot(d$tlcn~d$age,log="y",ylab="",xlab="Age",xlim=c(20,84))
15 | title("Translocations per 100 cell equivalents",outer=T)
16 | y=exp(lm1$coeff[1]+lm1$coeff[2]*d$age)
17 | lines(d$age,y)
18 | plot(d$tlcn~d$age,log="xy",ylab="",xlab="Age",xlim=c(20,84))
19 | y=exp(lm3$coeff[1]+lm3$coeff[2]*log(d$age))
20 | lines(d$age,y)
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/incidEx2.R:
--------------------------------------------------------------------------------
 1 | ###incidEx2.R
 2 | (D=incidSEER(canc,popsae,"CML"))
 3 | (D=D%>%filter(age>=20,age<=80,year>=2000)%>%mutate(ageG=cut(age,seq(20,80,5))))
 4 | (D=D%>%group_by(sex,race,ageG)%>%
 5 |   summarize(age=mean(age),py=sum(py),n=sum(n))%>%mutate(incid=n/py))
 6 | summary(mm1<-mle2(n~dpois(lambda=exp(c+k*(age-50))*py),
 7 |                   #parameters=list(c~sex,k~sex),#k no diff
 8 |                   parameters=list(c~sex,k~race),#kW-O P=7e-8
 9 |                   method="Nelder-Mead",start=list(c=0,k=0.04),data=D)) 
10 | D$EI=predict(mm1)/D$py#same as D=D%>%ungroup()%>%mutate(EI=predict(mm1)/py) 
11 | D%>%ggplot(aes(x=age,y=incid,col=sex,shape=sex))+gxi+gyi+facet_grid(~race)+gp+
12 |   geom_line(aes(y=EI))+scale_y_log10(breaks=c(1,2,5))+agts+jco+tc(13)+sbb+ltb+
13 |   theme(legend.position=c(.45,.91),legend.key.height=unit(.7,'lines'))
14 | ggsave("~/Results/tutorial/incidEx2.pdf",width=4,height=3)
15 | 
16 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/renal/common/acros.R:
--------------------------------------------------------------------------------
 1 | gp=geom_point()
 2 | gl=geom_line()
 3 | geRR=geom_errorbar(aes(ymin=rrL,ymax=rrU),width=.2)
 4 | # ge=geom_errorbar(aes(ymin=LL,ymax=UL),width=0.2)#for absolute risks
 5 | gh=geom_hline(yintercept=1)
 6 | # svts=scale_x_continuous(breaks=c(0,5,10))#surv times
 7 | sbb=theme(strip.background=element_blank())
 8 | ltb=theme(legend.margin=margin(0,0,0,0),legend.title=element_blank())
 9 | ltp=theme(legend.position="top")
10 | # lh=theme(legend.direction="horizontal")
11 | jco=scale_color_jco()
12 | tc=function(sz) theme_classic(base_size=sz);
13 | gx=xlab("Years Since Diagnosis")
14 | 
15 | agts=scale_x_continuous(breaks=c(25,50,75))#age times
16 | gxi=xlab("Age (Years)")
17 | gyi=ylab(quote(paste("Cases per ",10^5," Person Years")))
18 | sy=scale_y_log10()
19 | 
20 | 
21 | # svts=scale_x_continuous(breaks=seq(0,50,10))#surv times
22 | 
23 | # myt=theme(legend.text=element_text(size=12),strip.text=element_text(size=12))
24 | 


--------------------------------------------------------------------------------
/SEERaBomb/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: SEERaBomb
 2 | Title: SEER and Atomic Bomb Survivor Data Analysis Tools
 3 | Version: 2019.5
 4 | Date: 2022-4-20
 5 | Description: Creates SEER (Surveillance, Epidemiology and End Results) and 
 6 |     A-bomb data binaries from ASCII sources and provides tools for estimating
 7 |     SEER second cancer risks. Methods are described in <doi:10.1038/leu.2015.258>.
 8 | Authors@R: c(person("Tomas", "Radivoyevitch", role = c("aut", "cre"),
 9 |                      email = "radivot@ccf.org"),
10 |               person("R.", "Molenaar", role = "ctb",
11 |                      email = "r.j.molenaar@amc.uva.nl"))
12 | Depends: dplyr, ggplot2, R (>= 3.5) 
13 | Suggests:  bbmle
14 | License: GPL (>= 2)
15 | Imports: Rcpp (>= 0.11.3),demography,   
16 |          reshape2, mgcv, tibble, LaF, DBI, RSQLite, openxlsx, WriteXLS, 
17 |          labelled, scales, forcats, purrr,readr, tidyr, stringr, plyr, survival
18 | LinkingTo: Rcpp
19 | LazyData: yes
20 | URL: http://epbi-radivot.cwru.edu/SEERaBomb/SEERaBomb.html
21 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/attic/common/UVdemog.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(tidyverse);library(SEERaBomb)
 3 | load("~/data/SEER/mrgd/cancPrim.RData")#load SEER cancer data
 4 | load("~/data/SEER/mrgd/popsae.RData")#load SEER population data
 5 | head(canc,2)
 6 | canc=canc%>%filter(primsite%in%c("C693","C694","C692"),histo3%in%8720:8790)%>%print(n=2)
 7 | canc$cancer=as.character(canc$cancer)
 8 | # C69.3 (choroid), C69.4 (ciliary body and iris), and C69.2 (retina). 
 9 | canc$cancer[canc$primsite=="C693"]="Choroid" 
10 | canc$cancer[canc$primsite=="C694"]="Ciliary"
11 | canc$cancer[canc$primsite=="C692"]="Retinal"
12 | canc%>%filter(is.na(surv))%>%print(n=4) # one choroid survival is missing so tables will be shy 1 for it
13 | d=canc%>%filter(!is.na(surv))%>%print(n=4)
14 | d$age=cut(d$agedx,c(0,40,60,80,126),include.lowest = T)
15 | d$year=cut(d$yrdx,c(1975,1995,2016),include.lowest = T,dig.lab=4)
16 | mkDemographics(d,outDir="uveal/outs")
17 | d$cancer="uveal"
18 | mkDemographics(d,outDir="uveal/outs")
19 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/PMF/README.md:
--------------------------------------------------------------------------------
1 | ### Generalized Additive Model Visualization of Primary Myelofibrosis Mortality 
2 | These scripts recreate Figure 1 and Table 1.  Begin with mkAllFields.R. Uncomment the commented block to create SEER binaries (from ASCII SEER data) and allfields2019.RData. Now run demog.R  to create PMF data frames with fewer fields and demographics in excel sheets. Next, use mkMaxRes.R to create maxRes.RData wherein deaths are partitioned into bins that preserve resolution as much as possible given ages are in years and survival is in months. These mortalities are then binned into 8x8 (O, PY) matrices and fitted to GAMs in Figure1.R.  Table1.R uses models saved by Figure1.R to produce Table 1 and the Louisiana State result.  Commented code chunks in Table1.R need to be uncommented and run once to create  binaries. One of these (d7.Rdata) is used by demogCheese.R to create an optional demographics table using the R package cheese.  The script pltMaxRes.R optionally yields 3-D rotatable views of the data in maxRes.RData.
3 | 
4 | 
5 | 
6 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/install.R:
--------------------------------------------------------------------------------
 1 | ###install.R
 2 | install.packages("SEERaBomb")
 3 | install.packages("devtools")
 4 | devtools::install_github("radivot/SEERaBomb",subdir="SEERaBomb") 
 5 | 
 6 | library(SEERaBomb)  #loads installed package SEERaBomb into memory
 7 | (df=getFields())    #gets SEER fields into a data frame
 8 | (rdf=pickFields(df))#picks a subset of SEER fields and defines their types
 9 | mkSEER(rdf)         #makes merged data file ~/data/SEER/mrgd/cancDef.Rdata
10 | 
11 | load("~/data/SEER/mrgd/cancDef.RData")#loads data.frame canc into memory
12 | head(canc,3)                          #returns top 3 rows of canc
13 | table(canc$cancer) #reports numbers of each cancer type
14 | 
15 | mkAbomb()#converts files in ~/data/abomb into ~/data/abomb/abomb.RData
16 | load("~/data/abomb/abomb.RData")#loads data frames heme and solid 
17 | View(heme)                      #note descriptions under column names
18 | 
19 | mkMrt("username","password")#sub in your account info
20 | 
21 | install.packages(c("bbmle","survminer","ggsci","tidyverse"))
22 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/incidAbomb.Rd:
--------------------------------------------------------------------------------
 1 | \name{incidAbomb}
 2 | \alias{incidAbomb}
 3 | \title{Computes A-bomb incidences}
 4 | \description{Creates A-bomb survivor incidence rates and confidence intervals. }
 5 | \usage{incidAbomb(d)} 
 6 | \arguments{
 7 |   \item{d}{Tibble, typically grouped, with DG column ending 1st block and py starting the last.}
 8 | }
 9 | 
10 | \details{The columns  \code{DG} and  \code{py} must exist in  \code{d}, in that order. 
11 | Person-year weighted means will be formed of any columns between them. 
12 | Its OK if none exist. Sums are formed on \code{py} and anything to their right. It is assumed
13 | that cancer types begin after py, with at most upy and/or subjects intervening them. 
14 | }
15 | 
16 | \value{A tibble data frame, summarized by groups, with cancers after py in a new cancers column,
17 | and new columns O (observed cases), incid and incid 95\% CI limits LL and UL.}
18 | 
19 | \author{Tom Radivoyevitch (radivot@ccf.org)}
20 | \seealso{\code{\link{SEERaBomb-package} }}
21 | %\examples{
22 | %library(SEERaBomb)
23 | %}
24 | \keyword{IO}
25 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/mapRegs.R:
--------------------------------------------------------------------------------
 1 | mapRegs<-function(code=NA ){
 2 |   desc=c("1501"="San Francisco-Oakland SMSA (1973)",  "1502"="Connecticut (1973)",  "1520"="Metropolitan Detroit (1973)",
 3 |          "1521"="Hawaii (1973)",    "1522"="Iowa (1973)",    "1523"="New Mexico (1973)",
 4 |          "1525"="Seattle (Puget Sound) (1974)",    "1526"="Utah (1973)",    "1527"="Metropolitan Atlanta (1975)",
 5 |          "1529"="Alaska*",   "1531"="San Jose-Monterey",   "1535"="Los Angeles",    "1537"="Rural Georgia",
 6 |          "1541"="Greater California (excluding SF, LA & SJ)",     "1542"= "Kentucky",
 7 |          "1543"="Louisiana",      "1544"=  "New Jersey",       "1547"= "Greater Georgia (excluding AT and RG)")
 8 |   sym=c("1501"="sf","1502"="CT","1520"="dM","1521"="HI","1522"="IA","1523"="NM","1525"="sW","1526"="UT","1527"="aG",
 9 |         "1529"="AK", "1531"="sj", "1535"="la", "1537"="rG",
10 |         "1541"="CA", "1542"="KY", "1543"="LA", "1544"="NJ", "1547"="GA")
11 |   regs=data.frame(sym,desc)
12 |   if (is.na(code[1])) return(regs) else return(regs[as.character(code),"sym"])
13 | }  
14 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/CMLmysteries/common.R:
--------------------------------------------------------------------------------
 1 | ### common.R  common block sourced in by other scripts
 2 | graphics.off();rm(list=ls())#clear plots and environment
 3 | library(tidyverse);library(SEERaBomb);library(ggsci);library(bbmle)
 4 | load("~/data/SEER/mrgd/cancDef.RData")#load SEER cancer data
 5 | gp=geom_point();gl=geom_line()
 6 | geRR=geom_errorbar(aes(ymin=rrL,ymax=rrU),width=.1,alpha=0.4)
 7 | ge=geom_errorbar(aes(ymin=LL,ymax=UL),width=0.1,alpha=0.4)#for absolute risks
 8 | gh=geom_hline(yintercept=1)
 9 | svts=scale_x_continuous(breaks=c(0,5,10))#surv times
10 | agts=scale_x_continuous(breaks=c(25,50,75))#age times
11 | sbb=theme(strip.background=element_blank())
12 | ltb=theme(legend.margin=margin(0,0,0,0),legend.title=element_blank())
13 | ltp=theme(legend.position="top")
14 | lh=theme(legend.direction="horizontal")
15 | sy=scale_y_log10()
16 | cc=coord_cartesian(ylim=c(0,10))#clips high errorbars
17 | jco=scale_color_jco()
18 | tc=function(sz) theme_classic(base_size=sz);
19 | gxi=xlab("Age (Years)")
20 | gyi=ylab(quote(paste("Cases per ",10^5," Person Years")))
21 | 
22 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/nvsr01.Rd:
--------------------------------------------------------------------------------
 1 | \name{nvsr01}
 2 | \alias{nvsr01}
 3 | \docType{data}
 4 | \title{National Vital Statistic Report (nvsr01) Data}
 5 | \description{
 6 |   US mortality rates (probability of death that year) in 2001 (Report 52_14).   
 7 | }
 8 | \usage{
 9 | nvsr01
10 | }
11 | \format{
12 |   A data frame with the following columns.
13 |   \describe{
14 |     \item{\code{age}}{Single-year resolution ages up to 99.5. }
15 |     \item{\code{p}}{Pooled sexes and races. }
16 |     \item{\code{pm}}{Pooled races, males. }
17 |     \item{\code{pf}}{Pooled races, females. }
18 |     \item{\code{wm}}{White males. }
19 |     \item{\code{wf}}{White females. }
20 |     \item{\code{bm}}{Black males. }
21 |     \item{\code{bf}}{Black females. }
22 |   }
23 | }
24 | 
25 | \details{This data is used to extrapolate PY at risk in SEER population files from 85+ to older ages.  
26 |   }
27 | 
28 | \examples{
29 | library(SEERaBomb)
30 | head(nvsr01)
31 | }  
32 | 
33 | \references{
34 | \url{ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/NVSR/52_14/}
35 | }
36 | 
37 | \keyword{datasets}
38 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/Sigurdson.Rd:
--------------------------------------------------------------------------------
 1 | \name{Sigurdson}
 2 | \alias{Sigurdson}
 3 | \docType{data}
 4 | \title{Chromosome translocation versus age data}
 5 | \description{
 6 |   This is chromosome translocation versus age data that is pooled across gender and race.    
 7 | }
 8 | \usage{data(Sigurdson)}
 9 | \format{
10 |   A data frame named Sigurdson with the following columns.
11 |   \describe{
12 |     \item{\code{age}}{Age of donor of lymphocytes. }
13 |     \item{\code{tlcn}}{Total number of chromosomal translocations per 100 cell equivalents. }
14 |   }
15 | }
16 | 
17 | \details{The data were obtained using FISH, see reference below. This dataset is loaded automatically with 
18 | \code{library(SEERaBomb)}. As such, the function \code{data()} is not needed.  }
19 | 
20 | \examples{
21 | library(SEERaBomb)
22 | with(Sigurdson,plot(age,tlcn,cex=2,cex.axis=2,cex.lab=2,las=1,cex.main=2,
23 |                ylab="",main="Translocations per 100 cells"))
24 | }  
25 | 
26 | \references{
27 | Sigurdson et al. Mutation Research 652 (2008) 112-121
28 | }
29 | 
30 | 
31 | \keyword{datasets}
32 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/seerStats.Rd:
--------------------------------------------------------------------------------
 1 | \name{seerStats}
 2 | \alias{seerStats}
 3 | \title{Summarize SEER data.}
 4 | \description{Shows numbers of cases per cancer in each SEER database and PY in each registry. Sexes are pooled. }
 5 | \usage{seerStats(canc,popsa)} 
 6 | \arguments{
 7 |   \item{canc}{Data frame of cancers that includes agedx and cancer columns. }
 8 |   \item{popsa}{Data frame of population PY at 1-year age resolution.  }
 9 | }
10 |    
11 | \value{A list of 2 data.frames with sexes pooled, one of cases with cancer types as rows and as columns, databases, totals, cases >=100 years old or not, and numbers of first-, second-, third- and higher cancers. The second data.frame holds population PY, and PY-weighted ages, per registry. }
12 |   
13 | 
14 | \author{Tom Radivoyevitch (radivot@ccf.org)}
15 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D}, \link{plot2D}} }
16 | \examples{
17 | \dontrun{ 
18 | library(SEERaBomb)
19 | load("~/data/SEER/mrgd/cancDef.RData") #load in canc
20 | load("~/data/SEER/mrgd/popsae.RData") # load in popsae
21 | seerStats(canc,popsae)
22 | }
23 | }
24 | 
25 | \keyword{IO}
26 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/simSurv.Rd:
--------------------------------------------------------------------------------
 1 | \name{simSurv}
 2 | \alias{simSurv}
 3 | \title{Simulate Survival Times}
 4 | \description{Uses background mortality rates to simulate background survival time for matching sex, age and year.}
 5 | \usage{ simSurv(d,mrt,rep=1,ltb=NULL,unif=TRUE)}
 6 | 
 7 | \arguments{
 8 |   \item{d}{Data frame containing sex, agedx, yrdx, surv, and status columns of observed data. }
 9 |   \item{mrt}{Mortality data binary made by mkMrt().  }
10 |   \item{rep}{Number of simulated replicates of each observed case.  }
11 |   \item{ltb}{Life table data binary made by mkLT(). Default of NULL => skip it.    }
12 |   \item{unif}{TRUE => death time in final year is uniform random draw. False => death at mid-point of year.}
13 | }
14 | 
15 | \value{Input d with simulation rows added below it, identified as "simulated" in a new column called type.}
16 | \author{ Tom Radivoyevitch (\email{radivot@ccf.org}) }
17 | \seealso{\code{\link{SEERaBomb-package},\link{mkSEER}} }
18 | \examples{
19 | \dontrun{
20 | library(SEERaBomb)
21 | mkLT()
22 | load("~/data/usMort/ltb.RData")
23 | tail(ltb$Female)
24 | }
25 | }
26 | \keyword{IO}
27 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mkAbomb.Rd:
--------------------------------------------------------------------------------
 1 | \name{mkAbomb}
 2 | \alias{mkAbomb}
 3 | \title{Make Abomb Binaries}
 4 | \description{Converts Abomb files \file{lsshempy.csv} and \file{lssinc07.csv} 
 5 | into tibbles \code{heme} and \code{solid} in the file \file{abomb.RData}.}
 6 | \usage{mkAbomb(AbombHome="~/data/abomb")}
 7 | 
 8 | \arguments{
 9 |   \item{AbombHome}{ Directory with Abomb files.  Should be writable by user.}
10 | }
11 | 
12 | \details{  
13 | The files \file{lsshempy.csv} and \file{lssinc07.csv} can be found under
14 | The incidence of leukemia, lymphoma and multiple myeloma among atomic bomb survivors: 1950-2001 and Life Span Study Cancer Incidence Data, 1958-1998 of the Radiation Effects Research Foundation (RERF) website \url{https://www.rerf.or.jp/}.
15 | }
16 | \value{None. This function is called for its side-effect of producing \file{abomb.RData}. }
17 | \author{ Tom Radivoyevitch (\email{radivot@ccf.org}) }
18 | \seealso{\code{\link{SEERaBomb-package},\link{mkSEER}} }
19 | \examples{
20 | \dontrun{
21 | library(SEERaBomb)
22 | mkAbomb()
23 | load("~/data/abomb/abomb.RData")
24 | View(heme)
25 | }
26 | }
27 | \keyword{IO}
28 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/p2s.R:
--------------------------------------------------------------------------------
 1 | p2s=function(canc,firstS,secondS,yrcut=2010) {
 2 |   cancer=surv=yrdx=seqnum=NULL 
 3 |   # df names  were chosen thinking myeloid neoplasms (mn) are primary cancers and AMLs are secondary
 4 |   canc$yrmodx=round((canc$modx-0.5)/12+canc$yrdx,3) # Make yrmodx to improve resolution over yrdx: 2014.0=Jan 1, 2014
 5 |   (mn=canc%>%filter(cancer%in%firstS,seqnum<2, yrdx>=yrcut)) 
 6 |   mn$cancer=factor(mn$cancer,levels=firstS)
 7 |   aml=canc%>%filter(cancer%in%secondS,seqnum==2, yrdx>=yrcut)
 8 |   mrn=intersect(mn$casenum,aml$casenum)
 9 |   mn1=mn[mn$casenum%in%mrn,]
10 |   aml2=aml[aml$casenum%in%mrn,]
11 |   mn1$casenum=as.numeric(mn1$casenum)
12 |   aml2$casenum=as.numeric(aml2$casenum)
13 |   mn1=mn1[order(mn1$casenum),]
14 |   aml2=aml2[order(aml2$casenum),]
15 |   aml2$trt=mn1$trt
16 |   aml2$yrdiff=aml2$yrmodx-mn1$yrmodx
17 |   mn1$surv=aml2$yrdiff*12
18 |   mn1$cancer2=aml2$cancer
19 |   mnNoC2=subset(mn,!(mn$casenum%in%mrn))
20 |   mnNoC2$c2occ=0
21 |   mn1$c2occ=1
22 |   mnNoC2$cancer2=NA
23 |   d=rbind(mn1,mnNoC2)
24 |   d=d%>%filter(surv<9999)
25 |   d$status=0
26 |   d$status[d$cancer2%in%secondS]=1
27 |   d
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/post1PYOm.Rd:
--------------------------------------------------------------------------------
 1 | \name{post1PYOm}
 2 | \alias{post1PYOm}
 3 | \title{Get person-years at risk and observed deaths after cancer }
 4 | \description{Converts data.frame D into a list of objects containing information regarding person years at risk
 5 | of death after cancer and the number of deaths observed, in a defined time since exposure interval.  }
 6 | \usage{post1PYOm(D,brks=c(0,2,5),binIndx=1,yearEnd)} 
 7 | \arguments{
 8 |   \item{D}{Input data.frame with columns: yrdx, agedx, surv, and status (1=dead).}
 9 |   \item{brks}{A vector of break points in years used to form time since diagnosis bins. }
10 |   \item{binIndx}{The index of the interval for which py are to be computed by calling this function.}
11 |   \item{yearEnd}{The most recent year of available mortality data.}
12 | }
13 | \value{A list where the first element is a PY matrix (PYM), the second
14 | element is a vector of deaths observed in the intervals, and the third is a scalar, the py-weighted midpoint of the time intervals. }
15 | \author{Tom Radivoyevitch (radivot@ccf.org)}
16 | \seealso{\code{\link{SEERaBomb-package}, \link{getE},\link{post1PYO}} }
17 | \keyword{internal}
18 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/msdEx1.R:
--------------------------------------------------------------------------------
 1 | ###msdEx1.R 
 2 | d=canc%>%filter(cancer=="CML")%>%print(n=13)
 3 | d%>%summarize(n=n(),na=sum(is.na(surv)),prct=100*na/n)#<2% missing
 4 | d=d%>%mutate(status=as.numeric(COD>0),surv=(surv+0.5)/12)
 5 | d=d%>%select(yrdx,agedx,sex,surv,status)%>%print(n=13)
 6 | (D=msd(d,mrt,brkst=c(0,0.5,1,2,3,4,5,6,8),brksy=c(1975,1990,2005,2016)))
 7 | gx=xlab("Years Since CML Diagnosis")
 8 | gy=ylab("Relative Risk of Mortality")
 9 | myt=theme(legend.text=element_text(size=12),strip.text=element_text(size=12))
10 | D%>%ggplot(aes(x=t,y=RR,col=Years))+facet_grid(.~sex)+gp+gl+gx+gy+gh+
11 |        svts+jco+tc(14)+ltb+ltp+sbb+ylim(c(0,NA))+geRR+myt
12 | ggsave("~/Results/tutorial/msdEx1A.pdf",width=4.5,height=3)
13 | 
14 | labs=c("1975-1990","1991-2005","2006-2016")
15 | d=d%>%mutate(yrg=cut(yrdx,c(1974,1990,2005,2016),labels=labs))%>%print(n=13)
16 | fit=survfit(Surv(surv,status)~yrg+sex,data=d) 
17 | gy=ylab("Survival Probability")
18 | ggsurvplot_facet(fit,d,facet.by="sex",legend.title="",xlim=c(0,12),
19 |           short.panel.labs=T)+svts+jco+sbb+myt+gx+gy
20 | ggsave("~/Results/tutorial/survEx1B.pdf",width=4.5,height=3)
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/getFields.Rd:
--------------------------------------------------------------------------------
 1 | \name{getFields}
 2 | \alias{getFields}
 3 | \title{Get fields from SEER SAS file}
 4 | \description{Converts the SAS file in the SEER \file{incidence} directory into a data frame in R.}
 5 | \usage{getFields(seerHome="~/data/SEER")}
 6 | 
 7 | \arguments{
 8 |   \item{seerHome}{The directory that contains the SEER \file{population} and \file{incidence} directories.}
 9 | }
10 | \details{SEER provides a SAS file for reading SEER ASCII data files into SAS.  This file is parsed by 
11 | getFields() to generate
12 | a data frame in R that contains all of the SEER fields. This data frame describes these fields in 
13 | terms of their names (short and long forms), their starting points, and their widths.}
14 | 
15 | \value{A data frame with one row for each field and columns that contain corresponding 
16 | starting positions, widths, sas names, short names, and expansions thereof.}
17 | 
18 | \author{Tom Radivoyevitch (\email{radivot@ccf.org}) }
19 | \seealso{\code{\link{SEERaBomb-package}, \link{mkSEER}, \link{pickFields}} }
20 | \examples{
21 | \dontrun{
22 | library(SEERaBomb)
23 | (df=getFields())
24 | head(df,20)
25 | }
26 | }
27 | \keyword{IO}
28 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/leukemiaRes2016/allTimeRRtab1.R:
--------------------------------------------------------------------------------
 1 | #allTimeRRtab1.R  (gets all-time RR in Table 1)
 2 | # first run CLLcomputing.R
 3 | rm(list=ls())
 4 | library(dplyr)
 5 | library(SEERaBomb)
 6 | system.time(load("~/Results/CLL/pm.RData")) # 1 secs to load. 
 7 | system.time(load("~/Results/CLL/pf.RData")) # 1 secs to load. 
 8 | mkci=function(x) sprintf("%.2f (%.2f, %.2f)",x[1],x[2],x[3])
 9 | dm=mkDF(pm,"b0")
10 | df=mkDF(pf,"b0")
11 | HM=c("AML","AMLti","APL","MDS","CMML","CML","MPN","ALL","CLL","HCL","OL","NHL","MM","HL")
12 | df=df[!df$cancer1%in%HM,]
13 | dm=dm[!dm$cancer1%in%HM,]
14 | head(df)
15 | f=df%>%group_by(trt)%>%summarize(O=sum(O),E=sum(E),sex="F")
16 | m=dm%>%group_by(trt)%>%summarize(O=sum(O),E=sum(E),sex="M")
17 | D=rbind(f,m)
18 | (T1=D%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E)))
19 | mkci(T1[1,5:7]) #F IR
20 | mkci(T1[2,5:7]) #F no IR
21 | mkci(T1[3,5:7]) #M IR
22 | mkci(T1[4,5:7]) #M no IR
23 | 
24 | #now pool sexes
25 | D1=D%>%group_by(trt)%>%summarize(O=sum(O),E=sum(E))
26 | (T1=D1%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E)))
27 | mkci(T1[1,4:6]) #IR
28 | mkci(T1[2,4:6]) # no IR
29 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/post1PYOm.R:
--------------------------------------------------------------------------------
 1 | post1PYOm=function(D,brks=c(0,2,5),binIndx=1,yearEnd) { survC=surv=agedx=py=year=ageL=NULL 
 2 |   # yearEnd=ceiling(max(D$yrdx+D$surv))
 3 |   # yearStart=floor(min(D$yrdx)) #no, stuck with 1973 as earliest year, else redesign fillPYM
 4 |   binS=levels(cut(brks+0.1,breaks=c(brks,100)))
 5 |   bin=binS[binIndx]
 6 |   LL=getBinInfo(bin,binS)["LL"]
 7 |   D=D%>%mutate(survC=cut(surv,breaks=c(-1,brks,100),include.lowest = TRUE)) 
 8 |   DO=D%>%filter(survC==bin) 
 9 |   O=sum(DO$status)
10 |   D=D%>%filter(surv>LL) 
11 |   D=D%>%mutate(py=getPY(surv,bin,binS,brks)) # getpy leaves zeros when surv end is left of LL
12 |   D=D%>%filter(py>0)  #get rid of such rows upfront
13 |   D=D%>%mutate(ageL=agedx+LL) 
14 |   D$year=floor(D$yrdx+LL)
15 |   D=D%>%select(py,ageL,year)
16 |   if(dim(D)[1]==0) binMidPnt=LL else binMidPnt=LL+sum(D$py)/dim(D)[1]/2
17 |   # binMidPnt=LL+sum(D$py)/dim(D)[1]/2
18 |   PYin=as.matrix(D)
19 |   yrs=1975:yearEnd; ages=0.5:125.5  # this + next line = initiate PYM with zeros
20 |   PYM=matrix(0,ncol=length(yrs),nrow=length(ages),dimnames=list(ages,yrs)) 
21 |   fillPYM(PYin,PYM)
22 |   list(PYM=PYM,O=O,binMidPnt=binMidPnt)
23 | }
24 | 
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/attic/cmlMortEx1.R:
--------------------------------------------------------------------------------
 1 | ###msdEx1.R 
 2 | d=canc%>%filter(cancer=="CML")%>%print(n=13)
 3 | d%>%summarize(n=n(),na=sum(is.na(surv)),prct=100*na/n)#<2% missing
 4 | d=d%>%mutate(status=as.numeric(COD>0),surv=(surv+0.5)/12)
 5 | d=d%>%select(yrdx,agedx,sex,surv,status)%>%print(n=13)
 6 | (D=msd(d,mrt,brkst=c(0,0.5,1,2,3,4,5,6,8),brksy=c(1973,1990,2005,2015)))
 7 | gx=xlab("Years Since CML Diagnosis")
 8 | gy=ylab("Relative Risk of Mortality")
 9 | myt=theme(legend.text=element_text(size=12),strip.text=element_text(size=12))
10 | D%>%ggplot(aes(x=t,y=RR,col=Years))+facet_grid(.~sex)+gp+gl+gx+gy+gh+
11 |        svts+jco+tc(14)+ltb+ltp+sbb+ylim(c(0,NA))+geRR+myt
12 | ggsave("~/Results/tutorial/CMLmortEx1A.pdf",width=4.5,height=3)
13 | 
14 | labs=c("1973-1990","1991-2005","2006-2015")
15 | d=d%>%mutate(yrg=cut(yrdx,c(1972,1990,2005,2015),labels=labs))%>%print(n=13)
16 | fit=survfit(Surv(surv,status)~yrg+sex,data=d) 
17 | gy=ylab("Survival Probability")
18 | ggsurvplot_facet(fit,d,facet.by="sex",legend.title="",xlim=c(0,12),
19 |           short.panel.labs=T)+svts+jco+sbb+myt+gx+gy
20 | ggsave("~/Results/tutorial/CMLsurvEx1B.pdf",width=4.5,height=3)
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mapRegs.Rd:
--------------------------------------------------------------------------------
 1 | \name{mapRegs}
 2 | \alias{mapRegs}
 3 | \title{Map registry codes to acronyms}
 4 | \description{ Maps codes for SEER registries to 2-letter acronyms and 
 5 |                corresponding descriptions.}
 6 | \usage{mapRegs(code=NA)} 
 7 | \arguments{
 8 |   \item{code}{ Full SEER codes as found in SEER Cancer files. Add 1500 to popuation file codes get such cancer file codes. 
 9 |   If this argument is missing (the default) a full dataframe of symbols and descriptions is returned. }
10 | }
11 | 
12 | \value{A dataframe of SEER registry symbols and descriptions with rownames such as "1501" for 
13 | sf (san francisco) and "1520" for dM (detroit Michigan), or just the symbol if the rowname is given.
14 | Note that city characters are in lower case and state characters are in upper case.}
15 | 
16 | \note{This function is used by mkSEER when it generates merged R binaries. It is exposed 
17 | to provide quick access to registry acronym definitions.}
18 | 
19 | 
20 | \author{Tom Radivoyevitch (radivot@ccf.org)}
21 | \seealso{\code{\link{SEERaBomb-package}, \link{mapCancs}} }
22 | \examples{
23 | library(SEERaBomb)
24 | mapRegs(1501)
25 | mapRegs()
26 | }
27 | \keyword{IO}
28 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/extraFuncs/mkPYMR.R:
--------------------------------------------------------------------------------
 1 | mkPYMR <- function(PY,yearEnd) {
 2 |   py = PY[,1]
 3 |   age=PY[,2] 
 4 |   year=PY[,3]
 5 |   print(range(py))
 6 |   print(range(age))
 7 |   print(range(year))
 8 |   nr = length(py);
 9 |   yrs=1973:yearEnd
10 |   nyears=length(yrs)
11 |   ages=0.5:125.5
12 |   #   ages=0.5:135.5
13 |   nages=length(ages)
14 |   PYM=matrix(0,ncol=nyears,nrow=nages)
15 |   #    PYM=numeric(nyears*nages)
16 |   if (nr>=1) for (i in 1:nr) {
17 |     pystrip=py[i]
18 |     rem = pystrip-floor(pystrip)
19 |     quo = floor(pystrip)
20 |     strtAgeIndx=round(age[i]+.5)
21 |     strtYrIndx=round(year[i]-1972)
22 |     for (j in 0:quo)   
23 |       if ( (strtAgeIndx+j<=nages) & (strtYrIndx+j<=nyears))
24 |         if (j==quo) 
25 |           PYM[strtAgeIndx+j,strtYrIndx+j]=PYM[strtAgeIndx+j,strtYrIndx+j]+rem   else
26 |             PYM[strtAgeIndx+j,strtYrIndx+j]=PYM[strtAgeIndx+j,strtYrIndx+j]+1
27 |   }
28 | #   print(nyears)
29 | #   print(nages)
30 | #   print(nages*nyears)
31 | #   print(head(PYM))
32 | #   print(tail(PYM))
33 | #   print(dim(PYM))
34 |   # length(PYM)<-nages*nyears
35 |   #   dim(PYM)=c(nages,nyears)
36 |   colnames(PYM)=yrs
37 |   rownames(PYM)=ages
38 |   PYM
39 | }
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/nvsr.Rd:
--------------------------------------------------------------------------------
 1 | \name{nvsr}
 2 | \alias{nvsr}
 3 | \docType{data}
 4 | \title{National Vital Statistic Report (nvsr) Data}
 5 | \description{
 6 |   US mortality rates (probability of death that year) in 2010; report published Nov. 2014.   
 7 | }
 8 | \usage{
 9 | nvsr
10 | }
11 | \format{
12 |   A data frame with the following columns.
13 |   \describe{
14 |     \item{\code{age}}{Single-year resolution ages up to 99.5. }
15 |     \item{\code{p}}{Pooled sexes and races. }
16 |     \item{\code{pm}}{Pooled races, males. }
17 |     \item{\code{pf}}{Pooled races, females. }
18 |     \item{\code{w}}{Whites, sexes pooled.}
19 |     \item{\code{b}}{Blacks, sexes pooled.}
20 |     \item{\code{o}}{Others, sexes pooled.}
21 |     \item{\code{wm}}{White males. }
22 |     \item{\code{wf}}{White females. }
23 |     \item{\code{bm}}{Black males. }
24 |     \item{\code{bf}}{Black females. }
25 |     \item{\code{om}}{Other males. }
26 |     \item{\code{of}}{Other females. }
27 |   }
28 | }
29 | 
30 | \examples{
31 | library(SEERaBomb)
32 | head(nvsr)
33 | }  
34 | 
35 | \references{
36 | National Vital Statistics Reports, Vol. 63, No. 7, November 6, 2014
37 | \url{https://www.cdc.gov/nchs/data/nvsr/nvsr63/nvsr63_07.pdf}
38 | }
39 | 
40 | \keyword{datasets}
41 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/mapTrts.R:
--------------------------------------------------------------------------------
 1 | mapTrts<-function(D){
 2 |   # adds a trt column to define cancer treatments of interest as a factor
 3 |   if ("radiatn"%in%names(D)&"chemo"%in%names(D)) {
 4 |     # this is how radiatn breaksdown. 0=none, 1=ExtBeam, 2+3=iso+implants, 4=extB+other
 5 |     # 5=NOS, 6=other rad (other=other than beam), 7=refused, 8+9=Unknown
 6 |     D$trt="noRad"  # will be left as 0 and 7. Do it this way to initialize the vector
 7 |     D$trt[D$radiatn%in%c(8,9)] ="unk"
 8 |     D$trt[D$radiatn>0 & D$radiatn<7] ="rad"
 9 |     D$trt=paste0(D$trt,c(".noChemo",".chemo")[D$chemo+1])
10 |     D$trt=factor(D$trt,levels=c("rad.chemo","rad.noChemo","noRad.chemo","noRad.noChemo","unk.chemo","unk.noChemo"))
11 |   } 
12 |   if ("radiatn"%in%names(D)&!"chemo"%in%names(D)) {
13 |     D$trt="noRad"  
14 |     D$trt[D$radiatn%in%c(8,9)] ="unk"
15 |     D$trt[D$radiatn>0 & D$radiatn<7] ="rad"
16 |     D$trt=factor(D$trt,levels=c("rad","noRad","unk"))
17 |   } 
18 |   if (!"radiatn"%in%names(D)&"chemo"%in%names(D)) {
19 |     D$trt="chemo"  
20 |     D$trt[D$chemo==0] ="noChemo"
21 |     D$trt=factor(D$trt,levels=c("chemo","noChemo"))
22 |   } 
23 |   if (!"radiatn"%in%names(D)&!"chemo"%in%names(D)) {
24 |     D$trt="NA"  
25 |     D$trt=factor(D$trt)
26 |   }
27 |   D
28 | }  
29 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mkMrt.Rd:
--------------------------------------------------------------------------------
 1 | \name{mkMrt}
 2 | \alias{mkMrt}
 3 | \title{Make mortality binaries}
 4 | \description{Gets mortality data from the Human Mortality Database \url{https://www.mortality.org/} 
 5 | and puts it in the file \file{mrt.RData}.}
 6 | \usage{ mkMrt(username,passwd,country="USA",mrtHome="~/data/usMort")}
 7 | 
 8 | \arguments{
 9 |   \item{username}{ Username of Human Mortality Database account.}
10 |   \item{passwd}{ Password of Human Mortality Database account.}
11 |   \item{country}{ This should probably stay at its default of USA.}
12 |   \item{mrtHome}{ Directory that will contain the mortality data binary.  Should be writable by user.}
13 | }
14 | 
15 | \value{None. This function is called for its side-effect of producing \file{mrt.RData}. }
16 | \references{ Barbieri M, Wilmoth JR, Shkolnikov VM, et al. Data Resource Profile: 
17 |              The Human Mortality Database (HMD). Int J Epidemiol. 2015;44: 1549-1556.}
18 | \author{ Tom Radivoyevitch (\email{radivot@ccf.org}) }
19 | \seealso{\code{\link{SEERaBomb-package},\link{mkSEER}} }
20 | \examples{
21 | \dontrun{
22 | library(SEERaBomb)
23 | mkMrt("username", "password")# sub in your personal account info
24 | load("~/data/usMort/mrt.RData")
25 | head(mrt$Female)
26 | }
27 | }
28 | \keyword{IO}
29 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/getPY.Rd:
--------------------------------------------------------------------------------
 1 | \name{getPY}
 2 | \alias{getPY}
 3 | \title{Get tsd interval PY}
 4 | \description{Gets person year (PY) contributions to a particular time since diagnosis (tsd) interval
 5 |      from survival times.}
 6 | \usage{getPY(surv, bin, binS, brks)} 
 7 | \arguments{
 8 |   \item{surv}{The total survival time of the patient.  }
 9 |   \item{bin}{The label of the specific bin of interest.  }
10 |   \item{binS}{The character vector of bin labels in which binLab exists.  }
11 |   \item{brks}{The numeric vector of break points used by cut to create binS. }
12 | }
13 | 
14 | \value{A vector as long as the survival vector input of PY at risk in a particular interval.}
15 | 
16 | \author{Tom Radivoyevitch (radivot@ccf.org)}
17 | \seealso{\code{\link{SEERaBomb-package}, \link{getBinInfo}} }
18 | \examples{
19 | library(SEERaBomb)
20 | brks=c(0,0.25,1,3,6)  
21 | (binS=levels(cut(brks+0.1,breaks=c(brks,100)))) #make a vector of intervals 
22 | survTimes=c(8,16,1.5,3.7)
23 | getPY(survTimes,binS[1],binS,brks)# all contribute 0.25 to first interval 
24 | getPY(survTimes,binS[4],binS,brks)# 3rd and 4th survivals contribute 0 and 0.7 to (3,6]
25 | getPY(survTimes,binS[5],binS,brks)# 1st and 2nd survival contribute 2 and 10 years to (6,100]
26 | }
27 | \keyword{internal}
28 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/foldD.Rd:
--------------------------------------------------------------------------------
 1 | \name{foldD}
 2 | \alias{foldD}
 3 | \title{Fold Output D of Mortality Since Diagnosis function msd}
 4 | \description{Collapses output D of msd, computing new Excess Absoluted Risks (EAR) and Relative Risks (RR) as it folds the dataframe to a reduced size.  }
 5 | \usage{foldD(D,keep=c("int")) } 
 6 | \arguments{
 7 |   \item{D}{Output of msd hold time intervals and at least sex as a variable to fold over.}
 8 |   \item{keep}{Default is to keep time intervals (int) and fold variables like sex. }
 9 | }
10 | \value{Tibble with new summed observed and expected cases, and new EAR and RR with CI for each interval. }
11 | \author{Tom Radivoyevitch (radivot@ccf.org)}
12 | \seealso{\code{\link{SEERaBomb-package}, \link{msd},\link{mkMrtLocal}} }
13 | 
14 | \examples{
15 | \dontrun{
16 | library(SEERaBomb)
17 | load("~/data/SEER/mrgd/cancDef.RData") #loads in canc
18 | lu=canc\%>\%filter(cancer=="lung")
19 | lu=lu\%>\%mutate(status=as.numeric(COD>0))\%>\%select(yrdx,agedx,sex,surv,status) 
20 | lu=lu\%>\%mutate(surv=round((surv+0.5)/12,3))#convert surv to years
21 | load("~/data/mrt/mrtUSA.RData")  #loads R binary created by mkMortLocal()
22 | brks=c(0,0.5,3,6,10,15,20,25)    
23 | (dlu=msd(lu,mrt,brkst=brks))
24 | foldD(dlu)
25 | }
26 | }
27 | 
28 | \keyword{IO}
29 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/stdUS.Rd:
--------------------------------------------------------------------------------
 1 | \name{stdUS}
 2 | \alias{stdUS}
 3 | \docType{data}
 4 | \title{The standard population of the US in 2000}
 5 | \description{
 6 |   The US population in 2000 for ages up to 100 years. Sexes are pooled.  
 7 | }
 8 | \usage{
 9 | stdUS
10 | }
11 | \format{
12 |   A data frame with the following columns.
13 |   \describe{
14 |     \item{\code{age}}{Sigle-year resolution ages up to 100. }
15 |     \item{\code{pop}}{The population within each age group. }
16 |     \item{\code{prop}}{Proportion of the total population within each age group. }
17 |   }
18 | }
19 | 
20 | \details{This population data can be used to map age specific incidence rate vectors
21 | into summarizing scalars. It allows cancer incidence rates across different
22 | SEER registries to be compared without concerns of differences in age distributions of the populations. 
23 |   }
24 | 
25 | \examples{
26 | library(SEERaBomb)
27 | with(stdUS,plot(age,pop/1e6,type="l",xlab="Age",
28 |      ylab="People (Millions)",main="US Population in 2000"))
29 | library(dplyr)
30 | stdUS\%>\%filter(age>=85)\%>\%summarize(weighted.mean(age,w=pop)) 
31 | ### so ave age >=85.0 is 89.4
32 | }  
33 | 
34 | \references{
35 | \url{https://seer.cancer.gov/stdpopulations/}
36 | }
37 | 
38 | 
39 | \keyword{datasets}
40 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/AAPSJ2016/CMLmortRRtimeCrs.R:
--------------------------------------------------------------------------------
 1 | library(SEERaBomb)
 2 | load("~/data/SEER/mrgd/cancDef.RData") #loads in canc
 3 | d=canc%>%filter(cancer=="CML")
 4 | d=d%>%mutate(status=as.numeric(COD>0),surv=(surv+0.5)/12)
 5 | d=d%>%select(yrdx,agedx,sex,surv,status)  #SEER CML survival times
 6 | # mkMrt("username", "password") #use your Human Mortality Database account info
 7 | load("~/data/usMort/mrt.RData")
 8 | brkst=c(0,0.5,1,2,3,4,5,6,8) 
 9 | brksy=c(1972,1990,2002,2015)
10 | D=msd(d,mrt,brkst,brksy)           #msd()  = Mortality Since Diagnosis
11 | head(D,2)
12 | quartz(width=6,height=5)
13 | # quartz(width=6,height=5)
14 | theme_update(legend.position = c(.85, .37),axis.text=element_text(size=rel(1.4)),
15 |              axis.title=element_text(size=rel(1.4)),strip.text = element_text(size = rel(1.5)))
16 | # theme_update(legend.position=c(.48,.43),axis.text=element_text(size=rel(1.4)),
17 | #              axis.title=element_text(size=rel(1.4)),strip.text=element_text(size=rel(1.5)))
18 | g=qplot(x=t,y=RR,data=D,col=Years,geom=c("line","point"), ylim=c(0,18),
19 |         xlab="Years Since CML Diagnosis",ylab="Relative Risk of Mortality")
20 | g=g+geom_line(size=1)+facet_grid(sex~.)+geom_abline(intercept=1, slope=0) 
21 | g+geom_errorbar(aes(ymin=rrL,ymax=rrU,width=.15))
22 |  


--------------------------------------------------------------------------------
/SEERaBomb/man/plot2D.Rd:
--------------------------------------------------------------------------------
 1 | \name{plot2D}
 2 | \alias{plot2D}
 3 | \title{Plot 2D cancer incidence splines}
 4 | \description{Plots splines of incidence versus age and calendar year produced by mk2D. }
 5 | \usage{plot2D(seerSet, write=TRUE,outDir="~/Results/plots",col="red")} 
 6 | \arguments{
 7 |   \item{seerSet}{seerSet object after it is processed by mk2D.   }
 8 |   \item{write}{TRUE if you want to write images to a seerSet subfolder. 
 9 |   The name of this subfolder is the basefilename (bfn) of the seerSet. }
10 |   \item{outDir}{Parent folder of seerSet subfolders.}
11 |   \item{col}{Color of surface plot.}
12 | }
13 | \details{A plot will be produced for each cancer fitted by mk2D. For the first of these,
14 | RGL will open a new X11 window.  Adjustments of size and angle of this 
15 | first plot will hold for all subsequent plots. After each plot, 
16 | the user hits any key to write the plot to a file and advance through the list of cancers.}
17 | 
18 | \value{None, results go to the screen and to png files.   }
19 | 
20 | \author{Tom Radivoyevitch (radivot@ccf.org)}
21 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D},\link{seerSet}} }
22 | 
23 | \examples{
24 | \dontrun{
25 | library(SEERaBomb)
26 | n=simSeerSet()
27 | n=mk2D(n) 
28 | plot2D(n)
29 | }
30 | }
31 | 
32 | 
33 | \keyword{IO}
34 | 


--------------------------------------------------------------------------------
/SEERaBomb/src/fillPYM.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;  
 3 | 
 4 | // [[Rcpp::export]]
 5 | NumericMatrix fillPYM(SEXP PYin, SEXP PYM) {
 6 |   NumericMatrix PY1(PYin);
 7 |   NumericMatrix PYM1(PYM);
 8 |   int nrows = PY1.nrow(),nages = PYM1.nrow(),nyears = PYM1.ncol(), quo,strtYrIndx,strtAgeIndx;
 9 |   double rem, pystrip, pre,age;
10 |   for (int i = 0; i < nrows ; i++) {
11 |     age=PY1(i,1);
12 |     pre=ceil(age)-age;
13 |     if (pre==0) pre=1;
14 |     pystrip=PY1(i,0);
15 |     if (pystrip>pre) {
16 |       pystrip=pystrip-pre; 
17 |       rem = pystrip-floor(pystrip);
18 |       quo = floor(pystrip);
19 |     }
20 |     else {
21 |       pre=pystrip; 
22 |       pystrip=0;
23 |       rem=0;
24 |       quo=0;
25 |     }
26 |     strtAgeIndx=floor(age);
27 |     strtYrIndx=PY1(i,2)-1975;
28 |     /*    strtAgeIndx=roundl(PY1(i,1)+.5)-1;
29 |     strtYrIndx=roundl(PY1(i,2)-1972)-1; */
30 |     for (int j = 0; j <= (quo+1) ; j++)   {
31 |       if ( (strtAgeIndx+j<nages) & (strtYrIndx+j<nyears)) {
32 |         if (j==0) PYM1(strtAgeIndx+j,strtYrIndx+j)+=pre; else
33 |         if (j==(quo+1)) 
34 |         PYM1(strtAgeIndx+j,strtYrIndx+j)+=rem;
35 |         else
36 |         PYM1(strtAgeIndx+j,strtYrIndx+j)+=1;
37 |       }
38 |     }
39 |   }  
40 |   return PYM1;
41 | }
42 | 
43 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/attic/common/setup.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(SEERaBomb);library(tidyverse)
 3 | library(survival);library(survminer)
 4 | # load("~/data/SEER/mrgd/cancPrim.RData")#load SEER cancer data
 5 | # d=canc%>%filter(primsite%in%c("C693","C694","C692"),histo3%in%8720:8790)
 6 | # d$cancer="uveal"
 7 | # d=d%>%filter(!is.na(surv))%>%mutate(status=as.numeric(COD>0),surv=(surv+0.5)/12)
 8 | # d5=d%>%select(yrdx,agedx,sex,surv,status)
 9 | # save(d,d5,file="uveal/data/d.RData")
10 | load("uveal/data/d.RData")
11 | load("~/data/mrt/mrtUSA.RData")#loads US mortality data
12 | gp=geom_point(); gl=geom_line()
13 | ge=geom_errorbar(aes(ymin=LL,ymax=UL),width=0.2)#for absolute risks
14 | sbb=theme(strip.background=element_blank())
15 | ltb=theme(legend.margin=margin(0,0,0,0),legend.title=element_blank())
16 | ltp=theme(legend.position="top")
17 | lh=theme(legend.direction="horizontal")
18 | tc=function(sz) theme_classic(base_size=sz);
19 | svts=scale_x_continuous(breaks=seq(0,35,5),limits=c(0,38))
20 | gx=xlab("Years Since Diagnosis")
21 | gy=ylab("Excess Absolute Risk of Mortality")
22 | 
23 | 
24 | # geRR=geom_errorbar(aes(ymin=rrL,ymax=rrU),width=.2)
25 | # gh=geom_hline(yintercept=1)
26 | # agts=scale_x_continuous(breaks=c(25,50,75))#age times
27 | 
28 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/abombEx5.R:
--------------------------------------------------------------------------------
 1 | ###abombEx5.R
 2 | (d=heme%>%select(ageG:DG,age,agex,t,D,py,AML=AMLtot,ALL,CML))
 3 | myt=theme(legend.position=c(.52,.85),legend.key.height=unit(.7,'lines'))
 4 | cc=coord_cartesian(ylim=c(.2,200));f=facet_wrap(~cancer)
 5 | 
 6 | gx=xlab("Attained Age (Years)")
 7 | (dA=incidAbomb(d%>%group_by(ageG,DG)))
 8 | ggplot(dA,aes(x=age,y=I,col=DG))+gp+gl+gx+sy+gyi+ge+f+cc+tc(12)+myt+jco+sbb+ltb
 9 | ggsave("~/Results/tutorial/abombLeuAge.pdf",width=4,height=2.5)#Fig.5A
10 | 
11 | gx=xlab("Age at Time of Bombing (Years)")
12 | (dB=incidAbomb(d%>%group_by(agexG,DG)))
13 | ggplot(dB,aes(x=agex,y=I,col=DG))+gp+gl+gx+sy+gyi+ge+f+cc+tc(12)+myt+jco+sbb+ltb
14 | ggsave("~/Results/tutorial/abombLeuAgex.pdf",width=4,height=2.5)#Fig.5B
15 | 
16 | gx=xlab("Years Since Bombing")
17 | (dC=incidAbomb(d%>%group_by(tG,DG)))
18 | ggplot(dC,aes(x=t,y=I,col=DG))+gp+gl+gx+sy+gyi+ge+f+cc+tc(12)+myt+jco+sbb+ltb
19 | ggsave("~/Results/tutorial/abombLeuTsx.pdf",width=4,height=2.5)#Fig.5C
20 | 
21 | gx=xlab("Dose (Sv)")
22 | d$DG<-cut(d$D,c(-1,.02,.25,.5,.75,1.5,100))
23 | (dD=incidAbomb(d%>%group_by(DG)))
24 | ggplot(dD,aes(x=D,y=I))+gp+gl+gx+sy+gyi+ge+f+cc+tc(12)+myt+jco+sbb+ltb+
25 |   scale_x_continuous(breaks=0:2)
26 | ggsave("~/Results/tutorial/abombLeuDoseResp.pdf",width=4,height=2.5)#Fig.5D
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/common.R:
--------------------------------------------------------------------------------
 1 | ###common.R
 2 | graphics.off();rm(list=ls())#clear plots and environment
 3 | library(tidyverse);library(SEERaBomb);library(ggsci)#load packages          
 4 | library(survival);library(survminer);library(bbmle)
 5 | load("~/data/SEER/mrgd/cancDef.RData")#load SEER cancer data
 6 | load("~/data/SEER/mrgd/popsae.RData")#load SEER population data
 7 | load("~/data/abomb/abomb.RData")#load A-bomb data
 8 | load("~/data/usMort/mrt.RData")#loads US mortality data
 9 | canc$cancer=fct_collapse(canc$cancer,AML=c("AML","AMLti","APL"))
10 | secs=c("AML","ALL","CML")#second cancers of interest
11 | gp=geom_point();gl=geom_line()
12 | geRR=geom_errorbar(aes(ymin=rrL,ymax=rrU),width=.2)
13 | ge=geom_errorbar(aes(ymin=LL,ymax=UL),width=0.2)#for absolute risks
14 | gh=geom_hline(yintercept=1)
15 | svts=scale_x_continuous(breaks=c(0,5,10))#surv times
16 | agts=scale_x_continuous(breaks=c(25,50,75))#age times
17 | sbb=theme(strip.background=element_blank())
18 | ltb=theme(legend.margin=margin(0,0,0,0),legend.title=element_blank())
19 | ltp=theme(legend.position="top")
20 | lh=theme(legend.direction="horizontal")
21 | sy=scale_y_log10()
22 | jco=scale_color_jco()
23 | tc=function(sz) theme_classic(base_size=sz);
24 | gxi=xlab("Age (Years)")
25 | gyi=ylab(quote(paste("Cases per ",10^5," Person Years")))
26 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/leukemia2016/interactions2D.R:
--------------------------------------------------------------------------------
 1 | # interactions2D.R (Fig S2: show NHL and liver need an interaction term in the model)
 2 | library(SEERaBomb)
 3 | load("~/data/SEER/mrgd/cancDef.RData")
 4 | levels(canc$cancer)
 5 | load("~/data/SEER/mrgd/popsa.RData")
 6 | zap=c("cervixCIS","benign")
 7 | # mk2D cannot handle cervixCIS because it ends abruptly in 1995. Benigns 
 8 | # starting in 2004 are also a problem. The problem is that a plane of zeros ends
 9 | # up with too much of a smooth landing from the hills, i.e. tall cliffs aren't
10 | # well approximated by splines. So for CMML and MDS special handing of smaller
11 | # PY matrices takes place, but such code is ugly and hard to think about, so we
12 | # don't want to clutter SEERaBomb source with additional special cases unless
13 | # the second cancers are very important to us. So cervicalCIS and benigns are
14 | # being left out. Note that benigns cause an additional problem, which is that
15 | # their seqnums go 60 to 88. Handling this would be additional work that
16 | # probably isn't worth it from a heme cancer centric perspective.
17 | canc=canc%>%filter(!cancer%in%zap)
18 | canc$cancer=factor(canc$cancer)
19 | pm=seerSet(canc,popsa,Sex="Male")
20 | pm=mk2D(pm,txt="interaction") # pooled race males, all male cancers not zapped get fitted
21 | plot2D(pm)
22 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/attic/incidCML.R:
--------------------------------------------------------------------------------
 1 | library(SEERaBomb);library(ggsci);library(bbmle)
 2 | load("~/data/SEER/mrgd/cancDef.RData") 
 3 | load("~/data/SEER/mrgd/popsae.RData")#loads popsae 
 4 | (D=incidSEER(canc,popsae,"CML"))
 5 | (D=D%>%filter(age>=20,age<=80,year>=2000)%>%mutate(ageG=cut(age,seq(20,80,5))))
 6 | (D=D%>%group_by(sex,race,ageG)%>%
 7 |   summarize(age=mean(age),py=sum(py),n=sum(n))%>%mutate(incid=n/py))
 8 | summary(mm1<-mle2(n~dpois(lambda=exp(c+k*(age-50))*py),
 9 |                   #parameters=list(c~sex,k~sex),#k no diff
10 |                   parameters=list(c~sex,k~race),#kW-O P=7e-8
11 |                   method="Nelder-Mead",start=list(c=0,k=0.04),data=D)) 
12 | D$EI=predict(mm1)/D$py#same as D=D%>%ungroup()%>%mutate(EI=predict(mm1)/py) 
13 | qplot(age,incid,col=sex,shape=sex,data=D)+facet_grid(~race)+geom_line(aes(y=EI))+
14 |   labs(x="Age (years)",y=quote(paste("CML Cases per ",10^5," Person Years")))+
15 |   scale_y_log10(breaks=c(1,2,5))+scale_x_continuous(breaks=c(25,50,75))+
16 |   scale_color_jco()+theme_classic(base_size = 13)+
17 |   theme(legend.title=element_blank(),legend.margin=margin(0,0,0,0),
18 |         strip.background=element_blank(),
19 |         legend.position=c(.45,.91),legend.key.height=unit(.7,'lines'))
20 | ggsave("~/Results/tutorial/incidCML.pdf",width=4,height=3)
21 | 
22 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/print.seerSet.summary.Rd:
--------------------------------------------------------------------------------
 1 | \name{print.seerSet.summary}
 2 | \alias{print.seerSet.summary}
 3 | \title{Prints seerSet.summary objects}
 4 | \description{Renders data.frame of cases and median ages and
 5 | survival times with a title above and notes below it. Also qplots PY versus years.}
 6 | \usage{
 7 | \S3method{print}{seerSet.summary}(x, \dots)
 8 | }
 9 | %\usage{print.seerSet.summary(x, ...)} 
10 | \arguments{
11 |   \item{x}{seerSet.summary object produced by summary.seerSet(). }
12 |   \item{...}{Included to match arg list of generic print. }
13 | }
14 | 
15 |    
16 | \value{None.  }
17 |   
18 | 
19 | \author{Tom Radivoyevitch (radivot@ccf.org)}
20 | \seealso{\code{\link{SEERaBomb-package}, \link{summary.seerSet}, \link{seerSet}} }
21 | \examples{
22 | \dontrun{ 
23 | library(SEERaBomb)
24 | load("~/data/SEER/mrgd/cancDef.RData") #load in canc
25 | load("~/data/SEER/mrgd/popsae.RData") # load in popsae
26 | canc=canc\%>\%select(casenum,race:histo3,surv,cancer,trt,id) 
27 | popsa=popsae\%>\%group_by(db,race,sex,age,year)\%>\%summarize(py=sum(py)) # sum on regs
28 | pm=seerSet(canc,popsa,Sex="male",ageStart=0,ageEnd=100) #pooled (races) male seerSet
29 | pm # no print method for seerSet object, so we see the list
30 | summary(pm) # print method for summary renders the summary and a plot of PY
31 | }
32 | }
33 | 
34 | \keyword{internal}
35 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/incidAbomb.R:
--------------------------------------------------------------------------------
 1 | incidAbomb=function(d) {
 2 |   py=O=.=NULL
 3 |   # load("~/data/abomb/abomb.RData")
 4 |   # head(heme,2)
 5 |   # library(tidyverse)
 6 |   # # (d=heme%>%select(ageG:DG,py,AML=AMLtot,ALL,CML))
 7 |   # (d=heme%>%select(ageG:DG,D,py,AML=AMLtot,ALL,CML))
 8 |   # d=d%>%group_by(ageG,DG)
 9 | 
10 |   nms=names(d)
11 |   if (!"DG"%in%nms) stop("Dose group column DG must be in d.")
12 |   if (!"py"%in%nms) stop("Person-years column py must be in d.")
13 |   end1=which(nms=="DG")
14 |   start3=which(nms=="py")
15 |   n=length(nms)
16 |   hd=nms[1:end1]
17 |   sums=nms[start3:n]
18 |   cancers=sums[-1]
19 |   if ("upy"%in%nms) cancers=cancers[-1]
20 |   if ("subjects"%in%nms) cancers=cancers[-1]
21 |   (mnsBlkNotEmpty=start3>(end1+1))
22 |   if (mnsBlkNotEmpty) means=nms[(end1+1):(start3-1)]
23 |   (ds=d%>%summarize_at(vars(sums),funs(sum)))
24 |   if (mnsBlkNotEmpty) (dm=d%>%summarize_at(vars(means),funs(weighted.mean(.,w=py))))
25 |   if (mnsBlkNotEmpty) D=left_join(dm,ds) else #like bind_cols() but removes extra col copies 
26 |     D=ds
27 |   options(warn=-1) #warning about column labels expected in next line
28 |   D=D%>%tidyr::gather(value="O",key="cancer",cancers)#O=Observed
29 |   options(warn=0)
30 |   D$cancer=factor(D$cancer,levels=cancers)#set order
31 |   tibble::as_tibble(D%>%mutate(py=py/1e5,I=O/py,LL=qpois(0.025,O)/py,UL=qpois(0.975,O)/py))
32 | }
33 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/getE.Rd:
--------------------------------------------------------------------------------
 1 | \name{getE}
 2 | \alias{getE}
 3 | \title{Get expected numbers of cases  }
 4 | \description{Converts list of matrices produced by post1PYO into a matrix that corresponds to the matrix of observed cases 
 5 | produced by post1PYO. }
 6 | \usage{getE(LPYM,D,ageStart,ageEnd,yearEnd,firstS,secondS)} 
 7 | \arguments{
 8 |   \item{LPYM}{List of first cancer PY matrices produced by post1PYO.}
 9 |   \item{D}{D component of seerSet list produced by mk2D. }
10 |   \item{ageStart}{ageStart component of seerSet. }
11 |   \item{ageEnd}{ageEnd component of seerSet. }
12 |   \item{yearEnd}{The most recent year in the SEER datasets. }
13 |   \item{firstS}{Character vector of first cancers of interest; added to seerSet by tsd. }
14 |   \item{secondS}{Character vector of second cancers of interest; added to seerSet by mk2D. }
15 | }
16 | 
17 | \value{A matrix of expected cases with first cancers labeling the rows and second cancers labeling the columns.}
18 | 
19 | \examples{
20 | \dontrun{
21 | library(SEERaBomb)
22 | pm=simSeerSet()
23 | pm=mk2D(pm,secondS="B") 
24 | L=post1PYO(pm$canc,brks=c(0,2,5),binIndx=1,Trt="rad",yearEnd=2012 )
25 | names(L)
26 | names(pm)
27 | with(pm,getE(L$LPYM,D,ageStart,ageEnd,yearEnd,cancerS,secondS))
28 | }
29 | }
30 | 
31 | 
32 | \author{Tom Radivoyevitch (radivot@ccf.org)}
33 | \seealso{\code{\link{SEERaBomb-package}, \link{seerSet}} }
34 | \keyword{internal}
35 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/extraFuncs/mkPYM.c:
--------------------------------------------------------------------------------
 1 | #include <R.h>
 2 | #include <Rinternals.h>
 3 | 
 4 | SEXP mkPYM(SEXP py, SEXP age, SEXP year, SEXP nyears)
 5 |      {
 6 |        R_len_t nr;
 7 |        int i, j, quo,strtYrIndx,strtAgeIndx,*xnyears;
 8 |        double *xpy,*xage,*xyear, *xout, rem, pystrip;
 9 |        PROTECT(py = coerceVector(py, REALSXP));
10 |        PROTECT(age = coerceVector(age, REALSXP));
11 |        PROTECT(year = coerceVector(year, REALSXP));
12 |        PROTECT(nyears = coerceVector(nyears, INTSXP));
13 |        SEXP out = PROTECT(allocVector(REALSXP,5040 ));
14 |        nr = length(py);
15 |        xpy = REAL(py);xage = REAL(age);xyear = REAL(year); xnyears = INTEGER(nyears); xout = REAL(out);
16 | /*       memset(xout, 0, 5040 * sizeof(double)); */
17 |        for(i = 0; i < 5040; i++) xout[i] = 0.0;  
18 |     for (i = 0; i < nr ; i++) {
19 |      pystrip=xpy[i];
20 |      rem = pystrip-floor(pystrip);
21 |      quo = floor(pystrip);
22 |      strtAgeIndx=roundl(xage[i]+.5)-1;
23 |      strtYrIndx=roundl(xyear[i]-1972)-1;
24 |      for (j = 0; j <= quo ; j++)  
25 |       if ( ((strtAgeIndx+j)<126) & ((strtYrIndx+j)<*xnyears)) {
26 |         if (j==quo) 
27 |          xout[strtAgeIndx+j+126*(strtYrIndx+j)]+=rem;
28 |         else
29 |          xout[strtAgeIndx+j+126*(strtYrIndx+j)]+=1;
30 |       }
31 |           
32 |     }  
33 |        UNPROTECT(5);
34 |        return(out);
35 | }
36 | 
37 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mkDemographics.Rd:
--------------------------------------------------------------------------------
 1 | \name{mkDemographics}
 2 | \alias{mkDemographics}
 3 | \title{Make Demographics Tables}
 4 | \description{Provides, in an Excel file, quartiles of age at diagnoses  in one sheet and median overall survival times on a second. Many tables are placed in each sheet. One Excel file is produced per cancer type.}
 5 | \usage{mkDemographics(canc,outDir="~/Results/SEERaBomb")} 
 6 | \arguments{
 7 |   \item{canc}{A dataframe that includes cancer, age at diagnosis (agedx), age (grouped agedx), 
 8 |   race, sex, year (grouped), COD, surv, and trt.  }
 9 |   \item{outDir}{Folder of the Excel file(s) that will be generated.}
10 | }
11 | 
12 | \value{ Returned invisibly is a list of data frames corresponding to tables of the Excel file(s).}
13 | 
14 | \author{Tom Radivoyevitch (radivot@ccf.org)}
15 | \seealso{\code{\link{SEERaBomb-package}} }
16 | 
17 | \examples{
18 | \dontrun{
19 | library(SEERaBomb)
20 | rm(list=ls())
21 | load("~/data/SEER/mrgd/cancDef.RData")
22 | canc$year=cut(canc$yrdx,c(1973,2003,2009,2015),include.lowest = T,dig.lab=4)
23 | canc$age=cut(canc$agedx,c(0,40,50,60,70,80,90,126),include.lowest = T)
24 | canc=canc\%>\%filter(surv<9999)
25 | canc=canc\%>\%select(-age86,-radiatn,-chemo,-db,-casenum,-modx,-seqnum,-yrbrth,-ICD9,-reg,-histo3)
26 | canc=canc\%>\%filter(cancer\%in\%c("AML","MDS","MPN"))
27 | head(canc,3)
28 | mkDemographics(canc)
29 | }
30 | }
31 | 
32 | \keyword{IO}
33 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/p2s.Rd:
--------------------------------------------------------------------------------
 1 | \name{p2s}
 2 | \alias{p2s}
 3 | \title{Primary to Secondary}
 4 | \description{Using a SEER data frame, this function computes times between primary and secondary cancers. In the resulting data frame, surv and status can be analyzed at the individual level, e.g. using Cox regression. 
 5 | }
 6 | \usage{p2s(canc,firstS,secondS,yrcut=2010) } 
 7 | \arguments{
 8 |   \item{canc}{Data frame produced by mkSEER().  }
 9 |   \item{firstS}{Vector of names (as Strings) of first cancers you wish to consider. }
10 |   \item{secondS}{Vector of names of second cancers you wish to consider. }
11 |   \item{yrcut}{Only cases diagnosed in yrcut or newer are analyzed. The default of 2010 is the year AML cases after MDS
12 |   began to be entered into SEER as second cancers; before they were considered to be part of the first cancer.
13 |   This function facilitates studies of the rate at which myeloid neoplasms such as MDS progress to AML. }
14 | }
15 | 
16 | \value{Data frame with a row for each primary (first cancer) diagnosed on or after yrcut. The surv column holds the
17 | time in months to last follow up or death (status=0), or to the time of diagnosis of the second cancer (status=1). 
18 |      }
19 | 
20 | \author{Remco J. Molenaar (r.j.molenaar@amc.uva.nl )}
21 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D},\link{seerSet}} }
22 | 
23 | \examples{
24 | \dontrun{
25 | # 
26 | }
27 | }
28 | 
29 | \keyword{IO}
30 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/renal/incidNdemog/csd.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(tidyverse);library(SEERaBomb);library(ggsci)
 3 | library(survival);library(survminer);library(bbmle)
 4 | load("~/data/SEER/mrgd/cancPrim.RData")#load SEER cancer data
 5 | load("~/data/SEER/mrgd/popsae.RData")
 6 | canc$cancer=as.character(canc$cancer)
 7 | canc[(canc$primsite%in%c("C649"))&(canc$histo3%in%8310),"cancer"]="ccRCC"  
 8 | secs=c("ccRCC")#second cancer of interest
 9 | pf=seerSet(canc,popsae,Sex="Female");pm=seerSet(canc,popsae,Sex="Male") 
10 | pf=mk2D(pf,secondS=secs);pm=mk2D(pm,secondS=secs)
11 | # plot2D(pf) # these stopped working with R4.0 so skip for now
12 | # plot2D(pm)
13 | mybrks=c(0,1,2,3,5,10)
14 | pf=csd(pf,brkst=mybrks)
15 | pm=csd(pm,brkst=mybrks)
16 | (lab=paste0("b",paste(mybrks,collapse="_")))
17 | mkExcelCsd(pf,lab,outDir="uveal/outs",outName="csdF",flip=T)
18 | mkExcelCsd(pm,lab,outDir="uveal/outs",outName="csdM",flip=T)
19 | 
20 | DF=bind_rows(pf$DF,pm$DF)
21 | D=DF%>%filter(cancer1!="ccRCC")%>%group_by(int)%>%summarize(O=sum(O),E=sum(E),t=mean(t,na.rm=T))
22 | D=D%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))
23 | D
24 | source("uveal/common/acros.R")
25 | gx=xlab("Years Since 1st Cancer Dx")
26 | gy=ylab("Relative Risk of ccRCC")
27 | D%>%ggplot(aes(x=t,y=RR))+gp+gl+gx+gy+geRR+tc(14)
28 | ggsave("renal/outs/csd.pdf",width=4,height=3)
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/mkLT.R:
--------------------------------------------------------------------------------
 1 | mkLT<-function(mrtHome="~/data/usMort",input="mrt.RData",output="ltb.RData"){
 2 |   # mrtHome="~/data/usMort";input="mrt.RData";output="ltb.RData"
 3 |   mrt=NULL
 4 |   mrtHome=path.expand(mrtHome)
 5 |   if(!dir.exists(mrtHome)) stop("The directory mrtHome does not exist. Use mkMrt to create it and place mortality data in it.")
 6 |   inp=file.path(mrtHome,input) 
 7 |   out=file.path(mrtHome,output) 
 8 |   outXL=file.path(mrtHome,"LT.xls") 
 9 |   load(inp)#loads US mortality data
10 |   ltb=NULL
11 |   for (sex in c("Male","Female")){
12 |     # (M=mrt$Female)
13 |     (M=mrt[[sex]])
14 |     (LT=M) # replace rates with Esurvs
15 |     aN=dim(LT)[1]
16 |     yN=dim(LT)[2]
17 |     #need to slap on copies of final column to right for future of the young 
18 |     (Mfill=matrix(M[,yN],nrow=111,ncol=111))
19 |     (Mbig=cbind(M,Mfill))
20 |     for (i in 1:aN) 
21 |       for (j in 1:yN) 
22 |       {
23 |         Palive=1
24 |         # cat("i=",i,"j=",j,"k=",k,"P=",Palive,"\n")
25 |         for (k in 0:(aN-i)) 
26 |         {
27 |           Palive=Palive*(1-Mbig[i+k,j+k])
28 |           if (Palive<0.5) break
29 |         }
30 |         # k=k+(0.5-Palive)
31 |         LT[i,j]=k#-1/log(Palive)
32 |       }
33 |     ltb[[sex]]=as.data.frame(LT)
34 |   }
35 |   save(ltb,file=out)
36 |   WriteXLS(ltb,outXL,row.names = T,FreezeRow = 1, FreezeCol = 1)
37 |   cat("The lifetable list ltb has been written to the files ",out," and ",outXL,"\n")
38 | }
39 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/esd.Rd:
--------------------------------------------------------------------------------
 1 | \name{esd}
 2 | \alias{esd}
 3 | \title{Event vs years Since Diagnosis}
 4 | \description{Computes relative risks (RR) of second cancers over specified years-since-diagnosis intervals. 
 5 | SEER incidence rates are used to compute background/expected numbers of cases E, sex, age, and calendar year
 6 | specifically.  RR = O/E where O and E are the numbers of observed and expected cases. }
 7 | \usage{esd(d,srfF,srfM,brkst=c(0,2,5),brksy=NULL) } 
 8 | \arguments{
 9 |   \item{d}{Input data.frame with columns: yrdx, agedx, sex, py at risk (in years), cancer1, and cancer2.
10 |   Cancer1 and cancer2 should use standard SEERaBomb cancer names, see mapCancs. Cases
11 |   not ending in a second cancer should have cancer2 set to "none".}
12 |   \item{srfF}{Female incidence surface. Output D of mk2D for females, for cancers in cancer2 }
13 |   \item{srfM}{Male incidence surface. Output D of mk2D for males, for cancers in cancer2 }
14 |   \item{brkst}{Vector of breaks in years used to form times since diagnosis intervals/bins. }
15 |   \item{brksy}{Vector of breaks of calendar years to show  trends. Leave NULL for all in one. }
16 | }
17 | \value{data.frame with observed and expected cases, RR, and RR CI for each time since diagnosis interval. }
18 | \note{ This function was developed with support from the Cleveland Clinic Foundation.}
19 | \author{Tom Radivoyevitch (radivot@ccf.org)}
20 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D},\link{msd}} }
21 | 
22 | \keyword{IO}
23 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/fillPYM.Rd:
--------------------------------------------------------------------------------
 1 | \name{fillPYM}
 2 | \alias{fillPYM}
 3 | \alias{SEERaBomb_fillPYM}
 4 | 
 5 | \title{Fills age-year person year (PY) matrix}
 6 | \description{This internal function converts a matrix with lots of individual PY contributions and starting ages and years as rows, 
 7 | into a population-level PY histrogram matrix of (1-year age)x(1-year year) resolution bins. The output matrix is
 8 | more square-like (currently at 40 calendar years by 126 age years) than the tall dataframe-like input matrix.
 9 | }
10 | \usage{
11 | fillPYM(PYin,PYM)
12 | %SEERaBomb_fillPYM(PYin,PYM)
13 | }
14 | 
15 | \arguments{
16 |   \item{PYin}{Tall input matrix where rows hold individual PY at risk, year and starting (left) ages ageL.  }
17 |   \item{PYM}{Output PY matrix with age and calendar year rows and columns at single year resolution.  }
18 | }
19 | 
20 | \value{The second argument becomes the output. This matrix should be filled with zeros in R before calling this 
21 | function. After the call, it will be filled; this function uses pointers in Rcpp C++ code. }
22 | 
23 | \author{Tom Radivoyevitch (radivot@ccf.org)}
24 | 
25 | 
26 | \examples{
27 |   yrs=1975:1990
28 |   ages=0.5:70.5
29 |   PYM=matrix(0,ncol=length(yrs),nrow=length(ages))
30 |   colnames(PYM)=yrs
31 |   rownames(PYM)=ages
32 | (PYin=structure(c(3.5, 11.25,5.2, 51.5, 58.5,0.75, 1976, 1977,1980),.Dim = c(3L,3L), 
33 |                   .Dimnames=list(c("1","2","3"),c("py", "ageL", "year"))))
34 | fillPYM(PYin, PYM)
35 | }
36 | \keyword{internal}
37 | 
38 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/esd.R:
--------------------------------------------------------------------------------
 1 | esd=function(d,srfF,srfM,brkst=c(0,2,5),brksy=NULL){ 
 2 |   esd2=function(d,srfF,srfM,frst,sec,brkst,brksy){ #event since diagnosis (esd)
 3 |     Eincid=age=cancer=cancer1=year=NULL
 4 |     Xf=reshape2::acast(srfF%>%filter(cancer==sec)%>%select(year,age,Eincid), age~year, value.var="Eincid")
 5 |     Xm=reshape2::acast(srfM%>%filter(cancer==sec)%>%select(year,age,Eincid), age~year, value.var="Eincid")
 6 |     extendX=function(X) { #extend incidence D matrices to be just like mrt matrices
 7 |       nrows=dim(X)[1]
 8 |       ncols=dim(X)[2]
 9 |       Bot=matrix(rep(X[nrows,],11),ncol=ncols,byrow=T)
10 |       X=rbind(X,Bot)
11 |       if (ncols<30) {  # i.e. 13 for MDS this year, extend left, else leave alone
12 |         Left=matrix(rep(X[,1],28),nrow=111)
13 |         X=cbind(Left,X)
14 |       }
15 |       colnames(X)=1975:2016
16 |       rownames(X)=0:110
17 |       rownames(X)[111]="110+"
18 |       X/1e5
19 |     }
20 |     Xm=extendX(Xm)
21 |     Xf=extendX(Xf)
22 |     mrt=list(Female=Xf,Male=Xm)
23 |     d=d%>%filter(cancer1==frst)
24 |     d$cancer1=NULL
25 |     d$surv=d$py
26 |     d$py=NULL
27 |     d$status=0
28 |     d$status[d$cancer2==sec]=1
29 |     d$cancer2=NULL
30 |     # head(d)
31 |     msd(d,mrt,brkst,brksy)
32 |   }
33 |   frstS=unique(d$cancer1)
34 |   secS=setdiff(unique(d$cancer2),"none")
35 |   D=NULL
36 |   # debug(esd2)
37 |   for (i in frstS) 
38 |     for (j in secS)
39 |       D=rbind(D,cbind(esd2(d,srfF,srfM,frst=i,sec=j,brkst,brksy),cancer1=i,cancer2=j))
40 |   D
41 | }
42 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/attic/seerEARmort/figure1.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | load("uveal/data/allfields2019.RData")
 3 | library(dplyr)
 4 | d$CODS=as.character(d$CODS)
 5 | d=d%>%mutate(CODS=ifelse(!CODS%in%c("alive","melanoma","eye"),"other",CODS))
 6 | d$CODS[d$CODS=="eye"]="melanoma"
 7 | #Figure 1
 8 | d=d%>%mutate(status=as.numeric(COD>0),surv=(surv+0.5)/12)
 9 | d=d%>%mutate(survG=cut(surv,c(0,5,10,15,20,25,30,45),
10 |                        dig.lab=4,include.lowest=T,right=F))
11 | m=d
12 | table(m$survG,m$CODS) # make this in dplyr
13 | library(tidyr)
14 | (fg1=m%>%group_by(survG,CODS)%>%summarize(n=n()))
15 | (fg1=fg1%>%spread(CODS,n))
16 | # (fg1=fg1%>%mutate(n=other+melanoma))
17 | # (fg1=fg1%>%mutate(frc=other/n))
18 | # fg1%>%group_by()%>%summarize(M=sum(melanoma),O=sum(other),P=O/(M+O))
19 | library(WriteXLS)
20 | WriteXLS(list(Fig1=fg1),ExcelFileName="uveal/outs/fig1.xlsx")
21 | # fraction of deaths by UVM in all vs SEER9
22 | (tb=table(d$CODS))
23 | sum(tb[2:3])
24 | tb[2]/sum(tb[2:3])
25 | d9=d%>%filter(db=="73")
26 | (tb=table(d9$CODS))
27 | sum(tb[2:3])
28 | tb[2]/sum(tb[2:3])
29 | table(d$db)
30 | ######### skip below
31 | 
32 | # 
33 | # 
34 | # 
35 | # 
36 | # range(d$surv)
37 | # 
38 | # d=d%>%mutate(yrgrp=cut(yrdx,c(1975,1983,1993,2003,2016),
39 | #                        dig.lab=4,include.lowest=T,right=F)) 
40 | # table(d$yrgrp)
41 | # names(d)
42 | # (tb=table(d$trt))
43 | # # (tb=table(d$nosurg,d$trt)) # surg field is a nightmare
44 | # # (tb=table(d$nosurg))
45 | # # (tb=table(d$radsurg))
46 | # d$surv
47 | # table(d$CODS)
48 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/mkMrtLocal.R:
--------------------------------------------------------------------------------
 1 | mkMrtLocal=function(country="USA",mrtHome="~/data/mrt",
 2 |                     mrtSrc1="~/data/hmd_countries",
 3 |                     mrtSrc2="~/data/hmd_statistics/death_rates/Mx_1x1"
 4 |                     ){
 5 |   Year=NULL
 6 |   # country="USA";mrtHome="~/data/mrt"
 7 |   # mrtSrc1="~/data/hmd_countries"
 8 |   # mrtSrc2="~/data/hmd_statistics/death_rates/Mx_1x1"
 9 |   mrtSrc1=path.expand(mrtSrc1)
10 |   mrtSrc2=path.expand(mrtSrc2)
11 |   f1<-file.path(mrtSrc1,country,"STATS","Mx_1x1.txt")
12 |   f2<-file.path(mrtSrc2,paste0(country,".Mx_1x1.txt"))
13 |   
14 |   if(file.exists(f1)) {f=f1; cat("Using  input file:",f,"\n")} else
15 |     if(file.exists(f2)) {f=f2; cat("Using  input file:",f,"\n")} else stop("Cannot find local Human Mortality Data!") 
16 |   X=read_table(f,skip=2,na=".")
17 |   # X=X%>%filter(Year<2021)  # change this every year to sync with SEER
18 |   X$Age[X$Age=="110+"]="110"
19 |   X$Age=as.numeric(X$Age)
20 |   # X$Male=as.numeric(X$Male)
21 |   # X$Female=as.numeric(X$Female)
22 |   F=X[,1:3]
23 |   M=X[,c(1:2,4)]
24 |   names(M)[3]="rate"
25 |   names(F)[3]="rate"
26 |   spreadit=function(M) {
27 |     M=spread(M,key=Year,value="rate")
28 |     M$Age=NULL
29 |     M=as.matrix(M)
30 |     row.names(M)=c(0:109,"110+")
31 |     M
32 |   }
33 |   M=spreadit(M)
34 |   F=spreadit(F)
35 |   mrt=NULL
36 |   mrt$Female=F
37 |   mrt$Male=M
38 |   
39 |   mrtHome=path.expand(mrtHome)
40 |   if(!dir.exists(mrtHome)) dir.create(mrtHome,recursive=T)
41 |   save(mrt,file=f<-file.path(mrtHome,paste0("mrt",country,".RData")))
42 |   cat("The dataframe mrt has been written to:",f,"\n")
43 | }
44 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/ageTherapyEx4.R:
--------------------------------------------------------------------------------
 1 | ###ageTherapyEx4.R
 2 | d=incidSEER(canc,popsae,secs)
 3 | d=d%>%filter(age<=85,year>=2000)
 4 | d=d%>%mutate(ageG=cut(age,seq(0,85,5)))
 5 | d=d%>%group_by(cancer,ageG)%>%
 6 |   summarize(age=mean(age),py=sum(py),n=sum(n))%>%
 7 |   mutate(incid=n/py,grp="Background")
 8 | d=d%>%select(cancer,grp,everything(),-ageG)#reorder columns
 9 | #the next 3 lines defines a set of non-heme-malignacies
10 | HM=c(secs,"MDS","CMML","MPN","CLL","HCL","OL","NHL","MM","HL","LGL")
11 | sc=canc%>%filter(agedx<50,yrdx<2000)#based on frequency at younger ages
12 | (NHM=setdiff(names(sort(table(sc$cancer),decr=T)[1:8]),HM))#non-hemes
13 | brksa=c(0,40,50,60,70,80)#broad 1st interval avoids 0 CML groups
14 | system.time(D<-riskVsAge(canc,firstS=NHM,secondS=secs,brksa=brksa))#~36s
15 | D=D%>%filter(rad!="Unk",chemo!="Unk")
16 | D=D%>%group_by(cancer2,rad,chemo,age)%>%
17 |   summarize(py=sum(py),n=sum(o),incid=n/py)
18 | D=D%>%rename(cancer=cancer2)%>%unite(grp,rad,chemo,sep=", ")
19 | dd=bind_rows(D,d)%>%filter(age>=20)
20 | ord=c("Rad, Chemo","No Rad, Chemo","Rad, No Chemo",
21 |       "No Rad, No Chemo","Background")
22 | dd$grp=factor(dd$grp,levels=ord)
23 | dd$cancer=factor(dd$cancer,levels=secs)
24 | myt=theme(legend.position=c(.7,.83),legend.key.height=unit(.65,'lines'))
25 | dd=dd%>%mutate(LL=qpois(0.025,n)/py,UL=qpois(0.975,n)/py)#make CI
26 | dd=dd%>%mutate(ages=age+(as.numeric(grp)-3))#shift ages to see CI
27 | dd%>%ggplot(aes(x=ages,y=incid,col=grp))+gl+facet_grid(~cancer)+gxi+
28 |   agts+gyi+jco+sy+tc(11)+ltb+sbb+myt+ge
29 | ggsave("~/Results/tutorial/ageTherapyEx4.pdf",width=3.5,height=2.5)
30 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mkMrtLocal.Rd:
--------------------------------------------------------------------------------
 1 | \name{mkMrtLocal}
 2 | \alias{mkMrtLocal}
 3 | \title{Make mortality binaries from local HMD data files}
 4 | \description{Converts locally installed Human Mortality Data \url{https://www.mortality.org/} into an R binary file
 5 |  \file{mrtCOUNTRY.RData}.}
 6 | \usage{mkMrtLocal(country="USA",mrtHome="~/data/mrt",
 7 |                     mrtSrc1="~/data/hmd_countries",
 8 |                     mrtSrc2="~/data/hmd_statistics/death_rates/Mx_1x1"
 9 |                     )}
10 | 
11 | \arguments{
12 |   \item{country}{Default is USA. See names of subfolders of \file{hmd_countries} for other options.}
13 |   \item{mrtHome}{Directory that will contain the mortality data binary.  Should be writable by user.}
14 |   \item{mrtSrc1}{Directory with hmd_countries data (first choice of files = "all HMD countries").}
15 |   \item{mrtSrc2}{Directory with hmd_statistics data (second choice of files = "all HMD statistics").}
16 | }
17 | 
18 | \value{None. This function is called for its side-effect of producing \file{mrt.RData} from HMD files
19 |       organized as all HMD countries or all HMD statistics on the HMD download page (you need at least one of these). }
20 | \references{ Barbieri M, Wilmoth JR, Shkolnikov VM, et al. Data Resource Profile: 
21 |              The Human Mortality Database (HMD). Int J Epidemiol. 2015;44: 1549-1556.}
22 | \author{ Tom Radivoyevitch (\email{radivot@ccf.org}) }
23 | \seealso{\code{\link{SEERaBomb-package},\link{mkSEER}} }
24 | \examples{
25 | \dontrun{
26 | library(SEERaBomb)
27 | mkMrtLocal() 
28 | load("~/data/mrt/mrtUSA.RData")
29 | head(mrt$Female)
30 | }
31 | }
32 | \keyword{IO}
33 | 


--------------------------------------------------------------------------------
/SEERaBomb/NAMESPACE:
--------------------------------------------------------------------------------
 1 | useDynLib(SEERaBomb, .registration = TRUE)
 2 | #import(dbplyr)  ## too many warnings, so let user of mkSEER figure out to install this
 3 | #import(data.table)  ## depends
 4 | import(dplyr)  ## depends
 5 | import(ggplot2)  ## depends
 6 | #import(rgl)  ## depends
 7 | #import(tibble)  ## depends
 8 | #import(demography)  ## depends
 9 | #import(LaF,RSQLite,rgl,XLConnect) ## needed less critically but for too many functions to list
10 | #import(LaF,RSQLite,rgl,openxlsx,labelled) ## needed less critically but for too many functions to list
11 | import(LaF,RSQLite,openxlsx,labelled) ## needed less critically but for too many functions to list
12 | importFrom(Rcpp, evalCpp)
13 | importFrom(demography, hmd.mx)
14 | importFrom(survival, survfit,Surv)
15 | #importFrom(data.table, as.data.table,=)  # didn't fly
16 | importFrom(DBI, dbDriver)
17 | importFrom(mgcv, gam,s,ti)
18 | importFrom(plyr, ldply)
19 | importFrom(WriteXLS, WriteXLS)
20 | importFrom(reshape2, melt, acast,dcast)
21 | importFrom(scales, comma_format)
22 | importFrom(forcats, as_factor,fct_collapse,fct_drop)
23 | importFrom(purrr, map,pmap)
24 | importFrom(readr, read_table)
25 | importFrom(tidyr, separate,gather,spread,unnest)
26 | importFrom(tibble, as_tibble)
27 | importFrom(stringr, str_replace_all,str_to_title,str_detect)
28 | importFrom("stats","quantile", "qpois", "D", "median", "qchisq", "qt", "rexp", "rpois", "runif", "var", "weighted.mean","lm","predict")
29 | importFrom("utils", "data", "head", "read.csv", "tail")
30 | exportPattern("^[[:alpha:]]+")
31 | ##S3method(autoplot,seerSet)
32 | S3method(summary,seerSet)
33 | S3method(print,seerSet.summary)
34 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/examples/mort.R:
--------------------------------------------------------------------------------
 1 | # extends cmlMort.R to 3x2 AML-ALL-CML tables of plots
 2 | graphics.off();rm(list=ls())
 3 | library(SEERaBomb)
 4 | load("~/data/SEER/mrgd/cancDef.RData")
 5 | leus=c("AML","ALL","CML")
 6 | d=canc%>%filter(cancer%in%leus)%>%mutate(cancer=factor(cancer,leus))
 7 | d=d%>%mutate(status=as.numeric(COD>0),surv=(surv+0.5)/12)
 8 | (d=d%>%select(yrdx,agedx,sex,surv,status,cancer)%>%group_by(cancer))
 9 | load("~/data/usMort/mrt.RData")#loads mrt
10 | (D=d%>%do(msd(.,mrt,brkst=c(0,0.5,1,2,3,4,5,6,8),brksy=c(1973,1990,2005,2015)))) 
11 | (g=qplot(x=t,y=RR,data=D,col=Years,geom=c("line","point"),facets=sex~cancer,
12 |         xlab="Years Since CML Diagnosis",ylab="Relative Risk of Mortality"))
13 | (g=g+scale_x_continuous(breaks=seq(0,15,5))+scale_color_jco())
14 | (g=g+theme(legend.position="top",legend.title=element_blank()))
15 | (g=g+geom_abline(intercept=1,slope=0)+ylim(c(0,NA)))
16 | g+geom_errorbar(aes(ymin=rrL,ymax=rrU),width=.2)
17 | ggsave("~/Results/tutorial/mortRR.pdf",width=4.5,height=3)
18 | 
19 | labs=c("1973-1990","1991-2005","2006-2015")
20 | d=d%>%mutate(yrg=cut(yrdx,c(1972,1990,2005,2015),labels=labs))%>%print(n=13)
21 | library(survival);library(survminer)
22 | levels(d$cancer)
23 | fit=survfit(Surv(surv,status)~yrg+sex+cancer,data=d) 
24 | levels(d$cancer)
25 | ggsurvplot_facet(fit,d,facet.by=c("sex","cancer"),ylab="Survival Probability",
26 |                  xlab="Years Since CML Diagnosis",legend.title="",
27 |                  xlim=c(0,12),short.panel.labs=T)+
28 |   scale_x_continuous(breaks=seq(0,15,5))+scale_color_jco()
29 | ggsave("~/Results/tutorial/leuSurvTrends.pdf",width=4.5,height=3)
30 | 
31 | 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/leukemiaRes2016/prosBreastTimeCrs.R:
--------------------------------------------------------------------------------
 1 | # prosBreastTimeCrs.R   (RR time courses after Prostate and Breast first cancers)
 2 | rm(list=ls())
 3 | library(dplyr)
 4 | library(ggplot2)
 5 | library(reshape2)
 6 | library(SEERaBomb)
 7 | system.time(load("~/Results/CLL/pm.RData")) # 4 secs to load. 
 8 | system.time(load("~/Results/CLL/pf.RData")) # 4 secs to load. 
 9 | Dm=mkDF(pm)
10 | Df=mkDF(pf)
11 | Dm=Dm%>%filter(cancer1=="prostate")
12 | Df=Df%>%filter(cancer1=="breast")
13 | Dm$cancer="Prostate"
14 | Df$cancer="Breast"
15 | D=rbind(Dm,Df)
16 | D$Radiation="No"
17 | D$Radiation[D$trt=="rad"]="Yes"
18 | D$Period="Early"
19 | D$Period[D$t>5]="Late"
20 | D=D%>%select(int,t,RR,L=rrL,U=rrU,Radiation,Period,cancer)
21 | head(D)
22 | 
23 | graphics.off()
24 | quartz(width=7,height=3.7)
25 | 
26 | theme_set(theme_bw())
27 | theme_update(legend.position = c(.65, .45),
28 |              axis.text=element_text(size=rel(1.2)),
29 |              axis.title=element_text(size=rel(1.3)),
30 |              legend.title=element_text(size=rel(0.9)),
31 |              legend.text=element_text(size=rel(0.9)),
32 |              strip.text = element_text(size = rel(1.5)))
33 | g=qplot(x=t,y=RR,data=D,col=Radiation,geom=c("line","point"),#xlim=c(-.1,24),
34 |         xlab="Years Since Diagnosis of First Cancer",ylab="CLL Relative Risk")
35 | g=g+scale_y_log10(breaks=c(0.3,1,10),labels=c("0.3","1","10"),limits=c(0.15,30))
36 | g=g+facet_grid(cancer~Period,scales="free")+geom_abline(intercept=0, slope=0)
37 | g=g+  geom_errorbar(aes(ymin=L,ymax=U,width=.05))
38 | g = g + scale_color_grey(start = 0, end = 0.6)
39 | g
40 | ggsave("~/Results/CLL/prosBrsTimCrs.eps")  
41 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mk2D.Rd:
--------------------------------------------------------------------------------
 1 | \name{mk2D}
 2 | \alias{mk2D}
 3 | \title{Make 2D-spline fits of incidences}
 4 | \description{Produces two dimensional (2D) spline 
 5 | fits of cancer incidence versus age and calendar year, with interactions. In conjunction with person years 
 6 | (PY) at risk, this 
 7 | is used in csd() to produce expected numbers of cases under a null hypothesis 
 8 | that prior cancers do not impact subsequent cancer risks.}
 9 | \usage{mk2D(seerSet, knots=5, write=FALSE, outDir="~/Results",txt=NULL,secondS=NULL)} 
10 | \arguments{
11 |   \item{seerSet}{Object of class seerSet, i.e. output list of seerSet(). }
12 |   \item{knots}{Base number of knots; overrides are in place for some cancers.  }
13 |   \item{write}{TRUE = write 2D fits to files. The fits can be >300 MB and take >60 seconds to write, 
14 |   so leave FALSE unless you need it.}
15 |   \item{outDir}{Folder that will hold the output files.}
16 |   \item{txt}{Additional text to distinguish files with different cancer lists. This may be 
17 |   useful during spline fit development. }
18 |   \item{secondS}{Charcter vector of second cancers of interest (note: I often capitalize 
19 |   the final S of vectors of Strings).}
20 | }
21 | 
22 | \value{The input seerSet with an additional data frame D added to this list. 
23 | D holds background/expected incidences over a 1-year resolution age-year grid. }
24 | 
25 | \author{Tom Radivoyevitch (radivot@ccf.org)}
26 | \seealso{\code{\link{SEERaBomb-package}, \link{plot2D}, \link{seerSet}} }
27 | 
28 | \examples{
29 | \dontrun{
30 | library(SEERaBomb)
31 | (pm=simSeerSet())
32 | (pm=mk2D(pm))
33 | names(pm)
34 | head(pm$D)
35 | tail(pm$D)
36 | }
37 | }
38 | 
39 | 
40 | \keyword{IO}
41 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/leukemia2016/amlAbombAge.R:
--------------------------------------------------------------------------------
 1 | # amlAbombAge.R  (Figure 2J)
 2 | library(dplyr)
 3 | # library(SEERaBomb)
 4 | # mkAbomb() #~\data\abomb\lsshempy.csv, lssinc07.csv=> tables heme and solid in abomb.db
 5 | db <- src_sqlite("~/data/abomb/abomb.db")
 6 | d=collect(tbl(db, sql("SELECT * from heme")))%>%   
 7 |   mutate(Dose=cut(D,c(-1,.01,.4,10),labels=c("Low","Medium","High"),include.lowest=TRUE)) %>%
 8 |   mutate(agec=cut(age,c(seq(0,80,20),110),labels=seq(10,90,20))) %>%
 9 |   group_by(Dose,agec) %>%
10 |   summarise(age=mean(age),py=sum(py)/1e5,AML=sum(AMLtot),
11 |             L=qpois(.025,AML),U=qpois(.975,AML),Incid=AML/py,IL=L/py,IU=U/py)
12 | d
13 | d$Dose=factor(d$Dose,levels=c("High","Medium","Low"))
14 | library(ggplot2)
15 | graphics.off()
16 | quartz(width=7,height=5)
17 | # theme_set(theme_bw(base_size = 18))
18 | theme_set(theme_gray(base_size = 16)) 
19 | theme_update(legend.position = c(.7, .2),
20 |              axis.text=element_text(size=rel(1.3)),
21 |              axis.title=element_text(size=rel(1.3)),
22 |              legend.title=element_text(size=rel(1.1)),
23 |              legend.text=element_text(size=rel(1.1)))
24 | g=qplot(x=age+0.5*(as.numeric(Dose)-1),y=Incid,data=d,col=Dose,log="y",ylab="Incidence (Cases/100,000 PY)",
25 |         geom=c("line","point"),xlab="Age")+geom_line(lwd=1.3)
26 | # g = g + scale_color_grey(start = 0.8, end = 0)
27 | g+  geom_errorbar(aes(ymin=IL,ymax=IU,width=.15))
28 | 
29 | ggsave("~/Results/amlMDS/abombAMLage.eps")  
30 | ggsave("~/Results/amlMDS/abombAMLage.png")  
31 | 
32 | # Side calculation
33 | (2.5/9)^0.5 # = whole body A-bomb dose equivalent of average radiation therapy AML carcinogenicity
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/summary.seerSet.Rd:
--------------------------------------------------------------------------------
 1 | \name{summary.seerSet}
 2 | \alias{summary.seerSet}
 3 | \title{Summary of seerSet object}
 4 | \description{Creates a data.frame of cases and median ages and
 5 | survival times for each cancer and treatment type. }
 6 | \usage{
 7 | \S3method{summary}{seerSet}(object, \dots)
 8 | }
 9 | %\usage{summary.seerSet(object, ...)} 
10 | \arguments{
11 |   \item{object}{seerSet object produced by seerSet(). }
12 |   \item{...}{Included to match arg list of generic summary. }
13 | }
14 | 
15 |    
16 | \value{A list that includes: a data.frame of cases, median ages at diagnosis, and
17 | survival times, in years, for each cancer and treatment type; a data.frame of person-years by year; and 
18 | smaller things such as a title, sex, race, and notes. The resulting list is set to class seerSet.summary 
19 | which has a print method. }
20 |   
21 | 
22 | \author{Tom Radivoyevitch (radivot@ccf.org)}
23 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D}, \link{plot2D}} }
24 | \examples{
25 | \dontrun{ 
26 | library(SEERaBomb)
27 | load("~/data/SEER/mrgd/cancDef.RData") #load in canc
28 | load("~/data/SEER/mrgd/popsae.RData") # load in popsae
29 | canc=canc\%>\%select(casenum,race:yrdx,surv,cancer,trt,id) 
30 | popsa=popsae\%>\%group_by(db,race,sex,age,year)\%>\%summarize(py=sum(py)) # sum on regs
31 | pm=seerSet(canc,popsa,Sex="male",ageStart=0,ageEnd=100) #pooled (races) male seerSet
32 | pm # no print method for seerSet object, so we see the list
33 | (x=summary(pm)) # print renders summary and plot of PY
34 | class(x)<-NULL #if you want to see the list as is, kill its class.
35 | x    # It then goes through the regular print method for lists.
36 | }
37 | }
38 | 
39 | \keyword{IO}
40 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/blood2012/blood2012fig4.R:
--------------------------------------------------------------------------------
 1 | aBombHome="~/data/abomb"
 2 | load(file.path(aBombHome,"Hema87.RData"));
 3 | names(d)
 4 | d=d[d$py>0,] #remove two recs with zero py
 5 | d=d[d$kerma==1,] # take only kerma < 4 Gy
 6 | d$py=10^4*d$py
 7 | d$calg=as.integer(cut(d$calg,c(0,2,4,6,8,10)))
 8 | head(d)
 9 | m=d[d$sex==1,]; f=d[d$sex==2,] 
10 | 
11 | flin<-function(x,df) {
12 |   c1=x[1];k=x[2];L=x[3:7];   		
13 |   with(df,{mn = exp(c1+k*age)*py + sv*exp(L[calg])*py;
14 |            -sum(CML*log(mn) - mn)})	}
15 | 
16 | # the idea here is to let the data speak through L, a bit like a one way anova
17 | X0=c(c1=-13,k=0.05,rep(-10,5))
18 | sol=optim(X0,flin,df=m,method="L-BFGS-B",hessian=TRUE,control=list(maxit=400))  
19 | waitm=exp(sol$par[3:7])
20 | sol=optim(X0,flin,df=f,method="L-BFGS-B",hessian=TRUE,control=list(maxit=400))  
21 | waitf=exp(sol$par[3:7])
22 | # if(length(grep("linux",R.Version()$os))) windows <- function( ... ) X11( ... )
23 | # windows(width=5,height=5)
24 | # dev.new(width=5,height=5)
25 | 
26 | par(mfrow=c(1,1),mar=c(4.7,0,2.3,0.2),lwd=3,cex.lab=1.8,cex.axis=1.7,cex.main=1.7,oma=c(0,4.5,1,0.7))
27 | years=c(1952,1958,1966,1976,1985)-1945
28 | plot(years,waitm,cex=2,pch=1,xlab="Years Since Exposure",ylab="",yaxt="n",col="blue",ylim=c(0,8.2e-4))
29 | points(years,waitf,pch=2,cex=2,col="red")
30 | legend(25,8.6e-4,c("Males","Females"),pch=1:2,col=c("blue","red"),cex=1.8,bty="n")
31 | mtext(expression(paste("Cases per ",10^4," Person-Year-Sv")),side=2,line=2.4,cex=1.8,outer=T)
32 | axis(side=2,las=1, at=c(0,2e-4,4e-4,6e-4,8e-4),labels=c(0,2,4,6,8),outer=T)
33 | title("IR-to-CML Latency",outer=T,line=-1)
34 | par(mar=c(5.1,4.1,4.1,2.1),oma=c(0,0,0,0)) # reset to standards
35 | 
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/attic/other/UVcsdAllSecs.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(tidyverse);library(SEERaBomb)
 3 | # load("~/data/SEER/mrgd/cancPrim.RData")#load SEER cancer data
 4 | # load("~/data/SEER/mrgd/popsae.RData")
 5 | # (codes=paste0("C",692:694))
 6 | # canc$cancer=as.character(canc$cancer)
 7 | # canc[(canc$primsite%in%codes)&(canc$histo3%in%8720:8790),"cancer"]="uveal"  
 8 | # pf=seerSet(canc,popsae,Sex="Female");pm=seerSet(canc,popsae,Sex="Male") 
 9 | # pf=mk2D(pf);pm=mk2D(pm) # all secs (takes time!!)
10 | # mybrks=c(0,1,2,3,5,10)
11 | # pf=csd(pf,brkst=mybrks)  #  (takes time!!)
12 | # pm=csd(pm,brkst=mybrks)  #  (takes time!!)
13 | # # (lab=paste0("b",paste(mybrks,collapse="_")))
14 | # # mkExcelCsd(pf,lab,outDir="uveal/outs",outName="csdFall")
15 | # # mkExcelCsd(pm,lab,outDir="uveal/outs",outName="csdMall")
16 | # # save(pm,pf,file="uveal/data/pmpf.RData")  # takes a while!!!
17 | load(file="uveal/data/pmpf.RData")  
18 | DF=bind_rows(pf$DF,pm$DF)
19 | DF=rad_noRad(DF)
20 | DF=foldDF(DF)
21 | UM=DF%>%filter(cancer1=="uveal")
22 | UM%>%group_by(trt)%>%summarize(O=sum(O))  # ~1100 second cancers after uveal
23 | (CML=UM%>%filter(cancer2=="CML"))
24 | CML%>%group_by(trt)%>%summarize(O=sum(O))  # none of which are CMLs ... skip idea of uveal rad causing cancer
25 | UM%>%filter(rrL>1.1,!cancer2%in%c("eye","melanoma","uveal","otherMalig","skinCIS"))%>%print(n=150)
26 | # liver likely mets of uveal, renal and thyroid likely overdiagnosis due to CT scans
27 | # MPN and CLL are interesting and perhaps related to immunological aspects of uveal control
28 | ### lung cancer is rad, are isotopes moving to the lung, or is this also due to CT scan detecting them early.
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/attic/abombLeu.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls())
 2 | library(SEERaBomb);library(ggsci)
 3 | myt=theme(legend.position=c(.52,.85),legend.title=element_blank(),
 4 |     legend.direction="vertical",legend.margin=margin(0,0,0,0),
 5 |     legend.key.height=unit(.7,'lines'),strip.background=element_blank())
 6 | gy=ylab(quote(paste("Cases per ",10^5," Person-Years")))
 7 | ge=geom_errorbar(aes(ymin=LL,ymax=UL),width=0.1);jco=scale_color_jco()
 8 | cc=coord_cartesian(ylim=c(.2,200));tc=theme_classic(base_size=12)
 9 | gp=geom_point();gl=geom_line();f=facet_wrap(~cancer);sy=scale_y_log10()
10 | load("~/data/abomb/abomb.RData")
11 | (d=heme%>%select(ageG:DG,age,agex,t,D,py,AML=AMLtot,ALL,CML))
12 | 
13 | gx=xlab("Attained Age (Years)")
14 | (dA=incidAbomb(d%>%group_by(ageG,DG)))
15 | ggplot(dA,aes(x=age,y=I,shape=DG,col=DG))+gp+gl+gx+sy+gy+ge+f+cc+tc+myt+jco
16 | ggsave("~/Results/tutorial/abombLeuAge.pdf",width=4,height=2.5)#Fig.6A
17 | 
18 | gx=xlab("Age at Time of Bombing (Years)")
19 | (dB=incidAbomb(d%>%group_by(agexG,DG)))
20 | ggplot(dB,aes(x=agex,y=I,shape=DG,col=DG))+gp+gl+gx+sy+gy+ge+f+cc+tc+myt+jco
21 | ggsave("~/Results/tutorial/abombLeuAgex.pdf",width=4,height=2.5)#Fig.6B
22 | 
23 | gx=xlab("Years Since Bombing")
24 | (dC=incidAbomb(d%>%group_by(tG,DG)))
25 | ggplot(dC,aes(x=t,y=I,shape=DG,col=DG))+gp+gl+gx+sy+gy+ge+f+cc+tc+myt+jco
26 | ggsave("~/Results/tutorial/abombLeuTsx.pdf",width=4,height=2.5)#Fig.6C
27 | 
28 | gx=xlab("Dose (Sv)")
29 | d$DG<-cut(d$D,c(-1,.02,.25,.5,.75,1.5,100))
30 | (dD=incidAbomb(d%>%group_by(DG)))
31 | ggplot(dD,aes(x=D,y=I))+gp+gl+gx+sy+gy+ge+f+cc+tc+myt+jco+
32 |   scale_x_continuous(breaks=0:2)
33 | ggsave("~/Results/tutorial/abombLeuDoseResp.pdf",width=4,height=2.5)#Fig.6D
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/attic/other/uvealOverview.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(WriteXLS) 
 3 | library(dplyr) 
 4 | library(scales)    
 5 | cf=function (x) comma_format(width=17,justify="right")(x)
 6 | 
 7 | load("~/data/SEER/mrgd/cancPrim.RData")
 8 | load("~/data/SEER/mrgd/popsae.RData")
 9 | head(popsae)
10 | PY=(popsae%>%group_by(year)%>%summarize(PY=sum(py)))[[2]]
11 | 
12 | (codes=paste0("C",690:699))
13 | d=canc%>%filter(primsite%in%codes,histo3%in%8720:8790) 
14 | d$cancer=as.character(d$cancer)
15 | # C69.3 (choroid), C69.4 (ciliary body and iris), and C69.2 (retina). 
16 | d$cancer[d$primsite=="C690"]="C690-Conjunctiva"
17 | d$cancer[d$primsite=="C691"]="C691-Cornea"
18 | d$cancer[d$primsite=="C692"]="C692-Retinal"
19 | d$cancer[d$primsite=="C693"]="C693-Choroid"
20 | d$cancer[d$primsite=="C694"]="C694-Ciliary"
21 | d$cancer[d$primsite=="C695"]="C695-Lacrimal"
22 | d$cancer[d$primsite=="C696"]="C696-Orbital"
23 | d$cancer[d$primsite=="C698"]="C698-Overlap"
24 | d$cancer[d$primsite=="C699"]="C699-NOS"
25 | 
26 | unlink(f<-"uveal/outs/uvealOverview.xlsx") 
27 | yrca=table(d$yrdx,d$cancer)
28 | yrTot=apply(yrca,1,sum)
29 | Total=apply(yrca,2,sum)
30 | gtot=sum(Total)
31 | D1=data.frame(year=c(1975:2016,NA),as.data.frame(rbind(yrca,Total)),total=c(yrTot,gtot),PY=cf(c(PY,sum(PY))))
32 | 
33 | total=table(d$cancer)
34 | h3ca=table(d$histo3,d$cancer)
35 | D2=as.data.frame(cbind(hist03=c(as.numeric(row.names(h3ca)),NA),rbind(h3ca,total)))
36 | 
37 | total=table(d$histo3)
38 | yrh3=table(d$yrdx,d$histo3)
39 | D3=as.data.frame(cbind(year=c(1975:2016,NA),rbind(yrh3,total)))
40 | WriteXLS(list(yearXcancer=D1,histo3Xcancer=D2,yearXhisto3=D3),ExcelFileName=f,FreezeRow=1,FreezeCol=1,AdjWidth=T)
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/PMF/mkAllFields.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(tidyverse);library(SEERaBomb)
 3 | library(writexl)
 4 | (df=getFields("~/data/SEER")) # this doesn't fly in windows since ~ maps to /users/radivot/documents
 5 | write_xlsx(df,path="pmf/outs/fieldDefs2019.xlsx") # key to big file above
 6 | # picks=c("casenum","reg","race","sex","agedx",
 7 | #         "yrbrth","seqnum","modx","yrdx","histo3",
 8 | #         "ICD9","primsite","COD","surv","radiatn","chemo")
 9 | # (rdf=pickFields(df,picks))
10 | # mkSEER(rdf,outFile="cancPrim",writePops=F) 
11 | # rdf=pickFields(df,picks=df$names)
12 | # mkSEER(rdf,outFile="cancAll",writePops=F) #149 secs # runs out of mem (fixed via more virt memory)
13 | # system.time(load("~/data/SEER/mrgd/cancALL.RData")) # which takes ~65 secs
14 | # d=canc%>%filter(histo3==9961)%>%print(n=2)
15 | # table(d$primsite) # only 4 cases outside of bone = C421
16 | # d=d%>%filter(primsite%in%c("C421"))%>%print(n=2) #4458 cases
17 | # d=d%>%filter(!is.na(surv))%>%print(n=4)  # 4453 with surv times
18 | # save(d,file="pmf/data/allfields2019.RData")
19 | load("pmf/data/allfields2019.RData")
20 | nms=names(d)
21 | L=lapply(nms,function(x) {dd=as.data.frame(table(d[,x],useNA="always"))
22 |                           if (dim(dd)[2]==2) names(dd)=c("Variable","Frequency")
23 |                           dd} )
24 | names(L)=nms
25 | write_xlsx(L,path="pmf/outs/AllFieldsNA.xlsx") # key to big file above
26 | 
27 | table(d$seqnum,d$yrdx)  # flare up in 2nd cancers in 2010 and 2012
28 | table(d$dxconf,d$yrdx)  # flare up in code 3 in   2010 and 2012 
29 | # (3 = immuno and genotype additional confirmation)
30 | sort(table(d$COD))  # COD=3800=unknown behavior neoplasm is biggest at 1106
31 | 
32 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mkDF.Rd:
--------------------------------------------------------------------------------
 1 | \name{mkDF}
 2 | \alias{mkDF}
 3 | \title{Converts seerSet$L series to a data.frame}
 4 | \description{Creates a data.frame of observed and expected cases for each first and second cancer and treatment. 
 5 | Use of this function is deprecated. Use getDF() instead, if needed: csd now calls getDF internally for the 
 6 | most recent time series, so even it may not need to be called directly.}
 7 | \usage{mkDF(seerSet,srs)} 
 8 | \arguments{
 9 |   \item{seerSet}{seerSet object produced by tsd(). }
10 |   \item{srs}{Series. The time series of interest. NULL (default) implies the currently active series, which is
11 |      the most recent. A number i implies the ith series. A string identifies the series by name (numeric vectors 
12 |      will be coerced to such a string via paste0("b",paste(brks,collapse="_")) where brks = vector of time breakpoints.}
13 | }
14 |    
15 | \value{A data.frame in long format that can be used by ggplot.  }
16 |   
17 | 
18 | \author{Tom Radivoyevitch (radivot@ccf.org)}
19 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D}, \link{tsd}} }
20 | \examples{
21 | \dontrun{ 
22 | library(SEERaBomb)
23 | load("~/data/SEER/mrgd/cancDef.RData") #load in canc
24 | load("~/data/SEER/mrgd/popsae.RData") # load in popsae
25 | canc=canc\%>\%select(-reg,-recno,-agerec,-numprims,-COD,
26 |           -age19,-age86,-radiatn,-ICD9,-db,-histo3) 
27 | popsa=popsae\%>\%group_by(db,race,sex,age,year)\%>\%summarize(py=sum(py)) # sum on regs
28 | pm=seerSet(canc,popsa,Sex="male",ageStart=0,ageEnd=100) #pooled (races) male seerSet
29 | pm=mk2D(pm,secondS=c("AML","MDS"))
30 | brks=c(0,1,5)
31 | firstS=c("NHL","MM")
32 | pm=tsd(pm,brks=brks,trts=c("rad","noRad"),firstS=firstS)
33 | mkDF(pm)
34 | }
35 | }
36 | 
37 | \keyword{internal}
38 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/incidSEER.R:
--------------------------------------------------------------------------------
 1 | incidSEER=function(canc,popsae,cancers) {
 2 |   cancer=sex=race=agedx=yrdx=year=age=py=cases=NULL
 3 |   
 4 |   # load("~/data/SEER/mrgd/popsae.RData") # loads in popsae (extended to ages 85-99)
 5 |   # load("~/data/SEER/mrgd/cancDef.RData") #loads in canc
 6 |   # library(tidyverse)
 7 |   # cancers="CMML"
 8 |   # cancers=c("CML","CMML")
 9 |   
10 |   startYrs=c(CMML=1993,MDS=2001,NOS=2001,RA=2001,RAEB=2001,RARS=2001,Del5q=2001,LGL=2010)
11 |   (startYrs=c(startYrs,MPN=2001,unknown=2001,AMLti=2001,LGL=2010))
12 |   (outnms=setdiff(cancers,names(startYrs)))
13 |   x=rep(1975,length(outnms))
14 |   names(x)<-outnms
15 |   (startYrs=c(startYrs,x))
16 |   D=canc%>%select(cancer,sex,race,agedx,year=yrdx)%>%mutate(age=agedx+0.5)%>%filter(cancer%in%cancers)%>%select(-agedx)
17 |   # reshape2::dcast(d%>%group_by(cancer,year)%>%summarize(n=n()),cancer~year)
18 | 
19 |   m=D%>%group_by(cancer,sex,race,age,year)%>%summarise(n=n())
20 |   p=popsae%>%group_by(sex,race,age,year)%>%summarise(py=sum(py))
21 |   s=data.frame(sex=sort(unique(m$sex)))
22 |   # r=data.frame(race=sort(unique(m$race))) # skip since race is in automatically, like age and year
23 |   c=data.frame(cancer=cancers)
24 |   cs=merge(c,s)%>%arrange(cancer,sex)
25 |   # csr=merge(cs,r)%>%arrange(cancer,sex,race) #comment to keep label of race
26 |   options(warn=-1)
27 |   pL=left_join(cs,p,by="sex")
28 |   d=left_join(pL,m)
29 |   options(warn=0)
30 |   d[is.na(d$n),"n"]=0 #join left missings where zero's should be, so fix this
31 |   # head(d)
32 |   d=d%>%group_by(cancer)%>%filter(year>=startYrs[as.character(cancer[1])])
33 |   # reshape2::dcast(d%>%group_by(cancer,year)%>%summarize(n=sum(n)),cancer~year) #check, should be same
34 |   d%>%mutate(py=py/1e5,incid=n/py)
35 | }
36 | 
37 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/attic/breast2leu.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls());library(tidyverse);library(SEERaBomb)
 2 | load("~/data/SEER/mrgd/popsae.RData") 
 3 | (p=popsae%>%count(race,sex,age,year,wt=py)%>%rename(py=n))
 4 | load("~/data/SEER/mrgd/cancDef.RData") 
 5 | canc$cancer=fct_collapse(canc$cancer,AML=c("AML","AMLti","APL"))
 6 | secs=c("CML","AML","ALL") 
 7 | (d=canc%>%filter(sex=="Female",cancer%in%c("breast",secs)))
 8 | pf=seerSet(d,p,Sex="Female")#pooled (races) females 
 9 | pf=mk2D(pf,secondS=secs)#adds secs background rates to pf
10 | trts=c("rad.chemo","rad.noChemo","noRad.chemo","noRad.noChemo")
11 | pf=csd(pf,brkst=c(0,1,2,3,5,10),brksa=c(0,60),trts=trts,firstS="breast")
12 | (D=pf$DF%>%filter(ageG=="(0,60]")) 
13 | D=D%>%mutate(cancer2=fct_relevel(cancer2,"AML"))#make AML 1st
14 | D$t=D$t+c(0,0.075,0.15)#shift times to separate error bars
15 | myt=theme(legend.title=element_blank(),legend.margin=margin(0,0,0,0))
16 | myth=theme(legend.direction="horizontal",legend.key.height=unit(.25,'lines'))
17 | myth=myt+myth+theme(legend.position=c(.25,.95))
18 | g=ggplot(aes(x=t,y=RR,col=cancer2),data=D)+geom_point()+geom_line()+myth+
19 |   labs(x="Years Since Breast Cancer Diagnosis",y="Relative Risk of Leukemia")
20 | g=g+facet_grid(rad~chemo)+geom_abline(intercept=1,slope=0) 
21 | g+geom_errorbar(aes(ymin=rrL,ymax=rrU),width=0.05)+coord_cartesian(ylim=c(0,25)) 
22 | ggsave("~/Results/tutorial/breast2leu.pdf",width=4,height=2.5) 
23 | 
24 | # D%>%filter(cancer2=="CML",rad=="Rad") # see values of CML RR CI at peaks
25 | # D%>%group_by(rad,chemo)%>%summarize(O=sum(O),E=sum(E),meanPYage=weighted.mean(age,w=py)) 
26 | # pf=csd(pf, brkst=c(0,1,2,3,5,10),brksy=c(1973,2000), brksa=c(0,60), trts=trts, firstS="breast")
27 | # (D=pf$DF%>%filter(ageG=="(0,60]",yearG=="[2000,2016)")) #no diff in CML (0,1] of noRad.chemo
28 | 
29 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/blood2012/blood2012fig6.R:
--------------------------------------------------------------------------------
 1 | aBombHome="~/data/abomb"
 2 | load(file.path(aBombHome,"Hema87.RData"));
 3 | names(d)
 4 | d=d[d$py>0,] #remove two recs with zero py
 5 | d=d[d$kerma==1,] # take only kerma < 4 Gy
 6 | d$py=10^4*d$py
 7 | 
 8 | svc<-with(d,cut(sv,c(-1,.02,1,10)))
 9 | PYT<-with(d,tapply(py,list(svc,agexg),sum,na.rm=TRUE))
10 | AGET<-with(d,tapply(agex,list(svc,agexg),mean,na.rm=TRUE))
11 | CMLT<-with(d,tapply(CML,list(svc,agexg),sum,na.rm=TRUE)) 
12 | (CMLt=cbind(apply(CMLT[,1:4],1,sum),apply(CMLT[,5:8],1,sum),apply(CMLT[,9:13],1,sum)))
13 | (PYt=cbind(apply(PYT[,1:4],1,sum),apply(PYT[,5:8],1,sum),apply(PYT[,9:13],1,sum)))
14 | (AGEt=cbind(apply(AGET[,1:4],1,mean),apply(AGET[,5:8],1,mean),apply(AGET[,9:13],1,mean)))
15 | (AGEw=cbind(apply(AGET[,1:4]*PYT[,1:4],1,sum),apply(AGET[,5:8]*PYT[,5:8],1,sum),apply(AGET[,9:13]*PYT[,9:13],1,sum)))
16 | (AGEtw=AGEw/PYt)
17 | (incid=CMLt/PYt)
18 | #	(incidUpper=(CMLt+2*sqrt(CMLt))/PYt)
19 | #	(incidLow=(CMLt-2*sqrt(CMLt))/PYt)
20 | brb=c("blue","red","black")
21 | pchs=c(2,1,22)
22 | graphics.off()
23 | if(length(grep("linux",R.Version()$os))) windows <- function( ... ) X11( ... )
24 | if(length(grep("darwin",R.Version()$os))) windows <- function( ... ) quartz( ... )
25 | 
26 | windows(width=6,height=6)
27 | par(mfrow=c(1,1),mar=c(4.7,5.4,1.3,.8))
28 | matplot(t(AGEtw),t(incid),log="y",type='b',ylab=expression(paste("Cases per ",10^5," Person-Years")),
29 |         xlab="Age at exposure",
30 |         ylim=c(1e-6,8e-4),lty=1,col=brb,cex=2.5,pch=pchs,lwd=3,cex.lab=2,cex.axis=2,yaxt="n")
31 | axis(2,at=c(1e-6,1e-5,1e-4),labels=c(0.1,1,10),cex.axis=2)
32 | text(30,18e-5,"mostly radiogenic",cex=1.5)
33 | text(37,1.3e-5,"mostly background",cex=1.5,col="blue")
34 | legend(20,0.6e-5,c("High Dose","Medium Dose","Low Dose"),pch=pchs[3:1],col=brb[3:1],cex=1.5,pt.cex=2,pt.lwd=2)
35 | 
36 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/attic/common/UVallFields.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(tidyverse);library(SEERaBomb);library(WriteXLS)
 3 | (df=getFields("~/data/SEER")) # this doesn't fly in windows since ~ maps to /users/radivot/documents
 4 | WriteXLS(df,ExcelFileName="uveal/outs/fieldDefs2019.xlsx") # key to big file above
 5 | # picks=c("casenum","reg","race","sex","agedx",
 6 | #         "yrbrth","seqnum","modx","yrdx","histo3",
 7 | #         "ICD9","primsite","COD","surv","radiatn","chemo")
 8 | # (rdf=pickFields(df,picks))
 9 | # mkSEER(rdf,outFile="cancPrim",writePops=F) 
10 | # rdf=pickFields(df,picks=df$names)
11 | # mkSEER(rdf,outFile="cancAll",writePops=F) #149 secs # runs out of mem (fixed via more virt memory)
12 | 
13 | # system.time(load("~/data/SEER/mrgd/cancALL.RData")) # which takes ~65 secs
14 | # d=canc%>%filter(primsite%in%c("C693","C694","C692"))%>%print(n=2)
15 | # d=d%>%filter(histo3%in%8720:8790)%>%print(n=2)
16 | # d=d%>%filter(!is.na(surv))%>%print(n=4)
17 | # save(d,file="uveal/data/allfields2019.RData")
18 | load("uveal/data/allfields2019.RData")
19 | nms=names(d)
20 | L=lapply(nms,function(x) {dd=as.data.frame(table(d[,x],useNA="always"))
21 |                           if (dim(dd)[2]==2) names(dd)=c("Variable","Frequency")
22 |                           dd} )
23 | names(L)=nms
24 | WriteXLS(L,ExcelFileName="uveal/outs/uvealAllFieldsNA.xlsx",AdjWidth=T)
25 | # scrap yard below
26 | # tb=tibble(siz=d$cstumsiz,stg=d$dajcc7t,ext=d$csexten,stg2=d$dajccstg)
27 | # tb%>%ggplot(aes(x=ext,y=siz))+geom_point() # lots of missing sizes
28 | # tb%>%ggplot(aes(x=stg,y=siz))+geom_jitter()
29 | # tb%>%ggplot(aes(x=stg,y=stg2))+geom_jitter()
30 | # plot(d$cstumsiz,d$csexten)
31 | # plot(d$cstumsiz,d$dajcc7t)
32 | # ### conclusion: there is no good tumor size metric, just  stages dajcc7t
33 | # 
34 | 
35 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/REB2014/baseGraphics/CMLagexDev.R:
--------------------------------------------------------------------------------
 1 | # This script performs the deviance calculations associated with Eq. 2 in our active MS
 2 | rm(list=ls(all=TRUE))
 3 | cols=c("city","sex","doseg","agexg","calg","kerma","PY","adjPY","num.entering",
 4 | "age","agex","tsx","cal","sv","gam","neut","lymphoma","NHL","leukemia","AML","ALL","CML","ATL","MM")      
 5 | d<-read.table("c:/data/abomb/HEMA87.dat", header=F,col.names=cols);
 6 | d=d[d$adjPY>0,] #remove two recs with zero py
 7 | d=d[d$kerma==1,] # take only kerma < 4 Gy
 8 | d=d[d$city==1,]#hiroshima=1, comment for pooling
 9 | #d=d[d$age>=20,]# restriction to adults => male difference regardless of city pooling
10 | d$py=10^4*d$adjPY
11 | d$calg=as.integer(cut(d$calg,c(0,2,4,6,8,10))) # pairwise binning of waiting time groups
12 | m=d[d$sex==1,]; f=d[d$sex==2,] 
13 | agem=55  # pooling cities holds significance with age>20 only if agem=55 centering is used
14 | agem=0   # Hiroshima alone, with or without age>20, holds significance regardless of agem=0 or 55
15 | 
16 | require(bbmle)
17 | nLL<-function(c0,k,beta,L1,L2,L3,L4,L5,x,agem) with(x, {L=c(L1,L2,L3,L4,L5)
18 | 				mn = (exp(c0+k*(age-agem)) + exp(-beta*abs(agex-30)/28.85)*sv*exp(L[calg]))*py
19 | 				-sum(stats::dpois(CML, mn, log=TRUE))})	
20 | fit0 <- mle2(nLL,start=list(c0=-10,k=.04,L1=-10,L2=-10,L3=-10,L4=-10,L5=-10,beta=0.5),data=list(x=m,agem=agem))
21 | fit0Fx <- mle2(nLL,start=list(c0=-10,k=.04,L1=-10,L2=-10,L3=-10,L4=-10,L5=-10),fixed = list(beta=0),data=list(x=m,agem=agem))
22 | deviance(fit0Fx)-deviance(fit0)
23 | 
24 | fit0 <- mle2(nLL,start=list(c0=-10,k=.04,L1=-10,L2=-10,L3=-10,L4=-10,L5=-10,beta=0.5),data=list(x=f,agem=agem))
25 | fit0Fx <- mle2(nLL,start=list(c0=-10,k=.04,L1=-10,L2=-10,L3=-10,L4=-10,L5=-10),fixed = list(beta=0),data=list(x=f,agem=agem))
26 | deviance(fit0Fx)-deviance(fit0)
27 | 
28 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/examples/attic/mkSEERcrt.R:
--------------------------------------------------------------------------------
 1 | #NOTE: this script is outdated now that "CUSTOM".zip data files are available
 2 | d=read.csv("~/data/SEER/crt.csv")
 3 | names(d)=c("casenum","seqnum","RT","CT")
 4 | crt=d%>%select(casenum,seqnum,CT,RT)
 5 | save(crt,file="~/data/SEER/crt.RData")
 6 | head(crt)
 7 | load("~/data/SEER/crt.RData")
 8 | load("~/data/SEER/mrgd/cancDef.RData")
 9 | levels(crt$seqnum)
10 | head(crt$seqnum)
11 | x=strsplit(as.character(crt$seqnum), "[^0-9]+")
12 | crt$seq=sapply(x,function(y) y[[1]])
13 | crt$seq[crt$seqnum=="One primary only"]=0
14 | crt$seq[crt$seqnum=="Only one state registry-defined neoplasm"]=60
15 | crt$seq[crt$seqnum=="Unknown seq num - federally required in situ or malig tumors"]=99
16 | crt$seq[crt$seqnum=="Unknown seq num - state registry-defined neoplasms"]=88
17 | (S=paste0(1:27,"th of ",1:27," or more state registry-defined neoplasms"))
18 | for (i in 1:27)  crt$seq[crt$seqnum==S[i]] = 60+i
19 | crt$seq=as.integer(crt$seq)
20 | crt$seq[1:800]
21 | crt$ct=as.integer(crt$CT)-1
22 | levels(crt$RT)
23 | crt$radiatn=9  # 9 is gone, so shouldn't be any left after below
24 | crt$radiatn[crt$RT=="None/Unknown"]=0
25 | crt$radiatn[crt$RT=="Beam radiation"]=1
26 | crt$radiatn[crt$RT=="Radioactive implants"]=2
27 | crt$radiatn[crt$RT=="Radioisotopes"]=3
28 | crt$radiatn[crt$RT=="Combination of beam with implants or isotopes"]=4
29 | crt$radiatn[crt$RT=="Radiation, NOS  method or source not specified"]=5
30 | crt$radiatn[crt$RT=="Other radiation (1973-1987 cases only)"]=6
31 | crt$radiatn[crt$RT=="Refused"]=7
32 | crt$radiatn[crt$RT=="Recommended, unknown if administered"]=8
33 | head(crt,20)
34 | cr=crt%>%select(casenum,seqnum=seq,ct,radiatn)
35 | d=left_join(canc,cr)
36 | canc=d
37 | canc$trt="noRad"
38 | canc$trt[canc$radiatn==8]="unk"
39 | canc$trt[canc$radiatn%in%c(1:6)]="rad"
40 | save(canc,file="~/data/SEER/cancCRT.RData")
41 | 
42 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/seerEAR/cmpRisk.R:
--------------------------------------------------------------------------------
 1 | library(cmprsk) #run after surv.r
 2 | (d=d%>%select(yrdx,agedx,sex,surv,status,CODS))
 3 | D2=d%>%mutate(status=ifelse((status==1)&(!CODS%in%c("melanoma","eye")),2,status))
 4 | fitc=with(D2,cuminc(surv,status))
 5 | labs=c("Death by Melanoma","Death by Other Causes")
 6 | # plot(fitc,curvlab=labs,xlab="Years Since Diagnosis",ylab="Cumulative Probability of Death")
 7 | # pdf("outs/cumIncEstimates.pdf",width=4,height=4)
 8 | # plot(fitc,curvlab=labs,xlab="Years Since Diagnosis",ylab="Cumulative Probability of Death",ylim=c(0,1))
 9 | names(fitc)=c(" 1"," 2")  # gets rid of 1 at top
10 | (g=ggcompetingrisks(fitc)+sbb+gx+ylab("Cumulative Probability of Death")+
11 |     scale_color_manual(labels=labs,values=c("#F8766D","#00BFC4"))+ylim(c(0,1))+
12 |     theme(legend.position = c(0.3, 0.8),legend.title=element_blank()) +labs(title="",caption=""))
13 | ggsave("outs/cumIncY0to1.pdf",width=4.5,height=3.5) #Fig. 3
14 | library("MALDIquant")
15 | dmfs=as_tibble(fitc[[1]])
16 | dos=as_tibble(fitc[[2]])
17 | x0=seq(5,35,5)
18 | (kos=match.closest(x0,dos$time))
19 | (dos=dos[kos,]%>%mutate(CIos=paste0(sprintf('%.3f',est)," (",
20 |                                            sprintf('%.3f',est-1.96*sqrt(var)),", ",
21 |                                            sprintf('%.3f',est+1.96*sqrt(var)),")")))
22 | (kmfs=match.closest(x0,dmfs$time))
23 | (dmfs=dmfs[kmfs,]%>%mutate(CImfs=paste0(sprintf('%.3f',est)," (",
24 |                                                sprintf('%.3f',est-1.96*sqrt(var)),", ",
25 |                                                sprintf('%.3f',est+1.96*sqrt(var)),")")))
26 | 
27 | L=list(CumInc=tibble(Time=x0, "All Death Cumulative Incidence"=dos$CIos, #table 3
28 |                      "Melanoma Death Cumulative Incidence"=dmfs$CImfs))
29 | WriteXLS(L,ExcelFileName="outs/cumIncEstimates.xlsx",AdjWidth = T)
30 | 
31 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/msd.Rd:
--------------------------------------------------------------------------------
 1 | \name{msd}
 2 | \alias{msd}
 3 | \title{Mortality vs years Since Diagnosis}
 4 | \description{Computes relative risks (RR) of death over specified years-since-diagnosis intervals. 
 5 | US mortality rates obtained via the R package demography are used to compute background death dedepence on 
 6 | age and calendar year.  RR is then O/E where O and E are the number of observed and expected cases. }
 7 | \usage{msd(canc,mrt,brkst=c(0,2,5),brksy=NULL) } 
 8 | \arguments{
 9 |   \item{canc}{Input data.frame with columns: yrdx, agedx, sex, surv (in years), and status (1=dead).}
10 |   \item{mrt}{List with male and female fields, each matrices with mortality rates vs year and age. }
11 |   \item{brkst}{Vector of breaks in years used to form Times since diagnosis intervals/bins. }
12 |   \item{brksy}{Vector of breaks of calendar Years to show mortality trends. Leave NULL for all in one. }
13 | }
14 | \value{data.frame with observed and expected cases, RR, and RR CI for each tsd interval. }
15 | \note{ This function was developed with support from the Cleveland Clinic Foundation.}
16 | \author{Tom Radivoyevitch (radivot@ccf.org)}
17 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D},\link{seerSet}} }
18 | 
19 | \examples{
20 | \dontrun{
21 | library(SEERaBomb)
22 | load("~/data/SEER/mrgd/cancDef.RData") #loads in canc
23 | lu=canc\%>\%filter(cancer=="lung")
24 | lu=lu\%>\%mutate(status=as.numeric(COD>0))\%>\%select(yrdx,agedx,sex,surv,status) 
25 | lu=lu\%>\%mutate(surv=round((surv+0.5)/12,3))#convert surv to years
26 | 
27 | # library(demography)
28 | # d=hmd.mx("USA", "username", "password") #make an account and put your info in here
29 | # mrt=d$rate
30 | # save(mrt,file="~/data/usMort/mrt.RData")
31 | load("~/data/usMort/mrt.RData"); object.size(mrt)# 250kb
32 | brks=c(0,0.5,3,6,10,15,20,25)    
33 | (dlu=msd(lu,mrt,brkst=brks))
34 | }
35 | }
36 | 
37 | \keyword{IO}
38 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mkSEERold.Rd:
--------------------------------------------------------------------------------
 1 | \name{mkSEERold}
 2 | \alias{mkSEERold}
 3 | \title{ Make SEER binaries as before  }
 4 | \description{ Converts SEER ASCII text files into smaller R binaries.
 5 |        This is being maintained to avoid fixing old scripts. Please use mkSEER for new scripts.}
 6 | \usage{mkSEERold(df,seerHome="~/data/SEER",
 7 |             dataset=c("00","73","92"),SQL=TRUE, mkDFs=FALSE)}
 8 | 
 9 | \arguments{
10 |   \item{df}{ A data frame that was the output of \code{pickFields}. This determines which fields to transfer. }
11 |   \item{seerHome}{ The directory that contains the SEER \file{population} and \file{incidence} directories.  This should be writable by the user.}
12 |   \item{dataset}{The SEER database to use, specified as a string of the last two digits of the starting year, i.e. \code{"73"} = 1973-2011, \code{"92"}  = 1992-2011, and \code{"00"} = 2000-2011. This determines the subdirectory in \file{incidence} and \file{population} from which data is taken and is also the name of the subdirectory of \code{seerHome} to which the R data files will be written, e.g. \code{"00"} processes the 2000 database and places the binary results in /data/SEER/00.}
13 |   \item{SQL}{TRUE if an SQLite database is to be created. The file \file{all.db} produced in this case can be significantly larger than the sum of the \file{*.RData} files also produced. }           
14 |   \item{mkDFs}{TRUE if you wish to make data frame binaries. }           
15 | }
16 | 
17 | \details{See \link{mkSEER}.}
18 | \value{None.  This function produces R binary data files.}
19 | 
20 | \author{ Tom Radivoyevitch (\email{radivot@ccf.org}) }
21 | \seealso{\code{\link{SEERaBomb-package},\link{mkSEER},\link{getFields},\link{pickFields} } }
22 | \examples{
23 | \dontrun{
24 | library(SEERaBomb)
25 | (df=getFields())
26 | (df=pickFields(df))
27 | for (i in c("73","92","00")) mkSEERold(df,dataset=i)
28 | }
29 | }
30 | \keyword{internal}
31 | 
32 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/leukemia2016/lymphoidFirsts.R:
--------------------------------------------------------------------------------
 1 | # lymphoidFirsts.R (Figure 4)
 2 | # first run amlMDScomputing.R, then start here to make time course plots 
 3 | rm(list=ls())
 4 | library(dplyr)
 5 | library(reshape2)
 6 | library(ggplot2)
 7 | library(SEERaBomb)
 8 | LC=c("CLL","SLL","HCL","NHL","MM","HL") # lymphoid first cancers
 9 | system.time(load("~/Results/amlMDS/pm.RData")) # 4 secs to load. 
10 | system.time(load("~/Results/amlMDS/pf.RData")) # 4 secs to load. 
11 | brks=c(0,0.25,0.5,0.75,1,1.5,2,2.5,3,4,5,6,8,10,12)
12 | df=mkDF(pf,brks)
13 | dm=mkDF(pm,brks)
14 | d=rbind(cbind(df,Sex="Female"),cbind(dm,Sex="Male"))
15 | d=d%>%filter(cancer1%in%LC)%>%group_by(cancer2,Sex,int)%>%summarize(O=sum(O),E=sum(E),t=weighted.mean(t,py,na.rm=T))
16 | D=d%>%mutate(RR=O/E, L=qchisq(.025,2*O)/(2*E),U=qchisq(.975,2*O+2)/(2*E))
17 | D[D$cancer2=="MDS","t"]=D[D$cancer2=="MDS","t"]+0.05
18 | graphics.off()
19 | quartz(width=7,height=4)
20 | # theme_set(theme_bw())
21 | theme_update(legend.position = c(.92, .825),
22 |              axis.text=element_text(size=rel(1.2)),
23 |              axis.title=element_text(size=rel(1.3)),
24 |              axis.title.x=element_text(size=rel(1.0)),
25 |              legend.title=element_text(size=rel(1)),
26 |              legend.text=element_text(size=rel(1)),
27 |              strip.text = element_text(size = rel(1.5)))
28 | g=qplot(x=t,y=RR,data=D,col=cancer2,geom=c("line","point"),
29 |         xlab="Years Since Dx of Lymphoid First Cancer",ylab="Relative Risk")
30 | g=g+facet_grid(Sex~.,scales="free")+geom_abline(intercept=1, slope=0)
31 | # g = g + scale_color_grey(start = 0, end = 0.6)
32 | g1 <- guide_legend("Second\nCancer")
33 | g=g + guides(color=g1) 
34 | g=g+  geom_errorbar(aes(ymin=L,ymax=U,width=.15))+scale_y_continuous(breaks=c(0,5,10,15))
35 | print(g)
36 | ggsave("~/Results/amlMDS/lymphoidFirst.eps")  
37 | ggsave("~/Results/amlMDS/lymphoidFirst.png")  
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/attic/brain2leu.R:
--------------------------------------------------------------------------------
 1 | # results here were too noisy to include in the paper
 2 | rm(list=ls()); library(tidyverse); library(SEERaBomb)
 3 | load("~/data/SEER/mrgd/popsae.RData") #get popsae
 4 | (p=popsae%>%count(race,sex,age,year,wt=py)%>%rename(py=n))
 5 | load("~/data/SEER/mrgd/cancDef.RData") #get canc 
 6 | canc$cancer=fct_collapse(canc$cancer,AML=c("AML","AMLti","APL"))
 7 | secs=c("CML","AML","ALL") #second cancers of interest
 8 | (d=canc%>%filter(cancer%in%c("brain",secs)))
 9 | d%>%count(cancer,trt)%>%print(n=30)
10 | pf=seerSet(d,p,Sex="Female") #pooled (races) females 
11 | pm=seerSet(d,p,Sex="Male") 
12 | pf=mk2D(pf,secondS=secs)# adds secs background rates to pf
13 | pm=mk2D(pm,secondS=secs)
14 | trts=c("rad.chemo", "rad.noChemo", "noRad.chemo", "noRad.noChemo")
15 | pf=csd(pf, brkst=c(0,1,2,3,5,10), brksa=c(0,60), trts=trts, firstS="brain")
16 | pm=csd(pm, brkst=c(0,1,2,3,5,10), brksa=c(0,60), trts=trts, firstS="brain")
17 | DF=bind_rows(pf$DF,pm$DF)
18 | DF=DF%>%mutate(cancer2=fct_relevel(cancer2,"AML")) #make AML 1st factor level
19 | DF$t=DF$t+c(0,0.075,0.15) #shift times to see overlaid error bars in plots
20 | D=DF%>%group_by(int,rad,chemo,cancer2)%>%summarize(O=sum(O),E=sum(E),t=mean(t))
21 | D=D%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))
22 | graphics.off();quartz(width=4,height=2.5)
23 | myt=theme(legend.position=c(.25, .95),legend.title=element_blank(),
24 |           legend.direction="horizontal",legend.margin=margin(0,0,0,0),
25 |           legend.key.height = unit(.25, 'lines'))
26 | g=ggplot(aes(x=t,y=RR,col=cancer2),data=D)+geom_point()+geom_line()+myt+
27 |   labs(x="Years Since Brain Cancer Diagnosis",y="Relative Risk of Leukemia")
28 | g=g+facet_grid(rad~chemo)+geom_abline(intercept=1, slope=0) 
29 | g+geom_errorbar(aes(ymin=rrL,ymax=rrU),width=0.1) +coord_cartesian(ylim=c(0,30))
30 | ggsave("~/Results/tutorial/brain2leu.pdf") 
31 | 
32 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/renal/surv/surv.R:
--------------------------------------------------------------------------------
 1 | source("renal/common/load.R")
 2 | source("renal/common/acros.R")
 3 | 
 4 | (d5=d%>%select(yrdx,agedx,sex,surv,status))
 5 | (d6=d%>%select(yrdx,agedx,sex,surv,status,CODS))
 6 | (d6=d6%>%mutate(status=ifelse((status==1)&(!CODS%in%c("renal")),0,status))%>%select(-CODS))
 7 | d5$grp="All Cause Mortality"
 8 | d6$grp="Death by RCC"
 9 | 
10 | D=bind_rows(d5,d6)
11 | fit=survfit(Surv(surv,status)~grp,data=D) 
12 | gy=ylab("Survival Probability")
13 | labs=c("OS","MFS")
14 | ggsurvplot(fit,D,legend.title="",legend.labs=labs)+gx
15 | ggsave("renal/outs/survOSnMFS.pdf",width=4.5,height=3)
16 | 
17 | ddT=simSurv(d,mrt,rep=3,ltb=NULL,unif=TRUE)
18 | fit=survfit(Surv(surv,status)~type,data=ddT) 
19 | ggsurvplot(fit,ddT,legend.title="",legend.labs=c("Observed","Expected"),xlim=c(0,60))+gx #+svts
20 | ggsave("renal/outs/survWithCntrl.png",width=4.5,height=3)
21 | 
22 | library("MALDIquant")
23 | ddF=simSurv(d,mrt,rep=1,ltb=NULL,unif=FALSE)
24 | fitd=survfit(Surv(surv,status)~1,data=ddF%>%filter(type=="Observed")) 
25 | fitc=survfit(Surv(surv,status)~1,data=ddF%>%filter(type=="Simulated")) 
26 | (tc=fitc$time[1:40])
27 | (svc=fitc$surv[1:40])
28 | (td=fitd$time)
29 | (svd=fitd$surv)
30 | (k=match.closest(tc,td))
31 | svd=svd[k]
32 | tibble(tc,svr=svd/svc)%>%ggplot(aes(tc,svr))+geom_step()+gx+gy
33 | # bit crude looking
34 | 
35 | fitd=survfit(Surv(surv,status)~1,data=ddT%>%filter(type=="Observed")) 
36 | fitc=survfit(Surv(surv,status)~1,data=ddT%>%filter(type=="Simulated")) 
37 | x=0:40
38 | x=seq(0,30,.1)
39 | x=seq(0,42,.1)
40 | (tc=fitc$time)
41 | (k=match.closest(x,tc))
42 | (svc=fitc$surv[k])
43 | (td=fitd$time)
44 | (k=match.closest(x,td))
45 | (svd=fitd$surv[k])
46 | gy=ylab("Relative Survival Probability")
47 | tibble(x,svr=svd/svc)%>%ggplot(aes(x,svr))+geom_step()+gx+gy
48 | ggsave("renal/outs/survRel.pdf",width=4.5,height=3)
49 | ggsave("renal/outs/survRel.png",width=4.5,height=3)
50 | 
51 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/attic/cmlMort.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())
 2 | library(SEERaBomb)#version 2018.1 or higher is needed for this tutorial
 3 | load("~/data/SEER/mrgd/cancDef.RData")
 4 | d=canc%>%filter(cancer=="CML")%>%print(n=13)
 5 | d%>%summarize(n=n(),na=sum(is.na(surv)),prct=100*na/n)#<2% missing
 6 | d=d%>%mutate(status=as.numeric(COD>0),surv=(surv+0.5)/12)#move into mkSEER?
 7 | d=d%>%select(yrdx,agedx,sex,surv,status)%>%print(n=13)
 8 | load("~/data/usMort/mrt.RData")#loads mrt
 9 | (D=msd(d,mrt,brkst=c(0,0.5,1,2,3,4,5,6,8),brksy=c(1973,1990,2005,2015)))
10 | (g=qplot(x=t,y=RR,data=D,col=Years,geom=c("line","point"),facets=.~sex,
11 |         xlab="Years Since CML Diagnosis",ylab="Relative Risk of Mortality"))
12 | (g=g+scale_x_continuous(breaks=seq(0,15,5)))
13 | library(ggsci);jco=scale_color_jco();tc=theme_classic(base_size=14)
14 | (g=g+jco+tc+theme(legend.position="top",legend.title=element_blank(),
15 |            strip.background=element_blank(),
16 |            legend.text=element_text(size=12),
17 |           strip.text=element_text(size=12)))
18 | (g=g+geom_abline(intercept=1,slope=0)+ylim(c(0,NA)))
19 | g+geom_errorbar(aes(ymin=rrL,ymax=rrU),width=.2)
20 | ggsave("~/Results/tutorial/CMLmortRR.pdf",width=4.5,height=3)
21 | 
22 | labs=c("1973-1990","1991-2005","2006-2015")
23 | d=d%>%mutate(yrg=cut(yrdx,c(1972,1990,2005,2015),labels=labs))%>%print(n=13)
24 | library(survival);library(survminer)
25 | fit=survfit(Surv(surv,status)~yrg+sex,data=d) 
26 | ggsurvplot_facet(fit,d,facet.by=c("sex"),ylab="Survival Probability",
27 |           xlab="Years Since CML Diagnosis",legend.title="",
28 |           xlim=c(0,12),short.panel.labs=T)+
29 |   scale_x_continuous(breaks=seq(0,15,5))+
30 |   jco+theme(strip.background=element_blank(),strip.text=element_text(size=12),
31 |             legend.text=element_text(size=12))
32 | ggsave("~/Results/tutorial/CMLsurvTrends.pdf",width=4.5,height=3)
33 | 
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/examples/attic/lungSurv.R:
--------------------------------------------------------------------------------
 1 | .seerHome="~/data/SEER" # Assume here that the SEER data is in /data/SEER. Change this if not. 
 2 | rm(list=ls()) # note: dot variables defined above persist through such cleanings
 3 | library(RSQLite) # install.packages("RSQLite")
 4 | m=dbDriver("SQLite")
 5 | con=dbConnect(m,dbname=file.path(.seerHome,"00/all.db"))
 6 | da=dbGetQuery(con,"SELECT * from respir where ICD9>=1620 and ICD9<=1629 
 7 | and histo3=8140 and seqnum<2") # adenos
 8 | # da=transform(da,dwd=(COD>0),hist= "adenocarcinoma" ) # not alive; 0=alive
 9 | da=transform(da,dwd=((COD>=20010)&(COD<=37000)),hist= "adenocarcinoma" ) # dead with cancer, 0=alive
10 | MFcnta=summary(as.factor(da$sex))
11 | head(da)
12 | ds=dbGetQuery(con,"SELECT * from respir where ICD9>=1620 and ICD9<=1629 
13 | and histo3>=8070 and histo3<=8079 and seqnum<2") # squames
14 | MFcnts=summary(as.factor(ds$sex))
15 | # ds=transform(ds,dwd=(COD>0),hist= "squamous cell") # not alive; 0=alive
16 | ds=transform(ds,dwd=((COD>=20010)&(COD<=37000)),hist= "squamous cell" ) # dead with cancer, 0=alive
17 | d=rbind(da,ds)
18 | 
19 | sapply(d,class)
20 | head(d)
21 | library(survival)
22 | # graphics.off()
23 | if(length(grep("linux",R.Version()$os))) windows <- function( ... ) X11( ... )
24 | if(length(grep("darwin",R.Version()$os))) windows <- function( ... ) quartz( ... )
25 | windows(width=9,height=5)
26 | par(mfrow=c(1,2),mar=c(4.5,4.1,1,1))
27 | rb=c("blue","red") #males blue, femals red 
28 | plot(S<-survfit(Surv(surv,dwd)~hist,data = subset(d,sex==1)), # main="SEER 2000-2010",
29 |      xlab="Months",ylab="Survival", xlim=c(0,60) , #ylim=c(0.6,1),
30 |      col=rb, main="Adeno Vs Squame")
31 | print(S) 
32 | legend("topleft",c("Males"),bty="n")
33 | 
34 | plot(S<-survfit(Surv(surv,dwd)~hist,data = subset(d,sex==2)), ,
35 |      xlab="Months",ylab="Survival", xlim=c(0,60), #ylim=c(0.6,1),
36 |      col=rb, main="Adeno Vs Squame")
37 | print(S) 
38 | legend("topleft",c("Females"),bty="n")
39 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/post1PYO.Rd:
--------------------------------------------------------------------------------
 1 | \name{post1PYO}
 2 | \alias{post1PYO}
 3 | \title{Get person-years at risk and observed cases after first cancers }
 4 | \description{Converts a canc data.frame into a list of objects containing information regarding person years at risk
 5 | for a second cancer after having a first cancer, and the number observed, in a defined time since exposure interval
 6 | and after a defined first cancer therapy.  }
 7 | \usage{post1PYO(canc, brks=c(0,2,5),binIndx=1,Trt="rad",PYLong=FALSE,yearEnd,firstS,secondS)} 
 8 | \arguments{
 9 |   \item{canc}{Input canc data.frame that is already sex, and possibly race, specific, but not cancer specific, as 
10 |               treatment of any first cancer could potentially cause any second cancer.}
11 |   \item{brks}{A vector of break points in years used to form time since diagnosis bins. }
12 |   \item{binIndx}{The index of the interval for which py are to be computed by calling this function.}
13 |   \item{Trt}{The treatment for the first cancers. Note that the second cancer treatment is irrelevant here, so 
14 |   the input canc must not be reduced to only certain treatment types. }
15 |   \item{PYLong}{PYLong of tsd.}
16 |   \item{yearEnd}{This is taken from the seerSet object.}
17 |   \item{firstS}{Vector of first cancers of interest as strings.}
18 |   \item{secondS}{Vector of second cancers of interest as strings.}
19 | }
20 | 
21 | \value{A list where the first element is a list LPYM with as many PY matrices (PYM) as cancers in canc. The second
22 | element is a matrix of cases observed in this interval after this treatment, where row names are first cancers and column names
23 | are second cancers. The third element is a trivial scalar, the py-weighted midpoint of the time interval selected. }
24 | 
25 | \note{After the SEER data is installed, see the script mkRRtsx.R in the examples folder. }
26 | \author{Tom Radivoyevitch (radivot@ccf.org)}
27 | \seealso{\code{\link{SEERaBomb-package}, \link{getE},\link{seerSet}} }
28 | \keyword{internal}
29 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/getDF.Rd:
--------------------------------------------------------------------------------
 1 | \name{getDF}
 2 | \alias{getDF}
 3 | \title{Converts a seerSet$L series to a data.frame}
 4 | \description{Creates a data.frame of observed and expected cases for each first and second cancer and treatment. 
 5 | csd() calls this internally for the most recent time series, so it may not need to be called directly.}
 6 | \usage{getDF(seerSet,srs=NULL)} 
 7 | \arguments{
 8 |   \item{seerSet}{seerSet object produced by csd(). }
 9 |   \item{srs}{Series. The time series of interest. NULL (default) implies the currently active series, which is
10 |      the most recent. A number i implies the ith series. A string identifies the series by name (numeric vectors 
11 |      will be coerced to such a string via paste0("b",paste(brks,collapse="_")) where brks = vector of time breakpoints.}
12 | }
13 |    
14 | \value{A data.frame in long format that can be used by ggplot.  }
15 | 
16 | \note{I envision getting away from saving multiseries seerSet objects and instead just saving several DF outputs of getDF. 
17 | Besides smaller objects, a reason for this is that two L objects out of csd can now be confounded if they have
18 | the same time since diagnosis series but a different series for age and/or year of diagnosis. }
19 |   
20 | 
21 | \author{Tom Radivoyevitch (radivot@ccf.org)}
22 | \seealso{\code{\link{SEERaBomb-package}, \link{csd}} }
23 | \examples{
24 | \dontrun{ 
25 | library(SEERaBomb)
26 | load("~/data/SEER/mrgd/cancDef.RData") #load in canc
27 | load("~/data/SEER/mrgd/popsae.RData") # load in popsae
28 | canc=canc\%>\%select(-reg,-recno,-agerec,-numprims,-COD,
29 |           -age19,-age86,-radiatn,-ICD9,-db,-histo3) 
30 | popsa=popsae\%>\%group_by(db,race,sex,age,year)\%>\%summarize(py=sum(py)) # sum on regs
31 | pm=seerSet(canc,popsa,Sex="male",ageStart=0,ageEnd=100) #pooled (races) male seerSet
32 | pm=mk2D(pm,secondS=c("AML","MDS"))
33 | firstS=c("NHL","MM")
34 | pm=csd(pm,brkst=c(0,1,5),trts=c("rad","noRad"),firstS=firstS)
35 | pm$DF 
36 | getDF(pm)
37 | }
38 | }
39 | 
40 | \keyword{IO}
41 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/simSurv.R:
--------------------------------------------------------------------------------
 1 | simSurv<-function(d,mrt,rep=1,ltb=NULL,unif=TRUE){
 2 |   yrdx=agedx=sex=surv=status=P=NULL
 3 |   pullP=function(sex,age,year) {
 4 |     # sex="Male";age=10;year=1999
 5 |     # load("~/data/usMort/mrt.RData")#loads US mortality data
 6 |     M=mrt[[sex]]
 7 |     aN=dim(M)[1]
 8 |     yN=dim(M)[2]
 9 |     P=rep(0.5,aN)
10 |     #need to slap on copies of final column to right for future of young 
11 |     (Mfill=matrix(M[,yN],nrow=111,ncol=111))
12 |     strtYr=max(as.numeric(colnames(M)))
13 |     colnames(Mfill)=(strtYr+1):(strtYr+111)
14 |     (Mbig=cbind(M,Mfill))
15 |     rownames(Mbig)=c(0:109,"110")
16 |     i=1
17 |     for (k in 0:(aN-age-1))  {
18 |       P[i]= Mbig[as.character(age+k),as.character(year+k)]
19 |       i=i+1
20 |     }
21 |     P
22 |   }
23 |   
24 |   simP=function(P) {
25 |     for (k in 1:length(P)) 
26 |       if(unif){ 
27 |         if(runif(1)<P[k]) return(k-runif(1))
28 |       } else {
29 |         if(runif(1)<P[k]) return(k-0.5)
30 |       }
31 |   }
32 |   # library(tidyverse)
33 |   # load("~/data/SEER/mrgd/cancDef.RData")#load SEER cancer data
34 |   # d=canc%>%filter(cancer=="CMML") #,agedx==99)
35 |   # d=d%>%mutate(status=as.numeric(COD>0),surv=(surv+0.5)/12)
36 |   d=d%>%select(yrdx,agedx,sex,surv,status)#%>%mutate(ID=seq.int(nrow(d)))
37 |   nd=d%>%select(-surv,-status) # these will be simulated in the new data
38 |   # head(nd,2)
39 |   nd=nd%>%mutate(P=pmap(list(sex,agedx,yrdx),pullP))
40 |   # rep=5
41 |   sim=function(P) replicate(rep, simP(P))
42 |   nd=nd%>%mutate(surv=map(P,sim))
43 |   # head(nd,2)
44 |   D=nd%>%select(-P)
45 |   D=D%>%unnest(surv)
46 |   D$status=1
47 |   D$type="Simulated"
48 |   # head(D,2)
49 |   d$type="Observed"
50 |   d=rbind(d,D)
51 |   # head(d,2)
52 |   
53 |   if (!is.null(ltb)) {
54 |     getLT=function(sex,agedx,yrdx)  ltb[[as.character(sex)]][as.character(agedx),as.character(yrdx)]
55 |     # getLT("Male",80,2000)
56 |     ed=d
57 |     ed$surv=mapply(getLT,d$sex,d$agedx,d$yrdx)
58 |     ed$status=1
59 |     ed$type="LT"
60 |     d=rbind(d,ed)
61 |   }
62 |   d
63 | }
64 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mkExcelTsd.Rd:
--------------------------------------------------------------------------------
 1 | \name{mkExcelTsd}
 2 | \alias{mkExcelTsd}
 3 | \title{Make RR Excel file from tsd function output}
 4 | \description{Provides relative risks (RR) organized by 1st and 2nd cancers, times since 1st cancer diagnoses, and 1st cancer treatment. RR = O/E where O = observed cases and E = cases expected under a null hypothesis that prior cancers do not impact subsequent risks. If flip = FALSE (default), sheets = 1st cancers and rows = 2nd cancers, else sheets = 2nd cancers and rows = 1st cancers; columns are always intervals of years since diagnosis, in 
 5 | 1st cancer treatment blocks. RR CI and observed numbers are included in each data cell.}
 6 | \usage{mkExcelTsd(seerSet,tsdn,outDir="~/Results",outName=NULL,flip=FALSE)} 
 7 | \arguments{
 8 |   \item{seerSet}{A seerSet list after it has been processed by tsd().  }
 9 |   \item{tsdn}{Name of set of times since diagnosis. This is based on the brks argument to tsd().
10 |               If length >1 a brkst vector is assumed and coerced/collapsed to a tsdn string.}
11 |   \item{outDir}{Folder of the Excel file that will be generated.}
12 |   \item{outName}{if null (default), Excel file name = seerSet base file name (bfn) + tsdn,
13 |                 else it is outName. Eitherway, "Flipped" is appended to the name if flip is TRUE.  }
14 |   \item{flip}{If FALSE, sheets are first cancers, rows seconds. If TRUE, sheets are second cancers, rows firsts.}
15 | }
16 | 
17 | \value{ Returned invisibly, a list of data frames corresponding to sheets of the Excel file. 
18 |      }
19 | 
20 | \note{ Outputs are for a given sex. Races are typically pooled.}
21 | 
22 | \author{Tom Radivoyevitch (radivot@ccf.org)}
23 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D},\link{seerSet}} }
24 | 
25 | \examples{
26 | \dontrun{
27 | library(SEERaBomb)
28 | pm=simSeerSet()
29 | pm=mk2D(pm) 
30 | mybrks=c(0,1,5,10)
31 | pm=tsd(pm,brkst=mybrks,trts=c("noRad","rad")) 
32 | (lab=paste0("b",paste(mybrks,collapse="_")))
33 | (L=mkExcelTsd(pm,lab))
34 | (L=mkExcelTsd(pm,lab,flip=TRUE))
35 | }
36 | 
37 | }
38 | 
39 | 
40 | \keyword{internal}
41 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/csdEx3.R:
--------------------------------------------------------------------------------
 1 | ###csdEx3.R
 2 | (d=canc%>%filter(sex=="Female",cancer%in%c("breast",secs)))
 3 | pf=seerSet(d,popsae,Sex="Female")#pooled (races) females 
 4 | pf=mk2D(pf,secondS=secs)#adds secs background rates to pf
 5 | trts=c("rad.chemo","rad.noChemo","noRad.chemo","noRad.noChemo")
 6 | pf=csd(pf,brkst=c(0,1,2,3,5,10),brksa=c(0,60),trts=trts,firstS="breast")
 7 | (dA=pf$DF%>%filter(ageG=="(0,60]")) 
 8 | gy=ylab("Relative Risk of Leukemia")
 9 | myt=theme(legend.key.height=unit(.25,'lines'),legend.position=c(.5,.95))
10 | cc=coord_cartesian(ylim=c(0,25))#clips high errorbars
11 | gx=xlab("Years Since Breast Cancer Diagnosis")
12 | dA%>%ggplot(aes(x=t,y=RR,col=cancer2))+facet_grid(rad~chemo)+ 
13 |   gp+gl+gx+gy+gh+geRR+tc(14)+ltp+cc+jco+sbb+ltb+myt+lh
14 | ggsave("~/Results/tutorial/breastEx3A.pdf",width=4,height=4) 
15 | 
16 | (d=canc%>%filter(cancer%in%c("thyroid",secs)))
17 | pf=seerSet(d,popsae,Sex="Female");pm=seerSet(d,popsae,Sex="Male") 
18 | pf=mk2D(pf,secondS=secs);pm=mk2D(pm,secondS=secs)
19 | trts=c("rad.noChemo","noRad.noChemo")
20 | pf=csd(pf,brkst=c(0,1,2,3,5,10),brksa=c(0,60),trts=trts,firstS="thyroid")
21 | pm=csd(pm,brkst=c(0,1,2,3,5,10),brksa=c(0,60),trts=trts,firstS="thyroid")
22 | DF=bind_rows(pf$DF,pm$DF)
23 | D=DF%>%group_by(int,rad,cancer2)%>%summarize(O=sum(O),E=sum(E),t=mean(t))
24 | D=D%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))
25 | Dtop=D%>%mutate(grp=str_c(rad,": All Ages"))
26 | D=DF%>%filter(rad=="Rad")
27 | D=D%>%group_by(int,ageG,cancer2)%>%summarize(O=sum(O),E=sum(E),t=mean(t))
28 | D=D%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))
29 | D$ageG=c("Age 0-60","Age >60")[D$ageG]
30 | Dbot=D%>%mutate(grp=str_c("Rad: ",ageG))
31 | dB=bind_rows(Dtop,Dbot)
32 | dB$grp=as_factor(dB$grp)#orders by occurrence, as wanted
33 | gx=xlab("Years Since Thyroid Cancer Diagnosis")
34 | dB%>%ggplot(aes(x=t,y=RR,col=cancer2))+facet_wrap(~grp)+ 
35 |           gp+gl+gx+gy+gh+geRR+tc(14)+myt+cc+jco+sbb+ltb+ltp+myt+lh
36 | ggsave("~/Results/tutorial/thyroidEx3B.pdf",width=4,height=4)
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/leukemia2016/mkExcelDemo.R:
--------------------------------------------------------------------------------
 1 | # mkExcelDemo.R
 2 | rm(list=ls()) 
 3 | library(SEERaBomb) 
 4 | library(tidyverse)
 5 | library(magrittr)
 6 | if (1) {
 7 |   load("~/data/SEER/mrgd/cancDef.RData") 
 8 |   load("~/data/SEER/mrgd/popsae.RData") 
 9 |   canc=canc%>%select(-reg,-radiatn,-histo3,-ICD9)
10 |   canc=canc%>%filter(cancer!="benign")
11 |   canc%<>%separate(trt,c("trt","trtc"))
12 |   canc$trt=factor(canc$trt)
13 |   popsa=popsae%>%group_by(db,race,sex,age,year)%>%summarize(py=sum(py)) # sum on regs
14 |   m=seerSet(canc,popsa,Sex="Male",ageStart=0,ageEnd=100) 
15 |   f=seerSet(canc,popsa,Sex="Female",ageStart=0,ageEnd=100) 
16 |   m=mk2D(m) 
17 |   f=mk2D(f) 
18 |   brks=c(0,0.5,1,2,3,10)
19 |   m=tsd(m,brks=brks,trts=c("rad","noRad")) 
20 |   f=tsd(f,brks=brks,trts=c("rad","noRad"))
21 |   system.time(save(m,f,file="~/Results/amlMDS/mfExcel.RData")) #~10 seconds 
22 | } else {
23 |   load("~/Results/amlMDS/mfExcel.RData") 
24 | }
25 | 
26 | mkExcelTsd(m,"b0_0.5_1_2_3_10",outDir="~/Results/amlMDS",outName="males") #out filenames are otherwise coded. 
27 | mkExcelTsd(f,"b0_0.5_1_2_3_10",outDir="~/Results/amlMDS",outName="females")
28 | mkExcelTsd(m,"b0_0.5_1_2_3_10",outDir="~/Results/amlMDS",outName="males",flip=T)
29 | mkExcelTsd(f,"b0_0.5_1_2_3_10",outDir="~/Results/amlMDS",outName="females",flip=T)
30 | 
31 | # test using csd instead of tsd
32 | if(1) {
33 |   mc=csd(m,brkst=c(0,0.5,1,2,3,10),trts=c("rad","noRad")) 
34 |   fc=csd(f,brkst=c(0,0.5,1,2,3,10),trts=c("rad","noRad")) 
35 |   system.time(save(mc,fc,file="~/Results/amlMDS/mfExcelCSD.RData")) # 33 secs
36 | } else {
37 |   load("~/Results/amlMDS/mfExcelCSD.RData") 
38 | }
39 | mkExcelCsd(mc,"b0_0.5_1_2_3_10",biny="[1975,2017)",outDir="~/Results/amlMDS",outName="malesCsd") 
40 | mkExcelCsd(fc,"b0_0.5_1_2_3_10",biny="[1975,2017)",outDir="~/Results/amlMDS",outName="femalesCsd") 
41 | mkExcelCsd(mc,"b0_0.5_1_2_3_10",biny="[1975,2017)",outDir="~/Results/amlMDS",outName="malesCsd",flip=T) 
42 | mkExcelCsd(fc,"b0_0.5_1_2_3_10",biny="[1975,2017)",outDir="~/Results/amlMDS",outName="femalesCsd",flip=T) 
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/REB2014/baseGraphics/racePvalues.R:
--------------------------------------------------------------------------------
 1 | load("/data/SEER/00/pops.RData") # this loads in pops
 2 | pyf=pym=vector(3,mode="list"); 
 3 | for (i in 0:18) { for (r in 1:2) {
 4 | 		pym[[r]][i+1]=with(pops,sum(population[(popsex==1)&(popage==i)&(poprace==r)]))
 5 | 		pyf[[r]][i+1]=with(pops,sum(population[(popsex==2)&(popage==i)&(poprace==r)]))}
 6 | 	pym[[3]][i+1]=with(pops,sum(population[(popsex==1)&(popage==i)&(poprace>2)]))
 7 | 	pyf[[3]][i+1]=with(pops,sum(population[(popsex==2)&(popage==i)&(poprace>2)])) }
 8 | 
 9 | load("/data/SEER/00/lymyleuk.RData") # this loads in DF
10 | casesf=casesm=vector(3,mode="list"); 
11 | for (i in 1:3) {
12 | 	if (i==3) d=DF[(DF$histo2==9863)&(DF$numprims==1)&(DF$race>2)&(DF$race<98),] else 
13 | 		d=DF[(DF$histo2==9863)&(DF$numprims==1)&(DF$race==i),] 
14 | 	casesm[[i]]=hist(d$agerec[d$sex==1],breaks=c(seq(-.5,17.5,1),100),plot=FALSE)$counts
15 | 	casesf[[i]]=hist(d$agerec[d$sex==2],breaks=c(seq(-.5,17.5,1),100),plot=FALSE)$counts}	
16 | 
17 | (DF=data.frame(age=rep(c(0.5,3,seq(7.5,87.5,5)),6),race=rep(c("white","black","asian"),each=19,times=2),
18 | 	sex=rep(c("male","female"),each=57),cases=c(unlist(casesm),unlist(casesf)),py=c(unlist(pym),unlist(pyf))))
19 | lapply(DF,class)
20 | DF=subset(DF,age>20)
21 | DF=transform(DF,agec=age-55)  # center so that intercept is incidence at age 55
22 | head(DF)
23 | 
24 | summary(lmf<-glm(cases~agec*race*sex+offset(log(py)),family=poisson,data=DF))
25 | # as expected, sex has no significant interactions with anything so move it to additive
26 | summary(lmf<-glm(cases~agec*race+sex+offset(log(py)),family=poisson,data=DF))
27 | # shows that Asians (reference group) do not differ significantly from blacks in slope, but do in intercept
28 | # also shows that Asians differ from whites in both slope and intercept	
29 | exp(0.383664) # confirms average M/F across races of ~1.5
30 | exp(coef(lmf)[5]) # same thing: 5th coeff is male offset
31 | 
32 | summary(lmf<-glm(cases~agec+sex+offset(log(py)),family=poisson,data=subset(DF,race=="asian") ))
33 | # confirms Asian slope of k= 0.025 in Figure 4 
34 | exp(coef(lmf)[3]) # confirms Asian M/F = 1.7 in Figure 4
35 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mkExcelCsd.Rd:
--------------------------------------------------------------------------------
 1 | \name{mkExcelCsd}
 2 | \alias{mkExcelCsd}
 3 | \title{Make RR Excel file from csd output}
 4 | \description{Provides relative risks (RR) organized by 1st and 2nd cancers, times since 1st cancer diagnoses, and 1st cancer treatment. RR = O/E where O = observed cases and E = cases expected under a null hypothesis that prior cancers do not impact subsequent risks. If flip = FALSE (default), sheets = 1st cancers and rows = 2nd cancers, else sheets = 2nd cancers and rows = 1st cancers; columns are always intervals of years since diagnosis, in 
 5 | 1st cancer treatment blocks. RR CI and observed numbers are included in each data cell.}
 6 | \usage{mkExcelCsd(seerSet,tsdn,biny="[1975,2017)",bina="(0,126]",
 7 |                         outDir="~/Results",outName=NULL,flip=FALSE)} 
 8 | \arguments{
 9 |   \item{seerSet}{A seerSet list after it has been processed by csd().  }
10 |   \item{tsdn}{Name of set of times since diagnosis. This is based on the brkst argument to csd().
11 |               If length >1 a brkst vector is assumed and coerced/collapsed to a tsdn string.}
12 |   \item{biny}{Year at DX interval. }
13 |   \item{bina}{Age at DX interval.}
14 |   \item{outDir}{Folder of the Excel file that will be generated.}
15 |   \item{outName}{if null (default), Excel file name = seerSet base file name (bfn) + tsdn,
16 |                 else it is outName. Eitherway, "Flipped" is appended to the name if flip is TRUE.  }
17 |   \item{flip}{If FALSE, sheets are first cancers, rows seconds. If TRUE, sheets are second cancers, rows firsts.}
18 | }
19 | 
20 | \value{ Returned invisibly, a list of data frames corresponding to sheets of the Excel file. 
21 |      }
22 | 
23 | \note{ Outputs are for a given sex. Races are typically pooled.}
24 | 
25 | \author{Tom Radivoyevitch (radivot@ccf.org)}
26 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D},\link{seerSet}} }
27 | 
28 | \examples{
29 | \dontrun{
30 | library(SEERaBomb)
31 | pm=simSeerSet()
32 | pm=mk2D(pm) 
33 | mybrks=c(0,1,5,10)
34 | pm=csd(pm,brkst=mybrks,trts=c("noRad","rad")) 
35 | (lab=paste0("b",paste(mybrks,collapse="_")))
36 | (L=mkExcelCsd(pm,lab))
37 | (L=mkExcelCsd(pm,lab,flip=TRUE))
38 | }
39 | 
40 | }
41 | 
42 | 
43 | \keyword{IO}
44 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/PMF/mkMaxRes.R:
--------------------------------------------------------------------------------
 1 | #####  aggregate deaths into Max Resolution bins (i.e. only equivalent deaths stacks)
 2 | graphics.off();rm(list=ls())#clear plots and environment
 3 | library(tidyverse);library(SEERaBomb)
 4 | load("pmf/data/d.RData") # made in demog.R
 5 | load("~/data/mrt/mrtUSA.RData")#loads US mortality data
 6 | (d=d%>%select(yrdx,agedx,sex,surv,status))
 7 | D=d%>%group_by(sex)%>%nest()
 8 | D$data[[1]]
 9 | D=D%>%mutate(ddata=map(data,function(x) x%>%filter(status==1)))
10 | D=D%>%mutate(adata=map(ddata,function(x) x%>%group_by(agedx)%>%group_split()))
11 | D
12 | D$adata[[1]]
13 | (fa=sort(unique(D$ddata[[1]]$agedx)))
14 | length(fa) #64 unique death ages for females
15 | sapply(D$adata[[1]],function(x) x$agedx[1]) # same, check
16 | 
17 | getBt=function(x) {
18 |   ut=sort(unique(x$surv))
19 |   c(0,ut[-length(ut)]+diff(ut)/2)
20 | }  
21 | 
22 | D=D%>%mutate(brkst=map(adata,function(x) lapply(x,getBt)))
23 | D
24 | D$brkst[[1]]
25 | getBa=function(x) {
26 |   ua=sort(unique(x$agedx))
27 |   c(0,ua[-length(ua)]+diff(ua)/2,100)
28 | }
29 | 
30 | D=D%>%mutate(brksa=map(ddata,getBa))
31 | D
32 | mkAgeG=function(x,y) x%>%mutate(ageG=cut(agedx,breaks=y,include.lowest = T))
33 | D=D%>%mutate(data=map2(data,brksa,mkAgeG))
34 | D
35 | 
36 | D$data[[1]]
37 | D  #done now with ddata and brksa so remove them
38 | D=D%>%select(-ddata,-brksa)
39 | D
40 | D=D%>%mutate(tbL=map(data,function(x) x%>%group_by(ageG)%>%group_split()))
41 | D # dims check, remove adata
42 | D=D%>%select(-adata)
43 | D$tbL[[1]][[1]] #need sex back in here for msd to work
44 | D=D%>%mutate(tbL=map2(tbL,sex,function(x,y) map2(x,y,function(x,y) x%>%mutate(sex=y)) ))
45 | D$tbL[[1]][[1]] #got it
46 | D$tbL[[2]][[1]] #got it
47 | 
48 | getMrt=function(x,y) mapply(function(x,y) msd(x,mrt,brkst=y),x,y,SIMPLIFY = FALSE)
49 | D=D%>%mutate(m=map2(tbL,brkst,getMrt)) # takes a while
50 | D
51 | D$m[[1]][[1]]
52 | D$tbL[[1]][[1]]  # need to move the mean age from here into m
53 | D=D%>%mutate(ma=map2(m,tbL,function(x,y) map2(x,y,function(x,y) x%>%mutate(a=mean(y$agedx))) ))
54 | D$ma[[1]][[1]] #got it
55 | D  # now we just need to rbind ma
56 | (m=bind_rows(D$ma))
57 | m=m%>%select(-(EAR:rrU))
58 | save(m,file="pmf/data/maxRes.RData")
59 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/seerStats.R:
--------------------------------------------------------------------------------
 1 | seerStats<-function(canc,popsa) {
 2 |   age=agedx=db=py=reg=cancer=over99=total=sex=year=seqnum=gte100=PY=NULL 
 3 |   #   cf=function (x) comma_format()(x)
 4 |   #seerStats(canc,popsae)
 5 |   L=NULL
 6 |   L$PDR=popsa%>%group_by(db,reg)%>%summarize(PY=round(sum(py)/1e6,1),age=weighted.mean(age,py))
 7 |   L$PSY=popsa%>%group_by(sex,year)%>%summarize(PY=round(sum(py)/1e6,1))
 8 |   D=canc%>%group_by(cancer,db)%>%summarize(n=n())
 9 |   A=canc%>%filter(agedx>99.5)%>%group_by(cancer)%>%summarize(n=n())
10 |   canc$seqnum=ifelse(canc$seqnum>3,4,canc$seqnum)
11 |   canc$seqnum=ifelse(canc$seqnum==0,1,canc$seqnum)
12 |   Sq=canc%>%group_by(cancer,seqnum)%>%summarize(n=n())
13 |   C=dcast(D,cancer~db,value.var="n",fun.aggregate = sum,margins=c("db"))
14 |   #   C=dcast(D,cancer~db,value.var="n",fun.aggregate = sum,margins=TRUE)
15 |   O=dcast(A,cancer~.,value.var="n")
16 |   S=dcast(Sq,cancer~seqnum,value.var="n",fun.aggregate = sum,margins=c("seqnum")) #checks fine with total
17 |   #    S=dcast(Sq,cancer~seqnum,value.var="n")
18 |   d=left_join(C,O,by="cancer")
19 |   names(d)[5:6]=c("total","gte100")
20 |   d=d%>%mutate(under100=total-gte100)
21 |   d=left_join(d,S,by="cancer")
22 |   names(d)[8:12]=c("firsts","seconds","thirds","highers","all")
23 |   d[is.na(d)]=0
24 |   d$cancer=as.character(d$cancer)
25 |   d[dim(d)[1]+1,]=data.frame("total",t(sapply(d[-1],sum)))
26 |   d[dim(d)[1],1]="total"
27 |   L$d=d
28 |   cat(paste("\nTable 1. Cases per database, by >= or < 100, and by cancer sequence, through",
29 |             max(popsa$year),"(sexes and races pooled).\n"))
30 |   print(d) 
31 |   cat(paste("\nTable 2. Population PY in millions and PY-weighted mean ages, through",
32 |             max(popsa$year),"(sexes and races pooled).\n"))
33 |   cat(paste(""))
34 |   print(as.data.frame(L$PDR)) 
35 |   N=sum(L$PSY$PY)
36 |   tit=paste(N,"Million Total Person-Years")
37 |   p=qplot(year,PY,col=sex,ylab="Person-Years (Millions)",main=tit,data=L$PSY)+
38 |     theme(legend.position = c(.18, .75),legend.title = element_blank(),legend.text=element_text(size=rel(1.5)),
39 |           axis.text=element_text(size=rel(2)),axis.title=element_text(size=rel(2)))
40 |   print(p)
41 |   
42 |   invisible(L)
43 | } 
44 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/seerEAR/fig2Ealone.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment 
 2 | library(tidyverse)
 3 | load("data/D1.RData")
 4 | 
 5 | t=seq(0,40,.01)
 6 | mylin=function(t,m1=0.0011,t1=14.4,t2=26.8) {
 7 |   y=ifelse(t<t1,m1*t,0)
 8 |   y=ifelse((t>=t1)&(t<t2),m1*t1-(m1*t1/(t2-t1))*(t-t1),y)
 9 |   y=ifelse(t>=t2,0,y)
10 |   y
11 | }
12 | mygam=function(t,c=-0.37849645,k=0.39624240) k^2*(t/2)*exp(c-k*t)
13 | pd=tibble(t,Lines=mylin(t),Gamma=mygam(t),Sum=Lines+Gamma)
14 | pd=pd%>%gather(key="func",value="y",-t)
15 | 
16 | pS=pd%>%filter(func=="Sum")
17 | pS=pS%>%mutate(cy=cumsum(0.01*y),y=1-exp(-cy),Method="Excess Absolute Risk")
18 | 
19 | library(bbmle)#O=EAR*PY+E
20 | (s=summary(mod<-mle2(O~dpois(lambda=(mygam(t,c,k)+mylin(t,m1,t1,t2))*PY+E),  
21 |                      method="Nelder-Mead",
22 |                      start=list(c=0,k=1,m1=0.002,t1=10,t2=20),data=D1,
23 |                      control=list(maxit=10000))))
24 | library(broom)
25 | mkCIm=function(est,sd) paste0(sprintf('%.4f',est)," (",
26 |                               sprintf('%.4f',est-1.96*sd),", ",
27 |                               sprintf('%.4f',est+1.96*sd),")") 
28 | (params=tidy(mod)%>%mutate(CI=mkCIm(estimate,std.error)))
29 | 
30 | library(WriteXLS)
31 | WriteXLS(list(Table5=params%>%select(Parameter=term,  
32 |                                      "Estimate (95% CI)"=CI,
33 |                                      "P Value"=p.value)),
34 |          ExcelFileName="outs/TableModParams.xlsx",AdjWidth=T) 
35 | 
36 | pd=pd%>%mutate(comp=ifelse(func=="Lines","Triangle",func),func=as_factor(comp))
37 | 
38 | gp=geom_point() 
39 | ge=geom_errorbar(aes(ymin=LL,ymax=UL),width=0.2)#for absolute risks
40 | ltb=theme(legend.margin=margin(0,0,0,0),legend.title=element_blank())
41 | ltp=theme(legend.position="top")
42 | tc=function(sz) theme_classic(base_size=sz)
43 | svts=scale_x_continuous(breaks=seq(0,35,5),limits=c(0,38))
44 | gx=xlab("Years Since Diagnosis")
45 | gy=ylab("Excess Absolute Risk of Mortality")
46 | gh0=geom_hline(yintercept=0)
47 | 
48 | D1%>%ggplot(aes(x=t,y=EAR))+gp+gx + ge+ gy+
49 |   geom_line(aes(x=t,y=y,col=func),data=pd)+
50 |   tc(14)+ltb+ltp
51 | ggsave("outs/EARfig2Ealone.pdf",width=4,height=3)
52 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/SEERaBomb-package.Rd:
--------------------------------------------------------------------------------
 1 | \name{SEERaBomb-package}
 2 | \alias{SEERaBomb-package}
 3 | \docType{package}
 4 | \title{ SEER and A-Bomb Data Analysis Tools }
 5 | \description{Creates SEER (Surveillance, Epidemiology and End Results) and Japanese A-bomb survivor data binaries 
 6 |              from ASCII sources and provides tools for estimating SEER second cancer risks.     
 7 | Depicted below, \code{mkSEER}
 8 | merges cancers and populations of all three of the SEER databases into single cancer and 
 9 | population data frames.  
10 | 
11 | %\if{html}{\figure{mkSEERfig2.png}{options: width=420, height=340}}
12 | \if{html}{\figure{mkSEERfig.png}{options: width=420}}
13 | \if{latex}{\figure{mkSEERfig.png}{options: width=5in}}
14 | 
15 | SEER data field positions and names change over the years and the original purpose of  
16 | SEERaBomb was to buffer/protect R scripts from such changes. A second purpose was to speed up SEER data computations by 
17 | reducing the data [via \code{pickFields()}] to only fields of interest. SEERaBomb now has an additional purpose:
18 | estimating relative risks of SEER second cancers after diagnoses of first cancers, using all three SEER databases.
19 | Note: SEER no longer includes radiation therapy data by default. Users must thus obtain custom SEER treatment data  
20 | \url{https://seer.cancer.gov/data/treatment.html}.
21 | }
22 | 
23 | \details{
24 | \tabular{ll}{
25 | Package: \tab \pkg{SEERaBomb}\cr
26 | Type: \tab Package\cr
27 | Depends: \tab dplyr,  ggplot2, rgl,   demography\cr
28 | Suggests: \tab bbmle \cr
29 | License: \tab GPL-2\cr
30 | LazyData: \tab yes\cr
31 | URL: \tab \url{http://epbi-radivot.cwru.edu/SEERaBomb/SEERaBomb.html}\cr
32 | }
33 | }
34 | 
35 | 
36 | 
37 | \references{    Surveillance, Epidemiology, and End Results (SEER) Program
38 |      (www.seer.cancer.gov) Research Data (1973-2015), National Cancer
39 |      Institute, DCCPS, Surveillance Research Program, Surveillance Systems Branch,
40 |      released April 2018, based on the November 2017 submission.}
41 | 
42 | \author{ Tom Radivoyevitch (\email{radivot@ccf.org}) }
43 | \seealso{\code{\link{getFields},\link{pickFields},\link{mkSEER},\link{mkSEERold},\link{mkAbomb} }}
44 | 
45 | 
46 | \keyword{package}
47 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/tsd.Rd:
--------------------------------------------------------------------------------
 1 | \name{tsd}
 2 | \alias{tsd}
 3 | \title{Compute RR vs tsd}
 4 | \description{Computes relative risks (RR) over specified times since diagnoses (tsd) of a 1st cancer. 2D spline fits are used to produce expected cases E controlling for background risk dedepence on 
 5 | age and calendar year.  RR is then O/E where O is the number of observed cases.
 6 | WARNING: Use of this function is deprecated, please use csd() instead.}
 7 | \usage{tsd(seerSet,brks,trts=NULL,PYLong=FALSE,firstS="all") } % ,outDir="~/Results", txt=NULL)} 
 8 | \arguments{
 9 |   \item{seerSet}{A seerSet object produced by mk2D().  }
10 |   \item{brks}{Vector of breaks in years used to form tsd intervals/bins. }
11 |   \item{trts}{Character vector of treatments of interest. Default of NULL => all levels in seerSet's canc$trt. }
12 |   \item{PYLong}{Set true if in addition to O and E for each tsd interval you also want PY strips
13 |              for each individual; having these big dataframes slows saving seerSets, so only fetch if needed.}
14 |   \item{firstS}{Character vector of first cancers of interest. Default of "all" sets it to the vector of all cancers
15 |                 in the seerSet field cancerS, which is created when the object is first created by seerSet().}
16 | }
17 | 
18 | \value{The input with an L component added to it or extended it if it already existed. Each component of
19 | L is a nested list of lists that can yield
20 | second cancer relative risks as a function of time since diagnosis and different first cancers and if they 
21 | were irradiated or not. 
22 |      }
23 | 
24 | \note{ This function was developed with support from the Cleveland Clinic Foundation.}
25 | \author{Tom Radivoyevitch (radivot@ccf.org)}
26 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D},\link{seerSet}} }
27 | 
28 | \examples{
29 | \dontrun{
30 | library(SEERaBomb)
31 | pm=simSeerSet()
32 | pm=mk2D(pm) 
33 | mybrks=c(0,1,5,10)
34 | pm=tsd(pm,brks=mybrks,trts=c("noRad","rad"),PYM=TRUE)
35 | (lab=paste0("b",paste(mybrks,collapse="_")))
36 | LM=pm$L[[lab]]$'rad'
37 | names(LM)
38 | LM$PyM
39 | LM$Obs
40 | LM$Exp
41 | table(LM$PyM$`(0,1]`$cancer2)
42 | table(LM$PyM$`(1,5]`$cancer2)
43 | table(LM$PyM$`(5,10]`$cancer2)
44 | }
45 | }
46 | 
47 | \keyword{internal}
48 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/leukemiaRes2016/timeCrs.R:
--------------------------------------------------------------------------------
 1 | # timeCrs.R   (CLL RR time courses after Non-Hematological first cancers)
 2 | rm(list=ls()) 
 3 | library(SEERaBomb) 
 4 | library(reshape2)
 5 | library(dplyr)  
 6 | library(grid)
 7 | load("~/Results/CLL/pm.RData") 
 8 | load("~/Results/CLL/pf.RData") 
 9 | brks=c(0,.1,.2,.3,0.6,1,1.5,2,2.5,3,4,5,7,10,13,16,20) # res back up since now only CLL
10 | (brkS=paste0("b",paste(brks,collapse="_")))
11 | HM=c("AML","AMLti","APL","MDS","CMML","CML","MPN","ALL","CLL","HCL","OL","NHL","MM","HL")
12 | dm=mkDF(pm,brkS)
13 | df=mkDF(pf,brkS)
14 | df=df[!df$cancer1%in%HM,]  # exclude non-heme first cancers
15 | dm=dm[!dm$cancer1%in%HM,]
16 | paste(levels(df$int),collapse=", ") # to paste intervals into Figure caption
17 | # sum observed and expected cases across all non-heme first cancer types
18 | f=df%>%group_by(trt,int)%>%summarize(O=sum(O),E=sum(E),t=mean(t,na.rm=T),sex="Female")
19 | m=dm%>%group_by(trt,int)%>%summarize(O=sum(O),E=sum(E),t=mean(t,na.rm=T),sex="Male")
20 | D=rbind(f,m)
21 | (D=D%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E)))
22 | D$Radiation="No" # update treatment names for legend
23 | D$Radiation[D$trt=="rad"]="Yes"
24 | D$Period="Early"  # make two separate time scales for plotting
25 | D$Period[D$t>5]="Late"
26 | graphics.off()
27 | quartz(width=6.5,height=3.8)
28 | # theme_set(theme_bw())
29 | theme_update(legend.position = c(.80, .35))
30 | theme_update(axis.text=element_text(size=rel(1.4)),
31 |              axis.title=element_text(size=rel(1.4)),
32 |              legend.title=element_text(size=rel(1.1)),
33 |              legend.text=element_text(size=rel(1.1)),
34 |              strip.text = element_text(size = rel(1.4)))
35 | g=qplot(x=t,y=RR,data=D,col=Radiation,geom=c("line","point"),
36 |         xlab="Years Since Dx of Non-Hematologic 1st Cancer",ylab="CLL 2nd Cancer Relative Risk")
37 | g=g+scale_y_log10(breaks=c(0.3,1,10),labels=c("0.3","1","10"),limits=c(0.2,20))
38 | g=g+facet_grid(sex~Period,scales="free")+geom_abline(intercept=0, slope=0)
39 | # g = g + scale_color_grey(start = 0, end = 0.6)
40 | g=g+  geom_errorbar(aes(ymin=rrL,ymax=rrU,width=.15))+ theme(legend.key.height=unit(.45, "cm"))
41 | print(g)
42 | ggsave("~/Results/CLL/timeCrs.eps") 
43 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/summary.seerSet.R:
--------------------------------------------------------------------------------
 1 | summary.seerSet<-function(object, ...) {
 2 |   age=rad=trt=race=surv=year=py=popsa=cancer=NULL 
 3 |   cf=function (x) comma_format()(x)
 4 |   # object=pf; library(reshape2,scales)
 5 |   object$canc=object$canc%>%tidyr::separate(trt,c("rad","chemo"),sep="[\\.]",fixed=T)
 6 |   D=object$canc%>%filter(rad!="unk")%>%group_by(cancer,rad)%>%
 7 |     summarize(n=n(),age=round(mean(age),1),surv=round(median(surv,na.rm = TRUE),1)) #,
 8 | #               seq=mean(ifelse(seqnum==0,1,seqnum)) ) #%>%filter(n>9)
 9 |   P=object$popsa%>%group_by(year)%>%summarize(PY=round(sum(py)/1e6,1))
10 |   A=dcast(D,cancer~rad,value.var="age")
11 |   S=dcast(D,cancer~rad,value.var="surv")
12 |   #   Sq=dcast(D,cancer~trt,value.var="seq")
13 |   N=dcast(D,cancer~rad,value.var="n")
14 |   #   N=dcast(D,cancer~trt,value.var="n",margins=c("cancer"),fun.agg=sum)
15 |   d=left_join(N,A,by="cancer")
16 |   d=left_join(d,S,by="cancer")
17 |   names(d)=c("Cancer",paste(rep(c("Count","Age","Survival"),each=2),c("noRad","rad"),sep="."))
18 |   seerSetSum=NULL
19 |   seerSetSum$title=paste0("              Counts, Means of Ages, and Median Survivals in Years\n               Sex: ",
20 |                           object$sex,"    Race: ",object$race,
21 |                           "   Years: ",min(object$popsa$year),"-",max(object$popsa$year) ,"\n")
22 |   Cnts=c(total=dim(object$canc)[1],
23 |          unkTrt=dim(object$canc%>%filter(rad=="unk"))[1],
24 |          unkTrtNsurv=dim(object$canc%>%filter(rad=="unk",is.na(surv)))[1],
25 |          unkSurv=dim(object$canc%>%filter(is.na(surv)))[1])
26 |   seerSetSum$cnts=Cnts
27 |   seerSetSum$sex=object$sex
28 |   
29 |   seerSetSum$notes=c(paste("Of",cf(Cnts["total"]),"total",object$sex,"cases of",object$race,"race,",cf(Cnts["unkTrt"]),"with unknown treatment were not included."),
30 |                      paste("Of",cf(Cnts["unkSurv"]),"cases with unknown survival,",cf(Cnts["unkTrtNsurv"]),
31 |                            "were excluded due to also having unknown treatment."),
32 |                      "In 2005, due to hurricane Katrina some PY (and cases) are kept in a separate database not used here.")
33 |   
34 |   seerSetSum$P=P
35 |   seerSetSum$d=d
36 |   class(seerSetSum)="seerSet.summary"
37 |   seerSetSum
38 | } 
39 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/seerSet2.R:
--------------------------------------------------------------------------------
 1 | seerSet2<-function(canc,popsa,Sex, Race="pooled",ageStart=15,ageEnd=85) {
 2 |   # gimic to get rid of unwanted notes in R CMD check
 3 |    agedx=age=age86=yrdx=sex=race=surv=yrbrth=py=year=NULL 
 4 |   
 5 |   if (!"age"%in%names(popsa)) { #assume first time here
 6 |     # so rename age86 as age to help plotting and joining later
 7 |     popsa=popsa%>%mutate(age=age86)%>%select(-age86) 
 8 |   }
 9 |   
10 |   if (!"age"%in%names(canc)) { #assume  first time here
11 |     # let year be yrdx as a whole integer to free it to become a real
12 |     canc=canc%>%mutate(year=yrdx) 
13 | #when the next 2 mutations were in one call, yrdx would occasionally overwrite surv ... very weird.
14 |     canc=canc%>%mutate(yrdx=yrdx+0.5) 
15 |     canc=canc%>%mutate(surv=round((surv+0.5)/12,3))%>%  
16 |       mutate(age=agedx+0.5) #convert ages at diagnosis to best guesses
17 |     canc=canc%>%select(-agedx) 
18 |   }  
19 |   
20 | #   canc=canc%>%filter(age>=(ageStart+0.5),age<(ageEnd+0.5),sex==Sex)
21 |   canc=canc%>%filter(age>=ageStart,age<ageEnd,sex==Sex)
22 |   popsa=popsa%>%filter(age>=ageStart,age<ageEnd,sex==Sex)
23 |   if (Race!="pooled") {
24 |     canc=canc%>%filter(race==Race)
25 |     popsa=popsa%>%filter(race==Race) 
26 |   }
27 |   canc$cancer=factor(canc$cancer) # get rid of any opposite sex cancer type levels
28 |   cancerS=levels(canc$cancer)
29 |   
30 |   if ("age86"%in%names(canc)) canc=canc%>%select(-age86) 
31 |   if ("sex"%in%names(canc)) canc=canc%>%select(-sex) 
32 |   if ("race"%in%names(canc)) canc=canc%>%select(-race) 
33 |   if ("yrbrth"%in%names(canc)) canc=canc%>%select(-yrbrth)  
34 |   # note: modx was removed from canc above
35 |   
36 |   # if ("sex"%in%names(popsa)) popsa=popsa%>%select(-sex)
37 |   # if ("race"%in%names(popsa)) popsa=popsa%>%select(-race)
38 | 
39 |   if ("sex"%in%names(popsa)) popsa$sex=NULL
40 |   if ("race"%in%names(popsa)) popsa$race=NULL
41 |   
42 |   
43 |   popsa=popsa%>%group_by(age,year)%>%summarize(py=sum(py))
44 |   
45 |   # and package it all up
46 |   seerSet=list(canc=canc,popsa=popsa,sex=Sex,race=Race,ageStart=ageStart,ageEnd=ageEnd,cancerS=cancerS,yearEnd=max(popsa$year))
47 |   class(seerSet)="seerSet"
48 |   seerSet
49 | } # return a list that can be attached or with-ed in other functions
50 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/attic/seerLeu.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls())  
 2 | library(SEERaBomb);library(tidyverse);library(ggsci)
 3 | load("~/data/SEER/mrgd/cancDef.RData") 
 4 | canc$cancer=fct_collapse(canc$cancer,AML=c("AML","AMLti","APL"))
 5 | load("~/data/SEER/mrgd/popsae.RData") 
 6 | leus=c("AML","ALL","CML")
 7 | d=incidSEER(canc,popsae,leus)
 8 | d=d%>%filter(age<=85,year>=2000)
 9 | d=d%>%mutate(ageG=cut(age,seq(0,85,5)))
10 | d=d%>%group_by(cancer,ageG)%>%
11 |   summarize(age=mean(age),py=sum(py),n=sum(n))%>%
12 |   mutate(incid=n/py,grp="Background")
13 | d=d%>%select(cancer,grp,everything(),-ageG)#reorder columns
14 | NHM=c("breast","thyroid","brain","renal")#NHM=non-heme malignancy
15 | brksa=c(0,40,50,60,70,80)#broad 1st interval avoids 0 CML groups
16 | system.time(ra<-riskVsAge(canc,firstS=NHM,secondS=leus,brksa=brksa))#~15s
17 | raCML=riskVsAge(canc,firstS=c("AML","ALL"),secondS="CML",brksa=brksa)
18 | D=bind_rows(ra,raCML)
19 | D=D%>%filter(rad!="Unk",chemo!="Unk")
20 | D=D%>%group_by(cancer2,rad,chemo,age)%>%
21 |   summarize(py=sum(py),n=sum(o),incid=n/py)
22 | D=D%>%rename(cancer=cancer2)%>%unite(grp,rad,chemo,sep=", ")
23 | dd=bind_rows(D,d)
24 | ord=c("Rad, Chemo","No Rad, Chemo","Rad, No Chemo",
25 |       "No Rad, No Chemo","Background")
26 | dd$grp=factor(dd$grp,levels=ord)
27 | dd$cancer=factor(dd$cancer,levels=c("AML","ALL","CML"))
28 | myt=theme(legend.position=c(.25,.22),legend.title=element_blank(),
29 |           legend.direction="vertical",legend.margin=margin(0,0,0,0),
30 |           legend.key.height=unit(.65,'lines'),strip.background=element_blank())
31 | ggplot(dd,aes(x=age,y=incid,col=grp))+geom_line()+facet_grid(~cancer)+
32 |   xlab("Attained Age (Years)")+scale_x_continuous(breaks=c(0,25,50,75))+
33 |   ylab(quote(paste("Cases per ",10^5," Person-Years")))+scale_color_jco()+
34 |   scale_y_log10()+coord_cartesian(ylim=c(0.01,40))+theme_classic()+myt
35 | ggsave("~/Results/tutorial/seerLeu.pdf",width=3.5,height=2.5)#Fig.5
36 | 
37 | # dd=dd%>%mutate(LL=qpois(0.025,n)/py,UL=qpois(0.975,n)/py) #look at some CI
38 | # dd%>%filter(cancer=="CML",grp%in%c("Rad, Chemo","Rad, No Chemo"))%>%arrange(age)
39 | # dd%>%filter(cancer=="AML",grp%in%c("Rad, Chemo","No Rad, Chemo"))%>%arrange(age)
40 | # dd%>%filter(cancer=="ALL",grp%in%c("Rad, Chemo","No Rad, Chemo"))%>%arrange(age)
41 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/seerSet.R:
--------------------------------------------------------------------------------
 1 | seerSet<-function(canc,popsa,Sex, Race="pooled",ageStart=15,ageEnd=85) {
 2 |   # gimic to get rid of unwanted notes in R CMD check
 3 |    agedx=age=age86=yrdx=sex=race=surv=modx=yrbrth=py=year=NULL 
 4 |   
 5 |   if (!"age"%in%names(popsa)) { #assume first time here
 6 |     # so rename age86 as age to help plotting and joining later
 7 |     popsa=popsa%>%mutate(age=age86)%>%select(-age86) 
 8 |   }
 9 |   
10 |   if (!"age"%in%names(canc)) { #assume  first time here
11 |     # let year be yrdx as a whole integer to free it to become a real
12 |     canc=canc%>%mutate(year=yrdx) 
13 | #when the next 2 mutations were in one call, yrdx would occasionally overwrite surv ... very weird.
14 |     canc=canc%>%mutate(yrdx=round(yrdx+(modx-0.5)/12,3))    #modx=1=January 
15 |     canc=canc%>%mutate(surv=round((surv+0.5)/12,3))%>%  
16 |       select(-modx)%>%
17 |       mutate(age=agedx+0.5) #convert ages at diagnosis to best guesses
18 |     canc=canc%>%select(-agedx) 
19 |   }  
20 |   
21 | #   canc=canc%>%filter(age>=(ageStart+0.5),age<(ageEnd+0.5),sex==Sex)
22 |   canc=canc%>%filter(age>=ageStart,age<ageEnd,sex==Sex)
23 |   popsa=popsa%>%filter(age>=ageStart,age<ageEnd,sex==Sex)
24 |   if (Race!="pooled") {
25 |     canc=canc%>%filter(race==Race)
26 |     popsa=popsa%>%filter(race==Race) 
27 |   }
28 |   canc$cancer=factor(canc$cancer) # get rid of any opposite sex cancer type levels
29 |   cancerS=levels(canc$cancer)
30 |   
31 |   if ("age86"%in%names(canc)) canc=canc%>%select(-age86) 
32 |   if ("sex"%in%names(canc)) canc=canc%>%select(-sex) 
33 |   if ("race"%in%names(canc)) canc=canc%>%select(-race) 
34 |   if ("yrbrth"%in%names(canc)) canc=canc%>%select(-yrbrth)  
35 |   # note: modx was removed from canc above
36 |   
37 |   # if ("sex"%in%names(popsa)) popsa=popsa%>%select(-sex)
38 |   # if ("race"%in%names(popsa)) popsa=popsa%>%select(-race)
39 | 
40 |   if ("sex"%in%names(popsa)) popsa$sex=NULL
41 |   if ("race"%in%names(popsa)) popsa$race=NULL
42 |   
43 |   
44 |   popsa=popsa%>%group_by(age,year)%>%summarize(py=sum(py))
45 |   
46 |   # and package it all up
47 |   seerSet=list(canc=canc,popsa=popsa,sex=Sex,race=Race,ageStart=ageStart,ageEnd=ageEnd,cancerS=cancerS,yearEnd=max(popsa$year))
48 |   class(seerSet)="seerSet"
49 |   seerSet
50 | } # return a list that can be attached or with-ed in other functions
51 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/attic/other/EyeTables.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(tidyverse);library(SEERaBomb)
 3 | library(WriteXLS)
 4 | load("~/data/SEER/mrgd/cancPrim.RData")#load SEER cancer data
 5 | head(canc,2)
 6 | (codes=paste0("C",690:699))
 7 | d=canc%>%filter(primsite%in%codes,histo3%in%8720:8790) 
 8 | d$cancer=as.character(d$cancer)
 9 | # C69.3 (choroid), C69.4 (ciliary body and iris), and C69.2 (retina). 
10 | d$cancer[d$primsite=="C690"]="C690-Conjunctiva"
11 | d$cancer[d$primsite=="C691"]="C691-Cornea"
12 | d$cancer[d$primsite=="C692"]="C692-Retinal"
13 | d$cancer[d$primsite=="C693"]="C693-Choroid"
14 | d$cancer[d$primsite=="C694"]="C694-Ciliary"
15 | d$cancer[d$primsite=="C695"]="C695-Lacrimal"
16 | d$cancer[d$primsite=="C696"]="C696-Orbital"
17 | d$cancer[d$primsite=="C698"]="C698-Overlap"
18 | d$cancer[d$primsite=="C699"]="C699-NOS"
19 | (tb1=d%>%group_by(cancer)%>%summarize(cases=n())%>%mutate(percent=100*cases/sum(cases)))
20 | tb1=add_row(tb1,cancer="Total",cases=sum(tb1$cases),percent=sum(tb1$percent))
21 | names(tb1)=c("Cancer","Cases","Prct")
22 | WriteXLS(tb1,ExcelFileName="uveal/outs/tb1.xlsx",AdjWidth=T)
23 | 
24 | dh=d%>%mutate(yrgrp=ifelse(yrdx<2014,"1975-2013","2014-2016"))
25 | table(dh$yrgrp)
26 | (tb2a=dh%>%group_by(cancer,yrgrp)%>%summarize(nO3=n())%>%mutate(propO3=nO3/sum(nO3)))
27 | (tb2a=tb2a%>%select(-propO3)%>%spread(yrgrp,nO3))
28 | 
29 | (tb2b=dh%>%group_by(cancer,yrgrp,db)%>%summarize(nO3=n())%>%mutate(propO3=nO3/sum(nO3)))
30 | (tb2b=tb2b%>%select(-propO3)%>%spread(db,nO3))
31 | WriteXLS(list(Table2a=tb2a,Table2b=tb2b),ExcelFileName="uveal/outs/tb2.xlsx",AdjWidth=T)
32 | 
33 | ########### toying below
34 | d=canc%>%filter(cancer=="eye") #17480 # mapCancs # eye is ICD9 190.0 to 190.9
35 | d$cancer=as.character(d$cancer)
36 | d$cancer[d$primsite=="C690"]="Conjunctiva"
37 | d$cancer[d$primsite=="C691"]="Cornea"
38 | d$cancer[d$primsite=="C692"]="Retinal"
39 | d$cancer[d$primsite=="C693"]="Choroid"
40 | d$cancer[d$primsite=="C694"]="Ciliary"
41 | d$cancer[d$primsite=="C695"]="Lacrimal"
42 | d$cancer[d$primsite=="C696"]="Orbital"
43 | d$cancer[d$primsite=="C698"]="Overlap"
44 | d$cancer[d$primsite=="C699"]="NOS"
45 | table(d$cancer) # lots of retinals
46 | d=d%>%filter(histo3%in%8720:8790) 
47 | table(d$cancer)  # all lost. Use SEER*stat to find out who they are
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/REB2014/baseGraphics/CMLwait.R:
--------------------------------------------------------------------------------
 1 | # Figure 2 of active MS  (differs from Blood Fig 4 only in computing mean T and M/F)
 2 | aBombHome="/data/abomb"
 3 | cols=c("city","sex","doseg","agexg","calg","kerma","PY","adjPY","num.entering",
 4 | 		"age","agex","tsx","cal","sv","gam","neut","lymphoma","NHL","leukemia","AML","ALL","CML","ATL","MM")      
 5 | d<-read.table(file.path(aBombHome,"hema87.dat"), header=F,col.names=cols);
 6 | d=d[d$adjPY>0,] #remove two recs with zero py
 7 | d=d[d$kerma==1,] # take only kerma < 4 Gy
 8 | d$py=10^4*d$adjPY
 9 | m=d[d$sex==1,]; f=d[d$sex==2,] 
10 | years=c(1951,1953,1956,1959,1963,1968,1973,1978,1983,1987)-1945
11 | agem=0
12 | flin<-function(x,df) {
13 | 	c0=x[1];k=x[2];L=x[3:12]; # let data speak through L, like a one way anova
14 | 	with(df,{mn = exp(c0+k*(age-agem))*py + sv*exp(L[calg])*py;
15 | 				-sum(CML*log(mn) - mn)})	}
16 | 
17 | X0=c(c0=ifelse(agem==0,-13,-10),k=0.05,rep(-10,10))
18 | solm=optim(X0,flin,df=m,method="L-BFGS-B",hessian=TRUE,control=list(maxit=400))  
19 | waitm=exp(solm$par[3:12])
20 | solf=optim(X0,flin,df=f,method="L-BFGS-B",hessian=TRUE,control=list(maxit=400))  
21 | waitf=exp(solf$par[3:12])
22 | graphics.off()
23 | windows(width=5,height=5)
24 | par(mfrow=c(1,1),mar=c(4.7,0,2.3,0.2),lwd=3,cex.lab=1.8,cex.axis=1.7,cex.main=1.7,oma=c(0,4.5,1,0.7))
25 | plot(years,waitm,cex=2,pch=1,xlab="Years Since Exposure",ylab="",yaxt="n",col="blue",ylim=c(0,8.2e-4))
26 | points(years,waitf,pch=2,cex=2,col="red")
27 | legend(25,8.6e-4,c("Males","Females"),pch=1:2,col=c("blue","red"),cex=1.8,bty="n")
28 | mtext(expression(paste("Cases per ",10^4," Person-Year-Sv")),side=2,line=2.6,cex=1.8,outer=T)
29 | axis(side=2,las=1, at=c(0,2e-4,4e-4,6e-4,8e-4),labels=c(0,2,4,6,8),outer=T)
30 | title("IR-to-CML Latency",outer=T,line=-1)
31 | print(sum(waitm)/sum(waitf)); print(MovF<-format(sum(waitm)/sum(waitf),digits=2))
32 | pm=waitm/sum(waitm)
33 | pf=waitf/sum(waitf)
34 | print(taum<-sprintf("%s yrs",format(years%*%pm,digits=3)))
35 | print(tauf<-sprintf("%s yrs",format(years%*%pf,digits=4)))
36 | mtext(bquote(tau[m] == .(taum)),side=3,line=-6,cex=1.5,adj=.9,col="blue")
37 | mtext(bquote(tau[f] == .(tauf)),side=3,line=-7.5,cex=1.5,adj=.9,col="red")
38 | mtext(bquote(M/F == .(MovF)),side=3,line=-9.3,cex=1.3,adj=.9)
39 | par(mar=c(5.1,4.1,4.1,2.1),oma=c(0,0,0,0)) # reset to standards


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/attic/seerEARmort/survByAgeG.R:
--------------------------------------------------------------------------------
 1 | source("uveal/common/setup.R")
 2 | # 2.  can we tell from SEER data that people in cured fraction were young and
 3 | # had smaller tumors?   #TR could be artifact of being young to live along enough 
 4 | # 4. Is old age poor prognosis (usually accepted to be so) is an
 5 | # artefact; Is there a difference between young and old (and other groups of
 6 | # SEER) that is statistcally significant ??
 7 | (d6=d%>%select(yrdx,agedx,sex,surv,status,CODS))
 8 | (d6=d6%>%mutate(status=ifelse((status==1)&(!CODS%in%c("melanoma","eye")),0,status))%>%select(-CODS))
 9 | d6=d6%>%mutate(ageG=cut(agedx,c(0,20,40,50,60,70,80,90,100)))
10 | table(d6$ageG)
11 | fit6=survfit(Surv(surv,status)~ageG,data=d6) 
12 | ggsurvplot(fit6,d6,legend.title="",legend.labs=levels(d6$ageG),legend=c(.2,.3))+gx 
13 | ggsave("uveal/outs/kmMFSageG.pdf",width=4.5,height=4.5)
14 | 
15 | library(cmprsk)
16 | (d=d%>%select(yrdx,agedx,sex,surv,status,CODS))
17 | (D2=d%>%mutate(status=ifelse((status==1)&(!CODS%in%c("melanoma","eye")),2,status)))
18 | D2=D2%>%mutate(ageG=cut(agedx,c(0,20,40,50,60,70,80,90,100)))
19 | fitc=with(D2,cuminc(surv,status,ageG))
20 | labs=c("Death by Melanoma","Death by Other Causes")
21 | plot(fitc,curvlab=labs,xlab="Years Since Diagnosis",ylab="Cumulative Probability of Death")
22 | (g=ggcompetingrisks(fitc)+sbb+gx+ylab("Cumulative Probability of Death")+
23 |     scale_color_manual(labels = labs, values = c("#F8766D", "#00BFC4")) +
24 |     theme(legend.position = "top",legend.title=element_blank()) +labs(title="",caption=""))
25 | ggsave("uveal/outs/cumIncAgeG.pdf",width=4.5,height=4.5)
26 | str(fitc)
27 | fitc[[1]]
28 | L=fitc[1:16]
29 | lapply(L,function(x) x[["est"]])
30 | sapply(L,function(x) tail(x$est,1))[1:8]
31 | 
32 | ##### OS plots give typical dependence
33 | range(d5$agedx)
34 | d5=d5%>%mutate(ageG=cut(agedx,c(0,20,40,50,60,70,80,90,100)))
35 | table(d5$ageG)
36 | fit5=survfit(Surv(surv,status)~ageG,data=d5) 
37 | ggsurvplot(fit5,d5,legend.title="",legend.labs=levels(d5$ageG))+gx
38 | ggsave("uveal/outs/kmOSageG.pdf",width=4.5,height=4.5)
39 | 
40 | 
41 | ddT=simSurv(d5,mrt,rep=1,ltb=NULL,unif=TRUE)
42 | ddT=ddT%>%mutate(ageG=cut(agedx,c(0,20,40,50,60,70,80,90,100)))%>%filter(type=="Simulated")
43 | fit=survfit(Surv(surv,status)~ageG,data=ddTs) 
44 | ggsurvplot(fit,ddT,legend.title="",legend.labs=levels(d6$ageG),xlim=c(0,43))+gx # +svts
45 | ggsave("uveal/outs/kmOSsimulatedNormals.pdf",width=4.5,height=4.5)
46 | 
47 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/leukemia2016/PYnMDSvsDBs.R:
--------------------------------------------------------------------------------
 1 | # PYnMDSvsDBs.R   (Figure 1)
 2 | # the following was made earlier using SEERaBomb's mkSEER
 3 | load("~/data/SEER/mrgd/popsa.RData")
 4 | library(dplyr)
 5 | library(ggplot2)
 6 | (pys=popsa%>%group_by(db,year)%>%summarize(npy=sum(py)))
 7 | (pys2=pys%>%mutate(year=year+0.999)) # to make step function plots hold flat across each year
 8 | pys=rbind(pys,pys2)
 9 | graphics.off() # start fresh on plots
10 | quartz(width=7,height=5)
11 | # quartz(width=7,height=5,dpi=300)
12 | # quartz(width=3.4,height=2.5,dpi=300)
13 | # theme_set(theme_bw())
14 | theme_update(axis.title = element_text(size = rel(2.3)),
15 |              axis.text = element_text(size = rel(1.5)),
16 |              legend.position = c(.1, .16),
17 |              legend.title=element_text(size=rel(1.2)),
18 |              legend.text=element_text(size=rel(1.2)),
19 |              strip.text = element_text(size = rel(1.5)))
20 | g=ggplot(pys)+geom_ribbon(aes(x=year,ymax=npy/1e6,ymin=0,fill=db))+facet_grid(db~.) #+ theme_bw()
21 | # g = g + scale_fill_grey(start = 0.8, end = 0)
22 | g=g+labs(y=expression(paste(10^6," Person Years")),x="Year") 
23 | g+guides(fill = guide_legend("SEER\nDatabase"))+scale_x_continuous(breaks=c(1973,seq(1980,2010,by=10))) 
24 | 
25 | ggsave("~/Results/amlMDS/fig1A.pdf")
26 | # ggsave("~/Results/amlMDS/fig1A.png")
27 | # ggsave("~/Results/amlMDS/fig1A.eps")
28 | 
29 | load("~/data/SEER/mrgd/cancDef.RData") #loads in canc
30 | mds=canc%>%filter(cancer=="MDS")
31 | mds=mds%>%filter(yrdx>2000) # filter out 22 scattered cases before 2001 (that may be typos, i.e. bad data)
32 | head(mds)
33 | (D=mds%>%group_by(db,yrdx)%>%summarize(cases=n()))
34 | (D2=D%>%mutate(yrdx=yrdx+0.999))
35 | D=rbind(D,D2)
36 | 
37 | # quartz(width=7,height=5,dpi=300)
38 | quartz(width=7,height=5)
39 | # quartz(width=3.4,height=2.5)
40 | theme_update(axis.title = element_text(size = rel(2.3)),
41 |              axis.text = element_text(size = rel(1.5)),
42 |              legend.position = "none",
43 |              strip.text = element_text(size = rel(1.5)))
44 | g=ggplot(D)+geom_ribbon(aes(x=yrdx,ymax=cases,ymin=0,fill=db))+facet_grid(db~.)
45 | # g = g + scale_fill_grey(start = 0.8, end = 0)
46 | g=g+labs(y="MDS Cases",x="Year")+scale_x_continuous(breaks=seq(2001,2015,by=2)) 
47 | g+scale_y_continuous(breaks=seq(0,2000,by=1000)) 
48 | ggsave("~/Results/amlMDS/fig1B.pdf")
49 | # ggsave("~/Results/amlMDS/fig1B.png")
50 | # ggsave("~/Results/amlMDS/fig1B.eps")
51 | 
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/seerSet2.Rd:
--------------------------------------------------------------------------------
 1 | \name{seerSet2}
 2 | \alias{seerSet2}
 3 | \title{Join SEER cancers and PY}
 4 | \description{Creates a sex-specific list of cancer and population person year (PY) data frames, possibly specific 
 5 | to a race and interval of ages at diagnosis, without month of diagnosis. }
 6 | \usage{seerSet2(canc,popsa,Sex, Race="pooled",ageStart=15,ageEnd=85)} 
 7 | \arguments{
 8 |   \item{canc}{Data frame of cancers that includes agedx, sex, race, yrdx,surv and cancer. }
 9 |   \item{popsa}{Data frame of population PY at 1-year age resolution.  }
10 |   \item{Sex}{"Male"  or "Female". }
11 |   \item{Race}{"white", "black", "other",  or "pooled" (default).}
12 |   \item{ageStart,ageEnd}{canc and popsa will be reduced to ages in ageEnd>age>=ageStart.}
13 | }
14 | 
15 | 
16 | \details{In the output: 1) 0.5 years is added to ages at diagnosis (agedx) 
17 |   to reverse SEER flooring to integers; 2) 0.5 months is added to survival months (again, to reverse flooring) 
18 |   before dividing by 12 to convert to years; 
19 | 3) year of diagnosis integers are converted to reals by adding to them 0.5 
20 | If ageEnd>85, popsae (extended to ages up to 99) should be used as the input for popsa. 
21 | If popsa is used, the age86 column of popsa will be replaced by an age column.
22 | The age86 and yrbrth columns of a canc are not used and will be removed if they happen to be present; 
23 | users should manually remove any other columns not needed to minimize seerSet object sizes.
24 | Sex and race columns in inputs are removed from outputs as they are specified
25 | in other (scalar) seerSet elements. Also removed from canc are cancer factor levels not present for that sex. 
26 | }
27 |    
28 | \value{A list containing sex specific subsets of canc and popsa and information regarding how they were reduced. }
29 |   
30 | 
31 | \author{Tom Radivoyevitch (radivot@ccf.org)}
32 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D}, \link{plot2D}} }
33 | \examples{
34 | \dontrun{ 
35 | library(SEERaBomb)
36 | simSeerSet() # without data, a simulated seerSet
37 | #  else, with data ...
38 | load("~/data/SEER/mrgd/cancDef.RData") #load in canc
39 | load("~/data/SEER/mrgd/popsae.RData") # load in popsae
40 | # trim columns
41 | library(dplyr)  
42 | canc=canc\%>\%select(-reg,-recno,-agerec,-numprims,-COD,
43 |           -age19,-age86,-radiatn,-ICD9,-db,-histo3) 
44 | popsae=popsae\%>\%select(-reg,-db)   
45 | seerSet2(canc,popsae,Sex="male",ageStart=0,ageEnd=100) 
46 | }
47 | }
48 | 
49 | \keyword{IO}
50 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/leukemia2016/incidAllFirsts.R:
--------------------------------------------------------------------------------
 1 | # incidAllFirsts.R (Figure 2)
 2 | rm(list=ls()) 
 3 | library(SEERaBomb)
 4 | library(ggplot2)
 5 | library(dplyr)
 6 | load("~/data/SEER/mrgd/cancDef.RData")
 7 | load("~/data/SEER/mrgd/popsae.RData") 
 8 | HM=c("AML","MDS","CMML","CML","MPN","ALL","CLL","SLL","HCL","OL","NHL","MM","HL","LGL")
 9 | 
10 | graphics.off()
11 | quartz(height=4.5,width=7) 
12 | # theme_set(theme_bw())
13 | theme_update(legend.position = c(.85, .21),
14 |              axis.text = element_text(size = rel(1.5)),
15 |              plot.title = element_text(size = rel(1.7),hjust = 0.5),
16 |              axis.title = element_text(size = rel(1.5)),
17 |              legend.text = element_text(size = rel(1.5)),
18 |              legend.title = element_text(size = rel(1.5))      )
19 | 
20 | for (HSC in c(FALSE,TRUE)) {
21 |   if (HSC)
22 |     m=canc%>%filter(cancer%in%HM)%>%mutate(age=agedx+0.5)%>%
23 |       group_by(sex,age)%>%summarise(cases=n()) else
24 |         m=canc%>%filter(!cancer%in%HM)%>%mutate(age=agedx+0.5)%>%
25 |           group_by(sex,age)%>%summarise(cases=n()) 
26 |       
27 |       pops=popsae%>%group_by(sex,age)%>%summarise(py=sum(py))
28 |       head(m)
29 |       s=data.frame(sex=sort(unique(m$sex)))
30 |       pL=left_join(s,pops)
31 |       head(pL)
32 |       d=left_join(pL,m)
33 |       d=d%>%mutate(py=py/1e5,incid=cases/py)
34 |       head(d)
35 |       names(d)[1]="Sex"
36 |       levels(d$Sex)=c("Male","Female")
37 |       g=qplot(age,incid,col=Sex,data=d,
38 |               main=ifelse(HSC,"All Hematologic Cancers Combined","All Non-Hematologic Cancers Combined"),
39 |               ylab="Cases/100,000 Person-Years",
40 |               xlab="Age",log="y")+geom_line() 
41 |       # g=g+ggtitle(ifelse(HSC,"Hematological Cancers","Non-Hematological Cancers"))
42 |       # g = g + scale_color_grey(start = 0, end = 0.6)
43 |       if (HSC) {brks=c(0,25,50,75,100)} else{
44 |         g=g+ geom_vline(xintercept = c(40,50,65),col="gray")
45 |         brks=c(0,25,40,50,65,75,100)
46 |       }
47 |       g=g+scale_x_continuous(breaks=brks,labels=brks)
48 |       print(g)
49 |       if (HSC) ggsave("~/Results/amlMDS/allCancersCombVsAgeHSC.pdf") else
50 |         ggsave("~/Results/amlMDS/allCancersCombVsAge.pdf")
51 |       if (HSC) ggsave("~/Results/amlMDS/allCancersCombVsAgeHSC.png") else
52 |         ggsave("~/Results/amlMDS/allCancersCombVsAge.png")
53 | }
54 | d=d%>%group_by(age)%>%mutate(r=incid[1]/incid[2])
55 | head(d)
56 | d$r
57 | mean(d$r)
58 | 


--------------------------------------------------------------------------------
/SEERaBomb/R/canc2py.R:
--------------------------------------------------------------------------------
 1 | canc2py=function(canc,firstS,secondS) {
 2 |   COD=agedx=age=age1=agec=agedx=cancer=cancer1=cancer2=casenum=modx=NULL
 3 |   py=seqnum=sex=surv=trt=trt1=year=yrbrth=yrdiffn=yrdx=yrdx1=yrdx2=NULL
 4 |   # library(SEERaBomb)
 5 |   # load("~/data/SEER/mrgd/cancDef.RData") 
 6 |   # firstS=c("NHL","HL","MM")
 7 |   # secondS=c("AML","MDS")
 8 |   canc=canc%>%filter(cancer%in%union(firstS,secondS))
 9 |   canc=canc%>%mutate(year=yrdx) 
10 |   canc=canc%>%mutate(yrdx=round(yrdx+(modx-0.5)/12,3))    #modx=1=January 
11 |   canc=canc%>%mutate(surv=round((surv+0.5)/12,3))%>%  
12 |     mutate(age=agedx+0.5) #convert ages at diagnosis to best guesses
13 |   canc=canc%>%select(casenum,yrbrth,sex,age,seqnum,year,yrdx,surv,COD,trt,cancer) 
14 |   D2=canc%>%filter(seqnum==2,cancer%in%secondS) # D2 holds second primaries
15 |   D2$cancer=factor(D2$cancer,levels=secondS) # get rid of levels not in secondS
16 |   D0=canc%>%filter(seqnum==0,surv<200,cancer%in%firstS)
17 |   D0$cancer=factor(D0$cancer,levels=firstS) # get rid of levels not in firstS. 
18 |   # head(D0,2)
19 |   D1=canc%>%filter(seqnum==1,cancer%in%firstS)%>%select(casenum,cancer,yrdx,age,trt,sex)  
20 |   D1$cancer=factor(D1$cancer,levels=firstS) # get rid of levels not in firstS
21 |   D1=D1%>%filter(casenum%in%D2$casenum) # reduce firsts to just those with a second in D2 
22 |   names(D1)[2:5]=c("cancer1","yrdx1","age1","trt1") #rename D1 cols so as not to join by them.
23 |   D2=D2%>%select(casenum,cancer2=cancer,yrdx2=yrdx,age2=age) # reduce D2 to cols we want to slap on 
24 |   D12=left_join(D2,D1,by="casenum") #Keeps all D2 rows, inserts missing where D1 doesn't match. 
25 |   D12=D12%>%filter(!is.na(yrdx1)) # removes firsts before 1973
26 |   D12=D12%>%mutate(yrdiffn=yrdx2-yrdx1)
27 |   D12$yrdiffn[D12$yrdiffn==0]=0.33/12 # if first and second are in the same month, assume 1/3 of a month apart
28 |   PY0=D0%>%mutate(py=surv) 
29 |   PY0=PY0%>%filter(py>0)  #get rid of py=0  rows upfront
30 |   PY0=PY0%>%mutate(agedx=age)
31 |   # PY0$yrdx=floor(PY0$yrdx)
32 |   PY0$cancer2="none"
33 |   PY0=PY0%>%select(yrdx,agedx,sex,py,cancer1=cancer,cancer2,trt)
34 |   PY12=D12%>%mutate(py=yrdiffn) 
35 |   PY12=PY12%>%filter(py>0)  #get rid of py=0 rows upfront
36 |   PY12=PY12%>%mutate(agedx=age1)
37 |    # PY12$yrdx=floor(PY12$yrdx1)
38 |   PY12$yrdx=PY12$yrdx1
39 |   PY12=PY12%>%select(yrdx,agedx,sex,py,cancer1,cancer2,trt=trt1)
40 |   d=rbind(PY0,PY12)
41 |   d$cancer2=factor(d$cancer2)
42 |   d$yeaR=d$yrdx
43 |   d$yrdx=floor(d$yrdx)
44 |   d
45 | }
46 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/attic/REB2014/baseGraphics/CMLwaitCI.R:
--------------------------------------------------------------------------------
 1 | # Figure S1: Plots the CI of the Fk estimates in Fig. 2
 2 | aBombHome="/data/abomb"
 3 | cols=c("city","sex","doseg","agexg","calg","kerma","PY","adjPY","num.entering",
 4 | 		"age","agex","tsx","cal","sv","gam","neut","lymphoma","NHL","leukemia","AML","ALL","CML","ATL","MM")      
 5 | d<-read.table(file.path(aBombHome,"hema87.dat"), header=F,col.names=cols);
 6 | d=d[d$adjPY>0,] #remove two recs with zero py
 7 | d=d[d$kerma==1,] # take only kerma < 4 Gy
 8 | d$py=10^4*d$adjPY
 9 | m=d[d$sex==1,]; f=d[d$sex==2,] 
10 | years=c(1951,1953,1956,1959,1963,1968,1973,1978,1983,1987)-1945
11 | agem=0
12 | flin<-function(x,df) {
13 | 	c0=x[1];k=x[2];L=x[3:12]; # let data speak through L, like a one way anova
14 | 	with(df,{mn = exp(c0+k*(age-agem))*py + sv*exp(L[calg])*py;
15 | 				-sum(CML*log(mn) - mn)})	}
16 | 
17 | X0=c(c0=ifelse(agem==0,-13,-10),k=0.05,rep(-10,10))
18 | solm=optim(X0,flin,df=m,method="L-BFGS-B",hessian=TRUE,control=list(maxit=400))  
19 | waitm=exp(solm$par[3:12])
20 | solf=optim(X0,flin,df=f,method="L-BFGS-B",hessian=TRUE,control=list(maxit=400))  
21 | waitf=exp(solf$par[3:12])
22 | getCI<-function(sol) {
23 | 	if (det(sol$hessian)>0) {
24 | 		sig=sqrt(diag(solve(sol$hessian)))
25 | 		print("Hessian OK") 
26 | 	} else {
27 | 		sig=Inf
28 | 		print("Hessian Singular") 
29 | 	}
30 | 	upper=signif(sol$par+1.96*sig,5)
31 | 	lower=signif(sol$par-1.96*sig,5)
32 | 	point=signif(sol$par,5)
33 | 	CI=cbind(point,lower,upper)
34 | 	CI=exp(CI) 
35 | 	row.names(CI)=1:nrow(CI)
36 | 	CI=transform(CI,li=point-lower,ui=upper-point,len=upper-lower)
37 | 	CI
38 | }
39 | (mCI=getCI(solm))
40 | (fCI=getCI(solf))
41 | 
42 | mui=mCI[3:12,"ui"]
43 | mli=mCI[3:12,"li"]
44 | mpt=mCI[3:12,"point"]
45 | 
46 | fui=fCI[3:12,"ui"]
47 | fli=fCI[3:12,"li"]
48 | fpt=fCI[3:12,"point"]
49 | 
50 | require(plotrix)
51 | windows(width=5,height=5)
52 | par(mfrow=c(1,1),mar=c(4.7,4.7,2.3,0.2),lwd=2,cex.lab=1.8,cex.axis=1.7,cex.main=1.7)
53 | plotCI(x=years,y=mpt,uiw=mui,liw=mli,ylab="",yaxt="n",col="blue",xlab="Years Since Exposure",ylim=c(0,8.2e-4),pch="x",cex=2)
54 | plotCI(x=years+.3,y=fpt,uiw=fui,liw=fli,col="red",add=T,pch="o",cex=2)
55 | axis(side=2,las=1, at=c(0,2e-4,4e-4,6e-4,8e-4),labels=c(0,2,4,6,8))
56 | mtext(expression(paste("Cases per ",10^4," Person-Year-Sv")),side=2,line=2.6,cex=1.8)
57 | mtext("Males (x)",side=3,line=.6,cex=1.8,col="blue",adj=0.05)
58 | mtext("Females (o)",side=3,line=.6,cex=1.8,col="red",adj=0.95)
59 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/CMLmysteries/thyr2CMLfig1d.R:
--------------------------------------------------------------------------------
 1 | ### thyr2CMLfig1d.R   
 2 | # source("CML/naga/common.R")#run it first directly
 3 | secs=c("CML")#second cancers of interest
 4 | (d=canc%>%filter(cancer%in%c("thyroid",secs)))
 5 | load("~/data/SEER/mrgd/popsae.RData")
 6 | pf=seerSet(d,popsae,Sex="Female");pm=seerSet(d,popsae,Sex="Male") 
 7 | pf=mk2D(pf,secondS=secs);pm=mk2D(pm,secondS=secs)
 8 | trts=c("rad.noChemo","noRad.noChemo")
 9 | pf=csd(pf,brkst=c(0,1,6,10),brksa=c(0,60),trts=trts,firstS="thyroid")
10 | pm=csd(pm,brkst=c(0,1,6,10),brksa=c(0,60),trts=trts,firstS="thyroid")
11 | DF=bind_rows(pf$DF,pm$DF)
12 | gx=xlab("Years Since Thyroid Cancer Diagnosis")
13 | gy=ylab("Relative Risk of CML")
14 | Ds=DF%>%group_by(sex,int,rad)%>%summarize(O=sum(O),E=sum(E),t=mean(t))
15 | Ds=Ds%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))
16 | Ds=Ds%>%filter(rad=="Rad")
17 | summary(m1<-mle2(O~dpois(lambda=(1+k1^3*t^2*exp(c1-k1*t))*E),
18 |                  # parameters=list(c1~sex,k1~sex),
19 |                  # parameters=list(k1~sex),
20 |                  # parameters=list(c1~sex),
21 |                  method="Nelder-Mead",start=list(c1=2.8,k1=.22),data=Ds,
22 |                  control=list(maxit=10000))) 
23 | pd=data.frame(t=seq(0,15,0.1),E=100)
24 | (ds=data.frame(sex=c("Male","Female")))
25 | pd=merge(ds,pd)
26 | pd$Err=predict(m1,pd)/pd$E
27 | Ds%>%ggplot(aes(x=t,y=RR,shape=sex))+
28 |   geom_point(size=3)+
29 |   gx+gy+gh+tc(14)+jco+sbb+ltb+gp+gl+geRR+cc+
30 |   geom_line(aes(y=Err),size=.5,alpha=.4,data=pd)+
31 |   theme(legend.position=c(.62,.75),legend.title=element_blank())
32 | ggsave("~/Results/CML/thyr2CMLfig2B.pdf",width=4,height=3)
33 | #  END Figure 1. Plot of thyroid cancer to CML kinetics 
34 | #  Begin Calculation of latency of fitted curve
35 | (cf=coef(summary(m1)))
36 | (m1=round(cbind(point=cf[,1],LL=cf[,1]-1.96*cf[,2],UL=cf[,1]+1.96*cf[,2]),2))
37 | (P=signif(cf[,4],2))
38 | parms=row.names(cf)
39 | (str=str_c(parms," = ",m1[,1]," (",m1[,2],", ",m1[,3],"), P = ",P,collapse="; "))
40 | del=0.1
41 | t=seq(0,100,del)
42 | k=0.49
43 | p=k^3*t^2*exp(-k*t)/2
44 | sum(p)*del  #prove we have a prob density
45 | p=k^3*t^3*exp(-k*t)/2
46 | sum(p)*del  # mean of the density with k at its mean is 6.1 y
47 | k=0.36
48 | p=k^3*t^3*exp(-k*t)/2
49 | sum(p)*del  # mean with k at its lower limit => latency upper limit
50 | k=0.62
51 | p=k^3*t^3*exp(-k*t)/2
52 | sum(p)*del  # mean with k at its upper limit => latency lower limit
53 | # END calculations of latency of smooth curve in Figure 1
54 | 
55 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/uveal/attic/other/UVcsd.R:
--------------------------------------------------------------------------------
 1 | graphics.off();rm(list=ls())#clear plots and environment
 2 | library(tidyverse);library(SEERaBomb);library(ggsci)
 3 | library(survival);library(survminer);library(bbmle)
 4 | load("~/data/SEER/mrgd/cancPrim.RData")#load SEER cancer data
 5 | load("~/data/SEER/mrgd/popsae.RData")
 6 | (codes=paste0("C",692:694))
 7 | canc$cancer=as.character(canc$cancer)
 8 | canc[(canc$primsite%in%codes)&(canc$histo3%in%8720:8790),"cancer"]="uveal"  
 9 | 
10 | secs=c("uveal")#second cancer of interest
11 | gp=geom_point();gl=geom_line()
12 | geRR=geom_errorbar(aes(ymin=rrL,ymax=rrU),width=.2)
13 | ge=geom_errorbar(aes(ymin=LL,ymax=UL),width=0.2)#for absolute risks
14 | gh=geom_hline(yintercept=1)
15 | svts=scale_x_continuous(breaks=c(0,5,10))#surv times
16 | agts=scale_x_continuous(breaks=c(25,50,75))#age times
17 | sbb=theme(strip.background=element_blank())
18 | ltb=theme(legend.margin=margin(0,0,0,0),legend.title=element_blank())
19 | ltp=theme(legend.position="top")
20 | lh=theme(legend.direction="horizontal")
21 | sy=scale_y_log10()
22 | jco=scale_color_jco()
23 | tc=function(sz) theme_classic(base_size=sz);
24 | pf=seerSet(canc,popsae,Sex="Female");pm=seerSet(canc,popsae,Sex="Male") 
25 | pf=mk2D(pf,secondS=secs);pm=mk2D(pm,secondS=secs)
26 | # plot2D(pm)
27 | # plot2D(pf)
28 | mybrks=c(0,1,2,3,5,10)
29 | pf=csd(pf,brkst=mybrks)
30 | pm=csd(pm,brkst=mybrks)
31 | DF=bind_rows(pf$DF,pm$DF)
32 | D=DF%>%group_by(int,rad)%>%summarize(O=sum(O),E=sum(E),t=mean(t,na.rm=T))
33 | D=D%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))
34 | D=D%>%filter(rad!="Unk")
35 | D
36 | gx=xlab("Years Since 1st Cancer Diagnosis")
37 | gy=ylab("Relative Risk of Uveal Cancer")
38 | D%>%ggplot(aes(x=t,y=RR,col=rad))+ 
39 |   gp+gl+gx+gy+gh+geRR+tc(14)+jco+sbb+ltb+ltp+lh
40 | ggsave("uveal/outs/csdRadvsNot.pdf",width=4,height=3)
41 | 
42 | (lab=paste0("b",paste(mybrks,collapse="_")))
43 | mkExcelCsd(pf,lab,outDir="uveal/outs",outName="csdF",flip=T)
44 | mkExcelCsd(pm,lab,outDir="uveal/outs",outName="csdM",flip=T)
45 | 
46 | D=DF%>%filter(cancer1!="uveal")%>%group_by(int,rad)%>%summarize(O=sum(O),E=sum(E),t=mean(t,na.rm=T))
47 | D=D%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))
48 | D=D%>%filter(rad!="Unk")
49 | D
50 | gx=xlab("Years Since 1st Non-Uveal Dx")
51 | gy=ylab("Relative Risk of Uveal Cancer")
52 | D%>%ggplot(aes(x=t,y=RR,col=rad))+ 
53 |   gp+gl+gx+gy+gh+geRR+tc(14)+jco+sbb+ltb+ltp+lh
54 | ggsave("uveal/outs/csdRadvsNotAndNotUV.pdf",width=4,height=3)
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/seerSet.Rd:
--------------------------------------------------------------------------------
 1 | \name{seerSet}
 2 | \alias{seerSet}
 3 | \title{Join SEER cancers and PY}
 4 | \description{Creates a sex-specific list of cancer and population person year (PY) data frames, possibly specific 
 5 | to a race and interval of ages at diagnosis. }
 6 | \usage{seerSet(canc,popsa,Sex, Race="pooled",ageStart=15,ageEnd=85)} 
 7 | \arguments{
 8 |   \item{canc}{Data frame of cancers that includes agedx, sex, race, yrdx, modx, surv and cancer. }
 9 |   \item{popsa}{Data frame of population PY at 1-year age resolution.  }
10 |   \item{Sex}{"Male"  or "Female". }
11 |   \item{Race}{"white", "black", "other",  or "pooled" (default).}
12 |   \item{ageStart,ageEnd}{canc and popsa will be reduced to ages in ageEnd>age>=ageStart.}
13 | }
14 | 
15 | 
16 | \details{In the output: 1) 0.5 years is added to ages at diagnosis (agedx) 
17 |   to reverse SEER flooring to integers; 2) 0.5 months is added to survival months (again, to reverse flooring) 
18 |   before dividing by 12 to convert to years; 
19 | 3) year of diagnosis integers are converted to reals by adding to them the month of diagnosis (modx) - 0.5 
20 | divided by 12 (note that a modx of 1 represents anytime in the month of January).
21 | If ageEnd>85, popsae (extended to ages up to 99) should be used as the input for popsa. 
22 | If popsa is used, the age86 column of popsa will be replaced by an age column.
23 | The age86 and yrbrth columns of a canc are not used and will be removed if they happen to be present; 
24 | users should manually remove any other columns not needed to minimize seerSet object sizes.
25 | Sex and race columns in inputs are removed from outputs as they are specified
26 | in other (scalar) seerSet elements. Also removed from canc are cancer factor levels not present for that sex. 
27 | }
28 |    
29 | \value{A list containing sex specific subsets of canc and popsa and information regarding how they were reduced. }
30 |   
31 | 
32 | \author{Tom Radivoyevitch (radivot@ccf.org)}
33 | \seealso{\code{\link{SEERaBomb-package}, \link{mk2D}, \link{plot2D}} }
34 | \examples{
35 | \dontrun{ 
36 | library(SEERaBomb)
37 | simSeerSet() # without data, a simulated seerSet
38 | #  else, with data ...
39 | load("~/data/SEER/mrgd/cancDef.RData") #load in canc
40 | load("~/data/SEER/mrgd/popsae.RData") # load in popsae
41 | # trim columns
42 | library(dplyr)  
43 | canc=canc\%>\%select(-reg,-recno,-agerec,-numprims,-COD,
44 |           -age19,-age86,-radiatn,-ICD9,-db,-histo3) 
45 | popsae=popsae\%>\%select(-reg,-db)   
46 | seerSet(canc,popsae,Sex="male",ageStart=0,ageEnd=100) 
47 | }
48 | }
49 | 
50 | \keyword{IO}
51 | 


--------------------------------------------------------------------------------
/SEERaBomb/inst/docs/papers/tutorial/attic/csdDemo.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls());library(tidyverse);library(SEERaBomb);library(ggsci)
 2 | myt=theme(legend.title=element_blank(),legend.margin=margin(0,0,0,0),
 3 |           legend.direction="horizontal",legend.key.height=unit(.25,'lines'),
 4 |           legend.position=c(.5,.95),strip.background=element_blank())
 5 | ge=geom_errorbar(aes(ymin=rrL,ymax=rrU),width=0.1)
 6 | gy=ylab("Relative Risk of Leukemia");cc=coord_cartesian(ylim=c(0,25))
 7 | gp=geom_point();gl=geom_line();gh=geom_hline(yintercept=1)
 8 | jco=scale_color_jco();tc=theme_classic(base_size=14)
 9 | 
10 | load("~/data/SEER/mrgd/cancDef.RData");load("~/data/SEER/mrgd/popsae.RData") 
11 | canc$cancer=fct_collapse(canc$cancer,AML=c("AML","AMLti","APL"))
12 | secs=c("AML","ALL","CML") 
13 | (d=canc%>%filter(sex=="Female",cancer%in%c("breast",secs)))
14 | pf=seerSet(d,popsae,Sex="Female")#pooled (races) females 
15 | pf=mk2D(pf,secondS=secs)#adds secs background rates to pf
16 | trts=c("rad.chemo","rad.noChemo","noRad.chemo","noRad.noChemo")
17 | pf=csd(pf,brkst=c(0,1,2,3,5,10),brksa=c(0,60),trts=trts,firstS="breast")
18 | (dA=pf$DF%>%filter(ageG=="(0,60]")) 
19 | gx=xlab("Years Since Breast Cancer Diagnosis")
20 | g=ggplot(dA,aes(x=t,y=RR,col=cancer2))+gp+gl+gx+gy+gh+ge+tc+myt+cc+jco
21 | g+facet_grid(rad~chemo) 
22 | ggsave("~/Results/tutorial/breast2leu.pdf",width=4,height=4)#Fig.4A 
23 | 
24 | (d=canc%>%filter(cancer%in%c("thyroid",secs)))
25 | pf=seerSet(d,popsae,Sex="Female");pm=seerSet(d,popsae,Sex="Male") 
26 | pf=mk2D(pf,secondS=secs);pm=mk2D(pm,secondS=secs)
27 | trts=c("rad.noChemo","noRad.noChemo")
28 | pf=csd(pf,brkst=c(0,1,2,3,5,10),brksa=c(0,60),trts=trts,firstS="thyroid")
29 | pm=csd(pm,brkst=c(0,1,2,3,5,10),brksa=c(0,60),trts=trts,firstS="thyroid")
30 | DF=bind_rows(pf$DF,pm$DF)
31 | D=DF%>%group_by(int,rad,cancer2)%>%summarize(O=sum(O),E=sum(E),t=mean(t))
32 | D=D%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))
33 | Dtop=D%>%mutate(grp=str_c(rad,": All Ages"))
34 | D=DF%>%filter(rad=="Rad")
35 | D=D%>%group_by(int,ageG,cancer2)%>%summarize(O=sum(O),E=sum(E),t=mean(t))
36 | D=D%>%mutate(RR=O/E,rrL=qchisq(.025,2*O)/(2*E),rrU=qchisq(.975,2*O+2)/(2*E))
37 | D$ageG=c("Age 0-60","Age >60")[D$ageG]
38 | Dbot=D%>%mutate(grp=str_c("Rad: ",ageG))
39 | dB=bind_rows(Dtop,Dbot)
40 | dB$grp=as_factor(dB$grp)#orders by occurrence, as wanted
41 | gx=xlab("Years Since Thyroid Cancer Diagnosis")
42 | g=ggplot(dB,aes(x=t,y=RR,col=cancer2))+gp+gl+gx+gy+gh+ge+tc+myt+cc+jco
43 | g+facet_wrap(~grp) 
44 | ggsave("~/Results/tutorial/thyroid2leu.pdf",width=4,height=4)
45 | 
46 | 
47 | 


--------------------------------------------------------------------------------
/SEERaBomb/man/mkSEER.Rd:
--------------------------------------------------------------------------------
 1 | \name{mkSEER}
 2 | \alias{mkSEER}
 3 | \title{ Make R binaries of SEER data.}
 4 | \description{Converts SEER ASCII text files into large R binaries that include all cancer types and registries combined.}
 5 | \usage{mkSEER(df,seerHome="~/data/SEER",outDir="mrgd",outFile="cancDef",
 6 |                   indices = list(c("sex","race"), c("histo3","seqnum"),  "ICD9"),
 7 |                   writePops=TRUE,writeRData=TRUE,writeDB=FALSE)}
 8 | 
 9 | \arguments{
10 |   \item{df}{ A data frame that was the output of \code{pickFields()}. This determines which fields to transfer. 
11 |   Using the output of \code{getFields()} is a common mistake that must be avoided.}
12 |   \item{seerHome}{ The directory that contains the SEER \file{population} and \file{incidence} directories.  This should be writable by the user.}
13 |   \item{outDir}{seerHome subdirectory to write to. Default is \file{mrgd} for all registries merged together.}
14 |   \item{outFile}{Base name of the SQLite database and cancer binary. Default = cancDef (Cancer Default). }           
15 |   \item{indices}{Passed to \code{copy_to()} in \pkg{dplyr}. }           
16 |   \item{writePops}{TRUE if you wish to write out the population data frame binaries. Doing so takes ~10 seconds, so
17 |   savings of FALSE are small.}           
18 |   \item{writeRData}{TRUE if you wish to write out the cancer data frame binary. Writing files takes most of the time. }           
19 |   \item{writeDB}{TRUE if you wish to write cancer, popga, popsa, and popsae data frames to SQLite database tables.  }           
20 | }
21 | 
22 | \details{This function uses the R package \pkg{LaF} to access the fixed-width format data files 
23 | of SEER. \pkg{LaF} is fast, but it requires knowledge of all the widths of columns wanted, as well as the the widths of unwanted stretches in between. This knowledge is produced by \code{getFields()} and \code{pickFields()} combined. It is passed to \code{mkSEER()} via the argument \code{df}. 
24 | }
25 | \value{None, it produces R binary files of the SEER data.}
26 | 
27 | \note{ This takes a substantial amount of RAM (it works on a Mac with 16 GB of RAM) and time (~3 minutes
28 | using default fields).}
29 | \author{ Tom Radivoyevitch (\email{radivot@ccf.org}) }
30 | \seealso{\code{\link{SEERaBomb-package},\link{getFields},\link{pickFields}} }
31 | \examples{
32 | \dontrun{
33 | library(SEERaBomb)
34 | (df=getFields())
35 | (df=pickFields(df))
36 | # the following will take a several minutes, but may only need 
37 | # to be done roughly once per year, with each release.
38 | mkSEER(df)
39 | }
40 | }
41 | \keyword{IO}
42 | 


--------------------------------------------------------------------------------